author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2010-01-26 12:50:16 -0500
committer	Greg Kroah-Hartman <gregkh@suse.de>	2010-03-15 12:06:17 -0400
commit	21a6adcde06e129b055caa3256e65a97a2986770 (patch)
tree	56663f2682b5114b92335c7c53ce26e1449ac8cf
parent	69cb5f7cdc28a5352a03c16bbaa0a92cdf31b9d4 (diff)
perf: Reimplement frequency driven sampling
commit abd50713944c8ea9e0af5b7bffa0aacae21cc91a upstream.

There was a bug in the old period code that caused intel_pmu_enable_all() or native_write_msr_safe() to show up quite high in the profiles.

Staring at that code made my head hurt, so I rewrote it in a hopefully simpler fashion. It is now fully symmetric between tick and overflow driven adjustments and uses less data to boot.

The only complication is that it basically wants to do a u128 division. The code approximates that in a rather simple truncate-until-it-fits fashion, taking care to balance the terms while truncating.

This version does not generate that sampling artefact.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
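As a rough standalone illustration (not part of the patch), the sketch below mirrors the balanced truncate-until-it-fits logic that the new perf_calculate_period() in kernel/perf_event.c uses, and checks it against an exact 128-bit division. The names approx_period, fls64_approx and REDUCE are invented for this sketch; it is plain user-space C relying on the GCC/Clang __builtin_clzll() and unsigned __int128 extensions.

#include <stdio.h>
#include <stdint.h>

/* fls64() equivalent: 1-based index of the highest set bit, 0 for x == 0. */
static int fls64_approx(uint64_t x)
{
	return x ? 64 - __builtin_clzll(x) : 0;
}

/* period = (count * NSEC_PER_SEC) / (nsec * freq), where either product
 * may overflow 64 bits; drop low-order bits in a balanced way until a
 * plain u64/u64 division is possible. */
static uint64_t approx_period(uint64_t count, uint64_t nsec, uint64_t freq)
{
	uint64_t sec = 1000000000ULL;	/* NSEC_PER_SEC */
	uint64_t dividend, divisor;
	int count_fls = fls64_approx(count);
	int nsec_fls = fls64_approx(nsec);
	int freq_fls = fls64_approx(freq);
	int sec_fls = 30;		/* fls64(NSEC_PER_SEC) == 30 */

	/* Drop one bit from the larger term of a pair, keeping them balanced. */
#define REDUCE(a, b) do {			\
		if (a##_fls > b##_fls) {	\
			a >>= 1; a##_fls--;	\
		} else {			\
			b >>= 1; b##_fls--;	\
		}				\
	} while (0)

	/* Truncate until at least one of the two products fits in 64 bits. */
	while (count_fls + sec_fls > 64 && nsec_fls + freq_fls > 64) {
		REDUCE(nsec, freq);
		REDUCE(sec, count);
	}

	if (count_fls + sec_fls > 64) {
		divisor = nsec * freq;
		while (count_fls + sec_fls > 64) {
			REDUCE(count, sec);
			divisor >>= 1;	/* dividend lost a bit; compensate */
		}
		dividend = count * sec;
	} else {
		dividend = count * sec;
		while (nsec_fls + freq_fls > 64) {
			REDUCE(nsec, freq);
			dividend >>= 1;	/* divisor lost a bit; compensate */
		}
		divisor = nsec * freq;
	}

	return divisor ? dividend / divisor : 0;
}

int main(void)
{
	/* 2,000,000 events counted over 1 ms, with a 4000 Hz sampling target. */
	uint64_t count = 2000000, nsec = 1000000, freq = 4000;
	unsigned __int128 exact = (unsigned __int128)count * 1000000000ULL
				  / ((unsigned __int128)nsec * freq);

	printf("approx period = %llu, exact period = %llu\n",
	       (unsigned long long)approx_period(count, nsec, freq),
	       (unsigned long long)exact);
	return 0;
}

For these sample inputs no truncation is needed and both paths report a period of 500000; bits only start being dropped once count * NSEC_PER_SEC or nsec * sample_freq would overflow 64 bits, and because one bit is always taken from the larger term of a pair, the relative error stays small.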
-rw-r--r--	include/linux/perf_event.h	5
-rw-r--r--	kernel/perf_event.c	132
2 files changed, 94 insertions(+), 43 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a177698d95e2..c8ea0c77a625 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -496,9 +496,8 @@ struct hw_perf_event {
 	atomic64_t		period_left;
 	u64			interrupts;
 
-	u64			freq_count;
-	u64			freq_interrupts;
-	u64			freq_stamp;
+	u64			freq_time_stamp;
+	u64			freq_count_stamp;
 #endif
 };
 
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 2fa4301ffadb..b707465b0613 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1350,14 +1350,83 @@ static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
 
 static void perf_log_throttle(struct perf_event *event, int enable);
 
-static void perf_adjust_period(struct perf_event *event, u64 events)
+static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
+{
+	u64 frequency = event->attr.sample_freq;
+	u64 sec = NSEC_PER_SEC;
+	u64 divisor, dividend;
+
+	int count_fls, nsec_fls, frequency_fls, sec_fls;
+
+	count_fls = fls64(count);
+	nsec_fls = fls64(nsec);
+	frequency_fls = fls64(frequency);
+	sec_fls = 30;
+
+	/*
+	 * We got @count in @nsec, with a target of sample_freq HZ
+	 * the target period becomes:
+	 *
+	 *             @count * 10^9
+	 * period = -------------------
+	 *          @nsec * sample_freq
+	 *
+	 */
+
+	/*
+	 * Reduce accuracy by one bit such that @a and @b converge
+	 * to a similar magnitude.
+	 */
+#define REDUCE_FLS(a, b)		\
+do {					\
+	if (a##_fls > b##_fls) {	\
+		a >>= 1;		\
+		a##_fls--;		\
+	} else {			\
+		b >>= 1;		\
+		b##_fls--;		\
+	}				\
+} while (0)
+
+	/*
+	 * Reduce accuracy until either term fits in a u64, then proceed with
+	 * the other, so that finally we can do a u64/u64 division.
+	 */
+	while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
+		REDUCE_FLS(nsec, frequency);
+		REDUCE_FLS(sec, count);
+	}
+
+	if (count_fls + sec_fls > 64) {
+		divisor = nsec * frequency;
+
+		while (count_fls + sec_fls > 64) {
+			REDUCE_FLS(count, sec);
+			divisor >>= 1;
+		}
+
+		dividend = count * sec;
+	} else {
+		dividend = count * sec;
+
+		while (nsec_fls + frequency_fls > 64) {
+			REDUCE_FLS(nsec, frequency);
+			dividend >>= 1;
+		}
+
+		divisor = nsec * frequency;
+	}
+
+	return div64_u64(dividend, divisor);
+}
+
+static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	u64 period, sample_period;
 	s64 delta;
 
-	events *= hwc->sample_period;
-	period = div64_u64(events, event->attr.sample_freq);
+	period = perf_calculate_period(event, nsec, count);
 
 	delta = (s64)(period - hwc->sample_period);
 	delta = (delta + 7) / 8; /* low pass filter */
@@ -1368,13 +1437,22 @@ static void perf_adjust_period(struct perf_event *event, u64 events)
 		sample_period = 1;
 
 	hwc->sample_period = sample_period;
+
+	if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+		perf_disable();
+		event->pmu->disable(event);
+		atomic64_set(&hwc->period_left, 0);
+		event->pmu->enable(event);
+		perf_enable();
+	}
 }
 
 static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 {
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	u64 interrupts, freq;
+	u64 interrupts, now;
+	s64 delta;
 
 	raw_spin_lock(&ctx->lock);
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
@@ -1395,44 +1473,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 		if (interrupts == MAX_INTERRUPTS) {
 			perf_log_throttle(event, 1);
 			event->pmu->unthrottle(event);
-			interrupts = 2*sysctl_perf_event_sample_rate/HZ;
 		}
 
 		if (!event->attr.freq || !event->attr.sample_freq)
 			continue;
 
-		/*
-		 * if the specified freq < HZ then we need to skip ticks
-		 */
-		if (event->attr.sample_freq < HZ) {
-			freq = event->attr.sample_freq;
-
-			hwc->freq_count += freq;
-			hwc->freq_interrupts += interrupts;
-
-			if (hwc->freq_count < HZ)
-				continue;
-
-			interrupts = hwc->freq_interrupts;
-			hwc->freq_interrupts = 0;
-			hwc->freq_count -= HZ;
-		} else
-			freq = HZ;
-
-		perf_adjust_period(event, freq * interrupts);
+		event->pmu->read(event);
+		now = atomic64_read(&event->count);
+		delta = now - hwc->freq_count_stamp;
+		hwc->freq_count_stamp = now;
 
-		/*
-		 * In order to avoid being stalled by an (accidental) huge
-		 * sample period, force reset the sample period if we didn't
-		 * get any events in this freq period.
-		 */
-		if (!interrupts) {
-			perf_disable();
-			event->pmu->disable(event);
-			atomic64_set(&hwc->period_left, 0);
-			event->pmu->enable(event);
-			perf_enable();
-		}
+		if (delta > 0)
+			perf_adjust_period(event, TICK_NSEC, delta);
 	}
 	raw_spin_unlock(&ctx->lock);
 }
@@ -3688,12 +3740,12 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 
 	if (event->attr.freq) {
 		u64 now = perf_clock();
-		s64 delta = now - hwc->freq_stamp;
+		s64 delta = now - hwc->freq_time_stamp;
 
-		hwc->freq_stamp = now;
+		hwc->freq_time_stamp = now;
 
-		if (delta > 0 && delta < TICK_NSEC)
-			perf_adjust_period(event, NSEC_PER_SEC / (int)delta);
+		if (delta > 0 && delta < 2*TICK_NSEC)
+			perf_adjust_period(event, delta, hwc->last_period);
 	}
 
 	/*