author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2010-01-26 12:50:16 -0500
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2010-03-15 12:06:17 -0400
commit | 21a6adcde06e129b055caa3256e65a97a2986770 (patch)
tree | 56663f2682b5114b92335c7c53ce26e1449ac8cf
parent | 69cb5f7cdc28a5352a03c16bbaa0a92cdf31b9d4 (diff)
perf: Reimplement frequency driven sampling
commit abd50713944c8ea9e0af5b7bffa0aacae21cc91a upstream.
There was a bug in the old period code that caused intel_pmu_enable_all()
or native_write_msr_safe() to show up quite high in the profiles.
Staring at that code made my head hurt, so I rewrote it in a
hopefully simpler fashion. It is now fully symmetric between tick-driven
and overflow-driven adjustments and uses less data to boot.
The only complication is that it basically wants to do a u128 division.
The code approximates that in a rather simple truncate-until-it-fits
fashion, taking care to balance the terms while truncating.
This version does not generate that sampling artefact.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
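To make the truncate-until-it-fits idea easier to follow outside the kernel tree, here is a minimal userspace sketch of the same scaled division, period = (count * 10^9) / (nsec * sample_freq), done without 128-bit arithmetic. It mirrors the shape of perf_calculate_period() below but is illustrative only: fls64() and div64_u64() are replaced by portable stand-ins, and the helper names and example numbers are assumptions, not part of this patch.

/*
 * Illustrative userspace sketch of the truncate-until-it-fits division:
 * approximate (count * 10^9) / (nsec * freq) using only 64-bit math by
 * shaving low-order bits off the terms until each product fits in a u64.
 */
#include <stdint.h>
#include <stdio.h>

/* Portable stand-in for fls64(): highest set bit, 1-based; 0 for x == 0. */
static int fls64_approx(uint64_t x)
{
	int bits = 0;

	while (x) {
		x >>= 1;
		bits++;
	}
	return bits;
}

/* Drop one bit of accuracy from whichever term is currently larger. */
static void reduce_fls(uint64_t *a, int *a_fls, uint64_t *b, int *b_fls)
{
	if (*a_fls > *b_fls) {
		*a >>= 1;
		(*a_fls)--;
	} else {
		*b >>= 1;
		(*b_fls)--;
	}
}

static uint64_t calc_period(uint64_t nsec, uint64_t count, uint64_t freq)
{
	uint64_t sec = 1000000000ULL;	/* NSEC_PER_SEC */
	uint64_t dividend, divisor;
	int count_fls = fls64_approx(count);
	int nsec_fls = fls64_approx(nsec);
	int freq_fls = fls64_approx(freq);
	int sec_fls = 30;		/* log2(10^9) is just under 30 */

	/* Balance the terms until at least one product fits in a u64. */
	while (count_fls + sec_fls > 64 && nsec_fls + freq_fls > 64) {
		reduce_fls(&nsec, &nsec_fls, &freq, &freq_fls);
		reduce_fls(&sec, &sec_fls, &count, &count_fls);
	}

	if (count_fls + sec_fls > 64) {
		divisor = nsec * freq;
		while (count_fls + sec_fls > 64) {
			reduce_fls(&count, &count_fls, &sec, &sec_fls);
			divisor >>= 1;	/* keep the ratio balanced */
		}
		dividend = count * sec;
	} else {
		dividend = count * sec;
		while (nsec_fls + freq_fls > 64) {
			reduce_fls(&nsec, &nsec_fls, &freq, &freq_fls);
			dividend >>= 1;	/* keep the ratio balanced */
		}
		divisor = nsec * freq;
	}

	return divisor ? dividend / divisor : 0;
}

int main(void)
{
	/* Example: 2,000,000 events in one 1 ms window, target 4000 samples/sec. */
	printf("period = %llu\n",
	       (unsigned long long)calc_period(1000000ULL, 2000000ULL, 4000ULL));
	return 0;
}

Compiled with a plain C compiler, this prints period = 500000, i.e. one sample every 500,000 events, which delivers the requested 4000 samples per second over the 1 ms window.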
-rw-r--r-- | include/linux/perf_event.h | 5
-rw-r--r-- | kernel/perf_event.c | 132
2 files changed, 94 insertions, 43 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a177698d95e2..c8ea0c77a625 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -496,9 +496,8 @@ struct hw_perf_event {
 	atomic64_t		period_left;
 	u64			interrupts;
 
-	u64			freq_count;
-	u64			freq_interrupts;
-	u64			freq_stamp;
+	u64			freq_time_stamp;
+	u64			freq_count_stamp;
 #endif
 };
 
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 2fa4301ffadb..b707465b0613 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1350,14 +1350,83 @@ static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
 
 static void perf_log_throttle(struct perf_event *event, int enable);
 
-static void perf_adjust_period(struct perf_event *event, u64 events)
+static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
+{
+	u64 frequency = event->attr.sample_freq;
+	u64 sec = NSEC_PER_SEC;
+	u64 divisor, dividend;
+
+	int count_fls, nsec_fls, frequency_fls, sec_fls;
+
+	count_fls = fls64(count);
+	nsec_fls = fls64(nsec);
+	frequency_fls = fls64(frequency);
+	sec_fls = 30;
+
+	/*
+	 * We got @count in @nsec, with a target of sample_freq HZ
+	 * the target period becomes:
+	 *
+	 *             @count * 10^9
+	 * period = -------------------
+	 *          @nsec * sample_freq
+	 *
+	 */
+
+	/*
+	 * Reduce accuracy by one bit such that @a and @b converge
+	 * to a similar magnitude.
+	 */
+#define REDUCE_FLS(a, b)		\
+do {					\
+	if (a##_fls > b##_fls) {	\
+		a >>= 1;		\
+		a##_fls--;		\
+	} else {			\
+		b >>= 1;		\
+		b##_fls--;		\
+	}				\
+} while (0)
+
+	/*
+	 * Reduce accuracy until either term fits in a u64, then proceed with
+	 * the other, so that finally we can do a u64/u64 division.
+	 */
+	while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
+		REDUCE_FLS(nsec, frequency);
+		REDUCE_FLS(sec, count);
+	}
+
+	if (count_fls + sec_fls > 64) {
+		divisor = nsec * frequency;
+
+		while (count_fls + sec_fls > 64) {
+			REDUCE_FLS(count, sec);
+			divisor >>= 1;
+		}
+
+		dividend = count * sec;
+	} else {
+		dividend = count * sec;
+
+		while (nsec_fls + frequency_fls > 64) {
+			REDUCE_FLS(nsec, frequency);
+			dividend >>= 1;
+		}
+
+		divisor = nsec * frequency;
+	}
+
+	return div64_u64(dividend, divisor);
+}
+
+static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	u64 period, sample_period;
 	s64 delta;
 
-	events *= hwc->sample_period;
-	period = div64_u64(events, event->attr.sample_freq);
+	period = perf_calculate_period(event, nsec, count);
 
 	delta = (s64)(period - hwc->sample_period);
 	delta = (delta + 7) / 8; /* low pass filter */
@@ -1368,13 +1437,22 @@ static void perf_adjust_period(struct perf_event *event, u64 events)
 		sample_period = 1;
 
 	hwc->sample_period = sample_period;
+
+	if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+		perf_disable();
+		event->pmu->disable(event);
+		atomic64_set(&hwc->period_left, 0);
+		event->pmu->enable(event);
+		perf_enable();
+	}
 }
 
 static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 {
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	u64 interrupts, freq;
+	u64 interrupts, now;
+	s64 delta;
 
 	raw_spin_lock(&ctx->lock);
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
@@ -1395,44 +1473,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 		if (interrupts == MAX_INTERRUPTS) {
 			perf_log_throttle(event, 1);
 			event->pmu->unthrottle(event);
-			interrupts = 2*sysctl_perf_event_sample_rate/HZ;
 		}
 
 		if (!event->attr.freq || !event->attr.sample_freq)
 			continue;
 
-		/*
-		 * if the specified freq < HZ then we need to skip ticks
-		 */
-		if (event->attr.sample_freq < HZ) {
-			freq = event->attr.sample_freq;
-
-			hwc->freq_count += freq;
-			hwc->freq_interrupts += interrupts;
-
-			if (hwc->freq_count < HZ)
-				continue;
-
-			interrupts = hwc->freq_interrupts;
-			hwc->freq_interrupts = 0;
-			hwc->freq_count -= HZ;
-		} else
-			freq = HZ;
-
-		perf_adjust_period(event, freq * interrupts);
+		event->pmu->read(event);
+		now = atomic64_read(&event->count);
+		delta = now - hwc->freq_count_stamp;
+		hwc->freq_count_stamp = now;
 
-		/*
-		 * In order to avoid being stalled by an (accidental) huge
-		 * sample period, force reset the sample period if we didn't
-		 * get any events in this freq period.
-		 */
-		if (!interrupts) {
-			perf_disable();
-			event->pmu->disable(event);
-			atomic64_set(&hwc->period_left, 0);
-			event->pmu->enable(event);
-			perf_enable();
-		}
+		if (delta > 0)
+			perf_adjust_period(event, TICK_NSEC, delta);
 	}
 	raw_spin_unlock(&ctx->lock);
 }
@@ -3688,12 +3740,12 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 
 	if (event->attr.freq) {
 		u64 now = perf_clock();
-		s64 delta = now - hwc->freq_stamp;
+		s64 delta = now - hwc->freq_time_stamp;
 
-		hwc->freq_stamp = now;
+		hwc->freq_time_stamp = now;
 
-		if (delta > 0 && delta < TICK_NSEC)
-			perf_adjust_period(event, NSEC_PER_SEC / (int)delta);
+		if (delta > 0 && delta < 2*TICK_NSEC)
+			perf_adjust_period(event, delta, hwc->last_period);
 	}
 
 	/*
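A quick numeric sanity check of the new call sites (the numbers are illustrative and assume HZ=1000, so TICK_NSEC is about 10^6 ns): if an event with sample_freq = 4000 advances its count by 2,000,000 during one tick, perf_ctx_adjust_freq() calls perf_adjust_period(event, TICK_NSEC, 2000000), and perf_calculate_period() yields 2000000 * 10^9 / (10^6 * 4000) = 500000, i.e. one sample per 500,000 events, matching the requested 4000 samples/sec. The overflow path is now the same computation seen from the other side: it passes the measured time delta as @nsec and hwc->last_period as @count, so tick-driven and overflow-driven adjustments converge on the same target period rather than using two different formulas.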