author     Peter Zijlstra <a.p.zijlstra@chello.nl>    2010-01-26 12:50:16 -0500
committer  Ingo Molnar <mingo@elte.hu>                2010-01-27 02:39:33 -0500
commit     abd50713944c8ea9e0af5b7bffa0aacae21cc91a (patch)
tree       c75a352aa13821a41791877f25d2f048568827b0 /kernel
parent     ef12a141306c90336a3a10d40213ecd98624d274 (diff)
perf: Reimplement frequency driven sampling
There was a bug in the old period code that caused intel_pmu_enable_all()
or native_write_msr_safe() to show up quite high in the profiles.
Staring at that code made my head hurt, so I rewrote it in a hopefully
simpler fashion. It's now fully symmetric between tick-driven and
overflow-driven adjustments, and uses less data to boot.
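
For illustration only (not part of the patch): both paths boil down to the same
question, "we counted @count events in @nsec ns, what period hits sample_freq
samples per second?". A minimal userspace sketch of that computation follows;
the function name, the HZ=250 assumption and the example numbers are invented
here, and the 128-bit intermediate relies on GCC/Clang's __uint128_t:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL
#define TICK_NSEC    (NSEC_PER_SEC / 250)	/* assuming HZ=250 */

/* Ideal target period: count * 10^9 / (nsec * sample_freq). */
static uint64_t target_period(uint64_t count, uint64_t nsec, uint64_t sample_freq)
{
	/* Userspace can lean on a 128-bit intermediate; the kernel cannot. */
	return (uint64_t)(((__uint128_t)count * NSEC_PER_SEC) /
			  ((__uint128_t)nsec * sample_freq));
}

int main(void)
{
	/* Tick driven: 16M events counted during one 4ms tick, target 4000 Hz. */
	printf("tick:     %llu\n",
	       (unsigned long long)target_period(16000000, TICK_NSEC, 4000));

	/* Overflow driven: the last period of 1M events took 250us to elapse. */
	printf("overflow: %llu\n",
	       (unsigned long long)target_period(1000000, 250000, 4000));

	/* Both print 1000000: the same event rate yields the same period. */
	return 0;
}

In the patch below, the tick path passes (TICK_NSEC, events counted since the
last tick) and the overflow path passes (elapsed time, hwc->last_period), so
both feed the same perf_calculate_period() helper.
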
The only complication is that it basically wants to do a u128 division.
The code approximates that in a rather simple truncate-until-it-fits
fashion, taking care to balance the terms while truncating.
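
Again for illustration only: a rough standalone restatement of that idea, with
invented names. It shaves one bit off the larger factor of the numerator and of
the denominator in lockstep until both products fit in 64 bits, so the
truncation roughly cancels out of the ratio. The in-tree version is
perf_calculate_period() in the first hunk below; it only keeps reducing the
side that still overflows and compensates on the already-computed product of
the other side, which loses slightly less precision.

#include <stdint.h>
#include <stdio.h>

/* Highest set bit, 1-based, 0 for x == 0 (what the kernel's fls64() returns). */
static int bits(uint64_t x)
{
	return x ? 64 - __builtin_clzll(x) : 0;
}

/*
 * Approximate (a * b) / (c * d) using only 64-bit arithmetic: while either
 * product would need more than 64 bits, drop one bit from the larger factor
 * on both sides so the ratio is preserved.
 */
static uint64_t approx_mul_div(uint64_t a, uint64_t b, uint64_t c, uint64_t d)
{
	while (bits(a) + bits(b) > 64 || bits(c) + bits(d) > 64) {
		if (bits(a) >= bits(b))
			a >>= 1;
		else
			b >>= 1;
		if (bits(c) >= bits(d))
			c >>= 1;
		else
			d >>= 1;
	}
	return (c * d) ? (a * b) / (c * d) : 0;	/* degenerate denominators give 0 */
}

int main(void)
{
	uint64_t count = 3ULL << 40, nsec = 4000000, freq = 4000;

	/* period = count * 10^9 / (nsec * freq); the exact answer is 3 * 2^36. */
	printf("approx period: %llu\n", (unsigned long long)
	       approx_mul_div(count, 1000000000ULL, nsec, freq));
	return 0;
}
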
This version does not generate that sampling artefact.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/perf_event.c | 132
1 file changed, 92 insertions(+), 40 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index edc46b92b508..251fb9552492 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1423,14 +1423,83 @@ void perf_event_task_sched_in(struct task_struct *task)
 
 static void perf_log_throttle(struct perf_event *event, int enable);
 
-static void perf_adjust_period(struct perf_event *event, u64 events)
+static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
+{
+	u64 frequency = event->attr.sample_freq;
+	u64 sec = NSEC_PER_SEC;
+	u64 divisor, dividend;
+
+	int count_fls, nsec_fls, frequency_fls, sec_fls;
+
+	count_fls = fls64(count);
+	nsec_fls = fls64(nsec);
+	frequency_fls = fls64(frequency);
+	sec_fls = 30;
+
+	/*
+	 * We got @count in @nsec, with a target of sample_freq HZ
+	 * the target period becomes:
+	 *
+	 *             @count * 10^9
+	 * period = -------------------
+	 *          @nsec * sample_freq
+	 *
+	 */
+
+	/*
+	 * Reduce accuracy by one bit such that @a and @b converge
+	 * to a similar magnitude.
+	 */
+#define REDUCE_FLS(a, b)		\
+do {					\
+	if (a##_fls > b##_fls) {	\
+		a >>= 1;		\
+		a##_fls--;		\
+	} else {			\
+		b >>= 1;		\
+		b##_fls--;		\
+	}				\
+} while (0)
+
+	/*
+	 * Reduce accuracy until either term fits in a u64, then proceed with
+	 * the other, so that finally we can do a u64/u64 division.
+	 */
+	while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
+		REDUCE_FLS(nsec, frequency);
+		REDUCE_FLS(sec, count);
+	}
+
+	if (count_fls + sec_fls > 64) {
+		divisor = nsec * frequency;
+
+		while (count_fls + sec_fls > 64) {
+			REDUCE_FLS(count, sec);
+			divisor >>= 1;
+		}
+
+		dividend = count * sec;
+	} else {
+		dividend = count * sec;
+
+		while (nsec_fls + frequency_fls > 64) {
+			REDUCE_FLS(nsec, frequency);
+			dividend >>= 1;
+		}
+
+		divisor = nsec * frequency;
+	}
+
+	return div64_u64(dividend, divisor);
+}
+
+static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	u64 period, sample_period;
 	s64 delta;
 
-	events *= hwc->sample_period;
-	period = div64_u64(events, event->attr.sample_freq);
+	period = perf_calculate_period(event, nsec, count);
 
 	delta = (s64)(period - hwc->sample_period);
 	delta = (delta + 7) / 8; /* low pass filter */
@@ -1441,13 +1510,22 @@ static void perf_adjust_period(struct perf_event *event, u64 events)
 		sample_period = 1;
 
 	hwc->sample_period = sample_period;
+
+	if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+		perf_disable();
+		event->pmu->disable(event);
+		atomic64_set(&hwc->period_left, 0);
+		event->pmu->enable(event);
+		perf_enable();
+	}
 }
 
 static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 {
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	u64 interrupts, freq;
+	u64 interrupts, now;
+	s64 delta;
 
 	raw_spin_lock(&ctx->lock);
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
@@ -1468,44 +1546,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 		if (interrupts == MAX_INTERRUPTS) {
 			perf_log_throttle(event, 1);
 			event->pmu->unthrottle(event);
-			interrupts = 2*sysctl_perf_event_sample_rate/HZ;
 		}
 
 		if (!event->attr.freq || !event->attr.sample_freq)
 			continue;
 
-		/*
-		 * if the specified freq < HZ then we need to skip ticks
-		 */
-		if (event->attr.sample_freq < HZ) {
-			freq = event->attr.sample_freq;
-
-			hwc->freq_count += freq;
-			hwc->freq_interrupts += interrupts;
-
-			if (hwc->freq_count < HZ)
-				continue;
-
-			interrupts = hwc->freq_interrupts;
-			hwc->freq_interrupts = 0;
-			hwc->freq_count -= HZ;
-		} else
-			freq = HZ;
-
-		perf_adjust_period(event, freq * interrupts);
+		event->pmu->read(event);
+		now = atomic64_read(&event->count);
+		delta = now - hwc->freq_count_stamp;
+		hwc->freq_count_stamp = now;
 
-		/*
-		 * In order to avoid being stalled by an (accidental) huge
-		 * sample period, force reset the sample period if we didn't
-		 * get any events in this freq period.
-		 */
-		if (!interrupts) {
-			perf_disable();
-			event->pmu->disable(event);
-			atomic64_set(&hwc->period_left, 0);
-			event->pmu->enable(event);
-			perf_enable();
-		}
 	}
 	raw_spin_unlock(&ctx->lock);
 }
@@ -3768,12 +3820,12 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 
 	if (event->attr.freq) {
 		u64 now = perf_clock();
-		s64 delta = now - hwc->freq_stamp;
+		s64 delta = now - hwc->freq_time_stamp;
 
-		hwc->freq_stamp = now;
+		hwc->freq_time_stamp = now;
 
-		if (delta > 0 && delta < TICK_NSEC)
-			perf_adjust_period(event, NSEC_PER_SEC / (int)delta);
+		if (delta > 0 && delta < 2*TICK_NSEC)
+			perf_adjust_period(event, delta, hwc->last_period);
 	}
 
 	/*
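
One more illustration, not from the patch: a toy simulation of how the
tick-driven path homes in on the target period through the low pass filter in
perf_adjust_period(). The event rate, sample_freq, HZ=1000 and the omission of
clamping, throttling and the period_left reset are all simplifications made up
for this sketch.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL
#define TICK_NSEC    (NSEC_PER_SEC / 1000)	/* assuming HZ=1000 */

int main(void)
{
	const uint64_t rate = 2000000000ULL;	/* steady 2e9 events/sec */
	const uint64_t sample_freq = 4000;	/* target: 4000 samples/sec */
	uint64_t sample_period = 1;		/* start far from the target */
	int tick;

	for (tick = 1; tick <= 16; tick++) {
		/* events counted since the previous tick at the assumed rate */
		uint64_t count = rate / 1000;

		/* period = count * 10^9 / (nsec * sample_freq); fits in u64 here */
		uint64_t period = count * NSEC_PER_SEC / (TICK_NSEC * sample_freq);

		/* same low pass filter as perf_adjust_period(): ~1/8 of the gap per tick */
		int64_t delta = (int64_t)(period - sample_period);
		delta = (delta + 7) / 8;
		sample_period += delta;

		printf("tick %2d: sample_period = %llu (target %llu)\n",
		       tick, (unsigned long long)sample_period,
		       (unsigned long long)period);
	}
	return 0;
}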