author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2010-01-26 12:50:16 -0500
committer	Ingo Molnar <mingo@elte.hu>	2010-01-27 02:39:33 -0500
commit	abd50713944c8ea9e0af5b7bffa0aacae21cc91a (patch)
tree	c75a352aa13821a41791877f25d2f048568827b0 /kernel
parent	ef12a141306c90336a3a10d40213ecd98624d274 (diff)
perf: Reimplement frequency driven sampling
There was a bug in the old period code that caused intel_pmu_enable_all() or native_write_msr_safe() to show up quite high in the profiles.

Staring at that code made my head hurt, so I rewrote it in a hopefully simpler fashion. It is now fully symmetric between tick and overflow driven adjustments, and it uses less data to boot.

The only complication is that it basically wants to do a u128 division. The code approximates that with a simple truncate-until-it-fits approach, taking care to balance the two terms while truncating.

This version does not generate that sampling artefact.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
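For reference, here is a small, self-contained userspace sketch of the truncate-until-it-fits approximation described above. It mirrors the shape of the new perf_calculate_period()/REDUCE_FLS logic in the patch below, but the names (approx_period, fls64_approx, REDUCE) and the example numbers in main() are illustrative only and are not part of the kernel code:

/*
 * Illustrative userspace sketch (not the kernel code itself) of the
 * "truncate until it fits" division described above: approximate
 *
 *              count * NSEC_PER_SEC
 *   period = ----------------------
 *              nsec * sample_freq
 *
 * using only 64-bit arithmetic, dropping low-order bits from dividend
 * and divisor in step so the ratio is roughly preserved.
 */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

/* Position of the most significant set bit, 0 for x == 0 (like fls64()). */
static int fls64_approx(uint64_t x)
{
	int bits = 0;

	while (x) {
		x >>= 1;
		bits++;
	}
	return bits;
}

static uint64_t approx_period(uint64_t count, uint64_t nsec, uint64_t freq)
{
	uint64_t sec = NSEC_PER_SEC;
	int count_fls = fls64_approx(count);
	int nsec_fls = fls64_approx(nsec);
	int freq_fls = fls64_approx(freq);
	int sec_fls = 30;		/* 10^9 needs 30 bits */
	uint64_t dividend, divisor;

/* Shave one bit off the larger of the two terms. */
#define REDUCE(a, b)				\
	do {					\
		if (a##_fls > b##_fls) {	\
			a >>= 1; a##_fls--;	\
		} else {			\
			b >>= 1; b##_fls--;	\
		}				\
	} while (0)

	/* Truncate both products in lock step until one fits in 64 bits. */
	while (count_fls + sec_fls > 64 && nsec_fls + freq_fls > 64) {
		REDUCE(nsec, freq);
		REDUCE(sec, count);
	}

	if (count_fls + sec_fls > 64) {
		divisor = nsec * freq;
		while (count_fls + sec_fls > 64) {
			REDUCE(count, sec);
			divisor >>= 1;	/* keep the quotient balanced */
		}
		dividend = count * sec;
	} else {
		dividend = count * sec;
		while (nsec_fls + freq_fls > 64) {
			REDUCE(nsec, freq);
			dividend >>= 1;
		}
		divisor = nsec * freq;
	}

	/* Guard against a zero divisor in this demo; the kernel uses div64_u64(). */
	return divisor ? dividend / divisor : 0;
}

int main(void)
{
	/* 4M events counted over 4ms, with a target of 1000 samples/sec. */
	printf("period ~= %llu\n",
	       (unsigned long long)approx_period(4000000, 4000000, 1000));
	return 0;
}

With count = 4,000,000 events observed over nsec = 4,000,000 ns and a 1000 Hz target, both products fit in 64 bits, no truncation is needed, and the sketch returns the exact period of 1,000,000 events per sample; the bit-shaving only kicks in when count * 10^9 or nsec * sample_freq would overflow 64 bits.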
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/perf_event.c	132
1 file changed, 92 insertions, 40 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index edc46b92b508..251fb9552492 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1423,14 +1423,83 @@ void perf_event_task_sched_in(struct task_struct *task)
 
 static void perf_log_throttle(struct perf_event *event, int enable);
 
-static void perf_adjust_period(struct perf_event *event, u64 events)
+static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
+{
+	u64 frequency = event->attr.sample_freq;
+	u64 sec = NSEC_PER_SEC;
+	u64 divisor, dividend;
+
+	int count_fls, nsec_fls, frequency_fls, sec_fls;
+
+	count_fls = fls64(count);
+	nsec_fls = fls64(nsec);
+	frequency_fls = fls64(frequency);
+	sec_fls = 30;
+
+	/*
+	 * We got @count in @nsec, with a target of sample_freq HZ
+	 * the target period becomes:
+	 *
+	 *             @count * 10^9
+	 * period = -------------------
+	 *          @nsec * sample_freq
+	 *
+	 */
+
+	/*
+	 * Reduce accuracy by one bit such that @a and @b converge
+	 * to a similar magnitude.
+	 */
+#define REDUCE_FLS(a, b)		\
+do {					\
+	if (a##_fls > b##_fls) {	\
+		a >>= 1;		\
+		a##_fls--;		\
+	} else {			\
+		b >>= 1;		\
+		b##_fls--;		\
+	}				\
+} while (0)
+
+	/*
+	 * Reduce accuracy until either term fits in a u64, then proceed with
+	 * the other, so that finally we can do a u64/u64 division.
+	 */
+	while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
+		REDUCE_FLS(nsec, frequency);
+		REDUCE_FLS(sec, count);
+	}
+
+	if (count_fls + sec_fls > 64) {
+		divisor = nsec * frequency;
+
+		while (count_fls + sec_fls > 64) {
+			REDUCE_FLS(count, sec);
+			divisor >>= 1;
+		}
+
+		dividend = count * sec;
+	} else {
+		dividend = count * sec;
+
+		while (nsec_fls + frequency_fls > 64) {
+			REDUCE_FLS(nsec, frequency);
+			dividend >>= 1;
+		}
+
+		divisor = nsec * frequency;
+	}
+
+	return div64_u64(dividend, divisor);
+}
+
+static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	u64 period, sample_period;
 	s64 delta;
 
-	events *= hwc->sample_period;
-	period = div64_u64(events, event->attr.sample_freq);
+	period = perf_calculate_period(event, nsec, count);
 
 	delta = (s64)(period - hwc->sample_period);
 	delta = (delta + 7) / 8; /* low pass filter */
@@ -1441,13 +1510,22 @@ static void perf_adjust_period(struct perf_event *event, u64 events)
 		sample_period = 1;
 
 	hwc->sample_period = sample_period;
+
+	if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+		perf_disable();
+		event->pmu->disable(event);
+		atomic64_set(&hwc->period_left, 0);
+		event->pmu->enable(event);
+		perf_enable();
+	}
 }
 
 static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 {
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	u64 interrupts, freq;
+	u64 interrupts, now;
+	s64 delta;
 
 	raw_spin_lock(&ctx->lock);
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
@@ -1468,44 +1546,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 		if (interrupts == MAX_INTERRUPTS) {
 			perf_log_throttle(event, 1);
 			event->pmu->unthrottle(event);
-			interrupts = 2*sysctl_perf_event_sample_rate/HZ;
 		}
 
 		if (!event->attr.freq || !event->attr.sample_freq)
 			continue;
 
-		/*
-		 * if the specified freq < HZ then we need to skip ticks
-		 */
-		if (event->attr.sample_freq < HZ) {
-			freq = event->attr.sample_freq;
-
-			hwc->freq_count += freq;
-			hwc->freq_interrupts += interrupts;
-
-			if (hwc->freq_count < HZ)
-				continue;
-
-			interrupts = hwc->freq_interrupts;
-			hwc->freq_interrupts = 0;
-			hwc->freq_count -= HZ;
-		} else
-			freq = HZ;
-
-		perf_adjust_period(event, freq * interrupts);
+		event->pmu->read(event);
+		now = atomic64_read(&event->count);
+		delta = now - hwc->freq_count_stamp;
+		hwc->freq_count_stamp = now;
 
-		/*
-		 * In order to avoid being stalled by an (accidental) huge
-		 * sample period, force reset the sample period if we didn't
-		 * get any events in this freq period.
-		 */
-		if (!interrupts) {
-			perf_disable();
-			event->pmu->disable(event);
-			atomic64_set(&hwc->period_left, 0);
-			event->pmu->enable(event);
-			perf_enable();
-		}
+		if (delta > 0)
+			perf_adjust_period(event, TICK_NSEC, delta);
 	}
 	raw_spin_unlock(&ctx->lock);
 }
@@ -3768,12 +3820,12 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 
 	if (event->attr.freq) {
 		u64 now = perf_clock();
-		s64 delta = now - hwc->freq_stamp;
+		s64 delta = now - hwc->freq_time_stamp;
 
-		hwc->freq_stamp = now;
+		hwc->freq_time_stamp = now;
 
-		if (delta > 0 && delta < TICK_NSEC)
-			perf_adjust_period(event, NSEC_PER_SEC / (int)delta);
+		if (delta > 0 && delta < 2*TICK_NSEC)
+			perf_adjust_period(event, delta, hwc->last_period);
 	}
 
 	/*