author     Stephane Eranian <eranian@google.com>   2012-01-26 11:03:19 -0500
committer  Ingo Molnar <mingo@elte.hu>             2012-01-27 06:06:39 -0500
commit     e050e3f0a71bf7dc2c148b35caff0234decc8198
tree       df3069475c20d0ab238735a7a1837db9ae080610 /kernel/events
parent     74ea15d909b31158f9b63190a95b52bc05586d4b
perf: Fix broken interrupt rate throttling
This patch fixes the sampling interrupt throttling mechanism.

It was broken in v3.2. Events were not being unthrottled. The
unthrottling mechanism required that events be checked at each
timer tick.

This patch solves this problem and also separates:

  - unthrottling
  - multiplexing
  - frequency-mode period adjustments

Not all of them need to be executed at each timer tick.

This third version of the patch is based on my original patch +
PeterZ proposal (https://lkml.org/lkml/2012/1/7/87).

At each timer tick, for each context:

  - if the current CPU has throttled events, we unthrottle events

  - if context has frequency-based events, we adjust sampling periods

  - if we have reached the jiffies interval, we multiplex (rotate)

We decoupled rotation (multiplexing) from frequency-mode sampling
period adjustments. They should not necessarily happen at the same
rate. Multiplexing is subject to jiffies_interval (currently at 1
but could be higher once the tunable is exposed via sysfs).

We have grouped frequency-mode adjustment and unthrottling into the
same routine to minimize code duplication. When throttled while in
frequency mode, we scan the events only once.

We have fixed the threshold enforcement code in __perf_event_overflow().
There was a bug whereby it would allow more than the authorized rate
because an increment of hwc->interrupts was not executed at the right
place.

The patch was tested with low sampling limit (2000) and fixed periods,
frequency mode, overcommitted PMU.

On a 2.1GHz AMD CPU:

 $ cat /proc/sys/kernel/perf_event_max_sample_rate
 2000

We set a rate of 3000 samples/sec (2.1GHz/3000 = 700000):

 $ perf record -e cycles,cycles -c 700000 noploop 10
 $ perf report -D | tail -21

 Aggregated stats:
           TOTAL events:      80086
            MMAP events:         88
            COMM events:          2
            EXIT events:          4
        THROTTLE events:      19996
      UNTHROTTLE events:      19996
          SAMPLE events:      40000
 cycles stats:
           TOTAL events:      40006
            MMAP events:          5
            COMM events:          1
            EXIT events:          4
        THROTTLE events:       9998
      UNTHROTTLE events:       9998
          SAMPLE events:      20000
 cycles stats:
           TOTAL events:      39996
        THROTTLE events:       9998
      UNTHROTTLE events:       9998
          SAMPLE events:      20000

For 10s, the cap is 2x2000x10 = 40000 samples. We get exactly that:
20000 samples/event.

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: <stable@kernel.org> # v3.2+
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120126160319.GA5655@quad
Signed-off-by: Ingo Molnar <mingo@elte.hu>
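[Editor's note] For readers who want to see the throttling scheme in isolation, the sketch below is a stand-alone user-space simulation and is not part of the patch. It mimics the lazy per-tick accounting the diff introduces: the overflow path resets its per-event sample count the first time it observes a new per-CPU throttle sequence number and throttles once the per-tick limit is reached, while the timer tick only unthrottles when the per-CPU throttled count says something was actually throttled. The names event_overflow(), timer_tick() and the limit of 3 samples per tick are made up for the demo; per-CPU variables, locking, the throttle argument and the pmu->stop()/start() calls are omitted.

/* throttle_sim.c - illustrative only; mirrors the patch's accounting, not the kernel's locking */
#include <stdio.h>

#define MAX_INTERRUPTS		(~0U)	/* sentinel meaning "throttled" */
#define MAX_SAMPLES_PER_TICK	3	/* tiny limit for the demo */

static unsigned long perf_throttled_seq;	/* bumped once per (simulated) timer tick */
static int perf_throttled_count;		/* events throttled since the last tick */

struct hw_perf_event {
	unsigned long interrupts_seq;	/* throttle seq this event last sampled in */
	unsigned int interrupts;	/* samples taken during that tick */
};

/* overflow path: count samples per tick, throttle past the limit */
static int event_overflow(struct hw_perf_event *hwc)
{
	if (perf_throttled_seq != hwc->interrupts_seq) {
		/* first overflow since the last tick: restart the count lazily */
		hwc->interrupts_seq = perf_throttled_seq;
		hwc->interrupts = 1;
	} else if (++hwc->interrupts >= MAX_SAMPLES_PER_TICK) {
		perf_throttled_count++;
		hwc->interrupts = MAX_INTERRUPTS;
		return 1;	/* the real code also logs the throttle and stops sampling */
	}
	return 0;
}

/* timer tick: unthrottle lazily, and only if something was throttled */
static void timer_tick(struct hw_perf_event *hwc)
{
	int throttled = perf_throttled_count;

	perf_throttled_count = 0;
	perf_throttled_seq++;

	if (throttled && hwc->interrupts == MAX_INTERRUPTS)
		hwc->interrupts = 0;	/* the real code restarts the event here */
}

int main(void)
{
	struct hw_perf_event ev = { 0, 0 };
	int i;

	timer_tick(&ev);		/* first tick */

	for (i = 1; i <= 5; i++) {
		if (event_overflow(&ev)) {
			printf("throttled after %d samples\n", i);
			break;		/* a throttled event stops overflowing */
		}
	}

	timer_tick(&ev);		/* next tick: lazy unthrottle */
	printf("unthrottled: %s\n",
	       ev.interrupts == MAX_INTERRUPTS ? "no" : "yes");
	return 0;
}

Built with any C compiler, this prints that the event throttles after 3 samples and is unthrottled again on the next tick, which is the behavior the patch restores.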
Diffstat (limited to 'kernel/events')
-rw-r--r--  kernel/events/core.c | 104
1 file changed, 66 insertions(+), 38 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 32b48c88971..ba36013cfb2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2300,6 +2300,9 @@ do { \
 	return div64_u64(dividend, divisor);
 }
 
+static DEFINE_PER_CPU(int, perf_throttled_count);
+static DEFINE_PER_CPU(u64, perf_throttled_seq);
+
 static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
 	struct hw_perf_event *hwc = &event->hw;
@@ -2325,16 +2328,29 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 	}
 }
 
-static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
+/*
+ * combine freq adjustment with unthrottling to avoid two passes over the
+ * events. At the same time, make sure, having freq events does not change
+ * the rate of unthrottling as that would introduce bias.
+ */
+static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
+					   int needs_unthr)
 {
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	u64 interrupts, now;
+	u64 now, period = TICK_NSEC;
 	s64 delta;
 
-	if (!ctx->nr_freq)
+	/*
+	 * only need to iterate over all events iff:
+	 * - context have events in frequency mode (needs freq adjust)
+	 * - there are events to unthrottle on this cpu
+	 */
+	if (!(ctx->nr_freq || needs_unthr))
 		return;
 
+	raw_spin_lock(&ctx->lock);
+
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
 		if (event->state != PERF_EVENT_STATE_ACTIVE)
 			continue;
@@ -2344,13 +2360,8 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 
 		hwc = &event->hw;
 
-		interrupts = hwc->interrupts;
-		hwc->interrupts = 0;
-
-		/*
-		 * unthrottle events on the tick
-		 */
-		if (interrupts == MAX_INTERRUPTS) {
+		if (needs_unthr && hwc->interrupts == MAX_INTERRUPTS) {
+			hwc->interrupts = 0;
 			perf_log_throttle(event, 1);
 			event->pmu->start(event, 0);
 		}
@@ -2358,14 +2369,26 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 		if (!event->attr.freq || !event->attr.sample_freq)
 			continue;
 
-		event->pmu->read(event);
+		/*
+		 * stop the event and update event->count
+		 */
+		event->pmu->stop(event, PERF_EF_UPDATE);
+
 		now = local64_read(&event->count);
 		delta = now - hwc->freq_count_stamp;
 		hwc->freq_count_stamp = now;
 
+		/*
+		 * restart the event
+		 * reload only if value has changed
+		 */
 		if (delta > 0)
 			perf_adjust_period(event, period, delta);
+
+		event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
 	}
+
+	raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -2388,16 +2411,13 @@ static void rotate_ctx(struct perf_event_context *ctx)
  */
 static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 {
-	u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
 	struct perf_event_context *ctx = NULL;
-	int rotate = 0, remove = 1, freq = 0;
+	int rotate = 0, remove = 1;
 
 	if (cpuctx->ctx.nr_events) {
 		remove = 0;
 		if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
 			rotate = 1;
-		if (cpuctx->ctx.nr_freq)
-			freq = 1;
 	}
 
 	ctx = cpuctx->task_ctx;
@@ -2405,37 +2425,26 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 		remove = 0;
 		if (ctx->nr_events != ctx->nr_active)
 			rotate = 1;
-		if (ctx->nr_freq)
-			freq = 1;
 	}
 
-	if (!rotate && !freq)
+	if (!rotate)
 		goto done;
 
 	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
 	perf_pmu_disable(cpuctx->ctx.pmu);
 
-	if (freq) {
-		perf_ctx_adjust_freq(&cpuctx->ctx, interval);
-		if (ctx)
-			perf_ctx_adjust_freq(ctx, interval);
-	}
-
-	if (rotate) {
-		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-		if (ctx)
-			ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
+	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+	if (ctx)
+		ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
 
-		rotate_ctx(&cpuctx->ctx);
-		if (ctx)
-			rotate_ctx(ctx);
+	rotate_ctx(&cpuctx->ctx);
+	if (ctx)
+		rotate_ctx(ctx);
 
-		perf_event_sched_in(cpuctx, ctx, current);
-	}
+	perf_event_sched_in(cpuctx, ctx, current);
 
 	perf_pmu_enable(cpuctx->ctx.pmu);
 	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-
 done:
 	if (remove)
 		list_del_init(&cpuctx->rotation_list);
@@ -2445,10 +2454,22 @@ void perf_event_task_tick(void)
 {
 	struct list_head *head = &__get_cpu_var(rotation_list);
 	struct perf_cpu_context *cpuctx, *tmp;
+	struct perf_event_context *ctx;
+	int throttled;
 
 	WARN_ON(!irqs_disabled());
 
+	__this_cpu_inc(perf_throttled_seq);
+	throttled = __this_cpu_xchg(perf_throttled_count, 0);
+
 	list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
+		ctx = &cpuctx->ctx;
+		perf_adjust_freq_unthr_context(ctx, throttled);
+
+		ctx = cpuctx->task_ctx;
+		if (ctx)
+			perf_adjust_freq_unthr_context(ctx, throttled);
+
 		if (cpuctx->jiffies_interval == 1 ||
 		    !(jiffies % cpuctx->jiffies_interval))
 			perf_rotate_context(cpuctx);
@@ -4509,6 +4530,7 @@ static int __perf_event_overflow(struct perf_event *event,
 {
 	int events = atomic_read(&event->event_limit);
 	struct hw_perf_event *hwc = &event->hw;
+	u64 seq;
 	int ret = 0;
 
 	/*
@@ -4518,14 +4540,20 @@ static int __perf_event_overflow(struct perf_event *event,
 	if (unlikely(!is_sampling_event(event)))
 		return 0;
 
-	if (unlikely(hwc->interrupts >= max_samples_per_tick)) {
-		if (throttle) {
+	seq = __this_cpu_read(perf_throttled_seq);
+	if (seq != hwc->interrupts_seq) {
+		hwc->interrupts_seq = seq;
+		hwc->interrupts = 1;
+	} else {
+		hwc->interrupts++;
+		if (unlikely(throttle
+			     && hwc->interrupts >= max_samples_per_tick)) {
+			__this_cpu_inc(perf_throttled_count);
 			hwc->interrupts = MAX_INTERRUPTS;
 			perf_log_throttle(event, 0);
 			ret = 1;
 		}
-	} else
-		hwc->interrupts++;
+	}
 
 	if (event->attr.freq) {
 		u64 now = perf_clock();