author      Peter Zijlstra <a.p.zijlstra@chello.nl>    2011-11-16 08:38:16 -0500
committer   Ingo Molnar <mingo@elte.hu>                2011-12-06 02:33:52 -0500
commit      0f5a2601284237e2ba089389fd75d67f77626cef (patch)
tree        37eedc660f09a36cfbd6b2a2c28e8cd0d1dbe167 /kernel
parent      d6c1c49de577fa292af2449817364b7d89b574d8 (diff)
perf: Avoid a useless pmu_disable() in the perf-tick
Gleb writes:

 > Currently the pmu is disabled and re-enabled on each timer interrupt, even
 > when no rotation or frequency adjustment is needed. On Intel CPUs this
 > results in two writes to the PERF_GLOBAL_CTRL MSR per tick. On bare metal
 > it does not cause a significant slowdown, but when running perf in a
 > virtual machine it leads to a 20% slowdown on my machine.

Cure this by keeping a perf_event_context::nr_freq counter that counts the
number of active events that require frequency adjustments, and use it in a
similar fashion to the already existing nr_events != nr_active test in
perf_rotate_context().

By being able to exclude both rotation and frequency adjustments a priori
for the common case, we can avoid the otherwise superfluous PMU disable.

Suggested-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-515yhoatehd3gza7we9fapaa@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
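To make the idea concrete, here is a small self-contained userspace sketch of the
gating pattern the patch introduces. struct ctx, tick(), pmu_disable() and the other
helpers are illustrative stand-ins, not the kernel's perf_event_context,
perf_rotate_context() or perf_pmu_disable(); the point is only that the per-tick path
skips the expensive disable/enable pair when neither rotation nor a frequency
adjustment is pending.

#include <stdio.h>

/* Stand-in for perf_event_context: nr_freq counts active events that need
 * frequency adjustment, mirroring the counter added by this patch. */
struct ctx {
        int nr_events;  /* all events attached to the context */
        int nr_active;  /* events currently programmed on the PMU */
        int nr_freq;    /* active events with attr.freq && attr.sample_freq */
};

/* Stubs standing in for perf_pmu_disable()/perf_pmu_enable(), each of which
 * costs a PERF_GLOBAL_CTRL MSR write on Intel hardware. */
static void pmu_disable(void) { printf("  pmu_disable (MSR write)\n"); }
static void pmu_enable(void)  { printf("  pmu_enable  (MSR write)\n"); }
static void adjust_freq(struct ctx *c)     { (void)c; printf("  adjust sample frequencies\n"); }
static void rotate_flexible(struct ctx *c) { (void)c; printf("  rotate flexible events\n"); }

/* Per-tick work: only touch the PMU when rotation or frequency work is
 * actually pending -- the point of the patch. */
static void tick(struct ctx *c)
{
        int rotate = (c->nr_events != c->nr_active);
        int freq   = (c->nr_freq != 0);

        if (!rotate && !freq)
                return;         /* common case: no MSR writes at all */

        pmu_disable();
        if (freq)
                adjust_freq(c);
        if (rotate)
                rotate_flexible(c);
        pmu_enable();
}

int main(void)
{
        struct ctx idle = { .nr_events = 2, .nr_active = 2, .nr_freq = 0 };
        struct ctx busy = { .nr_events = 4, .nr_active = 2, .nr_freq = 1 };

        printf("tick with nothing pending:\n");
        tick(&idle);            /* prints nothing: PMU stand-ins never touched */

        printf("tick with rotation and freq work pending:\n");
        tick(&busy);            /* disable, adjust, rotate, enable */
        return 0;
}

Compiled and run, the first tick produces no output because the PMU stand-ins are
never touched, while the second shows the disable/adjust/rotate/enable sequence.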
Diffstat (limited to 'kernel')
-rw-r--r--    kernel/events/core.c    48
1 file changed, 32 insertions(+), 16 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a355ffb0b28f..b3fed52aaf20 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1130,6 +1130,8 @@ event_sched_out(struct perf_event *event,
        if (!is_software_event(event))
                cpuctx->active_oncpu--;
        ctx->nr_active--;
+       if (event->attr.freq && event->attr.sample_freq)
+               ctx->nr_freq--;
        if (event->attr.exclusive || !cpuctx->active_oncpu)
                cpuctx->exclusive = 0;
 }
@@ -1407,6 +1409,8 @@ event_sched_in(struct perf_event *event,
        if (!is_software_event(event))
                cpuctx->active_oncpu++;
        ctx->nr_active++;
+       if (event->attr.freq && event->attr.sample_freq)
+               ctx->nr_freq++;

        if (event->attr.exclusive)
                cpuctx->exclusive = 1;
@@ -2329,6 +2333,9 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
        u64 interrupts, now;
        s64 delta;

+       if (!ctx->nr_freq)
+               return;
+
        list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
                if (event->state != PERF_EVENT_STATE_ACTIVE)
                        continue;
@@ -2384,12 +2391,14 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 {
        u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
        struct perf_event_context *ctx = NULL;
-       int rotate = 0, remove = 1;
+       int rotate = 0, remove = 1, freq = 0;

        if (cpuctx->ctx.nr_events) {
                remove = 0;
                if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
                        rotate = 1;
+               if (cpuctx->ctx.nr_freq)
+                       freq = 1;
        }

        ctx = cpuctx->task_ctx;
@@ -2397,33 +2406,40 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
                remove = 0;
                if (ctx->nr_events != ctx->nr_active)
                        rotate = 1;
+               if (ctx->nr_freq)
+                       freq = 1;
        }

+       if (!rotate && !freq)
+               goto done;
+
        perf_ctx_lock(cpuctx, cpuctx->task_ctx);
        perf_pmu_disable(cpuctx->ctx.pmu);
-       perf_ctx_adjust_freq(&cpuctx->ctx, interval);
-       if (ctx)
-               perf_ctx_adjust_freq(ctx, interval);

-       if (!rotate)
-               goto done;
+       if (freq) {
+               perf_ctx_adjust_freq(&cpuctx->ctx, interval);
+               if (ctx)
+                       perf_ctx_adjust_freq(ctx, interval);
+       }

-       cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-       if (ctx)
-               ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
+       if (rotate) {
+               cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+               if (ctx)
+                       ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);

-       rotate_ctx(&cpuctx->ctx);
-       if (ctx)
-               rotate_ctx(ctx);
+               rotate_ctx(&cpuctx->ctx);
+               if (ctx)
+                       rotate_ctx(ctx);
+
+               perf_event_sched_in(cpuctx, ctx, current);
+       }

-       perf_event_sched_in(cpuctx, ctx, current);
+       perf_pmu_enable(cpuctx->ctx.pmu);
+       perf_ctx_unlock(cpuctx, cpuctx->task_ctx);

 done:
        if (remove)
                list_del_init(&cpuctx->rotation_list);
-
-       perf_pmu_enable(cpuctx->ctx.pmu);
-       perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 }

 void perf_event_task_tick(void)
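For readability, this is roughly how perf_rotate_context() reads with the patch
applied, reassembled from the last two hunks above. It is a reconstruction, not
verbatim kernel source; in particular, the task-context check marked below falls
between the hunks and is assumed here.

static void perf_rotate_context(struct perf_cpu_context *cpuctx)
{
        u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
        struct perf_event_context *ctx = NULL;
        int rotate = 0, remove = 1, freq = 0;

        if (cpuctx->ctx.nr_events) {
                remove = 0;
                if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
                        rotate = 1;
                if (cpuctx->ctx.nr_freq)
                        freq = 1;
        }

        ctx = cpuctx->task_ctx;
        if (ctx && ctx->nr_events) {    /* this line falls between the hunks; assumed */
                remove = 0;
                if (ctx->nr_events != ctx->nr_active)
                        rotate = 1;
                if (ctx->nr_freq)
                        freq = 1;
        }

        /* Common case: nothing to rotate and no frequency work, so the PMU
         * (and PERF_GLOBAL_CTRL on Intel) is not touched at all this tick. */
        if (!rotate && !freq)
                goto done;

        perf_ctx_lock(cpuctx, cpuctx->task_ctx);
        perf_pmu_disable(cpuctx->ctx.pmu);

        if (freq) {
                perf_ctx_adjust_freq(&cpuctx->ctx, interval);
                if (ctx)
                        perf_ctx_adjust_freq(ctx, interval);
        }

        if (rotate) {
                cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
                if (ctx)
                        ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);

                rotate_ctx(&cpuctx->ctx);
                if (ctx)
                        rotate_ctx(ctx);

                perf_event_sched_in(cpuctx, ctx, current);
        }

        perf_pmu_enable(cpuctx->ctx.pmu);
        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);

done:
        if (remove)
                list_del_init(&cpuctx->rotation_list);
}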