author     Peter Zijlstra <a.p.zijlstra@chello.nl>    2011-11-16 08:38:16 -0500
committer  Ingo Molnar <mingo@elte.hu>                2011-12-06 02:33:52 -0500
commit     0f5a2601284237e2ba089389fd75d67f77626cef
tree       37eedc660f09a36cfbd6b2a2c28e8cd0d1dbe167 /kernel/events
parent     d6c1c49de577fa292af2449817364b7d89b574d8
perf: Avoid a useless pmu_disable() in the perf-tick
Gleb writes:
> Currently pmu is disabled and re-enabled on each timer interrupt even
> when no rotation or frequency adjustment is needed. On Intel CPU this
> results in two writes into PERF_GLOBAL_CTRL MSR per tick. On bare metal
> it does not cause significant slowdown, but when running perf in a virtual
> machine it leads to 20% slowdown on my machine.
Cure this by keeping a perf_event_context::nr_freq counter of the active
events that require frequency adjustment, and use it in the same fashion
as the existing nr_events != nr_active test in perf_rotate_context().
Being able to rule out both rotation and frequency adjustment a priori
for the common case lets us avoid the otherwise superfluous PMU disable.
Suggested-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-515yhoatehd3gza7we9fapaa@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
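
For readers who want the shape of the change without wading through the
diff, here is a minimal, self-contained userspace sketch (plain C) of the
pattern the patch applies: maintain a cheap counter alongside
nr_events/nr_active so the tick path can prove there is nothing to do
before it touches any PMU state. The struct ctx, tick(), sched_in(),
sched_out(), pmu_disable() and pmu_enable() names are stand-ins invented
for this illustration; only the nr_events, nr_active and nr_freq fields
mirror real members of struct perf_event_context, and the real
disable/enable are PERF_GLOBAL_CTRL MSR writes done by the PMU driver.

#include <stdbool.h>
#include <stdio.h>

struct ctx {
	int nr_events;	/* all events on this context */
	int nr_active;	/* events currently scheduled on the PMU */
	int nr_freq;	/* active events needing frequency adjustment */
};

/* Stand-ins for the expensive PERF_GLOBAL_CTRL MSR writes. */
static void pmu_disable(void) { puts("pmu_disable (MSR write)"); }
static void pmu_enable(void)  { puts("pmu_enable (MSR write)"); }

/* Model of the event_sched_in()/event_sched_out() bookkeeping. */
static void sched_in(struct ctx *c, bool freq_event)
{
	c->nr_active++;
	if (freq_event)
		c->nr_freq++;
}

static void sched_out(struct ctx *c, bool freq_event)
{
	c->nr_active--;
	if (freq_event)
		c->nr_freq--;
}

/* Model of perf_rotate_context(): bail out before any PMU access. */
static void tick(struct ctx *c)
{
	bool rotate = c->nr_events != c->nr_active; /* events starved for PMU time */
	bool freq   = c->nr_freq != 0;              /* sample periods need re-tuning */

	if (!rotate && !freq)
		return;	/* common case: zero MSR writes this tick */

	pmu_disable();
	if (freq)
		puts("  adjust sampling periods");
	if (rotate)
		puts("  rotate flexible events");
	pmu_enable();
}

int main(void)
{
	struct ctx c = { .nr_events = 2, .nr_active = 2, .nr_freq = 0 };

	tick(&c);		/* fast path: prints nothing */
	c.nr_events++;
	sched_in(&c, true);	/* a freq-adjusting event appears */
	tick(&c);		/* slow path: disable, adjust, enable */
	return 0;
}

The design point this illustrates: the counter is maintained at
sched-in/sched-out time, where bookkeeping is already being paid for, so
the per-tick check reduces to a pair of integer compares.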
Diffstat (limited to 'kernel/events')
 kernel/events/core.c | 48 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 16 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a355ffb0b28f..b3fed52aaf20 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1130,6 +1130,8 @@ event_sched_out(struct perf_event *event,
 	if (!is_software_event(event))
 		cpuctx->active_oncpu--;
 	ctx->nr_active--;
+	if (event->attr.freq && event->attr.sample_freq)
+		ctx->nr_freq--;
 	if (event->attr.exclusive || !cpuctx->active_oncpu)
 		cpuctx->exclusive = 0;
 }
@@ -1407,6 +1409,8 @@ event_sched_in(struct perf_event *event,
 	if (!is_software_event(event))
 		cpuctx->active_oncpu++;
 	ctx->nr_active++;
+	if (event->attr.freq && event->attr.sample_freq)
+		ctx->nr_freq++;
 
 	if (event->attr.exclusive)
 		cpuctx->exclusive = 1;
@@ -2329,6 +2333,9 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 	u64 interrupts, now;
 	s64 delta;
 
+	if (!ctx->nr_freq)
+		return;
+
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
 		if (event->state != PERF_EVENT_STATE_ACTIVE)
 			continue;
@@ -2384,12 +2391,14 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 {
 	u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
 	struct perf_event_context *ctx = NULL;
-	int rotate = 0, remove = 1;
+	int rotate = 0, remove = 1, freq = 0;
 
 	if (cpuctx->ctx.nr_events) {
 		remove = 0;
 		if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
 			rotate = 1;
+		if (cpuctx->ctx.nr_freq)
+			freq = 1;
 	}
 
 	ctx = cpuctx->task_ctx;
@@ -2397,33 +2406,40 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 		remove = 0;
 		if (ctx->nr_events != ctx->nr_active)
 			rotate = 1;
+		if (ctx->nr_freq)
+			freq = 1;
 	}
 
+	if (!rotate && !freq)
+		goto done;
+
 	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
 	perf_pmu_disable(cpuctx->ctx.pmu);
-	perf_ctx_adjust_freq(&cpuctx->ctx, interval);
-	if (ctx)
-		perf_ctx_adjust_freq(ctx, interval);
 
-	if (!rotate)
-		goto done;
+	if (freq) {
+		perf_ctx_adjust_freq(&cpuctx->ctx, interval);
+		if (ctx)
+			perf_ctx_adjust_freq(ctx, interval);
+	}
 
-	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-	if (ctx)
-		ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
+	if (rotate) {
+		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+		if (ctx)
+			ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
 
-	rotate_ctx(&cpuctx->ctx);
-	if (ctx)
-		rotate_ctx(ctx);
+		rotate_ctx(&cpuctx->ctx);
+		if (ctx)
+			rotate_ctx(ctx);
+
+		perf_event_sched_in(cpuctx, ctx, current);
+	}
 
-	perf_event_sched_in(cpuctx, ctx, current);
+	perf_pmu_enable(cpuctx->ctx.pmu);
+	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 
 done:
 	if (remove)
 		list_del_init(&cpuctx->rotation_list);
-
-	perf_pmu_enable(cpuctx->ctx.pmu);
-	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 }
 
 void perf_event_task_tick(void)