diff options
| author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2009-12-17 07:16:32 -0500 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2009-12-17 07:21:36 -0500 |
| commit | 5d27c23df09b702868d9a3bff86ec6abd22963ac (patch) | |
| tree | ce29a1253087067b3a0e4239d0936eede870ab0b | |
| parent | 06d65bda75341485d32f33da474b0664819ad497 (diff) | |
perf events: Don't report side-band events on each cpu for per-task-per-cpu events
Acme noticed that his FORK/MMAP numbers were inflated by about
the same factor as his cpu-count.
This led to the discovery of a few more sites that need to
respect the event->cpu filter.
Reported-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20091217121830.215333434@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
| -rw-r--r-- | kernel/perf_event.c | 32 |
1 file changed, 18 insertions, 14 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 8ab86988bd24..03cc061398d1 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
| @@ -1381,6 +1381,9 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | |||
| 1381 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 1381 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
| 1382 | continue; | 1382 | continue; |
| 1383 | 1383 | ||
| 1384 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | ||
| 1385 | continue; | ||
| 1386 | |||
| 1384 | hwc = &event->hw; | 1387 | hwc = &event->hw; |
| 1385 | 1388 | ||
| 1386 | interrupts = hwc->interrupts; | 1389 | interrupts = hwc->interrupts; |
| @@ -3265,6 +3268,9 @@ static void perf_event_task_output(struct perf_event *event, | |||
| 3265 | 3268 | ||
| 3266 | static int perf_event_task_match(struct perf_event *event) | 3269 | static int perf_event_task_match(struct perf_event *event) |
| 3267 | { | 3270 | { |
| 3271 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | ||
| 3272 | return 0; | ||
| 3273 | |||
| 3268 | if (event->attr.comm || event->attr.mmap || event->attr.task) | 3274 | if (event->attr.comm || event->attr.mmap || event->attr.task) |
| 3269 | return 1; | 3275 | return 1; |
| 3270 | 3276 | ||
| @@ -3290,12 +3296,11 @@ static void perf_event_task_event(struct perf_task_event *task_event) | |||
| 3290 | rcu_read_lock(); | 3296 | rcu_read_lock(); |
| 3291 | cpuctx = &get_cpu_var(perf_cpu_context); | 3297 | cpuctx = &get_cpu_var(perf_cpu_context); |
| 3292 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 3298 | perf_event_task_ctx(&cpuctx->ctx, task_event); |
| 3293 | put_cpu_var(perf_cpu_context); | ||
| 3294 | |||
| 3295 | if (!ctx) | 3299 | if (!ctx) |
| 3296 | ctx = rcu_dereference(task_event->task->perf_event_ctxp); | 3300 | ctx = rcu_dereference(task_event->task->perf_event_ctxp); |
| 3297 | if (ctx) | 3301 | if (ctx) |
| 3298 | perf_event_task_ctx(ctx, task_event); | 3302 | perf_event_task_ctx(ctx, task_event); |
| 3303 | put_cpu_var(perf_cpu_context); | ||
| 3299 | rcu_read_unlock(); | 3304 | rcu_read_unlock(); |
| 3300 | } | 3305 | } |
| 3301 | 3306 | ||
| @@ -3372,6 +3377,9 @@ static void perf_event_comm_output(struct perf_event *event, | |||
| 3372 | 3377 | ||
| 3373 | static int perf_event_comm_match(struct perf_event *event) | 3378 | static int perf_event_comm_match(struct perf_event *event) |
| 3374 | { | 3379 | { |
| 3380 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | ||
| 3381 | return 0; | ||
| 3382 | |||
| 3375 | if (event->attr.comm) | 3383 | if (event->attr.comm) |
| 3376 | return 1; | 3384 | return 1; |
| 3377 | 3385 | ||
| @@ -3408,15 +3416,10 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
| 3408 | rcu_read_lock(); | 3416 | rcu_read_lock(); |
| 3409 | cpuctx = &get_cpu_var(perf_cpu_context); | 3417 | cpuctx = &get_cpu_var(perf_cpu_context); |
| 3410 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); | 3418 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); |
| 3411 | put_cpu_var(perf_cpu_context); | ||
| 3412 | |||
| 3413 | /* | ||
| 3414 | * doesn't really matter which of the child contexts the | ||
| 3415 | * events ends up in. | ||
| 3416 | */ | ||
| 3417 | ctx = rcu_dereference(current->perf_event_ctxp); | 3419 | ctx = rcu_dereference(current->perf_event_ctxp); |
| 3418 | if (ctx) | 3420 | if (ctx) |
| 3419 | perf_event_comm_ctx(ctx, comm_event); | 3421 | perf_event_comm_ctx(ctx, comm_event); |
| 3422 | put_cpu_var(perf_cpu_context); | ||
| 3420 | rcu_read_unlock(); | 3423 | rcu_read_unlock(); |
| 3421 | } | 3424 | } |
| 3422 | 3425 | ||
| @@ -3491,6 +3494,9 @@ static void perf_event_mmap_output(struct perf_event *event, | |||
| 3491 | static int perf_event_mmap_match(struct perf_event *event, | 3494 | static int perf_event_mmap_match(struct perf_event *event, |
| 3492 | struct perf_mmap_event *mmap_event) | 3495 | struct perf_mmap_event *mmap_event) |
| 3493 | { | 3496 | { |
| 3497 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | ||
| 3498 | return 0; | ||
| 3499 | |||
| 3494 | if (event->attr.mmap) | 3500 | if (event->attr.mmap) |
| 3495 | return 1; | 3501 | return 1; |
| 3496 | 3502 | ||
| @@ -3564,15 +3570,10 @@ got_name: | |||
| 3564 | rcu_read_lock(); | 3570 | rcu_read_lock(); |
| 3565 | cpuctx = &get_cpu_var(perf_cpu_context); | 3571 | cpuctx = &get_cpu_var(perf_cpu_context); |
| 3566 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); | 3572 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); |
| 3567 | put_cpu_var(perf_cpu_context); | ||
| 3568 | |||
| 3569 | /* | ||
| 3570 | * doesn't really matter which of the child contexts the | ||
| 3571 | * events ends up in. | ||
| 3572 | */ | ||
| 3573 | ctx = rcu_dereference(current->perf_event_ctxp); | 3573 | ctx = rcu_dereference(current->perf_event_ctxp); |
| 3574 | if (ctx) | 3574 | if (ctx) |
| 3575 | perf_event_mmap_ctx(ctx, mmap_event); | 3575 | perf_event_mmap_ctx(ctx, mmap_event); |
| 3576 | put_cpu_var(perf_cpu_context); | ||
| 3576 | rcu_read_unlock(); | 3577 | rcu_read_unlock(); |
| 3577 | 3578 | ||
| 3578 | kfree(buf); | 3579 | kfree(buf); |
| @@ -3863,6 +3864,9 @@ static int perf_swevent_match(struct perf_event *event, | |||
| 3863 | struct perf_sample_data *data, | 3864 | struct perf_sample_data *data, |
| 3864 | struct pt_regs *regs) | 3865 | struct pt_regs *regs) |
| 3865 | { | 3866 | { |
| 3867 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | ||
| 3868 | return 0; | ||
| 3869 | |||
| 3866 | if (!perf_swevent_is_counting(event)) | 3870 | if (!perf_swevent_is_counting(event)) |
| 3867 | return 0; | 3871 | return 0; |
| 3868 | 3872 | ||
