author		Peter Zijlstra <peterz@infradead.org>	2014-05-02 10:56:01 -0400
committer	Ingo Molnar <mingo@kernel.org>	2014-05-07 05:33:14 -0400
commit		46ce0fe97a6be7532ce6126bb26ce89fed81528c (patch)
tree		b47c910ca19eba6be3e0b8c08e99b9ab8b4903c2 /kernel/events
parent		38583f095c5a8138ae2a1c9173d0fd8a9f10e8aa (diff)
perf: Fix race in removing an event
When removing a (sibling) event we do:

	raw_spin_lock_irq(&ctx->lock);
	perf_group_detach(event);
	raw_spin_unlock_irq(&ctx->lock);

	<hole>

	perf_remove_from_context(event);
		raw_spin_lock_irq(&ctx->lock);
		...
		raw_spin_unlock_irq(&ctx->lock);
Now, assuming the event is a sibling, it will be 'unreachable' for
things like ctx_sched_out(), because that path iterates the group
leaders and each leader's sibling list, and we just unhooked the
sibling.
So, if during <hole> we get ctx_sched_out(), it will miss the event
and not call event_sched_out() on it, leaving it programmed on the
PMU.
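To see the mechanism, here is a minimal, self-contained userspace toy
model of that iteration. It is not kernel code: the struct, the singly
linked sibling list, and the function are simplified stand-ins for the
real perf structures, kept only to show that a walk over the leader's
siblings (roughly what ctx_sched_out()/group_sched_out() do) can no
longer reach an unhooked sibling:

	/* toy_model.c - userspace model of the race window; cc toy_model.c */
	#include <stdio.h>

	struct event {
		const char *name;
		struct event *next_sibling;	/* simplified sibling list */
		int programmed;			/* still live on the PMU? */
	};

	/* rough stand-in for group_sched_out(): leader plus all siblings */
	static void group_sched_out(struct event *leader)
	{
		leader->programmed = 0;
		for (struct event *s = leader->next_sibling; s; s = s->next_sibling)
			s->programmed = 0;
	}

	int main(void)
	{
		struct event sibling = { "sibling", NULL, 1 };
		struct event leader  = { "leader", &sibling, 1 };

		leader.next_sibling = NULL;	/* perf_group_detach() unhooks it */
		group_sched_out(&leader);	/* ctx_sched_out() runs in the <hole> */

		/* prints "0 1": the sibling is left programmed on the PMU */
		printf("%d %d\n", leader.programmed, sibling.programmed);
		return 0;
	}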
The subsequent perf_remove_from_context() call will find the ctx is
inactive and only call list_del_event() to remove the event from all
other lists.
Hereafter we can proceed to free the event, while it is still
programmed on the PMU!
Close this hole by moving perf_group_detach() inside the same
ctx->lock region(s) perf_remove_from_context() has.
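Concretely, the cross-CPU removal path then performs the detach and the
list removal under a single lock acquisition (abridged excerpt from the
__perf_remove_from_context() hunk in the diff below; the unlock follows
after the is_active handling elided here):

	raw_spin_lock(&ctx->lock);
	event_sched_out(event, cpuctx, ctx);
	if (re->detach_group)
		perf_group_detach(event);
	list_del_event(event, ctx);
	...
	raw_spin_unlock(&ctx->lock);

The task-context path in perf_remove_from_context() gains the same
detach_group check inside its own ctx->lock region.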
The condition on inherited events only in __perf_event_exit_task() is
likely complete crap because non-inherited events are part of groups
too and we're tearing down just the same. But leave that for another
patch.
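For reference, the condition in question is the old
'if (child_event->parent)' guard around perf_group_detach() in
__perf_event_exit_task(); this patch preserves it unchanged as the new
boolean argument (see the corresponding hunk below):

	perf_remove_from_context(child_event, !!child_event->parent);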
Most-likely-Fixes: e03a9a55b4e ("perf: Change close() semantics for group events")
Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Tested-by: Vince Weaver <vincent.weaver@maine.edu>
Much-staring-at-traces-by: Vince Weaver <vincent.weaver@maine.edu>
Much-staring-at-traces-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140505093124.GN17778@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/events')
-rw-r--r--	kernel/events/core.c	47
1 file changed, 26 insertions(+), 21 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f83a71a3e46d..ea899e2b5593 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1443,6 +1443,11 @@ group_sched_out(struct perf_event *group_event,
 	cpuctx->exclusive = 0;
 }
 
+struct remove_event {
+	struct perf_event *event;
+	bool detach_group;
+};
+
 /*
  * Cross CPU call to remove a performance event
  *
@@ -1451,12 +1456,15 @@ group_sched_out(struct perf_event *group_event,
  */
 static int __perf_remove_from_context(void *info)
 {
-	struct perf_event *event = info;
+	struct remove_event *re = info;
+	struct perf_event *event = re->event;
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 
 	raw_spin_lock(&ctx->lock);
 	event_sched_out(event, cpuctx, ctx);
+	if (re->detach_group)
+		perf_group_detach(event);
 	list_del_event(event, ctx);
 	if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
 		ctx->is_active = 0;
@@ -1481,10 +1489,14 @@ static int __perf_remove_from_context(void *info)
  * When called from perf_event_exit_task, it's OK because the
  * context has been detached from its task.
  */
-static void perf_remove_from_context(struct perf_event *event)
+static void perf_remove_from_context(struct perf_event *event, bool detach_group)
 {
 	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *task = ctx->task;
+	struct remove_event re = {
+		.event = event,
+		.detach_group = detach_group,
+	};
 
 	lockdep_assert_held(&ctx->mutex);
 
@@ -1493,12 +1505,12 @@ static void perf_remove_from_context(struct perf_event *event)
 		 * Per cpu events are removed via an smp call and
 		 * the removal is always successful.
 		 */
-		cpu_function_call(event->cpu, __perf_remove_from_context, event);
+		cpu_function_call(event->cpu, __perf_remove_from_context, &re);
 		return;
 	}
 
retry:
-	if (!task_function_call(task, __perf_remove_from_context, event))
+	if (!task_function_call(task, __perf_remove_from_context, &re))
 		return;
 
 	raw_spin_lock_irq(&ctx->lock);
@@ -1515,6 +1527,8 @@ retry:
 	 * Since the task isn't running, its safe to remove the event, us
 	 * holding the ctx->lock ensures the task won't get scheduled in.
 	 */
+	if (detach_group)
+		perf_group_detach(event);
 	list_del_event(event, ctx);
 	raw_spin_unlock_irq(&ctx->lock);
 }
@@ -3281,10 +3295,7 @@ int perf_event_release_kernel(struct perf_event *event)
 	 * to trigger the AB-BA case.
 	 */
 	mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
-	raw_spin_lock_irq(&ctx->lock);
-	perf_group_detach(event);
-	raw_spin_unlock_irq(&ctx->lock);
-	perf_remove_from_context(event);
+	perf_remove_from_context(event, true);
 	mutex_unlock(&ctx->mutex);
 
 	free_event(event);
@@ -7165,7 +7176,7 @@ SYSCALL_DEFINE5(perf_event_open,
 		struct perf_event_context *gctx = group_leader->ctx;
 
 		mutex_lock(&gctx->mutex);
-		perf_remove_from_context(group_leader);
+		perf_remove_from_context(group_leader, false);
 
 		/*
 		 * Removing from the context ends up with disabled
@@ -7175,7 +7186,7 @@ SYSCALL_DEFINE5(perf_event_open,
 		perf_event__state_init(group_leader);
 		list_for_each_entry(sibling, &group_leader->sibling_list,
 				    group_entry) {
-			perf_remove_from_context(sibling);
+			perf_remove_from_context(sibling, false);
 			perf_event__state_init(sibling);
 			put_ctx(gctx);
 		}
@@ -7305,7 +7316,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 	mutex_lock(&src_ctx->mutex);
 	list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
 				 event_entry) {
-		perf_remove_from_context(event);
+		perf_remove_from_context(event, false);
 		unaccount_event_cpu(event, src_cpu);
 		put_ctx(src_ctx);
 		list_add(&event->migrate_entry, &events);
@@ -7367,13 +7378,7 @@ __perf_event_exit_task(struct perf_event *child_event,
 			 struct perf_event_context *child_ctx,
 			 struct task_struct *child)
 {
-	if (child_event->parent) {
-		raw_spin_lock_irq(&child_ctx->lock);
-		perf_group_detach(child_event);
-		raw_spin_unlock_irq(&child_ctx->lock);
-	}
-
-	perf_remove_from_context(child_event);
+	perf_remove_from_context(child_event, !!child_event->parent);
 
 	/*
 	 * It can happen that the parent exits first, and has events
@@ -7857,14 +7862,14 @@ static void perf_pmu_rotate_stop(struct pmu *pmu)
 
 static void __perf_event_exit_context(void *__info)
 {
+	struct remove_event re = { .detach_group = false };
 	struct perf_event_context *ctx = __info;
-	struct perf_event *event;
 
 	perf_pmu_rotate_stop(ctx->pmu);
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(event, &ctx->event_list, event_entry)
-		__perf_remove_from_context(event);
+	list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
+		__perf_remove_from_context(&re);
 	rcu_read_unlock();
 }
 