author		Peter Zijlstra <peterz@infradead.org>	2014-05-02 10:56:01 -0400
committer	Ingo Molnar <mingo@kernel.org>	2014-05-07 05:33:14 -0400
commit		46ce0fe97a6be7532ce6126bb26ce89fed81528c (patch)
tree		b47c910ca19eba6be3e0b8c08e99b9ab8b4903c2 /kernel/events
parent		38583f095c5a8138ae2a1c9173d0fd8a9f10e8aa (diff)
perf: Fix race in removing an event
When removing a (sibling) event we do:

	raw_spin_lock_irq(&ctx->lock);
	perf_group_detach(event);
	raw_spin_unlock_irq(&ctx->lock);

	<hole>

	perf_remove_from_context(event);
		raw_spin_lock_irq(&ctx->lock);
		...
		raw_spin_unlock_irq(&ctx->lock);

Now, assuming the event is a sibling, it will be 'unreachable' for
things like ctx_sched_out() because that iterates the
groups->siblings, and we just unhooked the sibling.

So, if during <hole> we get ctx_sched_out(), it will miss the event
and not call event_sched_out() on it, leaving it programmed on the
PMU.

The subsequent perf_remove_from_context() call will find the ctx is
inactive and only call list_del_event() to remove the event from all
other lists.

Hereafter we can proceed to free the event; while still programmed!

Close this hole by moving perf_group_detach() inside the same
ctx->lock region(s) perf_remove_from_context() has.

The condition on inherited events only in __perf_event_exit_task() is
likely complete crap because non-inherited events are part of groups
too and we're tearing down just the same. But leave that for another
patch.

Most-likely-Fixes: e03a9a55b4e ("perf: Change close() semantics for group events")
Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Tested-by: Vince Weaver <vincent.weaver@maine.edu>
Much-staring-at-traces-by: Vince Weaver <vincent.weaver@maine.edu>
Much-staring-at-traces-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140505093124.GN17778@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
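[Editor's note] To make the window concrete, below is a minimal userspace
sketch of the interleaving. It is illustrative code, not the kernel
implementation: struct event, ctx_active, sched_out_all(), remove_racy()
and remove_fixed() are made-up stand-ins, and the racing ctx_sched_out()
analogue is invoked sequentially inside the hole so the outcome is
deterministic. Build with `cc -pthread race-sketch.c`.

	/* race-sketch.c */
	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct event {
		struct event *next;	/* sibling list link */
		bool scheduled;		/* "programmed on the PMU" */
	};

	static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;
	static bool ctx_active = true;
	static struct event sibling = { .next = NULL,     .scheduled = true };
	static struct event leader  = { .next = &sibling, .scheduled = true };

	/* ctx_sched_out() analogue: unschedules only events it can reach. */
	static void sched_out_all(void)
	{
		pthread_mutex_lock(&ctx_lock);
		for (struct event *e = &leader; e; e = e->next)
			e->scheduled = false;	/* a detached sibling is missed */
		ctx_active = false;
		pthread_mutex_unlock(&ctx_lock);
	}

	/* Pre-fix shape: detach and removal in two separate lock regions. */
	static void remove_racy(struct event *ev)
	{
		pthread_mutex_lock(&ctx_lock);
		leader.next = NULL;		/* perf_group_detach() */
		pthread_mutex_unlock(&ctx_lock);

		sched_out_all();		/* the <hole>: ev is unreachable */

		pthread_mutex_lock(&ctx_lock);
		if (ctx_active)			/* ctx now inactive: skipped */
			ev->scheduled = false;
		pthread_mutex_unlock(&ctx_lock);
		/* freeing ev here would free a still-scheduled event */
	}

	/* Post-fix shape: detach inside the same lock region as sched-out. */
	static void remove_fixed(struct event *ev)
	{
		pthread_mutex_lock(&ctx_lock);
		ev->scheduled = false;		/* event_sched_out() */
		leader.next = NULL;		/* perf_group_detach() */
		pthread_mutex_unlock(&ctx_lock);
	}

	int main(void)
	{
		remove_racy(&sibling);
		printf("racy:  sibling scheduled at free point? %s\n",
		       sibling.scheduled ? "yes (bug)" : "no");

		leader.next = &sibling;		/* reset; run fixed variant */
		sibling.scheduled = true;
		remove_fixed(&sibling);
		printf("fixed: sibling scheduled at free point? %s\n",
		       sibling.scheduled ? "yes" : "no");
		return 0;
	}

The fixed variant mirrors the patch: because the detach and the sched-out
happen inside one ctx->lock critical section, no list walk can observe the
sibling unhooked but still scheduled.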
Diffstat (limited to 'kernel/events')
 kernel/events/core.c | 47 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 21 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f83a71a3e46d..ea899e2b5593 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1443,6 +1443,11 @@ group_sched_out(struct perf_event *group_event,
 	cpuctx->exclusive = 0;
 }
 
+struct remove_event {
+	struct perf_event *event;
+	bool detach_group;
+};
+
 /*
  * Cross CPU call to remove a performance event
  *
@@ -1451,12 +1456,15 @@ group_sched_out(struct perf_event *group_event,
  */
 static int __perf_remove_from_context(void *info)
 {
-	struct perf_event *event = info;
+	struct remove_event *re = info;
+	struct perf_event *event = re->event;
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 
 	raw_spin_lock(&ctx->lock);
 	event_sched_out(event, cpuctx, ctx);
+	if (re->detach_group)
+		perf_group_detach(event);
 	list_del_event(event, ctx);
 	if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
 		ctx->is_active = 0;
@@ -1481,10 +1489,14 @@ static int __perf_remove_from_context(void *info)
  * When called from perf_event_exit_task, it's OK because the
  * context has been detached from its task.
  */
-static void perf_remove_from_context(struct perf_event *event)
+static void perf_remove_from_context(struct perf_event *event, bool detach_group)
 {
 	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *task = ctx->task;
+	struct remove_event re = {
+		.event = event,
+		.detach_group = detach_group,
+	};
 
 	lockdep_assert_held(&ctx->mutex);
 
@@ -1493,12 +1505,12 @@ static void perf_remove_from_context(struct perf_event *event)
 		 * Per cpu events are removed via an smp call and
 		 * the removal is always successful.
 		 */
-		cpu_function_call(event->cpu, __perf_remove_from_context, event);
+		cpu_function_call(event->cpu, __perf_remove_from_context, &re);
 		return;
 	}
 
 retry:
-	if (!task_function_call(task, __perf_remove_from_context, event))
+	if (!task_function_call(task, __perf_remove_from_context, &re))
 		return;
 
 	raw_spin_lock_irq(&ctx->lock);
@@ -1515,6 +1527,8 @@ retry:
 	 * Since the task isn't running, its safe to remove the event, us
 	 * holding the ctx->lock ensures the task won't get scheduled in.
 	 */
+	if (detach_group)
+		perf_group_detach(event);
 	list_del_event(event, ctx);
 	raw_spin_unlock_irq(&ctx->lock);
 }
@@ -3281,10 +3295,7 @@ int perf_event_release_kernel(struct perf_event *event)
 	 * to trigger the AB-BA case.
 	 */
 	mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
-	raw_spin_lock_irq(&ctx->lock);
-	perf_group_detach(event);
-	raw_spin_unlock_irq(&ctx->lock);
-	perf_remove_from_context(event);
+	perf_remove_from_context(event, true);
 	mutex_unlock(&ctx->mutex);
 
 	free_event(event);
@@ -7165,7 +7176,7 @@ SYSCALL_DEFINE5(perf_event_open,
 		struct perf_event_context *gctx = group_leader->ctx;
 
 		mutex_lock(&gctx->mutex);
-		perf_remove_from_context(group_leader);
+		perf_remove_from_context(group_leader, false);
 
 		/*
 		 * Removing from the context ends up with disabled
@@ -7175,7 +7186,7 @@ SYSCALL_DEFINE5(perf_event_open,
 		perf_event__state_init(group_leader);
 		list_for_each_entry(sibling, &group_leader->sibling_list,
 				    group_entry) {
-			perf_remove_from_context(sibling);
+			perf_remove_from_context(sibling, false);
 			perf_event__state_init(sibling);
 			put_ctx(gctx);
 		}
@@ -7305,7 +7316,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 	mutex_lock(&src_ctx->mutex);
 	list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
 				 event_entry) {
-		perf_remove_from_context(event);
+		perf_remove_from_context(event, false);
 		unaccount_event_cpu(event, src_cpu);
 		put_ctx(src_ctx);
 		list_add(&event->migrate_entry, &events);
@@ -7367,13 +7378,7 @@ __perf_event_exit_task(struct perf_event *child_event,
 			 struct perf_event_context *child_ctx,
 			 struct task_struct *child)
 {
-	if (child_event->parent) {
-		raw_spin_lock_irq(&child_ctx->lock);
-		perf_group_detach(child_event);
-		raw_spin_unlock_irq(&child_ctx->lock);
-	}
-
-	perf_remove_from_context(child_event);
+	perf_remove_from_context(child_event, !!child_event->parent);
 
 	/*
 	 * It can happen that the parent exits first, and has events
@@ -7857,14 +7862,14 @@ static void perf_pmu_rotate_stop(struct pmu *pmu)
 
 static void __perf_event_exit_context(void *__info)
 {
+	struct remove_event re = { .detach_group = false };
 	struct perf_event_context *ctx = __info;
-	struct perf_event *event;
 
 	perf_pmu_rotate_stop(ctx->pmu);
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(event, &ctx->event_list, event_entry)
-		__perf_remove_from_context(event);
+	list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
+		__perf_remove_from_context(&re);
 	rcu_read_unlock();
 }
 
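[Editor's note] One design point worth spelling out: cpu_function_call()
and task_function_call() hand the callback a single void *info argument,
which is why the patch bundles the event pointer and the new detach_group
flag into struct remove_event rather than adding a parameter to
__perf_remove_from_context(). A small userspace sketch of that pattern
follows; struct remove_args, remove_cb() and call_on_cpu() are invented
stand-ins, not the real kernel API.

	#include <stdbool.h>
	#include <stdio.h>

	struct event { const char *name; };

	/* analogue of struct remove_event: bundles what the callback needs */
	struct remove_args {
		struct event *event;
		bool detach_group;
	};

	/* analogue of __perf_remove_from_context(): one opaque pointer in */
	static int remove_cb(void *info)
	{
		struct remove_args *ra = info;

		printf("removing %s (detach_group=%d)\n",
		       ra->event->name, ra->detach_group);
		return 0;
	}

	/* stand-in for cpu_function_call(): runs cb(info) for a given cpu */
	static int call_on_cpu(int cpu, int (*cb)(void *), void *info)
	{
		(void)cpu;	/* a real implementation would IPI that cpu */
		return cb(info);
	}

	int main(void)
	{
		struct event ev = { .name = "cycles" };
		struct remove_args ra = { .event = &ev, .detach_group = true };

		return call_on_cpu(0, remove_cb, &ra);
	}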