diff options
author | Peter Zijlstra <peterz@infradead.org> | 2014-06-23 10:12:42 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2014-07-16 07:18:42 -0400 |
commit | 4a1c0f262f88e2676fda80a6bf80e7dbccae1dcb (patch) | |
tree | 4c309aa5302c0a30438a62f7761a08a7f2adf6d5 /kernel | |
parent | 7711fe4fc2606712125cff1a55ce00df2ae0f1fb (diff) |
perf: Fix lockdep warning on process exit
Sasha Levin reported:
> While fuzzing with trinity inside a KVM tools guest running the latest -next
> kernel I've stumbled on the following spew:
>
> ======================================================
> [ INFO: possible circular locking dependency detected ]
> 3.15.0-next-20140613-sasha-00026-g6dd125d-dirty #654 Not tainted
> -------------------------------------------------------
> trinity-c578/9725 is trying to acquire lock:
> (&(&pool->lock)->rlock){-.-...}, at: __queue_work (kernel/workqueue.c:1346)
>
> but task is already holding lock:
> (&ctx->lock){-.....}, at: perf_event_exit_task (kernel/events/core.c:7471 kernel/events/core.c:7533)
>
> which lock already depends on the new lock.
> 1 lock held by trinity-c578/9725:
> #0: (&ctx->lock){-.....}, at: perf_event_exit_task (kernel/events/core.c:7471 kernel/events/core.c:7533)
>
> Call Trace:
> dump_stack (lib/dump_stack.c:52)
> print_circular_bug (kernel/locking/lockdep.c:1216)
> __lock_acquire (kernel/locking/lockdep.c:1840 kernel/locking/lockdep.c:1945 kernel/locking/lockdep.c:2131 kernel/locking/lockdep.c:3182)
> lock_acquire (./arch/x86/include/asm/current.h:14 kernel/locking/lockdep.c:3602)
> _raw_spin_lock (include/linux/spinlock_api_smp.h:143 kernel/locking/spinlock.c:151)
> __queue_work (kernel/workqueue.c:1346)
> queue_work_on (kernel/workqueue.c:1424)
> free_object (lib/debugobjects.c:209)
> __debug_check_no_obj_freed (lib/debugobjects.c:715)
> debug_check_no_obj_freed (lib/debugobjects.c:727)
> kmem_cache_free (mm/slub.c:2683 mm/slub.c:2711)
> free_task (kernel/fork.c:221)
> __put_task_struct (kernel/fork.c:250)
> put_ctx (include/linux/sched.h:1855 kernel/events/core.c:898)
> perf_event_exit_task (kernel/events/core.c:907 kernel/events/core.c:7478 kernel/events/core.c:7533)
> do_exit (kernel/exit.c:766)
> do_group_exit (kernel/exit.c:884)
> get_signal_to_deliver (kernel/signal.c:2347)
> do_signal (arch/x86/kernel/signal.c:698)
> do_notify_resume (arch/x86/kernel/signal.c:751)
> int_signal (arch/x86/kernel/entry_64.S:600)
Urgh.. so the only way I can make that happen is through:

  perf_event_exit_task_context()
    raw_spin_lock(&child_ctx->lock);
    unclone_ctx(child_ctx)
      put_ctx(ctx->parent_ctx);
    raw_spin_unlock_irqrestore(&child_ctx->lock);

And we can avoid this by doing the change below.
I can't immediately see how this changed recently, but given that you
say it's easy to reproduce, let's fix this.
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Dave Jones <davej@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20140623141242.GB19860@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/events/core.c | 18 |
1 files changed, 17 insertions, 1 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index c46b02bfe179..6b17ac1b0c2a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -7486,7 +7486,7 @@ __perf_event_exit_task(struct perf_event *child_event, | |||
7486 | static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | 7486 | static void perf_event_exit_task_context(struct task_struct *child, int ctxn) |
7487 | { | 7487 | { |
7488 | struct perf_event *child_event, *next; | 7488 | struct perf_event *child_event, *next; |
7489 | struct perf_event_context *child_ctx; | 7489 | struct perf_event_context *child_ctx, *parent_ctx; |
7490 | unsigned long flags; | 7490 | unsigned long flags; |
7491 | 7491 | ||
7492 | if (likely(!child->perf_event_ctxp[ctxn])) { | 7492 | if (likely(!child->perf_event_ctxp[ctxn])) { |
@@ -7511,6 +7511,15 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | |||
7511 | raw_spin_lock(&child_ctx->lock); | 7511 | raw_spin_lock(&child_ctx->lock); |
7512 | task_ctx_sched_out(child_ctx); | 7512 | task_ctx_sched_out(child_ctx); |
7513 | child->perf_event_ctxp[ctxn] = NULL; | 7513 | child->perf_event_ctxp[ctxn] = NULL; |
7514 | |||
7515 | /* | ||
7516 | * In order to avoid freeing: child_ctx->parent_ctx->task | ||
7517 | * under perf_event_context::lock, grab another reference. | ||
7518 | */ | ||
7519 | parent_ctx = child_ctx->parent_ctx; | ||
7520 | if (parent_ctx) | ||
7521 | get_ctx(parent_ctx); | ||
7522 | |||
7514 | /* | 7523 | /* |
7515 | * If this context is a clone; unclone it so it can't get | 7524 | * If this context is a clone; unclone it so it can't get |
7516 | * swapped to another process while we're removing all | 7525 | * swapped to another process while we're removing all |
@@ -7521,6 +7530,13 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | |||
7521 | raw_spin_unlock_irqrestore(&child_ctx->lock, flags); | 7530 | raw_spin_unlock_irqrestore(&child_ctx->lock, flags); |
7522 | 7531 | ||
7523 | /* | 7532 | /* |
7533 | * Now that we no longer hold perf_event_context::lock, drop | ||
7534 | * our extra child_ctx->parent_ctx reference. | ||
7535 | */ | ||
7536 | if (parent_ctx) | ||
7537 | put_ctx(parent_ctx); | ||
7538 | |||
7539 | /* | ||
7524 | * Report the task dead after unscheduling the events so that we | 7540 | * Report the task dead after unscheduling the events so that we |
7525 | * won't get any samples after PERF_RECORD_EXIT. We can however still | 7541 | * won't get any samples after PERF_RECORD_EXIT. We can however still |
7526 | * get a few PERF_RECORD_READ events. | 7542 | * get a few PERF_RECORD_READ events. |