aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/events
diff options
context:
space:
mode:
author Peter Zijlstra <peterz@infradead.org> 2016-01-25 08:09:54 -0500
committer Ingo Molnar <mingo@kernel.org> 2016-01-28 14:06:36 -0500
commit 6a3351b612b72c558910c88a43e2ef6d7d68bc97 (patch)
tree 0429ba6efb8a4c654ac4c267395653430818829f /kernel/events
parent 78cd2c748f459739ff864dd9308c0f6caf7f6e41 (diff)
perf: Fix race in perf_event_exit_task_context()
There is a race between perf_event_exit_task_context() and orphans_remove_work() which results in a use-after-free.

We mark ctx->task with TASK_TOMBSTONE to indicate a context is 'dead', under ctx->lock. After which point event_function_call() on any event of that context will NOP.

A concurrent orphans_remove_work() will only hold ctx->mutex for the list iteration and not serialize against this. Therefore it's possible that orphans_remove_work()'s perf_remove_from_context() call will fail, but we'll continue to free the event, with the result of free'd memory still being on lists and everything.

Once perf_event_exit_task_context() gets around to acquiring ctx->mutex it too will iterate the event list, encounter the already free'd event and proceed to free it _again_. This fails with the WARN in free_event().

Plug the race by having perf_event_exit_task_context() hold ctx::mutex over the whole tear-down, thereby 'naturally' serializing against all other sites, including the orphan work.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: alexander.shishkin@linux.intel.com
Cc: dsahern@gmail.com
Cc: namhyung@kernel.org
Link: http://lkml.kernel.org/r/20160125130954.GY6357@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/events')
-rw-r--r-- kernel/events/core.c 50
1 files changed, 29 insertions, 21 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6759f2a332d7..1d243fadfd12 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8748,14 +8748,40 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
8748{ 8748{
8749 struct perf_event_context *child_ctx, *clone_ctx = NULL; 8749 struct perf_event_context *child_ctx, *clone_ctx = NULL;
8750 struct perf_event *child_event, *next; 8750 struct perf_event *child_event, *next;
8751 unsigned long flags;
8752 8751
8753 WARN_ON_ONCE(child != current); 8752 WARN_ON_ONCE(child != current);
8754 8753
8755 child_ctx = perf_lock_task_context(child, ctxn, &flags); 8754 child_ctx = perf_pin_task_context(child, ctxn);
8756 if (!child_ctx) 8755 if (!child_ctx)
8757 return; 8756 return;
8758 8757
8758 /*
8759 * In order to reduce the amount of tricky in ctx tear-down, we hold
8760 * ctx::mutex over the entire thing. This serializes against almost
8761 * everything that wants to access the ctx.
8762 *
8763 * The exception is sys_perf_event_open() /
8764 * perf_event_create_kernel_counter() which does find_get_context()
8765 * without ctx::mutex (it cannot because of the move_group double mutex
8766 * lock thing). See the comments in perf_install_in_context().
8767 *
8768 * We can recurse on the same lock type through:
8769 *
8770 * __perf_event_exit_task()
8771 * sync_child_event()
8772 * put_event()
8773 * mutex_lock(&ctx->mutex)
8774 *
8775 * But since its the parent context it won't be the same instance.
8776 */
8777 mutex_lock(&child_ctx->mutex);
8778
8779 /*
8780 * In a single ctx::lock section, de-schedule the events and detach the
8781 * context from the task such that we cannot ever get it scheduled back
8782 * in.
8783 */
8784 raw_spin_lock_irq(&child_ctx->lock);
8759 task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx); 8785 task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx);
8760 8786
8761 /* 8787 /*
@@ -8767,14 +8793,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
8767 WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE); 8793 WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
8768 put_task_struct(current); /* cannot be last */ 8794 put_task_struct(current); /* cannot be last */
8769 8795
8770 /*
8771 * If this context is a clone; unclone it so it can't get
8772 * swapped to another process while we're removing all
8773 * the events from it.
8774 */
8775 clone_ctx = unclone_ctx(child_ctx); 8796 clone_ctx = unclone_ctx(child_ctx);
8776 update_context_time(child_ctx); 8797 raw_spin_unlock_irq(&child_ctx->lock);
8777 raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
8778 8798
8779 if (clone_ctx) 8799 if (clone_ctx)
8780 put_ctx(clone_ctx); 8800 put_ctx(clone_ctx);
@@ -8786,18 +8806,6 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
8786 */ 8806 */
8787 perf_event_task(child, child_ctx, 0); 8807 perf_event_task(child, child_ctx, 0);
8788 8808
8789 /*
8790 * We can recurse on the same lock type through:
8791 *
8792 * __perf_event_exit_task()
8793 * sync_child_event()
8794 * put_event()
8795 * mutex_lock(&ctx->mutex)
8796 *
8797 * But since its the parent context it won't be the same instance.
8798 */
8799 mutex_lock(&child_ctx->mutex);
8800
8801 list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry) 8809 list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
8802 __perf_event_exit_task(child_event, child_ctx, child); 8810 __perf_event_exit_task(child_event, child_ctx, child);
8803 8811