diff options
author | Peter Zijlstra <peterz@infradead.org> | 2016-01-25 08:09:54 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-01-28 14:06:36 -0500 |
commit | 6a3351b612b72c558910c88a43e2ef6d7d68bc97 (patch) | |
tree | 0429ba6efb8a4c654ac4c267395653430818829f /kernel/events | |
parent | 78cd2c748f459739ff864dd9308c0f6caf7f6e41 (diff) |
perf: Fix race in perf_event_exit_task_context()
There is a race between perf_event_exit_task_context() and
orphans_remove_work() which results in a use-after-free.
We mark ctx->task with TASK_TOMBSTONE to indicate a context is
'dead', under ctx->lock. After which point event_function_call()
on any event of that context will NOP.
A concurrent orphans_remove_work() will only hold ctx->mutex for
the list iteration and not serialize against this. Therefore it's
possible that orphans_remove_work()'s perf_remove_from_context()
call will fail, but we'll continue to free the event, with the
result of free'd memory still being on lists and everything.
Once perf_event_exit_task_context() gets around to acquiring
ctx->mutex it too will iterate the event list, encounter the
already free'd event and proceed to free it _again_. This fails
with the WARN in free_event().
Plug the race by having perf_event_exit_task_context() hold
ctx::mutex over the whole tear-down, thereby 'naturally'
serializing against all other sites, including the orphan work.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: alexander.shishkin@linux.intel.com
Cc: dsahern@gmail.com
Cc: namhyung@kernel.org
Link: http://lkml.kernel.org/r/20160125130954.GY6357@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/events')
-rw-r--r-- | kernel/events/core.c | 50 |
1 files changed, 29 insertions, 21 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index 6759f2a332d7..1d243fadfd12 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -8748,14 +8748,40 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | |||
8748 | { | 8748 | { |
8749 | struct perf_event_context *child_ctx, *clone_ctx = NULL; | 8749 | struct perf_event_context *child_ctx, *clone_ctx = NULL; |
8750 | struct perf_event *child_event, *next; | 8750 | struct perf_event *child_event, *next; |
8751 | unsigned long flags; | ||
8752 | 8751 | ||
8753 | WARN_ON_ONCE(child != current); | 8752 | WARN_ON_ONCE(child != current); |
8754 | 8753 | ||
8755 | child_ctx = perf_lock_task_context(child, ctxn, &flags); | 8754 | child_ctx = perf_pin_task_context(child, ctxn); |
8756 | if (!child_ctx) | 8755 | if (!child_ctx) |
8757 | return; | 8756 | return; |
8758 | 8757 | ||
8758 | /* | ||
8759 | * In order to reduce the amount of tricky in ctx tear-down, we hold | ||
8760 | * ctx::mutex over the entire thing. This serializes against almost | ||
8761 | * everything that wants to access the ctx. | ||
8762 | * | ||
8763 | * The exception is sys_perf_event_open() / | ||
8764 | * perf_event_create_kernel_count() which does find_get_context() | ||
8765 | * without ctx::mutex (it cannot because of the move_group double mutex | ||
8766 | * lock thing). See the comments in perf_install_in_context(). | ||
8767 | * | ||
8768 | * We can recurse on the same lock type through: | ||
8769 | * | ||
8770 | * __perf_event_exit_task() | ||
8771 | * sync_child_event() | ||
8772 | * put_event() | ||
8773 | * mutex_lock(&ctx->mutex) | ||
8774 | * | ||
8775 | * But since its the parent context it won't be the same instance. | ||
8776 | */ | ||
8777 | mutex_lock(&child_ctx->mutex); | ||
8778 | |||
8779 | /* | ||
8780 | * In a single ctx::lock section, de-schedule the events and detach the | ||
8781 | * context from the task such that we cannot ever get it scheduled back | ||
8782 | * in. | ||
8783 | */ | ||
8784 | raw_spin_lock_irq(&child_ctx->lock); | ||
8759 | task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx); | 8785 | task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx); |
8760 | 8786 | ||
8761 | /* | 8787 | /* |
@@ -8767,14 +8793,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | |||
8767 | WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE); | 8793 | WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE); |
8768 | put_task_struct(current); /* cannot be last */ | 8794 | put_task_struct(current); /* cannot be last */ |
8769 | 8795 | ||
8770 | /* | ||
8771 | * If this context is a clone; unclone it so it can't get | ||
8772 | * swapped to another process while we're removing all | ||
8773 | * the events from it. | ||
8774 | */ | ||
8775 | clone_ctx = unclone_ctx(child_ctx); | 8796 | clone_ctx = unclone_ctx(child_ctx); |
8776 | update_context_time(child_ctx); | 8797 | raw_spin_unlock_irq(&child_ctx->lock); |
8777 | raw_spin_unlock_irqrestore(&child_ctx->lock, flags); | ||
8778 | 8798 | ||
8779 | if (clone_ctx) | 8799 | if (clone_ctx) |
8780 | put_ctx(clone_ctx); | 8800 | put_ctx(clone_ctx); |
@@ -8786,18 +8806,6 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | |||
8786 | */ | 8806 | */ |
8787 | perf_event_task(child, child_ctx, 0); | 8807 | perf_event_task(child, child_ctx, 0); |
8788 | 8808 | ||
8789 | /* | ||
8790 | * We can recurse on the same lock type through: | ||
8791 | * | ||
8792 | * __perf_event_exit_task() | ||
8793 | * sync_child_event() | ||
8794 | * put_event() | ||
8795 | * mutex_lock(&ctx->mutex) | ||
8796 | * | ||
8797 | * But since its the parent context it won't be the same instance. | ||
8798 | */ | ||
8799 | mutex_lock(&child_ctx->mutex); | ||
8800 | |||
8801 | list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry) | 8809 | list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry) |
8802 | __perf_event_exit_task(child_event, child_ctx, child); | 8810 | __perf_event_exit_task(child_event, child_ctx, child); |
8803 | 8811 | ||