author     Ingo Molnar <mingo@kernel.org>  2016-03-31 03:55:12 -0400
committer  Ingo Molnar <mingo@kernel.org>  2016-03-31 03:55:12 -0400
commit     84c48d8d01b74a2b98c164513ca5e37c73166825 (patch)
tree       f157fe93864dd9a08029cb62b50d707e7db186d5
parent     643cb15ba07260faadd9fcfabac4f5d9d0ddc053 (diff)
parent     85dc600263c2291cea33bffa90038808ee64198b (diff)
Merge branch 'perf/urgent' into perf/core, to fix up fixes before queueing up new changes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
 arch/x86/events/amd/ibs.c | 52
 kernel/events/core.c      | 15
 2 files changed, 58 insertions(+), 9 deletions(-)
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 3ea25c3917c0..feb90f6730e8 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -28,10 +28,46 @@ static u32 ibs_caps;
 #define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
 #define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT
 
+
+/*
+ * IBS states:
+ *
+ * ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken
+ * and any further add()s must fail.
+ *
+ * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are
+ * complicated by the fact that the IBS hardware can send late NMIs (ie. after
+ * we've cleared the EN bit).
+ *
+ * In order to consume these late NMIs we have the STOPPED state, any NMI that
+ * happens after we've cleared the EN state will clear this bit and report the
+ * NMI handled (this is fundamentally racy in the face of multiple NMI sources,
+ * someone else can consume our BIT and our NMI will go unhandled).
+ *
+ * And since we cannot set/clear this separate bit together with the EN bit,
+ * there are races; if we cleared STARTED early, an NMI could land in
+ * between clearing STARTED and clearing the EN bit (in fact multiple NMIs
+ * could happen if the period is small enough), and consume our STOPPED bit
+ * and trigger streams of unhandled NMIs.
+ *
+ * If, however, we clear STARTED late, an NMI can hit between clearing the
+ * EN bit and clearing STARTED, still see STARTED set and process the event.
+ * If this event will have the VALID bit clear, we bail properly, but this
+ * is not a given. With VALID set we can end up calling pmu::stop() again
+ * (the throttle logic) and trigger the WARNs in there.
+ *
+ * So what we do is set STOPPING before clearing EN to avoid the pmu::stop()
+ * nesting, and clear STARTED late, so that we have a well defined state over
+ * the clearing of the EN bit.
+ *
+ * XXX: we could probably be using !atomic bitops for all this.
+ */
+
 enum ibs_states {
 	IBS_ENABLED	= 0,
 	IBS_STARTED	= 1,
 	IBS_STOPPING	= 2,
+	IBS_STOPPED	= 3,
 
 	IBS_MAX_STATES,
 };
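
The state-machine comment above is dense, so here is a minimal userspace model of the four bits it describes. The bitop helpers are stand-ins built on C11 atomics, not the kernel's <asm/bitops.h>, and the whole thing is illustrative rather than driver code; the sketches after the later hunks reuse these helpers.

	/* Userspace model of the IBS state bits; illustrative only. */
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	enum ibs_states {
		IBS_ENABLED,	/* counter claimed by pmu::add() */
		IBS_STARTED,	/* hardware armed; NMIs are expected */
		IBS_STOPPING,	/* pmu::stop() already in progress */
		IBS_STOPPED,	/* EN cleared; one late NMI may still arrive */
		IBS_MAX_STATES,
	};

	/* Stand-ins for the kernel's atomic bitops, built on C11 atomics. */
	static bool test_and_set_bit(int nr, _Atomic unsigned long *addr)
	{
		unsigned long mask = 1UL << nr;
		return atomic_fetch_or(addr, mask) & mask;
	}

	static bool test_and_clear_bit(int nr, _Atomic unsigned long *addr)
	{
		unsigned long mask = 1UL << nr;
		return atomic_fetch_and(addr, ~mask) & mask;
	}

	static void set_bit(int nr, _Atomic unsigned long *addr)
	{
		atomic_fetch_or(addr, 1UL << nr);
	}

	static void clear_bit(int nr, _Atomic unsigned long *addr)
	{
		atomic_fetch_and(addr, ~(1UL << nr));
	}

	static bool test_bit(int nr, _Atomic unsigned long *addr)
	{
		return atomic_load(addr) & (1UL << nr);
	}

	int main(void)
	{
		static _Atomic unsigned long state;

		/* pmu::add(): claim the counter; a second add() must fail. */
		if (!test_and_set_bit(IBS_ENABLED, &state))
			printf("counter claimed\n");
		if (test_and_set_bit(IBS_ENABLED, &state))
			printf("second add() correctly refused\n");
		return 0;
	}
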
@@ -377,11 +413,10 @@ static void perf_ibs_start(struct perf_event *event, int flags)
 
 	perf_ibs_set_period(perf_ibs, hwc, &period);
 	/*
-	 * Set STARTED before enabling the hardware, such that
-	 * a subsequent NMI must observe it. Then clear STOPPING
-	 * such that we don't consume NMIs by accident.
+	 * Set STARTED before enabling the hardware, such that a subsequent NMI
+	 * must observe it.
 	 */
 	set_bit(IBS_STARTED, pcpu->state);
 	clear_bit(IBS_STOPPING, pcpu->state);
 	perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
 
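
The start path now relies purely on ordering: STARTED must be visible before the hardware EN bit is set, because an NMI can only fire once EN is set. A sketch using the model above, where hw_set_en() is a hypothetical stand-in for the MSR write inside perf_ibs_enable_event():

	static _Atomic unsigned long state;	/* one word standing in for pcpu->state */

	static void hw_set_en(int on)
	{
		(void)on;	/* hypothetical stand-in for the IBS control MSR write */
	}

	static void model_ibs_start(void)
	{
		set_bit(IBS_STARTED, &state);	/* publish first: NMIs fire only once EN is set */
		clear_bit(IBS_STOPPING, &state);/* re-arm the stop path for this run */
		hw_set_en(1);			/* only now can an NMI arrive */
	}

Reversed, there would be a window where an NMI lands between the EN write and set_bit(), finds STARTED clear, and is reported as not-ours.
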
@@ -396,6 +431,9 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
 	u64 config;
 	int stopping;
 
+	if (test_and_set_bit(IBS_STOPPING, pcpu->state))
+		return;
+
 	stopping = test_bit(IBS_STARTED, pcpu->state);
 
 	if (!stopping && (hwc->state & PERF_HES_UPTODATE))
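
The new test_and_set_bit() guard is what stops pmu::stop() from nesting (the throttle path mentioned in the big comment): only the first caller proceeds, any racing or nested stop bails early instead of tripping the WARNs. In the model:

	static void model_ibs_stop(void)
	{
		if (test_and_set_bit(IBS_STOPPING, &state))
			return;		/* a stop is already in flight; don't nest */

		/* ... stop sequence, continued after the next hunk ... */
	}
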
@@ -405,12 +443,12 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
 
 	if (stopping) {
 		/*
-		 * Set STOPPING before disabling the hardware, such that it
+		 * Set STOPPED before disabling the hardware, such that it
 		 * must be visible to NMIs the moment we clear the EN bit,
 		 * at which point we can generate an !VALID sample which
 		 * we need to consume.
 		 */
-		set_bit(IBS_STOPPING, pcpu->state);
+		set_bit(IBS_STOPPED, pcpu->state);
 		perf_ibs_disable_event(perf_ibs, hwc, config);
 		/*
 		 * Clear STARTED after disabling the hardware; if it were
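
The stop-side ordering is the mirror image of the start path. Continuing model_ibs_stop() from the previous sketch, its body would run roughly:

		/* Publish STOPPED before dropping EN: the one late !VALID NMI
		 * the hardware may still raise can then be recognized and
		 * consumed. */
		set_bit(IBS_STOPPED, &state);
		hw_set_en(0);
		/* Clear STARTED only after EN is gone, so the NMI handler
		 * sees a well-defined state across the EN-bit write. */
		clear_bit(IBS_STARTED, &state);
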
@@ -556,7 +594,7 @@ fail:
 	 * with samples that even have the valid bit cleared.
 	 * Mark all these NMIs as handled.
 	 */
-	if (test_and_clear_bit(IBS_STOPPING, pcpu->state))
+	if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
 		return 1;
 
 	return 0;
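
The handler's fail path is where the late NMI is finally eaten: finding STARTED clear, it claims the NMI only if it can clear STOPPED, and test_and_clear_bit() guarantees that happens exactly once. In the model (returning 1 means "NMI handled"):

	static int model_ibs_nmi(void)
	{
		if (!test_bit(IBS_STARTED, &state)) {
			/* Late NMI after a stop: consume it exactly once. */
			if (test_and_clear_bit(IBS_STOPPED, &state))
				return 1;	/* handled */
			return 0;		/* not ours; let others look */
		}
		/* ... normal sample processing elided ... */
		return 1;
	}
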
diff --git a/kernel/events/core.c b/kernel/events/core.c
index de24fbce5277..52bedc5a5aaa 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2417,14 +2417,24 @@ static void ctx_sched_out(struct perf_event_context *ctx,
 		cpuctx->task_ctx = NULL;
 	}
 
-	is_active ^= ctx->is_active; /* changed bits */
-
+	/*
+	 * Always update time if it was set; not only when it changes.
+	 * Otherwise we can 'forget' to update time for any but the last
+	 * context we sched out. For example:
+	 *
+	 *   ctx_sched_out(.event_type = EVENT_FLEXIBLE)
+	 *   ctx_sched_out(.event_type = EVENT_PINNED)
+	 *
+	 * would only update time for the pinned events.
+	 */
 	if (is_active & EVENT_TIME) {
 		/* update (and stop) ctx time */
 		update_context_time(ctx);
 		update_cgrp_time_from_cpuctx(cpuctx);
 	}
 
+	is_active ^= ctx->is_active; /* changed bits */
+
 	if (!ctx->nr_active || !(is_active & EVENT_ALL))
 		return;
 
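
The bug fixed here is easiest to see with concrete bit values. In the old order, is_active was reduced to the changed bits before the EVENT_TIME test, so scheduling out only the flexible events (with pinned events still active, hence EVENT_TIME unchanged) skipped the time update. A standalone model of both orderings; the flag values are illustrative, not the kernel's enum event_type_t:

	#include <stdio.h>

	#define EVENT_PINNED	0x01
	#define EVENT_FLEXIBLE	0x02
	#define EVENT_ALL	(EVENT_PINNED | EVENT_FLEXIBLE)
	#define EVENT_TIME	0x04

	static int ctx_is_active = EVENT_ALL | EVENT_TIME;

	static void model_sched_out(int event_type, int fixed_order)
	{
		int is_active = ctx_is_active;

		ctx_is_active &= ~event_type;
		if (!(ctx_is_active & EVENT_ALL))
			ctx_is_active = 0;	/* last sched-out drops EVENT_TIME too */

		if (!fixed_order)		/* old code: reduce to changed bits first */
			is_active ^= ctx_is_active;

		printf("type %#x: time update %s\n", event_type,
		       (is_active & EVENT_TIME) ? "done" : "MISSED");
	}

	int main(void)
	{
		model_sched_out(EVENT_FLEXIBLE, 0);	/* old order: MISSED */
		model_sched_out(EVENT_PINNED,   0);	/* old order: done */

		ctx_is_active = EVENT_ALL | EVENT_TIME;
		model_sched_out(EVENT_FLEXIBLE, 1);	/* fixed order: done */
		model_sched_out(EVENT_PINNED,   1);	/* fixed order: done */
		return 0;
	}
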
@@ -8532,6 +8542,7 @@ SYSCALL_DEFINE5(perf_event_open,
 					f_flags);
 	if (IS_ERR(event_file)) {
 		err = PTR_ERR(event_file);
+		event_file = NULL;
 		goto err_context;
 	}
 
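
This one-liner is ERR_PTR() hygiene: further down perf_event_open(), the error path (not shown in this hunk) frees the event only when event_file is NULL, because once the file exists its release callback owns the event. An ERR_PTR() value is not NULL, so without the reset the event would leak on this path. A userspace model of the idiom, with IS_ERR()/PTR_ERR() re-implemented just for the demo:

	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define MAX_ERRNO	4095
	#define ERR_PTR(err)	((void *)(long)(err))
	#define PTR_ERR(ptr)	((long)(ptr))
	#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

	/* Hypothetical resource constructor returning an ERR_PTR on failure. */
	static void *create_resource(int fail)
	{
		return fail ? ERR_PTR(-ENOMEM) : malloc(16);
	}

	int main(void)
	{
		long err = 0;
		void *res = create_resource(1);

		if (IS_ERR(res)) {
			err = PTR_ERR(res);
			res = NULL;	/* the fix: shared cleanup can test res safely */
			goto out;
		}
		/* ... use res ... */
	out:
		free(res);	/* fine for NULL; would crash on a raw ERR_PTR */
		printf("err = %ld\n", err);
		return err ? 1 : 0;
	}
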