| field | value | date |
|---|---|---|
| author | Peter Zijlstra <peterz@infradead.org> | 2012-06-05 09:30:31 -0400 |
| committer | Ingo Molnar <mingo@kernel.org> | 2012-06-06 10:59:44 -0400 |
| commit | b430f7c4706aeba4270c7ab7744fc504b9315e1c (patch) | |
| tree | ca92a63aa1499981c30fccb63db1270596af2aad | |
| parent | 436d03faf6961b30e13b2d0967aea9d772d6cf44 (diff) | |
perf/x86: Fix Intel shared extra MSR allocation
Zheng Yan reported that event group validation can wreck event state
when Intel extra_reg allocation changes event state.
Validation shouldn't change any persistent state. Cloning events in
validate_{event,group}() isn't really pretty either, so add a few
special cases to avoid modifying the event state.
The code is restructured to minimize the special case impact.
Reported-by: Zheng Yan <zheng.z.yan@linux.intel.com>
Acked-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1338903031.28282.175.camel@twins
Signed-off-by: Ingo Molnar <mingo@kernel.org>
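
For context on why validation must not touch persistent state: validate_event() and validate_group() exercise the normal constraint code against a throwaway cpuc built by allocate_fake_cpuc(), which is where the new cpuc->is_fake flag gets set, and then discard it. Below is a simplified sketch of the single-event path, paraphrased from arch/x86/kernel/cpu/perf_event.c of roughly this vintage (kernel context assumed, error handling abridged, details may differ from the exact tree this patch applies to):

```c
/*
 * Simplified sketch (not verbatim kernel code) of the validation path.
 * The fake cpuc is thrown away afterwards, so nothing done here may be
 * allowed to leak into persistent event state such as event->hw.config
 * or event->hw.extra_reg.
 */
static int validate_event(struct perf_event *event)
{
	struct cpu_hw_events *fake_cpuc;
	struct event_constraint *c;
	int ret = 0;

	fake_cpuc = allocate_fake_cpuc();	/* sets cpuc->is_fake = 1 after this patch */
	if (IS_ERR(fake_cpuc))
		return PTR_ERR(fake_cpuc);

	/* for offcore events this ends up in __intel_shared_reg_get_constraints() */
	c = x86_pmu.get_event_constraints(fake_cpuc, event);

	if (!c || !c->weight)
		ret = -EINVAL;

	if (x86_pmu.put_event_constraints)
		x86_pmu.put_event_constraints(fake_cpuc, event);

	free_fake_cpuc(fake_cpuc);
	return ret;
}
```

With is_fake set, the extra-register code in the diff below can take this path without writing back into event->hw, so validating a new event no longer corrupts the OFFCORE_RSP configuration of events that are already scheduled.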
| | file | lines changed |
|---|---|---|
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 1 |
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event.h | 1 |
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 92 |

3 files changed, 66 insertions, 28 deletions
```diff
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6da0183..cb608383e4f6 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
 		if (!cpuc->shared_regs)
 			goto error;
 	}
+	cpuc->is_fake = 1;
 	return cpuc;
 error:
 	free_fake_cpuc(cpuc);
```
```diff
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf54493..83794d8e6af0 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 
 	unsigned int		group_flag;
+	int			is_fake;
 
 	/*
 	 * Intel DebugStore bits
```
```diff
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546ec6aef..965baa2fa790 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
 {
 	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
-		return false;
+		return idx;
 
-	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
-		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-		event->hw.config |= 0x01bb;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
-		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
-	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+	if (idx == EXTRA_REG_RSP_0)
+		return EXTRA_REG_RSP_1;
+
+	if (idx == EXTRA_REG_RSP_1)
+		return EXTRA_REG_RSP_0;
+
+	return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+	event->hw.extra_reg.idx = idx;
+
+	if (idx == EXTRA_REG_RSP_0) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= 0x01b7;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+	} else if (idx == EXTRA_REG_RSP_1) {
+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+		event->hw.config |= 0x01bb;
+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
 	}
-
-	if (event->hw.extra_reg.idx == orig_idx)
-		return false;
-
-	return true;
 }
 
 /*
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 	struct event_constraint *c = &emptyconstraint;
 	struct er_account *era;
 	unsigned long flags;
-	int orig_idx = reg->idx;
+	int idx = reg->idx;
 
-	/* already allocated shared msr */
-	if (reg->alloc)
+	/*
+	 * reg->alloc can be set due to existing state, so for fake cpuc we
+	 * need to ignore this, otherwise we might fail to allocate proper fake
+	 * state for this extra reg constraint. Also see the comment below.
+	 */
+	if (reg->alloc && !cpuc->is_fake)
 		return NULL; /* call x86_get_event_constraint() */
 
 again:
-	era = &cpuc->shared_regs->regs[reg->idx];
+	era = &cpuc->shared_regs->regs[idx];
 	/*
 	 * we use spin_lock_irqsave() to avoid lockdep issues when
 	 * passing a fake cpuc
@@ -1173,6 +1183,29 @@ again:
 
 	if (!atomic_read(&era->ref) || era->config == reg->config) {
 
+		/*
+		 * If its a fake cpuc -- as per validate_{group,event}() we
+		 * shouldn't touch event state and we can avoid doing so
+		 * since both will only call get_event_constraints() once
+		 * on each event, this avoids the need for reg->alloc.
+		 *
+		 * Not doing the ER fixup will only result in era->reg being
+		 * wrong, but since we won't actually try and program hardware
+		 * this isn't a problem either.
+		 */
+		if (!cpuc->is_fake) {
+			if (idx != reg->idx)
+				intel_fixup_er(event, idx);
+
+			/*
+			 * x86_schedule_events() can call get_event_constraints()
+			 * multiple times on events in the case of incremental
+			 * scheduling(). reg->alloc ensures we only do the ER
+			 * allocation once.
+			 */
+			reg->alloc = 1;
+		}
+
 		/* lock in msr value */
 		era->config = reg->config;
 		era->reg = reg->reg;
@@ -1180,17 +1213,17 @@ again:
 		/* one more user */
 		atomic_inc(&era->ref);
 
-		/* no need to reallocate during incremental event scheduling */
-		reg->alloc = 1;
-
 		/*
 		 * need to call x86_get_event_constraint()
 		 * to check if associated event has constraints
 		 */
 		c = NULL;
-	} else if (intel_try_alt_er(event, orig_idx)) {
-		raw_spin_unlock_irqrestore(&era->lock, flags);
-		goto again;
+	} else {
+		idx = intel_alt_er(idx);
+		if (idx != reg->idx) {
+			raw_spin_unlock_irqrestore(&era->lock, flags);
+			goto again;
+		}
 	}
 	raw_spin_unlock_irqrestore(&era->lock, flags);
 
@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
 	struct er_account *era;
 
 	/*
-	 * only put constraint if extra reg was actually
-	 * allocated. Also takes care of event which do
-	 * not use an extra shared reg
+	 * Only put constraint if extra reg was actually allocated. Also takes
+	 * care of event which do not use an extra shared reg.
+	 *
+	 * Also, if this is a fake cpuc we shouldn't touch any event state
+	 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+	 * either since it'll be thrown out.
 	 */
-	if (!reg->alloc)
+	if (!reg->alloc || cpuc->is_fake)
 		return;
 
 	era = &cpuc->shared_regs->regs[reg->idx];
```
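
To see the reworked allocation flow in isolation, here is a small self-contained model in plain user-space C. It is not kernel code and every name in it is an illustrative stand-in; it only mimics the logic of __intel_shared_reg_get_constraints() after this patch: try the requested extra register, fall over to the alternate one (the intel_alt_er() step) when the slot is taken with a different config, stop once the candidate wraps back to the original index, and only write the result back into the event (the intel_fixup_er() step) when the cpuc is real.

```c
/*
 * Self-contained model (not kernel code) of the reworked allocation loop.
 * All names are illustrative stand-ins for the kernel's structures.
 */
#include <stdbool.h>
#include <stdio.h>

enum { RSP_0, RSP_1, NR_ER };			/* stand-ins for EXTRA_REG_RSP_0/1 */

struct er_slot  { int ref; unsigned long config; };	/* ~ struct er_account */
struct toy_cpuc { bool is_fake; struct er_slot er[NR_ER]; };

/* Loosely mirrors intel_alt_er(): RSP_0 and RSP_1 are interchangeable. */
static int alt_er(int idx)
{
	if (idx == RSP_0)
		return RSP_1;
	if (idx == RSP_1)
		return RSP_0;
	return idx;
}

/* Loosely mirrors the allocation loop after the patch (no locking modeled). */
static bool try_get_er(struct toy_cpuc *cpuc, int *event_idx, unsigned long cfg)
{
	int idx = *event_idx;			/* local candidate; event untouched */
again:
	if (!cpuc->er[idx].ref || cpuc->er[idx].config == cfg) {
		if (!cpuc->is_fake && idx != *event_idx)
			*event_idx = idx;	/* the intel_fixup_er() step */
		cpuc->er[idx].config = cfg;	/* lock in the msr value */
		cpuc->er[idx].ref++;		/* one more user */
		return true;
	}
	idx = alt_er(idx);			/* slot busy: try the alternate MSR */
	if (idx != *event_idx)
		goto again;
	return false;				/* wrapped around: both are taken */
}

int main(void)
{
	struct toy_cpuc cpuc = { .is_fake = false };
	int idx = RSP_0;
	bool ok;

	/* RSP_0 already owned with a different config, so we fall over to RSP_1 */
	cpuc.er[RSP_0] = (struct er_slot){ .ref = 1, .config = 0xdead };
	ok = try_get_er(&cpuc, &idx, 0xbeef);
	printf("allocated=%d idx=%d\n", ok, idx);	/* expect allocated=1 idx=1 */
	return 0;
}
```

The design point the patch enforces is visible here: on a fake cpuc the shared-register bookkeeping is still consulted and updated, but the caller-visible event state (*event_idx in the model, event->hw in the kernel) is never written.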
