| field | value | date |
|---|---|---|
| author | Peter Zijlstra <peterz@infradead.org> | 2012-06-05 09:30:31 -0400 |
| committer | Ingo Molnar <mingo@kernel.org> | 2012-06-06 10:59:44 -0400 |
| commit | b430f7c4706aeba4270c7ab7744fc504b9315e1c (patch) | |
| tree | ca92a63aa1499981c30fccb63db1270596af2aad | |
| parent | 436d03faf6961b30e13b2d0967aea9d772d6cf44 (diff) | |
perf/x86: Fix Intel shared extra MSR allocation
Zheng Yan reported that event group validation can wreck event state
when Intel extra_reg allocation changes event state.
Validation shouldn't change any persistent state. Cloning events in
validate_{event,group}() isn't really pretty either, so add a few
special cases to avoid modifying the event state.
The code is restructured to minimize the special case impact.
Reported-by: Zheng Yan <zheng.z.yan@linux.intel.com>
Acked-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1338903031.28282.175.camel@twins
Signed-off-by: Ingo Molnar <mingo@kernel.org>
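
For context on why validation must not touch persistent state: validate_event() and validate_group() exercise the normal constraint code against a throwaway cpuc built by allocate_fake_cpuc(), which is where the new cpuc->is_fake flag gets set, and then discard it. Below is a simplified sketch of the single-event path, paraphrased from arch/x86/kernel/cpu/perf_event.c of roughly this vintage (kernel context assumed, error handling abridged, details may differ from the exact tree this patch applies to):

```c
/*
 * Simplified sketch (not verbatim kernel code) of the validation path.
 * The fake cpuc is thrown away afterwards, so nothing done here may be
 * allowed to leak into persistent event state such as event->hw.config
 * or event->hw.extra_reg.
 */
static int validate_event(struct perf_event *event)
{
	struct cpu_hw_events *fake_cpuc;
	struct event_constraint *c;
	int ret = 0;

	fake_cpuc = allocate_fake_cpuc();	/* sets cpuc->is_fake = 1 after this patch */
	if (IS_ERR(fake_cpuc))
		return PTR_ERR(fake_cpuc);

	/* for offcore events this ends up in __intel_shared_reg_get_constraints() */
	c = x86_pmu.get_event_constraints(fake_cpuc, event);

	if (!c || !c->weight)
		ret = -EINVAL;

	if (x86_pmu.put_event_constraints)
		x86_pmu.put_event_constraints(fake_cpuc, event);

	free_fake_cpuc(fake_cpuc);
	return ret;
}
```

With is_fake set, the extra-register code in the diff below can take this path without writing back into event->hw, so validating a new event no longer corrupts the OFFCORE_RSP configuration of events that are already scheduled.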
| | file | lines changed |
|---|---|---|
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 1 |
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event.h | 1 |
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 92 |

3 files changed, 66 insertions, 28 deletions
```diff
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6da0183..cb608383e4f6 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
 		if (!cpuc->shared_regs)
 			goto error;
 	}
+	cpuc->is_fake = 1;
 	return cpuc;
 error:
 	free_fake_cpuc(cpuc);
```
```diff
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf54493..83794d8e6af0 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 
 	unsigned int		group_flag;
+	int			is_fake;
 
 	/*
 	 * Intel DebugStore bits
```
```diff
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546ec6aef..965baa2fa790 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
 {
 	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
-		return false;
+		return idx;
 
-	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
-		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-		event->hw.config |= 0x01bb;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
-		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
-	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+	if (idx == EXTRA_REG_RSP_0)
+		return EXTRA_REG_RSP_1;
+
+	if (idx == EXTRA_REG_RSP_1)
+		return EXTRA_REG_RSP_0;
+
+	return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+	event->hw.extra_reg.idx = idx;
+
+	if (idx == EXTRA_REG_RSP_0) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= 0x01b7;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+	} else if (idx == EXTRA_REG_RSP_1) {
+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+		event->hw.config |= 0x01bb;
+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
 	}
-
-	if (event->hw.extra_reg.idx == orig_idx)
-		return false;
-
-	return true;
 }
 
 /*
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 	struct event_constraint *c = &emptyconstraint;
 	struct er_account *era;
 	unsigned long flags;
-	int orig_idx = reg->idx;
+	int idx = reg->idx;
 
-	/* already allocated shared msr */
-	if (reg->alloc)
+	/*
+	 * reg->alloc can be set due to existing state, so for fake cpuc we
+	 * need to ignore this, otherwise we might fail to allocate proper fake
+	 * state for this extra reg constraint. Also see the comment below.
+	 */
+	if (reg->alloc && !cpuc->is_fake)
 		return NULL; /* call x86_get_event_constraint() */
 
 again:
-	era = &cpuc->shared_regs->regs[reg->idx];
+	era = &cpuc->shared_regs->regs[idx];
 	/*
 	 * we use spin_lock_irqsave() to avoid lockdep issues when
 	 * passing a fake cpuc
@@ -1173,6 +1183,29 @@ again:
 
 	if (!atomic_read(&era->ref) || era->config == reg->config) {
 
+		/*
+		 * If its a fake cpuc -- as per validate_{group,event}() we
+		 * shouldn't touch event state and we can avoid doing so
+		 * since both will only call get_event_constraints() once
+		 * on each event, this avoids the need for reg->alloc.
+		 *
+		 * Not doing the ER fixup will only result in era->reg being
+		 * wrong, but since we won't actually try and program hardware
+		 * this isn't a problem either.
+		 */
+		if (!cpuc->is_fake) {
+			if (idx != reg->idx)
+				intel_fixup_er(event, idx);
+
+			/*
+			 * x86_schedule_events() can call get_event_constraints()
+			 * multiple times on events in the case of incremental
+			 * scheduling(). reg->alloc ensures we only do the ER
+			 * allocation once.
+			 */
+			reg->alloc = 1;
+		}
+
 		/* lock in msr value */
 		era->config = reg->config;
 		era->reg = reg->reg;
@@ -1180,17 +1213,17 @@ again:
 		/* one more user */
 		atomic_inc(&era->ref);
 
-		/* no need to reallocate during incremental event scheduling */
-		reg->alloc = 1;
-
 		/*
 		 * need to call x86_get_event_constraint()
 		 * to check if associated event has constraints
 		 */
 		c = NULL;
-	} else if (intel_try_alt_er(event, orig_idx)) {
-		raw_spin_unlock_irqrestore(&era->lock, flags);
-		goto again;
+	} else {
+		idx = intel_alt_er(idx);
+		if (idx != reg->idx) {
+			raw_spin_unlock_irqrestore(&era->lock, flags);
+			goto again;
+		}
 	}
 	raw_spin_unlock_irqrestore(&era->lock, flags);
 
@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
 	struct er_account *era;
 
 	/*
-	 * only put constraint if extra reg was actually
-	 * allocated. Also takes care of event which do
-	 * not use an extra shared reg
+	 * Only put constraint if extra reg was actually allocated. Also takes
+	 * care of event which do not use an extra shared reg.
+	 *
+	 * Also, if this is a fake cpuc we shouldn't touch any event state
+	 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+	 * either since it'll be thrown out.
 	 */
-	if (!reg->alloc)
+	if (!reg->alloc || cpuc->is_fake)
 		return;
 
 	era = &cpuc->shared_regs->regs[reg->idx];
```
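
To see the reworked allocation flow in isolation, here is a small self-contained model in plain user-space C. It is not kernel code and every name in it is an illustrative stand-in; it only mimics the logic of __intel_shared_reg_get_constraints() after this patch: try the requested extra register, fall over to the alternate one (the intel_alt_er() step) when the slot is taken with a different config, stop once the candidate wraps back to the original index, and only write the result back into the event (the intel_fixup_er() step) when the cpuc is real.

```c
/*
 * Self-contained model (not kernel code) of the reworked allocation loop.
 * All names are illustrative stand-ins for the kernel's structures.
 */
#include <stdbool.h>
#include <stdio.h>

enum { RSP_0, RSP_1, NR_ER };			/* stand-ins for EXTRA_REG_RSP_0/1 */

struct er_slot  { int ref; unsigned long config; };	/* ~ struct er_account */
struct toy_cpuc { bool is_fake; struct er_slot er[NR_ER]; };

/* Loosely mirrors intel_alt_er(): RSP_0 and RSP_1 are interchangeable. */
static int alt_er(int idx)
{
	if (idx == RSP_0)
		return RSP_1;
	if (idx == RSP_1)
		return RSP_0;
	return idx;
}

/* Loosely mirrors the allocation loop after the patch (no locking modeled). */
static bool try_get_er(struct toy_cpuc *cpuc, int *event_idx, unsigned long cfg)
{
	int idx = *event_idx;			/* local candidate; event untouched */
again:
	if (!cpuc->er[idx].ref || cpuc->er[idx].config == cfg) {
		if (!cpuc->is_fake && idx != *event_idx)
			*event_idx = idx;	/* the intel_fixup_er() step */
		cpuc->er[idx].config = cfg;	/* lock in the msr value */
		cpuc->er[idx].ref++;		/* one more user */
		return true;
	}
	idx = alt_er(idx);			/* slot busy: try the alternate MSR */
	if (idx != *event_idx)
		goto again;
	return false;				/* wrapped around: both are taken */
}

int main(void)
{
	struct toy_cpuc cpuc = { .is_fake = false };
	int idx = RSP_0;
	bool ok;

	/* RSP_0 already owned with a different config, so we fall over to RSP_1 */
	cpuc.er[RSP_0] = (struct er_slot){ .ref = 1, .config = 0xdead };
	ok = try_get_er(&cpuc, &idx, 0xbeef);
	printf("allocated=%d idx=%d\n", ok, idx);	/* expect allocated=1 idx=1 */
	return 0;
}
```

The design point the patch enforces is visible here: on a fake cpuc the shared-register bookkeeping is still consulted and updated, but the caller-visible event state (*event_idx in the model, event->hw in the kernel) is never written.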
