diff options
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel.c')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 145 |
1 files changed, 108 insertions, 37 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 166546ec6aef..187c294bc658 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event) | |||
1119 | return NULL; | 1119 | return NULL; |
1120 | } | 1120 | } |
1121 | 1121 | ||
1122 | static bool intel_try_alt_er(struct perf_event *event, int orig_idx) | 1122 | static int intel_alt_er(int idx) |
1123 | { | 1123 | { |
1124 | if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) | 1124 | if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) |
1125 | return false; | 1125 | return idx; |
1126 | 1126 | ||
1127 | if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) { | 1127 | if (idx == EXTRA_REG_RSP_0) |
1128 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; | 1128 | return EXTRA_REG_RSP_1; |
1129 | event->hw.config |= 0x01bb; | 1129 | |
1130 | event->hw.extra_reg.idx = EXTRA_REG_RSP_1; | 1130 | if (idx == EXTRA_REG_RSP_1) |
1131 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; | 1131 | return EXTRA_REG_RSP_0; |
1132 | } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) { | 1132 | |
1133 | return idx; | ||
1134 | } | ||
1135 | |||
1136 | static void intel_fixup_er(struct perf_event *event, int idx) | ||
1137 | { | ||
1138 | event->hw.extra_reg.idx = idx; | ||
1139 | |||
1140 | if (idx == EXTRA_REG_RSP_0) { | ||
1133 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; | 1141 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; |
1134 | event->hw.config |= 0x01b7; | 1142 | event->hw.config |= 0x01b7; |
1135 | event->hw.extra_reg.idx = EXTRA_REG_RSP_0; | ||
1136 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; | 1143 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; |
1144 | } else if (idx == EXTRA_REG_RSP_1) { | ||
1145 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; | ||
1146 | event->hw.config |= 0x01bb; | ||
1147 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; | ||
1137 | } | 1148 | } |
1138 | |||
1139 | if (event->hw.extra_reg.idx == orig_idx) | ||
1140 | return false; | ||
1141 | |||
1142 | return true; | ||
1143 | } | 1149 | } |
1144 | 1150 | ||
1145 | /* | 1151 | /* |
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, | |||
1157 | struct event_constraint *c = &emptyconstraint; | 1163 | struct event_constraint *c = &emptyconstraint; |
1158 | struct er_account *era; | 1164 | struct er_account *era; |
1159 | unsigned long flags; | 1165 | unsigned long flags; |
1160 | int orig_idx = reg->idx; | 1166 | int idx = reg->idx; |
1161 | 1167 | ||
1162 | /* already allocated shared msr */ | 1168 | /* |
1163 | if (reg->alloc) | 1169 | * reg->alloc can be set due to existing state, so for fake cpuc we |
1170 | * need to ignore this, otherwise we might fail to allocate proper fake | ||
1171 | * state for this extra reg constraint. Also see the comment below. | ||
1172 | */ | ||
1173 | if (reg->alloc && !cpuc->is_fake) | ||
1164 | return NULL; /* call x86_get_event_constraint() */ | 1174 | return NULL; /* call x86_get_event_constraint() */ |
1165 | 1175 | ||
1166 | again: | 1176 | again: |
1167 | era = &cpuc->shared_regs->regs[reg->idx]; | 1177 | era = &cpuc->shared_regs->regs[idx]; |
1168 | /* | 1178 | /* |
1169 | * we use spin_lock_irqsave() to avoid lockdep issues when | 1179 | * we use spin_lock_irqsave() to avoid lockdep issues when |
1170 | * passing a fake cpuc | 1180 | * passing a fake cpuc |
@@ -1173,6 +1183,29 @@ again: | |||
1173 | 1183 | ||
1174 | if (!atomic_read(&era->ref) || era->config == reg->config) { | 1184 | if (!atomic_read(&era->ref) || era->config == reg->config) { |
1175 | 1185 | ||
1186 | /* | ||
1187 | * If its a fake cpuc -- as per validate_{group,event}() we | ||
1188 | * shouldn't touch event state and we can avoid doing so | ||
1189 | * since both will only call get_event_constraints() once | ||
1190 | * on each event, this avoids the need for reg->alloc. | ||
1191 | * | ||
1192 | * Not doing the ER fixup will only result in era->reg being | ||
1193 | * wrong, but since we won't actually try and program hardware | ||
1194 | * this isn't a problem either. | ||
1195 | */ | ||
1196 | if (!cpuc->is_fake) { | ||
1197 | if (idx != reg->idx) | ||
1198 | intel_fixup_er(event, idx); | ||
1199 | |||
1200 | /* | ||
1201 | * x86_schedule_events() can call get_event_constraints() | ||
1202 | * multiple times on events in the case of incremental | ||
1203 | * scheduling(). reg->alloc ensures we only do the ER | ||
1204 | * allocation once. | ||
1205 | */ | ||
1206 | reg->alloc = 1; | ||
1207 | } | ||
1208 | |||
1176 | /* lock in msr value */ | 1209 | /* lock in msr value */ |
1177 | era->config = reg->config; | 1210 | era->config = reg->config; |
1178 | era->reg = reg->reg; | 1211 | era->reg = reg->reg; |
@@ -1180,17 +1213,17 @@ again: | |||
1180 | /* one more user */ | 1213 | /* one more user */ |
1181 | atomic_inc(&era->ref); | 1214 | atomic_inc(&era->ref); |
1182 | 1215 | ||
1183 | /* no need to reallocate during incremental event scheduling */ | ||
1184 | reg->alloc = 1; | ||
1185 | |||
1186 | /* | 1216 | /* |
1187 | * need to call x86_get_event_constraint() | 1217 | * need to call x86_get_event_constraint() |
1188 | * to check if associated event has constraints | 1218 | * to check if associated event has constraints |
1189 | */ | 1219 | */ |
1190 | c = NULL; | 1220 | c = NULL; |
1191 | } else if (intel_try_alt_er(event, orig_idx)) { | 1221 | } else { |
1192 | raw_spin_unlock_irqrestore(&era->lock, flags); | 1222 | idx = intel_alt_er(idx); |
1193 | goto again; | 1223 | if (idx != reg->idx) { |
1224 | raw_spin_unlock_irqrestore(&era->lock, flags); | ||
1225 | goto again; | ||
1226 | } | ||
1194 | } | 1227 | } |
1195 | raw_spin_unlock_irqrestore(&era->lock, flags); | 1228 | raw_spin_unlock_irqrestore(&era->lock, flags); |
1196 | 1229 | ||
@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, | |||
1204 | struct er_account *era; | 1237 | struct er_account *era; |
1205 | 1238 | ||
1206 | /* | 1239 | /* |
1207 | * only put constraint if extra reg was actually | 1240 | * Only put constraint if extra reg was actually allocated. Also takes |
1208 | * allocated. Also takes care of event which do | 1241 | * care of event which do not use an extra shared reg. |
1209 | * not use an extra shared reg | 1242 | * |
1243 | * Also, if this is a fake cpuc we shouldn't touch any event state | ||
1244 | * (reg->alloc) and we don't care about leaving inconsistent cpuc state | ||
1245 | * either since it'll be thrown out. | ||
1210 | */ | 1246 | */ |
1211 | if (!reg->alloc) | 1247 | if (!reg->alloc || cpuc->is_fake) |
1212 | return; | 1248 | return; |
1213 | 1249 | ||
1214 | era = &cpuc->shared_regs->regs[reg->idx]; | 1250 | era = &cpuc->shared_regs->regs[reg->idx]; |
@@ -1300,15 +1336,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc, | |||
1300 | intel_put_shared_regs_event_constraints(cpuc, event); | 1336 | intel_put_shared_regs_event_constraints(cpuc, event); |
1301 | } | 1337 | } |
1302 | 1338 | ||
1303 | static int intel_pmu_hw_config(struct perf_event *event) | 1339 | static void intel_pebs_aliases_core2(struct perf_event *event) |
1304 | { | 1340 | { |
1305 | int ret = x86_pmu_hw_config(event); | 1341 | if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { |
1306 | |||
1307 | if (ret) | ||
1308 | return ret; | ||
1309 | |||
1310 | if (event->attr.precise_ip && | ||
1311 | (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { | ||
1312 | /* | 1342 | /* |
1313 | * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P | 1343 | * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P |
1314 | * (0x003c) so that we can use it with PEBS. | 1344 | * (0x003c) so that we can use it with PEBS. |
@@ -1329,10 +1359,48 @@ static int intel_pmu_hw_config(struct perf_event *event) | |||
1329 | */ | 1359 | */ |
1330 | u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); | 1360 | u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); |
1331 | 1361 | ||
1362 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); | ||
1363 | event->hw.config = alt_config; | ||
1364 | } | ||
1365 | } | ||
1366 | |||
1367 | static void intel_pebs_aliases_snb(struct perf_event *event) | ||
1368 | { | ||
1369 | if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { | ||
1370 | /* | ||
1371 | * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P | ||
1372 | * (0x003c) so that we can use it with PEBS. | ||
1373 | * | ||
1374 | * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't | ||
1375 | * PEBS capable. However we can use UOPS_RETIRED.ALL | ||
1376 | * (0x01c2), which is a PEBS capable event, to get the same | ||
1377 | * count. | ||
1378 | * | ||
1379 | * UOPS_RETIRED.ALL counts the number of cycles that retires | ||
1380 | * CNTMASK micro-ops. By setting CNTMASK to a value (16) | ||
1381 | * larger than the maximum number of micro-ops that can be | ||
1382 | * retired per cycle (4) and then inverting the condition, we | ||
1383 | * count all cycles that retire 16 or less micro-ops, which | ||
1384 | * is every cycle. | ||
1385 | * | ||
1386 | * Thereby we gain a PEBS capable cycle counter. | ||
1387 | */ | ||
1388 | u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16); | ||
1332 | 1389 | ||
1333 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); | 1390 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); |
1334 | event->hw.config = alt_config; | 1391 | event->hw.config = alt_config; |
1335 | } | 1392 | } |
1393 | } | ||
1394 | |||
1395 | static int intel_pmu_hw_config(struct perf_event *event) | ||
1396 | { | ||
1397 | int ret = x86_pmu_hw_config(event); | ||
1398 | |||
1399 | if (ret) | ||
1400 | return ret; | ||
1401 | |||
1402 | if (event->attr.precise_ip && x86_pmu.pebs_aliases) | ||
1403 | x86_pmu.pebs_aliases(event); | ||
1336 | 1404 | ||
1337 | if (intel_pmu_needs_lbr_smpl(event)) { | 1405 | if (intel_pmu_needs_lbr_smpl(event)) { |
1338 | ret = intel_pmu_setup_lbr_filter(event); | 1406 | ret = intel_pmu_setup_lbr_filter(event); |
@@ -1607,6 +1675,7 @@ static __initconst const struct x86_pmu intel_pmu = { | |||
1607 | .max_period = (1ULL << 31) - 1, | 1675 | .max_period = (1ULL << 31) - 1, |
1608 | .get_event_constraints = intel_get_event_constraints, | 1676 | .get_event_constraints = intel_get_event_constraints, |
1609 | .put_event_constraints = intel_put_event_constraints, | 1677 | .put_event_constraints = intel_put_event_constraints, |
1678 | .pebs_aliases = intel_pebs_aliases_core2, | ||
1610 | 1679 | ||
1611 | .format_attrs = intel_arch3_formats_attr, | 1680 | .format_attrs = intel_arch3_formats_attr, |
1612 | 1681 | ||
@@ -1840,8 +1909,9 @@ __init int intel_pmu_init(void) | |||
1840 | break; | 1909 | break; |
1841 | 1910 | ||
1842 | case 42: /* SandyBridge */ | 1911 | case 42: /* SandyBridge */ |
1843 | x86_add_quirk(intel_sandybridge_quirk); | ||
1844 | case 45: /* SandyBridge, "Romely-EP" */ | 1912 | case 45: /* SandyBridge, "Romely-EP" */ |
1913 | x86_add_quirk(intel_sandybridge_quirk); | ||
1914 | case 58: /* IvyBridge */ | ||
1845 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, | 1915 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
1846 | sizeof(hw_cache_event_ids)); | 1916 | sizeof(hw_cache_event_ids)); |
1847 | 1917 | ||
@@ -1849,6 +1919,7 @@ __init int intel_pmu_init(void) | |||
1849 | 1919 | ||
1850 | x86_pmu.event_constraints = intel_snb_event_constraints; | 1920 | x86_pmu.event_constraints = intel_snb_event_constraints; |
1851 | x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; | 1921 | x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; |
1922 | x86_pmu.pebs_aliases = intel_pebs_aliases_snb; | ||
1852 | x86_pmu.extra_regs = intel_snb_extra_regs; | 1923 | x86_pmu.extra_regs = intel_snb_extra_regs; |
1853 | /* all extra regs are per-cpu when HT is on */ | 1924 | /* all extra regs are per-cpu when HT is on */ |
1854 | x86_pmu.er_flags |= ERF_HAS_RSP_1; | 1925 | x86_pmu.er_flags |= ERF_HAS_RSP_1; |