aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/perf_event_intel.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel.c')
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c145
1 files changed, 108 insertions, 37 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546ec6aef..187c294bc658 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
1119 return NULL; 1119 return NULL;
1120} 1120}
1121 1121
1122static bool intel_try_alt_er(struct perf_event *event, int orig_idx) 1122static int intel_alt_er(int idx)
1123{ 1123{
1124 if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) 1124 if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
1125 return false; 1125 return idx;
1126 1126
1127 if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) { 1127 if (idx == EXTRA_REG_RSP_0)
1128 event->hw.config &= ~INTEL_ARCH_EVENT_MASK; 1128 return EXTRA_REG_RSP_1;
1129 event->hw.config |= 0x01bb; 1129
1130 event->hw.extra_reg.idx = EXTRA_REG_RSP_1; 1130 if (idx == EXTRA_REG_RSP_1)
1131 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; 1131 return EXTRA_REG_RSP_0;
1132 } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) { 1132
1133 return idx;
1134}
1135
1136static void intel_fixup_er(struct perf_event *event, int idx)
1137{
1138 event->hw.extra_reg.idx = idx;
1139
1140 if (idx == EXTRA_REG_RSP_0) {
1133 event->hw.config &= ~INTEL_ARCH_EVENT_MASK; 1141 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
1134 event->hw.config |= 0x01b7; 1142 event->hw.config |= 0x01b7;
1135 event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
1136 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; 1143 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
1144 } else if (idx == EXTRA_REG_RSP_1) {
1145 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
1146 event->hw.config |= 0x01bb;
1147 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
1137 } 1148 }
1138
1139 if (event->hw.extra_reg.idx == orig_idx)
1140 return false;
1141
1142 return true;
1143} 1149}
1144 1150
1145/* 1151/*
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
1157 struct event_constraint *c = &emptyconstraint; 1163 struct event_constraint *c = &emptyconstraint;
1158 struct er_account *era; 1164 struct er_account *era;
1159 unsigned long flags; 1165 unsigned long flags;
1160 int orig_idx = reg->idx; 1166 int idx = reg->idx;
1161 1167
1162 /* already allocated shared msr */ 1168 /*
1163 if (reg->alloc) 1169 * reg->alloc can be set due to existing state, so for fake cpuc we
1170 * need to ignore this, otherwise we might fail to allocate proper fake
1171 * state for this extra reg constraint. Also see the comment below.
1172 */
1173 if (reg->alloc && !cpuc->is_fake)
1164 return NULL; /* call x86_get_event_constraint() */ 1174 return NULL; /* call x86_get_event_constraint() */
1165 1175
1166again: 1176again:
1167 era = &cpuc->shared_regs->regs[reg->idx]; 1177 era = &cpuc->shared_regs->regs[idx];
1168 /* 1178 /*
1169 * we use spin_lock_irqsave() to avoid lockdep issues when 1179 * we use spin_lock_irqsave() to avoid lockdep issues when
1170 * passing a fake cpuc 1180 * passing a fake cpuc
@@ -1173,6 +1183,29 @@ again:
1173 1183
1174 if (!atomic_read(&era->ref) || era->config == reg->config) { 1184 if (!atomic_read(&era->ref) || era->config == reg->config) {
1175 1185
1186 /*
1187 * If its a fake cpuc -- as per validate_{group,event}() we
1188 * shouldn't touch event state and we can avoid doing so
1189 * since both will only call get_event_constraints() once
1190 * on each event, this avoids the need for reg->alloc.
1191 *
1192 * Not doing the ER fixup will only result in era->reg being
1193 * wrong, but since we won't actually try and program hardware
1194 * this isn't a problem either.
1195 */
1196 if (!cpuc->is_fake) {
1197 if (idx != reg->idx)
1198 intel_fixup_er(event, idx);
1199
1200 /*
1201 * x86_schedule_events() can call get_event_constraints()
1202 * multiple times on events in the case of incremental
1203 * scheduling(). reg->alloc ensures we only do the ER
1204 * allocation once.
1205 */
1206 reg->alloc = 1;
1207 }
1208
1176 /* lock in msr value */ 1209 /* lock in msr value */
1177 era->config = reg->config; 1210 era->config = reg->config;
1178 era->reg = reg->reg; 1211 era->reg = reg->reg;
@@ -1180,17 +1213,17 @@ again:
1180 /* one more user */ 1213 /* one more user */
1181 atomic_inc(&era->ref); 1214 atomic_inc(&era->ref);
1182 1215
1183 /* no need to reallocate during incremental event scheduling */
1184 reg->alloc = 1;
1185
1186 /* 1216 /*
1187 * need to call x86_get_event_constraint() 1217 * need to call x86_get_event_constraint()
1188 * to check if associated event has constraints 1218 * to check if associated event has constraints
1189 */ 1219 */
1190 c = NULL; 1220 c = NULL;
1191 } else if (intel_try_alt_er(event, orig_idx)) { 1221 } else {
1192 raw_spin_unlock_irqrestore(&era->lock, flags); 1222 idx = intel_alt_er(idx);
1193 goto again; 1223 if (idx != reg->idx) {
1224 raw_spin_unlock_irqrestore(&era->lock, flags);
1225 goto again;
1226 }
1194 } 1227 }
1195 raw_spin_unlock_irqrestore(&era->lock, flags); 1228 raw_spin_unlock_irqrestore(&era->lock, flags);
1196 1229
@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
1204 struct er_account *era; 1237 struct er_account *era;
1205 1238
1206 /* 1239 /*
1207 * only put constraint if extra reg was actually 1240 * Only put constraint if extra reg was actually allocated. Also takes
1208 * allocated. Also takes care of event which do 1241 * care of event which do not use an extra shared reg.
1209 * not use an extra shared reg 1242 *
1243 * Also, if this is a fake cpuc we shouldn't touch any event state
1244 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
1245 * either since it'll be thrown out.
1210 */ 1246 */
1211 if (!reg->alloc) 1247 if (!reg->alloc || cpuc->is_fake)
1212 return; 1248 return;
1213 1249
1214 era = &cpuc->shared_regs->regs[reg->idx]; 1250 era = &cpuc->shared_regs->regs[reg->idx];
@@ -1300,15 +1336,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
1300 intel_put_shared_regs_event_constraints(cpuc, event); 1336 intel_put_shared_regs_event_constraints(cpuc, event);
1301} 1337}
1302 1338
1303static int intel_pmu_hw_config(struct perf_event *event) 1339static void intel_pebs_aliases_core2(struct perf_event *event)
1304{ 1340{
1305 int ret = x86_pmu_hw_config(event); 1341 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
1306
1307 if (ret)
1308 return ret;
1309
1310 if (event->attr.precise_ip &&
1311 (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
1312 /* 1342 /*
1313 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P 1343 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
1314 * (0x003c) so that we can use it with PEBS. 1344 * (0x003c) so that we can use it with PEBS.
@@ -1329,10 +1359,48 @@ static int intel_pmu_hw_config(struct perf_event *event)
1329 */ 1359 */
1330 u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); 1360 u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
1331 1361
1362 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
1363 event->hw.config = alt_config;
1364 }
1365}
1366
1367static void intel_pebs_aliases_snb(struct perf_event *event)
1368{
1369 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
1370 /*
1371 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
1372 * (0x003c) so that we can use it with PEBS.
1373 *
1374 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
1375 * PEBS capable. However we can use UOPS_RETIRED.ALL
1376 * (0x01c2), which is a PEBS capable event, to get the same
1377 * count.
1378 *
1379 * UOPS_RETIRED.ALL counts the number of cycles that retires
1380 * CNTMASK micro-ops. By setting CNTMASK to a value (16)
1381 * larger than the maximum number of micro-ops that can be
1382 * retired per cycle (4) and then inverting the condition, we
1383 * count all cycles that retire 16 or less micro-ops, which
1384 * is every cycle.
1385 *
1386 * Thereby we gain a PEBS capable cycle counter.
1387 */
1388 u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
1332 1389
1333 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); 1390 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
1334 event->hw.config = alt_config; 1391 event->hw.config = alt_config;
1335 } 1392 }
1393}
1394
1395static int intel_pmu_hw_config(struct perf_event *event)
1396{
1397 int ret = x86_pmu_hw_config(event);
1398
1399 if (ret)
1400 return ret;
1401
1402 if (event->attr.precise_ip && x86_pmu.pebs_aliases)
1403 x86_pmu.pebs_aliases(event);
1336 1404
1337 if (intel_pmu_needs_lbr_smpl(event)) { 1405 if (intel_pmu_needs_lbr_smpl(event)) {
1338 ret = intel_pmu_setup_lbr_filter(event); 1406 ret = intel_pmu_setup_lbr_filter(event);
@@ -1607,6 +1675,7 @@ static __initconst const struct x86_pmu intel_pmu = {
1607 .max_period = (1ULL << 31) - 1, 1675 .max_period = (1ULL << 31) - 1,
1608 .get_event_constraints = intel_get_event_constraints, 1676 .get_event_constraints = intel_get_event_constraints,
1609 .put_event_constraints = intel_put_event_constraints, 1677 .put_event_constraints = intel_put_event_constraints,
1678 .pebs_aliases = intel_pebs_aliases_core2,
1610 1679
1611 .format_attrs = intel_arch3_formats_attr, 1680 .format_attrs = intel_arch3_formats_attr,
1612 1681
@@ -1840,8 +1909,9 @@ __init int intel_pmu_init(void)
1840 break; 1909 break;
1841 1910
1842 case 42: /* SandyBridge */ 1911 case 42: /* SandyBridge */
1843 x86_add_quirk(intel_sandybridge_quirk);
1844 case 45: /* SandyBridge, "Romely-EP" */ 1912 case 45: /* SandyBridge, "Romely-EP" */
1913 x86_add_quirk(intel_sandybridge_quirk);
1914 case 58: /* IvyBridge */
1845 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 1915 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
1846 sizeof(hw_cache_event_ids)); 1916 sizeof(hw_cache_event_ids));
1847 1917
@@ -1849,6 +1919,7 @@ __init int intel_pmu_init(void)
1849 1919
1850 x86_pmu.event_constraints = intel_snb_event_constraints; 1920 x86_pmu.event_constraints = intel_snb_event_constraints;
1851 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; 1921 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
1922 x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
1852 x86_pmu.extra_regs = intel_snb_extra_regs; 1923 x86_pmu.extra_regs = intel_snb_extra_regs;
1853 /* all extra regs are per-cpu when HT is on */ 1924 /* all extra regs are per-cpu when HT is on */
1854 x86_pmu.er_flags |= ERF_HAS_RSP_1; 1925 x86_pmu.er_flags |= ERF_HAS_RSP_1;