Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel.c')
 -rw-r--r--   arch/x86/kernel/cpu/perf_event_intel.c | 136
 1 file changed, 122 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a9e22073bd56..fbc9210b45bc 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>

+#include <asm/cpufeature.h>
 #include <asm/hardirq.h>
 #include <asm/apic.h>

@@ -190,6 +191,22 @@ struct attribute *snb_events_attrs[] = {
        NULL,
 };

+static struct event_constraint intel_hsw_event_constraints[] = {
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+       INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */
+       INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+       INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+       /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+       INTEL_EVENT_CONSTRAINT(0x08a3, 0x4),
+       /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
+       INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4),
+       /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
+       INTEL_EVENT_CONSTRAINT(0x04a3, 0xf),
+       EVENT_CONSTRAINT_END
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
        return intel_perfmon_event_map[hw_event];
@@ -872,7 +889,8 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
                return true;

        /* implicit branch sampling to correct PEBS skid */
-       if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+       if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
+           x86_pmu.intel_cap.pebs_format < 2)
                return true;

        return false;
@@ -1167,15 +1185,11 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
        cpuc = &__get_cpu_var(cpu_hw_events);

        /*
-        * Some chipsets need to unmask the LVTPC in a particular spot
-        * inside the nmi handler. As a result, the unmasking was pushed
-        * into all the nmi handlers.
-        *
-        * This handler doesn't seem to have any issues with the unmasking
-        * so it was left at the top.
+        * No known reason to not always do late ACK,
+        * but just in case do it opt-in.
         */
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-
+       if (!x86_pmu.late_ack)
+               apic_write(APIC_LVTPC, APIC_DM_NMI);
        intel_pmu_disable_all();
        handled = intel_pmu_drain_bts_buffer();
        status = intel_pmu_get_status();
@@ -1188,8 +1202,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 again:
        intel_pmu_ack_status(status);
        if (++loops > 100) {
-               WARN_ONCE(1, "perfevents: irq loop stuck!\n");
-               perf_event_print_debug();
+               static bool warned = false;
+               if (!warned) {
+                       WARN(1, "perfevents: irq loop stuck!\n");
+                       perf_event_print_debug();
+                       warned = true;
+               }
                intel_pmu_reset();
                goto done;
        }
@@ -1235,6 +1253,13 @@ again:

 done:
        intel_pmu_enable_all(0);
+       /*
+        * Only unmask the NMI after the overflow counters
+        * have been reset. This avoids spurious NMIs on
+        * Haswell CPUs.
+        */
+       if (x86_pmu.late_ack)
+               apic_write(APIC_LVTPC, APIC_DM_NMI);
        return handled;
 }

@@ -1425,7 +1450,6 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
        if (x86_pmu.event_constraints) {
                for_each_event_constraint(c, x86_pmu.event_constraints) {
                        if ((event->hw.config & c->cmask) == c->code) {
-                               /* hw.flags zeroed at initialization */
                                event->hw.flags |= c->flags;
                                return c;
                        }
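The lookup shown in the hunk above is the generic matcher that the new intel_hsw_event_constraints table is later wired into: an event's config is masked with the constraint's cmask and compared against its code, and the first hit limits the event to the counters named in the constraint's index mask. The following standalone C sketch models that lookup for illustration only; the struct layout, helper name find_constraint and the two table entries are simplified stand-ins, not the kernel's actual definitions.

#include <stdio.h>
#include <stdint.h>

struct constraint {
        uint64_t code;   /* event (and possibly umask) bits the constraint matches */
        uint64_t cmask;  /* which config bits take part in the comparison */
        uint64_t idxmsk; /* bitmask of generic counters the event may use */
};

/* Illustrative entries mirroring two of the Haswell constraints above. */
static const struct constraint table[] = {
        { 0x48,   0xff,   0x4 }, /* L1D_PEND_MISS.*: counter 2 only */
        { 0x01c0, 0xffff, 0x2 }, /* INST_RETIRED.PREC_DIST: counter 1 only */
        { 0 }                    /* end marker */
};

static const struct constraint *find_constraint(uint64_t config)
{
        for (const struct constraint *c = table; c->cmask; c++)
                if ((config & c->cmask) == c->code)
                        return c;
        return NULL; /* unconstrained: any generic counter will do */
}

int main(void)
{
        /* raw config 0x0148: umask 0x01 in bits 8-15, event 0x48 in bits 0-7 */
        const struct constraint *c = find_constraint(0x0148);

        printf("allowed counter mask: %#llx\n",
               c ? (unsigned long long)c->idxmsk : ~0ULL);
        return 0;
}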
@@ -1473,7 +1497,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
                                        struct perf_event *event)
 {
-       event->hw.flags = 0;
        intel_put_shared_regs_event_constraints(cpuc, event);
 }

@@ -1646,6 +1669,47 @@ static void core_pmu_enable_all(int added)
        }
 }

+static int hsw_hw_config(struct perf_event *event)
+{
+       int ret = intel_pmu_hw_config(event);
+
+       if (ret)
+               return ret;
+       if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
+               return 0;
+       event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
+
+       /*
+        * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
+        * PEBS or in ANY thread mode. Since the results are non-sensical forbid
+        * this combination.
+        */
+       if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
+            ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
+             event->attr.precise_ip > 0))
+               return -EOPNOTSUPP;
+
+       return 0;
+}
+
+static struct event_constraint counter2_constraint =
+                       EVENT_CONSTRAINT(0, 0x4, 0);
+
+static struct event_constraint *
+hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+       struct event_constraint *c = intel_get_event_constraints(cpuc, event);
+
+       /* Handle special quirk on in_tx_checkpointed only in counter 2 */
+       if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
+               if (c->idxmsk64 & (1U << 2))
+                       return &counter2_constraint;
+               return &emptyconstraint;
+       }
+
+       return c;
+}
+
 PMU_FORMAT_ATTR(event, "config:0-7" );
 PMU_FORMAT_ATTR(umask, "config:8-15" );
 PMU_FORMAT_ATTR(edge,  "config:18" );
@@ -1653,6 +1717,8 @@ PMU_FORMAT_ATTR(pc, "config:19" );
 PMU_FORMAT_ATTR(any,   "config:21" ); /* v3 + */
 PMU_FORMAT_ATTR(inv,   "config:23" );
 PMU_FORMAT_ATTR(cmask, "config:24-31" );
+PMU_FORMAT_ATTR(in_tx, "config:32");
+PMU_FORMAT_ATTR(in_tx_cp, "config:33");

 static struct attribute *intel_arch_formats_attr[] = {
        &format_attr_event.attr,
@@ -1807,6 +1873,8 @@ static struct attribute *intel_arch3_formats_attr[] = {
        &format_attr_any.attr,
        &format_attr_inv.attr,
        &format_attr_cmask.attr,
+       &format_attr_in_tx.attr,
+       &format_attr_in_tx_cp.attr,

        &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
        &format_attr_ldlat.attr, /* PEBS load latency */
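The two new format attributes export the Haswell TSX qualifier bits (config bit 32 for in_tx, bit 33 for in_tx_cp) to userspace, and hsw_hw_config above copies them into the hardware config while rejecting the combinations the PMU cannot handle (PEBS, i.e. precise_ip > 0, and ANY-thread mode). Below is a minimal userspace sketch of how such an event might be opened once this patch is in place; the event encoding chosen here (0x3c, unhalted core cycles, qualified by in_tx) is an illustrative assumption, not something the patch prescribes.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        struct perf_event_attr attr;
        long long count = 0;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_RAW;
        /* event=0x3c, umask=0x00, in_tx=1 (config bit 32, per the new format attr) */
        attr.config = 0x3c | (1ULL << 32);
        attr.disabled = 1;
        attr.exclude_kernel = 1;
        /* Note: also setting attr.precise_ip would be rejected by hsw_hw_config(). */

        /* count for the calling thread on any CPU */
        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }

        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        /* ... transactional (RTM/HLE) workload would run here ... */
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("cycles inside transactions: %lld\n", count);
        close(fd);
        return 0;
}

Because the bit placement is published through sysfs, the perf tool should in principle be able to request the same thing symbolically, e.g. as cpu/event=0x3c,in_tx=1/, rather than hand-building the raw config as above.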
@@ -1966,6 +2034,15 @@ static __init void intel_nehalem_quirk(void)
        }
 }

+EVENT_ATTR_STR(mem-loads,      mem_ld_hsw,     "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,     mem_st_hsw,     "event=0xd0,umask=0x82")
+
+static struct attribute *hsw_events_attrs[] = {
+       EVENT_PTR(mem_ld_hsw),
+       EVENT_PTR(mem_st_hsw),
+       NULL
+};
+
 __init int intel_pmu_init(void)
 {
        union cpuid10_edx edx;
@@ -2189,6 +2266,30 @@ __init int intel_pmu_init(void)
                break;


+       case 60: /* Haswell Client */
+       case 70:
+       case 71:
+       case 63:
+               x86_pmu.late_ack = true;
+               memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+               intel_pmu_lbr_init_snb();
+
+               x86_pmu.event_constraints = intel_hsw_event_constraints;
+               x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+               x86_pmu.extra_regs = intel_snb_extra_regs;
+               x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+               /* all extra regs are per-cpu when HT is on */
+               x86_pmu.er_flags |= ERF_HAS_RSP_1;
+               x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+
+               x86_pmu.hw_config = hsw_hw_config;
+               x86_pmu.get_event_constraints = hsw_get_event_constraints;
+               x86_pmu.cpu_events = hsw_events_attrs;
+               pr_cont("Haswell events, ");
+               break;
+
        default:
                switch (x86_pmu.version) {
                case 1:
@@ -2227,7 +2328,7 @@ __init int intel_pmu_init(void)
                 * counter, so do not extend mask to generic counters
                 */
                for_each_event_constraint(c, x86_pmu.event_constraints) {
-                       if (c->cmask != X86_RAW_EVENT_MASK
+                       if (c->cmask != FIXED_EVENT_FLAGS
                            || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
                                continue;
                        }
@@ -2237,5 +2338,12 @@ __init int intel_pmu_init(void)
                }
        }

+       /* Support full width counters using alternative MSR range */
+       if (x86_pmu.intel_cap.full_width_write) {
+               x86_pmu.max_period = x86_pmu.cntval_mask;
+               x86_pmu.perfctr = MSR_IA32_PMC0;
+               pr_cont("full-width counters, ");
+       }
+
        return 0;
 }
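The closing hunk raises the maximum sampling period when the PMU advertises full-width counter writes via the alternative MSR_IA32_PMC0 range. As background not shown in this diff, the driver's default max_period is (1ULL << 31) - 1 because writes to the legacy counter MSRs are sign-extended from bit 31. A small arithmetic sketch of what the change buys, assuming a 48-bit counter width as reported by these parts:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        int cntval_bits = 48;                    /* assumed counter width */
        uint64_t cntval_mask = (1ULL << cntval_bits) - 1;
        uint64_t legacy_max  = (1ULL << 31) - 1; /* default x86_pmu.max_period */

        printf("cntval_mask:                 %#llx\n", (unsigned long long)cntval_mask);
        printf("max period, legacy writes:   %llu\n", (unsigned long long)legacy_max);
        printf("max period, full-width MSRs: %llu\n", (unsigned long long)cntval_mask);
        return 0;
}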