Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel.c')
 arch/x86/kernel/cpu/perf_event_intel.c | 136
 1 file changed, 122 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a9e22073bd56..fbc9210b45bc 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 
+#include <asm/cpufeature.h>
 #include <asm/hardirq.h>
 #include <asm/apic.h>
 
@@ -190,6 +191,22 @@ struct attribute *snb_events_attrs[] = {
 	NULL,
 };
 
+static struct event_constraint intel_hsw_event_constraints[] = {
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */
+	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+	/* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+	INTEL_EVENT_CONSTRAINT(0x08a3, 0x4),
+	/* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
+	INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4),
+	/* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
+	INTEL_EVENT_CONSTRAINT(0x04a3, 0xf),
+	EVENT_CONSTRAINT_END
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
 	return intel_perfmon_event_map[hw_event];
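The constraint table drives counter scheduling: an event's config is masked with cmask and compared against code, and on a match the event may only be placed on the counters set in the constraint's index mask. A minimal stand-alone sketch of that lookup, using simplified types rather than the kernel's struct event_constraint (the real walk is in x86_get_event_constraints(), further down in this file):

	/* Illustrative sketch only, not code from this patch. */
	#include <stdint.h>

	struct constraint {
		uint64_t code;    /* expected value of the masked config bits */
		uint64_t cmask;   /* which config bits participate in the match */
		uint64_t idxmsk;  /* bitmask of generic counters the event may use */
	};

	static const struct constraint tbl[] = {
		{ 0xcd, 0xff, 0x8 },  /* MEM_TRANS_RETIRED.* -> counter 3 only */
		{ 0 },                /* terminator, like EVENT_CONSTRAINT_END */
	};

	static uint64_t allowed_counters(uint64_t config)
	{
		for (const struct constraint *c = tbl; c->cmask; c++)
			if ((config & c->cmask) == c->code)
				return c->idxmsk;
		return ~0ULL;  /* no entry matched: unconstrained */
	}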
@@ -872,7 +889,8 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
 		return true;
 
 	/* implicit branch sampling to correct PEBS skid */
-	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
+	    x86_pmu.intel_cap.pebs_format < 2)
 		return true;
 
 	return false;
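Haswell's PEBS format 2 records carry the eventing IP directly, so the implicit LBR fallback that corrects PEBS skid is only needed on parts reporting pebs_format < 2. The bits consulted here come from MSR_IA32_PERF_CAPABILITIES (0x345); a sketch of the layout, with field widths assumed from this editor's reading of the kernel's union perf_capabilities rather than taken from this patch:

	#include <stdint.h>

	union perf_caps {
		struct {
			uint64_t lbr_format:6;
			uint64_t pebs_trap:1;        /* checked in the hunk above */
			uint64_t pebs_arch_reg:1;
			uint64_t pebs_format:4;      /* >= 2 on Haswell */
			uint64_t smm_freeze:1;
			uint64_t full_width_write:1; /* used by the final hunk below */
		};
		uint64_t capabilities;
	};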
@@ -1167,15 +1185,11 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
 	/*
-	 * Some chipsets need to unmask the LVTPC in a particular spot
-	 * inside the nmi handler. As a result, the unmasking was pushed
-	 * into all the nmi handlers.
-	 *
-	 * This handler doesn't seem to have any issues with the unmasking
-	 * so it was left at the top.
+	 * No known reason to not always do late ACK,
+	 * but just in case do it opt-in.
 	 */
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-
+	if (!x86_pmu.late_ack)
+		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
 	status = intel_pmu_get_status();
@@ -1188,8 +1202,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 again:
 	intel_pmu_ack_status(status);
 	if (++loops > 100) {
-		WARN_ONCE(1, "perfevents: irq loop stuck!\n");
-		perf_event_print_debug();
+		static bool warned = false;
+		if (!warned) {
+			WARN(1, "perfevents: irq loop stuck!\n");
+			perf_event_print_debug();
+			warned = true;
+		}
 		intel_pmu_reset();
 		goto done;
 	}
@@ -1235,6 +1253,13 @@ again:
 
 done:
 	intel_pmu_enable_all(0);
+	/*
+	 * Only unmask the NMI after the overflow counters
+	 * have been reset. This avoids spurious NMIs on
+	 * Haswell CPUs.
+	 */
+	if (x86_pmu.late_ack)
+		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	return handled;
 }
 
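Taken together, the two LVTPC hunks reorder the handler: on late_ack PMUs (Haswell) the NMI line stays masked until every overflow bit has been acked and the counters re-enabled, so a counter overflowing mid-handler cannot retrigger a spurious NMI. A condensed restatement of the resulting control flow, where process_overflow_status() is a made-up stand-in for the BTS/PEBS drain and GLOBAL_STATUS ack loop elided here:

	static int pmi_handler_sketch(struct pt_regs *regs)
	{
		int handled;

		if (!x86_pmu.late_ack)               /* pre-Haswell: unmask early */
			apic_write(APIC_LVTPC, APIC_DM_NMI);

		intel_pmu_disable_all();
		handled = process_overflow_status(); /* clear all overflow state */
		intel_pmu_enable_all(0);

		if (x86_pmu.late_ack)                /* Haswell: unmask only now */
			apic_write(APIC_LVTPC, APIC_DM_NMI);

		return handled;
	}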
@@ -1425,7 +1450,6 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
 			if ((event->hw.config & c->cmask) == c->code) {
-				/* hw.flags zeroed at initialization */
 				event->hw.flags |= c->flags;
 				return c;
 			}
@@ -1473,7 +1497,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 					struct perf_event *event)
 {
-	event->hw.flags = 0;
 	intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
@@ -1646,6 +1669,47 @@ static void core_pmu_enable_all(int added)
 	}
 }
 
+static int hsw_hw_config(struct perf_event *event)
+{
+	int ret = intel_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+	if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
+		return 0;
+	event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
+
+	/*
+	 * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
+	 * PEBS or in ANY thread mode. Since the results are non-sensical,
+	 * forbid this combination.
+	 */
+	if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
+	     ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
+	      event->attr.precise_ip > 0))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static struct event_constraint counter2_constraint =
+			EVENT_CONSTRAINT(0, 0x4, 0);
+
+static struct event_constraint *
+hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	struct event_constraint *c = intel_get_event_constraints(cpuc, event);
+
+	/* Handle special quirk on in_tx_checkpointed only in counter 2 */
+	if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
+		if (c->idxmsk64 & (1U << 2))
+			return &counter2_constraint;
+		return &emptyconstraint;
+	}
+
+	return c;
+}
+
 PMU_FORMAT_ATTR(event,	"config:0-7"	);
 PMU_FORMAT_ATTR(umask,	"config:8-15"	);
 PMU_FORMAT_ATTR(edge,	"config:18"	);
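hsw_hw_config() copies the transaction filter bits straight from attr.config into the hardware config (they sit above the architectural event fields, at bits 32 and 33, matching the formats exported in the next hunk), rejects the combinations the PMU cannot filter, and hsw_get_event_constraints() then pins checkpointed events to general counter 2 via counter2_constraint (idxmsk 0x4 = bit 2). A hypothetical userspace caller exercising the new bits; error handling abbreviated:

	/* Count unhalted cycles only while inside a TSX transaction. */
	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <string.h>
	#include <stdio.h>

	int main(void)
	{
		struct perf_event_attr attr;
		long long count;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_RAW;
		attr.config = 0x3c | (1ULL << 32);   /* CPU_CLK_UNHALTED, IN_TX */

		fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0) {
			perror("perf_event_open"); /* EOPNOTSUPP with any=1 or precise_ip>0 */
			return 1;
		}
		read(fd, &count, sizeof(count));
		printf("in-tx cycles: %lld\n", count);
		close(fd);
		return 0;
	}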
@@ -1653,6 +1717,8 @@ PMU_FORMAT_ATTR(pc, "config:19"	);
 PMU_FORMAT_ATTR(any,	"config:21"	); /* v3 + */
 PMU_FORMAT_ATTR(inv,	"config:23"	);
 PMU_FORMAT_ATTR(cmask,	"config:24-31"	);
+PMU_FORMAT_ATTR(in_tx,  "config:32");
+PMU_FORMAT_ATTR(in_tx_cp, "config:33");
 
 static struct attribute *intel_arch_formats_attr[] = {
 	&format_attr_event.attr,
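PMU_FORMAT_ATTR is a stock macro, not something this patch adds: it generates a read-only sysfs attribute whose show method prints the format string, so the two new lines surface as /sys/bus/event_source/devices/cpu/format/in_tx and .../in_tx_cp and let tools parse event specs such as cpu/event=0x3c,in_tx=1/. An approximate expansion of the in_tx line, reconstructed from memory of the kernel header and trimmed, so treat it as a sketch:

	static ssize_t in_tx_show(struct device *dev,
				  struct device_attribute *attr, char *page)
	{
		return sprintf(page, "config:32\n");
	}
	static struct device_attribute format_attr_in_tx = __ATTR_RO(in_tx);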
@@ -1807,6 +1873,8 @@ static struct attribute *intel_arch3_formats_attr[] = {
 	&format_attr_any.attr,
 	&format_attr_inv.attr,
 	&format_attr_cmask.attr,
+	&format_attr_in_tx.attr,
+	&format_attr_in_tx_cp.attr,
 
 	&format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
 	&format_attr_ldlat.attr, /* PEBS load latency */
@@ -1966,6 +2034,15 @@ static __init void intel_nehalem_quirk(void)
 	}
 }
 
+EVENT_ATTR_STR(mem-loads,	mem_ld_hsw,	"event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,	mem_st_hsw,	"event=0xd0,umask=0x82")
+
+static struct attribute *hsw_events_attrs[] = {
+	EVENT_PTR(mem_ld_hsw),
+	EVENT_PTR(mem_st_hsw),
+	NULL
+};
+
 __init int intel_pmu_init(void)
 {
 	union cpuid10_edx edx;
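The two EVENT_ATTR_STR() lines publish Haswell-tuned event aliases (note mem-loads uses event 0xcd with ldlat=3, the same event the MEM_TRANS_RETIRED.LOAD_LATENCY constraint above restricts); once wired into x86_pmu.cpu_events below, they appear under the PMU's events/ sysfs directory. A small sketch of how a tool might resolve one, assuming the standard perf sysfs layout:

	#include <stdio.h>

	int main(void)
	{
		char buf[128];
		FILE *f = fopen("/sys/bus/event_source/devices/cpu/events/mem-loads", "r");

		if (f && fgets(buf, sizeof(buf), f))
			printf("mem-loads => %s", buf); /* "event=0xcd,umask=0x1,ldlat=3" */
		if (f)
			fclose(f);
		return 0;
	}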
@@ -2189,6 +2266,30 @@ __init int intel_pmu_init(void)
 		break;
 
 
+	case 60: /* Haswell Client */
+	case 70:
+	case 71:
+	case 63:
+		x86_pmu.late_ack = true;
+		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+		intel_pmu_lbr_init_snb();
+
+		x86_pmu.event_constraints = intel_hsw_event_constraints;
+		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_snb_extra_regs;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+		/* all extra regs are per-cpu when HT is on */
+		x86_pmu.er_flags |= ERF_HAS_RSP_1;
+		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+
+		x86_pmu.hw_config = hsw_hw_config;
+		x86_pmu.get_event_constraints = hsw_get_event_constraints;
+		x86_pmu.cpu_events = hsw_events_attrs;
+		pr_cont("Haswell events, ");
+		break;
+
 	default:
 		switch (x86_pmu.version) {
 		case 1:
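The new case arm matches family 6 models 60, 70, 71 and 63 and reuses the Sandy Bridge cache-event tables, extra regs and LBR setup, layering the Haswell-specific hooks (late_ack, hsw_hw_config, hsw_get_event_constraints, the event aliases) on top. A hypothetical userspace probe for whether a CPU would take this branch:

	#include <cpuid.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx, family, model;

		__get_cpuid(1, &eax, &ebx, &ecx, &edx);
		family = (eax >> 8) & 0xf;
		/* model = base model plus extended model bits (family 6) */
		model  = ((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0);

		if (family == 6 && (model == 60 || model == 70 ||
				    model == 71 || model == 63))
			printf("model %u: Haswell per this patch\n", model);
		else
			printf("model %u: not matched here\n", model);
		return 0;
	}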
@@ -2227,7 +2328,7 @@ __init int intel_pmu_init(void)
 		 * counter, so do not extend mask to generic counters
 		 */
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if (c->cmask != X86_RAW_EVENT_MASK
+			if (c->cmask != FIXED_EVENT_FLAGS
 			    || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
 				continue;
 			}
@@ -2237,5 +2338,12 @@ __init int intel_pmu_init(void)
 		}
 	}
 
+	/* Support full width counters using alternative MSR range */
+	if (x86_pmu.intel_cap.full_width_write) {
+		x86_pmu.max_period = x86_pmu.cntval_mask;
+		x86_pmu.perfctr = MSR_IA32_PMC0;
+		pr_cont("full-width counters, ");
+	}
+
 	return 0;
 }
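Writes to the legacy perfctr MSRs are sign-extended from bit 31, which is why max_period is normally capped at 2^31-1; when CPUID reports full_width_write, the alternate MSR_IA32_PMC0 register range accepts writes of the counter's full width, so the sampling period can be raised to the counter mask. The headroom gained, as a worked sketch assuming 48-bit counters as on recent Intel parts:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		const int cntval_bits = 48;
		const uint64_t cntval_mask = (1ULL << cntval_bits) - 1;

		const uint64_t legacy_max = (1ULL << 31) - 1; /* sign-extended wrmsr */
		const uint64_t full_max   = cntval_mask;      /* via MSR_IA32_PMC0 */

		printf("max period: legacy %llu -> full-width %llu\n",
		       (unsigned long long)legacy_max,
		       (unsigned long long)full_max);
		return 0;
	}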