-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h      |   1
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c           |   5
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h           |  25
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c     |  24
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c  | 133
5 files changed, 178 insertions, 10 deletions
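Not part of the patch: a minimal user-space sketch of how the load-latency facility added below might be driven through perf_event_open(), assuming the companion generic-perf patches defining PERF_SAMPLE_WEIGHT and PERF_SAMPLE_DATA_SRC are also applied. The raw event 0x01cd and the threshold passed via config1 mirror the Sandy Bridge mem-loads entry and the MSR_PEBS_LD_LAT_THRESHOLD extra register wired up in this diff; the helper name and the threshold value are illustrative only.

/* Hypothetical helper, not from the patch: open a PEBS load-latency event. */
#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static long open_pebs_load_latency(pid_t pid, unsigned long ldlat_cycles)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x01cd;          /* assumed SNB MEM_TRANS_RETIRED.LOAD_LATENCY */
	attr.config1 = ldlat_cycles;   /* routed to MSR_PEBS_LD_LAT_THRESHOLD via EXTRA_REG_LDLAT */
	attr.sample_period = 10000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR |
			   PERF_SAMPLE_WEIGHT | PERF_SAMPLE_DATA_SRC;
	attr.precise_ip = 2;           /* PEBS is required for load latency */
	attr.exclude_kernel = 1;

	/* Samples then carry data.addr, data.weight (latency) and data.data_src. */
	return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
}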
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 892ce40a7470..b31798d5e62e 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -71,6 +71,7 @@
 #define MSR_IA32_PEBS_ENABLE		0x000003f1
 #define MSR_IA32_DS_AREA		0x00000600
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
+#define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6
 
 #define MSR_MTRRfix64K_00000		0x00000250
 #define MSR_MTRRfix16K_80000		0x00000258
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8ba51518f689..5ed7a4c5baf7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1363,7 +1363,7 @@ static __init struct attribute **merge_attr(struct attribute **a, struct attribu
 	return new;
 }
 
-static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
 			  char *page)
 {
 	struct perf_pmu_events_attr *pmu_attr = \
@@ -1494,6 +1494,9 @@ static int __init init_hw_perf_events(void)
 	x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
 	x86_pmu_format_group.attrs = x86_pmu.format_attrs;
 
+	if (x86_pmu.event_attrs)
+		x86_pmu_events_group.attrs = x86_pmu.event_attrs;
+
 	if (!x86_pmu.events_sysfs_show)
 		x86_pmu_events_group.attrs = &empty_attrs;
 	else
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 9686d38eb458..f3a9a94e4d22 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -46,6 +46,7 @@ enum extra_reg_type {
 	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
 	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
 	EXTRA_REG_LBR   = 2,	/* lbr_select */
+	EXTRA_REG_LDLAT = 3,	/* ld_lat_threshold */
 
 	EXTRA_REG_MAX		/* number of entries needed */
 };
@@ -61,6 +62,10 @@ struct event_constraint {
 	int	overlap;
 	int	flags;
 };
+/*
+ * struct event_constraint flags
+ */
+#define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -233,6 +238,10 @@ struct cpu_hw_events {
 #define INTEL_UEVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
+#define INTEL_PLD_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
+
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
@@ -262,12 +271,22 @@ struct extra_reg {
 	.msr = (ms),		\
 	.config_mask = (m),	\
 	.valid_mask = (vm),	\
-	.idx = EXTRA_REG_##i	\
+	.idx = EXTRA_REG_##i,	\
 	}
 
 #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\
 	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
 
+#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx)	\
+	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
+			ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
+
+#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
+	INTEL_UEVENT_EXTRA_REG(c, \
+			       MSR_PEBS_LD_LAT_THRESHOLD, \
+			       0xffff, \
+			       LDLAT)
+
 #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
 
 union perf_capabilities {
@@ -357,6 +376,7 @@ struct x86_pmu {
 	 */
 	int		attr_rdpmc;
 	struct attribute **format_attrs;
+	struct attribute **event_attrs;
 
 	ssize_t		(*events_sysfs_show)(char *page, u64 config);
 	struct attribute **cpu_events;
@@ -648,6 +668,9 @@ int p6_pmu_init(void);
 
 int knc_pmu_init(void);
 
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+			  char *page);
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index df3beaac3397..d5ea5a03cd37 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -81,6 +81,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
 
@@ -136,6 +137,7 @@ static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
 
@@ -155,9 +157,23 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 	EVENT_EXTRA_END
 };
 
+EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
+
+struct attribute *nhm_events_attrs[] = {
+	EVENT_PTR(mem_ld_nhm),
+	NULL,
+};
+
+struct attribute *snb_events_attrs[] = {
+	EVENT_PTR(mem_ld_snb),
+	NULL,
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
 	return intel_perfmon_event_map[hw_event];
@@ -2035,6 +2051,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
+		x86_pmu.cpu_events = nhm_events_attrs;
+
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2078,6 +2096,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 
+		x86_pmu.cpu_events = nhm_events_attrs;
+
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2106,6 +2126,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
+		x86_pmu.cpu_events = snb_events_attrs;
+
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2132,6 +2154,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
+		x86_pmu.cpu_events = snb_events_attrs;
+
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index f30d85bcbda9..a6400bd0463c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -24,6 +24,92 @@ struct pebs_record_32 {
 
  */
 
+union intel_x86_pebs_dse {
+	u64 val;
+	struct {
+		unsigned int ld_dse:4;
+		unsigned int ld_stlb_miss:1;
+		unsigned int ld_locked:1;
+		unsigned int ld_reserved:26;
+	};
+	struct {
+		unsigned int st_l1d_hit:1;
+		unsigned int st_reserved1:3;
+		unsigned int st_stlb_miss:1;
+		unsigned int st_locked:1;
+		unsigned int st_reserved2:26;
+	};
+};
+
+
+/*
+ * Map PEBS Load Latency Data Source encodings to generic
+ * memory data source information
+ */
+#define P(a, b) PERF_MEM_S(a, b)
+#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
+#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
+
+static const u64 pebs_data_source[] = {
+	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
+	OP_LH | P(LVL, L1)  | P(SNOOP, NONE),	/* 0x01: L1 local */
+	OP_LH | P(LVL, LFB) | P(SNOOP, NONE),	/* 0x02: LFB hit */
+	OP_LH | P(LVL, L2)  | P(SNOOP, NONE),	/* 0x03: L2 hit */
+	OP_LH | P(LVL, L3)  | P(SNOOP, NONE),	/* 0x04: L3 hit */
+	OP_LH | P(LVL, L3)  | P(SNOOP, MISS),	/* 0x05: L3 hit, snoop miss */
+	OP_LH | P(LVL, L3)  | P(SNOOP, HIT),	/* 0x06: L3 hit, snoop hit */
+	OP_LH | P(LVL, L3)  | P(SNOOP, HITM),	/* 0x07: L3 hit, snoop hitm */
+	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
+	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
+	OP_LH | P(LVL, LOC_RAM)  | P(SNOOP, HIT),  /* 0x0a: L3 miss, shared */
+	OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
+	OP_LH | P(LVL, LOC_RAM)  | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
+	OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
+	OP_LH | P(LVL, IO)  | P(SNOOP, NONE), /* 0x0e: I/O */
+	OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
+};
+
+static u64 load_latency_data(u64 status)
+{
+	union intel_x86_pebs_dse dse;
+	u64 val;
+	int model = boot_cpu_data.x86_model;
+	int fam = boot_cpu_data.x86;
+
+	dse.val = status;
+
+	/*
+	 * use the mapping table for bit 0-3
+	 */
+	val = pebs_data_source[dse.ld_dse];
+
+	/*
+	 * Nehalem models do not support TLB, Lock infos
+	 */
+	if (fam == 0x6 && (model == 26 || model == 30
+	    || model == 31 || model == 46)) {
+		val |= P(TLB, NA) | P(LOCK, NA);
+		return val;
+	}
+	/*
+	 * bit 4: TLB access
+	 * 0 = did not miss 2nd level TLB
+	 * 1 = missed 2nd level TLB
+	 */
+	if (dse.ld_stlb_miss)
+		val |= P(TLB, MISS) | P(TLB, L2);
+	else
+		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+	/*
+	 * bit 5: locked prefix
+	 */
+	if (dse.ld_locked)
+		val |= P(LOCK, LOCKED);
+
+	return val;
+}
+
 struct pebs_record_core {
 	u64 flags, ip;
 	u64 ax, bx, cx, dx;
@@ -364,7 +450,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
-	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),    /* MEM_INST_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x100b, 0xf),    /* MEM_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
@@ -379,7 +465,7 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_westmere_pebs_event_constraints[] = {
-	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),    /* MEM_INST_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x100b, 0xf),    /* MEM_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
@@ -399,7 +485,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -413,7 +499,7 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -448,6 +534,9 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
+
+	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
@@ -560,20 +649,48 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 				   struct pt_regs *iregs, void *__pebs)
 {
 	/*
-	 * We cast to pebs_record_core since that is a subset of
-	 * both formats and we don't use the other fields in this
-	 * routine.
+	 * We cast to pebs_record_nhm to get the load latency data
+	 * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
 	 */
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct pebs_record_core *pebs = __pebs;
+	struct pebs_record_nhm *pebs = __pebs;
 	struct perf_sample_data data;
 	struct pt_regs regs;
+	u64 sample_type;
+	int fll;
 
 	if (!intel_pmu_save_and_restart(event))
 		return;
 
+	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
+
 	perf_sample_data_init(&data, 0, event->hw.last_period);
 
+	data.period = event->hw.last_period;
+	sample_type = event->attr.sample_type;
+
+	/*
+	 * if PEBS-LL or PreciseStore
+	 */
+	if (fll) {
+		if (sample_type & PERF_SAMPLE_ADDR)
+			data.addr = pebs->dla;
+
+		/*
+		 * Use latency for weight (only avail with PEBS-LL)
+		 */
+		if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
+			data.weight = pebs->lat;
+
+		/*
+		 * data.data_src encodes the data source
+		 */
+		if (sample_type & PERF_SAMPLE_DATA_SRC) {
+			if (fll)
+				data.data_src.val = load_latency_data(pebs->dse);
+		}
+	}
+
 	/*
 	 * We use the interrupt regs as a base because the PEBS record
 	 * does not contain a full regs set, specifically it seems to