author     Ingo Molnar <mingo@kernel.org>   2013-04-01 15:41:35 -0400
committer  Ingo Molnar <mingo@kernel.org>   2013-04-01 15:41:35 -0400
commit     b847d0501afec4c7b12eb276aec10a2834f953ea (patch)
tree       69ff786e4280456345a03de9f70f85baf0e34d66 /arch
parent     0a11953851213fd1d3eebcb68b4a537d458c70c2 (diff)
parent     d06f7911792780c6e973a137b766530c8d031aeb (diff)
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
* Revert "perf sched: Handle PERF_RECORD_EXIT events" to get 'perf sched lat'
back working.
* We don't use Newt anymore, just plain libslang.
* Kill a bunch of die() calls, from Namhyung Kim.
* Add --no-demangle to report/top, from Namhyung Kim.
* Fix dependency of the python binding wrt libtraceevent, from Naohiro Aota.
* Introduce per-core aggregation in 'perf stat', from Stephane Eranian.
* Add memory profiling via PEBS, from Stephane Eranian.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
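
For context on the "memory profiling via PEBS" item, here is a minimal userspace sketch (not part of this patch) of what the new arch support enables. It assumes a Sandy Bridge-class PMU and a kernel that also carries the matching perf core changes (PERF_SAMPLE_WEIGHT / PERF_SAMPLE_DATA_SRC); the raw encoding 0x1cd and the ldlat value in config1 mirror the mem-loads alias and the "config1:0-15" format attribute added in the diff below. The sample period and ldlat threshold are arbitrary illustration values.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x1cd;		/* event=0xcd,umask=0x1: MEM_TRANS_RETIRED.LAT_ABOVE_THR (SNB) */
	attr.config1 = 30;		/* load latency threshold, exposed as ldlat (config1:0-15) */
	attr.sample_period = 10000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR |
			   PERF_SAMPLE_WEIGHT | PERF_SAMPLE_DATA_SRC;
	attr.precise_ip = 2;		/* request PEBS */
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = syscall(__NR_perf_event_open, &attr, 0 /* self */, -1 /* any cpu */,
		     -1 /* no group */, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	/* mmap a ring buffer and read PERF_RECORD_SAMPLEs here ... */
	close(fd);
	return 0;
}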
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h          |   1
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c               |  61
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h               |  56
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c         |  35
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c      | 182
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c  |   2
6 files changed, 312 insertions(+), 25 deletions(-)
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 892ce40a7470..b31798d5e62e 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -71,6 +71,7 @@
 #define MSR_IA32_PEBS_ENABLE 0x000003f1
 #define MSR_IA32_DS_AREA 0x00000600
 #define MSR_IA32_PERF_CAPABILITIES 0x00000345
+#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
 
 #define MSR_MTRRfix64K_00000 0x00000250
 #define MSR_MTRRfix16K_80000 0x00000258
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bf0f01aea994..5ed7a4c5baf7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1316,9 +1316,16 @@ static struct attribute_group x86_pmu_format_group = {
  */
 static void __init filter_events(struct attribute **attrs)
 {
+	struct device_attribute *d;
+	struct perf_pmu_events_attr *pmu_attr;
 	int i, j;
 
 	for (i = 0; attrs[i]; i++) {
+		d = (struct device_attribute *)attrs[i];
+		pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
+		/* str trumps id */
+		if (pmu_attr->event_str)
+			continue;
 		if (x86_pmu.event_map(i))
 			continue;
 
@@ -1330,22 +1337,45 @@ static void __init filter_events(struct attribute **attrs)
 	}
 }
 
-static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+/* Merge two pointer arrays */
+static __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
+{
+	struct attribute **new;
+	int j, i;
+
+	for (j = 0; a[j]; j++)
+		;
+	for (i = 0; b[i]; i++)
+		j++;
+	j++;
+
+	new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	j = 0;
+	for (i = 0; a[i]; i++)
+		new[j++] = a[i];
+	for (i = 0; b[i]; i++)
+		new[j++] = b[i];
+	new[j] = NULL;
+
+	return new;
+}
+
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
 			  char *page)
 {
 	struct perf_pmu_events_attr *pmu_attr = \
 		container_of(attr, struct perf_pmu_events_attr, attr);
-
 	u64 config = x86_pmu.event_map(pmu_attr->id);
-	return x86_pmu.events_sysfs_show(page, config);
-}
 
-#define EVENT_VAR(_id) event_attr_##_id
-#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
+	/* string trumps id */
+	if (pmu_attr->event_str)
+		return sprintf(page, "%s", pmu_attr->event_str);
 
-#define EVENT_ATTR(_name, _id) \
-	PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \
-		       events_sysfs_show)
+	return x86_pmu.events_sysfs_show(page, config);
+}
 
 EVENT_ATTR(cpu-cycles, CPU_CYCLES );
 EVENT_ATTR(instructions, INSTRUCTIONS );
@@ -1459,16 +1489,27 @@ static int __init init_hw_perf_events(void)
 
 	unconstrained = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
-				   0, x86_pmu.num_counters, 0);
+				   0, x86_pmu.num_counters, 0, 0);
 
 	x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
 	x86_pmu_format_group.attrs = x86_pmu.format_attrs;
 
+	if (x86_pmu.event_attrs)
+		x86_pmu_events_group.attrs = x86_pmu.event_attrs;
+
 	if (!x86_pmu.events_sysfs_show)
 		x86_pmu_events_group.attrs = &empty_attrs;
 	else
 		filter_events(x86_pmu_events_group.attrs);
 
+	if (x86_pmu.cpu_events) {
+		struct attribute **tmp;
+
+		tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
+		if (!WARN_ON(!tmp))
+			x86_pmu_events_group.attrs = tmp;
+	}
+
 	pr_info("... version: %d\n", x86_pmu.version);
 	pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
 	pr_info("... generic registers: %d\n", x86_pmu.num_counters);
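
Aside (not part of the patch): the new merge_attr() helper simply concatenates two NULL-terminated pointer arrays so that CPU-specific event aliases (x86_pmu.cpu_events) can be appended to the generic sysfs event attributes. A stand-alone sketch of the same idiom, using plain strings instead of struct attribute pointers purely for illustration:

#include <stdio.h>
#include <stdlib.h>

/* Merge two NULL-terminated string arrays into a newly allocated one. */
static const char **merge_strv(const char **a, const char **b)
{
	size_t na, nb, i;
	const char **merged;

	for (na = 0; a[na]; na++)
		;
	for (nb = 0; b[nb]; nb++)
		;
	merged = malloc((na + nb + 1) * sizeof(*merged));
	if (!merged)
		return NULL;
	for (i = 0; i < na; i++)
		merged[i] = a[i];
	for (i = 0; i < nb; i++)
		merged[na + i] = b[i];
	merged[na + nb] = NULL;		/* keep the terminator */
	return merged;
}

int main(void)
{
	const char *generic[] = { "cpu-cycles", "instructions", NULL };
	const char *cpu_specific[] = { "mem-loads", "mem-stores", NULL };
	const char **all = merge_strv(generic, cpu_specific);

	for (size_t i = 0; all && all[i]; i++)
		printf("%s\n", all[i]);
	free(all);
	return 0;
}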
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 7f5c75c2afdd..ba9aadfa683b 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -46,6 +46,7 @@ enum extra_reg_type {
 	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
 	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
 	EXTRA_REG_LBR   = 2,	/* lbr_select */
+	EXTRA_REG_LDLAT = 3,	/* ld_lat_threshold */
 
 	EXTRA_REG_MAX		/* number of entries needed */
 };
@@ -59,7 +60,13 @@ struct event_constraint {
 	u64 cmask;
 	int weight;
 	int overlap;
+	int flags;
 };
+/*
+ * struct event_constraint flags
+ */
+#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */
 
 struct amd_nb {
 	int nb_id; /* NorthBridge id */
@@ -170,16 +177,17 @@ struct cpu_hw_events {
 	void *kfree_on_online;
 };
 
-#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
+#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
 	{ .idxmsk64 = (n) }, \
 	.code = (c), \
 	.cmask = (m), \
 	.weight = (w), \
 	.overlap = (o), \
+	.flags = f, \
 }
 
 #define EVENT_CONSTRAINT(c, n, m) \
-	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
 
 /*
  * The overlap flag marks event constraints with overlapping counter
@@ -203,7 +211,7 @@ struct cpu_hw_events {
  * and its counter masks must be kept at a minimum.
  */
 #define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
-	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
 
 /*
  * Constraint on the Event code.
@@ -231,6 +239,14 @@ struct cpu_hw_events {
 #define INTEL_UEVENT_CONSTRAINT(c, n) \
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
+#define INTEL_PLD_CONSTRAINT(c, n) \
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
+
+#define INTEL_PST_CONSTRAINT(c, n) \
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+
 #define EVENT_CONSTRAINT_END \
 	EVENT_CONSTRAINT(0, 0, 0)
 
@@ -260,12 +276,22 @@ struct extra_reg {
 	.msr = (ms), \
 	.config_mask = (m), \
 	.valid_mask = (vm), \
-	.idx = EXTRA_REG_##i \
+	.idx = EXTRA_REG_##i, \
 	}
 
 #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
 	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
 
+#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
+	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
+			ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
+
+#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
+	INTEL_UEVENT_EXTRA_REG(c, \
+			       MSR_PEBS_LD_LAT_THRESHOLD, \
+			       0xffff, \
+			       LDLAT)
+
 #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
 
 union perf_capabilities {
@@ -355,8 +381,10 @@ struct x86_pmu {
 	 */
 	int attr_rdpmc;
 	struct attribute **format_attrs;
+	struct attribute **event_attrs;
 
 	ssize_t (*events_sysfs_show)(char *page, u64 config);
+	struct attribute **cpu_events;
 
 	/*
 	 * CPU Hotplug hooks
@@ -421,6 +449,23 @@ do { \
 #define ERF_NO_HT_SHARING 1
 #define ERF_HAS_RSP_1 2
 
+#define EVENT_VAR(_id) event_attr_##_id
+#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
+
+#define EVENT_ATTR(_name, _id) \
+static struct perf_pmu_events_attr EVENT_VAR(_id) = { \
+	.attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
+	.id = PERF_COUNT_HW_##_id, \
+	.event_str = NULL, \
+};
+
+#define EVENT_ATTR_STR(_name, v, str) \
+static struct perf_pmu_events_attr event_attr_##v = { \
+	.attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
+	.id = 0, \
+	.event_str = str, \
+};
+
 extern struct x86_pmu x86_pmu __read_mostly;
 
 DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
@@ -628,6 +673,9 @@ int p6_pmu_init(void);
 
 int knc_pmu_init(void);
 
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+			  char *page);
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
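
Aside (not part of the patch): a worked expansion of what INTEL_PLD_CONSTRAINT(0x01cd, 0x8) boils down to with the new flags argument. The struct below is a simplified local mirror of the kernel's event_constraint, and the 0xffff value used for INTEL_ARCH_EVENT_MASK is an assumption (event select plus unit mask); the other values follow directly from the macros in the hunk above.

#include <stdint.h>
#include <stdio.h>

#define PERF_X86_EVENT_PEBS_LDLAT 0x1	/* as defined in the diff */
#define INTEL_ARCH_EVENT_MASK 0xffff	/* assumed: event select + umask */

struct event_constraint {		/* reduced mirror of the kernel struct */
	uint64_t idxmsk64;
	uint64_t code;
	uint64_t cmask;
	int weight;
	int overlap;
	int flags;
};

/* What INTEL_PLD_CONSTRAINT(0x01cd, 0x8) expands to: */
static const struct event_constraint pld_example = {
	.idxmsk64 = 0x8,			/* only generic counter 3 allowed */
	.code     = 0x01cd,			/* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
	.cmask    = INTEL_ARCH_EVENT_MASK,	/* match on event code + umask */
	.weight   = 1,				/* HWEIGHT(0x8) */
	.overlap  = 0,
	.flags    = PERF_X86_EVENT_PEBS_LDLAT,	/* marks PEBS load-latency sampling */
};

int main(void)
{
	printf("code=%#llx counters=%#llx flags=%#x\n",
	       (unsigned long long)pld_example.code,
	       (unsigned long long)pld_example.idxmsk64, pld_example.flags);
	return 0;
}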
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index dab7580c47ae..e84c4ba44b59 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -81,6 +81,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
 
@@ -136,6 +137,7 @@ static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
 
@@ -155,9 +157,25 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 	EVENT_EXTRA_END
 };
 
+EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
+
+struct attribute *nhm_events_attrs[] = {
+	EVENT_PTR(mem_ld_nhm),
+	NULL,
+};
+
+struct attribute *snb_events_attrs[] = {
+	EVENT_PTR(mem_ld_snb),
+	EVENT_PTR(mem_st_snb),
+	NULL,
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
 	return intel_perfmon_event_map[hw_event];
@@ -1392,8 +1410,11 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if ((event->hw.config & c->cmask) == c->code)
+			if ((event->hw.config & c->cmask) == c->code) {
+				/* hw.flags zeroed at initialization */
+				event->hw.flags |= c->flags;
 				return c;
+			}
 		}
 	}
 
@@ -1438,6 +1459,7 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 					struct perf_event *event)
 {
+	event->hw.flags = 0;
 	intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
@@ -1761,6 +1783,8 @@ static void intel_pmu_flush_branch_stack(void)
 
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
 
+PMU_FORMAT_ATTR(ldlat, "config1:0-15");
+
 static struct attribute *intel_arch3_formats_attr[] = {
 	&format_attr_event.attr,
 	&format_attr_umask.attr,
@@ -1771,6 +1795,7 @@ static struct attribute *intel_arch3_formats_attr[] = {
 	&format_attr_cmask.attr,
 
 	&format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
+	&format_attr_ldlat.attr, /* PEBS load latency */
 	NULL,
 };
 
@@ -2031,6 +2056,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
+		x86_pmu.cpu_events = nhm_events_attrs;
+
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2074,6 +2101,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 
+		x86_pmu.cpu_events = nhm_events_attrs;
+
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2102,6 +2131,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
+		x86_pmu.cpu_events = snb_events_attrs;
+
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2128,6 +2159,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
+		x86_pmu.cpu_events = snb_events_attrs;
+
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 826054a4f2ee..36dc13d1ad02 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -24,6 +24,130 @@ struct pebs_record_32 {
 
  */
 
+union intel_x86_pebs_dse {
+	u64 val;
+	struct {
+		unsigned int ld_dse:4;
+		unsigned int ld_stlb_miss:1;
+		unsigned int ld_locked:1;
+		unsigned int ld_reserved:26;
+	};
+	struct {
+		unsigned int st_l1d_hit:1;
+		unsigned int st_reserved1:3;
+		unsigned int st_stlb_miss:1;
+		unsigned int st_locked:1;
+		unsigned int st_reserved2:26;
+	};
+};
+
+
+/*
+ * Map PEBS Load Latency Data Source encodings to generic
+ * memory data source information
+ */
+#define P(a, b) PERF_MEM_S(a, b)
+#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
+#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
+
+static const u64 pebs_data_source[] = {
+	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
+	OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */
+	OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
+	OP_LH | P(LVL, L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
+	OP_LH | P(LVL, L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
+	OP_LH | P(LVL, L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
+	OP_LH | P(LVL, L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
+	OP_LH | P(LVL, L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
+	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
+	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
+	OP_LH | P(LVL, LOC_RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
+	OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
+	OP_LH | P(LVL, LOC_RAM) | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
+	OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
+	OP_LH | P(LVL, IO) | P(SNOOP, NONE), /* 0x0e: I/O */
+	OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
+};
+
+static u64 precise_store_data(u64 status)
+{
+	union intel_x86_pebs_dse dse;
+	u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
+
+	dse.val = status;
+
+	/*
+	 * bit 4: TLB access
+	 * 1 = stored missed 2nd level TLB
+	 *
+	 * so it either hit the walker or the OS
+	 * otherwise hit 2nd level TLB
+	 */
+	if (dse.st_stlb_miss)
+		val |= P(TLB, MISS);
+	else
+		val |= P(TLB, HIT);
+
+	/*
+	 * bit 0: hit L1 data cache
+	 * if not set, then all we know is that
+	 * it missed L1D
+	 */
+	if (dse.st_l1d_hit)
+		val |= P(LVL, HIT);
+	else
+		val |= P(LVL, MISS);
+
+	/*
+	 * bit 5: Locked prefix
+	 */
+	if (dse.st_locked)
+		val |= P(LOCK, LOCKED);
+
+	return val;
+}
+
+static u64 load_latency_data(u64 status)
+{
+	union intel_x86_pebs_dse dse;
+	u64 val;
+	int model = boot_cpu_data.x86_model;
+	int fam = boot_cpu_data.x86;
+
+	dse.val = status;
+
+	/*
+	 * use the mapping table for bit 0-3
+	 */
+	val = pebs_data_source[dse.ld_dse];
+
+	/*
+	 * Nehalem models do not support TLB, Lock infos
+	 */
+	if (fam == 0x6 && (model == 26 || model == 30
+	    || model == 31 || model == 46)) {
+		val |= P(TLB, NA) | P(LOCK, NA);
+		return val;
+	}
+	/*
+	 * bit 4: TLB access
+	 * 0 = did not miss 2nd level TLB
+	 * 1 = missed 2nd level TLB
+	 */
+	if (dse.ld_stlb_miss)
+		val |= P(TLB, MISS) | P(TLB, L2);
+	else
+		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+	/*
+	 * bit 5: locked prefix
+	 */
+	if (dse.ld_locked)
+		val |= P(LOCK, LOCKED);
+
+	return val;
+}
+
 struct pebs_record_core {
 	u64 flags, ip;
 	u64 ax, bx, cx, dx;
@@ -364,7 +488,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
-	INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 	INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
@@ -379,7 +503,7 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_westmere_pebs_event_constraints[] = {
-	INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 	INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
@@ -399,7 +523,8 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+	INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -413,7 +538,8 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+	INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -430,8 +556,10 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 
 	if (x86_pmu.pebs_constraints) {
 		for_each_event_constraint(c, x86_pmu.pebs_constraints) {
-			if ((event->hw.config & c->cmask) == c->code)
+			if ((event->hw.config & c->cmask) == c->code) {
+				event->hw.flags |= c->flags;
 				return c;
+			}
 		}
 	}
 
@@ -446,6 +574,11 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
+
+	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
+	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
+		cpuc->pebs_enabled |= 1ULL << 63;
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
@@ -558,20 +691,51 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 				   struct pt_regs *iregs, void *__pebs)
 {
 	/*
-	 * We cast to pebs_record_core since that is a subset of
-	 * both formats and we don't use the other fields in this
-	 * routine.
+	 * We cast to pebs_record_nhm to get the load latency data
+	 * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
 	 */
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct pebs_record_core *pebs = __pebs;
+	struct pebs_record_nhm *pebs = __pebs;
 	struct perf_sample_data data;
 	struct pt_regs regs;
+	u64 sample_type;
+	int fll, fst;
 
 	if (!intel_pmu_save_and_restart(event))
 		return;
 
+	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
+	fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
+
 	perf_sample_data_init(&data, 0, event->hw.last_period);
 
+	data.period = event->hw.last_period;
+	sample_type = event->attr.sample_type;
+
+	/*
+	 * if PEBS-LL or PreciseStore
+	 */
+	if (fll || fst) {
+		if (sample_type & PERF_SAMPLE_ADDR)
+			data.addr = pebs->dla;
+
+		/*
+		 * Use latency for weight (only avail with PEBS-LL)
+		 */
+		if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
+			data.weight = pebs->lat;
+
+		/*
+		 * data.data_src encodes the data source
+		 */
+		if (sample_type & PERF_SAMPLE_DATA_SRC) {
+			if (fll)
+				data.data_src.val = load_latency_data(pebs->dse);
+			else
+				data.data_src.val = precise_store_data(pebs->dse);
+		}
+	}
+
 	/*
 	 * We use the interrupt regs as a base because the PEBS record
 	 * does not contain a full regs set, specifically it seems to
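
Aside (not part of the patch): the data_src value produced by load_latency_data() / precise_store_data() above reaches userspace through PERF_SAMPLE_DATA_SRC and is decoded with the PERF_MEM_* definitions from uapi <linux/perf_event.h>, which belong to the matching perf core changes of this series rather than to this arch diff. A minimal decoding sketch, assuming those definitions are available:

#include <linux/perf_event.h>
#include <stdio.h>

static void print_data_src(union perf_mem_data_src src)
{
	if (src.mem_op & PERF_MEM_OP_LOAD)
		printf("load ");
	else if (src.mem_op & PERF_MEM_OP_STORE)
		printf("store ");

	if (src.mem_lvl & PERF_MEM_LVL_HIT) {
		if (src.mem_lvl & PERF_MEM_LVL_L1)
			printf("L1 hit ");
		else if (src.mem_lvl & PERF_MEM_LVL_LFB)
			printf("fill-buffer hit ");
		else if (src.mem_lvl & PERF_MEM_LVL_L2)
			printf("L2 hit ");
		else if (src.mem_lvl & PERF_MEM_LVL_L3)
			printf("L3 hit ");
	} else if (src.mem_lvl & PERF_MEM_LVL_MISS) {
		printf("miss ");
	}

	if (src.mem_snoop & PERF_MEM_SNOOP_HITM)
		printf("(snoop HITM) ");
	if (src.mem_dtlb & PERF_MEM_TLB_MISS)
		printf("(dTLB miss) ");
	if (src.mem_lock & PERF_MEM_LOCK_LOCKED)
		printf("(locked) ");
	printf("\n");
}

int main(void)
{
	/* Rebuild the 0x01 entry of pebs_data_source[] above (L1-local load
	 * hit) the same way the kernel does, using PERF_MEM_S(). */
	union perf_mem_data_src src = { .val =
		PERF_MEM_S(OP, LOAD) | PERF_MEM_S(LVL, HIT) |
		PERF_MEM_S(LVL, L1) | PERF_MEM_S(SNOOP, NONE) };

	print_data_src(src);
	return 0;
}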
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index b43200dbfe7e..75da9e18b128 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2438,7 +2438,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
 
 	type->unconstrainted = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
-				   0, type->num_counters, 0);
+				   0, type->num_counters, 0, 0);
 
 	for (i = 0; i < type->num_boxes; i++) {
 		pmus[i].func_id = -1;