path: root/arch
author     Ingo Molnar <mingo@kernel.org>    2013-04-01 15:41:35 -0400
committer  Ingo Molnar <mingo@kernel.org>    2013-04-01 15:41:35 -0400
commit     b847d0501afec4c7b12eb276aec10a2834f953ea (patch)
tree       69ff786e4280456345a03de9f70f85baf0e34d66 /arch
parent     0a11953851213fd1d3eebcb68b4a537d458c70c2 (diff)
parent     d06f7911792780c6e973a137b766530c8d031aeb (diff)
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

 * Revert "perf sched: Handle PERF_RECORD_EXIT events" to get 'perf sched lat' back working.

 * We don't use Newt anymore, just plain libslang.

 * Kill a bunch of die() calls, from Namhyung Kim.

 * Add --no-demangle to report/top, from Namhyung Kim.

 * Fix dependency of the python binding wrt libtraceevent, from Naohiro Aota.

 * Introduce per core aggregation in 'perf stat', from Stephane Eranian.

 * Add memory profiling via PEBS, from Stephane Eranian.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
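For orientation only, here is a minimal userspace sketch (not part of the pull itself) of how the kernel ABI merged below can be exercised on Sandy Bridge: a raw MEM_TRANS_RETIRED.LAT_ABOVE_THR event (event=0xcd, umask=0x1, the same encoding as the mem-loads alias added in this series) is opened via perf_event_open(), the load-latency threshold is passed through the new ldlat format field (config1:0-15), and the sample requests the address, latency weight and data-source fields. It assumes a uapi header that already carries PERF_SAMPLE_WEIGHT and PERF_SAMPLE_DATA_SRC from the matching perf-core update.

/*
 * Illustrative sketch, assuming a >= 3.10 uapi with PERF_SAMPLE_WEIGHT and
 * PERF_SAMPLE_DATA_SRC: open one PEBS load-latency event on Sandy Bridge.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
                           int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size           = sizeof(attr);
        attr.type           = PERF_TYPE_RAW;
        attr.config         = 0x1cd;    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
        attr.config1        = 3;        /* ldlat threshold, format config1:0-15 */
        attr.sample_period  = 1000;
        attr.sample_type    = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR |
                              PERF_SAMPLE_WEIGHT | PERF_SAMPLE_DATA_SRC;
        attr.precise_ip     = 2;        /* PEBS required for load latency */
        attr.exclude_kernel = 1;

        fd = perf_event_open(&attr, 0, -1, -1, 0);  /* this thread, any CPU */
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }
        /* mmap the ring buffer and parse PERF_RECORD_SAMPLE records here */
        close(fd);
        return 0;
}

In each sample, the weight field carries the measured load latency in core cycles and data_src carries the encoding produced by the load_latency_data()/precise_store_data() helpers added in perf_event_intel_ds.c below.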
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h          |   1
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c               |  61
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h               |  56
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c         |  35
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c      | 182
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c  |   2
6 files changed, 312 insertions(+), 25 deletions(-)
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 892ce40a7470..b31798d5e62e 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -71,6 +71,7 @@
 #define MSR_IA32_PEBS_ENABLE		0x000003f1
 #define MSR_IA32_DS_AREA		0x00000600
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
+#define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6
 
 #define MSR_MTRRfix64K_00000		0x00000250
 #define MSR_MTRRfix16K_80000		0x00000258
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bf0f01aea994..5ed7a4c5baf7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1316,9 +1316,16 @@ static struct attribute_group x86_pmu_format_group = {
  */
 static void __init filter_events(struct attribute **attrs)
 {
+	struct device_attribute *d;
+	struct perf_pmu_events_attr *pmu_attr;
 	int i, j;
 
 	for (i = 0; attrs[i]; i++) {
+		d = (struct device_attribute *)attrs[i];
+		pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
+		/* str trumps id */
+		if (pmu_attr->event_str)
+			continue;
 		if (x86_pmu.event_map(i))
 			continue;
 
@@ -1330,22 +1337,45 @@ static void __init filter_events(struct attribute **attrs)
 	}
 }
 
-static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+/* Merge two pointer arrays */
+static __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
+{
+	struct attribute **new;
+	int j, i;
+
+	for (j = 0; a[j]; j++)
+		;
+	for (i = 0; b[i]; i++)
+		j++;
+	j++;
+
+	new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	j = 0;
+	for (i = 0; a[i]; i++)
+		new[j++] = a[i];
+	for (i = 0; b[i]; i++)
+		new[j++] = b[i];
+	new[j] = NULL;
+
+	return new;
+}
+
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
 			  char *page)
 {
 	struct perf_pmu_events_attr *pmu_attr = \
 		container_of(attr, struct perf_pmu_events_attr, attr);
-
 	u64 config = x86_pmu.event_map(pmu_attr->id);
-	return x86_pmu.events_sysfs_show(page, config);
-}
 
-#define EVENT_VAR(_id)  event_attr_##_id
-#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
+	/* string trumps id */
+	if (pmu_attr->event_str)
+		return sprintf(page, "%s", pmu_attr->event_str);
 
-#define EVENT_ATTR(_name, _id)						\
-	PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id,	\
-			events_sysfs_show)
+	return x86_pmu.events_sysfs_show(page, config);
+}
 
 EVENT_ATTR(cpu-cycles,			CPU_CYCLES		);
 EVENT_ATTR(instructions,		INSTRUCTIONS		);
@@ -1459,16 +1489,27 @@ static int __init init_hw_perf_events(void)
 
 	unconstrained = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
-				   0, x86_pmu.num_counters, 0);
+				   0, x86_pmu.num_counters, 0, 0);
 
 	x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
 	x86_pmu_format_group.attrs = x86_pmu.format_attrs;
 
+	if (x86_pmu.event_attrs)
+		x86_pmu_events_group.attrs = x86_pmu.event_attrs;
+
 	if (!x86_pmu.events_sysfs_show)
 		x86_pmu_events_group.attrs = &empty_attrs;
 	else
 		filter_events(x86_pmu_events_group.attrs);
 
+	if (x86_pmu.cpu_events) {
+		struct attribute **tmp;
+
+		tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
+		if (!WARN_ON(!tmp))
+			x86_pmu_events_group.attrs = tmp;
+	}
+
 	pr_info("... version:                %d\n",     x86_pmu.version);
 	pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
 	pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 7f5c75c2afdd..ba9aadfa683b 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -46,6 +46,7 @@ enum extra_reg_type {
 	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
 	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
 	EXTRA_REG_LBR   = 2,	/* lbr_select */
+	EXTRA_REG_LDLAT = 3,	/* ld_lat_threshold */
 
 	EXTRA_REG_MAX		/* number of entries needed */
 };
@@ -59,7 +60,13 @@ struct event_constraint {
 	u64	cmask;
 	int	weight;
 	int	overlap;
+	int	flags;
 };
+/*
+ * struct event_constraint flags
+ */
+#define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST		0x2 /* st data address sampling */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -170,16 +177,17 @@ struct cpu_hw_events {
 	void				*kfree_on_online;
 };
 
-#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
+#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
 	{ .idxmsk64 = (n) },		\
 	.code = (c),			\
 	.cmask = (m),			\
 	.weight = (w),			\
 	.overlap = (o),			\
+	.flags = f,			\
 }
 
 #define EVENT_CONSTRAINT(c, n, m)	\
-	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
 
 /*
  * The overlap flag marks event constraints with overlapping counter
@@ -203,7 +211,7 @@ struct cpu_hw_events {
  * and its counter masks must be kept at a minimum.
  */
 #define EVENT_CONSTRAINT_OVERLAP(c, n, m)	\
-	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
 
 /*
  * Constraint on the Event code.
@@ -231,6 +239,14 @@ struct cpu_hw_events {
 #define INTEL_UEVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
+#define INTEL_PLD_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
+
+#define INTEL_PST_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
@@ -260,12 +276,22 @@ struct extra_reg {
 	.msr = (ms),		\
 	.config_mask = (m),	\
 	.valid_mask = (vm),	\
-	.idx = EXTRA_REG_##i	\
+	.idx = EXTRA_REG_##i,	\
 	}
 
 #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\
 	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
 
+#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx)	\
+	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
+			ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
+
+#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
+	INTEL_UEVENT_EXTRA_REG(c, \
+			       MSR_PEBS_LD_LAT_THRESHOLD, \
+			       0xffff, \
+			       LDLAT)
+
 #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
 
 union perf_capabilities {
@@ -355,8 +381,10 @@ struct x86_pmu {
 	 */
 	int		attr_rdpmc;
 	struct attribute **format_attrs;
+	struct attribute **event_attrs;
 
 	ssize_t		(*events_sysfs_show)(char *page, u64 config);
+	struct attribute **cpu_events;
 
 	/*
 	 * CPU Hotplug hooks
@@ -421,6 +449,23 @@ do { \
 #define ERF_NO_HT_SHARING	1
 #define ERF_HAS_RSP_1		2
 
+#define EVENT_VAR(_id)  event_attr_##_id
+#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
+
+#define EVENT_ATTR(_name, _id)						\
+static struct perf_pmu_events_attr EVENT_VAR(_id) = {			\
+	.attr		= __ATTR(_name, 0444, events_sysfs_show, NULL),	\
+	.id		= PERF_COUNT_HW_##_id,				\
+	.event_str	= NULL,						\
+};
+
+#define EVENT_ATTR_STR(_name, v, str)					\
+static struct perf_pmu_events_attr event_attr_##v = {			\
+	.attr		= __ATTR(_name, 0444, events_sysfs_show, NULL),	\
+	.id		= 0,						\
+	.event_str	= str,						\
+};
+
 extern struct x86_pmu x86_pmu __read_mostly;
 
 DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
@@ -628,6 +673,9 @@ int p6_pmu_init(void);
 
 int knc_pmu_init(void);
 
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+			  char *page);
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index dab7580c47ae..e84c4ba44b59 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -81,6 +81,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
 
@@ -136,6 +137,7 @@ static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
 
@@ -155,9 +157,25 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 	EVENT_EXTRA_END
 };
 
+EVENT_ATTR_STR(mem-loads,  mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads,  mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
+
+struct attribute *nhm_events_attrs[] = {
+	EVENT_PTR(mem_ld_nhm),
+	NULL,
+};
+
+struct attribute *snb_events_attrs[] = {
+	EVENT_PTR(mem_ld_snb),
+	EVENT_PTR(mem_st_snb),
+	NULL,
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
 	return intel_perfmon_event_map[hw_event];
@@ -1392,8 +1410,11 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if ((event->hw.config & c->cmask) == c->code)
+			if ((event->hw.config & c->cmask) == c->code) {
+				/* hw.flags zeroed at initialization */
+				event->hw.flags |= c->flags;
 				return c;
+			}
 		}
 	}
 
@@ -1438,6 +1459,7 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 					struct perf_event *event)
 {
+	event->hw.flags = 0;
 	intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
@@ -1761,6 +1783,8 @@ static void intel_pmu_flush_branch_stack(void)
 
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
 
+PMU_FORMAT_ATTR(ldlat, "config1:0-15");
+
 static struct attribute *intel_arch3_formats_attr[] = {
 	&format_attr_event.attr,
 	&format_attr_umask.attr,
@@ -1771,6 +1795,7 @@ static struct attribute *intel_arch3_formats_attr[] = {
 	&format_attr_cmask.attr,
 
 	&format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
+	&format_attr_ldlat.attr, /* PEBS load latency */
 	NULL,
 };
 
@@ -2031,6 +2056,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
+		x86_pmu.cpu_events = nhm_events_attrs;
+
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2074,6 +2101,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 
+		x86_pmu.cpu_events = nhm_events_attrs;
+
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2102,6 +2131,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
+		x86_pmu.cpu_events = snb_events_attrs;
+
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2128,6 +2159,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
+		x86_pmu.cpu_events = snb_events_attrs;
+
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 826054a4f2ee..36dc13d1ad02 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -24,6 +24,130 @@ struct pebs_record_32 {
 
  */
 
+union intel_x86_pebs_dse {
+	u64 val;
+	struct {
+		unsigned int ld_dse:4;
+		unsigned int ld_stlb_miss:1;
+		unsigned int ld_locked:1;
+		unsigned int ld_reserved:26;
+	};
+	struct {
+		unsigned int st_l1d_hit:1;
+		unsigned int st_reserved1:3;
+		unsigned int st_stlb_miss:1;
+		unsigned int st_locked:1;
+		unsigned int st_reserved2:26;
+	};
+};
+
+
+/*
+ * Map PEBS Load Latency Data Source encodings to generic
+ * memory data source information
+ */
+#define P(a, b) PERF_MEM_S(a, b)
+#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
+#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
+
+static const u64 pebs_data_source[] = {
+	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
+	OP_LH | P(LVL, L1)  | P(SNOOP, NONE),	/* 0x01: L1 local */
+	OP_LH | P(LVL, LFB) | P(SNOOP, NONE),	/* 0x02: LFB hit */
+	OP_LH | P(LVL, L2)  | P(SNOOP, NONE),	/* 0x03: L2 hit */
+	OP_LH | P(LVL, L3)  | P(SNOOP, NONE),	/* 0x04: L3 hit */
+	OP_LH | P(LVL, L3)  | P(SNOOP, MISS),	/* 0x05: L3 hit, snoop miss */
+	OP_LH | P(LVL, L3)  | P(SNOOP, HIT),	/* 0x06: L3 hit, snoop hit */
+	OP_LH | P(LVL, L3)  | P(SNOOP, HITM),	/* 0x07: L3 hit, snoop hitm */
+	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
+	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
+	OP_LH | P(LVL, LOC_RAM)  | P(SNOOP, HIT),  /* 0x0a: L3 miss, shared */
+	OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
+	OP_LH | P(LVL, LOC_RAM)  | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
+	OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
+	OP_LH | P(LVL, IO)  | P(SNOOP, NONE), /* 0x0e: I/O */
+	OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
+};
+
+static u64 precise_store_data(u64 status)
+{
+	union intel_x86_pebs_dse dse;
+	u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
+
+	dse.val = status;
+
+	/*
+	 * bit 4: TLB access
+	 * 1 = stored missed 2nd level TLB
+	 *
+	 * so it either hit the walker or the OS
+	 * otherwise hit 2nd level TLB
+	 */
+	if (dse.st_stlb_miss)
+		val |= P(TLB, MISS);
+	else
+		val |= P(TLB, HIT);
+
+	/*
+	 * bit 0: hit L1 data cache
+	 * if not set, then all we know is that
+	 * it missed L1D
+	 */
+	if (dse.st_l1d_hit)
+		val |= P(LVL, HIT);
+	else
+		val |= P(LVL, MISS);
+
+	/*
+	 * bit 5: Locked prefix
+	 */
+	if (dse.st_locked)
+		val |= P(LOCK, LOCKED);
+
+	return val;
+}
+
+static u64 load_latency_data(u64 status)
+{
+	union intel_x86_pebs_dse dse;
+	u64 val;
+	int model = boot_cpu_data.x86_model;
+	int fam = boot_cpu_data.x86;
+
+	dse.val = status;
+
+	/*
+	 * use the mapping table for bit 0-3
+	 */
+	val = pebs_data_source[dse.ld_dse];
+
+	/*
+	 * Nehalem models do not support TLB, Lock infos
+	 */
+	if (fam == 0x6 && (model == 26 || model == 30
+	    || model == 31 || model == 46)) {
+		val |= P(TLB, NA) | P(LOCK, NA);
+		return val;
+	}
+	/*
+	 * bit 4: TLB access
+	 * 0 = did not miss 2nd level TLB
+	 * 1 = missed 2nd level TLB
+	 */
+	if (dse.ld_stlb_miss)
+		val |= P(TLB, MISS) | P(TLB, L2);
+	else
+		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+	/*
+	 * bit 5: locked prefix
+	 */
+	if (dse.ld_locked)
+		val |= P(LOCK, LOCKED);
+
+	return val;
+}
+
 struct pebs_record_core {
 	u64 flags, ip;
 	u64 ax, bx, cx, dx;
@@ -364,7 +488,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
-	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),    /* MEM_INST_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x100b, 0xf),    /* MEM_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
@@ -379,7 +503,7 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_westmere_pebs_event_constraints[] = {
-	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),    /* MEM_INST_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x100b, 0xf),    /* MEM_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
@@ -399,7 +523,8 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -413,7 +538,8 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -430,8 +556,10 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 
 	if (x86_pmu.pebs_constraints) {
 		for_each_event_constraint(c, x86_pmu.pebs_constraints) {
-			if ((event->hw.config & c->cmask) == c->code)
+			if ((event->hw.config & c->cmask) == c->code) {
+				event->hw.flags |= c->flags;
 				return c;
+			}
 		}
 	}
 
@@ -446,6 +574,11 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
+
+	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
+	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
+		cpuc->pebs_enabled |= 1ULL << 63;
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
@@ -558,20 +691,51 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 			    struct pt_regs *iregs, void *__pebs)
 {
 	/*
-	 * We cast to pebs_record_core since that is a subset of
-	 * both formats and we don't use the other fields in this
-	 * routine.
+	 * We cast to pebs_record_nhm to get the load latency data
+	 * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
 	 */
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct pebs_record_core *pebs = __pebs;
+	struct pebs_record_nhm *pebs = __pebs;
 	struct perf_sample_data data;
 	struct pt_regs regs;
+	u64 sample_type;
+	int fll, fst;
 
 	if (!intel_pmu_save_and_restart(event))
 		return;
 
+	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
+	fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
+
 	perf_sample_data_init(&data, 0, event->hw.last_period);
 
+	data.period = event->hw.last_period;
+	sample_type = event->attr.sample_type;
+
+	/*
+	 * if PEBS-LL or PreciseStore
+	 */
+	if (fll || fst) {
+		if (sample_type & PERF_SAMPLE_ADDR)
+			data.addr = pebs->dla;
+
+		/*
+		 * Use latency for weight (only avail with PEBS-LL)
+		 */
+		if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
+			data.weight = pebs->lat;
+
+		/*
+		 * data.data_src encodes the data source
+		 */
+		if (sample_type & PERF_SAMPLE_DATA_SRC) {
+			if (fll)
+				data.data_src.val = load_latency_data(pebs->dse);
+			else
+				data.data_src.val = precise_store_data(pebs->dse);
+		}
+	}
+
 	/*
 	 * We use the interrupt regs as a base because the PEBS record
 	 * does not contain a full regs set, specifically it seems to
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index b43200dbfe7e..75da9e18b128 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2438,7 +2438,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
 
 	type->unconstrainted = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
-				0, type->num_counters, 0);
+				0, type->num_counters, 0, 0);
 
 	for (i = 0; i < type->num_boxes; i++) {
 		pmus[i].func_id = -1;