author	Stephane Eranian <eranian@google.com>	2013-01-24 10:10:32 -0500
committer	Arnaldo Carvalho de Melo <acme@redhat.com>	2013-04-01 11:16:31 -0400
commit	f20093eef5f7843a25adfc0512617d4b1ff1aa6e (patch)
tree	1e1d008f98adab4477e3803ed24f3f2a22b34aaf /arch/x86
parent	d6be9ad6c960f43800a6f118932bc8a5a4eadcd1 (diff)
perf/x86: Add memory profiling via PEBS Load Latency
This patch adds support for memory profiling using the PEBS Load Latency
facility.

Load accesses are sampled by HW and the instruction address, data address,
load latency, data source, TLB and locked information can be saved in the
sampling buffer when using the PERF_SAMPLE_WEIGHT (for the latency),
PERF_SAMPLE_ADDR and PERF_SAMPLE_DATA_SRC sample types.

To enable PEBS Load Latency, users have to use the model-specific event:

 - on NHM/WSM: MEM_INST_RETIRED:LATENCY_ABOVE_THRESHOLD
 - on SNB/IVB: MEM_TRANS_RETIRED:LATENCY_ABOVE_THRESHOLD

To make things easier, this patch also exports a generic alias via sysfs:
mem-loads. It exports the right event encoding based on the host CPU and
can be used directly by the perf tool.

Loosely based on Intel's Lin Ming patch posted on LKML in July 2011.

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: peterz@infradead.org
Cc: ak@linux.intel.com
Cc: acme@redhat.com
Cc: jolsa@redhat.com
Cc: namhyung.kim@lge.com
Link: http://lkml.kernel.org/r/1359040242-8269-9-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
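For context, a minimal user-space sketch (not part of this patch) of how such
an event could be opened on SNB/IVB, using the raw encoding behind the
mem-loads alias; it assumes the ldlat threshold travels in attr.config1 (as
exposed by the PMU format directory) and that the PERF_SAMPLE_WEIGHT and
PERF_SAMPLE_DATA_SRC sample types from the same series are available:

/*
 * Hypothetical consumer-side sketch: open a PEBS Load Latency event on
 * SNB/IVB (event=0xcd, umask=0x1, as in the mem-loads alias) and request
 * the new sample fields. The ldlat-in-config1 mapping is an assumption.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size	    = sizeof(attr);
	attr.type	    = PERF_TYPE_RAW;
	attr.config	    = 0x1cd;	/* MEM_TRANS_RETIRED:LATENCY_ABOVE_THRESHOLD */
	attr.config1	    = 3;	/* assumed: ldlat threshold in core cycles */
	attr.sample_period  = 10007;
	attr.sample_type    = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR |
			      PERF_SAMPLE_WEIGHT | PERF_SAMPLE_DATA_SRC;
	attr.precise_ip	    = 2;	/* PEBS is required for load latency */
	attr.exclude_kernel = 1;

	/* monitor the calling thread on any CPU */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		perror("perf_event_open");
	else
		close(fd);
	return 0;
}

With the sysfs alias exported by this patch, the perf tool can name the event
as mem-loads instead of hard-coding the raw encoding.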
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h      |   1
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c           |   5
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h           |  25
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c     |  24
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c  | 133
5 files changed, 178 insertions, 10 deletions
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 892ce40a7470..b31798d5e62e 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -71,6 +71,7 @@
 #define MSR_IA32_PEBS_ENABLE		0x000003f1
 #define MSR_IA32_DS_AREA		0x00000600
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
+#define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6
 
 #define MSR_MTRRfix64K_00000		0x00000250
 #define MSR_MTRRfix16K_80000		0x00000258
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8ba51518f689..5ed7a4c5baf7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1363,7 +1363,7 @@ static __init struct attribute **merge_attr(struct attribute **a, struct attribu
 	return new;
 }
 
-static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
 			  char *page)
 {
 	struct perf_pmu_events_attr *pmu_attr = \
@@ -1494,6 +1494,9 @@ static int __init init_hw_perf_events(void)
 	x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
 	x86_pmu_format_group.attrs = x86_pmu.format_attrs;
 
+	if (x86_pmu.event_attrs)
+		x86_pmu_events_group.attrs = x86_pmu.event_attrs;
+
 	if (!x86_pmu.events_sysfs_show)
 		x86_pmu_events_group.attrs = &empty_attrs;
 	else
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 9686d38eb458..f3a9a94e4d22 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -46,6 +46,7 @@ enum extra_reg_type {
 	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
 	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
 	EXTRA_REG_LBR   = 2,	/* lbr_select */
+	EXTRA_REG_LDLAT = 3,	/* ld_lat_threshold */
 
 	EXTRA_REG_MAX		/* number of entries needed */
 };
@@ -61,6 +62,10 @@ struct event_constraint {
 	int	overlap;
 	int	flags;
 };
+/*
+ * struct event_constraint flags
+ */
+#define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -233,6 +238,10 @@ struct cpu_hw_events {
 #define INTEL_UEVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
+#define INTEL_PLD_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
+
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
@@ -262,12 +271,22 @@ struct extra_reg {
 	.msr = (ms),		\
 	.config_mask = (m),	\
 	.valid_mask = (vm),	\
-	.idx = EXTRA_REG_##i	\
+	.idx = EXTRA_REG_##i,	\
 	}
 
 #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\
 	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
 
+#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
+	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
+			ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
+
+#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
+	INTEL_UEVENT_EXTRA_REG(c, \
+			       MSR_PEBS_LD_LAT_THRESHOLD, \
+			       0xffff, \
+			       LDLAT)
+
 #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
 
 union perf_capabilities {
@@ -357,6 +376,7 @@ struct x86_pmu {
 	 */
 	int		attr_rdpmc;
 	struct attribute **format_attrs;
+	struct attribute **event_attrs;
 
 	ssize_t		(*events_sysfs_show)(char *page, u64 config);
 	struct attribute **cpu_events;
@@ -648,6 +668,9 @@ int p6_pmu_init(void);
 
 int knc_pmu_init(void);
 
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+			  char *page);
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index df3beaac3397..d5ea5a03cd37 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -81,6 +81,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
 
@@ -136,6 +137,7 @@ static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
 
@@ -155,9 +157,23 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 	EVENT_EXTRA_END
 };
 
+EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
+
+struct attribute *nhm_events_attrs[] = {
+	EVENT_PTR(mem_ld_nhm),
+	NULL,
+};
+
+struct attribute *snb_events_attrs[] = {
+	EVENT_PTR(mem_ld_snb),
+	NULL,
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
 	return intel_perfmon_event_map[hw_event];
@@ -2035,6 +2051,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
+		x86_pmu.cpu_events = nhm_events_attrs;
+
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2078,6 +2096,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 
+		x86_pmu.cpu_events = nhm_events_attrs;
+
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2106,6 +2126,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
+		x86_pmu.cpu_events = snb_events_attrs;
+
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2132,6 +2154,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
+		x86_pmu.cpu_events = snb_events_attrs;
+
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index f30d85bcbda9..a6400bd0463c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -24,6 +24,92 @@ struct pebs_record_32 {
 
  */
 
+union intel_x86_pebs_dse {
+	u64 val;
+	struct {
+		unsigned int ld_dse:4;
+		unsigned int ld_stlb_miss:1;
+		unsigned int ld_locked:1;
+		unsigned int ld_reserved:26;
+	};
+	struct {
+		unsigned int st_l1d_hit:1;
+		unsigned int st_reserved1:3;
+		unsigned int st_stlb_miss:1;
+		unsigned int st_locked:1;
+		unsigned int st_reserved2:26;
+	};
+};
+
+
+/*
+ * Map PEBS Load Latency Data Source encodings to generic
+ * memory data source information
+ */
+#define P(a, b) PERF_MEM_S(a, b)
+#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
+#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
+
+static const u64 pebs_data_source[] = {
+	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
+	OP_LH | P(LVL, L1)  | P(SNOOP, NONE),	/* 0x01: L1 local */
+	OP_LH | P(LVL, LFB) | P(SNOOP, NONE),	/* 0x02: LFB hit */
+	OP_LH | P(LVL, L2)  | P(SNOOP, NONE),	/* 0x03: L2 hit */
+	OP_LH | P(LVL, L3)  | P(SNOOP, NONE),	/* 0x04: L3 hit */
+	OP_LH | P(LVL, L3)  | P(SNOOP, MISS),	/* 0x05: L3 hit, snoop miss */
+	OP_LH | P(LVL, L3)  | P(SNOOP, HIT),	/* 0x06: L3 hit, snoop hit */
+	OP_LH | P(LVL, L3)  | P(SNOOP, HITM),	/* 0x07: L3 hit, snoop hitm */
+	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
+	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
+	OP_LH | P(LVL, LOC_RAM)  | P(SNOOP, HIT),  /* 0x0a: L3 miss, shared */
+	OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
+	OP_LH | P(LVL, LOC_RAM)  | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
+	OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
+	OP_LH | P(LVL, IO)  | P(SNOOP, NONE), /* 0x0e: I/O */
+	OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
+};
+
+static u64 load_latency_data(u64 status)
+{
+	union intel_x86_pebs_dse dse;
+	u64 val;
+	int model = boot_cpu_data.x86_model;
+	int fam = boot_cpu_data.x86;
+
+	dse.val = status;
+
+	/*
+	 * use the mapping table for bit 0-3
+	 */
+	val = pebs_data_source[dse.ld_dse];
+
+	/*
+	 * Nehalem models do not support TLB, Lock infos
+	 */
+	if (fam == 0x6 && (model == 26 || model == 30
+	    || model == 31 || model == 46)) {
+		val |= P(TLB, NA) | P(LOCK, NA);
+		return val;
+	}
+	/*
+	 * bit 4: TLB access
+	 * 0 = did not miss 2nd level TLB
+	 * 1 = missed 2nd level TLB
+	 */
+	if (dse.ld_stlb_miss)
+		val |= P(TLB, MISS) | P(TLB, L2);
+	else
+		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+	/*
+	 * bit 5: locked prefix
+	 */
+	if (dse.ld_locked)
+		val |= P(LOCK, LOCKED);
+
+	return val;
+}
+
 struct pebs_record_core {
 	u64 flags, ip;
 	u64 ax, bx, cx, dx;
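As an illustration of the hunk above (a standalone sketch, not kernel code):
the low nibble of the PEBS status selects an entry in pebs_data_source, bit 4
reports an STLB miss and bit 5 a locked access, so a raw status of 0x13 would
decode to an L2 hit whose address missed the second-level TLB.

/*
 * Standalone example mirroring the bit layout of union intel_x86_pebs_dse;
 * the value 0x13 is an arbitrary illustration.
 */
#include <stdint.h>
#include <stdio.h>

union pebs_dse_example {
	uint64_t val;
	struct {
		unsigned int ld_dse:4;		/* index into pebs_data_source */
		unsigned int ld_stlb_miss:1;	/* bit 4: missed 2nd level TLB */
		unsigned int ld_locked:1;	/* bit 5: locked access */
		unsigned int ld_reserved:26;
	};
};

int main(void)
{
	union pebs_dse_example dse = { .val = 0x13 };

	/* prints: dse=3 (L2 hit in the table above), stlb_miss=1, locked=0 */
	printf("dse=%u, stlb_miss=%u, locked=%u\n",
	       dse.ld_dse, dse.ld_stlb_miss, dse.ld_locked);
	return 0;
}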
@@ -364,7 +450,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
-	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),    /* MEM_INST_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x100b, 0xf),    /* MEM_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
@@ -379,7 +465,7 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_westmere_pebs_event_constraints[] = {
-	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),    /* MEM_INST_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x100b, 0xf),    /* MEM_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
@@ -399,7 +485,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -413,7 +499,7 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -448,6 +534,9 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
+
+	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
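A small sketch of the enable-mask composition implied by the hunk above,
assuming (as elsewhere in this file) that cpuc->pebs_enabled is what ends up
in MSR_IA32_PEBS_ENABLE: the low bits arm PEBS per counter, and for a
load-latency event the same counter index shifted by 32 arms latency and
data-source capture on that counter.

/* Illustration only; the counter index 1 is a hypothetical example. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int idx = 1;			/* hypothetical PEBS counter index */
	uint64_t pebs_enabled = 0;

	pebs_enabled |= 1ULL << idx;		/* plain PEBS on counter 1    */
	pebs_enabled |= 1ULL << (idx + 32);	/* PEBS-LL capture, bit 33    */

	/* prints: 0x200000002 */
	printf("MSR_IA32_PEBS_ENABLE value: 0x%llx\n",
	       (unsigned long long)pebs_enabled);
	return 0;
}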
@@ -560,20 +649,48 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 				   struct pt_regs *iregs, void *__pebs)
 {
 	/*
-	 * We cast to pebs_record_core since that is a subset of
-	 * both formats and we don't use the other fields in this
-	 * routine.
+	 * We cast to pebs_record_nhm to get the load latency data
+	 * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
 	 */
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct pebs_record_core *pebs = __pebs;
+	struct pebs_record_nhm *pebs = __pebs;
 	struct perf_sample_data data;
 	struct pt_regs regs;
+	u64 sample_type;
+	int fll;
 
 	if (!intel_pmu_save_and_restart(event))
 		return;
 
+	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
+
 	perf_sample_data_init(&data, 0, event->hw.last_period);
 
+	data.period = event->hw.last_period;
+	sample_type = event->attr.sample_type;
+
+	/*
+	 * if PEBS-LL or PreciseStore
+	 */
+	if (fll) {
+		if (sample_type & PERF_SAMPLE_ADDR)
+			data.addr = pebs->dla;
+
+		/*
+		 * Use latency for weight (only avail with PEBS-LL)
+		 */
+		if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
+			data.weight = pebs->lat;
+
+		/*
+		 * data.data_src encodes the data source
+		 */
+		if (sample_type & PERF_SAMPLE_DATA_SRC) {
+			if (fll)
+				data.data_src.val = load_latency_data(pebs->dse);
+		}
+	}
+
 	/*
 	 * We use the interrupt regs as a base because the PEBS record
 	 * does not contain a full regs set, specifically it seems to