 arch/x86/include/uapi/asm/msr-index.h     |   1
 arch/x86/kernel/cpu/perf_event.c          |   5
 arch/x86/kernel/cpu/perf_event.h          |  25
 arch/x86/kernel/cpu/perf_event_intel.c    |  24
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 133
 5 files changed, 178 insertions(+), 10 deletions(-)
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 892ce40a7470..b31798d5e62e 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -71,6 +71,7 @@
 #define MSR_IA32_PEBS_ENABLE		0x000003f1
 #define MSR_IA32_DS_AREA		0x00000600
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
+#define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6
 
 #define MSR_MTRRfix64K_00000		0x00000250
 #define MSR_MTRRfix16K_80000		0x00000258
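
The new threshold MSR is not written directly by the PEBS code in this patch; the ldlat value travels through the generic extra_reg machinery, so it presumably reaches the hardware via the existing enable path in perf_event.c, roughly (sketch, exact call site assumed):

	if (hwc->extra_reg.reg)		/* MSR_PEBS_LD_LAT_THRESHOLD for ldlat events */
		wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
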
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8ba51518f689..5ed7a4c5baf7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1363,7 +1363,7 @@ static __init struct attribute **merge_attr(struct attribute **a, struct attribu
 	return new;
 }
 
-static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
 			  char *page)
 {
 	struct perf_pmu_events_attr *pmu_attr = \
@@ -1494,6 +1494,9 @@ static int __init init_hw_perf_events(void)
 	x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
 	x86_pmu_format_group.attrs = x86_pmu.format_attrs;
 
+	if (x86_pmu.event_attrs)
+		x86_pmu_events_group.attrs = x86_pmu.event_attrs;
+
 	if (!x86_pmu.events_sysfs_show)
 		x86_pmu_events_group.attrs = &empty_attrs;
 	else
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 9686d38eb458..f3a9a94e4d22 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -46,6 +46,7 @@ enum extra_reg_type {
 	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
 	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
 	EXTRA_REG_LBR   = 2,	/* lbr_select */
+	EXTRA_REG_LDLAT = 3,	/* ld_lat_threshold */
 
 	EXTRA_REG_MAX		/* number of entries needed */
 };
@@ -61,6 +62,10 @@ struct event_constraint {
 	int	overlap;
 	int	flags;
 };
+/*
+ * struct event_constraint flags
+ */
+#define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -233,6 +238,10 @@ struct cpu_hw_events {
 #define INTEL_UEVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
+#define INTEL_PLD_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
+
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
@@ -262,12 +271,22 @@ struct extra_reg {
 		.msr = (ms),		\
 		.config_mask = (m),	\
 		.valid_mask = (vm),	\
-		.idx = EXTRA_REG_##i	\
+		.idx = EXTRA_REG_##i,	\
 	}
 
 #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\
 	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
 
+#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx)	\
+	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
+			ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
+
+#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
+	INTEL_UEVENT_EXTRA_REG(c, \
+			       MSR_PEBS_LD_LAT_THRESHOLD, \
+			       0xffff, \
+			       LDLAT)
+
 #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
 
 union perf_capabilities {
@@ -357,6 +376,7 @@ struct x86_pmu {
 	 */
 	int		attr_rdpmc;
 	struct attribute **format_attrs;
+	struct attribute **event_attrs;
 
 	ssize_t		(*events_sysfs_show)(char *page, u64 config);
 	struct attribute **cpu_events;
@@ -648,6 +668,9 @@ int p6_pmu_init(void);
 
 int knc_pmu_init(void);
 
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+			  char *page);
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
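
The EVENT_ATTR_STR()/EVENT_PTR() helpers used for the mem-loads aliases in the next file are not visible in these hunks; given the events_sysfs_show() prototype exported above, they presumably expand to something like this (assumed shape, not part of this diff):

	#define EVENT_ATTR_STR(_name, v, str)					\
	static struct perf_pmu_events_attr event_attr_##v = {			\
		.attr		= __ATTR(_name, 0444, events_sysfs_show, NULL),	\
		.id		= 0,						\
		.event_str	= str,						\
	};

	#define EVENT_PTR(_id) (&event_attr_##_id.attr.attr)
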
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index df3beaac3397..d5ea5a03cd37 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -81,6 +81,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
 
@@ -136,6 +137,7 @@ static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
 
@@ -155,9 +157,23 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 	EVENT_EXTRA_END
 };
 
+EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
+
+struct attribute *nhm_events_attrs[] = {
+	EVENT_PTR(mem_ld_nhm),
+	NULL,
+};
+
+struct attribute *snb_events_attrs[] = {
+	EVENT_PTR(mem_ld_snb),
+	NULL,
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
 	return intel_perfmon_event_map[hw_event];
@@ -2035,6 +2051,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
+		x86_pmu.cpu_events = nhm_events_attrs;
+
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2078,6 +2096,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 
+		x86_pmu.cpu_events = nhm_events_attrs;
+
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2106,6 +2126,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
+		x86_pmu.cpu_events = snb_events_attrs;
+
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2132,6 +2154,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
+		x86_pmu.cpu_events = snb_events_attrs;
+
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
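
With the mem-loads alias exported through sysfs, the feature becomes usable from the perf tool. A minimal usage example, assuming the companion tooling changes and the ldlat format attribute from the rest of this series (option names may differ):

	# sample loads taking more than 50 cycles, recording address, weight and data source
	perf record -W -d -e cpu/mem-loads,ldlat=50/pp -a -- sleep 5
	perf report --stdio
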
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index f30d85bcbda9..a6400bd0463c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -24,6 +24,92 @@ struct pebs_record_32 {
 
  */
 
+union intel_x86_pebs_dse {
+	u64 val;
+	struct {
+		unsigned int ld_dse:4;
+		unsigned int ld_stlb_miss:1;
+		unsigned int ld_locked:1;
+		unsigned int ld_reserved:26;
+	};
+	struct {
+		unsigned int st_l1d_hit:1;
+		unsigned int st_reserved1:3;
+		unsigned int st_stlb_miss:1;
+		unsigned int st_locked:1;
+		unsigned int st_reserved2:26;
+	};
+};
+
+
+/*
+ * Map PEBS Load Latency Data Source encodings to generic
+ * memory data source information
+ */
+#define P(a, b) PERF_MEM_S(a, b)
+#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
+#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
+
+static const u64 pebs_data_source[] = {
+	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
+	OP_LH | P(LVL, L1)  | P(SNOOP, NONE),	/* 0x01: L1 local */
+	OP_LH | P(LVL, LFB) | P(SNOOP, NONE),	/* 0x02: LFB hit */
+	OP_LH | P(LVL, L2)  | P(SNOOP, NONE),	/* 0x03: L2 hit */
+	OP_LH | P(LVL, L3)  | P(SNOOP, NONE),	/* 0x04: L3 hit */
+	OP_LH | P(LVL, L3)  | P(SNOOP, MISS),	/* 0x05: L3 hit, snoop miss */
+	OP_LH | P(LVL, L3)  | P(SNOOP, HIT),	/* 0x06: L3 hit, snoop hit */
+	OP_LH | P(LVL, L3)  | P(SNOOP, HITM),	/* 0x07: L3 hit, snoop hitm */
+	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
+	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
+	OP_LH | P(LVL, LOC_RAM)  | P(SNOOP, HIT),  /* 0x0a: L3 miss, shared */
+	OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
+	OP_LH | P(LVL, LOC_RAM)  | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
+	OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
+	OP_LH | P(LVL, IO)  | P(SNOOP, NONE), /* 0x0e: I/O */
+	OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
+};
+
+static u64 load_latency_data(u64 status)
+{
+	union intel_x86_pebs_dse dse;
+	u64 val;
+	int model = boot_cpu_data.x86_model;
+	int fam = boot_cpu_data.x86;
+
+	dse.val = status;
+
+	/*
+	 * use the mapping table for bit 0-3
+	 */
+	val = pebs_data_source[dse.ld_dse];
+
+	/*
+	 * Nehalem models do not support TLB, Lock infos
+	 */
+	if (fam == 0x6 && (model == 26 || model == 30
+	    || model == 31 || model == 46)) {
+		val |= P(TLB, NA) | P(LOCK, NA);
+		return val;
+	}
+	/*
+	 * bit 4: TLB access
+	 * 0 = did not miss 2nd level TLB
+	 * 1 = missed 2nd level TLB
+	 */
+	if (dse.ld_stlb_miss)
+		val |= P(TLB, MISS) | P(TLB, L2);
+	else
+		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+	/*
+	 * bit 5: locked prefix
+	 */
+	if (dse.ld_locked)
+		val |= P(LOCK, LOCKED);
+
+	return val;
+}
+
 struct pebs_record_core {
 	u64 flags, ip;
 	u64 ax, bx, cx, dx;
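
As a worked example of the decoding above (illustrative status value, non-Nehalem model): status 0x25 = 0b100101 has ld_dse = 0x5, ld_stlb_miss = 0 and ld_locked = 1, so load_latency_data(0x25) returns:

	OP_LH | P(LVL, L3) | P(SNOOP, MISS)		/* dse 0x05: L3 hit, snoop miss */
	      | P(TLB, HIT) | P(TLB, L1) | P(TLB, L2)	/* bit 4 clear: no STLB miss */
	      | P(LOCK, LOCKED);			/* bit 5 set: locked access */
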
@@ -364,7 +450,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
-	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),    /* MEM_INST_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x100b, 0xf),    /* MEM_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
@@ -379,7 +465,7 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_westmere_pebs_event_constraints[] = {
-	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),    /* MEM_INST_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x100b, 0xf),    /* MEM_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
@@ -399,7 +485,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -413,7 +499,7 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -448,6 +534,9 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
+
+	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
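
The extra bit set here (hwc->idx + 32) lands in the upper half of MSR_IA32_PEBS_ENABLE, which on these parts holds the per-counter load-latency enable bits; the accumulated mask is then written out by the existing enable-all path, roughly (sketch for counter 0):

	cpuc->pebs_enabled |= (1ULL << 0) | (1ULL << 32);
	wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);	/* in intel_pmu_pebs_enable_all() */
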
@@ -560,20 +649,48 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 				   struct pt_regs *iregs, void *__pebs)
 {
 	/*
-	 * We cast to pebs_record_core since that is a subset of
-	 * both formats and we don't use the other fields in this
-	 * routine.
+	 * We cast to pebs_record_nhm to get the load latency data
+	 * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
 	 */
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct pebs_record_core *pebs = __pebs;
+	struct pebs_record_nhm *pebs = __pebs;
 	struct perf_sample_data data;
 	struct pt_regs regs;
+	u64 sample_type;
+	int fll;
 
 	if (!intel_pmu_save_and_restart(event))
 		return;
 
+	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
+
 	perf_sample_data_init(&data, 0, event->hw.last_period);
 
+	data.period = event->hw.last_period;
+	sample_type = event->attr.sample_type;
+
+	/*
+	 * if PEBS-LL or PreciseStore
+	 */
+	if (fll) {
+		if (sample_type & PERF_SAMPLE_ADDR)
+			data.addr = pebs->dla;
+
+		/*
+		 * Use latency for weight (only avail with PEBS-LL)
+		 */
+		if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
+			data.weight = pebs->lat;
+
+		/*
+		 * data.data_src encodes the data source
+		 */
+		if (sample_type & PERF_SAMPLE_DATA_SRC) {
+			if (fll)
+				data.data_src.val = load_latency_data(pebs->dse);
+		}
+	}
+
 	/*
 	 * We use the interrupt regs as a base because the PEBS record
 	 * does not contain a full regs set, specifically it seems to