about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorLin Ming <ming.m.lin@intel.com>2010-03-18 06:33:12 -0400
committerIngo Molnar <mingo@elte.hu>2010-03-18 12:04:02 -0400
commitcb7d6b5053e86598735d9af19930f5929f007b7f (patch)
treeb707d913470443c05bd637d4b7fbab3926e0ba1c
parentf34edbc1cdb0f8f83d94e1d668dd6e41abf0defb (diff)
perf, x86: Add cache events for the Pentium-4 PMU
Move the HT bit setting code from p4_pmu_event_map to p4_hw_config. So the cache events can get HT bit set correctly. Tested on my P4 desktop, below 6 cache events work: L1-dcache-load-misses LLC-load-misses dTLB-load-misses dTLB-store-misses iTLB-loads iTLB-load-misses Signed-off-by: Lin Ming <ming.m.lin@intel.com> Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org> Cc: Peter Zijlstra <peterz@infradead.org> LKML-Reference: <1268908392.13901.128.camel@minggr.sh.intel.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/include/asm/msr-index.h2
-rw-r--r--arch/x86/include/asm/perf_event_p4.h10
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c153
3 files changed, 159 insertions, 6 deletions
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1cd58cdbc03f..aef562c0a647 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -357,6 +357,8 @@
357#define MSR_P4_U2L_ESCR0 0x000003b0 357#define MSR_P4_U2L_ESCR0 0x000003b0
358#define MSR_P4_U2L_ESCR1 0x000003b1 358#define MSR_P4_U2L_ESCR1 0x000003b1
359 359
360#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2
361
360/* Intel Core-based CPU performance counters */ 362/* Intel Core-based CPU performance counters */
361#define MSR_CORE_PERF_FIXED_CTR0 0x00000309 363#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
362#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a 364#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 7d3406a2773c..871249cf4d2b 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -708,4 +708,14 @@ enum P4_EVENTS_ATTR {
708 P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, BOGUS, 1), 708 P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, BOGUS, 1),
709}; 709};
710 710
711enum {
712 KEY_P4_L1D_OP_READ_RESULT_MISS,
713 KEY_P4_LL_OP_READ_RESULT_MISS,
714 KEY_P4_DTLB_OP_READ_RESULT_MISS,
715 KEY_P4_DTLB_OP_WRITE_RESULT_MISS,
716 KEY_P4_ITLB_OP_READ_RESULT_ACCESS,
717 KEY_P4_ITLB_OP_READ_RESULT_MISS,
718 KEY_P4_UOP_TYPE,
719};
720
711#endif /* PERF_EVENT_P4_H */ 721#endif /* PERF_EVENT_P4_H */
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 3e97ed3904cc..b7bf9911198c 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -19,6 +19,11 @@ struct p4_event_template {
19 u64 config; /* packed predefined bits */ 19 u64 config; /* packed predefined bits */
20 int dep; /* upstream dependency event index */ 20 int dep; /* upstream dependency event index */
21 int key; /* index into p4_templates */ 21 int key; /* index into p4_templates */
22 u64 msr; /*
23 * the high 32 bits set into MSR_IA32_PEBS_ENABLE and
24 * the low 32 bits set into MSR_P4_PEBS_MATRIX_VERT
25 * for cache events
26 */
22 unsigned int emask; /* ESCR EventMask */ 27 unsigned int emask; /* ESCR EventMask */
23 unsigned int escr_msr[2]; /* ESCR MSR for this event */ 28 unsigned int escr_msr[2]; /* ESCR MSR for this event */
24 unsigned int cntr[2]; /* counter index (offset) */ 29 unsigned int cntr[2]; /* counter index (offset) */
@@ -31,6 +36,67 @@ struct p4_pmu_res {
31 36
32static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config); 37static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config);
33 38
39#define P4_CACHE_EVENT_CONFIG(event, bit) \
40 p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(event) << P4_EVNTSEL_EVENT_SHIFT) | \
41 p4_config_pack_escr((event##_##bit) << P4_EVNTSEL_EVENTMASK_SHIFT) | \
42 p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(event) << P4_CCCR_ESCR_SELECT_SHIFT)
43
44static __initconst u64 p4_hw_cache_event_ids
45 [PERF_COUNT_HW_CACHE_MAX]
46 [PERF_COUNT_HW_CACHE_OP_MAX]
47 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
48{
49 [ C(L1D ) ] = {
50 [ C(OP_READ) ] = {
51 [ C(RESULT_ACCESS) ] = 0x0,
52 /* 1stL_cache_load_miss_retired */
53 [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
54 | KEY_P4_L1D_OP_READ_RESULT_MISS,
55 },
56 },
57 [ C(LL ) ] = {
58 [ C(OP_READ) ] = {
59 [ C(RESULT_ACCESS) ] = 0x0,
60 /* 2ndL_cache_load_miss_retired */
61 [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
62 | KEY_P4_LL_OP_READ_RESULT_MISS,
63 },
64 },
65 [ C(DTLB) ] = {
66 [ C(OP_READ) ] = {
67 [ C(RESULT_ACCESS) ] = 0x0,
68 /* DTLB_load_miss_retired */
69 [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
70 | KEY_P4_DTLB_OP_READ_RESULT_MISS,
71 },
72 [ C(OP_WRITE) ] = {
73 [ C(RESULT_ACCESS) ] = 0x0,
74 /* DTLB_store_miss_retired */
75 [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
76 | KEY_P4_DTLB_OP_WRITE_RESULT_MISS,
77 },
78 },
79 [ C(ITLB) ] = {
80 [ C(OP_READ) ] = {
81 /* ITLB_reference.HIT */
82 [ C(RESULT_ACCESS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, HIT)
83 | KEY_P4_ITLB_OP_READ_RESULT_ACCESS,
84
85 /* ITLB_reference.MISS */
86 [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, MISS)
87 | KEY_P4_ITLB_OP_READ_RESULT_MISS,
88 },
89 [ C(OP_WRITE) ] = {
90 [ C(RESULT_ACCESS) ] = -1,
91 [ C(RESULT_MISS) ] = -1,
92 },
93 [ C(OP_PREFETCH) ] = {
94 [ C(RESULT_ACCESS) ] = -1,
95 [ C(RESULT_MISS) ] = -1,
96 },
97 },
98};
99
34/* 100/*
35 * WARN: CCCR1 doesn't have a working enable bit so try to not 101 * WARN: CCCR1 doesn't have a working enable bit so try to not
36 * use it if possible 102 * use it if possible
@@ -121,11 +187,77 @@ struct p4_event_template p4_templates[] = {
121 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 187 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
122 .cntr = { 0, 2 }, 188 .cntr = { 0, 2 },
123 }, 189 },
124 [7] = { 190 [KEY_P4_L1D_OP_READ_RESULT_MISS] = {
191 .opcode = P4_REPLAY_EVENT,
192 .config = 0,
193 .dep = -1,
194 .msr = (u64)(1 << 0 | 1 << 24) << 32 | (1 << 0),
195 .key = KEY_P4_L1D_OP_READ_RESULT_MISS,
196 .emask =
197 P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
198 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
199 .cntr = { 16, 17 },
200 },
201 [KEY_P4_LL_OP_READ_RESULT_MISS] = {
202 .opcode = P4_REPLAY_EVENT,
203 .config = 0,
204 .dep = -1,
205 .msr = (u64)(1 << 1 | 1 << 24) << 32 | (1 << 0),
206 .key = KEY_P4_LL_OP_READ_RESULT_MISS,
207 .emask =
208 P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
209 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
210 .cntr = { 16, 17 },
211 },
212 [KEY_P4_DTLB_OP_READ_RESULT_MISS] = {
213 .opcode = P4_REPLAY_EVENT,
214 .config = 0,
215 .dep = -1,
216 .msr = (u64)(1 << 2 | 1 << 24) << 32 | (1 << 0),
217 .key = KEY_P4_DTLB_OP_READ_RESULT_MISS,
218 .emask =
219 P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
220 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
221 .cntr = { 16, 17 },
222 },
223 [KEY_P4_DTLB_OP_WRITE_RESULT_MISS] = {
224 .opcode = P4_REPLAY_EVENT,
225 .config = 0,
226 .dep = -1,
227 .msr = (u64)(1 << 2 | 1 << 24) << 32 | (1 << 1),
228 .key = KEY_P4_DTLB_OP_WRITE_RESULT_MISS,
229 .emask =
230 P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
231 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
232 .cntr = { 16, 17 },
233 },
234 [KEY_P4_ITLB_OP_READ_RESULT_ACCESS] = {
235 .opcode = P4_ITLB_REFERENCE,
236 .config = 0,
237 .dep = -1,
238 .msr = 0,
239 .key = KEY_P4_ITLB_OP_READ_RESULT_ACCESS,
240 .emask =
241 P4_EVENT_ATTR(P4_ITLB_REFERENCE, HIT),
242 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
243 .cntr = { 0, 2 },
244 },
245 [KEY_P4_ITLB_OP_READ_RESULT_MISS] = {
246 .opcode = P4_ITLB_REFERENCE,
247 .config = 0,
248 .dep = -1,
249 .msr = 0,
250 .key = KEY_P4_ITLB_OP_READ_RESULT_MISS,
251 .emask =
252 P4_EVENT_ATTR(P4_ITLB_REFERENCE, MISS),
253 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
254 .cntr = { 0, 2 },
255 },
256 [KEY_P4_UOP_TYPE] = {
125 .opcode = P4_UOP_TYPE, 257 .opcode = P4_UOP_TYPE,
126 .config = 0, 258 .config = 0,
127 .dep = -1, 259 .dep = -1,
128 .key = 7, 260 .key = KEY_P4_UOP_TYPE,
129 .emask = 261 .emask =
130 P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) | 262 P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) |
131 P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES), 263 P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES),
@@ -155,10 +287,6 @@ static u64 p4_pmu_event_map(int hw_event)
155 config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT); 287 config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT);
156 config |= p4_config_pack_cccr(hw_event & P4_CCCR_RESERVED); 288 config |= p4_config_pack_cccr(hw_event & P4_CCCR_RESERVED);
157 289
158 /* on HT machine we need a special bit */
159 if (p4_ht_active() && p4_ht_thread(raw_smp_processor_id()))
160 config = p4_set_ht_bit(config);
161
162 return config; 290 return config;
163} 291}
164 292
@@ -211,6 +339,10 @@ static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
211 /* Count user and OS events unless not requested to */ 339 /* Count user and OS events unless not requested to */
212 hwc->config |= p4_config_pack_escr(p4_default_escr_conf(cpu, attr->exclude_kernel, 340 hwc->config |= p4_config_pack_escr(p4_default_escr_conf(cpu, attr->exclude_kernel,
213 attr->exclude_user)); 341 attr->exclude_user));
342 /* on HT machine we need a special bit */
343 if (p4_ht_active() && p4_ht_thread(cpu))
344 hwc->config = p4_set_ht_bit(hwc->config);
345
214 return 0; 346 return 0;
215} 347}
216 348
@@ -271,6 +403,12 @@ static void p4_pmu_enable_event(struct perf_event *event)
271 pr_crit("%s: Wrong index: %d\n", __func__, hwc->idx); 403 pr_crit("%s: Wrong index: %d\n", __func__, hwc->idx);
272 return; 404 return;
273 } 405 }
406
407 if (tpl->msr) {
408 (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, tpl->msr >> 32);
409 (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, tpl->msr & 0xffffffff);
410 }
411
274 escr_base = (u64)tpl->escr_msr[thread]; 412 escr_base = (u64)tpl->escr_msr[thread];
275 413
276 /* 414 /*
@@ -577,6 +715,9 @@ static __init int p4_pmu_init(void)
577 return -ENODEV; 715 return -ENODEV;
578 } 716 }
579 717
718 memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
719 sizeof(hw_cache_event_ids));
720
580 pr_cont("Netburst events, "); 721 pr_cont("Netburst events, ");
581 722
582 x86_pmu = p4_pmu; 723 x86_pmu = p4_pmu;