diff options
author | Lin Ming <ming.m.lin@intel.com> | 2010-03-18 06:33:12 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-03-18 12:04:02 -0400 |
commit | cb7d6b5053e86598735d9af19930f5929f007b7f (patch) | |
tree | b707d913470443c05bd637d4b7fbab3926e0ba1c | |
parent | f34edbc1cdb0f8f83d94e1d668dd6e41abf0defb (diff) |
perf, x86: Add cache events for the Pentium-4 PMU
Move the HT bit setting code from p4_pmu_event_map to
p4_hw_config, so that the cache events get the HT bit set correctly.
Tested on my P4 desktop; the following 6 cache events work:
L1-dcache-load-misses
LLC-load-misses
dTLB-load-misses
dTLB-store-misses
iTLB-loads
iTLB-load-misses
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1268908392.13901.128.camel@minggr.sh.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/include/asm/msr-index.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/perf_event_p4.h | 10 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_p4.c | 153 |
3 files changed, 159 insertions, 6 deletions
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1cd58cdbc03f..aef562c0a647 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -357,6 +357,8 @@ | |||
357 | #define MSR_P4_U2L_ESCR0 0x000003b0 | 357 | #define MSR_P4_U2L_ESCR0 0x000003b0 |
358 | #define MSR_P4_U2L_ESCR1 0x000003b1 | 358 | #define MSR_P4_U2L_ESCR1 0x000003b1 |
359 | 359 | ||
360 | #define MSR_P4_PEBS_MATRIX_VERT 0x000003f2 | ||
361 | |||
360 | /* Intel Core-based CPU performance counters */ | 362 | /* Intel Core-based CPU performance counters */ |
361 | #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 | 363 | #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 |
362 | #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a | 364 | #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a |
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 7d3406a2773c..871249cf4d2b 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h | |||
@@ -708,4 +708,14 @@ enum P4_EVENTS_ATTR { | |||
708 | P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, BOGUS, 1), | 708 | P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, BOGUS, 1), |
709 | }; | 709 | }; |
710 | 710 | ||
711 | enum { | ||
712 | KEY_P4_L1D_OP_READ_RESULT_MISS, | ||
713 | KEY_P4_LL_OP_READ_RESULT_MISS, | ||
714 | KEY_P4_DTLB_OP_READ_RESULT_MISS, | ||
715 | KEY_P4_DTLB_OP_WRITE_RESULT_MISS, | ||
716 | KEY_P4_ITLB_OP_READ_RESULT_ACCESS, | ||
717 | KEY_P4_ITLB_OP_READ_RESULT_MISS, | ||
718 | KEY_P4_UOP_TYPE, | ||
719 | }; | ||
720 | |||
711 | #endif /* PERF_EVENT_P4_H */ | 721 | #endif /* PERF_EVENT_P4_H */ |
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 3e97ed3904cc..b7bf9911198c 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -19,6 +19,11 @@ struct p4_event_template { | |||
19 | u64 config; /* packed predefined bits */ | 19 | u64 config; /* packed predefined bits */ |
20 | int dep; /* upstream dependency event index */ | 20 | int dep; /* upstream dependency event index */ |
21 | int key; /* index into p4_templates */ | 21 | int key; /* index into p4_templates */ |
22 | u64 msr; /* | ||
23 | * the high 32 bits set into MSR_IA32_PEBS_ENABLE and | ||
24 | * the low 32 bits set into MSR_P4_PEBS_MATRIX_VERT | ||
25 | * for cache events | ||
26 | */ | ||
22 | unsigned int emask; /* ESCR EventMask */ | 27 | unsigned int emask; /* ESCR EventMask */ |
23 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ | 28 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ |
24 | unsigned int cntr[2]; /* counter index (offset) */ | 29 | unsigned int cntr[2]; /* counter index (offset) */ |
@@ -31,6 +36,67 @@ struct p4_pmu_res { | |||
31 | 36 | ||
32 | static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config); | 37 | static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config); |
33 | 38 | ||
39 | #define P4_CACHE_EVENT_CONFIG(event, bit) \ | ||
40 | p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(event) << P4_EVNTSEL_EVENT_SHIFT) | \ | ||
41 | p4_config_pack_escr((event##_##bit) << P4_EVNTSEL_EVENTMASK_SHIFT) | \ | ||
42 | p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(event) << P4_CCCR_ESCR_SELECT_SHIFT) | ||
43 | |||
44 | static __initconst u64 p4_hw_cache_event_ids | ||
45 | [PERF_COUNT_HW_CACHE_MAX] | ||
46 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
47 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
48 | { | ||
49 | [ C(L1D ) ] = { | ||
50 | [ C(OP_READ) ] = { | ||
51 | [ C(RESULT_ACCESS) ] = 0x0, | ||
52 | /* 1stL_cache_load_miss_retired */ | ||
53 | [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) | ||
54 | | KEY_P4_L1D_OP_READ_RESULT_MISS, | ||
55 | }, | ||
56 | }, | ||
57 | [ C(LL ) ] = { | ||
58 | [ C(OP_READ) ] = { | ||
59 | [ C(RESULT_ACCESS) ] = 0x0, | ||
60 | /* 2ndL_cache_load_miss_retired */ | ||
61 | [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) | ||
62 | | KEY_P4_LL_OP_READ_RESULT_MISS, | ||
63 | }, | ||
64 | }, | ||
65 | [ C(DTLB) ] = { | ||
66 | [ C(OP_READ) ] = { | ||
67 | [ C(RESULT_ACCESS) ] = 0x0, | ||
68 | /* DTLB_load_miss_retired */ | ||
69 | [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) | ||
70 | | KEY_P4_DTLB_OP_READ_RESULT_MISS, | ||
71 | }, | ||
72 | [ C(OP_WRITE) ] = { | ||
73 | [ C(RESULT_ACCESS) ] = 0x0, | ||
74 | /* DTLB_store_miss_retired */ | ||
75 | [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) | ||
76 | | KEY_P4_DTLB_OP_WRITE_RESULT_MISS, | ||
77 | }, | ||
78 | }, | ||
79 | [ C(ITLB) ] = { | ||
80 | [ C(OP_READ) ] = { | ||
81 | /* ITLB_reference.HIT */ | ||
82 | [ C(RESULT_ACCESS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, HIT) | ||
83 | | KEY_P4_ITLB_OP_READ_RESULT_ACCESS, | ||
84 | |||
85 | /* ITLB_reference.MISS */ | ||
86 | [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, MISS) | ||
87 | | KEY_P4_ITLB_OP_READ_RESULT_MISS, | ||
88 | }, | ||
89 | [ C(OP_WRITE) ] = { | ||
90 | [ C(RESULT_ACCESS) ] = -1, | ||
91 | [ C(RESULT_MISS) ] = -1, | ||
92 | }, | ||
93 | [ C(OP_PREFETCH) ] = { | ||
94 | [ C(RESULT_ACCESS) ] = -1, | ||
95 | [ C(RESULT_MISS) ] = -1, | ||
96 | }, | ||
97 | }, | ||
98 | }; | ||
99 | |||
34 | /* | 100 | /* |
35 | * WARN: CCCR1 doesn't have a working enable bit so try to not | 101 | * WARN: CCCR1 doesn't have a working enable bit so try to not |
36 | * use it if possible | 102 | * use it if possible |
@@ -121,11 +187,77 @@ struct p4_event_template p4_templates[] = { | |||
121 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | 187 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
122 | .cntr = { 0, 2 }, | 188 | .cntr = { 0, 2 }, |
123 | }, | 189 | }, |
124 | [7] = { | 190 | [KEY_P4_L1D_OP_READ_RESULT_MISS] = { |
191 | .opcode = P4_REPLAY_EVENT, | ||
192 | .config = 0, | ||
193 | .dep = -1, | ||
194 | .msr = (u64)(1 << 0 | 1 << 24) << 32 | (1 << 0), | ||
195 | .key = KEY_P4_L1D_OP_READ_RESULT_MISS, | ||
196 | .emask = | ||
197 | P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), | ||
198 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, | ||
199 | .cntr = { 16, 17 }, | ||
200 | }, | ||
201 | [KEY_P4_LL_OP_READ_RESULT_MISS] = { | ||
202 | .opcode = P4_REPLAY_EVENT, | ||
203 | .config = 0, | ||
204 | .dep = -1, | ||
205 | .msr = (u64)(1 << 1 | 1 << 24) << 32 | (1 << 0), | ||
206 | .key = KEY_P4_LL_OP_READ_RESULT_MISS, | ||
207 | .emask = | ||
208 | P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), | ||
209 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, | ||
210 | .cntr = { 16, 17 }, | ||
211 | }, | ||
212 | [KEY_P4_DTLB_OP_READ_RESULT_MISS] = { | ||
213 | .opcode = P4_REPLAY_EVENT, | ||
214 | .config = 0, | ||
215 | .dep = -1, | ||
216 | .msr = (u64)(1 << 2 | 1 << 24) << 32 | (1 << 0), | ||
217 | .key = KEY_P4_DTLB_OP_READ_RESULT_MISS, | ||
218 | .emask = | ||
219 | P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), | ||
220 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, | ||
221 | .cntr = { 16, 17 }, | ||
222 | }, | ||
223 | [KEY_P4_DTLB_OP_WRITE_RESULT_MISS] = { | ||
224 | .opcode = P4_REPLAY_EVENT, | ||
225 | .config = 0, | ||
226 | .dep = -1, | ||
227 | .msr = (u64)(1 << 2 | 1 << 24) << 32 | (1 << 1), | ||
228 | .key = KEY_P4_DTLB_OP_WRITE_RESULT_MISS, | ||
229 | .emask = | ||
230 | P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), | ||
231 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, | ||
232 | .cntr = { 16, 17 }, | ||
233 | }, | ||
234 | [KEY_P4_ITLB_OP_READ_RESULT_ACCESS] = { | ||
235 | .opcode = P4_ITLB_REFERENCE, | ||
236 | .config = 0, | ||
237 | .dep = -1, | ||
238 | .msr = 0, | ||
239 | .key = KEY_P4_ITLB_OP_READ_RESULT_ACCESS, | ||
240 | .emask = | ||
241 | P4_EVENT_ATTR(P4_ITLB_REFERENCE, HIT), | ||
242 | .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, | ||
243 | .cntr = { 0, 2 }, | ||
244 | }, | ||
245 | [KEY_P4_ITLB_OP_READ_RESULT_MISS] = { | ||
246 | .opcode = P4_ITLB_REFERENCE, | ||
247 | .config = 0, | ||
248 | .dep = -1, | ||
249 | .msr = 0, | ||
250 | .key = KEY_P4_ITLB_OP_READ_RESULT_MISS, | ||
251 | .emask = | ||
252 | P4_EVENT_ATTR(P4_ITLB_REFERENCE, MISS), | ||
253 | .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, | ||
254 | .cntr = { 0, 2 }, | ||
255 | }, | ||
256 | [KEY_P4_UOP_TYPE] = { | ||
125 | .opcode = P4_UOP_TYPE, | 257 | .opcode = P4_UOP_TYPE, |
126 | .config = 0, | 258 | .config = 0, |
127 | .dep = -1, | 259 | .dep = -1, |
128 | .key = 7, | 260 | .key = KEY_P4_UOP_TYPE, |
129 | .emask = | 261 | .emask = |
130 | P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) | | 262 | P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) | |
131 | P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES), | 263 | P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES), |
@@ -155,10 +287,6 @@ static u64 p4_pmu_event_map(int hw_event) | |||
155 | config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT); | 287 | config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT); |
156 | config |= p4_config_pack_cccr(hw_event & P4_CCCR_RESERVED); | 288 | config |= p4_config_pack_cccr(hw_event & P4_CCCR_RESERVED); |
157 | 289 | ||
158 | /* on HT machine we need a special bit */ | ||
159 | if (p4_ht_active() && p4_ht_thread(raw_smp_processor_id())) | ||
160 | config = p4_set_ht_bit(config); | ||
161 | |||
162 | return config; | 290 | return config; |
163 | } | 291 | } |
164 | 292 | ||
@@ -211,6 +339,10 @@ static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc) | |||
211 | /* Count user and OS events unless not requested to */ | 339 | /* Count user and OS events unless not requested to */ |
212 | hwc->config |= p4_config_pack_escr(p4_default_escr_conf(cpu, attr->exclude_kernel, | 340 | hwc->config |= p4_config_pack_escr(p4_default_escr_conf(cpu, attr->exclude_kernel, |
213 | attr->exclude_user)); | 341 | attr->exclude_user)); |
342 | /* on HT machine we need a special bit */ | ||
343 | if (p4_ht_active() && p4_ht_thread(cpu)) | ||
344 | hwc->config = p4_set_ht_bit(hwc->config); | ||
345 | |||
214 | return 0; | 346 | return 0; |
215 | } | 347 | } |
216 | 348 | ||
@@ -271,6 +403,12 @@ static void p4_pmu_enable_event(struct perf_event *event) | |||
271 | pr_crit("%s: Wrong index: %d\n", __func__, hwc->idx); | 403 | pr_crit("%s: Wrong index: %d\n", __func__, hwc->idx); |
272 | return; | 404 | return; |
273 | } | 405 | } |
406 | |||
407 | if (tpl->msr) { | ||
408 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, tpl->msr >> 32); | ||
409 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, tpl->msr & 0xffffffff); | ||
410 | } | ||
411 | |||
274 | escr_base = (u64)tpl->escr_msr[thread]; | 412 | escr_base = (u64)tpl->escr_msr[thread]; |
275 | 413 | ||
276 | /* | 414 | /* |
@@ -577,6 +715,9 @@ static __init int p4_pmu_init(void) | |||
577 | return -ENODEV; | 715 | return -ENODEV; |
578 | } | 716 | } |
579 | 717 | ||
718 | memcpy(hw_cache_event_ids, p4_hw_cache_event_ids, | ||
719 | sizeof(hw_cache_event_ids)); | ||
720 | |||
580 | pr_cont("Netburst events, "); | 721 | pr_cont("Netburst events, "); |
581 | 722 | ||
582 | x86_pmu = p4_pmu; | 723 | x86_pmu = p4_pmu; |