diff options
Diffstat (limited to 'arch/x86/oprofile/op_model_p4.c')
-rw-r--r-- | arch/x86/oprofile/op_model_p4.c | 722 |
1 files changed, 722 insertions, 0 deletions
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c new file mode 100644 index 000000000000..47925927b12f --- /dev/null +++ b/arch/x86/oprofile/op_model_p4.c | |||
@@ -0,0 +1,722 @@ | |||
1 | /** | ||
2 | * @file op_model_p4.c | ||
3 | * P4 model-specific MSR operations | ||
4 | * | ||
5 | * @remark Copyright 2002 OProfile authors | ||
6 | * @remark Read the file COPYING | ||
7 | * | ||
8 | * @author Graydon Hoare | ||
9 | */ | ||
10 | |||
11 | #include <linux/oprofile.h> | ||
12 | #include <linux/smp.h> | ||
13 | #include <asm/msr.h> | ||
14 | #include <asm/ptrace.h> | ||
15 | #include <asm/fixmap.h> | ||
16 | #include <asm/apic.h> | ||
17 | #include <asm/nmi.h> | ||
18 | |||
19 | #include "op_x86_model.h" | ||
20 | #include "op_counter.h" | ||
21 | |||
/* number of entries in the p4_events[] table */
#define NUM_EVENTS 39

/* register counts used when the CPU is not hyper-threaded */
#define NUM_COUNTERS_NON_HT 8
#define NUM_ESCRS_NON_HT 45
#define NUM_CCCRS_NON_HT 18
#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

/* register counts used when smp_num_siblings == 2; see
   setup_num_counters() */
#define NUM_COUNTERS_HT2 4
#define NUM_ESCRS_HT2 23
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

/* run-time register counts: they start out at the non-HT values and are
   switched to the HT2 values by setup_num_counters() */
static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static unsigned int num_controls = NUM_CONTROLS_NON_HT;
36 | |||
/* The hyper-threadedness of a chip is only discovered at kernel boot-time,
   so the counter/control counts have to be selected dynamically: when two
   siblings are present, switch both counts to their HT2 values.  Without
   CONFIG_SMP this is a no-op. */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings != 2)
		return;

	num_counters = NUM_COUNTERS_HT2;
	num_controls = NUM_CONTROLS_HT2;
#endif
}
49 | |||
/* Step to use when walking a range of MSR addresses: 2 when
   smp_num_siblings == 2 (CONFIG_SMP builds only), otherwise 1.
   Returns the step as an int. */
static inline int addr_increment(void)
{
#ifdef CONFIG_SMP
	return smp_num_siblings == 2 ? 2 : 1;
#else
	return 1;
#endif
}
58 | |||
59 | |||
/* tables to simulate simplified hardware view of p4 registers */

/* One entry of p4_counters[]: ties a CTR_* bit to the MSR addresses of a
   counter and of its CCCR. */
struct p4_counter_binding {
	int virt_counter;	/* CTR_* bit identifying this counter */
	int counter_address;	/* MSR address of the counter itself */
	int cccr_address;	/* MSR address of the matching CCCR */
};

/* One entry of p4_events[]: the selector values for an event plus up to
   two (counter, ESCR) pairings that can count it.  An unused pairing is
   written as { 0, 0 } in the table. */
struct p4_event_binding {
	int escr_select; /* value to put in CCCR */
	int event_select; /* value to put in ESCR */
	struct {
		int virt_counter; /* for this counter... */
		int escr_address; /* use this ESCR */
	} bindings[2];
};
75 | |||
/* nb: these CTR_* defines are a duplicate of defines in
   event/i386.p4*events. */

/* Each CTR_* value is a single-bit mask; bit n corresponds to entry n of
   p4_counters[] (pmc_setup_one_p4_counter() builds the mask to test as
   1 << VIRT_CTR(stag, ctr)). */
#define CTR_BPU_0 (1 << 0)
#define CTR_MS_0 (1 << 1)
#define CTR_FLAME_0 (1 << 2)
#define CTR_IQ_4 (1 << 3)
#define CTR_BPU_2 (1 << 4)
#define CTR_MS_2 (1 << 5)
#define CTR_FLAME_2 (1 << 6)
#define CTR_IQ_5 (1 << 7)
88 | |||
89 | static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { | ||
90 | { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 }, | ||
91 | { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 }, | ||
92 | { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 }, | ||
93 | { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 }, | ||
94 | { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 }, | ||
95 | { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 }, | ||
96 | { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 }, | ||
97 | { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 } | ||
98 | }; | ||
99 | |||
/* number of CCCRs that have no counter bound to them in p4_counters[];
   parenthesized so the subtraction stays intact inside larger expressions */
#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
101 | |||
102 | /* p4 event codes in libop/op_event.h are indices into this table. */ | ||
103 | |||
104 | static struct p4_event_binding p4_events[NUM_EVENTS] = { | ||
105 | |||
106 | { /* BRANCH_RETIRED */ | ||
107 | 0x05, 0x06, | ||
108 | { {CTR_IQ_4, MSR_P4_CRU_ESCR2}, | ||
109 | {CTR_IQ_5, MSR_P4_CRU_ESCR3} } | ||
110 | }, | ||
111 | |||
112 | { /* MISPRED_BRANCH_RETIRED */ | ||
113 | 0x04, 0x03, | ||
114 | { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, | ||
115 | { CTR_IQ_5, MSR_P4_CRU_ESCR1} } | ||
116 | }, | ||
117 | |||
118 | { /* TC_DELIVER_MODE */ | ||
119 | 0x01, 0x01, | ||
120 | { { CTR_MS_0, MSR_P4_TC_ESCR0}, | ||
121 | { CTR_MS_2, MSR_P4_TC_ESCR1} } | ||
122 | }, | ||
123 | |||
124 | { /* BPU_FETCH_REQUEST */ | ||
125 | 0x00, 0x03, | ||
126 | { { CTR_BPU_0, MSR_P4_BPU_ESCR0}, | ||
127 | { CTR_BPU_2, MSR_P4_BPU_ESCR1} } | ||
128 | }, | ||
129 | |||
130 | { /* ITLB_REFERENCE */ | ||
131 | 0x03, 0x18, | ||
132 | { { CTR_BPU_0, MSR_P4_ITLB_ESCR0}, | ||
133 | { CTR_BPU_2, MSR_P4_ITLB_ESCR1} } | ||
134 | }, | ||
135 | |||
136 | { /* MEMORY_CANCEL */ | ||
137 | 0x05, 0x02, | ||
138 | { { CTR_FLAME_0, MSR_P4_DAC_ESCR0}, | ||
139 | { CTR_FLAME_2, MSR_P4_DAC_ESCR1} } | ||
140 | }, | ||
141 | |||
142 | { /* MEMORY_COMPLETE */ | ||
143 | 0x02, 0x08, | ||
144 | { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, | ||
145 | { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } | ||
146 | }, | ||
147 | |||
148 | { /* LOAD_PORT_REPLAY */ | ||
149 | 0x02, 0x04, | ||
150 | { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, | ||
151 | { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } | ||
152 | }, | ||
153 | |||
154 | { /* STORE_PORT_REPLAY */ | ||
155 | 0x02, 0x05, | ||
156 | { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, | ||
157 | { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } | ||
158 | }, | ||
159 | |||
160 | { /* MOB_LOAD_REPLAY */ | ||
161 | 0x02, 0x03, | ||
162 | { { CTR_BPU_0, MSR_P4_MOB_ESCR0}, | ||
163 | { CTR_BPU_2, MSR_P4_MOB_ESCR1} } | ||
164 | }, | ||
165 | |||
166 | { /* PAGE_WALK_TYPE */ | ||
167 | 0x04, 0x01, | ||
168 | { { CTR_BPU_0, MSR_P4_PMH_ESCR0}, | ||
169 | { CTR_BPU_2, MSR_P4_PMH_ESCR1} } | ||
170 | }, | ||
171 | |||
172 | { /* BSQ_CACHE_REFERENCE */ | ||
173 | 0x07, 0x0c, | ||
174 | { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, | ||
175 | { CTR_BPU_2, MSR_P4_BSU_ESCR1} } | ||
176 | }, | ||
177 | |||
178 | { /* IOQ_ALLOCATION */ | ||
179 | 0x06, 0x03, | ||
180 | { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, | ||
181 | { 0, 0 } } | ||
182 | }, | ||
183 | |||
184 | { /* IOQ_ACTIVE_ENTRIES */ | ||
185 | 0x06, 0x1a, | ||
186 | { { CTR_BPU_2, MSR_P4_FSB_ESCR1}, | ||
187 | { 0, 0 } } | ||
188 | }, | ||
189 | |||
190 | { /* FSB_DATA_ACTIVITY */ | ||
191 | 0x06, 0x17, | ||
192 | { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, | ||
193 | { CTR_BPU_2, MSR_P4_FSB_ESCR1} } | ||
194 | }, | ||
195 | |||
196 | { /* BSQ_ALLOCATION */ | ||
197 | 0x07, 0x05, | ||
198 | { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, | ||
199 | { 0, 0 } } | ||
200 | }, | ||
201 | |||
202 | { /* BSQ_ACTIVE_ENTRIES */ | ||
203 | 0x07, 0x06, | ||
204 | { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */}, | ||
205 | { 0, 0 } } | ||
206 | }, | ||
207 | |||
208 | { /* X87_ASSIST */ | ||
209 | 0x05, 0x03, | ||
210 | { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, | ||
211 | { CTR_IQ_5, MSR_P4_CRU_ESCR3} } | ||
212 | }, | ||
213 | |||
214 | { /* SSE_INPUT_ASSIST */ | ||
215 | 0x01, 0x34, | ||
216 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
217 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
218 | }, | ||
219 | |||
220 | { /* PACKED_SP_UOP */ | ||
221 | 0x01, 0x08, | ||
222 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
223 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
224 | }, | ||
225 | |||
226 | { /* PACKED_DP_UOP */ | ||
227 | 0x01, 0x0c, | ||
228 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
229 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
230 | }, | ||
231 | |||
232 | { /* SCALAR_SP_UOP */ | ||
233 | 0x01, 0x0a, | ||
234 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
235 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
236 | }, | ||
237 | |||
238 | { /* SCALAR_DP_UOP */ | ||
239 | 0x01, 0x0e, | ||
240 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
241 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
242 | }, | ||
243 | |||
244 | { /* 64BIT_MMX_UOP */ | ||
245 | 0x01, 0x02, | ||
246 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
247 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
248 | }, | ||
249 | |||
250 | { /* 128BIT_MMX_UOP */ | ||
251 | 0x01, 0x1a, | ||
252 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
253 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
254 | }, | ||
255 | |||
256 | { /* X87_FP_UOP */ | ||
257 | 0x01, 0x04, | ||
258 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
259 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
260 | }, | ||
261 | |||
262 | { /* X87_SIMD_MOVES_UOP */ | ||
263 | 0x01, 0x2e, | ||
264 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
265 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
266 | }, | ||
267 | |||
268 | { /* MACHINE_CLEAR */ | ||
269 | 0x05, 0x02, | ||
270 | { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, | ||
271 | { CTR_IQ_5, MSR_P4_CRU_ESCR3} } | ||
272 | }, | ||
273 | |||
274 | { /* GLOBAL_POWER_EVENTS */ | ||
275 | 0x06, 0x13 /* older manual says 0x05, newer 0x13 */, | ||
276 | { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, | ||
277 | { CTR_BPU_2, MSR_P4_FSB_ESCR1} } | ||
278 | }, | ||
279 | |||
280 | { /* TC_MS_XFER */ | ||
281 | 0x00, 0x05, | ||
282 | { { CTR_MS_0, MSR_P4_MS_ESCR0}, | ||
283 | { CTR_MS_2, MSR_P4_MS_ESCR1} } | ||
284 | }, | ||
285 | |||
286 | { /* UOP_QUEUE_WRITES */ | ||
287 | 0x00, 0x09, | ||
288 | { { CTR_MS_0, MSR_P4_MS_ESCR0}, | ||
289 | { CTR_MS_2, MSR_P4_MS_ESCR1} } | ||
290 | }, | ||
291 | |||
292 | { /* FRONT_END_EVENT */ | ||
293 | 0x05, 0x08, | ||
294 | { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, | ||
295 | { CTR_IQ_5, MSR_P4_CRU_ESCR3} } | ||
296 | }, | ||
297 | |||
298 | { /* EXECUTION_EVENT */ | ||
299 | 0x05, 0x0c, | ||
300 | { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, | ||
301 | { CTR_IQ_5, MSR_P4_CRU_ESCR3} } | ||
302 | }, | ||
303 | |||
304 | { /* REPLAY_EVENT */ | ||
305 | 0x05, 0x09, | ||
306 | { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, | ||
307 | { CTR_IQ_5, MSR_P4_CRU_ESCR3} } | ||
308 | }, | ||
309 | |||
310 | { /* INSTR_RETIRED */ | ||
311 | 0x04, 0x02, | ||
312 | { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, | ||
313 | { CTR_IQ_5, MSR_P4_CRU_ESCR1} } | ||
314 | }, | ||
315 | |||
316 | { /* UOPS_RETIRED */ | ||
317 | 0x04, 0x01, | ||
318 | { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, | ||
319 | { CTR_IQ_5, MSR_P4_CRU_ESCR1} } | ||
320 | }, | ||
321 | |||
322 | { /* UOP_TYPE */ | ||
323 | 0x02, 0x02, | ||
324 | { { CTR_IQ_4, MSR_P4_RAT_ESCR0}, | ||
325 | { CTR_IQ_5, MSR_P4_RAT_ESCR1} } | ||
326 | }, | ||
327 | |||
328 | { /* RETIRED_MISPRED_BRANCH_TYPE */ | ||
329 | 0x02, 0x05, | ||
330 | { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, | ||
331 | { CTR_MS_2, MSR_P4_TBPU_ESCR1} } | ||
332 | }, | ||
333 | |||
334 | { /* RETIRED_BRANCH_TYPE */ | ||
335 | 0x02, 0x04, | ||
336 | { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, | ||
337 | { CTR_MS_2, MSR_P4_TBPU_ESCR1} } | ||
338 | } | ||
339 | }; | ||
340 | |||
341 | |||
/* non-zero when bit 7 of x is set; applied to the low word read from
   MSR_IA32_MISC_ENABLE to decide whether the PMC is available */
#define MISC_PMC_ENABLED_P(x) ((x) & (1 << 7))

/*
 * ESCR helpers.  They operate in place on an lvalue holding the low word
 * of an ESCR.  ESCR_CLEAR keeps only the bits in ESCR_RESERVED_BITS; the
 * _0 setters place the usr/os flags at bits 2/3 and the _1 setters at
 * bits 0/1 (the caller picks by stagger).  The event select occupies six
 * bits starting at bit 25, the event mask sixteen bits starting at bit 9.
 * ESCR_READ/ESCR_WRITE access the ESCR named by binding i of event ev.
 */
#define ESCR_RESERVED_BITS 0x80000003
#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
#define ESCR_READ(escr,high,ev,i) do {rdmsr((ev)->bindings[(i)].escr_address, (escr), (high));} while (0)
#define ESCR_WRITE(escr,high,ev,i) do {wrmsr((ev)->bindings[(i)].escr_address, (escr), (high));} while (0)

/*
 * CCCR helpers.  They operate in place on an lvalue holding the low word
 * of a CCCR.  CCCR_CLEAR keeps only the bits in CCCR_RESERVED_BITS; the
 * ESCR select occupies three bits starting at bit 13; bit 12 is the
 * enable bit, bits 26/27 the PMI-on-overflow bits for stagger 0/1, and
 * bit 31 the overflow flag.  CCCR_READ/CCCR_WRITE access the CCCR of
 * p4_counters[i].
 */
#define CCCR_RESERVED_BITS 0x38030FFF
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))

/*
 * Counter helpers.  *_IS_RESERVED yield 1 when slot c of msrs holds a
 * non-zero address and 0 otherwise.  CTR_WRITE stores the negated 32-bit
 * value of l as the low word and -1 as the high word of p4_counters[i]'s
 * counter.  CTR_OVERFLOW_P is true when bit 31 of ctr is clear.
 */
#define CTRL_IS_RESERVED(msrs,c) ((msrs)->controls[(c)].addr ? 1 : 0)
#define CTR_IS_RESERVED(msrs,c) ((msrs)->counters[(c)].addr ? 1 : 0)
#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
373 | |||
374 | |||
/* this assigns a "stagger" to the current CPU, which is used throughout
   the code in this module as an extra array offset, to select the "even"
   or "odd" part of all the divided resources.

   Returns 0 when the current CPU is the first CPU in its sibling map and
   1 otherwise; without CONFIG_SMP it always returns 0. */
static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
	int cpu = smp_processor_id();
	return (cpu != first_cpu(cpu_sibling_map[cpu]));
#else
	return 0;
#endif
}
386 | |||
387 | |||
/* finally, mediate access to a real hardware counter
   by passing a "virtual" counter number to this macro,
   along with your stagger setting.  The result is
   i + num_counters * stagger and is used as an index into p4_counters[]. */
#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))

/* per-virtual-counter reload value, filled in by p4_setup_ctrs(); a zero
   entry makes p4_check_ctrs(), p4_start() and p4_stop() skip that counter */
static unsigned long reset_value[NUM_COUNTERS_NON_HT];
394 | |||
395 | |||
396 | static void p4_fill_in_addresses(struct op_msrs * const msrs) | ||
397 | { | ||
398 | unsigned int i; | ||
399 | unsigned int addr, cccraddr, stag; | ||
400 | |||
401 | setup_num_counters(); | ||
402 | stag = get_stagger(); | ||
403 | |||
404 | /* initialize some registers */ | ||
405 | for (i = 0; i < num_counters; ++i) { | ||
406 | msrs->counters[i].addr = 0; | ||
407 | } | ||
408 | for (i = 0; i < num_controls; ++i) { | ||
409 | msrs->controls[i].addr = 0; | ||
410 | } | ||
411 | |||
412 | /* the counter & cccr registers we pay attention to */ | ||
413 | for (i = 0; i < num_counters; ++i) { | ||
414 | addr = p4_counters[VIRT_CTR(stag, i)].counter_address; | ||
415 | cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address; | ||
416 | if (reserve_perfctr_nmi(addr)){ | ||
417 | msrs->counters[i].addr = addr; | ||
418 | msrs->controls[i].addr = cccraddr; | ||
419 | } | ||
420 | } | ||
421 | |||
422 | /* 43 ESCR registers in three or four discontiguous group */ | ||
423 | for (addr = MSR_P4_BSU_ESCR0 + stag; | ||
424 | addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { | ||
425 | if (reserve_evntsel_nmi(addr)) | ||
426 | msrs->controls[i].addr = addr; | ||
427 | } | ||
428 | |||
429 | /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1 | ||
430 | * to avoid special case in nmi_{save|restore}_registers() */ | ||
431 | if (boot_cpu_data.x86_model >= 0x3) { | ||
432 | for (addr = MSR_P4_BSU_ESCR0 + stag; | ||
433 | addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) { | ||
434 | if (reserve_evntsel_nmi(addr)) | ||
435 | msrs->controls[i].addr = addr; | ||
436 | } | ||
437 | } else { | ||
438 | for (addr = MSR_P4_IQ_ESCR0 + stag; | ||
439 | addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) { | ||
440 | if (reserve_evntsel_nmi(addr)) | ||
441 | msrs->controls[i].addr = addr; | ||
442 | } | ||
443 | } | ||
444 | |||
445 | for (addr = MSR_P4_RAT_ESCR0 + stag; | ||
446 | addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { | ||
447 | if (reserve_evntsel_nmi(addr)) | ||
448 | msrs->controls[i].addr = addr; | ||
449 | } | ||
450 | |||
451 | for (addr = MSR_P4_MS_ESCR0 + stag; | ||
452 | addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { | ||
453 | if (reserve_evntsel_nmi(addr)) | ||
454 | msrs->controls[i].addr = addr; | ||
455 | } | ||
456 | |||
457 | for (addr = MSR_P4_IX_ESCR0 + stag; | ||
458 | addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { | ||
459 | if (reserve_evntsel_nmi(addr)) | ||
460 | msrs->controls[i].addr = addr; | ||
461 | } | ||
462 | |||
463 | /* there are 2 remaining non-contiguously located ESCRs */ | ||
464 | |||
465 | if (num_counters == NUM_COUNTERS_NON_HT) { | ||
466 | /* standard non-HT CPUs handle both remaining ESCRs*/ | ||
467 | if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) | ||
468 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; | ||
469 | if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) | ||
470 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; | ||
471 | |||
472 | } else if (stag == 0) { | ||
473 | /* HT CPUs give the first remainder to the even thread, as | ||
474 | the 32nd control register */ | ||
475 | if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) | ||
476 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; | ||
477 | |||
478 | } else { | ||
479 | /* and two copies of the second to the odd thread, | ||
480 | for the 22st and 23nd control registers */ | ||
481 | if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) { | ||
482 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; | ||
483 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; | ||
484 | } | ||
485 | } | ||
486 | } | ||
487 | |||
488 | |||
489 | static void pmc_setup_one_p4_counter(unsigned int ctr) | ||
490 | { | ||
491 | int i; | ||
492 | int const maxbind = 2; | ||
493 | unsigned int cccr = 0; | ||
494 | unsigned int escr = 0; | ||
495 | unsigned int high = 0; | ||
496 | unsigned int counter_bit; | ||
497 | struct p4_event_binding *ev = NULL; | ||
498 | unsigned int stag; | ||
499 | |||
500 | stag = get_stagger(); | ||
501 | |||
502 | /* convert from counter *number* to counter *bit* */ | ||
503 | counter_bit = 1 << VIRT_CTR(stag, ctr); | ||
504 | |||
505 | /* find our event binding structure. */ | ||
506 | if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) { | ||
507 | printk(KERN_ERR | ||
508 | "oprofile: P4 event code 0x%lx out of range\n", | ||
509 | counter_config[ctr].event); | ||
510 | return; | ||
511 | } | ||
512 | |||
513 | ev = &(p4_events[counter_config[ctr].event - 1]); | ||
514 | |||
515 | for (i = 0; i < maxbind; i++) { | ||
516 | if (ev->bindings[i].virt_counter & counter_bit) { | ||
517 | |||
518 | /* modify ESCR */ | ||
519 | ESCR_READ(escr, high, ev, i); | ||
520 | ESCR_CLEAR(escr); | ||
521 | if (stag == 0) { | ||
522 | ESCR_SET_USR_0(escr, counter_config[ctr].user); | ||
523 | ESCR_SET_OS_0(escr, counter_config[ctr].kernel); | ||
524 | } else { | ||
525 | ESCR_SET_USR_1(escr, counter_config[ctr].user); | ||
526 | ESCR_SET_OS_1(escr, counter_config[ctr].kernel); | ||
527 | } | ||
528 | ESCR_SET_EVENT_SELECT(escr, ev->event_select); | ||
529 | ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); | ||
530 | ESCR_WRITE(escr, high, ev, i); | ||
531 | |||
532 | /* modify CCCR */ | ||
533 | CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); | ||
534 | CCCR_CLEAR(cccr); | ||
535 | CCCR_SET_REQUIRED_BITS(cccr); | ||
536 | CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); | ||
537 | if (stag == 0) { | ||
538 | CCCR_SET_PMI_OVF_0(cccr); | ||
539 | } else { | ||
540 | CCCR_SET_PMI_OVF_1(cccr); | ||
541 | } | ||
542 | CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); | ||
543 | return; | ||
544 | } | ||
545 | } | ||
546 | |||
547 | printk(KERN_ERR | ||
548 | "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n", | ||
549 | counter_config[ctr].event, stag, ctr); | ||
550 | } | ||
551 | |||
552 | |||
553 | static void p4_setup_ctrs(struct op_msrs const * const msrs) | ||
554 | { | ||
555 | unsigned int i; | ||
556 | unsigned int low, high; | ||
557 | unsigned int stag; | ||
558 | |||
559 | stag = get_stagger(); | ||
560 | |||
561 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); | ||
562 | if (! MISC_PMC_ENABLED_P(low)) { | ||
563 | printk(KERN_ERR "oprofile: P4 PMC not available\n"); | ||
564 | return; | ||
565 | } | ||
566 | |||
567 | /* clear the cccrs we will use */ | ||
568 | for (i = 0 ; i < num_counters ; i++) { | ||
569 | if (unlikely(!CTRL_IS_RESERVED(msrs,i))) | ||
570 | continue; | ||
571 | rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); | ||
572 | CCCR_CLEAR(low); | ||
573 | CCCR_SET_REQUIRED_BITS(low); | ||
574 | wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); | ||
575 | } | ||
576 | |||
577 | /* clear all escrs (including those outside our concern) */ | ||
578 | for (i = num_counters; i < num_controls; i++) { | ||
579 | if (unlikely(!CTRL_IS_RESERVED(msrs,i))) | ||
580 | continue; | ||
581 | wrmsr(msrs->controls[i].addr, 0, 0); | ||
582 | } | ||
583 | |||
584 | /* setup all counters */ | ||
585 | for (i = 0 ; i < num_counters ; ++i) { | ||
586 | if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) { | ||
587 | reset_value[i] = counter_config[i].count; | ||
588 | pmc_setup_one_p4_counter(i); | ||
589 | CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); | ||
590 | } else { | ||
591 | reset_value[i] = 0; | ||
592 | } | ||
593 | } | ||
594 | } | ||
595 | |||
596 | |||
/*
 * Check every active counter of this CPU for overflow.
 *
 * For each virtual counter with a non-zero reset_value[], the CCCR and
 * the counter are read; if either the CCCR overflow flag is set or the
 * counter's bit 31 is clear, a sample is recorded for regs with
 * oprofile_add_sample() and the counter is reloaded from reset_value[].
 * The APIC LVTPC entry is unmasked again before returning.
 *
 * msrs is not used by this function.  Always returns 1.
 */
static int p4_check_ctrs(struct pt_regs * const regs,
			 struct op_msrs const * const msrs)
{
	unsigned long ctr, low, high, stag, real;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {

		/* counters that were not set up have a zero reset value */
		if (!reset_value[i])
			continue;

		/*
		 * there is some eccentricity in the hardware which
		 * requires that we perform 2 extra corrections:
		 *
		 * - check both the CCCR:OVF flag for overflow and the
		 * counter high bit for un-flagged overflows.
		 *
		 * - write the counter back twice to ensure it gets
		 * updated properly.
		 *
		 * the former seems to be related to extra NMIs happening
		 * during the current NMI; the latter is reported as errata
		 * N15 in intel doc 249199-029, pentium 4 specification
		 * update, though their suggested work-around does not
		 * appear to solve the problem.
		 */

		/* index of the real counter in p4_counters[] */
		real = VIRT_CTR(stag, i);

		CCCR_READ(low, high, real);
		CTR_READ(ctr, high, real);
		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
			oprofile_add_sample(regs, i);
			/* first of the two counter writes described above */
			CTR_WRITE(reset_value[i], real);
			CCCR_CLEAR_OVF(low);
			CCCR_WRITE(low, high, real);
			/* second counter write, after the CCCR update */
			CTR_WRITE(reset_value[i], real);
		}
	}

	/* P4 quirk: you have to re-unmask the apic vector */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

	/* See op_model_ppro.c */
	return 1;
}
646 | |||
647 | |||
648 | static void p4_start(struct op_msrs const * const msrs) | ||
649 | { | ||
650 | unsigned int low, high, stag; | ||
651 | int i; | ||
652 | |||
653 | stag = get_stagger(); | ||
654 | |||
655 | for (i = 0; i < num_counters; ++i) { | ||
656 | if (!reset_value[i]) | ||
657 | continue; | ||
658 | CCCR_READ(low, high, VIRT_CTR(stag, i)); | ||
659 | CCCR_SET_ENABLE(low); | ||
660 | CCCR_WRITE(low, high, VIRT_CTR(stag, i)); | ||
661 | } | ||
662 | } | ||
663 | |||
664 | |||
665 | static void p4_stop(struct op_msrs const * const msrs) | ||
666 | { | ||
667 | unsigned int low, high, stag; | ||
668 | int i; | ||
669 | |||
670 | stag = get_stagger(); | ||
671 | |||
672 | for (i = 0; i < num_counters; ++i) { | ||
673 | if (!reset_value[i]) | ||
674 | continue; | ||
675 | CCCR_READ(low, high, VIRT_CTR(stag, i)); | ||
676 | CCCR_SET_DISABLE(low); | ||
677 | CCCR_WRITE(low, high, VIRT_CTR(stag, i)); | ||
678 | } | ||
679 | } | ||
680 | |||
681 | static void p4_shutdown(struct op_msrs const * const msrs) | ||
682 | { | ||
683 | int i; | ||
684 | |||
685 | for (i = 0 ; i < num_counters ; ++i) { | ||
686 | if (CTR_IS_RESERVED(msrs,i)) | ||
687 | release_perfctr_nmi(msrs->counters[i].addr); | ||
688 | } | ||
689 | /* some of the control registers are specially reserved in | ||
690 | * conjunction with the counter registers (hence the starting offset). | ||
691 | * This saves a few bits. | ||
692 | */ | ||
693 | for (i = num_counters ; i < num_controls ; ++i) { | ||
694 | if (CTRL_IS_RESERVED(msrs,i)) | ||
695 | release_evntsel_nmi(msrs->controls[i].addr); | ||
696 | } | ||
697 | } | ||
698 | |||
699 | |||
#ifdef CONFIG_SMP
/* model description built with the HT2 register counts; only present in
   CONFIG_SMP builds.  It shares every callback with op_p4_spec. */
const struct op_x86_model_spec op_p4_ht2_spec = {
	.num_counters		= NUM_COUNTERS_HT2,
	.num_controls		= NUM_CONTROLS_HT2,
	.fill_in_addresses	= p4_fill_in_addresses,
	.setup_ctrs		= p4_setup_ctrs,
	.check_ctrs		= p4_check_ctrs,
	.start			= p4_start,
	.stop			= p4_stop,
	.shutdown		= p4_shutdown,
};
#endif
712 | |||
713 | struct op_x86_model_spec const op_p4_spec = { | ||
714 | .num_counters = NUM_COUNTERS_NON_HT, | ||
715 | .num_controls = NUM_CONTROLS_NON_HT, | ||
716 | .fill_in_addresses = &p4_fill_in_addresses, | ||
717 | .setup_ctrs = &p4_setup_ctrs, | ||
718 | .check_ctrs = &p4_check_ctrs, | ||
719 | .start = &p4_start, | ||
720 | .stop = &p4_stop, | ||
721 | .shutdown = &p4_shutdown | ||
722 | }; | ||