author     Peter Zijlstra <a.p.zijlstra@chello.nl>    2010-02-26 06:05:05 -0500
committer  Ingo Molnar <mingo@elte.hu>                2010-02-26 09:44:04 -0500
commit     f22f54f4491acd987a6c5a92de52b60ca8b58b61 (patch)
tree       7eae87b08e828e8f0b1223f267abb004d6a5f7e7 /arch/x86/kernel/cpu
parent     48fb4fdd6b667ebeccbc6cde0a8a5a148d5c6b68 (diff)
perf_events, x86: Split PMU definitions into separate files
Split amd,p6,intel into separate files so that we can easily deal with
CONFIG_CPU_SUP_* things, needed to make things build now that perf_event.c
relies on symbols from amd.c
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
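
For context: after this split, perf_event.c stays the shared core and pulls the vendor-specific code back in via #include at the end of the file (the new lines 1334-1336 in the final hunk below). A minimal sketch of how that layout lets the CONFIG_CPU_SUP_* options be handled cleanly (the #ifdef guards below are illustrative only; this commit adds the three includes unconditionally):

    /* Tail of arch/x86/kernel/cpu/perf_event.c after the split. */

    /*
     * Illustrative guards: CONFIG_CPU_SUP_AMD / CONFIG_CPU_SUP_INTEL are the
     * existing Kconfig symbols; this commit itself includes all three files
     * unconditionally and leaves the conditional wiring for follow-up work.
     */
    #ifdef CONFIG_CPU_SUP_AMD
    #include "perf_event_amd.c"     /* amd_pmu, NorthBridge event constraints */
    #endif

    #ifdef CONFIG_CPU_SUP_INTEL
    #include "perf_event_p6.c"      /* p6_pmu and its event tables */
    #include "perf_event_intel.c"   /* core_pmu, intel_pmu, BTS support */
    #endif
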
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c         1524
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c      416
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c    971
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p6.c       157
4 files changed, 1554 insertions, 1514 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index dd09ccc867d3..641ccb9dddbc 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -161,8 +161,6 @@ struct x86_pmu {
161 | 161 | ||
162 | static struct x86_pmu x86_pmu __read_mostly; | 162 | static struct x86_pmu x86_pmu __read_mostly; |
163 | 163 | ||
164 | static raw_spinlock_t amd_nb_lock; | ||
165 | |||
166 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { | 164 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { |
167 | .enabled = 1, | 165 | .enabled = 1, |
168 | }; | 166 | }; |
@@ -171,140 +169,6 @@ static int x86_perf_event_set_period(struct perf_event *event,
171 | struct hw_perf_event *hwc, int idx); | 169 | struct hw_perf_event *hwc, int idx); |
172 | 170 | ||
173 | /* | 171 | /* |
174 | * Not sure about some of these | ||
175 | */ | ||
176 | static const u64 p6_perfmon_event_map[] = | ||
177 | { | ||
178 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, | ||
179 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
180 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, | ||
181 | [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, | ||
182 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
183 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
184 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, | ||
185 | }; | ||
186 | |||
187 | static u64 p6_pmu_event_map(int hw_event) | ||
188 | { | ||
189 | return p6_perfmon_event_map[hw_event]; | ||
190 | } | ||
191 | |||
192 | /* | ||
193 | * Event setting that is specified not to count anything. | ||
194 | * We use this to effectively disable a counter. | ||
195 | * | ||
196 | * L2_RQSTS with 0 MESI unit mask. | ||
197 | */ | ||
198 | #define P6_NOP_EVENT 0x0000002EULL | ||
199 | |||
200 | static u64 p6_pmu_raw_event(u64 hw_event) | ||
201 | { | ||
202 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
203 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
204 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
205 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
206 | #define P6_EVNTSEL_REG_MASK 0xFF000000ULL | ||
207 | |||
208 | #define P6_EVNTSEL_MASK \ | ||
209 | (P6_EVNTSEL_EVENT_MASK | \ | ||
210 | P6_EVNTSEL_UNIT_MASK | \ | ||
211 | P6_EVNTSEL_EDGE_MASK | \ | ||
212 | P6_EVNTSEL_INV_MASK | \ | ||
213 | P6_EVNTSEL_REG_MASK) | ||
214 | |||
215 | return hw_event & P6_EVNTSEL_MASK; | ||
216 | } | ||
217 | |||
218 | static struct event_constraint intel_p6_event_constraints[] = | ||
219 | { | ||
220 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | ||
221 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
222 | INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ | ||
223 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
224 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
225 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
226 | EVENT_CONSTRAINT_END | ||
227 | }; | ||
228 | |||
229 | /* | ||
230 | * Intel PerfMon v3. Used on Core2 and later. | ||
231 | */ | ||
232 | static const u64 intel_perfmon_event_map[] = | ||
233 | { | ||
234 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, | ||
235 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
236 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, | ||
237 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, | ||
238 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
239 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
240 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | ||
241 | }; | ||
242 | |||
243 | static struct event_constraint intel_core_event_constraints[] = | ||
244 | { | ||
245 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
246 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
247 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
248 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
249 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
250 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ | ||
251 | EVENT_CONSTRAINT_END | ||
252 | }; | ||
253 | |||
254 | static struct event_constraint intel_core2_event_constraints[] = | ||
255 | { | ||
256 | FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ | ||
257 | FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ | ||
258 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
259 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
260 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
261 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
262 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
263 | INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ | ||
264 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
265 | INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ | ||
266 | INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ | ||
267 | EVENT_CONSTRAINT_END | ||
268 | }; | ||
269 | |||
270 | static struct event_constraint intel_nehalem_event_constraints[] = | ||
271 | { | ||
272 | FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ | ||
273 | FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ | ||
274 | INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ | ||
275 | INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ | ||
276 | INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ | ||
277 | INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ | ||
278 | INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ | ||
279 | INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ | ||
280 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
281 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | ||
282 | EVENT_CONSTRAINT_END | ||
283 | }; | ||
284 | |||
285 | static struct event_constraint intel_westmere_event_constraints[] = | ||
286 | { | ||
287 | FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ | ||
288 | FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ | ||
289 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
290 | INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ | ||
291 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | ||
292 | EVENT_CONSTRAINT_END | ||
293 | }; | ||
294 | |||
295 | static struct event_constraint intel_gen_event_constraints[] = | ||
296 | { | ||
297 | FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ | ||
298 | FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ | ||
299 | EVENT_CONSTRAINT_END | ||
300 | }; | ||
301 | |||
302 | static u64 intel_pmu_event_map(int hw_event) | ||
303 | { | ||
304 | return intel_perfmon_event_map[hw_event]; | ||
305 | } | ||
306 | |||
307 | /* | ||
308 | * Generalized hw caching related hw_event table, filled | 172 | * Generalized hw caching related hw_event table, filled |
309 | * in on a per model basis. A value of 0 means | 173 | * in on a per model basis. A value of 0 means |
310 | * 'not supported', -1 means 'hw_event makes no sense on | 174 | * 'not supported', -1 means 'hw_event makes no sense on |
@@ -319,515 +183,6 @@ static u64 __read_mostly hw_cache_event_ids
319 | [PERF_COUNT_HW_CACHE_OP_MAX] | 183 | [PERF_COUNT_HW_CACHE_OP_MAX] |
320 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | 184 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; |
321 | 185 | ||
322 | static __initconst u64 westmere_hw_cache_event_ids | ||
323 | [PERF_COUNT_HW_CACHE_MAX] | ||
324 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
325 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
326 | { | ||
327 | [ C(L1D) ] = { | ||
328 | [ C(OP_READ) ] = { | ||
329 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ | ||
330 | [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ | ||
331 | }, | ||
332 | [ C(OP_WRITE) ] = { | ||
333 | [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ | ||
334 | [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ | ||
335 | }, | ||
336 | [ C(OP_PREFETCH) ] = { | ||
337 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
338 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
339 | }, | ||
340 | }, | ||
341 | [ C(L1I ) ] = { | ||
342 | [ C(OP_READ) ] = { | ||
343 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
344 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
345 | }, | ||
346 | [ C(OP_WRITE) ] = { | ||
347 | [ C(RESULT_ACCESS) ] = -1, | ||
348 | [ C(RESULT_MISS) ] = -1, | ||
349 | }, | ||
350 | [ C(OP_PREFETCH) ] = { | ||
351 | [ C(RESULT_ACCESS) ] = 0x0, | ||
352 | [ C(RESULT_MISS) ] = 0x0, | ||
353 | }, | ||
354 | }, | ||
355 | [ C(LL ) ] = { | ||
356 | [ C(OP_READ) ] = { | ||
357 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
358 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
359 | }, | ||
360 | [ C(OP_WRITE) ] = { | ||
361 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
362 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
363 | }, | ||
364 | [ C(OP_PREFETCH) ] = { | ||
365 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
366 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
367 | }, | ||
368 | }, | ||
369 | [ C(DTLB) ] = { | ||
370 | [ C(OP_READ) ] = { | ||
371 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ | ||
372 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
373 | }, | ||
374 | [ C(OP_WRITE) ] = { | ||
375 | [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ | ||
376 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
377 | }, | ||
378 | [ C(OP_PREFETCH) ] = { | ||
379 | [ C(RESULT_ACCESS) ] = 0x0, | ||
380 | [ C(RESULT_MISS) ] = 0x0, | ||
381 | }, | ||
382 | }, | ||
383 | [ C(ITLB) ] = { | ||
384 | [ C(OP_READ) ] = { | ||
385 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
386 | [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ | ||
387 | }, | ||
388 | [ C(OP_WRITE) ] = { | ||
389 | [ C(RESULT_ACCESS) ] = -1, | ||
390 | [ C(RESULT_MISS) ] = -1, | ||
391 | }, | ||
392 | [ C(OP_PREFETCH) ] = { | ||
393 | [ C(RESULT_ACCESS) ] = -1, | ||
394 | [ C(RESULT_MISS) ] = -1, | ||
395 | }, | ||
396 | }, | ||
397 | [ C(BPU ) ] = { | ||
398 | [ C(OP_READ) ] = { | ||
399 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
400 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
401 | }, | ||
402 | [ C(OP_WRITE) ] = { | ||
403 | [ C(RESULT_ACCESS) ] = -1, | ||
404 | [ C(RESULT_MISS) ] = -1, | ||
405 | }, | ||
406 | [ C(OP_PREFETCH) ] = { | ||
407 | [ C(RESULT_ACCESS) ] = -1, | ||
408 | [ C(RESULT_MISS) ] = -1, | ||
409 | }, | ||
410 | }, | ||
411 | }; | ||
412 | |||
413 | static __initconst u64 nehalem_hw_cache_event_ids | ||
414 | [PERF_COUNT_HW_CACHE_MAX] | ||
415 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
416 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
417 | { | ||
418 | [ C(L1D) ] = { | ||
419 | [ C(OP_READ) ] = { | ||
420 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
421 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
422 | }, | ||
423 | [ C(OP_WRITE) ] = { | ||
424 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
425 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
426 | }, | ||
427 | [ C(OP_PREFETCH) ] = { | ||
428 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
429 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
430 | }, | ||
431 | }, | ||
432 | [ C(L1I ) ] = { | ||
433 | [ C(OP_READ) ] = { | ||
434 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
435 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
436 | }, | ||
437 | [ C(OP_WRITE) ] = { | ||
438 | [ C(RESULT_ACCESS) ] = -1, | ||
439 | [ C(RESULT_MISS) ] = -1, | ||
440 | }, | ||
441 | [ C(OP_PREFETCH) ] = { | ||
442 | [ C(RESULT_ACCESS) ] = 0x0, | ||
443 | [ C(RESULT_MISS) ] = 0x0, | ||
444 | }, | ||
445 | }, | ||
446 | [ C(LL ) ] = { | ||
447 | [ C(OP_READ) ] = { | ||
448 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
449 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
450 | }, | ||
451 | [ C(OP_WRITE) ] = { | ||
452 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
453 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
454 | }, | ||
455 | [ C(OP_PREFETCH) ] = { | ||
456 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
457 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
458 | }, | ||
459 | }, | ||
460 | [ C(DTLB) ] = { | ||
461 | [ C(OP_READ) ] = { | ||
462 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
463 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
464 | }, | ||
465 | [ C(OP_WRITE) ] = { | ||
466 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
467 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
468 | }, | ||
469 | [ C(OP_PREFETCH) ] = { | ||
470 | [ C(RESULT_ACCESS) ] = 0x0, | ||
471 | [ C(RESULT_MISS) ] = 0x0, | ||
472 | }, | ||
473 | }, | ||
474 | [ C(ITLB) ] = { | ||
475 | [ C(OP_READ) ] = { | ||
476 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
477 | [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ | ||
478 | }, | ||
479 | [ C(OP_WRITE) ] = { | ||
480 | [ C(RESULT_ACCESS) ] = -1, | ||
481 | [ C(RESULT_MISS) ] = -1, | ||
482 | }, | ||
483 | [ C(OP_PREFETCH) ] = { | ||
484 | [ C(RESULT_ACCESS) ] = -1, | ||
485 | [ C(RESULT_MISS) ] = -1, | ||
486 | }, | ||
487 | }, | ||
488 | [ C(BPU ) ] = { | ||
489 | [ C(OP_READ) ] = { | ||
490 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
491 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
492 | }, | ||
493 | [ C(OP_WRITE) ] = { | ||
494 | [ C(RESULT_ACCESS) ] = -1, | ||
495 | [ C(RESULT_MISS) ] = -1, | ||
496 | }, | ||
497 | [ C(OP_PREFETCH) ] = { | ||
498 | [ C(RESULT_ACCESS) ] = -1, | ||
499 | [ C(RESULT_MISS) ] = -1, | ||
500 | }, | ||
501 | }, | ||
502 | }; | ||
503 | |||
504 | static __initconst u64 core2_hw_cache_event_ids | ||
505 | [PERF_COUNT_HW_CACHE_MAX] | ||
506 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
507 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
508 | { | ||
509 | [ C(L1D) ] = { | ||
510 | [ C(OP_READ) ] = { | ||
511 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
512 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
513 | }, | ||
514 | [ C(OP_WRITE) ] = { | ||
515 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
516 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
517 | }, | ||
518 | [ C(OP_PREFETCH) ] = { | ||
519 | [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ | ||
520 | [ C(RESULT_MISS) ] = 0, | ||
521 | }, | ||
522 | }, | ||
523 | [ C(L1I ) ] = { | ||
524 | [ C(OP_READ) ] = { | ||
525 | [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ | ||
526 | [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ | ||
527 | }, | ||
528 | [ C(OP_WRITE) ] = { | ||
529 | [ C(RESULT_ACCESS) ] = -1, | ||
530 | [ C(RESULT_MISS) ] = -1, | ||
531 | }, | ||
532 | [ C(OP_PREFETCH) ] = { | ||
533 | [ C(RESULT_ACCESS) ] = 0, | ||
534 | [ C(RESULT_MISS) ] = 0, | ||
535 | }, | ||
536 | }, | ||
537 | [ C(LL ) ] = { | ||
538 | [ C(OP_READ) ] = { | ||
539 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
540 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
541 | }, | ||
542 | [ C(OP_WRITE) ] = { | ||
543 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
544 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
545 | }, | ||
546 | [ C(OP_PREFETCH) ] = { | ||
547 | [ C(RESULT_ACCESS) ] = 0, | ||
548 | [ C(RESULT_MISS) ] = 0, | ||
549 | }, | ||
550 | }, | ||
551 | [ C(DTLB) ] = { | ||
552 | [ C(OP_READ) ] = { | ||
553 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
554 | [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ | ||
555 | }, | ||
556 | [ C(OP_WRITE) ] = { | ||
557 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
558 | [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ | ||
559 | }, | ||
560 | [ C(OP_PREFETCH) ] = { | ||
561 | [ C(RESULT_ACCESS) ] = 0, | ||
562 | [ C(RESULT_MISS) ] = 0, | ||
563 | }, | ||
564 | }, | ||
565 | [ C(ITLB) ] = { | ||
566 | [ C(OP_READ) ] = { | ||
567 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
568 | [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ | ||
569 | }, | ||
570 | [ C(OP_WRITE) ] = { | ||
571 | [ C(RESULT_ACCESS) ] = -1, | ||
572 | [ C(RESULT_MISS) ] = -1, | ||
573 | }, | ||
574 | [ C(OP_PREFETCH) ] = { | ||
575 | [ C(RESULT_ACCESS) ] = -1, | ||
576 | [ C(RESULT_MISS) ] = -1, | ||
577 | }, | ||
578 | }, | ||
579 | [ C(BPU ) ] = { | ||
580 | [ C(OP_READ) ] = { | ||
581 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
582 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
583 | }, | ||
584 | [ C(OP_WRITE) ] = { | ||
585 | [ C(RESULT_ACCESS) ] = -1, | ||
586 | [ C(RESULT_MISS) ] = -1, | ||
587 | }, | ||
588 | [ C(OP_PREFETCH) ] = { | ||
589 | [ C(RESULT_ACCESS) ] = -1, | ||
590 | [ C(RESULT_MISS) ] = -1, | ||
591 | }, | ||
592 | }, | ||
593 | }; | ||
594 | |||
595 | static __initconst u64 atom_hw_cache_event_ids | ||
596 | [PERF_COUNT_HW_CACHE_MAX] | ||
597 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
598 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
599 | { | ||
600 | [ C(L1D) ] = { | ||
601 | [ C(OP_READ) ] = { | ||
602 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ | ||
603 | [ C(RESULT_MISS) ] = 0, | ||
604 | }, | ||
605 | [ C(OP_WRITE) ] = { | ||
606 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ | ||
607 | [ C(RESULT_MISS) ] = 0, | ||
608 | }, | ||
609 | [ C(OP_PREFETCH) ] = { | ||
610 | [ C(RESULT_ACCESS) ] = 0x0, | ||
611 | [ C(RESULT_MISS) ] = 0, | ||
612 | }, | ||
613 | }, | ||
614 | [ C(L1I ) ] = { | ||
615 | [ C(OP_READ) ] = { | ||
616 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
617 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
618 | }, | ||
619 | [ C(OP_WRITE) ] = { | ||
620 | [ C(RESULT_ACCESS) ] = -1, | ||
621 | [ C(RESULT_MISS) ] = -1, | ||
622 | }, | ||
623 | [ C(OP_PREFETCH) ] = { | ||
624 | [ C(RESULT_ACCESS) ] = 0, | ||
625 | [ C(RESULT_MISS) ] = 0, | ||
626 | }, | ||
627 | }, | ||
628 | [ C(LL ) ] = { | ||
629 | [ C(OP_READ) ] = { | ||
630 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
631 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
632 | }, | ||
633 | [ C(OP_WRITE) ] = { | ||
634 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
635 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
636 | }, | ||
637 | [ C(OP_PREFETCH) ] = { | ||
638 | [ C(RESULT_ACCESS) ] = 0, | ||
639 | [ C(RESULT_MISS) ] = 0, | ||
640 | }, | ||
641 | }, | ||
642 | [ C(DTLB) ] = { | ||
643 | [ C(OP_READ) ] = { | ||
644 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ | ||
645 | [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ | ||
646 | }, | ||
647 | [ C(OP_WRITE) ] = { | ||
648 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ | ||
649 | [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ | ||
650 | }, | ||
651 | [ C(OP_PREFETCH) ] = { | ||
652 | [ C(RESULT_ACCESS) ] = 0, | ||
653 | [ C(RESULT_MISS) ] = 0, | ||
654 | }, | ||
655 | }, | ||
656 | [ C(ITLB) ] = { | ||
657 | [ C(OP_READ) ] = { | ||
658 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
659 | [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ | ||
660 | }, | ||
661 | [ C(OP_WRITE) ] = { | ||
662 | [ C(RESULT_ACCESS) ] = -1, | ||
663 | [ C(RESULT_MISS) ] = -1, | ||
664 | }, | ||
665 | [ C(OP_PREFETCH) ] = { | ||
666 | [ C(RESULT_ACCESS) ] = -1, | ||
667 | [ C(RESULT_MISS) ] = -1, | ||
668 | }, | ||
669 | }, | ||
670 | [ C(BPU ) ] = { | ||
671 | [ C(OP_READ) ] = { | ||
672 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
673 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
674 | }, | ||
675 | [ C(OP_WRITE) ] = { | ||
676 | [ C(RESULT_ACCESS) ] = -1, | ||
677 | [ C(RESULT_MISS) ] = -1, | ||
678 | }, | ||
679 | [ C(OP_PREFETCH) ] = { | ||
680 | [ C(RESULT_ACCESS) ] = -1, | ||
681 | [ C(RESULT_MISS) ] = -1, | ||
682 | }, | ||
683 | }, | ||
684 | }; | ||
685 | |||
686 | static u64 intel_pmu_raw_event(u64 hw_event) | ||
687 | { | ||
688 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
689 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
690 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
691 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
692 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | ||
693 | |||
694 | #define CORE_EVNTSEL_MASK \ | ||
695 | (INTEL_ARCH_EVTSEL_MASK | \ | ||
696 | INTEL_ARCH_UNIT_MASK | \ | ||
697 | INTEL_ARCH_EDGE_MASK | \ | ||
698 | INTEL_ARCH_INV_MASK | \ | ||
699 | INTEL_ARCH_CNT_MASK) | ||
700 | |||
701 | return hw_event & CORE_EVNTSEL_MASK; | ||
702 | } | ||
703 | |||
704 | static __initconst u64 amd_hw_cache_event_ids | ||
705 | [PERF_COUNT_HW_CACHE_MAX] | ||
706 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
707 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
708 | { | ||
709 | [ C(L1D) ] = { | ||
710 | [ C(OP_READ) ] = { | ||
711 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
712 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ | ||
713 | }, | ||
714 | [ C(OP_WRITE) ] = { | ||
715 | [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ | ||
716 | [ C(RESULT_MISS) ] = 0, | ||
717 | }, | ||
718 | [ C(OP_PREFETCH) ] = { | ||
719 | [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ | ||
720 | [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ | ||
721 | }, | ||
722 | }, | ||
723 | [ C(L1I ) ] = { | ||
724 | [ C(OP_READ) ] = { | ||
725 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ | ||
726 | [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ | ||
727 | }, | ||
728 | [ C(OP_WRITE) ] = { | ||
729 | [ C(RESULT_ACCESS) ] = -1, | ||
730 | [ C(RESULT_MISS) ] = -1, | ||
731 | }, | ||
732 | [ C(OP_PREFETCH) ] = { | ||
733 | [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ | ||
734 | [ C(RESULT_MISS) ] = 0, | ||
735 | }, | ||
736 | }, | ||
737 | [ C(LL ) ] = { | ||
738 | [ C(OP_READ) ] = { | ||
739 | [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ | ||
740 | [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ | ||
741 | }, | ||
742 | [ C(OP_WRITE) ] = { | ||
743 | [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ | ||
744 | [ C(RESULT_MISS) ] = 0, | ||
745 | }, | ||
746 | [ C(OP_PREFETCH) ] = { | ||
747 | [ C(RESULT_ACCESS) ] = 0, | ||
748 | [ C(RESULT_MISS) ] = 0, | ||
749 | }, | ||
750 | }, | ||
751 | [ C(DTLB) ] = { | ||
752 | [ C(OP_READ) ] = { | ||
753 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
754 | [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ | ||
755 | }, | ||
756 | [ C(OP_WRITE) ] = { | ||
757 | [ C(RESULT_ACCESS) ] = 0, | ||
758 | [ C(RESULT_MISS) ] = 0, | ||
759 | }, | ||
760 | [ C(OP_PREFETCH) ] = { | ||
761 | [ C(RESULT_ACCESS) ] = 0, | ||
762 | [ C(RESULT_MISS) ] = 0, | ||
763 | }, | ||
764 | }, | ||
765 | [ C(ITLB) ] = { | ||
766 | [ C(OP_READ) ] = { | ||
767 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ | ||
768 | [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ | ||
769 | }, | ||
770 | [ C(OP_WRITE) ] = { | ||
771 | [ C(RESULT_ACCESS) ] = -1, | ||
772 | [ C(RESULT_MISS) ] = -1, | ||
773 | }, | ||
774 | [ C(OP_PREFETCH) ] = { | ||
775 | [ C(RESULT_ACCESS) ] = -1, | ||
776 | [ C(RESULT_MISS) ] = -1, | ||
777 | }, | ||
778 | }, | ||
779 | [ C(BPU ) ] = { | ||
780 | [ C(OP_READ) ] = { | ||
781 | [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ | ||
782 | [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ | ||
783 | }, | ||
784 | [ C(OP_WRITE) ] = { | ||
785 | [ C(RESULT_ACCESS) ] = -1, | ||
786 | [ C(RESULT_MISS) ] = -1, | ||
787 | }, | ||
788 | [ C(OP_PREFETCH) ] = { | ||
789 | [ C(RESULT_ACCESS) ] = -1, | ||
790 | [ C(RESULT_MISS) ] = -1, | ||
791 | }, | ||
792 | }, | ||
793 | }; | ||
794 | |||
795 | /* | ||
796 | * AMD Performance Monitor K7 and later. | ||
797 | */ | ||
798 | static const u64 amd_perfmon_event_map[] = | ||
799 | { | ||
800 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, | ||
801 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
802 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, | ||
803 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, | ||
804 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
805 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
806 | }; | ||
807 | |||
808 | static u64 amd_pmu_event_map(int hw_event) | ||
809 | { | ||
810 | return amd_perfmon_event_map[hw_event]; | ||
811 | } | ||
812 | |||
813 | static u64 amd_pmu_raw_event(u64 hw_event) | ||
814 | { | ||
815 | #define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL | ||
816 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | ||
817 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | ||
818 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | ||
819 | #define K7_EVNTSEL_REG_MASK 0x0FF000000ULL | ||
820 | |||
821 | #define K7_EVNTSEL_MASK \ | ||
822 | (K7_EVNTSEL_EVENT_MASK | \ | ||
823 | K7_EVNTSEL_UNIT_MASK | \ | ||
824 | K7_EVNTSEL_EDGE_MASK | \ | ||
825 | K7_EVNTSEL_INV_MASK | \ | ||
826 | K7_EVNTSEL_REG_MASK) | ||
827 | |||
828 | return hw_event & K7_EVNTSEL_MASK; | ||
829 | } | ||
830 | |||
831 | /* | 186 | /* |
832 | * Propagate event elapsed time into the generic event. | 187 | * Propagate event elapsed time into the generic event. |
833 | * Can only be executed on the CPU where the event is active. | 188 | * Can only be executed on the CPU where the event is active. |
@@ -1079,42 +434,6 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
1079 | return 0; | 434 | return 0; |
1080 | } | 435 | } |
1081 | 436 | ||
1082 | static void intel_pmu_enable_bts(u64 config) | ||
1083 | { | ||
1084 | unsigned long debugctlmsr; | ||
1085 | |||
1086 | debugctlmsr = get_debugctlmsr(); | ||
1087 | |||
1088 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
1089 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
1090 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
1091 | |||
1092 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
1093 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
1094 | |||
1095 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
1096 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
1097 | |||
1098 | update_debugctlmsr(debugctlmsr); | ||
1099 | } | ||
1100 | |||
1101 | static void intel_pmu_disable_bts(void) | ||
1102 | { | ||
1103 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1104 | unsigned long debugctlmsr; | ||
1105 | |||
1106 | if (!cpuc->ds) | ||
1107 | return; | ||
1108 | |||
1109 | debugctlmsr = get_debugctlmsr(); | ||
1110 | |||
1111 | debugctlmsr &= | ||
1112 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
1113 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
1114 | |||
1115 | update_debugctlmsr(debugctlmsr); | ||
1116 | } | ||
1117 | |||
1118 | /* | 437 | /* |
1119 | * Setup the hardware configuration for a given attr_type | 438 | * Setup the hardware configuration for a given attr_type |
1120 | */ | 439 | */ |
@@ -1223,26 +542,6 @@ static int __hw_perf_event_init(struct perf_event *event)
1223 | return 0; | 542 | return 0; |
1224 | } | 543 | } |
1225 | 544 | ||
1226 | static void p6_pmu_disable_all(void) | ||
1227 | { | ||
1228 | u64 val; | ||
1229 | |||
1230 | /* p6 only has one enable register */ | ||
1231 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
1232 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1233 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
1234 | } | ||
1235 | |||
1236 | static void intel_pmu_disable_all(void) | ||
1237 | { | ||
1238 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1239 | |||
1240 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | ||
1241 | |||
1242 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | ||
1243 | intel_pmu_disable_bts(); | ||
1244 | } | ||
1245 | |||
1246 | static void x86_pmu_disable_all(void) | 545 | static void x86_pmu_disable_all(void) |
1247 | { | 546 | { |
1248 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 547 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -1278,33 +577,6 @@ void hw_perf_disable(void)
1278 | x86_pmu.disable_all(); | 577 | x86_pmu.disable_all(); |
1279 | } | 578 | } |
1280 | 579 | ||
1281 | static void p6_pmu_enable_all(void) | ||
1282 | { | ||
1283 | unsigned long val; | ||
1284 | |||
1285 | /* p6 only has one enable register */ | ||
1286 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
1287 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1288 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
1289 | } | ||
1290 | |||
1291 | static void intel_pmu_enable_all(void) | ||
1292 | { | ||
1293 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1294 | |||
1295 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | ||
1296 | |||
1297 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | ||
1298 | struct perf_event *event = | ||
1299 | cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
1300 | |||
1301 | if (WARN_ON_ONCE(!event)) | ||
1302 | return; | ||
1303 | |||
1304 | intel_pmu_enable_bts(event->hw.config); | ||
1305 | } | ||
1306 | } | ||
1307 | |||
1308 | static void x86_pmu_enable_all(void) | 580 | static void x86_pmu_enable_all(void) |
1309 | { | 581 | { |
1310 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 582 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -1578,20 +850,6 @@ void hw_perf_enable(void)
1578 | x86_pmu.enable_all(); | 850 | x86_pmu.enable_all(); |
1579 | } | 851 | } |
1580 | 852 | ||
1581 | static inline u64 intel_pmu_get_status(void) | ||
1582 | { | ||
1583 | u64 status; | ||
1584 | |||
1585 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | ||
1586 | |||
1587 | return status; | ||
1588 | } | ||
1589 | |||
1590 | static inline void intel_pmu_ack_status(u64 ack) | ||
1591 | { | ||
1592 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | ||
1593 | } | ||
1594 | |||
1595 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) | 853 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) |
1596 | { | 854 | { |
1597 | (void)checking_wrmsrl(hwc->config_base + idx, | 855 | (void)checking_wrmsrl(hwc->config_base + idx, |
@@ -1603,47 +861,6 @@ static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1603 | (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); | 861 | (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); |
1604 | } | 862 | } |
1605 | 863 | ||
1606 | static inline void | ||
1607 | intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) | ||
1608 | { | ||
1609 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
1610 | u64 ctrl_val, mask; | ||
1611 | |||
1612 | mask = 0xfULL << (idx * 4); | ||
1613 | |||
1614 | rdmsrl(hwc->config_base, ctrl_val); | ||
1615 | ctrl_val &= ~mask; | ||
1616 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | ||
1617 | } | ||
1618 | |||
1619 | static inline void | ||
1620 | p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
1621 | { | ||
1622 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1623 | u64 val = P6_NOP_EVENT; | ||
1624 | |||
1625 | if (cpuc->enabled) | ||
1626 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1627 | |||
1628 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
1629 | } | ||
1630 | |||
1631 | static inline void | ||
1632 | intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
1633 | { | ||
1634 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1635 | intel_pmu_disable_bts(); | ||
1636 | return; | ||
1637 | } | ||
1638 | |||
1639 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
1640 | intel_pmu_disable_fixed(hwc, idx); | ||
1641 | return; | ||
1642 | } | ||
1643 | |||
1644 | x86_pmu_disable_event(hwc, idx); | ||
1645 | } | ||
1646 | |||
1647 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | 864 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
1648 | 865 | ||
1649 | /* | 866 | /* |
@@ -1702,70 +919,6 @@ x86_perf_event_set_period(struct perf_event *event,
1702 | return ret; | 919 | return ret; |
1703 | } | 920 | } |
1704 | 921 | ||
1705 | static inline void | ||
1706 | intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) | ||
1707 | { | ||
1708 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
1709 | u64 ctrl_val, bits, mask; | ||
1710 | int err; | ||
1711 | |||
1712 | /* | ||
1713 | * Enable IRQ generation (0x8), | ||
1714 | * and enable ring-3 counting (0x2) and ring-0 counting (0x1) | ||
1715 | * if requested: | ||
1716 | */ | ||
1717 | bits = 0x8ULL; | ||
1718 | if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) | ||
1719 | bits |= 0x2; | ||
1720 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
1721 | bits |= 0x1; | ||
1722 | |||
1723 | /* | ||
1724 | * ANY bit is supported in v3 and up | ||
1725 | */ | ||
1726 | if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) | ||
1727 | bits |= 0x4; | ||
1728 | |||
1729 | bits <<= (idx * 4); | ||
1730 | mask = 0xfULL << (idx * 4); | ||
1731 | |||
1732 | rdmsrl(hwc->config_base, ctrl_val); | ||
1733 | ctrl_val &= ~mask; | ||
1734 | ctrl_val |= bits; | ||
1735 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
1736 | } | ||
1737 | |||
1738 | static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1739 | { | ||
1740 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1741 | u64 val; | ||
1742 | |||
1743 | val = hwc->config; | ||
1744 | if (cpuc->enabled) | ||
1745 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1746 | |||
1747 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
1748 | } | ||
1749 | |||
1750 | |||
1751 | static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1752 | { | ||
1753 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1754 | if (!__get_cpu_var(cpu_hw_events).enabled) | ||
1755 | return; | ||
1756 | |||
1757 | intel_pmu_enable_bts(hwc->config); | ||
1758 | return; | ||
1759 | } | ||
1760 | |||
1761 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
1762 | intel_pmu_enable_fixed(hwc, idx); | ||
1763 | return; | ||
1764 | } | ||
1765 | |||
1766 | __x86_pmu_enable_event(hwc, idx); | ||
1767 | } | ||
1768 | |||
1769 | static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) | 922 | static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) |
1770 | { | 923 | { |
1771 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 924 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -1887,66 +1040,6 @@ void perf_event_print_debug(void)
1887 | local_irq_restore(flags); | 1040 | local_irq_restore(flags); |
1888 | } | 1041 | } |
1889 | 1042 | ||
1890 | static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) | ||
1891 | { | ||
1892 | struct debug_store *ds = cpuc->ds; | ||
1893 | struct bts_record { | ||
1894 | u64 from; | ||
1895 | u64 to; | ||
1896 | u64 flags; | ||
1897 | }; | ||
1898 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
1899 | struct bts_record *at, *top; | ||
1900 | struct perf_output_handle handle; | ||
1901 | struct perf_event_header header; | ||
1902 | struct perf_sample_data data; | ||
1903 | struct pt_regs regs; | ||
1904 | |||
1905 | if (!event) | ||
1906 | return; | ||
1907 | |||
1908 | if (!ds) | ||
1909 | return; | ||
1910 | |||
1911 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
1912 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
1913 | |||
1914 | if (top <= at) | ||
1915 | return; | ||
1916 | |||
1917 | ds->bts_index = ds->bts_buffer_base; | ||
1918 | |||
1919 | |||
1920 | data.period = event->hw.last_period; | ||
1921 | data.addr = 0; | ||
1922 | data.raw = NULL; | ||
1923 | regs.ip = 0; | ||
1924 | |||
1925 | /* | ||
1926 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
1927 | * We will overwrite the from and to address before we output | ||
1928 | * the sample. | ||
1929 | */ | ||
1930 | perf_prepare_sample(&header, &data, event, ®s); | ||
1931 | |||
1932 | if (perf_output_begin(&handle, event, | ||
1933 | header.size * (top - at), 1, 1)) | ||
1934 | return; | ||
1935 | |||
1936 | for (; at < top; at++) { | ||
1937 | data.ip = at->from; | ||
1938 | data.addr = at->to; | ||
1939 | |||
1940 | perf_output_sample(&handle, &header, &data, event); | ||
1941 | } | ||
1942 | |||
1943 | perf_output_end(&handle); | ||
1944 | |||
1945 | /* There's new data available. */ | ||
1946 | event->hw.interrupts++; | ||
1947 | event->pending_kill = POLL_IN; | ||
1948 | } | ||
1949 | |||
1950 | static void x86_pmu_stop(struct perf_event *event) | 1043 | static void x86_pmu_stop(struct perf_event *event) |
1951 | { | 1044 | { |
1952 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1045 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -1966,10 +1059,6 @@ static void x86_pmu_stop(struct perf_event *event)
1966 | */ | 1059 | */ |
1967 | x86_perf_event_update(event, hwc, idx); | 1060 | x86_perf_event_update(event, hwc, idx); |
1968 | 1061 | ||
1969 | /* Drain the remaining BTS records. */ | ||
1970 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) | ||
1971 | intel_pmu_drain_bts_buffer(cpuc); | ||
1972 | |||
1973 | cpuc->events[idx] = NULL; | 1062 | cpuc->events[idx] = NULL; |
1974 | } | 1063 | } |
1975 | 1064 | ||
@@ -1996,114 +1085,6 @@ static void x86_pmu_disable(struct perf_event *event)
1996 | perf_event_update_userpage(event); | 1085 | perf_event_update_userpage(event); |
1997 | } | 1086 | } |
1998 | 1087 | ||
1999 | /* | ||
2000 | * Save and restart an expired event. Called by NMI contexts, | ||
2001 | * so it has to be careful about preempting normal event ops: | ||
2002 | */ | ||
2003 | static int intel_pmu_save_and_restart(struct perf_event *event) | ||
2004 | { | ||
2005 | struct hw_perf_event *hwc = &event->hw; | ||
2006 | int idx = hwc->idx; | ||
2007 | int ret; | ||
2008 | |||
2009 | x86_perf_event_update(event, hwc, idx); | ||
2010 | ret = x86_perf_event_set_period(event, hwc, idx); | ||
2011 | |||
2012 | return ret; | ||
2013 | } | ||
2014 | |||
2015 | static void intel_pmu_reset(void) | ||
2016 | { | ||
2017 | struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; | ||
2018 | unsigned long flags; | ||
2019 | int idx; | ||
2020 | |||
2021 | if (!x86_pmu.num_events) | ||
2022 | return; | ||
2023 | |||
2024 | local_irq_save(flags); | ||
2025 | |||
2026 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | ||
2027 | |||
2028 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
2029 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | ||
2030 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | ||
2031 | } | ||
2032 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | ||
2033 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | ||
2034 | } | ||
2035 | if (ds) | ||
2036 | ds->bts_index = ds->bts_buffer_base; | ||
2037 | |||
2038 | local_irq_restore(flags); | ||
2039 | } | ||
2040 | |||
2041 | /* | ||
2042 | * This handler is triggered by the local APIC, so the APIC IRQ handling | ||
2043 | * rules apply: | ||
2044 | */ | ||
2045 | static int intel_pmu_handle_irq(struct pt_regs *regs) | ||
2046 | { | ||
2047 | struct perf_sample_data data; | ||
2048 | struct cpu_hw_events *cpuc; | ||
2049 | int bit, loops; | ||
2050 | u64 ack, status; | ||
2051 | |||
2052 | data.addr = 0; | ||
2053 | data.raw = NULL; | ||
2054 | |||
2055 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
2056 | |||
2057 | perf_disable(); | ||
2058 | intel_pmu_drain_bts_buffer(cpuc); | ||
2059 | status = intel_pmu_get_status(); | ||
2060 | if (!status) { | ||
2061 | perf_enable(); | ||
2062 | return 0; | ||
2063 | } | ||
2064 | |||
2065 | loops = 0; | ||
2066 | again: | ||
2067 | if (++loops > 100) { | ||
2068 | WARN_ONCE(1, "perfevents: irq loop stuck!\n"); | ||
2069 | perf_event_print_debug(); | ||
2070 | intel_pmu_reset(); | ||
2071 | perf_enable(); | ||
2072 | return 1; | ||
2073 | } | ||
2074 | |||
2075 | inc_irq_stat(apic_perf_irqs); | ||
2076 | ack = status; | ||
2077 | for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | ||
2078 | struct perf_event *event = cpuc->events[bit]; | ||
2079 | |||
2080 | clear_bit(bit, (unsigned long *) &status); | ||
2081 | if (!test_bit(bit, cpuc->active_mask)) | ||
2082 | continue; | ||
2083 | |||
2084 | if (!intel_pmu_save_and_restart(event)) | ||
2085 | continue; | ||
2086 | |||
2087 | data.period = event->hw.last_period; | ||
2088 | |||
2089 | if (perf_event_overflow(event, 1, &data, regs)) | ||
2090 | intel_pmu_disable_event(&event->hw, bit); | ||
2091 | } | ||
2092 | |||
2093 | intel_pmu_ack_status(ack); | ||
2094 | |||
2095 | /* | ||
2096 | * Repeat if there is more work to be done: | ||
2097 | */ | ||
2098 | status = intel_pmu_get_status(); | ||
2099 | if (status) | ||
2100 | goto again; | ||
2101 | |||
2102 | perf_enable(); | ||
2103 | |||
2104 | return 1; | ||
2105 | } | ||
2106 | |||
2107 | static int x86_pmu_handle_irq(struct pt_regs *regs) | 1088 | static int x86_pmu_handle_irq(struct pt_regs *regs) |
2108 | { | 1089 | { |
2109 | struct perf_sample_data data; | 1090 | struct perf_sample_data data; |
@@ -2216,37 +1197,20 @@ perf_event_nmi_handler(struct notifier_block *self,
2216 | return NOTIFY_STOP; | 1197 | return NOTIFY_STOP; |
2217 | } | 1198 | } |
2218 | 1199 | ||
1200 | static __read_mostly struct notifier_block perf_event_nmi_notifier = { | ||
1201 | .notifier_call = perf_event_nmi_handler, | ||
1202 | .next = NULL, | ||
1203 | .priority = 1 | ||
1204 | }; | ||
1205 | |||
2219 | static struct event_constraint unconstrained; | 1206 | static struct event_constraint unconstrained; |
2220 | static struct event_constraint emptyconstraint; | 1207 | static struct event_constraint emptyconstraint; |
2221 | 1208 | ||
2222 | static struct event_constraint bts_constraint = | ||
2223 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
2224 | |||
2225 | static struct event_constraint * | ||
2226 | intel_special_constraints(struct perf_event *event) | ||
2227 | { | ||
2228 | unsigned int hw_event; | ||
2229 | |||
2230 | hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; | ||
2231 | |||
2232 | if (unlikely((hw_event == | ||
2233 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | ||
2234 | (event->hw.sample_period == 1))) { | ||
2235 | |||
2236 | return &bts_constraint; | ||
2237 | } | ||
2238 | return NULL; | ||
2239 | } | ||
2240 | |||
2241 | static struct event_constraint * | 1209 | static struct event_constraint * |
2242 | intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | 1210 | x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) |
2243 | { | 1211 | { |
2244 | struct event_constraint *c; | 1212 | struct event_constraint *c; |
2245 | 1213 | ||
2246 | c = intel_special_constraints(event); | ||
2247 | if (c) | ||
2248 | return c; | ||
2249 | |||
2250 | if (x86_pmu.event_constraints) { | 1214 | if (x86_pmu.event_constraints) { |
2251 | for_each_event_constraint(c, x86_pmu.event_constraints) { | 1215 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
2252 | if ((event->hw.config & c->cmask) == c->code) | 1216 | if ((event->hw.config & c->cmask) == c->code) |
@@ -2257,148 +1221,6 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
2257 | return &unconstrained; | 1221 | return &unconstrained; |
2258 | } | 1222 | } |
2259 | 1223 | ||
2260 | /* | ||
2261 | * AMD64 events are detected based on their event codes. | ||
2262 | */ | ||
2263 | static inline int amd_is_nb_event(struct hw_perf_event *hwc) | ||
2264 | { | ||
2265 | return (hwc->config & 0xe0) == 0xe0; | ||
2266 | } | ||
2267 | |||
2268 | static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | ||
2269 | struct perf_event *event) | ||
2270 | { | ||
2271 | struct hw_perf_event *hwc = &event->hw; | ||
2272 | struct amd_nb *nb = cpuc->amd_nb; | ||
2273 | int i; | ||
2274 | |||
2275 | /* | ||
2276 | * only care about NB events | ||
2277 | */ | ||
2278 | if (!(nb && amd_is_nb_event(hwc))) | ||
2279 | return; | ||
2280 | |||
2281 | /* | ||
2282 | * need to scan whole list because event may not have | ||
2283 | * been assigned during scheduling | ||
2284 | * | ||
2285 | * no race condition possible because event can only | ||
2286 | * be removed on one CPU at a time AND PMU is disabled | ||
2287 | * when we come here | ||
2288 | */ | ||
2289 | for (i = 0; i < x86_pmu.num_events; i++) { | ||
2290 | if (nb->owners[i] == event) { | ||
2291 | cmpxchg(nb->owners+i, event, NULL); | ||
2292 | break; | ||
2293 | } | ||
2294 | } | ||
2295 | } | ||
2296 | |||
2297 | /* | ||
2298 | * AMD64 NorthBridge events need special treatment because | ||
2299 | * counter access needs to be synchronized across all cores | ||
2300 | * of a package. Refer to BKDG section 3.12 | ||
2301 | * | ||
2302 | * NB events are events measuring L3 cache, Hypertransport | ||
2303 | * traffic. They are identified by an event code >= 0xe00. | ||
2304 | * They measure events on the NorthBride which is shared | ||
2305 | * by all cores on a package. NB events are counted on a | ||
2306 | * shared set of counters. When a NB event is programmed | ||
2307 | * in a counter, the data actually comes from a shared | ||
2308 | * counter. Thus, access to those counters needs to be | ||
2309 | * synchronized. | ||
2310 | * | ||
2311 | * We implement the synchronization such that no two cores | ||
2312 | * can be measuring NB events using the same counters. Thus, | ||
2313 | * we maintain a per-NB allocation table. The available slot | ||
2314 | * is propagated using the event_constraint structure. | ||
2315 | * | ||
2316 | * We provide only one choice for each NB event based on | ||
2317 | * the fact that only NB events have restrictions. Consequently, | ||
2318 | * if a counter is available, there is a guarantee the NB event | ||
2319 | * will be assigned to it. If no slot is available, an empty | ||
2320 | * constraint is returned and scheduling will eventually fail | ||
2321 | * for this event. | ||
2322 | * | ||
2323 | * Note that all cores attached the same NB compete for the same | ||
2324 | * counters to host NB events, this is why we use atomic ops. Some | ||
2325 | * multi-chip CPUs may have more than one NB. | ||
2326 | * | ||
2327 | * Given that resources are allocated (cmpxchg), they must be | ||
2328 | * eventually freed for others to use. This is accomplished by | ||
2329 | * calling amd_put_event_constraints(). | ||
2330 | * | ||
2331 | * Non NB events are not impacted by this restriction. | ||
2332 | */ | ||
2333 | static struct event_constraint * | ||
2334 | amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
2335 | { | ||
2336 | struct hw_perf_event *hwc = &event->hw; | ||
2337 | struct amd_nb *nb = cpuc->amd_nb; | ||
2338 | struct perf_event *old = NULL; | ||
2339 | int max = x86_pmu.num_events; | ||
2340 | int i, j, k = -1; | ||
2341 | |||
2342 | /* | ||
2343 | * if not NB event or no NB, then no constraints | ||
2344 | */ | ||
2345 | if (!(nb && amd_is_nb_event(hwc))) | ||
2346 | return &unconstrained; | ||
2347 | |||
2348 | /* | ||
2349 | * detect if already present, if so reuse | ||
2350 | * | ||
2351 | * cannot merge with actual allocation | ||
2352 | * because of possible holes | ||
2353 | * | ||
2354 | * event can already be present yet not assigned (in hwc->idx) | ||
2355 | * because of successive calls to x86_schedule_events() from | ||
2356 | * hw_perf_group_sched_in() without hw_perf_enable() | ||
2357 | */ | ||
2358 | for (i = 0; i < max; i++) { | ||
2359 | /* | ||
2360 | * keep track of first free slot | ||
2361 | */ | ||
2362 | if (k == -1 && !nb->owners[i]) | ||
2363 | k = i; | ||
2364 | |||
2365 | /* already present, reuse */ | ||
2366 | if (nb->owners[i] == event) | ||
2367 | goto done; | ||
2368 | } | ||
2369 | /* | ||
2370 | * not present, so grab a new slot | ||
2371 | * starting either at: | ||
2372 | */ | ||
2373 | if (hwc->idx != -1) { | ||
2374 | /* previous assignment */ | ||
2375 | i = hwc->idx; | ||
2376 | } else if (k != -1) { | ||
2377 | /* start from free slot found */ | ||
2378 | i = k; | ||
2379 | } else { | ||
2380 | /* | ||
2381 | * event not found, no slot found in | ||
2382 | * first pass, try again from the | ||
2383 | * beginning | ||
2384 | */ | ||
2385 | i = 0; | ||
2386 | } | ||
2387 | j = i; | ||
2388 | do { | ||
2389 | old = cmpxchg(nb->owners+i, NULL, event); | ||
2390 | if (!old) | ||
2391 | break; | ||
2392 | if (++i == max) | ||
2393 | i = 0; | ||
2394 | } while (i != j); | ||
2395 | done: | ||
2396 | if (!old) | ||
2397 | return &nb->event_constraints[i]; | ||
2398 | |||
2399 | return &emptyconstraint; | ||
2400 | } | ||
2401 | |||
2402 | static int x86_event_sched_in(struct perf_event *event, | 1224 | static int x86_event_sched_in(struct perf_event *event, |
2403 | struct perf_cpu_context *cpuctx) | 1225 | struct perf_cpu_context *cpuctx) |
2404 | { | 1226 | { |
@@ -2509,335 +1331,9 @@ undo:
2509 | return ret; | 1331 | return ret; |
2510 | } | 1332 | } |
2511 | 1333 | ||
2512 | static __read_mostly struct notifier_block perf_event_nmi_notifier = { | 1334 | #include "perf_event_amd.c" |
2513 | .notifier_call = perf_event_nmi_handler, | 1335 | #include "perf_event_p6.c" |
2514 | .next = NULL, | 1336 | #include "perf_event_intel.c" |
2515 | .priority = 1 | ||
2516 | }; | ||
2517 | |||
2518 | static __initconst struct x86_pmu p6_pmu = { | ||
2519 | .name = "p6", | ||
2520 | .handle_irq = x86_pmu_handle_irq, | ||
2521 | .disable_all = p6_pmu_disable_all, | ||
2522 | .enable_all = p6_pmu_enable_all, | ||
2523 | .enable = p6_pmu_enable_event, | ||
2524 | .disable = p6_pmu_disable_event, | ||
2525 | .eventsel = MSR_P6_EVNTSEL0, | ||
2526 | .perfctr = MSR_P6_PERFCTR0, | ||
2527 | .event_map = p6_pmu_event_map, | ||
2528 | .raw_event = p6_pmu_raw_event, | ||
2529 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | ||
2530 | .apic = 1, | ||
2531 | .max_period = (1ULL << 31) - 1, | ||
2532 | .version = 0, | ||
2533 | .num_events = 2, | ||
2534 | /* | ||
2535 | * Events have 40 bits implemented. However they are designed such | ||
2536 | * that bits [32-39] are sign extensions of bit 31. As such the | ||
2537 | * effective width of a event for P6-like PMU is 32 bits only. | ||
2538 | * | ||
2539 | * See IA-32 Intel Architecture Software developer manual Vol 3B | ||
2540 | */ | ||
2541 | .event_bits = 32, | ||
2542 | .event_mask = (1ULL << 32) - 1, | ||
2543 | .get_event_constraints = intel_get_event_constraints, | ||
2544 | .event_constraints = intel_p6_event_constraints | ||
2545 | }; | ||
2546 | |||
2547 | static __initconst struct x86_pmu core_pmu = { | ||
2548 | .name = "core", | ||
2549 | .handle_irq = x86_pmu_handle_irq, | ||
2550 | .disable_all = x86_pmu_disable_all, | ||
2551 | .enable_all = x86_pmu_enable_all, | ||
2552 | .enable = x86_pmu_enable_event, | ||
2553 | .disable = x86_pmu_disable_event, | ||
2554 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
2555 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
2556 | .event_map = intel_pmu_event_map, | ||
2557 | .raw_event = intel_pmu_raw_event, | ||
2558 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
2559 | .apic = 1, | ||
2560 | /* | ||
2561 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
2562 | * so we install an artificial 1<<31 period regardless of | ||
2563 | * the generic event period: | ||
2564 | */ | ||
2565 | .max_period = (1ULL << 31) - 1, | ||
2566 | .get_event_constraints = intel_get_event_constraints, | ||
2567 | .event_constraints = intel_core_event_constraints, | ||
2568 | }; | ||
2569 | |||
2570 | static __initconst struct x86_pmu intel_pmu = { | ||
2571 | .name = "Intel", | ||
2572 | .handle_irq = intel_pmu_handle_irq, | ||
2573 | .disable_all = intel_pmu_disable_all, | ||
2574 | .enable_all = intel_pmu_enable_all, | ||
2575 | .enable = intel_pmu_enable_event, | ||
2576 | .disable = intel_pmu_disable_event, | ||
2577 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
2578 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
2579 | .event_map = intel_pmu_event_map, | ||
2580 | .raw_event = intel_pmu_raw_event, | ||
2581 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
2582 | .apic = 1, | ||
2583 | /* | ||
2584 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
2585 | * so we install an artificial 1<<31 period regardless of | ||
2586 | * the generic event period: | ||
2587 | */ | ||
2588 | .max_period = (1ULL << 31) - 1, | ||
2589 | .enable_bts = intel_pmu_enable_bts, | ||
2590 | .disable_bts = intel_pmu_disable_bts, | ||
2591 | .get_event_constraints = intel_get_event_constraints | ||
2592 | }; | ||
2593 | |||
2594 | static __initconst struct x86_pmu amd_pmu = { | ||
2595 | .name = "AMD", | ||
2596 | .handle_irq = x86_pmu_handle_irq, | ||
2597 | .disable_all = x86_pmu_disable_all, | ||
2598 | .enable_all = x86_pmu_enable_all, | ||
2599 | .enable = x86_pmu_enable_event, | ||
2600 | .disable = x86_pmu_disable_event, | ||
2601 | .eventsel = MSR_K7_EVNTSEL0, | ||
2602 | .perfctr = MSR_K7_PERFCTR0, | ||
2603 | .event_map = amd_pmu_event_map, | ||
2604 | .raw_event = amd_pmu_raw_event, | ||
2605 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | ||
2606 | .num_events = 4, | ||
2607 | .event_bits = 48, | ||
2608 | .event_mask = (1ULL << 48) - 1, | ||
2609 | .apic = 1, | ||
2610 | /* use highest bit to detect overflow */ | ||
2611 | .max_period = (1ULL << 47) - 1, | ||
2612 | .get_event_constraints = amd_get_event_constraints, | ||
2613 | .put_event_constraints = amd_put_event_constraints | ||
2614 | }; | ||
2615 | |||
2616 | static __init int p6_pmu_init(void) | ||
2617 | { | ||
2618 | switch (boot_cpu_data.x86_model) { | ||
2619 | case 1: | ||
2620 | case 3: /* Pentium Pro */ | ||
2621 | case 5: | ||
2622 | case 6: /* Pentium II */ | ||
2623 | case 7: | ||
2624 | case 8: | ||
2625 | case 11: /* Pentium III */ | ||
2626 | case 9: | ||
2627 | case 13: | ||
2628 | /* Pentium M */ | ||
2629 | break; | ||
2630 | default: | ||
2631 | pr_cont("unsupported p6 CPU model %d ", | ||
2632 | boot_cpu_data.x86_model); | ||
2633 | return -ENODEV; | ||
2634 | } | ||
2635 | |||
2636 | x86_pmu = p6_pmu; | ||
2637 | |||
2638 | return 0; | ||
2639 | } | ||
2640 | |||
2641 | static __init int intel_pmu_init(void) | ||
2642 | { | ||
2643 | union cpuid10_edx edx; | ||
2644 | union cpuid10_eax eax; | ||
2645 | unsigned int unused; | ||
2646 | unsigned int ebx; | ||
2647 | int version; | ||
2648 | |||
2649 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
2650 | /* check for P6 processor family */ | ||
2651 | if (boot_cpu_data.x86 == 6) { | ||
2652 | return p6_pmu_init(); | ||
2653 | } else { | ||
2654 | return -ENODEV; | ||
2655 | } | ||
2656 | } | ||
2657 | |||
2658 | /* | ||
2659 | * Check whether the Architectural PerfMon supports | ||
2660 | * Branch Misses Retired hw_event or not. | ||
2661 | */ | ||
2662 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | ||
2663 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | ||
2664 | return -ENODEV; | ||
2665 | |||
2666 | version = eax.split.version_id; | ||
2667 | if (version < 2) | ||
2668 | x86_pmu = core_pmu; | ||
2669 | else | ||
2670 | x86_pmu = intel_pmu; | ||
2671 | |||
2672 | x86_pmu.version = version; | ||
2673 | x86_pmu.num_events = eax.split.num_events; | ||
2674 | x86_pmu.event_bits = eax.split.bit_width; | ||
2675 | x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; | ||
2676 | |||
2677 | /* | ||
2678 | * Quirk: v2 perfmon does not report fixed-purpose events, so | ||
2679 | * assume at least 3 events: | ||
2680 | */ | ||
2681 | if (version > 1) | ||
2682 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | ||
2683 | |||
2684 | /* | ||
2685 | * Install the hw-cache-events table: | ||
2686 | */ | ||
2687 | switch (boot_cpu_data.x86_model) { | ||
2688 | case 14: /* 65 nm core solo/duo, "Yonah" */ | ||
2689 | pr_cont("Core events, "); | ||
2690 | break; | ||
2691 | |||
2692 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | ||
2693 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | ||
2694 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | ||
2695 | case 29: /* six-core 45 nm xeon "Dunnington" */ | ||
2696 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | ||
2697 | sizeof(hw_cache_event_ids)); | ||
2698 | |||
2699 | x86_pmu.event_constraints = intel_core2_event_constraints; | ||
2700 | pr_cont("Core2 events, "); | ||
2701 | break; | ||
2702 | |||
2703 | case 26: /* 45 nm nehalem, "Bloomfield" */ | ||
2704 | case 30: /* 45 nm nehalem, "Lynnfield" */ | ||
2705 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | ||
2706 | sizeof(hw_cache_event_ids)); | ||
2707 | |||
2708 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | ||
2709 | pr_cont("Nehalem/Corei7 events, "); | ||
2710 | break; | ||
2711 | case 28: | ||
2712 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | ||
2713 | sizeof(hw_cache_event_ids)); | ||
2714 | |||
2715 | x86_pmu.event_constraints = intel_gen_event_constraints; | ||
2716 | pr_cont("Atom events, "); | ||
2717 | break; | ||
2718 | |||
2719 | case 37: /* 32 nm nehalem, "Clarkdale" */ | ||
2720 | case 44: /* 32 nm nehalem, "Gulftown" */ | ||
2721 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, | ||
2722 | sizeof(hw_cache_event_ids)); | ||
2723 | |||
2724 | x86_pmu.event_constraints = intel_westmere_event_constraints; | ||
2725 | pr_cont("Westmere events, "); | ||
2726 | break; | ||
2727 | default: | ||
2728 | /* | ||
2729 | * default constraints for v2 and up | ||
2730 | */ | ||
2731 | x86_pmu.event_constraints = intel_gen_event_constraints; | ||
2732 | pr_cont("generic architected perfmon, "); | ||
2733 | } | ||
2734 | return 0; | ||
2735 | } | ||
2736 | |||
2737 | static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | ||
2738 | { | ||
2739 | struct amd_nb *nb; | ||
2740 | int i; | ||
2741 | |||
2742 | nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL); | ||
2743 | if (!nb) | ||
2744 | return NULL; | ||
2745 | |||
2746 | memset(nb, 0, sizeof(*nb)); | ||
2747 | nb->nb_id = nb_id; | ||
2748 | |||
2749 | /* | ||
2750 | * initialize all possible NB constraints | ||
2751 | */ | ||
2752 | for (i = 0; i < x86_pmu.num_events; i++) { | ||
2753 | set_bit(i, nb->event_constraints[i].idxmsk); | ||
2754 | nb->event_constraints[i].weight = 1; | ||
2755 | } | ||
2756 | return nb; | ||
2757 | } | ||
2758 | |||
2759 | static void amd_pmu_cpu_online(int cpu) | ||
2760 | { | ||
2761 | struct cpu_hw_events *cpu1, *cpu2; | ||
2762 | struct amd_nb *nb = NULL; | ||
2763 | int i, nb_id; | ||
2764 | |||
2765 | if (boot_cpu_data.x86_max_cores < 2) | ||
2766 | return; | ||
2767 | |||
2768 | /* | ||
2769 | * function may be called too early in the | ||
2770 | * boot process, in which case nb_id is bogus | ||
2771 | */ | ||
2772 | nb_id = amd_get_nb_id(cpu); | ||
2773 | if (nb_id == BAD_APICID) | ||
2774 | return; | ||
2775 | |||
2776 | cpu1 = &per_cpu(cpu_hw_events, cpu); | ||
2777 | cpu1->amd_nb = NULL; | ||
2778 | |||
2779 | raw_spin_lock(&amd_nb_lock); | ||
2780 | |||
2781 | for_each_online_cpu(i) { | ||
2782 | cpu2 = &per_cpu(cpu_hw_events, i); | ||
2783 | nb = cpu2->amd_nb; | ||
2784 | if (!nb) | ||
2785 | continue; | ||
2786 | if (nb->nb_id == nb_id) | ||
2787 | goto found; | ||
2788 | } | ||
2789 | |||
2790 | nb = amd_alloc_nb(cpu, nb_id); | ||
2791 | if (!nb) { | ||
2792 | pr_err("perf_events: failed NB allocation for CPU%d\n", cpu); | ||
2793 | raw_spin_unlock(&amd_nb_lock); | ||
2794 | return; | ||
2795 | } | ||
2796 | found: | ||
2797 | nb->refcnt++; | ||
2798 | cpu1->amd_nb = nb; | ||
2799 | |||
2800 | raw_spin_unlock(&amd_nb_lock); | ||
2801 | } | ||
2802 | |||
2803 | static void amd_pmu_cpu_offline(int cpu) | ||
2804 | { | ||
2805 | struct cpu_hw_events *cpuhw; | ||
2806 | |||
2807 | if (boot_cpu_data.x86_max_cores < 2) | ||
2808 | return; | ||
2809 | |||
2810 | cpuhw = &per_cpu(cpu_hw_events, cpu); | ||
2811 | |||
2812 | raw_spin_lock(&amd_nb_lock); | ||
2813 | |||
2814 | if (--cpuhw->amd_nb->refcnt == 0) | ||
2815 | kfree(cpuhw->amd_nb); | ||
2816 | |||
2817 | cpuhw->amd_nb = NULL; | ||
2818 | |||
2819 | raw_spin_unlock(&amd_nb_lock); | ||
2820 | } | ||
2821 | |||
2822 | static __init int amd_pmu_init(void) | ||
2823 | { | ||
2824 | /* Performance-monitoring supported from K7 and later: */ | ||
2825 | if (boot_cpu_data.x86 < 6) | ||
2826 | return -ENODEV; | ||
2827 | |||
2828 | x86_pmu = amd_pmu; | ||
2829 | |||
2830 | /* Events are common for all AMDs */ | ||
2831 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, | ||
2832 | sizeof(hw_cache_event_ids)); | ||
2833 | |||
2834 | /* | ||
2835 | * explicitly initialize the boot cpu; other cpus will get | ||
2836 | * the cpu hotplug callbacks from smp_init() | ||
2837 | */ | ||
2838 | amd_pmu_cpu_online(smp_processor_id()); | ||
2839 | return 0; | ||
2840 | } | ||
2841 | 1337 | ||
2842 | static void __init pmu_check_apic(void) | 1338 | static void __init pmu_check_apic(void) |
2843 | { | 1339 | { |
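
For reference, the PMU version, counter count and counter width that intel_pmu_init() reads above all come from EAX of CPUID leaf 0xA, split up exactly as union cpuid10_eax describes. Below is a minimal user-space sketch of that decode (an illustration only, not part of the patch; it assumes GCC's <cpuid.h> and its __get_cpuid() helper):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID leaf 0xA: architectural performance monitoring */
	if (!__get_cpuid(0x0a, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 0xA not available");
		return 1;
	}

	printf("version_id : %u\n", eax & 0xff);		/* eax[7:0]   */
	printf("num_events : %u\n", (eax >> 8) & 0xff);	/* eax[15:8]  */
	printf("bit_width  : %u\n", (eax >> 16) & 0xff);	/* eax[23:16] */
	printf("mask_length: %u\n", (eax >> 24) & 0xff);	/* eax[31:24] */

	return 0;
}

If mask_length is too small to cover the Branch Misses Retired architectural event, intel_pmu_init() returns -ENODEV; otherwise version_id selects core_pmu (v1) or intel_pmu (v2 and later), as the hunk above shows.
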
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c new file mode 100644 index 000000000000..6d28e08563e8 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -0,0 +1,416 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_AMD | ||
2 | |||
3 | static raw_spinlock_t amd_nb_lock; | ||
4 | |||
5 | static __initconst u64 amd_hw_cache_event_ids | ||
6 | [PERF_COUNT_HW_CACHE_MAX] | ||
7 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
8 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
9 | { | ||
10 | [ C(L1D) ] = { | ||
11 | [ C(OP_READ) ] = { | ||
12 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
13 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ | ||
14 | }, | ||
15 | [ C(OP_WRITE) ] = { | ||
16 | [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ | ||
17 | [ C(RESULT_MISS) ] = 0, | ||
18 | }, | ||
19 | [ C(OP_PREFETCH) ] = { | ||
20 | [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ | ||
21 | [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ | ||
22 | }, | ||
23 | }, | ||
24 | [ C(L1I ) ] = { | ||
25 | [ C(OP_READ) ] = { | ||
26 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ | ||
27 | [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ | ||
28 | }, | ||
29 | [ C(OP_WRITE) ] = { | ||
30 | [ C(RESULT_ACCESS) ] = -1, | ||
31 | [ C(RESULT_MISS) ] = -1, | ||
32 | }, | ||
33 | [ C(OP_PREFETCH) ] = { | ||
34 | [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ | ||
35 | [ C(RESULT_MISS) ] = 0, | ||
36 | }, | ||
37 | }, | ||
38 | [ C(LL ) ] = { | ||
39 | [ C(OP_READ) ] = { | ||
40 | [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ | ||
41 | [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ | ||
42 | }, | ||
43 | [ C(OP_WRITE) ] = { | ||
44 | [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ | ||
45 | [ C(RESULT_MISS) ] = 0, | ||
46 | }, | ||
47 | [ C(OP_PREFETCH) ] = { | ||
48 | [ C(RESULT_ACCESS) ] = 0, | ||
49 | [ C(RESULT_MISS) ] = 0, | ||
50 | }, | ||
51 | }, | ||
52 | [ C(DTLB) ] = { | ||
53 | [ C(OP_READ) ] = { | ||
54 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
55 | [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */ | ||
56 | }, | ||
57 | [ C(OP_WRITE) ] = { | ||
58 | [ C(RESULT_ACCESS) ] = 0, | ||
59 | [ C(RESULT_MISS) ] = 0, | ||
60 | }, | ||
61 | [ C(OP_PREFETCH) ] = { | ||
62 | [ C(RESULT_ACCESS) ] = 0, | ||
63 | [ C(RESULT_MISS) ] = 0, | ||
64 | }, | ||
65 | }, | ||
66 | [ C(ITLB) ] = { | ||
67 | [ C(OP_READ) ] = { | ||
68 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */ | ||
69 | [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ | ||
70 | }, | ||
71 | [ C(OP_WRITE) ] = { | ||
72 | [ C(RESULT_ACCESS) ] = -1, | ||
73 | [ C(RESULT_MISS) ] = -1, | ||
74 | }, | ||
75 | [ C(OP_PREFETCH) ] = { | ||
76 | [ C(RESULT_ACCESS) ] = -1, | ||
77 | [ C(RESULT_MISS) ] = -1, | ||
78 | }, | ||
79 | }, | ||
80 | [ C(BPU ) ] = { | ||
81 | [ C(OP_READ) ] = { | ||
82 | [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ | ||
83 | [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ | ||
84 | }, | ||
85 | [ C(OP_WRITE) ] = { | ||
86 | [ C(RESULT_ACCESS) ] = -1, | ||
87 | [ C(RESULT_MISS) ] = -1, | ||
88 | }, | ||
89 | [ C(OP_PREFETCH) ] = { | ||
90 | [ C(RESULT_ACCESS) ] = -1, | ||
91 | [ C(RESULT_MISS) ] = -1, | ||
92 | }, | ||
93 | }, | ||
94 | }; | ||
95 | |||
96 | /* | ||
97 | * AMD Performance Monitor K7 and later. | ||
98 | */ | ||
99 | static const u64 amd_perfmon_event_map[] = | ||
100 | { | ||
101 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, | ||
102 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
103 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, | ||
104 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, | ||
105 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
106 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
107 | }; | ||
108 | |||
109 | static u64 amd_pmu_event_map(int hw_event) | ||
110 | { | ||
111 | return amd_perfmon_event_map[hw_event]; | ||
112 | } | ||
113 | |||
114 | static u64 amd_pmu_raw_event(u64 hw_event) | ||
115 | { | ||
116 | #define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL | ||
117 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | ||
118 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | ||
119 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | ||
120 | #define K7_EVNTSEL_REG_MASK 0x0FF000000ULL | ||
121 | |||
122 | #define K7_EVNTSEL_MASK \ | ||
123 | (K7_EVNTSEL_EVENT_MASK | \ | ||
124 | K7_EVNTSEL_UNIT_MASK | \ | ||
125 | K7_EVNTSEL_EDGE_MASK | \ | ||
126 | K7_EVNTSEL_INV_MASK | \ | ||
127 | K7_EVNTSEL_REG_MASK) | ||
128 | |||
129 | return hw_event & K7_EVNTSEL_MASK; | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * AMD64 events are detected based on their event codes. | ||
134 | */ | ||
135 | static inline int amd_is_nb_event(struct hw_perf_event *hwc) | ||
136 | { | ||
137 | return (hwc->config & 0xe0) == 0xe0; | ||
138 | } | ||
139 | |||
140 | static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | ||
141 | struct perf_event *event) | ||
142 | { | ||
143 | struct hw_perf_event *hwc = &event->hw; | ||
144 | struct amd_nb *nb = cpuc->amd_nb; | ||
145 | int i; | ||
146 | |||
147 | /* | ||
148 | * only care about NB events | ||
149 | */ | ||
150 | if (!(nb && amd_is_nb_event(hwc))) | ||
151 | return; | ||
152 | |||
153 | /* | ||
154 | * need to scan whole list because event may not have | ||
155 | * been assigned during scheduling | ||
156 | * | ||
157 | * no race condition possible because event can only | ||
158 | * be removed on one CPU at a time AND PMU is disabled | ||
159 | * when we come here | ||
160 | */ | ||
161 | for (i = 0; i < x86_pmu.num_events; i++) { | ||
162 | if (nb->owners[i] == event) { | ||
163 | cmpxchg(nb->owners+i, event, NULL); | ||
164 | break; | ||
165 | } | ||
166 | } | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * AMD64 NorthBridge events need special treatment because | ||
171 | * counter access needs to be synchronized across all cores | ||
172 | * of a package. Refer to BKDG section 3.12 | ||
173 | * | ||
174 | * NB events are events measuring L3 cache, HyperTransport | ||
175 | * traffic. They are identified by an event code >= 0xe00. | ||
176 | * They measure events on the NorthBridge, which is shared | ||
177 | * by all cores on a package. NB events are counted on a | ||
178 | * shared set of counters. When a NB event is programmed | ||
179 | * in a counter, the data actually comes from a shared | ||
180 | * counter. Thus, access to those counters needs to be | ||
181 | * synchronized. | ||
182 | * | ||
183 | * We implement the synchronization such that no two cores | ||
184 | * can be measuring NB events using the same counters. Thus, | ||
185 | * we maintain a per-NB allocation table. The available slot | ||
186 | * is propagated using the event_constraint structure. | ||
187 | * | ||
188 | * We provide only one choice for each NB event based on | ||
189 | * the fact that only NB events have restrictions. Consequently, | ||
190 | * if a counter is available, there is a guarantee the NB event | ||
191 | * will be assigned to it. If no slot is available, an empty | ||
192 | * constraint is returned and scheduling will eventually fail | ||
193 | * for this event. | ||
194 | * | ||
195 | * Note that all cores attached to the same NB compete for the same | ||
196 | * counters to host NB events, which is why we use atomic ops. Some | ||
197 | * multi-chip CPUs may have more than one NB. | ||
198 | * | ||
199 | * Given that resources are allocated (cmpxchg), they must be | ||
200 | * eventually freed for others to use. This is accomplished by | ||
201 | * calling amd_put_event_constraints(). | ||
202 | * | ||
203 | * Non NB events are not impacted by this restriction. | ||
204 | */ | ||
205 | static struct event_constraint * | ||
206 | amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
207 | { | ||
208 | struct hw_perf_event *hwc = &event->hw; | ||
209 | struct amd_nb *nb = cpuc->amd_nb; | ||
210 | struct perf_event *old = NULL; | ||
211 | int max = x86_pmu.num_events; | ||
212 | int i, j, k = -1; | ||
213 | |||
214 | /* | ||
215 | * if not NB event or no NB, then no constraints | ||
216 | */ | ||
217 | if (!(nb && amd_is_nb_event(hwc))) | ||
218 | return &unconstrained; | ||
219 | |||
220 | /* | ||
221 | * detect if already present, if so reuse | ||
222 | * | ||
223 | * cannot merge with actual allocation | ||
224 | * because of possible holes | ||
225 | * | ||
226 | * event can already be present yet not assigned (in hwc->idx) | ||
227 | * because of successive calls to x86_schedule_events() from | ||
228 | * hw_perf_group_sched_in() without hw_perf_enable() | ||
229 | */ | ||
230 | for (i = 0; i < max; i++) { | ||
231 | /* | ||
232 | * keep track of first free slot | ||
233 | */ | ||
234 | if (k == -1 && !nb->owners[i]) | ||
235 | k = i; | ||
236 | |||
237 | /* already present, reuse */ | ||
238 | if (nb->owners[i] == event) | ||
239 | goto done; | ||
240 | } | ||
241 | /* | ||
242 | * not present, so grab a new slot | ||
243 | * starting either at: | ||
244 | */ | ||
245 | if (hwc->idx != -1) { | ||
246 | /* previous assignment */ | ||
247 | i = hwc->idx; | ||
248 | } else if (k != -1) { | ||
249 | /* start from free slot found */ | ||
250 | i = k; | ||
251 | } else { | ||
252 | /* | ||
253 | * event not found, no slot found in | ||
254 | * first pass, try again from the | ||
255 | * beginning | ||
256 | */ | ||
257 | i = 0; | ||
258 | } | ||
259 | j = i; | ||
260 | do { | ||
261 | old = cmpxchg(nb->owners+i, NULL, event); | ||
262 | if (!old) | ||
263 | break; | ||
264 | if (++i == max) | ||
265 | i = 0; | ||
266 | } while (i != j); | ||
267 | done: | ||
268 | if (!old) | ||
269 | return &nb->event_constraints[i]; | ||
270 | |||
271 | return &emptyconstraint; | ||
272 | } | ||
273 | |||
274 | static __initconst struct x86_pmu amd_pmu = { | ||
275 | .name = "AMD", | ||
276 | .handle_irq = x86_pmu_handle_irq, | ||
277 | .disable_all = x86_pmu_disable_all, | ||
278 | .enable_all = x86_pmu_enable_all, | ||
279 | .enable = x86_pmu_enable_event, | ||
280 | .disable = x86_pmu_disable_event, | ||
281 | .eventsel = MSR_K7_EVNTSEL0, | ||
282 | .perfctr = MSR_K7_PERFCTR0, | ||
283 | .event_map = amd_pmu_event_map, | ||
284 | .raw_event = amd_pmu_raw_event, | ||
285 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | ||
286 | .num_events = 4, | ||
287 | .event_bits = 48, | ||
288 | .event_mask = (1ULL << 48) - 1, | ||
289 | .apic = 1, | ||
290 | /* use highest bit to detect overflow */ | ||
291 | .max_period = (1ULL << 47) - 1, | ||
292 | .get_event_constraints = amd_get_event_constraints, | ||
293 | .put_event_constraints = amd_put_event_constraints | ||
294 | }; | ||
295 | |||
296 | static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | ||
297 | { | ||
298 | struct amd_nb *nb; | ||
299 | int i; | ||
300 | |||
301 | nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL); | ||
302 | if (!nb) | ||
303 | return NULL; | ||
304 | |||
305 | memset(nb, 0, sizeof(*nb)); | ||
306 | nb->nb_id = nb_id; | ||
307 | |||
308 | /* | ||
309 | * initialize all possible NB constraints | ||
310 | */ | ||
311 | for (i = 0; i < x86_pmu.num_events; i++) { | ||
312 | set_bit(i, nb->event_constraints[i].idxmsk); | ||
313 | nb->event_constraints[i].weight = 1; | ||
314 | } | ||
315 | return nb; | ||
316 | } | ||
317 | |||
318 | static void amd_pmu_cpu_online(int cpu) | ||
319 | { | ||
320 | struct cpu_hw_events *cpu1, *cpu2; | ||
321 | struct amd_nb *nb = NULL; | ||
322 | int i, nb_id; | ||
323 | |||
324 | if (boot_cpu_data.x86_max_cores < 2) | ||
325 | return; | ||
326 | |||
327 | /* | ||
328 | * function may be called too early in the | ||
329 | * boot process, in which case nb_id is bogus | ||
330 | */ | ||
331 | nb_id = amd_get_nb_id(cpu); | ||
332 | if (nb_id == BAD_APICID) | ||
333 | return; | ||
334 | |||
335 | cpu1 = &per_cpu(cpu_hw_events, cpu); | ||
336 | cpu1->amd_nb = NULL; | ||
337 | |||
338 | raw_spin_lock(&amd_nb_lock); | ||
339 | |||
340 | for_each_online_cpu(i) { | ||
341 | cpu2 = &per_cpu(cpu_hw_events, i); | ||
342 | nb = cpu2->amd_nb; | ||
343 | if (!nb) | ||
344 | continue; | ||
345 | if (nb->nb_id == nb_id) | ||
346 | goto found; | ||
347 | } | ||
348 | |||
349 | nb = amd_alloc_nb(cpu, nb_id); | ||
350 | if (!nb) { | ||
351 | pr_err("perf_events: failed NB allocation for CPU%d\n", cpu); | ||
352 | raw_spin_unlock(&amd_nb_lock); | ||
353 | return; | ||
354 | } | ||
355 | found: | ||
356 | nb->refcnt++; | ||
357 | cpu1->amd_nb = nb; | ||
358 | |||
359 | raw_spin_unlock(&amd_nb_lock); | ||
360 | } | ||
361 | |||
362 | static void amd_pmu_cpu_offline(int cpu) | ||
363 | { | ||
364 | struct cpu_hw_events *cpuhw; | ||
365 | |||
366 | if (boot_cpu_data.x86_max_cores < 2) | ||
367 | return; | ||
368 | |||
369 | cpuhw = &per_cpu(cpu_hw_events, cpu); | ||
370 | |||
371 | raw_spin_lock(&amd_nb_lock); | ||
372 | |||
373 | if (--cpuhw->amd_nb->refcnt == 0) | ||
374 | kfree(cpuhw->amd_nb); | ||
375 | |||
376 | cpuhw->amd_nb = NULL; | ||
377 | |||
378 | raw_spin_unlock(&amd_nb_lock); | ||
379 | } | ||
380 | |||
381 | static __init int amd_pmu_init(void) | ||
382 | { | ||
383 | /* Performance-monitoring supported from K7 and later: */ | ||
384 | if (boot_cpu_data.x86 < 6) | ||
385 | return -ENODEV; | ||
386 | |||
387 | x86_pmu = amd_pmu; | ||
388 | |||
389 | /* Events are common for all AMDs */ | ||
390 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, | ||
391 | sizeof(hw_cache_event_ids)); | ||
392 | |||
393 | /* | ||
394 | * explicitly initialize the boot cpu; other cpus will get | ||
395 | * the cpu hotplug callbacks from smp_init() | ||
396 | */ | ||
397 | amd_pmu_cpu_online(smp_processor_id()); | ||
398 | return 0; | ||
399 | } | ||
400 | |||
401 | #else /* CONFIG_CPU_SUP_AMD */ | ||
402 | |||
403 | static int amd_pmu_init(void) | ||
404 | { | ||
405 | return 0; | ||
406 | } | ||
407 | |||
408 | static void amd_pmu_cpu_online(int cpu) | ||
409 | { | ||
410 | } | ||
411 | |||
412 | static void amd_pmu_cpu_offline(int cpu) | ||
413 | { | ||
414 | } | ||
415 | |||
416 | #endif | ||
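
The core of the NB machinery above is the lock-free slot claim in amd_get_event_constraints() paired with the release in amd_put_event_constraints(): a round-robin cmpxchg over the shared nb->owners[] table. The sketch below reproduces just that pattern in user-space C11 (an illustration only: NUM_SLOTS stands in for x86_pmu.num_events, plain pointers stand in for struct perf_event, and the already-owned/first-free-slot bookkeeping of the kernel version is omitted):

#include <stdatomic.h>
#include <stdio.h>

#define NUM_SLOTS 4				/* stand-in for x86_pmu.num_events */

static _Atomic(void *) owners[NUM_SLOTS];	/* stand-in for nb->owners[] */

/* Claim a slot for 'event', starting at 'start' (e.g. the previous hwc->idx). */
static int claim_slot(void *event, int start)
{
	int i = (start >= 0 && start < NUM_SLOTS) ? start : 0;
	int j = i;

	do {
		void *expected = NULL;

		/* equivalent of cmpxchg(nb->owners + i, NULL, event) */
		if (atomic_compare_exchange_strong(&owners[i], &expected, event))
			return i;	/* constraint: exactly this counter */
		if (++i == NUM_SLOTS)
			i = 0;
	} while (i != j);

	return -1;			/* no slot: the empty constraint */
}

/* Release: scan the whole table, the event may never have been assigned. */
static void release_slot(void *event)
{
	for (int i = 0; i < NUM_SLOTS; i++) {
		void *expected = event;

		if (atomic_compare_exchange_strong(&owners[i], &expected, NULL))
			break;
	}
}

int main(void)
{
	int a, b;

	printf("A -> slot %d\n", claim_slot(&a, -1));
	printf("B -> slot %d\n", claim_slot(&b, -1));
	release_slot(&a);
	release_slot(&b);
	return 0;
}

In the kernel version a failed claim returns &emptyconstraint, so scheduling simply fails for that NB event while other cores own all of the shared counters.
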
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c new file mode 100644 index 000000000000..cf6590cf4a5f --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -0,0 +1,971 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | /* | ||
4 | * Intel PerfMon v3. Used on Core2 and later. | ||
5 | */ | ||
6 | static const u64 intel_perfmon_event_map[] = | ||
7 | { | ||
8 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, | ||
9 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
10 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, | ||
11 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, | ||
12 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
13 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
14 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | ||
15 | }; | ||
16 | |||
17 | static struct event_constraint intel_core_event_constraints[] = | ||
18 | { | ||
19 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
20 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
21 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
22 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
23 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
24 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ | ||
25 | EVENT_CONSTRAINT_END | ||
26 | }; | ||
27 | |||
28 | static struct event_constraint intel_core2_event_constraints[] = | ||
29 | { | ||
30 | FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ | ||
31 | FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ | ||
32 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
33 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
34 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
35 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
36 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
37 | INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ | ||
38 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
39 | INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ | ||
40 | INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ | ||
41 | EVENT_CONSTRAINT_END | ||
42 | }; | ||
43 | |||
44 | static struct event_constraint intel_nehalem_event_constraints[] = | ||
45 | { | ||
46 | FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ | ||
47 | FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ | ||
48 | INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ | ||
49 | INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ | ||
50 | INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ | ||
51 | INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ | ||
52 | INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ | ||
53 | INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ | ||
54 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
55 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | ||
56 | EVENT_CONSTRAINT_END | ||
57 | }; | ||
58 | |||
59 | static struct event_constraint intel_westmere_event_constraints[] = | ||
60 | { | ||
61 | FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ | ||
62 | FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ | ||
63 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
64 | INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ | ||
65 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | ||
66 | EVENT_CONSTRAINT_END | ||
67 | }; | ||
68 | |||
69 | static struct event_constraint intel_gen_event_constraints[] = | ||
70 | { | ||
71 | FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ | ||
72 | FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ | ||
73 | EVENT_CONSTRAINT_END | ||
74 | }; | ||
75 | |||
76 | static u64 intel_pmu_event_map(int hw_event) | ||
77 | { | ||
78 | return intel_perfmon_event_map[hw_event]; | ||
79 | } | ||
80 | |||
81 | static __initconst u64 westmere_hw_cache_event_ids | ||
82 | [PERF_COUNT_HW_CACHE_MAX] | ||
83 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
84 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
85 | { | ||
86 | [ C(L1D) ] = { | ||
87 | [ C(OP_READ) ] = { | ||
88 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ | ||
89 | [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ | ||
90 | }, | ||
91 | [ C(OP_WRITE) ] = { | ||
92 | [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */ | ||
93 | [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ | ||
94 | }, | ||
95 | [ C(OP_PREFETCH) ] = { | ||
96 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
97 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
98 | }, | ||
99 | }, | ||
100 | [ C(L1I ) ] = { | ||
101 | [ C(OP_READ) ] = { | ||
102 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
103 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
104 | }, | ||
105 | [ C(OP_WRITE) ] = { | ||
106 | [ C(RESULT_ACCESS) ] = -1, | ||
107 | [ C(RESULT_MISS) ] = -1, | ||
108 | }, | ||
109 | [ C(OP_PREFETCH) ] = { | ||
110 | [ C(RESULT_ACCESS) ] = 0x0, | ||
111 | [ C(RESULT_MISS) ] = 0x0, | ||
112 | }, | ||
113 | }, | ||
114 | [ C(LL ) ] = { | ||
115 | [ C(OP_READ) ] = { | ||
116 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
117 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
118 | }, | ||
119 | [ C(OP_WRITE) ] = { | ||
120 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
121 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
122 | }, | ||
123 | [ C(OP_PREFETCH) ] = { | ||
124 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
125 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
126 | }, | ||
127 | }, | ||
128 | [ C(DTLB) ] = { | ||
129 | [ C(OP_READ) ] = { | ||
130 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ | ||
131 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
132 | }, | ||
133 | [ C(OP_WRITE) ] = { | ||
134 | [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */ | ||
135 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
136 | }, | ||
137 | [ C(OP_PREFETCH) ] = { | ||
138 | [ C(RESULT_ACCESS) ] = 0x0, | ||
139 | [ C(RESULT_MISS) ] = 0x0, | ||
140 | }, | ||
141 | }, | ||
142 | [ C(ITLB) ] = { | ||
143 | [ C(OP_READ) ] = { | ||
144 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
145 | [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ | ||
146 | }, | ||
147 | [ C(OP_WRITE) ] = { | ||
148 | [ C(RESULT_ACCESS) ] = -1, | ||
149 | [ C(RESULT_MISS) ] = -1, | ||
150 | }, | ||
151 | [ C(OP_PREFETCH) ] = { | ||
152 | [ C(RESULT_ACCESS) ] = -1, | ||
153 | [ C(RESULT_MISS) ] = -1, | ||
154 | }, | ||
155 | }, | ||
156 | [ C(BPU ) ] = { | ||
157 | [ C(OP_READ) ] = { | ||
158 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
159 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
160 | }, | ||
161 | [ C(OP_WRITE) ] = { | ||
162 | [ C(RESULT_ACCESS) ] = -1, | ||
163 | [ C(RESULT_MISS) ] = -1, | ||
164 | }, | ||
165 | [ C(OP_PREFETCH) ] = { | ||
166 | [ C(RESULT_ACCESS) ] = -1, | ||
167 | [ C(RESULT_MISS) ] = -1, | ||
168 | }, | ||
169 | }, | ||
170 | }; | ||
171 | |||
172 | static __initconst u64 nehalem_hw_cache_event_ids | ||
173 | [PERF_COUNT_HW_CACHE_MAX] | ||
174 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
175 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
176 | { | ||
177 | [ C(L1D) ] = { | ||
178 | [ C(OP_READ) ] = { | ||
179 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
180 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
181 | }, | ||
182 | [ C(OP_WRITE) ] = { | ||
183 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
184 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
185 | }, | ||
186 | [ C(OP_PREFETCH) ] = { | ||
187 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
188 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
189 | }, | ||
190 | }, | ||
191 | [ C(L1I ) ] = { | ||
192 | [ C(OP_READ) ] = { | ||
193 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
194 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
195 | }, | ||
196 | [ C(OP_WRITE) ] = { | ||
197 | [ C(RESULT_ACCESS) ] = -1, | ||
198 | [ C(RESULT_MISS) ] = -1, | ||
199 | }, | ||
200 | [ C(OP_PREFETCH) ] = { | ||
201 | [ C(RESULT_ACCESS) ] = 0x0, | ||
202 | [ C(RESULT_MISS) ] = 0x0, | ||
203 | }, | ||
204 | }, | ||
205 | [ C(LL ) ] = { | ||
206 | [ C(OP_READ) ] = { | ||
207 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
208 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
209 | }, | ||
210 | [ C(OP_WRITE) ] = { | ||
211 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
212 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
213 | }, | ||
214 | [ C(OP_PREFETCH) ] = { | ||
215 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
216 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
217 | }, | ||
218 | }, | ||
219 | [ C(DTLB) ] = { | ||
220 | [ C(OP_READ) ] = { | ||
221 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
222 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
223 | }, | ||
224 | [ C(OP_WRITE) ] = { | ||
225 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
226 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
227 | }, | ||
228 | [ C(OP_PREFETCH) ] = { | ||
229 | [ C(RESULT_ACCESS) ] = 0x0, | ||
230 | [ C(RESULT_MISS) ] = 0x0, | ||
231 | }, | ||
232 | }, | ||
233 | [ C(ITLB) ] = { | ||
234 | [ C(OP_READ) ] = { | ||
235 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
236 | [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ | ||
237 | }, | ||
238 | [ C(OP_WRITE) ] = { | ||
239 | [ C(RESULT_ACCESS) ] = -1, | ||
240 | [ C(RESULT_MISS) ] = -1, | ||
241 | }, | ||
242 | [ C(OP_PREFETCH) ] = { | ||
243 | [ C(RESULT_ACCESS) ] = -1, | ||
244 | [ C(RESULT_MISS) ] = -1, | ||
245 | }, | ||
246 | }, | ||
247 | [ C(BPU ) ] = { | ||
248 | [ C(OP_READ) ] = { | ||
249 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
250 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
251 | }, | ||
252 | [ C(OP_WRITE) ] = { | ||
253 | [ C(RESULT_ACCESS) ] = -1, | ||
254 | [ C(RESULT_MISS) ] = -1, | ||
255 | }, | ||
256 | [ C(OP_PREFETCH) ] = { | ||
257 | [ C(RESULT_ACCESS) ] = -1, | ||
258 | [ C(RESULT_MISS) ] = -1, | ||
259 | }, | ||
260 | }, | ||
261 | }; | ||
262 | |||
263 | static __initconst u64 core2_hw_cache_event_ids | ||
264 | [PERF_COUNT_HW_CACHE_MAX] | ||
265 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
266 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
267 | { | ||
268 | [ C(L1D) ] = { | ||
269 | [ C(OP_READ) ] = { | ||
270 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
271 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
272 | }, | ||
273 | [ C(OP_WRITE) ] = { | ||
274 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
275 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
276 | }, | ||
277 | [ C(OP_PREFETCH) ] = { | ||
278 | [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ | ||
279 | [ C(RESULT_MISS) ] = 0, | ||
280 | }, | ||
281 | }, | ||
282 | [ C(L1I ) ] = { | ||
283 | [ C(OP_READ) ] = { | ||
284 | [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ | ||
285 | [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ | ||
286 | }, | ||
287 | [ C(OP_WRITE) ] = { | ||
288 | [ C(RESULT_ACCESS) ] = -1, | ||
289 | [ C(RESULT_MISS) ] = -1, | ||
290 | }, | ||
291 | [ C(OP_PREFETCH) ] = { | ||
292 | [ C(RESULT_ACCESS) ] = 0, | ||
293 | [ C(RESULT_MISS) ] = 0, | ||
294 | }, | ||
295 | }, | ||
296 | [ C(LL ) ] = { | ||
297 | [ C(OP_READ) ] = { | ||
298 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
299 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
300 | }, | ||
301 | [ C(OP_WRITE) ] = { | ||
302 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
303 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
304 | }, | ||
305 | [ C(OP_PREFETCH) ] = { | ||
306 | [ C(RESULT_ACCESS) ] = 0, | ||
307 | [ C(RESULT_MISS) ] = 0, | ||
308 | }, | ||
309 | }, | ||
310 | [ C(DTLB) ] = { | ||
311 | [ C(OP_READ) ] = { | ||
312 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
313 | [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ | ||
314 | }, | ||
315 | [ C(OP_WRITE) ] = { | ||
316 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
317 | [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ | ||
318 | }, | ||
319 | [ C(OP_PREFETCH) ] = { | ||
320 | [ C(RESULT_ACCESS) ] = 0, | ||
321 | [ C(RESULT_MISS) ] = 0, | ||
322 | }, | ||
323 | }, | ||
324 | [ C(ITLB) ] = { | ||
325 | [ C(OP_READ) ] = { | ||
326 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
327 | [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ | ||
328 | }, | ||
329 | [ C(OP_WRITE) ] = { | ||
330 | [ C(RESULT_ACCESS) ] = -1, | ||
331 | [ C(RESULT_MISS) ] = -1, | ||
332 | }, | ||
333 | [ C(OP_PREFETCH) ] = { | ||
334 | [ C(RESULT_ACCESS) ] = -1, | ||
335 | [ C(RESULT_MISS) ] = -1, | ||
336 | }, | ||
337 | }, | ||
338 | [ C(BPU ) ] = { | ||
339 | [ C(OP_READ) ] = { | ||
340 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
341 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
342 | }, | ||
343 | [ C(OP_WRITE) ] = { | ||
344 | [ C(RESULT_ACCESS) ] = -1, | ||
345 | [ C(RESULT_MISS) ] = -1, | ||
346 | }, | ||
347 | [ C(OP_PREFETCH) ] = { | ||
348 | [ C(RESULT_ACCESS) ] = -1, | ||
349 | [ C(RESULT_MISS) ] = -1, | ||
350 | }, | ||
351 | }, | ||
352 | }; | ||
353 | |||
354 | static __initconst u64 atom_hw_cache_event_ids | ||
355 | [PERF_COUNT_HW_CACHE_MAX] | ||
356 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
357 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
358 | { | ||
359 | [ C(L1D) ] = { | ||
360 | [ C(OP_READ) ] = { | ||
361 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ | ||
362 | [ C(RESULT_MISS) ] = 0, | ||
363 | }, | ||
364 | [ C(OP_WRITE) ] = { | ||
365 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ | ||
366 | [ C(RESULT_MISS) ] = 0, | ||
367 | }, | ||
368 | [ C(OP_PREFETCH) ] = { | ||
369 | [ C(RESULT_ACCESS) ] = 0x0, | ||
370 | [ C(RESULT_MISS) ] = 0, | ||
371 | }, | ||
372 | }, | ||
373 | [ C(L1I ) ] = { | ||
374 | [ C(OP_READ) ] = { | ||
375 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
376 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
377 | }, | ||
378 | [ C(OP_WRITE) ] = { | ||
379 | [ C(RESULT_ACCESS) ] = -1, | ||
380 | [ C(RESULT_MISS) ] = -1, | ||
381 | }, | ||
382 | [ C(OP_PREFETCH) ] = { | ||
383 | [ C(RESULT_ACCESS) ] = 0, | ||
384 | [ C(RESULT_MISS) ] = 0, | ||
385 | }, | ||
386 | }, | ||
387 | [ C(LL ) ] = { | ||
388 | [ C(OP_READ) ] = { | ||
389 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
390 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
391 | }, | ||
392 | [ C(OP_WRITE) ] = { | ||
393 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
394 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
395 | }, | ||
396 | [ C(OP_PREFETCH) ] = { | ||
397 | [ C(RESULT_ACCESS) ] = 0, | ||
398 | [ C(RESULT_MISS) ] = 0, | ||
399 | }, | ||
400 | }, | ||
401 | [ C(DTLB) ] = { | ||
402 | [ C(OP_READ) ] = { | ||
403 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ | ||
404 | [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ | ||
405 | }, | ||
406 | [ C(OP_WRITE) ] = { | ||
407 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ | ||
408 | [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ | ||
409 | }, | ||
410 | [ C(OP_PREFETCH) ] = { | ||
411 | [ C(RESULT_ACCESS) ] = 0, | ||
412 | [ C(RESULT_MISS) ] = 0, | ||
413 | }, | ||
414 | }, | ||
415 | [ C(ITLB) ] = { | ||
416 | [ C(OP_READ) ] = { | ||
417 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
418 | [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ | ||
419 | }, | ||
420 | [ C(OP_WRITE) ] = { | ||
421 | [ C(RESULT_ACCESS) ] = -1, | ||
422 | [ C(RESULT_MISS) ] = -1, | ||
423 | }, | ||
424 | [ C(OP_PREFETCH) ] = { | ||
425 | [ C(RESULT_ACCESS) ] = -1, | ||
426 | [ C(RESULT_MISS) ] = -1, | ||
427 | }, | ||
428 | }, | ||
429 | [ C(BPU ) ] = { | ||
430 | [ C(OP_READ) ] = { | ||
431 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
432 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
433 | }, | ||
434 | [ C(OP_WRITE) ] = { | ||
435 | [ C(RESULT_ACCESS) ] = -1, | ||
436 | [ C(RESULT_MISS) ] = -1, | ||
437 | }, | ||
438 | [ C(OP_PREFETCH) ] = { | ||
439 | [ C(RESULT_ACCESS) ] = -1, | ||
440 | [ C(RESULT_MISS) ] = -1, | ||
441 | }, | ||
442 | }, | ||
443 | }; | ||
444 | |||
445 | static u64 intel_pmu_raw_event(u64 hw_event) | ||
446 | { | ||
447 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
448 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
449 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
450 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
451 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | ||
452 | |||
453 | #define CORE_EVNTSEL_MASK \ | ||
454 | (INTEL_ARCH_EVTSEL_MASK | \ | ||
455 | INTEL_ARCH_UNIT_MASK | \ | ||
456 | INTEL_ARCH_EDGE_MASK | \ | ||
457 | INTEL_ARCH_INV_MASK | \ | ||
458 | INTEL_ARCH_CNT_MASK) | ||
459 | |||
460 | return hw_event & CORE_EVNTSEL_MASK; | ||
461 | } | ||
462 | |||
463 | static void intel_pmu_enable_bts(u64 config) | ||
464 | { | ||
465 | unsigned long debugctlmsr; | ||
466 | |||
467 | debugctlmsr = get_debugctlmsr(); | ||
468 | |||
469 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
470 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
471 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
472 | |||
473 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
474 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
475 | |||
476 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
477 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
478 | |||
479 | update_debugctlmsr(debugctlmsr); | ||
480 | } | ||
481 | |||
482 | static void intel_pmu_disable_bts(void) | ||
483 | { | ||
484 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
485 | unsigned long debugctlmsr; | ||
486 | |||
487 | if (!cpuc->ds) | ||
488 | return; | ||
489 | |||
490 | debugctlmsr = get_debugctlmsr(); | ||
491 | |||
492 | debugctlmsr &= | ||
493 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
494 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
495 | |||
496 | update_debugctlmsr(debugctlmsr); | ||
497 | } | ||
498 | |||
499 | static void intel_pmu_disable_all(void) | ||
500 | { | ||
501 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
502 | |||
503 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | ||
504 | |||
505 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | ||
506 | intel_pmu_disable_bts(); | ||
507 | } | ||
508 | |||
509 | static void intel_pmu_enable_all(void) | ||
510 | { | ||
511 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
512 | |||
513 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | ||
514 | |||
515 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | ||
516 | struct perf_event *event = | ||
517 | cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
518 | |||
519 | if (WARN_ON_ONCE(!event)) | ||
520 | return; | ||
521 | |||
522 | intel_pmu_enable_bts(event->hw.config); | ||
523 | } | ||
524 | } | ||
525 | |||
526 | static inline u64 intel_pmu_get_status(void) | ||
527 | { | ||
528 | u64 status; | ||
529 | |||
530 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | ||
531 | |||
532 | return status; | ||
533 | } | ||
534 | |||
535 | static inline void intel_pmu_ack_status(u64 ack) | ||
536 | { | ||
537 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | ||
538 | } | ||
539 | |||
540 | static inline void | ||
541 | intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) | ||
542 | { | ||
543 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
544 | u64 ctrl_val, mask; | ||
545 | |||
546 | mask = 0xfULL << (idx * 4); | ||
547 | |||
548 | rdmsrl(hwc->config_base, ctrl_val); | ||
549 | ctrl_val &= ~mask; | ||
550 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | ||
551 | } | ||
552 | |||
553 | static void intel_pmu_drain_bts_buffer(void) | ||
554 | { | ||
555 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
556 | struct debug_store *ds = cpuc->ds; | ||
557 | struct bts_record { | ||
558 | u64 from; | ||
559 | u64 to; | ||
560 | u64 flags; | ||
561 | }; | ||
562 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
563 | struct bts_record *at, *top; | ||
564 | struct perf_output_handle handle; | ||
565 | struct perf_event_header header; | ||
566 | struct perf_sample_data data; | ||
567 | struct pt_regs regs; | ||
568 | |||
569 | if (!event) | ||
570 | return; | ||
571 | |||
572 | if (!ds) | ||
573 | return; | ||
574 | |||
575 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
576 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
577 | |||
578 | if (top <= at) | ||
579 | return; | ||
580 | |||
581 | ds->bts_index = ds->bts_buffer_base; | ||
582 | |||
583 | |||
584 | data.period = event->hw.last_period; | ||
585 | data.addr = 0; | ||
586 | data.raw = NULL; | ||
587 | regs.ip = 0; | ||
588 | |||
589 | /* | ||
590 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
591 | * We will overwrite the from and to addresses before we output | ||
592 | * the sample. | ||
593 | */ | ||
594 | perf_prepare_sample(&header, &data, event, ®s); | ||
595 | |||
596 | if (perf_output_begin(&handle, event, | ||
597 | header.size * (top - at), 1, 1)) | ||
598 | return; | ||
599 | |||
600 | for (; at < top; at++) { | ||
601 | data.ip = at->from; | ||
602 | data.addr = at->to; | ||
603 | |||
604 | perf_output_sample(&handle, &header, &data, event); | ||
605 | } | ||
606 | |||
607 | perf_output_end(&handle); | ||
608 | |||
609 | /* There's new data available. */ | ||
610 | event->hw.interrupts++; | ||
611 | event->pending_kill = POLL_IN; | ||
612 | } | ||
613 | |||
614 | static inline void | ||
615 | intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
616 | { | ||
617 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
618 | intel_pmu_disable_bts(); | ||
619 | intel_pmu_drain_bts_buffer(); | ||
620 | return; | ||
621 | } | ||
622 | |||
623 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
624 | intel_pmu_disable_fixed(hwc, idx); | ||
625 | return; | ||
626 | } | ||
627 | |||
628 | x86_pmu_disable_event(hwc, idx); | ||
629 | } | ||
630 | |||
631 | static inline void | ||
632 | intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) | ||
633 | { | ||
634 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
635 | u64 ctrl_val, bits, mask; | ||
636 | int err; | ||
637 | |||
638 | /* | ||
639 | * Enable IRQ generation (0x8), | ||
640 | * and enable ring-3 counting (0x2) and ring-0 counting (0x1) | ||
641 | * if requested: | ||
642 | */ | ||
643 | bits = 0x8ULL; | ||
644 | if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) | ||
645 | bits |= 0x2; | ||
646 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
647 | bits |= 0x1; | ||
648 | |||
649 | /* | ||
650 | * ANY bit is supported in v3 and up | ||
651 | */ | ||
652 | if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) | ||
653 | bits |= 0x4; | ||
654 | |||
655 | bits <<= (idx * 4); | ||
656 | mask = 0xfULL << (idx * 4); | ||
657 | |||
658 | rdmsrl(hwc->config_base, ctrl_val); | ||
659 | ctrl_val &= ~mask; | ||
660 | ctrl_val |= bits; | ||
661 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
662 | } | ||
663 | |||
664 | static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
665 | { | ||
666 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
667 | if (!__get_cpu_var(cpu_hw_events).enabled) | ||
668 | return; | ||
669 | |||
670 | intel_pmu_enable_bts(hwc->config); | ||
671 | return; | ||
672 | } | ||
673 | |||
674 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
675 | intel_pmu_enable_fixed(hwc, idx); | ||
676 | return; | ||
677 | } | ||
678 | |||
679 | __x86_pmu_enable_event(hwc, idx); | ||
680 | } | ||
681 | |||
682 | /* | ||
683 | * Save and restart an expired event. Called from NMI context, | ||
684 | * so it has to be careful about preempting normal event ops: | ||
685 | */ | ||
686 | static int intel_pmu_save_and_restart(struct perf_event *event) | ||
687 | { | ||
688 | struct hw_perf_event *hwc = &event->hw; | ||
689 | int idx = hwc->idx; | ||
690 | int ret; | ||
691 | |||
692 | x86_perf_event_update(event, hwc, idx); | ||
693 | ret = x86_perf_event_set_period(event, hwc, idx); | ||
694 | |||
695 | return ret; | ||
696 | } | ||
697 | |||
698 | static void intel_pmu_reset(void) | ||
699 | { | ||
700 | struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; | ||
701 | unsigned long flags; | ||
702 | int idx; | ||
703 | |||
704 | if (!x86_pmu.num_events) | ||
705 | return; | ||
706 | |||
707 | local_irq_save(flags); | ||
708 | |||
709 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | ||
710 | |||
711 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
712 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | ||
713 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | ||
714 | } | ||
715 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | ||
716 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | ||
717 | } | ||
718 | if (ds) | ||
719 | ds->bts_index = ds->bts_buffer_base; | ||
720 | |||
721 | local_irq_restore(flags); | ||
722 | } | ||
723 | |||
724 | /* | ||
725 | * This handler is triggered by the local APIC, so the APIC IRQ handling | ||
726 | * rules apply: | ||
727 | */ | ||
728 | static int intel_pmu_handle_irq(struct pt_regs *regs) | ||
729 | { | ||
730 | struct perf_sample_data data; | ||
731 | struct cpu_hw_events *cpuc; | ||
732 | int bit, loops; | ||
733 | u64 ack, status; | ||
734 | |||
735 | data.addr = 0; | ||
736 | data.raw = NULL; | ||
737 | |||
738 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
739 | |||
740 | perf_disable(); | ||
741 | intel_pmu_drain_bts_buffer(); | ||
742 | status = intel_pmu_get_status(); | ||
743 | if (!status) { | ||
744 | perf_enable(); | ||
745 | return 0; | ||
746 | } | ||
747 | |||
748 | loops = 0; | ||
749 | again: | ||
750 | if (++loops > 100) { | ||
751 | WARN_ONCE(1, "perfevents: irq loop stuck!\n"); | ||
752 | perf_event_print_debug(); | ||
753 | intel_pmu_reset(); | ||
754 | perf_enable(); | ||
755 | return 1; | ||
756 | } | ||
757 | |||
758 | inc_irq_stat(apic_perf_irqs); | ||
759 | ack = status; | ||
760 | for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | ||
761 | struct perf_event *event = cpuc->events[bit]; | ||
762 | |||
763 | clear_bit(bit, (unsigned long *) &status); | ||
764 | if (!test_bit(bit, cpuc->active_mask)) | ||
765 | continue; | ||
766 | |||
767 | if (!intel_pmu_save_and_restart(event)) | ||
768 | continue; | ||
769 | |||
770 | data.period = event->hw.last_period; | ||
771 | |||
772 | if (perf_event_overflow(event, 1, &data, regs)) | ||
773 | intel_pmu_disable_event(&event->hw, bit); | ||
774 | } | ||
775 | |||
776 | intel_pmu_ack_status(ack); | ||
777 | |||
778 | /* | ||
779 | * Repeat if there is more work to be done: | ||
780 | */ | ||
781 | status = intel_pmu_get_status(); | ||
782 | if (status) | ||
783 | goto again; | ||
784 | |||
785 | perf_enable(); | ||
786 | |||
787 | return 1; | ||
788 | } | ||
789 | |||
790 | static struct event_constraint bts_constraint = | ||
791 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
792 | |||
793 | static struct event_constraint * | ||
794 | intel_special_constraints(struct perf_event *event) | ||
795 | { | ||
796 | unsigned int hw_event; | ||
797 | |||
798 | hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; | ||
799 | |||
800 | if (unlikely((hw_event == | ||
801 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | ||
802 | (event->hw.sample_period == 1))) { | ||
803 | |||
804 | return &bts_constraint; | ||
805 | } | ||
806 | return NULL; | ||
807 | } | ||
808 | |||
809 | static struct event_constraint * | ||
810 | intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
811 | { | ||
812 | struct event_constraint *c; | ||
813 | |||
814 | c = intel_special_constraints(event); | ||
815 | if (c) | ||
816 | return c; | ||
817 | |||
818 | return x86_get_event_constraints(cpuc, event); | ||
819 | } | ||
820 | |||
821 | static __initconst struct x86_pmu core_pmu = { | ||
822 | .name = "core", | ||
823 | .handle_irq = x86_pmu_handle_irq, | ||
824 | .disable_all = x86_pmu_disable_all, | ||
825 | .enable_all = x86_pmu_enable_all, | ||
826 | .enable = x86_pmu_enable_event, | ||
827 | .disable = x86_pmu_disable_event, | ||
828 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
829 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
830 | .event_map = intel_pmu_event_map, | ||
831 | .raw_event = intel_pmu_raw_event, | ||
832 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
833 | .apic = 1, | ||
834 | /* | ||
835 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
836 | * so we install an artificial 1<<31 period regardless of | ||
837 | * the generic event period: | ||
838 | */ | ||
839 | .max_period = (1ULL << 31) - 1, | ||
840 | .get_event_constraints = intel_get_event_constraints, | ||
841 | .event_constraints = intel_core_event_constraints, | ||
842 | }; | ||
843 | |||
844 | static __initconst struct x86_pmu intel_pmu = { | ||
845 | .name = "Intel", | ||
846 | .handle_irq = intel_pmu_handle_irq, | ||
847 | .disable_all = intel_pmu_disable_all, | ||
848 | .enable_all = intel_pmu_enable_all, | ||
849 | .enable = intel_pmu_enable_event, | ||
850 | .disable = intel_pmu_disable_event, | ||
851 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
852 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
853 | .event_map = intel_pmu_event_map, | ||
854 | .raw_event = intel_pmu_raw_event, | ||
855 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
856 | .apic = 1, | ||
857 | /* | ||
858 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
859 | * so we install an artificial 1<<31 period regardless of | ||
860 | * the generic event period: | ||
861 | */ | ||
862 | .max_period = (1ULL << 31) - 1, | ||
863 | .enable_bts = intel_pmu_enable_bts, | ||
864 | .disable_bts = intel_pmu_disable_bts, | ||
865 | .get_event_constraints = intel_get_event_constraints | ||
866 | }; | ||
867 | |||
868 | static __init int intel_pmu_init(void) | ||
869 | { | ||
870 | union cpuid10_edx edx; | ||
871 | union cpuid10_eax eax; | ||
872 | unsigned int unused; | ||
873 | unsigned int ebx; | ||
874 | int version; | ||
875 | |||
876 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
877 | /* check for P6 processor family */ | ||
878 | if (boot_cpu_data.x86 == 6) { | ||
879 | return p6_pmu_init(); | ||
880 | } else { | ||
881 | return -ENODEV; | ||
882 | } | ||
883 | } | ||
884 | |||
885 | /* | ||
886 | * Check whether the Architectural PerfMon supports | ||
887 | * Branch Misses Retired hw_event or not. | ||
888 | */ | ||
889 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | ||
890 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | ||
891 | return -ENODEV; | ||
892 | |||
893 | version = eax.split.version_id; | ||
894 | if (version < 2) | ||
895 | x86_pmu = core_pmu; | ||
896 | else | ||
897 | x86_pmu = intel_pmu; | ||
898 | |||
899 | x86_pmu.version = version; | ||
900 | x86_pmu.num_events = eax.split.num_events; | ||
901 | x86_pmu.event_bits = eax.split.bit_width; | ||
902 | x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; | ||
903 | |||
904 | /* | ||
905 | * Quirk: v2 perfmon does not report fixed-purpose events, so | ||
906 | * assume at least 3 events: | ||
907 | */ | ||
908 | if (version > 1) | ||
909 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | ||
910 | |||
911 | /* | ||
912 | * Install the hw-cache-events table: | ||
913 | */ | ||
914 | switch (boot_cpu_data.x86_model) { | ||
915 | case 14: /* 65 nm core solo/duo, "Yonah" */ | ||
916 | pr_cont("Core events, "); | ||
917 | break; | ||
918 | |||
919 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | ||
920 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | ||
921 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | ||
922 | case 29: /* six-core 45 nm xeon "Dunnington" */ | ||
923 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | ||
924 | sizeof(hw_cache_event_ids)); | ||
925 | |||
926 | x86_pmu.event_constraints = intel_core2_event_constraints; | ||
927 | pr_cont("Core2 events, "); | ||
928 | break; | ||
929 | |||
930 | case 26: /* 45 nm nehalem, "Bloomfield" */ | ||
931 | case 30: /* 45 nm nehalem, "Lynnfield" */ | ||
932 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | ||
933 | sizeof(hw_cache_event_ids)); | ||
934 | |||
935 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | ||
936 | pr_cont("Nehalem/Corei7 events, "); | ||
937 | break; | ||
938 | case 28: | ||
939 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | ||
940 | sizeof(hw_cache_event_ids)); | ||
941 | |||
942 | x86_pmu.event_constraints = intel_gen_event_constraints; | ||
943 | pr_cont("Atom events, "); | ||
944 | break; | ||
945 | |||
946 | case 37: /* 32 nm nehalem, "Clarkdale" */ | ||
947 | case 44: /* 32 nm nehalem, "Gulftown" */ | ||
948 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, | ||
949 | sizeof(hw_cache_event_ids)); | ||
950 | |||
951 | x86_pmu.event_constraints = intel_westmere_event_constraints; | ||
952 | pr_cont("Westmere events, "); | ||
953 | break; | ||
954 | default: | ||
955 | /* | ||
956 | * default constraints for v2 and up | ||
957 | */ | ||
958 | x86_pmu.event_constraints = intel_gen_event_constraints; | ||
959 | pr_cont("generic architected perfmon, "); | ||
960 | } | ||
961 | return 0; | ||
962 | } | ||
963 | |||
964 | #else /* CONFIG_CPU_SUP_INTEL */ | ||
965 | |||
966 | static int intel_pmu_init(void) | ||
967 | { | ||
968 | return 0; | ||
969 | } | ||
970 | |||
971 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
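
A note on the fixed-counter helpers above: each fixed counter owns one 4-bit nibble in MSR_ARCH_PERFMON_FIXED_CTR_CTRL (PMI enable 0x8, ANY-thread 0x4 on perfmon v3+, ring-3 0x2, ring-0 0x1), so intel_pmu_enable_fixed() clears the nibble for its index and ORs the new bits in, while intel_pmu_disable_fixed() only clears it. A stand-alone sketch of just that bit arithmetic (illustration only; the rdmsrl/wrmsrl pair is replaced by a local variable):

#include <stdio.h>
#include <stdint.h>

static uint64_t fixed_ctrl_bits(int idx, int usr, int os, int any)
{
	uint64_t bits = 0x8ULL;			/* enable PMI on overflow */

	if (usr)
		bits |= 0x2;			/* count in ring 3 */
	if (os)
		bits |= 0x1;			/* count in ring 0 */
	if (any)
		bits |= 0x4;			/* ANY-thread bit, perfmon v3 and up */

	return bits << (idx * 4);
}

int main(void)
{
	uint64_t ctrl = 0;	/* stand-in for MSR_ARCH_PERFMON_FIXED_CTR_CTRL */
	int idx = 1;		/* fixed counter 1: UNHALTED_CORE_CYCLES */

	ctrl &= ~(0xfULL << (idx * 4));		/* the "ctrl_val &= ~mask" step */
	ctrl |= fixed_ctrl_bits(idx, 1, 1, 0);	/* user + kernel, no ANY */

	printf("FIXED_CTR_CTRL = %#llx\n", (unsigned long long)ctrl);
	return 0;
}
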
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c new file mode 100644 index 000000000000..1ca5ba078afd --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
@@ -0,0 +1,157 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | /* | ||
4 | * Not sure about some of these | ||
5 | */ | ||
6 | static const u64 p6_perfmon_event_map[] = | ||
7 | { | ||
8 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, | ||
9 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
10 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, | ||
11 | [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, | ||
12 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
13 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
14 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, | ||
15 | }; | ||
16 | |||
17 | static u64 p6_pmu_event_map(int hw_event) | ||
18 | { | ||
19 | return p6_perfmon_event_map[hw_event]; | ||
20 | } | ||
21 | |||
22 | /* | ||
23 | * Event setting that is specified not to count anything. | ||
24 | * We use this to effectively disable a counter. | ||
25 | * | ||
26 | * L2_RQSTS with 0 MESI unit mask. | ||
27 | */ | ||
28 | #define P6_NOP_EVENT 0x0000002EULL | ||
29 | |||
30 | static u64 p6_pmu_raw_event(u64 hw_event) | ||
31 | { | ||
32 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
33 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
34 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
35 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
36 | #define P6_EVNTSEL_REG_MASK 0xFF000000ULL | ||
37 | |||
38 | #define P6_EVNTSEL_MASK \ | ||
39 | (P6_EVNTSEL_EVENT_MASK | \ | ||
40 | P6_EVNTSEL_UNIT_MASK | \ | ||
41 | P6_EVNTSEL_EDGE_MASK | \ | ||
42 | P6_EVNTSEL_INV_MASK | \ | ||
43 | P6_EVNTSEL_REG_MASK) | ||
44 | |||
45 | return hw_event & P6_EVNTSEL_MASK; | ||
46 | } | ||
47 | |||
48 | static struct event_constraint p6_event_constraints[] = | ||
49 | { | ||
50 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | ||
51 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
52 | INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ | ||
53 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
54 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
55 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
56 | EVENT_CONSTRAINT_END | ||
57 | }; | ||
58 | |||
59 | static void p6_pmu_disable_all(void) | ||
60 | { | ||
61 | u64 val; | ||
62 | |||
63 | /* p6 only has one enable register */ | ||
64 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
65 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
66 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
67 | } | ||
68 | |||
69 | static void p6_pmu_enable_all(void) | ||
70 | { | ||
71 | unsigned long val; | ||
72 | |||
73 | /* p6 only has one enable register */ | ||
74 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
75 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
76 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
77 | } | ||
78 | |||
79 | static inline void | ||
80 | p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
81 | { | ||
82 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
83 | u64 val = P6_NOP_EVENT; | ||
84 | |||
85 | if (cpuc->enabled) | ||
86 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
87 | |||
88 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
89 | } | ||
90 | |||
91 | static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
92 | { | ||
93 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
94 | u64 val; | ||
95 | |||
96 | val = hwc->config; | ||
97 | if (cpuc->enabled) | ||
98 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
99 | |||
100 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
101 | } | ||
102 | |||
103 | static __initconst struct x86_pmu p6_pmu = { | ||
104 | .name = "p6", | ||
105 | .handle_irq = x86_pmu_handle_irq, | ||
106 | .disable_all = p6_pmu_disable_all, | ||
107 | .enable_all = p6_pmu_enable_all, | ||
108 | .enable = p6_pmu_enable_event, | ||
109 | .disable = p6_pmu_disable_event, | ||
110 | .eventsel = MSR_P6_EVNTSEL0, | ||
111 | .perfctr = MSR_P6_PERFCTR0, | ||
112 | .event_map = p6_pmu_event_map, | ||
113 | .raw_event = p6_pmu_raw_event, | ||
114 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | ||
115 | .apic = 1, | ||
116 | .max_period = (1ULL << 31) - 1, | ||
117 | .version = 0, | ||
118 | .num_events = 2, | ||
119 | /* | ||
120 | * Events have 40 bits implemented. However, they are designed such | ||
121 | * that bits [32-39] are sign extensions of bit 31. As such, the | ||
122 | * effective width of an event for a P6-like PMU is 32 bits only. | ||
123 | * | ||
124 | * See the IA-32 Intel Architecture Software Developer's Manual, Vol 3B. | ||
125 | */ | ||
126 | .event_bits = 32, | ||
127 | .event_mask = (1ULL << 32) - 1, | ||
128 | .get_event_constraints = x86_get_event_constraints, | ||
129 | .event_constraints = p6_event_constraints, | ||
130 | }; | ||
131 | |||
132 | static __init int p6_pmu_init(void) | ||
133 | { | ||
134 | switch (boot_cpu_data.x86_model) { | ||
135 | case 1: | ||
136 | case 3: /* Pentium Pro */ | ||
137 | case 5: | ||
138 | case 6: /* Pentium II */ | ||
139 | case 7: | ||
140 | case 8: | ||
141 | case 11: /* Pentium III */ | ||
142 | case 9: | ||
143 | case 13: | ||
144 | /* Pentium M */ | ||
145 | break; | ||
146 | default: | ||
147 | pr_cont("unsupported p6 CPU model %d ", | ||
148 | boot_cpu_data.x86_model); | ||
149 | return -ENODEV; | ||
150 | } | ||
151 | |||
152 | x86_pmu = p6_pmu; | ||
153 | |||
154 | return 0; | ||
155 | } | ||
156 | |||
157 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
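
Finally, to make the counter-width comment in p6_pmu concrete: the counters implement 40 bits, but bits 32-39 mirror bit 31, so any raw value with bit 31 set reads back as a huge (effectively negative) number; that is why the effective width is 32 bits and .max_period stays at (1ULL << 31) - 1. A small sketch of the sign extension (illustration only, assuming exactly the behaviour that comment describes):

#include <stdio.h>
#include <stdint.h>

/* Model a P6 counter read: 40 implemented bits, bits 32-39 copy bit 31. */
static uint64_t p6_counter_read(uint32_t raw32)
{
	uint64_t val = raw32;

	if (val & (1ULL << 31))
		val |= 0xffULL << 32;		/* bits 32-39 mirror bit 31 */

	return val & ((1ULL << 40) - 1);
}

int main(void)
{
	printf("0x7fffffff reads as %#llx\n",
	       (unsigned long long)p6_counter_read(0x7fffffffu));
	printf("0x80000000 reads as %#llx\n",
	       (unsigned long long)p6_counter_read(0x80000000u));
	return 0;
}
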