author    Peter Zijlstra <a.p.zijlstra@chello.nl>  2010-02-26 06:05:05 -0500
committer Ingo Molnar <mingo@elte.hu>              2010-02-26 09:44:04 -0500
commit    f22f54f4491acd987a6c5a92de52b60ca8b58b61 (patch)
tree      7eae87b08e828e8f0b1223f267abb004d6a5f7e7 /arch/x86/kernel/cpu
parent    48fb4fdd6b667ebeccbc6cde0a8a5a148d5c6b68 (diff)
perf_events, x86: Split PMU definitions into separate files
Split amd, p6, intel into separate files so that we can easily deal with
CONFIG_CPU_SUP_* things, needed to make things build now that perf_event.c
relies on symbols from amd.c.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--   arch/x86/kernel/cpu/perf_event.c        1524
-rw-r--r--   arch/x86/kernel/cpu/perf_event_amd.c     416
-rw-r--r--   arch/x86/kernel/cpu/perf_event_intel.c   971
-rw-r--r--   arch/x86/kernel/cpu/perf_event_p6.c      157
4 files changed, 1554 insertions(+), 1514 deletions(-)
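For orientation, a minimal sketch of the layout this patch produces. The CONFIG_CPU_SUP_AMD guard is visible in the new perf_event_amd.c below; matching CONFIG_CPU_SUP_INTEL guards in the p6/intel files are assumed here, not shown in this hunk. perf_event.c keeps the generic x86 PMU core and textually includes the vendor files near the end, so each vendor's event tables and callbacks stay in the same translation unit but compile away when the corresponding CONFIG_CPU_SUP_* option is off.

/* arch/x86/kernel/cpu/perf_event_amd.c -- as added by this patch */
#ifdef CONFIG_CPU_SUP_AMD
/* amd_pmu, amd_hw_cache_event_ids, NB constraint handling, ... */
#endif /* CONFIG_CPU_SUP_AMD */

/* arch/x86/kernel/cpu/perf_event.c -- bottom of the file after the split */
#include "perf_event_amd.c"
#include "perf_event_p6.c"
#include "perf_event_intel.c"

Because the vendor files are included into perf_event.c rather than built as separate objects, the core file can keep referring to the vendor code directly, without new Makefile objects or exported symbols.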
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index dd09ccc867d3..641ccb9dddbc 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -161,8 +161,6 @@ struct x86_pmu {
161 161
162static struct x86_pmu x86_pmu __read_mostly; 162static struct x86_pmu x86_pmu __read_mostly;
163 163
164static raw_spinlock_t amd_nb_lock;
165
166static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { 164static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
167 .enabled = 1, 165 .enabled = 1,
168}; 166};
@@ -171,140 +169,6 @@ static int x86_perf_event_set_period(struct perf_event *event,
171 struct hw_perf_event *hwc, int idx); 169 struct hw_perf_event *hwc, int idx);
172 170
173/* 171/*
174 * Not sure about some of these
175 */
176static const u64 p6_perfmon_event_map[] =
177{
178 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
179 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
180 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
181 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
182 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
183 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
184 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
185};
186
187static u64 p6_pmu_event_map(int hw_event)
188{
189 return p6_perfmon_event_map[hw_event];
190}
191
192/*
193 * Event setting that is specified not to count anything.
194 * We use this to effectively disable a counter.
195 *
196 * L2_RQSTS with 0 MESI unit mask.
197 */
198#define P6_NOP_EVENT 0x0000002EULL
199
200static u64 p6_pmu_raw_event(u64 hw_event)
201{
202#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
203#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
204#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
205#define P6_EVNTSEL_INV_MASK 0x00800000ULL
206#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
207
208#define P6_EVNTSEL_MASK \
209 (P6_EVNTSEL_EVENT_MASK | \
210 P6_EVNTSEL_UNIT_MASK | \
211 P6_EVNTSEL_EDGE_MASK | \
212 P6_EVNTSEL_INV_MASK | \
213 P6_EVNTSEL_REG_MASK)
214
215 return hw_event & P6_EVNTSEL_MASK;
216}
217
218static struct event_constraint intel_p6_event_constraints[] =
219{
220 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
221 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
222 INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
223 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
224 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
225 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
226 EVENT_CONSTRAINT_END
227};
228
229/*
230 * Intel PerfMon v3. Used on Core2 and later.
231 */
232static const u64 intel_perfmon_event_map[] =
233{
234 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
235 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
236 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
237 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
238 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
239 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
240 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
241};
242
243static struct event_constraint intel_core_event_constraints[] =
244{
245 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
246 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
247 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
248 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
249 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
250 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
251 EVENT_CONSTRAINT_END
252};
253
254static struct event_constraint intel_core2_event_constraints[] =
255{
256 FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
257 FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
258 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
259 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
260 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
261 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
262 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
263 INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
264 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
265 INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
266 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
267 EVENT_CONSTRAINT_END
268};
269
270static struct event_constraint intel_nehalem_event_constraints[] =
271{
272 FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
273 FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
274 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
275 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
276 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
277 INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
278 INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
279 INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
280 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
281 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
282 EVENT_CONSTRAINT_END
283};
284
285static struct event_constraint intel_westmere_event_constraints[] =
286{
287 FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
288 FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
289 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
290 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
291 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
292 EVENT_CONSTRAINT_END
293};
294
295static struct event_constraint intel_gen_event_constraints[] =
296{
297 FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
298 FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
299 EVENT_CONSTRAINT_END
300};
301
302static u64 intel_pmu_event_map(int hw_event)
303{
304 return intel_perfmon_event_map[hw_event];
305}
306
307/*
308 * Generalized hw caching related hw_event table, filled 172 * Generalized hw caching related hw_event table, filled
309 * in on a per model basis. A value of 0 means 173 * in on a per model basis. A value of 0 means
310 * 'not supported', -1 means 'hw_event makes no sense on 174 * 'not supported', -1 means 'hw_event makes no sense on
@@ -319,515 +183,6 @@ static u64 __read_mostly hw_cache_event_ids
319 [PERF_COUNT_HW_CACHE_OP_MAX] 183 [PERF_COUNT_HW_CACHE_OP_MAX]
320 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 184 [PERF_COUNT_HW_CACHE_RESULT_MAX];
321 185
322static __initconst u64 westmere_hw_cache_event_ids
323 [PERF_COUNT_HW_CACHE_MAX]
324 [PERF_COUNT_HW_CACHE_OP_MAX]
325 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
326{
327 [ C(L1D) ] = {
328 [ C(OP_READ) ] = {
329 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
330 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
331 },
332 [ C(OP_WRITE) ] = {
333 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */
334 [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
335 },
336 [ C(OP_PREFETCH) ] = {
337 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
338 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
339 },
340 },
341 [ C(L1I ) ] = {
342 [ C(OP_READ) ] = {
343 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
344 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
345 },
346 [ C(OP_WRITE) ] = {
347 [ C(RESULT_ACCESS) ] = -1,
348 [ C(RESULT_MISS) ] = -1,
349 },
350 [ C(OP_PREFETCH) ] = {
351 [ C(RESULT_ACCESS) ] = 0x0,
352 [ C(RESULT_MISS) ] = 0x0,
353 },
354 },
355 [ C(LL ) ] = {
356 [ C(OP_READ) ] = {
357 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
358 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
359 },
360 [ C(OP_WRITE) ] = {
361 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
362 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
363 },
364 [ C(OP_PREFETCH) ] = {
365 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
366 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
367 },
368 },
369 [ C(DTLB) ] = {
370 [ C(OP_READ) ] = {
371 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
372 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
373 },
374 [ C(OP_WRITE) ] = {
375 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */
376 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
377 },
378 [ C(OP_PREFETCH) ] = {
379 [ C(RESULT_ACCESS) ] = 0x0,
380 [ C(RESULT_MISS) ] = 0x0,
381 },
382 },
383 [ C(ITLB) ] = {
384 [ C(OP_READ) ] = {
385 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
386 [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
387 },
388 [ C(OP_WRITE) ] = {
389 [ C(RESULT_ACCESS) ] = -1,
390 [ C(RESULT_MISS) ] = -1,
391 },
392 [ C(OP_PREFETCH) ] = {
393 [ C(RESULT_ACCESS) ] = -1,
394 [ C(RESULT_MISS) ] = -1,
395 },
396 },
397 [ C(BPU ) ] = {
398 [ C(OP_READ) ] = {
399 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
400 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
401 },
402 [ C(OP_WRITE) ] = {
403 [ C(RESULT_ACCESS) ] = -1,
404 [ C(RESULT_MISS) ] = -1,
405 },
406 [ C(OP_PREFETCH) ] = {
407 [ C(RESULT_ACCESS) ] = -1,
408 [ C(RESULT_MISS) ] = -1,
409 },
410 },
411};
412
413static __initconst u64 nehalem_hw_cache_event_ids
414 [PERF_COUNT_HW_CACHE_MAX]
415 [PERF_COUNT_HW_CACHE_OP_MAX]
416 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
417{
418 [ C(L1D) ] = {
419 [ C(OP_READ) ] = {
420 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
421 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
422 },
423 [ C(OP_WRITE) ] = {
424 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
425 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
426 },
427 [ C(OP_PREFETCH) ] = {
428 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
429 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
430 },
431 },
432 [ C(L1I ) ] = {
433 [ C(OP_READ) ] = {
434 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
435 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
436 },
437 [ C(OP_WRITE) ] = {
438 [ C(RESULT_ACCESS) ] = -1,
439 [ C(RESULT_MISS) ] = -1,
440 },
441 [ C(OP_PREFETCH) ] = {
442 [ C(RESULT_ACCESS) ] = 0x0,
443 [ C(RESULT_MISS) ] = 0x0,
444 },
445 },
446 [ C(LL ) ] = {
447 [ C(OP_READ) ] = {
448 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
449 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
450 },
451 [ C(OP_WRITE) ] = {
452 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
453 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
454 },
455 [ C(OP_PREFETCH) ] = {
456 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
457 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
458 },
459 },
460 [ C(DTLB) ] = {
461 [ C(OP_READ) ] = {
462 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
463 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
464 },
465 [ C(OP_WRITE) ] = {
466 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
467 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
468 },
469 [ C(OP_PREFETCH) ] = {
470 [ C(RESULT_ACCESS) ] = 0x0,
471 [ C(RESULT_MISS) ] = 0x0,
472 },
473 },
474 [ C(ITLB) ] = {
475 [ C(OP_READ) ] = {
476 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
477 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
478 },
479 [ C(OP_WRITE) ] = {
480 [ C(RESULT_ACCESS) ] = -1,
481 [ C(RESULT_MISS) ] = -1,
482 },
483 [ C(OP_PREFETCH) ] = {
484 [ C(RESULT_ACCESS) ] = -1,
485 [ C(RESULT_MISS) ] = -1,
486 },
487 },
488 [ C(BPU ) ] = {
489 [ C(OP_READ) ] = {
490 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
491 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
492 },
493 [ C(OP_WRITE) ] = {
494 [ C(RESULT_ACCESS) ] = -1,
495 [ C(RESULT_MISS) ] = -1,
496 },
497 [ C(OP_PREFETCH) ] = {
498 [ C(RESULT_ACCESS) ] = -1,
499 [ C(RESULT_MISS) ] = -1,
500 },
501 },
502};
503
504static __initconst u64 core2_hw_cache_event_ids
505 [PERF_COUNT_HW_CACHE_MAX]
506 [PERF_COUNT_HW_CACHE_OP_MAX]
507 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
508{
509 [ C(L1D) ] = {
510 [ C(OP_READ) ] = {
511 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
512 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
513 },
514 [ C(OP_WRITE) ] = {
515 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
516 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
517 },
518 [ C(OP_PREFETCH) ] = {
519 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
520 [ C(RESULT_MISS) ] = 0,
521 },
522 },
523 [ C(L1I ) ] = {
524 [ C(OP_READ) ] = {
525 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
526 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
527 },
528 [ C(OP_WRITE) ] = {
529 [ C(RESULT_ACCESS) ] = -1,
530 [ C(RESULT_MISS) ] = -1,
531 },
532 [ C(OP_PREFETCH) ] = {
533 [ C(RESULT_ACCESS) ] = 0,
534 [ C(RESULT_MISS) ] = 0,
535 },
536 },
537 [ C(LL ) ] = {
538 [ C(OP_READ) ] = {
539 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
540 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
541 },
542 [ C(OP_WRITE) ] = {
543 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
544 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
545 },
546 [ C(OP_PREFETCH) ] = {
547 [ C(RESULT_ACCESS) ] = 0,
548 [ C(RESULT_MISS) ] = 0,
549 },
550 },
551 [ C(DTLB) ] = {
552 [ C(OP_READ) ] = {
553 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
554 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
555 },
556 [ C(OP_WRITE) ] = {
557 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
558 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
559 },
560 [ C(OP_PREFETCH) ] = {
561 [ C(RESULT_ACCESS) ] = 0,
562 [ C(RESULT_MISS) ] = 0,
563 },
564 },
565 [ C(ITLB) ] = {
566 [ C(OP_READ) ] = {
567 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
568 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
569 },
570 [ C(OP_WRITE) ] = {
571 [ C(RESULT_ACCESS) ] = -1,
572 [ C(RESULT_MISS) ] = -1,
573 },
574 [ C(OP_PREFETCH) ] = {
575 [ C(RESULT_ACCESS) ] = -1,
576 [ C(RESULT_MISS) ] = -1,
577 },
578 },
579 [ C(BPU ) ] = {
580 [ C(OP_READ) ] = {
581 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
582 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
583 },
584 [ C(OP_WRITE) ] = {
585 [ C(RESULT_ACCESS) ] = -1,
586 [ C(RESULT_MISS) ] = -1,
587 },
588 [ C(OP_PREFETCH) ] = {
589 [ C(RESULT_ACCESS) ] = -1,
590 [ C(RESULT_MISS) ] = -1,
591 },
592 },
593};
594
595static __initconst u64 atom_hw_cache_event_ids
596 [PERF_COUNT_HW_CACHE_MAX]
597 [PERF_COUNT_HW_CACHE_OP_MAX]
598 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
599{
600 [ C(L1D) ] = {
601 [ C(OP_READ) ] = {
602 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
603 [ C(RESULT_MISS) ] = 0,
604 },
605 [ C(OP_WRITE) ] = {
606 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
607 [ C(RESULT_MISS) ] = 0,
608 },
609 [ C(OP_PREFETCH) ] = {
610 [ C(RESULT_ACCESS) ] = 0x0,
611 [ C(RESULT_MISS) ] = 0,
612 },
613 },
614 [ C(L1I ) ] = {
615 [ C(OP_READ) ] = {
616 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
617 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
618 },
619 [ C(OP_WRITE) ] = {
620 [ C(RESULT_ACCESS) ] = -1,
621 [ C(RESULT_MISS) ] = -1,
622 },
623 [ C(OP_PREFETCH) ] = {
624 [ C(RESULT_ACCESS) ] = 0,
625 [ C(RESULT_MISS) ] = 0,
626 },
627 },
628 [ C(LL ) ] = {
629 [ C(OP_READ) ] = {
630 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
631 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
632 },
633 [ C(OP_WRITE) ] = {
634 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
635 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
636 },
637 [ C(OP_PREFETCH) ] = {
638 [ C(RESULT_ACCESS) ] = 0,
639 [ C(RESULT_MISS) ] = 0,
640 },
641 },
642 [ C(DTLB) ] = {
643 [ C(OP_READ) ] = {
644 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
645 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
646 },
647 [ C(OP_WRITE) ] = {
648 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
649 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
650 },
651 [ C(OP_PREFETCH) ] = {
652 [ C(RESULT_ACCESS) ] = 0,
653 [ C(RESULT_MISS) ] = 0,
654 },
655 },
656 [ C(ITLB) ] = {
657 [ C(OP_READ) ] = {
658 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
659 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
660 },
661 [ C(OP_WRITE) ] = {
662 [ C(RESULT_ACCESS) ] = -1,
663 [ C(RESULT_MISS) ] = -1,
664 },
665 [ C(OP_PREFETCH) ] = {
666 [ C(RESULT_ACCESS) ] = -1,
667 [ C(RESULT_MISS) ] = -1,
668 },
669 },
670 [ C(BPU ) ] = {
671 [ C(OP_READ) ] = {
672 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
673 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
674 },
675 [ C(OP_WRITE) ] = {
676 [ C(RESULT_ACCESS) ] = -1,
677 [ C(RESULT_MISS) ] = -1,
678 },
679 [ C(OP_PREFETCH) ] = {
680 [ C(RESULT_ACCESS) ] = -1,
681 [ C(RESULT_MISS) ] = -1,
682 },
683 },
684};
685
686static u64 intel_pmu_raw_event(u64 hw_event)
687{
688#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
689#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
690#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
691#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
692#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
693
694#define CORE_EVNTSEL_MASK \
695 (INTEL_ARCH_EVTSEL_MASK | \
696 INTEL_ARCH_UNIT_MASK | \
697 INTEL_ARCH_EDGE_MASK | \
698 INTEL_ARCH_INV_MASK | \
699 INTEL_ARCH_CNT_MASK)
700
701 return hw_event & CORE_EVNTSEL_MASK;
702}
703
704static __initconst u64 amd_hw_cache_event_ids
705 [PERF_COUNT_HW_CACHE_MAX]
706 [PERF_COUNT_HW_CACHE_OP_MAX]
707 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
708{
709 [ C(L1D) ] = {
710 [ C(OP_READ) ] = {
711 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
712 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
713 },
714 [ C(OP_WRITE) ] = {
715 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
716 [ C(RESULT_MISS) ] = 0,
717 },
718 [ C(OP_PREFETCH) ] = {
719 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
720 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
721 },
722 },
723 [ C(L1I ) ] = {
724 [ C(OP_READ) ] = {
725 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
726 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
727 },
728 [ C(OP_WRITE) ] = {
729 [ C(RESULT_ACCESS) ] = -1,
730 [ C(RESULT_MISS) ] = -1,
731 },
732 [ C(OP_PREFETCH) ] = {
733 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
734 [ C(RESULT_MISS) ] = 0,
735 },
736 },
737 [ C(LL ) ] = {
738 [ C(OP_READ) ] = {
739 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
740 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
741 },
742 [ C(OP_WRITE) ] = {
743 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
744 [ C(RESULT_MISS) ] = 0,
745 },
746 [ C(OP_PREFETCH) ] = {
747 [ C(RESULT_ACCESS) ] = 0,
748 [ C(RESULT_MISS) ] = 0,
749 },
750 },
751 [ C(DTLB) ] = {
752 [ C(OP_READ) ] = {
753 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
754 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
755 },
756 [ C(OP_WRITE) ] = {
757 [ C(RESULT_ACCESS) ] = 0,
758 [ C(RESULT_MISS) ] = 0,
759 },
760 [ C(OP_PREFETCH) ] = {
761 [ C(RESULT_ACCESS) ] = 0,
762 [ C(RESULT_MISS) ] = 0,
763 },
764 },
765 [ C(ITLB) ] = {
766 [ C(OP_READ) ] = {
767 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */
768 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
769 },
770 [ C(OP_WRITE) ] = {
771 [ C(RESULT_ACCESS) ] = -1,
772 [ C(RESULT_MISS) ] = -1,
773 },
774 [ C(OP_PREFETCH) ] = {
775 [ C(RESULT_ACCESS) ] = -1,
776 [ C(RESULT_MISS) ] = -1,
777 },
778 },
779 [ C(BPU ) ] = {
780 [ C(OP_READ) ] = {
781 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
782 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
783 },
784 [ C(OP_WRITE) ] = {
785 [ C(RESULT_ACCESS) ] = -1,
786 [ C(RESULT_MISS) ] = -1,
787 },
788 [ C(OP_PREFETCH) ] = {
789 [ C(RESULT_ACCESS) ] = -1,
790 [ C(RESULT_MISS) ] = -1,
791 },
792 },
793};
794
795/*
796 * AMD Performance Monitor K7 and later.
797 */
798static const u64 amd_perfmon_event_map[] =
799{
800 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
801 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
802 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
803 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
804 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
805 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
806};
807
808static u64 amd_pmu_event_map(int hw_event)
809{
810 return amd_perfmon_event_map[hw_event];
811}
812
813static u64 amd_pmu_raw_event(u64 hw_event)
814{
815#define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL
816#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
817#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
818#define K7_EVNTSEL_INV_MASK 0x000800000ULL
819#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL
820
821#define K7_EVNTSEL_MASK \
822 (K7_EVNTSEL_EVENT_MASK | \
823 K7_EVNTSEL_UNIT_MASK | \
824 K7_EVNTSEL_EDGE_MASK | \
825 K7_EVNTSEL_INV_MASK | \
826 K7_EVNTSEL_REG_MASK)
827
828 return hw_event & K7_EVNTSEL_MASK;
829}
830
831/* 186/*
832 * Propagate event elapsed time into the generic event. 187 * Propagate event elapsed time into the generic event.
833 * Can only be executed on the CPU where the event is active. 188 * Can only be executed on the CPU where the event is active.
@@ -1079,42 +434,6 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
1079 return 0; 434 return 0;
1080} 435}
1081 436
1082static void intel_pmu_enable_bts(u64 config)
1083{
1084 unsigned long debugctlmsr;
1085
1086 debugctlmsr = get_debugctlmsr();
1087
1088 debugctlmsr |= X86_DEBUGCTL_TR;
1089 debugctlmsr |= X86_DEBUGCTL_BTS;
1090 debugctlmsr |= X86_DEBUGCTL_BTINT;
1091
1092 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
1093 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
1094
1095 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
1096 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
1097
1098 update_debugctlmsr(debugctlmsr);
1099}
1100
1101static void intel_pmu_disable_bts(void)
1102{
1103 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1104 unsigned long debugctlmsr;
1105
1106 if (!cpuc->ds)
1107 return;
1108
1109 debugctlmsr = get_debugctlmsr();
1110
1111 debugctlmsr &=
1112 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
1113 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
1114
1115 update_debugctlmsr(debugctlmsr);
1116}
1117
1118/* 437/*
1119 * Setup the hardware configuration for a given attr_type 438 * Setup the hardware configuration for a given attr_type
1120 */ 439 */
@@ -1223,26 +542,6 @@ static int __hw_perf_event_init(struct perf_event *event)
1223 return 0; 542 return 0;
1224} 543}
1225 544
1226static void p6_pmu_disable_all(void)
1227{
1228 u64 val;
1229
1230 /* p6 only has one enable register */
1231 rdmsrl(MSR_P6_EVNTSEL0, val);
1232 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
1233 wrmsrl(MSR_P6_EVNTSEL0, val);
1234}
1235
1236static void intel_pmu_disable_all(void)
1237{
1238 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1239
1240 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
1241
1242 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
1243 intel_pmu_disable_bts();
1244}
1245
1246static void x86_pmu_disable_all(void) 545static void x86_pmu_disable_all(void)
1247{ 546{
1248 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 547 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1278,33 +577,6 @@ void hw_perf_disable(void)
1278 x86_pmu.disable_all(); 577 x86_pmu.disable_all();
1279} 578}
1280 579
1281static void p6_pmu_enable_all(void)
1282{
1283 unsigned long val;
1284
1285 /* p6 only has one enable register */
1286 rdmsrl(MSR_P6_EVNTSEL0, val);
1287 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1288 wrmsrl(MSR_P6_EVNTSEL0, val);
1289}
1290
1291static void intel_pmu_enable_all(void)
1292{
1293 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1294
1295 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
1296
1297 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
1298 struct perf_event *event =
1299 cpuc->events[X86_PMC_IDX_FIXED_BTS];
1300
1301 if (WARN_ON_ONCE(!event))
1302 return;
1303
1304 intel_pmu_enable_bts(event->hw.config);
1305 }
1306}
1307
1308static void x86_pmu_enable_all(void) 580static void x86_pmu_enable_all(void)
1309{ 581{
1310 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 582 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1578,20 +850,6 @@ void hw_perf_enable(void)
1578 x86_pmu.enable_all(); 850 x86_pmu.enable_all();
1579} 851}
1580 852
1581static inline u64 intel_pmu_get_status(void)
1582{
1583 u64 status;
1584
1585 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1586
1587 return status;
1588}
1589
1590static inline void intel_pmu_ack_status(u64 ack)
1591{
1592 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
1593}
1594
1595static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) 853static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1596{ 854{
1597 (void)checking_wrmsrl(hwc->config_base + idx, 855 (void)checking_wrmsrl(hwc->config_base + idx,
@@ -1603,47 +861,6 @@ static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1603 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); 861 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
1604} 862}
1605 863
1606static inline void
1607intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
1608{
1609 int idx = __idx - X86_PMC_IDX_FIXED;
1610 u64 ctrl_val, mask;
1611
1612 mask = 0xfULL << (idx * 4);
1613
1614 rdmsrl(hwc->config_base, ctrl_val);
1615 ctrl_val &= ~mask;
1616 (void)checking_wrmsrl(hwc->config_base, ctrl_val);
1617}
1618
1619static inline void
1620p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1621{
1622 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1623 u64 val = P6_NOP_EVENT;
1624
1625 if (cpuc->enabled)
1626 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1627
1628 (void)checking_wrmsrl(hwc->config_base + idx, val);
1629}
1630
1631static inline void
1632intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1633{
1634 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1635 intel_pmu_disable_bts();
1636 return;
1637 }
1638
1639 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1640 intel_pmu_disable_fixed(hwc, idx);
1641 return;
1642 }
1643
1644 x86_pmu_disable_event(hwc, idx);
1645}
1646
1647static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); 864static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
1648 865
1649/* 866/*
@@ -1702,70 +919,6 @@ x86_perf_event_set_period(struct perf_event *event,
1702 return ret; 919 return ret;
1703} 920}
1704 921
1705static inline void
1706intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
1707{
1708 int idx = __idx - X86_PMC_IDX_FIXED;
1709 u64 ctrl_val, bits, mask;
1710 int err;
1711
1712 /*
1713 * Enable IRQ generation (0x8),
1714 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
1715 * if requested:
1716 */
1717 bits = 0x8ULL;
1718 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
1719 bits |= 0x2;
1720 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1721 bits |= 0x1;
1722
1723 /*
1724 * ANY bit is supported in v3 and up
1725 */
1726 if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
1727 bits |= 0x4;
1728
1729 bits <<= (idx * 4);
1730 mask = 0xfULL << (idx * 4);
1731
1732 rdmsrl(hwc->config_base, ctrl_val);
1733 ctrl_val &= ~mask;
1734 ctrl_val |= bits;
1735 err = checking_wrmsrl(hwc->config_base, ctrl_val);
1736}
1737
1738static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1739{
1740 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1741 u64 val;
1742
1743 val = hwc->config;
1744 if (cpuc->enabled)
1745 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1746
1747 (void)checking_wrmsrl(hwc->config_base + idx, val);
1748}
1749
1750
1751static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1752{
1753 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1754 if (!__get_cpu_var(cpu_hw_events).enabled)
1755 return;
1756
1757 intel_pmu_enable_bts(hwc->config);
1758 return;
1759 }
1760
1761 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1762 intel_pmu_enable_fixed(hwc, idx);
1763 return;
1764 }
1765
1766 __x86_pmu_enable_event(hwc, idx);
1767}
1768
1769static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) 922static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1770{ 923{
1771 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 924 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1887,66 +1040,6 @@ void perf_event_print_debug(void)
1887 local_irq_restore(flags); 1040 local_irq_restore(flags);
1888} 1041}
1889 1042
1890static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
1891{
1892 struct debug_store *ds = cpuc->ds;
1893 struct bts_record {
1894 u64 from;
1895 u64 to;
1896 u64 flags;
1897 };
1898 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
1899 struct bts_record *at, *top;
1900 struct perf_output_handle handle;
1901 struct perf_event_header header;
1902 struct perf_sample_data data;
1903 struct pt_regs regs;
1904
1905 if (!event)
1906 return;
1907
1908 if (!ds)
1909 return;
1910
1911 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
1912 top = (struct bts_record *)(unsigned long)ds->bts_index;
1913
1914 if (top <= at)
1915 return;
1916
1917 ds->bts_index = ds->bts_buffer_base;
1918
1919
1920 data.period = event->hw.last_period;
1921 data.addr = 0;
1922 data.raw = NULL;
1923 regs.ip = 0;
1924
1925 /*
1926 * Prepare a generic sample, i.e. fill in the invariant fields.
1927 * We will overwrite the from and to address before we output
1928 * the sample.
1929 */
1930 perf_prepare_sample(&header, &data, event, &regs);
1931
1932 if (perf_output_begin(&handle, event,
1933 header.size * (top - at), 1, 1))
1934 return;
1935
1936 for (; at < top; at++) {
1937 data.ip = at->from;
1938 data.addr = at->to;
1939
1940 perf_output_sample(&handle, &header, &data, event);
1941 }
1942
1943 perf_output_end(&handle);
1944
1945 /* There's new data available. */
1946 event->hw.interrupts++;
1947 event->pending_kill = POLL_IN;
1948}
1949
1950static void x86_pmu_stop(struct perf_event *event) 1043static void x86_pmu_stop(struct perf_event *event)
1951{ 1044{
1952 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1045 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1966,10 +1059,6 @@ static void x86_pmu_stop(struct perf_event *event)
1966 */ 1059 */
1967 x86_perf_event_update(event, hwc, idx); 1060 x86_perf_event_update(event, hwc, idx);
1968 1061
1969 /* Drain the remaining BTS records. */
1970 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
1971 intel_pmu_drain_bts_buffer(cpuc);
1972
1973 cpuc->events[idx] = NULL; 1062 cpuc->events[idx] = NULL;
1974} 1063}
1975 1064
@@ -1996,114 +1085,6 @@ static void x86_pmu_disable(struct perf_event *event)
1996 perf_event_update_userpage(event); 1085 perf_event_update_userpage(event);
1997} 1086}
1998 1087
1999/*
2000 * Save and restart an expired event. Called by NMI contexts,
2001 * so it has to be careful about preempting normal event ops:
2002 */
2003static int intel_pmu_save_and_restart(struct perf_event *event)
2004{
2005 struct hw_perf_event *hwc = &event->hw;
2006 int idx = hwc->idx;
2007 int ret;
2008
2009 x86_perf_event_update(event, hwc, idx);
2010 ret = x86_perf_event_set_period(event, hwc, idx);
2011
2012 return ret;
2013}
2014
2015static void intel_pmu_reset(void)
2016{
2017 struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
2018 unsigned long flags;
2019 int idx;
2020
2021 if (!x86_pmu.num_events)
2022 return;
2023
2024 local_irq_save(flags);
2025
2026 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
2027
2028 for (idx = 0; idx < x86_pmu.num_events; idx++) {
2029 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
2030 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
2031 }
2032 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
2033 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
2034 }
2035 if (ds)
2036 ds->bts_index = ds->bts_buffer_base;
2037
2038 local_irq_restore(flags);
2039}
2040
2041/*
2042 * This handler is triggered by the local APIC, so the APIC IRQ handling
2043 * rules apply:
2044 */
2045static int intel_pmu_handle_irq(struct pt_regs *regs)
2046{
2047 struct perf_sample_data data;
2048 struct cpu_hw_events *cpuc;
2049 int bit, loops;
2050 u64 ack, status;
2051
2052 data.addr = 0;
2053 data.raw = NULL;
2054
2055 cpuc = &__get_cpu_var(cpu_hw_events);
2056
2057 perf_disable();
2058 intel_pmu_drain_bts_buffer(cpuc);
2059 status = intel_pmu_get_status();
2060 if (!status) {
2061 perf_enable();
2062 return 0;
2063 }
2064
2065 loops = 0;
2066again:
2067 if (++loops > 100) {
2068 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
2069 perf_event_print_debug();
2070 intel_pmu_reset();
2071 perf_enable();
2072 return 1;
2073 }
2074
2075 inc_irq_stat(apic_perf_irqs);
2076 ack = status;
2077 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
2078 struct perf_event *event = cpuc->events[bit];
2079
2080 clear_bit(bit, (unsigned long *) &status);
2081 if (!test_bit(bit, cpuc->active_mask))
2082 continue;
2083
2084 if (!intel_pmu_save_and_restart(event))
2085 continue;
2086
2087 data.period = event->hw.last_period;
2088
2089 if (perf_event_overflow(event, 1, &data, regs))
2090 intel_pmu_disable_event(&event->hw, bit);
2091 }
2092
2093 intel_pmu_ack_status(ack);
2094
2095 /*
2096 * Repeat if there is more work to be done:
2097 */
2098 status = intel_pmu_get_status();
2099 if (status)
2100 goto again;
2101
2102 perf_enable();
2103
2104 return 1;
2105}
2106
2107static int x86_pmu_handle_irq(struct pt_regs *regs) 1088static int x86_pmu_handle_irq(struct pt_regs *regs)
2108{ 1089{
2109 struct perf_sample_data data; 1090 struct perf_sample_data data;
@@ -2216,37 +1197,20 @@ perf_event_nmi_handler(struct notifier_block *self,
2216 return NOTIFY_STOP; 1197 return NOTIFY_STOP;
2217} 1198}
2218 1199
1200static __read_mostly struct notifier_block perf_event_nmi_notifier = {
1201 .notifier_call = perf_event_nmi_handler,
1202 .next = NULL,
1203 .priority = 1
1204};
1205
2219static struct event_constraint unconstrained; 1206static struct event_constraint unconstrained;
2220static struct event_constraint emptyconstraint; 1207static struct event_constraint emptyconstraint;
2221 1208
2222static struct event_constraint bts_constraint =
2223 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
2224
2225static struct event_constraint *
2226intel_special_constraints(struct perf_event *event)
2227{
2228 unsigned int hw_event;
2229
2230 hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
2231
2232 if (unlikely((hw_event ==
2233 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
2234 (event->hw.sample_period == 1))) {
2235
2236 return &bts_constraint;
2237 }
2238 return NULL;
2239}
2240
2241static struct event_constraint * 1209static struct event_constraint *
2242intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) 1210x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
2243{ 1211{
2244 struct event_constraint *c; 1212 struct event_constraint *c;
2245 1213
2246 c = intel_special_constraints(event);
2247 if (c)
2248 return c;
2249
2250 if (x86_pmu.event_constraints) { 1214 if (x86_pmu.event_constraints) {
2251 for_each_event_constraint(c, x86_pmu.event_constraints) { 1215 for_each_event_constraint(c, x86_pmu.event_constraints) {
2252 if ((event->hw.config & c->cmask) == c->code) 1216 if ((event->hw.config & c->cmask) == c->code)
@@ -2257,148 +1221,6 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
2257 return &unconstrained; 1221 return &unconstrained;
2258} 1222}
2259 1223
2260/*
2261 * AMD64 events are detected based on their event codes.
2262 */
2263static inline int amd_is_nb_event(struct hw_perf_event *hwc)
2264{
2265 return (hwc->config & 0xe0) == 0xe0;
2266}
2267
2268static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
2269 struct perf_event *event)
2270{
2271 struct hw_perf_event *hwc = &event->hw;
2272 struct amd_nb *nb = cpuc->amd_nb;
2273 int i;
2274
2275 /*
2276 * only care about NB events
2277 */
2278 if (!(nb && amd_is_nb_event(hwc)))
2279 return;
2280
2281 /*
2282 * need to scan whole list because event may not have
2283 * been assigned during scheduling
2284 *
2285 * no race condition possible because event can only
2286 * be removed on one CPU at a time AND PMU is disabled
2287 * when we come here
2288 */
2289 for (i = 0; i < x86_pmu.num_events; i++) {
2290 if (nb->owners[i] == event) {
2291 cmpxchg(nb->owners+i, event, NULL);
2292 break;
2293 }
2294 }
2295}
2296
2297 /*
2298 * AMD64 NorthBridge events need special treatment because
2299 * counter access needs to be synchronized across all cores
2300 * of a package. Refer to BKDG section 3.12
2301 *
2302 * NB events are events measuring L3 cache, Hypertransport
2303 * traffic. They are identified by an event code >= 0xe00.
2304 * They measure events on the NorthBride which is shared
2305 * by all cores on a package. NB events are counted on a
2306 * shared set of counters. When a NB event is programmed
2307 * in a counter, the data actually comes from a shared
2308 * counter. Thus, access to those counters needs to be
2309 * synchronized.
2310 *
2311 * We implement the synchronization such that no two cores
2312 * can be measuring NB events using the same counters. Thus,
2313 * we maintain a per-NB allocation table. The available slot
2314 * is propagated using the event_constraint structure.
2315 *
2316 * We provide only one choice for each NB event based on
2317 * the fact that only NB events have restrictions. Consequently,
2318 * if a counter is available, there is a guarantee the NB event
2319 * will be assigned to it. If no slot is available, an empty
2320 * constraint is returned and scheduling will eventually fail
2321 * for this event.
2322 *
2323 * Note that all cores attached the same NB compete for the same
2324 * counters to host NB events, this is why we use atomic ops. Some
2325 * multi-chip CPUs may have more than one NB.
2326 *
2327 * Given that resources are allocated (cmpxchg), they must be
2328 * eventually freed for others to use. This is accomplished by
2329 * calling amd_put_event_constraints().
2330 *
2331 * Non NB events are not impacted by this restriction.
2332 */
2333static struct event_constraint *
2334amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
2335{
2336 struct hw_perf_event *hwc = &event->hw;
2337 struct amd_nb *nb = cpuc->amd_nb;
2338 struct perf_event *old = NULL;
2339 int max = x86_pmu.num_events;
2340 int i, j, k = -1;
2341
2342 /*
2343 * if not NB event or no NB, then no constraints
2344 */
2345 if (!(nb && amd_is_nb_event(hwc)))
2346 return &unconstrained;
2347
2348 /*
2349 * detect if already present, if so reuse
2350 *
2351 * cannot merge with actual allocation
2352 * because of possible holes
2353 *
2354 * event can already be present yet not assigned (in hwc->idx)
2355 * because of successive calls to x86_schedule_events() from
2356 * hw_perf_group_sched_in() without hw_perf_enable()
2357 */
2358 for (i = 0; i < max; i++) {
2359 /*
2360 * keep track of first free slot
2361 */
2362 if (k == -1 && !nb->owners[i])
2363 k = i;
2364
2365 /* already present, reuse */
2366 if (nb->owners[i] == event)
2367 goto done;
2368 }
2369 /*
2370 * not present, so grab a new slot
2371 * starting either at:
2372 */
2373 if (hwc->idx != -1) {
2374 /* previous assignment */
2375 i = hwc->idx;
2376 } else if (k != -1) {
2377 /* start from free slot found */
2378 i = k;
2379 } else {
2380 /*
2381 * event not found, no slot found in
2382 * first pass, try again from the
2383 * beginning
2384 */
2385 i = 0;
2386 }
2387 j = i;
2388 do {
2389 old = cmpxchg(nb->owners+i, NULL, event);
2390 if (!old)
2391 break;
2392 if (++i == max)
2393 i = 0;
2394 } while (i != j);
2395done:
2396 if (!old)
2397 return &nb->event_constraints[i];
2398
2399 return &emptyconstraint;
2400}
2401
2402static int x86_event_sched_in(struct perf_event *event, 1224static int x86_event_sched_in(struct perf_event *event,
2403 struct perf_cpu_context *cpuctx) 1225 struct perf_cpu_context *cpuctx)
2404{ 1226{
@@ -2509,335 +1331,9 @@ undo:
2509 return ret; 1331 return ret;
2510} 1332}
2511 1333
2512static __read_mostly struct notifier_block perf_event_nmi_notifier = { 1334#include "perf_event_amd.c"
2513 .notifier_call = perf_event_nmi_handler, 1335#include "perf_event_p6.c"
2514 .next = NULL, 1336#include "perf_event_intel.c"
2515 .priority = 1
2516};
2517
2518static __initconst struct x86_pmu p6_pmu = {
2519 .name = "p6",
2520 .handle_irq = x86_pmu_handle_irq,
2521 .disable_all = p6_pmu_disable_all,
2522 .enable_all = p6_pmu_enable_all,
2523 .enable = p6_pmu_enable_event,
2524 .disable = p6_pmu_disable_event,
2525 .eventsel = MSR_P6_EVNTSEL0,
2526 .perfctr = MSR_P6_PERFCTR0,
2527 .event_map = p6_pmu_event_map,
2528 .raw_event = p6_pmu_raw_event,
2529 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
2530 .apic = 1,
2531 .max_period = (1ULL << 31) - 1,
2532 .version = 0,
2533 .num_events = 2,
2534 /*
2535 * Events have 40 bits implemented. However they are designed such
2536 * that bits [32-39] are sign extensions of bit 31. As such the
2537 * effective width of a event for P6-like PMU is 32 bits only.
2538 *
2539 * See IA-32 Intel Architecture Software developer manual Vol 3B
2540 */
2541 .event_bits = 32,
2542 .event_mask = (1ULL << 32) - 1,
2543 .get_event_constraints = intel_get_event_constraints,
2544 .event_constraints = intel_p6_event_constraints
2545};
2546
2547static __initconst struct x86_pmu core_pmu = {
2548 .name = "core",
2549 .handle_irq = x86_pmu_handle_irq,
2550 .disable_all = x86_pmu_disable_all,
2551 .enable_all = x86_pmu_enable_all,
2552 .enable = x86_pmu_enable_event,
2553 .disable = x86_pmu_disable_event,
2554 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
2555 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
2556 .event_map = intel_pmu_event_map,
2557 .raw_event = intel_pmu_raw_event,
2558 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
2559 .apic = 1,
2560 /*
2561 * Intel PMCs cannot be accessed sanely above 32 bit width,
2562 * so we install an artificial 1<<31 period regardless of
2563 * the generic event period:
2564 */
2565 .max_period = (1ULL << 31) - 1,
2566 .get_event_constraints = intel_get_event_constraints,
2567 .event_constraints = intel_core_event_constraints,
2568};
2569
2570static __initconst struct x86_pmu intel_pmu = {
2571 .name = "Intel",
2572 .handle_irq = intel_pmu_handle_irq,
2573 .disable_all = intel_pmu_disable_all,
2574 .enable_all = intel_pmu_enable_all,
2575 .enable = intel_pmu_enable_event,
2576 .disable = intel_pmu_disable_event,
2577 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
2578 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
2579 .event_map = intel_pmu_event_map,
2580 .raw_event = intel_pmu_raw_event,
2581 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
2582 .apic = 1,
2583 /*
2584 * Intel PMCs cannot be accessed sanely above 32 bit width,
2585 * so we install an artificial 1<<31 period regardless of
2586 * the generic event period:
2587 */
2588 .max_period = (1ULL << 31) - 1,
2589 .enable_bts = intel_pmu_enable_bts,
2590 .disable_bts = intel_pmu_disable_bts,
2591 .get_event_constraints = intel_get_event_constraints
2592};
2593
2594static __initconst struct x86_pmu amd_pmu = {
2595 .name = "AMD",
2596 .handle_irq = x86_pmu_handle_irq,
2597 .disable_all = x86_pmu_disable_all,
2598 .enable_all = x86_pmu_enable_all,
2599 .enable = x86_pmu_enable_event,
2600 .disable = x86_pmu_disable_event,
2601 .eventsel = MSR_K7_EVNTSEL0,
2602 .perfctr = MSR_K7_PERFCTR0,
2603 .event_map = amd_pmu_event_map,
2604 .raw_event = amd_pmu_raw_event,
2605 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
2606 .num_events = 4,
2607 .event_bits = 48,
2608 .event_mask = (1ULL << 48) - 1,
2609 .apic = 1,
2610 /* use highest bit to detect overflow */
2611 .max_period = (1ULL << 47) - 1,
2612 .get_event_constraints = amd_get_event_constraints,
2613 .put_event_constraints = amd_put_event_constraints
2614};
2615
2616static __init int p6_pmu_init(void)
2617{
2618 switch (boot_cpu_data.x86_model) {
2619 case 1:
2620 case 3: /* Pentium Pro */
2621 case 5:
2622 case 6: /* Pentium II */
2623 case 7:
2624 case 8:
2625 case 11: /* Pentium III */
2626 case 9:
2627 case 13:
2628 /* Pentium M */
2629 break;
2630 default:
2631 pr_cont("unsupported p6 CPU model %d ",
2632 boot_cpu_data.x86_model);
2633 return -ENODEV;
2634 }
2635
2636 x86_pmu = p6_pmu;
2637
2638 return 0;
2639}
2640
2641static __init int intel_pmu_init(void)
2642{
2643 union cpuid10_edx edx;
2644 union cpuid10_eax eax;
2645 unsigned int unused;
2646 unsigned int ebx;
2647 int version;
2648
2649 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
2650 /* check for P6 processor family */
2651 if (boot_cpu_data.x86 == 6) {
2652 return p6_pmu_init();
2653 } else {
2654 return -ENODEV;
2655 }
2656 }
2657
2658 /*
2659 * Check whether the Architectural PerfMon supports
2660 * Branch Misses Retired hw_event or not.
2661 */
2662 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
2663 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
2664 return -ENODEV;
2665
2666 version = eax.split.version_id;
2667 if (version < 2)
2668 x86_pmu = core_pmu;
2669 else
2670 x86_pmu = intel_pmu;
2671
2672 x86_pmu.version = version;
2673 x86_pmu.num_events = eax.split.num_events;
2674 x86_pmu.event_bits = eax.split.bit_width;
2675 x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1;
2676
2677 /*
2678 * Quirk: v2 perfmon does not report fixed-purpose events, so
2679 * assume at least 3 events:
2680 */
2681 if (version > 1)
2682 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
2683
2684 /*
2685 * Install the hw-cache-events table:
2686 */
2687 switch (boot_cpu_data.x86_model) {
2688 case 14: /* 65 nm core solo/duo, "Yonah" */
2689 pr_cont("Core events, ");
2690 break;
2691
2692 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
2693 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
2694 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
2695 case 29: /* six-core 45 nm xeon "Dunnington" */
2696 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
2697 sizeof(hw_cache_event_ids));
2698
2699 x86_pmu.event_constraints = intel_core2_event_constraints;
2700 pr_cont("Core2 events, ");
2701 break;
2702
2703 case 26: /* 45 nm nehalem, "Bloomfield" */
2704 case 30: /* 45 nm nehalem, "Lynnfield" */
2705 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
2706 sizeof(hw_cache_event_ids));
2707
2708 x86_pmu.event_constraints = intel_nehalem_event_constraints;
2709 pr_cont("Nehalem/Corei7 events, ");
2710 break;
2711 case 28:
2712 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2713 sizeof(hw_cache_event_ids));
2714
2715 x86_pmu.event_constraints = intel_gen_event_constraints;
2716 pr_cont("Atom events, ");
2717 break;
2718
2719 case 37: /* 32 nm nehalem, "Clarkdale" */
2720 case 44: /* 32 nm nehalem, "Gulftown" */
2721 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
2722 sizeof(hw_cache_event_ids));
2723
2724 x86_pmu.event_constraints = intel_westmere_event_constraints;
2725 pr_cont("Westmere events, ");
2726 break;
2727 default:
2728 /*
2729 * default constraints for v2 and up
2730 */
2731 x86_pmu.event_constraints = intel_gen_event_constraints;
2732 pr_cont("generic architected perfmon, ");
2733 }
2734 return 0;
2735}
2736
2737static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
2738{
2739 struct amd_nb *nb;
2740 int i;
2741
2742 nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
2743 if (!nb)
2744 return NULL;
2745
2746 memset(nb, 0, sizeof(*nb));
2747 nb->nb_id = nb_id;
2748
2749 /*
2750 * initialize all possible NB constraints
2751 */
2752 for (i = 0; i < x86_pmu.num_events; i++) {
2753 set_bit(i, nb->event_constraints[i].idxmsk);
2754 nb->event_constraints[i].weight = 1;
2755 }
2756 return nb;
2757}
2758
2759static void amd_pmu_cpu_online(int cpu)
2760{
2761 struct cpu_hw_events *cpu1, *cpu2;
2762 struct amd_nb *nb = NULL;
2763 int i, nb_id;
2764
2765 if (boot_cpu_data.x86_max_cores < 2)
2766 return;
2767
2768 /*
2769 * function may be called too early in the
2770 * boot process, in which case nb_id is bogus
2771 */
2772 nb_id = amd_get_nb_id(cpu);
2773 if (nb_id == BAD_APICID)
2774 return;
2775
2776 cpu1 = &per_cpu(cpu_hw_events, cpu);
2777 cpu1->amd_nb = NULL;
2778
2779 raw_spin_lock(&amd_nb_lock);
2780
2781 for_each_online_cpu(i) {
2782 cpu2 = &per_cpu(cpu_hw_events, i);
2783 nb = cpu2->amd_nb;
2784 if (!nb)
2785 continue;
2786 if (nb->nb_id == nb_id)
2787 goto found;
2788 }
2789
2790 nb = amd_alloc_nb(cpu, nb_id);
2791 if (!nb) {
2792 pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
2793 raw_spin_unlock(&amd_nb_lock);
2794 return;
2795 }
2796found:
2797 nb->refcnt++;
2798 cpu1->amd_nb = nb;
2799
2800 raw_spin_unlock(&amd_nb_lock);
2801}
2802
2803static void amd_pmu_cpu_offline(int cpu)
2804{
2805 struct cpu_hw_events *cpuhw;
2806
2807 if (boot_cpu_data.x86_max_cores < 2)
2808 return;
2809
2810 cpuhw = &per_cpu(cpu_hw_events, cpu);
2811
2812 raw_spin_lock(&amd_nb_lock);
2813
2814 if (--cpuhw->amd_nb->refcnt == 0)
2815 kfree(cpuhw->amd_nb);
2816
2817 cpuhw->amd_nb = NULL;
2818
2819 raw_spin_unlock(&amd_nb_lock);
2820}
2821
2822static __init int amd_pmu_init(void)
2823{
2824 /* Performance-monitoring supported from K7 and later: */
2825 if (boot_cpu_data.x86 < 6)
2826 return -ENODEV;
2827
2828 x86_pmu = amd_pmu;
2829
2830 /* Events are common for all AMDs */
2831 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
2832 sizeof(hw_cache_event_ids));
2833
2834 /*
2835 * explicitly initialize the boot cpu, other cpus will get
2836 * the cpu hotplug callbacks from smp_init()
2837 */
2838 amd_pmu_cpu_online(smp_processor_id());
2839 return 0;
2840}
2841 1337
2842static void __init pmu_check_apic(void) 1338static void __init pmu_check_apic(void)
2843{ 1339{
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
new file mode 100644
index 000000000000..6d28e08563e8
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -0,0 +1,416 @@
1#ifdef CONFIG_CPU_SUP_AMD
2
3static raw_spinlock_t amd_nb_lock;
4
5static __initconst u64 amd_hw_cache_event_ids
6 [PERF_COUNT_HW_CACHE_MAX]
7 [PERF_COUNT_HW_CACHE_OP_MAX]
8 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
9{
10 [ C(L1D) ] = {
11 [ C(OP_READ) ] = {
12 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
13 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
14 },
15 [ C(OP_WRITE) ] = {
16 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
17 [ C(RESULT_MISS) ] = 0,
18 },
19 [ C(OP_PREFETCH) ] = {
20 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
21 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
22 },
23 },
24 [ C(L1I ) ] = {
25 [ C(OP_READ) ] = {
26 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
27 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
28 },
29 [ C(OP_WRITE) ] = {
30 [ C(RESULT_ACCESS) ] = -1,
31 [ C(RESULT_MISS) ] = -1,
32 },
33 [ C(OP_PREFETCH) ] = {
34 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
35 [ C(RESULT_MISS) ] = 0,
36 },
37 },
38 [ C(LL ) ] = {
39 [ C(OP_READ) ] = {
40 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
41 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
42 },
43 [ C(OP_WRITE) ] = {
44 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
45 [ C(RESULT_MISS) ] = 0,
46 },
47 [ C(OP_PREFETCH) ] = {
48 [ C(RESULT_ACCESS) ] = 0,
49 [ C(RESULT_MISS) ] = 0,
50 },
51 },
52 [ C(DTLB) ] = {
53 [ C(OP_READ) ] = {
54 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
55 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
56 },
57 [ C(OP_WRITE) ] = {
58 [ C(RESULT_ACCESS) ] = 0,
59 [ C(RESULT_MISS) ] = 0,
60 },
61 [ C(OP_PREFETCH) ] = {
62 [ C(RESULT_ACCESS) ] = 0,
63 [ C(RESULT_MISS) ] = 0,
64 },
65 },
66 [ C(ITLB) ] = {
67 [ C(OP_READ) ] = {
68 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */
69 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
70 },
71 [ C(OP_WRITE) ] = {
72 [ C(RESULT_ACCESS) ] = -1,
73 [ C(RESULT_MISS) ] = -1,
74 },
75 [ C(OP_PREFETCH) ] = {
76 [ C(RESULT_ACCESS) ] = -1,
77 [ C(RESULT_MISS) ] = -1,
78 },
79 },
80 [ C(BPU ) ] = {
81 [ C(OP_READ) ] = {
82 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
83 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
84 },
85 [ C(OP_WRITE) ] = {
86 [ C(RESULT_ACCESS) ] = -1,
87 [ C(RESULT_MISS) ] = -1,
88 },
89 [ C(OP_PREFETCH) ] = {
90 [ C(RESULT_ACCESS) ] = -1,
91 [ C(RESULT_MISS) ] = -1,
92 },
93 },
94};
95
96/*
97 * AMD Performance Monitor K7 and later.
98 */
99static const u64 amd_perfmon_event_map[] =
100{
101 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
102 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
103 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
104 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
105 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
106 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
107};
108
109static u64 amd_pmu_event_map(int hw_event)
110{
111 return amd_perfmon_event_map[hw_event];
112}
113
114static u64 amd_pmu_raw_event(u64 hw_event)
115{
116#define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL
117#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
118#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
119#define K7_EVNTSEL_INV_MASK 0x000800000ULL
120#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL
121
122#define K7_EVNTSEL_MASK \
123 (K7_EVNTSEL_EVENT_MASK | \
124 K7_EVNTSEL_UNIT_MASK | \
125 K7_EVNTSEL_EDGE_MASK | \
126 K7_EVNTSEL_INV_MASK | \
127 K7_EVNTSEL_REG_MASK)
128
129 return hw_event & K7_EVNTSEL_MASK;
130}
131
132/*
133 * AMD64 events are detected based on their event codes.
134 */
135static inline int amd_is_nb_event(struct hw_perf_event *hwc)
136{
137 return (hwc->config & 0xe0) == 0xe0;
138}
139
140static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
141 struct perf_event *event)
142{
143 struct hw_perf_event *hwc = &event->hw;
144 struct amd_nb *nb = cpuc->amd_nb;
145 int i;
146
147 /*
148 * only care about NB events
149 */
150 if (!(nb && amd_is_nb_event(hwc)))
151 return;
152
153 /*
154 * need to scan whole list because event may not have
155 * been assigned during scheduling
156 *
157 * no race condition possible because event can only
158 * be removed on one CPU at a time AND PMU is disabled
159 * when we come here
160 */
161 for (i = 0; i < x86_pmu.num_events; i++) {
162 if (nb->owners[i] == event) {
163 cmpxchg(nb->owners+i, event, NULL);
164 break;
165 }
166 }
167}
168
169 /*
170 * AMD64 NorthBridge events need special treatment because
171 * counter access needs to be synchronized across all cores
172 * of a package. Refer to BKDG section 3.12
173 *
174 * NB events are events measuring L3 cache, Hypertransport
175 * traffic. They are identified by an event code >= 0xe00.
176 * They measure events on the NorthBride which is shared
177 * by all cores on a package. NB events are counted on a
178 * shared set of counters. When a NB event is programmed
179 * in a counter, the data actually comes from a shared
180 * counter. Thus, access to those counters needs to be
181 * synchronized.
182 *
183 * We implement the synchronization such that no two cores
184 * can be measuring NB events using the same counters. Thus,
185 * we maintain a per-NB allocation table. The available slot
186 * is propagated using the event_constraint structure.
187 *
188 * We provide only one choice for each NB event based on
189 * the fact that only NB events have restrictions. Consequently,
190 * if a counter is available, there is a guarantee the NB event
191 * will be assigned to it. If no slot is available, an empty
192 * constraint is returned and scheduling will eventually fail
193 * for this event.
194 *
195 * Note that all cores attached the same NB compete for the same
196 * counters to host NB events, this is why we use atomic ops. Some
197 * multi-chip CPUs may have more than one NB.
198 *
199 * Given that resources are allocated (cmpxchg), they must be
200 * eventually freed for others to use. This is accomplished by
201 * calling amd_put_event_constraints().
202 *
203 * Non NB events are not impacted by this restriction.
204 */
205static struct event_constraint *
206amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
207{
208 struct hw_perf_event *hwc = &event->hw;
209 struct amd_nb *nb = cpuc->amd_nb;
210 struct perf_event *old = NULL;
211 int max = x86_pmu.num_events;
212 int i, j, k = -1;
213
214 /*
215 * if not NB event or no NB, then no constraints
216 */
217 if (!(nb && amd_is_nb_event(hwc)))
218 return &unconstrained;
219
220 /*
221 * detect if already present, if so reuse
222 *
223 * cannot merge with actual allocation
224 * because of possible holes
225 *
226 * event can already be present yet not assigned (in hwc->idx)
227 * because of successive calls to x86_schedule_events() from
228 * hw_perf_group_sched_in() without hw_perf_enable()
229 */
230 for (i = 0; i < max; i++) {
231 /*
232 * keep track of first free slot
233 */
234 if (k == -1 && !nb->owners[i])
235 k = i;
236
237 /* already present, reuse */
238 if (nb->owners[i] == event)
239 goto done;
240 }
241 /*
242 * not present, so grab a new slot
243 * starting either at:
244 */
245 if (hwc->idx != -1) {
246 /* previous assignment */
247 i = hwc->idx;
248 } else if (k != -1) {
249 /* start from free slot found */
250 i = k;
251 } else {
252 /*
253 * event not found, no slot found in
254 * first pass, try again from the
255 * beginning
256 */
257 i = 0;
258 }
259 j = i;
260 do {
261 old = cmpxchg(nb->owners+i, NULL, event);
262 if (!old)
263 break;
264 if (++i == max)
265 i = 0;
266 } while (i != j);
267done:
268 if (!old)
269 return &nb->event_constraints[i];
270
271 return &emptyconstraint;
272}
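
A minimal user-space sketch of the slot-claiming scheme described in the comment above amd_get_event_constraints(): each NorthBridge keeps one owner pointer per counter, and a core claims a slot with a compare-and-swap so no two cores book the same NB counter. The struct and helper names, and the GCC __sync builtin standing in for the kernel's cmpxchg(), are illustrative assumptions, not part of this patch.

#include <stddef.h>

#define NB_NUM_COUNTERS 4

struct nb_table {
	void *owners[NB_NUM_COUNTERS];		/* NULL means the slot is free */
};

/* Claim a counter slot for 'event', scanning from 'start'; -1 if all are taken. */
static int nb_claim_slot(struct nb_table *nb, void *event, int start)
{
	int i = start, j = start;

	do {
		/* atomically: if owners[i] is NULL, install 'event' */
		void *old = __sync_val_compare_and_swap(&nb->owners[i],
							NULL, event);
		if (old == NULL || old == event)
			return i;		/* slot is ours (or already was) */
		if (++i == NB_NUM_COUNTERS)
			i = 0;			/* wrap around and keep scanning */
	} while (i != j);

	return -1;	/* no free slot: the event gets the empty constraint */
}

/* Release side, mirroring amd_put_event_constraints() above. */
static void nb_release_slot(struct nb_table *nb, void *event, int idx)
{
	__sync_val_compare_and_swap(&nb->owners[idx], event, NULL);
}
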
273
274static __initconst struct x86_pmu amd_pmu = {
275 .name = "AMD",
276 .handle_irq = x86_pmu_handle_irq,
277 .disable_all = x86_pmu_disable_all,
278 .enable_all = x86_pmu_enable_all,
279 .enable = x86_pmu_enable_event,
280 .disable = x86_pmu_disable_event,
281 .eventsel = MSR_K7_EVNTSEL0,
282 .perfctr = MSR_K7_PERFCTR0,
283 .event_map = amd_pmu_event_map,
284 .raw_event = amd_pmu_raw_event,
285 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
286 .num_events = 4,
287 .event_bits = 48,
288 .event_mask = (1ULL << 48) - 1,
289 .apic = 1,
290 /* use highest bit to detect overflow */
291 .max_period = (1ULL << 47) - 1,
292 .get_event_constraints = amd_get_event_constraints,
293 .put_event_constraints = amd_put_event_constraints
294};
295
296static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
297{
298 struct amd_nb *nb;
299 int i;
300
301 nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
302 if (!nb)
303 return NULL;
304
305 memset(nb, 0, sizeof(*nb));
306 nb->nb_id = nb_id;
307
308 /*
309 * initialize all possible NB constraints
310 */
311 for (i = 0; i < x86_pmu.num_events; i++) {
312 set_bit(i, nb->event_constraints[i].idxmsk);
313 nb->event_constraints[i].weight = 1;
314 }
315 return nb;
316}
317
318static void amd_pmu_cpu_online(int cpu)
319{
320 struct cpu_hw_events *cpu1, *cpu2;
321 struct amd_nb *nb = NULL;
322 int i, nb_id;
323
324 if (boot_cpu_data.x86_max_cores < 2)
325 return;
326
327 /*
328 * function may be called too early in the
329 * boot process, in which case nb_id is bogus
330 */
331 nb_id = amd_get_nb_id(cpu);
332 if (nb_id == BAD_APICID)
333 return;
334
335 cpu1 = &per_cpu(cpu_hw_events, cpu);
336 cpu1->amd_nb = NULL;
337
338 raw_spin_lock(&amd_nb_lock);
339
340 for_each_online_cpu(i) {
341 cpu2 = &per_cpu(cpu_hw_events, i);
342 nb = cpu2->amd_nb;
343 if (!nb)
344 continue;
345 if (nb->nb_id == nb_id)
346 goto found;
347 }
348
349 nb = amd_alloc_nb(cpu, nb_id);
350 if (!nb) {
351 pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
352 raw_spin_unlock(&amd_nb_lock);
353 return;
354 }
355found:
356 nb->refcnt++;
357 cpu1->amd_nb = nb;
358
359 raw_spin_unlock(&amd_nb_lock);
360}
361
362static void amd_pmu_cpu_offline(int cpu)
363{
364 struct cpu_hw_events *cpuhw;
365
366 if (boot_cpu_data.x86_max_cores < 2)
367 return;
368
369 cpuhw = &per_cpu(cpu_hw_events, cpu);
370
371 raw_spin_lock(&amd_nb_lock);
372
373 if (--cpuhw->amd_nb->refcnt == 0)
374 kfree(cpuhw->amd_nb);
375
376 cpuhw->amd_nb = NULL;
377
378 raw_spin_unlock(&amd_nb_lock);
379}
380
381static __init int amd_pmu_init(void)
382{
383 /* Performance-monitoring supported from K7 and later: */
384 if (boot_cpu_data.x86 < 6)
385 return -ENODEV;
386
387 x86_pmu = amd_pmu;
388
389 /* Events are common for all AMDs */
390 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
391 sizeof(hw_cache_event_ids));
392
393 /*
394 * explicitly initialize the boot cpu, other cpus will get
395 * the cpu hotplug callbacks from smp_init()
396 */
397 amd_pmu_cpu_online(smp_processor_id());
398 return 0;
399}
400
401#else /* CONFIG_CPU_SUP_AMD */
402
403static int amd_pmu_init(void)
404{
405 return 0;
406}
407
408static void amd_pmu_cpu_online(int cpu)
409{
410}
411
412static void amd_pmu_cpu_offline(int cpu)
413{
414}
415
416#endif
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
new file mode 100644
index 000000000000..cf6590cf4a5f
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -0,0 +1,971 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3/*
4 * Intel PerfMon v3. Used on Core2 and later.
5 */
6static const u64 intel_perfmon_event_map[] =
7{
8 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
9 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
10 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
11 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
12 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
13 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
14 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
15};
16
17static struct event_constraint intel_core_event_constraints[] =
18{
19 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
20 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
21 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
22 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
23 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
24 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
25 EVENT_CONSTRAINT_END
26};
27
28static struct event_constraint intel_core2_event_constraints[] =
29{
30 FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
31 FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
32 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
33 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
34 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
35 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
36 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
37 INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
38 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
39 INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
40 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
41 EVENT_CONSTRAINT_END
42};
43
44static struct event_constraint intel_nehalem_event_constraints[] =
45{
46 FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
47 FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
48 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
49 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
50 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
51 INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
52 INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
53 INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
54 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
55 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
56 EVENT_CONSTRAINT_END
57};
58
59static struct event_constraint intel_westmere_event_constraints[] =
60{
61 FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
62 FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
63 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
64 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
65 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
66 EVENT_CONSTRAINT_END
67};
68
69static struct event_constraint intel_gen_event_constraints[] =
70{
71 FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
72 FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
73 EVENT_CONSTRAINT_END
74};
75
76static u64 intel_pmu_event_map(int hw_event)
77{
78 return intel_perfmon_event_map[hw_event];
79}
80
81static __initconst u64 westmere_hw_cache_event_ids
82 [PERF_COUNT_HW_CACHE_MAX]
83 [PERF_COUNT_HW_CACHE_OP_MAX]
84 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
85{
86 [ C(L1D) ] = {
87 [ C(OP_READ) ] = {
88 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
89 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
90 },
91 [ C(OP_WRITE) ] = {
 92 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
93 [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
94 },
95 [ C(OP_PREFETCH) ] = {
96 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
97 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
98 },
99 },
100 [ C(L1I ) ] = {
101 [ C(OP_READ) ] = {
102 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
103 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
104 },
105 [ C(OP_WRITE) ] = {
106 [ C(RESULT_ACCESS) ] = -1,
107 [ C(RESULT_MISS) ] = -1,
108 },
109 [ C(OP_PREFETCH) ] = {
110 [ C(RESULT_ACCESS) ] = 0x0,
111 [ C(RESULT_MISS) ] = 0x0,
112 },
113 },
114 [ C(LL ) ] = {
115 [ C(OP_READ) ] = {
116 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
117 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
118 },
119 [ C(OP_WRITE) ] = {
120 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
121 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
122 },
123 [ C(OP_PREFETCH) ] = {
124 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
125 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
126 },
127 },
128 [ C(DTLB) ] = {
129 [ C(OP_READ) ] = {
130 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
131 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
132 },
133 [ C(OP_WRITE) ] = {
134 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
135 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
136 },
137 [ C(OP_PREFETCH) ] = {
138 [ C(RESULT_ACCESS) ] = 0x0,
139 [ C(RESULT_MISS) ] = 0x0,
140 },
141 },
142 [ C(ITLB) ] = {
143 [ C(OP_READ) ] = {
144 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
145 [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
146 },
147 [ C(OP_WRITE) ] = {
148 [ C(RESULT_ACCESS) ] = -1,
149 [ C(RESULT_MISS) ] = -1,
150 },
151 [ C(OP_PREFETCH) ] = {
152 [ C(RESULT_ACCESS) ] = -1,
153 [ C(RESULT_MISS) ] = -1,
154 },
155 },
156 [ C(BPU ) ] = {
157 [ C(OP_READ) ] = {
158 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
159 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
160 },
161 [ C(OP_WRITE) ] = {
162 [ C(RESULT_ACCESS) ] = -1,
163 [ C(RESULT_MISS) ] = -1,
164 },
165 [ C(OP_PREFETCH) ] = {
166 [ C(RESULT_ACCESS) ] = -1,
167 [ C(RESULT_MISS) ] = -1,
168 },
169 },
170};
171
172static __initconst u64 nehalem_hw_cache_event_ids
173 [PERF_COUNT_HW_CACHE_MAX]
174 [PERF_COUNT_HW_CACHE_OP_MAX]
175 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
176{
177 [ C(L1D) ] = {
178 [ C(OP_READ) ] = {
179 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
180 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
181 },
182 [ C(OP_WRITE) ] = {
183 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
184 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
185 },
186 [ C(OP_PREFETCH) ] = {
187 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
188 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
189 },
190 },
191 [ C(L1I ) ] = {
192 [ C(OP_READ) ] = {
193 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
194 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
195 },
196 [ C(OP_WRITE) ] = {
197 [ C(RESULT_ACCESS) ] = -1,
198 [ C(RESULT_MISS) ] = -1,
199 },
200 [ C(OP_PREFETCH) ] = {
201 [ C(RESULT_ACCESS) ] = 0x0,
202 [ C(RESULT_MISS) ] = 0x0,
203 },
204 },
205 [ C(LL ) ] = {
206 [ C(OP_READ) ] = {
207 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
208 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
209 },
210 [ C(OP_WRITE) ] = {
211 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
212 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
213 },
214 [ C(OP_PREFETCH) ] = {
215 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
216 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
217 },
218 },
219 [ C(DTLB) ] = {
220 [ C(OP_READ) ] = {
221 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
222 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
223 },
224 [ C(OP_WRITE) ] = {
225 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
226 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
227 },
228 [ C(OP_PREFETCH) ] = {
229 [ C(RESULT_ACCESS) ] = 0x0,
230 [ C(RESULT_MISS) ] = 0x0,
231 },
232 },
233 [ C(ITLB) ] = {
234 [ C(OP_READ) ] = {
235 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
236 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
237 },
238 [ C(OP_WRITE) ] = {
239 [ C(RESULT_ACCESS) ] = -1,
240 [ C(RESULT_MISS) ] = -1,
241 },
242 [ C(OP_PREFETCH) ] = {
243 [ C(RESULT_ACCESS) ] = -1,
244 [ C(RESULT_MISS) ] = -1,
245 },
246 },
247 [ C(BPU ) ] = {
248 [ C(OP_READ) ] = {
249 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
250 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
251 },
252 [ C(OP_WRITE) ] = {
253 [ C(RESULT_ACCESS) ] = -1,
254 [ C(RESULT_MISS) ] = -1,
255 },
256 [ C(OP_PREFETCH) ] = {
257 [ C(RESULT_ACCESS) ] = -1,
258 [ C(RESULT_MISS) ] = -1,
259 },
260 },
261};
262
263static __initconst u64 core2_hw_cache_event_ids
264 [PERF_COUNT_HW_CACHE_MAX]
265 [PERF_COUNT_HW_CACHE_OP_MAX]
266 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
267{
268 [ C(L1D) ] = {
269 [ C(OP_READ) ] = {
270 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
271 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
272 },
273 [ C(OP_WRITE) ] = {
274 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
275 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
276 },
277 [ C(OP_PREFETCH) ] = {
278 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
279 [ C(RESULT_MISS) ] = 0,
280 },
281 },
282 [ C(L1I ) ] = {
283 [ C(OP_READ) ] = {
284 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
285 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
286 },
287 [ C(OP_WRITE) ] = {
288 [ C(RESULT_ACCESS) ] = -1,
289 [ C(RESULT_MISS) ] = -1,
290 },
291 [ C(OP_PREFETCH) ] = {
292 [ C(RESULT_ACCESS) ] = 0,
293 [ C(RESULT_MISS) ] = 0,
294 },
295 },
296 [ C(LL ) ] = {
297 [ C(OP_READ) ] = {
298 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
299 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
300 },
301 [ C(OP_WRITE) ] = {
302 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
303 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
304 },
305 [ C(OP_PREFETCH) ] = {
306 [ C(RESULT_ACCESS) ] = 0,
307 [ C(RESULT_MISS) ] = 0,
308 },
309 },
310 [ C(DTLB) ] = {
311 [ C(OP_READ) ] = {
312 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
313 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
314 },
315 [ C(OP_WRITE) ] = {
316 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
317 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
318 },
319 [ C(OP_PREFETCH) ] = {
320 [ C(RESULT_ACCESS) ] = 0,
321 [ C(RESULT_MISS) ] = 0,
322 },
323 },
324 [ C(ITLB) ] = {
325 [ C(OP_READ) ] = {
326 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
327 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
328 },
329 [ C(OP_WRITE) ] = {
330 [ C(RESULT_ACCESS) ] = -1,
331 [ C(RESULT_MISS) ] = -1,
332 },
333 [ C(OP_PREFETCH) ] = {
334 [ C(RESULT_ACCESS) ] = -1,
335 [ C(RESULT_MISS) ] = -1,
336 },
337 },
338 [ C(BPU ) ] = {
339 [ C(OP_READ) ] = {
340 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
341 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
342 },
343 [ C(OP_WRITE) ] = {
344 [ C(RESULT_ACCESS) ] = -1,
345 [ C(RESULT_MISS) ] = -1,
346 },
347 [ C(OP_PREFETCH) ] = {
348 [ C(RESULT_ACCESS) ] = -1,
349 [ C(RESULT_MISS) ] = -1,
350 },
351 },
352};
353
354static __initconst u64 atom_hw_cache_event_ids
355 [PERF_COUNT_HW_CACHE_MAX]
356 [PERF_COUNT_HW_CACHE_OP_MAX]
357 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
358{
359 [ C(L1D) ] = {
360 [ C(OP_READ) ] = {
361 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
362 [ C(RESULT_MISS) ] = 0,
363 },
364 [ C(OP_WRITE) ] = {
365 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
366 [ C(RESULT_MISS) ] = 0,
367 },
368 [ C(OP_PREFETCH) ] = {
369 [ C(RESULT_ACCESS) ] = 0x0,
370 [ C(RESULT_MISS) ] = 0,
371 },
372 },
373 [ C(L1I ) ] = {
374 [ C(OP_READ) ] = {
375 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
376 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
377 },
378 [ C(OP_WRITE) ] = {
379 [ C(RESULT_ACCESS) ] = -1,
380 [ C(RESULT_MISS) ] = -1,
381 },
382 [ C(OP_PREFETCH) ] = {
383 [ C(RESULT_ACCESS) ] = 0,
384 [ C(RESULT_MISS) ] = 0,
385 },
386 },
387 [ C(LL ) ] = {
388 [ C(OP_READ) ] = {
389 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
390 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
391 },
392 [ C(OP_WRITE) ] = {
393 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
394 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
395 },
396 [ C(OP_PREFETCH) ] = {
397 [ C(RESULT_ACCESS) ] = 0,
398 [ C(RESULT_MISS) ] = 0,
399 },
400 },
401 [ C(DTLB) ] = {
402 [ C(OP_READ) ] = {
403 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
404 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
405 },
406 [ C(OP_WRITE) ] = {
407 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
408 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
409 },
410 [ C(OP_PREFETCH) ] = {
411 [ C(RESULT_ACCESS) ] = 0,
412 [ C(RESULT_MISS) ] = 0,
413 },
414 },
415 [ C(ITLB) ] = {
416 [ C(OP_READ) ] = {
417 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
418 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
419 },
420 [ C(OP_WRITE) ] = {
421 [ C(RESULT_ACCESS) ] = -1,
422 [ C(RESULT_MISS) ] = -1,
423 },
424 [ C(OP_PREFETCH) ] = {
425 [ C(RESULT_ACCESS) ] = -1,
426 [ C(RESULT_MISS) ] = -1,
427 },
428 },
429 [ C(BPU ) ] = {
430 [ C(OP_READ) ] = {
431 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
432 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
433 },
434 [ C(OP_WRITE) ] = {
435 [ C(RESULT_ACCESS) ] = -1,
436 [ C(RESULT_MISS) ] = -1,
437 },
438 [ C(OP_PREFETCH) ] = {
439 [ C(RESULT_ACCESS) ] = -1,
440 [ C(RESULT_MISS) ] = -1,
441 },
442 },
443};
444
445static u64 intel_pmu_raw_event(u64 hw_event)
446{
447#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
448#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
449#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
450#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
451#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
452
453#define CORE_EVNTSEL_MASK \
454 (INTEL_ARCH_EVTSEL_MASK | \
455 INTEL_ARCH_UNIT_MASK | \
456 INTEL_ARCH_EDGE_MASK | \
457 INTEL_ARCH_INV_MASK | \
458 INTEL_ARCH_CNT_MASK)
459
460 return hw_event & CORE_EVNTSEL_MASK;
461}
462
463static void intel_pmu_enable_bts(u64 config)
464{
465 unsigned long debugctlmsr;
466
467 debugctlmsr = get_debugctlmsr();
468
469 debugctlmsr |= X86_DEBUGCTL_TR;
470 debugctlmsr |= X86_DEBUGCTL_BTS;
471 debugctlmsr |= X86_DEBUGCTL_BTINT;
472
473 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
474 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
475
476 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
477 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
478
479 update_debugctlmsr(debugctlmsr);
480}
481
482static void intel_pmu_disable_bts(void)
483{
484 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
485 unsigned long debugctlmsr;
486
487 if (!cpuc->ds)
488 return;
489
490 debugctlmsr = get_debugctlmsr();
491
492 debugctlmsr &=
493 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
494 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
495
496 update_debugctlmsr(debugctlmsr);
497}
498
499static void intel_pmu_disable_all(void)
500{
501 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
502
503 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
504
505 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
506 intel_pmu_disable_bts();
507}
508
509static void intel_pmu_enable_all(void)
510{
511 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
512
513 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
514
515 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
516 struct perf_event *event =
517 cpuc->events[X86_PMC_IDX_FIXED_BTS];
518
519 if (WARN_ON_ONCE(!event))
520 return;
521
522 intel_pmu_enable_bts(event->hw.config);
523 }
524}
525
526static inline u64 intel_pmu_get_status(void)
527{
528 u64 status;
529
530 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
531
532 return status;
533}
534
535static inline void intel_pmu_ack_status(u64 ack)
536{
537 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
538}
539
540static inline void
541intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
542{
543 int idx = __idx - X86_PMC_IDX_FIXED;
544 u64 ctrl_val, mask;
545
546 mask = 0xfULL << (idx * 4);
547
548 rdmsrl(hwc->config_base, ctrl_val);
549 ctrl_val &= ~mask;
550 (void)checking_wrmsrl(hwc->config_base, ctrl_val);
551}
552
553static void intel_pmu_drain_bts_buffer(void)
554{
555 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
556 struct debug_store *ds = cpuc->ds;
557 struct bts_record {
558 u64 from;
559 u64 to;
560 u64 flags;
561 };
562 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
563 struct bts_record *at, *top;
564 struct perf_output_handle handle;
565 struct perf_event_header header;
566 struct perf_sample_data data;
567 struct pt_regs regs;
568
569 if (!event)
570 return;
571
572 if (!ds)
573 return;
574
575 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
576 top = (struct bts_record *)(unsigned long)ds->bts_index;
577
578 if (top <= at)
579 return;
580
581 ds->bts_index = ds->bts_buffer_base;
582
583
584 data.period = event->hw.last_period;
585 data.addr = 0;
586 data.raw = NULL;
587 regs.ip = 0;
588
589 /*
590 * Prepare a generic sample, i.e. fill in the invariant fields.
591 * We will overwrite the from and to address before we output
592 * the sample.
593 */
594 perf_prepare_sample(&header, &data, event, &regs);
595
596 if (perf_output_begin(&handle, event,
597 header.size * (top - at), 1, 1))
598 return;
599
600 for (; at < top; at++) {
601 data.ip = at->from;
602 data.addr = at->to;
603
604 perf_output_sample(&handle, &header, &data, event);
605 }
606
607 perf_output_end(&handle);
608
609 /* There's new data available. */
610 event->hw.interrupts++;
611 event->pending_kill = POLL_IN;
612}
613
614static inline void
615intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
616{
617 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
618 intel_pmu_disable_bts();
619 intel_pmu_drain_bts_buffer();
620 return;
621 }
622
623 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
624 intel_pmu_disable_fixed(hwc, idx);
625 return;
626 }
627
628 x86_pmu_disable_event(hwc, idx);
629}
630
631static inline void
632intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
633{
634 int idx = __idx - X86_PMC_IDX_FIXED;
635 u64 ctrl_val, bits, mask;
636 int err;
637
638 /*
639 * Enable IRQ generation (0x8),
640 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
641 * if requested:
642 */
643 bits = 0x8ULL;
644 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
645 bits |= 0x2;
646 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
647 bits |= 0x1;
648
649 /*
650 * ANY bit is supported in v3 and up
651 */
652 if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
653 bits |= 0x4;
654
655 bits <<= (idx * 4);
656 mask = 0xfULL << (idx * 4);
657
658 rdmsrl(hwc->config_base, ctrl_val);
659 ctrl_val &= ~mask;
660 ctrl_val |= bits;
661 err = checking_wrmsrl(hwc->config_base, ctrl_val);
662}
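
For reference, a small sketch of how the 4-bit control field written by intel_pmu_enable_fixed() above is composed for fixed counter 'idx' in MSR_ARCH_PERFMON_FIXED_CTR_CTRL. The helper name is an assumption for illustration only, not kernel code.

#include <stdint.h>
#include <stdbool.h>

/* Per fixed counter: 0x1 = ring 0, 0x2 = ring 3, 0x4 = ANY-thread (v3+), 0x8 = PMI. */
static uint64_t fixed_ctr_ctrl_bits(int idx, bool usr, bool os, bool any)
{
	uint64_t bits = 0x8;			/* always request PMI on overflow */

	if (usr)
		bits |= 0x2;			/* count in ring 3 */
	if (os)
		bits |= 0x1;			/* count in ring 0 */
	if (any)
		bits |= 0x4;			/* ANY-thread bit, perfmon v3 and up */

	return bits << (idx * 4);		/* each fixed counter owns one nibble */
}
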
663
664static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
665{
666 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
667 if (!__get_cpu_var(cpu_hw_events).enabled)
668 return;
669
670 intel_pmu_enable_bts(hwc->config);
671 return;
672 }
673
674 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
675 intel_pmu_enable_fixed(hwc, idx);
676 return;
677 }
678
679 __x86_pmu_enable_event(hwc, idx);
680}
681
682/*
683 * Save and restart an expired event. Called by NMI contexts,
684 * so it has to be careful about preempting normal event ops:
685 */
686static int intel_pmu_save_and_restart(struct perf_event *event)
687{
688 struct hw_perf_event *hwc = &event->hw;
689 int idx = hwc->idx;
690 int ret;
691
692 x86_perf_event_update(event, hwc, idx);
693 ret = x86_perf_event_set_period(event, hwc, idx);
694
695 return ret;
696}
697
698static void intel_pmu_reset(void)
699{
700 struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
701 unsigned long flags;
702 int idx;
703
704 if (!x86_pmu.num_events)
705 return;
706
707 local_irq_save(flags);
708
709 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
710
711 for (idx = 0; idx < x86_pmu.num_events; idx++) {
712 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
713 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
714 }
715 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
716 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
717 }
718 if (ds)
719 ds->bts_index = ds->bts_buffer_base;
720
721 local_irq_restore(flags);
722}
723
724/*
725 * This handler is triggered by the local APIC, so the APIC IRQ handling
726 * rules apply:
727 */
728static int intel_pmu_handle_irq(struct pt_regs *regs)
729{
730 struct perf_sample_data data;
731 struct cpu_hw_events *cpuc;
732 int bit, loops;
733 u64 ack, status;
734
735 data.addr = 0;
736 data.raw = NULL;
737
738 cpuc = &__get_cpu_var(cpu_hw_events);
739
740 perf_disable();
741 intel_pmu_drain_bts_buffer();
742 status = intel_pmu_get_status();
743 if (!status) {
744 perf_enable();
745 return 0;
746 }
747
748 loops = 0;
749again:
750 if (++loops > 100) {
751 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
752 perf_event_print_debug();
753 intel_pmu_reset();
754 perf_enable();
755 return 1;
756 }
757
758 inc_irq_stat(apic_perf_irqs);
759 ack = status;
760 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
761 struct perf_event *event = cpuc->events[bit];
762
763 clear_bit(bit, (unsigned long *) &status);
764 if (!test_bit(bit, cpuc->active_mask))
765 continue;
766
767 if (!intel_pmu_save_and_restart(event))
768 continue;
769
770 data.period = event->hw.last_period;
771
772 if (perf_event_overflow(event, 1, &data, regs))
773 intel_pmu_disable_event(&event->hw, bit);
774 }
775
776 intel_pmu_ack_status(ack);
777
778 /*
779 * Repeat if there is more work to be done:
780 */
781 status = intel_pmu_get_status();
782 if (status)
783 goto again;
784
785 perf_enable();
786
787 return 1;
788}
789
790static struct event_constraint bts_constraint =
791 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
792
793static struct event_constraint *
794intel_special_constraints(struct perf_event *event)
795{
796 unsigned int hw_event;
797
798 hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
799
800 if (unlikely((hw_event ==
801 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
802 (event->hw.sample_period == 1))) {
803
804 return &bts_constraint;
805 }
806 return NULL;
807}
808
809static struct event_constraint *
810intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
811{
812 struct event_constraint *c;
813
814 c = intel_special_constraints(event);
815 if (c)
816 return c;
817
818 return x86_get_event_constraints(cpuc, event);
819}
820
821static __initconst struct x86_pmu core_pmu = {
822 .name = "core",
823 .handle_irq = x86_pmu_handle_irq,
824 .disable_all = x86_pmu_disable_all,
825 .enable_all = x86_pmu_enable_all,
826 .enable = x86_pmu_enable_event,
827 .disable = x86_pmu_disable_event,
828 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
829 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
830 .event_map = intel_pmu_event_map,
831 .raw_event = intel_pmu_raw_event,
832 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
833 .apic = 1,
834 /*
835 * Intel PMCs cannot be accessed sanely above 32 bit width,
836 * so we install an artificial 1<<31 period regardless of
837 * the generic event period:
838 */
839 .max_period = (1ULL << 31) - 1,
840 .get_event_constraints = intel_get_event_constraints,
841 .event_constraints = intel_core_event_constraints,
842};
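
The max_period comment in core_pmu (and in intel_pmu below) can be illustrated with the usual period-programming arithmetic: the requested period is clamped to max_period and the counter is preloaded with its negated value so it overflows after that many events. This is a hedged sketch under those assumptions, not the code from x86_perf_event_set_period(); the helper name is hypothetical.

#include <stdint.h>

/* Clamp a requested period and compute the value to preload into the counter. */
static uint64_t counter_preload(uint64_t period, uint64_t max_period,
				uint64_t counter_mask)
{
	if (period > max_period)
		period = max_period;		/* (1ULL << 31) - 1 here */

	/* write -period so the counter overflows after 'period' increments */
	return (uint64_t)(-(int64_t)period) & counter_mask;
}
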
843
844static __initconst struct x86_pmu intel_pmu = {
845 .name = "Intel",
846 .handle_irq = intel_pmu_handle_irq,
847 .disable_all = intel_pmu_disable_all,
848 .enable_all = intel_pmu_enable_all,
849 .enable = intel_pmu_enable_event,
850 .disable = intel_pmu_disable_event,
851 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
852 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
853 .event_map = intel_pmu_event_map,
854 .raw_event = intel_pmu_raw_event,
855 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
856 .apic = 1,
857 /*
858 * Intel PMCs cannot be accessed sanely above 32 bit width,
859 * so we install an artificial 1<<31 period regardless of
860 * the generic event period:
861 */
862 .max_period = (1ULL << 31) - 1,
863 .enable_bts = intel_pmu_enable_bts,
864 .disable_bts = intel_pmu_disable_bts,
865 .get_event_constraints = intel_get_event_constraints
866};
867
868static __init int intel_pmu_init(void)
869{
870 union cpuid10_edx edx;
871 union cpuid10_eax eax;
872 unsigned int unused;
873 unsigned int ebx;
874 int version;
875
876 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
877 /* check for P6 processor family */
878 if (boot_cpu_data.x86 == 6) {
879 return p6_pmu_init();
880 } else {
881 return -ENODEV;
882 }
883 }
884
885 /*
886 * Check whether the Architectural PerfMon supports
887 * Branch Misses Retired hw_event or not.
888 */
889 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
890 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
891 return -ENODEV;
892
893 version = eax.split.version_id;
894 if (version < 2)
895 x86_pmu = core_pmu;
896 else
897 x86_pmu = intel_pmu;
898
899 x86_pmu.version = version;
900 x86_pmu.num_events = eax.split.num_events;
901 x86_pmu.event_bits = eax.split.bit_width;
902 x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1;
903
904 /*
905 * Quirk: v2 perfmon does not report fixed-purpose events, so
906 * assume at least 3 events:
907 */
908 if (version > 1)
909 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
910
911 /*
912 * Install the hw-cache-events table:
913 */
914 switch (boot_cpu_data.x86_model) {
915 case 14: /* 65 nm core solo/duo, "Yonah" */
916 pr_cont("Core events, ");
917 break;
918
919 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
920 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
921 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
922 case 29: /* six-core 45 nm xeon "Dunnington" */
923 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
924 sizeof(hw_cache_event_ids));
925
926 x86_pmu.event_constraints = intel_core2_event_constraints;
927 pr_cont("Core2 events, ");
928 break;
929
930 case 26: /* 45 nm nehalem, "Bloomfield" */
931 case 30: /* 45 nm nehalem, "Lynnfield" */
932 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
933 sizeof(hw_cache_event_ids));
934
935 x86_pmu.event_constraints = intel_nehalem_event_constraints;
936 pr_cont("Nehalem/Corei7 events, ");
937 break;
938 case 28: /* 45 nm Atom */
939 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
940 sizeof(hw_cache_event_ids));
941
942 x86_pmu.event_constraints = intel_gen_event_constraints;
943 pr_cont("Atom events, ");
944 break;
945
946 case 37: /* 32 nm nehalem, "Clarkdale" */
947 case 44: /* 32 nm nehalem, "Gulftown" */
948 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
949 sizeof(hw_cache_event_ids));
950
951 x86_pmu.event_constraints = intel_westmere_event_constraints;
952 pr_cont("Westmere events, ");
953 break;
954 default:
955 /*
956 * default constraints for v2 and up
957 */
958 x86_pmu.event_constraints = intel_gen_event_constraints;
959 pr_cont("generic architected perfmon, ");
960 }
961 return 0;
962}
963
964#else /* CONFIG_CPU_SUP_INTEL */
965
966static int intel_pmu_init(void)
967{
968 return 0;
969}
970
971#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
new file mode 100644
index 000000000000..1ca5ba078afd
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -0,0 +1,157 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3/*
4 * Not sure about some of these
5 */
6static const u64 p6_perfmon_event_map[] =
7{
8 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
9 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
10 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
11 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
12 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
13 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
14 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
15};
16
17static u64 p6_pmu_event_map(int hw_event)
18{
19 return p6_perfmon_event_map[hw_event];
20}
21
22/*
23 * Event setting that is specified not to count anything.
24 * We use this to effectively disable a counter.
25 *
26 * L2_RQSTS with 0 MESI unit mask.
27 */
28#define P6_NOP_EVENT 0x0000002EULL
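
As a side note on the comment above: a P6 event select puts the event code in bits 7:0 and the unit mask in bits 15:8, so 0x2E with a zero unit mask selects L2_RQSTS but no MESI state and therefore never counts. A tiny illustrative helper (an assumption, not kernel code):

#include <stdint.h>

/* Compose a P6 event select value from an event code and a unit mask. */
static uint64_t p6_evtsel(uint64_t event, uint64_t umask)
{
	return (event & 0xff) | ((umask & 0xff) << 8);
}
/* p6_evtsel(0x2E, 0x00) == P6_NOP_EVENT: valid event, empty unit mask. */
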
29
30static u64 p6_pmu_raw_event(u64 hw_event)
31{
32#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
33#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
34#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
35#define P6_EVNTSEL_INV_MASK 0x00800000ULL
36#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
37
38#define P6_EVNTSEL_MASK \
39 (P6_EVNTSEL_EVENT_MASK | \
40 P6_EVNTSEL_UNIT_MASK | \
41 P6_EVNTSEL_EDGE_MASK | \
42 P6_EVNTSEL_INV_MASK | \
43 P6_EVNTSEL_REG_MASK)
44
45 return hw_event & P6_EVNTSEL_MASK;
46}
47
48static struct event_constraint p6_event_constraints[] =
49{
50 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
51 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
52 INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
53 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
54 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
55 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
56 EVENT_CONSTRAINT_END
57};
58
59static void p6_pmu_disable_all(void)
60{
61 u64 val;
62
63 /* p6 only has one enable register */
64 rdmsrl(MSR_P6_EVNTSEL0, val);
65 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
66 wrmsrl(MSR_P6_EVNTSEL0, val);
67}
68
69static void p6_pmu_enable_all(void)
70{
71 unsigned long val;
72
73 /* p6 only has one enable register */
74 rdmsrl(MSR_P6_EVNTSEL0, val);
75 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
76 wrmsrl(MSR_P6_EVNTSEL0, val);
77}
78
79static inline void
80p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
81{
82 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
83 u64 val = P6_NOP_EVENT;
84
85 if (cpuc->enabled)
86 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
87
88 (void)checking_wrmsrl(hwc->config_base + idx, val);
89}
90
91static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
92{
93 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
94 u64 val;
95
96 val = hwc->config;
97 if (cpuc->enabled)
98 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
99
100 (void)checking_wrmsrl(hwc->config_base + idx, val);
101}
102
103static __initconst struct x86_pmu p6_pmu = {
104 .name = "p6",
105 .handle_irq = x86_pmu_handle_irq,
106 .disable_all = p6_pmu_disable_all,
107 .enable_all = p6_pmu_enable_all,
108 .enable = p6_pmu_enable_event,
109 .disable = p6_pmu_disable_event,
110 .eventsel = MSR_P6_EVNTSEL0,
111 .perfctr = MSR_P6_PERFCTR0,
112 .event_map = p6_pmu_event_map,
113 .raw_event = p6_pmu_raw_event,
114 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
115 .apic = 1,
116 .max_period = (1ULL << 31) - 1,
117 .version = 0,
118 .num_events = 2,
119 /*
120 * Events have 40 bits implemented. However, they are designed such
121 * that bits [32-39] are sign extensions of bit 31. As such, the
122 * effective width of an event for a P6-like PMU is 32 bits only.
123 *
124 * See IA-32 Intel Architecture Software developer manual Vol 3B
125 */
126 .event_bits = 32,
127 .event_mask = (1ULL << 32) - 1,
128 .get_event_constraints = x86_get_event_constraints,
129 .event_constraints = p6_event_constraints,
130};
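
To illustrate the 40-bit counter comment inside p6_pmu above: because bits 39:32 mirror bit 31, the low 32 bits fully determine a readout, which is why event_bits is 32 and event_mask is (1ULL << 32) - 1. A small sketch with an assumed helper name, not kernel code:

#include <stdint.h>

/* Reconstruct the full 40-bit value the hardware holds from its low 32 bits. */
static uint64_t p6_sign_extend_counter(uint64_t raw)
{
	uint64_t low32 = raw & ((1ULL << 32) - 1);	/* same as event_mask */

	if (low32 & (1ULL << 31))
		return low32 | (0xffULL << 32);		/* bits 39:32 copy bit 31 */
	return low32;
}
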
131
132static __init int p6_pmu_init(void)
133{
134 switch (boot_cpu_data.x86_model) {
135 case 1:
136 case 3: /* Pentium Pro */
137 case 5:
138 case 6: /* Pentium II */
139 case 7:
140 case 8:
141 case 11: /* Pentium III */
142 case 9:
143 case 13:
144 /* Pentium M */
145 break;
146 default:
147 pr_cont("unsupported p6 CPU model %d ",
148 boot_cpu_data.x86_model);
149 return -ENODEV;
150 }
151
152 x86_pmu = p6_pmu;
153
154 return 0;
155}
156
157#endif /* CONFIG_CPU_SUP_INTEL */