aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
authorZhang, Yanmin <yanmin_zhang@linux.intel.com>2010-08-06 01:39:08 -0400
committerIngo Molnar <mingo@elte.hu>2010-08-18 05:17:39 -0400
commit351af0725e5222e35741011d1ea62215c1ed06db (patch)
tree1b58323fb9bc7a86c814c3883cd09c3e859b32c3 /arch/x86/kernel/cpu
parent9d5f3714e4705a66b6be693f7202192f756f498e (diff)
perf, x86: Fix Intel-nhm PMU programming errata workaround
Fix the Errata AAK100/AAP53/BD53 workaround, the officialy documented workaround we implemented in: 11164cd: perf, x86: Add Nehelem PMU programming errata workaround doesn't actually work fully and causes a stuck PMU state under load and non-functioning perf profiling. A functional workaround was found by trial & error. Affects all Nehalem-class Intel PMUs. Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <1281073148.2125.63.camel@ymzhang.sh.intel.com> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: <stable@kernel.org> # .35.x Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c81
1 files changed, 63 insertions, 18 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 214ac860ebe0..d8d86d014008 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -491,33 +491,78 @@ static void intel_pmu_enable_all(int added)
491 * Intel Errata AAP53 (model 30) 491 * Intel Errata AAP53 (model 30)
492 * Intel Errata BD53 (model 44) 492 * Intel Errata BD53 (model 44)
493 * 493 *
494 * These chips need to be 'reset' when adding counters by programming 494 * The official story:
495 * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5 495 * These chips need to be 'reset' when adding counters by programming the
496 * either in sequence on the same PMC or on different PMCs. 496 * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
497 * in sequence on the same PMC or on different PMCs.
498 *
499 * In practise it appears some of these events do in fact count, and
500 * we need to programm all 4 events.
497 */ 501 */
498static void intel_pmu_nhm_enable_all(int added) 502static void intel_pmu_nhm_workaround(void)
499{ 503{
500 if (added) { 504 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
501 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 505 static const unsigned long nhm_magic[4] = {
502 int i; 506 0x4300B5,
507 0x4300D2,
508 0x4300B1,
509 0x4300B1
510 };
511 struct perf_event *event;
512 int i;
513
514 /*
515 * The Errata requires below steps:
516 * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
517 * 2) Configure 4 PERFEVTSELx with the magic events and clear
518 * the corresponding PMCx;
519 * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
520 * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
521 * 5) Clear 4 pairs of ERFEVTSELx and PMCx;
522 */
523
524 /*
525 * The real steps we choose are a little different from above.
526 * A) To reduce MSR operations, we don't run step 1) as they
527 * are already cleared before this function is called;
528 * B) Call x86_perf_event_update to save PMCx before configuring
529 * PERFEVTSELx with magic number;
530 * C) With step 5), we do clear only when the PERFEVTSELx is
531 * not used currently.
532 * D) Call x86_perf_event_set_period to restore PMCx;
533 */
503 534
504 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2); 535 /* We always operate 4 pairs of PERF Counters */
505 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1); 536 for (i = 0; i < 4; i++) {
506 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5); 537 event = cpuc->events[i];
538 if (event)
539 x86_perf_event_update(event);
540 }
507 541
508 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); 542 for (i = 0; i < 4; i++) {
509 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); 543 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
544 wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
545 }
510 546
511 for (i = 0; i < 3; i++) { 547 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
512 struct perf_event *event = cpuc->events[i]; 548 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
513 549
514 if (!event) 550 for (i = 0; i < 4; i++) {
515 continue; 551 event = cpuc->events[i];
516 552
553 if (event) {
554 x86_perf_event_set_period(event);
517 __x86_pmu_enable_event(&event->hw, 555 __x86_pmu_enable_event(&event->hw,
518 ARCH_PERFMON_EVENTSEL_ENABLE); 556 ARCH_PERFMON_EVENTSEL_ENABLE);
519 } 557 } else
558 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
520 } 559 }
560}
561
562static void intel_pmu_nhm_enable_all(int added)
563{
564 if (added)
565 intel_pmu_nhm_workaround();
521 intel_pmu_enable_all(added); 566 intel_pmu_enable_all(added);
522} 567}
523 568