author     Zhang, Yanmin <yanmin_zhang@linux.intel.com>   2010-08-06 01:39:08 -0400
committer  Ingo Molnar <mingo@elte.hu>                    2010-08-18 05:17:39 -0400
commit     351af0725e5222e35741011d1ea62215c1ed06db (patch)
tree       1b58323fb9bc7a86c814c3883cd09c3e859b32c3 /arch/x86/kernel/cpu
parent     9d5f3714e4705a66b6be693f7202192f756f498e (diff)
perf, x86: Fix Intel-nhm PMU programming errata workaround
Fix the Errata AAK100/AAP53/BD53 workaround: the officially documented
workaround we implemented in

  11164cd: perf, x86: Add Nehalem PMU programming errata workaround

doesn't actually work fully; it causes a stuck PMU state under load
and non-functioning perf profiling.
A functional workaround was found by trial & error.
Affects all Nehalem-class Intel PMUs.
Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1281073148.2125.63.camel@ymzhang.sh.intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <stable@kernel.org> # .35.x
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--   arch/x86/kernel/cpu/perf_event_intel.c   81
1 file changed, 63 insertions, 18 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 214ac860ebe0..d8d86d014008 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -491,33 +491,78 @@ static void intel_pmu_enable_all(int added)
  * Intel Errata AAP53 (model 30)
  * Intel Errata BD53  (model 44)
  *
- * These chips need to be 'reset' when adding counters by programming
- * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
- * either in sequence on the same PMC or on different PMCs.
+ * The official story:
+ *   These chips need to be 'reset' when adding counters by programming the
+ *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
+ *   in sequence on the same PMC or on different PMCs.
+ *
+ * In practise it appears some of these events do in fact count, and
+ * we need to programm all 4 events.
  */
-static void intel_pmu_nhm_enable_all(int added)
+static void intel_pmu_nhm_workaround(void)
 {
-	if (added) {
-		struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-		int i;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	static const unsigned long nhm_magic[4] = {
+		0x4300B5,
+		0x4300D2,
+		0x4300B1,
+		0x4300B1
+	};
+	struct perf_event *event;
+	int i;
+
+	/*
+	 * The Errata requires below steps:
+	 * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
+	 * 2) Configure 4 PERFEVTSELx with the magic events and clear
+	 *    the corresponding PMCx;
+	 * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
+	 * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
+	 * 5) Clear 4 pairs of ERFEVTSELx and PMCx;
+	 */
+
+	/*
+	 * The real steps we choose are a little different from above.
+	 * A) To reduce MSR operations, we don't run step 1) as they
+	 *    are already cleared before this function is called;
+	 * B) Call x86_perf_event_update to save PMCx before configuring
+	 *    PERFEVTSELx with magic number;
+	 * C) With step 5), we do clear only when the PERFEVTSELx is
+	 *    not used currently.
+	 * D) Call x86_perf_event_set_period to restore PMCx;
+	 */
 
-		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
-		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
-		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
+	/* We always operate 4 pairs of PERF Counters */
+	for (i = 0; i < 4; i++) {
+		event = cpuc->events[i];
+		if (event)
+			x86_perf_event_update(event);
+	}
 
-		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
-		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+	for (i = 0; i < 4; i++) {
+		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
+		wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
+	}
 
-		for (i = 0; i < 3; i++) {
-			struct perf_event *event = cpuc->events[i];
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
 
-			if (!event)
-				continue;
+	for (i = 0; i < 4; i++) {
+		event = cpuc->events[i];
 
+		if (event) {
+			x86_perf_event_set_period(event);
 			__x86_pmu_enable_event(&event->hw,
 					ARCH_PERFMON_EVENTSEL_ENABLE);
-		}
+		} else
+			wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
 	}
+}
+
+static void intel_pmu_nhm_enable_all(int added)
+{
+	if (added)
+		intel_pmu_nhm_workaround();
 	intel_pmu_enable_all(added);
 }
 
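For readers who want the documented procedure laid out flat, here is a minimal, free-standing sketch (not part of the patch) of the five errata steps listed in the comment above. It assumes the kernel's wrmsrl() helper from <asm/msr.h> and the standard architectural-perfmon MSR addresses; the magic event encodings are the ones the patch programs. The patch itself deviates from this literal sequence exactly as its comments explain: step 1 is skipped because the registers are already clear when the function runs, live counter values are saved via x86_perf_event_update() and restored via x86_perf_event_set_period(), and in step 5 only the event selects that are not currently in use get cleared.

/*
 * Sketch only: the literal five-step errata sequence, without the
 * save/restore optimizations the patch applies.  The MSR addresses are
 * the standard architectural-perfmon ones; wrmsrl() is the kernel
 * helper from <asm/msr.h>, declared here only to keep the sketch
 * self-contained.
 */
#define MSR_ARCH_PERFMON_EVENTSEL0	0x186
#define MSR_ARCH_PERFMON_PERFCTR0	0x0c1
#define MSR_CORE_PERF_GLOBAL_CTRL	0x38f
#define MSR_IA32_PEBS_ENABLE		0x3f1

void wrmsrl(unsigned int msr, unsigned long long val);	/* from <asm/msr.h> */

static void nhm_errata_sequence_sketch(void)
{
	/* The four "magic" event encodings programmed by the patch */
	static const unsigned long nhm_magic[4] = {
		0x4300B5, 0x4300D2, 0x4300B1, 0x4300B1
	};
	int i;

	/* 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL */
	wrmsrl(MSR_IA32_PEBS_ENABLE, 0x0);
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);

	/* 2) Program the four magic events and clear the matching PMCs */
	for (i = 0; i < 4; i++) {
		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
		wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
	}

	/* 3) Set bits 0-3 of GLOBAL_CTRL, then 4) clear it again */
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);

	/* 5) Clear all four PERFEVTSELx/PMCx pairs */
	for (i = 0; i < 4; i++) {
		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
		wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
	}
}

After this patch, the optimized form of this sequence lives in intel_pmu_nhm_workaround(), which intel_pmu_nhm_enable_all() calls only when counters have been added; that function was wired up as the x86_pmu.enable_all callback for Nehalem-class models by the earlier commit 11164cd.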