author		Ingo Molnar <mingo@elte.hu>	2009-05-26 02:10:00 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-05-26 03:52:03 -0400
commit		aaba98018b8295dfa2119345d17f833d74448cd0 (patch)
tree		680995e29b5fde045340dbddf3a22f23e72f63c9 /arch
parent		79202ba9ff8cf570a75596f42e011167734d1c4b (diff)
perf_counter, x86: Make NMI lockups more robust
We have a debug check that detects stuck NMIs and returns with
the PMU disabled in the global ctrl MSR - but I managed to trigger
a situation where this was not enough to deassert the NMI.

So clear/reset the full PMU and keep the disable count balanced when
exiting from here. This way the box produces a debug warning but
stays up and remains debuggable.

[ Impact: in case of PMU-related bugs, recover more gracefully ]
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
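
The "keep the disable count balanced" part is the subtle half of the fix:
perf_disable()/perf_enable() nest like a refcount, and the stuck-NMI early
return used to exit without the matching enable, leaving the PMU off even
after recovery. Below is a minimal userspace sketch of that pairing;
pmu_disable(), handle_irq() and the global_ctrl flag are illustrative
stand-ins for the kernel's perf_disable()/perf_enable() and the global
ctrl MSR, not the actual implementation.

#include <stdio.h>

static int disable_count;	/* nesting depth of disables */
static int global_ctrl = 1;	/* stand-in for the global ctrl MSR */

static void pmu_disable(void)
{
	if (disable_count++ == 0)
		global_ctrl = 0;	/* first disable turns counters off */
}

static void pmu_enable(void)
{
	if (--disable_count == 0)
		global_ctrl = 1;	/* last enable turns counters back on */
}

/*
 * A stuck-IRQ handler path: every early return must pair up with an
 * enable, otherwise disable_count never drops back to zero and the
 * PMU stays off for good.
 */
static int handle_irq(int stuck)
{
	pmu_disable();
	if (stuck) {
		pmu_enable();	/* the balancing call this patch adds */
		return 1;
	}
	/* ... normal counter processing would go here ... */
	pmu_enable();
	return 0;
}

int main(void)
{
	handle_irq(1);
	printf("disable_count=%d, global_ctrl=%d\n",
	       disable_count, global_ctrl);	/* expect 0, 1 */
	return 0;
}

Run through the stuck path, this leaves disable_count at 0 and
global_ctrl at 1 - the balance the patch restores by adding the
perf_enable() call before the early return.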
Diffstat (limited to 'arch')
-rw-r--r--	arch/x86/kernel/cpu/perf_counter.c	26
1 file changed, 26 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index ece3813c7a3c..2eeaa99add1c 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -724,6 +724,30 @@ static void intel_pmu_save_and_restart(struct perf_counter *counter)
 	intel_pmu_enable_counter(hwc, idx);
 }
 
+static void intel_pmu_reset(void)
+{
+	unsigned long flags;
+	int idx;
+
+	if (!x86_pmu.num_counters)
+		return;
+
+	local_irq_save(flags);
+
+	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
+
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
+		checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
+	}
+	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
+		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+	}
+
+	local_irq_restore(flags);
+}
+
+
 /*
  * This handler is triggered by the local APIC, so the APIC IRQ handling
  * rules apply:
@@ -750,6 +774,8 @@ again:
 	if (++loops > 100) {
 		WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
 		perf_counter_print_debug();
+		intel_pmu_reset();
+		perf_enable();
 		return 1;
 	}
 
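
As for what intel_pmu_reset() actually clears: each generic counter pairs
an event-select MSR with a counter MSR, and the fixed counters live in a
separate MSR block. The standalone sketch below replays the same two
loops with checking_wrmsrl() stubbed to a printf (in the kernel it is a
fault-checked wrmsr), using the architectural perfmon MSR addresses from
the Intel SDM; the counter totals are example values, whereas the real
driver enumerates them via CPUID.

#include <stdio.h>

/* Architectural perfmon MSR bases (Intel SDM). */
#define MSR_ARCH_PERFMON_EVENTSEL0	0x186
#define MSR_ARCH_PERFMON_PERFCTR0	0x0c1
#define MSR_ARCH_PERFMON_FIXED_CTR0	0x309

/* Stub: logs the write instead of executing a privileged wrmsr. */
static void checking_wrmsrl(unsigned int msr, unsigned long long val)
{
	printf("wrmsr 0x%03x <- 0x%016llx\n", msr, val);
}

int main(void)
{
	int num_counters = 2;		/* example: two generic counters */
	int num_counters_fixed = 3;	/* example: three fixed counters */
	int idx;

	/* Clear each generic counter's event select and count. */
	for (idx = 0; idx < num_counters; idx++) {
		checking_wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, 0ull);
		checking_wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + idx, 0ull);
	}
	/* Clear the fixed-function counters. */
	for (idx = 0; idx < num_counters_fixed; idx++)
		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);

	return 0;
}

Zeroing the event-select MSRs stops the counters from generating further
PMIs regardless of what state they were wedged in, which is why a full
clear is a stronger recovery than only disabling the global ctrl MSR.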