author     Ingo Molnar <mingo@elte.hu>  2009-05-26 02:10:00 -0400
committer  Ingo Molnar <mingo@elte.hu>  2009-05-26 03:52:03 -0400
commit     aaba98018b8295dfa2119345d17f833d74448cd0 (patch)
tree       680995e29b5fde045340dbddf3a22f23e72f63c9 /arch/x86/kernel
parent     79202ba9ff8cf570a75596f42e011167734d1c4b (diff)
perf_counter, x86: Make NMI lockups more robust
We have a debug check that detects stuck NMIs and returns with the PMU
disabled in the global ctrl MSR - but I managed to trigger a situation
where this was not enough to deassert the NMI. So clear/reset the full
PMU and keep the disable count balanced when exiting from here. This
way the box produces a debug warning but stays up and is more
debuggable.

[ Impact: in case of PMU related bugs, recover more gracefully ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
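A minimal user-space sketch of the reasoning above, assuming the handler bumps a
disable refcount on entry (as perf_disable()/perf_enable() do in the kernel): if
the stuck-NMI bail-out path returned without a matching enable, the count would
stay unbalanced and the PMU would remain off. All identifiers below are
illustrative stand-ins, not kernel symbols.

#include <stdio.h>

static int pmu_disable_count;		/* models the kernel's PMU disable refcount */

static void model_perf_disable(void)	{ pmu_disable_count++; }
static void model_perf_enable(void)	{ pmu_disable_count--; }

static void model_pmu_reset(void)
{
	/* stands in for intel_pmu_reset(): clear all counter state */
	printf("clearing PMU state (model)\n");
}

/* returns 1 if the (simulated) PMI was handled */
static int model_handle_pmi(int stuck)
{
	int loops = 0;

	model_perf_disable();		/* handler entry disables the PMU */
again:
	if (++loops > 100) {
		/* stuck NMI: reset the PMU and balance the disable count,
		   mirroring the two lines this patch adds to the handler */
		model_pmu_reset();
		model_perf_enable();
		return 1;
	}
	if (stuck)
		goto again;		/* simulate a counter that never deasserts */

	model_perf_enable();		/* normal exit re-enables the PMU */
	return 1;
}

int main(void)
{
	model_handle_pmi(1);
	/* a balanced count (0) is what keeps the box usable after the warning */
	printf("disable count after stuck NMI: %d\n", pmu_disable_count);
	return 0;
}

Without the model_perf_enable() call in the bail-out branch the final count would
read 1, which is exactly the unbalanced state this patch avoids.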
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c  26
1 file changed, 26 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index ece3813c7a3c..2eeaa99add1c 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -724,6 +724,30 @@ static void intel_pmu_save_and_restart(struct perf_counter *counter)
 		intel_pmu_enable_counter(hwc, idx);
 }
 
+static void intel_pmu_reset(void)
+{
+	unsigned long flags;
+	int idx;
+
+	if (!x86_pmu.num_counters)
+		return;
+
+	local_irq_save(flags);
+
+	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
+
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
+		checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
+	}
+	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
+		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+	}
+
+	local_irq_restore(flags);
+}
+
+
 /*
  * This handler is triggered by the local APIC, so the APIC IRQ handling
  * rules apply:
@@ -750,6 +774,8 @@ again:
 	if (++loops > 100) {
 		WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
 		perf_counter_print_debug();
+		intel_pmu_reset();
+		perf_enable();
 		return 1;
 	}
 