diff options
author | Don Zickus <dzickus@redhat.com> | 2010-02-05 21:47:04 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-02-08 02:29:02 -0500 |
commit | 1fb9d6ad2766a1dd70d167552988375049a97f21 (patch) | |
tree | cee14f2d49bb40a2bed2f683c5a616990be93454 /arch/x86/kernel/apic | |
parent | e40b17208b6805be50ffe891878662b6076206b9 (diff) |
nmi_watchdog: Add new, generic implementation, using perf events
This is a new generic nmi_watchdog implementation using the perf
events infrastructure as suggested by Ingo.
The implementation is simple, just create an in-kernel perf
event and register an overflow handler to check for cpu lockups.
I created a generic implementation that lives in kernel/ and
the hardware specific part that for now lives in arch/x86.
This approach has a number of advantages:
- It simplifies the x86 PMU implementation in the long run,
in that it removes the hardcoded low-level PMU implementation
that was the NMI watchdog before.
- It allows new NMI watchdog features to be added in a central
place.
- It allows other architectures to enable the NMI watchdog,
as long as they have perf events (that provide NMIs)
implemented.
- It also allows for more graceful co-existence of existing
perf events apps and the NMI watchdog - before these changes
the relationship was exclusive. (The NMI watchdog will 'spend'
a perf event when enabled. In later iterations we might be
able to piggyback from an existing NMI event without having
to allocate a hardware event for the NMI watchdog - turning
this into a no-hardware-cost feature.)
As for compatibility, we'll keep the old NMI watchdog code as
well until the new one can 100% replace it on all CPUs, old and
new alike. That might take some time as the NMI watchdog has
been ported to many CPU models.
I have done light testing to make sure the framework works
correctly and it does.
v2: Set the correct timeout values based on the old nmi
watchdog
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: gorcunov@gmail.com
Cc: aris@redhat.com
Cc: peterz@infradead.org
LKML-Reference: <1265424425-31562-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/apic')
-rw-r--r-- | arch/x86/kernel/apic/hw_nmi.c | 114 |
1 file changed, 114 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c new file mode 100644 index 000000000000..8c0e6a410d05 --- /dev/null +++ b/arch/x86/kernel/apic/hw_nmi.c | |||
@@ -0,0 +1,114 @@ | |||
1 | /* | ||
2 | * HW NMI watchdog support | ||
3 | * | ||
4 | * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. | ||
5 | * | ||
6 | * Arch specific calls to support NMI watchdog | ||
7 | * | ||
8 | * Bits copied from original nmi.c file | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | #include <asm/apic.h> | ||
13 | #include <linux/smp.h> | ||
14 | #include <linux/cpumask.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/percpu.h> | ||
17 | #include <linux/cpumask.h> | ||
18 | #include <linux/kernel_stat.h> | ||
19 | #include <asm/mce.h> | ||
20 | |||
21 | #include <linux/nmi.h> | ||
22 | #include <linux/module.h> | ||
23 | |||
/*
 * CPUs that still owe us an NMI backtrace. For reliability, we're
 * prepared to waste bits here (NR_CPUS-sized even if fewer are online).
 */
static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;

/*
 * Per-cpu snapshot of the timer-interrupt count taken on the previous
 * watchdog check; compared against the current count to detect lockups.
 */
static DEFINE_PER_CPU(unsigned, last_irq_sum);
29 | /* | ||
30 | * Take the local apic timer and PIT/HPET into account. We don't | ||
31 | * know which one is active, when we have highres/dyntick on | ||
32 | */ | ||
33 | static inline unsigned int get_timer_irqs(int cpu) | ||
34 | { | ||
35 | return per_cpu(irq_stat, cpu).apic_timer_irqs + | ||
36 | per_cpu(irq_stat, cpu).irq0_irqs; | ||
37 | } | ||
38 | |||
/*
 * Report whether a machine-check exception is currently being handled
 * on this system; always false when MCE support is compiled out.
 */
static inline int mce_in_progress(void)
{
#ifdef CONFIG_X86_MCE
	if (atomic_read(&mce_entry) > 0)
		return 1;
#endif
	return 0;
}
46 | |||
/*
 * Called from NMI context to decide whether this cpu appears locked up.
 * Returns 1 if no timer interrupts have arrived since the previous check
 * (likely stuck), 0 otherwise. As a side job, also prints a backtrace if
 * this cpu was flagged in backtrace_mask by
 * arch_trigger_all_cpu_backtrace().
 */
int hw_nmi_is_cpu_stuck(struct pt_regs *regs)
{
	unsigned int sum;
	int cpu = smp_processor_id();

	/* FIXME: cheap hack for this check, probably should get its own
	 * die_notifier handler
	 */
	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
		static DEFINE_SPINLOCK(lock);	/* Serialise the printks */

		/*
		 * NOTE(review): taking a spinlock in NMI context is safe
		 * here only because this lock is taken nowhere else, so an
		 * NMI can never interrupt a holder on the same cpu —
		 * confirm this invariant if the lock's use ever grows.
		 */
		spin_lock(&lock);
		printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
		show_regs(regs);
		dump_stack();
		spin_unlock(&lock);
		/* Clearing our bit lets the waiter in
		 * arch_trigger_all_cpu_backtrace() finish. */
		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
	}

	/* if we are doing an mce, just assume the cpu is not stuck */
	/* Could check oops_in_progress here too, but it's safer not to */
	if (mce_in_progress())
		return 0;

	/* We determine if the cpu is stuck by checking whether any
	 * interrupts have happened since we last checked. Of course
	 * an nmi storm could create false positives, but the higher
	 * level logic should account for that
	 */
	sum = get_timer_irqs(cpu);
	if (__get_cpu_var(last_irq_sum) == sum) {
		return 1;
	} else {
		/* Progress was made: remember the new count for next time. */
		__get_cpu_var(last_irq_sum) = sum;
		return 0;
	}
}
84 | |||
85 | void arch_trigger_all_cpu_backtrace(void) | ||
86 | { | ||
87 | int i; | ||
88 | |||
89 | cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); | ||
90 | |||
91 | printk(KERN_INFO "sending NMI to all CPUs:\n"); | ||
92 | apic->send_IPI_all(NMI_VECTOR); | ||
93 | |||
94 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | ||
95 | for (i = 0; i < 10 * 1000; i++) { | ||
96 | if (cpumask_empty(to_cpumask(backtrace_mask))) | ||
97 | break; | ||
98 | mdelay(1); | ||
99 | } | ||
100 | } | ||
101 | |||
102 | /* STUB calls to mimic old nmi_watchdog behaviour */ | ||
103 | unsigned int nmi_watchdog = NMI_NONE; | ||
104 | EXPORT_SYMBOL(nmi_watchdog); | ||
105 | atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ | ||
106 | EXPORT_SYMBOL(nmi_active); | ||
107 | int nmi_watchdog_enabled; | ||
108 | int unknown_nmi_panic; | ||
109 | void cpu_nmi_set_wd_enabled(void) { return; } | ||
110 | void acpi_nmi_enable(void) { return; } | ||
111 | void acpi_nmi_disable(void) { return; } | ||
112 | void stop_apic_nmi_watchdog(void *unused) { return; } | ||
113 | void setup_apic_nmi_watchdog(void *unused) { return; } | ||
114 | int __init check_nmi_watchdog(void) { return 0; } | ||