aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorDon Zickus <dzickus@redhat.com>2010-05-07 17:11:44 -0400
committerFrederic Weisbecker <fweisbec@gmail.com>2010-05-12 17:55:33 -0400
commit58687acba59266735adb8ccd9b5b9aa2c7cd205b (patch)
tree7236582375310b116eedec6facbee87d42e3dd6d /arch
parenta9aa1d02de36b450990b0e25a88fc2ff1c3e6b94 (diff)
lockup_detector: Combine nmi_watchdog and softlockup detector
The new nmi_watchdog (which uses the perf event subsystem) is very similar in structure to the softlockup detector. Using Ingo's suggestion, I combined the two functionalities into one file: kernel/watchdog.c. Now both the nmi_watchdog (or hardlockup detector) and softlockup detector sit on top of the perf event subsystem, which is run every 60 seconds or so to see if there are any lockups. To detect hardlockups, cpus not responding to interrupts, I implemented an hrtimer that runs 5 times for every perf event overflow event. If that stops counting on a cpu, then the cpu is most likely in trouble. To detect softlockups, tasks not yielding to the scheduler, I used the previous kthread idea that now gets kicked every time the hrtimer fires. If the kthread isn't being scheduled neither is anyone else and the warning is printed to the console. I tested this on x86_64 and both the softlockup and hardlockup paths work. V2: - cleaned up the Kconfig and softlockup combination - surrounded hardlockup cases with #ifdef CONFIG_PERF_EVENTS_NMI - seperated out the softlockup case from perf event subsystem - re-arranged the enabling/disabling nmi watchdog from proc space - added cpumasks for hardlockup failure cases - removed fallback to soft events if no PMU exists for hard events V3: - comment cleanups - drop support for older softlockup code - per_cpu cleanups - completely remove software clock base hardlockup detector - use per_cpu masking on hard/soft lockup detection - #ifdef cleanups - rename config option NMI_WATCHDOG to LOCKUP_DETECTOR - documentation additions V4: - documentation fixes - convert per_cpu to __get_cpu_var - powerpc compile fixes V5: - split apart warn flags for hard and soft lockups TODO: - figure out how to make an arch-agnostic clock2cycles call (if possible) to feed into perf events as a sample period [fweisbec: merged conflict patch] Signed-off-by: Don Zickus <dzickus@redhat.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Cyrill Gorcunov <gorcunov@gmail.com> Cc: Eric Paris <eparis@redhat.com> Cc: Randy Dunlap <randy.dunlap@oracle.com> LKML-Reference: <1273266711-18706-2-git-send-email-dzickus@redhat.com> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/include/asm/nmi.h2
-rw-r--r--arch/x86/kernel/apic/Makefile4
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c2
-rw-r--r--arch/x86/kernel/traps.c4
4 files changed, 6 insertions, 6 deletions
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 5b41b0feb6db..932f0f86b4b7 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -17,7 +17,7 @@ int do_nmi_callback(struct pt_regs *regs, int cpu);
17 17
18extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); 18extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
19extern int check_nmi_watchdog(void); 19extern int check_nmi_watchdog(void);
20#if !defined(CONFIG_NMI_WATCHDOG) 20#if !defined(CONFIG_LOCKUP_DETECTOR)
21extern int nmi_watchdog_enabled; 21extern int nmi_watchdog_enabled;
22#endif 22#endif
23extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); 23extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 1a4512e48d24..52f32e0ea194 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -3,10 +3,10 @@
3# 3#
4 4
5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o 5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o
6ifneq ($(CONFIG_NMI_WATCHDOG),y) 6ifneq ($(CONFIG_LOCKUP_DETECTOR),y)
7obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o 7obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
8endif 8endif
9obj-$(CONFIG_NMI_WATCHDOG) += hw_nmi.o 9obj-$(CONFIG_LOCKUP_DETECTOR) += hw_nmi.o
10 10
11obj-$(CONFIG_X86_IO_APIC) += io_apic.o 11obj-$(CONFIG_X86_IO_APIC) += io_apic.o
12obj-$(CONFIG_SMP) += ipi.o 12obj-$(CONFIG_SMP) += ipi.o
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index e8b78a0be5de..79425f96fcee 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -89,7 +89,7 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *regs)
89 89
90u64 hw_nmi_get_sample_period(void) 90u64 hw_nmi_get_sample_period(void)
91{ 91{
92 return cpu_khz * 1000; 92 return (u64)(cpu_khz) * 1000 * 60;
93} 93}
94 94
95#ifdef ARCH_HAS_NMI_WATCHDOG 95#ifdef ARCH_HAS_NMI_WATCHDOG
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index bdc7fab3ef3e..bd347c2b34dc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -406,7 +406,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
406 == NOTIFY_STOP) 406 == NOTIFY_STOP)
407 return; 407 return;
408 408
409#ifndef CONFIG_NMI_WATCHDOG 409#ifndef CONFIG_LOCKUP_DETECTOR
410 /* 410 /*
411 * Ok, so this is none of the documented NMI sources, 411 * Ok, so this is none of the documented NMI sources,
412 * so it must be the NMI watchdog. 412 * so it must be the NMI watchdog.
@@ -414,7 +414,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
414 if (nmi_watchdog_tick(regs, reason)) 414 if (nmi_watchdog_tick(regs, reason))
415 return; 415 return;
416 if (!do_nmi_callback(regs, cpu)) 416 if (!do_nmi_callback(regs, cpu))
417#endif /* !CONFIG_NMI_WATCHDOG */ 417#endif /* !CONFIG_LOCKUP_DETECTOR */
418 unknown_nmi_error(reason, regs); 418 unknown_nmi_error(reason, regs);
419#else 419#else
420 unknown_nmi_error(reason, regs); 420 unknown_nmi_error(reason, regs);