aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDon Zickus <dzickus@redhat.com>2010-05-07 17:11:44 -0400
committerFrederic Weisbecker <fweisbec@gmail.com>2010-05-12 17:55:33 -0400
commit58687acba59266735adb8ccd9b5b9aa2c7cd205b (patch)
tree7236582375310b116eedec6facbee87d42e3dd6d
parenta9aa1d02de36b450990b0e25a88fc2ff1c3e6b94 (diff)
lockup_detector: Combine nmi_watchdog and softlockup detector
The new nmi_watchdog (which uses the perf event subsystem) is very similar in structure to the softlockup detector. Using Ingo's suggestion, I combined the two functionalities into one file: kernel/watchdog.c. Now both the nmi_watchdog (or hardlockup detector) and softlockup detector sit on top of the perf event subsystem, which is run every 60 seconds or so to see if there are any lockups. To detect hardlockups, cpus not responding to interrupts, I implemented an hrtimer that runs 5 times for every perf event overflow event. If that stops counting on a cpu, then the cpu is most likely in trouble. To detect softlockups, tasks not yielding to the scheduler, I used the previous kthread idea that now gets kicked every time the hrtimer fires. If the kthread isn't being scheduled neither is anyone else and the warning is printed to the console. I tested this on x86_64 and both the softlockup and hardlockup paths work. V2: - cleaned up the Kconfig and softlockup combination - surrounded hardlockup cases with #ifdef CONFIG_PERF_EVENTS_NMI - separated out the softlockup case from perf event subsystem - re-arranged the enabling/disabling nmi watchdog from proc space - added cpumasks for hardlockup failure cases - removed fallback to soft events if no PMU exists for hard events V3: - comment cleanups - drop support for older softlockup code - per_cpu cleanups - completely remove software clock base hardlockup detector - use per_cpu masking on hard/soft lockup detection - #ifdef cleanups - rename config option NMI_WATCHDOG to LOCKUP_DETECTOR - documentation additions V4: - documentation fixes - convert per_cpu to __get_cpu_var - powerpc compile fixes V5: - split apart warn flags for hard and soft lockups TODO: - figure out how to make an arch-agnostic clock2cycles call (if possible) to feed into perf events as a sample period [fweisbec: merged conflict patch] Signed-off-by: Don Zickus <dzickus@redhat.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Peter Zijlstra 
<peterz@infradead.org> Cc: Cyrill Gorcunov <gorcunov@gmail.com> Cc: Eric Paris <eparis@redhat.com> Cc: Randy Dunlap <randy.dunlap@oracle.com> LKML-Reference: <1273266711-18706-2-git-send-email-dzickus@redhat.com> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
-rw-r--r--Documentation/kernel-parameters.txt2
-rw-r--r--arch/x86/include/asm/nmi.h2
-rw-r--r--arch/x86/kernel/apic/Makefile4
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c2
-rw-r--r--arch/x86/kernel/traps.c4
-rw-r--r--include/linux/nmi.h8
-rw-r--r--include/linux/sched.h6
-rw-r--r--init/Kconfig5
-rw-r--r--kernel/Makefile3
-rw-r--r--kernel/sysctl.c21
-rw-r--r--kernel/watchdog.c592
-rw-r--r--lib/Kconfig.debug30
12 files changed, 650 insertions, 29 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 839b21b0699a..dfe8d1c226c6 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1777,6 +1777,8 @@ and is between 256 and 4096 characters. It is defined in the file
1777 1777
1778 nousb [USB] Disable the USB subsystem 1778 nousb [USB] Disable the USB subsystem
1779 1779
1780 nowatchdog [KNL] Disable the lockup detector.
1781
1780 nowb [ARM] 1782 nowb [ARM]
1781 1783
1782 nox2apic [X86-64,APIC] Do not enable x2APIC mode. 1784 nox2apic [X86-64,APIC] Do not enable x2APIC mode.
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 5b41b0feb6db..932f0f86b4b7 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -17,7 +17,7 @@ int do_nmi_callback(struct pt_regs *regs, int cpu);
17 17
18extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); 18extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
19extern int check_nmi_watchdog(void); 19extern int check_nmi_watchdog(void);
20#if !defined(CONFIG_NMI_WATCHDOG) 20#if !defined(CONFIG_LOCKUP_DETECTOR)
21extern int nmi_watchdog_enabled; 21extern int nmi_watchdog_enabled;
22#endif 22#endif
23extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); 23extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 1a4512e48d24..52f32e0ea194 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -3,10 +3,10 @@
3# 3#
4 4
5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o 5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o
6ifneq ($(CONFIG_NMI_WATCHDOG),y) 6ifneq ($(CONFIG_LOCKUP_DETECTOR),y)
7obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o 7obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
8endif 8endif
9obj-$(CONFIG_NMI_WATCHDOG) += hw_nmi.o 9obj-$(CONFIG_LOCKUP_DETECTOR) += hw_nmi.o
10 10
11obj-$(CONFIG_X86_IO_APIC) += io_apic.o 11obj-$(CONFIG_X86_IO_APIC) += io_apic.o
12obj-$(CONFIG_SMP) += ipi.o 12obj-$(CONFIG_SMP) += ipi.o
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index e8b78a0be5de..79425f96fcee 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -89,7 +89,7 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *regs)
89 89
90u64 hw_nmi_get_sample_period(void) 90u64 hw_nmi_get_sample_period(void)
91{ 91{
92 return cpu_khz * 1000; 92 return (u64)(cpu_khz) * 1000 * 60;
93} 93}
94 94
95#ifdef ARCH_HAS_NMI_WATCHDOG 95#ifdef ARCH_HAS_NMI_WATCHDOG
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index bdc7fab3ef3e..bd347c2b34dc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -406,7 +406,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
406 == NOTIFY_STOP) 406 == NOTIFY_STOP)
407 return; 407 return;
408 408
409#ifndef CONFIG_NMI_WATCHDOG 409#ifndef CONFIG_LOCKUP_DETECTOR
410 /* 410 /*
411 * Ok, so this is none of the documented NMI sources, 411 * Ok, so this is none of the documented NMI sources,
412 * so it must be the NMI watchdog. 412 * so it must be the NMI watchdog.
@@ -414,7 +414,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
414 if (nmi_watchdog_tick(regs, reason)) 414 if (nmi_watchdog_tick(regs, reason))
415 return; 415 return;
416 if (!do_nmi_callback(regs, cpu)) 416 if (!do_nmi_callback(regs, cpu))
417#endif /* !CONFIG_NMI_WATCHDOG */ 417#endif /* !CONFIG_LOCKUP_DETECTOR */
418 unknown_nmi_error(reason, regs); 418 unknown_nmi_error(reason, regs);
419#else 419#else
420 unknown_nmi_error(reason, regs); 420 unknown_nmi_error(reason, regs);
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 22cc7960b649..abd48aacaf79 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -20,7 +20,7 @@ extern void touch_nmi_watchdog(void);
20extern void acpi_nmi_disable(void); 20extern void acpi_nmi_disable(void);
21extern void acpi_nmi_enable(void); 21extern void acpi_nmi_enable(void);
22#else 22#else
23#ifndef CONFIG_NMI_WATCHDOG 23#ifndef CONFIG_LOCKUP_DETECTOR
24static inline void touch_nmi_watchdog(void) 24static inline void touch_nmi_watchdog(void)
25{ 25{
26 touch_softlockup_watchdog(); 26 touch_softlockup_watchdog();
@@ -51,12 +51,12 @@ static inline bool trigger_all_cpu_backtrace(void)
51} 51}
52#endif 52#endif
53 53
54#ifdef CONFIG_NMI_WATCHDOG 54#ifdef CONFIG_LOCKUP_DETECTOR
55int hw_nmi_is_cpu_stuck(struct pt_regs *); 55int hw_nmi_is_cpu_stuck(struct pt_regs *);
56u64 hw_nmi_get_sample_period(void); 56u64 hw_nmi_get_sample_period(void);
57extern int nmi_watchdog_enabled; 57extern int watchdog_enabled;
58struct ctl_table; 58struct ctl_table;
59extern int proc_nmi_enabled(struct ctl_table *, int , 59extern int proc_dowatchdog_enabled(struct ctl_table *, int ,
60 void __user *, size_t *, loff_t *); 60 void __user *, size_t *, loff_t *);
61#endif 61#endif
62 62
diff --git a/include/linux/sched.h b/include/linux/sched.h
index dad7f668ebf7..37efe8fa5306 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -346,6 +346,12 @@ extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
346 size_t *lenp, loff_t *ppos); 346 size_t *lenp, loff_t *ppos);
347#endif 347#endif
348 348
349#ifdef CONFIG_LOCKUP_DETECTOR
350extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
351 void __user *buffer,
352 size_t *lenp, loff_t *ppos);
353#endif
354
349/* Attach to any functions which should be ignored in wchan output. */ 355/* Attach to any functions which should be ignored in wchan output. */
350#define __sched __attribute__((__section__(".sched.text"))) 356#define __sched __attribute__((__section__(".sched.text")))
351 357
diff --git a/init/Kconfig b/init/Kconfig
index c6c8903cb534..e44e25422f22 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -944,8 +944,11 @@ config PERF_USE_VMALLOC
944 944
945config PERF_EVENTS_NMI 945config PERF_EVENTS_NMI
946 bool 946 bool
947 depends on PERF_EVENTS
947 help 948 help
948 Arch has support for nmi_watchdog 949 System hardware can generate an NMI using the perf event
950 subsystem. Also has support for calculating CPU cycle events
951 to determine how many clock cycles in a given period.
949 952
950menu "Kernel Performance Events And Counters" 953menu "Kernel Performance Events And Counters"
951 954
diff --git a/kernel/Makefile b/kernel/Makefile
index d5c30060ac14..6adeafc3e259 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -76,9 +76,8 @@ obj-$(CONFIG_GCOV_KERNEL) += gcov/
76obj-$(CONFIG_AUDIT_TREE) += audit_tree.o 76obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
77obj-$(CONFIG_KPROBES) += kprobes.o 77obj-$(CONFIG_KPROBES) += kprobes.o
78obj-$(CONFIG_KGDB) += kgdb.o 78obj-$(CONFIG_KGDB) += kgdb.o
79obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
80obj-$(CONFIG_NMI_WATCHDOG) += nmi_watchdog.o
81obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o 79obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
80obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
82obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ 81obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
83obj-$(CONFIG_SECCOMP) += seccomp.o 82obj-$(CONFIG_SECCOMP) += seccomp.o
84obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 83obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a38af430f0d8..0f9adda85f97 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -74,7 +74,7 @@
74#include <scsi/sg.h> 74#include <scsi/sg.h>
75#endif 75#endif
76 76
77#ifdef CONFIG_NMI_WATCHDOG 77#ifdef CONFIG_LOCKUP_DETECTOR
78#include <linux/nmi.h> 78#include <linux/nmi.h>
79#endif 79#endif
80 80
@@ -686,16 +686,25 @@ static struct ctl_table kern_table[] = {
686 .mode = 0444, 686 .mode = 0444,
687 .proc_handler = proc_dointvec, 687 .proc_handler = proc_dointvec,
688 }, 688 },
689#if defined(CONFIG_NMI_WATCHDOG) 689#if defined(CONFIG_LOCKUP_DETECTOR)
690 { 690 {
691 .procname = "nmi_watchdog", 691 .procname = "watchdog",
692 .data = &nmi_watchdog_enabled, 692 .data = &watchdog_enabled,
693 .maxlen = sizeof (int), 693 .maxlen = sizeof (int),
694 .mode = 0644, 694 .mode = 0644,
695 .proc_handler = proc_nmi_enabled, 695 .proc_handler = proc_dowatchdog_enabled,
696 },
697 {
698 .procname = "watchdog_thresh",
699 .data = &softlockup_thresh,
700 .maxlen = sizeof(int),
701 .mode = 0644,
702 .proc_handler = proc_dowatchdog_thresh,
703 .extra1 = &neg_one,
704 .extra2 = &sixty,
696 }, 705 },
697#endif 706#endif
698#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_NMI_WATCHDOG) 707#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
699 { 708 {
700 .procname = "unknown_nmi_panic", 709 .procname = "unknown_nmi_panic",
701 .data = &unknown_nmi_panic, 710 .data = &unknown_nmi_panic,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
new file mode 100644
index 000000000000..6b7fad8497af
--- /dev/null
+++ b/kernel/watchdog.c
@@ -0,0 +1,592 @@
1/*
2 * Detect hard and soft lockups on a system
3 *
4 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
5 *
6 * this code detects hard lockups: incidents in which, on a CPU,
7 * the kernel does not respond to anything except NMI.
8 *
9 * Note: Most of this code is borrowed heavily from softlockup.c,
10 * so thanks to Ingo for the initial implementation.
11 * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
12 * to those contributors as well.
13 */
14
15#include <linux/mm.h>
16#include <linux/cpu.h>
17#include <linux/nmi.h>
18#include <linux/init.h>
19#include <linux/delay.h>
20#include <linux/freezer.h>
21#include <linux/kthread.h>
22#include <linux/lockdep.h>
23#include <linux/notifier.h>
24#include <linux/module.h>
25#include <linux/sysctl.h>
26
27#include <asm/irq_regs.h>
28#include <linux/perf_event.h>
29
30int watchdog_enabled;
31int __read_mostly softlockup_thresh = 60;
32
33static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
34static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
35static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
36static DEFINE_PER_CPU(bool, softlockup_touch_sync);
37static DEFINE_PER_CPU(bool, hard_watchdog_warn);
38static DEFINE_PER_CPU(bool, soft_watchdog_warn);
39#ifdef CONFIG_PERF_EVENTS_NMI
40static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
41static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
42static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
43#endif
44
45static int __read_mostly did_panic;
46static int __initdata no_watchdog;
47
48
49/* boot commands */
50/*
51 * Should we panic when a soft-lockup or hard-lockup occurs:
52 */
53#ifdef CONFIG_PERF_EVENTS_NMI
54static int hardlockup_panic;
55
56static int __init hardlockup_panic_setup(char *str)
57{
58 if (!strncmp(str, "panic", 5))
59 hardlockup_panic = 1;
60 return 1;
61}
62__setup("nmi_watchdog=", hardlockup_panic_setup);
63#endif
64
65unsigned int __read_mostly softlockup_panic =
66 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
67
68static int __init softlockup_panic_setup(char *str)
69{
70 softlockup_panic = simple_strtoul(str, NULL, 0);
71
72 return 1;
73}
74__setup("softlockup_panic=", softlockup_panic_setup);
75
76static int __init nowatchdog_setup(char *str)
77{
78 no_watchdog = 1;
79 return 1;
80}
81__setup("nowatchdog", nowatchdog_setup);
82
83/* deprecated */
84static int __init nosoftlockup_setup(char *str)
85{
86 no_watchdog = 1;
87 return 1;
88}
89__setup("nosoftlockup", nosoftlockup_setup);
90/* */
91
92
/*
 * Coarse timestamp for @this_cpu, in approximate seconds.
 *
 * cpu_clock() is shifted right by 30 bits instead of being divided by
 * 10^9: 2^30ns == 1.074s, which is close enough for lockup detection
 * and avoids a big divide.
 */
static unsigned long get_timestamp(int this_cpu)
{
	unsigned long long clock = cpu_clock(this_cpu);

	return clock >> 30LL;  /* 2^30 ~= 10^9 */
}
102
103static unsigned long get_sample_period(void)
104{
105 /*
106 * convert softlockup_thresh from seconds to ns
107 * the divide by 5 is to give hrtimer 5 chances to
108 * increment before the hardlockup detector generates
109 * a warning
110 */
111 return softlockup_thresh / 5 * NSEC_PER_SEC;
112}
113
114/* Commands for resetting the watchdog */
115static void __touch_watchdog(void)
116{
117 int this_cpu = raw_smp_processor_id();
118
119 __get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
120}
121
122void touch_watchdog(void)
123{
124 __get_cpu_var(watchdog_touch_ts) = 0;
125}
126EXPORT_SYMBOL(touch_watchdog);
127
128void touch_all_watchdog(void)
129{
130 int cpu;
131
132 /*
133 * this is done lockless
134 * do we care if a 0 races with a timestamp?
135 * all it means is the softlock check starts one cycle later
136 */
137 for_each_online_cpu(cpu)
138 per_cpu(watchdog_touch_ts, cpu) = 0;
139}
140
141void touch_nmi_watchdog(void)
142{
143 touch_watchdog();
144}
145EXPORT_SYMBOL(touch_nmi_watchdog);
146
147void touch_all_nmi_watchdog(void)
148{
149 touch_all_watchdog();
150}
151
152void touch_softlockup_watchdog(void)
153{
154 touch_watchdog();
155}
156
157void touch_all_softlockup_watchdogs(void)
158{
159 touch_all_watchdog();
160}
161
162void touch_softlockup_watchdog_sync(void)
163{
164 __raw_get_cpu_var(softlockup_touch_sync) = true;
165 __raw_get_cpu_var(watchdog_touch_ts) = 0;
166}
167
168void softlockup_tick(void)
169{
170}
171
172#ifdef CONFIG_PERF_EVENTS_NMI
173/* watchdog detector functions */
174static int is_hardlockup(int cpu)
175{
176 unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
177
178 if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
179 return 1;
180
181 per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
182 return 0;
183}
184#endif
185
186static int is_softlockup(unsigned long touch_ts, int cpu)
187{
188 unsigned long now = get_timestamp(cpu);
189
190 /* Warn about unreasonable delays: */
191 if (time_after(now, touch_ts + softlockup_thresh))
192 return now - touch_ts;
193
194 return 0;
195}
196
197static int
198watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr)
199{
200 did_panic = 1;
201
202 return NOTIFY_DONE;
203}
204
205static struct notifier_block panic_block = {
206 .notifier_call = watchdog_panic,
207};
208
209#ifdef CONFIG_PERF_EVENTS_NMI
210static struct perf_event_attr wd_hw_attr = {
211 .type = PERF_TYPE_HARDWARE,
212 .config = PERF_COUNT_HW_CPU_CYCLES,
213 .size = sizeof(struct perf_event_attr),
214 .pinned = 1,
215 .disabled = 1,
216};
217
218/* Callback function for perf event subsystem */
219void watchdog_overflow_callback(struct perf_event *event, int nmi,
220 struct perf_sample_data *data,
221 struct pt_regs *regs)
222{
223 int this_cpu = smp_processor_id();
224 unsigned long touch_ts = per_cpu(watchdog_touch_ts, this_cpu);
225
226 if (touch_ts == 0) {
227 __touch_watchdog();
228 return;
229 }
230
231 /* check for a hardlockup
232 * This is done by making sure our timer interrupt
233 * is incrementing. The timer interrupt should have
234 * fired multiple times before we overflow'd. If it hasn't
235 * then this is a good indication the cpu is stuck
236 */
237 if (is_hardlockup(this_cpu)) {
238 /* only print hardlockups once */
239 if (__get_cpu_var(hard_watchdog_warn) == true)
240 return;
241
242 if (hardlockup_panic)
243 panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
244 else
245 WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
246
247 __get_cpu_var(hard_watchdog_warn) = true;
248 return;
249 }
250
251 __get_cpu_var(hard_watchdog_warn) = false;
252 return;
253}
254static void watchdog_interrupt_count(void)
255{
256 __get_cpu_var(hrtimer_interrupts)++;
257}
258#else
259static inline void watchdog_interrupt_count(void) { return; }
260#endif /* CONFIG_PERF_EVENTS_NMI */
261
262/* watchdog kicker functions */
263static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
264{
265 int this_cpu = smp_processor_id();
266 unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
267 struct pt_regs *regs = get_irq_regs();
268 int duration;
269
270 /* kick the hardlockup detector */
271 watchdog_interrupt_count();
272
273 /* kick the softlockup detector */
274 wake_up_process(__get_cpu_var(softlockup_watchdog));
275
276 /* .. and repeat */
277 hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
278
279 if (touch_ts == 0) {
280 if (unlikely(per_cpu(softlockup_touch_sync, this_cpu))) {
281 /*
282 * If the time stamp was touched atomically
283 * make sure the scheduler tick is up to date.
284 */
285 per_cpu(softlockup_touch_sync, this_cpu) = false;
286 sched_clock_tick();
287 }
288 __touch_watchdog();
289 return HRTIMER_RESTART;
290 }
291
292 /* check for a softlockup
293 * This is done by making sure a high priority task is
294 * being scheduled. The task touches the watchdog to
295 * indicate it is getting cpu time. If it hasn't then
296 * this is a good indication some task is hogging the cpu
297 */
298 duration = is_softlockup(touch_ts, this_cpu);
299 if (unlikely(duration)) {
300 /* only warn once */
301 if (__get_cpu_var(soft_watchdog_warn) == true)
302 return HRTIMER_RESTART;
303
304 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
305 this_cpu, duration,
306 current->comm, task_pid_nr(current));
307 print_modules();
308 print_irqtrace_events(current);
309 if (regs)
310 show_regs(regs);
311 else
312 dump_stack();
313
314 if (softlockup_panic)
315 panic("softlockup: hung tasks");
316 __get_cpu_var(soft_watchdog_warn) = true;
317 } else
318 __get_cpu_var(soft_watchdog_warn) = false;
319
320 return HRTIMER_RESTART;
321}
322
323
324/*
325 * The watchdog thread - touches the timestamp.
326 */
327static int watchdog(void *__bind_cpu)
328{
329 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
330 struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, (unsigned long)__bind_cpu);
331
332 sched_setscheduler(current, SCHED_FIFO, &param);
333
334 /* initialize timestamp */
335 __touch_watchdog();
336
337 /* kick off the timer for the hardlockup detector */
338 /* done here because hrtimer_start can only pin to smp_processor_id() */
339 hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
340 HRTIMER_MODE_REL_PINNED);
341
342 set_current_state(TASK_INTERRUPTIBLE);
343 /*
344 * Run briefly once per second to reset the softlockup timestamp.
345 * If this gets delayed for more than 60 seconds then the
346 * debug-printout triggers in softlockup_tick().
347 */
348 while (!kthread_should_stop()) {
349 __touch_watchdog();
350 schedule();
351
352 if (kthread_should_stop())
353 break;
354
355 set_current_state(TASK_INTERRUPTIBLE);
356 }
357 __set_current_state(TASK_RUNNING);
358
359 return 0;
360}
361
362
363#ifdef CONFIG_PERF_EVENTS_NMI
364static int watchdog_nmi_enable(int cpu)
365{
366 struct perf_event_attr *wd_attr;
367 struct perf_event *event = per_cpu(watchdog_ev, cpu);
368
369 /* is it already setup and enabled? */
370 if (event && event->state > PERF_EVENT_STATE_OFF)
371 goto out;
372
373 /* it is setup but not enabled */
374 if (event != NULL)
375 goto out_enable;
376
377 /* Try to register using hardware perf events */
378 wd_attr = &wd_hw_attr;
379 wd_attr->sample_period = hw_nmi_get_sample_period();
380 event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback);
381 if (!IS_ERR(event)) {
382 printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
383 goto out_save;
384 }
385
386 printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event);
387 return -1;
388
389 /* success path */
390out_save:
391 per_cpu(watchdog_ev, cpu) = event;
392out_enable:
393 perf_event_enable(per_cpu(watchdog_ev, cpu));
394out:
395 return 0;
396}
397
398static void watchdog_nmi_disable(int cpu)
399{
400 struct perf_event *event = per_cpu(watchdog_ev, cpu);
401
402 if (event) {
403 perf_event_disable(event);
404 per_cpu(watchdog_ev, cpu) = NULL;
405
406 /* should be in cleanup, but blocks oprofile */
407 perf_event_release_kernel(event);
408 }
409 return;
410}
411#else
412static int watchdog_nmi_enable(int cpu) { return 0; }
413static void watchdog_nmi_disable(int cpu) { return; }
414#endif /* CONFIG_PERF_EVENTS_NMI */
415
416/* prepare/enable/disable routines */
417static int watchdog_prepare_cpu(int cpu)
418{
419 struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
420
421 WARN_ON(per_cpu(softlockup_watchdog, cpu));
422 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
423 hrtimer->function = watchdog_timer_fn;
424
425 return 0;
426}
427
428static int watchdog_enable(int cpu)
429{
430 struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
431
432 /* enable the perf event */
433 if (watchdog_nmi_enable(cpu) != 0)
434 return -1;
435
436 /* create the watchdog thread */
437 if (!p) {
438 p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
439 if (IS_ERR(p)) {
440 printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
441 return -1;
442 }
443 kthread_bind(p, cpu);
444 per_cpu(watchdog_touch_ts, cpu) = 0;
445 per_cpu(softlockup_watchdog, cpu) = p;
446 wake_up_process(p);
447 }
448
449 return 0;
450}
451
452static void watchdog_disable(int cpu)
453{
454 struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
455 struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
456
457 /*
458 * cancel the timer first to stop incrementing the stats
459 * and waking up the kthread
460 */
461 hrtimer_cancel(hrtimer);
462
463 /* disable the perf event */
464 watchdog_nmi_disable(cpu);
465
466 /* stop the watchdog thread */
467 if (p) {
468 per_cpu(softlockup_watchdog, cpu) = NULL;
469 kthread_stop(p);
470 }
471
472 /* if any cpu succeeds, watchdog is considered enabled for the system */
473 watchdog_enabled = 1;
474}
475
476static void watchdog_enable_all_cpus(void)
477{
478 int cpu;
479 int result;
480
481 for_each_online_cpu(cpu)
482 result += watchdog_enable(cpu);
483
484 if (result)
485 printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n");
486
487}
488
489static void watchdog_disable_all_cpus(void)
490{
491 int cpu;
492
493 for_each_online_cpu(cpu)
494 watchdog_disable(cpu);
495
496 /* if all watchdogs are disabled, then they are disabled for the system */
497 watchdog_enabled = 0;
498}
499
500
501/* sysctl functions */
502#ifdef CONFIG_SYSCTL
503/*
504 * proc handler for /proc/sys/kernel/nmi_watchdog
505 */
506
507int proc_dowatchdog_enabled(struct ctl_table *table, int write,
508 void __user *buffer, size_t *length, loff_t *ppos)
509{
510 proc_dointvec(table, write, buffer, length, ppos);
511
512 if (watchdog_enabled)
513 watchdog_enable_all_cpus();
514 else
515 watchdog_disable_all_cpus();
516 return 0;
517}
518
519int proc_dowatchdog_thresh(struct ctl_table *table, int write,
520 void __user *buffer,
521 size_t *lenp, loff_t *ppos)
522{
523 return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
524}
525
526/* stub functions */
527int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
528 void __user *buffer,
529 size_t *lenp, loff_t *ppos)
530{
531 return proc_dowatchdog_thresh(table, write, buffer, lenp, ppos);
532}
533/* end of stub functions */
534#endif /* CONFIG_SYSCTL */
535
536
537/*
538 * Create/destroy watchdog threads as CPUs come and go:
539 */
540static int __cpuinit
541cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
542{
543 int hotcpu = (unsigned long)hcpu;
544
545 switch (action) {
546 case CPU_UP_PREPARE:
547 case CPU_UP_PREPARE_FROZEN:
548 if (watchdog_prepare_cpu(hotcpu))
549 return NOTIFY_BAD;
550 break;
551 case CPU_ONLINE:
552 case CPU_ONLINE_FROZEN:
553 if (watchdog_enable(hotcpu))
554 return NOTIFY_BAD;
555 break;
556#ifdef CONFIG_HOTPLUG_CPU
557 case CPU_UP_CANCELED:
558 case CPU_UP_CANCELED_FROZEN:
559 watchdog_disable(hotcpu);
560 break;
561 case CPU_DEAD:
562 case CPU_DEAD_FROZEN:
563 watchdog_disable(hotcpu);
564 break;
565#endif /* CONFIG_HOTPLUG_CPU */
566 }
567 return NOTIFY_OK;
568}
569
570static struct notifier_block __cpuinitdata cpu_nfb = {
571 .notifier_call = cpu_callback
572};
573
574static int __init spawn_watchdog_task(void)
575{
576 void *cpu = (void *)(long)smp_processor_id();
577 int err;
578
579 if (no_watchdog)
580 return 0;
581
582 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
583 WARN_ON(err == NOTIFY_BAD);
584
585 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
586 register_cpu_notifier(&cpu_nfb);
587
588 atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
589
590 return 0;
591}
592early_initcall(spawn_watchdog_task);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 220ae6063b6f..49e285dcaf57 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -153,7 +153,7 @@ config DEBUG_SHIRQ
153 points; some don't and need to be caught. 153 points; some don't and need to be caught.
154 154
155config DETECT_SOFTLOCKUP 155config DETECT_SOFTLOCKUP
156 bool "Detect Soft Lockups" 156 bool
157 depends on DEBUG_KERNEL && !S390 157 depends on DEBUG_KERNEL && !S390
158 default y 158 default y
159 help 159 help
@@ -171,17 +171,27 @@ config DETECT_SOFTLOCKUP
171 can be detected via the NMI-watchdog, on platforms that 171 can be detected via the NMI-watchdog, on platforms that
172 support it.) 172 support it.)
173 173
174config NMI_WATCHDOG 174config LOCKUP_DETECTOR
175 bool "Detect Hard Lockups with an NMI Watchdog" 175 bool "Detect Hard and Soft Lockups"
176 depends on DEBUG_KERNEL && PERF_EVENTS && PERF_EVENTS_NMI 176 depends on DEBUG_KERNEL
177 default DETECT_SOFTLOCKUP
177 help 178 help
178 Say Y here to enable the kernel to use the NMI as a watchdog 179 Say Y here to enable the kernel to act as a watchdog to detect
179 to detect hard lockups. This is useful when a cpu hangs for no 180 hard and soft lockups.
180 reason but can still respond to NMIs. A backtrace is displayed 181
181 for reviewing and reporting. 182 Softlockups are bugs that cause the kernel to loop in kernel
183 mode for more than 60 seconds, without giving other tasks a
184 chance to run. The current stack trace is displayed upon
185 detection and the system will stay locked up.
186
187 Hardlockups are bugs that cause the CPU to loop in kernel mode
188 for more than 60 seconds, without letting other interrupts have a
189 chance to run. The current stack trace is displayed upon detection
190 and the system will stay locked up.
182 191
183 The overhead should be minimal, just an extra NMI every few 192 The overhead should be minimal. A periodic hrtimer runs to
184 seconds. 193 generate interrupts and kick the watchdog task every 10-12 seconds.
194 An NMI is generated every 60 seconds or so to check for hardlockups.
185 195
186config BOOTPARAM_SOFTLOCKUP_PANIC 196config BOOTPARAM_SOFTLOCKUP_PANIC
187 bool "Panic (Reboot) On Soft Lockups" 197 bool "Panic (Reboot) On Soft Lockups"