aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2010-08-05 02:45:05 -0400
committerIngo Molnar <mingo@elte.hu>2010-08-05 02:45:05 -0400
commit61be7fdec2f51b99570cd5dcc30c7848c8e56513 (patch)
tree4a73ee635bc3e35dc54f75caddd26ffb6238bb5c
parent12a81c8df13c60904febcafcf6b90ca1acb67122 (diff)
parenteb703f98191a505f78d0066712ad67d5dedc4c90 (diff)
Merge branch 'perf/nmi' into perf/core
Conflicts: kernel/Makefile Merge reason: Add the now complete topic, fix the conflict. Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--Documentation/kernel-parameters.txt2
-rw-r--r--arch/Kconfig7
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/nmi.h2
-rw-r--r--arch/x86/kernel/apic/Makefile7
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c107
-rw-r--r--arch/x86/kernel/apic/nmi.c7
-rw-r--r--arch/x86/kernel/traps.c7
-rw-r--r--include/linux/nmi.h13
-rw-r--r--include/linux/sched.h12
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/softlockup.c293
-rw-r--r--kernel/sysctl.c55
-rw-r--r--kernel/timer.c1
-rw-r--r--kernel/watchdog.c567
-rw-r--r--lib/Kconfig.debug35
16 files changed, 770 insertions, 348 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 2b2407d9a6d0..c45a3548537a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1812,6 +1812,8 @@ and is between 256 and 4096 characters. It is defined in the file
1812 1812
1813 nousb [USB] Disable the USB subsystem 1813 nousb [USB] Disable the USB subsystem
1814 1814
1815 nowatchdog [KNL] Disable the lockup detector.
1816
1815 nowb [ARM] 1817 nowb [ARM]
1816 1818
1817 nox2apic [X86-64,APIC] Do not enable x2APIC mode. 1819 nox2apic [X86-64,APIC] Do not enable x2APIC mode.
diff --git a/arch/Kconfig b/arch/Kconfig
index acda512da2e2..4877a8c8ee16 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -151,4 +151,11 @@ config HAVE_MIXED_BREAKPOINTS_REGS
151config HAVE_USER_RETURN_NOTIFIER 151config HAVE_USER_RETURN_NOTIFIER
152 bool 152 bool
153 153
154config HAVE_PERF_EVENTS_NMI
155 bool
156 help
157 System hardware can generate an NMI using the perf event
158 subsystem. Also has support for calculating CPU cycle events
159 to determine how many clock cycles in a given period.
160
154source "kernel/gcov/Kconfig" 161source "kernel/gcov/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index dcb0593b4a66..6f77afa6bca9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -55,6 +55,7 @@ config X86
55 select HAVE_HW_BREAKPOINT 55 select HAVE_HW_BREAKPOINT
56 select HAVE_MIXED_BREAKPOINTS_REGS 56 select HAVE_MIXED_BREAKPOINTS_REGS
57 select PERF_EVENTS 57 select PERF_EVENTS
58 select HAVE_PERF_EVENTS_NMI
58 select ANON_INODES 59 select ANON_INODES
59 select HAVE_ARCH_KMEMCHECK 60 select HAVE_ARCH_KMEMCHECK
60 select HAVE_USER_RETURN_NOTIFIER 61 select HAVE_USER_RETURN_NOTIFIER
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 93da9c3f3341..932f0f86b4b7 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -17,7 +17,9 @@ int do_nmi_callback(struct pt_regs *regs, int cpu);
17 17
18extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); 18extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
19extern int check_nmi_watchdog(void); 19extern int check_nmi_watchdog(void);
20#if !defined(CONFIG_LOCKUP_DETECTOR)
20extern int nmi_watchdog_enabled; 21extern int nmi_watchdog_enabled;
22#endif
21extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); 23extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
22extern int reserve_perfctr_nmi(unsigned int); 24extern int reserve_perfctr_nmi(unsigned int);
23extern void release_perfctr_nmi(unsigned int); 25extern void release_perfctr_nmi(unsigned int);
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 565c1bfc507d..910f20b457c4 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -2,7 +2,12 @@
2# Makefile for local APIC drivers and for the IO-APIC code 2# Makefile for local APIC drivers and for the IO-APIC code
3# 3#
4 4
5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o 5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o
6ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
7obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
8endif
9obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o
10
6obj-$(CONFIG_X86_IO_APIC) += io_apic.o 11obj-$(CONFIG_X86_IO_APIC) += io_apic.o
7obj-$(CONFIG_SMP) += ipi.o 12obj-$(CONFIG_SMP) += ipi.o
8 13
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
new file mode 100644
index 000000000000..cefd6942f0e9
--- /dev/null
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -0,0 +1,107 @@
1/*
2 * HW NMI watchdog support
3 *
4 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
5 *
6 * Arch specific calls to support NMI watchdog
7 *
8 * Bits copied from original nmi.c file
9 *
10 */
11#include <asm/apic.h>
12
13#include <linux/cpumask.h>
14#include <linux/kdebug.h>
15#include <linux/notifier.h>
16#include <linux/kprobes.h>
17#include <linux/nmi.h>
18#include <linux/module.h>
19
20/* For reliability, we're prepared to waste bits here. */
21static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
22
23u64 hw_nmi_get_sample_period(void)
24{
25 return (u64)(cpu_khz) * 1000 * 60;
26}
27
28#ifdef ARCH_HAS_NMI_WATCHDOG
29void arch_trigger_all_cpu_backtrace(void)
30{
31 int i;
32
33 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
34
35 printk(KERN_INFO "sending NMI to all CPUs:\n");
36 apic->send_IPI_all(NMI_VECTOR);
37
38 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
39 for (i = 0; i < 10 * 1000; i++) {
40 if (cpumask_empty(to_cpumask(backtrace_mask)))
41 break;
42 mdelay(1);
43 }
44}
45
46static int __kprobes
47arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
48 unsigned long cmd, void *__args)
49{
50 struct die_args *args = __args;
51 struct pt_regs *regs;
52 int cpu = smp_processor_id();
53
54 switch (cmd) {
55 case DIE_NMI:
56 case DIE_NMI_IPI:
57 break;
58
59 default:
60 return NOTIFY_DONE;
61 }
62
63 regs = args->regs;
64
65 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
66 static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
67
68 arch_spin_lock(&lock);
69 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
70 show_regs(regs);
71 dump_stack();
72 arch_spin_unlock(&lock);
73 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
74 return NOTIFY_STOP;
75 }
76
77 return NOTIFY_DONE;
78}
79
80static __read_mostly struct notifier_block backtrace_notifier = {
81 .notifier_call = arch_trigger_all_cpu_backtrace_handler,
82 .next = NULL,
83 .priority = 1
84};
85
86static int __init register_trigger_all_cpu_backtrace(void)
87{
88 register_die_notifier(&backtrace_notifier);
89 return 0;
90}
91early_initcall(register_trigger_all_cpu_backtrace);
92#endif
93
94/* STUB calls to mimic old nmi_watchdog behaviour */
95#if defined(CONFIG_X86_LOCAL_APIC)
96unsigned int nmi_watchdog = NMI_NONE;
97EXPORT_SYMBOL(nmi_watchdog);
98void acpi_nmi_enable(void) { return; }
99void acpi_nmi_disable(void) { return; }
100#endif
101atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
102EXPORT_SYMBOL(nmi_active);
103int unknown_nmi_panic;
104void cpu_nmi_set_wd_enabled(void) { return; }
105void stop_apic_nmi_watchdog(void *unused) { return; }
106void setup_apic_nmi_watchdog(void *unused) { return; }
107int __init check_nmi_watchdog(void) { return 0; }
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index 1edaf15c0b8e..a43f71cb30f8 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -401,13 +401,6 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
401 int cpu = smp_processor_id(); 401 int cpu = smp_processor_id();
402 int rc = 0; 402 int rc = 0;
403 403
404 /* check for other users first */
405 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
406 == NOTIFY_STOP) {
407 rc = 1;
408 touched = 1;
409 }
410
411 sum = get_timer_irqs(cpu); 404 sum = get_timer_irqs(cpu);
412 405
413 if (__get_cpu_var(nmi_touch)) { 406 if (__get_cpu_var(nmi_touch)) {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 725ef4d17cd5..60788dee0f8a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -392,7 +392,13 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
392 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) 392 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
393 == NOTIFY_STOP) 393 == NOTIFY_STOP)
394 return; 394 return;
395
395#ifdef CONFIG_X86_LOCAL_APIC 396#ifdef CONFIG_X86_LOCAL_APIC
397 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
398 == NOTIFY_STOP)
399 return;
400
401#ifndef CONFIG_LOCKUP_DETECTOR
396 /* 402 /*
397 * Ok, so this is none of the documented NMI sources, 403 * Ok, so this is none of the documented NMI sources,
398 * so it must be the NMI watchdog. 404 * so it must be the NMI watchdog.
@@ -400,6 +406,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
400 if (nmi_watchdog_tick(regs, reason)) 406 if (nmi_watchdog_tick(regs, reason))
401 return; 407 return;
402 if (!do_nmi_callback(regs, cpu)) 408 if (!do_nmi_callback(regs, cpu))
409#endif /* !CONFIG_LOCKUP_DETECTOR */
403 unknown_nmi_error(reason, regs); 410 unknown_nmi_error(reason, regs);
404#else 411#else
405 unknown_nmi_error(reason, regs); 412 unknown_nmi_error(reason, regs);
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index b752e807adde..06aab5eee134 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -20,10 +20,14 @@ extern void touch_nmi_watchdog(void);
20extern void acpi_nmi_disable(void); 20extern void acpi_nmi_disable(void);
21extern void acpi_nmi_enable(void); 21extern void acpi_nmi_enable(void);
22#else 22#else
23#ifndef CONFIG_HARDLOCKUP_DETECTOR
23static inline void touch_nmi_watchdog(void) 24static inline void touch_nmi_watchdog(void)
24{ 25{
25 touch_softlockup_watchdog(); 26 touch_softlockup_watchdog();
26} 27}
28#else
29extern void touch_nmi_watchdog(void);
30#endif
27static inline void acpi_nmi_disable(void) { } 31static inline void acpi_nmi_disable(void) { }
28static inline void acpi_nmi_enable(void) { } 32static inline void acpi_nmi_enable(void) { }
29#endif 33#endif
@@ -47,4 +51,13 @@ static inline bool trigger_all_cpu_backtrace(void)
47} 51}
48#endif 52#endif
49 53
54#ifdef CONFIG_LOCKUP_DETECTOR
55int hw_nmi_is_cpu_stuck(struct pt_regs *);
56u64 hw_nmi_get_sample_period(void);
57extern int watchdog_enabled;
58struct ctl_table;
59extern int proc_dowatchdog_enabled(struct ctl_table *, int ,
60 void __user *, size_t *, loff_t *);
61#endif
62
50#endif 63#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7b6ec63cb74f..3992f50de614 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -316,20 +316,16 @@ extern void scheduler_tick(void);
316 316
317extern void sched_show_task(struct task_struct *p); 317extern void sched_show_task(struct task_struct *p);
318 318
319#ifdef CONFIG_DETECT_SOFTLOCKUP 319#ifdef CONFIG_LOCKUP_DETECTOR
320extern void softlockup_tick(void);
321extern void touch_softlockup_watchdog(void); 320extern void touch_softlockup_watchdog(void);
322extern void touch_softlockup_watchdog_sync(void); 321extern void touch_softlockup_watchdog_sync(void);
323extern void touch_all_softlockup_watchdogs(void); 322extern void touch_all_softlockup_watchdogs(void);
324extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, 323extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
325 void __user *buffer, 324 void __user *buffer,
326 size_t *lenp, loff_t *ppos); 325 size_t *lenp, loff_t *ppos);
327extern unsigned int softlockup_panic; 326extern unsigned int softlockup_panic;
328extern int softlockup_thresh; 327extern int softlockup_thresh;
329#else 328#else
330static inline void softlockup_tick(void)
331{
332}
333static inline void touch_softlockup_watchdog(void) 329static inline void touch_softlockup_watchdog(void)
334{ 330{
335} 331}
diff --git a/kernel/Makefile b/kernel/Makefile
index 057472fbc272..ce53fb2bd1d9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -76,8 +76,8 @@ obj-$(CONFIG_GCOV_KERNEL) += gcov/
76obj-$(CONFIG_AUDIT_TREE) += audit_tree.o 76obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
77obj-$(CONFIG_KPROBES) += kprobes.o 77obj-$(CONFIG_KPROBES) += kprobes.o
78obj-$(CONFIG_KGDB) += debug/ 78obj-$(CONFIG_KGDB) += debug/
79obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
80obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o 79obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
80obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
81obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ 81obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
82obj-$(CONFIG_SECCOMP) += seccomp.o 82obj-$(CONFIG_SECCOMP) += seccomp.o
83obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 83obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
deleted file mode 100644
index 4b493f67dcb5..000000000000
--- a/kernel/softlockup.c
+++ /dev/null
@@ -1,293 +0,0 @@
1/*
2 * Detect Soft Lockups
3 *
4 * started by Ingo Molnar, Copyright (C) 2005, 2006 Red Hat, Inc.
5 *
6 * this code detects soft lockups: incidents in where on a CPU
7 * the kernel does not reschedule for 10 seconds or more.
8 */
9#include <linux/mm.h>
10#include <linux/cpu.h>
11#include <linux/nmi.h>
12#include <linux/init.h>
13#include <linux/delay.h>
14#include <linux/freezer.h>
15#include <linux/kthread.h>
16#include <linux/lockdep.h>
17#include <linux/notifier.h>
18#include <linux/module.h>
19#include <linux/sysctl.h>
20
21#include <asm/irq_regs.h>
22
23static DEFINE_SPINLOCK(print_lock);
24
25static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
26static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
27static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
28static DEFINE_PER_CPU(bool, softlock_touch_sync);
29
30static int __read_mostly did_panic;
31int __read_mostly softlockup_thresh = 60;
32
33/*
34 * Should we panic (and reboot, if panic_timeout= is set) when a
35 * soft-lockup occurs:
36 */
37unsigned int __read_mostly softlockup_panic =
38 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
39
40static int __init softlockup_panic_setup(char *str)
41{
42 softlockup_panic = simple_strtoul(str, NULL, 0);
43
44 return 1;
45}
46__setup("softlockup_panic=", softlockup_panic_setup);
47
48static int
49softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
50{
51 did_panic = 1;
52
53 return NOTIFY_DONE;
54}
55
56static struct notifier_block panic_block = {
57 .notifier_call = softlock_panic,
58};
59
60/*
61 * Returns seconds, approximately. We don't need nanosecond
62 * resolution, and we don't need to waste time with a big divide when
63 * 2^30ns == 1.074s.
64 */
65static unsigned long get_timestamp(int this_cpu)
66{
67 return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */
68}
69
70static void __touch_softlockup_watchdog(void)
71{
72 int this_cpu = raw_smp_processor_id();
73
74 __raw_get_cpu_var(softlockup_touch_ts) = get_timestamp(this_cpu);
75}
76
77void touch_softlockup_watchdog(void)
78{
79 __raw_get_cpu_var(softlockup_touch_ts) = 0;
80}
81EXPORT_SYMBOL(touch_softlockup_watchdog);
82
83void touch_softlockup_watchdog_sync(void)
84{
85 __raw_get_cpu_var(softlock_touch_sync) = true;
86 __raw_get_cpu_var(softlockup_touch_ts) = 0;
87}
88
89void touch_all_softlockup_watchdogs(void)
90{
91 int cpu;
92
93 /* Cause each CPU to re-update its timestamp rather than complain */
94 for_each_online_cpu(cpu)
95 per_cpu(softlockup_touch_ts, cpu) = 0;
96}
97EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
98
99int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
100 void __user *buffer,
101 size_t *lenp, loff_t *ppos)
102{
103 touch_all_softlockup_watchdogs();
104 return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
105}
106
107/*
108 * This callback runs from the timer interrupt, and checks
109 * whether the watchdog thread has hung or not:
110 */
111void softlockup_tick(void)
112{
113 int this_cpu = smp_processor_id();
114 unsigned long touch_ts = per_cpu(softlockup_touch_ts, this_cpu);
115 unsigned long print_ts;
116 struct pt_regs *regs = get_irq_regs();
117 unsigned long now;
118
119 /* Is detection switched off? */
120 if (!per_cpu(softlockup_watchdog, this_cpu) || softlockup_thresh <= 0) {
121 /* Be sure we don't false trigger if switched back on */
122 if (touch_ts)
123 per_cpu(softlockup_touch_ts, this_cpu) = 0;
124 return;
125 }
126
127 if (touch_ts == 0) {
128 if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
129 /*
130 * If the time stamp was touched atomically
131 * make sure the scheduler tick is up to date.
132 */
133 per_cpu(softlock_touch_sync, this_cpu) = false;
134 sched_clock_tick();
135 }
136 __touch_softlockup_watchdog();
137 return;
138 }
139
140 print_ts = per_cpu(softlockup_print_ts, this_cpu);
141
142 /* report at most once a second */
143 if (print_ts == touch_ts || did_panic)
144 return;
145
146 /* do not print during early bootup: */
147 if (unlikely(system_state != SYSTEM_RUNNING)) {
148 __touch_softlockup_watchdog();
149 return;
150 }
151
152 now = get_timestamp(this_cpu);
153
154 /*
155 * Wake up the high-prio watchdog task twice per
156 * threshold timespan.
157 */
158 if (time_after(now - softlockup_thresh/2, touch_ts))
159 wake_up_process(per_cpu(softlockup_watchdog, this_cpu));
160
161 /* Warn about unreasonable delays: */
162 if (time_before_eq(now - softlockup_thresh, touch_ts))
163 return;
164
165 per_cpu(softlockup_print_ts, this_cpu) = touch_ts;
166
167 spin_lock(&print_lock);
168 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
169 this_cpu, now - touch_ts,
170 current->comm, task_pid_nr(current));
171 print_modules();
172 print_irqtrace_events(current);
173 if (regs)
174 show_regs(regs);
175 else
176 dump_stack();
177 spin_unlock(&print_lock);
178
179 if (softlockup_panic)
180 panic("softlockup: hung tasks");
181}
182
183/*
184 * The watchdog thread - runs every second and touches the timestamp.
185 */
186static int watchdog(void *__bind_cpu)
187{
188 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
189
190 sched_setscheduler(current, SCHED_FIFO, &param);
191
192 /* initialize timestamp */
193 __touch_softlockup_watchdog();
194
195 set_current_state(TASK_INTERRUPTIBLE);
196 /*
197 * Run briefly once per second to reset the softlockup timestamp.
198 * If this gets delayed for more than 60 seconds then the
199 * debug-printout triggers in softlockup_tick().
200 */
201 while (!kthread_should_stop()) {
202 __touch_softlockup_watchdog();
203 schedule();
204
205 if (kthread_should_stop())
206 break;
207
208 set_current_state(TASK_INTERRUPTIBLE);
209 }
210 __set_current_state(TASK_RUNNING);
211
212 return 0;
213}
214
215/*
216 * Create/destroy watchdog threads as CPUs come and go:
217 */
218static int __cpuinit
219cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
220{
221 int hotcpu = (unsigned long)hcpu;
222 struct task_struct *p;
223
224 switch (action) {
225 case CPU_UP_PREPARE:
226 case CPU_UP_PREPARE_FROZEN:
227 BUG_ON(per_cpu(softlockup_watchdog, hotcpu));
228 p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
229 if (IS_ERR(p)) {
230 printk(KERN_ERR "watchdog for %i failed\n", hotcpu);
231 return NOTIFY_BAD;
232 }
233 per_cpu(softlockup_touch_ts, hotcpu) = 0;
234 per_cpu(softlockup_watchdog, hotcpu) = p;
235 kthread_bind(p, hotcpu);
236 break;
237 case CPU_ONLINE:
238 case CPU_ONLINE_FROZEN:
239 wake_up_process(per_cpu(softlockup_watchdog, hotcpu));
240 break;
241#ifdef CONFIG_HOTPLUG_CPU
242 case CPU_UP_CANCELED:
243 case CPU_UP_CANCELED_FROZEN:
244 if (!per_cpu(softlockup_watchdog, hotcpu))
245 break;
246 /* Unbind so it can run. Fall thru. */
247 kthread_bind(per_cpu(softlockup_watchdog, hotcpu),
248 cpumask_any(cpu_online_mask));
249 case CPU_DEAD:
250 case CPU_DEAD_FROZEN:
251 p = per_cpu(softlockup_watchdog, hotcpu);
252 per_cpu(softlockup_watchdog, hotcpu) = NULL;
253 kthread_stop(p);
254 break;
255#endif /* CONFIG_HOTPLUG_CPU */
256 }
257 return NOTIFY_OK;
258}
259
260static struct notifier_block __cpuinitdata cpu_nfb = {
261 .notifier_call = cpu_callback
262};
263
264static int __initdata nosoftlockup;
265
266static int __init nosoftlockup_setup(char *str)
267{
268 nosoftlockup = 1;
269 return 1;
270}
271__setup("nosoftlockup", nosoftlockup_setup);
272
273static int __init spawn_softlockup_task(void)
274{
275 void *cpu = (void *)(long)smp_processor_id();
276 int err;
277
278 if (nosoftlockup)
279 return 0;
280
281 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
282 if (err == NOTIFY_BAD) {
283 BUG();
284 return 1;
285 }
286 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
287 register_cpu_notifier(&cpu_nfb);
288
289 atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
290
291 return 0;
292}
293early_initcall(spawn_softlockup_task);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d24f761f4876..6f79c7f81c96 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -76,6 +76,10 @@
76#include <scsi/sg.h> 76#include <scsi/sg.h>
77#endif 77#endif
78 78
79#ifdef CONFIG_LOCKUP_DETECTOR
80#include <linux/nmi.h>
81#endif
82
79 83
80#if defined(CONFIG_SYSCTL) 84#if defined(CONFIG_SYSCTL)
81 85
@@ -106,7 +110,7 @@ extern int blk_iopoll_enabled;
106#endif 110#endif
107 111
108/* Constants used for minimum and maximum */ 112/* Constants used for minimum and maximum */
109#ifdef CONFIG_DETECT_SOFTLOCKUP 113#ifdef CONFIG_LOCKUP_DETECTOR
110static int sixty = 60; 114static int sixty = 60;
111static int neg_one = -1; 115static int neg_one = -1;
112#endif 116#endif
@@ -710,7 +714,34 @@ static struct ctl_table kern_table[] = {
710 .mode = 0444, 714 .mode = 0444,
711 .proc_handler = proc_dointvec, 715 .proc_handler = proc_dointvec,
712 }, 716 },
713#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) 717#if defined(CONFIG_LOCKUP_DETECTOR)
718 {
719 .procname = "watchdog",
720 .data = &watchdog_enabled,
721 .maxlen = sizeof (int),
722 .mode = 0644,
723 .proc_handler = proc_dowatchdog_enabled,
724 },
725 {
726 .procname = "watchdog_thresh",
727 .data = &softlockup_thresh,
728 .maxlen = sizeof(int),
729 .mode = 0644,
730 .proc_handler = proc_dowatchdog_thresh,
731 .extra1 = &neg_one,
732 .extra2 = &sixty,
733 },
734 {
735 .procname = "softlockup_panic",
736 .data = &softlockup_panic,
737 .maxlen = sizeof(int),
738 .mode = 0644,
739 .proc_handler = proc_dointvec_minmax,
740 .extra1 = &zero,
741 .extra2 = &one,
742 },
743#endif
744#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
714 { 745 {
715 .procname = "unknown_nmi_panic", 746 .procname = "unknown_nmi_panic",
716 .data = &unknown_nmi_panic, 747 .data = &unknown_nmi_panic,
@@ -813,26 +844,6 @@ static struct ctl_table kern_table[] = {
813 .proc_handler = proc_dointvec, 844 .proc_handler = proc_dointvec,
814 }, 845 },
815#endif 846#endif
816#ifdef CONFIG_DETECT_SOFTLOCKUP
817 {
818 .procname = "softlockup_panic",
819 .data = &softlockup_panic,
820 .maxlen = sizeof(int),
821 .mode = 0644,
822 .proc_handler = proc_dointvec_minmax,
823 .extra1 = &zero,
824 .extra2 = &one,
825 },
826 {
827 .procname = "softlockup_thresh",
828 .data = &softlockup_thresh,
829 .maxlen = sizeof(int),
830 .mode = 0644,
831 .proc_handler = proc_dosoftlockup_thresh,
832 .extra1 = &neg_one,
833 .extra2 = &sixty,
834 },
835#endif
836#ifdef CONFIG_DETECT_HUNG_TASK 847#ifdef CONFIG_DETECT_HUNG_TASK
837 { 848 {
838 .procname = "hung_task_panic", 849 .procname = "hung_task_panic",
diff --git a/kernel/timer.c b/kernel/timer.c
index ee305c8d4e18..c29e2d4d2a66 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1289,7 +1289,6 @@ void run_local_timers(void)
1289{ 1289{
1290 hrtimer_run_queues(); 1290 hrtimer_run_queues();
1291 raise_softirq(TIMER_SOFTIRQ); 1291 raise_softirq(TIMER_SOFTIRQ);
1292 softlockup_tick();
1293} 1292}
1294 1293
1295/* 1294/*
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
new file mode 100644
index 000000000000..613bc1f04610
--- /dev/null
+++ b/kernel/watchdog.c
@@ -0,0 +1,567 @@
1/*
2 * Detect hard and soft lockups on a system
3 *
4 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
5 *
6 * this code detects hard lockups: incidents in where on a CPU
7 * the kernel does not respond to anything except NMI.
8 *
9 * Note: Most of this code is borrowed heavily from softlockup.c,
10 * so thanks to Ingo for the initial implementation.
11 * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
12 * to those contributors as well.
13 */
14
15#include <linux/mm.h>
16#include <linux/cpu.h>
17#include <linux/nmi.h>
18#include <linux/init.h>
19#include <linux/delay.h>
20#include <linux/freezer.h>
21#include <linux/kthread.h>
22#include <linux/lockdep.h>
23#include <linux/notifier.h>
24#include <linux/module.h>
25#include <linux/sysctl.h>
26
27#include <asm/irq_regs.h>
28#include <linux/perf_event.h>
29
30int watchdog_enabled;
31int __read_mostly softlockup_thresh = 60;
32
33static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
34static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
35static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
36static DEFINE_PER_CPU(bool, softlockup_touch_sync);
37static DEFINE_PER_CPU(bool, soft_watchdog_warn);
38#ifdef CONFIG_HARDLOCKUP_DETECTOR
39static DEFINE_PER_CPU(bool, hard_watchdog_warn);
40static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
41static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
42static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
44#endif
45
46static int __read_mostly did_panic;
47static int __initdata no_watchdog;
48
49
50/* boot commands */
51/*
52 * Should we panic when a soft-lockup or hard-lockup occurs:
53 */
54#ifdef CONFIG_HARDLOCKUP_DETECTOR
55static int hardlockup_panic;
56
57static int __init hardlockup_panic_setup(char *str)
58{
59 if (!strncmp(str, "panic", 5))
60 hardlockup_panic = 1;
61 return 1;
62}
63__setup("nmi_watchdog=", hardlockup_panic_setup);
64#endif
65
66unsigned int __read_mostly softlockup_panic =
67 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
68
69static int __init softlockup_panic_setup(char *str)
70{
71 softlockup_panic = simple_strtoul(str, NULL, 0);
72
73 return 1;
74}
75__setup("softlockup_panic=", softlockup_panic_setup);
76
77static int __init nowatchdog_setup(char *str)
78{
79 no_watchdog = 1;
80 return 1;
81}
82__setup("nowatchdog", nowatchdog_setup);
83
84/* deprecated */
85static int __init nosoftlockup_setup(char *str)
86{
87 no_watchdog = 1;
88 return 1;
89}
90__setup("nosoftlockup", nosoftlockup_setup);
91/* */
92
93
94/*
95 * Returns seconds, approximately. We don't need nanosecond
96 * resolution, and we don't need to waste time with a big divide when
97 * 2^30ns == 1.074s.
98 */
99static unsigned long get_timestamp(int this_cpu)
100{
101 return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */
102}
103
104static unsigned long get_sample_period(void)
105{
106 /*
107 * convert softlockup_thresh from seconds to ns
108 * the divide by 5 is to give hrtimer 5 chances to
109 * increment before the hardlockup detector generates
110 * a warning
111 */
112 return softlockup_thresh / 5 * NSEC_PER_SEC;
113}
114
115/* Commands for resetting the watchdog */
116static void __touch_watchdog(void)
117{
118 int this_cpu = smp_processor_id();
119
120 __get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
121}
122
123void touch_softlockup_watchdog(void)
124{
125 __get_cpu_var(watchdog_touch_ts) = 0;
126}
127EXPORT_SYMBOL(touch_softlockup_watchdog);
128
129void touch_all_softlockup_watchdogs(void)
130{
131 int cpu;
132
133 /*
134 * this is done lockless
135 * do we care if a 0 races with a timestamp?
136 * all it means is the softlock check starts one cycle later
137 */
138 for_each_online_cpu(cpu)
139 per_cpu(watchdog_touch_ts, cpu) = 0;
140}
141
142#ifdef CONFIG_HARDLOCKUP_DETECTOR
143void touch_nmi_watchdog(void)
144{
145 __get_cpu_var(watchdog_nmi_touch) = true;
146 touch_softlockup_watchdog();
147}
148EXPORT_SYMBOL(touch_nmi_watchdog);
149
150#endif
151
152void touch_softlockup_watchdog_sync(void)
153{
154 __raw_get_cpu_var(softlockup_touch_sync) = true;
155 __raw_get_cpu_var(watchdog_touch_ts) = 0;
156}
157
158#ifdef CONFIG_HARDLOCKUP_DETECTOR
159/* watchdog detector functions */
160static int is_hardlockup(void)
161{
162 unsigned long hrint = __get_cpu_var(hrtimer_interrupts);
163
164 if (__get_cpu_var(hrtimer_interrupts_saved) == hrint)
165 return 1;
166
167 __get_cpu_var(hrtimer_interrupts_saved) = hrint;
168 return 0;
169}
170#endif
171
172static int is_softlockup(unsigned long touch_ts)
173{
174 unsigned long now = get_timestamp(smp_processor_id());
175
176 /* Warn about unreasonable delays: */
177 if (time_after(now, touch_ts + softlockup_thresh))
178 return now - touch_ts;
179
180 return 0;
181}
182
183static int
184watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr)
185{
186 did_panic = 1;
187
188 return NOTIFY_DONE;
189}
190
191static struct notifier_block panic_block = {
192 .notifier_call = watchdog_panic,
193};
194
195#ifdef CONFIG_HARDLOCKUP_DETECTOR
196static struct perf_event_attr wd_hw_attr = {
197 .type = PERF_TYPE_HARDWARE,
198 .config = PERF_COUNT_HW_CPU_CYCLES,
199 .size = sizeof(struct perf_event_attr),
200 .pinned = 1,
201 .disabled = 1,
202};
203
204/* Callback function for perf event subsystem */
205void watchdog_overflow_callback(struct perf_event *event, int nmi,
206 struct perf_sample_data *data,
207 struct pt_regs *regs)
208{
209 if (__get_cpu_var(watchdog_nmi_touch) == true) {
210 __get_cpu_var(watchdog_nmi_touch) = false;
211 return;
212 }
213
214 /* check for a hardlockup
215 * This is done by making sure our timer interrupt
216 * is incrementing. The timer interrupt should have
217 * fired multiple times before we overflow'd. If it hasn't
218 * then this is a good indication the cpu is stuck
219 */
220 if (is_hardlockup()) {
221 int this_cpu = smp_processor_id();
222
223 /* only print hardlockups once */
224 if (__get_cpu_var(hard_watchdog_warn) == true)
225 return;
226
227 if (hardlockup_panic)
228 panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
229 else
230 WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
231
232 __get_cpu_var(hard_watchdog_warn) = true;
233 return;
234 }
235
236 __get_cpu_var(hard_watchdog_warn) = false;
237 return;
238}
239static void watchdog_interrupt_count(void)
240{
241 __get_cpu_var(hrtimer_interrupts)++;
242}
243#else
244static inline void watchdog_interrupt_count(void) { return; }
245#endif /* CONFIG_HARDLOCKUP_DETECTOR */
246
247/* watchdog kicker functions */
248static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
249{
250 unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
251 struct pt_regs *regs = get_irq_regs();
252 int duration;
253
254 /* kick the hardlockup detector */
255 watchdog_interrupt_count();
256
257 /* kick the softlockup detector */
258 wake_up_process(__get_cpu_var(softlockup_watchdog));
259
260 /* .. and repeat */
261 hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
262
263 if (touch_ts == 0) {
264 if (unlikely(__get_cpu_var(softlockup_touch_sync))) {
265 /*
266 * If the time stamp was touched atomically
267 * make sure the scheduler tick is up to date.
268 */
269 __get_cpu_var(softlockup_touch_sync) = false;
270 sched_clock_tick();
271 }
272 __touch_watchdog();
273 return HRTIMER_RESTART;
274 }
275
276 /* check for a softlockup
277 * This is done by making sure a high priority task is
278 * being scheduled. The task touches the watchdog to
279 * indicate it is getting cpu time. If it hasn't then
280 * this is a good indication some task is hogging the cpu
281 */
282 duration = is_softlockup(touch_ts);
283 if (unlikely(duration)) {
284 /* only warn once */
285 if (__get_cpu_var(soft_watchdog_warn) == true)
286 return HRTIMER_RESTART;
287
288 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
289 smp_processor_id(), duration,
290 current->comm, task_pid_nr(current));
291 print_modules();
292 print_irqtrace_events(current);
293 if (regs)
294 show_regs(regs);
295 else
296 dump_stack();
297
298 if (softlockup_panic)
299 panic("softlockup: hung tasks");
300 __get_cpu_var(soft_watchdog_warn) = true;
301 } else
302 __get_cpu_var(soft_watchdog_warn) = false;
303
304 return HRTIMER_RESTART;
305}
306
307
308/*
309 * The watchdog thread - touches the timestamp.
310 */
311static int watchdog(void *unused)
312{
313 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
314 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
315
316 sched_setscheduler(current, SCHED_FIFO, &param);
317
318 /* initialize timestamp */
319 __touch_watchdog();
320
321 /* kick off the timer for the hardlockup detector */
322 /* done here because hrtimer_start can only pin to smp_processor_id() */
323 hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
324 HRTIMER_MODE_REL_PINNED);
325
326 set_current_state(TASK_INTERRUPTIBLE);
327 /*
328 * Run briefly once per second to reset the softlockup timestamp.
329 * If this gets delayed for more than 60 seconds then the
330 * debug-printout triggers in watchdog_timer_fn().
331 */
332 while (!kthread_should_stop()) {
333 __touch_watchdog();
334 schedule();
335
336 if (kthread_should_stop())
337 break;
338
339 set_current_state(TASK_INTERRUPTIBLE);
340 }
341 __set_current_state(TASK_RUNNING);
342
343 return 0;
344}
345
346
347#ifdef CONFIG_HARDLOCKUP_DETECTOR
348static int watchdog_nmi_enable(int cpu)
349{
350 struct perf_event_attr *wd_attr;
351 struct perf_event *event = per_cpu(watchdog_ev, cpu);
352
353 /* is it already setup and enabled? */
354 if (event && event->state > PERF_EVENT_STATE_OFF)
355 goto out;
356
357 /* it is setup but not enabled */
358 if (event != NULL)
359 goto out_enable;
360
361 /* Try to register using hardware perf events */
362 wd_attr = &wd_hw_attr;
363 wd_attr->sample_period = hw_nmi_get_sample_period();
364 event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback);
365 if (!IS_ERR(event)) {
366 printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
367 goto out_save;
368 }
369
370 printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event);
371 return -1;
372
373 /* success path */
374out_save:
375 per_cpu(watchdog_ev, cpu) = event;
376out_enable:
377 perf_event_enable(per_cpu(watchdog_ev, cpu));
378out:
379 return 0;
380}
381
382static void watchdog_nmi_disable(int cpu)
383{
384 struct perf_event *event = per_cpu(watchdog_ev, cpu);
385
386 if (event) {
387 perf_event_disable(event);
388 per_cpu(watchdog_ev, cpu) = NULL;
389
390 /* should be in cleanup, but blocks oprofile */
391 perf_event_release_kernel(event);
392 }
393 return;
394}
395#else
396static int watchdog_nmi_enable(int cpu) { return 0; }
397static void watchdog_nmi_disable(int cpu) { return; }
398#endif /* CONFIG_HARDLOCKUP_DETECTOR */
399
400/* prepare/enable/disable routines */
401static int watchdog_prepare_cpu(int cpu)
402{
403 struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
404
405 WARN_ON(per_cpu(softlockup_watchdog, cpu));
406 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
407 hrtimer->function = watchdog_timer_fn;
408
409 return 0;
410}
411
412static int watchdog_enable(int cpu)
413{
414 struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
415
416 /* enable the perf event */
417 if (watchdog_nmi_enable(cpu) != 0)
418 return -1;
419
420 /* create the watchdog thread */
421 if (!p) {
422 p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
423 if (IS_ERR(p)) {
424 printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
425 return -1;
426 }
427 kthread_bind(p, cpu);
428 per_cpu(watchdog_touch_ts, cpu) = 0;
429 per_cpu(softlockup_watchdog, cpu) = p;
430 wake_up_process(p);
431 }
432
433 return 0;
434}
435
436static void watchdog_disable(int cpu)
437{
438 struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
439 struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
440
441 /*
442 * cancel the timer first to stop incrementing the stats
443 * and waking up the kthread
444 */
445 hrtimer_cancel(hrtimer);
446
447 /* disable the perf event */
448 watchdog_nmi_disable(cpu);
449
450 /* stop the watchdog thread */
451 if (p) {
452 per_cpu(softlockup_watchdog, cpu) = NULL;
453 kthread_stop(p);
454 }
455
456 /* if any cpu succeeds, watchdog is considered enabled for the system */
457 watchdog_enabled = 1;
458}
459
460static void watchdog_enable_all_cpus(void)
461{
462 int cpu;
463 int result = 0;
464
465 for_each_online_cpu(cpu)
466 result += watchdog_enable(cpu);
467
468 if (result)
469 printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n");
470
471}
472
473static void watchdog_disable_all_cpus(void)
474{
475 int cpu;
476
477 for_each_online_cpu(cpu)
478 watchdog_disable(cpu);
479
480 /* if all watchdogs are disabled, then they are disabled for the system */
481 watchdog_enabled = 0;
482}
483
484
485/* sysctl functions */
486#ifdef CONFIG_SYSCTL
487/*
488 * proc handler for /proc/sys/kernel/nmi_watchdog
489 */
490
491int proc_dowatchdog_enabled(struct ctl_table *table, int write,
492 void __user *buffer, size_t *length, loff_t *ppos)
493{
494 proc_dointvec(table, write, buffer, length, ppos);
495
496 if (watchdog_enabled)
497 watchdog_enable_all_cpus();
498 else
499 watchdog_disable_all_cpus();
500 return 0;
501}
502
503int proc_dowatchdog_thresh(struct ctl_table *table, int write,
504 void __user *buffer,
505 size_t *lenp, loff_t *ppos)
506{
507 return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
508}
509#endif /* CONFIG_SYSCTL */
510
511
512/*
513 * Create/destroy watchdog threads as CPUs come and go:
514 */
515static int __cpuinit
516cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
517{
518 int hotcpu = (unsigned long)hcpu;
519
520 switch (action) {
521 case CPU_UP_PREPARE:
522 case CPU_UP_PREPARE_FROZEN:
523 if (watchdog_prepare_cpu(hotcpu))
524 return NOTIFY_BAD;
525 break;
526 case CPU_ONLINE:
527 case CPU_ONLINE_FROZEN:
528 if (watchdog_enable(hotcpu))
529 return NOTIFY_BAD;
530 break;
531#ifdef CONFIG_HOTPLUG_CPU
532 case CPU_UP_CANCELED:
533 case CPU_UP_CANCELED_FROZEN:
534 watchdog_disable(hotcpu);
535 break;
536 case CPU_DEAD:
537 case CPU_DEAD_FROZEN:
538 watchdog_disable(hotcpu);
539 break;
540#endif /* CONFIG_HOTPLUG_CPU */
541 }
542 return NOTIFY_OK;
543}
544
545static struct notifier_block __cpuinitdata cpu_nfb = {
546 .notifier_call = cpu_callback
547};
548
549static int __init spawn_watchdog_task(void)
550{
551 void *cpu = (void *)(long)smp_processor_id();
552 int err;
553
554 if (no_watchdog)
555 return 0;
556
557 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
558 WARN_ON(err == NOTIFY_BAD);
559
560 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
561 register_cpu_notifier(&cpu_nfb);
562
563 atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
564
565 return 0;
566}
567early_initcall(spawn_watchdog_task);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e722e9d62221..e2cd7fbf31c0 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -152,28 +152,33 @@ config DEBUG_SHIRQ
152 Drivers ought to be able to handle interrupts coming in at those 152 Drivers ought to be able to handle interrupts coming in at those
153 points; some don't and need to be caught. 153 points; some don't and need to be caught.
154 154
155config DETECT_SOFTLOCKUP 155config LOCKUP_DETECTOR
156 bool "Detect Soft Lockups" 156 bool "Detect Hard and Soft Lockups"
157 depends on DEBUG_KERNEL && !S390 157 depends on DEBUG_KERNEL && !S390
158 default y
159 help 158 help
160 Say Y here to enable the kernel to detect "soft lockups", 159 Say Y here to enable the kernel to act as a watchdog to detect
161 which are bugs that cause the kernel to loop in kernel 160 hard and soft lockups.
161
162 Softlockups are bugs that cause the kernel to loop in kernel
162 mode for more than 60 seconds, without giving other tasks a 163 mode for more than 60 seconds, without giving other tasks a
163 chance to run. 164 chance to run. The current stack trace is displayed upon
165 detection and the system will stay locked up.
164 166
165 When a soft-lockup is detected, the kernel will print the 167 Hardlockups are bugs that cause the CPU to loop in kernel mode
166 current stack trace (which you should report), but the 168 for more than 60 seconds, without letting other interrupts have a
167 system will stay locked up. This feature has negligible 169 chance to run. The current stack trace is displayed upon detection
168 overhead. 170 and the system will stay locked up.
171
172 The overhead should be minimal. A periodic hrtimer runs to
173 generate interrupts and kick the watchdog task every 10-12 seconds.
174 An NMI is generated every 60 seconds or so to check for hardlockups.
169 175
170 (Note that "hard lockups" are separate type of bugs that 176config HARDLOCKUP_DETECTOR
171 can be detected via the NMI-watchdog, on platforms that 177 def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI
172 support it.)
173 178
174config BOOTPARAM_SOFTLOCKUP_PANIC 179config BOOTPARAM_SOFTLOCKUP_PANIC
175 bool "Panic (Reboot) On Soft Lockups" 180 bool "Panic (Reboot) On Soft Lockups"
176 depends on DETECT_SOFTLOCKUP 181 depends on LOCKUP_DETECTOR
177 help 182 help
178 Say Y here to enable the kernel to panic on "soft lockups", 183 Say Y here to enable the kernel to panic on "soft lockups",
179 which are bugs that cause the kernel to loop in kernel 184 which are bugs that cause the kernel to loop in kernel
@@ -190,7 +195,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
190 195
191config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE 196config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
192 int 197 int
193 depends on DETECT_SOFTLOCKUP 198 depends on LOCKUP_DETECTOR
194 range 0 1 199 range 0 1
195 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC 200 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
196 default 1 if BOOTPARAM_SOFTLOCKUP_PANIC 201 default 1 if BOOTPARAM_SOFTLOCKUP_PANIC