diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-23 21:34:13 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-23 21:34:13 -0400 |
commit | d7b6de14a0ef8a376f9d57b867545b47302b7bfb (patch) | |
tree | 46904d68a5a68f22e6c6baf3472edd4c37a39481 | |
parent | 30d38542ec777468bb6a31829076a2dbc5690e35 (diff) | |
parent | 4dca10a96041f78bed11ce9e4a5cfde813ec4ccb (diff) |
Merge branch 'core/softlockup-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core/softlockup-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
softlockup: fix invalid proc_handler for softlockup_panic
softlockup: fix watchdog task wakeup frequency
softlockup: fix watchdog task wakeup frequency
softlockup: show irqtrace
softlockup: print a module list on being stuck
softlockup: fix NMI hangs due to lock race - 2.6.26-rc regression
softlockup: fix false positives on nohz if CPU is 100% idle for more than 60 seconds
softlockup: fix softlockup_thresh fix
softlockup: fix softlockup_thresh unaligned access and disable detection at runtime
softlockup: allow panic on lockup
-rw-r--r-- | Documentation/kernel-parameters.txt | 3 | ||||
-rw-r--r-- | include/linux/sched.h | 3 | ||||
-rw-r--r-- | kernel/softlockup.c | 45 | ||||
-rw-r--r-- | kernel/sysctl.c | 20 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 4 | ||||
-rw-r--r-- | lib/Kconfig.debug | 26 |
6 files changed, 86 insertions, 15 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 30d44b78171a..47e7d8794fc6 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -2034,6 +2034,9 @@ and is between 256 and 4096 characters. It is defined in the file | |||
2034 | 2034 | ||
2035 | snd-ymfpci= [HW,ALSA] | 2035 | snd-ymfpci= [HW,ALSA] |
2036 | 2036 | ||
2037 | softlockup_panic= | ||
2038 | [KNL] Should the soft-lockup detector generate panics. | ||
2039 | |||
2037 | sonypi.*= [HW] Sony Programmable I/O Control Device driver | 2040 | sonypi.*= [HW] Sony Programmable I/O Control Device driver |
2038 | See Documentation/sonypi.txt | 2041 | See Documentation/sonypi.txt |
2039 | 2042 | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h index 1941d8b5cf11..af443a08431f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -295,10 +295,11 @@ extern void softlockup_tick(void); | |||
295 | extern void spawn_softlockup_task(void); | 295 | extern void spawn_softlockup_task(void); |
296 | extern void touch_softlockup_watchdog(void); | 296 | extern void touch_softlockup_watchdog(void); |
297 | extern void touch_all_softlockup_watchdogs(void); | 297 | extern void touch_all_softlockup_watchdogs(void); |
298 | extern unsigned long softlockup_thresh; | 298 | extern unsigned int softlockup_panic; |
299 | extern unsigned long sysctl_hung_task_check_count; | 299 | extern unsigned long sysctl_hung_task_check_count; |
300 | extern unsigned long sysctl_hung_task_timeout_secs; | 300 | extern unsigned long sysctl_hung_task_timeout_secs; |
301 | extern unsigned long sysctl_hung_task_warnings; | 301 | extern unsigned long sysctl_hung_task_warnings; |
302 | extern int softlockup_thresh; | ||
302 | #else | 303 | #else |
303 | static inline void softlockup_tick(void) | 304 | static inline void softlockup_tick(void) |
304 | { | 305 | { |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index a272d78185eb..7bd8d1aadd5d 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/delay.h> | 13 | #include <linux/delay.h> |
14 | #include <linux/freezer.h> | 14 | #include <linux/freezer.h> |
15 | #include <linux/kthread.h> | 15 | #include <linux/kthread.h> |
16 | #include <linux/lockdep.h> | ||
16 | #include <linux/notifier.h> | 17 | #include <linux/notifier.h> |
17 | #include <linux/module.h> | 18 | #include <linux/module.h> |
18 | 19 | ||
@@ -25,7 +26,22 @@ static DEFINE_PER_CPU(unsigned long, print_timestamp); | |||
25 | static DEFINE_PER_CPU(struct task_struct *, watchdog_task); | 26 | static DEFINE_PER_CPU(struct task_struct *, watchdog_task); |
26 | 27 | ||
27 | static int __read_mostly did_panic; | 28 | static int __read_mostly did_panic; |
28 | unsigned long __read_mostly softlockup_thresh = 60; | 29 | int __read_mostly softlockup_thresh = 60; |
30 | |||
31 | /* | ||
32 | * Should we panic (and reboot, if panic_timeout= is set) when a | ||
33 | * soft-lockup occurs: | ||
34 | */ | ||
35 | unsigned int __read_mostly softlockup_panic = | ||
36 | CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; | ||
37 | |||
38 | static int __init softlockup_panic_setup(char *str) | ||
39 | { | ||
40 | softlockup_panic = simple_strtoul(str, NULL, 0); | ||
41 | |||
42 | return 1; | ||
43 | } | ||
44 | __setup("softlockup_panic=", softlockup_panic_setup); | ||
29 | 45 | ||
30 | static int | 46 | static int |
31 | softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) | 47 | softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) |
@@ -84,6 +100,14 @@ void softlockup_tick(void) | |||
84 | struct pt_regs *regs = get_irq_regs(); | 100 | struct pt_regs *regs = get_irq_regs(); |
85 | unsigned long now; | 101 | unsigned long now; |
86 | 102 | ||
103 | /* Is detection switched off? */ | ||
104 | if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) { | ||
105 | /* Be sure we don't false trigger if switched back on */ | ||
106 | if (touch_timestamp) | ||
107 | per_cpu(touch_timestamp, this_cpu) = 0; | ||
108 | return; | ||
109 | } | ||
110 | |||
87 | if (touch_timestamp == 0) { | 111 | if (touch_timestamp == 0) { |
88 | __touch_softlockup_watchdog(); | 112 | __touch_softlockup_watchdog(); |
89 | return; | 113 | return; |
@@ -92,11 +116,8 @@ void softlockup_tick(void) | |||
92 | print_timestamp = per_cpu(print_timestamp, this_cpu); | 116 | print_timestamp = per_cpu(print_timestamp, this_cpu); |
93 | 117 | ||
94 | /* report at most once a second */ | 118 | /* report at most once a second */ |
95 | if ((print_timestamp >= touch_timestamp && | 119 | if (print_timestamp == touch_timestamp || did_panic) |
96 | print_timestamp < (touch_timestamp + 1)) || | ||
97 | did_panic || !per_cpu(watchdog_task, this_cpu)) { | ||
98 | return; | 120 | return; |
99 | } | ||
100 | 121 | ||
101 | /* do not print during early bootup: */ | 122 | /* do not print during early bootup: */ |
102 | if (unlikely(system_state != SYSTEM_RUNNING)) { | 123 | if (unlikely(system_state != SYSTEM_RUNNING)) { |
@@ -106,8 +127,11 @@ void softlockup_tick(void) | |||
106 | 127 | ||
107 | now = get_timestamp(this_cpu); | 128 | now = get_timestamp(this_cpu); |
108 | 129 | ||
109 | /* Wake up the high-prio watchdog task every second: */ | 130 | /* |
110 | if (now > (touch_timestamp + 1)) | 131 | * Wake up the high-prio watchdog task twice per |
132 | * threshold timespan. | ||
133 | */ | ||
134 | if (now > touch_timestamp + softlockup_thresh/2) | ||
111 | wake_up_process(per_cpu(watchdog_task, this_cpu)); | 135 | wake_up_process(per_cpu(watchdog_task, this_cpu)); |
112 | 136 | ||
113 | /* Warn about unreasonable delays: */ | 137 | /* Warn about unreasonable delays: */ |
@@ -121,11 +145,15 @@ void softlockup_tick(void) | |||
121 | this_cpu, now - touch_timestamp, | 145 | this_cpu, now - touch_timestamp, |
122 | current->comm, task_pid_nr(current)); | 146 | current->comm, task_pid_nr(current)); |
123 | print_modules(); | 147 | print_modules(); |
148 | print_irqtrace_events(current); | ||
124 | if (regs) | 149 | if (regs) |
125 | show_regs(regs); | 150 | show_regs(regs); |
126 | else | 151 | else |
127 | dump_stack(); | 152 | dump_stack(); |
128 | spin_unlock(&print_lock); | 153 | spin_unlock(&print_lock); |
154 | |||
155 | if (softlockup_panic) | ||
156 | panic("softlockup: hung tasks"); | ||
129 | } | 157 | } |
130 | 158 | ||
131 | /* | 159 | /* |
@@ -178,6 +206,9 @@ static void check_hung_task(struct task_struct *t, unsigned long now) | |||
178 | 206 | ||
179 | t->last_switch_timestamp = now; | 207 | t->last_switch_timestamp = now; |
180 | touch_nmi_watchdog(); | 208 | touch_nmi_watchdog(); |
209 | |||
210 | if (softlockup_panic) | ||
211 | panic("softlockup: blocked tasks"); | ||
181 | } | 212 | } |
182 | 213 | ||
183 | /* | 214 | /* |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b859e6b5a767..2a7b9d88706b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -88,12 +88,13 @@ extern int rcutorture_runnable; | |||
88 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ | 88 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ |
89 | 89 | ||
90 | /* Constants used for minimum and maximum */ | 90 | /* Constants used for minimum and maximum */ |
91 | #if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) | 91 | #if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP) |
92 | static int one = 1; | 92 | static int one = 1; |
93 | #endif | 93 | #endif |
94 | 94 | ||
95 | #ifdef CONFIG_DETECT_SOFTLOCKUP | 95 | #ifdef CONFIG_DETECT_SOFTLOCKUP |
96 | static int sixty = 60; | 96 | static int sixty = 60; |
97 | static int neg_one = -1; | ||
97 | #endif | 98 | #endif |
98 | 99 | ||
99 | #ifdef CONFIG_MMU | 100 | #ifdef CONFIG_MMU |
@@ -739,13 +740,24 @@ static struct ctl_table kern_table[] = { | |||
739 | #ifdef CONFIG_DETECT_SOFTLOCKUP | 740 | #ifdef CONFIG_DETECT_SOFTLOCKUP |
740 | { | 741 | { |
741 | .ctl_name = CTL_UNNUMBERED, | 742 | .ctl_name = CTL_UNNUMBERED, |
743 | .procname = "softlockup_panic", | ||
744 | .data = &softlockup_panic, | ||
745 | .maxlen = sizeof(int), | ||
746 | .mode = 0644, | ||
747 | .proc_handler = &proc_dointvec_minmax, | ||
748 | .strategy = &sysctl_intvec, | ||
749 | .extra1 = &zero, | ||
750 | .extra2 = &one, | ||
751 | }, | ||
752 | { | ||
753 | .ctl_name = CTL_UNNUMBERED, | ||
742 | .procname = "softlockup_thresh", | 754 | .procname = "softlockup_thresh", |
743 | .data = &softlockup_thresh, | 755 | .data = &softlockup_thresh, |
744 | .maxlen = sizeof(unsigned long), | 756 | .maxlen = sizeof(int), |
745 | .mode = 0644, | 757 | .mode = 0644, |
746 | .proc_handler = &proc_doulongvec_minmax, | 758 | .proc_handler = &proc_dointvec_minmax, |
747 | .strategy = &sysctl_intvec, | 759 | .strategy = &sysctl_intvec, |
748 | .extra1 = &one, | 760 | .extra1 = &neg_one, |
749 | .extra2 = &sixty, | 761 | .extra2 = &sixty, |
750 | }, | 762 | }, |
751 | { | 763 | { |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index beef7ccdf842..942fc7c85283 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -140,8 +140,6 @@ void tick_nohz_update_jiffies(void) | |||
140 | if (!ts->tick_stopped) | 140 | if (!ts->tick_stopped) |
141 | return; | 141 | return; |
142 | 142 | ||
143 | touch_softlockup_watchdog(); | ||
144 | |||
145 | cpu_clear(cpu, nohz_cpu_mask); | 143 | cpu_clear(cpu, nohz_cpu_mask); |
146 | now = ktime_get(); | 144 | now = ktime_get(); |
147 | ts->idle_waketime = now; | 145 | ts->idle_waketime = now; |
@@ -149,6 +147,8 @@ void tick_nohz_update_jiffies(void) | |||
149 | local_irq_save(flags); | 147 | local_irq_save(flags); |
150 | tick_do_update_jiffies64(now); | 148 | tick_do_update_jiffies64(now); |
151 | local_irq_restore(flags); | 149 | local_irq_restore(flags); |
150 | |||
151 | touch_softlockup_watchdog(); | ||
152 | } | 152 | } |
153 | 153 | ||
154 | void tick_nohz_stop_idle(int cpu) | 154 | void tick_nohz_stop_idle(int cpu) |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ba106db5a65b..882c51048993 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -150,7 +150,7 @@ config DETECT_SOFTLOCKUP | |||
150 | help | 150 | help |
151 | Say Y here to enable the kernel to detect "soft lockups", | 151 | Say Y here to enable the kernel to detect "soft lockups", |
152 | which are bugs that cause the kernel to loop in kernel | 152 | which are bugs that cause the kernel to loop in kernel |
153 | mode for more than 10 seconds, without giving other tasks a | 153 | mode for more than 60 seconds, without giving other tasks a |
154 | chance to run. | 154 | chance to run. |
155 | 155 | ||
156 | When a soft-lockup is detected, the kernel will print the | 156 | When a soft-lockup is detected, the kernel will print the |
@@ -162,6 +162,30 @@ config DETECT_SOFTLOCKUP | |||
162 | can be detected via the NMI-watchdog, on platforms that | 162 | can be detected via the NMI-watchdog, on platforms that |
163 | support it.) | 163 | support it.) |
164 | 164 | ||
165 | config BOOTPARAM_SOFTLOCKUP_PANIC | ||
166 | bool "Panic (Reboot) On Soft Lockups" | ||
167 | depends on DETECT_SOFTLOCKUP | ||
168 | help | ||
169 | Say Y here to enable the kernel to panic on "soft lockups", | ||
170 | which are bugs that cause the kernel to loop in kernel | ||
171 | mode for more than 60 seconds, without giving other tasks a | ||
172 | chance to run. | ||
173 | |||
174 | The panic can be used in combination with panic_timeout, | ||
175 | to cause the system to reboot automatically after a | ||
176 | lockup has been detected. This feature is useful for | ||
177 | high-availability systems that have uptime guarantees and | ||
178 | where a lockup must be resolved ASAP. | ||
179 | |||
180 | Say N if unsure. | ||
181 | |||
182 | config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE | ||
183 | int | ||
184 | depends on DETECT_SOFTLOCKUP | ||
185 | range 0 1 | ||
186 | default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC | ||
187 | default 1 if BOOTPARAM_SOFTLOCKUP_PANIC | ||
188 | |||
165 | config SCHED_DEBUG | 189 | config SCHED_DEBUG |
166 | bool "Collect scheduler debugging info" | 190 | bool "Collect scheduler debugging info" |
167 | depends on DEBUG_KERNEL && PROC_FS | 191 | depends on DEBUG_KERNEL && PROC_FS |