aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2008-07-23 21:34:13 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-07-23 21:34:13 -0400
commit d7b6de14a0ef8a376f9d57b867545b47302b7bfb (patch)
tree 46904d68a5a68f22e6c6baf3472edd4c37a39481
parent 30d38542ec777468bb6a31829076a2dbc5690e35 (diff)
parent 4dca10a96041f78bed11ce9e4a5cfde813ec4ccb (diff)
Merge branch 'core/softlockup-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core/softlockup-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  softlockup: fix invalid proc_handler for softlockup_panic
  softlockup: fix watchdog task wakeup frequency
  softlockup: fix watchdog task wakeup frequency
  softlockup: show irqtrace
  softlockup: print a module list on being stuck
  softlockup: fix NMI hangs due to lock race - 2.6.26-rc regression
  softlockup: fix false positives on nohz if CPU is 100% idle for more than 60 seconds
  softlockup: fix softlockup_thresh fix
  softlockup: fix softlockup_thresh unaligned access and disable detection at runtime
  softlockup: allow panic on lockup
-rw-r--r-- Documentation/kernel-parameters.txt | 3
-rw-r--r-- include/linux/sched.h | 3
-rw-r--r-- kernel/softlockup.c | 45
-rw-r--r-- kernel/sysctl.c | 20
-rw-r--r-- kernel/time/tick-sched.c | 4
-rw-r--r-- lib/Kconfig.debug | 26
6 files changed, 86 insertions, 15 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 30d44b78171a..47e7d8794fc6 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2034,6 +2034,9 @@ and is between 256 and 4096 characters. It is defined in the file
2034 2034
2035 snd-ymfpci= [HW,ALSA] 2035 snd-ymfpci= [HW,ALSA]
2036 2036
2037 softlockup_panic=
2038 [KNL] Should the soft-lockup detector generate panics.
2039
2037 sonypi.*= [HW] Sony Programmable I/O Control Device driver 2040 sonypi.*= [HW] Sony Programmable I/O Control Device driver
2038 See Documentation/sonypi.txt 2041 See Documentation/sonypi.txt
2039 2042
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1941d8b5cf11..af443a08431f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -295,10 +295,11 @@ extern void softlockup_tick(void);
295extern void spawn_softlockup_task(void); 295extern void spawn_softlockup_task(void);
296extern void touch_softlockup_watchdog(void); 296extern void touch_softlockup_watchdog(void);
297extern void touch_all_softlockup_watchdogs(void); 297extern void touch_all_softlockup_watchdogs(void);
298extern unsigned long softlockup_thresh; 298extern unsigned int softlockup_panic;
299extern unsigned long sysctl_hung_task_check_count; 299extern unsigned long sysctl_hung_task_check_count;
300extern unsigned long sysctl_hung_task_timeout_secs; 300extern unsigned long sysctl_hung_task_timeout_secs;
301extern unsigned long sysctl_hung_task_warnings; 301extern unsigned long sysctl_hung_task_warnings;
302extern int softlockup_thresh;
302#else 303#else
303static inline void softlockup_tick(void) 304static inline void softlockup_tick(void)
304{ 305{
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index a272d78185eb..7bd8d1aadd5d 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -13,6 +13,7 @@
13#include <linux/delay.h> 13#include <linux/delay.h>
14#include <linux/freezer.h> 14#include <linux/freezer.h>
15#include <linux/kthread.h> 15#include <linux/kthread.h>
16#include <linux/lockdep.h>
16#include <linux/notifier.h> 17#include <linux/notifier.h>
17#include <linux/module.h> 18#include <linux/module.h>
18 19
@@ -25,7 +26,22 @@ static DEFINE_PER_CPU(unsigned long, print_timestamp);
25static DEFINE_PER_CPU(struct task_struct *, watchdog_task); 26static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
26 27
27static int __read_mostly did_panic; 28static int __read_mostly did_panic;
28unsigned long __read_mostly softlockup_thresh = 60; 29int __read_mostly softlockup_thresh = 60;
30
31/*
32 * Should we panic (and reboot, if panic_timeout= is set) when a
33 * soft-lockup occurs:
34 */
35unsigned int __read_mostly softlockup_panic =
36 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
37
38static int __init softlockup_panic_setup(char *str)
39{
40 softlockup_panic = simple_strtoul(str, NULL, 0);
41
42 return 1;
43}
44__setup("softlockup_panic=", softlockup_panic_setup);
29 45
30static int 46static int
31softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) 47softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
@@ -84,6 +100,14 @@ void softlockup_tick(void)
84 struct pt_regs *regs = get_irq_regs(); 100 struct pt_regs *regs = get_irq_regs();
85 unsigned long now; 101 unsigned long now;
86 102
103 /* Is detection switched off? */
104 if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) {
105 /* Be sure we don't false trigger if switched back on */
106 if (touch_timestamp)
107 per_cpu(touch_timestamp, this_cpu) = 0;
108 return;
109 }
110
87 if (touch_timestamp == 0) { 111 if (touch_timestamp == 0) {
88 __touch_softlockup_watchdog(); 112 __touch_softlockup_watchdog();
89 return; 113 return;
@@ -92,11 +116,8 @@ void softlockup_tick(void)
92 print_timestamp = per_cpu(print_timestamp, this_cpu); 116 print_timestamp = per_cpu(print_timestamp, this_cpu);
93 117
94 /* report at most once a second */ 118 /* report at most once a second */
95 if ((print_timestamp >= touch_timestamp && 119 if (print_timestamp == touch_timestamp || did_panic)
96 print_timestamp < (touch_timestamp + 1)) ||
97 did_panic || !per_cpu(watchdog_task, this_cpu)) {
98 return; 120 return;
99 }
100 121
101 /* do not print during early bootup: */ 122 /* do not print during early bootup: */
102 if (unlikely(system_state != SYSTEM_RUNNING)) { 123 if (unlikely(system_state != SYSTEM_RUNNING)) {
@@ -106,8 +127,11 @@ void softlockup_tick(void)
106 127
107 now = get_timestamp(this_cpu); 128 now = get_timestamp(this_cpu);
108 129
109 /* Wake up the high-prio watchdog task every second: */ 130 /*
110 if (now > (touch_timestamp + 1)) 131 * Wake up the high-prio watchdog task twice per
132 * threshold timespan.
133 */
134 if (now > touch_timestamp + softlockup_thresh/2)
111 wake_up_process(per_cpu(watchdog_task, this_cpu)); 135 wake_up_process(per_cpu(watchdog_task, this_cpu));
112 136
113 /* Warn about unreasonable delays: */ 137 /* Warn about unreasonable delays: */
@@ -121,11 +145,15 @@ void softlockup_tick(void)
121 this_cpu, now - touch_timestamp, 145 this_cpu, now - touch_timestamp,
122 current->comm, task_pid_nr(current)); 146 current->comm, task_pid_nr(current));
123 print_modules(); 147 print_modules();
148 print_irqtrace_events(current);
124 if (regs) 149 if (regs)
125 show_regs(regs); 150 show_regs(regs);
126 else 151 else
127 dump_stack(); 152 dump_stack();
128 spin_unlock(&print_lock); 153 spin_unlock(&print_lock);
154
155 if (softlockup_panic)
156 panic("softlockup: hung tasks");
129} 157}
130 158
131/* 159/*
@@ -178,6 +206,9 @@ static void check_hung_task(struct task_struct *t, unsigned long now)
178 206
179 t->last_switch_timestamp = now; 207 t->last_switch_timestamp = now;
180 touch_nmi_watchdog(); 208 touch_nmi_watchdog();
209
210 if (softlockup_panic)
211 panic("softlockup: blocked tasks");
181} 212}
182 213
183/* 214/*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b859e6b5a767..2a7b9d88706b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -88,12 +88,13 @@ extern int rcutorture_runnable;
88#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ 88#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
89 89
90/* Constants used for minimum and maximum */ 90/* Constants used for minimum and maximum */
91#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) 91#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP)
92static int one = 1; 92static int one = 1;
93#endif 93#endif
94 94
95#ifdef CONFIG_DETECT_SOFTLOCKUP 95#ifdef CONFIG_DETECT_SOFTLOCKUP
96static int sixty = 60; 96static int sixty = 60;
97static int neg_one = -1;
97#endif 98#endif
98 99
99#ifdef CONFIG_MMU 100#ifdef CONFIG_MMU
@@ -739,13 +740,24 @@ static struct ctl_table kern_table[] = {
739#ifdef CONFIG_DETECT_SOFTLOCKUP 740#ifdef CONFIG_DETECT_SOFTLOCKUP
740 { 741 {
741 .ctl_name = CTL_UNNUMBERED, 742 .ctl_name = CTL_UNNUMBERED,
743 .procname = "softlockup_panic",
744 .data = &softlockup_panic,
745 .maxlen = sizeof(int),
746 .mode = 0644,
747 .proc_handler = &proc_dointvec_minmax,
748 .strategy = &sysctl_intvec,
749 .extra1 = &zero,
750 .extra2 = &one,
751 },
752 {
753 .ctl_name = CTL_UNNUMBERED,
742 .procname = "softlockup_thresh", 754 .procname = "softlockup_thresh",
743 .data = &softlockup_thresh, 755 .data = &softlockup_thresh,
744 .maxlen = sizeof(unsigned long), 756 .maxlen = sizeof(int),
745 .mode = 0644, 757 .mode = 0644,
746 .proc_handler = &proc_doulongvec_minmax, 758 .proc_handler = &proc_dointvec_minmax,
747 .strategy = &sysctl_intvec, 759 .strategy = &sysctl_intvec,
748 .extra1 = &one, 760 .extra1 = &neg_one,
749 .extra2 = &sixty, 761 .extra2 = &sixty,
750 }, 762 },
751 { 763 {
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index beef7ccdf842..942fc7c85283 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -140,8 +140,6 @@ void tick_nohz_update_jiffies(void)
140 if (!ts->tick_stopped) 140 if (!ts->tick_stopped)
141 return; 141 return;
142 142
143 touch_softlockup_watchdog();
144
145 cpu_clear(cpu, nohz_cpu_mask); 143 cpu_clear(cpu, nohz_cpu_mask);
146 now = ktime_get(); 144 now = ktime_get();
147 ts->idle_waketime = now; 145 ts->idle_waketime = now;
@@ -149,6 +147,8 @@ void tick_nohz_update_jiffies(void)
149 local_irq_save(flags); 147 local_irq_save(flags);
150 tick_do_update_jiffies64(now); 148 tick_do_update_jiffies64(now);
151 local_irq_restore(flags); 149 local_irq_restore(flags);
150
151 touch_softlockup_watchdog();
152} 152}
153 153
154void tick_nohz_stop_idle(int cpu) 154void tick_nohz_stop_idle(int cpu)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ba106db5a65b..882c51048993 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -150,7 +150,7 @@ config DETECT_SOFTLOCKUP
150 help 150 help
151 Say Y here to enable the kernel to detect "soft lockups", 151 Say Y here to enable the kernel to detect "soft lockups",
152 which are bugs that cause the kernel to loop in kernel 152 which are bugs that cause the kernel to loop in kernel
153 mode for more than 10 seconds, without giving other tasks a 153 mode for more than 60 seconds, without giving other tasks a
154 chance to run. 154 chance to run.
155 155
156 When a soft-lockup is detected, the kernel will print the 156 When a soft-lockup is detected, the kernel will print the
@@ -162,6 +162,30 @@ config DETECT_SOFTLOCKUP
162 can be detected via the NMI-watchdog, on platforms that 162 can be detected via the NMI-watchdog, on platforms that
163 support it.) 163 support it.)
164 164
165config BOOTPARAM_SOFTLOCKUP_PANIC
166 bool "Panic (Reboot) On Soft Lockups"
167 depends on DETECT_SOFTLOCKUP
168 help
169 Say Y here to enable the kernel to panic on "soft lockups",
170 which are bugs that cause the kernel to loop in kernel
171 mode for more than 60 seconds, without giving other tasks a
172 chance to run.
173
174 The panic can be used in combination with panic_timeout,
175 to cause the system to reboot automatically after a
176 lockup has been detected. This feature is useful for
177 high-availability systems that have uptime guarantees and
178 where a lockup must be resolved ASAP.
179
180 Say N if unsure.
181
182config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
183 int
184 depends on DETECT_SOFTLOCKUP
185 range 0 1
186 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
187 default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
188
165config SCHED_DEBUG 189config SCHED_DEBUG
166 bool "Collect scheduler debugging info" 190 bool "Collect scheduler debugging info"
167 depends on DEBUG_KERNEL && PROC_FS 191 depends on DEBUG_KERNEL && PROC_FS