aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2009-04-07 17:11:07 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-04-07 17:11:07 -0400
commit 1551260d1f0fb1d23f264582092b862fce5e2dbd (patch)
tree d06a0d20c74adda44a78bc2dc9a3e3016de380d6
parent c93f216b5b985a12a18323e5ca2eb01db3d2f000 (diff)
parent 5e34437840d33554f69380584311743b39e8fbeb (diff)
Merge branch 'core/softlockup' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core/softlockup' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  softlockup: make DETECT_HUNG_TASK default depend on DETECT_SOFTLOCKUP
  softlockup: move 'one' to the softlockup section in sysctl.c
  softlockup: ensure the task has been switched out once
  softlockup: remove timestamp checking from hung_task
  softlockup: convert read_lock in hung_task to rcu_read_lock
  softlockup: check all tasks in hung_task
  softlockup: remove unused definition for spawn_softlockup_task
  softlockup: fix potential race in hung_task when resetting timeout
  softlockup: fix to allow compiling with !DETECT_HUNG_TASK
  softlockup: decouple hung tasks check from softlockup detection
-rw-r--r-- include/linux/sched.h 18
-rw-r--r-- kernel/Makefile 1
-rw-r--r-- kernel/fork.c 8
-rw-r--r-- kernel/hung_task.c 217
-rw-r--r-- kernel/softlockup.c 100
-rw-r--r-- kernel/sysctl.c 15
-rw-r--r-- lib/Kconfig.debug 38
7 files changed, 283 insertions, 114 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c96140210d1c..98e1fe51601d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -300,17 +300,11 @@ extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
300 struct file *filp, void __user *buffer, 300 struct file *filp, void __user *buffer,
301 size_t *lenp, loff_t *ppos); 301 size_t *lenp, loff_t *ppos);
302extern unsigned int softlockup_panic; 302extern unsigned int softlockup_panic;
303extern unsigned long sysctl_hung_task_check_count;
304extern unsigned long sysctl_hung_task_timeout_secs;
305extern unsigned long sysctl_hung_task_warnings;
306extern int softlockup_thresh; 303extern int softlockup_thresh;
307#else 304#else
308static inline void softlockup_tick(void) 305static inline void softlockup_tick(void)
309{ 306{
310} 307}
311static inline void spawn_softlockup_task(void)
312{
313}
314static inline void touch_softlockup_watchdog(void) 308static inline void touch_softlockup_watchdog(void)
315{ 309{
316} 310}
@@ -319,6 +313,15 @@ static inline void touch_all_softlockup_watchdogs(void)
319} 313}
320#endif 314#endif
321 315
316#ifdef CONFIG_DETECT_HUNG_TASK
317extern unsigned int sysctl_hung_task_panic;
318extern unsigned long sysctl_hung_task_check_count;
319extern unsigned long sysctl_hung_task_timeout_secs;
320extern unsigned long sysctl_hung_task_warnings;
321extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
322 struct file *filp, void __user *buffer,
323 size_t *lenp, loff_t *ppos);
324#endif
322 325
323/* Attach to any functions which should be ignored in wchan output. */ 326/* Attach to any functions which should be ignored in wchan output. */
324#define __sched __attribute__((__section__(".sched.text"))) 327#define __sched __attribute__((__section__(".sched.text")))
@@ -1255,9 +1258,8 @@ struct task_struct {
1255/* ipc stuff */ 1258/* ipc stuff */
1256 struct sysv_sem sysvsem; 1259 struct sysv_sem sysvsem;
1257#endif 1260#endif
1258#ifdef CONFIG_DETECT_SOFTLOCKUP 1261#ifdef CONFIG_DETECT_HUNG_TASK
1259/* hung task detection */ 1262/* hung task detection */
1260 unsigned long last_switch_timestamp;
1261 unsigned long last_switch_count; 1263 unsigned long last_switch_count;
1262#endif 1264#endif
1263/* CPU-specific state of this task */ 1265/* CPU-specific state of this task */
diff --git a/kernel/Makefile b/kernel/Makefile
index bab1dffe37e9..42423665660a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
74obj-$(CONFIG_KPROBES) += kprobes.o 74obj-$(CONFIG_KPROBES) += kprobes.o
75obj-$(CONFIG_KGDB) += kgdb.o 75obj-$(CONFIG_KGDB) += kgdb.o
76obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o 76obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
77obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
77obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ 78obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
78obj-$(CONFIG_SECCOMP) += seccomp.o 79obj-$(CONFIG_SECCOMP) += seccomp.o
79obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 80obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/fork.c b/kernel/fork.c
index 660c2b8765bc..989c7c202b3d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -645,6 +645,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
645 645
646 tsk->min_flt = tsk->maj_flt = 0; 646 tsk->min_flt = tsk->maj_flt = 0;
647 tsk->nvcsw = tsk->nivcsw = 0; 647 tsk->nvcsw = tsk->nivcsw = 0;
648#ifdef CONFIG_DETECT_HUNG_TASK
649 tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
650#endif
648 651
649 tsk->mm = NULL; 652 tsk->mm = NULL;
650 tsk->active_mm = NULL; 653 tsk->active_mm = NULL;
@@ -1032,11 +1035,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1032 1035
1033 p->default_timer_slack_ns = current->timer_slack_ns; 1036 p->default_timer_slack_ns = current->timer_slack_ns;
1034 1037
1035#ifdef CONFIG_DETECT_SOFTLOCKUP
1036 p->last_switch_count = 0;
1037 p->last_switch_timestamp = 0;
1038#endif
1039
1040 task_io_accounting_init(&p->ioac); 1038 task_io_accounting_init(&p->ioac);
1041 acct_clear_integrals(p); 1039 acct_clear_integrals(p);
1042 1040
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
new file mode 100644
index 000000000000..022a4927b785
--- /dev/null
+++ b/kernel/hung_task.c
@@ -0,0 +1,217 @@
1/*
2 * Detect Hung Task
3 *
4 * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
5 *
6 */
7
8#include <linux/mm.h>
9#include <linux/cpu.h>
10#include <linux/nmi.h>
11#include <linux/init.h>
12#include <linux/delay.h>
13#include <linux/freezer.h>
14#include <linux/kthread.h>
15#include <linux/lockdep.h>
16#include <linux/module.h>
17#include <linux/sysctl.h>
18
19/*
20 * The number of tasks checked:
21 */
22unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
23
24/*
25 * Limit number of tasks checked in a batch.
26 *
27 * This value controls the preemptibility of khungtaskd since preemption
28 * is disabled during the critical section. It also controls the size of
29 * the RCU grace period. So it needs to be upper-bounded.
30 */
31#define HUNG_TASK_BATCHING 1024
32
33/*
34 * Zero means infinite timeout - no checking done:
35 */
36unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
37
38unsigned long __read_mostly sysctl_hung_task_warnings = 10;
39
40static int __read_mostly did_panic;
41
42static struct task_struct *watchdog_task;
43
44/*
45 * Should we panic (and reboot, if panic_timeout= is set) when a
46 * hung task is detected:
47 */
48unsigned int __read_mostly sysctl_hung_task_panic =
49 CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
50
51static int __init hung_task_panic_setup(char *str)
52{
53 sysctl_hung_task_panic = simple_strtoul(str, NULL, 0);
54
55 return 1;
56}
57__setup("hung_task_panic=", hung_task_panic_setup);
58
59static int
60hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
61{
62 did_panic = 1;
63
64 return NOTIFY_DONE;
65}
66
67static struct notifier_block panic_block = {
68 .notifier_call = hung_task_panic,
69};
70
71static void check_hung_task(struct task_struct *t, unsigned long timeout)
72{
73 unsigned long switch_count = t->nvcsw + t->nivcsw;
74
75 /*
76 * Ensure the task is not frozen.
77 * Also, when a freshly created task is scheduled once, changes
78 * its state to TASK_UNINTERRUPTIBLE without having ever been
79 * switched out once, it mustn't be checked.
80 */
81 if (unlikely(t->flags & PF_FROZEN || !switch_count))
82 return;
83
84 if (switch_count != t->last_switch_count) {
85 t->last_switch_count = switch_count;
86 return;
87 }
88 if (!sysctl_hung_task_warnings)
89 return;
90 sysctl_hung_task_warnings--;
91
92 /*
93 * Ok, the task did not get scheduled for longer than the configured
94 * timeout (120 seconds by default), complain:
95 */
96 printk(KERN_ERR "INFO: task %s:%d blocked for more than "
97 "%ld seconds.\n", t->comm, t->pid, timeout);
98 printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
99 " disables this message.\n");
100 sched_show_task(t);
101 __debug_show_held_locks(t);
102
103 touch_nmi_watchdog();
104
105 if (sysctl_hung_task_panic)
106 panic("hung_task: blocked tasks");
107}
108
109/*
110 * To avoid extending the RCU grace period for an unbounded amount of time,
111 * periodically exit the critical section and enter a new one.
112 *
113 * For preemptible RCU it is sufficient to call rcu_read_unlock in order
114 * to exit the grace period. For classic RCU, a reschedule is required.
115 */
116static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
117{
118 get_task_struct(g);
119 get_task_struct(t);
120 rcu_read_unlock();
121 cond_resched();
122 rcu_read_lock();
123 put_task_struct(t);
124 put_task_struct(g);
125}
126
127/*
128 * Check whether a TASK_UNINTERRUPTIBLE task does not get woken up for
129 * a really long time (120 seconds by default). If that happens, print out
130 * a warning.
131 */
132static void check_hung_uninterruptible_tasks(unsigned long timeout)
133{
134 int max_count = sysctl_hung_task_check_count;
135 int batch_count = HUNG_TASK_BATCHING;
136 struct task_struct *g, *t;
137
138 /*
139 * If the system crashed already then all bets are off,
140 * do not report extra hung tasks:
141 */
142 if (test_taint(TAINT_DIE) || did_panic)
143 return;
144
145 rcu_read_lock();
146 do_each_thread(g, t) {
147 if (!--max_count)
148 goto unlock;
149 if (!--batch_count) {
150 batch_count = HUNG_TASK_BATCHING;
151 rcu_lock_break(g, t);
152 /* Exit if t or g was unhashed during refresh. */
153 if (t->state == TASK_DEAD || g->state == TASK_DEAD)
154 goto unlock;
155 }
156 /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
157 if (t->state == TASK_UNINTERRUPTIBLE)
158 check_hung_task(t, timeout);
159 } while_each_thread(g, t);
160 unlock:
161 rcu_read_unlock();
162}
163
164static unsigned long timeout_jiffies(unsigned long timeout)
165{
166 /* timeout of 0 will disable the watchdog */
167 return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT;
168}
169
170/*
171 * Process updating of timeout sysctl
172 */
173int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
174 struct file *filp, void __user *buffer,
175 size_t *lenp, loff_t *ppos)
176{
177 int ret;
178
179 ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
180
181 if (ret || !write)
182 goto out;
183
184 wake_up_process(watchdog_task);
185
186 out:
187 return ret;
188}
189
190/*
191 * kthread which checks for tasks stuck in D state
192 */
193static int watchdog(void *dummy)
194{
195 set_user_nice(current, 0);
196
197 for ( ; ; ) {
198 unsigned long timeout = sysctl_hung_task_timeout_secs;
199
200 while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
201 timeout = sysctl_hung_task_timeout_secs;
202
203 check_hung_uninterruptible_tasks(timeout);
204 }
205
206 return 0;
207}
208
209static int __init hung_task_init(void)
210{
211 atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
212 watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
213
214 return 0;
215}
216
217module_init(hung_task_init);
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 85d5a2455103..88796c330838 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -166,97 +166,11 @@ void softlockup_tick(void)
166} 166}
167 167
168/* 168/*
169 * Have a reasonable limit on the number of tasks checked:
170 */
171unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
172
173/*
174 * Zero means infinite timeout - no checking done:
175 */
176unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
177
178unsigned long __read_mostly sysctl_hung_task_warnings = 10;
179
180/*
181 * Only do the hung-tasks check on one CPU:
182 */
183static int check_cpu __read_mostly = -1;
184
185static void check_hung_task(struct task_struct *t, unsigned long now)
186{
187 unsigned long switch_count = t->nvcsw + t->nivcsw;
188
189 if (t->flags & PF_FROZEN)
190 return;
191
192 if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
193 t->last_switch_count = switch_count;
194 t->last_switch_timestamp = now;
195 return;
196 }
197 if ((long)(now - t->last_switch_timestamp) <
198 sysctl_hung_task_timeout_secs)
199 return;
200 if (!sysctl_hung_task_warnings)
201 return;
202 sysctl_hung_task_warnings--;
203
204 /*
205 * Ok, the task did not get scheduled for more than 2 minutes,
206 * complain:
207 */
208 printk(KERN_ERR "INFO: task %s:%d blocked for more than "
209 "%ld seconds.\n", t->comm, t->pid,
210 sysctl_hung_task_timeout_secs);
211 printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
212 " disables this message.\n");
213 sched_show_task(t);
214 __debug_show_held_locks(t);
215
216 t->last_switch_timestamp = now;
217 touch_nmi_watchdog();
218
219 if (softlockup_panic)
220 panic("softlockup: blocked tasks");
221}
222
223/*
224 * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
225 * a really long time (120 seconds). If that happens, print out
226 * a warning.
227 */
228static void check_hung_uninterruptible_tasks(int this_cpu)
229{
230 int max_count = sysctl_hung_task_check_count;
231 unsigned long now = get_timestamp(this_cpu);
232 struct task_struct *g, *t;
233
234 /*
235 * If the system crashed already then all bets are off,
236 * do not report extra hung tasks:
237 */
238 if (test_taint(TAINT_DIE) || did_panic)
239 return;
240
241 read_lock(&tasklist_lock);
242 do_each_thread(g, t) {
243 if (!--max_count)
244 goto unlock;
245 /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
246 if (t->state == TASK_UNINTERRUPTIBLE)
247 check_hung_task(t, now);
248 } while_each_thread(g, t);
249 unlock:
250 read_unlock(&tasklist_lock);
251}
252
253/*
254 * The watchdog thread - runs every second and touches the timestamp. 169 * The watchdog thread - runs every second and touches the timestamp.
255 */ 170 */
256static int watchdog(void *__bind_cpu) 171static int watchdog(void *__bind_cpu)
257{ 172{
258 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 173 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
259 int this_cpu = (long)__bind_cpu;
260 174
261 sched_setscheduler(current, SCHED_FIFO, &param); 175 sched_setscheduler(current, SCHED_FIFO, &param);
262 176
@@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu)
276 if (kthread_should_stop()) 190 if (kthread_should_stop())
277 break; 191 break;
278 192
279 if (this_cpu == check_cpu) {
280 if (sysctl_hung_task_timeout_secs)
281 check_hung_uninterruptible_tasks(this_cpu);
282 }
283
284 set_current_state(TASK_INTERRUPTIBLE); 193 set_current_state(TASK_INTERRUPTIBLE);
285 } 194 }
286 __set_current_state(TASK_RUNNING); 195 __set_current_state(TASK_RUNNING);
@@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
312 break; 221 break;
313 case CPU_ONLINE: 222 case CPU_ONLINE:
314 case CPU_ONLINE_FROZEN: 223 case CPU_ONLINE_FROZEN:
315 check_cpu = cpumask_any(cpu_online_mask);
316 wake_up_process(per_cpu(watchdog_task, hotcpu)); 224 wake_up_process(per_cpu(watchdog_task, hotcpu));
317 break; 225 break;
318#ifdef CONFIG_HOTPLUG_CPU 226#ifdef CONFIG_HOTPLUG_CPU
319 case CPU_DOWN_PREPARE:
320 case CPU_DOWN_PREPARE_FROZEN:
321 if (hotcpu == check_cpu) {
322 /* Pick any other online cpu. */
323 check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
324 }
325 break;
326
327 case CPU_UP_CANCELED: 227 case CPU_UP_CANCELED:
328 case CPU_UP_CANCELED_FROZEN: 228 case CPU_UP_CANCELED_FROZEN:
329 if (!per_cpu(watchdog_task, hotcpu)) 229 if (!per_cpu(watchdog_task, hotcpu))
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 72eb1a41dcab..4286b62b34a0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -814,6 +814,19 @@ static struct ctl_table kern_table[] = {
814 .extra1 = &neg_one, 814 .extra1 = &neg_one,
815 .extra2 = &sixty, 815 .extra2 = &sixty,
816 }, 816 },
817#endif
818#ifdef CONFIG_DETECT_HUNG_TASK
819 {
820 .ctl_name = CTL_UNNUMBERED,
821 .procname = "hung_task_panic",
822 .data = &sysctl_hung_task_panic,
823 .maxlen = sizeof(int),
824 .mode = 0644,
825 .proc_handler = &proc_dointvec_minmax,
826 .strategy = &sysctl_intvec,
827 .extra1 = &zero,
828 .extra2 = &one,
829 },
817 { 830 {
818 .ctl_name = CTL_UNNUMBERED, 831 .ctl_name = CTL_UNNUMBERED,
819 .procname = "hung_task_check_count", 832 .procname = "hung_task_check_count",
@@ -829,7 +842,7 @@ static struct ctl_table kern_table[] = {
829 .data = &sysctl_hung_task_timeout_secs, 842 .data = &sysctl_hung_task_timeout_secs,
830 .maxlen = sizeof(unsigned long), 843 .maxlen = sizeof(unsigned long),
831 .mode = 0644, 844 .mode = 0644,
832 .proc_handler = &proc_doulongvec_minmax, 845 .proc_handler = &proc_dohung_task_timeout_secs,
833 .strategy = &sysctl_intvec, 846 .strategy = &sysctl_intvec,
834 }, 847 },
835 { 848 {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9638d99644af..c6e854f215fa 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -186,6 +186,44 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
186 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC 186 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
187 default 1 if BOOTPARAM_SOFTLOCKUP_PANIC 187 default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
188 188
189config DETECT_HUNG_TASK
190 bool "Detect Hung Tasks"
191 depends on DEBUG_KERNEL
192 default DETECT_SOFTLOCKUP
193 help
194 Say Y here to enable the kernel to detect "hung tasks",
195 which are bugs that cause the task to be stuck in
196 uninterruptible "D" state indefinitely.
197
198 When a hung task is detected, the kernel will print the
199 current stack trace (which you should report), but the
200 task will stay in uninterruptible state. If lockdep is
201 enabled then all held locks will also be reported. This
202 feature has negligible overhead.
203
204config BOOTPARAM_HUNG_TASK_PANIC
205 bool "Panic (Reboot) On Hung Tasks"
206 depends on DETECT_HUNG_TASK
207 help
208 Say Y here to enable the kernel to panic on "hung tasks",
209 which are bugs that cause the kernel to leave a task stuck
210 in uninterruptible "D" state.
211
212 The panic can be used in combination with panic_timeout,
213 to cause the system to reboot automatically after a
214 hung task has been detected. This feature is useful for
215 high-availability systems that have uptime guarantees and
216 where hung tasks must be resolved ASAP.
217
218 Say N if unsure.
219
220config BOOTPARAM_HUNG_TASK_PANIC_VALUE
221 int
222 depends on DETECT_HUNG_TASK
223 range 0 1
224 default 0 if !BOOTPARAM_HUNG_TASK_PANIC
225 default 1 if BOOTPARAM_HUNG_TASK_PANIC
226
189config SCHED_DEBUG 227config SCHED_DEBUG
190 bool "Collect scheduler debugging info" 228 bool "Collect scheduler debugging info"
191 depends on DEBUG_KERNEL && PROC_FS 229 depends on DEBUG_KERNEL && PROC_FS