aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMandeep Singh Baines <msb@google.com>2009-01-15 14:08:40 -0500
committerIngo Molnar <mingo@elte.hu>2009-01-16 08:06:04 -0500
commite162b39a368f0401e41b558f430c354d12a85b37 (patch)
tree3fb7e4d48f398d62e5074e7e3dd183cc54f59820
parentc903ff837909ccada51243307d4239f86af40179 (diff)
softlockup: decouple hung tasks check from softlockup detection
Decoupling allows: * hung tasks check to happen at very low priority * hung tasks check and softlockup to be enabled/disabled independently at compile and/or run-time * individual panic settings to be enabled/disabled independently at compile and/or run-time * softlockup threshold to be reduced without increasing hung tasks poll frequency (hung task check is expensive relative to the softlockup watchdog) * hung task check to be zero overhead when disabled at run-time Signed-off-by: Mandeep Singh Baines <msb@google.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--include/linux/sched.h14
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/hung_task.c198
-rw-r--r--kernel/softlockup.c100
-rw-r--r--kernel/sysctl.c15
-rw-r--r--lib/Kconfig.debug38
6 files changed, 261 insertions, 105 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 54cbabf3b871..f2f94d532302 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -297,9 +297,6 @@ extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
297 struct file *filp, void __user *buffer, 297 struct file *filp, void __user *buffer,
298 size_t *lenp, loff_t *ppos); 298 size_t *lenp, loff_t *ppos);
299extern unsigned int softlockup_panic; 299extern unsigned int softlockup_panic;
300extern unsigned long sysctl_hung_task_check_count;
301extern unsigned long sysctl_hung_task_timeout_secs;
302extern unsigned long sysctl_hung_task_warnings;
303extern int softlockup_thresh; 300extern int softlockup_thresh;
304#else 301#else
305static inline void softlockup_tick(void) 302static inline void softlockup_tick(void)
@@ -316,6 +313,15 @@ static inline void touch_all_softlockup_watchdogs(void)
316} 313}
317#endif 314#endif
318 315
316#ifdef CONFIG_DETECT_HUNG_TASK
317extern unsigned int sysctl_hung_task_panic;
318extern unsigned long sysctl_hung_task_check_count;
319extern unsigned long sysctl_hung_task_timeout_secs;
320extern unsigned long sysctl_hung_task_warnings;
321extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
322 struct file *filp, void __user *buffer,
323 size_t *lenp, loff_t *ppos);
324#endif
319 325
320/* Attach to any functions which should be ignored in wchan output. */ 326/* Attach to any functions which should be ignored in wchan output. */
321#define __sched __attribute__((__section__(".sched.text"))) 327#define __sched __attribute__((__section__(".sched.text")))
@@ -1236,7 +1242,7 @@ struct task_struct {
1236/* ipc stuff */ 1242/* ipc stuff */
1237 struct sysv_sem sysvsem; 1243 struct sysv_sem sysvsem;
1238#endif 1244#endif
1239#ifdef CONFIG_DETECT_SOFTLOCKUP 1245#ifdef CONFIG_DETECT_HUNG_TASK
1240/* hung task detection */ 1246/* hung task detection */
1241 unsigned long last_switch_timestamp; 1247 unsigned long last_switch_timestamp;
1242 unsigned long last_switch_count; 1248 unsigned long last_switch_count;
diff --git a/kernel/Makefile b/kernel/Makefile
index 2aebc4cd7878..979745f1b4bc 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
74obj-$(CONFIG_KPROBES) += kprobes.o 74obj-$(CONFIG_KPROBES) += kprobes.o
75obj-$(CONFIG_KGDB) += kgdb.o 75obj-$(CONFIG_KGDB) += kgdb.o
76obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o 76obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
77obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
77obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ 78obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
78obj-$(CONFIG_SECCOMP) += seccomp.o 79obj-$(CONFIG_SECCOMP) += seccomp.o
79obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 80obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
new file mode 100644
index 000000000000..ba5a77cad3bb
--- /dev/null
+++ b/kernel/hung_task.c
@@ -0,0 +1,198 @@
1/*
2 * Detect Hung Task
3 *
4 * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
5 *
6 */
7
8#include <linux/mm.h>
9#include <linux/cpu.h>
10#include <linux/nmi.h>
11#include <linux/init.h>
12#include <linux/delay.h>
13#include <linux/freezer.h>
14#include <linux/kthread.h>
15#include <linux/lockdep.h>
16#include <linux/module.h>
17#include <linux/sysctl.h>
18
19/*
20 * Have a reasonable limit on the number of tasks checked:
21 */
22unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
23
24/*
25 * Zero means infinite timeout - no checking done:
26 */
27unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
28static unsigned long __read_mostly hung_task_poll_jiffies;
29
30unsigned long __read_mostly sysctl_hung_task_warnings = 10;
31
32static int __read_mostly did_panic;
33
34static struct task_struct *watchdog_task;
35
36/*
37 * Should we panic (and reboot, if panic_timeout= is set) when a
38 * hung task is detected:
39 */
40unsigned int __read_mostly sysctl_hung_task_panic =
41 CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
42
43static int __init hung_task_panic_setup(char *str)
44{
45 sysctl_hung_task_panic = simple_strtoul(str, NULL, 0);
46
47 return 1;
48}
49__setup("hung_task_panic=", hung_task_panic_setup);
50
51static int
52hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
53{
54 did_panic = 1;
55
56 return NOTIFY_DONE;
57}
58
59static struct notifier_block panic_block = {
60 .notifier_call = hung_task_panic,
61};
62
/*
 * Coarse timestamp in units of 2^30 ns (about 1.074 s), taken from the
 * clock of whichever CPU we happen to be running on.  Shifting instead
 * of dividing by 10^9 avoids a big divide; the hung task check does not
 * need better than roughly one-second resolution.
 */
static unsigned long get_timestamp(void)
{
	return cpu_clock(raw_smp_processor_id()) >> 30LL; /* 2^30 ~= 10^9 */
}
74
75static void check_hung_task(struct task_struct *t, unsigned long now)
76{
77 unsigned long switch_count = t->nvcsw + t->nivcsw;
78
79 if (t->flags & PF_FROZEN)
80 return;
81
82 if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
83 t->last_switch_count = switch_count;
84 t->last_switch_timestamp = now;
85 return;
86 }
87 if ((long)(now - t->last_switch_timestamp) <
88 sysctl_hung_task_timeout_secs)
89 return;
90 if (!sysctl_hung_task_warnings)
91 return;
92 sysctl_hung_task_warnings--;
93
94 /*
95 * Ok, the task did not get scheduled for more than 2 minutes,
96 * complain:
97 */
98 printk(KERN_ERR "INFO: task %s:%d blocked for more than "
99 "%ld seconds.\n", t->comm, t->pid,
100 sysctl_hung_task_timeout_secs);
101 printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
102 " disables this message.\n");
103 sched_show_task(t);
104 __debug_show_held_locks(t);
105
106 t->last_switch_timestamp = now;
107 touch_nmi_watchdog();
108
109 if (sysctl_hung_task_panic)
110 panic("hung_task: blocked tasks");
111}
112
113/*
114 * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
115 * a really long time (120 seconds). If that happens, print out
116 * a warning.
117 */
118static void check_hung_uninterruptible_tasks(void)
119{
120 int max_count = sysctl_hung_task_check_count;
121 unsigned long now = get_timestamp();
122 struct task_struct *g, *t;
123
124 /*
125 * If the system crashed already then all bets are off,
126 * do not report extra hung tasks:
127 */
128 if (test_taint(TAINT_DIE) || did_panic)
129 return;
130
131 read_lock(&tasklist_lock);
132 do_each_thread(g, t) {
133 if (!--max_count)
134 goto unlock;
135 /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
136 if (t->state == TASK_UNINTERRUPTIBLE)
137 check_hung_task(t, now);
138 } while_each_thread(g, t);
139 unlock:
140 read_unlock(&tasklist_lock);
141}
142
143static void update_poll_jiffies(void)
144{
145 /* timeout of 0 will disable the watchdog */
146 if (sysctl_hung_task_timeout_secs == 0)
147 hung_task_poll_jiffies = MAX_SCHEDULE_TIMEOUT;
148 else
149 hung_task_poll_jiffies = sysctl_hung_task_timeout_secs * HZ / 2;
150}
151
152/*
153 * Process updating of timeout sysctl
154 */
155int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
156 struct file *filp, void __user *buffer,
157 size_t *lenp, loff_t *ppos)
158{
159 int ret;
160
161 ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
162
163 if (ret || !write)
164 goto out;
165
166 update_poll_jiffies();
167
168 wake_up_process(watchdog_task);
169
170 out:
171 return ret;
172}
173
174/*
175 * kthread which checks for tasks stuck in D state
176 */
177static int watchdog(void *dummy)
178{
179 set_user_nice(current, 0);
180 update_poll_jiffies();
181
182 for ( ; ; ) {
183 while (schedule_timeout_interruptible(hung_task_poll_jiffies));
184 check_hung_uninterruptible_tasks();
185 }
186
187 return 0;
188}
189
190static int __init hung_task_init(void)
191{
192 atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
193 watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
194
195 return 0;
196}
197
198module_init(hung_task_init);
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 85d5a2455103..88796c330838 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -166,97 +166,11 @@ void softlockup_tick(void)
166} 166}
167 167
168/* 168/*
169 * Have a reasonable limit on the number of tasks checked:
170 */
171unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
172
173/*
174 * Zero means infinite timeout - no checking done:
175 */
176unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
177
178unsigned long __read_mostly sysctl_hung_task_warnings = 10;
179
180/*
181 * Only do the hung-tasks check on one CPU:
182 */
183static int check_cpu __read_mostly = -1;
184
185static void check_hung_task(struct task_struct *t, unsigned long now)
186{
187 unsigned long switch_count = t->nvcsw + t->nivcsw;
188
189 if (t->flags & PF_FROZEN)
190 return;
191
192 if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
193 t->last_switch_count = switch_count;
194 t->last_switch_timestamp = now;
195 return;
196 }
197 if ((long)(now - t->last_switch_timestamp) <
198 sysctl_hung_task_timeout_secs)
199 return;
200 if (!sysctl_hung_task_warnings)
201 return;
202 sysctl_hung_task_warnings--;
203
204 /*
205 * Ok, the task did not get scheduled for more than 2 minutes,
206 * complain:
207 */
208 printk(KERN_ERR "INFO: task %s:%d blocked for more than "
209 "%ld seconds.\n", t->comm, t->pid,
210 sysctl_hung_task_timeout_secs);
211 printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
212 " disables this message.\n");
213 sched_show_task(t);
214 __debug_show_held_locks(t);
215
216 t->last_switch_timestamp = now;
217 touch_nmi_watchdog();
218
219 if (softlockup_panic)
220 panic("softlockup: blocked tasks");
221}
222
223/*
224 * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
225 * a really long time (120 seconds). If that happens, print out
226 * a warning.
227 */
228static void check_hung_uninterruptible_tasks(int this_cpu)
229{
230 int max_count = sysctl_hung_task_check_count;
231 unsigned long now = get_timestamp(this_cpu);
232 struct task_struct *g, *t;
233
234 /*
235 * If the system crashed already then all bets are off,
236 * do not report extra hung tasks:
237 */
238 if (test_taint(TAINT_DIE) || did_panic)
239 return;
240
241 read_lock(&tasklist_lock);
242 do_each_thread(g, t) {
243 if (!--max_count)
244 goto unlock;
245 /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
246 if (t->state == TASK_UNINTERRUPTIBLE)
247 check_hung_task(t, now);
248 } while_each_thread(g, t);
249 unlock:
250 read_unlock(&tasklist_lock);
251}
252
253/*
254 * The watchdog thread - runs every second and touches the timestamp. 169 * The watchdog thread - runs every second and touches the timestamp.
255 */ 170 */
256static int watchdog(void *__bind_cpu) 171static int watchdog(void *__bind_cpu)
257{ 172{
258 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 173 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
259 int this_cpu = (long)__bind_cpu;
260 174
261 sched_setscheduler(current, SCHED_FIFO, &param); 175 sched_setscheduler(current, SCHED_FIFO, &param);
262 176
@@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu)
276 if (kthread_should_stop()) 190 if (kthread_should_stop())
277 break; 191 break;
278 192
279 if (this_cpu == check_cpu) {
280 if (sysctl_hung_task_timeout_secs)
281 check_hung_uninterruptible_tasks(this_cpu);
282 }
283
284 set_current_state(TASK_INTERRUPTIBLE); 193 set_current_state(TASK_INTERRUPTIBLE);
285 } 194 }
286 __set_current_state(TASK_RUNNING); 195 __set_current_state(TASK_RUNNING);
@@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
312 break; 221 break;
313 case CPU_ONLINE: 222 case CPU_ONLINE:
314 case CPU_ONLINE_FROZEN: 223 case CPU_ONLINE_FROZEN:
315 check_cpu = cpumask_any(cpu_online_mask);
316 wake_up_process(per_cpu(watchdog_task, hotcpu)); 224 wake_up_process(per_cpu(watchdog_task, hotcpu));
317 break; 225 break;
318#ifdef CONFIG_HOTPLUG_CPU 226#ifdef CONFIG_HOTPLUG_CPU
319 case CPU_DOWN_PREPARE:
320 case CPU_DOWN_PREPARE_FROZEN:
321 if (hotcpu == check_cpu) {
322 /* Pick any other online cpu. */
323 check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
324 }
325 break;
326
327 case CPU_UP_CANCELED: 227 case CPU_UP_CANCELED:
328 case CPU_UP_CANCELED_FROZEN: 228 case CPU_UP_CANCELED_FROZEN:
329 if (!per_cpu(watchdog_task, hotcpu)) 229 if (!per_cpu(watchdog_task, hotcpu))
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 596dc31a7116..2481ed30d2b5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -805,6 +805,19 @@ static struct ctl_table kern_table[] = {
805 .extra1 = &neg_one, 805 .extra1 = &neg_one,
806 .extra2 = &sixty, 806 .extra2 = &sixty,
807 }, 807 },
808#endif
809#ifdef CONFIG_DETECT_HUNG_TASK
810 {
811 .ctl_name = CTL_UNNUMBERED,
812 .procname = "hung_task_panic",
813 .data = &sysctl_hung_task_panic,
814 .maxlen = sizeof(int),
815 .mode = 0644,
816 .proc_handler = &proc_dointvec_minmax,
817 .strategy = &sysctl_intvec,
818 .extra1 = &zero,
819 .extra2 = &one,
820 },
808 { 821 {
809 .ctl_name = CTL_UNNUMBERED, 822 .ctl_name = CTL_UNNUMBERED,
810 .procname = "hung_task_check_count", 823 .procname = "hung_task_check_count",
@@ -820,7 +833,7 @@ static struct ctl_table kern_table[] = {
820 .data = &sysctl_hung_task_timeout_secs, 833 .data = &sysctl_hung_task_timeout_secs,
821 .maxlen = sizeof(unsigned long), 834 .maxlen = sizeof(unsigned long),
822 .mode = 0644, 835 .mode = 0644,
823 .proc_handler = &proc_doulongvec_minmax, 836 .proc_handler = &proc_dohung_task_timeout_secs,
824 .strategy = &sysctl_intvec, 837 .strategy = &sysctl_intvec,
825 }, 838 },
826 { 839 {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 4c9ae6085c75..883ecea22f37 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -186,6 +186,44 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
186 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC 186 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
187 default 1 if BOOTPARAM_SOFTLOCKUP_PANIC 187 default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
188 188
189config DETECT_HUNG_TASK
190 bool "Detect Hung Tasks"
191 depends on DEBUG_KERNEL
192 default y
193 help
194 Say Y here to enable the kernel to detect "hung tasks",
195 which are bugs that cause the task to be stuck in
196 uninterruptible "D" state indefinitely.
197
198 When a hung task is detected, the kernel will print the
199 current stack trace (which you should report), but the
200 task will stay in uninterruptible state. If lockdep is
201 enabled then all held locks will also be reported. This
202 feature has negligible overhead.
203
204config BOOTPARAM_HUNG_TASK_PANIC
205 bool "Panic (Reboot) On Hung Tasks"
206 depends on DETECT_HUNG_TASK
207 help
208 Say Y here to enable the kernel to panic on "hung tasks",
209 which are bugs that cause the kernel to leave a task stuck
210 in uninterruptible "D" state.
211
212 The panic can be used in combination with panic_timeout,
213 to cause the system to reboot automatically after a
214 hung task has been detected. This feature is useful for
215 high-availability systems that have uptime guarantees and
216 where hung tasks must be resolved ASAP.
217
218 Say N if unsure.
219
220config BOOTPARAM_HUNG_TASK_PANIC_VALUE
221 int
222 depends on DETECT_HUNG_TASK
223 range 0 1
224 default 0 if !BOOTPARAM_HUNG_TASK_PANIC
225 default 1 if BOOTPARAM_HUNG_TASK_PANIC
226
189config SCHED_DEBUG 227config SCHED_DEBUG
190 bool "Collect scheduler debugging info" 228 bool "Collect scheduler debugging info"
191 depends on DEBUG_KERNEL && PROC_FS 229 depends on DEBUG_KERNEL && PROC_FS