aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author Mandeep Singh Baines <msb@google.com> 2009-01-15 14:08:40 -0500
committer Ingo Molnar <mingo@elte.hu> 2009-01-16 08:06:04 -0500
commit e162b39a368f0401e41b558f430c354d12a85b37 (patch)
tree 3fb7e4d48f398d62e5074e7e3dd183cc54f59820 /kernel
parent c903ff837909ccada51243307d4239f86af40179 (diff)
softlockup: decouple hung tasks check from softlockup detection
Decoupling allows:

 * hung tasks check to happen at very low priority

 * hung tasks check and softlockup to be enabled/disabled independently
   at compile and/or run-time

 * individual panic settings to be enabled/disabled independently
   at compile and/or run-time

 * softlockup threshold to be reduced without increasing hung tasks
   poll frequency (hung task check is expensive relative to the
   softlockup watchdog)

 * hung task check to be zero overhead when disabled at run-time

Signed-off-by: Mandeep Singh Baines <msb@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/Makefile 1
-rw-r--r-- kernel/hung_task.c 198
-rw-r--r-- kernel/softlockup.c 100
-rw-r--r-- kernel/sysctl.c 15
4 files changed, 213 insertions, 101 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 2aebc4cd7878..979745f1b4bc 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
74obj-$(CONFIG_KPROBES) += kprobes.o 74obj-$(CONFIG_KPROBES) += kprobes.o
75obj-$(CONFIG_KGDB) += kgdb.o 75obj-$(CONFIG_KGDB) += kgdb.o
76obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o 76obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
77obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
77obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ 78obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
78obj-$(CONFIG_SECCOMP) += seccomp.o 79obj-$(CONFIG_SECCOMP) += seccomp.o
79obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 80obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
new file mode 100644
index 000000000000..ba5a77cad3bb
--- /dev/null
+++ b/kernel/hung_task.c
@@ -0,0 +1,198 @@
1/*
2 * Detect Hung Task
3 *
4 * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
5 *
6 */
7
8#include <linux/mm.h>
9#include <linux/cpu.h>
10#include <linux/nmi.h>
11#include <linux/init.h>
12#include <linux/delay.h>
13#include <linux/freezer.h>
14#include <linux/kthread.h>
15#include <linux/lockdep.h>
16#include <linux/module.h>
17#include <linux/sysctl.h>
18
19/*
20 * Have a reasonable limit on the number of tasks checked:
21 */
22unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
23
24/*
25 * Zero means infinite timeout - no checking done:
26 */
27unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
28static unsigned long __read_mostly hung_task_poll_jiffies;
29
30unsigned long __read_mostly sysctl_hung_task_warnings = 10;
31
32static int __read_mostly did_panic;
33
34static struct task_struct *watchdog_task;
35
36/*
37 * Should we panic (and reboot, if panic_timeout= is set) when a
38 * hung task is detected:
39 */
40unsigned int __read_mostly sysctl_hung_task_panic =
41 CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
42
43static int __init hung_task_panic_setup(char *str)
44{
45 sysctl_hung_task_panic = simple_strtoul(str, NULL, 0);
46
47 return 1;
48}
49__setup("hung_task_panic=", hung_task_panic_setup);
50
51static int
52hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
53{
54 did_panic = 1;
55
56 return NOTIFY_DONE;
57}
58
59static struct notifier_block panic_block = {
60 .notifier_call = hung_task_panic,
61};
62
63/*
64 * Returns seconds, approximately. We don't need nanosecond
65 * resolution, and we don't need to waste time with a big divide when
66 * 2^30ns == 1.074s.
67 */
68static unsigned long get_timestamp(void)
69{
70 int this_cpu = raw_smp_processor_id();
71
72 return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */
73}
74
75static void check_hung_task(struct task_struct *t, unsigned long now)
76{
77 unsigned long switch_count = t->nvcsw + t->nivcsw;
78
79 if (t->flags & PF_FROZEN)
80 return;
81
82 if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
83 t->last_switch_count = switch_count;
84 t->last_switch_timestamp = now;
85 return;
86 }
87 if ((long)(now - t->last_switch_timestamp) <
88 sysctl_hung_task_timeout_secs)
89 return;
90 if (!sysctl_hung_task_warnings)
91 return;
92 sysctl_hung_task_warnings--;
93
94 /*
95 * Ok, the task did not get scheduled for more than 2 minutes,
96 * complain:
97 */
98 printk(KERN_ERR "INFO: task %s:%d blocked for more than "
99 "%ld seconds.\n", t->comm, t->pid,
100 sysctl_hung_task_timeout_secs);
101 printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
102 " disables this message.\n");
103 sched_show_task(t);
104 __debug_show_held_locks(t);
105
106 t->last_switch_timestamp = now;
107 touch_nmi_watchdog();
108
109 if (sysctl_hung_task_panic)
110 panic("hung_task: blocked tasks");
111}
112
113/*
114 * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
115 * a really long time (120 seconds). If that happens, print out
116 * a warning.
117 */
118static void check_hung_uninterruptible_tasks(void)
119{
120 int max_count = sysctl_hung_task_check_count;
121 unsigned long now = get_timestamp();
122 struct task_struct *g, *t;
123
124 /*
125 * If the system crashed already then all bets are off,
126 * do not report extra hung tasks:
127 */
128 if (test_taint(TAINT_DIE) || did_panic)
129 return;
130
131 read_lock(&tasklist_lock);
132 do_each_thread(g, t) {
133 if (!--max_count)
134 goto unlock;
135 /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
136 if (t->state == TASK_UNINTERRUPTIBLE)
137 check_hung_task(t, now);
138 } while_each_thread(g, t);
139 unlock:
140 read_unlock(&tasklist_lock);
141}
142
143static void update_poll_jiffies(void)
144{
145 /* timeout of 0 will disable the watchdog */
146 if (sysctl_hung_task_timeout_secs == 0)
147 hung_task_poll_jiffies = MAX_SCHEDULE_TIMEOUT;
148 else
149 hung_task_poll_jiffies = sysctl_hung_task_timeout_secs * HZ / 2;
150}
151
152/*
153 * Process updating of timeout sysctl
154 */
155int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
156 struct file *filp, void __user *buffer,
157 size_t *lenp, loff_t *ppos)
158{
159 int ret;
160
161 ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
162
163 if (ret || !write)
164 goto out;
165
166 update_poll_jiffies();
167
168 wake_up_process(watchdog_task);
169
170 out:
171 return ret;
172}
173
174/*
175 * kthread which checks for tasks stuck in D state
176 */
177static int watchdog(void *dummy)
178{
179 set_user_nice(current, 0);
180 update_poll_jiffies();
181
182 for ( ; ; ) {
183 while (schedule_timeout_interruptible(hung_task_poll_jiffies));
184 check_hung_uninterruptible_tasks();
185 }
186
187 return 0;
188}
189
190static int __init hung_task_init(void)
191{
192 atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
193 watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
194
195 return 0;
196}
197
198module_init(hung_task_init);
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 85d5a2455103..88796c330838 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -166,97 +166,11 @@ void softlockup_tick(void)
166} 166}
167 167
168/* 168/*
169 * Have a reasonable limit on the number of tasks checked:
170 */
171unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
172
173/*
174 * Zero means infinite timeout - no checking done:
175 */
176unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
177
178unsigned long __read_mostly sysctl_hung_task_warnings = 10;
179
180/*
181 * Only do the hung-tasks check on one CPU:
182 */
183static int check_cpu __read_mostly = -1;
184
185static void check_hung_task(struct task_struct *t, unsigned long now)
186{
187 unsigned long switch_count = t->nvcsw + t->nivcsw;
188
189 if (t->flags & PF_FROZEN)
190 return;
191
192 if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
193 t->last_switch_count = switch_count;
194 t->last_switch_timestamp = now;
195 return;
196 }
197 if ((long)(now - t->last_switch_timestamp) <
198 sysctl_hung_task_timeout_secs)
199 return;
200 if (!sysctl_hung_task_warnings)
201 return;
202 sysctl_hung_task_warnings--;
203
204 /*
205 * Ok, the task did not get scheduled for more than 2 minutes,
206 * complain:
207 */
208 printk(KERN_ERR "INFO: task %s:%d blocked for more than "
209 "%ld seconds.\n", t->comm, t->pid,
210 sysctl_hung_task_timeout_secs);
211 printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
212 " disables this message.\n");
213 sched_show_task(t);
214 __debug_show_held_locks(t);
215
216 t->last_switch_timestamp = now;
217 touch_nmi_watchdog();
218
219 if (softlockup_panic)
220 panic("softlockup: blocked tasks");
221}
222
223/*
224 * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
225 * a really long time (120 seconds). If that happens, print out
226 * a warning.
227 */
228static void check_hung_uninterruptible_tasks(int this_cpu)
229{
230 int max_count = sysctl_hung_task_check_count;
231 unsigned long now = get_timestamp(this_cpu);
232 struct task_struct *g, *t;
233
234 /*
235 * If the system crashed already then all bets are off,
236 * do not report extra hung tasks:
237 */
238 if (test_taint(TAINT_DIE) || did_panic)
239 return;
240
241 read_lock(&tasklist_lock);
242 do_each_thread(g, t) {
243 if (!--max_count)
244 goto unlock;
245 /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
246 if (t->state == TASK_UNINTERRUPTIBLE)
247 check_hung_task(t, now);
248 } while_each_thread(g, t);
249 unlock:
250 read_unlock(&tasklist_lock);
251}
252
253/*
254 * The watchdog thread - runs every second and touches the timestamp. 169 * The watchdog thread - runs every second and touches the timestamp.
255 */ 170 */
256static int watchdog(void *__bind_cpu) 171static int watchdog(void *__bind_cpu)
257{ 172{
258 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 173 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
259 int this_cpu = (long)__bind_cpu;
260 174
261 sched_setscheduler(current, SCHED_FIFO, &param); 175 sched_setscheduler(current, SCHED_FIFO, &param);
262 176
@@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu)
276 if (kthread_should_stop()) 190 if (kthread_should_stop())
277 break; 191 break;
278 192
279 if (this_cpu == check_cpu) {
280 if (sysctl_hung_task_timeout_secs)
281 check_hung_uninterruptible_tasks(this_cpu);
282 }
283
284 set_current_state(TASK_INTERRUPTIBLE); 193 set_current_state(TASK_INTERRUPTIBLE);
285 } 194 }
286 __set_current_state(TASK_RUNNING); 195 __set_current_state(TASK_RUNNING);
@@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
312 break; 221 break;
313 case CPU_ONLINE: 222 case CPU_ONLINE:
314 case CPU_ONLINE_FROZEN: 223 case CPU_ONLINE_FROZEN:
315 check_cpu = cpumask_any(cpu_online_mask);
316 wake_up_process(per_cpu(watchdog_task, hotcpu)); 224 wake_up_process(per_cpu(watchdog_task, hotcpu));
317 break; 225 break;
318#ifdef CONFIG_HOTPLUG_CPU 226#ifdef CONFIG_HOTPLUG_CPU
319 case CPU_DOWN_PREPARE:
320 case CPU_DOWN_PREPARE_FROZEN:
321 if (hotcpu == check_cpu) {
322 /* Pick any other online cpu. */
323 check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
324 }
325 break;
326
327 case CPU_UP_CANCELED: 227 case CPU_UP_CANCELED:
328 case CPU_UP_CANCELED_FROZEN: 228 case CPU_UP_CANCELED_FROZEN:
329 if (!per_cpu(watchdog_task, hotcpu)) 229 if (!per_cpu(watchdog_task, hotcpu))
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 596dc31a7116..2481ed30d2b5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -805,6 +805,19 @@ static struct ctl_table kern_table[] = {
805 .extra1 = &neg_one, 805 .extra1 = &neg_one,
806 .extra2 = &sixty, 806 .extra2 = &sixty,
807 }, 807 },
808#endif
809#ifdef CONFIG_DETECT_HUNG_TASK
810 {
811 .ctl_name = CTL_UNNUMBERED,
812 .procname = "hung_task_panic",
813 .data = &sysctl_hung_task_panic,
814 .maxlen = sizeof(int),
815 .mode = 0644,
816 .proc_handler = &proc_dointvec_minmax,
817 .strategy = &sysctl_intvec,
818 .extra1 = &zero,
819 .extra2 = &one,
820 },
808 { 821 {
809 .ctl_name = CTL_UNNUMBERED, 822 .ctl_name = CTL_UNNUMBERED,
810 .procname = "hung_task_check_count", 823 .procname = "hung_task_check_count",
@@ -820,7 +833,7 @@ static struct ctl_table kern_table[] = {
820 .data = &sysctl_hung_task_timeout_secs, 833 .data = &sysctl_hung_task_timeout_secs,
821 .maxlen = sizeof(unsigned long), 834 .maxlen = sizeof(unsigned long),
822 .mode = 0644, 835 .mode = 0644,
823 .proc_handler = &proc_doulongvec_minmax, 836 .proc_handler = &proc_dohung_task_timeout_secs,
824 .strategy = &sysctl_intvec, 837 .strategy = &sysctl_intvec,
825 }, 838 },
826 { 839 {