diff options
author | Mandeep Singh Baines <msb@google.com> | 2009-01-15 14:08:40 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-01-16 08:06:04 -0500 |
commit | e162b39a368f0401e41b558f430c354d12a85b37 (patch) | |
tree | 3fb7e4d48f398d62e5074e7e3dd183cc54f59820 /kernel/hung_task.c | |
parent | c903ff837909ccada51243307d4239f86af40179 (diff) |
softlockup: decouple hung tasks check from softlockup detection
Decoupling allows:
* hung tasks check to happen at very low priority
* hung tasks check and softlockup to be enabled/disabled independently
at compile and/or run-time
* individual panic settings to be enabled/disabled independently
at compile and/or run-time
* softlockup threshold to be reduced without increasing hung tasks
poll frequency (hung task check is expensive relative to the softlockup watchdog)
* hung task check to be zero over-head when disabled at run-time
Signed-off-by: Mandeep Singh Baines <msb@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/hung_task.c')
-rw-r--r-- | kernel/hung_task.c | 198 |
1 files changed, 198 insertions, 0 deletions
diff --git a/kernel/hung_task.c b/kernel/hung_task.c new file mode 100644 index 000000000000..ba5a77cad3bb --- /dev/null +++ b/kernel/hung_task.c | |||
@@ -0,0 +1,198 @@ | |||
1 | /* | ||
2 | * Detect Hung Task | ||
3 | * | ||
4 | * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state | ||
5 | * | ||
6 | */ | ||
7 | |||
8 | #include <linux/mm.h> | ||
9 | #include <linux/cpu.h> | ||
10 | #include <linux/nmi.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/delay.h> | ||
13 | #include <linux/freezer.h> | ||
14 | #include <linux/kthread.h> | ||
15 | #include <linux/lockdep.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/sysctl.h> | ||
18 | |||
19 | /* | ||
20 | * Have a reasonable limit on the number of tasks checked: | ||
21 | */ | ||
22 | unsigned long __read_mostly sysctl_hung_task_check_count = 1024; | ||
23 | |||
24 | /* | ||
25 | * Zero means infinite timeout - no checking done: | ||
26 | */ | ||
27 | unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120; | ||
28 | static unsigned long __read_mostly hung_task_poll_jiffies; | ||
29 | |||
30 | unsigned long __read_mostly sysctl_hung_task_warnings = 10; | ||
31 | |||
32 | static int __read_mostly did_panic; | ||
33 | |||
34 | static struct task_struct *watchdog_task; | ||
35 | |||
36 | /* | ||
37 | * Should we panic (and reboot, if panic_timeout= is set) when a | ||
38 | * hung task is detected: | ||
39 | */ | ||
40 | unsigned int __read_mostly sysctl_hung_task_panic = | ||
41 | CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE; | ||
42 | |||
43 | static int __init hung_task_panic_setup(char *str) | ||
44 | { | ||
45 | sysctl_hung_task_panic = simple_strtoul(str, NULL, 0); | ||
46 | |||
47 | return 1; | ||
48 | } | ||
49 | __setup("hung_task_panic=", hung_task_panic_setup); | ||
50 | |||
51 | static int | ||
52 | hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr) | ||
53 | { | ||
54 | did_panic = 1; | ||
55 | |||
56 | return NOTIFY_DONE; | ||
57 | } | ||
58 | |||
59 | static struct notifier_block panic_block = { | ||
60 | .notifier_call = hung_task_panic, | ||
61 | }; | ||
62 | |||
/*
 * Current time in (approximate) seconds, read from the clock of the CPU
 * we happen to be running on.
 *
 * Nanosecond resolution is not needed here, so instead of an expensive
 * divide by 10^9 the value is shifted right by 30 bits:
 * 2^30ns == 1.074s, which is close enough.
 */
static unsigned long get_timestamp(void)
{
	return cpu_clock(raw_smp_processor_id()) >> 30;	/* 2^30 ~= 10^9 */
}
74 | |||
75 | static void check_hung_task(struct task_struct *t, unsigned long now) | ||
76 | { | ||
77 | unsigned long switch_count = t->nvcsw + t->nivcsw; | ||
78 | |||
79 | if (t->flags & PF_FROZEN) | ||
80 | return; | ||
81 | |||
82 | if (switch_count != t->last_switch_count || !t->last_switch_timestamp) { | ||
83 | t->last_switch_count = switch_count; | ||
84 | t->last_switch_timestamp = now; | ||
85 | return; | ||
86 | } | ||
87 | if ((long)(now - t->last_switch_timestamp) < | ||
88 | sysctl_hung_task_timeout_secs) | ||
89 | return; | ||
90 | if (!sysctl_hung_task_warnings) | ||
91 | return; | ||
92 | sysctl_hung_task_warnings--; | ||
93 | |||
94 | /* | ||
95 | * Ok, the task did not get scheduled for more than 2 minutes, | ||
96 | * complain: | ||
97 | */ | ||
98 | printk(KERN_ERR "INFO: task %s:%d blocked for more than " | ||
99 | "%ld seconds.\n", t->comm, t->pid, | ||
100 | sysctl_hung_task_timeout_secs); | ||
101 | printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" | ||
102 | " disables this message.\n"); | ||
103 | sched_show_task(t); | ||
104 | __debug_show_held_locks(t); | ||
105 | |||
106 | t->last_switch_timestamp = now; | ||
107 | touch_nmi_watchdog(); | ||
108 | |||
109 | if (sysctl_hung_task_panic) | ||
110 | panic("hung_task: blocked tasks"); | ||
111 | } | ||
112 | |||
113 | /* | ||
114 | * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for | ||
115 | * a really long time (120 seconds). If that happens, print out | ||
116 | * a warning. | ||
117 | */ | ||
118 | static void check_hung_uninterruptible_tasks(void) | ||
119 | { | ||
120 | int max_count = sysctl_hung_task_check_count; | ||
121 | unsigned long now = get_timestamp(); | ||
122 | struct task_struct *g, *t; | ||
123 | |||
124 | /* | ||
125 | * If the system crashed already then all bets are off, | ||
126 | * do not report extra hung tasks: | ||
127 | */ | ||
128 | if (test_taint(TAINT_DIE) || did_panic) | ||
129 | return; | ||
130 | |||
131 | read_lock(&tasklist_lock); | ||
132 | do_each_thread(g, t) { | ||
133 | if (!--max_count) | ||
134 | goto unlock; | ||
135 | /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ | ||
136 | if (t->state == TASK_UNINTERRUPTIBLE) | ||
137 | check_hung_task(t, now); | ||
138 | } while_each_thread(g, t); | ||
139 | unlock: | ||
140 | read_unlock(&tasklist_lock); | ||
141 | } | ||
142 | |||
143 | static void update_poll_jiffies(void) | ||
144 | { | ||
145 | /* timeout of 0 will disable the watchdog */ | ||
146 | if (sysctl_hung_task_timeout_secs == 0) | ||
147 | hung_task_poll_jiffies = MAX_SCHEDULE_TIMEOUT; | ||
148 | else | ||
149 | hung_task_poll_jiffies = sysctl_hung_task_timeout_secs * HZ / 2; | ||
150 | } | ||
151 | |||
152 | /* | ||
153 | * Process updating of timeout sysctl | ||
154 | */ | ||
155 | int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, | ||
156 | struct file *filp, void __user *buffer, | ||
157 | size_t *lenp, loff_t *ppos) | ||
158 | { | ||
159 | int ret; | ||
160 | |||
161 | ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); | ||
162 | |||
163 | if (ret || !write) | ||
164 | goto out; | ||
165 | |||
166 | update_poll_jiffies(); | ||
167 | |||
168 | wake_up_process(watchdog_task); | ||
169 | |||
170 | out: | ||
171 | return ret; | ||
172 | } | ||
173 | |||
174 | /* | ||
175 | * kthread which checks for tasks stuck in D state | ||
176 | */ | ||
177 | static int watchdog(void *dummy) | ||
178 | { | ||
179 | set_user_nice(current, 0); | ||
180 | update_poll_jiffies(); | ||
181 | |||
182 | for ( ; ; ) { | ||
183 | while (schedule_timeout_interruptible(hung_task_poll_jiffies)); | ||
184 | check_hung_uninterruptible_tasks(); | ||
185 | } | ||
186 | |||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | static int __init hung_task_init(void) | ||
191 | { | ||
192 | atomic_notifier_chain_register(&panic_notifier_list, &panic_block); | ||
193 | watchdog_task = kthread_run(watchdog, NULL, "khungtaskd"); | ||
194 | |||
195 | return 0; | ||
196 | } | ||
197 | |||
198 | module_init(hung_task_init); | ||