diff options
author | Mandeep Singh Baines <msb@google.com> | 2009-01-15 14:08:40 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-01-16 08:06:04 -0500 |
commit | e162b39a368f0401e41b558f430c354d12a85b37 (patch) | |
tree | 3fb7e4d48f398d62e5074e7e3dd183cc54f59820 /kernel/softlockup.c | |
parent | c903ff837909ccada51243307d4239f86af40179 (diff) |
softlockup: decouple hung tasks check from softlockup detection
Decoupling allows:
* hung tasks check to happen at very low priority
* hung tasks check and softlockup to be enabled/disabled independently
at compile and/or run-time
* individual panic settings to be enabled/disabled independently
at compile and/or run-time
* softlockup threshold to be reduced without increasing hung tasks
poll frequency (hung task check is expensive relative to softlockup watchdog)
* hung task check to be zero overhead when disabled at run-time
Signed-off-by: Mandeep Singh Baines <msb@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/softlockup.c')
-rw-r--r-- | kernel/softlockup.c | 100 |
1 files changed, 0 insertions, 100 deletions
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 85d5a2455103..88796c330838 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
@@ -166,97 +166,11 @@ void softlockup_tick(void) | |||
166 | } | 166 | } |
167 | 167 | ||
168 | /* | 168 | /* |
169 | * Have a reasonable limit on the number of tasks checked: | ||
170 | */ | ||
171 | unsigned long __read_mostly sysctl_hung_task_check_count = 1024; | ||
172 | |||
173 | /* | ||
174 | * Zero means infinite timeout - no checking done: | ||
175 | */ | ||
176 | unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480; | ||
177 | |||
178 | unsigned long __read_mostly sysctl_hung_task_warnings = 10; | ||
179 | |||
180 | /* | ||
181 | * Only do the hung-tasks check on one CPU: | ||
182 | */ | ||
183 | static int check_cpu __read_mostly = -1; | ||
184 | |||
185 | static void check_hung_task(struct task_struct *t, unsigned long now) | ||
186 | { | ||
187 | unsigned long switch_count = t->nvcsw + t->nivcsw; | ||
188 | |||
189 | if (t->flags & PF_FROZEN) | ||
190 | return; | ||
191 | |||
192 | if (switch_count != t->last_switch_count || !t->last_switch_timestamp) { | ||
193 | t->last_switch_count = switch_count; | ||
194 | t->last_switch_timestamp = now; | ||
195 | return; | ||
196 | } | ||
197 | if ((long)(now - t->last_switch_timestamp) < | ||
198 | sysctl_hung_task_timeout_secs) | ||
199 | return; | ||
200 | if (!sysctl_hung_task_warnings) | ||
201 | return; | ||
202 | sysctl_hung_task_warnings--; | ||
203 | |||
204 | /* | ||
205 | * Ok, the task did not get scheduled for more than 2 minutes, | ||
206 | * complain: | ||
207 | */ | ||
208 | printk(KERN_ERR "INFO: task %s:%d blocked for more than " | ||
209 | "%ld seconds.\n", t->comm, t->pid, | ||
210 | sysctl_hung_task_timeout_secs); | ||
211 | printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" | ||
212 | " disables this message.\n"); | ||
213 | sched_show_task(t); | ||
214 | __debug_show_held_locks(t); | ||
215 | |||
216 | t->last_switch_timestamp = now; | ||
217 | touch_nmi_watchdog(); | ||
218 | |||
219 | if (softlockup_panic) | ||
220 | panic("softlockup: blocked tasks"); | ||
221 | } | ||
222 | |||
223 | /* | ||
224 | * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for | ||
225 | * a really long time (120 seconds). If that happens, print out | ||
226 | * a warning. | ||
227 | */ | ||
228 | static void check_hung_uninterruptible_tasks(int this_cpu) | ||
229 | { | ||
230 | int max_count = sysctl_hung_task_check_count; | ||
231 | unsigned long now = get_timestamp(this_cpu); | ||
232 | struct task_struct *g, *t; | ||
233 | |||
234 | /* | ||
235 | * If the system crashed already then all bets are off, | ||
236 | * do not report extra hung tasks: | ||
237 | */ | ||
238 | if (test_taint(TAINT_DIE) || did_panic) | ||
239 | return; | ||
240 | |||
241 | read_lock(&tasklist_lock); | ||
242 | do_each_thread(g, t) { | ||
243 | if (!--max_count) | ||
244 | goto unlock; | ||
245 | /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ | ||
246 | if (t->state == TASK_UNINTERRUPTIBLE) | ||
247 | check_hung_task(t, now); | ||
248 | } while_each_thread(g, t); | ||
249 | unlock: | ||
250 | read_unlock(&tasklist_lock); | ||
251 | } | ||
252 | |||
253 | /* | ||
254 | * The watchdog thread - runs every second and touches the timestamp. | 169 | * The watchdog thread - runs every second and touches the timestamp. |
255 | */ | 170 | */ |
256 | static int watchdog(void *__bind_cpu) | 171 | static int watchdog(void *__bind_cpu) |
257 | { | 172 | { |
258 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | 173 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; |
259 | int this_cpu = (long)__bind_cpu; | ||
260 | 174 | ||
261 | sched_setscheduler(current, SCHED_FIFO, &param); | 176 | sched_setscheduler(current, SCHED_FIFO, &param); |
262 | 176 | ||
@@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu) | |||
276 | if (kthread_should_stop()) | 190 | if (kthread_should_stop()) |
277 | break; | 191 | break; |
278 | 192 | ||
279 | if (this_cpu == check_cpu) { | ||
280 | if (sysctl_hung_task_timeout_secs) | ||
281 | check_hung_uninterruptible_tasks(this_cpu); | ||
282 | } | ||
283 | |||
284 | set_current_state(TASK_INTERRUPTIBLE); | 193 | set_current_state(TASK_INTERRUPTIBLE); |
285 | } | 194 | } |
286 | __set_current_state(TASK_RUNNING); | 195 | __set_current_state(TASK_RUNNING); |
@@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
312 | break; | 221 | break; |
313 | case CPU_ONLINE: | 222 | case CPU_ONLINE: |
314 | case CPU_ONLINE_FROZEN: | 223 | case CPU_ONLINE_FROZEN: |
315 | check_cpu = cpumask_any(cpu_online_mask); | ||
316 | wake_up_process(per_cpu(watchdog_task, hotcpu)); | 224 | wake_up_process(per_cpu(watchdog_task, hotcpu)); |
317 | break; | 225 | break; |
318 | #ifdef CONFIG_HOTPLUG_CPU | 226 | #ifdef CONFIG_HOTPLUG_CPU |
319 | case CPU_DOWN_PREPARE: | ||
320 | case CPU_DOWN_PREPARE_FROZEN: | ||
321 | if (hotcpu == check_cpu) { | ||
322 | /* Pick any other online cpu. */ | ||
323 | check_cpu = cpumask_any_but(cpu_online_mask, hotcpu); | ||
324 | } | ||
325 | break; | ||
326 | |||
327 | case CPU_UP_CANCELED: | 227 | case CPU_UP_CANCELED: |
328 | case CPU_UP_CANCELED_FROZEN: | 228 | case CPU_UP_CANCELED_FROZEN: |
329 | if (!per_cpu(watchdog_task, hotcpu)) | 229 | if (!per_cpu(watchdog_task, hotcpu)) |