aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Michal Hocko <mhocko@suse.cz> 2012-03-23 18:01:55 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-03-23 19:58:32 -0400
commit: 7a05c0f7bbae91d08b7d0acf016fdb42dbc912ae (patch)
tree: 0fca719baf9c60f5659a28bc977f975acd2ec107 /kernel
parent: 397a21f24d455982a8a6f9bc11b5f3326ce3c6ef (diff)
watchdog: make sure the watchdog thread gets CPU on loaded system
If the system is loaded while hotplugging a CPU, we might end up with a bogus hard lockup detection. This has been seen during the LTP pounder test executed in parallel with a hotplug test. The main problem is that watchdog_enable (called when a CPU is brought up) registers a perf event which periodically checks a per-cpu counter (hrtimer_interrupts) that is updated from an hrtimer callback, but the hrtimer is started from the kernel thread. This means that while we already check for a hard lockup, the kernel thread might still be sitting on the runqueue behind zillions of tasks, so there is nobody to update the value we rely on, and so we KABOOM. Let's fix this by boosting the watchdog thread priority before we wake it up rather than when it's already running. This still doesn't handle the case where there is the same number of high-priority FIFO tasks, but that doesn't seem to be common. The current implementation doesn't handle that case anyway, so this is at least not worse. Unfortunately, we cannot start the perf counter from the watchdog thread because we could miss a real lockup, and we also cannot start the hrtimer from watchdog_enable because there is no way (at least I don't know of any) to start an hrtimer from a different CPU. [dzickus@redhat.com: fix compile issue with param] Cc: Ingo Molnar <mingo@elte.hu> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Reviewed-by: Mandeep Singh Baines <msb@chromium.org> Signed-off-by: Michal Hocko <mhocko@suse.cz> Signed-off-by: Don Zickus <dzickus@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/watchdog.c 7
1 files changed, 3 insertions, 4 deletions
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 14bc092fb12c..203fc6e1a285 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -319,11 +319,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
319 */ 319 */
320static int watchdog(void *unused) 320static int watchdog(void *unused)
321{ 321{
322 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 322 struct sched_param param = { .sched_priority = 0 };
323 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); 323 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
324 324
325 sched_setscheduler(current, SCHED_FIFO, &param);
326
327 /* initialize timestamp */ 325 /* initialize timestamp */
328 __touch_watchdog(); 326 __touch_watchdog();
329 327
@@ -350,7 +348,6 @@ static int watchdog(void *unused)
350 set_current_state(TASK_INTERRUPTIBLE); 348 set_current_state(TASK_INTERRUPTIBLE);
351 } 349 }
352 __set_current_state(TASK_RUNNING); 350 __set_current_state(TASK_RUNNING);
353 param.sched_priority = 0;
354 sched_setscheduler(current, SCHED_NORMAL, &param); 351 sched_setscheduler(current, SCHED_NORMAL, &param);
355 return 0; 352 return 0;
356} 353}
@@ -439,6 +436,7 @@ static int watchdog_enable(int cpu)
439 436
440 /* create the watchdog thread */ 437 /* create the watchdog thread */
441 if (!p) { 438 if (!p) {
439 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
442 p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu); 440 p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu);
443 if (IS_ERR(p)) { 441 if (IS_ERR(p)) {
444 printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); 442 printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
@@ -450,6 +448,7 @@ static int watchdog_enable(int cpu)
450 } 448 }
451 goto out; 449 goto out;
452 } 450 }
451 sched_setscheduler(p, SCHED_FIFO, &param);
453 kthread_bind(p, cpu); 452 kthread_bind(p, cpu);
454 per_cpu(watchdog_touch_ts, cpu) = 0; 453 per_cpu(watchdog_touch_ts, cpu) = 0;
455 per_cpu(softlockup_watchdog, cpu) = p; 454 per_cpu(softlockup_watchdog, cpu) = p;