aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorMichal Hocko <mhocko@suse.cz>2013-09-24 18:27:30 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-09-24 20:00:25 -0400
commit9809b18fcf6b8d8ec4d3643677345907e6b50eca (patch)
tree0dfb9bf2f84f5a18736567d126cb9ed7478fe008 /kernel
parent359e6fab6600562073162348cd4c18c5958296d8 (diff)
watchdog: update watchdog_thresh properly
watchdog_thresh controls how often the nmi perf event counter checks the per-cpu hrtimer_interrupts counter and blows up if the counter hasn't changed since the last check. The counter is updated by the per-cpu watchdog_hrtimer hrtimer, which is scheduled with a period of 2/5 of watchdog_thresh; this guarantees that the hrtimer fires at least 2 times per main period. Both the hrtimer and the perf event are started together when the watchdog is enabled. So far so good. But... But what happens when watchdog_thresh is updated from the sysctl handler? proc_dowatchdog will set a new sampling period and the hrtimer callback (watchdog_timer_fn) will use the new value in the next round. The problem, however, is that nobody tells the perf event that the sampling period has changed, so it keeps ticking with the period configured when it was set up. This might result in an ear-ripping dissonance between the perf and hrtimer parts if watchdog_thresh is increased. And even worse, it might lead to KABOOM if the watchdog is configured to panic on such a spurious lockup. This patch fixes the issue by updating both the nmi perf event counter and the hrtimers if the threshold value has changed. The nmi one is disabled and then reinitialized from scratch. This has the unpleasant side effect that the allocation of the new event might theoretically fail, so the hard lockup detector would be disabled for such cpus. On the other hand, such a memory allocation failure is very unlikely because the original event is deallocated right before. It would be much nicer if we just changed the perf event period, but there doesn't seem to be any API to do that right now. It is also unfortunate that perf_event_alloc uses GFP_KERNEL allocation unconditionally, so we cannot use on_each_cpu() and do the same thing from the per-cpu context. The update from the current CPU should be safe because perf_event_disable removes the event atomically before it clears the per-cpu watchdog_ev, so it cannot change anything under the running handler's feet.
The hrtimer is simply restarted (thanks to Don Zickus who has pointed this out) if it is queued, because we cannot rely on it firing and adapting to the new sampling period before a new nmi event triggers (when the threshold is decreased). [akpm@linux-foundation.org: the UP version of __smp_call_function_single ended up in the wrong place] Signed-off-by: Michal Hocko <mhocko@suse.cz> Acked-by: Don Zickus <dzickus@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@kernel.org> Cc: Fabio Estevam <festevam@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/watchdog.c53
1 files changed, 50 insertions, 3 deletions
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index ced7d0609931..4431610f049a 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -486,7 +486,52 @@ static struct smp_hotplug_thread watchdog_threads = {
486 .unpark = watchdog_enable, 486 .unpark = watchdog_enable,
487}; 487};
488 488
489static int watchdog_enable_all_cpus(void) 489static void restart_watchdog_hrtimer(void *info)
490{
491 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
492 int ret;
493
494 /*
495 * No need to cancel and restart hrtimer if it is currently executing
496 * because it will reprogram itself with the new period now.
497 * We should never see it unqueued here because we are running per-cpu
498 * with interrupts disabled.
499 */
500 ret = hrtimer_try_to_cancel(hrtimer);
501 if (ret == 1)
502 hrtimer_start(hrtimer, ns_to_ktime(sample_period),
503 HRTIMER_MODE_REL_PINNED);
504}
505
506static void update_timers(int cpu)
507{
508 struct call_single_data data = {.func = restart_watchdog_hrtimer};
509 /*
510 * Make sure that perf event counter will adopt to a new
511 * sampling period. Updating the sampling period directly would
512 * be much nicer but we do not have an API for that now so
513 * let's use a big hammer.
514 * Hrtimer will adopt the new period on the next tick but this
515 * might be late already so we have to restart the timer as well.
516 */
517 watchdog_nmi_disable(cpu);
518 __smp_call_function_single(cpu, &data, 1);
519 watchdog_nmi_enable(cpu);
520}
521
522static void update_timers_all_cpus(void)
523{
524 int cpu;
525
526 get_online_cpus();
527 preempt_disable();
528 for_each_online_cpu(cpu)
529 update_timers(cpu);
530 preempt_enable();
531 put_online_cpus();
532}
533
534static int watchdog_enable_all_cpus(bool sample_period_changed)
490{ 535{
491 int err = 0; 536 int err = 0;
492 537
@@ -496,6 +541,8 @@ static int watchdog_enable_all_cpus(void)
496 pr_err("Failed to create watchdog threads, disabled\n"); 541 pr_err("Failed to create watchdog threads, disabled\n");
497 else 542 else
498 watchdog_running = 1; 543 watchdog_running = 1;
544 } else if (sample_period_changed) {
545 update_timers_all_cpus();
499 } 546 }
500 547
501 return err; 548 return err;
@@ -537,7 +584,7 @@ int proc_dowatchdog(struct ctl_table *table, int write,
537 * watchdog_*_all_cpus() function takes care of this. 584 * watchdog_*_all_cpus() function takes care of this.
538 */ 585 */
539 if (watchdog_user_enabled && watchdog_thresh) 586 if (watchdog_user_enabled && watchdog_thresh)
540 err = watchdog_enable_all_cpus(); 587 err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh);
541 else 588 else
542 watchdog_disable_all_cpus(); 589 watchdog_disable_all_cpus();
543 590
@@ -557,5 +604,5 @@ void __init lockup_detector_init(void)
557 set_sample_period(); 604 set_sample_period();
558 605
559 if (watchdog_user_enabled) 606 if (watchdog_user_enabled)
560 watchdog_enable_all_cpus(); 607 watchdog_enable_all_cpus(false);
561} 608}