aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/watchdog.c
diff options
context:
space:
mode:
author: Ulrich Obergfell <uobergfe@redhat.com> 2015-04-14 18:44:10 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-04-14 19:48:59 -0400
commit: bcfba4f4bf3c9c7c72b459d52a9e826dfd72855e (patch)
tree: 8e439f7790d9e572a55c2c257fafabbaca865563 /kernel/watchdog.c
parent: 83a80a39075a9ded23df1e26a4b617c289077630 (diff)
watchdog: implement error handling for failure to set up hardware perf events
If watchdog_nmi_enable() fails to set up the hardware perf event of one CPU, the entire hard lockup detector is deemed unreliable. Hence, disable the hard lockup detector and shut down the hardware perf events on all CPUs.

[dzickus@redhat.com: update comments to explain some code]

Signed-off-by: Ulrich Obergfell <uobergfe@redhat.com>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/watchdog.c')
-rw-r--r-- kernel/watchdog.c | 30
1 files changed, 30 insertions, 0 deletions
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 26002ed4c16e..fd2b6dc14486 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -502,6 +502,21 @@ static void watchdog(unsigned int cpu)
502 __this_cpu_write(soft_lockup_hrtimer_cnt, 502 __this_cpu_write(soft_lockup_hrtimer_cnt,
503 __this_cpu_read(hrtimer_interrupts)); 503 __this_cpu_read(hrtimer_interrupts));
504 __touch_watchdog(); 504 __touch_watchdog();
505
506 /*
507 * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the
508 * failure path. Check for failures that can occur asynchronously -
509 * for example, when CPUs are on-lined - and shut down the hardware
510 * perf event on each CPU accordingly.
511 *
512 * The only non-obvious place this bit can be cleared is through
513 * watchdog_nmi_enable(), so a pr_info() is placed there. Placing a
514 * pr_info here would be too noisy as it would result in a message
515 * every few seconds if the hardlockup was disabled but the softlockup
516 * enabled.
517 */
518 if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
519 watchdog_nmi_disable(cpu);
505} 520}
506 521
507#ifdef CONFIG_HARDLOCKUP_DETECTOR 522#ifdef CONFIG_HARDLOCKUP_DETECTOR
@@ -552,6 +567,18 @@ handle_err:
552 goto out_save; 567 goto out_save;
553 } 568 }
554 569
570 /*
571 * Disable the hard lockup detector if _any_ CPU fails to set up
572 * the hardware perf event. The watchdog() function checks
573 * the NMI_WATCHDOG_ENABLED bit periodically.
574 *
575 * The barriers are for syncing up watchdog_enabled across all the
576 * cpus, as clear_bit() does not use barriers.
577 */
578 smp_mb__before_atomic();
579 clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
580 smp_mb__after_atomic();
581
555 /* skip displaying the same error again */ 582 /* skip displaying the same error again */
556 if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) 583 if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
557 return PTR_ERR(event); 584 return PTR_ERR(event);
@@ -565,6 +592,9 @@ handle_err:
565 else 592 else
566 pr_err("disabled (cpu%i): unable to create perf event: %ld\n", 593 pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
567 cpu, PTR_ERR(event)); 594 cpu, PTR_ERR(event));
595
596 pr_info("Shutting down hard lockup detector on all cpus\n");
597
568 return PTR_ERR(event); 598 return PTR_ERR(event);
569 599
570 /* success path */ 600 /* success path */