summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--kernel/watchdog.c15
-rw-r--r--kernel/watchdog_hld.c20
2 files changed, 7 insertions, 28 deletions
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 1c185d9dd468..af000956286c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -485,21 +485,6 @@ static void watchdog(unsigned int cpu)
485 __this_cpu_write(soft_lockup_hrtimer_cnt, 485 __this_cpu_write(soft_lockup_hrtimer_cnt,
486 __this_cpu_read(hrtimer_interrupts)); 486 __this_cpu_read(hrtimer_interrupts));
487 __touch_watchdog(); 487 __touch_watchdog();
488
489 /*
490 * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the
491 * failure path. Check for failures that can occur asynchronously -
492 * for example, when CPUs are on-lined - and shut down the hardware
493 * perf event on each CPU accordingly.
494 *
495 * The only non-obvious place this bit can be cleared is through
496 * watchdog_nmi_enable(), so a pr_info() is placed there. Placing a
497 * pr_info here would be too noisy as it would result in a message
498 * every few seconds if the hardlockup was disabled but the softlockup
499 * enabled.
500 */
501 if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
502 watchdog_nmi_disable(cpu);
503} 488}
504 489
505static struct smp_hotplug_thread watchdog_threads = { 490static struct smp_hotplug_thread watchdog_threads = {
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index c9586ebc2e98..7b602714ea53 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -23,6 +23,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
23static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); 23static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
24 24
25static unsigned long hardlockup_allcpu_dumped; 25static unsigned long hardlockup_allcpu_dumped;
26static bool hardlockup_detector_disabled;
26 27
27void arch_touch_nmi_watchdog(void) 28void arch_touch_nmi_watchdog(void)
28{ 29{
@@ -178,6 +179,10 @@ int watchdog_nmi_enable(unsigned int cpu)
178 if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) 179 if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
179 goto out; 180 goto out;
180 181
182 /* A failure disabled the hardlockup detector permanently */
183 if (hardlockup_detector_disabled)
184 return -ENODEV;
185
181 /* is it already setup and enabled? */ 186 /* is it already setup and enabled? */
182 if (event && event->state > PERF_EVENT_STATE_OFF) 187 if (event && event->state > PERF_EVENT_STATE_OFF)
183 goto out; 188 goto out;
@@ -206,18 +211,6 @@ int watchdog_nmi_enable(unsigned int cpu)
206 goto out_save; 211 goto out_save;
207 } 212 }
208 213
209 /*
210 * Disable the hard lockup detector if _any_ CPU fails to set up
211 * set up the hardware perf event. The watchdog() function checks
212 * the NMI_WATCHDOG_ENABLED bit periodically.
213 *
214 * The barriers are for syncing up watchdog_enabled across all the
215 * cpus, as clear_bit() does not use barriers.
216 */
217 smp_mb__before_atomic();
218 clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
219 smp_mb__after_atomic();
220
221 /* skip displaying the same error again */ 214 /* skip displaying the same error again */
222 if (!firstcpu && (PTR_ERR(event) == firstcpu_err)) 215 if (!firstcpu && (PTR_ERR(event) == firstcpu_err))
223 return PTR_ERR(event); 216 return PTR_ERR(event);
@@ -232,7 +225,8 @@ int watchdog_nmi_enable(unsigned int cpu)
232 pr_err("disabled (cpu%i): unable to create perf event: %ld\n", 225 pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
233 cpu, PTR_ERR(event)); 226 cpu, PTR_ERR(event));
234 227
235 pr_info("Shutting down hard lockup detector on all cpus\n"); 228 pr_info("Disabling hard lockup detector permanently\n");
229 hardlockup_detector_disabled = true;
236 230
237 return PTR_ERR(event); 231 return PTR_ERR(event);
238 232