summary | refs | log | tree | commit | diff | stats
path: root/kernel/watchdog_hld.c
diff options
context:
space:
mode:
author: Don Zickus <dzickus@redhat.com> 2017-11-01 14:11:27 -0400
committer: Thomas Gleixner <tglx@linutronix.de> 2017-11-01 16:18:40 -0400
commit: 42f930da7f00c0ab23df4c7aed36137f35988980 (patch)
tree: 893725417db8d2f581994c6baa8cb1b2d2e79f72 /kernel/watchdog_hld.c
parent: 9c388a5ed1960b2ebbebd3dbe7553092b0c15ec1 (diff)
watchdog/hardlockup/perf: Use atomics to track in-use cpu counter
Guenter reported:

  There is still a problem. When running

    echo 6 > /proc/sys/kernel/watchdog_thresh
    echo 5 > /proc/sys/kernel/watchdog_thresh

  repeatedly, the message

    NMI watchdog: Enabled. Permanently consumes one hw-PMU counter.

  stops after a while (after ~10-30 iterations, with fluctuations).
  Maybe watchdog_cpus needs to be atomic?

That's correct, as this again is affected by the asynchronous nature of the
smpboot thread unpark mechanism.

  CPU 0                         CPU1                    CPU2
  write(watchdog_thresh, 6)
    stop()
      park()
    update()
    start()
      unpark()
                                thread->unpark()
                                  cnt++;
  write(watchdog_thresh, 5)                             thread->unpark()
    stop()
      park()                    thread->park()
                                  cnt--;                  cnt++;
    update()
    start()
      unpark()

That's not a functional problem; it just affects the informational message.

Convert watchdog_cpus to atomic_t to prevent the problem.

Reported-and-tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20171101181126.j727fqjmdthjz4xk@redhat.com
Diffstat (limited to 'kernel/watchdog_hld.c')
-rw-r--r-- kernel/watchdog_hld.c | 8
1 files changed, 5 insertions, 3 deletions
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index a7f137c1933a..a84b205fac9a 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -12,6 +12,7 @@
12#define pr_fmt(fmt) "NMI watchdog: " fmt 12#define pr_fmt(fmt) "NMI watchdog: " fmt
13 13
14#include <linux/nmi.h> 14#include <linux/nmi.h>
15#include <linux/atomic.h>
15#include <linux/module.h> 16#include <linux/module.h>
16#include <linux/sched/debug.h> 17#include <linux/sched/debug.h>
17 18
@@ -25,7 +26,7 @@ static DEFINE_PER_CPU(struct perf_event *, dead_event);
25static struct cpumask dead_events_mask; 26static struct cpumask dead_events_mask;
26 27
27static unsigned long hardlockup_allcpu_dumped; 28static unsigned long hardlockup_allcpu_dumped;
28static unsigned int watchdog_cpus; 29static atomic_t watchdog_cpus = ATOMIC_INIT(0);
29 30
30void arch_touch_nmi_watchdog(void) 31void arch_touch_nmi_watchdog(void)
31{ 32{
@@ -189,7 +190,8 @@ void hardlockup_detector_perf_enable(void)
189 if (hardlockup_detector_event_create()) 190 if (hardlockup_detector_event_create())
190 return; 191 return;
191 192
192 if (!watchdog_cpus++) 193 /* use original value for check */
194 if (!atomic_fetch_inc(&watchdog_cpus))
193 pr_info("Enabled. Permanently consumes one hw-PMU counter.\n"); 195 pr_info("Enabled. Permanently consumes one hw-PMU counter.\n");
194 196
195 perf_event_enable(this_cpu_read(watchdog_ev)); 197 perf_event_enable(this_cpu_read(watchdog_ev));
@@ -207,7 +209,7 @@ void hardlockup_detector_perf_disable(void)
207 this_cpu_write(watchdog_ev, NULL); 209 this_cpu_write(watchdog_ev, NULL);
208 this_cpu_write(dead_event, event); 210 this_cpu_write(dead_event, event);
209 cpumask_set_cpu(smp_processor_id(), &dead_events_mask); 211 cpumask_set_cpu(smp_processor_id(), &dead_events_mask);
210 watchdog_cpus--; 212 atomic_dec(&watchdog_cpus);
211 } 213 }
212} 214}
213 215