aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/parisc/kernel/process.c2
-rw-r--r--arch/powerpc/kernel/watchdog.c30
-rw-r--r--arch/x86/events/intel/core.c11
-rw-r--r--include/linux/nmi.h121
-rw-r--r--include/linux/smpboot.h4
-rw-r--r--kernel/cpu.c6
-rw-r--r--kernel/smpboot.c25
-rw-r--r--kernel/sysctl.c22
-rw-r--r--kernel/watchdog.c643
-rw-r--r--kernel/watchdog_hld.c196
10 files changed, 456 insertions, 604 deletions
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index a45a67d526f8..30f92391a93e 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -146,7 +146,7 @@ void machine_power_off(void)
146 146
147 /* prevent soft lockup/stalled CPU messages for endless loop. */ 147 /* prevent soft lockup/stalled CPU messages for endless loop. */
148 rcu_sysrq_start(); 148 rcu_sysrq_start();
149 lockup_detector_suspend(); 149 lockup_detector_soft_poweroff();
150 for (;;); 150 for (;;);
151} 151}
152 152
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 2f6eadd9408d..c702a8981452 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -310,9 +310,6 @@ static int start_wd_on_cpu(unsigned int cpu)
310 if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) 310 if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
311 return 0; 311 return 0;
312 312
313 if (watchdog_suspended)
314 return 0;
315
316 if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) 313 if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
317 return 0; 314 return 0;
318 315
@@ -358,36 +355,39 @@ static void watchdog_calc_timeouts(void)
358 wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5; 355 wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
359} 356}
360 357
361void watchdog_nmi_reconfigure(void) 358void watchdog_nmi_stop(void)
362{ 359{
363 int cpu; 360 int cpu;
364 361
365 watchdog_calc_timeouts();
366
367 for_each_cpu(cpu, &wd_cpus_enabled) 362 for_each_cpu(cpu, &wd_cpus_enabled)
368 stop_wd_on_cpu(cpu); 363 stop_wd_on_cpu(cpu);
364}
369 365
366void watchdog_nmi_start(void)
367{
368 int cpu;
369
370 watchdog_calc_timeouts();
370 for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) 371 for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
371 start_wd_on_cpu(cpu); 372 start_wd_on_cpu(cpu);
372} 373}
373 374
374/* 375/*
375 * This runs after lockup_detector_init() which sets up watchdog_cpumask. 376 * Invoked from core watchdog init.
376 */ 377 */
377static int __init powerpc_watchdog_init(void) 378int __init watchdog_nmi_probe(void)
378{ 379{
379 int err; 380 int err;
380 381
381 watchdog_calc_timeouts(); 382 err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
382 383 "powerpc/watchdog:online",
383 err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online", 384 start_wd_on_cpu, stop_wd_on_cpu);
384 start_wd_on_cpu, stop_wd_on_cpu); 385 if (err < 0) {
385 if (err < 0)
386 pr_warn("Watchdog could not be initialized"); 386 pr_warn("Watchdog could not be initialized");
387 387 return err;
388 }
388 return 0; 389 return 0;
389} 390}
390arch_initcall(powerpc_watchdog_init);
391 391
392static void handle_backtrace_ipi(struct pt_regs *regs) 392static void handle_backtrace_ipi(struct pt_regs *regs)
393{ 393{
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 829e89cfcee2..9fb9a1f1e47b 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4409,10 +4409,9 @@ static __init int fixup_ht_bug(void)
4409 return 0; 4409 return 0;
4410 } 4410 }
4411 4411
4412 if (lockup_detector_suspend() != 0) { 4412 cpus_read_lock();
4413 pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n"); 4413
4414 return 0; 4414 hardlockup_detector_perf_stop();
4415 }
4416 4415
4417 x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED); 4416 x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
4418 4417
@@ -4420,9 +4419,7 @@ static __init int fixup_ht_bug(void)
4420 x86_pmu.commit_scheduling = NULL; 4419 x86_pmu.commit_scheduling = NULL;
4421 x86_pmu.stop_scheduling = NULL; 4420 x86_pmu.stop_scheduling = NULL;
4422 4421
4423 lockup_detector_resume(); 4422 hardlockup_detector_perf_restart();
4424
4425 cpus_read_lock();
4426 4423
4427 for_each_online_cpu(c) 4424 for_each_online_cpu(c)
4428 free_excl_cntrs(c); 4425 free_excl_cntrs(c);
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index a36abe2da13e..27e249ed7c5c 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -12,11 +12,31 @@
12 12
13#ifdef CONFIG_LOCKUP_DETECTOR 13#ifdef CONFIG_LOCKUP_DETECTOR
14void lockup_detector_init(void); 14void lockup_detector_init(void);
15void lockup_detector_soft_poweroff(void);
16void lockup_detector_cleanup(void);
17bool is_hardlockup(void);
18
19extern int watchdog_user_enabled;
20extern int nmi_watchdog_user_enabled;
21extern int soft_watchdog_user_enabled;
22extern int watchdog_thresh;
23extern unsigned long watchdog_enabled;
24
25extern struct cpumask watchdog_cpumask;
26extern unsigned long *watchdog_cpumask_bits;
27#ifdef CONFIG_SMP
28extern int sysctl_softlockup_all_cpu_backtrace;
29extern int sysctl_hardlockup_all_cpu_backtrace;
15#else 30#else
16static inline void lockup_detector_init(void) 31#define sysctl_softlockup_all_cpu_backtrace 0
17{ 32#define sysctl_hardlockup_all_cpu_backtrace 0
18} 33#endif /* !CONFIG_SMP */
19#endif 34
35#else /* CONFIG_LOCKUP_DETECTOR */
36static inline void lockup_detector_init(void) { }
37static inline void lockup_detector_soft_poweroff(void) { }
38static inline void lockup_detector_cleanup(void) { }
39#endif /* !CONFIG_LOCKUP_DETECTOR */
20 40
21#ifdef CONFIG_SOFTLOCKUP_DETECTOR 41#ifdef CONFIG_SOFTLOCKUP_DETECTOR
22extern void touch_softlockup_watchdog_sched(void); 42extern void touch_softlockup_watchdog_sched(void);
@@ -24,29 +44,17 @@ extern void touch_softlockup_watchdog(void);
24extern void touch_softlockup_watchdog_sync(void); 44extern void touch_softlockup_watchdog_sync(void);
25extern void touch_all_softlockup_watchdogs(void); 45extern void touch_all_softlockup_watchdogs(void);
26extern unsigned int softlockup_panic; 46extern unsigned int softlockup_panic;
27extern int soft_watchdog_enabled;
28extern atomic_t watchdog_park_in_progress;
29#else 47#else
30static inline void touch_softlockup_watchdog_sched(void) 48static inline void touch_softlockup_watchdog_sched(void) { }
31{ 49static inline void touch_softlockup_watchdog(void) { }
32} 50static inline void touch_softlockup_watchdog_sync(void) { }
33static inline void touch_softlockup_watchdog(void) 51static inline void touch_all_softlockup_watchdogs(void) { }
34{
35}
36static inline void touch_softlockup_watchdog_sync(void)
37{
38}
39static inline void touch_all_softlockup_watchdogs(void)
40{
41}
42#endif 52#endif
43 53
44#ifdef CONFIG_DETECT_HUNG_TASK 54#ifdef CONFIG_DETECT_HUNG_TASK
45void reset_hung_task_detector(void); 55void reset_hung_task_detector(void);
46#else 56#else
47static inline void reset_hung_task_detector(void) 57static inline void reset_hung_task_detector(void) { }
48{
49}
50#endif 58#endif
51 59
52/* 60/*
@@ -54,12 +62,12 @@ static inline void reset_hung_task_detector(void)
54 * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - 62 * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
55 * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. 63 * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
56 * 64 *
57 * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled' 65 * 'watchdog_user_enabled', 'nmi_watchdog_user_enabled' and
58 * are variables that are only used as an 'interface' between the parameters 66 * 'soft_watchdog_user_enabled' are variables that are only used as an
59 * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The 67 * 'interface' between the parameters in /proc/sys/kernel and the internal
60 * 'watchdog_thresh' variable is handled differently because its value is not 68 * state bits in 'watchdog_enabled'. The 'watchdog_thresh' variable is
61 * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh' 69 * handled differently because its value is not boolean, and the lockup
62 * is equal zero. 70 * detectors are 'suspended' while 'watchdog_thresh' is equal zero.
63 */ 71 */
64#define NMI_WATCHDOG_ENABLED_BIT 0 72#define NMI_WATCHDOG_ENABLED_BIT 0
65#define SOFT_WATCHDOG_ENABLED_BIT 1 73#define SOFT_WATCHDOG_ENABLED_BIT 1
@@ -73,17 +81,41 @@ extern unsigned int hardlockup_panic;
73static inline void hardlockup_detector_disable(void) {} 81static inline void hardlockup_detector_disable(void) {}
74#endif 82#endif
75 83
84#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
85# define NMI_WATCHDOG_SYSCTL_PERM 0644
86#else
87# define NMI_WATCHDOG_SYSCTL_PERM 0444
88#endif
89
76#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) 90#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
77extern void arch_touch_nmi_watchdog(void); 91extern void arch_touch_nmi_watchdog(void);
92extern void hardlockup_detector_perf_stop(void);
93extern void hardlockup_detector_perf_restart(void);
94extern void hardlockup_detector_perf_disable(void);
95extern void hardlockup_detector_perf_enable(void);
96extern void hardlockup_detector_perf_cleanup(void);
97extern int hardlockup_detector_perf_init(void);
78#else 98#else
79#if !defined(CONFIG_HAVE_NMI_WATCHDOG) 99static inline void hardlockup_detector_perf_stop(void) { }
100static inline void hardlockup_detector_perf_restart(void) { }
101static inline void hardlockup_detector_perf_disable(void) { }
102static inline void hardlockup_detector_perf_enable(void) { }
103static inline void hardlockup_detector_perf_cleanup(void) { }
104# if !defined(CONFIG_HAVE_NMI_WATCHDOG)
105static inline int hardlockup_detector_perf_init(void) { return -ENODEV; }
80static inline void arch_touch_nmi_watchdog(void) {} 106static inline void arch_touch_nmi_watchdog(void) {}
107# else
108static inline int hardlockup_detector_perf_init(void) { return 0; }
109# endif
81#endif 110#endif
82#endif 111
112void watchdog_nmi_stop(void);
113void watchdog_nmi_start(void);
114int watchdog_nmi_probe(void);
83 115
84/** 116/**
85 * touch_nmi_watchdog - restart NMI watchdog timeout. 117 * touch_nmi_watchdog - restart NMI watchdog timeout.
86 * 118 *
87 * If the architecture supports the NMI watchdog, touch_nmi_watchdog() 119 * If the architecture supports the NMI watchdog, touch_nmi_watchdog()
88 * may be used to reset the timeout - for code which intentionally 120 * may be used to reset the timeout - for code which intentionally
89 * disables interrupts for a long time. This call is stateless. 121 * disables interrupts for a long time. This call is stateless.
@@ -153,22 +185,6 @@ static inline bool trigger_single_cpu_backtrace(int cpu)
153u64 hw_nmi_get_sample_period(int watchdog_thresh); 185u64 hw_nmi_get_sample_period(int watchdog_thresh);
154#endif 186#endif
155 187
156#ifdef CONFIG_LOCKUP_DETECTOR
157extern int nmi_watchdog_enabled;
158extern int watchdog_user_enabled;
159extern int watchdog_thresh;
160extern unsigned long watchdog_enabled;
161extern struct cpumask watchdog_cpumask;
162extern unsigned long *watchdog_cpumask_bits;
163extern int __read_mostly watchdog_suspended;
164#ifdef CONFIG_SMP
165extern int sysctl_softlockup_all_cpu_backtrace;
166extern int sysctl_hardlockup_all_cpu_backtrace;
167#else
168#define sysctl_softlockup_all_cpu_backtrace 0
169#define sysctl_hardlockup_all_cpu_backtrace 0
170#endif
171
172#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ 188#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \
173 defined(CONFIG_HARDLOCKUP_DETECTOR) 189 defined(CONFIG_HARDLOCKUP_DETECTOR)
174void watchdog_update_hrtimer_threshold(u64 period); 190void watchdog_update_hrtimer_threshold(u64 period);
@@ -176,7 +192,6 @@ void watchdog_update_hrtimer_threshold(u64 period);
176static inline void watchdog_update_hrtimer_threshold(u64 period) { } 192static inline void watchdog_update_hrtimer_threshold(u64 period) { }
177#endif 193#endif
178 194
179extern bool is_hardlockup(void);
180struct ctl_table; 195struct ctl_table;
181extern int proc_watchdog(struct ctl_table *, int , 196extern int proc_watchdog(struct ctl_table *, int ,
182 void __user *, size_t *, loff_t *); 197 void __user *, size_t *, loff_t *);
@@ -188,18 +203,6 @@ extern int proc_watchdog_thresh(struct ctl_table *, int ,
188 void __user *, size_t *, loff_t *); 203 void __user *, size_t *, loff_t *);
189extern int proc_watchdog_cpumask(struct ctl_table *, int, 204extern int proc_watchdog_cpumask(struct ctl_table *, int,
190 void __user *, size_t *, loff_t *); 205 void __user *, size_t *, loff_t *);
191extern int lockup_detector_suspend(void);
192extern void lockup_detector_resume(void);
193#else
194static inline int lockup_detector_suspend(void)
195{
196 return 0;
197}
198
199static inline void lockup_detector_resume(void)
200{
201}
202#endif
203 206
204#ifdef CONFIG_HAVE_ACPI_APEI_NMI 207#ifdef CONFIG_HAVE_ACPI_APEI_NMI
205#include <asm/nmi.h> 208#include <asm/nmi.h>
diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h
index 12910cf19869..c149aa7bedf3 100644
--- a/include/linux/smpboot.h
+++ b/include/linux/smpboot.h
@@ -55,7 +55,7 @@ smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
55} 55}
56 56
57void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread); 57void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread);
58int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, 58void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
59 const struct cpumask *); 59 const struct cpumask *);
60 60
61#endif 61#endif
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8de11a29e495..d851df22f5c5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -24,6 +24,7 @@
24#include <linux/lockdep.h> 24#include <linux/lockdep.h>
25#include <linux/tick.h> 25#include <linux/tick.h>
26#include <linux/irq.h> 26#include <linux/irq.h>
27#include <linux/nmi.h>
27#include <linux/smpboot.h> 28#include <linux/smpboot.h>
28#include <linux/relay.h> 29#include <linux/relay.h>
29#include <linux/slab.h> 30#include <linux/slab.h>
@@ -897,6 +898,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
897 898
898out: 899out:
899 cpus_write_unlock(); 900 cpus_write_unlock();
901 /*
902 * Do post unplug cleanup. This is still protected against
903 * concurrent CPU hotplug via cpu_add_remove_lock.
904 */
905 lockup_detector_cleanup();
900 return ret; 906 return ret;
901} 907}
902 908
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 1d71c051a951..5043e7433f4b 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -344,39 +344,30 @@ EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
344 * by the client, but only by calling this function. 344 * by the client, but only by calling this function.
345 * This function can only be called on a registered smp_hotplug_thread. 345 * This function can only be called on a registered smp_hotplug_thread.
346 */ 346 */
347int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, 347void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
348 const struct cpumask *new) 348 const struct cpumask *new)
349{ 349{
350 struct cpumask *old = plug_thread->cpumask; 350 struct cpumask *old = plug_thread->cpumask;
351 cpumask_var_t tmp; 351 static struct cpumask tmp;
352 unsigned int cpu; 352 unsigned int cpu;
353 353
354 if (!alloc_cpumask_var(&tmp, GFP_KERNEL)) 354 lockdep_assert_cpus_held();
355 return -ENOMEM;
356
357 get_online_cpus();
358 mutex_lock(&smpboot_threads_lock); 355 mutex_lock(&smpboot_threads_lock);
359 356
360 /* Park threads that were exclusively enabled on the old mask. */ 357 /* Park threads that were exclusively enabled on the old mask. */
361 cpumask_andnot(tmp, old, new); 358 cpumask_andnot(&tmp, old, new);
362 for_each_cpu_and(cpu, tmp, cpu_online_mask) 359 for_each_cpu_and(cpu, &tmp, cpu_online_mask)
363 smpboot_park_thread(plug_thread, cpu); 360 smpboot_park_thread(plug_thread, cpu);
364 361
365 /* Unpark threads that are exclusively enabled on the new mask. */ 362 /* Unpark threads that are exclusively enabled on the new mask. */
366 cpumask_andnot(tmp, new, old); 363 cpumask_andnot(&tmp, new, old);
367 for_each_cpu_and(cpu, tmp, cpu_online_mask) 364 for_each_cpu_and(cpu, &tmp, cpu_online_mask)
368 smpboot_unpark_thread(plug_thread, cpu); 365 smpboot_unpark_thread(plug_thread, cpu);
369 366
370 cpumask_copy(old, new); 367 cpumask_copy(old, new);
371 368
372 mutex_unlock(&smpboot_threads_lock); 369 mutex_unlock(&smpboot_threads_lock);
373 put_online_cpus();
374
375 free_cpumask_var(tmp);
376
377 return 0;
378} 370}
379EXPORT_SYMBOL_GPL(smpboot_update_cpumask_percpu_thread);
380 371
381static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD); 372static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);
382 373
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4da9e622471f..d9c31bc2eaea 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -872,9 +872,9 @@ static struct ctl_table kern_table[] = {
872#if defined(CONFIG_LOCKUP_DETECTOR) 872#if defined(CONFIG_LOCKUP_DETECTOR)
873 { 873 {
874 .procname = "watchdog", 874 .procname = "watchdog",
875 .data = &watchdog_user_enabled, 875 .data = &watchdog_user_enabled,
876 .maxlen = sizeof (int), 876 .maxlen = sizeof(int),
877 .mode = 0644, 877 .mode = 0644,
878 .proc_handler = proc_watchdog, 878 .proc_handler = proc_watchdog,
879 .extra1 = &zero, 879 .extra1 = &zero,
880 .extra2 = &one, 880 .extra2 = &one,
@@ -890,16 +890,12 @@ static struct ctl_table kern_table[] = {
890 }, 890 },
891 { 891 {
892 .procname = "nmi_watchdog", 892 .procname = "nmi_watchdog",
893 .data = &nmi_watchdog_enabled, 893 .data = &nmi_watchdog_user_enabled,
894 .maxlen = sizeof (int), 894 .maxlen = sizeof(int),
895 .mode = 0644, 895 .mode = NMI_WATCHDOG_SYSCTL_PERM,
896 .proc_handler = proc_nmi_watchdog, 896 .proc_handler = proc_nmi_watchdog,
897 .extra1 = &zero, 897 .extra1 = &zero,
898#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
899 .extra2 = &one, 898 .extra2 = &one,
900#else
901 .extra2 = &zero,
902#endif
903 }, 899 },
904 { 900 {
905 .procname = "watchdog_cpumask", 901 .procname = "watchdog_cpumask",
@@ -911,9 +907,9 @@ static struct ctl_table kern_table[] = {
911#ifdef CONFIG_SOFTLOCKUP_DETECTOR 907#ifdef CONFIG_SOFTLOCKUP_DETECTOR
912 { 908 {
913 .procname = "soft_watchdog", 909 .procname = "soft_watchdog",
914 .data = &soft_watchdog_enabled, 910 .data = &soft_watchdog_user_enabled,
915 .maxlen = sizeof (int), 911 .maxlen = sizeof(int),
916 .mode = 0644, 912 .mode = 0644,
917 .proc_handler = proc_soft_watchdog, 913 .proc_handler = proc_soft_watchdog,
918 .extra1 = &zero, 914 .extra1 = &zero,
919 .extra2 = &one, 915 .extra2 = &one,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index f5d52024f6b7..6bcb854909c0 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -29,20 +29,29 @@
29#include <linux/kvm_para.h> 29#include <linux/kvm_para.h>
30#include <linux/kthread.h> 30#include <linux/kthread.h>
31 31
32/* Watchdog configuration */ 32static DEFINE_MUTEX(watchdog_mutex);
33static DEFINE_MUTEX(watchdog_proc_mutex);
34
35int __read_mostly nmi_watchdog_enabled;
36 33
37#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) 34#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
38unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED | 35# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED)
39 NMI_WATCHDOG_ENABLED; 36# define NMI_WATCHDOG_DEFAULT 1
40#else 37#else
41unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; 38# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED)
39# define NMI_WATCHDOG_DEFAULT 0
42#endif 40#endif
43 41
42unsigned long __read_mostly watchdog_enabled;
43int __read_mostly watchdog_user_enabled = 1;
44int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT;
45int __read_mostly soft_watchdog_user_enabled = 1;
46int __read_mostly watchdog_thresh = 10;
47int __read_mostly nmi_watchdog_available;
48
49struct cpumask watchdog_allowed_mask __read_mostly;
50
51struct cpumask watchdog_cpumask __read_mostly;
52unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
53
44#ifdef CONFIG_HARDLOCKUP_DETECTOR 54#ifdef CONFIG_HARDLOCKUP_DETECTOR
45/* boot commands */
46/* 55/*
47 * Should we panic when a soft-lockup or hard-lockup occurs: 56 * Should we panic when a soft-lockup or hard-lockup occurs:
48 */ 57 */
@@ -56,9 +65,9 @@ unsigned int __read_mostly hardlockup_panic =
56 * kernel command line parameters are parsed, because otherwise it is not 65 * kernel command line parameters are parsed, because otherwise it is not
57 * possible to override this in hardlockup_panic_setup(). 66 * possible to override this in hardlockup_panic_setup().
58 */ 67 */
59void hardlockup_detector_disable(void) 68void __init hardlockup_detector_disable(void)
60{ 69{
61 watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; 70 nmi_watchdog_user_enabled = 0;
62} 71}
63 72
64static int __init hardlockup_panic_setup(char *str) 73static int __init hardlockup_panic_setup(char *str)
@@ -68,48 +77,24 @@ static int __init hardlockup_panic_setup(char *str)
68 else if (!strncmp(str, "nopanic", 7)) 77 else if (!strncmp(str, "nopanic", 7))
69 hardlockup_panic = 0; 78 hardlockup_panic = 0;
70 else if (!strncmp(str, "0", 1)) 79 else if (!strncmp(str, "0", 1))
71 watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; 80 nmi_watchdog_user_enabled = 0;
72 else if (!strncmp(str, "1", 1)) 81 else if (!strncmp(str, "1", 1))
73 watchdog_enabled |= NMI_WATCHDOG_ENABLED; 82 nmi_watchdog_user_enabled = 1;
74 return 1; 83 return 1;
75} 84}
76__setup("nmi_watchdog=", hardlockup_panic_setup); 85__setup("nmi_watchdog=", hardlockup_panic_setup);
77 86
78#endif 87# ifdef CONFIG_SMP
79
80#ifdef CONFIG_SOFTLOCKUP_DETECTOR
81int __read_mostly soft_watchdog_enabled;
82#endif
83
84int __read_mostly watchdog_user_enabled;
85int __read_mostly watchdog_thresh = 10;
86
87#ifdef CONFIG_SMP
88int __read_mostly sysctl_softlockup_all_cpu_backtrace;
89int __read_mostly sysctl_hardlockup_all_cpu_backtrace; 88int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
90#endif
91struct cpumask watchdog_cpumask __read_mostly;
92unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
93 89
94/* 90static int __init hardlockup_all_cpu_backtrace_setup(char *str)
95 * The 'watchdog_running' variable is set to 1 when the watchdog threads 91{
96 * are registered/started and is set to 0 when the watchdog threads are 92 sysctl_hardlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
97 * unregistered/stopped, so it is an indicator whether the threads exist. 93 return 1;
98 */ 94}
99static int __read_mostly watchdog_running; 95__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
100/* 96# endif /* CONFIG_SMP */
101 * If a subsystem has a need to deactivate the watchdog temporarily, it 97#endif /* CONFIG_HARDLOCKUP_DETECTOR */
102 * can use the suspend/resume interface to achieve this. The content of
103 * the 'watchdog_suspended' variable reflects this state. Existing threads
104 * are parked/unparked by the lockup_detector_{suspend|resume} functions
105 * (see comment blocks pertaining to those functions for further details).
106 *
107 * 'watchdog_suspended' also prevents threads from being registered/started
108 * or unregistered/stopped via parameters in /proc/sys/kernel, so the state
109 * of 'watchdog_running' cannot change while the watchdog is deactivated
110 * temporarily (see related code in 'proc' handlers).
111 */
112int __read_mostly watchdog_suspended;
113 98
114/* 99/*
115 * These functions can be overridden if an architecture implements its 100 * These functions can be overridden if an architecture implements its
@@ -121,36 +106,68 @@ int __read_mostly watchdog_suspended;
121 */ 106 */
122int __weak watchdog_nmi_enable(unsigned int cpu) 107int __weak watchdog_nmi_enable(unsigned int cpu)
123{ 108{
109 hardlockup_detector_perf_enable();
124 return 0; 110 return 0;
125} 111}
112
126void __weak watchdog_nmi_disable(unsigned int cpu) 113void __weak watchdog_nmi_disable(unsigned int cpu)
127{ 114{
115 hardlockup_detector_perf_disable();
128} 116}
129 117
130/* 118/* Return 0, if a NMI watchdog is available. Error code otherwise */
131 * watchdog_nmi_reconfigure can be implemented to be notified after any 119int __weak __init watchdog_nmi_probe(void)
132 * watchdog configuration change. The arch hardlockup watchdog should 120{
133 * respond to the following variables: 121 return hardlockup_detector_perf_init();
134 * - nmi_watchdog_enabled 122}
123
124/**
125 * watchdog_nmi_stop - Stop the watchdog for reconfiguration
126 *
127 * The reconfiguration steps are:
128 * watchdog_nmi_stop();
129 * update_variables();
130 * watchdog_nmi_start();
131 */
132void __weak watchdog_nmi_stop(void) { }
133
134/**
135 * watchdog_nmi_start - Start the watchdog after reconfiguration
136 *
137 * Counterpart to watchdog_nmi_stop().
138 *
139 * The following variables have been updated in update_variables() and
140 * contain the currently valid configuration:
141 * - watchdog_enabled
135 * - watchdog_thresh 142 * - watchdog_thresh
136 * - watchdog_cpumask 143 * - watchdog_cpumask
137 * - sysctl_hardlockup_all_cpu_backtrace
138 * - hardlockup_panic
139 * - watchdog_suspended
140 */ 144 */
141void __weak watchdog_nmi_reconfigure(void) 145void __weak watchdog_nmi_start(void) { }
146
147/**
148 * lockup_detector_update_enable - Update the sysctl enable bit
149 *
150 * Caller needs to make sure that the NMI/perf watchdogs are off, so this
151 * can't race with watchdog_nmi_disable().
152 */
153static void lockup_detector_update_enable(void)
142{ 154{
155 watchdog_enabled = 0;
156 if (!watchdog_user_enabled)
157 return;
158 if (nmi_watchdog_available && nmi_watchdog_user_enabled)
159 watchdog_enabled |= NMI_WATCHDOG_ENABLED;
160 if (soft_watchdog_user_enabled)
161 watchdog_enabled |= SOFT_WATCHDOG_ENABLED;
143} 162}
144 163
145
146#ifdef CONFIG_SOFTLOCKUP_DETECTOR 164#ifdef CONFIG_SOFTLOCKUP_DETECTOR
147 165
148/* Helper for online, unparked cpus. */ 166/* Global variables, exported for sysctl */
149#define for_each_watchdog_cpu(cpu) \ 167unsigned int __read_mostly softlockup_panic =
150 for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) 168 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
151
152atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
153 169
170static bool softlockup_threads_initialized __read_mostly;
154static u64 __read_mostly sample_period; 171static u64 __read_mostly sample_period;
155 172
156static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); 173static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
@@ -164,50 +181,40 @@ static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
164static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); 181static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
165static unsigned long soft_lockup_nmi_warn; 182static unsigned long soft_lockup_nmi_warn;
166 183
167unsigned int __read_mostly softlockup_panic =
168 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
169
170static int __init softlockup_panic_setup(char *str) 184static int __init softlockup_panic_setup(char *str)
171{ 185{
172 softlockup_panic = simple_strtoul(str, NULL, 0); 186 softlockup_panic = simple_strtoul(str, NULL, 0);
173
174 return 1; 187 return 1;
175} 188}
176__setup("softlockup_panic=", softlockup_panic_setup); 189__setup("softlockup_panic=", softlockup_panic_setup);
177 190
178static int __init nowatchdog_setup(char *str) 191static int __init nowatchdog_setup(char *str)
179{ 192{
180 watchdog_enabled = 0; 193 watchdog_user_enabled = 0;
181 return 1; 194 return 1;
182} 195}
183__setup("nowatchdog", nowatchdog_setup); 196__setup("nowatchdog", nowatchdog_setup);
184 197
185static int __init nosoftlockup_setup(char *str) 198static int __init nosoftlockup_setup(char *str)
186{ 199{
187 watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED; 200 soft_watchdog_user_enabled = 0;
188 return 1; 201 return 1;
189} 202}
190__setup("nosoftlockup", nosoftlockup_setup); 203__setup("nosoftlockup", nosoftlockup_setup);
191 204
192#ifdef CONFIG_SMP 205#ifdef CONFIG_SMP
206int __read_mostly sysctl_softlockup_all_cpu_backtrace;
207
193static int __init softlockup_all_cpu_backtrace_setup(char *str) 208static int __init softlockup_all_cpu_backtrace_setup(char *str)
194{ 209{
195 sysctl_softlockup_all_cpu_backtrace = 210 sysctl_softlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
196 !!simple_strtol(str, NULL, 0);
197 return 1; 211 return 1;
198} 212}
199__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); 213__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
200#ifdef CONFIG_HARDLOCKUP_DETECTOR
201static int __init hardlockup_all_cpu_backtrace_setup(char *str)
202{
203 sysctl_hardlockup_all_cpu_backtrace =
204 !!simple_strtol(str, NULL, 0);
205 return 1;
206}
207__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
208#endif
209#endif 214#endif
210 215
216static void __lockup_detector_cleanup(void);
217
211/* 218/*
212 * Hard-lockup warnings should be triggered after just a few seconds. Soft- 219 * Hard-lockup warnings should be triggered after just a few seconds. Soft-
213 * lockups can have false positives under extreme conditions. So we generally 220 * lockups can have false positives under extreme conditions. So we generally
@@ -278,11 +285,15 @@ void touch_all_softlockup_watchdogs(void)
278 int cpu; 285 int cpu;
279 286
280 /* 287 /*
281 * this is done lockless 288 * watchdog_mutex cannpt be taken here, as this might be called
282 * do we care if a 0 races with a timestamp? 289 * from (soft)interrupt context, so the access to
283 * all it means is the softlock check starts one cycle later 290 * watchdog_allowed_cpumask might race with a concurrent update.
291 *
292 * The watchdog time stamp can race against a concurrent real
293 * update as well, the only side effect might be a cycle delay for
294 * the softlockup check.
284 */ 295 */
285 for_each_watchdog_cpu(cpu) 296 for_each_cpu(cpu, &watchdog_allowed_mask)
286 per_cpu(watchdog_touch_ts, cpu) = 0; 297 per_cpu(watchdog_touch_ts, cpu) = 0;
287 wq_watchdog_touch(-1); 298 wq_watchdog_touch(-1);
288} 299}
@@ -322,9 +333,6 @@ static void watchdog_interrupt_count(void)
322 __this_cpu_inc(hrtimer_interrupts); 333 __this_cpu_inc(hrtimer_interrupts);
323} 334}
324 335
325static int watchdog_enable_all_cpus(void);
326static void watchdog_disable_all_cpus(void);
327
328/* watchdog kicker functions */ 336/* watchdog kicker functions */
329static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) 337static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
330{ 338{
@@ -333,7 +341,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
333 int duration; 341 int duration;
334 int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; 342 int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
335 343
336 if (atomic_read(&watchdog_park_in_progress) != 0) 344 if (!watchdog_enabled)
337 return HRTIMER_NORESTART; 345 return HRTIMER_NORESTART;
338 346
339 /* kick the hardlockup detector */ 347 /* kick the hardlockup detector */
@@ -447,32 +455,38 @@ static void watchdog_set_prio(unsigned int policy, unsigned int prio)
447 455
448static void watchdog_enable(unsigned int cpu) 456static void watchdog_enable(unsigned int cpu)
449{ 457{
450 struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); 458 struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
451 459
452 /* kick off the timer for the hardlockup detector */ 460 /*
461 * Start the timer first to prevent the NMI watchdog triggering
462 * before the timer has a chance to fire.
463 */
453 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 464 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
454 hrtimer->function = watchdog_timer_fn; 465 hrtimer->function = watchdog_timer_fn;
455
456 /* Enable the perf event */
457 watchdog_nmi_enable(cpu);
458
459 /* done here because hrtimer_start can only pin to smp_processor_id() */
460 hrtimer_start(hrtimer, ns_to_ktime(sample_period), 466 hrtimer_start(hrtimer, ns_to_ktime(sample_period),
461 HRTIMER_MODE_REL_PINNED); 467 HRTIMER_MODE_REL_PINNED);
462 468
463 /* initialize timestamp */ 469 /* Initialize timestamp */
464 watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
465 __touch_watchdog(); 470 __touch_watchdog();
471 /* Enable the perf event */
472 if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
473 watchdog_nmi_enable(cpu);
474
475 watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
466} 476}
467 477
468static void watchdog_disable(unsigned int cpu) 478static void watchdog_disable(unsigned int cpu)
469{ 479{
470 struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); 480 struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
471 481
472 watchdog_set_prio(SCHED_NORMAL, 0); 482 watchdog_set_prio(SCHED_NORMAL, 0);
473 hrtimer_cancel(hrtimer); 483 /*
474 /* disable the perf event */ 484 * Disable the perf event first. That prevents that a large delay
485 * between disabling the timer and disabling the perf event causes
486 * the perf NMI to detect a false positive.
487 */
475 watchdog_nmi_disable(cpu); 488 watchdog_nmi_disable(cpu);
489 hrtimer_cancel(hrtimer);
476} 490}
477 491
478static void watchdog_cleanup(unsigned int cpu, bool online) 492static void watchdog_cleanup(unsigned int cpu, bool online)
@@ -499,21 +513,6 @@ static void watchdog(unsigned int cpu)
499 __this_cpu_write(soft_lockup_hrtimer_cnt, 513 __this_cpu_write(soft_lockup_hrtimer_cnt,
500 __this_cpu_read(hrtimer_interrupts)); 514 __this_cpu_read(hrtimer_interrupts));
501 __touch_watchdog(); 515 __touch_watchdog();
502
503 /*
504 * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the
505 * failure path. Check for failures that can occur asynchronously -
506 * for example, when CPUs are on-lined - and shut down the hardware
507 * perf event on each CPU accordingly.
508 *
509 * The only non-obvious place this bit can be cleared is through
510 * watchdog_nmi_enable(), so a pr_info() is placed there. Placing a
511 * pr_info here would be too noisy as it would result in a message
512 * every few seconds if the hardlockup was disabled but the softlockup
513 * enabled.
514 */
515 if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
516 watchdog_nmi_disable(cpu);
517} 516}
518 517
519static struct smp_hotplug_thread watchdog_threads = { 518static struct smp_hotplug_thread watchdog_threads = {
@@ -527,295 +526,174 @@ static struct smp_hotplug_thread watchdog_threads = {
527 .unpark = watchdog_enable, 526 .unpark = watchdog_enable,
528}; 527};
529 528
530/* 529static void softlockup_update_smpboot_threads(void)
531 * park all watchdog threads that are specified in 'watchdog_cpumask'
532 *
533 * This function returns an error if kthread_park() of a watchdog thread
534 * fails. In this situation, the watchdog threads of some CPUs can already
535 * be parked and the watchdog threads of other CPUs can still be runnable.
536 * Callers are expected to handle this special condition as appropriate in
537 * their context.
538 *
539 * This function may only be called in a context that is protected against
540 * races with CPU hotplug - for example, via get_online_cpus().
541 */
542static int watchdog_park_threads(void)
543{ 530{
544 int cpu, ret = 0; 531 lockdep_assert_held(&watchdog_mutex);
545 532
546 atomic_set(&watchdog_park_in_progress, 1); 533 if (!softlockup_threads_initialized)
534 return;
547 535
548 for_each_watchdog_cpu(cpu) { 536 smpboot_update_cpumask_percpu_thread(&watchdog_threads,
549 ret = kthread_park(per_cpu(softlockup_watchdog, cpu)); 537 &watchdog_allowed_mask);
550 if (ret)
551 break;
552 }
553
554 atomic_set(&watchdog_park_in_progress, 0);
555
556 return ret;
557} 538}
558 539
559/* 540/* Temporarily park all watchdog threads */
560 * unpark all watchdog threads that are specified in 'watchdog_cpumask' 541static void softlockup_park_all_threads(void)
561 *
562 * This function may only be called in a context that is protected against
563 * races with CPU hotplug - for example, via get_online_cpus().
564 */
565static void watchdog_unpark_threads(void)
566{ 542{
567 int cpu; 543 cpumask_clear(&watchdog_allowed_mask);
568 544 softlockup_update_smpboot_threads();
569 for_each_watchdog_cpu(cpu)
570 kthread_unpark(per_cpu(softlockup_watchdog, cpu));
571} 545}
572 546
573static int update_watchdog_all_cpus(void) 547/* Unpark enabled threads */
548static void softlockup_unpark_threads(void)
574{ 549{
575 int ret; 550 cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
576 551 softlockup_update_smpboot_threads();
577 ret = watchdog_park_threads();
578 if (ret)
579 return ret;
580
581 watchdog_unpark_threads();
582
583 return 0;
584} 552}
585 553
586static int watchdog_enable_all_cpus(void) 554static void lockup_detector_reconfigure(void)
587{ 555{
588 int err = 0; 556 cpus_read_lock();
589 557 watchdog_nmi_stop();
590 if (!watchdog_running) { 558 softlockup_park_all_threads();
591 err = smpboot_register_percpu_thread_cpumask(&watchdog_threads, 559 set_sample_period();
592 &watchdog_cpumask); 560 lockup_detector_update_enable();
593 if (err) 561 if (watchdog_enabled && watchdog_thresh)
594 pr_err("Failed to create watchdog threads, disabled\n"); 562 softlockup_unpark_threads();
595 else 563 watchdog_nmi_start();
596 watchdog_running = 1; 564 cpus_read_unlock();
597 } else { 565 /*
598 /* 566 * Must be called outside the cpus locked section to prevent
599 * Enable/disable the lockup detectors or 567 * recursive locking in the perf code.
600 * change the sample period 'on the fly'. 568 */
601 */ 569 __lockup_detector_cleanup();
602 err = update_watchdog_all_cpus();
603
604 if (err) {
605 watchdog_disable_all_cpus();
606 pr_err("Failed to update lockup detectors, disabled\n");
607 }
608 }
609
610 if (err)
611 watchdog_enabled = 0;
612
613 return err;
614} 570}
615 571
616static void watchdog_disable_all_cpus(void) 572/*
573 * Create the watchdog thread infrastructure and configure the detector(s).
574 *
575 * The threads are not unparked as watchdog_allowed_mask is empty. When
576 * the threads are sucessfully initialized, take the proper locks and
577 * unpark the threads in the watchdog_cpumask if the watchdog is enabled.
578 */
579static __init void lockup_detector_setup(void)
617{ 580{
618 if (watchdog_running) { 581 int ret;
619 watchdog_running = 0;
620 smpboot_unregister_percpu_thread(&watchdog_threads);
621 }
622}
623 582
624#ifdef CONFIG_SYSCTL 583 /*
625static int watchdog_update_cpus(void) 584 * If sysctl is off and watchdog got disabled on the command line,
626{ 585 * nothing to do here.
627 return smpboot_update_cpumask_percpu_thread( 586 */
628 &watchdog_threads, &watchdog_cpumask); 587 lockup_detector_update_enable();
629}
630#endif
631 588
632#else /* SOFTLOCKUP */ 589 if (!IS_ENABLED(CONFIG_SYSCTL) &&
633static int watchdog_park_threads(void) 590 !(watchdog_enabled && watchdog_thresh))
634{ 591 return;
635 return 0;
636}
637 592
638static void watchdog_unpark_threads(void) 593 ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
639{ 594 &watchdog_allowed_mask);
640} 595 if (ret) {
596 pr_err("Failed to initialize soft lockup detector threads\n");
597 return;
598 }
641 599
642static int watchdog_enable_all_cpus(void) 600 mutex_lock(&watchdog_mutex);
643{ 601 softlockup_threads_initialized = true;
644 return 0; 602 lockup_detector_reconfigure();
603 mutex_unlock(&watchdog_mutex);
645} 604}
646 605
647static void watchdog_disable_all_cpus(void) 606#else /* CONFIG_SOFTLOCKUP_DETECTOR */
607static inline int watchdog_park_threads(void) { return 0; }
608static inline void watchdog_unpark_threads(void) { }
609static inline int watchdog_enable_all_cpus(void) { return 0; }
610static inline void watchdog_disable_all_cpus(void) { }
611static void lockup_detector_reconfigure(void)
648{ 612{
613 cpus_read_lock();
614 watchdog_nmi_stop();
615 lockup_detector_update_enable();
616 watchdog_nmi_start();
617 cpus_read_unlock();
649} 618}
650 619static inline void lockup_detector_setup(void)
651#ifdef CONFIG_SYSCTL
652static int watchdog_update_cpus(void)
653{ 620{
654 return 0; 621 lockup_detector_reconfigure();
655} 622}
656#endif 623#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */
657 624
658static void set_sample_period(void) 625static void __lockup_detector_cleanup(void)
659{ 626{
627 lockdep_assert_held(&watchdog_mutex);
628 hardlockup_detector_perf_cleanup();
660} 629}
661#endif /* SOFTLOCKUP */
662 630
663/* 631/**
664 * Suspend the hard and soft lockup detector by parking the watchdog threads. 632 * lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes
633 *
634 * Caller must not hold the cpu hotplug rwsem.
665 */ 635 */
666int lockup_detector_suspend(void) 636void lockup_detector_cleanup(void)
667{ 637{
668 int ret = 0; 638 mutex_lock(&watchdog_mutex);
669 639 __lockup_detector_cleanup();
670 get_online_cpus(); 640 mutex_unlock(&watchdog_mutex);
671 mutex_lock(&watchdog_proc_mutex);
672 /*
673 * Multiple suspend requests can be active in parallel (counted by
674 * the 'watchdog_suspended' variable). If the watchdog threads are
675 * running, the first caller takes care that they will be parked.
676 * The state of 'watchdog_running' cannot change while a suspend
677 * request is active (see related code in 'proc' handlers).
678 */
679 if (watchdog_running && !watchdog_suspended)
680 ret = watchdog_park_threads();
681
682 if (ret == 0)
683 watchdog_suspended++;
684 else {
685 watchdog_disable_all_cpus();
686 pr_err("Failed to suspend lockup detectors, disabled\n");
687 watchdog_enabled = 0;
688 }
689
690 watchdog_nmi_reconfigure();
691
692 mutex_unlock(&watchdog_proc_mutex);
693
694 return ret;
695} 641}
696 642
697/* 643/**
698 * Resume the hard and soft lockup detector by unparking the watchdog threads. 644 * lockup_detector_soft_poweroff - Interface to stop lockup detector(s)
645 *
646 * Special interface for parisc. It prevents lockup detector warnings from
647 * the default pm_poweroff() function which busy loops forever.
699 */ 648 */
700void lockup_detector_resume(void) 649void lockup_detector_soft_poweroff(void)
701{ 650{
702 mutex_lock(&watchdog_proc_mutex); 651 watchdog_enabled = 0;
703
704 watchdog_suspended--;
705 /*
706 * The watchdog threads are unparked if they were previously running
707 * and if there is no more active suspend request.
708 */
709 if (watchdog_running && !watchdog_suspended)
710 watchdog_unpark_threads();
711
712 watchdog_nmi_reconfigure();
713
714 mutex_unlock(&watchdog_proc_mutex);
715 put_online_cpus();
716} 652}
717 653
718#ifdef CONFIG_SYSCTL 654#ifdef CONFIG_SYSCTL
719 655
720/* 656/* Propagate any changes to the watchdog threads */
721 * Update the run state of the lockup detectors. 657static void proc_watchdog_update(void)
722 */
723static int proc_watchdog_update(void)
724{ 658{
725 int err = 0; 659 /* Remove impossible cpus to keep sysctl output clean. */
726 660 cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
727 /* 661 lockup_detector_reconfigure();
728 * Watchdog threads won't be started if they are already active.
729 * The 'watchdog_running' variable in watchdog_*_all_cpus() takes
730 * care of this. If those threads are already active, the sample
731 * period will be updated and the lockup detectors will be enabled
732 * or disabled 'on the fly'.
733 */
734 if (watchdog_enabled && watchdog_thresh)
735 err = watchdog_enable_all_cpus();
736 else
737 watchdog_disable_all_cpus();
738
739 watchdog_nmi_reconfigure();
740
741 return err;
742
743} 662}
744 663
745/* 664/*
746 * common function for watchdog, nmi_watchdog and soft_watchdog parameter 665 * common function for watchdog, nmi_watchdog and soft_watchdog parameter
747 * 666 *
748 * caller | table->data points to | 'which' contains the flag(s) 667 * caller | table->data points to | 'which'
749 * -------------------|-----------------------|----------------------------- 668 * -------------------|----------------------------|--------------------------
750 * proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed 669 * proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED |
751 * | | with SOFT_WATCHDOG_ENABLED 670 * | | SOFT_WATCHDOG_ENABLED
752 * -------------------|-----------------------|----------------------------- 671 * -------------------|----------------------------|--------------------------
753 * proc_nmi_watchdog | nmi_watchdog_enabled | NMI_WATCHDOG_ENABLED 672 * proc_nmi_watchdog | nmi_watchdog_user_enabled | NMI_WATCHDOG_ENABLED
754 * -------------------|-----------------------|----------------------------- 673 * -------------------|----------------------------|--------------------------
755 * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED 674 * proc_soft_watchdog | soft_watchdog_user_enabled | SOFT_WATCHDOG_ENABLED
756 */ 675 */
757static int proc_watchdog_common(int which, struct ctl_table *table, int write, 676static int proc_watchdog_common(int which, struct ctl_table *table, int write,
758 void __user *buffer, size_t *lenp, loff_t *ppos) 677 void __user *buffer, size_t *lenp, loff_t *ppos)
759{ 678{
760 int err, old, new; 679 int err, old, *param = table->data;
761 int *watchdog_param = (int *)table->data;
762 680
763 get_online_cpus(); 681 mutex_lock(&watchdog_mutex);
764 mutex_lock(&watchdog_proc_mutex);
765 682
766 if (watchdog_suspended) {
767 /* no parameter changes allowed while watchdog is suspended */
768 err = -EAGAIN;
769 goto out;
770 }
771
772 /*
773 * If the parameter is being read return the state of the corresponding
774 * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
775 * run state of the lockup detectors.
776 */
777 if (!write) { 683 if (!write) {
778 *watchdog_param = (watchdog_enabled & which) != 0; 684 /*
685 * On read synchronize the userspace interface. This is a
686 * racy snapshot.
687 */
688 *param = (watchdog_enabled & which) != 0;
779 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 689 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
780 } else { 690 } else {
691 old = READ_ONCE(*param);
781 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 692 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
782 if (err) 693 if (!err && old != READ_ONCE(*param))
783 goto out; 694 proc_watchdog_update();
784
785 /*
786 * There is a race window between fetching the current value
787 * from 'watchdog_enabled' and storing the new value. During
788 * this race window, watchdog_nmi_enable() can sneak in and
789 * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'.
790 * The 'cmpxchg' detects this race and the loop retries.
791 */
792 do {
793 old = watchdog_enabled;
794 /*
795 * If the parameter value is not zero set the
796 * corresponding bit(s), else clear it(them).
797 */
798 if (*watchdog_param)
799 new = old | which;
800 else
801 new = old & ~which;
802 } while (cmpxchg(&watchdog_enabled, old, new) != old);
803
804 /*
805 * Update the run state of the lockup detectors. There is _no_
806 * need to check the value returned by proc_watchdog_update()
807 * and to restore the previous value of 'watchdog_enabled' as
808 * both lockup detectors are disabled if proc_watchdog_update()
809 * returns an error.
810 */
811 if (old == new)
812 goto out;
813
814 err = proc_watchdog_update();
815 } 695 }
816out: 696 mutex_unlock(&watchdog_mutex);
817 mutex_unlock(&watchdog_proc_mutex);
818 put_online_cpus();
819 return err; 697 return err;
820} 698}
821 699
@@ -835,6 +713,8 @@ int proc_watchdog(struct ctl_table *table, int write,
835int proc_nmi_watchdog(struct ctl_table *table, int write, 713int proc_nmi_watchdog(struct ctl_table *table, int write,
836 void __user *buffer, size_t *lenp, loff_t *ppos) 714 void __user *buffer, size_t *lenp, loff_t *ppos)
837{ 715{
716 if (!nmi_watchdog_available && write)
717 return -ENOTSUPP;
838 return proc_watchdog_common(NMI_WATCHDOG_ENABLED, 718 return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
839 table, write, buffer, lenp, ppos); 719 table, write, buffer, lenp, ppos);
840} 720}
@@ -855,39 +735,17 @@ int proc_soft_watchdog(struct ctl_table *table, int write,
855int proc_watchdog_thresh(struct ctl_table *table, int write, 735int proc_watchdog_thresh(struct ctl_table *table, int write,
856 void __user *buffer, size_t *lenp, loff_t *ppos) 736 void __user *buffer, size_t *lenp, loff_t *ppos)
857{ 737{
858 int err, old, new; 738 int err, old;
859
860 get_online_cpus();
861 mutex_lock(&watchdog_proc_mutex);
862 739
863 if (watchdog_suspended) { 740 mutex_lock(&watchdog_mutex);
864 /* no parameter changes allowed while watchdog is suspended */
865 err = -EAGAIN;
866 goto out;
867 }
868 741
869 old = ACCESS_ONCE(watchdog_thresh); 742 old = READ_ONCE(watchdog_thresh);
870 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 743 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
871 744
872 if (err || !write) 745 if (!err && write && old != READ_ONCE(watchdog_thresh))
873 goto out; 746 proc_watchdog_update();
874
875 /*
876 * Update the sample period. Restore on failure.
877 */
878 new = ACCESS_ONCE(watchdog_thresh);
879 if (old == new)
880 goto out;
881 747
882 set_sample_period(); 748 mutex_unlock(&watchdog_mutex);
883 err = proc_watchdog_update();
884 if (err) {
885 watchdog_thresh = old;
886 set_sample_period();
887 }
888out:
889 mutex_unlock(&watchdog_proc_mutex);
890 put_online_cpus();
891 return err; 749 return err;
892} 750}
893 751
@@ -902,45 +760,19 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
902{ 760{
903 int err; 761 int err;
904 762
905 get_online_cpus(); 763 mutex_lock(&watchdog_mutex);
906 mutex_lock(&watchdog_proc_mutex);
907
908 if (watchdog_suspended) {
909 /* no parameter changes allowed while watchdog is suspended */
910 err = -EAGAIN;
911 goto out;
912 }
913 764
914 err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); 765 err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
915 if (!err && write) { 766 if (!err && write)
916 /* Remove impossible cpus to keep sysctl output cleaner. */ 767 proc_watchdog_update();
917 cpumask_and(&watchdog_cpumask, &watchdog_cpumask,
918 cpu_possible_mask);
919
920 if (watchdog_running) {
921 /*
922 * Failure would be due to being unable to allocate
923 * a temporary cpumask, so we are likely not in a
924 * position to do much else to make things better.
925 */
926 if (watchdog_update_cpus() != 0)
927 pr_err("cpumask update failed\n");
928 }
929 768
930 watchdog_nmi_reconfigure(); 769 mutex_unlock(&watchdog_mutex);
931 }
932out:
933 mutex_unlock(&watchdog_proc_mutex);
934 put_online_cpus();
935 return err; 770 return err;
936} 771}
937
938#endif /* CONFIG_SYSCTL */ 772#endif /* CONFIG_SYSCTL */
939 773
940void __init lockup_detector_init(void) 774void __init lockup_detector_init(void)
941{ 775{
942 set_sample_period();
943
944#ifdef CONFIG_NO_HZ_FULL 776#ifdef CONFIG_NO_HZ_FULL
945 if (tick_nohz_full_enabled()) { 777 if (tick_nohz_full_enabled()) {
946 pr_info("Disabling watchdog on nohz_full cores by default\n"); 778 pr_info("Disabling watchdog on nohz_full cores by default\n");
@@ -951,6 +783,7 @@ void __init lockup_detector_init(void)
951 cpumask_copy(&watchdog_cpumask, cpu_possible_mask); 783 cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
952#endif 784#endif
953 785
954 if (watchdog_enabled) 786 if (!watchdog_nmi_probe())
955 watchdog_enable_all_cpus(); 787 nmi_watchdog_available = true;
788 lockup_detector_setup();
956} 789}
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 3a09ea1b1d3d..71a62ceacdc8 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -21,8 +21,10 @@
21static DEFINE_PER_CPU(bool, hard_watchdog_warn); 21static DEFINE_PER_CPU(bool, hard_watchdog_warn);
22static DEFINE_PER_CPU(bool, watchdog_nmi_touch); 22static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
23static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); 23static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
24static struct cpumask dead_events_mask;
24 25
25static unsigned long hardlockup_allcpu_dumped; 26static unsigned long hardlockup_allcpu_dumped;
27static unsigned int watchdog_cpus;
26 28
27void arch_touch_nmi_watchdog(void) 29void arch_touch_nmi_watchdog(void)
28{ 30{
@@ -103,15 +105,12 @@ static struct perf_event_attr wd_hw_attr = {
103 105
104/* Callback function for perf event subsystem */ 106/* Callback function for perf event subsystem */
105static void watchdog_overflow_callback(struct perf_event *event, 107static void watchdog_overflow_callback(struct perf_event *event,
106 struct perf_sample_data *data, 108 struct perf_sample_data *data,
107 struct pt_regs *regs) 109 struct pt_regs *regs)
108{ 110{
109 /* Ensure the watchdog never gets throttled */ 111 /* Ensure the watchdog never gets throttled */
110 event->hw.interrupts = 0; 112 event->hw.interrupts = 0;
111 113
112 if (atomic_read(&watchdog_park_in_progress) != 0)
113 return;
114
115 if (__this_cpu_read(watchdog_nmi_touch) == true) { 114 if (__this_cpu_read(watchdog_nmi_touch) == true) {
116 __this_cpu_write(watchdog_nmi_touch, false); 115 __this_cpu_write(watchdog_nmi_touch, false);
117 return; 116 return;
@@ -160,104 +159,131 @@ static void watchdog_overflow_callback(struct perf_event *event,
160 return; 159 return;
161} 160}
162 161
163/* 162static int hardlockup_detector_event_create(void)
164 * People like the simple clean cpu node info on boot.
165 * Reduce the watchdog noise by only printing messages
166 * that are different from what cpu0 displayed.
167 */
168static unsigned long firstcpu_err;
169static atomic_t watchdog_cpus;
170
171int watchdog_nmi_enable(unsigned int cpu)
172{ 163{
164 unsigned int cpu = smp_processor_id();
173 struct perf_event_attr *wd_attr; 165 struct perf_event_attr *wd_attr;
174 struct perf_event *event = per_cpu(watchdog_ev, cpu); 166 struct perf_event *evt;
175 int firstcpu = 0;
176
177 /* nothing to do if the hard lockup detector is disabled */
178 if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
179 goto out;
180
181 /* is it already setup and enabled? */
182 if (event && event->state > PERF_EVENT_STATE_OFF)
183 goto out;
184
185 /* it is setup but not enabled */
186 if (event != NULL)
187 goto out_enable;
188
189 if (atomic_inc_return(&watchdog_cpus) == 1)
190 firstcpu = 1;
191 167
192 wd_attr = &wd_hw_attr; 168 wd_attr = &wd_hw_attr;
193 wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); 169 wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
194 170
195 /* Try to register using hardware perf events */ 171 /* Try to register using hardware perf events */
196 event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); 172 evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
173 watchdog_overflow_callback, NULL);
174 if (IS_ERR(evt)) {
175 pr_info("Perf event create on CPU %d failed with %ld\n", cpu,
176 PTR_ERR(evt));
177 return PTR_ERR(evt);
178 }
179 this_cpu_write(watchdog_ev, evt);
180 return 0;
181}
197 182
198 /* save the first cpu's error for future comparision */ 183/**
199 if (firstcpu && IS_ERR(event)) 184 * hardlockup_detector_perf_enable - Enable the local event
200 firstcpu_err = PTR_ERR(event); 185 */
186void hardlockup_detector_perf_enable(void)
187{
188 if (hardlockup_detector_event_create())
189 return;
201 190
202 if (!IS_ERR(event)) { 191 if (!watchdog_cpus++)
203 /* only print for the first cpu initialized */ 192 pr_info("Enabled. Permanently consumes one hw-PMU counter.\n");
204 if (firstcpu || firstcpu_err)
205 pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
206 goto out_save;
207 }
208 193
209 /* 194 perf_event_enable(this_cpu_read(watchdog_ev));
210 * Disable the hard lockup detector if _any_ CPU fails to set up
211 * set up the hardware perf event. The watchdog() function checks
212 * the NMI_WATCHDOG_ENABLED bit periodically.
213 *
214 * The barriers are for syncing up watchdog_enabled across all the
215 * cpus, as clear_bit() does not use barriers.
216 */
217 smp_mb__before_atomic();
218 clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
219 smp_mb__after_atomic();
220
221 /* skip displaying the same error again */
222 if (!firstcpu && (PTR_ERR(event) == firstcpu_err))
223 return PTR_ERR(event);
224
225 /* vary the KERN level based on the returned errno */
226 if (PTR_ERR(event) == -EOPNOTSUPP)
227 pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
228 else if (PTR_ERR(event) == -ENOENT)
229 pr_warn("disabled (cpu%i): hardware events not enabled\n",
230 cpu);
231 else
232 pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
233 cpu, PTR_ERR(event));
234
235 pr_info("Shutting down hard lockup detector on all cpus\n");
236
237 return PTR_ERR(event);
238
239 /* success path */
240out_save:
241 per_cpu(watchdog_ev, cpu) = event;
242out_enable:
243 perf_event_enable(per_cpu(watchdog_ev, cpu));
244out:
245 return 0;
246} 195}
247 196
248void watchdog_nmi_disable(unsigned int cpu) 197/**
198 * hardlockup_detector_perf_disable - Disable the local event
199 */
200void hardlockup_detector_perf_disable(void)
249{ 201{
250 struct perf_event *event = per_cpu(watchdog_ev, cpu); 202 struct perf_event *event = this_cpu_read(watchdog_ev);
251 203
252 if (event) { 204 if (event) {
253 perf_event_disable(event); 205 perf_event_disable(event);
206 cpumask_set_cpu(smp_processor_id(), &dead_events_mask);
207 watchdog_cpus--;
208 }
209}
210
211/**
212 * hardlockup_detector_perf_cleanup - Cleanup disabled events and destroy them
213 *
214 * Called from lockup_detector_cleanup(). Serialized by the caller.
215 */
216void hardlockup_detector_perf_cleanup(void)
217{
218 int cpu;
219
220 for_each_cpu(cpu, &dead_events_mask) {
221 struct perf_event *event = per_cpu(watchdog_ev, cpu);
222
223 /*
224 * Required because for_each_cpu() reports unconditionally
225 * CPU0 as set on UP kernels. Sigh.
226 */
227 if (event)
228 perf_event_release_kernel(event);
254 per_cpu(watchdog_ev, cpu) = NULL; 229 per_cpu(watchdog_ev, cpu) = NULL;
230 }
231 cpumask_clear(&dead_events_mask);
232}
233
234/**
235 * hardlockup_detector_perf_stop - Globally stop watchdog events
236 *
237 * Special interface for x86 to handle the perf HT bug.
238 */
239void __init hardlockup_detector_perf_stop(void)
240{
241 int cpu;
242
243 lockdep_assert_cpus_held();
244
245 for_each_online_cpu(cpu) {
246 struct perf_event *event = per_cpu(watchdog_ev, cpu);
247
248 if (event)
249 perf_event_disable(event);
250 }
251}
255 252
256 /* should be in cleanup, but blocks oprofile */ 253/**
257 perf_event_release_kernel(event); 254 * hardlockup_detector_perf_restart - Globally restart watchdog events
255 *
256 * Special interface for x86 to handle the perf HT bug.
257 */
258void __init hardlockup_detector_perf_restart(void)
259{
260 int cpu;
261
262 lockdep_assert_cpus_held();
263
264 if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
265 return;
266
267 for_each_online_cpu(cpu) {
268 struct perf_event *event = per_cpu(watchdog_ev, cpu);
269
270 if (event)
271 perf_event_enable(event);
272 }
273}
274
275/**
276 * hardlockup_detector_perf_init - Probe whether NMI event is available at all
277 */
278int __init hardlockup_detector_perf_init(void)
279{
280 int ret = hardlockup_detector_event_create();
258 281
259 /* watchdog_nmi_enable() expects this to be zero initially. */ 282 if (ret) {
260 if (atomic_dec_and_test(&watchdog_cpus)) 283 pr_info("Perf NMI watchdog permanently disabled\n");
261 firstcpu_err = 0; 284 } else {
285 perf_event_release_kernel(this_cpu_read(watchdog_ev));
286 this_cpu_write(watchdog_ev, NULL);
262 } 287 }
288 return ret;
263} 289}