aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-10-06 11:36:41 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2017-10-06 11:36:41 -0400
commit27efed3e8384e4d87fe3c07e7a046c1f43eb0993 (patch)
tree96d45ac3bfd2c0009dad5b23387da801ce7203e1 /kernel
parent7a92616c0bac849e790283723b36c399668a1d9f (diff)
parent0b62bf862dc93a05fea97b6ca6ffca072e2f30c1 (diff)
Merge branch 'core-watchdog-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull watchdog clean-up and fixes from Thomas Gleixner: "The watchdog (hard/softlockup detector) code is pretty much broken in its current state. The patch series addresses this by removing all duct tape and refactoring it into a workable state. The reasons why I ask for inclusion that late in the cycle are: 1) The code causes lockdep splats vs. hotplug locking which get reported over and over. Unfortunately there is no easy fix. 2) The risk of breakage is minimal because it's already broken 3) As 4.14 is a long term stable kernel, I prefer to have working watchdog code in that and the lockdep issues resolved. I wouldn't ask you to pull if 4.14 wouldn't be a LTS kernel or if the solution would be easy to backport. 4) The series was around before the merge window opened, but then got delayed due to the UP failure caused by the for_each_cpu() surprise which we discussed recently. Changes vs. V1: - Addressed your review points - Addressed the warning in the powerpc code which was discovered late - Changed two function names which made sense up to a certain point in the series. Now they match what they do in the end. - Fixed an 'unused variable' warning, which was not detected by the intel robot. I triggered it when trying all possible related config combinations manually. Randconfig testing seems not random enough. 
The changes have been tested and reviewed by Don Zickus and tested and acked by Michael Ellerman for powerpc" * 'core-watchdog-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (36 commits) watchdog/core: Put softlockup_threads_initialized under ifdef guard watchdog/core: Rename some softlockup_* functions powerpc/watchdog: Make use of watchdog_nmi_probe() watchdog/core, powerpc: Lock cpus across reconfiguration watchdog/core, powerpc: Replace watchdog_nmi_reconfigure() watchdog/hardlockup/perf: Fix spelling mistake: "permanetely" -> "permanently" watchdog/hardlockup/perf: Cure UP damage watchdog/hardlockup: Clean up hotplug locking mess watchdog/hardlockup/perf: Simplify deferred event destroy watchdog/hardlockup/perf: Use new perf CPU enable mechanism watchdog/hardlockup/perf: Implement CPU enable replacement watchdog/hardlockup/perf: Implement init time detection of perf watchdog/hardlockup/perf: Implement init time perf validation watchdog/core: Get rid of the racy update loop watchdog/core, powerpc: Make watchdog_nmi_reconfigure() two stage watchdog/sysctl: Clean up sysctl variable name space watchdog/sysctl: Get rid of the #ifdeffery watchdog/core: Clean up header mess watchdog/core: Further simplify sysctl handling watchdog/core: Get rid of the thread teardown/setup dance ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpu.c6
-rw-r--r--kernel/smpboot.c25
-rw-r--r--kernel/sysctl.c22
-rw-r--r--kernel/watchdog.c643
-rw-r--r--kernel/watchdog_hld.c196
5 files changed, 372 insertions, 520 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8de11a29e495..d851df22f5c5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -24,6 +24,7 @@
24#include <linux/lockdep.h> 24#include <linux/lockdep.h>
25#include <linux/tick.h> 25#include <linux/tick.h>
26#include <linux/irq.h> 26#include <linux/irq.h>
27#include <linux/nmi.h>
27#include <linux/smpboot.h> 28#include <linux/smpboot.h>
28#include <linux/relay.h> 29#include <linux/relay.h>
29#include <linux/slab.h> 30#include <linux/slab.h>
@@ -897,6 +898,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
897 898
898out: 899out:
899 cpus_write_unlock(); 900 cpus_write_unlock();
901 /*
902 * Do post unplug cleanup. This is still protected against
903 * concurrent CPU hotplug via cpu_add_remove_lock.
904 */
905 lockup_detector_cleanup();
900 return ret; 906 return ret;
901} 907}
902 908
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 1d71c051a951..5043e7433f4b 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -344,39 +344,30 @@ EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
344 * by the client, but only by calling this function. 344 * by the client, but only by calling this function.
345 * This function can only be called on a registered smp_hotplug_thread. 345 * This function can only be called on a registered smp_hotplug_thread.
346 */ 346 */
347int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, 347void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
348 const struct cpumask *new) 348 const struct cpumask *new)
349{ 349{
350 struct cpumask *old = plug_thread->cpumask; 350 struct cpumask *old = plug_thread->cpumask;
351 cpumask_var_t tmp; 351 static struct cpumask tmp;
352 unsigned int cpu; 352 unsigned int cpu;
353 353
354 if (!alloc_cpumask_var(&tmp, GFP_KERNEL)) 354 lockdep_assert_cpus_held();
355 return -ENOMEM;
356
357 get_online_cpus();
358 mutex_lock(&smpboot_threads_lock); 355 mutex_lock(&smpboot_threads_lock);
359 356
360 /* Park threads that were exclusively enabled on the old mask. */ 357 /* Park threads that were exclusively enabled on the old mask. */
361 cpumask_andnot(tmp, old, new); 358 cpumask_andnot(&tmp, old, new);
362 for_each_cpu_and(cpu, tmp, cpu_online_mask) 359 for_each_cpu_and(cpu, &tmp, cpu_online_mask)
363 smpboot_park_thread(plug_thread, cpu); 360 smpboot_park_thread(plug_thread, cpu);
364 361
365 /* Unpark threads that are exclusively enabled on the new mask. */ 362 /* Unpark threads that are exclusively enabled on the new mask. */
366 cpumask_andnot(tmp, new, old); 363 cpumask_andnot(&tmp, new, old);
367 for_each_cpu_and(cpu, tmp, cpu_online_mask) 364 for_each_cpu_and(cpu, &tmp, cpu_online_mask)
368 smpboot_unpark_thread(plug_thread, cpu); 365 smpboot_unpark_thread(plug_thread, cpu);
369 366
370 cpumask_copy(old, new); 367 cpumask_copy(old, new);
371 368
372 mutex_unlock(&smpboot_threads_lock); 369 mutex_unlock(&smpboot_threads_lock);
373 put_online_cpus();
374
375 free_cpumask_var(tmp);
376
377 return 0;
378} 370}
379EXPORT_SYMBOL_GPL(smpboot_update_cpumask_percpu_thread);
380 371
381static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD); 372static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);
382 373
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4da9e622471f..d9c31bc2eaea 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -872,9 +872,9 @@ static struct ctl_table kern_table[] = {
872#if defined(CONFIG_LOCKUP_DETECTOR) 872#if defined(CONFIG_LOCKUP_DETECTOR)
873 { 873 {
874 .procname = "watchdog", 874 .procname = "watchdog",
875 .data = &watchdog_user_enabled, 875 .data = &watchdog_user_enabled,
876 .maxlen = sizeof (int), 876 .maxlen = sizeof(int),
877 .mode = 0644, 877 .mode = 0644,
878 .proc_handler = proc_watchdog, 878 .proc_handler = proc_watchdog,
879 .extra1 = &zero, 879 .extra1 = &zero,
880 .extra2 = &one, 880 .extra2 = &one,
@@ -890,16 +890,12 @@ static struct ctl_table kern_table[] = {
890 }, 890 },
891 { 891 {
892 .procname = "nmi_watchdog", 892 .procname = "nmi_watchdog",
893 .data = &nmi_watchdog_enabled, 893 .data = &nmi_watchdog_user_enabled,
894 .maxlen = sizeof (int), 894 .maxlen = sizeof(int),
895 .mode = 0644, 895 .mode = NMI_WATCHDOG_SYSCTL_PERM,
896 .proc_handler = proc_nmi_watchdog, 896 .proc_handler = proc_nmi_watchdog,
897 .extra1 = &zero, 897 .extra1 = &zero,
898#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
899 .extra2 = &one, 898 .extra2 = &one,
900#else
901 .extra2 = &zero,
902#endif
903 }, 899 },
904 { 900 {
905 .procname = "watchdog_cpumask", 901 .procname = "watchdog_cpumask",
@@ -911,9 +907,9 @@ static struct ctl_table kern_table[] = {
911#ifdef CONFIG_SOFTLOCKUP_DETECTOR 907#ifdef CONFIG_SOFTLOCKUP_DETECTOR
912 { 908 {
913 .procname = "soft_watchdog", 909 .procname = "soft_watchdog",
914 .data = &soft_watchdog_enabled, 910 .data = &soft_watchdog_user_enabled,
915 .maxlen = sizeof (int), 911 .maxlen = sizeof(int),
916 .mode = 0644, 912 .mode = 0644,
917 .proc_handler = proc_soft_watchdog, 913 .proc_handler = proc_soft_watchdog,
918 .extra1 = &zero, 914 .extra1 = &zero,
919 .extra2 = &one, 915 .extra2 = &one,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index f5d52024f6b7..6bcb854909c0 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -29,20 +29,29 @@
29#include <linux/kvm_para.h> 29#include <linux/kvm_para.h>
30#include <linux/kthread.h> 30#include <linux/kthread.h>
31 31
32/* Watchdog configuration */ 32static DEFINE_MUTEX(watchdog_mutex);
33static DEFINE_MUTEX(watchdog_proc_mutex);
34
35int __read_mostly nmi_watchdog_enabled;
36 33
37#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) 34#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
38unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED | 35# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED)
39 NMI_WATCHDOG_ENABLED; 36# define NMI_WATCHDOG_DEFAULT 1
40#else 37#else
41unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; 38# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED)
39# define NMI_WATCHDOG_DEFAULT 0
42#endif 40#endif
43 41
42unsigned long __read_mostly watchdog_enabled;
43int __read_mostly watchdog_user_enabled = 1;
44int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT;
45int __read_mostly soft_watchdog_user_enabled = 1;
46int __read_mostly watchdog_thresh = 10;
47int __read_mostly nmi_watchdog_available;
48
49struct cpumask watchdog_allowed_mask __read_mostly;
50
51struct cpumask watchdog_cpumask __read_mostly;
52unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
53
44#ifdef CONFIG_HARDLOCKUP_DETECTOR 54#ifdef CONFIG_HARDLOCKUP_DETECTOR
45/* boot commands */
46/* 55/*
47 * Should we panic when a soft-lockup or hard-lockup occurs: 56 * Should we panic when a soft-lockup or hard-lockup occurs:
48 */ 57 */
@@ -56,9 +65,9 @@ unsigned int __read_mostly hardlockup_panic =
56 * kernel command line parameters are parsed, because otherwise it is not 65 * kernel command line parameters are parsed, because otherwise it is not
57 * possible to override this in hardlockup_panic_setup(). 66 * possible to override this in hardlockup_panic_setup().
58 */ 67 */
59void hardlockup_detector_disable(void) 68void __init hardlockup_detector_disable(void)
60{ 69{
61 watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; 70 nmi_watchdog_user_enabled = 0;
62} 71}
63 72
64static int __init hardlockup_panic_setup(char *str) 73static int __init hardlockup_panic_setup(char *str)
@@ -68,48 +77,24 @@ static int __init hardlockup_panic_setup(char *str)
68 else if (!strncmp(str, "nopanic", 7)) 77 else if (!strncmp(str, "nopanic", 7))
69 hardlockup_panic = 0; 78 hardlockup_panic = 0;
70 else if (!strncmp(str, "0", 1)) 79 else if (!strncmp(str, "0", 1))
71 watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; 80 nmi_watchdog_user_enabled = 0;
72 else if (!strncmp(str, "1", 1)) 81 else if (!strncmp(str, "1", 1))
73 watchdog_enabled |= NMI_WATCHDOG_ENABLED; 82 nmi_watchdog_user_enabled = 1;
74 return 1; 83 return 1;
75} 84}
76__setup("nmi_watchdog=", hardlockup_panic_setup); 85__setup("nmi_watchdog=", hardlockup_panic_setup);
77 86
78#endif 87# ifdef CONFIG_SMP
79
80#ifdef CONFIG_SOFTLOCKUP_DETECTOR
81int __read_mostly soft_watchdog_enabled;
82#endif
83
84int __read_mostly watchdog_user_enabled;
85int __read_mostly watchdog_thresh = 10;
86
87#ifdef CONFIG_SMP
88int __read_mostly sysctl_softlockup_all_cpu_backtrace;
89int __read_mostly sysctl_hardlockup_all_cpu_backtrace; 88int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
90#endif
91struct cpumask watchdog_cpumask __read_mostly;
92unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
93 89
94/* 90static int __init hardlockup_all_cpu_backtrace_setup(char *str)
95 * The 'watchdog_running' variable is set to 1 when the watchdog threads 91{
96 * are registered/started and is set to 0 when the watchdog threads are 92 sysctl_hardlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
97 * unregistered/stopped, so it is an indicator whether the threads exist. 93 return 1;
98 */ 94}
99static int __read_mostly watchdog_running; 95__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
100/* 96# endif /* CONFIG_SMP */
101 * If a subsystem has a need to deactivate the watchdog temporarily, it 97#endif /* CONFIG_HARDLOCKUP_DETECTOR */
102 * can use the suspend/resume interface to achieve this. The content of
103 * the 'watchdog_suspended' variable reflects this state. Existing threads
104 * are parked/unparked by the lockup_detector_{suspend|resume} functions
105 * (see comment blocks pertaining to those functions for further details).
106 *
107 * 'watchdog_suspended' also prevents threads from being registered/started
108 * or unregistered/stopped via parameters in /proc/sys/kernel, so the state
109 * of 'watchdog_running' cannot change while the watchdog is deactivated
110 * temporarily (see related code in 'proc' handlers).
111 */
112int __read_mostly watchdog_suspended;
113 98
114/* 99/*
115 * These functions can be overridden if an architecture implements its 100 * These functions can be overridden if an architecture implements its
@@ -121,36 +106,68 @@ int __read_mostly watchdog_suspended;
121 */ 106 */
122int __weak watchdog_nmi_enable(unsigned int cpu) 107int __weak watchdog_nmi_enable(unsigned int cpu)
123{ 108{
109 hardlockup_detector_perf_enable();
124 return 0; 110 return 0;
125} 111}
112
126void __weak watchdog_nmi_disable(unsigned int cpu) 113void __weak watchdog_nmi_disable(unsigned int cpu)
127{ 114{
115 hardlockup_detector_perf_disable();
128} 116}
129 117
130/* 118/* Return 0, if a NMI watchdog is available. Error code otherwise */
131 * watchdog_nmi_reconfigure can be implemented to be notified after any 119int __weak __init watchdog_nmi_probe(void)
132 * watchdog configuration change. The arch hardlockup watchdog should 120{
133 * respond to the following variables: 121 return hardlockup_detector_perf_init();
134 * - nmi_watchdog_enabled 122}
123
124/**
125 * watchdog_nmi_stop - Stop the watchdog for reconfiguration
126 *
127 * The reconfiguration steps are:
128 * watchdog_nmi_stop();
129 * update_variables();
130 * watchdog_nmi_start();
131 */
132void __weak watchdog_nmi_stop(void) { }
133
134/**
135 * watchdog_nmi_start - Start the watchdog after reconfiguration
136 *
137 * Counterpart to watchdog_nmi_stop().
138 *
139 * The following variables have been updated in update_variables() and
140 * contain the currently valid configuration:
141 * - watchdog_enabled
135 * - watchdog_thresh 142 * - watchdog_thresh
136 * - watchdog_cpumask 143 * - watchdog_cpumask
137 * - sysctl_hardlockup_all_cpu_backtrace
138 * - hardlockup_panic
139 * - watchdog_suspended
140 */ 144 */
141void __weak watchdog_nmi_reconfigure(void) 145void __weak watchdog_nmi_start(void) { }
146
147/**
148 * lockup_detector_update_enable - Update the sysctl enable bit
149 *
150 * Caller needs to make sure that the NMI/perf watchdogs are off, so this
151 * can't race with watchdog_nmi_disable().
152 */
153static void lockup_detector_update_enable(void)
142{ 154{
155 watchdog_enabled = 0;
156 if (!watchdog_user_enabled)
157 return;
158 if (nmi_watchdog_available && nmi_watchdog_user_enabled)
159 watchdog_enabled |= NMI_WATCHDOG_ENABLED;
160 if (soft_watchdog_user_enabled)
161 watchdog_enabled |= SOFT_WATCHDOG_ENABLED;
143} 162}
144 163
145
146#ifdef CONFIG_SOFTLOCKUP_DETECTOR 164#ifdef CONFIG_SOFTLOCKUP_DETECTOR
147 165
148/* Helper for online, unparked cpus. */ 166/* Global variables, exported for sysctl */
149#define for_each_watchdog_cpu(cpu) \ 167unsigned int __read_mostly softlockup_panic =
150 for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) 168 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
151
152atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
153 169
170static bool softlockup_threads_initialized __read_mostly;
154static u64 __read_mostly sample_period; 171static u64 __read_mostly sample_period;
155 172
156static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); 173static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
@@ -164,50 +181,40 @@ static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
164static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); 181static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
165static unsigned long soft_lockup_nmi_warn; 182static unsigned long soft_lockup_nmi_warn;
166 183
167unsigned int __read_mostly softlockup_panic =
168 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
169
170static int __init softlockup_panic_setup(char *str) 184static int __init softlockup_panic_setup(char *str)
171{ 185{
172 softlockup_panic = simple_strtoul(str, NULL, 0); 186 softlockup_panic = simple_strtoul(str, NULL, 0);
173
174 return 1; 187 return 1;
175} 188}
176__setup("softlockup_panic=", softlockup_panic_setup); 189__setup("softlockup_panic=", softlockup_panic_setup);
177 190
178static int __init nowatchdog_setup(char *str) 191static int __init nowatchdog_setup(char *str)
179{ 192{
180 watchdog_enabled = 0; 193 watchdog_user_enabled = 0;
181 return 1; 194 return 1;
182} 195}
183__setup("nowatchdog", nowatchdog_setup); 196__setup("nowatchdog", nowatchdog_setup);
184 197
185static int __init nosoftlockup_setup(char *str) 198static int __init nosoftlockup_setup(char *str)
186{ 199{
187 watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED; 200 soft_watchdog_user_enabled = 0;
188 return 1; 201 return 1;
189} 202}
190__setup("nosoftlockup", nosoftlockup_setup); 203__setup("nosoftlockup", nosoftlockup_setup);
191 204
192#ifdef CONFIG_SMP 205#ifdef CONFIG_SMP
206int __read_mostly sysctl_softlockup_all_cpu_backtrace;
207
193static int __init softlockup_all_cpu_backtrace_setup(char *str) 208static int __init softlockup_all_cpu_backtrace_setup(char *str)
194{ 209{
195 sysctl_softlockup_all_cpu_backtrace = 210 sysctl_softlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
196 !!simple_strtol(str, NULL, 0);
197 return 1; 211 return 1;
198} 212}
199__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); 213__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
200#ifdef CONFIG_HARDLOCKUP_DETECTOR
201static int __init hardlockup_all_cpu_backtrace_setup(char *str)
202{
203 sysctl_hardlockup_all_cpu_backtrace =
204 !!simple_strtol(str, NULL, 0);
205 return 1;
206}
207__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
208#endif
209#endif 214#endif
210 215
216static void __lockup_detector_cleanup(void);
217
211/* 218/*
212 * Hard-lockup warnings should be triggered after just a few seconds. Soft- 219 * Hard-lockup warnings should be triggered after just a few seconds. Soft-
213 * lockups can have false positives under extreme conditions. So we generally 220 * lockups can have false positives under extreme conditions. So we generally
@@ -278,11 +285,15 @@ void touch_all_softlockup_watchdogs(void)
278 int cpu; 285 int cpu;
279 286
280 /* 287 /*
281 * this is done lockless 288 * watchdog_mutex cannot be taken here, as this might be called
282 * do we care if a 0 races with a timestamp? 289 * from (soft)interrupt context, so the access to
283 * all it means is the softlock check starts one cycle later 290 * watchdog_allowed_mask might race with a concurrent update.
291 *
292 * The watchdog time stamp can race against a concurrent real
293 * update as well, the only side effect might be a cycle delay for
294 * the softlockup check.
284 */ 295 */
285 for_each_watchdog_cpu(cpu) 296 for_each_cpu(cpu, &watchdog_allowed_mask)
286 per_cpu(watchdog_touch_ts, cpu) = 0; 297 per_cpu(watchdog_touch_ts, cpu) = 0;
287 wq_watchdog_touch(-1); 298 wq_watchdog_touch(-1);
288} 299}
@@ -322,9 +333,6 @@ static void watchdog_interrupt_count(void)
322 __this_cpu_inc(hrtimer_interrupts); 333 __this_cpu_inc(hrtimer_interrupts);
323} 334}
324 335
325static int watchdog_enable_all_cpus(void);
326static void watchdog_disable_all_cpus(void);
327
328/* watchdog kicker functions */ 336/* watchdog kicker functions */
329static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) 337static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
330{ 338{
@@ -333,7 +341,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
333 int duration; 341 int duration;
334 int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; 342 int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
335 343
336 if (atomic_read(&watchdog_park_in_progress) != 0) 344 if (!watchdog_enabled)
337 return HRTIMER_NORESTART; 345 return HRTIMER_NORESTART;
338 346
339 /* kick the hardlockup detector */ 347 /* kick the hardlockup detector */
@@ -447,32 +455,38 @@ static void watchdog_set_prio(unsigned int policy, unsigned int prio)
447 455
448static void watchdog_enable(unsigned int cpu) 456static void watchdog_enable(unsigned int cpu)
449{ 457{
450 struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); 458 struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
451 459
452 /* kick off the timer for the hardlockup detector */ 460 /*
461 * Start the timer first to prevent the NMI watchdog triggering
462 * before the timer has a chance to fire.
463 */
453 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 464 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
454 hrtimer->function = watchdog_timer_fn; 465 hrtimer->function = watchdog_timer_fn;
455
456 /* Enable the perf event */
457 watchdog_nmi_enable(cpu);
458
459 /* done here because hrtimer_start can only pin to smp_processor_id() */
460 hrtimer_start(hrtimer, ns_to_ktime(sample_period), 466 hrtimer_start(hrtimer, ns_to_ktime(sample_period),
461 HRTIMER_MODE_REL_PINNED); 467 HRTIMER_MODE_REL_PINNED);
462 468
463 /* initialize timestamp */ 469 /* Initialize timestamp */
464 watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
465 __touch_watchdog(); 470 __touch_watchdog();
471 /* Enable the perf event */
472 if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
473 watchdog_nmi_enable(cpu);
474
475 watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
466} 476}
467 477
468static void watchdog_disable(unsigned int cpu) 478static void watchdog_disable(unsigned int cpu)
469{ 479{
470 struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); 480 struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
471 481
472 watchdog_set_prio(SCHED_NORMAL, 0); 482 watchdog_set_prio(SCHED_NORMAL, 0);
473 hrtimer_cancel(hrtimer); 483 /*
474 /* disable the perf event */ 484 * Disable the perf event first. That prevents that a large delay
485 * between disabling the timer and disabling the perf event causes
486 * the perf NMI to detect a false positive.
487 */
475 watchdog_nmi_disable(cpu); 488 watchdog_nmi_disable(cpu);
489 hrtimer_cancel(hrtimer);
476} 490}
477 491
478static void watchdog_cleanup(unsigned int cpu, bool online) 492static void watchdog_cleanup(unsigned int cpu, bool online)
@@ -499,21 +513,6 @@ static void watchdog(unsigned int cpu)
499 __this_cpu_write(soft_lockup_hrtimer_cnt, 513 __this_cpu_write(soft_lockup_hrtimer_cnt,
500 __this_cpu_read(hrtimer_interrupts)); 514 __this_cpu_read(hrtimer_interrupts));
501 __touch_watchdog(); 515 __touch_watchdog();
502
503 /*
504 * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the
505 * failure path. Check for failures that can occur asynchronously -
506 * for example, when CPUs are on-lined - and shut down the hardware
507 * perf event on each CPU accordingly.
508 *
509 * The only non-obvious place this bit can be cleared is through
510 * watchdog_nmi_enable(), so a pr_info() is placed there. Placing a
511 * pr_info here would be too noisy as it would result in a message
512 * every few seconds if the hardlockup was disabled but the softlockup
513 * enabled.
514 */
515 if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
516 watchdog_nmi_disable(cpu);
517} 516}
518 517
519static struct smp_hotplug_thread watchdog_threads = { 518static struct smp_hotplug_thread watchdog_threads = {
@@ -527,295 +526,174 @@ static struct smp_hotplug_thread watchdog_threads = {
527 .unpark = watchdog_enable, 526 .unpark = watchdog_enable,
528}; 527};
529 528
530/* 529static void softlockup_update_smpboot_threads(void)
531 * park all watchdog threads that are specified in 'watchdog_cpumask'
532 *
533 * This function returns an error if kthread_park() of a watchdog thread
534 * fails. In this situation, the watchdog threads of some CPUs can already
535 * be parked and the watchdog threads of other CPUs can still be runnable.
536 * Callers are expected to handle this special condition as appropriate in
537 * their context.
538 *
539 * This function may only be called in a context that is protected against
540 * races with CPU hotplug - for example, via get_online_cpus().
541 */
542static int watchdog_park_threads(void)
543{ 530{
544 int cpu, ret = 0; 531 lockdep_assert_held(&watchdog_mutex);
545 532
546 atomic_set(&watchdog_park_in_progress, 1); 533 if (!softlockup_threads_initialized)
534 return;
547 535
548 for_each_watchdog_cpu(cpu) { 536 smpboot_update_cpumask_percpu_thread(&watchdog_threads,
549 ret = kthread_park(per_cpu(softlockup_watchdog, cpu)); 537 &watchdog_allowed_mask);
550 if (ret)
551 break;
552 }
553
554 atomic_set(&watchdog_park_in_progress, 0);
555
556 return ret;
557} 538}
558 539
559/* 540/* Temporarily park all watchdog threads */
560 * unpark all watchdog threads that are specified in 'watchdog_cpumask' 541static void softlockup_park_all_threads(void)
561 *
562 * This function may only be called in a context that is protected against
563 * races with CPU hotplug - for example, via get_online_cpus().
564 */
565static void watchdog_unpark_threads(void)
566{ 542{
567 int cpu; 543 cpumask_clear(&watchdog_allowed_mask);
568 544 softlockup_update_smpboot_threads();
569 for_each_watchdog_cpu(cpu)
570 kthread_unpark(per_cpu(softlockup_watchdog, cpu));
571} 545}
572 546
573static int update_watchdog_all_cpus(void) 547/* Unpark enabled threads */
548static void softlockup_unpark_threads(void)
574{ 549{
575 int ret; 550 cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
576 551 softlockup_update_smpboot_threads();
577 ret = watchdog_park_threads();
578 if (ret)
579 return ret;
580
581 watchdog_unpark_threads();
582
583 return 0;
584} 552}
585 553
586static int watchdog_enable_all_cpus(void) 554static void lockup_detector_reconfigure(void)
587{ 555{
588 int err = 0; 556 cpus_read_lock();
589 557 watchdog_nmi_stop();
590 if (!watchdog_running) { 558 softlockup_park_all_threads();
591 err = smpboot_register_percpu_thread_cpumask(&watchdog_threads, 559 set_sample_period();
592 &watchdog_cpumask); 560 lockup_detector_update_enable();
593 if (err) 561 if (watchdog_enabled && watchdog_thresh)
594 pr_err("Failed to create watchdog threads, disabled\n"); 562 softlockup_unpark_threads();
595 else 563 watchdog_nmi_start();
596 watchdog_running = 1; 564 cpus_read_unlock();
597 } else { 565 /*
598 /* 566 * Must be called outside the cpus locked section to prevent
599 * Enable/disable the lockup detectors or 567 * recursive locking in the perf code.
600 * change the sample period 'on the fly'. 568 */
601 */ 569 __lockup_detector_cleanup();
602 err = update_watchdog_all_cpus();
603
604 if (err) {
605 watchdog_disable_all_cpus();
606 pr_err("Failed to update lockup detectors, disabled\n");
607 }
608 }
609
610 if (err)
611 watchdog_enabled = 0;
612
613 return err;
614} 570}
615 571
616static void watchdog_disable_all_cpus(void) 572/*
573 * Create the watchdog thread infrastructure and configure the detector(s).
574 *
575 * The threads are not unparked as watchdog_allowed_mask is empty. When
576 * the threads are successfully initialized, take the proper locks and
577 * unpark the threads in the watchdog_cpumask if the watchdog is enabled.
578 */
579static __init void lockup_detector_setup(void)
617{ 580{
618 if (watchdog_running) { 581 int ret;
619 watchdog_running = 0;
620 smpboot_unregister_percpu_thread(&watchdog_threads);
621 }
622}
623 582
624#ifdef CONFIG_SYSCTL 583 /*
625static int watchdog_update_cpus(void) 584 * If sysctl is off and watchdog got disabled on the command line,
626{ 585 * nothing to do here.
627 return smpboot_update_cpumask_percpu_thread( 586 */
628 &watchdog_threads, &watchdog_cpumask); 587 lockup_detector_update_enable();
629}
630#endif
631 588
632#else /* SOFTLOCKUP */ 589 if (!IS_ENABLED(CONFIG_SYSCTL) &&
633static int watchdog_park_threads(void) 590 !(watchdog_enabled && watchdog_thresh))
634{ 591 return;
635 return 0;
636}
637 592
638static void watchdog_unpark_threads(void) 593 ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
639{ 594 &watchdog_allowed_mask);
640} 595 if (ret) {
596 pr_err("Failed to initialize soft lockup detector threads\n");
597 return;
598 }
641 599
642static int watchdog_enable_all_cpus(void) 600 mutex_lock(&watchdog_mutex);
643{ 601 softlockup_threads_initialized = true;
644 return 0; 602 lockup_detector_reconfigure();
603 mutex_unlock(&watchdog_mutex);
645} 604}
646 605
647static void watchdog_disable_all_cpus(void) 606#else /* CONFIG_SOFTLOCKUP_DETECTOR */
607static inline int watchdog_park_threads(void) { return 0; }
608static inline void watchdog_unpark_threads(void) { }
609static inline int watchdog_enable_all_cpus(void) { return 0; }
610static inline void watchdog_disable_all_cpus(void) { }
611static void lockup_detector_reconfigure(void)
648{ 612{
613 cpus_read_lock();
614 watchdog_nmi_stop();
615 lockup_detector_update_enable();
616 watchdog_nmi_start();
617 cpus_read_unlock();
649} 618}
650 619static inline void lockup_detector_setup(void)
651#ifdef CONFIG_SYSCTL
652static int watchdog_update_cpus(void)
653{ 620{
654 return 0; 621 lockup_detector_reconfigure();
655} 622}
656#endif 623#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */
657 624
658static void set_sample_period(void) 625static void __lockup_detector_cleanup(void)
659{ 626{
627 lockdep_assert_held(&watchdog_mutex);
628 hardlockup_detector_perf_cleanup();
660} 629}
661#endif /* SOFTLOCKUP */
662 630
663/* 631/**
664 * Suspend the hard and soft lockup detector by parking the watchdog threads. 632 * lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes
633 *
634 * Caller must not hold the cpu hotplug rwsem.
665 */ 635 */
666int lockup_detector_suspend(void) 636void lockup_detector_cleanup(void)
667{ 637{
668 int ret = 0; 638 mutex_lock(&watchdog_mutex);
669 639 __lockup_detector_cleanup();
670 get_online_cpus(); 640 mutex_unlock(&watchdog_mutex);
671 mutex_lock(&watchdog_proc_mutex);
672 /*
673 * Multiple suspend requests can be active in parallel (counted by
674 * the 'watchdog_suspended' variable). If the watchdog threads are
675 * running, the first caller takes care that they will be parked.
676 * The state of 'watchdog_running' cannot change while a suspend
677 * request is active (see related code in 'proc' handlers).
678 */
679 if (watchdog_running && !watchdog_suspended)
680 ret = watchdog_park_threads();
681
682 if (ret == 0)
683 watchdog_suspended++;
684 else {
685 watchdog_disable_all_cpus();
686 pr_err("Failed to suspend lockup detectors, disabled\n");
687 watchdog_enabled = 0;
688 }
689
690 watchdog_nmi_reconfigure();
691
692 mutex_unlock(&watchdog_proc_mutex);
693
694 return ret;
695} 641}
696 642
697/* 643/**
698 * Resume the hard and soft lockup detector by unparking the watchdog threads. 644 * lockup_detector_soft_poweroff - Interface to stop lockup detector(s)
645 *
646 * Special interface for parisc. It prevents lockup detector warnings from
647 * the default pm_poweroff() function which busy loops forever.
699 */ 648 */
700void lockup_detector_resume(void) 649void lockup_detector_soft_poweroff(void)
701{ 650{
702 mutex_lock(&watchdog_proc_mutex); 651 watchdog_enabled = 0;
703
704 watchdog_suspended--;
705 /*
706 * The watchdog threads are unparked if they were previously running
707 * and if there is no more active suspend request.
708 */
709 if (watchdog_running && !watchdog_suspended)
710 watchdog_unpark_threads();
711
712 watchdog_nmi_reconfigure();
713
714 mutex_unlock(&watchdog_proc_mutex);
715 put_online_cpus();
716} 652}
717 653
718#ifdef CONFIG_SYSCTL 654#ifdef CONFIG_SYSCTL
719 655
720/* 656/* Propagate any changes to the watchdog threads */
721 * Update the run state of the lockup detectors. 657static void proc_watchdog_update(void)
722 */
723static int proc_watchdog_update(void)
724{ 658{
725 int err = 0; 659 /* Remove impossible cpus to keep sysctl output clean. */
726 660 cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
727 /* 661 lockup_detector_reconfigure();
728 * Watchdog threads won't be started if they are already active.
729 * The 'watchdog_running' variable in watchdog_*_all_cpus() takes
730 * care of this. If those threads are already active, the sample
731 * period will be updated and the lockup detectors will be enabled
732 * or disabled 'on the fly'.
733 */
734 if (watchdog_enabled && watchdog_thresh)
735 err = watchdog_enable_all_cpus();
736 else
737 watchdog_disable_all_cpus();
738
739 watchdog_nmi_reconfigure();
740
741 return err;
742
743} 662}
744 663
745/* 664/*
746 * common function for watchdog, nmi_watchdog and soft_watchdog parameter 665 * common function for watchdog, nmi_watchdog and soft_watchdog parameter
747 * 666 *
748 * caller | table->data points to | 'which' contains the flag(s) 667 * caller | table->data points to | 'which'
749 * -------------------|-----------------------|----------------------------- 668 * -------------------|----------------------------|--------------------------
750 * proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed 669 * proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED |
751 * | | with SOFT_WATCHDOG_ENABLED 670 * | | SOFT_WATCHDOG_ENABLED
752 * -------------------|-----------------------|----------------------------- 671 * -------------------|----------------------------|--------------------------
753 * proc_nmi_watchdog | nmi_watchdog_enabled | NMI_WATCHDOG_ENABLED 672 * proc_nmi_watchdog | nmi_watchdog_user_enabled | NMI_WATCHDOG_ENABLED
754 * -------------------|-----------------------|----------------------------- 673 * -------------------|----------------------------|--------------------------
755 * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED 674 * proc_soft_watchdog | soft_watchdog_user_enabled | SOFT_WATCHDOG_ENABLED
756 */ 675 */
757static int proc_watchdog_common(int which, struct ctl_table *table, int write, 676static int proc_watchdog_common(int which, struct ctl_table *table, int write,
758 void __user *buffer, size_t *lenp, loff_t *ppos) 677 void __user *buffer, size_t *lenp, loff_t *ppos)
759{ 678{
760 int err, old, new; 679 int err, old, *param = table->data;
761 int *watchdog_param = (int *)table->data;
762 680
763 get_online_cpus(); 681 mutex_lock(&watchdog_mutex);
764 mutex_lock(&watchdog_proc_mutex);
765 682
766 if (watchdog_suspended) {
767 /* no parameter changes allowed while watchdog is suspended */
768 err = -EAGAIN;
769 goto out;
770 }
771
772 /*
773 * If the parameter is being read return the state of the corresponding
774 * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
775 * run state of the lockup detectors.
776 */
777 if (!write) { 683 if (!write) {
778 *watchdog_param = (watchdog_enabled & which) != 0; 684 /*
685 * On read synchronize the userspace interface. This is a
686 * racy snapshot.
687 */
688 *param = (watchdog_enabled & which) != 0;
779 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 689 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
780 } else { 690 } else {
691 old = READ_ONCE(*param);
781 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 692 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
782 if (err) 693 if (!err && old != READ_ONCE(*param))
783 goto out; 694 proc_watchdog_update();
784
785 /*
786 * There is a race window between fetching the current value
787 * from 'watchdog_enabled' and storing the new value. During
788 * this race window, watchdog_nmi_enable() can sneak in and
789 * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'.
790 * The 'cmpxchg' detects this race and the loop retries.
791 */
792 do {
793 old = watchdog_enabled;
794 /*
795 * If the parameter value is not zero set the
796 * corresponding bit(s), else clear it(them).
797 */
798 if (*watchdog_param)
799 new = old | which;
800 else
801 new = old & ~which;
802 } while (cmpxchg(&watchdog_enabled, old, new) != old);
803
804 /*
805 * Update the run state of the lockup detectors. There is _no_
806 * need to check the value returned by proc_watchdog_update()
807 * and to restore the previous value of 'watchdog_enabled' as
808 * both lockup detectors are disabled if proc_watchdog_update()
809 * returns an error.
810 */
811 if (old == new)
812 goto out;
813
814 err = proc_watchdog_update();
815 } 695 }
816out: 696 mutex_unlock(&watchdog_mutex);
817 mutex_unlock(&watchdog_proc_mutex);
818 put_online_cpus();
819 return err; 697 return err;
820} 698}
821 699
@@ -835,6 +713,8 @@ int proc_watchdog(struct ctl_table *table, int write,
835int proc_nmi_watchdog(struct ctl_table *table, int write, 713int proc_nmi_watchdog(struct ctl_table *table, int write,
836 void __user *buffer, size_t *lenp, loff_t *ppos) 714 void __user *buffer, size_t *lenp, loff_t *ppos)
837{ 715{
716 if (!nmi_watchdog_available && write)
717 return -ENOTSUPP;
838 return proc_watchdog_common(NMI_WATCHDOG_ENABLED, 718 return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
839 table, write, buffer, lenp, ppos); 719 table, write, buffer, lenp, ppos);
840} 720}
@@ -855,39 +735,17 @@ int proc_soft_watchdog(struct ctl_table *table, int write,
855int proc_watchdog_thresh(struct ctl_table *table, int write, 735int proc_watchdog_thresh(struct ctl_table *table, int write,
856 void __user *buffer, size_t *lenp, loff_t *ppos) 736 void __user *buffer, size_t *lenp, loff_t *ppos)
857{ 737{
858 int err, old, new; 738 int err, old;
859
860 get_online_cpus();
861 mutex_lock(&watchdog_proc_mutex);
862 739
863 if (watchdog_suspended) { 740 mutex_lock(&watchdog_mutex);
864 /* no parameter changes allowed while watchdog is suspended */
865 err = -EAGAIN;
866 goto out;
867 }
868 741
869 old = ACCESS_ONCE(watchdog_thresh); 742 old = READ_ONCE(watchdog_thresh);
870 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 743 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
871 744
872 if (err || !write) 745 if (!err && write && old != READ_ONCE(watchdog_thresh))
873 goto out; 746 proc_watchdog_update();
874
875 /*
876 * Update the sample period. Restore on failure.
877 */
878 new = ACCESS_ONCE(watchdog_thresh);
879 if (old == new)
880 goto out;
881 747
882 set_sample_period(); 748 mutex_unlock(&watchdog_mutex);
883 err = proc_watchdog_update();
884 if (err) {
885 watchdog_thresh = old;
886 set_sample_period();
887 }
888out:
889 mutex_unlock(&watchdog_proc_mutex);
890 put_online_cpus();
891 return err; 749 return err;
892} 750}
893 751
@@ -902,45 +760,19 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
902{ 760{
903 int err; 761 int err;
904 762
905 get_online_cpus(); 763 mutex_lock(&watchdog_mutex);
906 mutex_lock(&watchdog_proc_mutex);
907
908 if (watchdog_suspended) {
909 /* no parameter changes allowed while watchdog is suspended */
910 err = -EAGAIN;
911 goto out;
912 }
913 764
914 err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); 765 err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
915 if (!err && write) { 766 if (!err && write)
916 /* Remove impossible cpus to keep sysctl output cleaner. */ 767 proc_watchdog_update();
917 cpumask_and(&watchdog_cpumask, &watchdog_cpumask,
918 cpu_possible_mask);
919
920 if (watchdog_running) {
921 /*
922 * Failure would be due to being unable to allocate
923 * a temporary cpumask, so we are likely not in a
924 * position to do much else to make things better.
925 */
926 if (watchdog_update_cpus() != 0)
927 pr_err("cpumask update failed\n");
928 }
929 768
930 watchdog_nmi_reconfigure(); 769 mutex_unlock(&watchdog_mutex);
931 }
932out:
933 mutex_unlock(&watchdog_proc_mutex);
934 put_online_cpus();
935 return err; 770 return err;
936} 771}
937
938#endif /* CONFIG_SYSCTL */ 772#endif /* CONFIG_SYSCTL */
939 773
940void __init lockup_detector_init(void) 774void __init lockup_detector_init(void)
941{ 775{
942 set_sample_period();
943
944#ifdef CONFIG_NO_HZ_FULL 776#ifdef CONFIG_NO_HZ_FULL
945 if (tick_nohz_full_enabled()) { 777 if (tick_nohz_full_enabled()) {
946 pr_info("Disabling watchdog on nohz_full cores by default\n"); 778 pr_info("Disabling watchdog on nohz_full cores by default\n");
@@ -951,6 +783,7 @@ void __init lockup_detector_init(void)
951 cpumask_copy(&watchdog_cpumask, cpu_possible_mask); 783 cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
952#endif 784#endif
953 785
954 if (watchdog_enabled) 786 if (!watchdog_nmi_probe())
955 watchdog_enable_all_cpus(); 787 nmi_watchdog_available = true;
788 lockup_detector_setup();
956} 789}
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 3a09ea1b1d3d..71a62ceacdc8 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -21,8 +21,10 @@
21static DEFINE_PER_CPU(bool, hard_watchdog_warn); 21static DEFINE_PER_CPU(bool, hard_watchdog_warn);
22static DEFINE_PER_CPU(bool, watchdog_nmi_touch); 22static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
23static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); 23static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
24static struct cpumask dead_events_mask;
24 25
25static unsigned long hardlockup_allcpu_dumped; 26static unsigned long hardlockup_allcpu_dumped;
27static unsigned int watchdog_cpus;
26 28
27void arch_touch_nmi_watchdog(void) 29void arch_touch_nmi_watchdog(void)
28{ 30{
@@ -103,15 +105,12 @@ static struct perf_event_attr wd_hw_attr = {
103 105
104/* Callback function for perf event subsystem */ 106/* Callback function for perf event subsystem */
105static void watchdog_overflow_callback(struct perf_event *event, 107static void watchdog_overflow_callback(struct perf_event *event,
106 struct perf_sample_data *data, 108 struct perf_sample_data *data,
107 struct pt_regs *regs) 109 struct pt_regs *regs)
108{ 110{
109 /* Ensure the watchdog never gets throttled */ 111 /* Ensure the watchdog never gets throttled */
110 event->hw.interrupts = 0; 112 event->hw.interrupts = 0;
111 113
112 if (atomic_read(&watchdog_park_in_progress) != 0)
113 return;
114
115 if (__this_cpu_read(watchdog_nmi_touch) == true) { 114 if (__this_cpu_read(watchdog_nmi_touch) == true) {
116 __this_cpu_write(watchdog_nmi_touch, false); 115 __this_cpu_write(watchdog_nmi_touch, false);
117 return; 116 return;
@@ -160,104 +159,131 @@ static void watchdog_overflow_callback(struct perf_event *event,
160 return; 159 return;
161} 160}
162 161
163/* 162static int hardlockup_detector_event_create(void)
164 * People like the simple clean cpu node info on boot.
165 * Reduce the watchdog noise by only printing messages
166 * that are different from what cpu0 displayed.
167 */
168static unsigned long firstcpu_err;
169static atomic_t watchdog_cpus;
170
171int watchdog_nmi_enable(unsigned int cpu)
172{ 163{
164 unsigned int cpu = smp_processor_id();
173 struct perf_event_attr *wd_attr; 165 struct perf_event_attr *wd_attr;
174 struct perf_event *event = per_cpu(watchdog_ev, cpu); 166 struct perf_event *evt;
175 int firstcpu = 0;
176
177 /* nothing to do if the hard lockup detector is disabled */
178 if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
179 goto out;
180
181 /* is it already setup and enabled? */
182 if (event && event->state > PERF_EVENT_STATE_OFF)
183 goto out;
184
185 /* it is setup but not enabled */
186 if (event != NULL)
187 goto out_enable;
188
189 if (atomic_inc_return(&watchdog_cpus) == 1)
190 firstcpu = 1;
191 167
192 wd_attr = &wd_hw_attr; 168 wd_attr = &wd_hw_attr;
193 wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); 169 wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
194 170
195 /* Try to register using hardware perf events */ 171 /* Try to register using hardware perf events */
196 event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); 172 evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
173 watchdog_overflow_callback, NULL);
174 if (IS_ERR(evt)) {
175 pr_info("Perf event create on CPU %d failed with %ld\n", cpu,
176 PTR_ERR(evt));
177 return PTR_ERR(evt);
178 }
179 this_cpu_write(watchdog_ev, evt);
180 return 0;
181}
197 182
198 /* save the first cpu's error for future comparision */ 183/**
199 if (firstcpu && IS_ERR(event)) 184 * hardlockup_detector_perf_enable - Enable the local event
200 firstcpu_err = PTR_ERR(event); 185 */
186void hardlockup_detector_perf_enable(void)
187{
188 if (hardlockup_detector_event_create())
189 return;
201 190
202 if (!IS_ERR(event)) { 191 if (!watchdog_cpus++)
203 /* only print for the first cpu initialized */ 192 pr_info("Enabled. Permanently consumes one hw-PMU counter.\n");
204 if (firstcpu || firstcpu_err)
205 pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
206 goto out_save;
207 }
208 193
209 /* 194 perf_event_enable(this_cpu_read(watchdog_ev));
210 * Disable the hard lockup detector if _any_ CPU fails to set up
211 * set up the hardware perf event. The watchdog() function checks
212 * the NMI_WATCHDOG_ENABLED bit periodically.
213 *
214 * The barriers are for syncing up watchdog_enabled across all the
215 * cpus, as clear_bit() does not use barriers.
216 */
217 smp_mb__before_atomic();
218 clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
219 smp_mb__after_atomic();
220
221 /* skip displaying the same error again */
222 if (!firstcpu && (PTR_ERR(event) == firstcpu_err))
223 return PTR_ERR(event);
224
225 /* vary the KERN level based on the returned errno */
226 if (PTR_ERR(event) == -EOPNOTSUPP)
227 pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
228 else if (PTR_ERR(event) == -ENOENT)
229 pr_warn("disabled (cpu%i): hardware events not enabled\n",
230 cpu);
231 else
232 pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
233 cpu, PTR_ERR(event));
234
235 pr_info("Shutting down hard lockup detector on all cpus\n");
236
237 return PTR_ERR(event);
238
239 /* success path */
240out_save:
241 per_cpu(watchdog_ev, cpu) = event;
242out_enable:
243 perf_event_enable(per_cpu(watchdog_ev, cpu));
244out:
245 return 0;
246} 195}
247 196
248void watchdog_nmi_disable(unsigned int cpu) 197/**
198 * hardlockup_detector_perf_disable - Disable the local event
199 */
200void hardlockup_detector_perf_disable(void)
249{ 201{
250 struct perf_event *event = per_cpu(watchdog_ev, cpu); 202 struct perf_event *event = this_cpu_read(watchdog_ev);
251 203
252 if (event) { 204 if (event) {
253 perf_event_disable(event); 205 perf_event_disable(event);
206 cpumask_set_cpu(smp_processor_id(), &dead_events_mask);
207 watchdog_cpus--;
208 }
209}
210
211/**
212 * hardlockup_detector_perf_cleanup - Cleanup disabled events and destroy them
213 *
214 * Called from lockup_detector_cleanup(). Serialized by the caller.
215 */
216void hardlockup_detector_perf_cleanup(void)
217{
218 int cpu;
219
220 for_each_cpu(cpu, &dead_events_mask) {
221 struct perf_event *event = per_cpu(watchdog_ev, cpu);
222
223 /*
224 * Required because for_each_cpu() reports unconditionally
225 * CPU0 as set on UP kernels. Sigh.
226 */
227 if (event)
228 perf_event_release_kernel(event);
254 per_cpu(watchdog_ev, cpu) = NULL; 229 per_cpu(watchdog_ev, cpu) = NULL;
230 }
231 cpumask_clear(&dead_events_mask);
232}
233
234/**
235 * hardlockup_detector_perf_stop - Globally stop watchdog events
236 *
237 * Special interface for x86 to handle the perf HT bug.
238 */
239void __init hardlockup_detector_perf_stop(void)
240{
241 int cpu;
242
243 lockdep_assert_cpus_held();
244
245 for_each_online_cpu(cpu) {
246 struct perf_event *event = per_cpu(watchdog_ev, cpu);
247
248 if (event)
249 perf_event_disable(event);
250 }
251}
255 252
256 /* should be in cleanup, but blocks oprofile */ 253/**
257 perf_event_release_kernel(event); 254 * hardlockup_detector_perf_restart - Globally restart watchdog events
255 *
256 * Special interface for x86 to handle the perf HT bug.
257 */
258void __init hardlockup_detector_perf_restart(void)
259{
260 int cpu;
261
262 lockdep_assert_cpus_held();
263
264 if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
265 return;
266
267 for_each_online_cpu(cpu) {
268 struct perf_event *event = per_cpu(watchdog_ev, cpu);
269
270 if (event)
271 perf_event_enable(event);
272 }
273}
274
275/**
276 * hardlockup_detector_perf_init - Probe whether NMI event is available at all
277 */
278int __init hardlockup_detector_perf_init(void)
279{
280 int ret = hardlockup_detector_event_create();
258 281
259 /* watchdog_nmi_enable() expects this to be zero initially. */ 282 if (ret) {
260 if (atomic_dec_and_test(&watchdog_cpus)) 283 pr_info("Perf NMI watchdog permanently disabled\n");
261 firstcpu_err = 0; 284 } else {
285 perf_event_release_kernel(this_cpu_read(watchdog_ev));
286 this_cpu_write(watchdog_ev, NULL);
262 } 287 }
288 return ret;
263} 289}