| author    | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-14 20:25:18 -0500 |
|-----------|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-14 20:25:18 -0500 |
| commit    | a57cb1c1d7974c62a5c80f7869e35b492ace12cd (patch) | |
| tree      | 5a42ee9a668f171143464bc86013954c1bbe94ad /kernel/watchdog.c | |
| parent    | cf1b3341afab9d3ad02a76b3a619ea027dcf4e28 (diff) | |
| parent    | e1e14ab8411df344a17687821f8f78f0a1e73cbb (diff) | |
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
- a few misc things
- kexec updates
- DMA-mapping updates to better support networking DMA operations
- IPC updates
- various MM changes to improve DAX fault handling
- lots of radix-tree changes, mainly to the test suite. All leading up
to reimplementing the IDA/IDR code to be a wrapper layer over the
radix-tree. However the final trigger-pulling patch is held off for
4.11.
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (114 commits)
radix tree test suite: delete unused rcupdate.c
radix tree test suite: add new tag check
radix-tree: ensure counts are initialised
radix tree test suite: cache recently freed objects
radix tree test suite: add some more functionality
idr: reduce the number of bits per level from 8 to 6
rxrpc: abstract away knowledge of IDR internals
tpm: use idr_find(), not idr_find_slowpath()
idr: add ida_is_empty
radix tree test suite: check multiorder iteration
radix-tree: fix replacement for multiorder entries
radix-tree: add radix_tree_split_preload()
radix-tree: add radix_tree_split
radix-tree: add radix_tree_join
radix-tree: delete radix_tree_range_tag_if_tagged()
radix-tree: delete radix_tree_locate_item()
radix-tree: improve multiorder iterators
btrfs: fix race in btrfs_free_dummy_fs_info()
radix-tree: improve dump output
radix-tree: make radix_tree_find_next_bit more useful
...
Diffstat (limited to 'kernel/watchdog.c')
| -rw-r--r-- | kernel/watchdog.c | 270 |
|---|---|---|
1 file changed, 15 insertions(+), 255 deletions(-)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 9acb29f280ec..d4b0fa01cae3 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -24,32 +24,14 @@
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
-#include <linux/perf_event.h>
 #include <linux/kthread.h>
 
-/*
- * The run state of the lockup detectors is controlled by the content of the
- * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
- * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
- *
- * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
- * are variables that are only used as an 'interface' between the parameters
- * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
- * 'watchdog_thresh' variable is handled differently because its value is not
- * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
- * is equal zero.
- */
-#define NMI_WATCHDOG_ENABLED_BIT   0
-#define SOFT_WATCHDOG_ENABLED_BIT  1
-#define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
-#define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)
-
 static DEFINE_MUTEX(watchdog_proc_mutex);
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
+#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
 #else
-static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
 #endif
 int __read_mostly nmi_watchdog_enabled;
 int __read_mostly soft_watchdog_enabled;
@@ -59,9 +41,6 @@ int __read_mostly watchdog_thresh = 10;
 #ifdef CONFIG_SMP
 int __read_mostly sysctl_softlockup_all_cpu_backtrace;
 int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
-#else
-#define sysctl_softlockup_all_cpu_backtrace 0
-#define sysctl_hardlockup_all_cpu_backtrace 0
 #endif
 static struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
@@ -100,50 +79,9 @@ static DEFINE_PER_CPU(bool, soft_watchdog_warn);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
 static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
 static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static DEFINE_PER_CPU(bool, hard_watchdog_warn);
-static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
-static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
-#endif
 static unsigned long soft_lockup_nmi_warn;
 
-/* boot commands */
-/*
- * Should we panic when a soft-lockup or hard-lockup occurs:
- */
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-unsigned int __read_mostly hardlockup_panic =
-			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
-static unsigned long hardlockup_allcpu_dumped;
-/*
- * We may not want to enable hard lockup detection by default in all cases,
- * for example when running the kernel as a guest on a hypervisor. In these
- * cases this function can be called to disable hard lockup detection. This
- * function should only be executed once by the boot processor before the
- * kernel command line parameters are parsed, because otherwise it is not
- * possible to override this in hardlockup_panic_setup().
- */
-void hardlockup_detector_disable(void)
-{
-	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-}
-
-static int __init hardlockup_panic_setup(char *str)
-{
-	if (!strncmp(str, "panic", 5))
-		hardlockup_panic = 1;
-	else if (!strncmp(str, "nopanic", 7))
-		hardlockup_panic = 0;
-	else if (!strncmp(str, "0", 1))
-		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-	else if (!strncmp(str, "1", 1))
-		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
-	return 1;
-}
-__setup("nmi_watchdog=", hardlockup_panic_setup);
-#endif
-
 unsigned int __read_mostly softlockup_panic =
 			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
 
@@ -264,32 +202,14 @@ void touch_all_softlockup_watchdogs(void)
 	wq_watchdog_touch(-1);
 }
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-void touch_nmi_watchdog(void)
-{
-	/*
-	 * Using __raw here because some code paths have
-	 * preemption enabled. If preemption is enabled
-	 * then interrupts should be enabled too, in which
-	 * case we shouldn't have to worry about the watchdog
-	 * going off.
-	 */
-	raw_cpu_write(watchdog_nmi_touch, true);
-	touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL(touch_nmi_watchdog);
-
-#endif
-
 void touch_softlockup_watchdog_sync(void)
 {
 	__this_cpu_write(softlockup_touch_sync, true);
 	__this_cpu_write(watchdog_touch_ts, 0);
 }
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
 /* watchdog detector functions */
-static bool is_hardlockup(void)
+bool is_hardlockup(void)
 {
 	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
 
@@ -299,7 +219,6 @@ static bool is_hardlockup(void)
 	__this_cpu_write(hrtimer_interrupts_saved, hrint);
 	return false;
 }
-#endif
 
 static int is_softlockup(unsigned long touch_ts)
 {
@@ -313,78 +232,22 @@ static int is_softlockup(unsigned long touch_ts)
 	return 0;
 }
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-
-static struct perf_event_attr wd_hw_attr = {
-	.type		= PERF_TYPE_HARDWARE,
-	.config		= PERF_COUNT_HW_CPU_CYCLES,
-	.size		= sizeof(struct perf_event_attr),
-	.pinned		= 1,
-	.disabled	= 1,
-};
-
-/* Callback function for perf event subsystem */
-static void watchdog_overflow_callback(struct perf_event *event,
-		 struct perf_sample_data *data,
-		 struct pt_regs *regs)
-{
-	/* Ensure the watchdog never gets throttled */
-	event->hw.interrupts = 0;
-
-	if (__this_cpu_read(watchdog_nmi_touch) == true) {
-		__this_cpu_write(watchdog_nmi_touch, false);
-		return;
-	}
-
-	/* check for a hardlockup
-	 * This is done by making sure our timer interrupt
-	 * is incrementing. The timer interrupt should have
-	 * fired multiple times before we overflow'd. If it hasn't
-	 * then this is a good indication the cpu is stuck
-	 */
-	if (is_hardlockup()) {
-		int this_cpu = smp_processor_id();
-		struct pt_regs *regs = get_irq_regs();
-
-		/* only print hardlockups once */
-		if (__this_cpu_read(hard_watchdog_warn) == true)
-			return;
-
-		pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
-		print_modules();
-		print_irqtrace_events(current);
-		if (regs)
-			show_regs(regs);
-		else
-			dump_stack();
-
-		/*
-		 * Perform all-CPU dump only once to avoid multiple hardlockups
-		 * generating interleaving traces
-		 */
-		if (sysctl_hardlockup_all_cpu_backtrace &&
-		    !test_and_set_bit(0, &hardlockup_allcpu_dumped))
-			trigger_allbutself_cpu_backtrace();
-
-		if (hardlockup_panic)
-			nmi_panic(regs, "Hard LOCKUP");
-
-		__this_cpu_write(hard_watchdog_warn, true);
-		return;
-	}
-
-	__this_cpu_write(hard_watchdog_warn, false);
-	return;
-}
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
-
 static void watchdog_interrupt_count(void)
 {
 	__this_cpu_inc(hrtimer_interrupts);
 }
 
-static int watchdog_nmi_enable(unsigned int cpu);
-static void watchdog_nmi_disable(unsigned int cpu);
+/*
+ * These two functions are mostly architecture specific
+ * defining them as weak here.
+ */
+int __weak watchdog_nmi_enable(unsigned int cpu)
+{
+	return 0;
+}
+void __weak watchdog_nmi_disable(unsigned int cpu)
+{
+}
 
 static int watchdog_enable_all_cpus(void);
 static void watchdog_disable_all_cpus(void);
@@ -577,109 +440,6 @@ static void watchdog(unsigned int cpu)
 	watchdog_nmi_disable(cpu);
 }
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-/*
- * People like the simple clean cpu node info on boot.
- * Reduce the watchdog noise by only printing messages
- * that are different from what cpu0 displayed.
- */
-static unsigned long cpu0_err;
-
-static int watchdog_nmi_enable(unsigned int cpu)
-{
-	struct perf_event_attr *wd_attr;
-	struct perf_event *event = per_cpu(watchdog_ev, cpu);
-
-	/* nothing to do if the hard lockup detector is disabled */
-	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
-		goto out;
-
-	/* is it already setup and enabled? */
-	if (event && event->state > PERF_EVENT_STATE_OFF)
-		goto out;
-
-	/* it is setup but not enabled */
-	if (event != NULL)
-		goto out_enable;
-
-	wd_attr = &wd_hw_attr;
-	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
-
-	/* Try to register using hardware perf events */
-	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
-
-	/* save cpu0 error for future comparision */
-	if (cpu == 0 && IS_ERR(event))
-		cpu0_err = PTR_ERR(event);
-
-	if (!IS_ERR(event)) {
-		/* only print for cpu0 or different than cpu0 */
-		if (cpu == 0 || cpu0_err)
-			pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
-		goto out_save;
-	}
-
-	/*
-	 * Disable the hard lockup detector if _any_ CPU fails to set up
-	 * set up the hardware perf event. The watchdog() function checks
-	 * the NMI_WATCHDOG_ENABLED bit periodically.
-	 *
-	 * The barriers are for syncing up watchdog_enabled across all the
-	 * cpus, as clear_bit() does not use barriers.
-	 */
-	smp_mb__before_atomic();
-	clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
-	smp_mb__after_atomic();
-
-	/* skip displaying the same error again */
-	if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
-		return PTR_ERR(event);
-
-	/* vary the KERN level based on the returned errno */
-	if (PTR_ERR(event) == -EOPNOTSUPP)
-		pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
-	else if (PTR_ERR(event) == -ENOENT)
-		pr_warn("disabled (cpu%i): hardware events not enabled\n",
-			 cpu);
-	else
-		pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
-			cpu, PTR_ERR(event));
-
-	pr_info("Shutting down hard lockup detector on all cpus\n");
-
-	return PTR_ERR(event);
-
-	/* success path */
-out_save:
-	per_cpu(watchdog_ev, cpu) = event;
-out_enable:
-	perf_event_enable(per_cpu(watchdog_ev, cpu));
-out:
-	return 0;
-}
-
-static void watchdog_nmi_disable(unsigned int cpu)
-{
-	struct perf_event *event = per_cpu(watchdog_ev, cpu);
-
-	if (event) {
-		perf_event_disable(event);
-		per_cpu(watchdog_ev, cpu) = NULL;
-
-		/* should be in cleanup, but blocks oprofile */
-		perf_event_release_kernel(event);
-	}
-	if (cpu == 0) {
-		/* watchdog_nmi_enable() expects this to be zero initially. */
-		cpu0_err = 0;
-	}
-}
-
-#else
-static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
-static void watchdog_nmi_disable(unsigned int cpu) { return; }
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
-
 static struct smp_hotplug_thread watchdog_threads = {
 	.store = &softlockup_watchdog,
 	.thread_should_run = watchdog_should_run,
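
The core mechanism this diff relies on is the weak-symbol linkage it gives watchdog_nmi_enable() and watchdog_nmi_disable(): the generic stubs in kernel/watchdog.c are marked __weak, so an architecture (or the relocated hardlockup detector) that supplies a non-weak definition replaces them at link time with no #ifdef needed at the call sites. Below is a minimal userspace sketch of that pattern; the printf output and the commented-out override are illustrative assumptions for the demo, not code from the kernel tree — only the function names mirror the patch.

```c
#include <stdio.h>

/* In the kernel, __weak expands to this attribute (see <linux/compiler.h>). */
#define __weak __attribute__((weak))

/* Generic fallback, analogous to the weak stub added in kernel/watchdog.c. */
int __weak watchdog_nmi_enable(unsigned int cpu)
{
	printf("generic stub: no NMI watchdog on cpu %u\n", cpu);
	return 0;
}

/*
 * If another translation unit defines a non-weak watchdog_nmi_enable(),
 * the linker silently prefers it over the weak stub above. Flipping this
 * to #if 1 (or moving it to a second .c file) simulates an architecture
 * override; the body here is purely hypothetical.
 */
#if 0
int watchdog_nmi_enable(unsigned int cpu)
{
	printf("arch override: programming watchdog hardware for cpu %u\n", cpu);
	return 0;
}
#endif

int main(void)
{
	/* Callers never know or care which definition they got. */
	return watchdog_nmi_enable(0);
}
```

Compile with `cc demo.c && ./a.out` to see the stub fire; the same call sites print the override message once a strong definition is linked in. This is why the hunks above can delete the CONFIG_HARDLOCKUP_DETECTOR #ifdef blocks wholesale: selection between generic and detector-specific behavior moves from the preprocessor to the linker.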