Diffstat (limited to 'kernel')
 kernel/Makefile                |   1
 kernel/debug/debug_core.c      |   4
 kernel/debug/kdb/kdb_io.c      |  37
 kernel/debug/kdb/kdb_main.c    |   1
 kernel/debug/kdb/kdb_private.h |   1
 kernel/events/uprobes.c        |   4
 kernel/kcov.c                  |   5
 kernel/kexec_core.c            |   5
 kernel/printk/printk.c         |   3
 kernel/relay.c                 |   4
 kernel/signal.c                |   7
 kernel/sysctl.c                |   8
 kernel/sysctl_binary.c         |   4
 kernel/time/alarmtimer.c       |   3
 kernel/watchdog.c              | 270
 kernel/watchdog_hld.c          | 227
 16 files changed, 287 insertions(+), 297 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index eaee9de224bd..12c679f769c6 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -84,6 +84,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += debug/
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 0874e2edd275..79517e5549f1 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -598,11 +598,11 @@ return_normal:
 	/*
 	 * Wait for the other CPUs to be notified and be waiting for us:
 	 */
-	time_left = loops_per_jiffy * HZ;
+	time_left = MSEC_PER_SEC;
 	while (kgdb_do_roundup && --time_left &&
 	       (atomic_read(&masters_in_kgdb) + atomic_read(&slaves_in_kgdb)) !=
 			online_cpus)
-		cpu_relax();
+		udelay(1000);
 	if (!time_left)
 		pr_crit("Timed out waiting for secondary CPUs.\n");
 
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 98c9011eac78..e74be38245ad 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -30,6 +30,7 @@
 char kdb_prompt_str[CMD_BUFLEN];
 
 int kdb_trap_printk;
+int kdb_printf_cpu = -1;
 
 static int kgdb_transition_check(char *buffer)
 {
@@ -554,31 +555,26 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
 	int linecount;
 	int colcount;
 	int logging, saved_loglevel = 0;
-	int saved_trap_printk;
-	int got_printf_lock = 0;
 	int retlen = 0;
 	int fnd, len;
+	int this_cpu, old_cpu;
 	char *cp, *cp2, *cphold = NULL, replaced_byte = ' ';
 	char *moreprompt = "more> ";
 	struct console *c = console_drivers;
-	static DEFINE_SPINLOCK(kdb_printf_lock);
 	unsigned long uninitialized_var(flags);
 
-	preempt_disable();
-	saved_trap_printk = kdb_trap_printk;
-	kdb_trap_printk = 0;
-
 	/* Serialize kdb_printf if multiple cpus try to write at once.
 	 * But if any cpu goes recursive in kdb, just print the output,
 	 * even if it is interleaved with any other text.
 	 */
-	if (!KDB_STATE(PRINTF_LOCK)) {
-		KDB_STATE_SET(PRINTF_LOCK);
-		spin_lock_irqsave(&kdb_printf_lock, flags);
-		got_printf_lock = 1;
-		atomic_inc(&kdb_event);
-	} else {
-		__acquire(kdb_printf_lock);
+	local_irq_save(flags);
+	this_cpu = smp_processor_id();
+	for (;;) {
+		old_cpu = cmpxchg(&kdb_printf_cpu, -1, this_cpu);
+		if (old_cpu == -1 || old_cpu == this_cpu)
+			break;
+
+		cpu_relax();
 	}
 
 	diag = kdbgetintenv("LINES", &linecount);
@@ -847,16 +843,9 @@ kdb_print_out:
 	suspend_grep = 0; /* end of what may have been a recursive call */
 	if (logging)
 		console_loglevel = saved_loglevel;
-	if (KDB_STATE(PRINTF_LOCK) && got_printf_lock) {
-		got_printf_lock = 0;
-		spin_unlock_irqrestore(&kdb_printf_lock, flags);
-		KDB_STATE_CLEAR(PRINTF_LOCK);
-		atomic_dec(&kdb_event);
-	} else {
-		__release(kdb_printf_lock);
-	}
-	kdb_trap_printk = saved_trap_printk;
-	preempt_enable();
+	/* kdb_printf_cpu locked the code above. */
+	smp_store_release(&kdb_printf_cpu, old_cpu);
+	local_irq_restore(flags);
 	return retlen;
 }
 
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 2a20c0dfdafc..ca183919d302 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -60,7 +60,6 @@ int kdb_grep_trailing;
  * Kernel debugger state flags
  */
 int kdb_flags;
-atomic_t kdb_event;
 
 /*
  * kdb_lock protects updates to kdb_initial_cpu. Used to
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index 75014d7f4568..fc224fbcf954 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -132,7 +132,6 @@ extern int kdb_state;
 #define KDB_STATE_PAGER		0x00000400	/* pager is available */
 #define KDB_STATE_GO_SWITCH	0x00000800	/* go is switching
 						 * back to initial cpu */
-#define KDB_STATE_PRINTF_LOCK	0x00001000	/* Holds kdb_printf lock */
 #define KDB_STATE_WAIT_IPI	0x00002000	/* Waiting for kdb_ipi() NMI */
 #define KDB_STATE_RECURSE	0x00004000	/* Recursive entry to kdb */
 #define KDB_STATE_IP_ADJUSTED	0x00008000	/* Restart IP has been
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index f9ec9add2164..215871bda3a2 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -301,7 +301,7 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
 retry:
 	/* Read the page with vaddr into memory */
 	ret = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &old_page,
-			&vma);
+			&vma, NULL);
 	if (ret <= 0)
 		return ret;
 
@@ -1712,7 +1712,7 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
 	 * essentially a kernel access to the memory.
 	 */
 	result = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &page,
-			NULL);
+			NULL, NULL);
 	if (result < 0)
 		return result;
 
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 3cbb0c879705..cc2fa35ca480 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -1,11 +1,16 @@
 #define pr_fmt(fmt) "kcov: " fmt
 
 #define DISABLE_BRANCH_PROFILING
+#include <linux/atomic.h>
 #include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/export.h>
 #include <linux/types.h>
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/init.h>
 #include <linux/mm.h>
+#include <linux/preempt.h>
 #include <linux/printk.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 561675589511..5617cc412444 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -441,6 +441,8 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
 	while (hole_end <= crashk_res.end) {
 		unsigned long i;
 
+		cond_resched();
+
 		if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
 			break;
 		/* See if I overlap any of the segments */
@@ -1467,9 +1469,6 @@ static int __init crash_save_vmcoreinfo_init(void)
 #endif
 	VMCOREINFO_NUMBER(PG_head_mask);
 	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
-#ifdef CONFIG_X86
-	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
-#endif
 #ifdef CONFIG_HUGETLB_PAGE
 	VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR);
 #endif
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 577f2288d19f..a3ce35e0fa1e 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1926,7 +1926,8 @@ int vprintk_default(const char *fmt, va_list args)
 	int r;
 
 #ifdef CONFIG_KGDB_KDB
-	if (unlikely(kdb_trap_printk)) {
+	/* Allow to pass printk() to kdb but avoid a recursion. */
+	if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) {
 		r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
 		return r;
 	}
diff --git a/kernel/relay.c b/kernel/relay.c
index da79a109dbeb..8f18d314a96a 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -809,11 +809,11 @@ void relay_subbufs_consumed(struct rchan *chan,
 {
 	struct rchan_buf *buf;
 
-	if (!chan)
+	if (!chan || cpu >= NR_CPUS)
 		return;
 
 	buf = *per_cpu_ptr(chan->buf, cpu);
-	if (cpu >= NR_CPUS || !buf || subbufs_consumed > chan->n_subbufs)
+	if (!buf || subbufs_consumed > chan->n_subbufs)
 		return;
 
 	if (subbufs_consumed > buf->subbufs_produced - buf->subbufs_consumed)
diff --git a/kernel/signal.c b/kernel/signal.c
index 29a410780aa9..ae60996fedff 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2491,6 +2491,13 @@ void __set_current_blocked(const sigset_t *newset)
 {
 	struct task_struct *tsk = current;
 
+	/*
+	 * In case the signal mask hasn't changed, there is nothing we need
+	 * to do. The current->blocked shouldn't be modified by other task.
+	 */
+	if (sigequalsets(&tsk->blocked, newset))
+		return;
+
 	spin_lock_irq(&tsk->sighand->siglock);
 	__set_task_blocked(tsk, newset);
 	spin_unlock_irq(&tsk->sighand->siglock);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 39b3368f6de6..1475d2545b7e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2389,9 +2389,11 @@ static void validate_coredump_safety(void)
 #ifdef CONFIG_COREDUMP
 	if (suid_dumpable == SUID_DUMP_ROOT &&
 	    core_pattern[0] != '/' && core_pattern[0] != '|') {
-		printk(KERN_WARNING "Unsafe core_pattern used with "\
-			"suid_dumpable=2. Pipe handler or fully qualified "\
-			"core dump path required.\n");
+		printk(KERN_WARNING
+"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
+"Pipe handler or fully qualified core dump path required.\n"
+"Set kernel.core_pattern before fs.suid_dumpable.\n"
+		);
 	}
 #endif
 }
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 6eb99c17dbd8..ece4b177052b 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1354,8 +1354,8 @@ static void deprecated_sysctl_warning(const int *name, int nlen)
1354 "warning: process `%s' used the deprecated sysctl " 1354 "warning: process `%s' used the deprecated sysctl "
1355 "system call with ", current->comm); 1355 "system call with ", current->comm);
1356 for (i = 0; i < nlen; i++) 1356 for (i = 0; i < nlen; i++)
1357 printk("%d.", name[i]); 1357 printk(KERN_CONT "%d.", name[i]);
1358 printk("\n"); 1358 printk(KERN_CONT "\n");
1359 } 1359 }
1360 return; 1360 return;
1361} 1361}
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 9b08ca391aed..3921cf7fea8e 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -516,7 +516,8 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
 
 	spin_lock_irqsave(&ptr->it_lock, flags);
 	if ((ptr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) {
-		if (posix_timer_event(ptr, 0) != 0)
+		if (IS_ENABLED(CONFIG_POSIX_TIMERS) &&
+		    posix_timer_event(ptr, 0) != 0)
 			ptr->it_overrun++;
 	}
 
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 9acb29f280ec..d4b0fa01cae3 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -24,32 +24,14 @@
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
-#include <linux/perf_event.h>
 #include <linux/kthread.h>
 
-/*
- * The run state of the lockup detectors is controlled by the content of the
- * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
- * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
- *
- * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
- * are variables that are only used as an 'interface' between the parameters
- * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
- * 'watchdog_thresh' variable is handled differently because its value is not
- * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
- * is equal zero.
- */
-#define NMI_WATCHDOG_ENABLED_BIT   0
-#define SOFT_WATCHDOG_ENABLED_BIT  1
-#define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
-#define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)
-
 static DEFINE_MUTEX(watchdog_proc_mutex);
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
+#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
 #else
-static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
 #endif
 int __read_mostly nmi_watchdog_enabled;
 int __read_mostly soft_watchdog_enabled;
@@ -59,9 +41,6 @@ int __read_mostly watchdog_thresh = 10;
 #ifdef CONFIG_SMP
 int __read_mostly sysctl_softlockup_all_cpu_backtrace;
 int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
-#else
-#define sysctl_softlockup_all_cpu_backtrace 0
-#define sysctl_hardlockup_all_cpu_backtrace 0
 #endif
 static struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
@@ -100,50 +79,9 @@ static DEFINE_PER_CPU(bool, soft_watchdog_warn);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
 static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
 static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static DEFINE_PER_CPU(bool, hard_watchdog_warn);
-static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
-static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
-#endif
 static unsigned long soft_lockup_nmi_warn;
 
-/* boot commands */
-/*
- * Should we panic when a soft-lockup or hard-lockup occurs:
- */
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-unsigned int __read_mostly hardlockup_panic =
-			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
-static unsigned long hardlockup_allcpu_dumped;
-/*
- * We may not want to enable hard lockup detection by default in all cases,
- * for example when running the kernel as a guest on a hypervisor. In these
- * cases this function can be called to disable hard lockup detection. This
- * function should only be executed once by the boot processor before the
- * kernel command line parameters are parsed, because otherwise it is not
- * possible to override this in hardlockup_panic_setup().
- */
-void hardlockup_detector_disable(void)
-{
-	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-}
-
-static int __init hardlockup_panic_setup(char *str)
-{
-	if (!strncmp(str, "panic", 5))
-		hardlockup_panic = 1;
-	else if (!strncmp(str, "nopanic", 7))
-		hardlockup_panic = 0;
-	else if (!strncmp(str, "0", 1))
-		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-	else if (!strncmp(str, "1", 1))
-		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
-	return 1;
-}
-__setup("nmi_watchdog=", hardlockup_panic_setup);
-#endif
-
 unsigned int __read_mostly softlockup_panic =
 			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
 
@@ -264,32 +202,14 @@ void touch_all_softlockup_watchdogs(void)
 	wq_watchdog_touch(-1);
 }
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-void touch_nmi_watchdog(void)
-{
-	/*
-	 * Using __raw here because some code paths have
-	 * preemption enabled. If preemption is enabled
-	 * then interrupts should be enabled too, in which
-	 * case we shouldn't have to worry about the watchdog
-	 * going off.
-	 */
-	raw_cpu_write(watchdog_nmi_touch, true);
-	touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL(touch_nmi_watchdog);
-
-#endif
-
 void touch_softlockup_watchdog_sync(void)
 {
 	__this_cpu_write(softlockup_touch_sync, true);
 	__this_cpu_write(watchdog_touch_ts, 0);
 }
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
 /* watchdog detector functions */
-static bool is_hardlockup(void)
+bool is_hardlockup(void)
 {
 	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
 
@@ -299,7 +219,6 @@ static bool is_hardlockup(void)
 	__this_cpu_write(hrtimer_interrupts_saved, hrint);
 	return false;
 }
-#endif
 
 static int is_softlockup(unsigned long touch_ts)
 {
@@ -313,78 +232,22 @@ static int is_softlockup(unsigned long touch_ts)
 	return 0;
 }
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-
-static struct perf_event_attr wd_hw_attr = {
-	.type = PERF_TYPE_HARDWARE,
-	.config = PERF_COUNT_HW_CPU_CYCLES,
-	.size = sizeof(struct perf_event_attr),
-	.pinned = 1,
-	.disabled = 1,
-};
-
-/* Callback function for perf event subsystem */
-static void watchdog_overflow_callback(struct perf_event *event,
-		 struct perf_sample_data *data,
-		 struct pt_regs *regs)
-{
-	/* Ensure the watchdog never gets throttled */
-	event->hw.interrupts = 0;
-
-	if (__this_cpu_read(watchdog_nmi_touch) == true) {
-		__this_cpu_write(watchdog_nmi_touch, false);
-		return;
-	}
-
-	/* check for a hardlockup
-	 * This is done by making sure our timer interrupt
-	 * is incrementing. The timer interrupt should have
-	 * fired multiple times before we overflow'd. If it hasn't
-	 * then this is a good indication the cpu is stuck
-	 */
-	if (is_hardlockup()) {
-		int this_cpu = smp_processor_id();
-		struct pt_regs *regs = get_irq_regs();
-
-		/* only print hardlockups once */
-		if (__this_cpu_read(hard_watchdog_warn) == true)
-			return;
-
-		pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
-		print_modules();
-		print_irqtrace_events(current);
-		if (regs)
-			show_regs(regs);
-		else
-			dump_stack();
-
-		/*
-		 * Perform all-CPU dump only once to avoid multiple hardlockups
-		 * generating interleaving traces
-		 */
-		if (sysctl_hardlockup_all_cpu_backtrace &&
-				!test_and_set_bit(0, &hardlockup_allcpu_dumped))
-			trigger_allbutself_cpu_backtrace();
-
-		if (hardlockup_panic)
-			nmi_panic(regs, "Hard LOCKUP");
-
-		__this_cpu_write(hard_watchdog_warn, true);
-		return;
-	}
-
-	__this_cpu_write(hard_watchdog_warn, false);
-	return;
-}
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
-
 static void watchdog_interrupt_count(void)
 {
 	__this_cpu_inc(hrtimer_interrupts);
 }
 
-static int watchdog_nmi_enable(unsigned int cpu);
-static void watchdog_nmi_disable(unsigned int cpu);
+/*
+ * These two functions are mostly architecture specific
+ * defining them as weak here.
+ */
+int __weak watchdog_nmi_enable(unsigned int cpu)
+{
+	return 0;
+}
+void __weak watchdog_nmi_disable(unsigned int cpu)
+{
+}
 
 static int watchdog_enable_all_cpus(void);
 static void watchdog_disable_all_cpus(void);
@@ -577,109 +440,6 @@ static void watchdog(unsigned int cpu)
 	watchdog_nmi_disable(cpu);
 }
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-/*
- * People like the simple clean cpu node info on boot.
- * Reduce the watchdog noise by only printing messages
- * that are different from what cpu0 displayed.
- */
-static unsigned long cpu0_err;
-
-static int watchdog_nmi_enable(unsigned int cpu)
-{
-	struct perf_event_attr *wd_attr;
-	struct perf_event *event = per_cpu(watchdog_ev, cpu);
-
-	/* nothing to do if the hard lockup detector is disabled */
-	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
-		goto out;
-
-	/* is it already setup and enabled? */
-	if (event && event->state > PERF_EVENT_STATE_OFF)
-		goto out;
-
-	/* it is setup but not enabled */
-	if (event != NULL)
-		goto out_enable;
-
-	wd_attr = &wd_hw_attr;
-	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
-
-	/* Try to register using hardware perf events */
-	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
-
-	/* save cpu0 error for future comparision */
-	if (cpu == 0 && IS_ERR(event))
-		cpu0_err = PTR_ERR(event);
-
-	if (!IS_ERR(event)) {
-		/* only print for cpu0 or different than cpu0 */
-		if (cpu == 0 || cpu0_err)
-			pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
-		goto out_save;
-	}
-
-	/*
-	 * Disable the hard lockup detector if _any_ CPU fails to set up
-	 * set up the hardware perf event. The watchdog() function checks
-	 * the NMI_WATCHDOG_ENABLED bit periodically.
-	 *
-	 * The barriers are for syncing up watchdog_enabled across all the
-	 * cpus, as clear_bit() does not use barriers.
-	 */
-	smp_mb__before_atomic();
-	clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
-	smp_mb__after_atomic();
-
-	/* skip displaying the same error again */
-	if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
-		return PTR_ERR(event);
-
-	/* vary the KERN level based on the returned errno */
-	if (PTR_ERR(event) == -EOPNOTSUPP)
-		pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
-	else if (PTR_ERR(event) == -ENOENT)
-		pr_warn("disabled (cpu%i): hardware events not enabled\n",
-			 cpu);
-	else
-		pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
-			cpu, PTR_ERR(event));
-
-	pr_info("Shutting down hard lockup detector on all cpus\n");
-
-	return PTR_ERR(event);
-
-	/* success path */
-out_save:
-	per_cpu(watchdog_ev, cpu) = event;
-out_enable:
-	perf_event_enable(per_cpu(watchdog_ev, cpu));
-out:
-	return 0;
-}
-
-static void watchdog_nmi_disable(unsigned int cpu)
-{
-	struct perf_event *event = per_cpu(watchdog_ev, cpu);
-
-	if (event) {
-		perf_event_disable(event);
-		per_cpu(watchdog_ev, cpu) = NULL;
-
-		/* should be in cleanup, but blocks oprofile */
-		perf_event_release_kernel(event);
-	}
-	if (cpu == 0) {
-		/* watchdog_nmi_enable() expects this to be zero initially. */
-		cpu0_err = 0;
-	}
-}
-
-#else
-static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
-static void watchdog_nmi_disable(unsigned int cpu) { return; }
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
-
 static struct smp_hotplug_thread watchdog_threads = {
 	.store = &softlockup_watchdog,
 	.thread_should_run = watchdog_should_run,
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
new file mode 100644
index 000000000000..84016c8aee6b
--- /dev/null
+++ b/kernel/watchdog_hld.c
@@ -0,0 +1,227 @@
+/*
+ * Detect hard lockups on a system
+ *
+ * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
+ *
+ * Note: Most of this code is borrowed heavily from the original softlockup
+ * detector, so thanks to Ingo for the initial implementation.
+ * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
+ * to those contributors as well.
+ */
+
+#define pr_fmt(fmt) "NMI watchdog: " fmt
+
+#include <linux/nmi.h>
+#include <linux/module.h>
+#include <asm/irq_regs.h>
+#include <linux/perf_event.h>
+
+static DEFINE_PER_CPU(bool, hard_watchdog_warn);
+static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
+static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
+
+/* boot commands */
+/*
+ * Should we panic when a soft-lockup or hard-lockup occurs:
+ */
+unsigned int __read_mostly hardlockup_panic =
+			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+static unsigned long hardlockup_allcpu_dumped;
+/*
+ * We may not want to enable hard lockup detection by default in all cases,
+ * for example when running the kernel as a guest on a hypervisor. In these
+ * cases this function can be called to disable hard lockup detection. This
+ * function should only be executed once by the boot processor before the
+ * kernel command line parameters are parsed, because otherwise it is not
+ * possible to override this in hardlockup_panic_setup().
+ */
+void hardlockup_detector_disable(void)
+{
+	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+}
+
+static int __init hardlockup_panic_setup(char *str)
+{
+	if (!strncmp(str, "panic", 5))
+		hardlockup_panic = 1;
+	else if (!strncmp(str, "nopanic", 7))
+		hardlockup_panic = 0;
+	else if (!strncmp(str, "0", 1))
+		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+	else if (!strncmp(str, "1", 1))
+		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+	return 1;
+}
+__setup("nmi_watchdog=", hardlockup_panic_setup);
+
+void touch_nmi_watchdog(void)
+{
+	/*
+	 * Using __raw here because some code paths have
+	 * preemption enabled. If preemption is enabled
+	 * then interrupts should be enabled too, in which
+	 * case we shouldn't have to worry about the watchdog
+	 * going off.
+	 */
+	raw_cpu_write(watchdog_nmi_touch, true);
+	touch_softlockup_watchdog();
+}
+EXPORT_SYMBOL(touch_nmi_watchdog);
+
+static struct perf_event_attr wd_hw_attr = {
+	.type = PERF_TYPE_HARDWARE,
+	.config = PERF_COUNT_HW_CPU_CYCLES,
+	.size = sizeof(struct perf_event_attr),
+	.pinned = 1,
+	.disabled = 1,
+};
+
+/* Callback function for perf event subsystem */
+static void watchdog_overflow_callback(struct perf_event *event,
+		 struct perf_sample_data *data,
+		 struct pt_regs *regs)
+{
+	/* Ensure the watchdog never gets throttled */
+	event->hw.interrupts = 0;
+
+	if (__this_cpu_read(watchdog_nmi_touch) == true) {
+		__this_cpu_write(watchdog_nmi_touch, false);
+		return;
+	}
+
+	/* check for a hardlockup
+	 * This is done by making sure our timer interrupt
+	 * is incrementing. The timer interrupt should have
+	 * fired multiple times before we overflow'd. If it hasn't
+	 * then this is a good indication the cpu is stuck
+	 */
+	if (is_hardlockup()) {
+		int this_cpu = smp_processor_id();
+
+		/* only print hardlockups once */
+		if (__this_cpu_read(hard_watchdog_warn) == true)
+			return;
+
+		pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+		print_modules();
+		print_irqtrace_events(current);
+		if (regs)
+			show_regs(regs);
+		else
+			dump_stack();
+
+		/*
+		 * Perform all-CPU dump only once to avoid multiple hardlockups
+		 * generating interleaving traces
+		 */
+		if (sysctl_hardlockup_all_cpu_backtrace &&
+				!test_and_set_bit(0, &hardlockup_allcpu_dumped))
+			trigger_allbutself_cpu_backtrace();
+
+		if (hardlockup_panic)
+			nmi_panic(regs, "Hard LOCKUP");
+
+		__this_cpu_write(hard_watchdog_warn, true);
+		return;
+	}
+
+	__this_cpu_write(hard_watchdog_warn, false);
+	return;
+}
+
+/*
+ * People like the simple clean cpu node info on boot.
+ * Reduce the watchdog noise by only printing messages
+ * that are different from what cpu0 displayed.
+ */
+static unsigned long cpu0_err;
+
+int watchdog_nmi_enable(unsigned int cpu)
+{
+	struct perf_event_attr *wd_attr;
+	struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+	/* nothing to do if the hard lockup detector is disabled */
+	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+		goto out;
+
+	/* is it already setup and enabled? */
+	if (event && event->state > PERF_EVENT_STATE_OFF)
+		goto out;
+
+	/* it is setup but not enabled */
+	if (event != NULL)
+		goto out_enable;
+
+	wd_attr = &wd_hw_attr;
+	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
+
+	/* Try to register using hardware perf events */
+	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
+
+	/* save cpu0 error for future comparision */
+	if (cpu == 0 && IS_ERR(event))
+		cpu0_err = PTR_ERR(event);
+
+	if (!IS_ERR(event)) {
+		/* only print for cpu0 or different than cpu0 */
+		if (cpu == 0 || cpu0_err)
+			pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
+		goto out_save;
+	}
+
+	/*
+	 * Disable the hard lockup detector if _any_ CPU fails to set up
+	 * set up the hardware perf event. The watchdog() function checks
+	 * the NMI_WATCHDOG_ENABLED bit periodically.
+	 *
+	 * The barriers are for syncing up watchdog_enabled across all the
+	 * cpus, as clear_bit() does not use barriers.
+	 */
+	smp_mb__before_atomic();
+	clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
+	smp_mb__after_atomic();
+
+	/* skip displaying the same error again */
+	if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
+		return PTR_ERR(event);
+
+	/* vary the KERN level based on the returned errno */
+	if (PTR_ERR(event) == -EOPNOTSUPP)
+		pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
+	else if (PTR_ERR(event) == -ENOENT)
+		pr_warn("disabled (cpu%i): hardware events not enabled\n",
+			 cpu);
+	else
+		pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
+			cpu, PTR_ERR(event));
+
+	pr_info("Shutting down hard lockup detector on all cpus\n");
+
+	return PTR_ERR(event);
+
+	/* success path */
+out_save:
+	per_cpu(watchdog_ev, cpu) = event;
+out_enable:
+	perf_event_enable(per_cpu(watchdog_ev, cpu));
+out:
+	return 0;
+}
+
+void watchdog_nmi_disable(unsigned int cpu)
+{
+	struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+	if (event) {
+		perf_event_disable(event);
+		per_cpu(watchdog_ev, cpu) = NULL;
+
+		/* should be in cleanup, but blocks oprofile */
+		perf_event_release_kernel(event);
+	}
+	if (cpu == 0) {
+		/* watchdog_nmi_enable() expects this to be zero initially. */
+		cpu0_err = 0;
+	}
+}