summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPetr Mladek <pmladek@suse.com>2016-05-20 20:00:33 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-20 20:58:30 -0400
commit42a0bb3f71383b457a7db362f1c69e7afb96732b (patch)
treec63f12bed74fee20662fbcc8cc985d53a0d20def
parent2eeed7e98d6a1341b1574893a95ce5b8379140f2 (diff)
printk/nmi: generic solution for safe printk in NMI
printk() takes some locks and cannot be used in a safe way in NMI context. The chance of a deadlock is real, especially when printing stacks from all CPUs. This particular problem has been addressed on x86 by the commit a9edc8809328 ("x86/nmi: Perform a safe NMI stack trace on all CPUs"). The patchset brings two big advantages. First, it makes the NMI backtraces safe on all architectures for free. Second, it makes all NMI messages almost safe on all architectures (the temporary buffer is limited, so we should still keep the number of messages in NMI context to a minimum). Note that there already are several messages printed in NMI context: WARN_ON(in_nmi()), BUG_ON(in_nmi()), anything being printed out from MCE handlers. These are not easy to avoid. This patch reuses most of the code and makes it generic. It is useful for all messages and architectures that support NMI. The alternative printk_func is set when entering and is reset when leaving NMI context. It queues IRQ work to copy the messages into the main ring buffer in a safe context. __printk_nmi_flush() copies all available messages and resets the buffer. This allows us to use simple cmpxchg operations to get synchronized with writers. A spinlock is also used to get synchronized with other flushers. We no longer use seq_buf because it depends on an external lock. It would be hard to make all supported operations safe for a lockless use. It would be confusing and error prone to make only some operations safe. The code is put into a separate printk/nmi.c as suggested by Steven Rostedt. It needs a per-CPU buffer and is compiled only on architectures that call nmi_enter(). This is achieved by the new HAVE_NMI Kconfig flag. The exceptions are the MN10300 and Xtensa architectures. We need to clean up NMI handling there first. Let's do it separately. 
The patch is heavily based on the draft from Peter Zijlstra, see https://lkml.org/lkml/2015/6/10/327 [arnd@arndb.de: printk-nmi: use %zu format string for size_t] [akpm@linux-foundation.org: min_t->min - all types are size_t here] Signed-off-by: Petr Mladek <pmladek@suse.com> Suggested-by: Peter Zijlstra <peterz@infradead.org> Suggested-by: Steven Rostedt <rostedt@goodmis.org> Cc: Jan Kara <jack@suse.cz> Acked-by: Russell King <rmk+kernel@arm.linux.org.uk> [arm part] Cc: Daniel Thompson <daniel.thompson@linaro.org> Cc: Jiri Kosina <jkosina@suse.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: David Miller <davem@davemloft.net> Cc: Daniel Thompson <daniel.thompson@linaro.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/Kconfig4
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/kernel/smp.c2
-rw-r--r--arch/avr32/Kconfig1
-rw-r--r--arch/blackfin/Kconfig1
-rw-r--r--arch/cris/Kconfig1
-rw-r--r--arch/mips/Kconfig1
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/sh/Kconfig1
-rw-r--r--arch/sparc/Kconfig1
-rw-r--r--arch/tile/Kconfig1
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c1
-rw-r--r--include/linux/hardirq.h2
-rw-r--r--include/linux/percpu.h3
-rw-r--r--include/linux/printk.h12
-rw-r--r--init/Kconfig5
-rw-r--r--init/main.c1
-rw-r--r--kernel/printk/Makefile1
-rw-r--r--kernel/printk/internal.h44
-rw-r--r--kernel/printk/nmi.c219
-rw-r--r--kernel/printk/printk.c19
-rw-r--r--lib/nmi_backtrace.c89
24 files changed, 306 insertions, 107 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 0f298f9123dc..8f84fd268dee 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -187,7 +187,11 @@ config HAVE_OPTPROBES
187config HAVE_KPROBES_ON_FTRACE 187config HAVE_KPROBES_ON_FTRACE
188 bool 188 bool
189 189
190config HAVE_NMI
191 bool
192
190config HAVE_NMI_WATCHDOG 193config HAVE_NMI_WATCHDOG
194 depends on HAVE_NMI
191 bool 195 bool
192# 196#
193# An arch should select this if it provides all these things: 197# An arch should select this if it provides all these things:
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 956d3575426c..90542db1220d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -67,6 +67,7 @@ config ARM
67 select HAVE_KRETPROBES if (HAVE_KPROBES) 67 select HAVE_KRETPROBES if (HAVE_KPROBES)
68 select HAVE_MEMBLOCK 68 select HAVE_MEMBLOCK
69 select HAVE_MOD_ARCH_SPECIFIC 69 select HAVE_MOD_ARCH_SPECIFIC
70 select HAVE_NMI
70 select HAVE_OPROFILE if (HAVE_PERF_EVENTS) 71 select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
71 select HAVE_OPTPROBES if !THUMB2_KERNEL 72 select HAVE_OPTPROBES if !THUMB2_KERNEL
72 select HAVE_PERF_EVENTS 73 select HAVE_PERF_EVENTS
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index baee70267f29..df90bc59bfce 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -644,9 +644,11 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
644 break; 644 break;
645 645
646 case IPI_CPU_BACKTRACE: 646 case IPI_CPU_BACKTRACE:
647 printk_nmi_enter();
647 irq_enter(); 648 irq_enter();
648 nmi_cpu_backtrace(regs); 649 nmi_cpu_backtrace(regs);
649 irq_exit(); 650 irq_exit();
651 printk_nmi_exit();
650 break; 652 break;
651 653
652 default: 654 default:
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index e43519a2ca89..7e75d45e20cd 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -18,6 +18,7 @@ config AVR32
18 select GENERIC_CLOCKEVENTS 18 select GENERIC_CLOCKEVENTS
19 select HAVE_MOD_ARCH_SPECIFIC 19 select HAVE_MOD_ARCH_SPECIFIC
20 select MODULES_USE_ELF_RELA 20 select MODULES_USE_ELF_RELA
21 select HAVE_NMI
21 help 22 help
22 AVR32 is a high-performance 32-bit RISC microprocessor core, 23 AVR32 is a high-performance 32-bit RISC microprocessor core,
23 designed for cost-sensitive embedded applications, with particular 24 designed for cost-sensitive embedded applications, with particular
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index a63c12259e77..28c63fea786d 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -40,6 +40,7 @@ config BLACKFIN
40 select HAVE_MOD_ARCH_SPECIFIC 40 select HAVE_MOD_ARCH_SPECIFIC
41 select MODULES_USE_ELF_RELA 41 select MODULES_USE_ELF_RELA
42 select HAVE_DEBUG_STACKOVERFLOW 42 select HAVE_DEBUG_STACKOVERFLOW
43 select HAVE_NMI
43 44
44config GENERIC_CSUM 45config GENERIC_CSUM
45 def_bool y 46 def_bool y
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 5c0ca8ae9293..deba2662b9f3 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -70,6 +70,7 @@ config CRIS
70 select GENERIC_CLOCKEVENTS if ETRAX_ARCH_V32 70 select GENERIC_CLOCKEVENTS if ETRAX_ARCH_V32
71 select GENERIC_SCHED_CLOCK if ETRAX_ARCH_V32 71 select GENERIC_SCHED_CLOCK if ETRAX_ARCH_V32
72 select HAVE_DEBUG_BUGVERBOSE if ETRAX_ARCH_V32 72 select HAVE_DEBUG_BUGVERBOSE if ETRAX_ARCH_V32
73 select HAVE_NMI
73 74
74config HZ 75config HZ
75 int 76 int
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 5663f411c225..8040fb1845b4 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -48,6 +48,7 @@ config MIPS
48 select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC 48 select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC
49 select GENERIC_CMOS_UPDATE 49 select GENERIC_CMOS_UPDATE
50 select HAVE_MOD_ARCH_SPECIFIC 50 select HAVE_MOD_ARCH_SPECIFIC
51 select HAVE_NMI
51 select VIRT_TO_BUS 52 select VIRT_TO_BUS
52 select MODULES_USE_ELF_REL if MODULES 53 select MODULES_USE_ELF_REL if MODULES
53 select MODULES_USE_ELF_RELA if MODULES && 64BIT 54 select MODULES_USE_ELF_RELA if MODULES && 64BIT
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index f0403b58ae8b..01f7464d9fea 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -155,6 +155,7 @@ config PPC
155 select NO_BOOTMEM 155 select NO_BOOTMEM
156 select HAVE_GENERIC_RCU_GUP 156 select HAVE_GENERIC_RCU_GUP
157 select HAVE_PERF_EVENTS_NMI if PPC64 157 select HAVE_PERF_EVENTS_NMI if PPC64
158 select HAVE_NMI if PERF_EVENTS
158 select EDAC_SUPPORT 159 select EDAC_SUPPORT
159 select EDAC_ATOMIC_SCRUB 160 select EDAC_ATOMIC_SCRUB
160 select ARCH_HAS_DMA_SET_COHERENT_MASK 161 select ARCH_HAS_DMA_SET_COHERENT_MASK
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index e2c9aaaf64b2..1c3c43d9d1b5 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -166,6 +166,7 @@ config S390
166 select TTY 166 select TTY
167 select VIRT_CPU_ACCOUNTING 167 select VIRT_CPU_ACCOUNTING
168 select VIRT_TO_BUS 168 select VIRT_TO_BUS
169 select HAVE_NMI
169 170
170 171
171config SCHED_OMIT_FRAME_POINTER 172config SCHED_OMIT_FRAME_POINTER
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index cb93af8f8017..f6254341c065 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -44,6 +44,7 @@ config SUPERH
44 select OLD_SIGSUSPEND 44 select OLD_SIGSUSPEND
45 select OLD_SIGACTION 45 select OLD_SIGACTION
46 select HAVE_ARCH_AUDITSYSCALL 46 select HAVE_ARCH_AUDITSYSCALL
47 select HAVE_NMI
47 help 48 help
48 The SuperH is a RISC processor targeted for use in embedded systems 49 The SuperH is a RISC processor targeted for use in embedded systems
49 and consumer electronics; it was also used in the Sega Dreamcast 50 and consumer electronics; it was also used in the Sega Dreamcast
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 27b3a0ad40a0..1012f7ffcdf5 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -79,6 +79,7 @@ config SPARC64
79 select NO_BOOTMEM 79 select NO_BOOTMEM
80 select HAVE_ARCH_AUDITSYSCALL 80 select HAVE_ARCH_AUDITSYSCALL
81 select ARCH_SUPPORTS_ATOMIC_RMW 81 select ARCH_SUPPORTS_ATOMIC_RMW
82 select HAVE_NMI
82 83
83config ARCH_DEFCONFIG 84config ARCH_DEFCONFIG
84 string 85 string
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 174746225577..76989b878f3c 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -30,6 +30,7 @@ config TILE
30 select HAVE_DEBUG_STACKOVERFLOW 30 select HAVE_DEBUG_STACKOVERFLOW
31 select ARCH_WANT_FRAME_POINTERS 31 select ARCH_WANT_FRAME_POINTERS
32 select HAVE_CONTEXT_TRACKING 32 select HAVE_CONTEXT_TRACKING
33 select HAVE_NMI if USE_PMC
33 select EDAC_SUPPORT 34 select EDAC_SUPPORT
34 select GENERIC_STRNCPY_FROM_USER 35 select GENERIC_STRNCPY_FROM_USER
35 select GENERIC_STRNLEN_USER 36 select GENERIC_STRNLEN_USER
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ff5b3be95d4..0a7b885964ba 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -131,6 +131,7 @@ config X86
131 select HAVE_MEMBLOCK 131 select HAVE_MEMBLOCK
132 select HAVE_MEMBLOCK_NODE_MAP 132 select HAVE_MEMBLOCK_NODE_MAP
133 select HAVE_MIXED_BREAKPOINTS_REGS 133 select HAVE_MIXED_BREAKPOINTS_REGS
134 select HAVE_NMI
134 select HAVE_OPROFILE 135 select HAVE_OPROFILE
135 select HAVE_OPTPROBES 136 select HAVE_OPTPROBES
136 select HAVE_PCSPKR_PLATFORM 137 select HAVE_PCSPKR_PLATFORM
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 045e424fb368..7788ce643bf4 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -18,7 +18,6 @@
18#include <linux/nmi.h> 18#include <linux/nmi.h>
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/delay.h> 20#include <linux/delay.h>
21#include <linux/seq_buf.h>
22 21
23#ifdef CONFIG_HARDLOCKUP_DETECTOR 22#ifdef CONFIG_HARDLOCKUP_DETECTOR
24u64 hw_nmi_get_sample_period(int watchdog_thresh) 23u64 hw_nmi_get_sample_period(int watchdog_thresh)
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index dfd59d6bc6f0..c683996110b1 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -61,6 +61,7 @@ extern void irq_exit(void);
61 61
62#define nmi_enter() \ 62#define nmi_enter() \
63 do { \ 63 do { \
64 printk_nmi_enter(); \
64 lockdep_off(); \ 65 lockdep_off(); \
65 ftrace_nmi_enter(); \ 66 ftrace_nmi_enter(); \
66 BUG_ON(in_nmi()); \ 67 BUG_ON(in_nmi()); \
@@ -77,6 +78,7 @@ extern void irq_exit(void);
77 preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ 78 preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
78 ftrace_nmi_exit(); \ 79 ftrace_nmi_exit(); \
79 lockdep_on(); \ 80 lockdep_on(); \
81 printk_nmi_exit(); \
80 } while (0) 82 } while (0)
81 83
82#endif /* LINUX_HARDIRQ_H */ 84#endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 4bc6dafb703e..56939d3f6e53 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -129,7 +129,4 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
129 (typeof(type) __percpu *)__alloc_percpu(sizeof(type), \ 129 (typeof(type) __percpu *)__alloc_percpu(sizeof(type), \
130 __alignof__(type)) 130 __alignof__(type))
131 131
132/* To avoid include hell, as printk can not declare this, we declare it here */
133DECLARE_PER_CPU(printk_func_t, printk_func);
134
135#endif /* __LINUX_PERCPU_H */ 132#endif /* __LINUX_PERCPU_H */
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 9ccbdf2c1453..51dd6b824fe2 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -122,7 +122,17 @@ static inline __printf(1, 2) __cold
122void early_printk(const char *s, ...) { } 122void early_printk(const char *s, ...) { }
123#endif 123#endif
124 124
125typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args); 125#ifdef CONFIG_PRINTK_NMI
126extern void printk_nmi_init(void);
127extern void printk_nmi_enter(void);
128extern void printk_nmi_exit(void);
129extern void printk_nmi_flush(void);
130#else
131static inline void printk_nmi_init(void) { }
132static inline void printk_nmi_enter(void) { }
133static inline void printk_nmi_exit(void) { }
134static inline void printk_nmi_flush(void) { }
135#endif /* PRINTK_NMI */
126 136
127#ifdef CONFIG_PRINTK 137#ifdef CONFIG_PRINTK
128asmlinkage __printf(5, 0) 138asmlinkage __printf(5, 0)
diff --git a/init/Kconfig b/init/Kconfig
index 79a91a2c0444..bccc1d607be5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1454,6 +1454,11 @@ config PRINTK
1454 very difficult to diagnose system problems, saying N here is 1454 very difficult to diagnose system problems, saying N here is
1455 strongly discouraged. 1455 strongly discouraged.
1456 1456
1457config PRINTK_NMI
1458 def_bool y
1459 depends on PRINTK
1460 depends on HAVE_NMI
1461
1457config BUG 1462config BUG
1458 bool "BUG() support" if EXPERT 1463 bool "BUG() support" if EXPERT
1459 default y 1464 default y
diff --git a/init/main.c b/init/main.c
index 2075fafaad59..fa9b2bdde183 100644
--- a/init/main.c
+++ b/init/main.c
@@ -569,6 +569,7 @@ asmlinkage __visible void __init start_kernel(void)
569 timekeeping_init(); 569 timekeeping_init();
570 time_init(); 570 time_init();
571 sched_clock_postinit(); 571 sched_clock_postinit();
572 printk_nmi_init();
572 perf_event_init(); 573 perf_event_init();
573 profile_init(); 574 profile_init();
574 call_function_init(); 575 call_function_init();
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
index 85405bdcf2b3..abb0042a427b 100644
--- a/kernel/printk/Makefile
+++ b/kernel/printk/Makefile
@@ -1,2 +1,3 @@
1obj-y = printk.o 1obj-y = printk.o
2obj-$(CONFIG_PRINTK_NMI) += nmi.o
2obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o 3obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
new file mode 100644
index 000000000000..2de99faedfc1
--- /dev/null
+++ b/kernel/printk/internal.h
@@ -0,0 +1,44 @@
1/*
2 * internal.h - printk internal definitions
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17#include <linux/percpu.h>
18
19typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args);
20
21int __printf(1, 0) vprintk_default(const char *fmt, va_list args);
22
23#ifdef CONFIG_PRINTK_NMI
24
25/*
26 * printk() could not take logbuf_lock in NMI context. Instead,
27 * it temporary stores the strings into a per-CPU buffer.
28 * The alternative implementation is chosen transparently
29 * via per-CPU variable.
30 */
31DECLARE_PER_CPU(printk_func_t, printk_func);
32static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
33{
34 return this_cpu_read(printk_func)(fmt, args);
35}
36
37#else /* CONFIG_PRINTK_NMI */
38
39static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
40{
41 return vprintk_default(fmt, args);
42}
43
44#endif /* CONFIG_PRINTK_NMI */
diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c
new file mode 100644
index 000000000000..303cf0d15e57
--- /dev/null
+++ b/kernel/printk/nmi.c
@@ -0,0 +1,219 @@
1/*
2 * nmi.c - Safe printk in NMI context
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <linux/preempt.h>
19#include <linux/spinlock.h>
20#include <linux/smp.h>
21#include <linux/cpumask.h>
22#include <linux/irq_work.h>
23#include <linux/printk.h>
24
25#include "internal.h"
26
27/*
28 * printk() could not take logbuf_lock in NMI context. Instead,
29 * it uses an alternative implementation that temporary stores
30 * the strings into a per-CPU buffer. The content of the buffer
31 * is later flushed into the main ring buffer via IRQ work.
32 *
33 * The alternative implementation is chosen transparently
34 * via @printk_func per-CPU variable.
35 *
36 * The implementation allows to flush the strings also from another CPU.
37 * There are situations when we want to make sure that all buffers
38 * were handled or when IRQs are blocked.
39 */
40DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default;
41static int printk_nmi_irq_ready;
42
43#define NMI_LOG_BUF_LEN (4096 - sizeof(atomic_t) - sizeof(struct irq_work))
44
45struct nmi_seq_buf {
46 atomic_t len; /* length of written data; set back to 0 by the flusher */
47 struct irq_work work; /* IRQ work that flushes the buffer */
48 unsigned char buffer[NMI_LOG_BUF_LEN]; /* message text stored by vprintk_nmi() */
49};
50static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq);
51
52/*
53 * Safe printk() for NMI context. It uses a per-CPU buffer to
54 * store the message. NMIs are not nested, so there is always only
55 * one writer running. But the buffer might get flushed from another
56 * CPU, so we need to be careful.
57 */
58static int vprintk_nmi(const char *fmt, va_list args)
59{
60 struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
61 int add = 0;
62 size_t len;
63
64again:
65 len = atomic_read(&s->len);
66
67 if (len >= sizeof(s->buffer))
68 return 0;
69
70 /*
71 * Make sure that all old data have been read before the buffer was
72 * reseted. This is not needed when we just append data.
73 */
74 if (!len)
75 smp_rmb();
76
77 add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args);
78
79 /*
80 * Do it once again if the buffer has been flushed in the meantime.
81 * Note that atomic_cmpxchg() is an implicit memory barrier that
82 * makes sure that the data were written before updating s->len.
83 */
84 if (atomic_cmpxchg(&s->len, len, len + add) != len)
85 goto again;
86
87 /* Get flushed in a more safe context. */
88 if (add && printk_nmi_irq_ready) {
89 /* Make sure that IRQ work is really initialized. */
90 smp_rmb();
91 irq_work_queue(&s->work);
92 }
93
94 return add;
95}
96
97/*
98 * printk one line from the temporary buffer from @start index until
99 * and including the @end index.
100 */
101static void print_nmi_seq_line(struct nmi_seq_buf *s, int start, int end)
102{
103 const char *buf = s->buffer + start;
104
105 printk("%.*s", (end - start) + 1, buf);
106}
107
108/*
109 * Flush data from the per-CPU buffer that embeds @work, printing it line by line via printk().
110 * The function can be called either via IRQ work or independently.
111 */
112static void __printk_nmi_flush(struct irq_work *work)
113{
114 static raw_spinlock_t read_lock =
115 __RAW_SPIN_LOCK_INITIALIZER(read_lock);
116 struct nmi_seq_buf *s = container_of(work, struct nmi_seq_buf, work);
117 unsigned long flags;
118 size_t len, size;
119 int i, last_i;
120
121 /*
122 * The lock has two functions. First, one reader has to flush all
123 * available messages to make the lockless synchronization with
124 * writers easier. Second, we do not want to mix messages from
125 * different CPUs. This is especially important when printing
126 * a backtrace.
127 */
128 raw_spin_lock_irqsave(&read_lock, flags);
129
130 i = 0; /* index of the next unprinted byte; deliberately not reset on "more" retries */
131more:
132 len = atomic_read(&s->len);
133
134 /*
135 * This is just a paranoid check that nobody has manipulated
136 * the buffer in an unexpected way. If we printed something then
137 * @len must only increase.
138 */
139 if (i && i >= len)
140 pr_err("printk_nmi_flush: internal error: i=%d >= len=%zu\n",
141 i, len);
142
143 if (!len)
144 goto out; /* Someone else has already flushed the buffer. */
145
146 /* Make sure that data has been written up to the @len */
147 smp_rmb();
148
149 size = min(len, sizeof(s->buffer)); /* never scan past the end of @buffer, whatever @len says */
150 last_i = i;
151
152 /* Print line by line. */
153 for (; i < size; i++) {
154 if (s->buffer[i] == '\n') {
155 print_nmi_seq_line(s, last_i, i);
156 last_i = i + 1;
157 }
158 }
159 /* Check if there was a partial line. */
160 if (last_i < size) {
161 print_nmi_seq_line(s, last_i, size - 1);
162 pr_cont("\n"); /* terminate the partial line */
163 }
164
165 /*
166 * Check that nothing has got added in the meantime and truncate
167 * the buffer. Note that atomic_cmpxchg() is an implicit memory
168 * barrier that makes sure that the data were copied before
169 * updating s->len. If a writer appended data, print the rest too.
170 */
171 if (atomic_cmpxchg(&s->len, len, 0) != len)
172 goto more;
173
174out:
175 raw_spin_unlock_irqrestore(&read_lock, flags);
176}
177
178/**
179 * printk_nmi_flush - flush all per-cpu nmi buffers.
180 *
181 * The buffers are flushed automatically via IRQ work. This function
182 * is useful only when someone wants to be sure that all buffers have
183 * been flushed at some point.
184 */
185void printk_nmi_flush(void)
186{
187 int cpu;
188
189 for_each_possible_cpu(cpu)
190 __printk_nmi_flush(&per_cpu(nmi_print_seq, cpu).work);
191}
192
193void __init printk_nmi_init(void)
194{
195 int cpu;
196
197 for_each_possible_cpu(cpu) {
198 struct nmi_seq_buf *s = &per_cpu(nmi_print_seq, cpu);
199
200 init_irq_work(&s->work, __printk_nmi_flush);
201 }
202
203 /* Make sure that IRQ works are initialized before enabling. */
204 smp_wmb();
205 printk_nmi_irq_ready = 1;
206
207 /* Flush pending messages that did not have scheduled IRQ works. */
208 printk_nmi_flush();
209}
210
211void printk_nmi_enter(void)
212{
213 this_cpu_write(printk_func, vprintk_nmi); /* printk() on this CPU now stores into the per-CPU NMI buffer */
214}
215
216void printk_nmi_exit(void)
217{
218 this_cpu_write(printk_func, vprintk_default); /* back to the default printk() implementation on this CPU */
219}
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index bfbf284e4218..71eba0607034 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -55,6 +55,7 @@
55 55
56#include "console_cmdline.h" 56#include "console_cmdline.h"
57#include "braille.h" 57#include "braille.h"
58#include "internal.h"
58 59
59int console_printk[4] = { 60int console_printk[4] = {
60 CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ 61 CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */
@@ -1807,14 +1808,6 @@ int vprintk_default(const char *fmt, va_list args)
1807} 1808}
1808EXPORT_SYMBOL_GPL(vprintk_default); 1809EXPORT_SYMBOL_GPL(vprintk_default);
1809 1810
1810/*
1811 * This allows printk to be diverted to another function per cpu.
1812 * This is useful for calling printk functions from within NMI
1813 * without worrying about race conditions that can lock up the
1814 * box.
1815 */
1816DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default;
1817
1818/** 1811/**
1819 * printk - print a kernel message 1812 * printk - print a kernel message
1820 * @fmt: format string 1813 * @fmt: format string
@@ -1838,21 +1831,11 @@ DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default;
1838 */ 1831 */
1839asmlinkage __visible int printk(const char *fmt, ...) 1832asmlinkage __visible int printk(const char *fmt, ...)
1840{ 1833{
1841 printk_func_t vprintk_func;
1842 va_list args; 1834 va_list args;
1843 int r; 1835 int r;
1844 1836
1845 va_start(args, fmt); 1837 va_start(args, fmt);
1846
1847 /*
1848 * If a caller overrides the per_cpu printk_func, then it needs
1849 * to disable preemption when calling printk(). Otherwise
1850 * the printk_func should be set to the default. No need to
1851 * disable preemption here.
1852 */
1853 vprintk_func = this_cpu_read(printk_func);
1854 r = vprintk_func(fmt, args); 1838 r = vprintk_func(fmt, args);
1855
1856 va_end(args); 1839 va_end(args);
1857 1840
1858 return r; 1841 return r;
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index 6019c53c669e..26caf51cc238 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -16,33 +16,14 @@
16#include <linux/delay.h> 16#include <linux/delay.h>
17#include <linux/kprobes.h> 17#include <linux/kprobes.h>
18#include <linux/nmi.h> 18#include <linux/nmi.h>
19#include <linux/seq_buf.h>
20 19
21#ifdef arch_trigger_all_cpu_backtrace 20#ifdef arch_trigger_all_cpu_backtrace
22/* For reliability, we're prepared to waste bits here. */ 21/* For reliability, we're prepared to waste bits here. */
23static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; 22static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
24static cpumask_t printtrace_mask;
25
26#define NMI_BUF_SIZE 4096
27
28struct nmi_seq_buf {
29 unsigned char buffer[NMI_BUF_SIZE];
30 struct seq_buf seq;
31};
32
33/* Safe printing in NMI context */
34static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq);
35 23
36/* "in progress" flag of arch_trigger_all_cpu_backtrace */ 24/* "in progress" flag of arch_trigger_all_cpu_backtrace */
37static unsigned long backtrace_flag; 25static unsigned long backtrace_flag;
38 26
39static void print_seq_line(struct nmi_seq_buf *s, int start, int end)
40{
41 const char *buf = s->buffer + start;
42
43 printk("%.*s", (end - start) + 1, buf);
44}
45
46/* 27/*
47 * When raise() is called it will be is passed a pointer to the 28 * When raise() is called it will be is passed a pointer to the
48 * backtrace_mask. Architectures that call nmi_cpu_backtrace() 29 * backtrace_mask. Architectures that call nmi_cpu_backtrace()
@@ -52,8 +33,7 @@ static void print_seq_line(struct nmi_seq_buf *s, int start, int end)
52void nmi_trigger_all_cpu_backtrace(bool include_self, 33void nmi_trigger_all_cpu_backtrace(bool include_self,
53 void (*raise)(cpumask_t *mask)) 34 void (*raise)(cpumask_t *mask))
54{ 35{
55 struct nmi_seq_buf *s; 36 int i, this_cpu = get_cpu();
56 int i, cpu, this_cpu = get_cpu();
57 37
58 if (test_and_set_bit(0, &backtrace_flag)) { 38 if (test_and_set_bit(0, &backtrace_flag)) {
59 /* 39 /*
@@ -68,17 +48,6 @@ void nmi_trigger_all_cpu_backtrace(bool include_self,
68 if (!include_self) 48 if (!include_self)
69 cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask)); 49 cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask));
70 50
71 cpumask_copy(&printtrace_mask, to_cpumask(backtrace_mask));
72
73 /*
74 * Set up per_cpu seq_buf buffers that the NMIs running on the other
75 * CPUs will write to.
76 */
77 for_each_cpu(cpu, to_cpumask(backtrace_mask)) {
78 s = &per_cpu(nmi_print_seq, cpu);
79 seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE);
80 }
81
82 if (!cpumask_empty(to_cpumask(backtrace_mask))) { 51 if (!cpumask_empty(to_cpumask(backtrace_mask))) {
83 pr_info("Sending NMI to %s CPUs:\n", 52 pr_info("Sending NMI to %s CPUs:\n",
84 (include_self ? "all" : "other")); 53 (include_self ? "all" : "other"));
@@ -94,73 +63,25 @@ void nmi_trigger_all_cpu_backtrace(bool include_self,
94 } 63 }
95 64
96 /* 65 /*
97 * Now that all the NMIs have triggered, we can dump out their 66 * Force flush any remote buffers that might be stuck in IRQ context
98 * back traces safely to the console. 67 * and therefore could not run their irq_work.
99 */ 68 */
100 for_each_cpu(cpu, &printtrace_mask) { 69 printk_nmi_flush();
101 int len, last_i = 0;
102 70
103 s = &per_cpu(nmi_print_seq, cpu); 71 clear_bit_unlock(0, &backtrace_flag);
104 len = seq_buf_used(&s->seq);
105 if (!len)
106 continue;
107
108 /* Print line by line. */
109 for (i = 0; i < len; i++) {
110 if (s->buffer[i] == '\n') {
111 print_seq_line(s, last_i, i);
112 last_i = i + 1;
113 }
114 }
115 /* Check if there was a partial line. */
116 if (last_i < len) {
117 print_seq_line(s, last_i, len - 1);
118 pr_cont("\n");
119 }
120 }
121
122 clear_bit(0, &backtrace_flag);
123 smp_mb__after_atomic();
124 put_cpu(); 72 put_cpu();
125} 73}
126 74
127/*
128 * It is not safe to call printk() directly from NMI handlers.
129 * It may be fine if the NMI detected a lock up and we have no choice
130 * but to do so, but doing a NMI on all other CPUs to get a back trace
131 * can be done with a sysrq-l. We don't want that to lock up, which
132 * can happen if the NMI interrupts a printk in progress.
133 *
134 * Instead, we redirect the vprintk() to this nmi_vprintk() that writes
135 * the content into a per cpu seq_buf buffer. Then when the NMIs are
136 * all done, we can safely dump the contents of the seq_buf to a printk()
137 * from a non NMI context.
138 */
139static int nmi_vprintk(const char *fmt, va_list args)
140{
141 struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
142 unsigned int len = seq_buf_used(&s->seq);
143
144 seq_buf_vprintf(&s->seq, fmt, args);
145 return seq_buf_used(&s->seq) - len;
146}
147
148bool nmi_cpu_backtrace(struct pt_regs *regs) 75bool nmi_cpu_backtrace(struct pt_regs *regs)
149{ 76{
150 int cpu = smp_processor_id(); 77 int cpu = smp_processor_id();
151 78
152 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { 79 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
153 printk_func_t printk_func_save = this_cpu_read(printk_func);
154
155 /* Replace printk to write into the NMI seq */
156 this_cpu_write(printk_func, nmi_vprintk);
157 pr_warn("NMI backtrace for cpu %d\n", cpu); 80 pr_warn("NMI backtrace for cpu %d\n", cpu);
158 if (regs) 81 if (regs)
159 show_regs(regs); 82 show_regs(regs);
160 else 83 else
161 dump_stack(); 84 dump_stack();
162 this_cpu_write(printk_func, printk_func_save);
163
164 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); 85 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
165 return true; 86 return true;
166 } 87 }