diff options
author | Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com> | 2015-12-14 05:19:09 -0500 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2015-12-19 05:07:00 -0500 |
commit | 1717f2096b543cede7a380c858c765c41936bc35 (patch) | |
tree | fdf2498aadd9a6706c8af5e964591956e3a69101 | |
parent | d267b8d6c65ed7636a412ca479b96df7c0f5b27b (diff) |
panic, x86: Fix re-entrance problem due to panic on NMI
If a panic on NMI happens just after panic() has been entered on the same
CPU, panic() is called recursively. As a result, the kernel stalls after
failing to acquire panic_lock.
To avoid this problem, don't call panic() in NMI context if we've
already entered panic().
To do so, introduce the nmi_panic() macro, which also reduces code
duplication. In the case of panic on NMI, don't return from NMI handlers if
another CPU has already panicked.
Signed-off-by: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Aaron Tomlin <atomlin@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: David Hildenbrand <dahi@linux.vnet.ibm.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Gobinda Charan Maji <gobinda.cemk07@gmail.com>
Cc: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Javi Merino <javi.merino@arm.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: kexec@lists.infradead.org
Cc: linux-doc@vger.kernel.org
Cc: lkml <linux-kernel@vger.kernel.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Link: http://lkml.kernel.org/r/20151210014626.25437.13302.stgit@softrs
[ Cleanup comments, fixup formatting. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- | arch/x86/kernel/nmi.c | 16 | ||||
-rw-r--r-- | include/linux/kernel.h | 20 | ||||
-rw-r--r-- | kernel/panic.c | 16 | ||||
-rw-r--r-- | kernel/watchdog.c | 2 |
4 files changed, 46 insertions, 8 deletions
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 697f90db0e37..fca87938d739 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -231,7 +231,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) | |||
231 | #endif | 231 | #endif |
232 | 232 | ||
233 | if (panic_on_unrecovered_nmi) | 233 | if (panic_on_unrecovered_nmi) |
234 | panic("NMI: Not continuing"); | 234 | nmi_panic("NMI: Not continuing"); |
235 | 235 | ||
236 | pr_emerg("Dazed and confused, but trying to continue\n"); | 236 | pr_emerg("Dazed and confused, but trying to continue\n"); |
237 | 237 | ||
@@ -255,8 +255,16 @@ io_check_error(unsigned char reason, struct pt_regs *regs) | |||
255 | reason, smp_processor_id()); | 255 | reason, smp_processor_id()); |
256 | show_regs(regs); | 256 | show_regs(regs); |
257 | 257 | ||
258 | if (panic_on_io_nmi) | 258 | if (panic_on_io_nmi) { |
259 | panic("NMI IOCK error: Not continuing"); | 259 | nmi_panic("NMI IOCK error: Not continuing"); |
260 | |||
261 | /* | ||
262 | * If we end up here, it means we have received an NMI while | ||
263 | * processing panic(). Simply return without delaying and | ||
264 | * re-enabling NMIs. | ||
265 | */ | ||
266 | return; | ||
267 | } | ||
260 | 268 | ||
261 | /* Re-enable the IOCK line, wait for a few seconds */ | 269 | /* Re-enable the IOCK line, wait for a few seconds */ |
262 | reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; | 270 | reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; |
@@ -297,7 +305,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | |||
297 | 305 | ||
298 | pr_emerg("Do you have a strange power saving mode enabled?\n"); | 306 | pr_emerg("Do you have a strange power saving mode enabled?\n"); |
299 | if (unknown_nmi_panic || panic_on_unrecovered_nmi) | 307 | if (unknown_nmi_panic || panic_on_unrecovered_nmi) |
300 | panic("NMI: Not continuing"); | 308 | nmi_panic("NMI: Not continuing"); |
301 | 309 | ||
302 | pr_emerg("Dazed and confused, but trying to continue\n"); | 310 | pr_emerg("Dazed and confused, but trying to continue\n"); |
303 | } | 311 | } |
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 350dfb08aee3..750cc5c7c999 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h | |||
@@ -446,6 +446,26 @@ extern int sysctl_panic_on_stackoverflow; | |||
446 | extern bool crash_kexec_post_notifiers; | 446 | extern bool crash_kexec_post_notifiers; |
447 | 447 | ||
448 | /* | 448 | /* |
449 | * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It | ||
450 | * holds a CPU number which is executing panic() currently. A value of | ||
451 | * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec(). | ||
452 | */ | ||
453 | extern atomic_t panic_cpu; | ||
454 | #define PANIC_CPU_INVALID -1 | ||
455 | |||
456 | /* | ||
457 | * A variant of panic() called from NMI context. We return if we've already | ||
458 | * panicked on this CPU. | ||
459 | */ | ||
460 | #define nmi_panic(fmt, ...) \ | ||
461 | do { \ | ||
462 | int cpu = raw_smp_processor_id(); \ | ||
463 | \ | ||
464 | if (atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu) != cpu) \ | ||
465 | panic(fmt, ##__VA_ARGS__); \ | ||
466 | } while (0) | ||
467 | |||
468 | /* | ||
449 | * Only to be used by arch init code. If the user over-wrote the default | 469 | * Only to be used by arch init code. If the user over-wrote the default |
450 | * CONFIG_PANIC_TIMEOUT, honor it. | 470 | * CONFIG_PANIC_TIMEOUT, honor it. |
451 | */ | 471 | */ |
diff --git a/kernel/panic.c b/kernel/panic.c index 4b150bc0c6c1..3344524cf6ff 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -61,6 +61,8 @@ void __weak panic_smp_self_stop(void) | |||
61 | cpu_relax(); | 61 | cpu_relax(); |
62 | } | 62 | } |
63 | 63 | ||
64 | atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID); | ||
65 | |||
64 | /** | 66 | /** |
65 | * panic - halt the system | 67 | * panic - halt the system |
66 | * @fmt: The text string to print | 68 | * @fmt: The text string to print |
@@ -71,17 +73,17 @@ void __weak panic_smp_self_stop(void) | |||
71 | */ | 73 | */ |
72 | void panic(const char *fmt, ...) | 74 | void panic(const char *fmt, ...) |
73 | { | 75 | { |
74 | static DEFINE_SPINLOCK(panic_lock); | ||
75 | static char buf[1024]; | 76 | static char buf[1024]; |
76 | va_list args; | 77 | va_list args; |
77 | long i, i_next = 0; | 78 | long i, i_next = 0; |
78 | int state = 0; | 79 | int state = 0; |
80 | int old_cpu, this_cpu; | ||
79 | 81 | ||
80 | /* | 82 | /* |
81 | * Disable local interrupts. This will prevent panic_smp_self_stop | 83 | * Disable local interrupts. This will prevent panic_smp_self_stop |
82 | * from deadlocking the first cpu that invokes the panic, since | 84 | * from deadlocking the first cpu that invokes the panic, since |
83 | * there is nothing to prevent an interrupt handler (that runs | 85 | * there is nothing to prevent an interrupt handler (that runs |
84 | * after the panic_lock is acquired) from invoking panic again. | 86 | * after setting panic_cpu) from invoking panic() again. |
85 | */ | 87 | */ |
86 | local_irq_disable(); | 88 | local_irq_disable(); |
87 | 89 | ||
@@ -94,8 +96,16 @@ void panic(const char *fmt, ...) | |||
94 | * multiple parallel invocations of panic, all other CPUs either | 96 | * multiple parallel invocations of panic, all other CPUs either |
95 | * stop themself or will wait until they are stopped by the 1st CPU | 97 | * stop themself or will wait until they are stopped by the 1st CPU |
96 | * with smp_send_stop(). | 98 | * with smp_send_stop(). |
99 | * | ||
100 | * `old_cpu == PANIC_CPU_INVALID' means this is the 1st CPU which | ||
101 | * comes here, so go ahead. | ||
102 | * `old_cpu == this_cpu' means we came from nmi_panic() which sets | ||
103 | * panic_cpu to this CPU. In this case, this is also the 1st CPU. | ||
97 | */ | 104 | */ |
98 | if (!spin_trylock(&panic_lock)) | 105 | this_cpu = raw_smp_processor_id(); |
106 | old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); | ||
107 | |||
108 | if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu) | ||
99 | panic_smp_self_stop(); | 109 | panic_smp_self_stop(); |
100 | 110 | ||
101 | console_verbose(); | 111 | console_verbose(); |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 18f34cf75f74..b9be18fae154 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -351,7 +351,7 @@ static void watchdog_overflow_callback(struct perf_event *event, | |||
351 | trigger_allbutself_cpu_backtrace(); | 351 | trigger_allbutself_cpu_backtrace(); |
352 | 352 | ||
353 | if (hardlockup_panic) | 353 | if (hardlockup_panic) |
354 | panic("Hard LOCKUP"); | 354 | nmi_panic("Hard LOCKUP"); |
355 | 355 | ||
356 | __this_cpu_write(hard_watchdog_warn, true); | 356 | __this_cpu_write(hard_watchdog_warn, true); |
357 | return; | 357 | return; |