diff options
| author | Prarit Bhargava <prarit@redhat.com> | 2014-12-10 18:45:50 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-10 20:41:10 -0500 |
| commit | 9e3961a0979817c612b10b2da4f3045ec9faa779 (patch) | |
| tree | 08ddeb0aed7fe4a0dd0e00838b373be786c95ada | |
| parent | f938612dd97d481b8b5bf960c992ae577f081c17 (diff) | |
kernel: add panic_on_warn
There have been several times where I have had to rebuild a kernel to
cause a panic when hitting a WARN() in the code in order to get a crash
dump from a system. Sometimes this is easy to do, other times (such as
in the case of a remote admin) it is not trivial to send new images to
the user.
A much easier method would be a switch to change the WARN() over to a
panic. This makes debugging easier in that I can now test the actual
image the WARN() was seen on and I do not have to engage in remote
debugging.
This patch adds a panic_on_warn kernel parameter and a
/proc/sys/kernel/panic_on_warn sysctl. When either is set, panic() is
called in the warn_slowpath_common() path. The function will still print
out the location of the warning before panicking.
An example of the panic_on_warn output:
The first line below is printed by the WARN_ON() and shows the WARN_ON()'s
location. After that the panic() output is displayed.
WARNING: CPU: 30 PID: 11698 at /home/prarit/dummy_module/dummy-module.c:25 init_dummy+0x1f/0x30 [dummy_module]()
Kernel panic - not syncing: panic_on_warn set ...
CPU: 30 PID: 11698 Comm: insmod Tainted: G W OE 3.17.0+ #57
Hardware name: Intel Corporation S2600CP/S2600CP, BIOS RMLSDP.86I.00.29.D696.1311111329 11/11/2013
0000000000000000 000000008e3f87df ffff88080f093c38 ffffffff81665190
0000000000000000 ffffffff818aea3d ffff88080f093cb8 ffffffff8165e2ec
ffffffff00000008 ffff88080f093cc8 ffff88080f093c68 000000008e3f87df
Call Trace:
[<ffffffff81665190>] dump_stack+0x46/0x58
[<ffffffff8165e2ec>] panic+0xd0/0x204
[<ffffffffa038e05f>] ? init_dummy+0x1f/0x30 [dummy_module]
[<ffffffff81076b90>] warn_slowpath_common+0xd0/0xd0
[<ffffffffa038e040>] ? dummy_greetings+0x40/0x40 [dummy_module]
[<ffffffff81076c8a>] warn_slowpath_null+0x1a/0x20
[<ffffffffa038e05f>] init_dummy+0x1f/0x30 [dummy_module]
[<ffffffff81002144>] do_one_initcall+0xd4/0x210
[<ffffffff811b52c2>] ? __vunmap+0xc2/0x110
[<ffffffff810f8889>] load_module+0x16a9/0x1b30
[<ffffffff810f3d30>] ? store_uevent+0x70/0x70
[<ffffffff810f49b9>] ? copy_module_from_fd.isra.44+0x129/0x180
[<ffffffff810f8ec6>] SyS_finit_module+0xa6/0xd0
[<ffffffff8166cf29>] system_call_fastpath+0x12/0x17
Successfully tested by me.
hpa said: There is another very valid use for this: many operators would
rather a machine shut down than be potentially compromised, either
functionally or security-wise.
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Acked-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Fabian Frederick <fabf@skynet.be>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
| -rw-r--r-- | Documentation/kdump/kdump.txt | 7 | ||||
| -rw-r--r-- | Documentation/kernel-parameters.txt | 3 | ||||
| -rw-r--r-- | Documentation/sysctl/kernel.txt | 40 | ||||
| -rw-r--r-- | include/linux/kernel.h | 1 | ||||
| -rw-r--r-- | include/uapi/linux/sysctl.h | 1 | ||||
| -rw-r--r-- | kernel/panic.c | 13 | ||||
| -rw-r--r-- | kernel/sysctl.c | 9 | ||||
| -rw-r--r-- | kernel/sysctl_binary.c | 1 |
8 files changed, 61 insertions, 14 deletions
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt index 6c0b9f27e465..bc4bd5a44b88 100644 --- a/Documentation/kdump/kdump.txt +++ b/Documentation/kdump/kdump.txt | |||
| @@ -471,6 +471,13 @@ format. Crash is available on Dave Anderson's site at the following URL: | |||
| 471 | 471 | ||
| 472 | http://people.redhat.com/~anderson/ | 472 | http://people.redhat.com/~anderson/ |
| 473 | 473 | ||
| 474 | Trigger Kdump on WARN() | ||
| 475 | ======================= | ||
| 476 | |||
| 477 | The kernel parameter, panic_on_warn, calls panic() in all WARN() paths. This | ||
| 478 | will cause a kdump to occur at the panic() call. In cases where a user wants | ||
| 479 | to specify this during runtime, /proc/sys/kernel/panic_on_warn can be set to 1 | ||
| 480 | to achieve the same behaviour. | ||
| 474 | 481 | ||
| 475 | Contact | 482 | Contact |
| 476 | ======= | 483 | ======= |
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 838f3776c924..d6eb3636fe5a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
| @@ -2509,6 +2509,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
| 2509 | timeout < 0: reboot immediately | 2509 | timeout < 0: reboot immediately |
| 2510 | Format: <timeout> | 2510 | Format: <timeout> |
| 2511 | 2511 | ||
| 2512 | panic_on_warn panic() instead of WARN(). Useful to cause kdump | ||
| 2513 | on a WARN(). | ||
| 2514 | |||
| 2512 | crash_kexec_post_notifiers | 2515 | crash_kexec_post_notifiers |
| 2513 | Run kdump after running panic-notifiers and dumping | 2516 | Run kdump after running panic-notifiers and dumping |
| 2514 | kmsg. This only for the users who doubt kdump always | 2517 | kmsg. This only for the users who doubt kdump always |
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 57baff5bdb80..b5d0c8501a18 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt | |||
| @@ -54,8 +54,9 @@ show up in /proc/sys/kernel: | |||
| 54 | - overflowuid | 54 | - overflowuid |
| 55 | - panic | 55 | - panic |
| 56 | - panic_on_oops | 56 | - panic_on_oops |
| 57 | - panic_on_unrecovered_nmi | ||
| 58 | - panic_on_stackoverflow | 57 | - panic_on_stackoverflow |
| 58 | - panic_on_unrecovered_nmi | ||
| 59 | - panic_on_warn | ||
| 59 | - pid_max | 60 | - pid_max |
| 60 | - powersave-nap [ PPC only ] | 61 | - powersave-nap [ PPC only ] |
| 61 | - printk | 62 | - printk |
| @@ -527,19 +528,6 @@ the recommended setting is 60. | |||
| 527 | 528 | ||
| 528 | ============================================================== | 529 | ============================================================== |
| 529 | 530 | ||
| 530 | panic_on_unrecovered_nmi: | ||
| 531 | |||
| 532 | The default Linux behaviour on an NMI of either memory or unknown is | ||
| 533 | to continue operation. For many environments such as scientific | ||
| 534 | computing it is preferable that the box is taken out and the error | ||
| 535 | dealt with than an uncorrected parity/ECC error get propagated. | ||
| 536 | |||
| 537 | A small number of systems do generate NMI's for bizarre random reasons | ||
| 538 | such as power management so the default is off. That sysctl works like | ||
| 539 | the existing panic controls already in that directory. | ||
| 540 | |||
| 541 | ============================================================== | ||
| 542 | |||
| 543 | panic_on_oops: | 531 | panic_on_oops: |
| 544 | 532 | ||
| 545 | Controls the kernel's behaviour when an oops or BUG is encountered. | 533 | Controls the kernel's behaviour when an oops or BUG is encountered. |
| @@ -563,6 +551,30 @@ This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled. | |||
| 563 | 551 | ||
| 564 | ============================================================== | 552 | ============================================================== |
| 565 | 553 | ||
| 554 | panic_on_unrecovered_nmi: | ||
| 555 | |||
| 556 | The default Linux behaviour on an NMI of either memory or unknown is | ||
| 557 | to continue operation. For many environments such as scientific | ||
| 558 | computing it is preferable that the box is taken out and the error | ||
| 559 | dealt with than an uncorrected parity/ECC error get propagated. | ||
| 560 | |||
| 561 | A small number of systems do generate NMI's for bizarre random reasons | ||
| 562 | such as power management so the default is off. That sysctl works like | ||
| 563 | the existing panic controls already in that directory. | ||
| 564 | |||
| 565 | ============================================================== | ||
| 566 | |||
| 567 | panic_on_warn: | ||
| 568 | |||
| 569 | Calls panic() in the WARN() path when set to 1. This is useful to avoid | ||
| 570 | a kernel rebuild when attempting to kdump at the location of a WARN(). | ||
| 571 | |||
| 572 | 0: only WARN(), default behaviour. | ||
| 573 | |||
| 574 | 1: call panic() after printing out WARN() location. | ||
| 575 | |||
| 576 | ============================================================== | ||
| 577 | |||
| 566 | perf_cpu_time_max_percent: | 578 | perf_cpu_time_max_percent: |
| 567 | 579 | ||
| 568 | Hints to the kernel how much CPU time it should be allowed to | 580 | Hints to the kernel how much CPU time it should be allowed to |
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 446d76a87ba1..233ea8107038 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h | |||
| @@ -427,6 +427,7 @@ extern int panic_timeout; | |||
| 427 | extern int panic_on_oops; | 427 | extern int panic_on_oops; |
| 428 | extern int panic_on_unrecovered_nmi; | 428 | extern int panic_on_unrecovered_nmi; |
| 429 | extern int panic_on_io_nmi; | 429 | extern int panic_on_io_nmi; |
| 430 | extern int panic_on_warn; | ||
| 430 | extern int sysctl_panic_on_stackoverflow; | 431 | extern int sysctl_panic_on_stackoverflow; |
| 431 | /* | 432 | /* |
| 432 | * Only to be used by arch init code. If the user over-wrote the default | 433 | * Only to be used by arch init code. If the user over-wrote the default |
diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 43aaba1cc037..0956373b56db 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h | |||
| @@ -153,6 +153,7 @@ enum | |||
| 153 | KERN_MAX_LOCK_DEPTH=74, /* int: rtmutex's maximum lock depth */ | 153 | KERN_MAX_LOCK_DEPTH=74, /* int: rtmutex's maximum lock depth */ |
| 154 | KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */ | 154 | KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */ |
| 155 | KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */ | 155 | KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */ |
| 156 | KERN_PANIC_ON_WARN=77, /* int: call panic() in WARN() functions */ | ||
| 156 | }; | 157 | }; |
| 157 | 158 | ||
| 158 | 159 | ||
diff --git a/kernel/panic.c b/kernel/panic.c index cf80672b7924..4d8d6f906dec 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
| @@ -33,6 +33,7 @@ static int pause_on_oops; | |||
| 33 | static int pause_on_oops_flag; | 33 | static int pause_on_oops_flag; |
| 34 | static DEFINE_SPINLOCK(pause_on_oops_lock); | 34 | static DEFINE_SPINLOCK(pause_on_oops_lock); |
| 35 | static bool crash_kexec_post_notifiers; | 35 | static bool crash_kexec_post_notifiers; |
| 36 | int panic_on_warn __read_mostly; | ||
| 36 | 37 | ||
| 37 | int panic_timeout = CONFIG_PANIC_TIMEOUT; | 38 | int panic_timeout = CONFIG_PANIC_TIMEOUT; |
| 38 | EXPORT_SYMBOL_GPL(panic_timeout); | 39 | EXPORT_SYMBOL_GPL(panic_timeout); |
| @@ -428,6 +429,17 @@ static void warn_slowpath_common(const char *file, int line, void *caller, | |||
| 428 | if (args) | 429 | if (args) |
| 429 | vprintk(args->fmt, args->args); | 430 | vprintk(args->fmt, args->args); |
| 430 | 431 | ||
| 432 | if (panic_on_warn) { | ||
| 433 | /* | ||
| 434 | * This thread may hit another WARN() in the panic path. | ||
| 435 | * Resetting this prevents additional WARN() from panicking the | ||
| 436 | * system on this thread. Other threads are blocked by the | ||
| 437 | * panic_mutex in panic(). | ||
| 438 | */ | ||
| 439 | panic_on_warn = 0; | ||
| 440 | panic("panic_on_warn set ...\n"); | ||
| 441 | } | ||
| 442 | |||
| 431 | print_modules(); | 443 | print_modules(); |
| 432 | dump_stack(); | 444 | dump_stack(); |
| 433 | print_oops_end_marker(); | 445 | print_oops_end_marker(); |
| @@ -485,6 +497,7 @@ EXPORT_SYMBOL(__stack_chk_fail); | |||
| 485 | 497 | ||
| 486 | core_param(panic, panic_timeout, int, 0644); | 498 | core_param(panic, panic_timeout, int, 0644); |
| 487 | core_param(pause_on_oops, pause_on_oops, int, 0644); | 499 | core_param(pause_on_oops, pause_on_oops, int, 0644); |
| 500 | core_param(panic_on_warn, panic_on_warn, int, 0644); | ||
| 488 | 501 | ||
| 489 | static int __init setup_crash_kexec_post_notifiers(char *s) | 502 | static int __init setup_crash_kexec_post_notifiers(char *s) |
| 490 | { | 503 | { |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 15f2511a1b7c..7c54ff79afd7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -1104,6 +1104,15 @@ static struct ctl_table kern_table[] = { | |||
| 1104 | .proc_handler = proc_dointvec, | 1104 | .proc_handler = proc_dointvec, |
| 1105 | }, | 1105 | }, |
| 1106 | #endif | 1106 | #endif |
| 1107 | { | ||
| 1108 | .procname = "panic_on_warn", | ||
| 1109 | .data = &panic_on_warn, | ||
| 1110 | .maxlen = sizeof(int), | ||
| 1111 | .mode = 0644, | ||
| 1112 | .proc_handler = proc_dointvec_minmax, | ||
| 1113 | .extra1 = &zero, | ||
| 1114 | .extra2 = &one, | ||
| 1115 | }, | ||
| 1107 | { } | 1116 | { } |
| 1108 | }; | 1117 | }; |
| 1109 | 1118 | ||
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 9a4f750a2963..7e7746a42a62 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c | |||
| @@ -137,6 +137,7 @@ static const struct bin_table bin_kern_table[] = { | |||
| 137 | { CTL_INT, KERN_COMPAT_LOG, "compat-log" }, | 137 | { CTL_INT, KERN_COMPAT_LOG, "compat-log" }, |
| 138 | { CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" }, | 138 | { CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" }, |
| 139 | { CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" }, | 139 | { CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" }, |
| 140 | { CTL_INT, KERN_PANIC_ON_WARN, "panic_on_warn" }, | ||
| 140 | {} | 141 | {} |
| 141 | }; | 142 | }; |
| 142 | 143 | ||
