aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/debug
diff options
context:
space:
mode:
authorDouglas Anderson <dianders@chromium.org>2019-09-25 19:47:45 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2019-09-25 20:51:40 -0400
commit7d92bda271ddcbb2d1be2f82733dcb9bf8378010 (patch)
tree1ba044465a88d6c1b387021a3dbb0b8e7cb803e1 /kernel/debug
parentac7c3e4ff401b304489a031938dbeaab585bfe0a (diff)
kgdb: don't use a notifier to enter kgdb at panic; call directly
Right now kgdb/kdb hooks up to debug panics by registering for the panic notifier. This works OK except that it means that kgdb/kdb gets called _after_ the CPUs in the system are taken offline. That means that if anything important was happening on those CPUs (like something that might have contributed to the panic) you can't debug them. Specifically I ran into a case where I got a panic because a task was "blocked for more than 120 seconds" which was detected on CPU 2. I nicely got shown stack traces in the kernel log for all CPUs including CPU 0, which was running 'PID: 111 Comm: kworker/0:1H' and was in the middle of __mmc_switch(). I then ended up at the kdb prompt where switched over to kgdb to try to look at local variables of the process on CPU 0. I found that I couldn't. Digging more, I found that I had no info on any tasks running on CPUs other than CPU 2 and that asking kdb for help showed me "Error: no saved data for this cpu". This was because all the CPUs were offline. Let's move the entry of kdb/kgdb to a direct call from panic() and stop using the generic notifier. Putting a direct call in allows us to order things more properly and it also doesn't seem like we're breaking any abstractions by calling into the debugger from the panic function. Daniel said: : This patch changes the way kdump and kgdb interact with each other. : However it would seem rather odd to have both tools simultaneously armed : and, even if they were, the user still has the option to use panic_timeout : to force a kdump to happen. Thus I think the change of order is : acceptable. Link: http://lkml.kernel.org/r/20190703170354.217312-1-dianders@chromium.org Signed-off-by: Douglas Anderson <dianders@chromium.org> Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org> Cc: Jason Wessel <jason.wessel@windriver.com> Cc: Kees Cook <keescook@chromium.org> Cc: Borislav Petkov <bp@suse.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Feng Tang <feng.tang@intel.com> Cc: YueHaibing <yuehaibing@huawei.com> Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com> Cc: "Steven Rostedt (VMware)" <rostedt@goodmis.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/debug')
-rw-r--r--kernel/debug/debug_core.c31
1 files changed, 11 insertions, 20 deletions
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 10f1187b3907..f76d6f77dd5e 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -893,30 +893,25 @@ static struct sysrq_key_op sysrq_dbg_op = {
893}; 893};
894#endif 894#endif
895 895
896static int kgdb_panic_event(struct notifier_block *self, 896void kgdb_panic(const char *msg)
897 unsigned long val,
898 void *data)
899{ 897{
898 if (!kgdb_io_module_registered)
899 return;
900
900 /* 901 /*
901 * Avoid entering the debugger if we were triggered due to a panic 902 * We don't want to get stuck waiting for input from user if
902 * We don't want to get stuck waiting for input from user in such case. 903 * "panic_timeout" indicates the system should automatically
903 * panic_timeout indicates the system should automatically
904 * reboot on panic. 904 * reboot on panic.
905 */ 905 */
906 if (panic_timeout) 906 if (panic_timeout)
907 return NOTIFY_DONE; 907 return;
908 908
909 if (dbg_kdb_mode) 909 if (dbg_kdb_mode)
910 kdb_printf("PANIC: %s\n", (char *)data); 910 kdb_printf("PANIC: %s\n", msg);
911
911 kgdb_breakpoint(); 912 kgdb_breakpoint();
912 return NOTIFY_DONE;
913} 913}
914 914
915static struct notifier_block kgdb_panic_event_nb = {
916 .notifier_call = kgdb_panic_event,
917 .priority = INT_MAX,
918};
919
920void __weak kgdb_arch_late(void) 915void __weak kgdb_arch_late(void)
921{ 916{
922} 917}
@@ -965,8 +960,6 @@ static void kgdb_register_callbacks(void)
965 kgdb_arch_late(); 960 kgdb_arch_late();
966 register_module_notifier(&dbg_module_load_nb); 961 register_module_notifier(&dbg_module_load_nb);
967 register_reboot_notifier(&dbg_reboot_notifier); 962 register_reboot_notifier(&dbg_reboot_notifier);
968 atomic_notifier_chain_register(&panic_notifier_list,
969 &kgdb_panic_event_nb);
970#ifdef CONFIG_MAGIC_SYSRQ 963#ifdef CONFIG_MAGIC_SYSRQ
971 register_sysrq_key('g', &sysrq_dbg_op); 964 register_sysrq_key('g', &sysrq_dbg_op);
972#endif 965#endif
@@ -980,16 +973,14 @@ static void kgdb_register_callbacks(void)
980static void kgdb_unregister_callbacks(void) 973static void kgdb_unregister_callbacks(void)
981{ 974{
982 /* 975 /*
983 * When this routine is called KGDB should unregister from the 976 * When this routine is called KGDB should unregister from
984 * panic handler and clean up, making sure it is not handling any 977 * handlers and clean up, making sure it is not handling any
985 * break exceptions at the time. 978 * break exceptions at the time.
986 */ 979 */
987 if (kgdb_io_module_registered) { 980 if (kgdb_io_module_registered) {
988 kgdb_io_module_registered = 0; 981 kgdb_io_module_registered = 0;
989 unregister_reboot_notifier(&dbg_reboot_notifier); 982 unregister_reboot_notifier(&dbg_reboot_notifier);
990 unregister_module_notifier(&dbg_module_load_nb); 983 unregister_module_notifier(&dbg_module_load_nb);
991 atomic_notifier_chain_unregister(&panic_notifier_list,
992 &kgdb_panic_event_nb);
993 kgdb_arch_exit(); 984 kgdb_arch_exit();
994#ifdef CONFIG_MAGIC_SYSRQ 985#ifdef CONFIG_MAGIC_SYSRQ
995 unregister_sysrq_key('g', &sysrq_dbg_op); 986 unregister_sysrq_key('g', &sysrq_dbg_op);