diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-08-02 05:28:21 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-08-02 07:27:17 -0400 |
commit | c1dc0b9c0c8979ce4d411caadff5c0d79dee58bc (patch) | |
tree | 6822205799a6cf8928623d60aa226c95534a20f9 /drivers/char | |
parent | ed680c4ad478d0fee9740f7d029087f181346564 (diff) |
debug lockups: Improve lockup detection
When debugging a recent lockup bug i found various deficiencies
in how our current lockup detection helpers work:
- SysRq-L is not very efficient as it uses a workqueue, hence
it cannot punch through hard lockups and cannot see through
most soft lockups either.
- The SysRq-L code depends on the NMI watchdog - which is off
by default.
- We dont print backtraces from the RCU code's built-in
'RCU state machine is stuck' debug code. This debug
code tends to be one of the first (and only) mechanisms
that show that a lockup has occured.
This patch changes the code so taht we:
- Trigger the NMI backtrace code from SysRq-L instead of using
a workqueue (which cannot punch through hard lockups)
- Trigger print-all-CPU-backtraces from the RCU lockup detection
code
Also decouple the backtrace printing code from the NMI watchdog:
- Dont use variable size cpumasks (it might not be initialized
and they are a bit more fragile anyway)
- Trigger an NMI immediately via an IPI, instead of waiting
for the NMI tick to occur. This is a lot faster and can
produce more relevant backtraces. It will also work if the
NMI watchdog is disabled.
- Dont print the 'dazed and confused' message when we print
a backtrace from the NMI
- Do a show_regs() plus a dump_stack() to get maximum info
out of the dump. Worst-case we get two stacktraces - which
is not a big deal. Sometimes, if register content is
corrupted, the precise stack walker in show_regs() wont
give us a full backtrace - in this case dump_stack() will
do it.
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'drivers/char')
-rw-r--r-- | drivers/char/sysrq.c | 8 |
1 files changed, 2 insertions, 6 deletions
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 5d7a02f63e1c..165f307f30e8 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/sysrq.h> | 24 | #include <linux/sysrq.h> |
25 | #include <linux/kbd_kern.h> | 25 | #include <linux/kbd_kern.h> |
26 | #include <linux/proc_fs.h> | 26 | #include <linux/proc_fs.h> |
27 | #include <linux/nmi.h> | ||
27 | #include <linux/quotaops.h> | 28 | #include <linux/quotaops.h> |
28 | #include <linux/perf_counter.h> | 29 | #include <linux/perf_counter.h> |
29 | #include <linux/kernel.h> | 30 | #include <linux/kernel.h> |
@@ -222,12 +223,7 @@ static DECLARE_WORK(sysrq_showallcpus, sysrq_showregs_othercpus); | |||
222 | 223 | ||
223 | static void sysrq_handle_showallcpus(int key, struct tty_struct *tty) | 224 | static void sysrq_handle_showallcpus(int key, struct tty_struct *tty) |
224 | { | 225 | { |
225 | struct pt_regs *regs = get_irq_regs(); | 226 | trigger_all_cpu_backtrace(); |
226 | if (regs) { | ||
227 | printk(KERN_INFO "CPU%d:\n", smp_processor_id()); | ||
228 | show_regs(regs); | ||
229 | } | ||
230 | schedule_work(&sysrq_showallcpus); | ||
231 | } | 227 | } |
232 | 228 | ||
233 | static struct sysrq_key_op sysrq_showallcpus_op = { | 229 | static struct sysrq_key_op sysrq_showallcpus_op = { |