author     Steven Rostedt (Red Hat) <rostedt@goodmis.org>   2014-06-19 17:33:32 -0400
committer  Steven Rostedt <rostedt@goodmis.org>             2014-11-19 22:01:21 -0500
commit     a9edc88093287183ac934be44f295f183b2c62dd (patch)
tree       41e34afb4b458d73e0e374479c780d861cd10f7b /arch/x86/kernel/apic/hw_nmi.c
parent     afdc34a3d3b823a12a93b822ee1efb566f884032 (diff)
x86/nmi: Perform a safe NMI stack trace on all CPUs
When trigger_all_cpu_backtrace() is called on x86, it will trigger an
NMI on each CPU and call show_regs(). But this can lead to a hard
lockup if the NMI arrives while another printk() is in progress.
To avoid this, when the NMI triggers, it switches the printk
routine for that CPU to an NMI-safe printk function that records the
output in a per_cpu seq_buf descriptor. After all NMIs have finished
recording their data, the seq_bufs are printed from a safe context.
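
The pattern is easiest to see outside the kernel. Below is a minimal userspace C sketch of the same idea, with invented names (emit(), buf_vprintf(), emit_func): output normally goes straight to the console, a "handler" swaps in a buffering backend through a function pointer (the analogue of the per-CPU printk_func), and the recorded text is flushed later from a safe context. This is an illustration of the technique, not the kernel code itself.

```c
#include <stdarg.h>
#include <stdio.h>

static char buf[4096];
static size_t used;

static int direct_vprintf(const char *fmt, va_list args)
{
	return vprintf(fmt, args);
}

/* NMI-safe analogue: append to a private buffer instead of printing */
static int buf_vprintf(const char *fmt, va_list args)
{
	size_t room = sizeof(buf) - used;
	int n = vsnprintf(buf + used, room, fmt, args);

	if (n > 0)	/* advance by what was stored, clamped on truncation */
		used += ((size_t)n < room) ? (size_t)n : (room ? room - 1 : 0);
	return n;
}

/* The indirection point: the analogue of the per-CPU printk_func pointer */
static int (*emit_func)(const char *, va_list) = direct_vprintf;

static int emit(const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = emit_func(fmt, args);
	va_end(args);
	return ret;
}

int main(void)
{
	emit("direct: printed immediately\n");

	/* the "NMI handler" swaps the backend, records, then restores it */
	int (*saved)(const char *, va_list) = emit_func;
	emit_func = buf_vprintf;
	emit("recorded: backtrace for cpu %d\n", 1);
	emit_func = saved;

	/* later, from a safe context, flush what was recorded */
	fwrite(buf, 1, used, stdout);
	return 0;
}
```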
Link: http://lkml.kernel.org/p/20140619213952.360076309@goodmis.org
Link: http://lkml.kernel.org/r/20141115050605.055232587@goodmis.org
Tested-by: Jiri Kosina <jkosina@suse.cz>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Petr Mladek <pmladek@suse.cz>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'arch/x86/kernel/apic/hw_nmi.c')
-rw-r--r--  arch/x86/kernel/apic/hw_nmi.c  |  91
1 file changed, 86 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 6a1e71bde323..c95c3e9ce196 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -18,6 +18,7 @@
 #include <linux/nmi.h>
 #include <linux/module.h>
 #include <linux/delay.h>
+#include <linux/seq_buf.h>
 
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 u64 hw_nmi_get_sample_period(int watchdog_thresh)
@@ -29,14 +30,35 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh)
 #ifdef arch_trigger_all_cpu_backtrace
 /* For reliability, we're prepared to waste bits here. */
 static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
+static cpumask_var_t printtrace_mask;
+
+#define NMI_BUF_SIZE		4096
+
+struct nmi_seq_buf {
+	unsigned char		buffer[NMI_BUF_SIZE];
+	struct seq_buf		seq;
+};
+
+/* Safe printing in NMI context */
+static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq);
 
 /* "in progress" flag of arch_trigger_all_cpu_backtrace */
 static unsigned long backtrace_flag;
 
+static void print_seq_line(struct nmi_seq_buf *s, int start, int end)
+{
+	const char *buf = s->buffer + start;
+
+	printk("%.*s", (end - start) + 1, buf);
+}
+
 void arch_trigger_all_cpu_backtrace(bool include_self)
 {
+	struct nmi_seq_buf *s;
+	int len;
+	int cpu;
 	int i;
-	int cpu = get_cpu();
+	int this_cpu = get_cpu();
 
 	if (test_and_set_bit(0, &backtrace_flag)) {
 		/*
@@ -49,7 +71,17 @@ void arch_trigger_all_cpu_backtrace(bool include_self)
 
 	cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
 	if (!include_self)
-		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
+		cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask));
+
+	cpumask_copy(printtrace_mask, to_cpumask(backtrace_mask));
+	/*
+	 * Set up per_cpu seq_buf buffers that the NMIs running on the other
+	 * CPUs will write to.
+	 */
+	for_each_cpu(cpu, to_cpumask(backtrace_mask)) {
+		s = &per_cpu(nmi_print_seq, cpu);
+		seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE);
+	}
 
 	if (!cpumask_empty(to_cpumask(backtrace_mask))) {
 		pr_info("sending NMI to %s CPUs:\n",
@@ -65,11 +97,58 @@ void arch_trigger_all_cpu_backtrace(bool include_self)
 		touch_softlockup_watchdog();
 	}
 
+	/*
+	 * Now that all the NMIs have triggered, we can dump out their
+	 * back traces safely to the console.
+	 */
+	for_each_cpu(cpu, printtrace_mask) {
+		int last_i = 0;
+
+		s = &per_cpu(nmi_print_seq, cpu);
+		len = seq_buf_used(&s->seq);
+		if (!len)
+			continue;
+
+		/* Print line by line. */
+		for (i = 0; i < len; i++) {
+			if (s->buffer[i] == '\n') {
+				print_seq_line(s, last_i, i);
+				last_i = i + 1;
+			}
+		}
+		/* Check if there was a partial line. */
+		if (last_i < len) {
+			print_seq_line(s, last_i, len - 1);
+			pr_cont("\n");
+		}
+	}
+
 	clear_bit(0, &backtrace_flag);
 	smp_mb__after_atomic();
 	put_cpu();
 }
 
+/*
+ * It is not safe to call printk() directly from NMI handlers.
+ * It may be fine if the NMI detected a lock up and we have no choice
+ * but to do so, but doing a NMI on all other CPUs to get a back trace
+ * can be done with a sysrq-l. We don't want that to lock up, which
+ * can happen if the NMI interrupts a printk in progress.
+ *
+ * Instead, we redirect the vprintk() to this nmi_vprintk() that writes
+ * the content into a per cpu seq_buf buffer. Then when the NMIs are
+ * all done, we can safely dump the contents of the seq_buf to a printk()
+ * from a non NMI context.
+ */
+static int nmi_vprintk(const char *fmt, va_list args)
+{
+	struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
+	unsigned int len = seq_buf_used(&s->seq);
+
+	seq_buf_vprintf(&s->seq, fmt, args);
+	return seq_buf_used(&s->seq) - len;
+}
+
 static int
 arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
 {
@@ -78,12 +157,14 @@ arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
 	cpu = smp_processor_id();
 
 	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
-		static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
+		printk_func_t printk_func_save = this_cpu_read(printk_func);
 
-		arch_spin_lock(&lock);
+		/* Replace printk to write into the NMI seq */
+		this_cpu_write(printk_func, nmi_vprintk);
 		printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
 		show_regs(regs);
-		arch_spin_unlock(&lock);
+		this_cpu_write(printk_func, printk_func_save);
+
 		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
 		return NMI_HANDLED;
 	}
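
Note that the flush loop in arch_trigger_all_cpu_backtrace() emits each recorded buffer one line per printk() so output from different CPUs stays readable; this whole path is what sysrq-l exercises. A standalone C sketch of that line-splitting logic, where print_line() is a hypothetical stand-in for print_seq_line():

```c
#include <stdio.h>

/* Print buf[start..end] inclusive, mirroring print_seq_line() */
static void print_line(const char *buf, int start, int end)
{
	printf("%.*s", (end - start) + 1, buf + start);
}

int main(void)
{
	const char buf[] = "line one\nline two\npartial";
	int len = sizeof(buf) - 1;
	int i, last_i = 0;

	/* Print line by line, each newline-terminated chunk separately. */
	for (i = 0; i < len; i++) {
		if (buf[i] == '\n') {
			print_line(buf, last_i, i);	/* includes the '\n' */
			last_i = i + 1;
		}
	}
	/* A trailing partial line gets its newline appended, like pr_cont(). */
	if (last_i < len) {
		print_line(buf, last_i, len - 1);
		printf("\n");
	}
	return 0;
}
```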