summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDouglas Anderson <dianders@chromium.org>2018-12-04 22:38:26 -0500
committerDaniel Thompson <daniel.thompson@linaro.org>2018-12-30 03:28:02 -0500
commit3cd99ac3559855f69afbc1d5080e17eaa12394ff (patch)
tree39731c43ed540124099d310bfd2bb2a4490b3ded
parent9ef7fa507d6b53a96de4da3298c5f01bde603c0a (diff)
kgdb: Fix kgdb_roundup_cpus() for arches who used smp_call_function()
When I had lockdep turned on and dropped into kgdb I got a nice splat on my system. Specifically it hit: DEBUG_LOCKS_WARN_ON(current->hardirq_context) Specifically it looked like this: sysrq: SysRq : DEBUG ------------[ cut here ]------------ DEBUG_LOCKS_WARN_ON(current->hardirq_context) WARNING: CPU: 0 PID: 0 at .../kernel/locking/lockdep.c:2875 lockdep_hardirqs_on+0xf0/0x160 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.19.0 #27 pstate: 604003c9 (nZCv DAIF +PAN -UAO) pc : lockdep_hardirqs_on+0xf0/0x160 ... Call trace: lockdep_hardirqs_on+0xf0/0x160 trace_hardirqs_on+0x188/0x1ac kgdb_roundup_cpus+0x14/0x3c kgdb_cpu_enter+0x53c/0x5cc kgdb_handle_exception+0x180/0x1d4 kgdb_compiled_brk_fn+0x30/0x3c brk_handler+0x134/0x178 do_debug_exception+0xfc/0x178 el1_dbg+0x18/0x78 kgdb_breakpoint+0x34/0x58 sysrq_handle_dbg+0x54/0x5c __handle_sysrq+0x114/0x21c handle_sysrq+0x30/0x3c qcom_geni_serial_isr+0x2dc/0x30c ... ... irq event stamp: ...45 hardirqs last enabled at (...44): [...] __do_softirq+0xd8/0x4e4 hardirqs last disabled at (...45): [...] el1_irq+0x74/0x130 softirqs last enabled at (...42): [...] _local_bh_enable+0x2c/0x34 softirqs last disabled at (...43): [...] irq_exit+0xa8/0x100 ---[ end trace adf21f830c46e638 ]--- Looking closely at it, it seems like a really bad idea to be calling local_irq_enable() in kgdb_roundup_cpus(). If nothing else that seems like it could violate spinlock semantics and cause a deadlock. Instead, let's use a private csd alongside smp_call_function_single_async() to round up the other CPUs. Using smp_call_function_single_async() doesn't require interrupts to be enabled so we can remove the offending bit of code. In order to avoid duplicating this across all the architectures that use the default kgdb_roundup_cpus(), we'll add a "weak" implementation to debug_core.c. Looking at all the people who previously had copies of this code, there were a few variants. I've attempted to keep the variants working like they used to. Specifically: * For arch/arc we passed NULL to kgdb_nmicallback() instead of get_irq_regs(). * For arch/mips there was a bit of extra code around kgdb_nmicallback() NOTE: In this patch we will still get into trouble if we try to round up a CPU that failed to round up before. We'll try to round it up again and potentially hang when we try to grab the csd lock. That's not new behavior but we'll still try to do better in a future patch. Suggested-by: Daniel Thompson <daniel.thompson@linaro.org> Signed-off-by: Douglas Anderson <dianders@chromium.org> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Russell King <linux@armlinux.org.uk> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Richard Kuo <rkuo@codeaurora.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Paul Burton <paul.burton@mips.com> Cc: James Hogan <jhogan@kernel.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: Rich Felker <dalias@libc.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Acked-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
-rw-r--r--arch/arc/kernel/kgdb.c10
-rw-r--r--arch/arm/kernel/kgdb.c12
-rw-r--r--arch/arm64/kernel/kgdb.c12
-rw-r--r--arch/hexagon/kernel/kgdb.c27
-rw-r--r--arch/mips/kernel/kgdb.c9
-rw-r--r--arch/powerpc/kernel/kgdb.c4
-rw-r--r--arch/sh/kernel/kgdb.c12
-rw-r--r--include/linux/kgdb.h15
-rw-r--r--kernel/debug/debug_core.c41
9 files changed, 59 insertions, 83 deletions
diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index 0932851028e0..68d9fe4b5aa7 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -192,18 +192,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
192 instruction_pointer(regs) = ip; 192 instruction_pointer(regs) = ip;
193} 193}
194 194
195static void kgdb_call_nmi_hook(void *ignored) 195void kgdb_call_nmi_hook(void *ignored)
196{ 196{
197 /* Default implementation passes get_irq_regs() but we don't */
197 kgdb_nmicallback(raw_smp_processor_id(), NULL); 198 kgdb_nmicallback(raw_smp_processor_id(), NULL);
198} 199}
199 200
200void kgdb_roundup_cpus(void)
201{
202 local_irq_enable();
203 smp_call_function(kgdb_call_nmi_hook, NULL, 0);
204 local_irq_disable();
205}
206
207struct kgdb_arch arch_kgdb_ops = { 201struct kgdb_arch arch_kgdb_ops = {
208 /* breakpoint instruction: TRAP_S 0x3 */ 202 /* breakpoint instruction: TRAP_S 0x3 */
209#ifdef CONFIG_CPU_BIG_ENDIAN 203#ifdef CONFIG_CPU_BIG_ENDIAN
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index f21077b077be..d9a69e941463 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -170,18 +170,6 @@ static struct undef_hook kgdb_compiled_brkpt_hook = {
170 .fn = kgdb_compiled_brk_fn 170 .fn = kgdb_compiled_brk_fn
171}; 171};
172 172
173static void kgdb_call_nmi_hook(void *ignored)
174{
175 kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
176}
177
178void kgdb_roundup_cpus(void)
179{
180 local_irq_enable();
181 smp_call_function(kgdb_call_nmi_hook, NULL, 0);
182 local_irq_disable();
183}
184
185static int __kgdb_notify(struct die_args *args, unsigned long cmd) 173static int __kgdb_notify(struct die_args *args, unsigned long cmd)
186{ 174{
187 struct pt_regs *regs = args->regs; 175 struct pt_regs *regs = args->regs;
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index 12c339ff6e75..da880247c734 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -284,18 +284,6 @@ static struct step_hook kgdb_step_hook = {
284 .fn = kgdb_step_brk_fn 284 .fn = kgdb_step_brk_fn
285}; 285};
286 286
287static void kgdb_call_nmi_hook(void *ignored)
288{
289 kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
290}
291
292void kgdb_roundup_cpus(void)
293{
294 local_irq_enable();
295 smp_call_function(kgdb_call_nmi_hook, NULL, 0);
296 local_irq_disable();
297}
298
299static int __kgdb_notify(struct die_args *args, unsigned long cmd) 287static int __kgdb_notify(struct die_args *args, unsigned long cmd)
300{ 288{
301 struct pt_regs *regs = args->regs; 289 struct pt_regs *regs = args->regs;
diff --git a/arch/hexagon/kernel/kgdb.c b/arch/hexagon/kernel/kgdb.c
index 012e0e230ac2..b95d12038a4e 100644
--- a/arch/hexagon/kernel/kgdb.c
+++ b/arch/hexagon/kernel/kgdb.c
@@ -115,33 +115,6 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
115 instruction_pointer(regs) = pc; 115 instruction_pointer(regs) = pc;
116} 116}
117 117
118#ifdef CONFIG_SMP
119
120/**
121 * kgdb_roundup_cpus - Get other CPUs into a holding pattern
122 *
123 * On SMP systems, we need to get the attention of the other CPUs
124 * and get them be in a known state. This should do what is needed
125 * to get the other CPUs to call kgdb_wait(). Note that on some arches,
126 * the NMI approach is not used for rounding up all the CPUs. For example,
127 * in case of MIPS, smp_call_function() is used to roundup CPUs.
128 *
129 * On non-SMP systems, this is not called.
130 */
131
132static void hexagon_kgdb_nmi_hook(void *ignored)
133{
134 kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
135}
136
137void kgdb_roundup_cpus(void)
138{
139 local_irq_enable();
140 smp_call_function(hexagon_kgdb_nmi_hook, NULL, 0);
141 local_irq_disable();
142}
143#endif
144
145 118
146/* Not yet working */ 119/* Not yet working */
147void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, 120void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs,
diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c
index 2b05effc17b4..42f057a6c215 100644
--- a/arch/mips/kernel/kgdb.c
+++ b/arch/mips/kernel/kgdb.c
@@ -207,7 +207,7 @@ void arch_kgdb_breakpoint(void)
207 ".set\treorder"); 207 ".set\treorder");
208} 208}
209 209
210static void kgdb_call_nmi_hook(void *ignored) 210void kgdb_call_nmi_hook(void *ignored)
211{ 211{
212 mm_segment_t old_fs; 212 mm_segment_t old_fs;
213 213
@@ -219,13 +219,6 @@ static void kgdb_call_nmi_hook(void *ignored)
219 set_fs(old_fs); 219 set_fs(old_fs);
220} 220}
221 221
222void kgdb_roundup_cpus(void)
223{
224 local_irq_enable();
225 smp_call_function(kgdb_call_nmi_hook, NULL, 0);
226 local_irq_disable();
227}
228
229static int compute_signal(int tt) 222static int compute_signal(int tt)
230{ 223{
231 struct hard_trap_info *ht; 224 struct hard_trap_info *ht;
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index b0e804844be0..b4ce54d73337 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -117,7 +117,7 @@ int kgdb_skipexception(int exception, struct pt_regs *regs)
117 return kgdb_isremovedbreak(regs->nip); 117 return kgdb_isremovedbreak(regs->nip);
118} 118}
119 119
120static int kgdb_call_nmi_hook(struct pt_regs *regs) 120static int kgdb_debugger_ipi(struct pt_regs *regs)
121{ 121{
122 kgdb_nmicallback(raw_smp_processor_id(), regs); 122 kgdb_nmicallback(raw_smp_processor_id(), regs);
123 return 0; 123 return 0;
@@ -502,7 +502,7 @@ int kgdb_arch_init(void)
502 old__debugger_break_match = __debugger_break_match; 502 old__debugger_break_match = __debugger_break_match;
503 old__debugger_fault_handler = __debugger_fault_handler; 503 old__debugger_fault_handler = __debugger_fault_handler;
504 504
505 __debugger_ipi = kgdb_call_nmi_hook; 505 __debugger_ipi = kgdb_debugger_ipi;
506 __debugger = kgdb_debugger; 506 __debugger = kgdb_debugger;
507 __debugger_bpt = kgdb_handle_breakpoint; 507 __debugger_bpt = kgdb_handle_breakpoint;
508 __debugger_sstep = kgdb_singlestep; 508 __debugger_sstep = kgdb_singlestep;
diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c
index cc57630f6bf2..14e012ad7c57 100644
--- a/arch/sh/kernel/kgdb.c
+++ b/arch/sh/kernel/kgdb.c
@@ -314,18 +314,6 @@ BUILD_TRAP_HANDLER(singlestep)
314 local_irq_restore(flags); 314 local_irq_restore(flags);
315} 315}
316 316
317static void kgdb_call_nmi_hook(void *ignored)
318{
319 kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
320}
321
322void kgdb_roundup_cpus(void)
323{
324 local_irq_enable();
325 smp_call_function(kgdb_call_nmi_hook, NULL, 0);
326 local_irq_disable();
327}
328
329static int __kgdb_notify(struct die_args *args, unsigned long cmd) 317static int __kgdb_notify(struct die_args *args, unsigned long cmd)
330{ 318{
331 int ret; 319 int ret;
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 05e5b2eb0d32..24422865cd18 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -177,13 +177,24 @@ kgdb_arch_handle_exception(int vector, int signo, int err_code,
177 struct pt_regs *regs); 177 struct pt_regs *regs);
178 178
179/** 179/**
180 * kgdb_call_nmi_hook - Call kgdb_nmicallback() on the current CPU
181 * @ignored: This parameter is only here to match the prototype.
182 *
183 * If you're using the default implementation of kgdb_roundup_cpus()
184 * this function will be called per CPU. If you don't implement
185 * kgdb_call_nmi_hook() a default will be used.
186 */
187
188extern void kgdb_call_nmi_hook(void *ignored);
189
190/**
180 * kgdb_roundup_cpus - Get other CPUs into a holding pattern 191 * kgdb_roundup_cpus - Get other CPUs into a holding pattern
181 * 192 *
182 * On SMP systems, we need to get the attention of the other CPUs 193 * On SMP systems, we need to get the attention of the other CPUs
183 * and get them into a known state. This should do what is needed 194 * and get them into a known state. This should do what is needed
184 * to get the other CPUs to call kgdb_wait(). Note that on some arches, 195 * to get the other CPUs to call kgdb_wait(). Note that on some arches,
185 * the NMI approach is not used for rounding up all the CPUs. For example, 196 * the NMI approach is not used for rounding up all the CPUs. Normally
186 * in case of MIPS, smp_call_function() is used to roundup CPUs. 197 * those architectures can just not implement this and get the default.
187 * 198 *
188 * On non-SMP systems, this is not called. 199 * On non-SMP systems, this is not called.
189 */ 200 */
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index f3cadda45f07..10db2833a423 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -55,6 +55,7 @@
55#include <linux/mm.h> 55#include <linux/mm.h>
56#include <linux/vmacache.h> 56#include <linux/vmacache.h>
57#include <linux/rcupdate.h> 57#include <linux/rcupdate.h>
58#include <linux/irq.h>
58 59
59#include <asm/cacheflush.h> 60#include <asm/cacheflush.h>
60#include <asm/byteorder.h> 61#include <asm/byteorder.h>
@@ -220,6 +221,46 @@ int __weak kgdb_skipexception(int exception, struct pt_regs *regs)
220 return 0; 221 return 0;
221} 222}
222 223
224#ifdef CONFIG_SMP
225
226/*
227 * Default (weak) implementation for kgdb_roundup_cpus
228 */
229
230static DEFINE_PER_CPU(call_single_data_t, kgdb_roundup_csd);
231
232void __weak kgdb_call_nmi_hook(void *ignored)
233{
234 /*
235 * NOTE: get_irq_regs() is supposed to get the registers from
236 * before the IPI interrupt happened and so is supposed to
237 * show where the processor was. In some situations it's
238 * possible we might be called without an IPI, so it might be
239 * safer to figure out how to make kgdb_breakpoint() work
240 * properly here.
241 */
242 kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
243}
244
245void __weak kgdb_roundup_cpus(void)
246{
247 call_single_data_t *csd;
248 int this_cpu = raw_smp_processor_id();
249 int cpu;
250
251 for_each_online_cpu(cpu) {
252 /* No need to roundup ourselves */
253 if (cpu == this_cpu)
254 continue;
255
256 csd = &per_cpu(kgdb_roundup_csd, cpu);
257 csd->func = kgdb_call_nmi_hook;
258 smp_call_function_single_async(cpu, csd);
259 }
260}
261
262#endif
263
223/* 264/*
224 * Some architectures need cache flushes when we set/clear a 265 * Some architectures need cache flushes when we set/clear a
225 * breakpoint: 266 * breakpoint: