aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
author: Anton Blanchard <anton@samba.org> 2011-11-29 19:23:11 -0500
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2011-12-07 22:02:22 -0500
commit: 07fe0c6132578186773e01ffb0f63ded222effe7 (patch)
tree: 306f3fe8cc1994994a0d581c5d5b3d6d612d0def /arch/powerpc/kernel
parent: 9b00ac06978c54788f13eefd34a07b77db48d567 (diff)
powerpc/kdump: Use setjmp/longjmp to handle kdump and system reset recursion
We can handle recursion caused by system reset by reusing the crash shutdown fault handler.

Since we don't have an OS-triggerable NMI, if not all CPUs make it into kdump then we tell the user to issue a system reset. However, if we have a panic timeout set, we cannot wait forever and must continue the kdump.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r-- arch/powerpc/kernel/crash.c 72
1 files changed, 57 insertions, 15 deletions
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 3d87b205d5f5..a8b6e2d705a4 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -53,6 +53,16 @@ static cpumask_t cpus_in_crash = CPU_MASK_NONE;
53static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; 53static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1];
54static DEFINE_SPINLOCK(crash_handlers_lock); 54static DEFINE_SPINLOCK(crash_handlers_lock);
55 55
56static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
57static int crash_shutdown_cpu = -1;
58
59static int handle_fault(struct pt_regs *regs)
60{
61 if (crash_shutdown_cpu == smp_processor_id())
62 longjmp(crash_shutdown_buf, 1);
63 return 0;
64}
65
56#ifdef CONFIG_SMP 66#ifdef CONFIG_SMP
57 67
58void crash_ipi_callback(struct pt_regs *regs) 68void crash_ipi_callback(struct pt_regs *regs)
@@ -89,14 +99,16 @@ void crash_ipi_callback(struct pt_regs *regs)
89static void crash_kexec_prepare_cpus(int cpu) 99static void crash_kexec_prepare_cpus(int cpu)
90{ 100{
91 unsigned int msecs; 101 unsigned int msecs;
92
93 unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ 102 unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
103 int tries = 0;
104 int (*old_handler)(struct pt_regs *regs);
94 105
95 printk(KERN_EMERG "Sending IPI to other CPUs\n"); 106 printk(KERN_EMERG "Sending IPI to other CPUs\n");
96 107
97 crash_send_ipi(crash_ipi_callback); 108 crash_send_ipi(crash_ipi_callback);
98 smp_wmb(); 109 smp_wmb();
99 110
111again:
100 /* 112 /*
101 * FIXME: Until we will have the way to stop other CPUs reliably, 113 * FIXME: Until we will have the way to stop other CPUs reliably,
102 * the crash CPU will send an IPI and wait for other CPUs to 114 * the crash CPU will send an IPI and wait for other CPUs to
@@ -111,12 +123,52 @@ static void crash_kexec_prepare_cpus(int cpu)
111 123
112 /* Would it be better to replace the trap vector here? */ 124 /* Would it be better to replace the trap vector here? */
113 125
114 if (cpumask_weight(&cpus_in_crash) < ncpus) { 126 if (cpumask_weight(&cpus_in_crash) >= ncpus) {
115 printk(KERN_EMERG "ERROR: %d CPU(s) not responding\n", 127 printk(KERN_EMERG "IPI complete\n");
116 ncpus - cpumask_weight(&cpus_in_crash)); 128 return;
129 }
130
131 printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n",
132 ncpus - cpumask_weight(&cpus_in_crash));
133
134 /*
135 * If we have a panic timeout set then we can't wait indefinitely
136 * for someone to activate system reset. We also give up on the
137 * second time through if system reset fail to work.
138 */
139 if ((panic_timeout > 0) || (tries > 0))
140 return;
141
142 /*
143 * A system reset will cause all CPUs to take an 0x100 exception.
144 * The primary CPU returns here via setjmp, and the secondary
145 * CPUs reexecute the crash_kexec_secondary path.
146 */
147 old_handler = __debugger;
148 __debugger = handle_fault;
149 crash_shutdown_cpu = smp_processor_id();
150
151 if (setjmp(crash_shutdown_buf) == 0) {
152 printk(KERN_EMERG "Activate system reset (dumprestart) "
153 "to stop other cpu(s)\n");
154
155 /*
156 * A system reset will force all CPUs to execute the
157 * crash code again. We need to reset cpus_in_crash so we
158 * wait for everyone to do this.
159 */
160 cpus_in_crash = CPU_MASK_NONE;
161 smp_mb();
162
163 while (cpumask_weight(&cpus_in_crash) < ncpus)
164 cpu_relax();
117 } 165 }
118 166
119 printk(KERN_EMERG "IPI complete\n"); 167 crash_shutdown_cpu = -1;
168 __debugger = old_handler;
169
170 tries++;
171 goto again;
120} 172}
121 173
122/* 174/*
@@ -245,16 +297,6 @@ int crash_shutdown_unregister(crash_shutdown_t handler)
245} 297}
246EXPORT_SYMBOL(crash_shutdown_unregister); 298EXPORT_SYMBOL(crash_shutdown_unregister);
247 299
248static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
249static int crash_shutdown_cpu = -1;
250
251static int handle_fault(struct pt_regs *regs)
252{
253 if (crash_shutdown_cpu == smp_processor_id())
254 longjmp(crash_shutdown_buf, 1);
255 return 0;
256}
257
258void default_machine_crash_shutdown(struct pt_regs *regs) 300void default_machine_crash_shutdown(struct pt_regs *regs)
259{ 301{
260 unsigned int i; 302 unsigned int i;