diff options
author | Anton Blanchard <anton@samba.org> | 2011-11-29 19:23:11 -0500 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2011-12-07 22:02:22 -0500 |
commit | 07fe0c6132578186773e01ffb0f63ded222effe7 (patch) | |
tree | 306f3fe8cc1994994a0d581c5d5b3d6d612d0def /arch/powerpc/kernel/crash.c | |
parent | 9b00ac06978c54788f13eefd34a07b77db48d567 (diff) |
powerpc/kdump: Use setjmp/longjmp to handle kdump and system reset recursion
We can handle recursion caused by system reset by reusing the crash
shutdown fault handler.
Since we don't have an OS-triggerable NMI, if not all CPUs make it
into kdump then we tell the user to issue a system reset. However, if
a panic timeout is set we cannot wait forever and must continue
the kdump.
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/kernel/crash.c')
-rw-r--r-- | arch/powerpc/kernel/crash.c | 72 |
1 files changed, 57 insertions, 15 deletions
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 3d87b205d5f5..a8b6e2d705a4 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c | |||
@@ -53,6 +53,16 @@ static cpumask_t cpus_in_crash = CPU_MASK_NONE; | |||
53 | static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; | 53 | static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; |
54 | static DEFINE_SPINLOCK(crash_handlers_lock); | 54 | static DEFINE_SPINLOCK(crash_handlers_lock); |
55 | 55 | ||
56 | static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; | ||
57 | static int crash_shutdown_cpu = -1; | ||
58 | |||
59 | static int handle_fault(struct pt_regs *regs) | ||
60 | { | ||
61 | if (crash_shutdown_cpu == smp_processor_id()) | ||
62 | longjmp(crash_shutdown_buf, 1); | ||
63 | return 0; | ||
64 | } | ||
65 | |||
56 | #ifdef CONFIG_SMP | 66 | #ifdef CONFIG_SMP |
57 | 67 | ||
58 | void crash_ipi_callback(struct pt_regs *regs) | 68 | void crash_ipi_callback(struct pt_regs *regs) |
@@ -89,14 +99,16 @@ void crash_ipi_callback(struct pt_regs *regs) | |||
89 | static void crash_kexec_prepare_cpus(int cpu) | 99 | static void crash_kexec_prepare_cpus(int cpu) |
90 | { | 100 | { |
91 | unsigned int msecs; | 101 | unsigned int msecs; |
92 | |||
93 | unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ | 102 | unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ |
103 | int tries = 0; | ||
104 | int (*old_handler)(struct pt_regs *regs); | ||
94 | 105 | ||
95 | printk(KERN_EMERG "Sending IPI to other CPUs\n"); | 106 | printk(KERN_EMERG "Sending IPI to other CPUs\n"); |
96 | 107 | ||
97 | crash_send_ipi(crash_ipi_callback); | 108 | crash_send_ipi(crash_ipi_callback); |
98 | smp_wmb(); | 109 | smp_wmb(); |
99 | 110 | ||
111 | again: | ||
100 | /* | 112 | /* |
101 | * FIXME: Until we will have the way to stop other CPUs reliably, | 113 | * FIXME: Until we will have the way to stop other CPUs reliably, |
102 | * the crash CPU will send an IPI and wait for other CPUs to | 114 | * the crash CPU will send an IPI and wait for other CPUs to |
@@ -111,12 +123,52 @@ static void crash_kexec_prepare_cpus(int cpu) | |||
111 | 123 | ||
112 | /* Would it be better to replace the trap vector here? */ | 124 | /* Would it be better to replace the trap vector here? */ |
113 | 125 | ||
114 | if (cpumask_weight(&cpus_in_crash) < ncpus) { | 126 | if (cpumask_weight(&cpus_in_crash) >= ncpus) { |
115 | printk(KERN_EMERG "ERROR: %d CPU(s) not responding\n", | 127 | printk(KERN_EMERG "IPI complete\n"); |
116 | ncpus - cpumask_weight(&cpus_in_crash)); | 128 | return; |
129 | } | ||
130 | |||
131 | printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", | ||
132 | ncpus - cpumask_weight(&cpus_in_crash)); | ||
133 | |||
134 | /* | ||
135 | * If we have a panic timeout set then we can't wait indefinitely | ||
136 | * for someone to activate system reset. We also give up on the | ||
137 | * second time through if system reset fails to work. | |
138 | */ | ||
139 | if ((panic_timeout > 0) || (tries > 0)) | ||
140 | return; | ||
141 | |||
142 | /* | ||
143 | * A system reset will cause all CPUs to take an 0x100 exception. | ||
144 | * The primary CPU returns here via setjmp, and the secondary | ||
145 | * CPUs reexecute the crash_kexec_secondary path. | ||
146 | */ | ||
147 | old_handler = __debugger; | ||
148 | __debugger = handle_fault; | ||
149 | crash_shutdown_cpu = smp_processor_id(); | ||
150 | |||
151 | if (setjmp(crash_shutdown_buf) == 0) { | ||
152 | printk(KERN_EMERG "Activate system reset (dumprestart) " | ||
153 | "to stop other cpu(s)\n"); | ||
154 | |||
155 | /* | ||
156 | * A system reset will force all CPUs to execute the | ||
157 | * crash code again. We need to reset cpus_in_crash so we | ||
158 | * wait for everyone to do this. | ||
159 | */ | ||
160 | cpus_in_crash = CPU_MASK_NONE; | ||
161 | smp_mb(); | ||
162 | |||
163 | while (cpumask_weight(&cpus_in_crash) < ncpus) | ||
164 | cpu_relax(); | ||
117 | } | 165 | } |
118 | 166 | ||
119 | printk(KERN_EMERG "IPI complete\n"); | 167 | crash_shutdown_cpu = -1; |
168 | __debugger = old_handler; | ||
169 | |||
170 | tries++; | ||
171 | goto again; | ||
120 | } | 172 | } |
121 | 173 | ||
122 | /* | 174 | /* |
@@ -245,16 +297,6 @@ int crash_shutdown_unregister(crash_shutdown_t handler) | |||
245 | } | 297 | } |
246 | EXPORT_SYMBOL(crash_shutdown_unregister); | 298 | EXPORT_SYMBOL(crash_shutdown_unregister); |
247 | 299 | ||
248 | static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; | ||
249 | static int crash_shutdown_cpu = -1; | ||
250 | |||
251 | static int handle_fault(struct pt_regs *regs) | ||
252 | { | ||
253 | if (crash_shutdown_cpu == smp_processor_id()) | ||
254 | longjmp(crash_shutdown_buf, 1); | ||
255 | return 0; | ||
256 | } | ||
257 | |||
258 | void default_machine_crash_shutdown(struct pt_regs *regs) | 300 | void default_machine_crash_shutdown(struct pt_regs *regs) |
259 | { | 301 | { |
260 | unsigned int i; | 302 | unsigned int i; |