aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Neuling <mikey@neuling.org>2010-05-13 15:40:11 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2010-05-21 03:31:11 -0400
commit60adec6226bbcf061d4c2d10944fced209d1847d (patch)
treeeab88076a6f7e3b617d03faac87e8366eb6789eb
parent1fc711f7ffb01089efc58042cfdbac8573d1b59a (diff)
powerpc/kdump: Fix race in kdump shutdown
When we are crashing, the crashing/primary CPU IPIs the secondaries to turn off IRQs, go into real mode and wait in kexec_wait. While this is happening, the primary tears down all the MMU maps. Unfortunately the primary doesn't check to make sure the secondaries have entered real mode before doing this. On PHYP machines, the secondaries can take a long time shutting down the IRQ controller as RTAS calls are needed. These RTAS calls need to be serialised which results in the secondaries contending in lock_rtas() and hence taking a long time to shut down. We've hit this on large POWER7 machines, where some secondaries are still waiting in lock_rtas(), when the primary tears down the HPTEs. This patch makes sure all secondaries are in real mode before the primary tears down the MMU. It uses the new kexec_state entry in the paca. It times out if the secondaries don't reach real mode after 10sec. Signed-off-by: Michael Neuling <mikey@neuling.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r--arch/powerpc/kernel/crash.c27
1 files changed, 27 insertions, 0 deletions
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index cca7c8fafc1c..8c066d6a8e4b 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -162,6 +162,32 @@ static void crash_kexec_prepare_cpus(int cpu)
162 /* Leave the IPI callback set */ 162 /* Leave the IPI callback set */
163} 163}
164 164
165/* wait for all the CPUs to hit real mode but timeout if they don't come in */
166static void crash_kexec_wait_realmode(int cpu)
167{
168 unsigned int msecs;
169 int i;
170
171 msecs = 10000;
172 for (i=0; i < NR_CPUS && msecs > 0; i++) {
173 if (i == cpu)
174 continue;
175
176 while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
177 barrier();
178 if (!cpu_possible(i)) {
179 break;
180 }
181 if (!cpu_online(i)) {
182 break;
183 }
184 msecs--;
185 mdelay(1);
186 }
187 }
188 mb();
189}
190
165/* 191/*
166 * This function will be called by secondary cpus or by kexec cpu 192 * This function will be called by secondary cpus or by kexec cpu
167 * if soft-reset is activated to stop some CPUs. 193 * if soft-reset is activated to stop some CPUs.
@@ -419,6 +445,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
419 crash_kexec_prepare_cpus(crashing_cpu); 445 crash_kexec_prepare_cpus(crashing_cpu);
420 cpu_set(crashing_cpu, cpus_in_crash); 446 cpu_set(crashing_cpu, cpus_in_crash);
421 crash_kexec_stop_spus(); 447 crash_kexec_stop_spus();
448 crash_kexec_wait_realmode(crashing_cpu);
422 if (ppc_md.kexec_cpu_down) 449 if (ppc_md.kexec_cpu_down)
423 ppc_md.kexec_cpu_down(1, 0); 450 ppc_md.kexec_cpu_down(1, 0);
424} 451}