powerpc/powernv: Fix kexec races going back to OPAL

We have a subtle race when sending CPUs back to OPAL on kexec.

We mark them as "in real mode" right before we send them down. Once we've booted the new kernel, it might try to call opal_reinit_cpus() to change endianness, and that requires all CPUs to be spinning inside OPAL.

However there is no synchronization here and we've observed cases where the returning CPUs hadn't established their new state inside OPAL before opal_reinit_cpus() is called, causing it to fail.

The proper fix is to actually wait for them to go down all the way from the kexec'ing kernel.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
author: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2014-04-24 02:14:25 -0400
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2014-04-27 23:08:50 -0400
commit: 298b34d7d578c3b4325248b823f3d83a8ea1541c (patch)
tree: 3b77b3f53752ad216a822a706d0a3ac80949f92d /arch/powerpc/platforms/powernv
parent: 63aecfb20a6629f1ca91d7a052f87988ceb6dd53 (diff)
1 files changed, 46 insertions, 2 deletions
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 61cf8fa9c61b..8723d32632f5 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -162,18 +162,62 @@ static void pnv_shutdown(void)
 }
 #ifdef CONFIG_KEXEC
+static void pnv_kexec_wait_secondaries_down(void)
+{
+        int my_cpu, i, notified = -1;
+        my_cpu = get_cpu();
+        for_each_online_cpu(i) {
+                uint8_t status;
+                int64_t rc;
+                if (i == my_cpu)
+                        continue;
+                for (;;) {
+                        rc = opal_query_cpu_status(get_hard_smp_processor_id(i),
+                                                   &status);
+                        if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED)
+                                break;
+                        barrier();
+                        if (i != notified) {
+                                printk(KERN_INFO "kexec: waiting for cpu %d "
+                                       "(physical %d) to enter OPAL\n",
+                                       i, paca[i].hw_cpu_id);
+                                notified = i;
+                        }
+                }
+        }
+}
 static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
 {
        xics_kexec_teardown_cpu(secondary);
-        /* Return secondary CPUs to firmware on OPAL v3 */
+        /* On OPAL v3, we return all CPUs to firmware */
-        if (firmware_has_feature(FW_FEATURE_OPALv3) && secondary) {
+        if (!firmware_has_feature(FW_FEATURE_OPALv3))
+                return;
+        if (secondary) {
+                /* Return secondary CPUs to firmware on OPAL v3 */
                mb();
                get_paca()->kexec_state = KEXEC_STATE_REAL_MODE;
                mb();
                /* Return the CPU to OPAL */
                opal_return_cpu();
+        } else if (crash_shutdown) {
+                /*
+                 * On crash, we don't wait for secondaries to go
+                 * down as they might be unreachable or hung, so
+                 * instead we just wait a bit and move on.
+                 */
+                mdelay(1);
+        } else {
+                /* Primary waits for the secondaries to have reached OPAL */
+                pnv_kexec_wait_secondaries_down();
        }
 }
 #endif /* CONFIG_KEXEC */
author	Benjamin Herrenschmidt <benh@kernel.crashing.org>	2014-04-24 02:14:25 -0400
committer	Benjamin Herrenschmidt <benh@kernel.crashing.org>	2014-04-27 23:08:50 -0400
commit	298b34d7d578c3b4325248b823f3d83a8ea1541c (patch)
tree	3b77b3f53752ad216a822a706d0a3ac80949f92d /arch/powerpc/platforms/powernv
parent	63aecfb20a6629f1ca91d7a052f87988ceb6dd53 (diff)

diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 61cf8fa9c61b..8723d32632f5 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -162,18 +162,62 @@ static void pnv_shutdown(void)
162	}	162	}
163		163
164	#ifdef CONFIG_KEXEC	164	#ifdef CONFIG_KEXEC
		165	static void pnv_kexec_wait_secondaries_down(void)
		166	{
		167	int my_cpu, i, notified = -1;
		168
		169	my_cpu = get_cpu();
		170
		171	for_each_online_cpu(i) {
		172	uint8_t status;
		173	int64_t rc;
		174
		175	if (i == my_cpu)
		176	continue;
		177
		178	for (;;) {
		179	rc = opal_query_cpu_status(get_hard_smp_processor_id(i),
		180	&status);
		181	if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED)
		182	break;
		183	barrier();
		184	if (i != notified) {
		185	printk(KERN_INFO "kexec: waiting for cpu %d "
		186	"(physical %d) to enter OPAL\n",
		187	i, paca[i].hw_cpu_id);
		188	notified = i;
		189	}
		190	}
		191	}
		192	}
		193
165	static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)	194	static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
166	{	195	{
167	xics_kexec_teardown_cpu(secondary);	196	xics_kexec_teardown_cpu(secondary);
168		197
169	/* Return secondary CPUs to firmware on OPAL v3 */	198	/* On OPAL v3, we return all CPUs to firmware */
170	if (firmware_has_feature(FW_FEATURE_OPALv3) && secondary) {	199
		200	if (!firmware_has_feature(FW_FEATURE_OPALv3))
		201	return;
		202
		203	if (secondary) {
		204	/* Return secondary CPUs to firmware on OPAL v3 */
171	mb();	205	mb();
172	get_paca()->kexec_state = KEXEC_STATE_REAL_MODE;	206	get_paca()->kexec_state = KEXEC_STATE_REAL_MODE;
173	mb();	207	mb();
174		208
175	/* Return the CPU to OPAL */	209	/* Return the CPU to OPAL */
176	opal_return_cpu();	210	opal_return_cpu();
		211	} else if (crash_shutdown) {
		212	/*
		213	* On crash, we don't wait for secondaries to go
		214	* down as they might be unreachable or hung, so
		215	* instead we just wait a bit and move on.
		216	*/
		217	mdelay(1);
		218	} else {
		219	/* Primary waits for the secondaries to have reached OPAL */
		220	pnv_kexec_wait_secondaries_down();
177	}	221	}
178	}	222	}
179	#endif /* CONFIG_KEXEC */	223	#endif /* CONFIG_KEXEC */