aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Nicholas Piggin <npiggin@gmail.com> 2018-04-01 06:36:15 -0400
committer: Michael Ellerman <mpe@ellerman.id.au> 2018-04-03 08:59:57 -0400
commit: f2748bdfe157343eb8cf910a1d89ccf2fd20100b (patch)
tree: 5088070ac7b1f8e47f168ecfd275f821c322e422
parent: 855bfe0de1a05a01f89975ea8ba9f5521fb0f567 (diff)
powerpc/powernv: Always stop secondaries before reboot/shutdown
Currently powernv reboot and shutdown requests just leave secondaries
to do their own things. This is undesirable because they can trigger
any number of watchdogs while waiting for reboot, but also we don't
know what else they might be doing -- they might be causing trouble,
trampling memory, etc.

The opal scheduled flash update code already ran into watchdog problems
due to flashing taking a long time, and it was fixed with 2196c6f1ed
("powerpc/powernv: Return secondary CPUs to firmware before FW
update"), which returns secondaries to opal. It's been found that
regular reboots can take over 10 seconds, which can result in the hard
lockup watchdog firing,

  reboot: Restarting system
  [ 360.038896709,5] OPAL: Reboot request...
  Watchdog CPU:0 Hard LOCKUP
  Watchdog CPU:44 detected Hard LOCKUP other CPUS:16
  Watchdog CPU:16 Hard LOCKUP
  watchdog: BUG: soft lockup - CPU#16 stuck for 3s! [swapper/16:0]

This patch removes the special case for flash update, and calls
smp_send_stop in all cases before calling reboot/shutdown.

smp_send_stop could return CPUs to OPAL, the main reason not to is
that the request could come from a NMI that interrupts OPAL code,
so re-entry to OPAL can cause a number of problems. Putting
secondaries into simple spin loops improves the chances of a
successful reboot.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r-- arch/powerpc/include/asm/opal.h | 2
-rw-r--r-- arch/powerpc/platforms/powernv/opal-flash.c | 28
-rw-r--r-- arch/powerpc/platforms/powernv/setup.c | 15
3 files changed, 7 insertions, 38 deletions
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index dde60089d0d4..7159e1a6a61a 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -325,7 +325,7 @@ struct rtc_time;
325extern unsigned long opal_get_boot_time(void); 325extern unsigned long opal_get_boot_time(void);
326extern void opal_nvram_init(void); 326extern void opal_nvram_init(void);
327extern void opal_flash_update_init(void); 327extern void opal_flash_update_init(void);
328extern void opal_flash_term_callback(void); 328extern void opal_flash_update_print_message(void);
329extern int opal_elog_init(void); 329extern int opal_elog_init(void);
330extern void opal_platform_dump_init(void); 330extern void opal_platform_dump_init(void);
331extern void opal_sys_param_init(void); 331extern void opal_sys_param_init(void);
diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c
index 1cb0b895a236..b37015101bf6 100644
--- a/arch/powerpc/platforms/powernv/opal-flash.c
+++ b/arch/powerpc/platforms/powernv/opal-flash.c
@@ -303,26 +303,9 @@ invalid_img:
303 return rc; 303 return rc;
304} 304}
305 305
306/* Return CPUs to OPAL before starting FW update */
307static void flash_return_cpu(void *info)
308{
309 int cpu = smp_processor_id();
310
311 if (!cpu_online(cpu))
312 return;
313
314 /* Disable IRQ */
315 hard_irq_disable();
316
317 /* Return the CPU to OPAL */
318 opal_return_cpu();
319}
320
321/* This gets called just before system reboots */ 306/* This gets called just before system reboots */
322void opal_flash_term_callback(void) 307void opal_flash_update_print_message(void)
323{ 308{
324 struct cpumask mask;
325
326 if (update_flash_data.status != FLASH_IMG_READY) 309 if (update_flash_data.status != FLASH_IMG_READY)
327 return; 310 return;
328 311
@@ -333,15 +316,6 @@ void opal_flash_term_callback(void)
333 316
334 /* Small delay to help getting the above message out */ 317 /* Small delay to help getting the above message out */
335 msleep(500); 318 msleep(500);
336
337 /* Return secondary CPUs to firmware */
338 cpumask_copy(&mask, cpu_online_mask);
339 cpumask_clear_cpu(smp_processor_id(), &mask);
340 if (!cpumask_empty(&mask))
341 smp_call_function_many(&mask,
342 flash_return_cpu, NULL, false);
343 /* Hard disable interrupts */
344 hard_irq_disable();
345} 319}
346 320
347/* 321/*
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 5f963286232f..ef8c9ce53a61 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -201,17 +201,12 @@ static void pnv_prepare_going_down(void)
201 */ 201 */
202 opal_event_shutdown(); 202 opal_event_shutdown();
203 203
204 /* Soft disable interrupts */ 204 /* Print flash update message if one is scheduled. */
205 local_irq_disable(); 205 opal_flash_update_print_message();
206 206
207 /* 207 smp_send_stop();
208 * Return secondary CPUs to firwmare if a flash update 208
209 * is pending otherwise we will get all sort of error 209 hard_irq_disable();
210 * messages about CPU being stuck etc.. This will also
211 * have the side effect of hard disabling interrupts so
212 * past this point, the kernel is effectively dead.
213 */
214 opal_flash_term_callback();
215} 210}
216 211
217static void __noreturn pnv_restart(char *cmd) 212static void __noreturn pnv_restart(char *cmd)