aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> 2019-03-04 14:42:19 -0500
committer: Michael Ellerman <mpe@ellerman.id.au> 2019-05-02 12:54:57 -0400
commit: de269129a48a2d590ba1d20c719e19d86e3ddb3f (patch)
tree: f24b4a3afde2d90ce614a347129acf6324315a1a
parent: 0acb5f64560a052fd66ab37b212a72964847160f (diff)
powerpc/hmi: Fix kernel hang when TB is in error state.
On TOD/TB errors, the timebase register stops/freezes until HMI error recovery gets TOD/TB back into a running state. On successful recovery, TB starts running again and udelay(), which relies on the TB value, continues to function properly. But when HMI fails to recover from a TOD/TB error, the TB register stays frozen. With TB not running, the __delay() function keeps looping and never returns. If __delay() is called while in the panic path, the system hangs and never reboots after the panic. Fix this by calling OPAL_HANDLE_HMI2 (when the firmware provides it), whose out_flags report a failed TOD/TB recovery, recording that in tb_invalid, and making __delay() return immediately when tb_invalid is set. Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r-- arch/powerpc/include/asm/opal-api.h | 10
-rw-r--r-- arch/powerpc/include/asm/opal.h | 2
-rw-r--r-- arch/powerpc/include/asm/time.h | 2
-rw-r--r-- arch/powerpc/kernel/time.c | 9
-rw-r--r-- arch/powerpc/platforms/powernv/opal-call.c | 1
-rw-r--r-- arch/powerpc/platforms/powernv/opal.c | 21
-rw-r--r-- arch/powerpc/platforms/powernv/setup.c | 5
7 files changed, 49 insertions, 1 deletions
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index e1d118ac61dc..234fde15b37c 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -209,6 +209,7 @@
209#define OPAL_SENSOR_GROUP_ENABLE 163 209#define OPAL_SENSOR_GROUP_ENABLE 163
210#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164 210#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164
211#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165 211#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165
212#define OPAL_HANDLE_HMI2 166
212#define OPAL_NX_COPROC_INIT 167 213#define OPAL_NX_COPROC_INIT 167
213#define OPAL_XIVE_GET_VP_STATE 170 214#define OPAL_XIVE_GET_VP_STATE 170
214#define OPAL_LAST 170 215#define OPAL_LAST 170
@@ -635,6 +636,15 @@ struct OpalHMIEvent {
635 } u; 636 } u;
636}; 637};
637 638
639/* OPAL_HANDLE_HMI2 out_flags */
640enum {
641 OPAL_HMI_FLAGS_TB_RESYNC = (1ull << 0), /* Timebase has been resynced */
642 OPAL_HMI_FLAGS_DEC_LOST = (1ull << 1), /* DEC lost, needs to be reprogrammed */
643 OPAL_HMI_FLAGS_HDEC_LOST = (1ull << 2), /* HDEC lost, needs to be reprogrammed */
644 OPAL_HMI_FLAGS_TOD_TB_FAIL = (1ull << 3), /* TOD/TB recovery failed. */
645 OPAL_HMI_FLAGS_NEW_EVENT = (1ull << 63), /* An event has been created */
646};
647
638enum { 648enum {
639 OPAL_P7IOC_DIAG_TYPE_NONE = 0, 649 OPAL_P7IOC_DIAG_TYPE_NONE = 0,
640 OPAL_P7IOC_DIAG_TYPE_RGC = 1, 650 OPAL_P7IOC_DIAG_TYPE_RGC = 1,
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 4e978d4dea5c..4cc37e708bc7 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -203,6 +203,7 @@ int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer,
203int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data); 203int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
204int64_t opal_sensor_read_u64(u32 sensor_hndl, int token, __be64 *sensor_data); 204int64_t opal_sensor_read_u64(u32 sensor_hndl, int token, __be64 *sensor_data);
205int64_t opal_handle_hmi(void); 205int64_t opal_handle_hmi(void);
206int64_t opal_handle_hmi2(__be64 *out_flags);
206int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end); 207int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
207int64_t opal_unregister_dump_region(uint32_t id); 208int64_t opal_unregister_dump_region(uint32_t id);
208int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val); 209int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
@@ -359,6 +360,7 @@ int opal_power_control_init(void);
359extern int opal_machine_check(struct pt_regs *regs); 360extern int opal_machine_check(struct pt_regs *regs);
360extern bool opal_mce_check_early_recovery(struct pt_regs *regs); 361extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
361extern int opal_hmi_exception_early(struct pt_regs *regs); 362extern int opal_hmi_exception_early(struct pt_regs *regs);
363extern int opal_hmi_exception_early2(struct pt_regs *regs);
362extern int opal_handle_hmi_exception(struct pt_regs *regs); 364extern int opal_handle_hmi_exception(struct pt_regs *regs);
363 365
364extern void opal_shutdown(void); 366extern void opal_shutdown(void);
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 54bf7e68a7e1..57e968413d1e 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -36,6 +36,8 @@ extern unsigned long ppc_proc_freq;
36extern unsigned long ppc_tb_freq; 36extern unsigned long ppc_tb_freq;
37#define DEFAULT_TB_FREQ 125000000UL 37#define DEFAULT_TB_FREQ 125000000UL
38 38
39extern bool tb_invalid;
40
39struct div_result { 41struct div_result {
40 u64 result_high; 42 u64 result_high;
41 u64 result_low; 43 u64 result_low;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 6ef32472ee1d..325d60633dfa 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -150,6 +150,8 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq);
150unsigned long ppc_tb_freq; 150unsigned long ppc_tb_freq;
151EXPORT_SYMBOL_GPL(ppc_tb_freq); 151EXPORT_SYMBOL_GPL(ppc_tb_freq);
152 152
153bool tb_invalid;
154
153#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 155#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
154/* 156/*
155 * Factor for converting from cputime_t (timebase ticks) to 157 * Factor for converting from cputime_t (timebase ticks) to
@@ -459,6 +461,13 @@ void __delay(unsigned long loops)
459 diff += 1000000000; 461 diff += 1000000000;
460 spin_cpu_relax(); 462 spin_cpu_relax();
461 } while (diff < loops); 463 } while (diff < loops);
464 } else if (tb_invalid) {
465 /*
466 * TB is in error state and isn't ticking anymore.
467 * HMI handler was unable to recover from TB error.
468 * Return immediately, so that kernel won't get stuck here.
469 */
470 spin_cpu_relax();
462 } else { 471 } else {
463 start = get_tbl(); 472 start = get_tbl();
464 while (get_tbl() - start < loops) 473 while (get_tbl() - start < loops)
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index 7cba0d5da3ff..36c8fa3647a2 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -220,6 +220,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
220OPAL_CALL(opal_get_param, OPAL_GET_PARAM); 220OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
221OPAL_CALL(opal_set_param, OPAL_SET_PARAM); 221OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
222OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); 222OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
223OPAL_CALL(opal_handle_hmi2, OPAL_HANDLE_HMI2);
223OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE); 224OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE);
224OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); 225OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
225OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); 226OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 737c51d63480..f2b063b027f0 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -614,6 +614,27 @@ int opal_hmi_exception_early(struct pt_regs *regs)
614 return 0; 614 return 0;
615} 615}
616 616
617int opal_hmi_exception_early2(struct pt_regs *regs)
618{
619 s64 rc;
620 __be64 out_flags;
621
622 /*
623 * call opal hmi handler.
624 * Check 64-bit flag mask to find out if an event was generated,
625 * and whether TB is still valid or not etc.
626 */
627 rc = opal_handle_hmi2(&out_flags);
628 if (rc != OPAL_SUCCESS)
629 return 0;
630
631 if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_NEW_EVENT)
632 local_paca->hmi_event_available = 1;
633 if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_TOD_TB_FAIL)
634 tb_invalid = true;
635 return 1;
636}
637
617/* HMI exception handler called in virtual mode during check_irq_replay. */ 638/* HMI exception handler called in virtual mode during check_irq_replay. */
618int opal_handle_hmi_exception(struct pt_regs *regs) 639int opal_handle_hmi_exception(struct pt_regs *regs)
619{ 640{
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 14befee4b3f1..3cf40f689aac 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -401,7 +401,10 @@ static void __init pnv_setup_machdep_opal(void)
401 /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */ 401 /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
402 ppc_md.machine_check_exception = opal_machine_check; 402 ppc_md.machine_check_exception = opal_machine_check;
403 ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; 403 ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
404 ppc_md.hmi_exception_early = opal_hmi_exception_early; 404 if (opal_check_token(OPAL_HANDLE_HMI2))
405 ppc_md.hmi_exception_early = opal_hmi_exception_early2;
406 else
407 ppc_md.hmi_exception_early = opal_hmi_exception_early;
405 ppc_md.handle_hmi_exception = opal_handle_hmi_exception; 408 ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
406} 409}
407 410