aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Paul Mackerras <paulus@ozlabs.org> 2018-01-17 04:51:13 -0500
committer Michael Ellerman <mpe@ellerman.id.au> 2018-01-17 23:31:25 -0500
commit d075745d893c78730e4a3b7a60fca23c2f764081 (patch)
tree 5503d7e4b87f2e9cdfe42f7ab2407d7294667714
parent 7f1c410da59090f9bb2300efebbc3b717594d64c (diff)
KVM: PPC: Book3S HV: Improve handling of debug-trigger HMIs on POWER9
Hypervisor maintenance interrupts (HMIs) are generated by various causes, signalled by bits in the hypervisor maintenance exception register (HMER). In most cases calling OPAL to handle the interrupt is the correct thing to do, but the "debug trigger" HMIs signalled by PPC bit 17 (bit 46) of HMER are used to invoke software workarounds for hardware bugs, and OPAL does not have any code to handle this cause.

The debug trigger HMI is used in POWER9 DD2.0 and DD2.1 chips to work around a hardware bug in executing vector load instructions to cache inhibited memory. In POWER9 DD2.2 chips, it is generated when conditions are detected relating to threads being in TM (transactional memory) suspended mode when the core SMT configuration needs to be reconfigured.

The kernel currently has code to detect the vector CI load condition, but only when the HMI occurs in the host, not when it occurs in a guest. If an HMI occurs in the guest, it is always passed to OPAL, and then we always re-sync the timebase, because the HMI cause might have been a timebase error, for which OPAL would re-sync the timebase, thus removing the timebase offset which KVM applied for the guest. Since we don't know what OPAL did, we don't know whether to subtract the timebase offset from the timebase, so instead we re-sync the timebase.

This adds code to determine explicitly what the cause of a debug trigger HMI will be. This is based on a new device-tree property under the CPU nodes called ibm,hmi-special-triggers, if it is present, or otherwise based on the PVR (processor version register). The handling of debug trigger HMIs is pulled out into a separate function which can be called from the KVM guest exit code. If this function handles and clears the HMI, and no other HMI causes remain, then we skip calling OPAL and we proceed to subtract the guest timebase offset from the timebase. The overall handling for HMIs that occur in the host (i.e. not in a KVM guest) is largely unchanged, except that we now don't set the flag for the vector CI load workaround on DD2.2 processors.

This also removes a BUG_ON in the KVM code. BUG_ON is generally not useful in KVM guest entry/exit code since it is difficult to handle the resulting trap gracefully.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r-- arch/powerpc/include/asm/hmi.h 4
-rw-r--r-- arch/powerpc/include/asm/reg.h 5
-rw-r--r-- arch/powerpc/kernel/mce.c 142
-rw-r--r-- arch/powerpc/kvm/book3s_hv_ras.c 8
-rw-r--r-- arch/powerpc/kvm/book3s_hv_rmhandlers.S 9
5 files changed, 131 insertions, 37 deletions
diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h
index 85b7a1a21e22..9c14f7b5c46c 100644
--- a/arch/powerpc/include/asm/hmi.h
+++ b/arch/powerpc/include/asm/hmi.h
@@ -42,4 +42,8 @@ extern void wait_for_tb_resync(void);
42static inline void wait_for_subcore_guest_exit(void) { } 42static inline void wait_for_subcore_guest_exit(void) { }
43static inline void wait_for_tb_resync(void) { } 43static inline void wait_for_tb_resync(void) { }
44#endif 44#endif
45
46struct pt_regs;
47extern long hmi_handle_debugtrig(struct pt_regs *regs);
48
45#endif /* __ASM_PPC64_HMI_H__ */ 49#endif /* __ASM_PPC64_HMI_H__ */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index b779f3ccd412..14e41b843952 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -432,8 +432,9 @@
432#define SPRN_LPID 0x13F /* Logical Partition Identifier */ 432#define SPRN_LPID 0x13F /* Logical Partition Identifier */
433#endif 433#endif
434#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ 434#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */
435#define SPRN_HMER 0x150 /* Hardware m? error recovery */ 435#define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */
436#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ 436#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */
437#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */
437#define SPRN_PCR 0x152 /* Processor compatibility register */ 438#define SPRN_PCR 0x152 /* Processor compatibility register */
438#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */ 439#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
439#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */ 440#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 742e4658c5dc..d2fecaec4fec 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs)
495 return handled; 495 return handled;
496} 496}
497 497
498long hmi_exception_realmode(struct pt_regs *regs) 498/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
499static enum {
500 DTRIG_UNKNOWN,
501 DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */
502 DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */
503} hmer_debug_trig_function;
504
505static int init_debug_trig_function(void)
499{ 506{
500 __this_cpu_inc(irq_stat.hmi_exceptions); 507 int pvr;
501 508 struct device_node *cpun;
502#ifdef CONFIG_PPC_BOOK3S_64 509 struct property *prop = NULL;
503 /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */ 510 const char *str;
504 if (pvr_version_is(PVR_POWER9)) { 511
505 unsigned long hmer = mfspr(SPRN_HMER); 512 /* First look in the device tree */
506 513 preempt_disable();
507 /* Do we have the debug bit set */ 514 cpun = of_get_cpu_node(smp_processor_id(), NULL);
508 if (hmer & PPC_BIT(17)) { 515 if (cpun) {
509 hmer &= ~PPC_BIT(17); 516 of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
510 mtspr(SPRN_HMER, hmer); 517 prop, str) {
511 518 if (strcmp(str, "bit17-vector-ci-load") == 0)
512 /* 519 hmer_debug_trig_function = DTRIG_VECTOR_CI;
513 * Now to avoid problems with soft-disable we 520 else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
514 * only do the emulation if we are coming from 521 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
515 * user space
516 */
517 if (user_mode(regs))
518 local_paca->hmi_p9_special_emu = 1;
519
520 /*
521 * Don't bother going to OPAL if that's the
522 * only relevant bit.
523 */
524 if (!(hmer & mfspr(SPRN_HMEER)))
525 return local_paca->hmi_p9_special_emu;
526 } 522 }
523 of_node_put(cpun);
524 }
525 preempt_enable();
526
527 /* If we found the property, don't look at PVR */
528 if (prop)
529 goto out;
530
531 pvr = mfspr(SPRN_PVR);
532 /* Check for POWER9 Nimbus (scale-out) */
533 if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
534 /* DD2.2 and later */
535 if ((pvr & 0xfff) >= 0x202)
536 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
537 /* DD2.0 and DD2.1 - used for vector CI load emulation */
538 else if ((pvr & 0xfff) >= 0x200)
539 hmer_debug_trig_function = DTRIG_VECTOR_CI;
540 }
541
542 out:
543 switch (hmer_debug_trig_function) {
544 case DTRIG_VECTOR_CI:
545 pr_debug("HMI debug trigger used for vector CI load\n");
546 break;
547 case DTRIG_SUSPEND_ESCAPE:
548 pr_debug("HMI debug trigger used for TM suspend escape\n");
549 break;
550 default:
551 break;
527 } 552 }
528#endif /* CONFIG_PPC_BOOK3S_64 */ 553 return 0;
554}
555__initcall(init_debug_trig_function);
556
557/*
558 * Handle HMIs that occur as a result of a debug trigger.
559 * Return values:
560 * -1 means this is not a HMI cause that we know about
561 * 0 means no further handling is required
562 * 1 means further handling is required
563 */
564long hmi_handle_debugtrig(struct pt_regs *regs)
565{
566 unsigned long hmer = mfspr(SPRN_HMER);
567 long ret = 0;
568
569 /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
570 if (!((hmer & HMER_DEBUG_TRIG)
571 && hmer_debug_trig_function != DTRIG_UNKNOWN))
572 return -1;
573
574 hmer &= ~HMER_DEBUG_TRIG;
575 /* HMER is a write-AND register */
576 mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
577
578 switch (hmer_debug_trig_function) {
579 case DTRIG_VECTOR_CI:
580 /*
581 * Now to avoid problems with soft-disable we
582 * only do the emulation if we are coming from
583 * host user space
584 */
585 if (regs && user_mode(regs))
586 ret = local_paca->hmi_p9_special_emu = 1;
587
588 break;
589
590 default:
591 break;
592 }
593
594 /*
595 * See if any other HMI causes remain to be handled
596 */
597 if (hmer & mfspr(SPRN_HMEER))
598 return -1;
599
600 return ret;
601}
602
603/*
604 * Return values:
605 */
606long hmi_exception_realmode(struct pt_regs *regs)
607{
608 int ret;
609
610 __this_cpu_inc(irq_stat.hmi_exceptions);
611
612 ret = hmi_handle_debugtrig(regs);
613 if (ret >= 0)
614 return ret;
529 615
530 wait_for_subcore_guest_exit(); 616 wait_for_subcore_guest_exit();
531 617
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index c356f9a40b24..c296343d0dcc 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -268,17 +268,19 @@ static void kvmppc_tb_resync_done(void)
268 * secondary threads to proceed. 268 * secondary threads to proceed.
269 * - All secondary threads will eventually call opal hmi handler on 269 * - All secondary threads will eventually call opal hmi handler on
270 * their exit path. 270 * their exit path.
271 *
272 * Returns 1 if the timebase offset should be applied, 0 if not.
271 */ 273 */
272 274
273long kvmppc_realmode_hmi_handler(void) 275long kvmppc_realmode_hmi_handler(void)
274{ 276{
275 int ptid = local_paca->kvm_hstate.ptid;
276 bool resync_req; 277 bool resync_req;
277 278
278 /* This is only called on primary thread. */
279 BUG_ON(ptid != 0);
280 __this_cpu_inc(irq_stat.hmi_exceptions); 279 __this_cpu_inc(irq_stat.hmi_exceptions);
281 280
281 if (hmi_handle_debugtrig(NULL) >= 0)
282 return 1;
283
282 /* 284 /*
283 * By now primary thread has already completed guest->host 285 * By now primary thread has already completed guest->host
284 * partition switch but haven't signaled secondaries yet. 286 * partition switch but haven't signaled secondaries yet.
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 2659844784b8..bd0b623335af 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1909,16 +1909,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1909 bne 27f 1909 bne 27f
1910 bl kvmppc_realmode_hmi_handler 1910 bl kvmppc_realmode_hmi_handler
1911 nop 1911 nop
1912 cmpdi r3, 0
1912 li r12, BOOK3S_INTERRUPT_HMI 1913 li r12, BOOK3S_INTERRUPT_HMI
1913 /* 1914 /*
1914 * At this point kvmppc_realmode_hmi_handler would have resync-ed 1915 * At this point kvmppc_realmode_hmi_handler may have resync-ed
1915 * the TB. Hence it is not required to subtract guest timebase 1916 * the TB, and if it has, we must not subtract the guest timebase
1916 * offset from timebase. So, skip it. 1917 * offset from the timebase. So, skip it.
1917 * 1918 *
1918 * Also, do not call kvmppc_subcore_exit_guest() because it has 1919 * Also, do not call kvmppc_subcore_exit_guest() because it has
1919 * been invoked as part of kvmppc_realmode_hmi_handler(). 1920 * been invoked as part of kvmppc_realmode_hmi_handler().
1920 */ 1921 */
1921 b 30f 1922 beq 30f
1922 1923
192327: 192427:
1924 /* Subtract timebase offset from timebase */ 1925 /* Subtract timebase offset from timebase */