aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorMahesh Salgaonkar <mahesh@linux.vnet.ibm.com>2014-06-11 04:48:21 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2014-06-11 05:15:15 -0400
commit74845bc2fa9c0e6b218821cd4e1eb7a552d3e503 (patch)
treeea880beba8879b6c97dbea43bbfe530b56f48e40 /arch
parente6654d5b4259317be82b06cf9218f82abec8c8e7 (diff)
powerpc/book3s: Fix guest MC delivery mechanism to avoid soft lockups in guest.
Currently we forward MCEs to guest which have been recovered by guest. And for unhandled errors we do not deliver the MCE to guest. It looks like with no support of FWNMI in qemu, guest just panics whenever we deliver the recovered MCEs to guest. Also, the existing code used to return to host for unhandled errors which was causing guest to hang with soft lockups inside guest and makes it difficult to recover guest instance. This patch now forwards all fatal MCEs to guest causing guest to crash/panic. And, for recovered errors we just go back to normal functioning of guest instead of returning to host. This fixes soft lockup issues in guest. This patch also fixes an issue where guest MCE events were not logged to host console. Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c15
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S19
2 files changed, 23 insertions, 11 deletions
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 768a9f977c00..3a5c568b1e89 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -113,10 +113,8 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
113 * We assume that if the condition is recovered then linux host 113 * We assume that if the condition is recovered then linux host
114 * will have generated an error log event that we will pick 114 * will have generated an error log event that we will pick
115 * up and log later. 115 * up and log later.
116 * Don't release mce event now. In case if condition is not 116 * Don't release mce event now. We will queue up the event so that
117 * recovered we do guest exit and go back to linux host machine 117 * we can log the MCE event info on host console.
118 * check handler. Hence we need make sure that current mce event
119 * is available for linux host to consume.
120 */ 118 */
121 if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE)) 119 if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE))
122 goto out; 120 goto out;
@@ -128,11 +126,12 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
128 126
129out: 127out:
130 /* 128 /*
131 * If we have handled the error, then release the mce event because 129 * We are now going enter guest either through machine check
132 * we will be delivering machine check to guest. 130 * interrupt (for unhandled errors) or will continue from
131 * current HSRR0 (for handled errors) in guest. Hence
132 * queue up the event so that we can log it from host console later.
133 */ 133 */
134 if (handled) 134 machine_check_queue_event();
135 release_mce_event();
136 135
137 return handled; 136 return handled;
138} 137}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 77356fd25ccc..868347ef09fd 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2257,15 +2257,28 @@ machine_check_realmode:
2257 mr r3, r9 /* get vcpu pointer */ 2257 mr r3, r9 /* get vcpu pointer */
2258 bl kvmppc_realmode_machine_check 2258 bl kvmppc_realmode_machine_check
2259 nop 2259 nop
2260 cmpdi r3, 0 /* continue exiting from guest? */ 2260 cmpdi r3, 0 /* Did we handle MCE ? */
2261 ld r9, HSTATE_KVM_VCPU(r13) 2261 ld r9, HSTATE_KVM_VCPU(r13)
2262 li r12, BOOK3S_INTERRUPT_MACHINE_CHECK 2262 li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
2263 beq mc_cont 2263 /*
2264 * Deliver unhandled/fatal (e.g. UE) MCE errors to guest through
2265 * machine check interrupt (set HSRR0 to 0x200). And for handled
2266 * errors (no-fatal), just go back to guest execution with current
2267 * HSRR0 instead of exiting guest. This new approach will inject
2268 * machine check to guest for fatal error causing guest to crash.
2269 *
2270 * The old code used to return to host for unhandled errors which
2271 * was causing guest to hang with soft lockups inside guest and
2272 * makes it difficult to recover guest instance.
2273 */
2274 ld r10, VCPU_PC(r9)
2275 ld r11, VCPU_MSR(r9)
2276 bne 2f /* Continue guest execution. */
2264 /* If not, deliver a machine check. SRR0/1 are already set */ 2277 /* If not, deliver a machine check. SRR0/1 are already set */
2265 li r10, BOOK3S_INTERRUPT_MACHINE_CHECK 2278 li r10, BOOK3S_INTERRUPT_MACHINE_CHECK
2266 ld r11, VCPU_MSR(r9) 2279 ld r11, VCPU_MSR(r9)
2267 bl kvmppc_msr_interrupt 2280 bl kvmppc_msr_interrupt
2268 b fast_interrupt_c_return 22812: b fast_interrupt_c_return
2269 2282
2270/* 2283/*
2271 * Check the reason we woke from nap, and take appropriate action. 2284 * Check the reason we woke from nap, and take appropriate action.