aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
author: Joao Martins <joao.m.martins@oracle.com> 2019-11-11 12:20:11 -0500
committer: Paolo Bonzini <pbonzini@redhat.com> 2019-11-12 04:17:41 -0500
commit: 132194ffa138863eac620abb3b6f983278e61b4a (patch)
tree: 92d81d2894552c7e1424773b4bc9e93ca03b54a0 /arch/x86
parent: 9482ae458b7ae1b47c76333592bbe013d47e579f (diff)
KVM: VMX: Do not change PID.NDST when loading a blocked vCPU
When a vCPU enters the block phase, pi_pre_block() inserts the vCPU into a per-pCPU linked list of all vCPUs that are blocked on this pCPU. Afterwards, it changes PID.NV to POSTED_INTR_WAKEUP_VECTOR, whose handler (wakeup_handler()) is responsible for kicking (unblocking) any vCPU on that linked list that now has pending posted interrupts.

While the vCPU is blocked (in kvm_vcpu_block()), it may be preempted, which will cause vmx_vcpu_pi_put() to set PID.SN. If the vCPU is later scheduled to run on a different pCPU, vmx_vcpu_pi_load() will clear PID.SN but will also *overwrite PID.NDST with this different pCPU*, instead of keeping the original pCPU on which the vCPU entered the block phase.

This is a problem because, when a posted interrupt is delivered, wakeup_handler() will be executed and will fail to find the blocked vCPU on its per-pCPU linked list of all vCPUs that are blocked on this pCPU. That is because the vCPU was placed on a *different* per-pCPU linked list, i.e. that of the original pCPU on which it entered the block phase.

The regression was introduced by commit c112b5f50232 ("KVM: x86: Recompute PID.ON when clearing PID.SN"). Therefore, partially revert it and reintroduce the condition in vmx_vcpu_pi_load() responsible for avoiding changing PID.NDST when loading a blocked vCPU.

Fixes: c112b5f50232 ("KVM: x86: Recompute PID.ON when clearing PID.SN")
Tested-by: Nathan Ni <nathan.ni@oracle.com>
Co-developed-by: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/kvm/vmx/vmx.c 14
-rw-r--r-- arch/x86/kvm/vmx/vmx.h 6
2 files changed, 20 insertions, 0 deletions
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 623914dc3a3e..54458c5d5a01 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1268,6 +1268,18 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
1268 if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) 1268 if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
1269 return; 1269 return;
1270 1270
1271 /*
1272 * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
1273 * PI.NDST: pi_post_block is the one expected to change PID.NDST and the
1274 * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
1275 * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up
1276 * correctly.
1277 */
1278 if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
1279 pi_clear_sn(pi_desc);
1280 goto after_clear_sn;
1281 }
1282
1271 /* The full case. */ 1283 /* The full case. */
1272 do { 1284 do {
1273 old.control = new.control = pi_desc->control; 1285 old.control = new.control = pi_desc->control;
@@ -1283,6 +1295,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
1283 } while (cmpxchg64(&pi_desc->control, old.control, 1295 } while (cmpxchg64(&pi_desc->control, old.control,
1284 new.control) != old.control); 1296 new.control) != old.control);
1285 1297
1298after_clear_sn:
1299
1286 /* 1300 /*
1287 * Clear SN before reading the bitmap. The VT-d firmware 1301 * Clear SN before reading the bitmap. The VT-d firmware
1288 * writes the bitmap and reads SN atomically (5.2.3 in the 1302 * writes the bitmap and reads SN atomically (5.2.3 in the
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index bee16687dc0b..1e32ab54fc2d 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -373,6 +373,12 @@ static inline void pi_clear_on(struct pi_desc *pi_desc)
373 (unsigned long *)&pi_desc->control); 373 (unsigned long *)&pi_desc->control);
374} 374}
375 375
376static inline void pi_clear_sn(struct pi_desc *pi_desc)
377{
378 clear_bit(POSTED_INTR_SN,
379 (unsigned long *)&pi_desc->control);
380}
381
376static inline int pi_test_on(struct pi_desc *pi_desc) 382static inline int pi_test_on(struct pi_desc *pi_desc)
377{ 383{
378 return test_bit(POSTED_INTR_ON, 384 return test_bit(POSTED_INTR_ON,