aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm
diff options
context:
space:
mode:
authorRadim Krčmář <rkrcmar@redhat.com>2015-03-25 07:08:14 -0400
committerPaolo Bonzini <pbonzini@redhat.com>2015-04-27 09:48:59 -0400
commit5dca0d9147458be9b9363b8a484aa77d710b412a (patch)
treec1614d2156f3d2b28ecc003e81cee39e8513f023 /arch/x86/kvm
parentb787f68c36d49bb1d9236f403813641efa74a031 (diff)
kvm: x86: fix kvmclock update protocol
The kvmclock spec says that the host will increment a version field to an odd number, then update stuff, then increment it to an even number. The host is buggy and doesn't do this, and the result is observable when one vcpu reads another vcpu's kvmclock data. There's no good way for a guest kernel to keep its vdso from reading a different vcpu's kvmclock data, but we don't need to care about changing VCPUs as long as we read a consistent data from kvmclock. (VCPU can change outside of this loop too, so it doesn't matter if we return a value not fit for this VCPU.) Based on a patch by Radim Krčmář. Reviewed-by: Radim Krčmář <rkrcmar@redhat.com> Acked-by: Marcelo Tosatti <mtosatti@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/x86.c33
1 files changed, 28 insertions, 5 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ed31c31b2485..c73efcd03e29 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1669,12 +1669,28 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1669 &guest_hv_clock, sizeof(guest_hv_clock)))) 1669 &guest_hv_clock, sizeof(guest_hv_clock))))
1670 return 0; 1670 return 0;
1671 1671
1672 /* 1672 /* This VCPU is paused, but it's legal for a guest to read another
1673 * The interface expects us to write an even number signaling that the 1673 * VCPU's kvmclock, so we really have to follow the specification where
1674 * update is finished. Since the guest won't see the intermediate 1674 * it says that version is odd if data is being modified, and even after
1675 * state, we just increase by 2 at the end. 1675 * it is consistent.
1676 *
1677 * Version field updates must be kept separate. This is because
1678 * kvm_write_guest_cached might use a "rep movs" instruction, and
1679 * writes within a string instruction are weakly ordered. So there
1680 * are three writes overall.
1681 *
1682 * As a small optimization, only write the version field in the first
1683 * and third write. The vcpu->pv_time cache is still valid, because the
1684 * version field is the first in the struct.
1676 */ 1685 */
1677 vcpu->hv_clock.version = guest_hv_clock.version + 2; 1686 BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
1687
1688 vcpu->hv_clock.version = guest_hv_clock.version + 1;
1689 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
1690 &vcpu->hv_clock,
1691 sizeof(vcpu->hv_clock.version));
1692
1693 smp_wmb();
1678 1694
1679 /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ 1695 /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
1680 pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); 1696 pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
@@ -1695,6 +1711,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1695 kvm_write_guest_cached(v->kvm, &vcpu->pv_time, 1711 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
1696 &vcpu->hv_clock, 1712 &vcpu->hv_clock,
1697 sizeof(vcpu->hv_clock)); 1713 sizeof(vcpu->hv_clock));
1714
1715 smp_wmb();
1716
1717 vcpu->hv_clock.version++;
1718 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
1719 &vcpu->hv_clock,
1720 sizeof(vcpu->hv_clock.version));
1698 return 0; 1721 return 0;
1699} 1722}
1700 1723