author     Marcelo Tosatti <mtosatti@redhat.com>   2014-01-06 09:18:59 -0500
committer  Paolo Bonzini <pbonzini@redhat.com>     2014-01-15 07:44:46 -0500
commit     f25e656d31ad112612839edaded18920cafea3b1
tree       e88286a2bc521b4b86915e1519f5555a8576fb21 /arch/x86/kvm/x86.c
parent     9ed96e87c5748de4c2807ef17e81287c7304186c
KVM: x86: fix tsc catchup issue with tsc scaling
To fix a problem related to the different resolutions of the TSC and the
system clock, the offset in TSC units is approximated by

    delta = vcpu->hv_clock.tsc_timestamp - vcpu->last_guest_tsc

where vcpu->hv_clock.tsc_timestamp is the guest TSC value at the last
kvm_guest_time_update call, and vcpu->last_guest_tsc is the guest TSC
value at the last VM-exit.

Delta is then later scaled using the mult,shift pair found in the
hv_clock structure (which is correct against tsc_timestamp in that
structure).

However, if a frequency change is performed between these two points,
this delta is measured using different TSC frequencies, but scaled using
the mult,shift pair for one frequency only. The end result is an
incorrect delta.

The bug which this code works around is not the only cause of clock
backwards events. The global accumulator is still necessary, so remove
the max_kernel_ns fix and rely on the global accumulator to prevent
clock backwards events.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
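To make the failure mode concrete, here is a minimal user-space sketch
(not kernel code) of the arithmetic described above. scale_delta()
imitates the 32.32 fixed-point conversion done by pvclock_scale_delta();
the frequencies, mult,shift pairs and all numeric values below are made
up for illustration:

#include <stdint.h>
#include <stdio.h>

/*
 * Imitation of the kernel's pvclock_scale_delta(): convert a delta in
 * TSC cycles to nanoseconds with a 32.32 fixed-point multiplier.
 * (The kernel uses a 128-bit intermediate product; the GCC/Clang
 * __uint128_t extension stands in for it here.)
 */
static uint64_t scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
{
	if (shift < 0)
		delta >>= -shift;
	else
		delta <<= shift;
	return (uint64_t)(((__uint128_t)delta * mul_frac) >> 32);
}

int main(void)
{
	/*
	 * 2 GHz: 0.5 ns/cycle -> mul_frac = 2^31, shift = 0
	 * 1 GHz: 1.0 ns/cycle -> mul_frac = 2^31, shift = 1
	 * The pair used below is the 2 GHz one, i.e. the pair that is
	 * correct against hv_clock.tsc_timestamp.
	 */
	const uint32_t mul_frac = 0x80000000u;
	const int shift_2ghz = 0;

	/*
	 * tsc_timestamp is sampled while the guest TSC runs at 2 GHz.
	 * The guest then runs 1 ms at 2 GHz (2,000,000 cycles), the TSC
	 * frequency is changed to 1 GHz, and it runs another 1 ms
	 * (1,000,000 cycles) before last_guest_tsc is recorded.
	 */
	uint64_t delta = 2000000 + 1000000;	/* cycles, mixed rates */

	/*
	 * True elapsed time is 2,000,000 ns, but scaling the whole delta
	 * with the single 2 GHz pair yields 1,500,000 ns -- the
	 * "incorrect delta" the commit message describes.
	 */
	printf("scaled delta: %llu ns (truth: 2000000 ns)\n",
	       (unsigned long long)scale_delta(delta, mul_frac, shift_2ghz));
	return 0;
}

Scaling each sub-interval with the pair valid for its own frequency
would give the right answer, but that information is not available after
the fact, which is why the commit drops the workaround entirely.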
Diffstat (limited to 'arch/x86/kvm/x86.c')
 arch/x86/kvm/x86.c | 41 +----------------------------------------
 1 file changed, 1 insertion(+), 40 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0fd2bd78fccf..842abd33e9b5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1487,7 +1487,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	unsigned long flags, this_tsc_khz;
 	struct kvm_vcpu_arch *vcpu = &v->arch;
 	struct kvm_arch *ka = &v->kvm->arch;
-	s64 kernel_ns, max_kernel_ns;
+	s64 kernel_ns;
 	u64 tsc_timestamp, host_tsc;
 	struct pvclock_vcpu_time_info guest_hv_clock;
 	u8 pvclock_flags;
@@ -1546,37 +1546,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	if (!vcpu->pv_time_enabled)
 		return 0;
 
-	/*
-	 * Time as measured by the TSC may go backwards when resetting the base
-	 * tsc_timestamp. The reason for this is that the TSC resolution is
-	 * higher than the resolution of the other clock scales. Thus, many
-	 * possible measurments of the TSC correspond to one measurement of any
-	 * other clock, and so a spread of values is possible. This is not a
-	 * problem for the computation of the nanosecond clock; with TSC rates
-	 * around 1GHZ, there can only be a few cycles which correspond to one
-	 * nanosecond value, and any path through this code will inevitably
-	 * take longer than that. However, with the kernel_ns value itself,
-	 * the precision may be much lower, down to HZ granularity. If the
-	 * first sampling of TSC against kernel_ns ends in the low part of the
-	 * range, and the second in the high end of the range, we can get:
-	 *
-	 * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new
-	 *
-	 * As the sampling errors potentially range in the thousands of cycles,
-	 * it is possible such a time value has already been observed by the
-	 * guest. To protect against this, we must compute the system time as
-	 * observed by the guest and ensure the new system time is greater.
-	 */
-	max_kernel_ns = 0;
-	if (vcpu->hv_clock.tsc_timestamp) {
-		max_kernel_ns = vcpu->last_guest_tsc -
-				vcpu->hv_clock.tsc_timestamp;
-		max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
-				    vcpu->hv_clock.tsc_to_system_mul,
-				    vcpu->hv_clock.tsc_shift);
-		max_kernel_ns += vcpu->last_kernel_ns;
-	}
-
 	if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
 		kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
 				   &vcpu->hv_clock.tsc_shift,
@@ -1584,14 +1553,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 		vcpu->hw_tsc_khz = this_tsc_khz;
 	}
 
-	/* with a master <monotonic time, tsc value> tuple,
-	 * pvclock clock reads always increase at the (scaled) rate
-	 * of guest TSC - no need to deal with sampling errors.
-	 */
-	if (!use_master_clock) {
-		if (max_kernel_ns > kernel_ns)
-			kernel_ns = max_kernel_ns;
-	}
 	/* With all the info we got, fill in the values */
 	vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
 	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
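For illustration only, the inequality in the removed comment can be seen
with made-up numbers. Take S = 1 ns/cycle (a 1 GHz TSC) and two
<TSC base, kernel_ns> pairings whose TSC samples land at opposite ends
of a kernel_ns tick, with a few thousand cycles of sampling error:

    old pairing: offset_low  = 1,000     cycles, kns_old = 0 ns
    new pairing: offset_high = 1,003,000 cycles, kns_new = 1,000,000 ns

    guest read at TSC = 1,010,000 cycles:
      old params: (1,010,000 - 1,000)     * 1 + 0         = 1,009,000 ns
      new params: (1,010,000 - 1,003,000) * 1 + 1,000,000 = 1,007,000 ns

The second reading is 2,000 ns earlier than the first: exactly the
backwards step the max_kernel_ns clamp used to absorb, and which the
global accumulator now has to handle.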