author     Marcelo Tosatti <mtosatti@redhat.com>   2014-01-06 09:18:59 -0500
committer  Paolo Bonzini <pbonzini@redhat.com>     2014-01-15 07:44:46 -0500
commit     f25e656d31ad112612839edaded18920cafea3b1
tree       e88286a2bc521b4b86915e1519f5555a8576fb21 /arch/x86/kvm/x86.c
parent     9ed96e87c5748de4c2807ef17e81287c7304186c
KVM: x86: fix tsc catchup issue with tsc scaling
To fix a problem related to the different resolutions of the TSC and the
system clock, the offset in TSC units is approximated by

    delta = vcpu->hv_clock.tsc_timestamp - vcpu->last_guest_tsc

where vcpu->hv_clock.tsc_timestamp is the guest TSC value at the last
kvm_guest_time_update call, and vcpu->last_guest_tsc is the guest TSC
value at the last VM-exit.

Delta is then later scaled using the mult,shift pair found in the
hv_clock structure (which is correct against tsc_timestamp in that
structure).

However, if a frequency change is performed between these two points,
this delta is measured using different TSC frequencies, but scaled using
the mult,shift pair for one frequency only. The end result is an
incorrect delta.

The bug which this code works around is not the only cause of clock
backwards events. The global accumulator is still necessary, so remove
the max_kernel_ns fix and rely on the global accumulator to prevent
clock backwards events.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
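To make the failure mode concrete, here is a minimal user-space sketch
(not kernel code) of the arithmetic described above. scale_delta()
imitates the 32.32 fixed-point conversion done by pvclock_scale_delta();
the frequencies, mult,shift pairs and all numeric values below are made
up for illustration:

#include <stdint.h>
#include <stdio.h>

/*
 * Imitation of the kernel's pvclock_scale_delta(): convert a delta in
 * TSC cycles to nanoseconds with a 32.32 fixed-point multiplier.
 * (The kernel uses a 128-bit intermediate product; the GCC/Clang
 * __uint128_t extension stands in for it here.)
 */
static uint64_t scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
{
	if (shift < 0)
		delta >>= -shift;
	else
		delta <<= shift;
	return (uint64_t)(((__uint128_t)delta * mul_frac) >> 32);
}

int main(void)
{
	/*
	 * 2 GHz: 0.5 ns/cycle -> mul_frac = 2^31, shift = 0
	 * 1 GHz: 1.0 ns/cycle -> mul_frac = 2^31, shift = 1
	 * The pair used below is the 2 GHz one, i.e. the pair that is
	 * correct against hv_clock.tsc_timestamp.
	 */
	const uint32_t mul_frac = 0x80000000u;
	const int shift_2ghz = 0;

	/*
	 * tsc_timestamp is sampled while the guest TSC runs at 2 GHz.
	 * The guest then runs 1 ms at 2 GHz (2,000,000 cycles), the TSC
	 * frequency is changed to 1 GHz, and it runs another 1 ms
	 * (1,000,000 cycles) before last_guest_tsc is recorded.
	 */
	uint64_t delta = 2000000 + 1000000;	/* cycles, mixed rates */

	/*
	 * True elapsed time is 2,000,000 ns, but scaling the whole delta
	 * with the single 2 GHz pair yields 1,500,000 ns -- the
	 * "incorrect delta" the commit message describes.
	 */
	printf("scaled delta: %llu ns (truth: 2000000 ns)\n",
	       (unsigned long long)scale_delta(delta, mul_frac, shift_2ghz));
	return 0;
}

Scaling each sub-interval with the pair valid for its own frequency
would give the right answer, but that information is not available after
the fact, which is why the commit drops the workaround entirely.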
Diffstat (limited to 'arch/x86/kvm/x86.c')
 arch/x86/kvm/x86.c | 41 +----------------------------------------
 1 file changed, 1 insertion(+), 40 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0fd2bd78fccf..842abd33e9b5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1487,7 +1487,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	unsigned long flags, this_tsc_khz;
 	struct kvm_vcpu_arch *vcpu = &v->arch;
 	struct kvm_arch *ka = &v->kvm->arch;
-	s64 kernel_ns, max_kernel_ns;
+	s64 kernel_ns;
 	u64 tsc_timestamp, host_tsc;
 	struct pvclock_vcpu_time_info guest_hv_clock;
 	u8 pvclock_flags;
@@ -1546,37 +1546,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	if (!vcpu->pv_time_enabled)
 		return 0;
 
-	/*
-	 * Time as measured by the TSC may go backwards when resetting the base
-	 * tsc_timestamp. The reason for this is that the TSC resolution is
-	 * higher than the resolution of the other clock scales. Thus, many
-	 * possible measurments of the TSC correspond to one measurement of any
-	 * other clock, and so a spread of values is possible. This is not a
-	 * problem for the computation of the nanosecond clock; with TSC rates
-	 * around 1GHZ, there can only be a few cycles which correspond to one
-	 * nanosecond value, and any path through this code will inevitably
-	 * take longer than that. However, with the kernel_ns value itself,
-	 * the precision may be much lower, down to HZ granularity. If the
-	 * first sampling of TSC against kernel_ns ends in the low part of the
-	 * range, and the second in the high end of the range, we can get:
-	 *
-	 * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new
-	 *
-	 * As the sampling errors potentially range in the thousands of cycles,
-	 * it is possible such a time value has already been observed by the
-	 * guest. To protect against this, we must compute the system time as
-	 * observed by the guest and ensure the new system time is greater.
-	 */
-	max_kernel_ns = 0;
-	if (vcpu->hv_clock.tsc_timestamp) {
-		max_kernel_ns = vcpu->last_guest_tsc -
-				vcpu->hv_clock.tsc_timestamp;
-		max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
-				    vcpu->hv_clock.tsc_to_system_mul,
-				    vcpu->hv_clock.tsc_shift);
-		max_kernel_ns += vcpu->last_kernel_ns;
-	}
-
 	if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
 		kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
 				   &vcpu->hv_clock.tsc_shift,
@@ -1584,14 +1553,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 		vcpu->hw_tsc_khz = this_tsc_khz;
 	}
 
-	/* with a master <monotonic time, tsc value> tuple,
-	 * pvclock clock reads always increase at the (scaled) rate
-	 * of guest TSC - no need to deal with sampling errors.
-	 */
-	if (!use_master_clock) {
-		if (max_kernel_ns > kernel_ns)
-			kernel_ns = max_kernel_ns;
-	}
 	/* With all the info we got, fill in the values */
 	vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
 	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
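For illustration only, the inequality in the removed comment can be seen
with made-up numbers. Take S = 1 ns/cycle (a 1 GHz TSC) and two
<TSC base, kernel_ns> pairings whose TSC samples land at opposite ends
of a kernel_ns tick, with a few thousand cycles of sampling error:

    old pairing: offset_low  = 1,000     cycles, kns_old = 0 ns
    new pairing: offset_high = 1,003,000 cycles, kns_new = 1,000,000 ns

    guest read at TSC = 1,010,000 cycles:
      old params: (1,010,000 - 1,000)     * 1 + 0         = 1,009,000 ns
      new params: (1,010,000 - 1,003,000) * 1 + 1,000,000 = 1,007,000 ns

The second reading is 2,000 ns earlier than the first: exactly the
backwards step the max_kernel_ns clamp used to absorb, and which the
global accumulator now has to handle.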