author	Zachary Amsden <zamsden@gmail.com>	2012-02-03 12:43:56 -0500
committer	Avi Kivity <avi@redhat.com>	2012-03-08 07:10:08 -0500
commit	0dd6a6edb0124e6c71931ff575b18e15ed6e8603 (patch)
tree	758efc28e830eca71e3912a76315917037fadfee /arch/x86/kvm
parent	f1e2b26003c41e581243c09ceed7567677449468 (diff)
KVM: Don't mark TSC unstable due to S4 suspend
During a host suspend, TSC may go backwards, which KVM interprets as an unstable TSC. Technically, KVM should not be marking the TSC unstable, which causes the TSC clocksource to go bad, but we need to be adjusting the TSC offsets in such a case.

Dealing with this issue is a little tricky as the only place we can reliably do it is before much of the timekeeping infrastructure is up and running. On top of this, we are not in a KVM thread context, so we may not be able to safely access VCPU fields. Instead, we compute our best known hardware offset at power-up and stash it to be applied to all VCPUs when they actually start running.

Signed-off-by: Zachary Amsden <zamsden@gmail.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
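For readers who want the mechanism in isolation, here is a minimal, self-contained userspace sketch of the approach described above. It is not KVM code: the demo_* names, the plain array of VCPUs, and the numbers in main() are illustrative stand-ins for the real vcpu structures and VM lists. It only models the arithmetic: stash the backwards delta at "hardware enable" time, then fold it into the per-VCPU offset the next time that VCPU is loaded.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct demo_vcpu {
	uint64_t last_host_tsc;          /* host TSC when this VCPU last ran */
	uint64_t tsc_offset;             /* guest TSC = host TSC + tsc_offset */
	uint64_t tsc_offset_adjustment;  /* pending compensation, applied on next load */
};

/*
 * Models the hardware-enable step: if the freshly read host TSC is behind
 * what any VCPU last observed, the TSC went backwards across a suspend.
 * Record the largest observed value and stash the delta in every VCPU.
 */
static void demo_hardware_enable(struct demo_vcpu *vcpus, int n, uint64_t local_tsc)
{
	uint64_t max_tsc = 0;
	bool backwards_tsc = false;
	int i;

	for (i = 0; i < n; i++) {
		if (vcpus[i].last_host_tsc > local_tsc) {
			backwards_tsc = true;
			if (vcpus[i].last_host_tsc > max_tsc)
				max_tsc = vcpus[i].last_host_tsc;
		}
	}

	if (backwards_tsc) {
		uint64_t delta_cyc = max_tsc - local_tsc;

		for (i = 0; i < n; i++) {
			/* Accumulate: several suspend cycles may pass before
			 * this VCPU gets to run again. */
			vcpus[i].tsc_offset_adjustment += delta_cyc;
			vcpus[i].last_host_tsc = local_tsc;
		}
	}
}

/*
 * Models the vcpu_load step: fold any pending adjustment into the VCPU's
 * TSC offset so the guest never sees its TSC jump backwards.
 */
static void demo_vcpu_load(struct demo_vcpu *vcpu)
{
	if (vcpu->tsc_offset_adjustment) {
		vcpu->tsc_offset += vcpu->tsc_offset_adjustment;
		vcpu->tsc_offset_adjustment = 0;
	}
}

int main(void)
{
	struct demo_vcpu vcpus[2] = {
		{ .last_host_tsc = 5000, .tsc_offset = 100 },
		{ .last_host_tsc = 4800, .tsc_offset = 100 },
	};

	/* After resume the host TSC restarted from a much smaller value. */
	demo_hardware_enable(vcpus, 2, 1000);
	demo_vcpu_load(&vcpus[0]);

	/* delta_cyc = 5000 - 1000 = 4000, so vcpu0's offset becomes 4100. */
	printf("vcpu0 offset after load: %" PRIu64 "\n", vcpus[0].tsc_offset);
	return 0;
}

In the patch itself, the pending value is applied through adjust_tsc_offset_host() and a KVM_REQ_CLOCK_UPDATE request is queued for the VCPU, as the first hunk below shows.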
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--	arch/x86/kvm/x86.c	93
1 file changed, 88 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3b931302fa55..4e9bd23d522d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2252,6 +2252,14 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	}
 
 	kvm_x86_ops->vcpu_load(vcpu, cpu);
+
+	/* Apply any externally detected TSC adjustments (due to suspend) */
+	if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
+		adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
+		vcpu->arch.tsc_offset_adjustment = 0;
+		set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+	}
+
 	if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
 		s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
 			native_read_tsc() - vcpu->arch.last_host_tsc;
@@ -5964,13 +5972,88 @@ int kvm_arch_hardware_enable(void *garbage)
 	struct kvm *kvm;
 	struct kvm_vcpu *vcpu;
 	int i;
+	int ret;
+	u64 local_tsc;
+	u64 max_tsc = 0;
+	bool stable, backwards_tsc = false;
 
 	kvm_shared_msr_cpu_online();
-	list_for_each_entry(kvm, &vm_list, vm_list)
-		kvm_for_each_vcpu(i, vcpu, kvm)
-			if (vcpu->cpu == smp_processor_id())
-				kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
-	return kvm_x86_ops->hardware_enable(garbage);
+	ret = kvm_x86_ops->hardware_enable(garbage);
+	if (ret != 0)
+		return ret;
+
+	local_tsc = native_read_tsc();
+	stable = !check_tsc_unstable();
+	list_for_each_entry(kvm, &vm_list, vm_list) {
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			if (!stable && vcpu->cpu == smp_processor_id())
+				set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+			if (stable && vcpu->arch.last_host_tsc > local_tsc) {
+				backwards_tsc = true;
+				if (vcpu->arch.last_host_tsc > max_tsc)
+					max_tsc = vcpu->arch.last_host_tsc;
+			}
+		}
+	}
+
+	/*
+	 * Sometimes, even reliable TSCs go backwards. This happens on
+	 * platforms that reset TSC during suspend or hibernate actions, but
+	 * maintain synchronization. We must compensate. Fortunately, we can
+	 * detect that condition here, which happens early in CPU bringup,
+	 * before any KVM threads can be running. Unfortunately, we can't
+	 * bring the TSCs fully up to date with real time, as we aren't yet far
+	 * enough into CPU bringup that we know how much real time has actually
+	 * elapsed; our helper function, get_kernel_ns() will be using boot
+	 * variables that haven't been updated yet.
+	 *
+	 * So we simply find the maximum observed TSC above, then record the
+	 * adjustment to TSC in each VCPU. When the VCPU later gets loaded,
+	 * the adjustment will be applied. Note that we accumulate
+	 * adjustments, in case multiple suspend cycles happen before some VCPU
+	 * gets a chance to run again. In the event that no KVM threads get a
+	 * chance to run, we will miss the entire elapsed period, as we'll have
+	 * reset last_host_tsc, so VCPUs will not have the TSC adjusted and may
+	 * lose cycle time. This isn't too big a deal, since the loss will be
+	 * uniform across all VCPUs (not to mention the scenario is extremely
+	 * unlikely). It is possible that a second hibernate recovery happens
+	 * much faster than a first, causing the observed TSC here to be
+	 * smaller; this would require additional padding adjustment, which is
+	 * why we set last_host_tsc to the local tsc observed here.
+	 *
+	 * N.B. - this code below runs only on platforms with reliable TSC,
+	 * as that is the only way backwards_tsc is set above. Also note
+	 * that this runs for ALL vcpus, which is not a bug; all VCPUs should
+	 * have the same delta_cyc adjustment applied if backwards_tsc
+	 * is detected. Note further, this adjustment is only done once,
+	 * as we reset last_host_tsc on all VCPUs to stop this from being
+	 * called multiple times (one for each physical CPU bringup).
+	 *
+	 * Platforms with unreliable TSCs don't have to deal with this, they
+	 * will be compensated by the logic in vcpu_load, which sets the TSC to
+	 * catchup mode. This will catch up all VCPUs to real time, but cannot
+	 * guarantee that they stay in perfect synchronization.
+	 */
+	if (backwards_tsc) {
+		u64 delta_cyc = max_tsc - local_tsc;
+		list_for_each_entry(kvm, &vm_list, vm_list) {
+			kvm_for_each_vcpu(i, vcpu, kvm) {
+				vcpu->arch.tsc_offset_adjustment += delta_cyc;
+				vcpu->arch.last_host_tsc = local_tsc;
+			}
+
+			/*
+			 * We have to disable TSC offset matching.. if you were
+			 * booting a VM while issuing an S4 host suspend....
+			 * you may have some problem. Solving this issue is
+			 * left as an exercise to the reader.
+			 */
+			kvm->arch.last_tsc_nsec = 0;
+			kvm->arch.last_tsc_write = 0;
+		}
+
+	}
+	return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)