Diffstat (limited to 'arch')
-rw-r--r--	arch/x86/include/asm/kvm_host.h	 1
-rw-r--r--	arch/x86/kvm/x86.c	93
2 files changed, 89 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dd439f13df8..4fbeb84b181 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -423,6 +423,7 @@ struct kvm_vcpu_arch {
 	u64 last_tsc_nsec;
 	u64 last_tsc_write;
 	u64 last_host_tsc;
+	u64 tsc_offset_adjustment;
 	bool tsc_catchup;
 	bool tsc_always_catchup;
 	s8 virtual_tsc_shift;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3b931302fa5..4e9bd23d522 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2252,6 +2252,14 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	}
 
 	kvm_x86_ops->vcpu_load(vcpu, cpu);
+
+	/* Apply any externally detected TSC adjustments (due to suspend) */
+	if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
+		adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
+		vcpu->arch.tsc_offset_adjustment = 0;
+		set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+	}
+
 	if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
 		s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
 			native_read_tsc() - vcpu->arch.last_host_tsc;
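
The hunk above defers the correction: any pending tsc_offset_adjustment is applied once and cleared the next time the vCPU is loaded, and a clock update is requested. A minimal userspace sketch of that deferred-apply pattern (illustrative toy_* names, not the kernel's structures or API):

/* Illustrative only: toy_vcpu stands in for the relevant kvm_vcpu_arch fields. */
#include <stdint.h>
#include <stdio.h>

struct toy_vcpu {
	uint64_t tsc_offset;            /* offset the guest TSC currently runs with */
	uint64_t tsc_offset_adjustment; /* pending correction; may accumulate */
};

/* Models the vcpu_load path above: consume the pending adjustment exactly once. */
static void toy_vcpu_load(struct toy_vcpu *v)
{
	if (v->tsc_offset_adjustment) {
		v->tsc_offset += v->tsc_offset_adjustment;
		v->tsc_offset_adjustment = 0;
		/* the real code also requests KVM_REQ_CLOCK_UPDATE here */
	}
}

int main(void)
{
	struct toy_vcpu v = { .tsc_offset = 1000 };

	v.tsc_offset_adjustment += 500; /* first suspend cycle */
	v.tsc_offset_adjustment += 250; /* second suspend before the vcpu ever ran */
	toy_vcpu_load(&v);
	printf("offset after load: %llu\n", (unsigned long long)v.tsc_offset); /* 1750 */
	return 0;
}

Accumulating into the adjustment field rather than overwriting it is what lets multiple suspend cycles be absorbed before the vCPU runs again.
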
@@ -5964,13 +5972,88 @@ int kvm_arch_hardware_enable(void *garbage)
 	struct kvm *kvm;
 	struct kvm_vcpu *vcpu;
 	int i;
+	int ret;
+	u64 local_tsc;
+	u64 max_tsc = 0;
+	bool stable, backwards_tsc = false;
 
 	kvm_shared_msr_cpu_online();
-	list_for_each_entry(kvm, &vm_list, vm_list)
-		kvm_for_each_vcpu(i, vcpu, kvm)
-			if (vcpu->cpu == smp_processor_id())
-				kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
-	return kvm_x86_ops->hardware_enable(garbage);
+	ret = kvm_x86_ops->hardware_enable(garbage);
+	if (ret != 0)
+		return ret;
+
+	local_tsc = native_read_tsc();
+	stable = !check_tsc_unstable();
+	list_for_each_entry(kvm, &vm_list, vm_list) {
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			if (!stable && vcpu->cpu == smp_processor_id())
+				set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+			if (stable && vcpu->arch.last_host_tsc > local_tsc) {
+				backwards_tsc = true;
+				if (vcpu->arch.last_host_tsc > max_tsc)
+					max_tsc = vcpu->arch.last_host_tsc;
+			}
+		}
+	}
+
+	/*
+	 * Sometimes, even reliable TSCs go backwards.  This happens on
+	 * platforms that reset TSC during suspend or hibernate actions, but
+	 * maintain synchronization.  We must compensate.  Fortunately, we can
+	 * detect that condition here, which happens early in CPU bringup,
+	 * before any KVM threads can be running.  Unfortunately, we can't
+	 * bring the TSCs fully up to date with real time, as we aren't yet far
+	 * enough into CPU bringup that we know how much real time has actually
+	 * elapsed; our helper function, get_kernel_ns(), will be using boot
+	 * variables that haven't been updated yet.
+	 *
+	 * So we simply find the maximum observed TSC above, then record the
+	 * adjustment to TSC in each VCPU.  When the VCPU later gets loaded,
+	 * the adjustment will be applied.  Note that we accumulate
+	 * adjustments, in case multiple suspend cycles happen before some VCPU
+	 * gets a chance to run again.  In the event that no KVM threads get a
+	 * chance to run, we will miss the entire elapsed period, as we'll have
+	 * reset last_host_tsc, so VCPUs will not have the TSC adjusted and may
+	 * lose cycle time.  This isn't too big a deal, since the loss will be
+	 * uniform across all VCPUs (not to mention the scenario is extremely
+	 * unlikely).  It is possible that a second hibernate recovery happens
+	 * much faster than a first, causing the observed TSC here to be
+	 * smaller; this would require additional padding adjustment, which is
+	 * why we set last_host_tsc to the local tsc observed here.
+	 *
+	 * N.B. - the code below runs only on platforms with reliable TSC,
+	 * as that is the only way backwards_tsc is set above.  Also note
+	 * that this runs for ALL vcpus, which is not a bug; all VCPUs should
+	 * have the same delta_cyc adjustment applied if backwards_tsc
+	 * is detected.  Note further, this adjustment is only done once,
+	 * as we reset last_host_tsc on all VCPUs to stop this from being
+	 * called multiple times (once for each physical CPU bringup).
+	 *
+	 * Platforms with unreliable TSCs don't have to deal with this; they
+	 * will be compensated by the logic in vcpu_load, which sets the TSC to
+	 * catchup mode.  This will catch all VCPUs up to real time, but cannot
+	 * guarantee that they stay in perfect synchronization.
+	 */
+	if (backwards_tsc) {
+		u64 delta_cyc = max_tsc - local_tsc;
+		list_for_each_entry(kvm, &vm_list, vm_list) {
+			kvm_for_each_vcpu(i, vcpu, kvm) {
+				vcpu->arch.tsc_offset_adjustment += delta_cyc;
+				vcpu->arch.last_host_tsc = local_tsc;
+			}
+
+			/*
+			 * We have to disable TSC offset matching; if you were
+			 * booting a VM while issuing an S4 host suspend,
+			 * you may have some problem.  Solving this issue is
+			 * left as an exercise to the reader.
+			 */
+			kvm->arch.last_tsc_nsec = 0;
+			kvm->arch.last_tsc_write = 0;
+		}
+
+	}
+	return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
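
The new kvm_arch_hardware_enable() logic samples the host TSC once, finds the largest last_host_tsc recorded by any vCPU, and treats a local reading below that maximum as a backwards jump; the difference, delta_cyc, is queued on every vCPU. A self-contained sketch of that arithmetic (plain userspace C with made-up sample values, not the kernel implementation):

/* Illustrative only: toy data modelling the backwards-TSC detection above. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct toy_vcpu {
	uint64_t last_host_tsc;         /* host TSC sampled when the vcpu was last put */
	uint64_t tsc_offset_adjustment; /* correction applied at the next load */
};

int main(void)
{
	/* TSCs recorded before suspend; the host TSC reset across S4. */
	struct toy_vcpu vcpus[] = {
		{ .last_host_tsc = 900000 },
		{ .last_host_tsc = 950000 },
	};
	uint64_t local_tsc = 12000;     /* freshly read TSC: smaller, so it went backwards */
	uint64_t max_tsc = 0;
	bool backwards_tsc = false;
	size_t i;

	for (i = 0; i < sizeof(vcpus) / sizeof(vcpus[0]); i++) {
		if (vcpus[i].last_host_tsc > local_tsc) {
			backwards_tsc = true;
			if (vcpus[i].last_host_tsc > max_tsc)
				max_tsc = vcpus[i].last_host_tsc;
		}
	}

	if (backwards_tsc) {
		uint64_t delta_cyc = max_tsc - local_tsc;       /* 938000 here */
		for (i = 0; i < sizeof(vcpus) / sizeof(vcpus[0]); i++) {
			vcpus[i].tsc_offset_adjustment += delta_cyc;
			vcpus[i].last_host_tsc = local_tsc;     /* one-shot: don't re-add later */
		}
		printf("delta_cyc = %llu cycles queued for each vcpu\n",
		       (unsigned long long)delta_cyc);
	}
	return 0;
}

Resetting last_host_tsc to local_tsc afterwards is what keeps the same delta from being added again when the remaining physical CPUs are brought back up.
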