author		Andy Lutomirski <luto@kernel.org>	2015-12-10 22:20:18 -0500
committer	Ingo Molnar <mingo@kernel.org>	2015-12-11 02:56:02 -0500
commit		677a73a9aa5433ea728200c26a7b3506d5eaa92b (patch)
tree		aefd2afa048807a3b79cea05bf7971f31e512568
parent		478dc89cf316697e8029411a64ea2b30c528434d (diff)
x86/kvm: On KVM re-enable (e.g. after suspend), update clocks
This gets rid of the "did TSC go backwards" logic and just updates
all clocks.  It should work better (no more disabling of fast
timing) and more reliably (all of the clocks are actually updated).

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/861716d768a1da6d1fd257b7972f8df13baf7f85.1449702533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--	arch/x86/kvm/x86.c	75
1 file changed, 3 insertions(+), 72 deletions(-)
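In short: instead of detecting a backwards TSC on CPU re-enable and compensating, the patch unconditionally queues clock-update requests and lets the normal update machinery recompute everything on the next guest entry. A minimal sketch of the resulting function, condensed from the kvm_arch_hardware_enable() hunk below; the struct kvm *kvm declaration is inferred from the unchanged context lines, and error handling is as in the diff:

	/* Condensed sketch of kvm_arch_hardware_enable() after this patch. */
	int kvm_arch_hardware_enable(void)
	{
		struct kvm *kvm;
		struct kvm_vcpu *vcpu;
		int i;
		int ret;

		kvm_shared_msr_cpu_online();
		ret = kvm_x86_ops->hardware_enable();
		if (ret != 0)
			return ret;

		/*
		 * Queue a full clock refresh for every vCPU that last ran on
		 * this pCPU: both the per-vCPU kvmclock and the VM-wide
		 * master clock are recomputed on the next guest entry,
		 * rather than detecting a backwards TSC here and patching
		 * up offsets by hand.
		 */
		list_for_each_entry(kvm, &vm_list, vm_list) {
			kvm_for_each_vcpu(i, vcpu, kvm) {
				if (vcpu->cpu == smp_processor_id()) {
					kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
					kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE,
							 vcpu);
				}
			}
		}
		return 0;
	}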
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00462bd63129..6e32e87d6af6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -123,8 +123,6 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
 unsigned int __read_mostly lapic_timer_advance_ns = 0;
 module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
 
-static bool __read_mostly backwards_tsc_observed = false;
-
 #define KVM_NR_SHARED_MSRS 16
 
 struct kvm_shared_msrs_global {
@@ -1671,7 +1669,6 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
 					&ka->master_cycle_now);
 
 	ka->use_master_clock = host_tsc_clocksource && vcpus_matched
-			       && !backwards_tsc_observed
 			       && !ka->boot_vcpu_runs_old_kvmclock;
 
 	if (ka->use_master_clock)
@@ -7366,88 +7363,22 @@ int kvm_arch_hardware_enable(void)
 	struct kvm_vcpu *vcpu;
 	int i;
 	int ret;
-	u64 local_tsc;
-	u64 max_tsc = 0;
-	bool stable, backwards_tsc = false;
 
 	kvm_shared_msr_cpu_online();
 	ret = kvm_x86_ops->hardware_enable();
 	if (ret != 0)
 		return ret;
 
-	local_tsc = rdtsc();
-	stable = !check_tsc_unstable();
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		kvm_for_each_vcpu(i, vcpu, kvm) {
-			if (!stable && vcpu->cpu == smp_processor_id())
+			if (vcpu->cpu == smp_processor_id()) {
 				kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
-			if (stable && vcpu->arch.last_host_tsc > local_tsc) {
-				backwards_tsc = true;
-				if (vcpu->arch.last_host_tsc > max_tsc)
-					max_tsc = vcpu->arch.last_host_tsc;
-			}
+				kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE,
+						 vcpu);
+			}
 		}
 	}
 
-	/*
-	 * Sometimes, even reliable TSCs go backwards.  This happens on
-	 * platforms that reset TSC during suspend or hibernate actions, but
-	 * maintain synchronization.  We must compensate.  Fortunately, we can
-	 * detect that condition here, which happens early in CPU bringup,
-	 * before any KVM threads can be running.  Unfortunately, we can't
-	 * bring the TSCs fully up to date with real time, as we aren't yet far
-	 * enough into CPU bringup that we know how much real time has actually
-	 * elapsed; our helper function, get_kernel_ns() will be using boot
-	 * variables that haven't been updated yet.
-	 *
-	 * So we simply find the maximum observed TSC above, then record the
-	 * adjustment to TSC in each VCPU.  When the VCPU later gets loaded,
-	 * the adjustment will be applied.  Note that we accumulate
-	 * adjustments, in case multiple suspend cycles happen before some VCPU
-	 * gets a chance to run again.  In the event that no KVM threads get a
-	 * chance to run, we will miss the entire elapsed period, as we'll have
-	 * reset last_host_tsc, so VCPUs will not have the TSC adjusted and may
-	 * loose cycle time.  This isn't too big a deal, since the loss will be
-	 * uniform across all VCPUs (not to mention the scenario is extremely
-	 * unlikely). It is possible that a second hibernate recovery happens
-	 * much faster than a first, causing the observed TSC here to be
-	 * smaller; this would require additional padding adjustment, which is
-	 * why we set last_host_tsc to the local tsc observed here.
-	 *
-	 * N.B. - this code below runs only on platforms with reliable TSC,
-	 * as that is the only way backwards_tsc is set above.  Also note
-	 * that this runs for ALL vcpus, which is not a bug; all VCPUs should
-	 * have the same delta_cyc adjustment applied if backwards_tsc
-	 * is detected.  Note further, this adjustment is only done once,
-	 * as we reset last_host_tsc on all VCPUs to stop this from being
-	 * called multiple times (one for each physical CPU bringup).
-	 *
-	 * Platforms with unreliable TSCs don't have to deal with this, they
-	 * will be compensated by the logic in vcpu_load, which sets the TSC to
-	 * catchup mode.  This will catchup all VCPUs to real time, but cannot
-	 * guarantee that they stay in perfect synchronization.
-	 */
-	if (backwards_tsc) {
-		u64 delta_cyc = max_tsc - local_tsc;
-		backwards_tsc_observed = true;
-		list_for_each_entry(kvm, &vm_list, vm_list) {
-			kvm_for_each_vcpu(i, vcpu, kvm) {
-				vcpu->arch.tsc_offset_adjustment += delta_cyc;
-				vcpu->arch.last_host_tsc = local_tsc;
-				kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
-			}
-
-			/*
-			 * We have to disable TSC offset matching.. if you were
-			 * booting a VM while issuing an S4 host suspend....
-			 * you may have some problem.  Solving this issue is
-			 * left as an exercise to the reader.
-			 */
-			kvm->arch.last_tsc_nsec = 0;
-			kvm->arch.last_tsc_write = 0;
-		}
-
-	}
 	return 0;
 }
 
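For reference, the two requests queued above are serviced on the next guest entry. A hedged sketch of that handling, based on vcpu_enter_guest() in this same file in kernels of this era; this code is not part of the patch, is condensed (unrelated request checks are elided), and the exact surrounding code may differ:

	/* Request handling in vcpu_enter_guest(), same file, same era. */
	if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
		kvm_gen_update_masterclock(vcpu->kvm);	/* recompute the VM-wide master clock */
	if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
		r = kvm_guest_time_update(vcpu);	/* rewrite this vCPU's kvmclock page */
		if (unlikely(r))
			goto out;
	}

Because every affected vCPU gets both requests, all clocks are recomputed from the freshly reset host TSC, which is why the old detect-and-compensate path (and the backwards_tsc_observed kill switch for the master clock) can be deleted outright.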