aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2016-09-01 08:21:03 -0400
committerPaolo Bonzini <pbonzini@redhat.com>2016-09-20 03:26:15 -0400
commit108b249c453dd7132599ab6dc7e435a7036c193f (patch)
treeeb600c0558d7032bc1b0c62066f74ac109bd4fd7
parent67198ac3f37ffb150f1c95fae16b597339eabc9d (diff)
KVM: x86: introduce get_kvmclock_ns
Introduce a function that reads the exact nanoseconds value that is provided to the guest in kvmclock. This crystallizes the notion of kvmclock as a thin veneer over a stable TSC, that the guest will (hopefully) convert with NTP. In other words, kvmclock is *not* a paravirtualized host-to-guest NTP. Drop the get_kernel_ns() function, that was used both to get the base value of the master clock and to get the current value of kvmclock. The former use is replaced by ktime_get_boot_ns(), the latter is the purpose of get_kvmclock_ns(). This also allows KVM to provide a Hyper-V time reference counter that is synchronized with the time that is computed from the TSC page. Reviewed-by: Roman Kagan <rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c2
-rw-r--r--arch/x86/include/asm/pvclock.h5
-rw-r--r--arch/x86/kernel/pvclock.c2
-rw-r--r--arch/x86/kvm/hyperv.c2
-rw-r--r--arch/x86/kvm/x86.c48
-rw-r--r--arch/x86/kvm/x86.h6
6 files changed, 43 insertions, 22 deletions
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 94d54d0defa7..02223cb4bcfd 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -129,7 +129,7 @@ static notrace cycle_t vread_pvclock(int *mode)
129 return 0; 129 return 0;
130 } 130 }
131 131
132 ret = __pvclock_read_cycles(pvti); 132 ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
133 } while (pvclock_read_retry(pvti, version)); 133 } while (pvclock_read_retry(pvti, version));
134 134
135 /* refer to vread_tsc() comment for rationale */ 135 /* refer to vread_tsc() comment for rationale */
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index d019f0cc80ec..3ad741b84072 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -87,9 +87,10 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
87} 87}
88 88
89static __always_inline 89static __always_inline
90cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src) 90cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
91 u64 tsc)
91{ 92{
92 u64 delta = rdtsc_ordered() - src->tsc_timestamp; 93 u64 delta = tsc - src->tsc_timestamp;
93 cycle_t offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, 94 cycle_t offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
94 src->tsc_shift); 95 src->tsc_shift);
95 return src->system_time + offset; 96 return src->system_time + offset;
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 3599404e3089..5b2cc889ce34 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -80,7 +80,7 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
80 80
81 do { 81 do {
82 version = pvclock_read_begin(src); 82 version = pvclock_read_begin(src);
83 ret = __pvclock_read_cycles(src); 83 ret = __pvclock_read_cycles(src, rdtsc_ordered());
84 flags = src->flags; 84 flags = src->flags;
85 } while (pvclock_read_retry(src, version)); 85 } while (pvclock_read_retry(src, version));
86 86
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 01bd7b7a6866..ed5b77f39ffb 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -386,7 +386,7 @@ static void synic_init(struct kvm_vcpu_hv_synic *synic)
386 386
387static u64 get_time_ref_counter(struct kvm *kvm) 387static u64 get_time_ref_counter(struct kvm *kvm)
388{ 388{
389 return div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); 389 return div_u64(get_kvmclock_ns(kvm), 100);
390} 390}
391 391
392static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, 392static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00e569c3ca71..81e9945cdf28 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1431,7 +1431,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1431 1431
1432 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); 1432 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
1433 offset = kvm_compute_tsc_offset(vcpu, data); 1433 offset = kvm_compute_tsc_offset(vcpu, data);
1434 ns = get_kernel_ns(); 1434 ns = ktime_get_boot_ns();
1435 elapsed = ns - kvm->arch.last_tsc_nsec; 1435 elapsed = ns - kvm->arch.last_tsc_nsec;
1436 1436
1437 if (vcpu->arch.virtual_tsc_khz) { 1437 if (vcpu->arch.virtual_tsc_khz) {
@@ -1722,6 +1722,34 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
1722#endif 1722#endif
1723} 1723}
1724 1724
1725static u64 __get_kvmclock_ns(struct kvm *kvm)
1726{
1727 struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0);
1728 struct kvm_arch *ka = &kvm->arch;
1729 s64 ns;
1730
1731 if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) {
1732 u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1733 ns = __pvclock_read_cycles(&vcpu->arch.hv_clock, tsc);
1734 } else {
1735 ns = ktime_get_boot_ns() + ka->kvmclock_offset;
1736 }
1737
1738 return ns;
1739}
1740
1741u64 get_kvmclock_ns(struct kvm *kvm)
1742{
1743 unsigned long flags;
1744 s64 ns;
1745
1746 local_irq_save(flags);
1747 ns = __get_kvmclock_ns(kvm);
1748 local_irq_restore(flags);
1749
1750 return ns;
1751}
1752
1725static void kvm_setup_pvclock_page(struct kvm_vcpu *v) 1753static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
1726{ 1754{
1727 struct kvm_vcpu_arch *vcpu = &v->arch; 1755 struct kvm_vcpu_arch *vcpu = &v->arch;
@@ -1811,7 +1839,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1811 } 1839 }
1812 if (!use_master_clock) { 1840 if (!use_master_clock) {
1813 host_tsc = rdtsc(); 1841 host_tsc = rdtsc();
1814 kernel_ns = get_kernel_ns(); 1842 kernel_ns = ktime_get_boot_ns();
1815 } 1843 }
1816 1844
1817 tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); 1845 tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
@@ -4054,7 +4082,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
4054 case KVM_SET_CLOCK: { 4082 case KVM_SET_CLOCK: {
4055 struct kvm_clock_data user_ns; 4083 struct kvm_clock_data user_ns;
4056 u64 now_ns; 4084 u64 now_ns;
4057 s64 delta;
4058 4085
4059 r = -EFAULT; 4086 r = -EFAULT;
4060 if (copy_from_user(&user_ns, argp, sizeof(user_ns))) 4087 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
@@ -4066,10 +4093,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
4066 4093
4067 r = 0; 4094 r = 0;
4068 local_irq_disable(); 4095 local_irq_disable();
4069 now_ns = get_kernel_ns(); 4096 now_ns = __get_kvmclock_ns(kvm);
4070 delta = user_ns.clock - now_ns; 4097 kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
4071 local_irq_enable(); 4098 local_irq_enable();
4072 kvm->arch.kvmclock_offset = delta;
4073 kvm_gen_update_masterclock(kvm); 4099 kvm_gen_update_masterclock(kvm);
4074 break; 4100 break;
4075 } 4101 }
@@ -4077,10 +4103,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
4077 struct kvm_clock_data user_ns; 4103 struct kvm_clock_data user_ns;
4078 u64 now_ns; 4104 u64 now_ns;
4079 4105
4080 local_irq_disable(); 4106 now_ns = get_kvmclock_ns(kvm);
4081 now_ns = get_kernel_ns(); 4107 user_ns.clock = now_ns;
4082 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
4083 local_irq_enable();
4084 user_ns.flags = 0; 4108 user_ns.flags = 0;
4085 memset(&user_ns.pad, 0, sizeof(user_ns.pad)); 4109 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
4086 4110
@@ -7544,7 +7568,7 @@ int kvm_arch_hardware_enable(void)
7544 * before any KVM threads can be running. Unfortunately, we can't 7568 * before any KVM threads can be running. Unfortunately, we can't
7545 * bring the TSCs fully up to date with real time, as we aren't yet far 7569 * bring the TSCs fully up to date with real time, as we aren't yet far
7546 * enough into CPU bringup that we know how much real time has actually 7570 * enough into CPU bringup that we know how much real time has actually
7547 * elapsed; our helper function, get_kernel_ns() will be using boot 7571 * elapsed; our helper function, ktime_get_boot_ns() will be using boot
7548 * variables that haven't been updated yet. 7572 * variables that haven't been updated yet.
7549 * 7573 *
7550 * So we simply find the maximum observed TSC above, then record the 7574 * So we simply find the maximum observed TSC above, then record the
@@ -7779,7 +7803,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
7779 mutex_init(&kvm->arch.apic_map_lock); 7803 mutex_init(&kvm->arch.apic_map_lock);
7780 spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); 7804 spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
7781 7805
7782 kvm->arch.kvmclock_offset = -get_kernel_ns(); 7806 kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
7783 pvclock_update_vm_gtod_copy(kvm); 7807 pvclock_update_vm_gtod_copy(kvm);
7784 7808
7785 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); 7809 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index a82ca466b62e..e8ff3e4ce38a 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -148,11 +148,6 @@ static inline void kvm_register_writel(struct kvm_vcpu *vcpu,
148 return kvm_register_write(vcpu, reg, val); 148 return kvm_register_write(vcpu, reg, val);
149} 149}
150 150
151static inline u64 get_kernel_ns(void)
152{
153 return ktime_get_boot_ns();
154}
155
156static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) 151static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
157{ 152{
158 return !(kvm->arch.disabled_quirks & quirk); 153 return !(kvm->arch.disabled_quirks & quirk);
@@ -164,6 +159,7 @@ void kvm_set_pending_timer(struct kvm_vcpu *vcpu);
164int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); 159int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
165 160
166void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr); 161void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
162u64 get_kvmclock_ns(struct kvm *kvm);
167 163
168int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, 164int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
169 gva_t addr, void *val, unsigned int bytes, 165 gva_t addr, void *val, unsigned int bytes,