diff options
| author | Paolo Bonzini <pbonzini@redhat.com> | 2016-09-01 08:21:03 -0400 |
|---|---|---|
| committer | Paolo Bonzini <pbonzini@redhat.com> | 2016-09-20 03:26:15 -0400 |
| commit | 108b249c453dd7132599ab6dc7e435a7036c193f (patch) | |
| tree | eb600c0558d7032bc1b0c62066f74ac109bd4fd7 | |
| parent | 67198ac3f37ffb150f1c95fae16b597339eabc9d (diff) | |
KVM: x86: introduce get_kvmclock_ns
Introduce a function that reads the exact nanoseconds value that is
provided to the guest in kvmclock. This crystallizes the notion of
kvmclock as a thin veneer over a stable TSC, that the guest will
(hopefully) convert with NTP. In other words, kvmclock is *not* a
paravirtualized host-to-guest NTP.
Drop the get_kernel_ns() function, which was used both to get the base
value of the master clock and to get the current value of kvmclock.
The former use is replaced by ktime_get_boot_ns(); the latter is
the purpose of the new function, get_kvmclock_ns().
This also allows KVM to provide a Hyper-V time reference counter that
is synchronized with the time that is computed from the TSC page.
Reviewed-by: Roman Kagan <rkagan@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
| -rw-r--r-- | arch/x86/entry/vdso/vclock_gettime.c | 2 | ||||
| -rw-r--r-- | arch/x86/include/asm/pvclock.h | 5 | ||||
| -rw-r--r-- | arch/x86/kernel/pvclock.c | 2 | ||||
| -rw-r--r-- | arch/x86/kvm/hyperv.c | 2 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.c | 48 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.h | 6 |
6 files changed, 43 insertions, 22 deletions
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 94d54d0defa7..02223cb4bcfd 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c | |||
| @@ -129,7 +129,7 @@ static notrace cycle_t vread_pvclock(int *mode) | |||
| 129 | return 0; | 129 | return 0; |
| 130 | } | 130 | } |
| 131 | 131 | ||
| 132 | ret = __pvclock_read_cycles(pvti); | 132 | ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); |
| 133 | } while (pvclock_read_retry(pvti, version)); | 133 | } while (pvclock_read_retry(pvti, version)); |
| 134 | 134 | ||
| 135 | /* refer to vread_tsc() comment for rationale */ | 135 | /* refer to vread_tsc() comment for rationale */ |
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index d019f0cc80ec..3ad741b84072 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h | |||
| @@ -87,9 +87,10 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) | |||
| 87 | } | 87 | } |
| 88 | 88 | ||
| 89 | static __always_inline | 89 | static __always_inline |
| 90 | cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src) | 90 | cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, |
| 91 | u64 tsc) | ||
| 91 | { | 92 | { |
| 92 | u64 delta = rdtsc_ordered() - src->tsc_timestamp; | 93 | u64 delta = tsc - src->tsc_timestamp; |
| 93 | cycle_t offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, | 94 | cycle_t offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, |
| 94 | src->tsc_shift); | 95 | src->tsc_shift); |
| 95 | return src->system_time + offset; | 96 | return src->system_time + offset; |
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 3599404e3089..5b2cc889ce34 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c | |||
| @@ -80,7 +80,7 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | |||
| 80 | 80 | ||
| 81 | do { | 81 | do { |
| 82 | version = pvclock_read_begin(src); | 82 | version = pvclock_read_begin(src); |
| 83 | ret = __pvclock_read_cycles(src); | 83 | ret = __pvclock_read_cycles(src, rdtsc_ordered()); |
| 84 | flags = src->flags; | 84 | flags = src->flags; |
| 85 | } while (pvclock_read_retry(src, version)); | 85 | } while (pvclock_read_retry(src, version)); |
| 86 | 86 | ||
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 01bd7b7a6866..ed5b77f39ffb 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c | |||
| @@ -386,7 +386,7 @@ static void synic_init(struct kvm_vcpu_hv_synic *synic) | |||
| 386 | 386 | ||
| 387 | static u64 get_time_ref_counter(struct kvm *kvm) | 387 | static u64 get_time_ref_counter(struct kvm *kvm) |
| 388 | { | 388 | { |
| 389 | return div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); | 389 | return div_u64(get_kvmclock_ns(kvm), 100); |
| 390 | } | 390 | } |
| 391 | 391 | ||
| 392 | static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, | 392 | static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 00e569c3ca71..81e9945cdf28 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -1431,7 +1431,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
| 1431 | 1431 | ||
| 1432 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); | 1432 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); |
| 1433 | offset = kvm_compute_tsc_offset(vcpu, data); | 1433 | offset = kvm_compute_tsc_offset(vcpu, data); |
| 1434 | ns = get_kernel_ns(); | 1434 | ns = ktime_get_boot_ns(); |
| 1435 | elapsed = ns - kvm->arch.last_tsc_nsec; | 1435 | elapsed = ns - kvm->arch.last_tsc_nsec; |
| 1436 | 1436 | ||
| 1437 | if (vcpu->arch.virtual_tsc_khz) { | 1437 | if (vcpu->arch.virtual_tsc_khz) { |
| @@ -1722,6 +1722,34 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) | |||
| 1722 | #endif | 1722 | #endif |
| 1723 | } | 1723 | } |
| 1724 | 1724 | ||
| 1725 | static u64 __get_kvmclock_ns(struct kvm *kvm) | ||
| 1726 | { | ||
| 1727 | struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0); | ||
| 1728 | struct kvm_arch *ka = &kvm->arch; | ||
| 1729 | s64 ns; | ||
| 1730 | |||
| 1731 | if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) { | ||
| 1732 | u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc()); | ||
| 1733 | ns = __pvclock_read_cycles(&vcpu->arch.hv_clock, tsc); | ||
| 1734 | } else { | ||
| 1735 | ns = ktime_get_boot_ns() + ka->kvmclock_offset; | ||
| 1736 | } | ||
| 1737 | |||
| 1738 | return ns; | ||
| 1739 | } | ||
| 1740 | |||
| 1741 | u64 get_kvmclock_ns(struct kvm *kvm) | ||
| 1742 | { | ||
| 1743 | unsigned long flags; | ||
| 1744 | s64 ns; | ||
| 1745 | |||
| 1746 | local_irq_save(flags); | ||
| 1747 | ns = __get_kvmclock_ns(kvm); | ||
| 1748 | local_irq_restore(flags); | ||
| 1749 | |||
| 1750 | return ns; | ||
| 1751 | } | ||
| 1752 | |||
| 1725 | static void kvm_setup_pvclock_page(struct kvm_vcpu *v) | 1753 | static void kvm_setup_pvclock_page(struct kvm_vcpu *v) |
| 1726 | { | 1754 | { |
| 1727 | struct kvm_vcpu_arch *vcpu = &v->arch; | 1755 | struct kvm_vcpu_arch *vcpu = &v->arch; |
| @@ -1811,7 +1839,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
| 1811 | } | 1839 | } |
| 1812 | if (!use_master_clock) { | 1840 | if (!use_master_clock) { |
| 1813 | host_tsc = rdtsc(); | 1841 | host_tsc = rdtsc(); |
| 1814 | kernel_ns = get_kernel_ns(); | 1842 | kernel_ns = ktime_get_boot_ns(); |
| 1815 | } | 1843 | } |
| 1816 | 1844 | ||
| 1817 | tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); | 1845 | tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); |
| @@ -4054,7 +4082,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 4054 | case KVM_SET_CLOCK: { | 4082 | case KVM_SET_CLOCK: { |
| 4055 | struct kvm_clock_data user_ns; | 4083 | struct kvm_clock_data user_ns; |
| 4056 | u64 now_ns; | 4084 | u64 now_ns; |
| 4057 | s64 delta; | ||
| 4058 | 4085 | ||
| 4059 | r = -EFAULT; | 4086 | r = -EFAULT; |
| 4060 | if (copy_from_user(&user_ns, argp, sizeof(user_ns))) | 4087 | if (copy_from_user(&user_ns, argp, sizeof(user_ns))) |
| @@ -4066,10 +4093,9 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 4066 | 4093 | ||
| 4067 | r = 0; | 4094 | r = 0; |
| 4068 | local_irq_disable(); | 4095 | local_irq_disable(); |
| 4069 | now_ns = get_kernel_ns(); | 4096 | now_ns = __get_kvmclock_ns(kvm); |
| 4070 | delta = user_ns.clock - now_ns; | 4097 | kvm->arch.kvmclock_offset += user_ns.clock - now_ns; |
| 4071 | local_irq_enable(); | 4098 | local_irq_enable(); |
| 4072 | kvm->arch.kvmclock_offset = delta; | ||
| 4073 | kvm_gen_update_masterclock(kvm); | 4099 | kvm_gen_update_masterclock(kvm); |
| 4074 | break; | 4100 | break; |
| 4075 | } | 4101 | } |
| @@ -4077,10 +4103,8 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 4077 | struct kvm_clock_data user_ns; | 4103 | struct kvm_clock_data user_ns; |
| 4078 | u64 now_ns; | 4104 | u64 now_ns; |
| 4079 | 4105 | ||
| 4080 | local_irq_disable(); | 4106 | now_ns = get_kvmclock_ns(kvm); |
| 4081 | now_ns = get_kernel_ns(); | 4107 | user_ns.clock = now_ns; |
| 4082 | user_ns.clock = kvm->arch.kvmclock_offset + now_ns; | ||
| 4083 | local_irq_enable(); | ||
| 4084 | user_ns.flags = 0; | 4108 | user_ns.flags = 0; |
| 4085 | memset(&user_ns.pad, 0, sizeof(user_ns.pad)); | 4109 | memset(&user_ns.pad, 0, sizeof(user_ns.pad)); |
| 4086 | 4110 | ||
| @@ -7544,7 +7568,7 @@ int kvm_arch_hardware_enable(void) | |||
| 7544 | * before any KVM threads can be running. Unfortunately, we can't | 7568 | * before any KVM threads can be running. Unfortunately, we can't |
| 7545 | * bring the TSCs fully up to date with real time, as we aren't yet far | 7569 | * bring the TSCs fully up to date with real time, as we aren't yet far |
| 7546 | * enough into CPU bringup that we know how much real time has actually | 7570 | * enough into CPU bringup that we know how much real time has actually |
| 7547 | * elapsed; our helper function, get_kernel_ns() will be using boot | 7571 | * elapsed; our helper function, ktime_get_boot_ns() will be using boot |
| 7548 | * variables that haven't been updated yet. | 7572 | * variables that haven't been updated yet. |
| 7549 | * | 7573 | * |
| 7550 | * So we simply find the maximum observed TSC above, then record the | 7574 | * So we simply find the maximum observed TSC above, then record the |
| @@ -7779,7 +7803,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
| 7779 | mutex_init(&kvm->arch.apic_map_lock); | 7803 | mutex_init(&kvm->arch.apic_map_lock); |
| 7780 | spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); | 7804 | spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); |
| 7781 | 7805 | ||
| 7782 | kvm->arch.kvmclock_offset = -get_kernel_ns(); | 7806 | kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); |
| 7783 | pvclock_update_vm_gtod_copy(kvm); | 7807 | pvclock_update_vm_gtod_copy(kvm); |
| 7784 | 7808 | ||
| 7785 | INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); | 7809 | INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index a82ca466b62e..e8ff3e4ce38a 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
| @@ -148,11 +148,6 @@ static inline void kvm_register_writel(struct kvm_vcpu *vcpu, | |||
| 148 | return kvm_register_write(vcpu, reg, val); | 148 | return kvm_register_write(vcpu, reg, val); |
| 149 | } | 149 | } |
| 150 | 150 | ||
| 151 | static inline u64 get_kernel_ns(void) | ||
| 152 | { | ||
| 153 | return ktime_get_boot_ns(); | ||
| 154 | } | ||
| 155 | |||
| 156 | static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) | 151 | static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) |
| 157 | { | 152 | { |
| 158 | return !(kvm->arch.disabled_quirks & quirk); | 153 | return !(kvm->arch.disabled_quirks & quirk); |
| @@ -164,6 +159,7 @@ void kvm_set_pending_timer(struct kvm_vcpu *vcpu); | |||
| 164 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); | 159 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); |
| 165 | 160 | ||
| 166 | void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr); | 161 | void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr); |
| 162 | u64 get_kvmclock_ns(struct kvm *kvm); | ||
| 167 | 163 | ||
| 168 | int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, | 164 | int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, |
| 169 | gva_t addr, void *val, unsigned int bytes, | 165 | gva_t addr, void *val, unsigned int bytes, |
