diff options
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 44 |
2 files changed, 44 insertions, 2 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5ab1c3fb34ef..789e9462668f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -339,6 +339,8 @@ struct kvm_vcpu_arch { | |||
339 | unsigned int time_offset; | 339 | unsigned int time_offset; |
340 | struct page *time_page; | 340 | struct page *time_page; |
341 | u64 last_host_tsc; | 341 | u64 last_host_tsc; |
342 | u64 last_guest_tsc; | ||
343 | u64 last_kernel_ns; | ||
342 | 344 | ||
343 | bool nmi_pending; | 345 | bool nmi_pending; |
344 | bool nmi_injected; | 346 | bool nmi_injected; |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d0764a258047..d4d33f943d99 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -55,6 +55,7 @@ | |||
55 | #include <asm/mce.h> | 55 | #include <asm/mce.h> |
56 | #include <asm/i387.h> | 56 | #include <asm/i387.h> |
57 | #include <asm/xcr.h> | 57 | #include <asm/xcr.h> |
58 | #include <asm/pvclock.h> | ||
58 | 59 | ||
59 | #define MAX_IO_MSRS 256 | 60 | #define MAX_IO_MSRS 256 |
60 | #define CR0_RESERVED_BITS \ | 61 | #define CR0_RESERVED_BITS \ |
@@ -976,14 +977,15 @@ static int kvm_write_guest_time(struct kvm_vcpu *v) | |||
976 | struct kvm_vcpu_arch *vcpu = &v->arch; | 977 | struct kvm_vcpu_arch *vcpu = &v->arch; |
977 | void *shared_kaddr; | 978 | void *shared_kaddr; |
978 | unsigned long this_tsc_khz; | 979 | unsigned long this_tsc_khz; |
979 | s64 kernel_ns; | 980 | s64 kernel_ns, max_kernel_ns; |
981 | u64 tsc_timestamp; | ||
980 | 982 | ||
981 | if ((!vcpu->time_page)) | 983 | if ((!vcpu->time_page)) |
982 | return 0; | 984 | return 0; |
983 | 985 | ||
984 | /* Keep irq disabled to prevent changes to the clock */ | 986 | /* Keep irq disabled to prevent changes to the clock */ |
985 | local_irq_save(flags); | 987 | local_irq_save(flags); |
986 | kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp); | 988 | kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); |
987 | kernel_ns = get_kernel_ns(); | 989 | kernel_ns = get_kernel_ns(); |
988 | this_tsc_khz = __get_cpu_var(cpu_tsc_khz); | 990 | this_tsc_khz = __get_cpu_var(cpu_tsc_khz); |
989 | local_irq_restore(flags); | 991 | local_irq_restore(flags); |
@@ -993,13 +995,49 @@ static int kvm_write_guest_time(struct kvm_vcpu *v) | |||
993 | return 1; | 995 | return 1; |
994 | } | 996 | } |
995 | 997 | ||
998 | /* | ||
999 | * Time as measured by the TSC may go backwards when resetting the base | ||
1000 | * tsc_timestamp. The reason for this is that the TSC resolution is | ||
1001 | * higher than the resolution of the other clock scales. Thus, many | ||
1002 | * possible measurements of the TSC correspond to one measurement of any | ||
1003 | * other clock, and so a spread of values is possible. This is not a | ||
1004 | * problem for the computation of the nanosecond clock; with TSC rates | ||
1005 | * around 1GHz, there can only be a few cycles which correspond to one | ||
1006 | * nanosecond value, and any path through this code will inevitably | ||
1007 | * take longer than that. However, with the kernel_ns value itself, | ||
1008 | * the precision may be much lower, down to HZ granularity. If the | ||
1009 | * first sampling of TSC against kernel_ns ends in the low part of the | ||
1010 | * range, and the second in the high end of the range, we can get: | ||
1011 | * | ||
1012 | * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new | ||
1013 | * | ||
1014 | * As the sampling errors potentially range in the thousands of cycles, | ||
1015 | * it is possible such a time value has already been observed by the | ||
1016 | * guest. To protect against this, we must compute the system time as | ||
1017 | * observed by the guest and ensure the new system time is greater. | ||
1018 | */ | ||
1019 | max_kernel_ns = 0; | ||
1020 | if (vcpu->hv_clock.tsc_timestamp && vcpu->last_guest_tsc) { | ||
1021 | max_kernel_ns = vcpu->last_guest_tsc - | ||
1022 | vcpu->hv_clock.tsc_timestamp; | ||
1023 | max_kernel_ns = pvclock_scale_delta(max_kernel_ns, | ||
1024 | vcpu->hv_clock.tsc_to_system_mul, | ||
1025 | vcpu->hv_clock.tsc_shift); | ||
1026 | max_kernel_ns += vcpu->last_kernel_ns; | ||
1027 | } | ||
1028 | |||
996 | if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { | 1029 | if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { |
997 | kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock); | 1030 | kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock); |
998 | vcpu->hw_tsc_khz = this_tsc_khz; | 1031 | vcpu->hw_tsc_khz = this_tsc_khz; |
999 | } | 1032 | } |
1000 | 1033 | ||
1034 | if (max_kernel_ns > kernel_ns) | ||
1035 | kernel_ns = max_kernel_ns; | ||
1036 | |||
1001 | /* With all the info we got, fill in the values */ | 1037 | /* With all the info we got, fill in the values */ |
1038 | vcpu->hv_clock.tsc_timestamp = tsc_timestamp; | ||
1002 | vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; | 1039 | vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; |
1040 | vcpu->last_kernel_ns = kernel_ns; | ||
1003 | vcpu->hv_clock.flags = 0; | 1041 | vcpu->hv_clock.flags = 0; |
1004 | 1042 | ||
1005 | /* | 1043 | /* |
@@ -4931,6 +4969,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
4931 | if (hw_breakpoint_active()) | 4969 | if (hw_breakpoint_active()) |
4932 | hw_breakpoint_restore(); | 4970 | hw_breakpoint_restore(); |
4933 | 4971 | ||
4972 | kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); | ||
4973 | |||
4934 | atomic_set(&vcpu->guest_mode, 0); | 4974 | atomic_set(&vcpu->guest_mode, 0); |
4935 | smp_wmb(); | 4975 | smp_wmb(); |
4936 | local_irq_enable(); | 4976 | local_irq_enable(); |