 arch/x86/include/asm/kvm_host.h |   2 +
 arch/x86/kvm/hyperv.c           | 157 +++++++++++++++++++++++++++++++++------
 arch/x86/kvm/hyperv.h           |   3 +
 arch/x86/kvm/x86.c              |   8 ++++----
 4 files changed, 150 insertions(+), 20 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 32a43a25d415..4b20f7304b9c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -702,6 +702,8 @@ struct kvm_hv {
 	/* Hyper-v based guest crash (NT kernel bugcheck) parameters */
 	u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
 	u64 hv_crash_ctl;
+
+	HV_REFERENCE_TSC_PAGE tsc_ref;
 };
 
 struct kvm_arch {
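
The new tsc_ref member shadows the guest-visible reference TSC page. For orientation, a sketch of that page's layout, as declared in arch/x86/include/uapi/asm/hyperv.h around this time (reproduced here as a convenience with comments added; not part of the diff):

typedef struct _HV_REFERENCE_TSC_PAGE {
	__u32 tsc_sequence;	/* 0 marks the page invalid */
	__u32 res1;
	__u64 tsc_scale;	/* 0.64 fixed-point multiplier for TSC ticks */
	__s64 tsc_offset;	/* added after scaling, in 100 ns units */
} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;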
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index ed5b77f39ffb..42b1c83741c8 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -386,7 +386,21 @@ static void synic_init(struct kvm_vcpu_hv_synic *synic)
 
 static u64 get_time_ref_counter(struct kvm *kvm)
 {
-	return div_u64(get_kvmclock_ns(kvm), 100);
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+	struct kvm_vcpu *vcpu;
+	u64 tsc;
+
+	/*
+	 * The guest has not set up the TSC page, or the clock isn't
+	 * stable; fall back to get_kvmclock_ns().
+	 */
+	if (!hv->tsc_ref.tsc_sequence)
+		return div_u64(get_kvmclock_ns(kvm), 100);
+
+	vcpu = kvm_get_vcpu(kvm, 0);
+	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+	return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
+		+ hv->tsc_ref.tsc_offset;
 }
 
 static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
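
get_time_ref_counter() above is the host-side mirror of the read protocol the guest runs against the TSC page. A minimal sketch of the guest-side reader, assuming the page layout shown earlier (hypothetical helper, loosely following the Hyper-V TLFS and the Linux guest clocksource; not part of this patch):

static u64 hv_read_tsc_page_time(const HV_REFERENCE_TSC_PAGE *tsc_pg)
{
	u64 scale, offset, tsc;
	u32 sequence;

	do {
		sequence = READ_ONCE(tsc_pg->tsc_sequence);
		if (!sequence)
			return U64_MAX;	/* invalid: use HV_X64_MSR_TIME_REF_COUNT */
		smp_rmb();		/* read the fields only after the sequence */
		tsc = rdtsc_ordered();
		scale = READ_ONCE(tsc_pg->tsc_scale);
		offset = READ_ONCE(tsc_pg->tsc_offset);
		smp_rmb();		/* ...and re-check the sequence afterwards */
	} while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);

	/* Same computation get_time_ref_counter() performs on the host side. */
	return mul_u64_u64_shr(tsc, scale, 64) + offset;
}

A sequence of 0 is exactly what kvm_hv_setup_tsc_page() below publishes while it rewrites the page; older Hyper-V documents used 0xFFFFFFFF as the invalid value instead, which is why the update code skips both when picking the next sequence.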
@@ -756,6 +770,129 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+/*
+ * The kvmclock and Hyper-V TSC page use similar formulas, and converting
+ * between them is possible:
+ *
+ * kvmclock formula:
+ *    nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
+ *           + system_time
+ *
+ * Hyper-V formula:
+ *    nsec/100 = ticks * scale / 2^64 + offset
+ *
+ * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
+ * By dividing the kvmclock formula by 100 and equating what's left we get:
+ *    ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *            scale / 2^64 =         tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *            scale        =         tsc_to_system_mul * 2^(32+tsc_shift) / 100
+ *
+ * Now expand the kvmclock formula and divide by 100:
+ *    nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
+ *           - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
+ *           + system_time
+ *    nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *               - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *               + system_time / 100
+ *
+ * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
+ *    nsec/100 = ticks * scale / 2^64
+ *               - tsc_timestamp * scale / 2^64
+ *               + system_time / 100
+ *
+ * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
+ *    offset = system_time / 100 - tsc_timestamp * scale / 2^64
+ *
+ * These two equivalencies are implemented in this function.
+ */
+static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
+					HV_REFERENCE_TSC_PAGE *tsc_ref)
+{
+	u64 max_mul;
+
+	if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
+		return false;
+
+	/*
+	 * Check whether scale would overflow; if so, use the time ref counter:
+	 *    tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
+	 *    tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
+	 *    tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
+	 */
+	max_mul = 100ull << (32 - hv_clock->tsc_shift);
+	if (hv_clock->tsc_to_system_mul >= max_mul)
+		return false;
+
+	/*
+	 * Otherwise compute the scale and offset according to the formulas
+	 * derived above.
+	 */
+	tsc_ref->tsc_scale =
+		mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
+				hv_clock->tsc_to_system_mul,
+				100);
+
+	tsc_ref->tsc_offset = hv_clock->system_time;
+	do_div(tsc_ref->tsc_offset, 100);
+	tsc_ref->tsc_offset -=
+		mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64);
+	return true;
+}
+
+void kvm_hv_setup_tsc_page(struct kvm *kvm,
+			   struct pvclock_vcpu_time_info *hv_clock)
+{
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+	u32 tsc_seq;
+	u64 gfn;
+
+	BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
+	BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0);
+
+	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+		return;
+
+	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+	/*
+	 * Because the TSC parameters only vary when there is a
+	 * change in the master clock, do not bother with caching.
+	 */
+	if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
+				    &tsc_seq, sizeof(tsc_seq))))
+		return;
+
+	/*
+	 * While we're computing and writing the parameters, force the
+	 * guest to use the time reference count MSR.
+	 */
+	hv->tsc_ref.tsc_sequence = 0;
+	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
+			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
+		return;
+
+	if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
+		return;
+
+	/* Ensure sequence is zero before writing the rest of the struct. */
+	smp_wmb();
+	if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
+		return;
+
+	/*
+	 * Now switch to the TSC page mechanism by writing the sequence.
+	 */
+	tsc_seq++;
+	if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0)
+		tsc_seq = 1;
+
+	/* Write the struct entirely before the non-zero sequence. */
+	smp_wmb();
+
+	hv->tsc_ref.tsc_sequence = tsc_seq;
+	kvm_write_guest(kvm, gfn_to_gpa(gfn),
+			&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
+}
+
 static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
 			     bool host)
 {
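
As a sanity check on the scale derivation above, a worked example in standalone userspace C (hypothetical kvmclock parameters, chosen so the division by 100 is exact; mul_u64_u64_shr64() is a stand-in for the kernel's mul_u64_u64_shr(a, b, 64), and unsigned __int128 is a GCC/Clang extension):

#include <stdio.h>
#include <stdint.h>

/* Userspace stand-in for the kernel's mul_u64_u64_shr(a, b, 64). */
static uint64_t mul_u64_u64_shr64(uint64_t a, uint64_t b)
{
	return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

int main(void)
{
	/*
	 * Hypothetical kvmclock parameters for a 1.28 GHz TSC, i.e.
	 * 0.78125 ns per tick: tsc_to_system_mul = 0.78125 * 2^32,
	 * tsc_shift = 0.
	 */
	uint32_t tsc_to_system_mul = 3355443200u;
	int tsc_shift = 0;

	/* scale = tsc_to_system_mul * 2^(32 + tsc_shift) / 100 */
	uint64_t scale = (uint64_t)((((unsigned __int128)tsc_to_system_mul)
				     << (32 + tsc_shift)) / 100);

	uint64_t one_second = 1280000000ull;	/* ticks in 1 s at 1.28 GHz */

	printf("scale = %#llx\n", (unsigned long long)scale);
	printf("1 s   = %llu x 100ns\n",
	       (unsigned long long)mul_u64_u64_shr64(one_second, scale));
	return 0;
}

This prints scale = 0x200000000000000 (exactly 2^57), and one second of ticks converts to 10000000 units of 100 ns, as expected. The overflow guard also holds: 3355443200 is well below max_mul = 100 * 2^32.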
@@ -793,23 +930,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
 		mark_page_dirty(kvm, gfn);
 		break;
 	}
-	case HV_X64_MSR_REFERENCE_TSC: {
-		u64 gfn;
-		HV_REFERENCE_TSC_PAGE tsc_ref;
-
-		memset(&tsc_ref, 0, sizeof(tsc_ref));
+	case HV_X64_MSR_REFERENCE_TSC:
 		hv->hv_tsc_page = data;
-		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
-			break;
-		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
-		if (kvm_write_guest(
-			kvm,
-			gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
-			&tsc_ref, sizeof(tsc_ref)))
-			return 1;
-		mark_page_dirty(kvm, gfn);
+		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
+			kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
 		break;
-	}
 	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
 		return kvm_hv_msr_set_crash_data(vcpu,
 						 msr - HV_X64_MSR_CRASH_P0,
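
On the guest side, all that kvm_hv_set_msr_pw() now expects is a single MSR write; the page contents are filled in later, at the next master clock update, rather than zeroed synchronously as the old code did. A hedged sketch of the guest-side enablement (hypothetical helper, simplified from what the Linux Hyper-V clocksource driver does):

static void hv_enable_tsc_page(struct page *tsc_page)
{
	/* Page-aligned guest physical address of a zeroed page... */
	u64 tsc_msr = page_to_phys(tsc_page);

	/* ...with the enable flag in the low bits (the address shift is 12). */
	tsc_msr |= HV_X64_MSR_TSC_REFERENCE_ENABLE;
	wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr);
}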
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 60eccd4bd1d3..cd1119538add 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h | |||
@@ -84,4 +84,7 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu) | |||
84 | 84 | ||
85 | void kvm_hv_process_stimers(struct kvm_vcpu *vcpu); | 85 | void kvm_hv_process_stimers(struct kvm_vcpu *vcpu); |
86 | 86 | ||
87 | void kvm_hv_setup_tsc_page(struct kvm *kvm, | ||
88 | struct pvclock_vcpu_time_info *hv_clock); | ||
89 | |||
87 | #endif | 90 | #endif |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 81e9945cdf28..3ee8a91a78c3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1887,10 +1887,10 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 
 	vcpu->hv_clock.flags = pvclock_flags;
 
-	if (!vcpu->pv_time_enabled)
-		return 0;
-
-	kvm_setup_pvclock_page(v);
+	if (vcpu->pv_time_enabled)
+		kvm_setup_pvclock_page(v);
+	if (v == kvm_get_vcpu(v->kvm, 0))
+		kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
 	return 0;
 }
 
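
Taken together, this hook completes the following chain (a sketch of the intended flow; function names as in this patch and the surrounding x86.c code):

	guest wrmsr(HV_X64_MSR_REFERENCE_TSC)
	  -> kvm_hv_set_msr_pw(): kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE)
	  -> kvm_gen_update_masterclock(): KVM_REQ_CLOCK_UPDATE on every vCPU
	  -> kvm_guest_time_update() on vCPU 0: kvm_hv_setup_tsc_page()

vCPU 0 serves as the reference because compute_tsc_page_parameters() only succeeds when PVCLOCK_TSC_STABLE_BIT is set, and with a stable master clock every vCPU shares the same pvclock parameters.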