aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm
diff options
context:
space:
mode:
author Zachary Amsden <zamsden@gmail.com> 2012-02-03 12:43:50 -0500
committer Avi Kivity <avi@redhat.com> 2012-03-08 07:09:35 -0500
commit cc578287e3224d0da196cc1d226bdae6b068faa7 (patch)
tree c4352ebbd4d35de296622a8be99d76a1a6a48793 /arch/x86/kvm
parent a59cb29e4d81e025192550c2703f305637f016f6 (diff)
KVM: Infrastructure for software and hardware based TSC rate scaling
This requires some restructuring; rather than use 'virtual_tsc_khz' to indicate whether hardware rate scaling is in effect, we consider each VCPU to always have a virtual TSC rate. Instead, there is new logic above the vendor-specific hardware scaling that decides whether scaling is even necessary and updates all rate variables used by common code. This means we can simply query the virtual rate at any point, which is needed for software rate scaling. There is also now a threshold added to the TSC rate scaling; minor differences and variations of the measured TSC rate can accidentally provoke rate scaling to be used when it is not needed. To avoid this, we have a tolerance variable called tsc_tolerance_ppm, which is the maximum deviation of the user-requested rate from the hardware rate that is tolerated before scaling is used. The default is 250ppm, which is half the threshold for NTP adjustment, allowing for some hardware variation. In the event that hardware rate scaling is not available, we can kludge a bit by forcing TSC catchup to turn on when a rate faster than the hardware speed has been requested, but there is nothing available yet for the reverse case; this requires a trap-and-emulate software implementation for RDTSC, which is still forthcoming. [avi: fix 64-bit division on i386] Signed-off-by: Zachary Amsden <zamsden@gmail.com> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- arch/x86/kvm/lapic.c 2
-rw-r--r-- arch/x86/kvm/svm.c 20
-rw-r--r-- arch/x86/kvm/vmx.c 16
-rw-r--r-- arch/x86/kvm/x86.c 82
4 files changed, 66 insertions, 54 deletions
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3ee1d83c695d..72975f758c83 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -731,7 +731,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
731 u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; 731 u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
732 u64 ns = 0; 732 u64 ns = 0;
733 struct kvm_vcpu *vcpu = apic->vcpu; 733 struct kvm_vcpu *vcpu = apic->vcpu;
734 unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu); 734 unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
735 unsigned long flags; 735 unsigned long flags;
736 736
737 if (unlikely(!tscdeadline || !this_tsc_khz)) 737 if (unlikely(!tscdeadline || !this_tsc_khz))
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7bbd17cc3488..e12026e5244e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -964,20 +964,25 @@ static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
964 return _tsc; 964 return _tsc;
965} 965}
966 966
967static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) 967static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
968{ 968{
969 struct vcpu_svm *svm = to_svm(vcpu); 969 struct vcpu_svm *svm = to_svm(vcpu);
970 u64 ratio; 970 u64 ratio;
971 u64 khz; 971 u64 khz;
972 972
973 /* TSC scaling supported? */ 973 /* Guest TSC same frequency as host TSC? */
974 if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) 974 if (!scale) {
975 svm->tsc_ratio = TSC_RATIO_DEFAULT;
975 return; 976 return;
977 }
976 978
977 /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */ 979 /* TSC scaling supported? */
978 if (user_tsc_khz == 0) { 980 if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
979 vcpu->arch.virtual_tsc_khz = 0; 981 if (user_tsc_khz > tsc_khz) {
980 svm->tsc_ratio = TSC_RATIO_DEFAULT; 982 vcpu->arch.tsc_catchup = 1;
983 vcpu->arch.tsc_always_catchup = 1;
984 } else
985 WARN(1, "user requested TSC rate below hardware speed\n");
981 return; 986 return;
982 } 987 }
983 988
@@ -992,7 +997,6 @@ static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
992 user_tsc_khz); 997 user_tsc_khz);
993 return; 998 return;
994 } 999 }
995 vcpu->arch.virtual_tsc_khz = user_tsc_khz;
996 svm->tsc_ratio = ratio; 1000 svm->tsc_ratio = ratio;
997} 1001}
998 1002
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3b4c8d8ad906..e6bf61fa1c03 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1817,13 +1817,19 @@ u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu)
1817} 1817}
1818 1818
1819/* 1819/*
1820 * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ 1820 * Engage any workarounds for mis-matched TSC rates. Currently limited to
1821 * ioctl. In this case the call-back should update internal vmx state to make 1821 * software catchup for faster rates on slower CPUs.
1822 * the changes effective.
1823 */ 1822 */
1824static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) 1823static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
1825{ 1824{
1826 /* Nothing to do here */ 1825 if (!scale)
1826 return;
1827
1828 if (user_tsc_khz > tsc_khz) {
1829 vcpu->arch.tsc_catchup = 1;
1830 vcpu->arch.tsc_always_catchup = 1;
1831 } else
1832 WARN(1, "user requested TSC rate below hardware speed\n");
1827} 1833}
1828 1834
1829/* 1835/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2bd77a3a41ed..41bb90acb238 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -96,6 +96,10 @@ EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
96u32 kvm_max_guest_tsc_khz; 96u32 kvm_max_guest_tsc_khz;
97EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); 97EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
98 98
99/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
100static u32 tsc_tolerance_ppm = 250;
101module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
102
99#define KVM_NR_SHARED_MSRS 16 103#define KVM_NR_SHARED_MSRS 16
100 104
101struct kvm_shared_msrs_global { 105struct kvm_shared_msrs_global {
@@ -968,49 +972,50 @@ static inline u64 get_kernel_ns(void)
968static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); 972static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
969unsigned long max_tsc_khz; 973unsigned long max_tsc_khz;
970 974
971static inline int kvm_tsc_changes_freq(void) 975static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
972{ 976{
973 int cpu = get_cpu(); 977 return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
974 int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && 978 vcpu->arch.virtual_tsc_shift);
975 cpufreq_quick_get(cpu) != 0;
976 put_cpu();
977 return ret;
978} 979}
979 980
980u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) 981static u32 adjust_tsc_khz(u32 khz, s32 ppm)
981{ 982{
982 if (vcpu->arch.virtual_tsc_khz) 983 u64 v = (u64)khz * (1000000 + ppm);
983 return vcpu->arch.virtual_tsc_khz; 984 do_div(v, 1000000);
984 else 985 return v;
985 return __this_cpu_read(cpu_tsc_khz);
986} 986}
987 987
988static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) 988static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
989{ 989{
990 u64 ret; 990 u32 thresh_lo, thresh_hi;
991 int use_scaling = 0;
991 992
992 WARN_ON(preemptible());
993 if (kvm_tsc_changes_freq())
994 printk_once(KERN_WARNING
995 "kvm: unreliable cycle conversion on adjustable rate TSC\n");
996 ret = nsec * vcpu_tsc_khz(vcpu);
997 do_div(ret, USEC_PER_SEC);
998 return ret;
999}
1000
1001static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
1002{
1003 /* Compute a scale to convert nanoseconds in TSC cycles */ 993 /* Compute a scale to convert nanoseconds in TSC cycles */
1004 kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, 994 kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
1005 &vcpu->arch.tsc_catchup_shift, 995 &vcpu->arch.virtual_tsc_shift,
1006 &vcpu->arch.tsc_catchup_mult); 996 &vcpu->arch.virtual_tsc_mult);
997 vcpu->arch.virtual_tsc_khz = this_tsc_khz;
998
999 /*
1000 * Compute the variation in TSC rate which is acceptable
1001 * within the range of tolerance and decide if the
1002 * rate being applied is within that bounds of the hardware
1003 * rate. If so, no scaling or compensation need be done.
1004 */
1005 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
1006 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
1007 if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) {
1008 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
1009 use_scaling = 1;
1010 }
1011 kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
1007} 1012}
1008 1013
1009static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) 1014static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
1010{ 1015{
1011 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, 1016 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
1012 vcpu->arch.tsc_catchup_mult, 1017 vcpu->arch.virtual_tsc_mult,
1013 vcpu->arch.tsc_catchup_shift); 1018 vcpu->arch.virtual_tsc_shift);
1014 tsc += vcpu->arch.last_tsc_write; 1019 tsc += vcpu->arch.last_tsc_write;
1015 return tsc; 1020 return tsc;
1016} 1021}
@@ -1077,7 +1082,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1077 local_irq_save(flags); 1082 local_irq_save(flags);
1078 tsc_timestamp = kvm_x86_ops->read_l1_tsc(v); 1083 tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
1079 kernel_ns = get_kernel_ns(); 1084 kernel_ns = get_kernel_ns();
1080 this_tsc_khz = vcpu_tsc_khz(v); 1085 this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
1081 if (unlikely(this_tsc_khz == 0)) { 1086 if (unlikely(this_tsc_khz == 0)) {
1082 local_irq_restore(flags); 1087 local_irq_restore(flags);
1083 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); 1088 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
@@ -2804,26 +2809,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2804 u32 user_tsc_khz; 2809 u32 user_tsc_khz;
2805 2810
2806 r = -EINVAL; 2811 r = -EINVAL;
2807 if (!kvm_has_tsc_control)
2808 break;
2809
2810 user_tsc_khz = (u32)arg; 2812 user_tsc_khz = (u32)arg;
2811 2813
2812 if (user_tsc_khz >= kvm_max_guest_tsc_khz) 2814 if (user_tsc_khz >= kvm_max_guest_tsc_khz)
2813 goto out; 2815 goto out;
2814 2816
2815 kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz); 2817 if (user_tsc_khz == 0)
2818 user_tsc_khz = tsc_khz;
2819
2820 kvm_set_tsc_khz(vcpu, user_tsc_khz);
2816 2821
2817 r = 0; 2822 r = 0;
2818 goto out; 2823 goto out;
2819 } 2824 }
2820 case KVM_GET_TSC_KHZ: { 2825 case KVM_GET_TSC_KHZ: {
2821 r = -EIO; 2826 r = vcpu->arch.virtual_tsc_khz;
2822 if (check_tsc_unstable())
2823 goto out;
2824
2825 r = vcpu_tsc_khz(vcpu);
2826
2827 goto out; 2827 goto out;
2828 } 2828 }
2829 default: 2829 default:
@@ -5312,6 +5312,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5312 profile_hit(KVM_PROFILING, (void *)rip); 5312 profile_hit(KVM_PROFILING, (void *)rip);
5313 } 5313 }
5314 5314
5315 if (unlikely(vcpu->arch.tsc_always_catchup))
5316 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
5315 5317
5316 kvm_lapic_sync_from_vapic(vcpu); 5318 kvm_lapic_sync_from_vapic(vcpu);
5317 5319
@@ -6004,7 +6006,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
6004 } 6006 }
6005 vcpu->arch.pio_data = page_address(page); 6007 vcpu->arch.pio_data = page_address(page);
6006 6008
6007 kvm_init_tsc_catchup(vcpu, max_tsc_khz); 6009 kvm_set_tsc_khz(vcpu, max_tsc_khz);
6008 6010
6009 r = kvm_mmu_create(vcpu); 6011 r = kvm_mmu_create(vcpu);
6010 if (r < 0) 6012 if (r < 0)