-rw-r--r--  arch/x86/kvm/x86.c | 157
1 files changed, 114 insertions, 43 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e7da14c317e6..699c6b89c1b4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -895,6 +895,15 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *
 
 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
 
+static inline int kvm_tsc_changes_freq(void)
+{
+        int cpu = get_cpu();
+        int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
+                  cpufreq_quick_get(cpu) != 0;
+        put_cpu();
+        return ret;
+}
+
 void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 {
         struct kvm *kvm = vcpu->kvm;
@@ -940,7 +949,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 }
 EXPORT_SYMBOL_GPL(kvm_write_tsc);
 
-static void kvm_write_guest_time(struct kvm_vcpu *v)
+static int kvm_write_guest_time(struct kvm_vcpu *v)
 {
         struct timespec ts;
         unsigned long flags;
@@ -949,24 +958,27 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
         unsigned long this_tsc_khz;
 
         if ((!vcpu->time_page))
-                return;
-
-        this_tsc_khz = get_cpu_var(cpu_tsc_khz);
-        if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) {
-                kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock);
-                vcpu->hv_clock_tsc_khz = this_tsc_khz;
-        }
-        put_cpu_var(cpu_tsc_khz);
+                return 0;
 
         /* Keep irq disabled to prevent changes to the clock */
         local_irq_save(flags);
         kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
         ktime_get_ts(&ts);
         monotonic_to_bootbased(&ts);
+        this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
         local_irq_restore(flags);
 
-        /* With all the info we got, fill in the values */
+        if (unlikely(this_tsc_khz == 0)) {
+                kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v);
+                return 1;
+        }
 
+        if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) {
+                kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock);
+                vcpu->hv_clock_tsc_khz = this_tsc_khz;
+        }
+
+        /* With all the info we got, fill in the values */
         vcpu->hv_clock.system_time = ts.tv_nsec +
                 (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
 
@@ -987,6 +999,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
         kunmap_atomic(shared_kaddr, KM_USER0);
 
         mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
+        return 0;
 }
 
 static int kvm_request_guest_time_update(struct kvm_vcpu *v)
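
The kvm_write_guest_time() hunks above move the cpu_tsc_khz read inside the irq-disabled section, next to the TSC read, and give the function an int return: a zero per-CPU frequency means the value is currently unknown, so the update is re-requested via KVM_REQ_KVMCLOCK_UPDATE and deferred rather than published with a stale scale. A minimal userspace sketch of that defer-and-retry pattern (illustrative names only, not code from the patch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct guest_clock {
        uint64_t tsc_timestamp;
        uint64_t system_time_ns;
        uint32_t tsc_khz;
};

static uint32_t cpu_tsc_khz;            /* 0 == frequency currently unknown */
static bool clock_update_requested;     /* stands in for KVM_REQ_KVMCLOCK_UPDATE */

/* Nonzero return means "retry later", mirroring the new int return of
 * kvm_write_guest_time(). */
static int write_guest_time(struct guest_clock *gc, uint64_t tsc_now, uint64_t now_ns)
{
        uint32_t khz = cpu_tsc_khz;     /* sampled with irqs off in the kernel */

        if (khz == 0) {
                clock_update_requested = true;
                return 1;
        }
        gc->tsc_khz = khz;              /* kvm_set_time_scale() would go here */
        gc->tsc_timestamp = tsc_now;
        gc->system_time_ns = now_ns;
        return 0;
}

int main(void)
{
        struct guest_clock gc = {0};
        int rc;

        cpu_tsc_khz = 0;                /* as left by tsc_bad() or a fresh hot-add */
        rc = write_guest_time(&gc, 1000, 500);
        printf("first attempt:  rc=%d, retry requested=%d\n", rc, clock_update_requested);

        cpu_tsc_khz = 2000000;          /* refreshed by the tsc_khz_changed() IPI */
        rc = write_guest_time(&gc, 2000, 750);
        printf("second attempt: rc=%d, published khz=%u\n", rc, gc.tsc_khz);
        return 0;
}

In the patch itself, vcpu_enter_guest() (changed further down) turns the nonzero return into an aborted guest entry, so the retry happens on the next entry once the IPI has refreshed the per-CPU value.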
@@ -1853,12 +1866,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
         }
 
         kvm_x86_ops->vcpu_load(vcpu, cpu);
-        if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
-                unsigned long khz = cpufreq_quick_get(cpu);
-                if (!khz)
-                        khz = tsc_khz;
-                per_cpu(cpu_tsc_khz, cpu) = khz;
-        }
         kvm_request_guest_time_update(vcpu);
 }
 
@@ -4152,9 +4159,23 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
 }
 EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
 
-static void bounce_off(void *info)
+static void tsc_bad(void *info)
+{
+        __get_cpu_var(cpu_tsc_khz) = 0;
+}
+
+static void tsc_khz_changed(void *data)
 {
-        /* nothing */
+        struct cpufreq_freqs *freq = data;
+        unsigned long khz = 0;
+
+        if (data)
+                khz = freq->new;
+        else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+                khz = cpufreq_quick_get(raw_smp_processor_id());
+        if (!khz)
+                khz = tsc_khz;
+        __get_cpu_var(cpu_tsc_khz) = khz;
 }
 
 static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
@@ -4165,11 +4186,51 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
         struct kvm_vcpu *vcpu;
         int i, send_ipi = 0;
 
+        /*
+         * We allow guests to temporarily run on slowing clocks,
+         * provided we notify them after, or to run on accelerating
+         * clocks, provided we notify them before. Thus time never
+         * goes backwards.
+         *
+         * However, we have a problem. We can't atomically update
+         * the frequency of a given CPU from this function; it is
+         * merely a notifier, which can be called from any CPU.
+         * Changing the TSC frequency at arbitrary points in time
+         * requires a recomputation of local variables related to
+         * the TSC for each VCPU. We must flag these local variables
+         * to be updated and be sure the update takes place with the
+         * new frequency before any guests proceed.
+         *
+         * Unfortunately, the combination of hotplug CPU and frequency
+         * change creates an intractable locking scenario; the order
+         * of when these callouts happen is undefined with respect to
+         * CPU hotplug, and they can race with each other. As such,
+         * merely setting per_cpu(cpu_tsc_khz) = X during a hotadd is
+         * undefined; you can actually have a CPU frequency change take
+         * place in between the computation of X and the setting of the
+         * variable. To protect against this problem, all updates of
+         * the per_cpu tsc_khz variable are done in an interrupt
+         * protected IPI, and all callers wishing to update the value
+         * must wait for a synchronous IPI to complete (which is trivial
+         * if the caller is on the CPU already). This establishes the
+         * necessary total order on variable updates.
+         *
+         * Note that because a guest time update may take place
+         * anytime after the setting of the VCPU's request bit, the
+         * correct TSC value must be set before the request. However,
+         * to ensure the update actually makes it to any guest which
+         * starts running in hardware virtualization between the set
+         * and the acquisition of the spinlock, we must also ping the
+         * CPU after setting the request bit.
+         *
+         */
+
         if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
                 return 0;
         if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
                 return 0;
-        per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
+
+        smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
 
         spin_lock(&kvm_lock);
         list_for_each_entry(kvm, &vm_list, vm_list) {
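
The comment added in the hunk above captures the rule behind the PRECHANGE/POSTCHANGE filtering: a guest may briefly keep scaling its TSC with a frequency that is too high (its clock merely runs slow), but never with one that is too low, because the eventual correction would move time backwards. A small self-contained illustration of that asymmetry (frequencies and cycle counts are made up for the example):

#include <stdio.h>

/* A guest converts a TSC delta to nanoseconds with the last kHz value it
 * was given: cycles / (khz * 1000) seconds, expressed in ns. */
static unsigned long long tsc_to_ns(unsigned long long cycles, unsigned long long khz)
{
        return cycles * 1000000ULL / khz;
}

int main(void)
{
        unsigned long long cycles = 3000000000ULL;      /* cycles elapsed on the host */

        /* Slowdown 3 GHz -> 2 GHz, guest not yet told (POSTCHANGE is enough):
         * it under-reports elapsed time, so its clock merely runs slow. */
        printf("real %llu ns, guest (stale 3 GHz) %llu ns\n",
               tsc_to_ns(cycles, 2000000), tsc_to_ns(cycles, 3000000));

        /* Speedup 2 GHz -> 3 GHz with the guest still on the old scale:
         * it over-reports elapsed time, and the later correction would have
         * to move the clock backwards -- hence the early notification. */
        printf("real %llu ns, guest (stale 2 GHz) %llu ns\n",
               tsc_to_ns(cycles, 3000000), tsc_to_ns(cycles, 2000000));
        return 0;
}

Hence a slowdown only needs the POSTCHANGE notification, while a speedup must be announced at PRECHANGE, before the TSC actually ticks faster.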
@@ -4179,7 +4240,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
                         if (!kvm_request_guest_time_update(vcpu))
                                 continue;
                         if (vcpu->cpu != smp_processor_id())
-                                send_ipi++;
+                                send_ipi = 1;
                 }
         }
         spin_unlock(&kvm_lock);
@@ -4197,32 +4258,48 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
                  * guest context is entered kvmclock will be updated,
                  * so the guest will not see stale values.
                  */
-                smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
+                smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
         }
         return 0;
 }
 
 static struct notifier_block kvmclock_cpufreq_notifier_block = {
         .notifier_call = kvmclock_cpufreq_notifier
+};
+
+static int kvmclock_cpu_notifier(struct notifier_block *nfb,
+                                 unsigned long action, void *hcpu)
+{
+        unsigned int cpu = (unsigned long)hcpu;
+
+        switch (action) {
+        case CPU_ONLINE:
+        case CPU_DOWN_FAILED:
+                smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
+                break;
+        case CPU_DOWN_PREPARE:
+                smp_call_function_single(cpu, tsc_bad, NULL, 1);
+                break;
+        }
+        return NOTIFY_OK;
+}
+
+static struct notifier_block kvmclock_cpu_notifier_block = {
+        .notifier_call = kvmclock_cpu_notifier,
+        .priority = -INT_MAX
 };
 
 static void kvm_timer_init(void)
 {
         int cpu;
 
+        register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
         if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
                 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
                                           CPUFREQ_TRANSITION_NOTIFIER);
-                for_each_online_cpu(cpu) {
-                        unsigned long khz = cpufreq_get(cpu);
-                        if (!khz)
-                                khz = tsc_khz;
-                        per_cpu(cpu_tsc_khz, cpu) = khz;
-                }
-        } else {
-                for_each_possible_cpu(cpu)
-                        per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
         }
+        for_each_online_cpu(cpu)
+                smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
 }
 
 static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
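
The hunk above drops the old per-CPU frequency seeding from kvm_timer_init() and instead registers a hotplug notifier, then IPIs every online CPU, so cpu_tsc_khz is only ever written on the CPU that owns it (the total order the comment asks for). A compressed, simulated sketch of the notifier's state machine (the action constants and "IPIs" are stand-ins, not kernel code):

#include <stdio.h>

enum cpu_action { CPU_ONLINE, CPU_DOWN_PREPARE, CPU_DOWN_FAILED };

#define NR_CPUS 4
static unsigned long cpu_tsc_khz[NR_CPUS];
static const unsigned long tsc_khz = 2400000;   /* boot-time fallback, in kHz */

/* In the kernel these run on the target CPU via smp_call_function_single(). */
static void tsc_khz_changed(int cpu) { cpu_tsc_khz[cpu] = tsc_khz; }
static void tsc_bad(int cpu)         { cpu_tsc_khz[cpu] = 0; }

static void kvmclock_cpu_notifier(enum cpu_action action, int cpu)
{
        switch (action) {
        case CPU_ONLINE:
        case CPU_DOWN_FAILED:
                tsc_khz_changed(cpu);   /* CPU usable (again): refresh its kHz */
                break;
        case CPU_DOWN_PREPARE:
                tsc_bad(cpu);           /* going away: poison so updates defer */
                break;
        }
}

int main(void)
{
        kvmclock_cpu_notifier(CPU_ONLINE, 2);
        printf("cpu2 online:       %lu kHz\n", cpu_tsc_khz[2]);
        kvmclock_cpu_notifier(CPU_DOWN_PREPARE, 2);
        printf("cpu2 down prepare: %lu kHz\n", cpu_tsc_khz[2]);
        kvmclock_cpu_notifier(CPU_DOWN_FAILED, 2);
        printf("cpu2 down failed:  %lu kHz\n", cpu_tsc_khz[2]);
        return 0;
}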
@@ -4324,6 +4401,7 @@ void kvm_arch_exit(void)
         if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
                 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
                                             CPUFREQ_TRANSITION_NOTIFIER);
+        unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
         kvm_x86_ops = NULL;
         kvm_mmu_module_exit();
 }
@@ -4739,8 +4817,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                         kvm_mmu_unload(vcpu);
                 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
                         __kvm_migrate_timers(vcpu);
-                if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu))
-                        kvm_write_guest_time(vcpu);
+                if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu)) {
+                        r = kvm_write_guest_time(vcpu);
+                        if (unlikely(r))
+                                goto out;
+                }
                 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
                         kvm_mmu_sync_roots(vcpu);
                 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
@@ -5423,17 +5504,7 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 
 int kvm_arch_hardware_enable(void *garbage)
 {
-        /*
-         * Since this may be called from a hotplug notifcation,
-         * we can't get the CPU frequency directly.
-         */
-        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
-                int cpu = raw_smp_processor_id();
-                per_cpu(cpu_tsc_khz, cpu) = 0;
-        }
-
         kvm_shared_msr_cpu_online();
-
         return kvm_x86_ops->hardware_enable(garbage);
 }
 