 arch/x86/kvm/x86.c | 157
 1 file changed, 114 insertions(+), 43 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e7da14c317e6..699c6b89c1b4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -895,6 +895,15 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *
 
 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
 
+static inline int kvm_tsc_changes_freq(void)
+{
+	int cpu = get_cpu();
+	int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
+		  cpufreq_quick_get(cpu) != 0;
+	put_cpu();
+	return ret;
+}
+
 void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 {
 	struct kvm *kvm = vcpu->kvm;
@@ -940,7 +949,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 }
 EXPORT_SYMBOL_GPL(kvm_write_tsc);
 
-static void kvm_write_guest_time(struct kvm_vcpu *v)
+static int kvm_write_guest_time(struct kvm_vcpu *v)
 {
 	struct timespec ts;
 	unsigned long flags;
@@ -949,24 +958,27 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	unsigned long this_tsc_khz;
 
 	if ((!vcpu->time_page))
-		return;
-
-	this_tsc_khz = get_cpu_var(cpu_tsc_khz);
-	if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) {
-		kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock);
-		vcpu->hv_clock_tsc_khz = this_tsc_khz;
-	}
-	put_cpu_var(cpu_tsc_khz);
+		return 0;
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
 	kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
 	ktime_get_ts(&ts);
 	monotonic_to_bootbased(&ts);
+	this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
 	local_irq_restore(flags);
 
-	/* With all the info we got, fill in the values */
+	if (unlikely(this_tsc_khz == 0)) {
+		kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v);
+		return 1;
+	}
 
+	if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) {
+		kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock);
+		vcpu->hv_clock_tsc_khz = this_tsc_khz;
+	}
+
+	/* With all the info we got, fill in the values */
 	vcpu->hv_clock.system_time = ts.tv_nsec +
 		(NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
 
@@ -987,6 +999,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	kunmap_atomic(shared_kaddr, KM_USER0);
 
 	mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
+	return 0;
 }
 
 static int kvm_request_guest_time_update(struct kvm_vcpu *v)
@@ -1853,12 +1866,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	}
 
 	kvm_x86_ops->vcpu_load(vcpu, cpu);
-	if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
-		unsigned long khz = cpufreq_quick_get(cpu);
-		if (!khz)
-			khz = tsc_khz;
-		per_cpu(cpu_tsc_khz, cpu) = khz;
-	}
 	kvm_request_guest_time_update(vcpu);
 }
 
@@ -4152,9 +4159,23 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
 }
 EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
 
-static void bounce_off(void *info)
+static void tsc_bad(void *info)
+{
+	__get_cpu_var(cpu_tsc_khz) = 0;
+}
+
+static void tsc_khz_changed(void *data)
 {
-	/* nothing */
+	struct cpufreq_freqs *freq = data;
+	unsigned long khz = 0;
+
+	if (data)
+		khz = freq->new;
+	else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+		khz = cpufreq_quick_get(raw_smp_processor_id());
+	if (!khz)
+		khz = tsc_khz;
+	__get_cpu_var(cpu_tsc_khz) = khz;
 }
 
 static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
@@ -4165,11 +4186,51 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 	struct kvm_vcpu *vcpu;
 	int i, send_ipi = 0;
 
+	/*
+	 * We allow guests to temporarily run on slowing clocks,
+	 * provided we notify them after, or to run on accelerating
+	 * clocks, provided we notify them before. Thus time never
+	 * goes backwards.
+	 *
+	 * However, we have a problem. We can't atomically update
+	 * the frequency of a given CPU from this function; it is
+	 * merely a notifier, which can be called from any CPU.
+	 * Changing the TSC frequency at arbitrary points in time
+	 * requires a recomputation of local variables related to
+	 * the TSC for each VCPU. We must flag these local variables
+	 * to be updated and be sure the update takes place with the
+	 * new frequency before any guests proceed.
+	 *
+	 * Unfortunately, the combination of hotplug CPU and frequency
+	 * change creates an intractable locking scenario; the order
+	 * of when these callouts happen is undefined with respect to
+	 * CPU hotplug, and they can race with each other. As such,
+	 * merely setting per_cpu(cpu_tsc_khz) = X during a hotadd is
+	 * undefined; you can actually have a CPU frequency change take
+	 * place in between the computation of X and the setting of the
+	 * variable. To protect against this problem, all updates of
+	 * the per_cpu tsc_khz variable are done in an interrupt
+	 * protected IPI, and all callers wishing to update the value
+	 * must wait for a synchronous IPI to complete (which is trivial
+	 * if the caller is on the CPU already). This establishes the
+	 * necessary total order on variable updates.
+	 *
+	 * Note that because a guest time update may take place
+	 * anytime after the setting of the VCPU's request bit, the
+	 * correct TSC value must be set before the request. However,
+	 * to ensure the update actually makes it to any guest which
+	 * starts running in hardware virtualization between the set
+	 * and the acquisition of the spinlock, we must also ping the
+	 * CPU after setting the request bit.
+	 *
+	 */
+
 	if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
 		return 0;
 	if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
 		return 0;
-	per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
+
+	smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
 
 	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list) {
@@ -4179,7 +4240,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 			if (!kvm_request_guest_time_update(vcpu))
 				continue;
 			if (vcpu->cpu != smp_processor_id())
-				send_ipi++;
+				send_ipi = 1;
 		}
 	}
 	spin_unlock(&kvm_lock);
@@ -4197,32 +4258,48 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 		 * guest context is entered kvmclock will be updated,
 		 * so the guest will not see stale values.
 		 */
-		smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
+		smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
 	}
 	return 0;
 }
 
 static struct notifier_block kvmclock_cpufreq_notifier_block = {
 	.notifier_call = kvmclock_cpufreq_notifier
+};
+
+static int kvmclock_cpu_notifier(struct notifier_block *nfb,
+					unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_DOWN_FAILED:
+		smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
+		break;
+	case CPU_DOWN_PREPARE:
+		smp_call_function_single(cpu, tsc_bad, NULL, 1);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block kvmclock_cpu_notifier_block = {
+	.notifier_call = kvmclock_cpu_notifier,
+	.priority = -INT_MAX
 };
 
 static void kvm_timer_init(void)
 {
 	int cpu;
 
+	register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
 	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
 		cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
 					  CPUFREQ_TRANSITION_NOTIFIER);
-		for_each_online_cpu(cpu) {
-			unsigned long khz = cpufreq_get(cpu);
-			if (!khz)
-				khz = tsc_khz;
-			per_cpu(cpu_tsc_khz, cpu) = khz;
-		}
-	} else {
-		for_each_possible_cpu(cpu)
-			per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
 	}
+	for_each_online_cpu(cpu)
+		smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
 }
 
 static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
@@ -4324,6 +4401,7 @@ void kvm_arch_exit(void)
 	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
 		cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
 					    CPUFREQ_TRANSITION_NOTIFIER);
+	unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
 	kvm_x86_ops = NULL;
 	kvm_mmu_module_exit();
 }
@@ -4739,8 +4817,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_mmu_unload(vcpu);
 		if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
 			__kvm_migrate_timers(vcpu);
-		if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu))
-			kvm_write_guest_time(vcpu);
+		if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu)) {
+			r = kvm_write_guest_time(vcpu);
+			if (unlikely(r))
+				goto out;
+		}
 		if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
 			kvm_mmu_sync_roots(vcpu);
 		if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
@@ -5423,17 +5504,7 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 
 int kvm_arch_hardware_enable(void *garbage)
 {
-	/*
-	 * Since this may be called from a hotplug notifcation,
-	 * we can't get the CPU frequency directly.
-	 */
-	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
-		int cpu = raw_smp_processor_id();
-		per_cpu(cpu_tsc_khz, cpu) = 0;
-	}
-
 	kvm_shared_msr_cpu_online();
-
 	return kvm_x86_ops->hardware_enable(garbage);
 }
 
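
Illustration (not part of the patch above): the invariant the new comment in kvmclock_cpufreq_notifier() describes is that cpu_tsc_khz is only ever written on its owning CPU, from inside a synchronous IPI handler, so a reader on that CPU with interrupts disabled always sees a fully published value. A minimal sketch of that update pattern follows; example_khz, example_update() and example_set_khz() are hypothetical names used only for this illustration.

#include <linux/smp.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, example_khz);

/* Runs on the target CPU, in IPI context with interrupts disabled. */
static void example_update(void *data)
{
	__get_cpu_var(example_khz) = (unsigned long)data;
}

/*
 * May be called from any CPU; passing wait == 1 blocks the caller until
 * the IPI handler has run, which is what gives the total order on
 * per-CPU updates that the patch relies on.
 */
static void example_set_khz(int cpu, unsigned long khz)
{
	smp_call_function_single(cpu, example_update, (void *)khz, 1);
}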