author     Zachary Amsden <zamsden@redhat.com>    2010-08-20 04:07:21 -0400
committer  Avi Kivity <avi@redhat.com>            2010-10-24 04:51:22 -0400
commit     8cfdc0008542b57caadbfe013da163131a8293f4 (patch)
tree       97b24e0d910f32908e36f3e921f4e6d3287c9955 /arch
parent     f38e098ff3a315bb74abbb4a35cba11bbea8e2fa (diff)
KVM: x86: Make cpu_tsc_khz updates use local CPU
This simplifies much of the init code; we can now simply always call tsc_khz_changed, optionally passing it a new value, or letting it figure out the existing value (while interrupts are disabled, and thus, by inference from the rule, unable to race with CPU hotplug or frequency updates, which issue IPIs to the local CPU to perform this very same task).

Signed-off-by: Zachary Amsden <zamsden@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
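In essence, the patch funnels every update of the per-CPU cpu_tsc_khz variable through one callback delivered by a synchronous IPI. A condensed sketch of that pattern, reusing names from the diff below (the update_cpu_tsc_khz wrapper is hypothetical, and the real callback below also consults cpufreq_quick_get()):

#include <linux/smp.h>
#include <linux/percpu.h>
#include <linux/cpufreq.h>
#include <asm/tsc.h>

static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);

/* Runs on the target CPU via IPI, i.e. with interrupts disabled there. */
static void tsc_khz_changed(void *data)
{
	struct cpufreq_freqs *freq = data;

	/* Use the caller-supplied frequency if given, else fall back. */
	__get_cpu_var(cpu_tsc_khz) = freq ? freq->new : tsc_khz;
}

/* Hypothetical wrapper: every writer waits for the synchronous IPI to
 * complete (trivial when the caller is already on @cpu), which totally
 * orders all updates of the variable. */
static void update_cpu_tsc_khz(int cpu, struct cpufreq_freqs *freq)
{
	smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
}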
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/kvm/x86.c | 157
1 file changed, 114 insertions(+), 43 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e7da14c317e6..699c6b89c1b4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -895,6 +895,15 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *
 
 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
 
+static inline int kvm_tsc_changes_freq(void)
+{
+	int cpu = get_cpu();
+	int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
+		  cpufreq_quick_get(cpu) != 0;
+	put_cpu();
+	return ret;
+}
+
 void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 {
 	struct kvm *kvm = vcpu->kvm;
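The new kvm_tsc_changes_freq() helper leans on the get_cpu()/put_cpu() idiom: get_cpu() disables preemption and returns the current CPU id, so cpufreq_quick_get(cpu) cannot race with the task migrating to another CPU. A minimal, hypothetical illustration of the same idiom:

#include <linux/smp.h>
#include <linux/percpu.h>

/* Between get_cpu() and put_cpu() preemption is off, so the task
 * cannot migrate and @cpu remains the CPU we are actually running on. */
static unsigned long read_this_cpus_tsc_khz(void)
{
	int cpu = get_cpu();		/* disables preemption */
	unsigned long khz = per_cpu(cpu_tsc_khz, cpu);

	put_cpu();			/* re-enables preemption */
	return khz;
}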
@@ -940,7 +949,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 }
 EXPORT_SYMBOL_GPL(kvm_write_tsc);
 
-static void kvm_write_guest_time(struct kvm_vcpu *v)
+static int kvm_write_guest_time(struct kvm_vcpu *v)
 {
 	struct timespec ts;
 	unsigned long flags;
@@ -949,24 +958,27 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	unsigned long this_tsc_khz;
 
 	if ((!vcpu->time_page))
-		return;
-
-	this_tsc_khz = get_cpu_var(cpu_tsc_khz);
-	if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) {
-		kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock);
-		vcpu->hv_clock_tsc_khz = this_tsc_khz;
-	}
-	put_cpu_var(cpu_tsc_khz);
+		return 0;
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
 	kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
 	ktime_get_ts(&ts);
 	monotonic_to_bootbased(&ts);
+	this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
 	local_irq_restore(flags);
 
-	/* With all the info we got, fill in the values */
+	if (unlikely(this_tsc_khz == 0)) {
+		kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v);
+		return 1;
+	}
 
+	if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) {
+		kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock);
+		vcpu->hv_clock_tsc_khz = this_tsc_khz;
+	}
+
+	/* With all the info we got, fill in the values */
 	vcpu->hv_clock.system_time = ts.tv_nsec +
 		(NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
 
@@ -987,6 +999,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	kunmap_atomic(shared_kaddr, KM_USER0);
 
 	mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
+	return 0;
 }
 
 static int kvm_request_guest_time_update(struct kvm_vcpu *v)
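With this hunk, kvm_write_guest_time() treats a per-CPU frequency of zero (a CPU on its way down, or one not yet primed) as "try again later": it re-arms KVM_REQ_KVMCLOCK_UPDATE and returns nonzero. A sketch of how the two ends of that protocol meet; the consumer side appears verbatim in the vcpu_enter_guest hunk near the end of this diff:

/* Producer side (kvm_write_guest_time): leave the request pending. */
if (unlikely(this_tsc_khz == 0)) {
	kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v);	/* re-arm */
	return 1;
}

/* Consumer side (vcpu_enter_guest): kvm_check_request() clears the
 * bit, but a nonzero return aborts guest entry, and the re-armed bit
 * forces a retry on the next entry attempt. */
if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu)) {
	r = kvm_write_guest_time(vcpu);
	if (unlikely(r))
		goto out;
}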
@@ -1853,12 +1866,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	}
 
 	kvm_x86_ops->vcpu_load(vcpu, cpu);
-	if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
-		unsigned long khz = cpufreq_quick_get(cpu);
-		if (!khz)
-			khz = tsc_khz;
-		per_cpu(cpu_tsc_khz, cpu) = khz;
-	}
 	kvm_request_guest_time_update(vcpu);
 }
 
@@ -4152,9 +4159,23 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
 }
 EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
 
-static void bounce_off(void *info)
+static void tsc_bad(void *info)
+{
+	__get_cpu_var(cpu_tsc_khz) = 0;
+}
+
+static void tsc_khz_changed(void *data)
 {
-	/* nothing */
+	struct cpufreq_freqs *freq = data;
+	unsigned long khz = 0;
+
+	if (data)
+		khz = freq->new;
+	else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+		khz = cpufreq_quick_get(raw_smp_processor_id());
+	if (!khz)
+		khz = tsc_khz;
+	__get_cpu_var(cpu_tsc_khz) = khz;
 }
 
 static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
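Note that tsc_khz_changed() deliberately accepts NULL: given a cpufreq_freqs pointer it installs freq->new, while given NULL the target CPU re-derives its own frequency (cpufreq_quick_get(), falling back to tsc_khz). The later hunks use both conventions; a sketch of the two call shapes:

/* From the cpufreq notifier: install the frequency from the event. */
smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);

/* From CPU hotplug and kvm_timer_init(): pass NULL and let the target
 * CPU work the frequency out for itself. */
smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);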
@@ -4165,11 +4186,51 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 	struct kvm_vcpu *vcpu;
 	int i, send_ipi = 0;
 
+	/*
+	 * We allow guests to temporarily run on slowing clocks,
+	 * provided we notify them after, or to run on accelerating
+	 * clocks, provided we notify them before.  Thus time never
+	 * goes backwards.
+	 *
+	 * However, we have a problem.  We can't atomically update
+	 * the frequency of a given CPU from this function; it is
+	 * merely a notifier, which can be called from any CPU.
+	 * Changing the TSC frequency at arbitrary points in time
+	 * requires a recomputation of local variables related to
+	 * the TSC for each VCPU.  We must flag these local variables
+	 * to be updated and be sure the update takes place with the
+	 * new frequency before any guests proceed.
+	 *
+	 * Unfortunately, the combination of hotplug CPU and frequency
+	 * change creates an intractable locking scenario; the order
+	 * of when these callouts happen is undefined with respect to
+	 * CPU hotplug, and they can race with each other.  As such,
+	 * merely setting per_cpu(cpu_tsc_khz) = X during a hotadd is
+	 * undefined; you can actually have a CPU frequency change take
+	 * place in between the computation of X and the setting of the
+	 * variable.  To protect against this problem, all updates of
+	 * the per_cpu tsc_khz variable are done in an interrupt
+	 * protected IPI, and all callers wishing to update the value
+	 * must wait for a synchronous IPI to complete (which is trivial
+	 * if the caller is on the CPU already).  This establishes the
+	 * necessary total order on variable updates.
+	 *
+	 * Note that because a guest time update may take place
+	 * anytime after the setting of the VCPU's request bit, the
+	 * correct TSC value must be set before the request.  However,
+	 * to ensure the update actually makes it to any guest which
+	 * starts running in hardware virtualization between the set
+	 * and the acquisition of the spinlock, we must also ping the
+	 * CPU after setting the request bit.
+	 *
+	 */
+
 	if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
 		return 0;
 	if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
 		return 0;
-	per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
+
+	smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
 
 	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list) {
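To make the ordering rule in the comment concrete, a worked example with hypothetical frequencies (not from the patch):

/*
 * Slowdown, 2000 -> 1000 kHz: update at POSTCHANGE.  Between the real
 * frequency drop and the notification the guest still converts TSC
 * deltas at 2000 kHz while the TSC ticks at 1000 kHz, so its clock
 * runs slow -- but it never jumps backwards.
 *
 * Speedup, 1000 -> 2000 kHz: update at PRECHANGE.  Between the
 * notification and the real frequency rise the guest converts at
 * 2000 kHz while the TSC still ticks at 1000 kHz -- again merely slow.
 *
 * The two early returns above skip exactly the wrong-side edges.
 */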
@@ -4179,7 +4240,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 			if (!kvm_request_guest_time_update(vcpu))
 				continue;
 			if (vcpu->cpu != smp_processor_id())
-				send_ipi++;
+				send_ipi = 1;
 		}
 	}
 	spin_unlock(&kvm_lock);
@@ -4197,32 +4258,48 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 		 * guest context is entered kvmclock will be updated,
 		 * so the guest will not see stale values.
 		 */
-		smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
+		smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
 	}
 	return 0;
 }
 
 static struct notifier_block kvmclock_cpufreq_notifier_block = {
 	.notifier_call  = kvmclock_cpufreq_notifier
+};
+
+static int kvmclock_cpu_notifier(struct notifier_block *nfb,
+				 unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_DOWN_FAILED:
+		smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
+		break;
+	case CPU_DOWN_PREPARE:
+		smp_call_function_single(cpu, tsc_bad, NULL, 1);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block kvmclock_cpu_notifier_block = {
+	.notifier_call  = kvmclock_cpu_notifier,
+	.priority = -INT_MAX
 };
 
 static void kvm_timer_init(void)
 {
 	int cpu;
 
+	register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
 	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
 		cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
 					  CPUFREQ_TRANSITION_NOTIFIER);
-		for_each_online_cpu(cpu) {
-			unsigned long khz = cpufreq_get(cpu);
-			if (!khz)
-				khz = tsc_khz;
-			per_cpu(cpu_tsc_khz, cpu) = khz;
-		}
-	} else {
-		for_each_possible_cpu(cpu)
-			per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
 	}
+	for_each_online_cpu(cpu)
+		smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
 }
 
 static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
@@ -4324,6 +4401,7 @@ void kvm_arch_exit(void)
 	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
 		cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
 					    CPUFREQ_TRANSITION_NOTIFIER);
+	unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
 	kvm_x86_ops = NULL;
 	kvm_mmu_module_exit();
 }
@@ -4739,8 +4817,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_mmu_unload(vcpu);
 		if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
 			__kvm_migrate_timers(vcpu);
-		if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu))
-			kvm_write_guest_time(vcpu);
+		if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu)) {
+			r = kvm_write_guest_time(vcpu);
+			if (unlikely(r))
+				goto out;
+		}
 		if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
 			kvm_mmu_sync_roots(vcpu);
 		if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
@@ -5423,17 +5504,7 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 
 int kvm_arch_hardware_enable(void *garbage)
 {
-	/*
-	 * Since this may be called from a hotplug notifcation,
-	 * we can't get the CPU frequency directly.
-	 */
-	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
-		int cpu = raw_smp_processor_id();
-		per_cpu(cpu_tsc_khz, cpu) = 0;
-	}
-
 	kvm_shared_msr_cpu_online();
-
 	return kvm_x86_ops->hardware_enable(garbage);
 }
 