author		Zachary Amsden <zamsden@redhat.com>	2010-09-18 20:38:15 -0400
committer	Avi Kivity <avi@redhat.com>	2010-10-24 04:53:05 -0400
commitc285545f813d7b0ce989fd34e42ad1fe785dc65d (patch)
tree34c5aa4febe52e14e96d03b1a4f719ef9b357c85 /arch/x86
parent34c238a1d1832d7b1f655641f52782e86396b30a (diff)
KVM: x86: TSC catchup mode
Negate the effects of AN TYM spell while kvm thread is preempted by
tracking conversion factor to the highest TSC rate and catching the TSC
up when it has fallen behind the kernel view of time.  Note that once
triggered, we don't turn off catchup mode.

A slightly more clever version of this is possible, which only does
catchup when TSC rate drops, and which specifically targets only CPUs
with broken TSC, but since these all are considered unstable_tsc(),
this patch covers all necessary cases.

Signed-off-by: Zachary Amsden <zamsden@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
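[Editor's note] The mechanism the patch adds is a fixed-point nanoseconds-to-TSC-cycles conversion: kvm_arch_set_tsc_khz() derives a (mult, shift) pair for the highest observed TSC rate, and compute_guest_tsc() applies it to the wall-clock time elapsed since the last TSC write to find where the guest TSC ought to be. Below is a minimal user-space sketch of that arithmetic, not the kernel code: scale_delta() stands in for pvclock_scale_delta(), the normalization loop only approximates kvm_get_time_scale(), and the 2.8 GHz rate in main() is a made-up example value.

#include <stdint.h>
#include <stdio.h>

static uint32_t virtual_tsc_mult;
static int8_t   virtual_tsc_shift;

/*
 * Pick (mult, shift) so that scale_delta(ns) == ns * tsc_khz / 1000000,
 * with mult normalized into [2^31, 2^32) for maximum 32-bit precision.
 * Rough stand-in for the kernel's kvm_get_time_scale().
 */
static void set_tsc_khz(uint32_t tsc_khz)
{
	uint64_t mult = ((uint64_t)tsc_khz << 32) / 1000000;	/* shift = 0 */
	int8_t shift = 0;

	if (!tsc_khz)
		return;		/* caller guarantees a sane rate */
	while (mult >= (1ULL << 32)) { mult >>= 1; shift++; }	/* too big */
	while (mult <  (1ULL << 31)) { mult <<= 1; shift--; }	/* too small */
	virtual_tsc_mult = (uint32_t)mult;
	virtual_tsc_shift = shift;
}

/* Stand-in for pvclock_scale_delta(): ((delta << shift) * mult) >> 32. */
static uint64_t scale_delta(uint64_t delta)
{
	if (virtual_tsc_shift < 0)
		delta >>= -virtual_tsc_shift;
	else
		delta <<= virtual_tsc_shift;
	return (uint64_t)(((__uint128_t)delta * virtual_tsc_mult) >> 32);
}

/*
 * Mirror of the patch's compute_guest_tsc(): the TSC value the guest
 * should observe after (kernel_ns - last_tsc_nsec) ns of wall-clock time.
 */
static uint64_t compute_guest_tsc(uint64_t kernel_ns, uint64_t last_tsc_nsec,
				  uint64_t last_tsc_write)
{
	return last_tsc_write + scale_delta(kernel_ns - last_tsc_nsec);
}

int main(void)
{
	set_tsc_khz(2800000);		/* hypothetical 2.8 GHz maximum rate */

	/* 1 ms of wall clock accounts for ~2,800,000 guest TSC cycles. */
	printf("guest TSC after 1ms: %llu\n",
	       (unsigned long long)compute_guest_tsc(1000000, 0, 0));
	return 0;
}

The multiply-and-shift form avoids a 64-bit division on every clock update; the patch computes the pair once per VM (when the first VCPU is created, seeded from max_tsc_khz) and reuses it for every catchup computation.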
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/include/asm/kvm_host.h	|  6
-rw-r--r--	arch/x86/kvm/x86.c	| 87
2 files changed, 72 insertions(+), 21 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 519d6f784984..9e6fe391094e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -384,6 +384,9 @@ struct kvm_vcpu_arch {
 	u64 last_host_tsc;
 	u64 last_guest_tsc;
 	u64 last_kernel_ns;
+	u64 last_tsc_nsec;
+	u64 last_tsc_write;
+	bool tsc_catchup;
 
 	bool nmi_pending;
 	bool nmi_injected;
@@ -444,6 +447,9 @@ struct kvm_arch {
 	u64 last_tsc_nsec;
 	u64 last_tsc_offset;
 	u64 last_tsc_write;
+	u32 virtual_tsc_khz;
+	u32 virtual_tsc_mult;
+	s8 virtual_tsc_shift;
 
 	struct kvm_xen_hvm_config xen_hvm_config;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ce57cd899a62..bfcf8fd5e080 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -962,6 +962,7 @@ static inline u64 get_kernel_ns(void)
 }
 
 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
+unsigned long max_tsc_khz;
 
 static inline int kvm_tsc_changes_freq(void)
 {
@@ -985,6 +986,24 @@ static inline u64 nsec_to_cycles(u64 nsec)
 	return ret;
 }
 
+static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz)
+{
+	/* Compute a scale to convert nanoseconds in TSC cycles */
+	kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
+			   &kvm->arch.virtual_tsc_shift,
+			   &kvm->arch.virtual_tsc_mult);
+	kvm->arch.virtual_tsc_khz = this_tsc_khz;
+}
+
+static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
+{
+	u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
+				      vcpu->kvm->arch.virtual_tsc_mult,
+				      vcpu->kvm->arch.virtual_tsc_shift);
+	tsc += vcpu->arch.last_tsc_write;
+	return tsc;
+}
+
 void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 {
 	struct kvm *kvm = vcpu->kvm;
@@ -1029,6 +1048,8 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 
 	/* Reset of TSC must disable overshoot protection below */
 	vcpu->arch.hv_clock.tsc_timestamp = 0;
+	vcpu->arch.last_tsc_write = data;
+	vcpu->arch.last_tsc_nsec = ns;
 }
 EXPORT_SYMBOL_GPL(kvm_write_tsc);
 
@@ -1041,22 +1062,42 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	s64 kernel_ns, max_kernel_ns;
 	u64 tsc_timestamp;
 
-	if ((!vcpu->time_page))
-		return 0;
-
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
 	kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp);
 	kernel_ns = get_kernel_ns();
 	this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
-	local_irq_restore(flags);
 
 	if (unlikely(this_tsc_khz == 0)) {
+		local_irq_restore(flags);
 		kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
 		return 1;
 	}
 
 	/*
+	 * We may have to catch up the TSC to match elapsed wall clock
+	 * time for two reasons, even if kvmclock is used.
+	 * 1) CPU could have been running below the maximum TSC rate
+	 * 2) Broken TSC compensation resets the base at each VCPU
+	 *    entry to avoid unknown leaps of TSC even when running
+	 *    again on the same CPU.  This may cause apparent elapsed
+	 *    time to disappear, and the guest to stand still or run
+	 *    very slowly.
+	 */
+	if (vcpu->tsc_catchup) {
+		u64 tsc = compute_guest_tsc(v, kernel_ns);
+		if (tsc > tsc_timestamp) {
+			kvm_x86_ops->adjust_tsc_offset(v, tsc - tsc_timestamp);
+			tsc_timestamp = tsc;
+		}
+	}
+
+	local_irq_restore(flags);
+
+	if (!vcpu->time_page)
+		return 0;
+
+	/*
 	 * Time as measured by the TSC may go backwards when resetting the base
 	 * tsc_timestamp.  The reason for this is that the TSC resolution is
 	 * higher than the resolution of the other clock scales.  Thus, many
@@ -1122,16 +1163,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	return 0;
 }
 
-static int kvm_request_guest_time_update(struct kvm_vcpu *v)
-{
-	struct kvm_vcpu_arch *vcpu = &v->arch;
-
-	if (!vcpu->time_page)
-		return 0;
-	kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
-	return 1;
-}
-
 static bool msr_mtrr_valid(unsigned msr)
 {
 	switch (msr) {
@@ -1455,6 +1486,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		}
 
 		vcpu->arch.time = data;
+		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 
 		/* we verify if the enable bit is set... */
 		if (!(data & 1))
@@ -1470,8 +1502,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 			kvm_release_page_clean(vcpu->arch.time_page);
 			vcpu->arch.time_page = NULL;
 		}
-
-		kvm_request_guest_time_update(vcpu);
 		break;
 	}
 	case MSR_IA32_MCG_CTL:
@@ -2028,9 +2058,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 				native_read_tsc() - vcpu->arch.last_host_tsc;
 		if (tsc_delta < 0)
 			mark_tsc_unstable("KVM discovered backwards TSC");
-		if (check_tsc_unstable())
+		if (check_tsc_unstable()) {
 			kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta);
-		kvm_migrate_timers(vcpu);
+			vcpu->arch.tsc_catchup = 1;
+			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+		}
+		if (vcpu->cpu != cpu)
+			kvm_migrate_timers(vcpu);
 		vcpu->cpu = cpu;
 	}
 }
@@ -4461,8 +4495,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 		kvm_for_each_vcpu(i, vcpu, kvm) {
 			if (vcpu->cpu != freq->cpu)
 				continue;
-			if (!kvm_request_guest_time_update(vcpu))
-				continue;
+			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 			if (vcpu->cpu != smp_processor_id())
 				send_ipi = 1;
 		}
@@ -4517,11 +4550,20 @@ static void kvm_timer_init(void)
 {
 	int cpu;
 
+	max_tsc_khz = tsc_khz;
 	register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
 	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+#ifdef CONFIG_CPU_FREQ
+		struct cpufreq_policy policy;
+		memset(&policy, 0, sizeof(policy));
+		cpufreq_get_policy(&policy, get_cpu());
+		if (policy.cpuinfo.max_freq)
+			max_tsc_khz = policy.cpuinfo.max_freq;
+#endif
 		cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
 					  CPUFREQ_TRANSITION_NOTIFIER);
 	}
+	pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
 	for_each_online_cpu(cpu)
 		smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
 }
@@ -5752,7 +5794,7 @@ int kvm_arch_hardware_enable(void *garbage)
 	list_for_each_entry(kvm, &vm_list, vm_list)
 		kvm_for_each_vcpu(i, vcpu, kvm)
 			if (vcpu->cpu == smp_processor_id())
-				kvm_request_guest_time_update(vcpu);
+				kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 	return kvm_x86_ops->hardware_enable(garbage);
 }
 
@@ -5803,6 +5845,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	}
 	vcpu->arch.pio_data = page_address(page);
 
+	if (!kvm->arch.virtual_tsc_khz)
+		kvm_arch_set_tsc_khz(kvm, max_tsc_khz);
+
 	r = kvm_mmu_create(vcpu);
 	if (r < 0)
 		goto fail_free_pio_data;
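
[Editor's note] To tie the hunks together: kvm_arch_vcpu_load() above sets tsc_catchup and requests a clock update whenever the host TSC is unstable; on the next kvm_guest_time_update(), the catchup branch compares the TSC the guest should have reached against the one it is about to see, and widens the TSC offset if the guest has fallen behind. A hedged sketch of that decision, reusing the toy helpers from the sketch after the commit message (adjust_tsc_offset() here is a stand-in for kvm_x86_ops->adjust_tsc_offset(), and tsc_offset for the per-VCPU hardware offset, guest_tsc = host_tsc + tsc_offset):

/* Toy model of the catchup branch in kvm_guest_time_update();
 * all values are example numbers, not kernel state. */
static uint64_t tsc_offset;

static void adjust_tsc_offset(uint64_t delta)
{
	tsc_offset += delta;
}

static void catch_up_tsc(uint64_t kernel_ns, uint64_t guest_tsc_now,
			 uint64_t last_tsc_nsec, uint64_t last_tsc_write)
{
	/* Where the guest TSC ought to be after the elapsed wall time. */
	uint64_t tsc = compute_guest_tsc(kernel_ns, last_tsc_nsec,
					 last_tsc_write);

	/* Only ever push the guest TSC forward, never backward. */
	if (tsc > guest_tsc_now)
		adjust_tsc_offset(tsc - guest_tsc_now);
}

This is also why catchup mode is harmless to leave on once triggered: when the guest TSC is already at or ahead of the expected value, the branch is a no-op.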