diff options
author | Zachary Amsden <zamsden@redhat.com> | 2010-09-18 20:38:15 -0400 |
---|---|---|
committer | Avi Kivity <avi@redhat.com> | 2010-10-24 04:53:05 -0400 |
commit | c285545f813d7b0ce989fd34e42ad1fe785dc65d (patch) | |
tree | 34c5aa4febe52e14e96d03b1a4f719ef9b357c85 /arch | |
parent | 34c238a1d1832d7b1f655641f52782e86396b30a (diff) |
KVM: x86: TSC catchup mode
Negate the effects of AN TYM spell while kvm thread is preempted by tracking
conversion factor to the highest TSC rate and catching the TSC up when it has
fallen behind the kernel view of time. Note that once triggered, we don't
turn off catchup mode.
A slightly more clever version of this is possible, which only does catchup
when TSC rate drops, and which specifically targets only CPUs with broken
TSC, but since these all are considered unstable_tsc(), this patch covers
all necessary cases.
Signed-off-by: Zachary Amsden <zamsden@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 6 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 87 |
2 files changed, 72 insertions, 21 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 519d6f784984..9e6fe391094e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -384,6 +384,9 @@ struct kvm_vcpu_arch { | |||
384 | u64 last_host_tsc; | 384 | u64 last_host_tsc; |
385 | u64 last_guest_tsc; | 385 | u64 last_guest_tsc; |
386 | u64 last_kernel_ns; | 386 | u64 last_kernel_ns; |
387 | u64 last_tsc_nsec; | ||
388 | u64 last_tsc_write; | ||
389 | bool tsc_catchup; | ||
387 | 390 | ||
388 | bool nmi_pending; | 391 | bool nmi_pending; |
389 | bool nmi_injected; | 392 | bool nmi_injected; |
@@ -444,6 +447,9 @@ struct kvm_arch { | |||
444 | u64 last_tsc_nsec; | 447 | u64 last_tsc_nsec; |
445 | u64 last_tsc_offset; | 448 | u64 last_tsc_offset; |
446 | u64 last_tsc_write; | 449 | u64 last_tsc_write; |
450 | u32 virtual_tsc_khz; | ||
451 | u32 virtual_tsc_mult; | ||
452 | s8 virtual_tsc_shift; | ||
447 | 453 | ||
448 | struct kvm_xen_hvm_config xen_hvm_config; | 454 | struct kvm_xen_hvm_config xen_hvm_config; |
449 | 455 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ce57cd899a62..bfcf8fd5e080 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -962,6 +962,7 @@ static inline u64 get_kernel_ns(void) | |||
962 | } | 962 | } |
963 | 963 | ||
964 | static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); | 964 | static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); |
965 | unsigned long max_tsc_khz; | ||
965 | 966 | ||
966 | static inline int kvm_tsc_changes_freq(void) | 967 | static inline int kvm_tsc_changes_freq(void) |
967 | { | 968 | { |
@@ -985,6 +986,24 @@ static inline u64 nsec_to_cycles(u64 nsec) | |||
985 | return ret; | 986 | return ret; |
986 | } | 987 | } |
987 | 988 | ||
989 | static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz) | ||
990 | { | ||
991 | /* Compute a scale to convert nanoseconds in TSC cycles */ | ||
992 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, | ||
993 | &kvm->arch.virtual_tsc_shift, | ||
994 | &kvm->arch.virtual_tsc_mult); | ||
995 | kvm->arch.virtual_tsc_khz = this_tsc_khz; | ||
996 | } | ||
997 | |||
998 | static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) | ||
999 | { | ||
1000 | u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, | ||
1001 | vcpu->kvm->arch.virtual_tsc_mult, | ||
1002 | vcpu->kvm->arch.virtual_tsc_shift); | ||
1003 | tsc += vcpu->arch.last_tsc_write; | ||
1004 | return tsc; | ||
1005 | } | ||
1006 | |||
988 | void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | 1007 | void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) |
989 | { | 1008 | { |
990 | struct kvm *kvm = vcpu->kvm; | 1009 | struct kvm *kvm = vcpu->kvm; |
@@ -1029,6 +1048,8 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | |||
1029 | 1048 | ||
1030 | /* Reset of TSC must disable overshoot protection below */ | 1049 | /* Reset of TSC must disable overshoot protection below */ |
1031 | vcpu->arch.hv_clock.tsc_timestamp = 0; | 1050 | vcpu->arch.hv_clock.tsc_timestamp = 0; |
1051 | vcpu->arch.last_tsc_write = data; | ||
1052 | vcpu->arch.last_tsc_nsec = ns; | ||
1032 | } | 1053 | } |
1033 | EXPORT_SYMBOL_GPL(kvm_write_tsc); | 1054 | EXPORT_SYMBOL_GPL(kvm_write_tsc); |
1034 | 1055 | ||
@@ -1041,22 +1062,42 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1041 | s64 kernel_ns, max_kernel_ns; | 1062 | s64 kernel_ns, max_kernel_ns; |
1042 | u64 tsc_timestamp; | 1063 | u64 tsc_timestamp; |
1043 | 1064 | ||
1044 | if ((!vcpu->time_page)) | ||
1045 | return 0; | ||
1046 | |||
1047 | /* Keep irq disabled to prevent changes to the clock */ | 1065 | /* Keep irq disabled to prevent changes to the clock */ |
1048 | local_irq_save(flags); | 1066 | local_irq_save(flags); |
1049 | kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); | 1067 | kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); |
1050 | kernel_ns = get_kernel_ns(); | 1068 | kernel_ns = get_kernel_ns(); |
1051 | this_tsc_khz = __get_cpu_var(cpu_tsc_khz); | 1069 | this_tsc_khz = __get_cpu_var(cpu_tsc_khz); |
1052 | local_irq_restore(flags); | ||
1053 | 1070 | ||
1054 | if (unlikely(this_tsc_khz == 0)) { | 1071 | if (unlikely(this_tsc_khz == 0)) { |
1072 | local_irq_restore(flags); | ||
1055 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); | 1073 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); |
1056 | return 1; | 1074 | return 1; |
1057 | } | 1075 | } |
1058 | 1076 | ||
1059 | /* | 1077 | /* |
1078 | * We may have to catch up the TSC to match elapsed wall clock | ||
1079 | * time for two reasons, even if kvmclock is used. | ||
1080 | * 1) CPU could have been running below the maximum TSC rate | ||
1081 | * 2) Broken TSC compensation resets the base at each VCPU | ||
1082 | * entry to avoid unknown leaps of TSC even when running | ||
1083 | * again on the same CPU. This may cause apparent elapsed | ||
1084 | * time to disappear, and the guest to stand still or run | ||
1085 | * very slowly. | ||
1086 | */ | ||
1087 | if (vcpu->tsc_catchup) { | ||
1088 | u64 tsc = compute_guest_tsc(v, kernel_ns); | ||
1089 | if (tsc > tsc_timestamp) { | ||
1090 | kvm_x86_ops->adjust_tsc_offset(v, tsc - tsc_timestamp); | ||
1091 | tsc_timestamp = tsc; | ||
1092 | } | ||
1093 | } | ||
1094 | |||
1095 | local_irq_restore(flags); | ||
1096 | |||
1097 | if (!vcpu->time_page) | ||
1098 | return 0; | ||
1099 | |||
1100 | /* | ||
1060 | * Time as measured by the TSC may go backwards when resetting the base | 1101 | * Time as measured by the TSC may go backwards when resetting the base |
1061 | * tsc_timestamp. The reason for this is that the TSC resolution is | 1102 | * tsc_timestamp. The reason for this is that the TSC resolution is |
1062 | * higher than the resolution of the other clock scales. Thus, many | 1103 | * higher than the resolution of the other clock scales. Thus, many |
@@ -1122,16 +1163,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1122 | return 0; | 1163 | return 0; |
1123 | } | 1164 | } |
1124 | 1165 | ||
1125 | static int kvm_request_guest_time_update(struct kvm_vcpu *v) | ||
1126 | { | ||
1127 | struct kvm_vcpu_arch *vcpu = &v->arch; | ||
1128 | |||
1129 | if (!vcpu->time_page) | ||
1130 | return 0; | ||
1131 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); | ||
1132 | return 1; | ||
1133 | } | ||
1134 | |||
1135 | static bool msr_mtrr_valid(unsigned msr) | 1166 | static bool msr_mtrr_valid(unsigned msr) |
1136 | { | 1167 | { |
1137 | switch (msr) { | 1168 | switch (msr) { |
@@ -1455,6 +1486,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1455 | } | 1486 | } |
1456 | 1487 | ||
1457 | vcpu->arch.time = data; | 1488 | vcpu->arch.time = data; |
1489 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | ||
1458 | 1490 | ||
1459 | /* we verify if the enable bit is set... */ | 1491 | /* we verify if the enable bit is set... */ |
1460 | if (!(data & 1)) | 1492 | if (!(data & 1)) |
@@ -1470,8 +1502,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1470 | kvm_release_page_clean(vcpu->arch.time_page); | 1502 | kvm_release_page_clean(vcpu->arch.time_page); |
1471 | vcpu->arch.time_page = NULL; | 1503 | vcpu->arch.time_page = NULL; |
1472 | } | 1504 | } |
1473 | |||
1474 | kvm_request_guest_time_update(vcpu); | ||
1475 | break; | 1505 | break; |
1476 | } | 1506 | } |
1477 | case MSR_IA32_MCG_CTL: | 1507 | case MSR_IA32_MCG_CTL: |
@@ -2028,9 +2058,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2028 | native_read_tsc() - vcpu->arch.last_host_tsc; | 2058 | native_read_tsc() - vcpu->arch.last_host_tsc; |
2029 | if (tsc_delta < 0) | 2059 | if (tsc_delta < 0) |
2030 | mark_tsc_unstable("KVM discovered backwards TSC"); | 2060 | mark_tsc_unstable("KVM discovered backwards TSC"); |
2031 | if (check_tsc_unstable()) | 2061 | if (check_tsc_unstable()) { |
2032 | kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta); | 2062 | kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta); |
2033 | kvm_migrate_timers(vcpu); | 2063 | vcpu->arch.tsc_catchup = 1; |
2064 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | ||
2065 | } | ||
2066 | if (vcpu->cpu != cpu) | ||
2067 | kvm_migrate_timers(vcpu); | ||
2034 | vcpu->cpu = cpu; | 2068 | vcpu->cpu = cpu; |
2035 | } | 2069 | } |
2036 | } | 2070 | } |
@@ -4461,8 +4495,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va | |||
4461 | kvm_for_each_vcpu(i, vcpu, kvm) { | 4495 | kvm_for_each_vcpu(i, vcpu, kvm) { |
4462 | if (vcpu->cpu != freq->cpu) | 4496 | if (vcpu->cpu != freq->cpu) |
4463 | continue; | 4497 | continue; |
4464 | if (!kvm_request_guest_time_update(vcpu)) | 4498 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
4465 | continue; | ||
4466 | if (vcpu->cpu != smp_processor_id()) | 4499 | if (vcpu->cpu != smp_processor_id()) |
4467 | send_ipi = 1; | 4500 | send_ipi = 1; |
4468 | } | 4501 | } |
@@ -4517,11 +4550,20 @@ static void kvm_timer_init(void) | |||
4517 | { | 4550 | { |
4518 | int cpu; | 4551 | int cpu; |
4519 | 4552 | ||
4553 | max_tsc_khz = tsc_khz; | ||
4520 | register_hotcpu_notifier(&kvmclock_cpu_notifier_block); | 4554 | register_hotcpu_notifier(&kvmclock_cpu_notifier_block); |
4521 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | 4555 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { |
4556 | #ifdef CONFIG_CPU_FREQ | ||
4557 | struct cpufreq_policy policy; | ||
4558 | memset(&policy, 0, sizeof(policy)); | ||
4559 | cpufreq_get_policy(&policy, get_cpu()); | ||
4560 | if (policy.cpuinfo.max_freq) | ||
4561 | max_tsc_khz = policy.cpuinfo.max_freq; | ||
4562 | #endif | ||
4522 | cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, | 4563 | cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, |
4523 | CPUFREQ_TRANSITION_NOTIFIER); | 4564 | CPUFREQ_TRANSITION_NOTIFIER); |
4524 | } | 4565 | } |
4566 | pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz); | ||
4525 | for_each_online_cpu(cpu) | 4567 | for_each_online_cpu(cpu) |
4526 | smp_call_function_single(cpu, tsc_khz_changed, NULL, 1); | 4568 | smp_call_function_single(cpu, tsc_khz_changed, NULL, 1); |
4527 | } | 4569 | } |
@@ -5752,7 +5794,7 @@ int kvm_arch_hardware_enable(void *garbage) | |||
5752 | list_for_each_entry(kvm, &vm_list, vm_list) | 5794 | list_for_each_entry(kvm, &vm_list, vm_list) |
5753 | kvm_for_each_vcpu(i, vcpu, kvm) | 5795 | kvm_for_each_vcpu(i, vcpu, kvm) |
5754 | if (vcpu->cpu == smp_processor_id()) | 5796 | if (vcpu->cpu == smp_processor_id()) |
5755 | kvm_request_guest_time_update(vcpu); | 5797 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
5756 | return kvm_x86_ops->hardware_enable(garbage); | 5798 | return kvm_x86_ops->hardware_enable(garbage); |
5757 | } | 5799 | } |
5758 | 5800 | ||
@@ -5803,6 +5845,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
5803 | } | 5845 | } |
5804 | vcpu->arch.pio_data = page_address(page); | 5846 | vcpu->arch.pio_data = page_address(page); |
5805 | 5847 | ||
5848 | if (!kvm->arch.virtual_tsc_khz) | ||
5849 | kvm_arch_set_tsc_khz(kvm, max_tsc_khz); | ||
5850 | |||
5806 | r = kvm_mmu_create(vcpu); | 5851 | r = kvm_mmu_create(vcpu); |
5807 | if (r < 0) | 5852 | if (r < 0) |
5808 | goto fail_free_pio_data; | 5853 | goto fail_free_pio_data; |