author    Gerd Hoffmann <kraxel@redhat.com>    2009-02-04 11:52:04 -0500
committer Avi Kivity <avi@redhat.com>          2009-03-24 05:03:09 -0400
commit    c807660407a695f390034e402edfe544a1d2e40c (patch)
tree      f362e26ed5aee6458a0f84aa60f52dfb4ea6437e /arch/x86/kvm
parent    49cd7d2238e44f7ee4269481cd8a1261cc8f93a5 (diff)
KVM: Fix kvmclock on !constant_tsc boxes
kvmclock currently falls apart on machines without constant tsc.
This patch fixes it. Changes:
* keep tsc frequency in a per-cpu variable.
* handle kvmclock updates using a new request flag, checking whether
an update is needed each time we enter guest context.
* use a cpufreq notifier to track frequency changes and force
kvmclock updates.
* send IPIs to kick CPUs out of guest context where needed, so the
guest doesn't see stale values.
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--    arch/x86/kvm/x86.c    103
1 file changed, 94 insertions(+), 9 deletions(-)
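For readers unfamiliar with cpufreq_scale(): the notifier added below rescales the cached per-cpu TSC rate proportionally to the frequency change, since cpufreq_scale(old, div, mult) computes old * mult / div; here div is the reference frequency the TSC was calibrated at and mult is the new frequency. A minimal userspace sketch of that arithmetic follows (the helper name and the example values are illustrative only, not part of the patch):

/*
 * Illustrative only: mirrors the arithmetic of cpufreq_scale(old, div, mult),
 * which returns old * mult / div.  The cached kHz value thus tracks the CPU
 * frequency on !constant_tsc hardware.
 */
#include <stdio.h>

static unsigned long scale_tsc_khz(unsigned long tsc_khz_ref,
                                   unsigned int ref_freq,
                                   unsigned int new_freq)
{
        return (unsigned long)(((unsigned long long)tsc_khz_ref * new_freq) /
                               ref_freq);
}

int main(void)
{
        /* e.g. TSC calibrated at 2400 MHz, cpufreq drops the core to 1600 MHz */
        printf("%lu kHz\n", scale_tsc_khz(2400000, 2400000, 1600000)); /* 1600000 */
        return 0;
}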
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f83590b47dd..05d7be89b5eb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -36,6 +36,7 @@
 #include <linux/highmem.h>
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
+#include <linux/cpufreq.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -617,6 +618,8 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *
                  hv_clock->tsc_to_system_mul);
 }
 
+static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
+
 static void kvm_write_guest_time(struct kvm_vcpu *v)
 {
         struct timespec ts;
@@ -627,9 +630,9 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
         if ((!vcpu->time_page))
                 return;
 
-        if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
-                kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
-                vcpu->hv_clock_tsc_khz = tsc_khz;
+        if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) {
+                kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock);
+                vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz);
         }
 
         /* Keep irq disabled to prevent changes to the clock */
@@ -660,6 +663,16 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
         mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
 }
 
+static int kvm_request_guest_time_update(struct kvm_vcpu *v)
+{
+        struct kvm_vcpu_arch *vcpu = &v->arch;
+
+        if (!vcpu->time_page)
+                return 0;
+        set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
+        return 1;
+}
+
 static bool msr_mtrr_valid(unsigned msr)
 {
         switch (msr) {
@@ -790,7 +803,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                         vcpu->arch.time_page = NULL;
                 }
 
-                kvm_write_guest_time(vcpu);
+                kvm_request_guest_time_update(vcpu);
                 break;
         }
         default:
@@ -1000,6 +1013,7 @@ int kvm_dev_ioctl_check_extension(long ext)
         case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
         case KVM_CAP_SET_TSS_ADDR:
         case KVM_CAP_EXT_CPUID:
+        case KVM_CAP_CLOCKSOURCE:
         case KVM_CAP_PIT:
         case KVM_CAP_NOP_IO_DELAY:
         case KVM_CAP_MP_STATE:
@@ -1025,9 +1039,6 @@ int kvm_dev_ioctl_check_extension(long ext)
         case KVM_CAP_IOMMU:
                 r = iommu_found();
                 break;
-        case KVM_CAP_CLOCKSOURCE:
-                r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
-                break;
         default:
                 r = 0;
                 break;
@@ -1098,7 +1109,7 @@ out:
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
         kvm_x86_ops->vcpu_load(vcpu, cpu);
-        kvm_write_guest_time(vcpu);
+        kvm_request_guest_time_update(vcpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -2642,9 +2653,72 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
 
+static void bounce_off(void *info)
+{
+        /* nothing */
+}
+
+static unsigned int ref_freq;
+static unsigned long tsc_khz_ref;
+
+static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+                                     void *data)
+{
+        struct cpufreq_freqs *freq = data;
+        struct kvm *kvm;
+        struct kvm_vcpu *vcpu;
+        int i, send_ipi = 0;
+
+        if (!ref_freq)
+                ref_freq = freq->old;
+
+        if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
+                return 0;
+        if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
+                return 0;
+        per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+
+        spin_lock(&kvm_lock);
+        list_for_each_entry(kvm, &vm_list, vm_list) {
+                for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+                        vcpu = kvm->vcpus[i];
+                        if (!vcpu)
+                                continue;
+                        if (vcpu->cpu != freq->cpu)
+                                continue;
+                        if (!kvm_request_guest_time_update(vcpu))
+                                continue;
+                        if (vcpu->cpu != smp_processor_id())
+                                send_ipi++;
+                }
+        }
+        spin_unlock(&kvm_lock);
+
+        if (freq->old < freq->new && send_ipi) {
+                /*
+                 * We upscale the frequency.  Must make the guest
+                 * doesn't see old kvmclock values while running with
+                 * the new frequency, otherwise we risk the guest sees
+                 * time go backwards.
+                 *
+                 * In case we update the frequency for another cpu
+                 * (which might be in guest context) send an interrupt
+                 * to kick the cpu out of guest context.  Next time
+                 * guest context is entered kvmclock will be updated,
+                 * so the guest will not see stale values.
+                 */
+                smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
+        }
+        return 0;
+}
+
+static struct notifier_block kvmclock_cpufreq_notifier_block = {
+        .notifier_call = kvmclock_cpufreq_notifier
+};
+
 int kvm_arch_init(void *opaque)
 {
-        int r;
+        int r, cpu;
         struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
 
         if (kvm_x86_ops) {
@@ -2675,6 +2749,15 @@ int kvm_arch_init(void *opaque)
         kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
         kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
                         PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
+
+        for_each_possible_cpu(cpu)
+                per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+                tsc_khz_ref = tsc_khz;
+                cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
+                                          CPUFREQ_TRANSITION_NOTIFIER);
+        }
+
         return 0;
 
 out:
@@ -3010,6 +3093,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
         if (vcpu->requests) {
                 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
                         __kvm_migrate_timers(vcpu);
+                if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
+                        kvm_write_guest_time(vcpu);
                 if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
                         kvm_mmu_sync_roots(vcpu);
                 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))