about | summary | refs | log | tree | commit | diff | stats
path: root/arch/x86/kvm
diff options
context:
space:
mode:
author: Gerd Hoffmann <kraxel@redhat.com> 2009-02-04 11:52:04 -0500
committer: Avi Kivity <avi@redhat.com> 2009-03-24 05:03:09 -0400
commit: c807660407a695f390034e402edfe544a1d2e40c (patch)
tree: f362e26ed5aee6458a0f84aa60f52dfb4ea6437e /arch/x86/kvm
parent: 49cd7d2238e44f7ee4269481cd8a1261cc8f93a5 (diff)
KVM: Fix kvmclock on !constant_tsc boxes
kvmclock currently falls apart on machines without constant tsc. This patch fixes it.

Changes:
 * keep tsc frequency in a per-cpu variable.
 * handle kvmclock update using a new request flag, thus checking whether we need an update each time we enter guest context.
 * use a cpufreq notifier to track frequency changes and force kvmclock updates.
 * send ipis to kick cpu out of guest context if needed to make sure the guest doesn't see stale values.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- arch/x86/kvm/x86.c 103
1 files changed, 94 insertions, 9 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f83590b47dd..05d7be89b5eb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -36,6 +36,7 @@
36#include <linux/highmem.h> 36#include <linux/highmem.h>
37#include <linux/iommu.h> 37#include <linux/iommu.h>
38#include <linux/intel-iommu.h> 38#include <linux/intel-iommu.h>
39#include <linux/cpufreq.h>
39 40
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41#include <asm/msr.h> 42#include <asm/msr.h>
@@ -617,6 +618,8 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *
617 hv_clock->tsc_to_system_mul); 618 hv_clock->tsc_to_system_mul);
618} 619}
619 620
621static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
622
620static void kvm_write_guest_time(struct kvm_vcpu *v) 623static void kvm_write_guest_time(struct kvm_vcpu *v)
621{ 624{
622 struct timespec ts; 625 struct timespec ts;
@@ -627,9 +630,9 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
627 if ((!vcpu->time_page)) 630 if ((!vcpu->time_page))
628 return; 631 return;
629 632
630 if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) { 633 if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) {
631 kvm_set_time_scale(tsc_khz, &vcpu->hv_clock); 634 kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock);
632 vcpu->hv_clock_tsc_khz = tsc_khz; 635 vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz);
633 } 636 }
634 637
635 /* Keep irq disabled to prevent changes to the clock */ 638 /* Keep irq disabled to prevent changes to the clock */
@@ -660,6 +663,16 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
660 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); 663 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
661} 664}
662 665
666static int kvm_request_guest_time_update(struct kvm_vcpu *v)
667{
668 struct kvm_vcpu_arch *vcpu = &v->arch;
669
670 if (!vcpu->time_page)
671 return 0;
672 set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
673 return 1;
674}
675
663static bool msr_mtrr_valid(unsigned msr) 676static bool msr_mtrr_valid(unsigned msr)
664{ 677{
665 switch (msr) { 678 switch (msr) {
@@ -790,7 +803,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
790 vcpu->arch.time_page = NULL; 803 vcpu->arch.time_page = NULL;
791 } 804 }
792 805
793 kvm_write_guest_time(vcpu); 806 kvm_request_guest_time_update(vcpu);
794 break; 807 break;
795 } 808 }
796 default: 809 default:
@@ -1000,6 +1013,7 @@ int kvm_dev_ioctl_check_extension(long ext)
1000 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 1013 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
1001 case KVM_CAP_SET_TSS_ADDR: 1014 case KVM_CAP_SET_TSS_ADDR:
1002 case KVM_CAP_EXT_CPUID: 1015 case KVM_CAP_EXT_CPUID:
1016 case KVM_CAP_CLOCKSOURCE:
1003 case KVM_CAP_PIT: 1017 case KVM_CAP_PIT:
1004 case KVM_CAP_NOP_IO_DELAY: 1018 case KVM_CAP_NOP_IO_DELAY:
1005 case KVM_CAP_MP_STATE: 1019 case KVM_CAP_MP_STATE:
@@ -1025,9 +1039,6 @@ int kvm_dev_ioctl_check_extension(long ext)
1025 case KVM_CAP_IOMMU: 1039 case KVM_CAP_IOMMU:
1026 r = iommu_found(); 1040 r = iommu_found();
1027 break; 1041 break;
1028 case KVM_CAP_CLOCKSOURCE:
1029 r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
1030 break;
1031 default: 1042 default:
1032 r = 0; 1043 r = 0;
1033 break; 1044 break;
@@ -1098,7 +1109,7 @@ out:
1098void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1109void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1099{ 1110{
1100 kvm_x86_ops->vcpu_load(vcpu, cpu); 1111 kvm_x86_ops->vcpu_load(vcpu, cpu);
1101 kvm_write_guest_time(vcpu); 1112 kvm_request_guest_time_update(vcpu);
1102} 1113}
1103 1114
1104void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 1115void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -2642,9 +2653,72 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2642} 2653}
2643EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); 2654EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
2644 2655
2656static void bounce_off(void *info)
2657{
2658 /* nothing */
2659}
2660
2661static unsigned int ref_freq;
2662static unsigned long tsc_khz_ref;
2663
2664static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
2665 void *data)
2666{
2667 struct cpufreq_freqs *freq = data;
2668 struct kvm *kvm;
2669 struct kvm_vcpu *vcpu;
2670 int i, send_ipi = 0;
2671
2672 if (!ref_freq)
2673 ref_freq = freq->old;
2674
2675 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
2676 return 0;
2677 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
2678 return 0;
2679 per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
2680
2681 spin_lock(&kvm_lock);
2682 list_for_each_entry(kvm, &vm_list, vm_list) {
2683 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
2684 vcpu = kvm->vcpus[i];
2685 if (!vcpu)
2686 continue;
2687 if (vcpu->cpu != freq->cpu)
2688 continue;
2689 if (!kvm_request_guest_time_update(vcpu))
2690 continue;
2691 if (vcpu->cpu != smp_processor_id())
2692 send_ipi++;
2693 }
2694 }
2695 spin_unlock(&kvm_lock);
2696
2697 if (freq->old < freq->new && send_ipi) {
2698 /*
2699 * We upscale the frequency. Must make sure the guest
2700 * doesn't see old kvmclock values while running with
2701 * the new frequency, otherwise we risk the guest seeing
2702 * time go backwards.
2703 *
2704 * In case we update the frequency for another cpu
2705 * (which might be in guest context) send an interrupt
2706 * to kick the cpu out of guest context. Next time
2707 * guest context is entered kvmclock will be updated,
2708 * so the guest will not see stale values.
2709 */
2710 smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
2711 }
2712 return 0;
2713}
2714
2715static struct notifier_block kvmclock_cpufreq_notifier_block = {
2716 .notifier_call = kvmclock_cpufreq_notifier
2717};
2718
2645int kvm_arch_init(void *opaque) 2719int kvm_arch_init(void *opaque)
2646{ 2720{
2647 int r; 2721 int r, cpu;
2648 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; 2722 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
2649 2723
2650 if (kvm_x86_ops) { 2724 if (kvm_x86_ops) {
@@ -2675,6 +2749,15 @@ int kvm_arch_init(void *opaque)
2675 kvm_mmu_set_base_ptes(PT_PRESENT_MASK); 2749 kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
2676 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, 2750 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
2677 PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); 2751 PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
2752
2753 for_each_possible_cpu(cpu)
2754 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
2755 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
2756 tsc_khz_ref = tsc_khz;
2757 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
2758 CPUFREQ_TRANSITION_NOTIFIER);
2759 }
2760
2678 return 0; 2761 return 0;
2679 2762
2680out: 2763out:
@@ -3010,6 +3093,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3010 if (vcpu->requests) { 3093 if (vcpu->requests) {
3011 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) 3094 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
3012 __kvm_migrate_timers(vcpu); 3095 __kvm_migrate_timers(vcpu);
3096 if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
3097 kvm_write_guest_time(vcpu);
3013 if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) 3098 if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
3014 kvm_mmu_sync_roots(vcpu); 3099 kvm_mmu_sync_roots(vcpu);
3015 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) 3100 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))