Diffstat (limited to 'arch/x86/kvm/vmx.c')
 arch/x86/kvm/vmx.c | 724 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 488 insertions(+), 236 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7bddfab12013..d48ec60ea421 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5,7 +5,7 @@
  * machines without emulation or binary translation.
  *
  * Copyright (C) 2006 Qumranet, Inc.
- * Copyright 2010 Red Hat, Inc. and/or its affilates.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  *
  * Authors:
  *   Avi Kivity   <avi@qumranet.com>
@@ -69,6 +69,9 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 static int __read_mostly vmm_exclusive = 1;
 module_param(vmm_exclusive, bool, S_IRUGO);
 
+static int __read_mostly yield_on_hlt = 1;
+module_param(yield_on_hlt, bool, S_IRUGO);
+
 #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST                          \
         (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
 #define KVM_GUEST_CR0_MASK                                             \
@@ -90,14 +93,14 @@ module_param(vmm_exclusive, bool, S_IRUGO);
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * ple_gap:    upper bound on the amount of time between two successive
  *             executions of PAUSE in a loop. Also indicate if ple enabled.
- *             According to test, this time is usually small than 41 cycles.
+ *             According to test, this time is usually smaller than 128 cycles.
  * ple_window: upper bound on the amount of time a guest is allowed to execute
  *             in a PAUSE loop. Tests indicate that most spinlocks are held for
  *             less than 2^12 cycles
  * Time is measured based on a counter that runs at the same rate as the TSC,
  * refer SDM volume 3b section 21.6.13 & 22.1.3.
  */
-#define KVM_VMX_DEFAULT_PLE_GAP    41
+#define KVM_VMX_DEFAULT_PLE_GAP    128
 #define KVM_VMX_DEFAULT_PLE_WINDOW 4096
 static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
 module_param(ple_gap, int, S_IRUGO);
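The PLE controls above are easiest to read against the guest code they target. A minimal sketch, not from this patch, of the pattern the hardware watches for: successive PAUSE executions in a spin loop land well under ple_gap cycles apart, and once the loop has spun for more than ple_window cycles the CPU forces a VM exit so the host can schedule the lock holder instead.

        /* Illustrative guest-side spin loop; PAUSEs here are only a few
         * cycles apart (well under ple_gap), so after ple_window cycles
         * of spinning the processor triggers a pause-loop exit. */
        static void guest_spin_wait(volatile int *locked)
        {
                while (*locked)
                        asm volatile("pause");
        }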
@@ -125,7 +128,11 @@ struct vcpu_vmx {
         unsigned long         host_rsp;
         int                   launched;
         u8                    fail;
+        u8                    cpl;
+        bool                  nmi_known_unmasked;
+        u32                   exit_intr_info;
         u32                   idt_vectoring_info;
+        ulong                 rflags;
         struct shared_msr_entry *guest_msrs;
         int                   nmsrs;
         int                   save_nmsrs;
@@ -154,12 +161,11 @@ struct vcpu_vmx {
                         u32 limit;
                         u32 ar;
                 } tr, es, ds, fs, gs;
-                struct {
-                        bool pending;
-                        u8 vector;
-                        unsigned rip;
-                } irq;
         } rmode;
+        struct {
+                u32 bitmask; /* 4 bits per segment (1 bit per field) */
+                struct kvm_save_segment seg[8];
+        } segment_cache;
         int vpid;
         bool emulation_required;
 
@@ -172,15 +178,25 @@ struct vcpu_vmx {
         bool rdtscp_enabled;
 };
 
+enum segment_cache_field {
+        SEG_FIELD_SEL = 0,
+        SEG_FIELD_BASE = 1,
+        SEG_FIELD_LIMIT = 2,
+        SEG_FIELD_AR = 3,
+
+        SEG_FIELD_NR = 4
+};
+
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 {
         return container_of(vcpu, struct vcpu_vmx, vcpu);
 }
 
-static int init_rmode(struct kvm *kvm);
 static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
+static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
+static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -192,6 +208,8 @@ static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 
+static bool cpu_has_load_ia32_efer;
+
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
 
@@ -476,7 +494,7 @@ static void vmcs_clear(struct vmcs *vmcs)
         u8 error;
 
         asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0"
-                      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
+                      : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
                       : "cc", "memory");
         if (error)
                 printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
@@ -489,7 +507,7 @@ static void vmcs_load(struct vmcs *vmcs)
         u8 error;
 
         asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
-                      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
+                      : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
                       : "cc", "memory");
         if (error)
                 printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
@@ -505,7 +523,6 @@ static void __vcpu_clear(void *arg)
                 vmcs_clear(vmx->vmcs);
         if (per_cpu(current_vmcs, cpu) == vmx->vmcs)
                 per_cpu(current_vmcs, cpu) = NULL;
-        rdtscll(vmx->vcpu.arch.host_tsc);
         list_del(&vmx->local_vcpus_link);
         vmx->vcpu.cpu = -1;
         vmx->launched = 0;
@@ -570,10 +587,10 @@ static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
 
 static unsigned long vmcs_readl(unsigned long field)
 {
-        unsigned long value;
+        unsigned long value = 0;
 
         asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX)
-                      : "=a"(value) : "d"(field) : "cc");
+                      : "+a"(value) : "d"(field) : "cc");
         return value;
 }
 
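Two separate constraint fixes are folded in here. In vmcs_clear()/vmcs_load(), "=g" lets the compiler pick operands that setna cannot encode, so "=qm" restricts it to byte registers or memory. In vmcs_readl(), "=a" declares the output write-only, but VMREAD leaves the destination untouched when it fails; initialising the variable and switching to "+a" makes the failure value well defined. A stripped-down sketch using a plain mnemonic instead of the kernel's __ex()/ASM_VMX_* fault-handling wrappers:

        static unsigned long vmread_sketch(unsigned long field)
        {
                unsigned long value = 0;        /* survives a failed VMREAD */

                asm volatile("vmread %1, %0"
                             : "+a"(value)      /* read-write: 0 stays if not written */
                             : "d"(field)
                             : "cc");
                return value;
        }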
@@ -642,6 +659,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask)
         vmcs_writel(field, vmcs_readl(field) | mask);
 }
 
+static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
+{
+        vmx->segment_cache.bitmask = 0;
+}
+
+static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
+                                       unsigned field)
+{
+        bool ret;
+        u32 mask = 1 << (seg * SEG_FIELD_NR + field);
+
+        if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) {
+                vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS);
+                vmx->segment_cache.bitmask = 0;
+        }
+        ret = vmx->segment_cache.bitmask & mask;
+        vmx->segment_cache.bitmask |= mask;
+        return ret;
+}
+
+static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
+{
+        u16 *p = &vmx->segment_cache.seg[seg].selector;
+
+        if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
+                *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
+        return *p;
+}
+
+static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
+{
+        ulong *p = &vmx->segment_cache.seg[seg].base;
+
+        if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
+                *p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
+        return *p;
+}
+
+static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
+{
+        u32 *p = &vmx->segment_cache.seg[seg].limit;
+
+        if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
+                *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
+        return *p;
+}
+
+static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
+{
+        u32 *p = &vmx->segment_cache.seg[seg].ar;
+
+        if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
+                *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
+        return *p;
+}
+
 static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 {
         u32 eb;
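The accessors above give each guest segment field a per-vcpu cache: the first read after a VM exit does the VMREAD and sets a bit in segment_cache.bitmask, and later reads on the same exit are plain memory loads. The VCPU_EXREG_SEGMENTS bit in regs_avail invalidates the whole cache across exits. A sketch of the caller-side effect, using only helpers introduced above:

        static int guest_cs_dpl(struct vcpu_vmx *vmx)
        {
                /* First call after an exit performs the VMREAD and caches
                 * it; repeat calls on the same exit cost a memory load. */
                u32 ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_CS);

                return (ar >> 5) & 3;   /* DPL lives in AR bits 6:5 */
        }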
@@ -666,6 +739,12 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
         unsigned i;
         struct msr_autoload *m = &vmx->msr_autoload;
 
+        if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
+                vmcs_clear_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
+                vmcs_clear_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
+                return;
+        }
+
         for (i = 0; i < m->nr; ++i)
                 if (m->guest[i].index == msr)
                         break;
@@ -685,6 +764,14 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
         unsigned i;
         struct msr_autoload *m = &vmx->msr_autoload;
 
+        if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
+                vmcs_write64(GUEST_IA32_EFER, guest_val);
+                vmcs_write64(HOST_IA32_EFER, host_val);
+                vmcs_set_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
+                vmcs_set_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
+                return;
+        }
+
         for (i = 0; i < m->nr; ++i)
                 if (m->guest[i].index == msr)
                         break;
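When the dedicated VM_ENTRY/VM_EXIT_LOAD_IA32_EFER controls are absent, both functions above fall through to the generic MSR autoload area: a pair of index/value tables the CPU walks on every entry and exit, which is slower than a native control bit and is why EFER gets the early return. A rough sketch of the fallback's shape; field names are abridged and the array size is an assumption, the real layout is struct msr_autoload in vcpu_vmx:

        struct autoload_entry { u32 index; u64 value; };

        struct autoload_sketch {
                unsigned nr;                     /* entries in use */
                struct autoload_entry guest[8];  /* loaded on VM entry */
                struct autoload_entry host[8];   /* loaded on VM exit  */
        };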
@@ -706,11 +793,10 @@ static void reload_tss(void)
         /*
          * VT restores TR but not its size.  Useless.
          */
-        struct desc_ptr gdt;
+        struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
         struct desc_struct *descs;
 
-        native_store_gdt(&gdt);
-        descs = (void *)gdt.address;
+        descs = (void *)gdt->address;
         descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
         load_TR_desc();
 }
@@ -753,7 +839,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 
 static unsigned long segment_base(u16 selector)
 {
-        struct desc_ptr gdt;
+        struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
         struct desc_struct *d;
         unsigned long table_base;
         unsigned long v;
@@ -761,8 +847,7 @@ static unsigned long segment_base(u16 selector)
         if (!(selector & ~3))
                 return 0;
 
-        native_store_gdt(&gdt);
-        table_base = gdt.address;
+        table_base = gdt->address;
 
         if (selector & 4) {           /* from ldt */
                 u16 ldt_selector = kvm_read_ldt();
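These hunks, and the vmx_vcpu_load() change further down, share one idea: stop issuing SGDT via native_store_gdt() on hot paths and read a cached per-cpu copy instead. A sketch of the cache this assumes; the variable name matches the patch, but the fill site shown here is an assumption (the GDT pointer must be stored once per CPU, for instance when VMX is enabled on it):

        static DEFINE_PER_CPU(struct desc_ptr, host_gdt);

        static void cache_host_gdt(void)
        {
                native_store_gdt(&__get_cpu_var(host_gdt));   /* one SGDT */
        }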
@@ -828,10 +913,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 #endif
 
 #ifdef CONFIG_X86_64
-        if (is_long_mode(&vmx->vcpu)) {
-                rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+        rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+        if (is_long_mode(&vmx->vcpu))
                 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
-        }
 #endif
         for (i = 0; i < vmx->save_nmsrs; ++i)
                 kvm_set_shared_msr(vmx->guest_msrs[i].index,
@@ -846,23 +930,23 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 
         ++vmx->vcpu.stat.host_state_reload;
         vmx->host_state.loaded = 0;
-        if (vmx->host_state.fs_reload_needed)
-                loadsegment(fs, vmx->host_state.fs_sel);
+#ifdef CONFIG_X86_64
+        if (is_long_mode(&vmx->vcpu))
+                rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+#endif
         if (vmx->host_state.gs_ldt_reload_needed) {
                 kvm_load_ldt(vmx->host_state.ldt_sel);
 #ifdef CONFIG_X86_64
                 load_gs_index(vmx->host_state.gs_sel);
-                wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
 #else
                 loadsegment(gs, vmx->host_state.gs_sel);
 #endif
         }
+        if (vmx->host_state.fs_reload_needed)
+                loadsegment(fs, vmx->host_state.fs_sel);
         reload_tss();
 #ifdef CONFIG_X86_64
-        if (is_long_mode(&vmx->vcpu)) {
-                rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
-                wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
-        }
+        wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
 #endif
         if (current_thread_info()->status & TS_USEDFPU)
                 clts();
@@ -883,7 +967,6 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
 static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
-        u64 tsc_this, delta, new_offset;
         u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
 
         if (!vmm_exclusive)
@@ -897,37 +980,24 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
         }
 
         if (vcpu->cpu != cpu) {
-                struct desc_ptr dt;
+                struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
                 unsigned long sysenter_esp;
 
-                kvm_migrate_timers(vcpu);
                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
                 local_irq_disable();
                 list_add(&vmx->local_vcpus_link,
                          &per_cpu(vcpus_on_cpu, cpu));
                 local_irq_enable();
 
-                vcpu->cpu = cpu;
                 /*
                  * Linux uses per-cpu TSS and GDT, so set these when switching
                  * processors.
                  */
                 vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
-                native_store_gdt(&dt);
-                vmcs_writel(HOST_GDTR_BASE, dt.address);   /* 22.2.4 */
+                vmcs_writel(HOST_GDTR_BASE, gdt->address);   /* 22.2.4 */
 
                 rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
                 vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
-
-                /*
-                 * Make sure the time stamp counter is monotonous.
-                 */
-                rdtscll(tsc_this);
-                if (tsc_this < vcpu->arch.host_tsc) {
-                        delta = vcpu->arch.host_tsc - tsc_this;
-                        new_offset = vmcs_read64(TSC_OFFSET) + delta;
-                        vmcs_write64(TSC_OFFSET, new_offset);
-                }
         }
 }
 
@@ -972,17 +1042,24 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 {
         unsigned long rflags, save_rflags;
 
-        rflags = vmcs_readl(GUEST_RFLAGS);
-        if (to_vmx(vcpu)->rmode.vm86_active) {
-                rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
-                save_rflags = to_vmx(vcpu)->rmode.save_rflags;
-                rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+        if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
+                __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
+                rflags = vmcs_readl(GUEST_RFLAGS);
+                if (to_vmx(vcpu)->rmode.vm86_active) {
+                        rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
+                        save_rflags = to_vmx(vcpu)->rmode.save_rflags;
+                        rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+                }
+                to_vmx(vcpu)->rflags = rflags;
         }
-        return rflags;
+        return to_vmx(vcpu)->rflags;
 }
 
 static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
+        __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
+        __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+        to_vmx(vcpu)->rflags = rflags;
         if (to_vmx(vcpu)->rmode.vm86_active) {
                 to_vmx(vcpu)->rmode.save_rflags = rflags;
                 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
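RFLAGS joins the lazily-cached registers here: a bit in regs_avail records whether the cached copy is current, the getter refills on demand, and the setter updates the cache while invalidating the derived CPL. The generic pattern, with hypothetical refill_from_vmcs()/cached_value() helpers standing in for the VMREAD and the cache field:

        static ulong read_cached(struct kvm_vcpu *vcpu, int reg)
        {
                if (!test_bit(reg, (ulong *)&vcpu->arch.regs_avail)) {
                        __set_bit(reg, (ulong *)&vcpu->arch.regs_avail);
                        refill_from_vmcs(vcpu, reg);   /* slow path: VMREAD */
                }
                return cached_value(vcpu, reg);        /* fast path: load */
        }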
@@ -1031,6 +1108,17 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
         vmx_set_interrupt_shadow(vcpu, 0);
 }
 
+static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
+{
+        /* Ensure that we clear the HLT state in the VMCS.  We don't need to
+         * explicitly skip the instruction because if the HLT state is set, then
+         * the instruction is already executing and RIP has already been
+         * advanced. */
+        if (!yield_on_hlt &&
+            vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
+                vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
+}
+
 static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
                                 bool has_error_code, u32 error_code,
                                 bool reinject)
@@ -1044,16 +1132,11 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
         }
 
         if (vmx->rmode.vm86_active) {
-                vmx->rmode.irq.pending = true;
-                vmx->rmode.irq.vector = nr;
-                vmx->rmode.irq.rip = kvm_rip_read(vcpu);
+                int inc_eip = 0;
                 if (kvm_exception_is_soft(nr))
-                        vmx->rmode.irq.rip +=
-                                vmx->vcpu.arch.event_exit_inst_len;
-                intr_info |= INTR_TYPE_SOFT_INTR;
-                vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
-                vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
-                kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
+                        inc_eip = vcpu->arch.event_exit_inst_len;
+                if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
+                        kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
                 return;
         }
 
@@ -1065,6 +1148,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
                 intr_info |= INTR_TYPE_HARD_EXCEPTION;
 
         vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+        vmx_clear_hlt(vcpu);
 }
 
 static bool vmx_rdtscp_supported(void)
@@ -1149,12 +1233,32 @@ static u64 guest_read_tsc(void)
 }
 
 /*
- * writes 'guest_tsc' into guest's timestamp counter "register"
- * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc
+ * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ
+ * ioctl. In this case the call-back should update internal vmx state to make
+ * the changes effective.
+ */
+static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
+{
+        /* Nothing to do here */
+}
+
+/*
+ * writes 'offset' into guest's timestamp counter offset register
  */
-static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
+static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
-        vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc);
+        vmcs_write64(TSC_OFFSET, offset);
+}
+
+static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
+{
+        u64 offset = vmcs_read64(TSC_OFFSET);
+        vmcs_write64(TSC_OFFSET, offset + adjustment);
+}
+
+static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
+{
+        return target_tsc - native_read_tsc();
 }
 
 /*
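The arithmetic behind vmx_compute_tsc_offset() follows from the SDM rule that a guest RDTSC returns host_tsc + TSC_OFFSET. To make the guest observe target_tsc at this instant, the offset must be target_tsc - native_read_tsc(); afterwards the guest clock simply advances in step with the host's:

        /* What a guest RDTSC yields once the offset is programmed: */
        static u64 expected_guest_tsc(u64 tsc_offset)
        {
                return native_read_tsc() + tsc_offset;
                /* == target_tsc + host cycles elapsed since the write */
        }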
@@ -1227,7 +1331,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
         struct shared_msr_entry *msr;
-        u64 host_tsc;
         int ret = 0;
 
         switch (msr_index) {
@@ -1237,9 +1340,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
                 break;
 #ifdef CONFIG_X86_64
         case MSR_FS_BASE:
+                vmx_segment_cache_clear(vmx);
                 vmcs_writel(GUEST_FS_BASE, data);
                 break;
         case MSR_GS_BASE:
+                vmx_segment_cache_clear(vmx);
                 vmcs_writel(GUEST_GS_BASE, data);
                 break;
         case MSR_KERNEL_GS_BASE:
@@ -1257,8 +1362,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
                 vmcs_writel(GUEST_SYSENTER_ESP, data);
                 break;
         case MSR_IA32_TSC:
-                rdtscll(host_tsc);
-                guest_write_tsc(data, host_tsc);
+                kvm_write_tsc(vcpu, data);
                 break;
         case MSR_IA32_CR_PAT:
                 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
@@ -1328,16 +1432,25 @@ static __init int vmx_disabled_by_bios(void)
 
         rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
         if (msr & FEATURE_CONTROL_LOCKED) {
+                /* launched w/ TXT and VMX disabled */
                 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
                         && tboot_enabled())
                         return 1;
+                /* launched w/o TXT and VMX only enabled w/ TXT */
+                if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
+                        && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
+                        && !tboot_enabled()) {
+                        printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
+                                "activate TXT before enabling KVM\n");
+                        return 1;
+                }
+                /* launched w/o TXT and VMX disabled */
                 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
                         && !tboot_enabled())
                         return 1;
         }
 
         return 0;
-        /* locked but not enabled */
 }
 
 static void kvm_cpu_vmxon(u64 addr)
@@ -1427,6 +1540,14 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
         return 0;
 }
 
+static __init bool allow_1_setting(u32 msr, u32 ctl)
+{
+        u32 vmx_msr_low, vmx_msr_high;
+
+        rdmsr(msr, vmx_msr_low, vmx_msr_high);
+        return vmx_msr_high & ctl;
+}
+
 static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 {
         u32 vmx_msr_low, vmx_msr_high;
@@ -1443,7 +1564,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                                 &_pin_based_exec_control) < 0)
                 return -EIO;
 
-        min = CPU_BASED_HLT_EXITING |
+        min =
 #ifdef CONFIG_X86_64
               CPU_BASED_CR8_LOAD_EXITING |
               CPU_BASED_CR8_STORE_EXITING |
@@ -1456,6 +1577,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
               CPU_BASED_MWAIT_EXITING |
               CPU_BASED_MONITOR_EXITING |
               CPU_BASED_INVLPG_EXITING;
+
+        if (yield_on_hlt)
+                min |= CPU_BASED_HLT_EXITING;
+
         opt = CPU_BASED_TPR_SHADOW |
               CPU_BASED_USE_MSR_BITMAPS |
               CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -1537,6 +1662,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
         vmcs_conf->vmexit_ctrl  = _vmexit_control;
         vmcs_conf->vmentry_ctrl = _vmentry_control;
 
+        cpu_has_load_ia32_efer =
+                allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
+                                VM_ENTRY_LOAD_IA32_EFER)
+                && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
+                                   VM_EXIT_LOAD_IA32_EFER);
+
         return 0;
 }
 
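allow_1_setting() relies on the layout of the VMX capability MSRs: the low 32 bits report which control bits are allowed to be 0, the high 32 bits which are allowed to be 1. cpu_has_load_ia32_efer therefore demands the 1-setting in both the entry and the exit controls, since loading guest EFER on entry without restoring host EFER on exit would be useless. A worked check for one direction, under those assumptions:

        static bool entry_can_load_efer(void)
        {
                u32 lo, hi;     /* allowed-0 / allowed-1 halves */

                rdmsr(MSR_IA32_VMX_ENTRY_CTLS, lo, hi);
                return hi & VM_ENTRY_LOAD_IA32_EFER;
        }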
@@ -1657,6 +1788,9 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
         vmx->emulation_required = 1;
         vmx->rmode.vm86_active = 0;
 
+        vmx_segment_cache_clear(vmx);
+
+        vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector);
         vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base);
         vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit);
         vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
@@ -1679,6 +1813,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
         fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs);
         fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs);
 
+        vmx_segment_cache_clear(vmx);
+
         vmcs_write16(GUEST_SS_SELECTOR, 0);
         vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
 
@@ -1710,9 +1846,13 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
         save->limit = vmcs_read32(sf->limit);
         save->ar = vmcs_read32(sf->ar_bytes);
         vmcs_write16(sf->selector, save->base >> 4);
-        vmcs_write32(sf->base, save->base & 0xfffff);
+        vmcs_write32(sf->base, save->base & 0xffff0);
         vmcs_write32(sf->limit, 0xffff);
         vmcs_write32(sf->ar_bytes, 0xf3);
+        if (save->base & 0xf)
+                printk_once(KERN_WARNING "kvm: segment base is not paragraph"
+                            " aligned when entering protected mode (seg=%d)",
+                            seg);
 }
 
 static void enter_rmode(struct kvm_vcpu *vcpu)
@@ -1726,6 +1866,21 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
         vmx->emulation_required = 1;
         vmx->rmode.vm86_active = 1;
 
+        /*
+         * Very old userspace does not call KVM_SET_TSS_ADDR before entering
+         * vcpu. Call it here with phys address pointing 16M below 4G.
+         */
+        if (!vcpu->kvm->arch.tss_addr) {
+                printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
+                             "called before entering vcpu\n");
+                srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+                vmx_set_tss_addr(vcpu->kvm, 0xfeffd000);
+                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+        }
+
+        vmx_segment_cache_clear(vmx);
+
+        vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR);
         vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
         vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
 
@@ -1764,7 +1919,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 
 continue_rmode:
         kvm_mmu_reset_context(vcpu);
-        init_rmode(vcpu->kvm);
 }
 
 static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -1802,6 +1956,8 @@
 {
         u32 guest_tr_ar;
 
+        vmx_segment_cache_clear(to_vmx(vcpu));
+
         guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
         if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
                 printk(KERN_DEBUG "%s: tss fixup for long mode. \n",
@@ -1841,6 +1997,13 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
         vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
 }
 
+static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
+{
+        if (enable_ept && is_paging(vcpu))
+                vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+        __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+}
+
 static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 {
         ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
@@ -1856,20 +2019,20 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
                 return;
 
         if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
-                vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
-                vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
-                vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
-                vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]);
+                vmcs_write64(GUEST_PDPTR0, vcpu->arch.mmu.pdptrs[0]);
+                vmcs_write64(GUEST_PDPTR1, vcpu->arch.mmu.pdptrs[1]);
+                vmcs_write64(GUEST_PDPTR2, vcpu->arch.mmu.pdptrs[2]);
+                vmcs_write64(GUEST_PDPTR3, vcpu->arch.mmu.pdptrs[3]);
         }
 }
 
 static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
 {
         if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
-                vcpu->arch.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
-                vcpu->arch.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
-                vcpu->arch.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
-                vcpu->arch.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
+                vcpu->arch.mmu.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
+                vcpu->arch.mmu.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
+                vcpu->arch.mmu.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
+                vcpu->arch.mmu.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
         }
 
         __set_bit(VCPU_EXREG_PDPTR,
@@ -1884,6 +2047,8 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
                                         unsigned long cr0,
                                         struct kvm_vcpu *vcpu)
 {
+        if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
+                vmx_decache_cr3(vcpu);
         if (!(cr0 & X86_CR0_PG)) {
                 /* From paging/starting to nonpaging */
                 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
@@ -1941,6 +2106,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
         vmcs_writel(CR0_READ_SHADOW, cr0);
         vmcs_writel(GUEST_CR0, hw_cr0);
         vcpu->arch.cr0 = cr0;
+        __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
 }
 
 static u64 construct_eptp(unsigned long root_hpa)
@@ -1964,7 +2130,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
         if (enable_ept) {
                 eptp = construct_eptp(cr3);
                 vmcs_write64(EPT_POINTER, eptp);
-                guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
+                guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) :
                         vcpu->kvm->arch.ept_identity_map_addr;
                 ept_load_pdptrs(vcpu);
         }
@@ -1992,23 +2158,39 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
         vmcs_writel(GUEST_CR4, hw_cr4);
 }
 
-static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
-{
-        struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
-
-        return vmcs_readl(sf->base);
-}
-
 static void vmx_get_segment(struct kvm_vcpu *vcpu,
                             struct kvm_segment *var, int seg)
 {
-        struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+        struct vcpu_vmx *vmx = to_vmx(vcpu);
+        struct kvm_save_segment *save;
         u32 ar;
 
-        var->base = vmcs_readl(sf->base);
-        var->limit = vmcs_read32(sf->limit);
-        var->selector = vmcs_read16(sf->selector);
-        ar = vmcs_read32(sf->ar_bytes);
+        if (vmx->rmode.vm86_active
+            && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES
+                || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS
+                || seg == VCPU_SREG_GS)
+            && !emulate_invalid_guest_state) {
+                switch (seg) {
+                case VCPU_SREG_TR: save = &vmx->rmode.tr; break;
+                case VCPU_SREG_ES: save = &vmx->rmode.es; break;
+                case VCPU_SREG_DS: save = &vmx->rmode.ds; break;
+                case VCPU_SREG_FS: save = &vmx->rmode.fs; break;
+                case VCPU_SREG_GS: save = &vmx->rmode.gs; break;
+                default: BUG();
+                }
+                var->selector = save->selector;
+                var->base = save->base;
+                var->limit = save->limit;
+                ar = save->ar;
+                if (seg == VCPU_SREG_TR
+                    || var->selector == vmx_read_guest_seg_selector(vmx, seg))
+                        goto use_saved_rmode_seg;
+        }
+        var->base = vmx_read_guest_seg_base(vmx, seg);
+        var->limit = vmx_read_guest_seg_limit(vmx, seg);
+        var->selector = vmx_read_guest_seg_selector(vmx, seg);
+        ar = vmx_read_guest_seg_ar(vmx, seg);
+use_saved_rmode_seg:
         if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
                 ar = 0;
         var->type = ar & 15;
@@ -2022,17 +2204,39 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
         var->unusable = (ar >> 16) & 1;
 }
 
-static int vmx_get_cpl(struct kvm_vcpu *vcpu)
+static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
+{
+        struct kvm_segment s;
+
+        if (to_vmx(vcpu)->rmode.vm86_active) {
+                vmx_get_segment(vcpu, &s, seg);
+                return s.base;
+        }
+        return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
+}
+
+static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
 {
         if (!is_protmode(vcpu))
                 return 0;
 
-        if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
+        if (!is_long_mode(vcpu)
+            && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */
                 return 3;
 
-        return vmcs_read16(GUEST_CS_SELECTOR) & 3;
+        return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3;
 }
 
+static int vmx_get_cpl(struct kvm_vcpu *vcpu)
+{
+        if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
+                __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+                to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu);
+        }
+        return to_vmx(vcpu)->cpl;
+}
+
+
 static u32 vmx_segment_access_rights(struct kvm_segment *var)
 {
         u32 ar;
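CPL now follows the same lazy scheme: __vmx_get_cpl() derives it (0 outside protected mode, 3 in virtual-8086 mode, otherwise CS.selector & 3) and vmx_get_cpl() caches the result under VCPU_EXREG_CPL. The price of the cache is an invalidation contract: every path that can change the privilege level must clear the bit, which this patch open-codes in vmx_set_rflags(), vmx_set_cr0() and vmx_set_segment(). Written as a single helper, hypothetical here, it would be:

        static void vmx_invalidate_cpl(struct kvm_vcpu *vcpu)
        {
                /* hypothetical helper; the patch open-codes this clear */
                __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
        }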
@@ -2062,7 +2266,10 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
         struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
         u32 ar;
 
+        vmx_segment_cache_clear(vmx);
+
         if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
+                vmcs_write16(sf->selector, var->selector);
                 vmx->rmode.tr.selector = var->selector;
                 vmx->rmode.tr.base = var->base;
                 vmx->rmode.tr.limit = var->limit;
@@ -2097,11 +2304,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
                 ar |= 0x1; /* Accessed */
 
         vmcs_write32(sf->ar_bytes, ar);
+        __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
 }
 
 static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
 {
-        u32 ar = vmcs_read32(GUEST_CS_AR_BYTES);
+        u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
 
         *db = (ar >> 14) & 1;
         *l = (ar >> 13) & 1;
@@ -2323,11 +2531,12 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu)
 
 static int init_rmode_tss(struct kvm *kvm)
 {
-        gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
+        gfn_t fn;
         u16 data = 0;
-        int ret = 0;
-        int r;
+        int r, idx, ret = 0;
 
+        idx = srcu_read_lock(&kvm->srcu);
+        fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
         r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
         if (r < 0)
                 goto out;
@@ -2351,12 +2560,13 @@ static int init_rmode_tss(struct kvm *kvm)
 
         ret = 1;
 out:
+        srcu_read_unlock(&kvm->srcu, idx);
         return ret;
 }
 
 static int init_rmode_identity_map(struct kvm *kvm)
 {
-        int i, r, ret;
+        int i, idx, r, ret;
         pfn_t identity_map_pfn;
         u32 tmp;
 
@@ -2371,6 +2581,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
                 return 1;
         ret = 0;
         identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
+        idx = srcu_read_lock(&kvm->srcu);
         r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
         if (r < 0)
                 goto out;
@@ -2386,6 +2597,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
         kvm->arch.ept_identity_pagetable_done = true;
         ret = 1;
 out:
+        srcu_read_unlock(&kvm->srcu, idx);
         return ret;
 }
 
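init_rmode_tss() and init_rmode_identity_map() can now be reached from vmx_set_tss_addr(), outside the vcpu loop where no SRCU read lock is held, so each takes kvm->srcu around its memslot accesses itself. The pattern both adopt:

        static int touch_guest_page(struct kvm *kvm, gfn_t fn)
        {
                int r, idx;

                idx = srcu_read_lock(&kvm->srcu);       /* pin the memslots */
                r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
                srcu_read_unlock(&kvm->srcu, idx);
                return r;
        }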
@@ -2515,7 +2727,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 {
         u32 host_sysenter_cs, msr_low, msr_high;
         u32 junk;
-        u64 host_pat, tsc_this, tsc_base;
+        u64 host_pat;
         unsigned long a;
         struct desc_ptr dt;
         int i;
@@ -2656,32 +2868,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
         vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
         vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
 
-        tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
-        rdtscll(tsc_this);
-        if (tsc_this < vmx->vcpu.kvm->arch.vm_init_tsc)
-                tsc_base = tsc_this;
-
-        guest_write_tsc(0, tsc_base);
+        kvm_write_tsc(&vmx->vcpu, 0);
 
         return 0;
 }
 
-static int init_rmode(struct kvm *kvm)
-{
-        int idx, ret = 0;
-
-        idx = srcu_read_lock(&kvm->srcu);
-        if (!init_rmode_tss(kvm))
-                goto exit;
-        if (!init_rmode_identity_map(kvm))
-                goto exit;
-
-        ret = 1;
-exit:
-        srcu_read_unlock(&kvm->srcu, idx);
-        return ret;
-}
-
 static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2689,10 +2880,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
         int ret;
 
         vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
-        if (!init_rmode(vmx->vcpu.kvm)) {
-                ret = -ENOMEM;
-                goto out;
-        }
 
         vmx->rmode.vm86_active = 0;
 
@@ -2709,6 +2896,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
         if (ret != 0)
                 goto out;
 
+        vmx_segment_cache_clear(vmx);
+
         seg_setup(VCPU_SREG_CS);
         /*
          * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
@@ -2757,7 +2946,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
         vmcs_writel(GUEST_IDTR_BASE, 0);
         vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
 
-        vmcs_write32(GUEST_ACTIVITY_STATE, 0);
+        vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
         vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
         vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
 
@@ -2772,7 +2961,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
                 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
                 if (vm_need_tpr_shadow(vmx->vcpu.kvm))
                         vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
-                                     page_to_phys(vmx->vcpu.arch.apic->regs_page));
+                                     __pa(vmx->vcpu.arch.apic->regs));
                 vmcs_write32(TPR_THRESHOLD, 0);
         }
 
@@ -2819,6 +3008,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
                 return;
         }
 
+        if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+                enable_irq_window(vcpu);
+                return;
+        }
         cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
         cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
         vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
@@ -2834,16 +3027,11 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
 
         ++vcpu->stat.irq_injections;
         if (vmx->rmode.vm86_active) {
-                vmx->rmode.irq.pending = true;
-                vmx->rmode.irq.vector = irq;
-                vmx->rmode.irq.rip = kvm_rip_read(vcpu);
+                int inc_eip = 0;
                 if (vcpu->arch.interrupt.soft)
-                        vmx->rmode.irq.rip +=
-                                vmx->vcpu.arch.event_exit_inst_len;
-                vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-                             irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK);
-                vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
-                kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
+                        inc_eip = vcpu->arch.event_exit_inst_len;
+                if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE)
+                        kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
                 return;
         }
         intr = irq | INTR_INFO_VALID_MASK;
@@ -2854,6 +3042,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
         } else
                 intr |= INTR_TYPE_EXT_INTR;
         vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
+        vmx_clear_hlt(vcpu);
 }
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -2874,19 +3063,15 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
         }
 
         ++vcpu->stat.nmi_injections;
+        vmx->nmi_known_unmasked = false;
         if (vmx->rmode.vm86_active) {
-                vmx->rmode.irq.pending = true;
-                vmx->rmode.irq.vector = NMI_VECTOR;
-                vmx->rmode.irq.rip = kvm_rip_read(vcpu);
-                vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-                             NMI_VECTOR | INTR_TYPE_SOFT_INTR |
-                             INTR_INFO_VALID_MASK);
-                vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
-                kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
+                if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
+                        kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
                 return;
         }
         vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
                      INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
+        vmx_clear_hlt(vcpu);
 }
 
 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -2895,13 +3080,16 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
                 return 0;
 
         return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
-                 (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI));
+                 (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
+                  | GUEST_INTR_STATE_NMI));
 }
 
 static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
 {
         if (!cpu_has_virtual_nmis())
                 return to_vmx(vcpu)->soft_vnmi_blocked;
+        if (to_vmx(vcpu)->nmi_known_unmasked)
+                return false;
         return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
 }
 
@@ -2915,6 +3103,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
                         vmx->vnmi_blocked_time = 0;
                 }
         } else {
+                vmx->nmi_known_unmasked = !masked;
                 if (masked)
                         vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
                                       GUEST_INTR_STATE_NMI);
@@ -2945,6 +3134,9 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | |||
2945 | if (ret) | 3134 | if (ret) |
2946 | return ret; | 3135 | return ret; |
2947 | kvm->arch.tss_addr = addr; | 3136 | kvm->arch.tss_addr = addr; |
3137 | if (!init_rmode_tss(kvm)) | ||
3138 | return -ENOMEM; | ||
3139 | |||
2948 | return 0; | 3140 | return 0; |
2949 | } | 3141 | } |
2950 | 3142 | ||
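Rebuilding the real-mode TSS here means it happens as soon as userspace sets the address, instead of being deferred to later setup. For context, the trigger is the KVM_SET_TSS_ADDR vm ioctl; a sketch of the userspace side, using the address QEMU conventionally picks just below the BIOS region (the value is illustrative, not mandated):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int set_tss(int vm_fd)
	{
		/* three pages for the real-mode TSS; init_rmode_tss() now
		 * fills them in during this ioctl */
		return ioctl(vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000UL);
	}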
@@ -2956,7 +3148,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
2956 | * Cause the #SS fault with 0 error code in VM86 mode. | 3148 | * Cause the #SS fault with 0 error code in VM86 mode. |
2957 | */ | 3149 | */ |
2958 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) | 3150 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) |
2959 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE) | 3151 | if (emulate_instruction(vcpu, 0) == EMULATE_DONE) |
2960 | return 1; | 3152 | return 1; |
2961 | /* | 3153 | /* |
2962 | * Forward all other exceptions that are valid in real mode. | 3154 | * Forward all other exceptions that are valid in real mode. |
@@ -3029,7 +3221,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
3029 | enum emulation_result er; | 3221 | enum emulation_result er; |
3030 | 3222 | ||
3031 | vect_info = vmx->idt_vectoring_info; | 3223 | vect_info = vmx->idt_vectoring_info; |
3032 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 3224 | intr_info = vmx->exit_intr_info; |
3033 | 3225 | ||
3034 | if (is_machine_check(intr_info)) | 3226 | if (is_machine_check(intr_info)) |
3035 | return handle_machine_check(vcpu); | 3227 | return handle_machine_check(vcpu); |
@@ -3053,14 +3245,13 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
3053 | } | 3245 | } |
3054 | 3246 | ||
3055 | if (is_invalid_opcode(intr_info)) { | 3247 | if (is_invalid_opcode(intr_info)) { |
3056 | er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD); | 3248 | er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); |
3057 | if (er != EMULATE_DONE) | 3249 | if (er != EMULATE_DONE) |
3058 | kvm_queue_exception(vcpu, UD_VECTOR); | 3250 | kvm_queue_exception(vcpu, UD_VECTOR); |
3059 | return 1; | 3251 | return 1; |
3060 | } | 3252 | } |
3061 | 3253 | ||
3062 | error_code = 0; | 3254 | error_code = 0; |
3063 | rip = kvm_rip_read(vcpu); | ||
3064 | if (intr_info & INTR_INFO_DELIVER_CODE_MASK) | 3255 | if (intr_info & INTR_INFO_DELIVER_CODE_MASK) |
3065 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); | 3256 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); |
3066 | if (is_page_fault(intr_info)) { | 3257 | if (is_page_fault(intr_info)) { |
@@ -3072,7 +3263,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
3072 | 3263 | ||
3073 | if (kvm_event_needs_reinjection(vcpu)) | 3264 | if (kvm_event_needs_reinjection(vcpu)) |
3074 | kvm_mmu_unprotect_page_virt(vcpu, cr2); | 3265 | kvm_mmu_unprotect_page_virt(vcpu, cr2); |
3075 | return kvm_mmu_page_fault(vcpu, cr2, error_code); | 3266 | return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0); |
3076 | } | 3267 | } |
3077 | 3268 | ||
3078 | if (vmx->rmode.vm86_active && | 3269 | if (vmx->rmode.vm86_active && |
@@ -3107,6 +3298,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
3107 | vmx->vcpu.arch.event_exit_inst_len = | 3298 | vmx->vcpu.arch.event_exit_inst_len = |
3108 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | 3299 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); |
3109 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 3300 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
3301 | rip = kvm_rip_read(vcpu); | ||
3110 | kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; | 3302 | kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; |
3111 | kvm_run->debug.arch.exception = ex_no; | 3303 | kvm_run->debug.arch.exception = ex_no; |
3112 | break; | 3304 | break; |
@@ -3144,7 +3336,7 @@ static int handle_io(struct kvm_vcpu *vcpu) | |||
3144 | ++vcpu->stat.io_exits; | 3336 | ++vcpu->stat.io_exits; |
3145 | 3337 | ||
3146 | if (string || in) | 3338 | if (string || in) |
3147 | return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; | 3339 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; |
3148 | 3340 | ||
3149 | port = exit_qualification >> 16; | 3341 | port = exit_qualification >> 16; |
3150 | size = (exit_qualification & 7) + 1; | 3342 | size = (exit_qualification & 7) + 1; |
@@ -3164,14 +3356,6 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
3164 | hypercall[2] = 0xc1; | 3356 | hypercall[2] = 0xc1; |
3165 | } | 3357 | } |
3166 | 3358 | ||
3167 | static void complete_insn_gp(struct kvm_vcpu *vcpu, int err) | ||
3168 | { | ||
3169 | if (err) | ||
3170 | kvm_inject_gp(vcpu, 0); | ||
3171 | else | ||
3172 | skip_emulated_instruction(vcpu); | ||
3173 | } | ||
3174 | |||
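The complete_insn_gp() helper deleted above is not gone; the call sites below switch to kvm_complete_insn_gp(), so it has presumably been promoted to common x86 code where SVM can share it. A sketch, assuming the only change is dispatching the skip through kvm_x86_ops:

	/* assumed to live in x86.c now */
	void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
	{
		if (err)
			kvm_inject_gp(vcpu, 0);	/* the access faulted: #GP(0) */
		else
			kvm_x86_ops->skip_emulated_instruction(vcpu);
	}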
3175 | static int handle_cr(struct kvm_vcpu *vcpu) | 3359 | static int handle_cr(struct kvm_vcpu *vcpu) |
3176 | { | 3360 | { |
3177 | unsigned long exit_qualification, val; | 3361 | unsigned long exit_qualification, val; |
@@ -3189,21 +3373,21 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
3189 | switch (cr) { | 3373 | switch (cr) { |
3190 | case 0: | 3374 | case 0: |
3191 | err = kvm_set_cr0(vcpu, val); | 3375 | err = kvm_set_cr0(vcpu, val); |
3192 | complete_insn_gp(vcpu, err); | 3376 | kvm_complete_insn_gp(vcpu, err); |
3193 | return 1; | 3377 | return 1; |
3194 | case 3: | 3378 | case 3: |
3195 | err = kvm_set_cr3(vcpu, val); | 3379 | err = kvm_set_cr3(vcpu, val); |
3196 | complete_insn_gp(vcpu, err); | 3380 | kvm_complete_insn_gp(vcpu, err); |
3197 | return 1; | 3381 | return 1; |
3198 | case 4: | 3382 | case 4: |
3199 | err = kvm_set_cr4(vcpu, val); | 3383 | err = kvm_set_cr4(vcpu, val); |
3200 | complete_insn_gp(vcpu, err); | 3384 | kvm_complete_insn_gp(vcpu, err); |
3201 | return 1; | 3385 | return 1; |
3202 | case 8: { | 3386 | case 8: { |
3203 | u8 cr8_prev = kvm_get_cr8(vcpu); | 3387 | u8 cr8_prev = kvm_get_cr8(vcpu); |
3204 | u8 cr8 = kvm_register_read(vcpu, reg); | 3388 | u8 cr8 = kvm_register_read(vcpu, reg); |
3205 | kvm_set_cr8(vcpu, cr8); | 3389 | err = kvm_set_cr8(vcpu, cr8); |
3206 | skip_emulated_instruction(vcpu); | 3390 | kvm_complete_insn_gp(vcpu, err); |
3207 | if (irqchip_in_kernel(vcpu->kvm)) | 3391 | if (irqchip_in_kernel(vcpu->kvm)) |
3208 | return 1; | 3392 | return 1; |
3209 | if (cr8_prev <= cr8) | 3393 | if (cr8_prev <= cr8) |
@@ -3222,8 +3406,9 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
3222 | case 1: /*mov from cr*/ | 3406 | case 1: /*mov from cr*/ |
3223 | switch (cr) { | 3407 | switch (cr) { |
3224 | case 3: | 3408 | case 3: |
3225 | kvm_register_write(vcpu, reg, vcpu->arch.cr3); | 3409 | val = kvm_read_cr3(vcpu); |
3226 | trace_kvm_cr_read(cr, vcpu->arch.cr3); | 3410 | kvm_register_write(vcpu, reg, val); |
3411 | trace_kvm_cr_read(cr, val); | ||
3227 | skip_emulated_instruction(vcpu); | 3412 | skip_emulated_instruction(vcpu); |
3228 | return 1; | 3413 | return 1; |
3229 | case 8: | 3414 | case 8: |
@@ -3346,6 +3531,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu) | |||
3346 | 3531 | ||
3347 | static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) | 3532 | static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) |
3348 | { | 3533 | { |
3534 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
3349 | return 1; | 3535 | return 1; |
3350 | } | 3536 | } |
3351 | 3537 | ||
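KVM_REQ_EVENT, requested here and in several handlers below, tells the generic entry path to re-run event injection before the next VM entry rather than evaluating it unconditionally every time. The consuming side follows the usual request-flag pattern; roughly (a sketch, the exact x86.c code handles more cases):

	/* in the vcpu entry loop */
	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
		inject_pending_event(vcpu);
		/* ... and request an interrupt/NMI window if something
		 * is still queued but cannot be injected yet */
	}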
@@ -3358,6 +3544,8 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu) | |||
3358 | cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; | 3544 | cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; |
3359 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 3545 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
3360 | 3546 | ||
3547 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
3548 | |||
3361 | ++vcpu->stat.irq_window_exits; | 3549 | ++vcpu->stat.irq_window_exits; |
3362 | 3550 | ||
3363 | /* | 3551 | /* |
@@ -3392,6 +3580,11 @@ static int handle_vmx_insn(struct kvm_vcpu *vcpu) | |||
3392 | return 1; | 3580 | return 1; |
3393 | } | 3581 | } |
3394 | 3582 | ||
3583 | static int handle_invd(struct kvm_vcpu *vcpu) | ||
3584 | { | ||
3585 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; | ||
3586 | } | ||
3587 | |||
3395 | static int handle_invlpg(struct kvm_vcpu *vcpu) | 3588 | static int handle_invlpg(struct kvm_vcpu *vcpu) |
3396 | { | 3589 | { |
3397 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3590 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
@@ -3420,7 +3613,7 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu) | |||
3420 | 3613 | ||
3421 | static int handle_apic_access(struct kvm_vcpu *vcpu) | 3614 | static int handle_apic_access(struct kvm_vcpu *vcpu) |
3422 | { | 3615 | { |
3423 | return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; | 3616 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; |
3424 | } | 3617 | } |
3425 | 3618 | ||
3426 | static int handle_task_switch(struct kvm_vcpu *vcpu) | 3619 | static int handle_task_switch(struct kvm_vcpu *vcpu) |
@@ -3442,9 +3635,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
3442 | switch (type) { | 3635 | switch (type) { |
3443 | case INTR_TYPE_NMI_INTR: | 3636 | case INTR_TYPE_NMI_INTR: |
3444 | vcpu->arch.nmi_injected = false; | 3637 | vcpu->arch.nmi_injected = false; |
3445 | if (cpu_has_virtual_nmis()) | 3638 | vmx_set_nmi_mask(vcpu, true); |
3446 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
3447 | GUEST_INTR_STATE_NMI); | ||
3448 | break; | 3639 | break; |
3449 | case INTR_TYPE_EXT_INTR: | 3640 | case INTR_TYPE_EXT_INTR: |
3450 | case INTR_TYPE_SOFT_INTR: | 3641 | case INTR_TYPE_SOFT_INTR: |
@@ -3519,7 +3710,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
3519 | 3710 | ||
3520 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); | 3711 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); |
3521 | trace_kvm_page_fault(gpa, exit_qualification); | 3712 | trace_kvm_page_fault(gpa, exit_qualification); |
3522 | return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); | 3713 | return kvm_mmu_page_fault(vcpu, gpa, exit_qualification & 0x3, NULL, 0); |
3523 | } | 3714 | } |
3524 | 3715 | ||
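Passing exit_qualification & 0x3 hands the MMU a meaningful error code: in an EPT-violation exit qualification, bit 0 is set for a data read and bit 1 for a data write (SDM Vol. 3), and bit 1 happens to sit in the same position as the write bit of a page-fault error code. Spelled out with illustrative names (these macros are hypothetical, not from the patch):

	#define EPT_VIOL_READ	(1 << 0)  /* data read caused the violation */
	#define EPT_VIOL_WRITE	(1 << 1)  /* data write; same bit position
					   * as PFERR_WRITE_MASK */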
3525 | static u64 ept_rsvd_mask(u64 spte, int level) | 3716 | static u64 ept_rsvd_mask(u64 spte, int level) |
@@ -3614,6 +3805,7 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu) | |||
3614 | cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; | 3805 | cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; |
3615 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 3806 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
3616 | ++vcpu->stat.nmi_window_exits; | 3807 | ++vcpu->stat.nmi_window_exits; |
3808 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
3617 | 3809 | ||
3618 | return 1; | 3810 | return 1; |
3619 | } | 3811 | } |
@@ -3623,9 +3815,18 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
3623 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3815 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3624 | enum emulation_result err = EMULATE_DONE; | 3816 | enum emulation_result err = EMULATE_DONE; |
3625 | int ret = 1; | 3817 | int ret = 1; |
3818 | u32 cpu_exec_ctrl; | ||
3819 | bool intr_window_requested; | ||
3820 | |||
3821 | cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
3822 | intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; | ||
3626 | 3823 | ||
3627 | while (!guest_state_valid(vcpu)) { | 3824 | while (!guest_state_valid(vcpu)) { |
3628 | err = emulate_instruction(vcpu, 0, 0, 0); | 3825 | if (intr_window_requested |
3826 | && (kvm_get_rflags(&vmx->vcpu) & X86_EFLAGS_IF)) | ||
3827 | return handle_interrupt_window(&vmx->vcpu); | ||
3828 | |||
3829 | err = emulate_instruction(vcpu, 0); | ||
3629 | 3830 | ||
3630 | if (err == EMULATE_DO_MMIO) { | 3831 | if (err == EMULATE_DO_MMIO) { |
3631 | ret = 0; | 3832 | ret = 0; |
@@ -3682,6 +3883,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
3682 | [EXIT_REASON_MSR_WRITE] = handle_wrmsr, | 3883 | [EXIT_REASON_MSR_WRITE] = handle_wrmsr, |
3683 | [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, | 3884 | [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, |
3684 | [EXIT_REASON_HLT] = handle_halt, | 3885 | [EXIT_REASON_HLT] = handle_halt, |
3886 | [EXIT_REASON_INVD] = handle_invd, | ||
3685 | [EXIT_REASON_INVLPG] = handle_invlpg, | 3887 | [EXIT_REASON_INVLPG] = handle_invlpg, |
3686 | [EXIT_REASON_VMCALL] = handle_vmcall, | 3888 | [EXIT_REASON_VMCALL] = handle_vmcall, |
3687 | [EXIT_REASON_VMCLEAR] = handle_vmx_insn, | 3889 | [EXIT_REASON_VMCLEAR] = handle_vmx_insn, |
@@ -3709,6 +3911,12 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
3709 | static const int kvm_vmx_max_exit_handlers = | 3911 | static const int kvm_vmx_max_exit_handlers = |
3710 | ARRAY_SIZE(kvm_vmx_exit_handlers); | 3912 | ARRAY_SIZE(kvm_vmx_exit_handlers); |
3711 | 3913 | ||
3914 | static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) | ||
3915 | { | ||
3916 | *info1 = vmcs_readl(EXIT_QUALIFICATION); | ||
3917 | *info2 = vmcs_read32(VM_EXIT_INTR_INFO); | ||
3918 | } | ||
3919 | |||
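vmx_get_exit_info() is wired into kvm_x86_ops below; presumably its consumer is generic code (the kvm_exit tracepoint, judging by the trace_kvm_exit() change further down) that wants the exit qualification and interrupt info without VMX-specific knowledge. A hypothetical caller:

	u64 info1, info2;

	kvm_x86_ops->get_exit_info(vcpu, &info1, &info2);
	trace_printk("exit qual %llx, intr info %llx\n", info1, info2);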
3712 | /* | 3920 | /* |
3713 | * The guest has exited. See if we can fix it or if we need userspace | 3921 | * The guest has exited. See if we can fix it or if we need userspace |
3714 | * assistance. | 3922 | * assistance. |
@@ -3719,17 +3927,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
3719 | u32 exit_reason = vmx->exit_reason; | 3927 | u32 exit_reason = vmx->exit_reason; |
3720 | u32 vectoring_info = vmx->idt_vectoring_info; | 3928 | u32 vectoring_info = vmx->idt_vectoring_info; |
3721 | 3929 | ||
3722 | trace_kvm_exit(exit_reason, vcpu); | 3930 | trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); |
3723 | 3931 | ||
3724 | /* If guest state is invalid, start emulating */ | 3932 | /* If guest state is invalid, start emulating */ |
3725 | if (vmx->emulation_required && emulate_invalid_guest_state) | 3933 | if (vmx->emulation_required && emulate_invalid_guest_state) |
3726 | return handle_invalid_guest_state(vcpu); | 3934 | return handle_invalid_guest_state(vcpu); |
3727 | 3935 | ||
3728 | /* Accesses to CR3 don't cause a VM exit in paging mode, so we need | ||
3729 | * to sync with the guest's real CR3. */ | ||
3730 | if (enable_ept && is_paging(vcpu)) | ||
3731 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | ||
3732 | |||
3733 | if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { | 3936 | if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { |
3734 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 3937 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
3735 | vcpu->run->fail_entry.hardware_entry_failure_reason | 3938 | vcpu->run->fail_entry.hardware_entry_failure_reason |
@@ -3790,23 +3993,19 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
3790 | vmcs_write32(TPR_THRESHOLD, irr); | 3993 | vmcs_write32(TPR_THRESHOLD, irr); |
3791 | } | 3994 | } |
3792 | 3995 | ||
3793 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | 3996 | static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) |
3794 | { | 3997 | { |
3795 | u32 exit_intr_info; | 3998 | u32 exit_intr_info; |
3796 | u32 idt_vectoring_info = vmx->idt_vectoring_info; | ||
3797 | bool unblock_nmi; | ||
3798 | u8 vector; | ||
3799 | int type; | ||
3800 | bool idtv_info_valid; | ||
3801 | 3999 | ||
3802 | exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 4000 | if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY |
4001 | || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)) | ||
4002 | return; | ||
3803 | 4003 | ||
3804 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); | 4004 | vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
4005 | exit_intr_info = vmx->exit_intr_info; | ||
3805 | 4006 | ||
3806 | /* Handle machine checks before interrupts are enabled */ | 4007 | /* Handle machine checks before interrupts are enabled */ |
3807 | if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) | 4008 | if (is_machine_check(exit_intr_info)) |
3808 | || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI | ||
3809 | && is_machine_check(exit_intr_info))) | ||
3810 | kvm_machine_check(); | 4009 | kvm_machine_check(); |
3811 | 4010 | ||
3812 | /* We need to handle NMIs before interrupts are enabled */ | 4011 | /* We need to handle NMIs before interrupts are enabled */ |
@@ -3816,10 +4015,25 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | |||
3816 | asm("int $2"); | 4015 | asm("int $2"); |
3817 | kvm_after_handle_nmi(&vmx->vcpu); | 4016 | kvm_after_handle_nmi(&vmx->vcpu); |
3818 | } | 4017 | } |
4018 | } | ||
3819 | 4019 | ||
3820 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | 4020 | static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) |
4021 | { | ||
4022 | u32 exit_intr_info; | ||
4023 | bool unblock_nmi; | ||
4024 | u8 vector; | ||
4025 | bool idtv_info_valid; | ||
4026 | |||
4027 | idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; | ||
3821 | 4028 | ||
3822 | if (cpu_has_virtual_nmis()) { | 4029 | if (cpu_has_virtual_nmis()) { |
4030 | if (vmx->nmi_known_unmasked) | ||
4031 | return; | ||
4032 | /* | ||
4033 | * Can't use vmx->exit_intr_info since we're not sure what | ||
4034 | * the exit reason is. | ||
4035 | */ | ||
4036 | exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | ||
3823 | unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; | 4037 | unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; |
3824 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; | 4038 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; |
3825 | /* | 4039 | /* |
@@ -3836,9 +4050,25 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | |||
3836 | vector != DF_VECTOR && !idtv_info_valid) | 4050 | vector != DF_VECTOR && !idtv_info_valid) |
3837 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | 4051 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, |
3838 | GUEST_INTR_STATE_NMI); | 4052 | GUEST_INTR_STATE_NMI); |
4053 | else | ||
4054 | vmx->nmi_known_unmasked = | ||
4055 | !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) | ||
4056 | & GUEST_INTR_STATE_NMI); | ||
3839 | } else if (unlikely(vmx->soft_vnmi_blocked)) | 4057 | } else if (unlikely(vmx->soft_vnmi_blocked)) |
3840 | vmx->vnmi_blocked_time += | 4058 | vmx->vnmi_blocked_time += |
3841 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); | 4059 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); |
4060 | } | ||
4061 | |||
4062 | static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, | ||
4063 | u32 idt_vectoring_info, | ||
4064 | int instr_len_field, | ||
4065 | int error_code_field) | ||
4066 | { | ||
4067 | u8 vector; | ||
4068 | int type; | ||
4069 | bool idtv_info_valid; | ||
4070 | |||
4071 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | ||
3842 | 4072 | ||
3843 | vmx->vcpu.arch.nmi_injected = false; | 4073 | vmx->vcpu.arch.nmi_injected = false; |
3844 | kvm_clear_exception_queue(&vmx->vcpu); | 4074 | kvm_clear_exception_queue(&vmx->vcpu); |
@@ -3847,6 +4077,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | |||
3847 | if (!idtv_info_valid) | 4077 | if (!idtv_info_valid) |
3848 | return; | 4078 | return; |
3849 | 4079 | ||
4080 | kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu); | ||
4081 | |||
3850 | vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; | 4082 | vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; |
3851 | type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; | 4083 | type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; |
3852 | 4084 | ||
@@ -3858,23 +4090,22 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | |||
3858 | * Clear bit "block by NMI" before VM entry if an NMI | 4090 | * Clear bit "block by NMI" before VM entry if an NMI
3859 | * delivery faulted. | 4091 | * delivery faulted. |
3860 | */ | 4092 | */ |
3861 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, | 4093 | vmx_set_nmi_mask(&vmx->vcpu, false); |
3862 | GUEST_INTR_STATE_NMI); | ||
3863 | break; | 4094 | break; |
3864 | case INTR_TYPE_SOFT_EXCEPTION: | 4095 | case INTR_TYPE_SOFT_EXCEPTION: |
3865 | vmx->vcpu.arch.event_exit_inst_len = | 4096 | vmx->vcpu.arch.event_exit_inst_len = |
3866 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | 4097 | vmcs_read32(instr_len_field); |
3867 | /* fall through */ | 4098 | /* fall through */ |
3868 | case INTR_TYPE_HARD_EXCEPTION: | 4099 | case INTR_TYPE_HARD_EXCEPTION: |
3869 | if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { | 4100 | if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { |
3870 | u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE); | 4101 | u32 err = vmcs_read32(error_code_field); |
3871 | kvm_queue_exception_e(&vmx->vcpu, vector, err); | 4102 | kvm_queue_exception_e(&vmx->vcpu, vector, err); |
3872 | } else | 4103 | } else |
3873 | kvm_queue_exception(&vmx->vcpu, vector); | 4104 | kvm_queue_exception(&vmx->vcpu, vector); |
3874 | break; | 4105 | break; |
3875 | case INTR_TYPE_SOFT_INTR: | 4106 | case INTR_TYPE_SOFT_INTR: |
3876 | vmx->vcpu.arch.event_exit_inst_len = | 4107 | vmx->vcpu.arch.event_exit_inst_len = |
3877 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | 4108 | vmcs_read32(instr_len_field); |
3878 | /* fall through */ | 4109 | /* fall through */ |
3879 | case INTR_TYPE_EXT_INTR: | 4110 | case INTR_TYPE_EXT_INTR: |
3880 | kvm_queue_interrupt(&vmx->vcpu, vector, | 4111 | kvm_queue_interrupt(&vmx->vcpu, vector, |
@@ -3885,27 +4116,21 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | |||
3885 | } | 4116 | } |
3886 | } | 4117 | } |
3887 | 4118 | ||
3888 | /* | 4119 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) |
3889 | * Failure to inject an interrupt should give us the information | ||
3890 | * in IDT_VECTORING_INFO_FIELD. However, if the failure occurs | ||
3891 | * when fetching the interrupt redirection bitmap in the real-mode | ||
3892 | * tss, this doesn't happen. So we do it ourselves. | ||
3893 | */ | ||
3894 | static void fixup_rmode_irq(struct vcpu_vmx *vmx) | ||
3895 | { | 4120 | { |
3896 | vmx->rmode.irq.pending = 0; | 4121 | __vmx_complete_interrupts(vmx, vmx->idt_vectoring_info, |
3897 | if (kvm_rip_read(&vmx->vcpu) + 1 != vmx->rmode.irq.rip) | 4122 | VM_EXIT_INSTRUCTION_LEN, |
3898 | return; | 4123 | IDT_VECTORING_ERROR_CODE); |
3899 | kvm_rip_write(&vmx->vcpu, vmx->rmode.irq.rip); | 4124 | } |
3900 | if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) { | 4125 | |
3901 | vmx->idt_vectoring_info &= ~VECTORING_INFO_TYPE_MASK; | 4126 | static void vmx_cancel_injection(struct kvm_vcpu *vcpu) |
3902 | vmx->idt_vectoring_info |= INTR_TYPE_EXT_INTR; | 4127 | { |
3903 | return; | 4128 | __vmx_complete_interrupts(to_vmx(vcpu), |
3904 | } | 4129 | vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), |
3905 | vmx->idt_vectoring_info = | 4130 | VM_ENTRY_INSTRUCTION_LEN, |
3906 | VECTORING_INFO_VALID_MASK | 4131 | VM_ENTRY_EXCEPTION_ERROR_CODE); |
3907 | | INTR_TYPE_EXT_INTR | 4132 | |
3908 | | vmx->rmode.irq.vector; | 4133 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); |
3909 | } | 4134 | } |
3910 | 4135 | ||
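__vmx_complete_interrupts() is now parameterized on the VMCS fields to read, so the same decode runs in two directions: after a completed exit it requeues whatever was being delivered when the exit hit, and via the new vmx_cancel_injection() it unwinds an entry that was prepared but then abandoned, pushing the event back onto the software queues and clearing the injection field. The two field sets, side by side:

	/* completed exit:  IDT_VECTORING_INFO_FIELD
	 *                  + VM_EXIT_INSTRUCTION_LEN
	 *                  + IDT_VECTORING_ERROR_CODE
	 * cancelled entry: VM_ENTRY_INTR_INFO_FIELD
	 *                  + VM_ENTRY_INSTRUCTION_LEN
	 *                  + VM_ENTRY_EXCEPTION_ERROR_CODE
	 */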
3911 | #ifdef CONFIG_X86_64 | 4136 | #ifdef CONFIG_X86_64 |
@@ -3916,7 +4141,7 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx) | |||
3916 | #define Q "l" | 4141 | #define Q "l" |
3917 | #endif | 4142 | #endif |
3918 | 4143 | ||
3919 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | 4144 | static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) |
3920 | { | 4145 | { |
3921 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4146 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3922 | 4147 | ||
@@ -3945,6 +4170,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
3945 | asm( | 4170 | asm( |
3946 | /* Store host registers */ | 4171 | /* Store host registers */ |
3947 | "push %%"R"dx; push %%"R"bp;" | 4172 | "push %%"R"dx; push %%"R"bp;" |
4173 | "push %%"R"cx \n\t" /* placeholder for guest rcx */ | ||
3948 | "push %%"R"cx \n\t" | 4174 | "push %%"R"cx \n\t" |
3949 | "cmp %%"R"sp, %c[host_rsp](%0) \n\t" | 4175 | "cmp %%"R"sp, %c[host_rsp](%0) \n\t" |
3950 | "je 1f \n\t" | 4176 | "je 1f \n\t" |
@@ -3986,10 +4212,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
3986 | ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" | 4212 | ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" |
3987 | ".Lkvm_vmx_return: " | 4213 | ".Lkvm_vmx_return: " |
3988 | /* Save guest registers, load host registers, keep flags */ | 4214 | /* Save guest registers, load host registers, keep flags */ |
3989 | "xchg %0, (%%"R"sp) \n\t" | 4215 | "mov %0, %c[wordsize](%%"R"sp) \n\t" |
4216 | "pop %0 \n\t" | ||
3990 | "mov %%"R"ax, %c[rax](%0) \n\t" | 4217 | "mov %%"R"ax, %c[rax](%0) \n\t" |
3991 | "mov %%"R"bx, %c[rbx](%0) \n\t" | 4218 | "mov %%"R"bx, %c[rbx](%0) \n\t" |
3992 | "push"Q" (%%"R"sp); pop"Q" %c[rcx](%0) \n\t" | 4219 | "pop"Q" %c[rcx](%0) \n\t" |
3993 | "mov %%"R"dx, %c[rdx](%0) \n\t" | 4220 | "mov %%"R"dx, %c[rdx](%0) \n\t" |
3994 | "mov %%"R"si, %c[rsi](%0) \n\t" | 4221 | "mov %%"R"si, %c[rsi](%0) \n\t" |
3995 | "mov %%"R"di, %c[rdi](%0) \n\t" | 4222 | "mov %%"R"di, %c[rdi](%0) \n\t" |
@@ -4007,7 +4234,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4007 | "mov %%cr2, %%"R"ax \n\t" | 4234 | "mov %%cr2, %%"R"ax \n\t" |
4008 | "mov %%"R"ax, %c[cr2](%0) \n\t" | 4235 | "mov %%"R"ax, %c[cr2](%0) \n\t" |
4009 | 4236 | ||
4010 | "pop %%"R"bp; pop %%"R"bp; pop %%"R"dx \n\t" | 4237 | "pop %%"R"bp; pop %%"R"dx \n\t" |
4011 | "setbe %c[fail](%0) \n\t" | 4238 | "setbe %c[fail](%0) \n\t" |
4012 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), | 4239 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), |
4013 | [launched]"i"(offsetof(struct vcpu_vmx, launched)), | 4240 | [launched]"i"(offsetof(struct vcpu_vmx, launched)), |
@@ -4030,25 +4257,32 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4030 | [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])), | 4257 | [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])), |
4031 | [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])), | 4258 | [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])), |
4032 | #endif | 4259 | #endif |
4033 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)) | 4260 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), |
4261 | [wordsize]"i"(sizeof(ulong)) | ||
4034 | : "cc", "memory" | 4262 | : "cc", "memory" |
4035 | , R"bx", R"di", R"si" | 4263 | , R"ax", R"bx", R"di", R"si" |
4036 | #ifdef CONFIG_X86_64 | 4264 | #ifdef CONFIG_X86_64 |
4037 | , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" | 4265 | , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" |
4038 | #endif | 4266 | #endif |
4039 | ); | 4267 | ); |
4040 | 4268 | ||
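The reworked prologue/epilogue replaces the old xchg trick for recovering the guest's rcx. %0 is rcx (the asm's only symbolically addressable register here), so after a VM exit it holds the guest value while the saved struct vcpu_vmx pointer sits on the stack; the extra placeholder push turns the recovery into a straight-line sequence. In outline (a sketch of the flow, not compilable on its own):

	push rdx; push rbp
	push rcx                   /* placeholder slot for guest rcx */
	push rcx                   /* %0 == pointer to struct vcpu_vmx */
	/* ... load guest registers, VMLAUNCH/VMRESUME ... */
	/* .Lkvm_vmx_return: %0 now holds the guest's rcx */
	mov  %0, wordsize(%rsp)    /* park guest rcx in the placeholder */
	pop  %0                    /* get the vcpu_vmx pointer back */
	mov  %rax, rax_off(%0)     /* spill the other guest GPRs */
	pop  rcx_off(%0)           /* guest rcx -> vcpu.arch.regs[RCX] */
	/* ... */
	pop  rbp; pop rdx          /* one pop fewer than before: the old
	                            * double "pop %rbp" discarded the slot */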
4041 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) | 4269 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) |
4042 | | (1 << VCPU_EXREG_PDPTR)); | 4270 | | (1 << VCPU_EXREG_RFLAGS) |
4271 | | (1 << VCPU_EXREG_CPL) | ||
4272 | | (1 << VCPU_EXREG_PDPTR) | ||
4273 | | (1 << VCPU_EXREG_SEGMENTS) | ||
4274 | | (1 << VCPU_EXREG_CR3)); | ||
4043 | vcpu->arch.regs_dirty = 0; | 4275 | vcpu->arch.regs_dirty = 0; |
4044 | 4276 | ||
4045 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); | 4277 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
4046 | if (vmx->rmode.irq.pending) | ||
4047 | fixup_rmode_irq(vmx); | ||
4048 | 4278 | ||
4049 | asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); | 4279 | asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); |
4050 | vmx->launched = 1; | 4280 | vmx->launched = 1; |
4051 | 4281 | ||
4282 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); | ||
4283 | |||
4284 | vmx_complete_atomic_exit(vmx); | ||
4285 | vmx_recover_nmi_blocking(vmx); | ||
4052 | vmx_complete_interrupts(vmx); | 4286 | vmx_complete_interrupts(vmx); |
4053 | } | 4287 | } |
4054 | 4288 | ||
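With exit_reason cached right after VMLAUNCH/VMRESUME returns, the exit path fans out in a deliberate order, annotated here for reference:

	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);

	vmx_complete_atomic_exit(vmx);  /* MCE and host-NMI ("int $2") work
	                                 * that must precede irq enabling */
	vmx_recover_nmi_blocking(vmx);  /* re-set virtual-NMI blocking if
	                                 * the exit interrupted an IRET */
	vmx_complete_interrupts(vmx);   /* requeue events from
	                                 * IDT_VECTORING_INFO_FIELD */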
@@ -4106,8 +4340,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
4106 | goto free_vcpu; | 4340 | goto free_vcpu; |
4107 | 4341 | ||
4108 | vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); | 4342 | vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); |
4343 | err = -ENOMEM; | ||
4109 | if (!vmx->guest_msrs) { | 4344 | if (!vmx->guest_msrs) { |
4110 | err = -ENOMEM; | ||
4111 | goto uninit_vcpu; | 4345 | goto uninit_vcpu; |
4112 | } | 4346 | } |
4113 | 4347 | ||
@@ -4119,21 +4353,26 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
4119 | 4353 | ||
4120 | cpu = get_cpu(); | 4354 | cpu = get_cpu(); |
4121 | vmx_vcpu_load(&vmx->vcpu, cpu); | 4355 | vmx_vcpu_load(&vmx->vcpu, cpu); |
4356 | vmx->vcpu.cpu = cpu; | ||
4122 | err = vmx_vcpu_setup(vmx); | 4357 | err = vmx_vcpu_setup(vmx); |
4123 | vmx_vcpu_put(&vmx->vcpu); | 4358 | vmx_vcpu_put(&vmx->vcpu); |
4124 | put_cpu(); | 4359 | put_cpu(); |
4125 | if (err) | 4360 | if (err) |
4126 | goto free_vmcs; | 4361 | goto free_vmcs; |
4127 | if (vm_need_virtualize_apic_accesses(kvm)) | 4362 | if (vm_need_virtualize_apic_accesses(kvm)) |
4128 | if (alloc_apic_access_page(kvm) != 0) | 4363 | err = alloc_apic_access_page(kvm); |
4364 | if (err) | ||
4129 | goto free_vmcs; | 4365 | goto free_vmcs; |
4130 | 4366 | ||
4131 | if (enable_ept) { | 4367 | if (enable_ept) { |
4132 | if (!kvm->arch.ept_identity_map_addr) | 4368 | if (!kvm->arch.ept_identity_map_addr) |
4133 | kvm->arch.ept_identity_map_addr = | 4369 | kvm->arch.ept_identity_map_addr = |
4134 | VMX_EPT_IDENTITY_PAGETABLE_ADDR; | 4370 | VMX_EPT_IDENTITY_PAGETABLE_ADDR; |
4371 | err = -ENOMEM; | ||
4135 | if (alloc_identity_pagetable(kvm) != 0) | 4372 | if (alloc_identity_pagetable(kvm) != 0) |
4136 | goto free_vmcs; | 4373 | goto free_vmcs; |
4374 | if (!init_rmode_identity_map(kvm)) | ||
4375 | goto free_vmcs; | ||
4137 | } | 4376 | } |
4138 | 4377 | ||
4139 | return &vmx->vcpu; | 4378 | return &vmx->vcpu; |
@@ -4249,11 +4488,6 @@ static int vmx_get_lpage_level(void) | |||
4249 | return PT_PDPE_LEVEL; | 4488 | return PT_PDPE_LEVEL; |
4250 | } | 4489 | } |
4251 | 4490 | ||
4252 | static inline u32 bit(int bitno) | ||
4253 | { | ||
4254 | return 1 << (bitno & 31); | ||
4255 | } | ||
4256 | |||
4257 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | 4491 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) |
4258 | { | 4492 | { |
4259 | struct kvm_cpuid_entry2 *best; | 4493 | struct kvm_cpuid_entry2 *best; |
@@ -4280,6 +4514,13 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | |||
4280 | { | 4514 | { |
4281 | } | 4515 | } |
4282 | 4516 | ||
4517 | static int vmx_check_intercept(struct kvm_vcpu *vcpu, | ||
4518 | struct x86_instruction_info *info, | ||
4519 | enum x86_intercept_stage stage) | ||
4520 | { | ||
4521 | return X86EMUL_CONTINUE; | ||
4522 | } | ||
4523 | |||
4283 | static struct kvm_x86_ops vmx_x86_ops = { | 4524 | static struct kvm_x86_ops vmx_x86_ops = { |
4284 | .cpu_has_kvm_support = cpu_has_kvm_support, | 4525 | .cpu_has_kvm_support = cpu_has_kvm_support, |
4285 | .disabled_by_bios = vmx_disabled_by_bios, | 4526 | .disabled_by_bios = vmx_disabled_by_bios, |
@@ -4307,6 +4548,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4307 | .get_cpl = vmx_get_cpl, | 4548 | .get_cpl = vmx_get_cpl, |
4308 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, | 4549 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, |
4309 | .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, | 4550 | .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, |
4551 | .decache_cr3 = vmx_decache_cr3, | ||
4310 | .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, | 4552 | .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, |
4311 | .set_cr0 = vmx_set_cr0, | 4553 | .set_cr0 = vmx_set_cr0, |
4312 | .set_cr3 = vmx_set_cr3, | 4554 | .set_cr3 = vmx_set_cr3, |
@@ -4334,6 +4576,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4334 | .set_irq = vmx_inject_irq, | 4576 | .set_irq = vmx_inject_irq, |
4335 | .set_nmi = vmx_inject_nmi, | 4577 | .set_nmi = vmx_inject_nmi, |
4336 | .queue_exception = vmx_queue_exception, | 4578 | .queue_exception = vmx_queue_exception, |
4579 | .cancel_injection = vmx_cancel_injection, | ||
4337 | .interrupt_allowed = vmx_interrupt_allowed, | 4580 | .interrupt_allowed = vmx_interrupt_allowed, |
4338 | .nmi_allowed = vmx_nmi_allowed, | 4581 | .nmi_allowed = vmx_nmi_allowed, |
4339 | .get_nmi_mask = vmx_get_nmi_mask, | 4582 | .get_nmi_mask = vmx_get_nmi_mask, |
@@ -4346,7 +4589,9 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4346 | .get_tdp_level = get_ept_level, | 4589 | .get_tdp_level = get_ept_level, |
4347 | .get_mt_mask = vmx_get_mt_mask, | 4590 | .get_mt_mask = vmx_get_mt_mask, |
4348 | 4591 | ||
4592 | .get_exit_info = vmx_get_exit_info, | ||
4349 | .exit_reasons_str = vmx_exit_reasons_str, | 4593 | .exit_reasons_str = vmx_exit_reasons_str, |
4594 | |||
4350 | .get_lpage_level = vmx_get_lpage_level, | 4595 | .get_lpage_level = vmx_get_lpage_level, |
4351 | 4596 | ||
4352 | .cpuid_update = vmx_cpuid_update, | 4597 | .cpuid_update = vmx_cpuid_update, |
@@ -4356,6 +4601,15 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4356 | .set_supported_cpuid = vmx_set_supported_cpuid, | 4601 | .set_supported_cpuid = vmx_set_supported_cpuid, |
4357 | 4602 | ||
4358 | .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, | 4603 | .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, |
4604 | |||
4605 | .set_tsc_khz = vmx_set_tsc_khz, | ||
4606 | .write_tsc_offset = vmx_write_tsc_offset, | ||
4607 | .adjust_tsc_offset = vmx_adjust_tsc_offset, | ||
4608 | .compute_tsc_offset = vmx_compute_tsc_offset, | ||
4609 | |||
4610 | .set_tdp_cr3 = vmx_set_cr3, | ||
4611 | |||
4612 | .check_intercept = vmx_check_intercept, | ||
4359 | }; | 4613 | }; |
4360 | 4614 | ||
4361 | static int __init vmx_init(void) | 4615 | static int __init vmx_init(void) |
@@ -4417,8 +4671,6 @@ static int __init vmx_init(void) | |||
4417 | 4671 | ||
4418 | if (enable_ept) { | 4672 | if (enable_ept) { |
4419 | bypass_guest_pf = 0; | 4673 | bypass_guest_pf = 0; |
4420 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | | ||
4421 | VMX_EPT_WRITABLE_MASK); | ||
4422 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, | 4674 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, |
4423 | VMX_EPT_EXECUTABLE_MASK); | 4675 | VMX_EPT_EXECUTABLE_MASK); |
4424 | kvm_enable_tdp(); | 4676 | kvm_enable_tdp(); |