Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--  arch/x86/kvm/vmx.c | 862
1 file changed, 534 insertions(+), 328 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ed53b42caba1..2f8db0ec8ae4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -26,6 +26,7 @@
26 | #include <linux/sched.h> | 26 | #include <linux/sched.h> |
27 | #include <linux/moduleparam.h> | 27 | #include <linux/moduleparam.h> |
28 | #include <linux/ftrace_event.h> | 28 | #include <linux/ftrace_event.h> |
29 | #include <linux/slab.h> | ||
29 | #include "kvm_cache_regs.h" | 30 | #include "kvm_cache_regs.h" |
30 | #include "x86.h" | 31 | #include "x86.h" |
31 | 32 | ||
@@ -61,12 +62,54 @@ module_param_named(unrestricted_guest,
61 | static int __read_mostly emulate_invalid_guest_state = 0; | 62 | static int __read_mostly emulate_invalid_guest_state = 0; |
62 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); | 63 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
63 | 64 | ||
65 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ | ||
66 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) | ||
67 | #define KVM_GUEST_CR0_MASK \ | ||
68 | (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | ||
69 | #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \ | ||
70 | (X86_CR0_WP | X86_CR0_NE) | ||
71 | #define KVM_VM_CR0_ALWAYS_ON \ | ||
72 | (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | ||
73 | #define KVM_CR4_GUEST_OWNED_BITS \ | ||
74 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | ||
75 | | X86_CR4_OSXMMEXCPT) | ||
76 | |||
77 | #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) | ||
78 | #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) | ||
79 | |||
80 | #define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) | ||
81 | |||
82 | /* | ||
83 | * These 2 parameters are used to config the controls for Pause-Loop Exiting: | ||
84 | * ple_gap: upper bound on the amount of time between two successive | ||
85 | * executions of PAUSE in a loop. Also indicate if ple enabled. | ||
86 | * According to test, this time is usually small than 41 cycles. | ||
87 | * ple_window: upper bound on the amount of time a guest is allowed to execute | ||
88 | * in a PAUSE loop. Tests indicate that most spinlocks are held for | ||
89 | * less than 2^12 cycles | ||
90 | * Time is measured based on a counter that runs at the same rate as the TSC, | ||
91 | * refer SDM volume 3b section 21.6.13 & 22.1.3. | ||
92 | */ | ||
93 | #define KVM_VMX_DEFAULT_PLE_GAP 41 | ||
94 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 | ||
95 | static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; | ||
96 | module_param(ple_gap, int, S_IRUGO); | ||
97 | |||
98 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; | ||
99 | module_param(ple_window, int, S_IRUGO); | ||
100 | |||
64 | struct vmcs { | 101 | struct vmcs { |
65 | u32 revision_id; | 102 | u32 revision_id; |
66 | u32 abort; | 103 | u32 abort; |
67 | char data[0]; | 104 | char data[0]; |
68 | }; | 105 | }; |
69 | 106 | ||
107 | struct shared_msr_entry { | ||
108 | unsigned index; | ||
109 | u64 data; | ||
110 | u64 mask; | ||
111 | }; | ||
112 | |||
70 | struct vcpu_vmx { | 113 | struct vcpu_vmx { |
71 | struct kvm_vcpu vcpu; | 114 | struct kvm_vcpu vcpu; |
72 | struct list_head local_vcpus_link; | 115 | struct list_head local_vcpus_link; |
@@ -74,13 +117,12 @@ struct vcpu_vmx {
74 | int launched; | 117 | int launched; |
75 | u8 fail; | 118 | u8 fail; |
76 | u32 idt_vectoring_info; | 119 | u32 idt_vectoring_info; |
77 | struct kvm_msr_entry *guest_msrs; | 120 | struct shared_msr_entry *guest_msrs; |
78 | struct kvm_msr_entry *host_msrs; | ||
79 | int nmsrs; | 121 | int nmsrs; |
80 | int save_nmsrs; | 122 | int save_nmsrs; |
81 | int msr_offset_efer; | ||
82 | #ifdef CONFIG_X86_64 | 123 | #ifdef CONFIG_X86_64 |
83 | int msr_offset_kernel_gs_base; | 124 | u64 msr_host_kernel_gs_base; |
125 | u64 msr_guest_kernel_gs_base; | ||
84 | #endif | 126 | #endif |
85 | struct vmcs *vmcs; | 127 | struct vmcs *vmcs; |
86 | struct { | 128 | struct { |
@@ -88,11 +130,10 @@ struct vcpu_vmx {
88 | u16 fs_sel, gs_sel, ldt_sel; | 130 | u16 fs_sel, gs_sel, ldt_sel; |
89 | int gs_ldt_reload_needed; | 131 | int gs_ldt_reload_needed; |
90 | int fs_reload_needed; | 132 | int fs_reload_needed; |
91 | int guest_efer_loaded; | ||
92 | } host_state; | 133 | } host_state; |
93 | struct { | 134 | struct { |
94 | int vm86_active; | 135 | int vm86_active; |
95 | u8 save_iopl; | 136 | ulong save_rflags; |
96 | struct kvm_save_segment { | 137 | struct kvm_save_segment { |
97 | u16 selector; | 138 | u16 selector; |
98 | unsigned long base; | 139 | unsigned long base; |
@@ -107,13 +148,14 @@ struct vcpu_vmx {
107 | } rmode; | 148 | } rmode; |
108 | int vpid; | 149 | int vpid; |
109 | bool emulation_required; | 150 | bool emulation_required; |
110 | enum emulation_result invalid_state_emulation_result; | ||
111 | 151 | ||
112 | /* Support for vnmi-less CPUs */ | 152 | /* Support for vnmi-less CPUs */ |
113 | int soft_vnmi_blocked; | 153 | int soft_vnmi_blocked; |
114 | ktime_t entry_time; | 154 | ktime_t entry_time; |
115 | s64 vnmi_blocked_time; | 155 | s64 vnmi_blocked_time; |
116 | u32 exit_reason; | 156 | u32 exit_reason; |
157 | |||
158 | bool rdtscp_enabled; | ||
117 | }; | 159 | }; |
118 | 160 | ||
119 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | 161 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) |
@@ -176,6 +218,8 @@ static struct kvm_vmx_segment_field {
176 | VMX_SEGMENT_FIELD(LDTR), | 218 | VMX_SEGMENT_FIELD(LDTR), |
177 | }; | 219 | }; |
178 | 220 | ||
221 | static u64 host_efer; | ||
222 | |||
179 | static void ept_save_pdptrs(struct kvm_vcpu *vcpu); | 223 | static void ept_save_pdptrs(struct kvm_vcpu *vcpu); |
180 | 224 | ||
181 | /* | 225 | /* |
@@ -184,28 +228,12 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
184 | */ | 228 | */ |
185 | static const u32 vmx_msr_index[] = { | 229 | static const u32 vmx_msr_index[] = { |
186 | #ifdef CONFIG_X86_64 | 230 | #ifdef CONFIG_X86_64 |
187 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE, | 231 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, |
188 | #endif | 232 | #endif |
189 | MSR_EFER, MSR_K6_STAR, | 233 | MSR_EFER, MSR_TSC_AUX, MSR_K6_STAR, |
190 | }; | 234 | }; |
191 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | 235 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) |
192 | 236 | ||
193 | static void load_msrs(struct kvm_msr_entry *e, int n) | ||
194 | { | ||
195 | int i; | ||
196 | |||
197 | for (i = 0; i < n; ++i) | ||
198 | wrmsrl(e[i].index, e[i].data); | ||
199 | } | ||
200 | |||
201 | static void save_msrs(struct kvm_msr_entry *e, int n) | ||
202 | { | ||
203 | int i; | ||
204 | |||
205 | for (i = 0; i < n; ++i) | ||
206 | rdmsrl(e[i].index, e[i].data); | ||
207 | } | ||
208 | |||
209 | static inline int is_page_fault(u32 intr_info) | 237 | static inline int is_page_fault(u32 intr_info) |
210 | { | 238 | { |
211 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 239 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
@@ -293,6 +321,11 @@ static inline bool cpu_has_vmx_ept_2m_page(void)
293 | return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); | 321 | return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); |
294 | } | 322 | } |
295 | 323 | ||
324 | static inline bool cpu_has_vmx_ept_1g_page(void) | ||
325 | { | ||
326 | return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT); | ||
327 | } | ||
328 | |||
296 | static inline int cpu_has_vmx_invept_individual_addr(void) | 329 | static inline int cpu_has_vmx_invept_individual_addr(void) |
297 | { | 330 | { |
298 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); | 331 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); |
@@ -320,11 +353,15 @@ static inline int cpu_has_vmx_unrestricted_guest(void)
320 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 353 | SECONDARY_EXEC_UNRESTRICTED_GUEST; |
321 | } | 354 | } |
322 | 355 | ||
356 | static inline int cpu_has_vmx_ple(void) | ||
357 | { | ||
358 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
359 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
360 | } | ||
361 | |||
323 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 362 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) |
324 | { | 363 | { |
325 | return flexpriority_enabled && | 364 | return flexpriority_enabled && irqchip_in_kernel(kvm); |
326 | (cpu_has_vmx_virtualize_apic_accesses()) && | ||
327 | (irqchip_in_kernel(kvm)); | ||
328 | } | 365 | } |
329 | 366 | ||
330 | static inline int cpu_has_vmx_vpid(void) | 367 | static inline int cpu_has_vmx_vpid(void) |
@@ -333,6 +370,12 @@ static inline int cpu_has_vmx_vpid(void)
333 | SECONDARY_EXEC_ENABLE_VPID; | 370 | SECONDARY_EXEC_ENABLE_VPID; |
334 | } | 371 | } |
335 | 372 | ||
373 | static inline int cpu_has_vmx_rdtscp(void) | ||
374 | { | ||
375 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
376 | SECONDARY_EXEC_RDTSCP; | ||
377 | } | ||
378 | |||
336 | static inline int cpu_has_virtual_nmis(void) | 379 | static inline int cpu_has_virtual_nmis(void) |
337 | { | 380 | { |
338 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | 381 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; |
@@ -348,7 +391,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
348 | int i; | 391 | int i; |
349 | 392 | ||
350 | for (i = 0; i < vmx->nmsrs; ++i) | 393 | for (i = 0; i < vmx->nmsrs; ++i) |
351 | if (vmx->guest_msrs[i].index == msr) | 394 | if (vmx_msr_index[vmx->guest_msrs[i].index] == msr) |
352 | return i; | 395 | return i; |
353 | return -1; | 396 | return -1; |
354 | } | 397 | } |
@@ -379,7 +422,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa)
379 | : : "a" (&operand), "c" (ext) : "cc", "memory"); | 422 | : : "a" (&operand), "c" (ext) : "cc", "memory"); |
380 | } | 423 | } |
381 | 424 | ||
382 | static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) | 425 | static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) |
383 | { | 426 | { |
384 | int i; | 427 | int i; |
385 | 428 | ||
@@ -537,22 +580,18 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
537 | { | 580 | { |
538 | u32 eb; | 581 | u32 eb; |
539 | 582 | ||
540 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR); | 583 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | |
541 | if (!vcpu->fpu_active) | 584 | (1u << NM_VECTOR) | (1u << DB_VECTOR); |
542 | eb |= 1u << NM_VECTOR; | 585 | if ((vcpu->guest_debug & |
543 | /* | 586 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == |
544 | * Unconditionally intercept #DB so we can maintain dr6 without | 587 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) |
545 | * reading it every exit. | 588 | eb |= 1u << BP_VECTOR; |
546 | */ | ||
547 | eb |= 1u << DB_VECTOR; | ||
548 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { | ||
549 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | ||
550 | eb |= 1u << BP_VECTOR; | ||
551 | } | ||
552 | if (to_vmx(vcpu)->rmode.vm86_active) | 589 | if (to_vmx(vcpu)->rmode.vm86_active) |
553 | eb = ~0; | 590 | eb = ~0; |
554 | if (enable_ept) | 591 | if (enable_ept) |
555 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ | 592 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ |
593 | if (vcpu->fpu_active) | ||
594 | eb &= ~(1u << NM_VECTOR); | ||
556 | vmcs_write32(EXCEPTION_BITMAP, eb); | 595 | vmcs_write32(EXCEPTION_BITMAP, eb); |
557 | } | 596 | } |
558 | 597 | ||
@@ -570,17 +609,12 @@ static void reload_tss(void)
570 | load_TR_desc(); | 609 | load_TR_desc(); |
571 | } | 610 | } |
572 | 611 | ||
573 | static void load_transition_efer(struct vcpu_vmx *vmx) | 612 | static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) |
574 | { | 613 | { |
575 | int efer_offset = vmx->msr_offset_efer; | ||
576 | u64 host_efer; | ||
577 | u64 guest_efer; | 614 | u64 guest_efer; |
578 | u64 ignore_bits; | 615 | u64 ignore_bits; |
579 | 616 | ||
580 | if (efer_offset < 0) | 617 | guest_efer = vmx->vcpu.arch.efer; |
581 | return; | ||
582 | host_efer = vmx->host_msrs[efer_offset].data; | ||
583 | guest_efer = vmx->guest_msrs[efer_offset].data; | ||
584 | 618 | ||
585 | /* | 619 | /* |
586 | * NX is emulated; LMA and LME handled by hardware; SCE meaninless | 620 | * NX is emulated; LMA and LME handled by hardware; SCE meaninless |
@@ -593,27 +627,17 @@ static void load_transition_efer(struct vcpu_vmx *vmx)
593 | if (guest_efer & EFER_LMA) | 627 | if (guest_efer & EFER_LMA) |
594 | ignore_bits &= ~(u64)EFER_SCE; | 628 | ignore_bits &= ~(u64)EFER_SCE; |
595 | #endif | 629 | #endif |
596 | if ((guest_efer & ~ignore_bits) == (host_efer & ~ignore_bits)) | ||
597 | return; | ||
598 | |||
599 | vmx->host_state.guest_efer_loaded = 1; | ||
600 | guest_efer &= ~ignore_bits; | 630 | guest_efer &= ~ignore_bits; |
601 | guest_efer |= host_efer & ignore_bits; | 631 | guest_efer |= host_efer & ignore_bits; |
602 | wrmsrl(MSR_EFER, guest_efer); | 632 | vmx->guest_msrs[efer_offset].data = guest_efer; |
603 | vmx->vcpu.stat.efer_reload++; | 633 | vmx->guest_msrs[efer_offset].mask = ~ignore_bits; |
604 | } | 634 | return true; |
605 | |||
606 | static void reload_host_efer(struct vcpu_vmx *vmx) | ||
607 | { | ||
608 | if (vmx->host_state.guest_efer_loaded) { | ||
609 | vmx->host_state.guest_efer_loaded = 0; | ||
610 | load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1); | ||
611 | } | ||
612 | } | 635 | } |
613 | 636 | ||
614 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) | 637 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) |
615 | { | 638 | { |
616 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 639 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
640 | int i; | ||
617 | 641 | ||
618 | if (vmx->host_state.loaded) | 642 | if (vmx->host_state.loaded) |
619 | return; | 643 | return; |
@@ -650,13 +674,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
650 | #endif | 674 | #endif |
651 | 675 | ||
652 | #ifdef CONFIG_X86_64 | 676 | #ifdef CONFIG_X86_64 |
653 | if (is_long_mode(&vmx->vcpu)) | 677 | if (is_long_mode(&vmx->vcpu)) { |
654 | save_msrs(vmx->host_msrs + | 678 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
655 | vmx->msr_offset_kernel_gs_base, 1); | 679 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); |
656 | 680 | } | |
657 | #endif | 681 | #endif |
658 | load_msrs(vmx->guest_msrs, vmx->save_nmsrs); | 682 | for (i = 0; i < vmx->save_nmsrs; ++i) |
659 | load_transition_efer(vmx); | 683 | kvm_set_shared_msr(vmx->guest_msrs[i].index, |
684 | vmx->guest_msrs[i].data, | ||
685 | vmx->guest_msrs[i].mask); | ||
660 | } | 686 | } |
661 | 687 | ||
662 | static void __vmx_load_host_state(struct vcpu_vmx *vmx) | 688 | static void __vmx_load_host_state(struct vcpu_vmx *vmx) |
@@ -684,9 +710,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
684 | local_irq_restore(flags); | 710 | local_irq_restore(flags); |
685 | } | 711 | } |
686 | reload_tss(); | 712 | reload_tss(); |
687 | save_msrs(vmx->guest_msrs, vmx->save_nmsrs); | 713 | #ifdef CONFIG_X86_64 |
688 | load_msrs(vmx->host_msrs, vmx->save_nmsrs); | 714 | if (is_long_mode(&vmx->vcpu)) { |
689 | reload_host_efer(vmx); | 715 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); |
716 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | ||
717 | } | ||
718 | #endif | ||
690 | } | 719 | } |
691 | 720 | ||
692 | static void vmx_load_host_state(struct vcpu_vmx *vmx) | 721 | static void vmx_load_host_state(struct vcpu_vmx *vmx) |
@@ -763,38 +792,51 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
763 | 792 | ||
764 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) | 793 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) |
765 | { | 794 | { |
795 | ulong cr0; | ||
796 | |||
766 | if (vcpu->fpu_active) | 797 | if (vcpu->fpu_active) |
767 | return; | 798 | return; |
768 | vcpu->fpu_active = 1; | 799 | vcpu->fpu_active = 1; |
769 | vmcs_clear_bits(GUEST_CR0, X86_CR0_TS); | 800 | cr0 = vmcs_readl(GUEST_CR0); |
770 | if (vcpu->arch.cr0 & X86_CR0_TS) | 801 | cr0 &= ~(X86_CR0_TS | X86_CR0_MP); |
771 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS); | 802 | cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP); |
803 | vmcs_writel(GUEST_CR0, cr0); | ||
772 | update_exception_bitmap(vcpu); | 804 | update_exception_bitmap(vcpu); |
805 | vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; | ||
806 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | ||
773 | } | 807 | } |
774 | 808 | ||
809 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); | ||
810 | |||
775 | static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) | 811 | static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) |
776 | { | 812 | { |
777 | if (!vcpu->fpu_active) | 813 | vmx_decache_cr0_guest_bits(vcpu); |
778 | return; | 814 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP); |
779 | vcpu->fpu_active = 0; | ||
780 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS); | ||
781 | update_exception_bitmap(vcpu); | 815 | update_exception_bitmap(vcpu); |
816 | vcpu->arch.cr0_guest_owned_bits = 0; | ||
817 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | ||
818 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); | ||
782 | } | 819 | } |
783 | 820 | ||
784 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | 821 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) |
785 | { | 822 | { |
786 | unsigned long rflags; | 823 | unsigned long rflags, save_rflags; |
787 | 824 | ||
788 | rflags = vmcs_readl(GUEST_RFLAGS); | 825 | rflags = vmcs_readl(GUEST_RFLAGS); |
789 | if (to_vmx(vcpu)->rmode.vm86_active) | 826 | if (to_vmx(vcpu)->rmode.vm86_active) { |
790 | rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM); | 827 | rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; |
828 | save_rflags = to_vmx(vcpu)->rmode.save_rflags; | ||
829 | rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; | ||
830 | } | ||
791 | return rflags; | 831 | return rflags; |
792 | } | 832 | } |
793 | 833 | ||
794 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 834 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
795 | { | 835 | { |
796 | if (to_vmx(vcpu)->rmode.vm86_active) | 836 | if (to_vmx(vcpu)->rmode.vm86_active) { |
837 | to_vmx(vcpu)->rmode.save_rflags = rflags; | ||
797 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; | 838 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; |
839 | } | ||
798 | vmcs_writel(GUEST_RFLAGS, rflags); | 840 | vmcs_writel(GUEST_RFLAGS, rflags); |
799 | } | 841 | } |
800 | 842 | ||
@@ -874,22 +916,22 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
874 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); | 916 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); |
875 | } | 917 | } |
876 | 918 | ||
919 | static bool vmx_rdtscp_supported(void) | ||
920 | { | ||
921 | return cpu_has_vmx_rdtscp(); | ||
922 | } | ||
923 | |||
877 | /* | 924 | /* |
878 | * Swap MSR entry in host/guest MSR entry array. | 925 | * Swap MSR entry in host/guest MSR entry array. |
879 | */ | 926 | */ |
880 | #ifdef CONFIG_X86_64 | ||
881 | static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | 927 | static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) |
882 | { | 928 | { |
883 | struct kvm_msr_entry tmp; | 929 | struct shared_msr_entry tmp; |
884 | 930 | ||
885 | tmp = vmx->guest_msrs[to]; | 931 | tmp = vmx->guest_msrs[to]; |
886 | vmx->guest_msrs[to] = vmx->guest_msrs[from]; | 932 | vmx->guest_msrs[to] = vmx->guest_msrs[from]; |
887 | vmx->guest_msrs[from] = tmp; | 933 | vmx->guest_msrs[from] = tmp; |
888 | tmp = vmx->host_msrs[to]; | ||
889 | vmx->host_msrs[to] = vmx->host_msrs[from]; | ||
890 | vmx->host_msrs[from] = tmp; | ||
891 | } | 934 | } |
892 | #endif | ||
893 | 935 | ||
894 | /* | 936 | /* |
895 | * Set up the vmcs to automatically save and restore system | 937 | * Set up the vmcs to automatically save and restore system |
@@ -898,15 +940,13 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
898 | */ | 940 | */ |
899 | static void setup_msrs(struct vcpu_vmx *vmx) | 941 | static void setup_msrs(struct vcpu_vmx *vmx) |
900 | { | 942 | { |
901 | int save_nmsrs; | 943 | int save_nmsrs, index; |
902 | unsigned long *msr_bitmap; | 944 | unsigned long *msr_bitmap; |
903 | 945 | ||
904 | vmx_load_host_state(vmx); | 946 | vmx_load_host_state(vmx); |
905 | save_nmsrs = 0; | 947 | save_nmsrs = 0; |
906 | #ifdef CONFIG_X86_64 | 948 | #ifdef CONFIG_X86_64 |
907 | if (is_long_mode(&vmx->vcpu)) { | 949 | if (is_long_mode(&vmx->vcpu)) { |
908 | int index; | ||
909 | |||
910 | index = __find_msr_index(vmx, MSR_SYSCALL_MASK); | 950 | index = __find_msr_index(vmx, MSR_SYSCALL_MASK); |
911 | if (index >= 0) | 951 | if (index >= 0) |
912 | move_msr_up(vmx, index, save_nmsrs++); | 952 | move_msr_up(vmx, index, save_nmsrs++); |
@@ -916,25 +956,23 @@ static void setup_msrs(struct vcpu_vmx *vmx)
916 | index = __find_msr_index(vmx, MSR_CSTAR); | 956 | index = __find_msr_index(vmx, MSR_CSTAR); |
917 | if (index >= 0) | 957 | if (index >= 0) |
918 | move_msr_up(vmx, index, save_nmsrs++); | 958 | move_msr_up(vmx, index, save_nmsrs++); |
919 | index = __find_msr_index(vmx, MSR_KERNEL_GS_BASE); | 959 | index = __find_msr_index(vmx, MSR_TSC_AUX); |
920 | if (index >= 0) | 960 | if (index >= 0 && vmx->rdtscp_enabled) |
921 | move_msr_up(vmx, index, save_nmsrs++); | 961 | move_msr_up(vmx, index, save_nmsrs++); |
922 | /* | 962 | /* |
923 | * MSR_K6_STAR is only needed on long mode guests, and only | 963 | * MSR_K6_STAR is only needed on long mode guests, and only |
924 | * if efer.sce is enabled. | 964 | * if efer.sce is enabled. |
925 | */ | 965 | */ |
926 | index = __find_msr_index(vmx, MSR_K6_STAR); | 966 | index = __find_msr_index(vmx, MSR_K6_STAR); |
927 | if ((index >= 0) && (vmx->vcpu.arch.shadow_efer & EFER_SCE)) | 967 | if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE)) |
928 | move_msr_up(vmx, index, save_nmsrs++); | 968 | move_msr_up(vmx, index, save_nmsrs++); |
929 | } | 969 | } |
930 | #endif | 970 | #endif |
931 | vmx->save_nmsrs = save_nmsrs; | 971 | index = __find_msr_index(vmx, MSR_EFER); |
972 | if (index >= 0 && update_transition_efer(vmx, index)) | ||
973 | move_msr_up(vmx, index, save_nmsrs++); | ||
932 | 974 | ||
933 | #ifdef CONFIG_X86_64 | 975 | vmx->save_nmsrs = save_nmsrs; |
934 | vmx->msr_offset_kernel_gs_base = | ||
935 | __find_msr_index(vmx, MSR_KERNEL_GS_BASE); | ||
936 | #endif | ||
937 | vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER); | ||
938 | 976 | ||
939 | if (cpu_has_vmx_msr_bitmap()) { | 977 | if (cpu_has_vmx_msr_bitmap()) { |
940 | if (is_long_mode(&vmx->vcpu)) | 978 | if (is_long_mode(&vmx->vcpu)) |
@@ -976,7 +1014,7 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
976 | static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | 1014 | static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) |
977 | { | 1015 | { |
978 | u64 data; | 1016 | u64 data; |
979 | struct kvm_msr_entry *msr; | 1017 | struct shared_msr_entry *msr; |
980 | 1018 | ||
981 | if (!pdata) { | 1019 | if (!pdata) { |
982 | printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); | 1020 | printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); |
@@ -991,9 +1029,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
991 | case MSR_GS_BASE: | 1029 | case MSR_GS_BASE: |
992 | data = vmcs_readl(GUEST_GS_BASE); | 1030 | data = vmcs_readl(GUEST_GS_BASE); |
993 | break; | 1031 | break; |
1032 | case MSR_KERNEL_GS_BASE: | ||
1033 | vmx_load_host_state(to_vmx(vcpu)); | ||
1034 | data = to_vmx(vcpu)->msr_guest_kernel_gs_base; | ||
1035 | break; | ||
1036 | #endif | ||
994 | case MSR_EFER: | 1037 | case MSR_EFER: |
995 | return kvm_get_msr_common(vcpu, msr_index, pdata); | 1038 | return kvm_get_msr_common(vcpu, msr_index, pdata); |
996 | #endif | ||
997 | case MSR_IA32_TSC: | 1039 | case MSR_IA32_TSC: |
998 | data = guest_read_tsc(); | 1040 | data = guest_read_tsc(); |
999 | break; | 1041 | break; |
@@ -1006,7 +1048,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
1006 | case MSR_IA32_SYSENTER_ESP: | 1048 | case MSR_IA32_SYSENTER_ESP: |
1007 | data = vmcs_readl(GUEST_SYSENTER_ESP); | 1049 | data = vmcs_readl(GUEST_SYSENTER_ESP); |
1008 | break; | 1050 | break; |
1051 | case MSR_TSC_AUX: | ||
1052 | if (!to_vmx(vcpu)->rdtscp_enabled) | ||
1053 | return 1; | ||
1054 | /* Otherwise falls through */ | ||
1009 | default: | 1055 | default: |
1056 | vmx_load_host_state(to_vmx(vcpu)); | ||
1010 | msr = find_msr_entry(to_vmx(vcpu), msr_index); | 1057 | msr = find_msr_entry(to_vmx(vcpu), msr_index); |
1011 | if (msr) { | 1058 | if (msr) { |
1012 | vmx_load_host_state(to_vmx(vcpu)); | 1059 | vmx_load_host_state(to_vmx(vcpu)); |
@@ -1028,7 +1075,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
1028 | static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | 1075 | static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) |
1029 | { | 1076 | { |
1030 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1077 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1031 | struct kvm_msr_entry *msr; | 1078 | struct shared_msr_entry *msr; |
1032 | u64 host_tsc; | 1079 | u64 host_tsc; |
1033 | int ret = 0; | 1080 | int ret = 0; |
1034 | 1081 | ||
@@ -1044,6 +1091,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1044 | case MSR_GS_BASE: | 1091 | case MSR_GS_BASE: |
1045 | vmcs_writel(GUEST_GS_BASE, data); | 1092 | vmcs_writel(GUEST_GS_BASE, data); |
1046 | break; | 1093 | break; |
1094 | case MSR_KERNEL_GS_BASE: | ||
1095 | vmx_load_host_state(vmx); | ||
1096 | vmx->msr_guest_kernel_gs_base = data; | ||
1097 | break; | ||
1047 | #endif | 1098 | #endif |
1048 | case MSR_IA32_SYSENTER_CS: | 1099 | case MSR_IA32_SYSENTER_CS: |
1049 | vmcs_write32(GUEST_SYSENTER_CS, data); | 1100 | vmcs_write32(GUEST_SYSENTER_CS, data); |
@@ -1064,7 +1115,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1064 | vcpu->arch.pat = data; | 1115 | vcpu->arch.pat = data; |
1065 | break; | 1116 | break; |
1066 | } | 1117 | } |
1067 | /* Otherwise falls through to kvm_set_msr_common */ | 1118 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
1119 | break; | ||
1120 | case MSR_TSC_AUX: | ||
1121 | if (!vmx->rdtscp_enabled) | ||
1122 | return 1; | ||
1123 | /* Check reserved bit, higher 32 bits should be zero */ | ||
1124 | if ((data >> 32) != 0) | ||
1125 | return 1; | ||
1126 | /* Otherwise falls through */ | ||
1068 | default: | 1127 | default: |
1069 | msr = find_msr_entry(vmx, msr_index); | 1128 | msr = find_msr_entry(vmx, msr_index); |
1070 | if (msr) { | 1129 | if (msr) { |
@@ -1097,30 +1156,14 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
1097 | } | 1156 | } |
1098 | } | 1157 | } |
1099 | 1158 | ||
1100 | static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | 1159 | static void set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) |
1101 | { | 1160 | { |
1102 | int old_debug = vcpu->guest_debug; | ||
1103 | unsigned long flags; | ||
1104 | |||
1105 | vcpu->guest_debug = dbg->control; | ||
1106 | if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) | ||
1107 | vcpu->guest_debug = 0; | ||
1108 | |||
1109 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | 1161 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
1110 | vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); | 1162 | vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); |
1111 | else | 1163 | else |
1112 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); | 1164 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); |
1113 | 1165 | ||
1114 | flags = vmcs_readl(GUEST_RFLAGS); | ||
1115 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
1116 | flags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
1117 | else if (old_debug & KVM_GUESTDBG_SINGLESTEP) | ||
1118 | flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
1119 | vmcs_writel(GUEST_RFLAGS, flags); | ||
1120 | |||
1121 | update_exception_bitmap(vcpu); | 1166 | update_exception_bitmap(vcpu); |
1122 | |||
1123 | return 0; | ||
1124 | } | 1167 | } |
1125 | 1168 | ||
1126 | static __init int cpu_has_kvm_support(void) | 1169 | static __init int cpu_has_kvm_support(void) |
@@ -1139,12 +1182,15 @@ static __init int vmx_disabled_by_bios(void)
1139 | /* locked but not enabled */ | 1182 | /* locked but not enabled */ |
1140 | } | 1183 | } |
1141 | 1184 | ||
1142 | static void hardware_enable(void *garbage) | 1185 | static int hardware_enable(void *garbage) |
1143 | { | 1186 | { |
1144 | int cpu = raw_smp_processor_id(); | 1187 | int cpu = raw_smp_processor_id(); |
1145 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); | 1188 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); |
1146 | u64 old; | 1189 | u64 old; |
1147 | 1190 | ||
1191 | if (read_cr4() & X86_CR4_VMXE) | ||
1192 | return -EBUSY; | ||
1193 | |||
1148 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); | 1194 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); |
1149 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); | 1195 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); |
1150 | if ((old & (FEATURE_CONTROL_LOCKED | | 1196 | if ((old & (FEATURE_CONTROL_LOCKED | |
@@ -1159,6 +1205,10 @@ static void hardware_enable(void *garbage)
1159 | asm volatile (ASM_VMX_VMXON_RAX | 1205 | asm volatile (ASM_VMX_VMXON_RAX |
1160 | : : "a"(&phys_addr), "m"(phys_addr) | 1206 | : : "a"(&phys_addr), "m"(phys_addr) |
1161 | : "memory", "cc"); | 1207 | : "memory", "cc"); |
1208 | |||
1209 | ept_sync_global(); | ||
1210 | |||
1211 | return 0; | ||
1162 | } | 1212 | } |
1163 | 1213 | ||
1164 | static void vmclear_local_vcpus(void) | 1214 | static void vmclear_local_vcpus(void) |
@@ -1232,6 +1282,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1232 | CPU_BASED_USE_IO_BITMAPS | | 1282 | CPU_BASED_USE_IO_BITMAPS | |
1233 | CPU_BASED_MOV_DR_EXITING | | 1283 | CPU_BASED_MOV_DR_EXITING | |
1234 | CPU_BASED_USE_TSC_OFFSETING | | 1284 | CPU_BASED_USE_TSC_OFFSETING | |
1285 | CPU_BASED_MWAIT_EXITING | | ||
1286 | CPU_BASED_MONITOR_EXITING | | ||
1235 | CPU_BASED_INVLPG_EXITING; | 1287 | CPU_BASED_INVLPG_EXITING; |
1236 | opt = CPU_BASED_TPR_SHADOW | | 1288 | opt = CPU_BASED_TPR_SHADOW | |
1237 | CPU_BASED_USE_MSR_BITMAPS | | 1289 | CPU_BASED_USE_MSR_BITMAPS | |
@@ -1250,7 +1302,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1250 | SECONDARY_EXEC_WBINVD_EXITING | | 1302 | SECONDARY_EXEC_WBINVD_EXITING | |
1251 | SECONDARY_EXEC_ENABLE_VPID | | 1303 | SECONDARY_EXEC_ENABLE_VPID | |
1252 | SECONDARY_EXEC_ENABLE_EPT | | 1304 | SECONDARY_EXEC_ENABLE_EPT | |
1253 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 1305 | SECONDARY_EXEC_UNRESTRICTED_GUEST | |
1306 | SECONDARY_EXEC_PAUSE_LOOP_EXITING | | ||
1307 | SECONDARY_EXEC_RDTSCP; | ||
1254 | if (adjust_vmx_controls(min2, opt2, | 1308 | if (adjust_vmx_controls(min2, opt2, |
1255 | MSR_IA32_VMX_PROCBASED_CTLS2, | 1309 | MSR_IA32_VMX_PROCBASED_CTLS2, |
1256 | &_cpu_based_2nd_exec_control) < 0) | 1310 | &_cpu_based_2nd_exec_control) < 0) |
@@ -1344,15 +1398,17 @@ static void free_kvm_area(void)
1344 | { | 1398 | { |
1345 | int cpu; | 1399 | int cpu; |
1346 | 1400 | ||
1347 | for_each_online_cpu(cpu) | 1401 | for_each_possible_cpu(cpu) { |
1348 | free_vmcs(per_cpu(vmxarea, cpu)); | 1402 | free_vmcs(per_cpu(vmxarea, cpu)); |
1403 | per_cpu(vmxarea, cpu) = NULL; | ||
1404 | } | ||
1349 | } | 1405 | } |
1350 | 1406 | ||
1351 | static __init int alloc_kvm_area(void) | 1407 | static __init int alloc_kvm_area(void) |
1352 | { | 1408 | { |
1353 | int cpu; | 1409 | int cpu; |
1354 | 1410 | ||
1355 | for_each_online_cpu(cpu) { | 1411 | for_each_possible_cpu(cpu) { |
1356 | struct vmcs *vmcs; | 1412 | struct vmcs *vmcs; |
1357 | 1413 | ||
1358 | vmcs = alloc_vmcs_cpu(cpu); | 1414 | vmcs = alloc_vmcs_cpu(cpu); |
@@ -1394,6 +1450,9 @@ static __init int hardware_setup(void)
1394 | if (enable_ept && !cpu_has_vmx_ept_2m_page()) | 1450 | if (enable_ept && !cpu_has_vmx_ept_2m_page()) |
1395 | kvm_disable_largepages(); | 1451 | kvm_disable_largepages(); |
1396 | 1452 | ||
1453 | if (!cpu_has_vmx_ple()) | ||
1454 | ple_gap = 0; | ||
1455 | |||
1397 | return alloc_kvm_area(); | 1456 | return alloc_kvm_area(); |
1398 | } | 1457 | } |
1399 | 1458 | ||
@@ -1431,8 +1490,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
1431 | vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); | 1490 | vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); |
1432 | 1491 | ||
1433 | flags = vmcs_readl(GUEST_RFLAGS); | 1492 | flags = vmcs_readl(GUEST_RFLAGS); |
1434 | flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM); | 1493 | flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; |
1435 | flags |= (vmx->rmode.save_iopl << IOPL_SHIFT); | 1494 | flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; |
1436 | vmcs_writel(GUEST_RFLAGS, flags); | 1495 | vmcs_writel(GUEST_RFLAGS, flags); |
1437 | 1496 | ||
1438 | vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | | 1497 | vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | |
@@ -1459,8 +1518,12 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
1459 | static gva_t rmode_tss_base(struct kvm *kvm) | 1518 | static gva_t rmode_tss_base(struct kvm *kvm) |
1460 | { | 1519 | { |
1461 | if (!kvm->arch.tss_addr) { | 1520 | if (!kvm->arch.tss_addr) { |
1462 | gfn_t base_gfn = kvm->memslots[0].base_gfn + | 1521 | struct kvm_memslots *slots; |
1463 | kvm->memslots[0].npages - 3; | 1522 | gfn_t base_gfn; |
1523 | |||
1524 | slots = rcu_dereference(kvm->memslots); | ||
1525 | base_gfn = kvm->memslots->memslots[0].base_gfn + | ||
1526 | kvm->memslots->memslots[0].npages - 3; | ||
1464 | return base_gfn << PAGE_SHIFT; | 1527 | return base_gfn << PAGE_SHIFT; |
1465 | } | 1528 | } |
1466 | return kvm->arch.tss_addr; | 1529 | return kvm->arch.tss_addr; |
@@ -1501,8 +1564,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
1501 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); | 1564 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); |
1502 | 1565 | ||
1503 | flags = vmcs_readl(GUEST_RFLAGS); | 1566 | flags = vmcs_readl(GUEST_RFLAGS); |
1504 | vmx->rmode.save_iopl | 1567 | vmx->rmode.save_rflags = flags; |
1505 | = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | ||
1506 | 1568 | ||
1507 | flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; | 1569 | flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; |
1508 | 1570 | ||
@@ -1536,11 +1598,17 @@ continue_rmode:
1536 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | 1598 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) |
1537 | { | 1599 | { |
1538 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1600 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1539 | struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); | 1601 | struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); |
1540 | 1602 | ||
1541 | vcpu->arch.shadow_efer = efer; | ||
1542 | if (!msr) | 1603 | if (!msr) |
1543 | return; | 1604 | return; |
1605 | |||
1606 | /* | ||
1607 | * Force kernel_gs_base reloading before EFER changes, as control | ||
1608 | * of this msr depends on is_long_mode(). | ||
1609 | */ | ||
1610 | vmx_load_host_state(to_vmx(vcpu)); | ||
1611 | vcpu->arch.efer = efer; | ||
1544 | if (efer & EFER_LMA) { | 1612 | if (efer & EFER_LMA) { |
1545 | vmcs_write32(VM_ENTRY_CONTROLS, | 1613 | vmcs_write32(VM_ENTRY_CONTROLS, |
1546 | vmcs_read32(VM_ENTRY_CONTROLS) | | 1614 | vmcs_read32(VM_ENTRY_CONTROLS) | |
@@ -1570,13 +1638,13 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
1570 | (guest_tr_ar & ~AR_TYPE_MASK) | 1638 | (guest_tr_ar & ~AR_TYPE_MASK) |
1571 | | AR_TYPE_BUSY_64_TSS); | 1639 | | AR_TYPE_BUSY_64_TSS); |
1572 | } | 1640 | } |
1573 | vcpu->arch.shadow_efer |= EFER_LMA; | 1641 | vcpu->arch.efer |= EFER_LMA; |
1574 | vmx_set_efer(vcpu, vcpu->arch.shadow_efer); | 1642 | vmx_set_efer(vcpu, vcpu->arch.efer); |
1575 | } | 1643 | } |
1576 | 1644 | ||
1577 | static void exit_lmode(struct kvm_vcpu *vcpu) | 1645 | static void exit_lmode(struct kvm_vcpu *vcpu) |
1578 | { | 1646 | { |
1579 | vcpu->arch.shadow_efer &= ~EFER_LMA; | 1647 | vcpu->arch.efer &= ~EFER_LMA; |
1580 | 1648 | ||
1581 | vmcs_write32(VM_ENTRY_CONTROLS, | 1649 | vmcs_write32(VM_ENTRY_CONTROLS, |
1582 | vmcs_read32(VM_ENTRY_CONTROLS) | 1650 | vmcs_read32(VM_ENTRY_CONTROLS) |
@@ -1592,10 +1660,20 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
1592 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); | 1660 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); |
1593 | } | 1661 | } |
1594 | 1662 | ||
1663 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | ||
1664 | { | ||
1665 | ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; | ||
1666 | |||
1667 | vcpu->arch.cr0 &= ~cr0_guest_owned_bits; | ||
1668 | vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; | ||
1669 | } | ||
1670 | |||
1595 | static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | 1671 | static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) |
1596 | { | 1672 | { |
1597 | vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK; | 1673 | ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; |
1598 | vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; | 1674 | |
1675 | vcpu->arch.cr4 &= ~cr4_guest_owned_bits; | ||
1676 | vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits; | ||
1599 | } | 1677 | } |
1600 | 1678 | ||
1601 | static void ept_load_pdptrs(struct kvm_vcpu *vcpu) | 1679 | static void ept_load_pdptrs(struct kvm_vcpu *vcpu) |
@@ -1640,7 +1718,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1640 | (CPU_BASED_CR3_LOAD_EXITING | | 1718 | (CPU_BASED_CR3_LOAD_EXITING | |
1641 | CPU_BASED_CR3_STORE_EXITING)); | 1719 | CPU_BASED_CR3_STORE_EXITING)); |
1642 | vcpu->arch.cr0 = cr0; | 1720 | vcpu->arch.cr0 = cr0; |
1643 | vmx_set_cr4(vcpu, vcpu->arch.cr4); | 1721 | vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); |
1644 | } else if (!is_paging(vcpu)) { | 1722 | } else if (!is_paging(vcpu)) { |
1645 | /* From nonpaging to paging */ | 1723 | /* From nonpaging to paging */ |
1646 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, | 1724 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, |
@@ -1648,23 +1726,13 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1648 | ~(CPU_BASED_CR3_LOAD_EXITING | | 1726 | ~(CPU_BASED_CR3_LOAD_EXITING | |
1649 | CPU_BASED_CR3_STORE_EXITING)); | 1727 | CPU_BASED_CR3_STORE_EXITING)); |
1650 | vcpu->arch.cr0 = cr0; | 1728 | vcpu->arch.cr0 = cr0; |
1651 | vmx_set_cr4(vcpu, vcpu->arch.cr4); | 1729 | vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); |
1652 | } | 1730 | } |
1653 | 1731 | ||
1654 | if (!(cr0 & X86_CR0_WP)) | 1732 | if (!(cr0 & X86_CR0_WP)) |
1655 | *hw_cr0 &= ~X86_CR0_WP; | 1733 | *hw_cr0 &= ~X86_CR0_WP; |
1656 | } | 1734 | } |
1657 | 1735 | ||
1658 | static void ept_update_paging_mode_cr4(unsigned long *hw_cr4, | ||
1659 | struct kvm_vcpu *vcpu) | ||
1660 | { | ||
1661 | if (!is_paging(vcpu)) { | ||
1662 | *hw_cr4 &= ~X86_CR4_PAE; | ||
1663 | *hw_cr4 |= X86_CR4_PSE; | ||
1664 | } else if (!(vcpu->arch.cr4 & X86_CR4_PAE)) | ||
1665 | *hw_cr4 &= ~X86_CR4_PAE; | ||
1666 | } | ||
1667 | |||
1668 | static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 1736 | static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
1669 | { | 1737 | { |
1670 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1738 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -1676,8 +1744,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1676 | else | 1744 | else |
1677 | hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; | 1745 | hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; |
1678 | 1746 | ||
1679 | vmx_fpu_deactivate(vcpu); | ||
1680 | |||
1681 | if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) | 1747 | if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) |
1682 | enter_pmode(vcpu); | 1748 | enter_pmode(vcpu); |
1683 | 1749 | ||
@@ -1685,7 +1751,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1685 | enter_rmode(vcpu); | 1751 | enter_rmode(vcpu); |
1686 | 1752 | ||
1687 | #ifdef CONFIG_X86_64 | 1753 | #ifdef CONFIG_X86_64 |
1688 | if (vcpu->arch.shadow_efer & EFER_LME) { | 1754 | if (vcpu->arch.efer & EFER_LME) { |
1689 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) | 1755 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) |
1690 | enter_lmode(vcpu); | 1756 | enter_lmode(vcpu); |
1691 | if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) | 1757 | if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) |
@@ -1696,12 +1762,12 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1696 | if (enable_ept) | 1762 | if (enable_ept) |
1697 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); | 1763 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); |
1698 | 1764 | ||
1765 | if (!vcpu->fpu_active) | ||
1766 | hw_cr0 |= X86_CR0_TS | X86_CR0_MP; | ||
1767 | |||
1699 | vmcs_writel(CR0_READ_SHADOW, cr0); | 1768 | vmcs_writel(CR0_READ_SHADOW, cr0); |
1700 | vmcs_writel(GUEST_CR0, hw_cr0); | 1769 | vmcs_writel(GUEST_CR0, hw_cr0); |
1701 | vcpu->arch.cr0 = cr0; | 1770 | vcpu->arch.cr0 = cr0; |
1702 | |||
1703 | if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) | ||
1704 | vmx_fpu_activate(vcpu); | ||
1705 | } | 1771 | } |
1706 | 1772 | ||
1707 | static u64 construct_eptp(unsigned long root_hpa) | 1773 | static u64 construct_eptp(unsigned long root_hpa) |
@@ -1727,12 +1793,11 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1727 | vmcs_write64(EPT_POINTER, eptp); | 1793 | vmcs_write64(EPT_POINTER, eptp); |
1728 | guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : | 1794 | guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : |
1729 | vcpu->kvm->arch.ept_identity_map_addr; | 1795 | vcpu->kvm->arch.ept_identity_map_addr; |
1796 | ept_load_pdptrs(vcpu); | ||
1730 | } | 1797 | } |
1731 | 1798 | ||
1732 | vmx_flush_tlb(vcpu); | 1799 | vmx_flush_tlb(vcpu); |
1733 | vmcs_writel(GUEST_CR3, guest_cr3); | 1800 | vmcs_writel(GUEST_CR3, guest_cr3); |
1734 | if (vcpu->arch.cr0 & X86_CR0_PE) | ||
1735 | vmx_fpu_deactivate(vcpu); | ||
1736 | } | 1801 | } |
1737 | 1802 | ||
1738 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 1803 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
@@ -1741,8 +1806,14 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1741 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); | 1806 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); |
1742 | 1807 | ||
1743 | vcpu->arch.cr4 = cr4; | 1808 | vcpu->arch.cr4 = cr4; |
1744 | if (enable_ept) | 1809 | if (enable_ept) { |
1745 | ept_update_paging_mode_cr4(&hw_cr4, vcpu); | 1810 | if (!is_paging(vcpu)) { |
1811 | hw_cr4 &= ~X86_CR4_PAE; | ||
1812 | hw_cr4 |= X86_CR4_PSE; | ||
1813 | } else if (!(cr4 & X86_CR4_PAE)) { | ||
1814 | hw_cr4 &= ~X86_CR4_PAE; | ||
1815 | } | ||
1816 | } | ||
1746 | 1817 | ||
1747 | vmcs_writel(CR4_READ_SHADOW, cr4); | 1818 | vmcs_writel(CR4_READ_SHADOW, cr4); |
1748 | vmcs_writel(GUEST_CR4, hw_cr4); | 1819 | vmcs_writel(GUEST_CR4, hw_cr4); |
@@ -1780,7 +1851,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
1780 | 1851 | ||
1781 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) | 1852 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) |
1782 | { | 1853 | { |
1783 | if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */ | 1854 | if (!is_protmode(vcpu)) |
1784 | return 0; | 1855 | return 0; |
1785 | 1856 | ||
1786 | if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ | 1857 | if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ |
@@ -2035,7 +2106,7 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
2035 | static bool guest_state_valid(struct kvm_vcpu *vcpu) | 2106 | static bool guest_state_valid(struct kvm_vcpu *vcpu) |
2036 | { | 2107 | { |
2037 | /* real mode guest state checks */ | 2108 | /* real mode guest state checks */ |
2038 | if (!(vcpu->arch.cr0 & X86_CR0_PE)) { | 2109 | if (!is_protmode(vcpu)) { |
2039 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) | 2110 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) |
2040 | return false; | 2111 | return false; |
2041 | if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) | 2112 | if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) |
@@ -2168,7 +2239,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
2168 | struct kvm_userspace_memory_region kvm_userspace_mem; | 2239 | struct kvm_userspace_memory_region kvm_userspace_mem; |
2169 | int r = 0; | 2240 | int r = 0; |
2170 | 2241 | ||
2171 | down_write(&kvm->slots_lock); | 2242 | mutex_lock(&kvm->slots_lock); |
2172 | if (kvm->arch.apic_access_page) | 2243 | if (kvm->arch.apic_access_page) |
2173 | goto out; | 2244 | goto out; |
2174 | kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; | 2245 | kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; |
@@ -2181,7 +2252,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
2181 | 2252 | ||
2182 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); | 2253 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); |
2183 | out: | 2254 | out: |
2184 | up_write(&kvm->slots_lock); | 2255 | mutex_unlock(&kvm->slots_lock); |
2185 | return r; | 2256 | return r; |
2186 | } | 2257 | } |
2187 | 2258 | ||
@@ -2190,7 +2261,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
2190 | struct kvm_userspace_memory_region kvm_userspace_mem; | 2261 | struct kvm_userspace_memory_region kvm_userspace_mem; |
2191 | int r = 0; | 2262 | int r = 0; |
2192 | 2263 | ||
2193 | down_write(&kvm->slots_lock); | 2264 | mutex_lock(&kvm->slots_lock); |
2194 | if (kvm->arch.ept_identity_pagetable) | 2265 | if (kvm->arch.ept_identity_pagetable) |
2195 | goto out; | 2266 | goto out; |
2196 | kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; | 2267 | kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; |
@@ -2205,7 +2276,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
2205 | kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, | 2276 | kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, |
2206 | kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); | 2277 | kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); |
2207 | out: | 2278 | out: |
2208 | up_write(&kvm->slots_lock); | 2279 | mutex_unlock(&kvm->slots_lock); |
2209 | return r; | 2280 | return r; |
2210 | } | 2281 | } |
2211 | 2282 | ||
@@ -2302,13 +2373,22 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2302 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 2373 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
2303 | if (vmx->vpid == 0) | 2374 | if (vmx->vpid == 0) |
2304 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; | 2375 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; |
2305 | if (!enable_ept) | 2376 | if (!enable_ept) { |
2306 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; | 2377 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; |
2378 | enable_unrestricted_guest = 0; | ||
2379 | } | ||
2307 | if (!enable_unrestricted_guest) | 2380 | if (!enable_unrestricted_guest) |
2308 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | 2381 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; |
2382 | if (!ple_gap) | ||
2383 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
2309 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 2384 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
2310 | } | 2385 | } |
2311 | 2386 | ||
2387 | if (ple_gap) { | ||
2388 | vmcs_write32(PLE_GAP, ple_gap); | ||
2389 | vmcs_write32(PLE_WINDOW, ple_window); | ||
2390 | } | ||
2391 | |||
2312 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf); | 2392 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf); |
2313 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); | 2393 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); |
2314 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ | 2394 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ |
@@ -2368,18 +2448,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2368 | for (i = 0; i < NR_VMX_MSR; ++i) { | 2448 | for (i = 0; i < NR_VMX_MSR; ++i) { |
2369 | u32 index = vmx_msr_index[i]; | 2449 | u32 index = vmx_msr_index[i]; |
2370 | u32 data_low, data_high; | 2450 | u32 data_low, data_high; |
2371 | u64 data; | ||
2372 | int j = vmx->nmsrs; | 2451 | int j = vmx->nmsrs; |
2373 | 2452 | ||
2374 | if (rdmsr_safe(index, &data_low, &data_high) < 0) | 2453 | if (rdmsr_safe(index, &data_low, &data_high) < 0) |
2375 | continue; | 2454 | continue; |
2376 | if (wrmsr_safe(index, data_low, data_high) < 0) | 2455 | if (wrmsr_safe(index, data_low, data_high) < 0) |
2377 | continue; | 2456 | continue; |
2378 | data = data_low | ((u64)data_high << 32); | 2457 | vmx->guest_msrs[j].index = i; |
2379 | vmx->host_msrs[j].index = index; | 2458 | vmx->guest_msrs[j].data = 0; |
2380 | vmx->host_msrs[j].reserved = 0; | 2459 | vmx->guest_msrs[j].mask = -1ull; |
2381 | vmx->host_msrs[j].data = data; | ||
2382 | vmx->guest_msrs[j] = vmx->host_msrs[j]; | ||
2383 | ++vmx->nmsrs; | 2460 | ++vmx->nmsrs; |
2384 | } | 2461 | } |
2385 | 2462 | ||
@@ -2389,7 +2466,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2389 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); | 2466 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); |
2390 | 2467 | ||
2391 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); | 2468 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); |
2392 | vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); | 2469 | vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; |
2470 | if (enable_ept) | ||
2471 | vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; | ||
2472 | vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); | ||
2393 | 2473 | ||
2394 | tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; | 2474 | tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; |
2395 | rdtscll(tsc_this); | 2475 | rdtscll(tsc_this); |
@@ -2414,10 +2494,10 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2414 | { | 2494 | { |
2415 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2495 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2416 | u64 msr; | 2496 | u64 msr; |
2417 | int ret; | 2497 | int ret, idx; |
2418 | 2498 | ||
2419 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); | 2499 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); |
2420 | down_read(&vcpu->kvm->slots_lock); | 2500 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
2421 | if (!init_rmode(vmx->vcpu.kvm)) { | 2501 | if (!init_rmode(vmx->vcpu.kvm)) { |
2422 | ret = -ENOMEM; | 2502 | ret = -ENOMEM; |
2423 | goto out; | 2503 | goto out; |
@@ -2510,8 +2590,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2510 | if (vmx->vpid != 0) | 2590 | if (vmx->vpid != 0) |
2511 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | 2591 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); |
2512 | 2592 | ||
2513 | vmx->vcpu.arch.cr0 = 0x60000010; | 2593 | vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
2514 | vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ | 2594 | vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ |
2515 | vmx_set_cr4(&vmx->vcpu, 0); | 2595 | vmx_set_cr4(&vmx->vcpu, 0); |
2516 | vmx_set_efer(&vmx->vcpu, 0); | 2596 | vmx_set_efer(&vmx->vcpu, 0); |
2517 | vmx_fpu_activate(&vmx->vcpu); | 2597 | vmx_fpu_activate(&vmx->vcpu); |
@@ -2525,7 +2605,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2525 | vmx->emulation_required = 0; | 2605 | vmx->emulation_required = 0; |
2526 | 2606 | ||
2527 | out: | 2607 | out: |
2528 | up_read(&vcpu->kvm->slots_lock); | 2608 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
2529 | return ret; | 2609 | return ret; |
2530 | } | 2610 | } |
2531 | 2611 | ||
@@ -2623,8 +2703,35 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
2623 | return 0; | 2703 | return 0; |
2624 | 2704 | ||
2625 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | 2705 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & |
2626 | (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS | | 2706 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI)); |
2627 | GUEST_INTR_STATE_NMI)); | 2707 | } |
2708 | |||
2709 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | ||
2710 | { | ||
2711 | if (!cpu_has_virtual_nmis()) | ||
2712 | return to_vmx(vcpu)->soft_vnmi_blocked; | ||
2713 | else | ||
2714 | return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | ||
2715 | GUEST_INTR_STATE_NMI); | ||
2716 | } | ||
2717 | |||
2718 | static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | ||
2719 | { | ||
2720 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2721 | |||
2722 | if (!cpu_has_virtual_nmis()) { | ||
2723 | if (vmx->soft_vnmi_blocked != masked) { | ||
2724 | vmx->soft_vnmi_blocked = masked; | ||
2725 | vmx->vnmi_blocked_time = 0; | ||
2726 | } | ||
2727 | } else { | ||
2728 | if (masked) | ||
2729 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
2730 | GUEST_INTR_STATE_NMI); | ||
2731 | else | ||
2732 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
2733 | GUEST_INTR_STATE_NMI); | ||
2734 | } | ||
2628 | } | 2735 | } |
2629 | 2736 | ||
2630 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) | 2737 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) |
@@ -2659,7 +2766,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
2659 | * Cause the #SS fault with 0 error code in VM86 mode. | 2766 | * Cause the #SS fault with 0 error code in VM86 mode. |
2660 | */ | 2767 | */ |
2661 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) | 2768 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) |
2662 | if (emulate_instruction(vcpu, NULL, 0, 0, 0) == EMULATE_DONE) | 2769 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE) |
2663 | return 1; | 2770 | return 1; |
2664 | /* | 2771 | /* |
2665 | * Forward all other exceptions that are valid in real mode. | 2772 | * Forward all other exceptions that are valid in real mode. |
@@ -2674,6 +2781,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
2674 | kvm_queue_exception(vcpu, vec); | 2781 | kvm_queue_exception(vcpu, vec); |
2675 | return 1; | 2782 | return 1; |
2676 | case BP_VECTOR: | 2783 | case BP_VECTOR: |
2784 | /* | ||
2785 | * Update instruction length as we may reinject the exception | ||
2786 | * from user space while in guest debugging mode. | ||
2787 | */ | ||
2788 | to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = | ||
2789 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | ||
2677 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | 2790 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) |
2678 | return 0; | 2791 | return 0; |
2679 | /* fall through */ | 2792 | /* fall through */ |
@@ -2710,15 +2823,16 @@ static void kvm_machine_check(void)
2710 | #endif | 2823 | #endif |
2711 | } | 2824 | } |
2712 | 2825 | ||
2713 | static int handle_machine_check(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2826 | static int handle_machine_check(struct kvm_vcpu *vcpu) |
2714 | { | 2827 | { |
2715 | /* already handled by vcpu_run */ | 2828 | /* already handled by vcpu_run */ |
2716 | return 1; | 2829 | return 1; |
2717 | } | 2830 | } |
2718 | 2831 | ||
2719 | static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2832 | static int handle_exception(struct kvm_vcpu *vcpu) |
2720 | { | 2833 | { |
2721 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2834 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2835 | struct kvm_run *kvm_run = vcpu->run; | ||
2722 | u32 intr_info, ex_no, error_code; | 2836 | u32 intr_info, ex_no, error_code; |
2723 | unsigned long cr2, rip, dr6; | 2837 | unsigned long cr2, rip, dr6; |
2724 | u32 vect_info; | 2838 | u32 vect_info; |
@@ -2728,12 +2842,17 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2728 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 2842 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
2729 | 2843 | ||
2730 | if (is_machine_check(intr_info)) | 2844 | if (is_machine_check(intr_info)) |
2731 | return handle_machine_check(vcpu, kvm_run); | 2845 | return handle_machine_check(vcpu); |
2732 | 2846 | ||
2733 | if ((vect_info & VECTORING_INFO_VALID_MASK) && | 2847 | if ((vect_info & VECTORING_INFO_VALID_MASK) && |
2734 | !is_page_fault(intr_info)) | 2848 | !is_page_fault(intr_info)) { |
2735 | printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " | 2849 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
2736 | "intr info 0x%x\n", __func__, vect_info, intr_info); | 2850 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; |
2851 | vcpu->run->internal.ndata = 2; | ||
2852 | vcpu->run->internal.data[0] = vect_info; | ||
2853 | vcpu->run->internal.data[1] = intr_info; | ||
2854 | return 0; | ||
2855 | } | ||
2737 | 2856 | ||
2738 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) | 2857 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) |
2739 | return 1; /* already handled by vmx_vcpu_run() */ | 2858 | return 1; /* already handled by vmx_vcpu_run() */ |
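With the hunk above, a simultaneous-exception condition is no longer just logged with printk(); it is reported to userspace as an internal error carrying the vectoring and interrupt info. A hypothetical userspace-side check of that exit could look like the sketch below (the helper name is made up; it only assumes the KVM_INTERNAL_ERROR_SIMUL_EX suberror from the kernel headers):

        #include <stdio.h>
        #include <linux/kvm.h>

        /* Hypothetical: report the new internal-error exit after KVM_RUN. */
        static void report_simul_ex(struct kvm_run *run)
        {
                if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR &&
                    run->internal.suberror == KVM_INTERNAL_ERROR_SIMUL_EX)
                        fprintf(stderr, "simultaneous exceptions: vectoring 0x%llx, intr 0x%llx\n",
                                (unsigned long long)run->internal.data[0],
                                (unsigned long long)run->internal.data[1]);
        }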
@@ -2744,7 +2863,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2744 | } | 2863 | } |
2745 | 2864 | ||
2746 | if (is_invalid_opcode(intr_info)) { | 2865 | if (is_invalid_opcode(intr_info)) { |
2747 | er = emulate_instruction(vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD); | 2866 | er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD); |
2748 | if (er != EMULATE_DONE) | 2867 | if (er != EMULATE_DONE) |
2749 | kvm_queue_exception(vcpu, UD_VECTOR); | 2868 | kvm_queue_exception(vcpu, UD_VECTOR); |
2750 | return 1; | 2869 | return 1; |
@@ -2790,6 +2909,13 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2790 | kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); | 2909 | kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); |
2791 | /* fall through */ | 2910 | /* fall through */ |
2792 | case BP_VECTOR: | 2911 | case BP_VECTOR: |
2912 | /* | ||
2913 | * Update instruction length as we may reinject #BP from | ||
2914 | * user space while in guest debugging mode. Reading it for | ||
2915 | * #DB as well causes no harm; it is not used in that case. | ||

2916 | */ | ||
2917 | vmx->vcpu.arch.event_exit_inst_len = | ||
2918 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | ||
2793 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 2919 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
2794 | kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; | 2920 | kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; |
2795 | kvm_run->debug.arch.exception = ex_no; | 2921 | kvm_run->debug.arch.exception = ex_no; |
@@ -2803,20 +2929,19 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2803 | return 0; | 2929 | return 0; |
2804 | } | 2930 | } |
2805 | 2931 | ||
2806 | static int handle_external_interrupt(struct kvm_vcpu *vcpu, | 2932 | static int handle_external_interrupt(struct kvm_vcpu *vcpu) |
2807 | struct kvm_run *kvm_run) | ||
2808 | { | 2933 | { |
2809 | ++vcpu->stat.irq_exits; | 2934 | ++vcpu->stat.irq_exits; |
2810 | return 1; | 2935 | return 1; |
2811 | } | 2936 | } |
2812 | 2937 | ||
2813 | static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2938 | static int handle_triple_fault(struct kvm_vcpu *vcpu) |
2814 | { | 2939 | { |
2815 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; | 2940 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; |
2816 | return 0; | 2941 | return 0; |
2817 | } | 2942 | } |
2818 | 2943 | ||
2819 | static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2944 | static int handle_io(struct kvm_vcpu *vcpu) |
2820 | { | 2945 | { |
2821 | unsigned long exit_qualification; | 2946 | unsigned long exit_qualification; |
2822 | int size, in, string; | 2947 | int size, in, string; |
@@ -2827,8 +2952,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2827 | string = (exit_qualification & 16) != 0; | 2952 | string = (exit_qualification & 16) != 0; |
2828 | 2953 | ||
2829 | if (string) { | 2954 | if (string) { |
2830 | if (emulate_instruction(vcpu, | 2955 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO) |
2831 | kvm_run, 0, 0, 0) == EMULATE_DO_MMIO) | ||
2832 | return 0; | 2956 | return 0; |
2833 | return 1; | 2957 | return 1; |
2834 | } | 2958 | } |
@@ -2838,7 +2962,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2838 | port = exit_qualification >> 16; | 2962 | port = exit_qualification >> 16; |
2839 | 2963 | ||
2840 | skip_emulated_instruction(vcpu); | 2964 | skip_emulated_instruction(vcpu); |
2841 | return kvm_emulate_pio(vcpu, kvm_run, in, size, port); | 2965 | return kvm_emulate_pio(vcpu, in, size, port); |
2842 | } | 2966 | } |
2843 | 2967 | ||
2844 | static void | 2968 | static void |
@@ -2852,7 +2976,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
2852 | hypercall[2] = 0xc1; | 2976 | hypercall[2] = 0xc1; |
2853 | } | 2977 | } |
2854 | 2978 | ||
2855 | static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2979 | static int handle_cr(struct kvm_vcpu *vcpu) |
2856 | { | 2980 | { |
2857 | unsigned long exit_qualification, val; | 2981 | unsigned long exit_qualification, val; |
2858 | int cr; | 2982 | int cr; |
@@ -2887,17 +3011,16 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2887 | return 1; | 3011 | return 1; |
2888 | if (cr8_prev <= cr8) | 3012 | if (cr8_prev <= cr8) |
2889 | return 1; | 3013 | return 1; |
2890 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; | 3014 | vcpu->run->exit_reason = KVM_EXIT_SET_TPR; |
2891 | return 0; | 3015 | return 0; |
2892 | } | 3016 | } |
2893 | }; | 3017 | }; |
2894 | break; | 3018 | break; |
2895 | case 2: /* clts */ | 3019 | case 2: /* clts */ |
2896 | vmx_fpu_deactivate(vcpu); | 3020 | vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); |
2897 | vcpu->arch.cr0 &= ~X86_CR0_TS; | 3021 | trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); |
2898 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); | ||
2899 | vmx_fpu_activate(vcpu); | ||
2900 | skip_emulated_instruction(vcpu); | 3022 | skip_emulated_instruction(vcpu); |
3023 | vmx_fpu_activate(vcpu); | ||
2901 | return 1; | 3024 | return 1; |
2902 | case 1: /*mov from cr*/ | 3025 | case 1: /*mov from cr*/ |
2903 | switch (cr) { | 3026 | switch (cr) { |
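The rewritten clts case above goes through vmx_set_cr0() rather than patching CR0_READ_SHADOW by hand; semantically it simply clears CR0.TS. A rough equivalent, spelled out for illustration (kvm_read_cr0() is the accessor used elsewhere in this series; this is a sketch, not the patch's exact code):

        /* Illustrative only: CLTS clears CR0.TS and nothing else. */
        unsigned long cr0 = kvm_read_cr0(vcpu);        /* current effective CR0 */
        vmx_set_cr0(vcpu, cr0 & ~X86_CR0_TS);          /* write it back with TS cleared */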
@@ -2915,25 +3038,37 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2915 | } | 3038 | } |
2916 | break; | 3039 | break; |
2917 | case 3: /* lmsw */ | 3040 | case 3: /* lmsw */ |
2918 | kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); | 3041 | val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; |
3042 | trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val); | ||
3043 | kvm_lmsw(vcpu, val); | ||
2919 | 3044 | ||
2920 | skip_emulated_instruction(vcpu); | 3045 | skip_emulated_instruction(vcpu); |
2921 | return 1; | 3046 | return 1; |
2922 | default: | 3047 | default: |
2923 | break; | 3048 | break; |
2924 | } | 3049 | } |
2925 | kvm_run->exit_reason = 0; | 3050 | vcpu->run->exit_reason = 0; |
2926 | pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n", | 3051 | pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n", |
2927 | (int)(exit_qualification >> 4) & 3, cr); | 3052 | (int)(exit_qualification >> 4) & 3, cr); |
2928 | return 0; | 3053 | return 0; |
2929 | } | 3054 | } |
2930 | 3055 | ||
2931 | static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3056 | static int check_dr_alias(struct kvm_vcpu *vcpu) |
3057 | { | ||
3058 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
3059 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3060 | return -1; | ||
3061 | } | ||
3062 | return 0; | ||
3063 | } | ||
3064 | |||
3065 | static int handle_dr(struct kvm_vcpu *vcpu) | ||
2932 | { | 3066 | { |
2933 | unsigned long exit_qualification; | 3067 | unsigned long exit_qualification; |
2934 | unsigned long val; | 3068 | unsigned long val; |
2935 | int dr, reg; | 3069 | int dr, reg; |
2936 | 3070 | ||
3071 | /* Do not handle if CPL > 0; a #GP will be triggered on re-entry */ | ||
2937 | if (!kvm_require_cpl(vcpu, 0)) | 3072 | if (!kvm_require_cpl(vcpu, 0)) |
2938 | return 1; | 3073 | return 1; |
2939 | dr = vmcs_readl(GUEST_DR7); | 3074 | dr = vmcs_readl(GUEST_DR7); |
@@ -2944,13 +3079,13 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2944 | * guest debugging itself. | 3079 | * guest debugging itself. |
2945 | */ | 3080 | */ |
2946 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { | 3081 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { |
2947 | kvm_run->debug.arch.dr6 = vcpu->arch.dr6; | 3082 | vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; |
2948 | kvm_run->debug.arch.dr7 = dr; | 3083 | vcpu->run->debug.arch.dr7 = dr; |
2949 | kvm_run->debug.arch.pc = | 3084 | vcpu->run->debug.arch.pc = |
2950 | vmcs_readl(GUEST_CS_BASE) + | 3085 | vmcs_readl(GUEST_CS_BASE) + |
2951 | vmcs_readl(GUEST_RIP); | 3086 | vmcs_readl(GUEST_RIP); |
2952 | kvm_run->debug.arch.exception = DB_VECTOR; | 3087 | vcpu->run->debug.arch.exception = DB_VECTOR; |
2953 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 3088 | vcpu->run->exit_reason = KVM_EXIT_DEBUG; |
2954 | return 0; | 3089 | return 0; |
2955 | } else { | 3090 | } else { |
2956 | vcpu->arch.dr7 &= ~DR7_GD; | 3091 | vcpu->arch.dr7 &= ~DR7_GD; |
@@ -2969,14 +3104,20 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2969 | case 0 ... 3: | 3104 | case 0 ... 3: |
2970 | val = vcpu->arch.db[dr]; | 3105 | val = vcpu->arch.db[dr]; |
2971 | break; | 3106 | break; |
3107 | case 4: | ||
3108 | if (check_dr_alias(vcpu) < 0) | ||
3109 | return 1; | ||
3110 | /* fall through */ | ||
2972 | case 6: | 3111 | case 6: |
2973 | val = vcpu->arch.dr6; | 3112 | val = vcpu->arch.dr6; |
2974 | break; | 3113 | break; |
2975 | case 7: | 3114 | case 5: |
3115 | if (check_dr_alias(vcpu) < 0) | ||
3116 | return 1; | ||
3117 | /* fall through */ | ||
3118 | default: /* 7 */ | ||
2976 | val = vcpu->arch.dr7; | 3119 | val = vcpu->arch.dr7; |
2977 | break; | 3120 | break; |
2978 | default: | ||
2979 | val = 0; | ||
2980 | } | 3121 | } |
2981 | kvm_register_write(vcpu, reg, val); | 3122 | kvm_register_write(vcpu, reg, val); |
2982 | } else { | 3123 | } else { |
@@ -2987,21 +3128,25 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2987 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | 3128 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) |
2988 | vcpu->arch.eff_db[dr] = val; | 3129 | vcpu->arch.eff_db[dr] = val; |
2989 | break; | 3130 | break; |
2990 | case 4 ... 5: | 3131 | case 4: |
2991 | if (vcpu->arch.cr4 & X86_CR4_DE) | 3132 | if (check_dr_alias(vcpu) < 0) |
2992 | kvm_queue_exception(vcpu, UD_VECTOR); | 3133 | return 1; |
2993 | break; | 3134 | /* fall through */ |
2994 | case 6: | 3135 | case 6: |
2995 | if (val & 0xffffffff00000000ULL) { | 3136 | if (val & 0xffffffff00000000ULL) { |
2996 | kvm_queue_exception(vcpu, GP_VECTOR); | 3137 | kvm_inject_gp(vcpu, 0); |
2997 | break; | 3138 | return 1; |
2998 | } | 3139 | } |
2999 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | 3140 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; |
3000 | break; | 3141 | break; |
3001 | case 7: | 3142 | case 5: |
3143 | if (check_dr_alias(vcpu) < 0) | ||
3144 | return 1; | ||
3145 | /* fall through */ | ||
3146 | default: /* 7 */ | ||
3002 | if (val & 0xffffffff00000000ULL) { | 3147 | if (val & 0xffffffff00000000ULL) { |
3003 | kvm_queue_exception(vcpu, GP_VECTOR); | 3148 | kvm_inject_gp(vcpu, 0); |
3004 | break; | 3149 | return 1; |
3005 | } | 3150 | } |
3006 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | 3151 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; |
3007 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | 3152 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { |
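The new check_dr_alias() helper encodes the architectural DR4/DR5 rule used in both switches above: with CR4.DE clear, DR4 and DR5 alias DR6 and DR7; with CR4.DE set, any access raises #UD. A standalone sketch of that mapping (the function below is illustrative, not part of the patch):

        /*
         * Illustrative: which debug register a DRn access really hits.
         * Returns -1 when the access must raise #UD (CR4.DE set).
         */
        static int effective_dr(int dr, int cr4_de)
        {
                if (dr == 4 || dr == 5)
                        return cr4_de ? -1 : dr + 2;    /* DR4->DR6, DR5->DR7, or #UD */
                return dr;                              /* DR0-DR3, DR6, DR7 unchanged */
        }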
@@ -3016,18 +3161,19 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3016 | return 1; | 3161 | return 1; |
3017 | } | 3162 | } |
3018 | 3163 | ||
3019 | static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3164 | static int handle_cpuid(struct kvm_vcpu *vcpu) |
3020 | { | 3165 | { |
3021 | kvm_emulate_cpuid(vcpu); | 3166 | kvm_emulate_cpuid(vcpu); |
3022 | return 1; | 3167 | return 1; |
3023 | } | 3168 | } |
3024 | 3169 | ||
3025 | static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3170 | static int handle_rdmsr(struct kvm_vcpu *vcpu) |
3026 | { | 3171 | { |
3027 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; | 3172 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; |
3028 | u64 data; | 3173 | u64 data; |
3029 | 3174 | ||
3030 | if (vmx_get_msr(vcpu, ecx, &data)) { | 3175 | if (vmx_get_msr(vcpu, ecx, &data)) { |
3176 | trace_kvm_msr_read_ex(ecx); | ||
3031 | kvm_inject_gp(vcpu, 0); | 3177 | kvm_inject_gp(vcpu, 0); |
3032 | return 1; | 3178 | return 1; |
3033 | } | 3179 | } |
@@ -3041,31 +3187,29 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3041 | return 1; | 3187 | return 1; |
3042 | } | 3188 | } |
3043 | 3189 | ||
3044 | static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3190 | static int handle_wrmsr(struct kvm_vcpu *vcpu) |
3045 | { | 3191 | { |
3046 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; | 3192 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; |
3047 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) | 3193 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) |
3048 | | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); | 3194 | | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); |
3049 | 3195 | ||
3050 | trace_kvm_msr_write(ecx, data); | ||
3051 | |||
3052 | if (vmx_set_msr(vcpu, ecx, data) != 0) { | 3196 | if (vmx_set_msr(vcpu, ecx, data) != 0) { |
3197 | trace_kvm_msr_write_ex(ecx, data); | ||
3053 | kvm_inject_gp(vcpu, 0); | 3198 | kvm_inject_gp(vcpu, 0); |
3054 | return 1; | 3199 | return 1; |
3055 | } | 3200 | } |
3056 | 3201 | ||
3202 | trace_kvm_msr_write(ecx, data); | ||
3057 | skip_emulated_instruction(vcpu); | 3203 | skip_emulated_instruction(vcpu); |
3058 | return 1; | 3204 | return 1; |
3059 | } | 3205 | } |
3060 | 3206 | ||
3061 | static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu, | 3207 | static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) |
3062 | struct kvm_run *kvm_run) | ||
3063 | { | 3208 | { |
3064 | return 1; | 3209 | return 1; |
3065 | } | 3210 | } |
3066 | 3211 | ||
3067 | static int handle_interrupt_window(struct kvm_vcpu *vcpu, | 3212 | static int handle_interrupt_window(struct kvm_vcpu *vcpu) |
3068 | struct kvm_run *kvm_run) | ||
3069 | { | 3213 | { |
3070 | u32 cpu_based_vm_exec_control; | 3214 | u32 cpu_based_vm_exec_control; |
3071 | 3215 | ||
@@ -3081,34 +3225,34 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, | |||
3081 | * possible | 3225 | * possible |
3082 | */ | 3226 | */ |
3083 | if (!irqchip_in_kernel(vcpu->kvm) && | 3227 | if (!irqchip_in_kernel(vcpu->kvm) && |
3084 | kvm_run->request_interrupt_window && | 3228 | vcpu->run->request_interrupt_window && |
3085 | !kvm_cpu_has_interrupt(vcpu)) { | 3229 | !kvm_cpu_has_interrupt(vcpu)) { |
3086 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | 3230 | vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; |
3087 | return 0; | 3231 | return 0; |
3088 | } | 3232 | } |
3089 | return 1; | 3233 | return 1; |
3090 | } | 3234 | } |
3091 | 3235 | ||
3092 | static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3236 | static int handle_halt(struct kvm_vcpu *vcpu) |
3093 | { | 3237 | { |
3094 | skip_emulated_instruction(vcpu); | 3238 | skip_emulated_instruction(vcpu); |
3095 | return kvm_emulate_halt(vcpu); | 3239 | return kvm_emulate_halt(vcpu); |
3096 | } | 3240 | } |
3097 | 3241 | ||
3098 | static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3242 | static int handle_vmcall(struct kvm_vcpu *vcpu) |
3099 | { | 3243 | { |
3100 | skip_emulated_instruction(vcpu); | 3244 | skip_emulated_instruction(vcpu); |
3101 | kvm_emulate_hypercall(vcpu); | 3245 | kvm_emulate_hypercall(vcpu); |
3102 | return 1; | 3246 | return 1; |
3103 | } | 3247 | } |
3104 | 3248 | ||
3105 | static int handle_vmx_insn(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3249 | static int handle_vmx_insn(struct kvm_vcpu *vcpu) |
3106 | { | 3250 | { |
3107 | kvm_queue_exception(vcpu, UD_VECTOR); | 3251 | kvm_queue_exception(vcpu, UD_VECTOR); |
3108 | return 1; | 3252 | return 1; |
3109 | } | 3253 | } |
3110 | 3254 | ||
3111 | static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3255 | static int handle_invlpg(struct kvm_vcpu *vcpu) |
3112 | { | 3256 | { |
3113 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3257 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
3114 | 3258 | ||
@@ -3117,14 +3261,14 @@ static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3117 | return 1; | 3261 | return 1; |
3118 | } | 3262 | } |
3119 | 3263 | ||
3120 | static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3264 | static int handle_wbinvd(struct kvm_vcpu *vcpu) |
3121 | { | 3265 | { |
3122 | skip_emulated_instruction(vcpu); | 3266 | skip_emulated_instruction(vcpu); |
3123 | /* TODO: Add support for VT-d/pass-through device */ | 3267 | /* TODO: Add support for VT-d/pass-through device */ |
3124 | return 1; | 3268 | return 1; |
3125 | } | 3269 | } |
3126 | 3270 | ||
3127 | static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3271 | static int handle_apic_access(struct kvm_vcpu *vcpu) |
3128 | { | 3272 | { |
3129 | unsigned long exit_qualification; | 3273 | unsigned long exit_qualification; |
3130 | enum emulation_result er; | 3274 | enum emulation_result er; |
@@ -3133,7 +3277,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3133 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3277 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
3134 | offset = exit_qualification & 0xffful; | 3278 | offset = exit_qualification & 0xffful; |
3135 | 3279 | ||
3136 | er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | 3280 | er = emulate_instruction(vcpu, 0, 0, 0); |
3137 | 3281 | ||
3138 | if (er != EMULATE_DONE) { | 3282 | if (er != EMULATE_DONE) { |
3139 | printk(KERN_ERR | 3283 | printk(KERN_ERR |
@@ -3144,7 +3288,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3144 | return 1; | 3288 | return 1; |
3145 | } | 3289 | } |
3146 | 3290 | ||
3147 | static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3291 | static int handle_task_switch(struct kvm_vcpu *vcpu) |
3148 | { | 3292 | { |
3149 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3293 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3150 | unsigned long exit_qualification; | 3294 | unsigned long exit_qualification; |
@@ -3198,7 +3342,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3198 | return 1; | 3342 | return 1; |
3199 | } | 3343 | } |
3200 | 3344 | ||
3201 | static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3345 | static int handle_ept_violation(struct kvm_vcpu *vcpu) |
3202 | { | 3346 | { |
3203 | unsigned long exit_qualification; | 3347 | unsigned long exit_qualification; |
3204 | gpa_t gpa; | 3348 | gpa_t gpa; |
@@ -3219,8 +3363,8 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3219 | vmcs_readl(GUEST_LINEAR_ADDRESS)); | 3363 | vmcs_readl(GUEST_LINEAR_ADDRESS)); |
3220 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", | 3364 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", |
3221 | (long unsigned int)exit_qualification); | 3365 | (long unsigned int)exit_qualification); |
3222 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3366 | vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; |
3223 | kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION; | 3367 | vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION; |
3224 | return 0; | 3368 | return 0; |
3225 | } | 3369 | } |
3226 | 3370 | ||
@@ -3290,7 +3434,7 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte, | |||
3290 | } | 3434 | } |
3291 | } | 3435 | } |
3292 | 3436 | ||
3293 | static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3437 | static int handle_ept_misconfig(struct kvm_vcpu *vcpu) |
3294 | { | 3438 | { |
3295 | u64 sptes[4]; | 3439 | u64 sptes[4]; |
3296 | int nr_sptes, i; | 3440 | int nr_sptes, i; |
@@ -3306,13 +3450,13 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3306 | for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i) | 3450 | for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i) |
3307 | ept_misconfig_inspect_spte(vcpu, sptes[i-1], i); | 3451 | ept_misconfig_inspect_spte(vcpu, sptes[i-1], i); |
3308 | 3452 | ||
3309 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3453 | vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; |
3310 | kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; | 3454 | vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; |
3311 | 3455 | ||
3312 | return 0; | 3456 | return 0; |
3313 | } | 3457 | } |
3314 | 3458 | ||
3315 | static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3459 | static int handle_nmi_window(struct kvm_vcpu *vcpu) |
3316 | { | 3460 | { |
3317 | u32 cpu_based_vm_exec_control; | 3461 | u32 cpu_based_vm_exec_control; |
3318 | 3462 | ||
@@ -3325,36 +3469,55 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3325 | return 1; | 3469 | return 1; |
3326 | } | 3470 | } |
3327 | 3471 | ||
3328 | static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | 3472 | static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) |
3329 | struct kvm_run *kvm_run) | ||
3330 | { | 3473 | { |
3331 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3474 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3332 | enum emulation_result err = EMULATE_DONE; | 3475 | enum emulation_result err = EMULATE_DONE; |
3333 | 3476 | int ret = 1; | |
3334 | local_irq_enable(); | ||
3335 | preempt_enable(); | ||
3336 | 3477 | ||
3337 | while (!guest_state_valid(vcpu)) { | 3478 | while (!guest_state_valid(vcpu)) { |
3338 | err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | 3479 | err = emulate_instruction(vcpu, 0, 0, 0); |
3339 | 3480 | ||
3340 | if (err == EMULATE_DO_MMIO) | 3481 | if (err == EMULATE_DO_MMIO) { |
3341 | break; | 3482 | ret = 0; |
3483 | goto out; | ||
3484 | } | ||
3342 | 3485 | ||
3343 | if (err != EMULATE_DONE) { | 3486 | if (err != EMULATE_DONE) { |
3344 | kvm_report_emulation_failure(vcpu, "emulation failure"); | 3487 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
3345 | break; | 3488 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
3489 | vcpu->run->internal.ndata = 0; | ||
3490 | ret = 0; | ||
3491 | goto out; | ||
3346 | } | 3492 | } |
3347 | 3493 | ||
3348 | if (signal_pending(current)) | 3494 | if (signal_pending(current)) |
3349 | break; | 3495 | goto out; |
3350 | if (need_resched()) | 3496 | if (need_resched()) |
3351 | schedule(); | 3497 | schedule(); |
3352 | } | 3498 | } |
3353 | 3499 | ||
3354 | preempt_disable(); | 3500 | vmx->emulation_required = 0; |
3355 | local_irq_disable(); | 3501 | out: |
3502 | return ret; | ||
3503 | } | ||
3504 | |||
3505 | /* | ||
3506 | * Indicate that a vcpu is busy-waiting in a spinlock. We do not enable | ||
3507 | * plain PAUSE exiting, so we only get here on CPUs with Pause-Loop Exiting. | ||
3508 | */ | ||
3509 | static int handle_pause(struct kvm_vcpu *vcpu) | ||
3510 | { | ||
3511 | skip_emulated_instruction(vcpu); | ||
3512 | kvm_vcpu_on_spin(vcpu); | ||
3513 | |||
3514 | return 1; | ||
3515 | } | ||
3356 | 3516 | ||
3357 | vmx->invalid_state_emulation_result = err; | 3517 | static int handle_invalid_op(struct kvm_vcpu *vcpu) |
3518 | { | ||
3519 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3520 | return 1; | ||
3358 | } | 3521 | } |
3359 | 3522 | ||
3360 | /* | 3523 | /* |
@@ -3362,8 +3525,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | |||
3362 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 3525 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
3363 | * to be done to userspace and return 0. | 3526 | * to be done to userspace and return 0. |
3364 | */ | 3527 | */ |
3365 | static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | 3528 | static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { |
3366 | struct kvm_run *kvm_run) = { | ||
3367 | [EXIT_REASON_EXCEPTION_NMI] = handle_exception, | 3529 | [EXIT_REASON_EXCEPTION_NMI] = handle_exception, |
3368 | [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, | 3530 | [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, |
3369 | [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, | 3531 | [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, |
@@ -3394,6 +3556,9 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
3394 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, | 3556 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, |
3395 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, | 3557 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, |
3396 | [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, | 3558 | [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, |
3559 | [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, | ||
3560 | [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, | ||
3561 | [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, | ||
3397 | }; | 3562 | }; |
3398 | 3563 | ||
3399 | static const int kvm_vmx_max_exit_handlers = | 3564 | static const int kvm_vmx_max_exit_handlers = |
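Every entry in this table now has the single-argument signature: a handler takes only the vcpu, reaches userspace state through vcpu->run, and returns 1 to resume the guest or 0 to drop to userspace. A hypothetical handler in the new style (both the handler and its predicate are invented for illustration):

        /* Hypothetical handler showing the new convention; not part of the patch. */
        static int handle_example(struct kvm_vcpu *vcpu)
        {
                if (example_handled_in_kernel(vcpu))    /* placeholder predicate */
                        return 1;                       /* resume guest execution */

                vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
                return 0;                               /* let userspace deal with it */
        }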
@@ -3403,7 +3568,7 @@ static const int kvm_vmx_max_exit_handlers = | |||
3403 | * The guest has exited. See if we can fix it or if we need userspace | 3568 | * The guest has exited. See if we can fix it or if we need userspace |
3404 | * assistance. | 3569 | * assistance. |
3405 | */ | 3570 | */ |
3406 | static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | 3571 | static int vmx_handle_exit(struct kvm_vcpu *vcpu) |
3407 | { | 3572 | { |
3408 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3573 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3409 | u32 exit_reason = vmx->exit_reason; | 3574 | u32 exit_reason = vmx->exit_reason; |
@@ -3411,13 +3576,9 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3411 | 3576 | ||
3412 | trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); | 3577 | trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); |
3413 | 3578 | ||
3414 | /* If we need to emulate an MMIO from handle_invalid_guest_state | 3579 | /* If guest state is invalid, start emulating */ |
3415 | * we just return 0 */ | 3580 | if (vmx->emulation_required && emulate_invalid_guest_state) |
3416 | if (vmx->emulation_required && emulate_invalid_guest_state) { | 3581 | return handle_invalid_guest_state(vcpu); |
3417 | if (guest_state_valid(vcpu)) | ||
3418 | vmx->emulation_required = 0; | ||
3419 | return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO; | ||
3420 | } | ||
3421 | 3582 | ||
3422 | /* Accesses to CR3 don't cause a VM exit in paging mode, so we need | 3583 | /* Accesses to CR3 don't cause a VM exit in paging mode, so we need |
3423 | * to sync with the guest's real CR3. */ | 3584 | * to sync with the guest's real CR3. */ |
@@ -3425,8 +3586,8 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3425 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | 3586 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); |
3426 | 3587 | ||
3427 | if (unlikely(vmx->fail)) { | 3588 | if (unlikely(vmx->fail)) { |
3428 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 3589 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
3429 | kvm_run->fail_entry.hardware_entry_failure_reason | 3590 | vcpu->run->fail_entry.hardware_entry_failure_reason |
3430 | = vmcs_read32(VM_INSTRUCTION_ERROR); | 3591 | = vmcs_read32(VM_INSTRUCTION_ERROR); |
3431 | return 0; | 3592 | return 0; |
3432 | } | 3593 | } |
@@ -3459,10 +3620,10 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3459 | 3620 | ||
3460 | if (exit_reason < kvm_vmx_max_exit_handlers | 3621 | if (exit_reason < kvm_vmx_max_exit_handlers |
3461 | && kvm_vmx_exit_handlers[exit_reason]) | 3622 | && kvm_vmx_exit_handlers[exit_reason]) |
3462 | return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); | 3623 | return kvm_vmx_exit_handlers[exit_reason](vcpu); |
3463 | else { | 3624 | else { |
3464 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3625 | vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; |
3465 | kvm_run->hw.hardware_exit_reason = exit_reason; | 3626 | vcpu->run->hw.hardware_exit_reason = exit_reason; |
3466 | } | 3627 | } |
3467 | return 0; | 3628 | return 0; |
3468 | } | 3629 | } |
@@ -3600,23 +3761,18 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx) | |||
3600 | #define Q "l" | 3761 | #define Q "l" |
3601 | #endif | 3762 | #endif |
3602 | 3763 | ||
3603 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3764 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu) |
3604 | { | 3765 | { |
3605 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3766 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3606 | 3767 | ||
3607 | if (enable_ept && is_paging(vcpu)) { | ||
3608 | vmcs_writel(GUEST_CR3, vcpu->arch.cr3); | ||
3609 | ept_load_pdptrs(vcpu); | ||
3610 | } | ||
3611 | /* Record the guest's net vcpu time for enforced NMI injections. */ | 3768 | /* Record the guest's net vcpu time for enforced NMI injections. */ |
3612 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) | 3769 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) |
3613 | vmx->entry_time = ktime_get(); | 3770 | vmx->entry_time = ktime_get(); |
3614 | 3771 | ||
3615 | /* Handle invalid guest state instead of entering VMX */ | 3772 | /* Don't enter VMX if guest state is invalid; let the exit handler |
3616 | if (vmx->emulation_required && emulate_invalid_guest_state) { | 3773 | start emulation until we arrive back at a valid state */ |
3617 | handle_invalid_guest_state(vcpu, kvm_run); | 3774 | if (vmx->emulation_required && emulate_invalid_guest_state) |
3618 | return; | 3775 | return; |
3619 | } | ||
3620 | 3776 | ||
3621 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) | 3777 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) |
3622 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); | 3778 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); |
@@ -3636,9 +3792,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3636 | */ | 3792 | */ |
3637 | vmcs_writel(HOST_CR0, read_cr0()); | 3793 | vmcs_writel(HOST_CR0, read_cr0()); |
3638 | 3794 | ||
3639 | if (vcpu->arch.switch_db_regs) | ||
3640 | set_debugreg(vcpu->arch.dr6, 6); | ||
3641 | |||
3642 | asm( | 3795 | asm( |
3643 | /* Store host registers */ | 3796 | /* Store host registers */ |
3644 | "push %%"R"dx; push %%"R"bp;" | 3797 | "push %%"R"dx; push %%"R"bp;" |
@@ -3739,9 +3892,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3739 | | (1 << VCPU_EXREG_PDPTR)); | 3892 | | (1 << VCPU_EXREG_PDPTR)); |
3740 | vcpu->arch.regs_dirty = 0; | 3893 | vcpu->arch.regs_dirty = 0; |
3741 | 3894 | ||
3742 | if (vcpu->arch.switch_db_regs) | ||
3743 | get_debugreg(vcpu->arch.dr6, 6); | ||
3744 | |||
3745 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); | 3895 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
3746 | if (vmx->rmode.irq.pending) | 3896 | if (vmx->rmode.irq.pending) |
3747 | fixup_rmode_irq(vmx); | 3897 | fixup_rmode_irq(vmx); |
@@ -3775,7 +3925,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
3775 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | 3925 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); |
3776 | spin_unlock(&vmx_vpid_lock); | 3926 | spin_unlock(&vmx_vpid_lock); |
3777 | vmx_free_vmcs(vcpu); | 3927 | vmx_free_vmcs(vcpu); |
3778 | kfree(vmx->host_msrs); | ||
3779 | kfree(vmx->guest_msrs); | 3928 | kfree(vmx->guest_msrs); |
3780 | kvm_vcpu_uninit(vcpu); | 3929 | kvm_vcpu_uninit(vcpu); |
3781 | kmem_cache_free(kvm_vcpu_cache, vmx); | 3930 | kmem_cache_free(kvm_vcpu_cache, vmx); |
@@ -3802,10 +3951,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
3802 | goto uninit_vcpu; | 3951 | goto uninit_vcpu; |
3803 | } | 3952 | } |
3804 | 3953 | ||
3805 | vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||
3806 | if (!vmx->host_msrs) | ||
3807 | goto free_guest_msrs; | ||
3808 | |||
3809 | vmx->vmcs = alloc_vmcs(); | 3954 | vmx->vmcs = alloc_vmcs(); |
3810 | if (!vmx->vmcs) | 3955 | if (!vmx->vmcs) |
3811 | goto free_msrs; | 3956 | goto free_msrs; |
@@ -3836,8 +3981,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
3836 | free_vmcs: | 3981 | free_vmcs: |
3837 | free_vmcs(vmx->vmcs); | 3982 | free_vmcs(vmx->vmcs); |
3838 | free_msrs: | 3983 | free_msrs: |
3839 | kfree(vmx->host_msrs); | ||
3840 | free_guest_msrs: | ||
3841 | kfree(vmx->guest_msrs); | 3984 | kfree(vmx->guest_msrs); |
3842 | uninit_vcpu: | 3985 | uninit_vcpu: |
3843 | kvm_vcpu_uninit(&vmx->vcpu); | 3986 | kvm_vcpu_uninit(&vmx->vcpu); |
@@ -3877,7 +4020,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
3877 | * b. VT-d with snooping control feature: snooping control feature of | 4020 | * b. VT-d with snooping control feature: snooping control feature of |
3878 | * VT-d engine can guarantee the cache correctness. Just set it | 4021 | * VT-d engine can guarantee the cache correctness. Just set it |
3879 | * to WB to keep consistent with host. So the same as item 3. | 4022 | * to WB to keep consistent with host. So the same as item 3. |
3880 | * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep | 4023 | * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep |
3881 | * consistent with host MTRR | 4024 | * consistent with host MTRR |
3882 | */ | 4025 | */ |
3883 | if (is_mmio) | 4026 | if (is_mmio) |
@@ -3888,37 +4031,88 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
3888 | VMX_EPT_MT_EPTE_SHIFT; | 4031 | VMX_EPT_MT_EPTE_SHIFT; |
3889 | else | 4032 | else |
3890 | ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | 4033 | ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) |
3891 | | VMX_EPT_IGMT_BIT; | 4034 | | VMX_EPT_IPAT_BIT; |
3892 | 4035 | ||
3893 | return ret; | 4036 | return ret; |
3894 | } | 4037 | } |
3895 | 4038 | ||
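For reference, assuming the usual constants (VMX_EPT_MT_EPTE_SHIFT = 3, VMX_EPT_IPAT_BIT = 1 << 6, MTRR_TYPE_UNCACHABLE = 0, MTRR_TYPE_WRBACK = 6), the two common return values of vmx_get_mt_mask() work out as below; the arithmetic is shown only for illustration:

        /* MMIO:                 MTRR_TYPE_UNCACHABLE << 3                   = 0x00 */
        /* RAM, EPT w/o VT-d:    (MTRR_TYPE_WRBACK << 3) | VMX_EPT_IPAT_BIT  = 0x70 */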
4039 | #define _ER(x) { EXIT_REASON_##x, #x } | ||
4040 | |||
3896 | static const struct trace_print_flags vmx_exit_reasons_str[] = { | 4041 | static const struct trace_print_flags vmx_exit_reasons_str[] = { |
3897 | { EXIT_REASON_EXCEPTION_NMI, "exception" }, | 4042 | _ER(EXCEPTION_NMI), |
3898 | { EXIT_REASON_EXTERNAL_INTERRUPT, "ext_irq" }, | 4043 | _ER(EXTERNAL_INTERRUPT), |
3899 | { EXIT_REASON_TRIPLE_FAULT, "triple_fault" }, | 4044 | _ER(TRIPLE_FAULT), |
3900 | { EXIT_REASON_NMI_WINDOW, "nmi_window" }, | 4045 | _ER(PENDING_INTERRUPT), |
3901 | { EXIT_REASON_IO_INSTRUCTION, "io_instruction" }, | 4046 | _ER(NMI_WINDOW), |
3902 | { EXIT_REASON_CR_ACCESS, "cr_access" }, | 4047 | _ER(TASK_SWITCH), |
3903 | { EXIT_REASON_DR_ACCESS, "dr_access" }, | 4048 | _ER(CPUID), |
3904 | { EXIT_REASON_CPUID, "cpuid" }, | 4049 | _ER(HLT), |
3905 | { EXIT_REASON_MSR_READ, "rdmsr" }, | 4050 | _ER(INVLPG), |
3906 | { EXIT_REASON_MSR_WRITE, "wrmsr" }, | 4051 | _ER(RDPMC), |
3907 | { EXIT_REASON_PENDING_INTERRUPT, "interrupt_window" }, | 4052 | _ER(RDTSC), |
3908 | { EXIT_REASON_HLT, "halt" }, | 4053 | _ER(VMCALL), |
3909 | { EXIT_REASON_INVLPG, "invlpg" }, | 4054 | _ER(VMCLEAR), |
3910 | { EXIT_REASON_VMCALL, "hypercall" }, | 4055 | _ER(VMLAUNCH), |
3911 | { EXIT_REASON_TPR_BELOW_THRESHOLD, "tpr_below_thres" }, | 4056 | _ER(VMPTRLD), |
3912 | { EXIT_REASON_APIC_ACCESS, "apic_access" }, | 4057 | _ER(VMPTRST), |
3913 | { EXIT_REASON_WBINVD, "wbinvd" }, | 4058 | _ER(VMREAD), |
3914 | { EXIT_REASON_TASK_SWITCH, "task_switch" }, | 4059 | _ER(VMRESUME), |
3915 | { EXIT_REASON_EPT_VIOLATION, "ept_violation" }, | 4060 | _ER(VMWRITE), |
4061 | _ER(VMOFF), | ||
4062 | _ER(VMON), | ||
4063 | _ER(CR_ACCESS), | ||
4064 | _ER(DR_ACCESS), | ||
4065 | _ER(IO_INSTRUCTION), | ||
4066 | _ER(MSR_READ), | ||
4067 | _ER(MSR_WRITE), | ||
4068 | _ER(MWAIT_INSTRUCTION), | ||
4069 | _ER(MONITOR_INSTRUCTION), | ||
4070 | _ER(PAUSE_INSTRUCTION), | ||
4071 | _ER(MCE_DURING_VMENTRY), | ||
4072 | _ER(TPR_BELOW_THRESHOLD), | ||
4073 | _ER(APIC_ACCESS), | ||
4074 | _ER(EPT_VIOLATION), | ||
4075 | _ER(EPT_MISCONFIG), | ||
4076 | _ER(WBINVD), | ||
3916 | { -1, NULL } | 4077 | { -1, NULL } |
3917 | }; | 4078 | }; |
3918 | 4079 | ||
3919 | static bool vmx_gb_page_enable(void) | 4080 | #undef _ER |
4081 | |||
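The _ER() wrapper above only removes repetition: it pairs each EXIT_REASON_* constant with its own name as the trace string. One entry expands, for example, to:

        /* _ER(CPUID) in the table above becomes: */
        { EXIT_REASON_CPUID, "CPUID" },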
4082 | static int vmx_get_lpage_level(void) | ||
3920 | { | 4083 | { |
3921 | return false; | 4084 | if (enable_ept && !cpu_has_vmx_ept_1g_page()) |
4085 | return PT_DIRECTORY_LEVEL; | ||
4086 | else | ||
4087 | /* For shadow paging, and for EPT with 1GB page support */ | ||
4088 | return PT_PDPE_LEVEL; | ||
4089 | } | ||
4090 | |||
4091 | static inline u32 bit(int bitno) | ||
4092 | { | ||
4093 | return 1 << (bitno & 31); | ||
4094 | } | ||
4095 | |||
4096 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | ||
4097 | { | ||
4098 | struct kvm_cpuid_entry2 *best; | ||
4099 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
4100 | u32 exec_control; | ||
4101 | |||
4102 | vmx->rdtscp_enabled = false; | ||
4103 | if (vmx_rdtscp_supported()) { | ||
4104 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
4105 | if (exec_control & SECONDARY_EXEC_RDTSCP) { | ||
4106 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | ||
4107 | if (best && (best->edx & bit(X86_FEATURE_RDTSCP))) | ||
4108 | vmx->rdtscp_enabled = true; | ||
4109 | else { | ||
4110 | exec_control &= ~SECONDARY_EXEC_RDTSCP; | ||
4111 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | ||
4112 | exec_control); | ||
4113 | } | ||
4114 | } | ||
4115 | } | ||
3922 | } | 4116 | } |
3923 | 4117 | ||
3924 | static struct kvm_x86_ops vmx_x86_ops = { | 4118 | static struct kvm_x86_ops vmx_x86_ops = { |
@@ -3947,6 +4141,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3947 | .set_segment = vmx_set_segment, | 4141 | .set_segment = vmx_set_segment, |
3948 | .get_cpl = vmx_get_cpl, | 4142 | .get_cpl = vmx_get_cpl, |
3949 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, | 4143 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, |
4144 | .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, | ||
3950 | .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, | 4145 | .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, |
3951 | .set_cr0 = vmx_set_cr0, | 4146 | .set_cr0 = vmx_set_cr0, |
3952 | .set_cr3 = vmx_set_cr3, | 4147 | .set_cr3 = vmx_set_cr3, |
@@ -3959,6 +4154,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3959 | .cache_reg = vmx_cache_reg, | 4154 | .cache_reg = vmx_cache_reg, |
3960 | .get_rflags = vmx_get_rflags, | 4155 | .get_rflags = vmx_get_rflags, |
3961 | .set_rflags = vmx_set_rflags, | 4156 | .set_rflags = vmx_set_rflags, |
4157 | .fpu_activate = vmx_fpu_activate, | ||
4158 | .fpu_deactivate = vmx_fpu_deactivate, | ||
3962 | 4159 | ||
3963 | .tlb_flush = vmx_flush_tlb, | 4160 | .tlb_flush = vmx_flush_tlb, |
3964 | 4161 | ||
@@ -3973,6 +4170,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3973 | .queue_exception = vmx_queue_exception, | 4170 | .queue_exception = vmx_queue_exception, |
3974 | .interrupt_allowed = vmx_interrupt_allowed, | 4171 | .interrupt_allowed = vmx_interrupt_allowed, |
3975 | .nmi_allowed = vmx_nmi_allowed, | 4172 | .nmi_allowed = vmx_nmi_allowed, |
4173 | .get_nmi_mask = vmx_get_nmi_mask, | ||
4174 | .set_nmi_mask = vmx_set_nmi_mask, | ||
3976 | .enable_nmi_window = enable_nmi_window, | 4175 | .enable_nmi_window = enable_nmi_window, |
3977 | .enable_irq_window = enable_irq_window, | 4176 | .enable_irq_window = enable_irq_window, |
3978 | .update_cr8_intercept = update_cr8_intercept, | 4177 | .update_cr8_intercept = update_cr8_intercept, |
@@ -3982,12 +4181,21 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3982 | .get_mt_mask = vmx_get_mt_mask, | 4181 | .get_mt_mask = vmx_get_mt_mask, |
3983 | 4182 | ||
3984 | .exit_reasons_str = vmx_exit_reasons_str, | 4183 | .exit_reasons_str = vmx_exit_reasons_str, |
3985 | .gb_page_enable = vmx_gb_page_enable, | 4184 | .get_lpage_level = vmx_get_lpage_level, |
4185 | |||
4186 | .cpuid_update = vmx_cpuid_update, | ||
4187 | |||
4188 | .rdtscp_supported = vmx_rdtscp_supported, | ||
3986 | }; | 4189 | }; |
3987 | 4190 | ||
3988 | static int __init vmx_init(void) | 4191 | static int __init vmx_init(void) |
3989 | { | 4192 | { |
3990 | int r; | 4193 | int r, i; |
4194 | |||
4195 | rdmsrl_safe(MSR_EFER, &host_efer); | ||
4196 | |||
4197 | for (i = 0; i < NR_VMX_MSR; ++i) | ||
4198 | kvm_define_shared_msr(i, vmx_msr_index[i]); | ||
3991 | 4199 | ||
3992 | vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); | 4200 | vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); |
3993 | if (!vmx_io_bitmap_a) | 4201 | if (!vmx_io_bitmap_a) |
@@ -4049,8 +4257,6 @@ static int __init vmx_init(void) | |||
4049 | if (bypass_guest_pf) | 4257 | if (bypass_guest_pf) |
4050 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); | 4258 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); |
4051 | 4259 | ||
4052 | ept_sync_global(); | ||
4053 | |||
4054 | return 0; | 4260 | return 0; |
4055 | 4261 | ||
4056 | out3: | 4262 | out3: |