Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--  arch/x86/kvm/vmx.c | 862
1 file changed, 534 insertions(+), 328 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ed53b42caba1..2f8db0ec8ae4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -26,6 +26,7 @@
26#include <linux/sched.h> 26#include <linux/sched.h>
27#include <linux/moduleparam.h> 27#include <linux/moduleparam.h>
28#include <linux/ftrace_event.h> 28#include <linux/ftrace_event.h>
29#include <linux/slab.h>
29#include "kvm_cache_regs.h" 30#include "kvm_cache_regs.h"
30#include "x86.h" 31#include "x86.h"
31 32
@@ -61,12 +62,54 @@ module_param_named(unrestricted_guest,
61static int __read_mostly emulate_invalid_guest_state = 0; 62static int __read_mostly emulate_invalid_guest_state = 0;
62module_param(emulate_invalid_guest_state, bool, S_IRUGO); 63module_param(emulate_invalid_guest_state, bool, S_IRUGO);
63 64
65#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
66 (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
67#define KVM_GUEST_CR0_MASK \
68 (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
69#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
70 (X86_CR0_WP | X86_CR0_NE)
71#define KVM_VM_CR0_ALWAYS_ON \
72 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
73#define KVM_CR4_GUEST_OWNED_BITS \
74 (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
75 | X86_CR4_OSXMMEXCPT)
76
77#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
78#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
79
80#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
81
82/*
83 * These 2 parameters are used to config the controls for Pause-Loop Exiting:
84 * ple_gap: upper bound on the amount of time between two successive
85 * executions of PAUSE in a loop. Also indicate if ple enabled.
86 * According to test, this time is usually small than 41 cycles.
87 * ple_window: upper bound on the amount of time a guest is allowed to execute
88 * in a PAUSE loop. Tests indicate that most spinlocks are held for
89 * less than 2^12 cycles
90 * Time is measured based on a counter that runs at the same rate as the TSC,
91 * refer SDM volume 3b section 21.6.13 & 22.1.3.
92 */
93#define KVM_VMX_DEFAULT_PLE_GAP 41
94#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
95static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
96module_param(ple_gap, int, S_IRUGO);
97
98static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
99module_param(ple_window, int, S_IRUGO);
100
64struct vmcs { 101struct vmcs {
65 u32 revision_id; 102 u32 revision_id;
66 u32 abort; 103 u32 abort;
67 char data[0]; 104 char data[0];
68}; 105};
69 106
107struct shared_msr_entry {
108 unsigned index;
109 u64 data;
110 u64 mask;
111};
112
70struct vcpu_vmx { 113struct vcpu_vmx {
71 struct kvm_vcpu vcpu; 114 struct kvm_vcpu vcpu;
72 struct list_head local_vcpus_link; 115 struct list_head local_vcpus_link;
@@ -74,13 +117,12 @@ struct vcpu_vmx {
74 int launched; 117 int launched;
75 u8 fail; 118 u8 fail;
76 u32 idt_vectoring_info; 119 u32 idt_vectoring_info;
77 struct kvm_msr_entry *guest_msrs; 120 struct shared_msr_entry *guest_msrs;
78 struct kvm_msr_entry *host_msrs;
79 int nmsrs; 121 int nmsrs;
80 int save_nmsrs; 122 int save_nmsrs;
81 int msr_offset_efer;
82#ifdef CONFIG_X86_64 123#ifdef CONFIG_X86_64
83 int msr_offset_kernel_gs_base; 124 u64 msr_host_kernel_gs_base;
125 u64 msr_guest_kernel_gs_base;
84#endif 126#endif
85 struct vmcs *vmcs; 127 struct vmcs *vmcs;
86 struct { 128 struct {
@@ -88,11 +130,10 @@ struct vcpu_vmx {
88 u16 fs_sel, gs_sel, ldt_sel; 130 u16 fs_sel, gs_sel, ldt_sel;
89 int gs_ldt_reload_needed; 131 int gs_ldt_reload_needed;
90 int fs_reload_needed; 132 int fs_reload_needed;
91 int guest_efer_loaded;
92 } host_state; 133 } host_state;
93 struct { 134 struct {
94 int vm86_active; 135 int vm86_active;
95 u8 save_iopl; 136 ulong save_rflags;
96 struct kvm_save_segment { 137 struct kvm_save_segment {
97 u16 selector; 138 u16 selector;
98 unsigned long base; 139 unsigned long base;
@@ -107,13 +148,14 @@ struct vcpu_vmx {
107 } rmode; 148 } rmode;
108 int vpid; 149 int vpid;
109 bool emulation_required; 150 bool emulation_required;
110 enum emulation_result invalid_state_emulation_result;
111 151
112 /* Support for vnmi-less CPUs */ 152 /* Support for vnmi-less CPUs */
113 int soft_vnmi_blocked; 153 int soft_vnmi_blocked;
114 ktime_t entry_time; 154 ktime_t entry_time;
115 s64 vnmi_blocked_time; 155 s64 vnmi_blocked_time;
116 u32 exit_reason; 156 u32 exit_reason;
157
158 bool rdtscp_enabled;
117}; 159};
118 160
119static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) 161static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -176,6 +218,8 @@ static struct kvm_vmx_segment_field {
176 VMX_SEGMENT_FIELD(LDTR), 218 VMX_SEGMENT_FIELD(LDTR),
177}; 219};
178 220
221static u64 host_efer;
222
179static void ept_save_pdptrs(struct kvm_vcpu *vcpu); 223static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
180 224
181/* 225/*
@@ -184,28 +228,12 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
184 */ 228 */
185static const u32 vmx_msr_index[] = { 229static const u32 vmx_msr_index[] = {
186#ifdef CONFIG_X86_64 230#ifdef CONFIG_X86_64
187 MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE, 231 MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
188#endif 232#endif
189 MSR_EFER, MSR_K6_STAR, 233 MSR_EFER, MSR_TSC_AUX, MSR_K6_STAR,
190}; 234};
191#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) 235#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
192 236
193static void load_msrs(struct kvm_msr_entry *e, int n)
194{
195 int i;
196
197 for (i = 0; i < n; ++i)
198 wrmsrl(e[i].index, e[i].data);
199}
200
201static void save_msrs(struct kvm_msr_entry *e, int n)
202{
203 int i;
204
205 for (i = 0; i < n; ++i)
206 rdmsrl(e[i].index, e[i].data);
207}
208
209static inline int is_page_fault(u32 intr_info) 237static inline int is_page_fault(u32 intr_info)
210{ 238{
211 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 239 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -293,6 +321,11 @@ static inline bool cpu_has_vmx_ept_2m_page(void)
293 return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); 321 return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT);
294} 322}
295 323
324static inline bool cpu_has_vmx_ept_1g_page(void)
325{
326 return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT);
327}
328
296static inline int cpu_has_vmx_invept_individual_addr(void) 329static inline int cpu_has_vmx_invept_individual_addr(void)
297{ 330{
298 return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); 331 return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT);
@@ -320,11 +353,15 @@ static inline int cpu_has_vmx_unrestricted_guest(void)
320 SECONDARY_EXEC_UNRESTRICTED_GUEST; 353 SECONDARY_EXEC_UNRESTRICTED_GUEST;
321} 354}
322 355
356static inline int cpu_has_vmx_ple(void)
357{
358 return vmcs_config.cpu_based_2nd_exec_ctrl &
359 SECONDARY_EXEC_PAUSE_LOOP_EXITING;
360}
361
323static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) 362static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
324{ 363{
325 return flexpriority_enabled && 364 return flexpriority_enabled && irqchip_in_kernel(kvm);
326 (cpu_has_vmx_virtualize_apic_accesses()) &&
327 (irqchip_in_kernel(kvm));
328} 365}
329 366
330static inline int cpu_has_vmx_vpid(void) 367static inline int cpu_has_vmx_vpid(void)
@@ -333,6 +370,12 @@ static inline int cpu_has_vmx_vpid(void)
333 SECONDARY_EXEC_ENABLE_VPID; 370 SECONDARY_EXEC_ENABLE_VPID;
334} 371}
335 372
373static inline int cpu_has_vmx_rdtscp(void)
374{
375 return vmcs_config.cpu_based_2nd_exec_ctrl &
376 SECONDARY_EXEC_RDTSCP;
377}
378
336static inline int cpu_has_virtual_nmis(void) 379static inline int cpu_has_virtual_nmis(void)
337{ 380{
338 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; 381 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
@@ -348,7 +391,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
348 int i; 391 int i;
349 392
350 for (i = 0; i < vmx->nmsrs; ++i) 393 for (i = 0; i < vmx->nmsrs; ++i)
351 if (vmx->guest_msrs[i].index == msr) 394 if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
352 return i; 395 return i;
353 return -1; 396 return -1;
354} 397}
@@ -379,7 +422,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa)
379 : : "a" (&operand), "c" (ext) : "cc", "memory"); 422 : : "a" (&operand), "c" (ext) : "cc", "memory");
380} 423}
381 424
382static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) 425static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
383{ 426{
384 int i; 427 int i;
385 428
@@ -537,22 +580,18 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
537{ 580{
538 u32 eb; 581 u32 eb;
539 582
540 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR); 583 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
541 if (!vcpu->fpu_active) 584 (1u << NM_VECTOR) | (1u << DB_VECTOR);
542 eb |= 1u << NM_VECTOR; 585 if ((vcpu->guest_debug &
543 /* 586 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
544 * Unconditionally intercept #DB so we can maintain dr6 without 587 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
545 * reading it every exit. 588 eb |= 1u << BP_VECTOR;
546 */
547 eb |= 1u << DB_VECTOR;
548 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
549 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
550 eb |= 1u << BP_VECTOR;
551 }
552 if (to_vmx(vcpu)->rmode.vm86_active) 589 if (to_vmx(vcpu)->rmode.vm86_active)
553 eb = ~0; 590 eb = ~0;
554 if (enable_ept) 591 if (enable_ept)
555 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ 592 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
593 if (vcpu->fpu_active)
594 eb &= ~(1u << NM_VECTOR);
556 vmcs_write32(EXCEPTION_BITMAP, eb); 595 vmcs_write32(EXCEPTION_BITMAP, eb);
557} 596}
558 597
@@ -570,17 +609,12 @@ static void reload_tss(void)
570 load_TR_desc(); 609 load_TR_desc();
571} 610}
572 611
573static void load_transition_efer(struct vcpu_vmx *vmx) 612static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
574{ 613{
575 int efer_offset = vmx->msr_offset_efer;
576 u64 host_efer;
577 u64 guest_efer; 614 u64 guest_efer;
578 u64 ignore_bits; 615 u64 ignore_bits;
579 616
580 if (efer_offset < 0) 617 guest_efer = vmx->vcpu.arch.efer;
581 return;
582 host_efer = vmx->host_msrs[efer_offset].data;
583 guest_efer = vmx->guest_msrs[efer_offset].data;
584 618
585 /* 619 /*
586 * NX is emulated; LMA and LME handled by hardware; SCE meaninless 620 * NX is emulated; LMA and LME handled by hardware; SCE meaninless
@@ -593,27 +627,17 @@ static void load_transition_efer(struct vcpu_vmx *vmx)
593 if (guest_efer & EFER_LMA) 627 if (guest_efer & EFER_LMA)
594 ignore_bits &= ~(u64)EFER_SCE; 628 ignore_bits &= ~(u64)EFER_SCE;
595#endif 629#endif
596 if ((guest_efer & ~ignore_bits) == (host_efer & ~ignore_bits))
597 return;
598
599 vmx->host_state.guest_efer_loaded = 1;
600 guest_efer &= ~ignore_bits; 630 guest_efer &= ~ignore_bits;
601 guest_efer |= host_efer & ignore_bits; 631 guest_efer |= host_efer & ignore_bits;
602 wrmsrl(MSR_EFER, guest_efer); 632 vmx->guest_msrs[efer_offset].data = guest_efer;
603 vmx->vcpu.stat.efer_reload++; 633 vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
604} 634 return true;
605
606static void reload_host_efer(struct vcpu_vmx *vmx)
607{
608 if (vmx->host_state.guest_efer_loaded) {
609 vmx->host_state.guest_efer_loaded = 0;
610 load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1);
611 }
612} 635}
613 636
614static void vmx_save_host_state(struct kvm_vcpu *vcpu) 637static void vmx_save_host_state(struct kvm_vcpu *vcpu)
615{ 638{
616 struct vcpu_vmx *vmx = to_vmx(vcpu); 639 struct vcpu_vmx *vmx = to_vmx(vcpu);
640 int i;
617 641
618 if (vmx->host_state.loaded) 642 if (vmx->host_state.loaded)
619 return; 643 return;
@@ -650,13 +674,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
650#endif 674#endif
651 675
652#ifdef CONFIG_X86_64 676#ifdef CONFIG_X86_64
653 if (is_long_mode(&vmx->vcpu)) 677 if (is_long_mode(&vmx->vcpu)) {
654 save_msrs(vmx->host_msrs + 678 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
655 vmx->msr_offset_kernel_gs_base, 1); 679 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
656 680 }
657#endif 681#endif
658 load_msrs(vmx->guest_msrs, vmx->save_nmsrs); 682 for (i = 0; i < vmx->save_nmsrs; ++i)
659 load_transition_efer(vmx); 683 kvm_set_shared_msr(vmx->guest_msrs[i].index,
684 vmx->guest_msrs[i].data,
685 vmx->guest_msrs[i].mask);
660} 686}
661 687
662static void __vmx_load_host_state(struct vcpu_vmx *vmx) 688static void __vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -684,9 +710,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
684 local_irq_restore(flags); 710 local_irq_restore(flags);
685 } 711 }
686 reload_tss(); 712 reload_tss();
687 save_msrs(vmx->guest_msrs, vmx->save_nmsrs); 713#ifdef CONFIG_X86_64
688 load_msrs(vmx->host_msrs, vmx->save_nmsrs); 714 if (is_long_mode(&vmx->vcpu)) {
689 reload_host_efer(vmx); 715 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
716 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
717 }
718#endif
690} 719}
691 720
692static void vmx_load_host_state(struct vcpu_vmx *vmx) 721static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -763,38 +792,51 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
763 792
764static void vmx_fpu_activate(struct kvm_vcpu *vcpu) 793static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
765{ 794{
795 ulong cr0;
796
766 if (vcpu->fpu_active) 797 if (vcpu->fpu_active)
767 return; 798 return;
768 vcpu->fpu_active = 1; 799 vcpu->fpu_active = 1;
769 vmcs_clear_bits(GUEST_CR0, X86_CR0_TS); 800 cr0 = vmcs_readl(GUEST_CR0);
770 if (vcpu->arch.cr0 & X86_CR0_TS) 801 cr0 &= ~(X86_CR0_TS | X86_CR0_MP);
771 vmcs_set_bits(GUEST_CR0, X86_CR0_TS); 802 cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP);
803 vmcs_writel(GUEST_CR0, cr0);
772 update_exception_bitmap(vcpu); 804 update_exception_bitmap(vcpu);
805 vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
806 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
773} 807}
774 808
809static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
810
775static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) 811static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
776{ 812{
777 if (!vcpu->fpu_active) 813 vmx_decache_cr0_guest_bits(vcpu);
778 return; 814 vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP);
779 vcpu->fpu_active = 0;
780 vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
781 update_exception_bitmap(vcpu); 815 update_exception_bitmap(vcpu);
816 vcpu->arch.cr0_guest_owned_bits = 0;
817 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
818 vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
782} 819}
783 820
784static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) 821static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
785{ 822{
786 unsigned long rflags; 823 unsigned long rflags, save_rflags;
787 824
788 rflags = vmcs_readl(GUEST_RFLAGS); 825 rflags = vmcs_readl(GUEST_RFLAGS);
789 if (to_vmx(vcpu)->rmode.vm86_active) 826 if (to_vmx(vcpu)->rmode.vm86_active) {
790 rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM); 827 rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
828 save_rflags = to_vmx(vcpu)->rmode.save_rflags;
829 rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
830 }
791 return rflags; 831 return rflags;
792} 832}
793 833
794static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 834static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
795{ 835{
796 if (to_vmx(vcpu)->rmode.vm86_active) 836 if (to_vmx(vcpu)->rmode.vm86_active) {
837 to_vmx(vcpu)->rmode.save_rflags = rflags;
797 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; 838 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
839 }
798 vmcs_writel(GUEST_RFLAGS, rflags); 840 vmcs_writel(GUEST_RFLAGS, rflags);
799} 841}
800 842
@@ -874,22 +916,22 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
874 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); 916 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
875} 917}
876 918
919static bool vmx_rdtscp_supported(void)
920{
921 return cpu_has_vmx_rdtscp();
922}
923
877/* 924/*
878 * Swap MSR entry in host/guest MSR entry array. 925 * Swap MSR entry in host/guest MSR entry array.
879 */ 926 */
880#ifdef CONFIG_X86_64
881static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) 927static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
882{ 928{
883 struct kvm_msr_entry tmp; 929 struct shared_msr_entry tmp;
884 930
885 tmp = vmx->guest_msrs[to]; 931 tmp = vmx->guest_msrs[to];
886 vmx->guest_msrs[to] = vmx->guest_msrs[from]; 932 vmx->guest_msrs[to] = vmx->guest_msrs[from];
887 vmx->guest_msrs[from] = tmp; 933 vmx->guest_msrs[from] = tmp;
888 tmp = vmx->host_msrs[to];
889 vmx->host_msrs[to] = vmx->host_msrs[from];
890 vmx->host_msrs[from] = tmp;
891} 934}
892#endif
893 935
894/* 936/*
895 * Set up the vmcs to automatically save and restore system 937 * Set up the vmcs to automatically save and restore system
@@ -898,15 +940,13 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
898 */ 940 */
899static void setup_msrs(struct vcpu_vmx *vmx) 941static void setup_msrs(struct vcpu_vmx *vmx)
900{ 942{
901 int save_nmsrs; 943 int save_nmsrs, index;
902 unsigned long *msr_bitmap; 944 unsigned long *msr_bitmap;
903 945
904 vmx_load_host_state(vmx); 946 vmx_load_host_state(vmx);
905 save_nmsrs = 0; 947 save_nmsrs = 0;
906#ifdef CONFIG_X86_64 948#ifdef CONFIG_X86_64
907 if (is_long_mode(&vmx->vcpu)) { 949 if (is_long_mode(&vmx->vcpu)) {
908 int index;
909
910 index = __find_msr_index(vmx, MSR_SYSCALL_MASK); 950 index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
911 if (index >= 0) 951 if (index >= 0)
912 move_msr_up(vmx, index, save_nmsrs++); 952 move_msr_up(vmx, index, save_nmsrs++);
@@ -916,25 +956,23 @@ static void setup_msrs(struct vcpu_vmx *vmx)
916 index = __find_msr_index(vmx, MSR_CSTAR); 956 index = __find_msr_index(vmx, MSR_CSTAR);
917 if (index >= 0) 957 if (index >= 0)
918 move_msr_up(vmx, index, save_nmsrs++); 958 move_msr_up(vmx, index, save_nmsrs++);
919 index = __find_msr_index(vmx, MSR_KERNEL_GS_BASE); 959 index = __find_msr_index(vmx, MSR_TSC_AUX);
920 if (index >= 0) 960 if (index >= 0 && vmx->rdtscp_enabled)
921 move_msr_up(vmx, index, save_nmsrs++); 961 move_msr_up(vmx, index, save_nmsrs++);
922 /* 962 /*
923 * MSR_K6_STAR is only needed on long mode guests, and only 963 * MSR_K6_STAR is only needed on long mode guests, and only
924 * if efer.sce is enabled. 964 * if efer.sce is enabled.
925 */ 965 */
926 index = __find_msr_index(vmx, MSR_K6_STAR); 966 index = __find_msr_index(vmx, MSR_K6_STAR);
927 if ((index >= 0) && (vmx->vcpu.arch.shadow_efer & EFER_SCE)) 967 if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE))
928 move_msr_up(vmx, index, save_nmsrs++); 968 move_msr_up(vmx, index, save_nmsrs++);
929 } 969 }
930#endif 970#endif
931 vmx->save_nmsrs = save_nmsrs; 971 index = __find_msr_index(vmx, MSR_EFER);
972 if (index >= 0 && update_transition_efer(vmx, index))
973 move_msr_up(vmx, index, save_nmsrs++);
932 974
933#ifdef CONFIG_X86_64 975 vmx->save_nmsrs = save_nmsrs;
934 vmx->msr_offset_kernel_gs_base =
935 __find_msr_index(vmx, MSR_KERNEL_GS_BASE);
936#endif
937 vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER);
938 976
939 if (cpu_has_vmx_msr_bitmap()) { 977 if (cpu_has_vmx_msr_bitmap()) {
940 if (is_long_mode(&vmx->vcpu)) 978 if (is_long_mode(&vmx->vcpu))
@@ -976,7 +1014,7 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
976static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) 1014static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
977{ 1015{
978 u64 data; 1016 u64 data;
979 struct kvm_msr_entry *msr; 1017 struct shared_msr_entry *msr;
980 1018
981 if (!pdata) { 1019 if (!pdata) {
982 printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); 1020 printk(KERN_ERR "BUG: get_msr called with NULL pdata\n");
@@ -991,9 +1029,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
991 case MSR_GS_BASE: 1029 case MSR_GS_BASE:
992 data = vmcs_readl(GUEST_GS_BASE); 1030 data = vmcs_readl(GUEST_GS_BASE);
993 break; 1031 break;
1032 case MSR_KERNEL_GS_BASE:
1033 vmx_load_host_state(to_vmx(vcpu));
1034 data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
1035 break;
1036#endif
994 case MSR_EFER: 1037 case MSR_EFER:
995 return kvm_get_msr_common(vcpu, msr_index, pdata); 1038 return kvm_get_msr_common(vcpu, msr_index, pdata);
996#endif
997 case MSR_IA32_TSC: 1039 case MSR_IA32_TSC:
998 data = guest_read_tsc(); 1040 data = guest_read_tsc();
999 break; 1041 break;
@@ -1006,7 +1048,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
1006 case MSR_IA32_SYSENTER_ESP: 1048 case MSR_IA32_SYSENTER_ESP:
1007 data = vmcs_readl(GUEST_SYSENTER_ESP); 1049 data = vmcs_readl(GUEST_SYSENTER_ESP);
1008 break; 1050 break;
1051 case MSR_TSC_AUX:
1052 if (!to_vmx(vcpu)->rdtscp_enabled)
1053 return 1;
1054 /* Otherwise falls through */
1009 default: 1055 default:
1056 vmx_load_host_state(to_vmx(vcpu));
1010 msr = find_msr_entry(to_vmx(vcpu), msr_index); 1057 msr = find_msr_entry(to_vmx(vcpu), msr_index);
1011 if (msr) { 1058 if (msr) {
1012 vmx_load_host_state(to_vmx(vcpu)); 1059 vmx_load_host_state(to_vmx(vcpu));
@@ -1028,7 +1075,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
1028static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) 1075static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1029{ 1076{
1030 struct vcpu_vmx *vmx = to_vmx(vcpu); 1077 struct vcpu_vmx *vmx = to_vmx(vcpu);
1031 struct kvm_msr_entry *msr; 1078 struct shared_msr_entry *msr;
1032 u64 host_tsc; 1079 u64 host_tsc;
1033 int ret = 0; 1080 int ret = 0;
1034 1081
@@ -1044,6 +1091,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1044 case MSR_GS_BASE: 1091 case MSR_GS_BASE:
1045 vmcs_writel(GUEST_GS_BASE, data); 1092 vmcs_writel(GUEST_GS_BASE, data);
1046 break; 1093 break;
1094 case MSR_KERNEL_GS_BASE:
1095 vmx_load_host_state(vmx);
1096 vmx->msr_guest_kernel_gs_base = data;
1097 break;
1047#endif 1098#endif
1048 case MSR_IA32_SYSENTER_CS: 1099 case MSR_IA32_SYSENTER_CS:
1049 vmcs_write32(GUEST_SYSENTER_CS, data); 1100 vmcs_write32(GUEST_SYSENTER_CS, data);
@@ -1064,7 +1115,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1064 vcpu->arch.pat = data; 1115 vcpu->arch.pat = data;
1065 break; 1116 break;
1066 } 1117 }
1067 /* Otherwise falls through to kvm_set_msr_common */ 1118 ret = kvm_set_msr_common(vcpu, msr_index, data);
1119 break;
1120 case MSR_TSC_AUX:
1121 if (!vmx->rdtscp_enabled)
1122 return 1;
1123 /* Check reserved bit, higher 32 bits should be zero */
1124 if ((data >> 32) != 0)
1125 return 1;
1126 /* Otherwise falls through */
1068 default: 1127 default:
1069 msr = find_msr_entry(vmx, msr_index); 1128 msr = find_msr_entry(vmx, msr_index);
1070 if (msr) { 1129 if (msr) {
@@ -1097,30 +1156,14 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
1097 } 1156 }
1098} 1157}
1099 1158
1100static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) 1159static void set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
1101{ 1160{
1102 int old_debug = vcpu->guest_debug;
1103 unsigned long flags;
1104
1105 vcpu->guest_debug = dbg->control;
1106 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
1107 vcpu->guest_debug = 0;
1108
1109 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) 1161 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1110 vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); 1162 vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
1111 else 1163 else
1112 vmcs_writel(GUEST_DR7, vcpu->arch.dr7); 1164 vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
1113 1165
1114 flags = vmcs_readl(GUEST_RFLAGS);
1115 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
1116 flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
1117 else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
1118 flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
1119 vmcs_writel(GUEST_RFLAGS, flags);
1120
1121 update_exception_bitmap(vcpu); 1166 update_exception_bitmap(vcpu);
1122
1123 return 0;
1124} 1167}
1125 1168
1126static __init int cpu_has_kvm_support(void) 1169static __init int cpu_has_kvm_support(void)
@@ -1139,12 +1182,15 @@ static __init int vmx_disabled_by_bios(void)
1139 /* locked but not enabled */ 1182 /* locked but not enabled */
1140} 1183}
1141 1184
1142static void hardware_enable(void *garbage) 1185static int hardware_enable(void *garbage)
1143{ 1186{
1144 int cpu = raw_smp_processor_id(); 1187 int cpu = raw_smp_processor_id();
1145 u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); 1188 u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
1146 u64 old; 1189 u64 old;
1147 1190
1191 if (read_cr4() & X86_CR4_VMXE)
1192 return -EBUSY;
1193
1148 INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); 1194 INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
1149 rdmsrl(MSR_IA32_FEATURE_CONTROL, old); 1195 rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
1150 if ((old & (FEATURE_CONTROL_LOCKED | 1196 if ((old & (FEATURE_CONTROL_LOCKED |
@@ -1159,6 +1205,10 @@ static void hardware_enable(void *garbage)
1159 asm volatile (ASM_VMX_VMXON_RAX 1205 asm volatile (ASM_VMX_VMXON_RAX
1160 : : "a"(&phys_addr), "m"(phys_addr) 1206 : : "a"(&phys_addr), "m"(phys_addr)
1161 : "memory", "cc"); 1207 : "memory", "cc");
1208
1209 ept_sync_global();
1210
1211 return 0;
1162} 1212}
1163 1213
1164static void vmclear_local_vcpus(void) 1214static void vmclear_local_vcpus(void)
@@ -1232,6 +1282,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1232 CPU_BASED_USE_IO_BITMAPS | 1282 CPU_BASED_USE_IO_BITMAPS |
1233 CPU_BASED_MOV_DR_EXITING | 1283 CPU_BASED_MOV_DR_EXITING |
1234 CPU_BASED_USE_TSC_OFFSETING | 1284 CPU_BASED_USE_TSC_OFFSETING |
1285 CPU_BASED_MWAIT_EXITING |
1286 CPU_BASED_MONITOR_EXITING |
1235 CPU_BASED_INVLPG_EXITING; 1287 CPU_BASED_INVLPG_EXITING;
1236 opt = CPU_BASED_TPR_SHADOW | 1288 opt = CPU_BASED_TPR_SHADOW |
1237 CPU_BASED_USE_MSR_BITMAPS | 1289 CPU_BASED_USE_MSR_BITMAPS |
@@ -1250,7 +1302,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1250 SECONDARY_EXEC_WBINVD_EXITING | 1302 SECONDARY_EXEC_WBINVD_EXITING |
1251 SECONDARY_EXEC_ENABLE_VPID | 1303 SECONDARY_EXEC_ENABLE_VPID |
1252 SECONDARY_EXEC_ENABLE_EPT | 1304 SECONDARY_EXEC_ENABLE_EPT |
1253 SECONDARY_EXEC_UNRESTRICTED_GUEST; 1305 SECONDARY_EXEC_UNRESTRICTED_GUEST |
1306 SECONDARY_EXEC_PAUSE_LOOP_EXITING |
1307 SECONDARY_EXEC_RDTSCP;
1254 if (adjust_vmx_controls(min2, opt2, 1308 if (adjust_vmx_controls(min2, opt2,
1255 MSR_IA32_VMX_PROCBASED_CTLS2, 1309 MSR_IA32_VMX_PROCBASED_CTLS2,
1256 &_cpu_based_2nd_exec_control) < 0) 1310 &_cpu_based_2nd_exec_control) < 0)
@@ -1344,15 +1398,17 @@ static void free_kvm_area(void)
1344{ 1398{
1345 int cpu; 1399 int cpu;
1346 1400
1347 for_each_online_cpu(cpu) 1401 for_each_possible_cpu(cpu) {
1348 free_vmcs(per_cpu(vmxarea, cpu)); 1402 free_vmcs(per_cpu(vmxarea, cpu));
1403 per_cpu(vmxarea, cpu) = NULL;
1404 }
1349} 1405}
1350 1406
1351static __init int alloc_kvm_area(void) 1407static __init int alloc_kvm_area(void)
1352{ 1408{
1353 int cpu; 1409 int cpu;
1354 1410
1355 for_each_online_cpu(cpu) { 1411 for_each_possible_cpu(cpu) {
1356 struct vmcs *vmcs; 1412 struct vmcs *vmcs;
1357 1413
1358 vmcs = alloc_vmcs_cpu(cpu); 1414 vmcs = alloc_vmcs_cpu(cpu);
@@ -1394,6 +1450,9 @@ static __init int hardware_setup(void)
1394 if (enable_ept && !cpu_has_vmx_ept_2m_page()) 1450 if (enable_ept && !cpu_has_vmx_ept_2m_page())
1395 kvm_disable_largepages(); 1451 kvm_disable_largepages();
1396 1452
1453 if (!cpu_has_vmx_ple())
1454 ple_gap = 0;
1455
1397 return alloc_kvm_area(); 1456 return alloc_kvm_area();
1398} 1457}
1399 1458
@@ -1431,8 +1490,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
1431 vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); 1490 vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
1432 1491
1433 flags = vmcs_readl(GUEST_RFLAGS); 1492 flags = vmcs_readl(GUEST_RFLAGS);
1434 flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM); 1493 flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
1435 flags |= (vmx->rmode.save_iopl << IOPL_SHIFT); 1494 flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
1436 vmcs_writel(GUEST_RFLAGS, flags); 1495 vmcs_writel(GUEST_RFLAGS, flags);
1437 1496
1438 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | 1497 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
@@ -1459,8 +1518,12 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
1459static gva_t rmode_tss_base(struct kvm *kvm) 1518static gva_t rmode_tss_base(struct kvm *kvm)
1460{ 1519{
1461 if (!kvm->arch.tss_addr) { 1520 if (!kvm->arch.tss_addr) {
1462 gfn_t base_gfn = kvm->memslots[0].base_gfn + 1521 struct kvm_memslots *slots;
1463 kvm->memslots[0].npages - 3; 1522 gfn_t base_gfn;
1523
1524 slots = rcu_dereference(kvm->memslots);
1525 base_gfn = kvm->memslots->memslots[0].base_gfn +
1526 kvm->memslots->memslots[0].npages - 3;
1464 return base_gfn << PAGE_SHIFT; 1527 return base_gfn << PAGE_SHIFT;
1465 } 1528 }
1466 return kvm->arch.tss_addr; 1529 return kvm->arch.tss_addr;
@@ -1501,8 +1564,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
1501 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); 1564 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
1502 1565
1503 flags = vmcs_readl(GUEST_RFLAGS); 1566 flags = vmcs_readl(GUEST_RFLAGS);
1504 vmx->rmode.save_iopl 1567 vmx->rmode.save_rflags = flags;
1505 = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1506 1568
1507 flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; 1569 flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
1508 1570
@@ -1536,11 +1598,17 @@ continue_rmode:
1536static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) 1598static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
1537{ 1599{
1538 struct vcpu_vmx *vmx = to_vmx(vcpu); 1600 struct vcpu_vmx *vmx = to_vmx(vcpu);
1539 struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); 1601 struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
1540 1602
1541 vcpu->arch.shadow_efer = efer;
1542 if (!msr) 1603 if (!msr)
1543 return; 1604 return;
1605
1606 /*
1607 * Force kernel_gs_base reloading before EFER changes, as control
1608 * of this msr depends on is_long_mode().
1609 */
1610 vmx_load_host_state(to_vmx(vcpu));
1611 vcpu->arch.efer = efer;
1544 if (efer & EFER_LMA) { 1612 if (efer & EFER_LMA) {
1545 vmcs_write32(VM_ENTRY_CONTROLS, 1613 vmcs_write32(VM_ENTRY_CONTROLS,
1546 vmcs_read32(VM_ENTRY_CONTROLS) | 1614 vmcs_read32(VM_ENTRY_CONTROLS) |
@@ -1570,13 +1638,13 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
1570 (guest_tr_ar & ~AR_TYPE_MASK) 1638 (guest_tr_ar & ~AR_TYPE_MASK)
1571 | AR_TYPE_BUSY_64_TSS); 1639 | AR_TYPE_BUSY_64_TSS);
1572 } 1640 }
1573 vcpu->arch.shadow_efer |= EFER_LMA; 1641 vcpu->arch.efer |= EFER_LMA;
1574 vmx_set_efer(vcpu, vcpu->arch.shadow_efer); 1642 vmx_set_efer(vcpu, vcpu->arch.efer);
1575} 1643}
1576 1644
1577static void exit_lmode(struct kvm_vcpu *vcpu) 1645static void exit_lmode(struct kvm_vcpu *vcpu)
1578{ 1646{
1579 vcpu->arch.shadow_efer &= ~EFER_LMA; 1647 vcpu->arch.efer &= ~EFER_LMA;
1580 1648
1581 vmcs_write32(VM_ENTRY_CONTROLS, 1649 vmcs_write32(VM_ENTRY_CONTROLS,
1582 vmcs_read32(VM_ENTRY_CONTROLS) 1650 vmcs_read32(VM_ENTRY_CONTROLS)
@@ -1592,10 +1660,20 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
1592 ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); 1660 ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
1593} 1661}
1594 1662
1663static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
1664{
1665 ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
1666
1667 vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
1668 vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
1669}
1670
1595static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) 1671static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
1596{ 1672{
1597 vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK; 1673 ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
1598 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; 1674
1675 vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
1676 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
1599} 1677}
1600 1678
1601static void ept_load_pdptrs(struct kvm_vcpu *vcpu) 1679static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
@@ -1640,7 +1718,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1640 (CPU_BASED_CR3_LOAD_EXITING | 1718 (CPU_BASED_CR3_LOAD_EXITING |
1641 CPU_BASED_CR3_STORE_EXITING)); 1719 CPU_BASED_CR3_STORE_EXITING));
1642 vcpu->arch.cr0 = cr0; 1720 vcpu->arch.cr0 = cr0;
1643 vmx_set_cr4(vcpu, vcpu->arch.cr4); 1721 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
1644 } else if (!is_paging(vcpu)) { 1722 } else if (!is_paging(vcpu)) {
1645 /* From nonpaging to paging */ 1723 /* From nonpaging to paging */
1646 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, 1724 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
@@ -1648,23 +1726,13 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1648 ~(CPU_BASED_CR3_LOAD_EXITING | 1726 ~(CPU_BASED_CR3_LOAD_EXITING |
1649 CPU_BASED_CR3_STORE_EXITING)); 1727 CPU_BASED_CR3_STORE_EXITING));
1650 vcpu->arch.cr0 = cr0; 1728 vcpu->arch.cr0 = cr0;
1651 vmx_set_cr4(vcpu, vcpu->arch.cr4); 1729 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
1652 } 1730 }
1653 1731
1654 if (!(cr0 & X86_CR0_WP)) 1732 if (!(cr0 & X86_CR0_WP))
1655 *hw_cr0 &= ~X86_CR0_WP; 1733 *hw_cr0 &= ~X86_CR0_WP;
1656} 1734}
1657 1735
1658static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
1659 struct kvm_vcpu *vcpu)
1660{
1661 if (!is_paging(vcpu)) {
1662 *hw_cr4 &= ~X86_CR4_PAE;
1663 *hw_cr4 |= X86_CR4_PSE;
1664 } else if (!(vcpu->arch.cr4 & X86_CR4_PAE))
1665 *hw_cr4 &= ~X86_CR4_PAE;
1666}
1667
1668static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 1736static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1669{ 1737{
1670 struct vcpu_vmx *vmx = to_vmx(vcpu); 1738 struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -1676,8 +1744,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1676 else 1744 else
1677 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; 1745 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
1678 1746
1679 vmx_fpu_deactivate(vcpu);
1680
1681 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) 1747 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
1682 enter_pmode(vcpu); 1748 enter_pmode(vcpu);
1683 1749
@@ -1685,7 +1751,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1685 enter_rmode(vcpu); 1751 enter_rmode(vcpu);
1686 1752
1687#ifdef CONFIG_X86_64 1753#ifdef CONFIG_X86_64
1688 if (vcpu->arch.shadow_efer & EFER_LME) { 1754 if (vcpu->arch.efer & EFER_LME) {
1689 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) 1755 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
1690 enter_lmode(vcpu); 1756 enter_lmode(vcpu);
1691 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) 1757 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
@@ -1696,12 +1762,12 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1696 if (enable_ept) 1762 if (enable_ept)
1697 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); 1763 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
1698 1764
1765 if (!vcpu->fpu_active)
1766 hw_cr0 |= X86_CR0_TS | X86_CR0_MP;
1767
1699 vmcs_writel(CR0_READ_SHADOW, cr0); 1768 vmcs_writel(CR0_READ_SHADOW, cr0);
1700 vmcs_writel(GUEST_CR0, hw_cr0); 1769 vmcs_writel(GUEST_CR0, hw_cr0);
1701 vcpu->arch.cr0 = cr0; 1770 vcpu->arch.cr0 = cr0;
1702
1703 if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
1704 vmx_fpu_activate(vcpu);
1705} 1771}
1706 1772
1707static u64 construct_eptp(unsigned long root_hpa) 1773static u64 construct_eptp(unsigned long root_hpa)
@@ -1727,12 +1793,11 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1727 vmcs_write64(EPT_POINTER, eptp); 1793 vmcs_write64(EPT_POINTER, eptp);
1728 guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : 1794 guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
1729 vcpu->kvm->arch.ept_identity_map_addr; 1795 vcpu->kvm->arch.ept_identity_map_addr;
1796 ept_load_pdptrs(vcpu);
1730 } 1797 }
1731 1798
1732 vmx_flush_tlb(vcpu); 1799 vmx_flush_tlb(vcpu);
1733 vmcs_writel(GUEST_CR3, guest_cr3); 1800 vmcs_writel(GUEST_CR3, guest_cr3);
1734 if (vcpu->arch.cr0 & X86_CR0_PE)
1735 vmx_fpu_deactivate(vcpu);
1736} 1801}
1737 1802
1738static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 1803static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -1741,8 +1806,14 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1741 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); 1806 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
1742 1807
1743 vcpu->arch.cr4 = cr4; 1808 vcpu->arch.cr4 = cr4;
1744 if (enable_ept) 1809 if (enable_ept) {
1745 ept_update_paging_mode_cr4(&hw_cr4, vcpu); 1810 if (!is_paging(vcpu)) {
1811 hw_cr4 &= ~X86_CR4_PAE;
1812 hw_cr4 |= X86_CR4_PSE;
1813 } else if (!(cr4 & X86_CR4_PAE)) {
1814 hw_cr4 &= ~X86_CR4_PAE;
1815 }
1816 }
1746 1817
1747 vmcs_writel(CR4_READ_SHADOW, cr4); 1818 vmcs_writel(CR4_READ_SHADOW, cr4);
1748 vmcs_writel(GUEST_CR4, hw_cr4); 1819 vmcs_writel(GUEST_CR4, hw_cr4);
@@ -1780,7 +1851,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
1780 1851
1781static int vmx_get_cpl(struct kvm_vcpu *vcpu) 1852static int vmx_get_cpl(struct kvm_vcpu *vcpu)
1782{ 1853{
1783 if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */ 1854 if (!is_protmode(vcpu))
1784 return 0; 1855 return 0;
1785 1856
1786 if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ 1857 if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
@@ -2035,7 +2106,7 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
2035static bool guest_state_valid(struct kvm_vcpu *vcpu) 2106static bool guest_state_valid(struct kvm_vcpu *vcpu)
2036{ 2107{
2037 /* real mode guest state checks */ 2108 /* real mode guest state checks */
2038 if (!(vcpu->arch.cr0 & X86_CR0_PE)) { 2109 if (!is_protmode(vcpu)) {
2039 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) 2110 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
2040 return false; 2111 return false;
2041 if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) 2112 if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
@@ -2168,7 +2239,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
2168 struct kvm_userspace_memory_region kvm_userspace_mem; 2239 struct kvm_userspace_memory_region kvm_userspace_mem;
2169 int r = 0; 2240 int r = 0;
2170 2241
2171 down_write(&kvm->slots_lock); 2242 mutex_lock(&kvm->slots_lock);
2172 if (kvm->arch.apic_access_page) 2243 if (kvm->arch.apic_access_page)
2173 goto out; 2244 goto out;
2174 kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; 2245 kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
@@ -2181,7 +2252,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
2181 2252
2182 kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); 2253 kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
2183out: 2254out:
2184 up_write(&kvm->slots_lock); 2255 mutex_unlock(&kvm->slots_lock);
2185 return r; 2256 return r;
2186} 2257}
2187 2258
@@ -2190,7 +2261,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
2190 struct kvm_userspace_memory_region kvm_userspace_mem; 2261 struct kvm_userspace_memory_region kvm_userspace_mem;
2191 int r = 0; 2262 int r = 0;
2192 2263
2193 down_write(&kvm->slots_lock); 2264 mutex_lock(&kvm->slots_lock);
2194 if (kvm->arch.ept_identity_pagetable) 2265 if (kvm->arch.ept_identity_pagetable)
2195 goto out; 2266 goto out;
2196 kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; 2267 kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
@@ -2205,7 +2276,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
2205 kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, 2276 kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
2206 kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); 2277 kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
2207out: 2278out:
2208 up_write(&kvm->slots_lock); 2279 mutex_unlock(&kvm->slots_lock);
2209 return r; 2280 return r;
2210} 2281}
2211 2282
@@ -2302,13 +2373,22 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2302 ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; 2373 ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
2303 if (vmx->vpid == 0) 2374 if (vmx->vpid == 0)
2304 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; 2375 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
2305 if (!enable_ept) 2376 if (!enable_ept) {
2306 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; 2377 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
2378 enable_unrestricted_guest = 0;
2379 }
2307 if (!enable_unrestricted_guest) 2380 if (!enable_unrestricted_guest)
2308 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; 2381 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
2382 if (!ple_gap)
2383 exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
2309 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 2384 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
2310 } 2385 }
2311 2386
2387 if (ple_gap) {
2388 vmcs_write32(PLE_GAP, ple_gap);
2389 vmcs_write32(PLE_WINDOW, ple_window);
2390 }
2391
2312 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf); 2392 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf);
2313 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); 2393 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf);
2314 vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ 2394 vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
@@ -2368,18 +2448,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2368 for (i = 0; i < NR_VMX_MSR; ++i) { 2448 for (i = 0; i < NR_VMX_MSR; ++i) {
2369 u32 index = vmx_msr_index[i]; 2449 u32 index = vmx_msr_index[i];
2370 u32 data_low, data_high; 2450 u32 data_low, data_high;
2371 u64 data;
2372 int j = vmx->nmsrs; 2451 int j = vmx->nmsrs;
2373 2452
2374 if (rdmsr_safe(index, &data_low, &data_high) < 0) 2453 if (rdmsr_safe(index, &data_low, &data_high) < 0)
2375 continue; 2454 continue;
2376 if (wrmsr_safe(index, data_low, data_high) < 0) 2455 if (wrmsr_safe(index, data_low, data_high) < 0)
2377 continue; 2456 continue;
2378 data = data_low | ((u64)data_high << 32); 2457 vmx->guest_msrs[j].index = i;
2379 vmx->host_msrs[j].index = index; 2458 vmx->guest_msrs[j].data = 0;
2380 vmx->host_msrs[j].reserved = 0; 2459 vmx->guest_msrs[j].mask = -1ull;
2381 vmx->host_msrs[j].data = data;
2382 vmx->guest_msrs[j] = vmx->host_msrs[j];
2383 ++vmx->nmsrs; 2460 ++vmx->nmsrs;
2384 } 2461 }
2385 2462
@@ -2389,7 +2466,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2389 vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); 2466 vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl);
2390 2467
2391 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); 2468 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
2392 vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); 2469 vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
2470 if (enable_ept)
2471 vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
2472 vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
2393 2473
2394 tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; 2474 tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
2395 rdtscll(tsc_this); 2475 rdtscll(tsc_this);
@@ -2414,10 +2494,10 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2414{ 2494{
2415 struct vcpu_vmx *vmx = to_vmx(vcpu); 2495 struct vcpu_vmx *vmx = to_vmx(vcpu);
2416 u64 msr; 2496 u64 msr;
2417 int ret; 2497 int ret, idx;
2418 2498
2419 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); 2499 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
2420 down_read(&vcpu->kvm->slots_lock); 2500 idx = srcu_read_lock(&vcpu->kvm->srcu);
2421 if (!init_rmode(vmx->vcpu.kvm)) { 2501 if (!init_rmode(vmx->vcpu.kvm)) {
2422 ret = -ENOMEM; 2502 ret = -ENOMEM;
2423 goto out; 2503 goto out;
@@ -2510,8 +2590,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2510 if (vmx->vpid != 0) 2590 if (vmx->vpid != 0)
2511 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); 2591 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
2512 2592
2513 vmx->vcpu.arch.cr0 = 0x60000010; 2593 vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
2514 vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ 2594 vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
2515 vmx_set_cr4(&vmx->vcpu, 0); 2595 vmx_set_cr4(&vmx->vcpu, 0);
2516 vmx_set_efer(&vmx->vcpu, 0); 2596 vmx_set_efer(&vmx->vcpu, 0);
2517 vmx_fpu_activate(&vmx->vcpu); 2597 vmx_fpu_activate(&vmx->vcpu);
@@ -2525,7 +2605,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2525 vmx->emulation_required = 0; 2605 vmx->emulation_required = 0;
2526 2606
2527out: 2607out:
2528 up_read(&vcpu->kvm->slots_lock); 2608 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2529 return ret; 2609 return ret;
2530} 2610}
2531 2611
@@ -2623,8 +2703,35 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
2623 return 0; 2703 return 0;
2624 2704
2625 return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 2705 return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
2626 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS | 2706 (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI));
2627 GUEST_INTR_STATE_NMI)); 2707}
2708
2709static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
2710{
2711 if (!cpu_has_virtual_nmis())
2712 return to_vmx(vcpu)->soft_vnmi_blocked;
2713 else
2714 return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
2715 GUEST_INTR_STATE_NMI);
2716}
2717
2718static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
2719{
2720 struct vcpu_vmx *vmx = to_vmx(vcpu);
2721
2722 if (!cpu_has_virtual_nmis()) {
2723 if (vmx->soft_vnmi_blocked != masked) {
2724 vmx->soft_vnmi_blocked = masked;
2725 vmx->vnmi_blocked_time = 0;
2726 }
2727 } else {
2728 if (masked)
2729 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
2730 GUEST_INTR_STATE_NMI);
2731 else
2732 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
2733 GUEST_INTR_STATE_NMI);
2734 }
2628} 2735}
2629 2736
2630static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) 2737static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
@@ -2659,7 +2766,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
2659 * Cause the #SS fault with 0 error code in VM86 mode. 2766 * Cause the #SS fault with 0 error code in VM86 mode.
2660 */ 2767 */
2661 if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) 2768 if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0)
2662 if (emulate_instruction(vcpu, NULL, 0, 0, 0) == EMULATE_DONE) 2769 if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE)
2663 return 1; 2770 return 1;
2664 /* 2771 /*
2665 * Forward all other exceptions that are valid in real mode. 2772 * Forward all other exceptions that are valid in real mode.
@@ -2674,6 +2781,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
2674 kvm_queue_exception(vcpu, vec); 2781 kvm_queue_exception(vcpu, vec);
2675 return 1; 2782 return 1;
2676 case BP_VECTOR: 2783 case BP_VECTOR:
2784 /*
2785 * Update instruction length as we may reinject the exception
2786 * from user space while in guest debugging mode.
2787 */
2788 to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
2789 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
2677 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) 2790 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
2678 return 0; 2791 return 0;
2679 /* fall through */ 2792 /* fall through */
@@ -2710,15 +2823,16 @@ static void kvm_machine_check(void)
2710#endif 2823#endif
2711} 2824}
2712 2825
2713static int handle_machine_check(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2826static int handle_machine_check(struct kvm_vcpu *vcpu)
2714{ 2827{
2715 /* already handled by vcpu_run */ 2828 /* already handled by vcpu_run */
2716 return 1; 2829 return 1;
2717} 2830}
2718 2831
2719static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2832static int handle_exception(struct kvm_vcpu *vcpu)
2720{ 2833{
2721 struct vcpu_vmx *vmx = to_vmx(vcpu); 2834 struct vcpu_vmx *vmx = to_vmx(vcpu);
2835 struct kvm_run *kvm_run = vcpu->run;
2722 u32 intr_info, ex_no, error_code; 2836 u32 intr_info, ex_no, error_code;
2723 unsigned long cr2, rip, dr6; 2837 unsigned long cr2, rip, dr6;
2724 u32 vect_info; 2838 u32 vect_info;
@@ -2728,12 +2842,17 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2728 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 2842 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
2729 2843
2730 if (is_machine_check(intr_info)) 2844 if (is_machine_check(intr_info))
2731 return handle_machine_check(vcpu, kvm_run); 2845 return handle_machine_check(vcpu);
2732 2846
2733 if ((vect_info & VECTORING_INFO_VALID_MASK) && 2847 if ((vect_info & VECTORING_INFO_VALID_MASK) &&
2734 !is_page_fault(intr_info)) 2848 !is_page_fault(intr_info)) {
2735 printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " 2849 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
2736 "intr info 0x%x\n", __func__, vect_info, intr_info); 2850 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
2851 vcpu->run->internal.ndata = 2;
2852 vcpu->run->internal.data[0] = vect_info;
2853 vcpu->run->internal.data[1] = intr_info;
2854 return 0;
2855 }
2737 2856
2738 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) 2857 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
2739 return 1; /* already handled by vmx_vcpu_run() */ 2858 return 1; /* already handled by vmx_vcpu_run() */
@@ -2744,7 +2863,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2744 } 2863 }
2745 2864
2746 if (is_invalid_opcode(intr_info)) { 2865 if (is_invalid_opcode(intr_info)) {
2747 er = emulate_instruction(vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD); 2866 er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD);
2748 if (er != EMULATE_DONE) 2867 if (er != EMULATE_DONE)
2749 kvm_queue_exception(vcpu, UD_VECTOR); 2868 kvm_queue_exception(vcpu, UD_VECTOR);
2750 return 1; 2869 return 1;
@@ -2790,6 +2909,13 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2790 kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); 2909 kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
2791 /* fall through */ 2910 /* fall through */
2792 case BP_VECTOR: 2911 case BP_VECTOR:
2912 /*
2913 * Update instruction length as we may reinject #BP from
2914 * user space while in guest debugging mode. Reading it for
2915 * #DB as well causes no harm, it is not used in that case.
2916 */
2917 vmx->vcpu.arch.event_exit_inst_len =
2918 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
2793 kvm_run->exit_reason = KVM_EXIT_DEBUG; 2919 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2794 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; 2920 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
2795 kvm_run->debug.arch.exception = ex_no; 2921 kvm_run->debug.arch.exception = ex_no;
@@ -2803,20 +2929,19 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2803 return 0; 2929 return 0;
2804} 2930}
2805 2931
2806static int handle_external_interrupt(struct kvm_vcpu *vcpu, 2932static int handle_external_interrupt(struct kvm_vcpu *vcpu)
2807 struct kvm_run *kvm_run)
2808{ 2933{
2809 ++vcpu->stat.irq_exits; 2934 ++vcpu->stat.irq_exits;
2810 return 1; 2935 return 1;
2811} 2936}
2812 2937
2813static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2938static int handle_triple_fault(struct kvm_vcpu *vcpu)
2814{ 2939{
2815 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; 2940 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
2816 return 0; 2941 return 0;
2817} 2942}
2818 2943
2819static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2944static int handle_io(struct kvm_vcpu *vcpu)
2820{ 2945{
2821 unsigned long exit_qualification; 2946 unsigned long exit_qualification;
2822 int size, in, string; 2947 int size, in, string;
@@ -2827,8 +2952,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2827 string = (exit_qualification & 16) != 0; 2952 string = (exit_qualification & 16) != 0;
2828 2953
2829 if (string) { 2954 if (string) {
2830 if (emulate_instruction(vcpu, 2955 if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO)
2831 kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
2832 return 0; 2956 return 0;
2833 return 1; 2957 return 1;
2834 } 2958 }
@@ -2838,7 +2962,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2838 port = exit_qualification >> 16; 2962 port = exit_qualification >> 16;
2839 2963
2840 skip_emulated_instruction(vcpu); 2964 skip_emulated_instruction(vcpu);
2841 return kvm_emulate_pio(vcpu, kvm_run, in, size, port); 2965 return kvm_emulate_pio(vcpu, in, size, port);
2842} 2966}
2843 2967
2844static void 2968static void
@@ -2852,7 +2976,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
2852 hypercall[2] = 0xc1; 2976 hypercall[2] = 0xc1;
2853} 2977}
2854 2978
2855static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2979static int handle_cr(struct kvm_vcpu *vcpu)
2856{ 2980{
2857 unsigned long exit_qualification, val; 2981 unsigned long exit_qualification, val;
2858 int cr; 2982 int cr;
@@ -2887,17 +3011,16 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2887 return 1; 3011 return 1;
2888 if (cr8_prev <= cr8) 3012 if (cr8_prev <= cr8)
2889 return 1; 3013 return 1;
2890 kvm_run->exit_reason = KVM_EXIT_SET_TPR; 3014 vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
2891 return 0; 3015 return 0;
2892 } 3016 }
2893 }; 3017 };
2894 break; 3018 break;
2895 case 2: /* clts */ 3019 case 2: /* clts */
2896 vmx_fpu_deactivate(vcpu); 3020 vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
2897 vcpu->arch.cr0 &= ~X86_CR0_TS; 3021 trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
2898 vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
2899 vmx_fpu_activate(vcpu);
2900 skip_emulated_instruction(vcpu); 3022 skip_emulated_instruction(vcpu);
3023 vmx_fpu_activate(vcpu);
2901 return 1; 3024 return 1;
2902 case 1: /*mov from cr*/ 3025 case 1: /*mov from cr*/
2903 switch (cr) { 3026 switch (cr) {
@@ -2915,25 +3038,37 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2915 } 3038 }
2916 break; 3039 break;
2917 case 3: /* lmsw */ 3040 case 3: /* lmsw */
2918 kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); 3041 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
3042 trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
3043 kvm_lmsw(vcpu, val);
2919 3044
2920 skip_emulated_instruction(vcpu); 3045 skip_emulated_instruction(vcpu);
2921 return 1; 3046 return 1;
2922 default: 3047 default:
2923 break; 3048 break;
2924 } 3049 }
2925 kvm_run->exit_reason = 0; 3050 vcpu->run->exit_reason = 0;
2926 pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n", 3051 pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
2927 (int)(exit_qualification >> 4) & 3, cr); 3052 (int)(exit_qualification >> 4) & 3, cr);
2928 return 0; 3053 return 0;
2929} 3054}
2930 3055
2931static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3056static int check_dr_alias(struct kvm_vcpu *vcpu)
3057{
3058 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
3059 kvm_queue_exception(vcpu, UD_VECTOR);
3060 return -1;
3061 }
3062 return 0;
3063}
3064
3065static int handle_dr(struct kvm_vcpu *vcpu)
2932{ 3066{
2933 unsigned long exit_qualification; 3067 unsigned long exit_qualification;
2934 unsigned long val; 3068 unsigned long val;
2935 int dr, reg; 3069 int dr, reg;
2936 3070
3071 /* Do not handle if the CPL > 0, will trigger GP on re-entry */
2937 if (!kvm_require_cpl(vcpu, 0)) 3072 if (!kvm_require_cpl(vcpu, 0))
2938 return 1; 3073 return 1;
2939 dr = vmcs_readl(GUEST_DR7); 3074 dr = vmcs_readl(GUEST_DR7);
@@ -2944,13 +3079,13 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2944 * guest debugging itself. 3079 * guest debugging itself.
2945 */ 3080 */
2946 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { 3081 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
2947 kvm_run->debug.arch.dr6 = vcpu->arch.dr6; 3082 vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
2948 kvm_run->debug.arch.dr7 = dr; 3083 vcpu->run->debug.arch.dr7 = dr;
2949 kvm_run->debug.arch.pc = 3084 vcpu->run->debug.arch.pc =
2950 vmcs_readl(GUEST_CS_BASE) + 3085 vmcs_readl(GUEST_CS_BASE) +
2951 vmcs_readl(GUEST_RIP); 3086 vmcs_readl(GUEST_RIP);
2952 kvm_run->debug.arch.exception = DB_VECTOR; 3087 vcpu->run->debug.arch.exception = DB_VECTOR;
2953 kvm_run->exit_reason = KVM_EXIT_DEBUG; 3088 vcpu->run->exit_reason = KVM_EXIT_DEBUG;
2954 return 0; 3089 return 0;
2955 } else { 3090 } else {
2956 vcpu->arch.dr7 &= ~DR7_GD; 3091 vcpu->arch.dr7 &= ~DR7_GD;
@@ -2969,14 +3104,20 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2969 case 0 ... 3: 3104 case 0 ... 3:
2970 val = vcpu->arch.db[dr]; 3105 val = vcpu->arch.db[dr];
2971 break; 3106 break;
3107 case 4:
3108 if (check_dr_alias(vcpu) < 0)
3109 return 1;
3110 /* fall through */
2972 case 6: 3111 case 6:
2973 val = vcpu->arch.dr6; 3112 val = vcpu->arch.dr6;
2974 break; 3113 break;
2975 case 7: 3114 case 5:
3115 if (check_dr_alias(vcpu) < 0)
3116 return 1;
3117 /* fall through */
3118 default: /* 7 */
2976 val = vcpu->arch.dr7; 3119 val = vcpu->arch.dr7;
2977 break; 3120 break;
2978 default:
2979 val = 0;
2980 } 3121 }
2981 kvm_register_write(vcpu, reg, val); 3122 kvm_register_write(vcpu, reg, val);
2982 } else { 3123 } else {
@@ -2987,21 +3128,25 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2987 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) 3128 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
2988 vcpu->arch.eff_db[dr] = val; 3129 vcpu->arch.eff_db[dr] = val;
2989 break; 3130 break;
2990 case 4 ... 5: 3131 case 4:
2991 if (vcpu->arch.cr4 & X86_CR4_DE) 3132 if (check_dr_alias(vcpu) < 0)
2992 kvm_queue_exception(vcpu, UD_VECTOR); 3133 return 1;
2993 break; 3134 /* fall through */
2994 case 6: 3135 case 6:
2995 if (val & 0xffffffff00000000ULL) { 3136 if (val & 0xffffffff00000000ULL) {
2996 kvm_queue_exception(vcpu, GP_VECTOR); 3137 kvm_inject_gp(vcpu, 0);
2997 break; 3138 return 1;
2998 } 3139 }
2999 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; 3140 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
3000 break; 3141 break;
3001 case 7: 3142 case 5:
3143 if (check_dr_alias(vcpu) < 0)
3144 return 1;
3145 /* fall through */
3146 default: /* 7 */
3002 if (val & 0xffffffff00000000ULL) { 3147 if (val & 0xffffffff00000000ULL) {
3003 kvm_queue_exception(vcpu, GP_VECTOR); 3148 kvm_inject_gp(vcpu, 0);
3004 break; 3149 return 1;
3005 } 3150 }
3006 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; 3151 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
3007 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { 3152 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
@@ -3016,18 +3161,19 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3016 return 1; 3161 return 1;
3017} 3162}
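For writes, the new switch routes DR4/DR5 through the same alias check and rejects DR6/DR7 values with any upper 32 bits set by injecting #GP rather than silently dropping the write. A sketch of that validation for DR6; the DR6_FIXED_1/DR6_VOLATILE masks below are illustrative stand-ins for the real header constants:

#include <stdint.h>
#include <stdio.h>

/* Illustrative masks; the real DR6_VOLATILE/DR6_FIXED_1 definitions live
 * in the KVM/x86 headers and may differ in detail. */
#define DR6_FIXED_1  0xffff0ff0ULL
#define DR6_VOLATILE 0x0000e00fULL

/* Mirror of the DR6 write path: any upper-32-bit set faults with #GP,
 * otherwise only the volatile bits come from the guest value and the
 * architecturally fixed-to-1 bits are forced on. */
static int write_dr6(uint64_t *dr6, uint64_t val)
{
	if (val & 0xffffffff00000000ULL)
		return -1;		/* inject #GP(0) */
	*dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
	return 0;
}

int main(void)
{
	uint64_t dr6 = 0;

	if (write_dr6(&dr6, 1ULL << 40))
		printf("upper bits set: #GP\n");
	write_dr6(&dr6, 0x1);		/* sets B0 */
	printf("dr6 = %#llx\n", (unsigned long long)dr6);
	return 0;
}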
3018 3163
3019static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3164static int handle_cpuid(struct kvm_vcpu *vcpu)
3020{ 3165{
3021 kvm_emulate_cpuid(vcpu); 3166 kvm_emulate_cpuid(vcpu);
3022 return 1; 3167 return 1;
3023} 3168}
3024 3169
3025static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3170static int handle_rdmsr(struct kvm_vcpu *vcpu)
3026{ 3171{
3027 u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; 3172 u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
3028 u64 data; 3173 u64 data;
3029 3174
3030 if (vmx_get_msr(vcpu, ecx, &data)) { 3175 if (vmx_get_msr(vcpu, ecx, &data)) {
3176 trace_kvm_msr_read_ex(ecx);
3031 kvm_inject_gp(vcpu, 0); 3177 kvm_inject_gp(vcpu, 0);
3032 return 1; 3178 return 1;
3033 } 3179 }
@@ -3041,31 +3187,29 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3041 return 1; 3187 return 1;
3042} 3188}
3043 3189
3044static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3190static int handle_wrmsr(struct kvm_vcpu *vcpu)
3045{ 3191{
3046 u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; 3192 u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
3047 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) 3193 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
3048 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); 3194 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
3049 3195
3050 trace_kvm_msr_write(ecx, data);
3051
3052 if (vmx_set_msr(vcpu, ecx, data) != 0) { 3196 if (vmx_set_msr(vcpu, ecx, data) != 0) {
3197 trace_kvm_msr_write_ex(ecx, data);
3053 kvm_inject_gp(vcpu, 0); 3198 kvm_inject_gp(vcpu, 0);
3054 return 1; 3199 return 1;
3055 } 3200 }
3056 3201
3202 trace_kvm_msr_write(ecx, data);
3057 skip_emulated_instruction(vcpu); 3203 skip_emulated_instruction(vcpu);
3058 return 1; 3204 return 1;
3059} 3205}
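WRMSR passes the MSR index in ECX and the 64-bit value split across EDX:EAX, which is why the handler reassembles data from the low halves of the saved RAX and RDX (and why the success trace now fires only after vmx_set_msr() accepts the write). A small sketch of the split/combine on plain integers:

#include <stdint.h>
#include <stdio.h>

/* Combine the low 32 bits of "rax" and "rdx" into the 64-bit MSR value,
 * as handle_wrmsr() does with the saved guest registers. */
static uint64_t msr_combine(uint64_t rax, uint64_t rdx)
{
	return (rax & 0xffffffffULL) | ((rdx & 0xffffffffULL) << 32);
}

/* Split a 64-bit MSR value back into EDX:EAX for an RDMSR result. */
static void msr_split(uint64_t data, uint32_t *eax, uint32_t *edx)
{
	*eax = (uint32_t)data;
	*edx = (uint32_t)(data >> 32);
}

int main(void)
{
	uint32_t eax, edx;
	uint64_t data = msr_combine(0xdeadbeef, 0x12345678);

	msr_split(data, &eax, &edx);
	printf("data=%#llx eax=%#x edx=%#x\n",
	       (unsigned long long)data, eax, edx);
	return 0;
}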
3060 3206
3061static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu, 3207static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
3062 struct kvm_run *kvm_run)
3063{ 3208{
3064 return 1; 3209 return 1;
3065} 3210}
3066 3211
3067static int handle_interrupt_window(struct kvm_vcpu *vcpu, 3212static int handle_interrupt_window(struct kvm_vcpu *vcpu)
3068 struct kvm_run *kvm_run)
3069{ 3213{
3070 u32 cpu_based_vm_exec_control; 3214 u32 cpu_based_vm_exec_control;
3071 3215
@@ -3081,34 +3225,34 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
3081 * possible 3225 * possible
3082 */ 3226 */
3083 if (!irqchip_in_kernel(vcpu->kvm) && 3227 if (!irqchip_in_kernel(vcpu->kvm) &&
3084 kvm_run->request_interrupt_window && 3228 vcpu->run->request_interrupt_window &&
3085 !kvm_cpu_has_interrupt(vcpu)) { 3229 !kvm_cpu_has_interrupt(vcpu)) {
3086 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; 3230 vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
3087 return 0; 3231 return 0;
3088 } 3232 }
3089 return 1; 3233 return 1;
3090} 3234}
3091 3235
3092static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3236static int handle_halt(struct kvm_vcpu *vcpu)
3093{ 3237{
3094 skip_emulated_instruction(vcpu); 3238 skip_emulated_instruction(vcpu);
3095 return kvm_emulate_halt(vcpu); 3239 return kvm_emulate_halt(vcpu);
3096} 3240}
3097 3241
3098static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3242static int handle_vmcall(struct kvm_vcpu *vcpu)
3099{ 3243{
3100 skip_emulated_instruction(vcpu); 3244 skip_emulated_instruction(vcpu);
3101 kvm_emulate_hypercall(vcpu); 3245 kvm_emulate_hypercall(vcpu);
3102 return 1; 3246 return 1;
3103} 3247}
3104 3248
3105static int handle_vmx_insn(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3249static int handle_vmx_insn(struct kvm_vcpu *vcpu)
3106{ 3250{
3107 kvm_queue_exception(vcpu, UD_VECTOR); 3251 kvm_queue_exception(vcpu, UD_VECTOR);
3108 return 1; 3252 return 1;
3109} 3253}
3110 3254
3111static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3255static int handle_invlpg(struct kvm_vcpu *vcpu)
3112{ 3256{
3113 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 3257 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
3114 3258
@@ -3117,14 +3261,14 @@ static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3117 return 1; 3261 return 1;
3118} 3262}
3119 3263
3120static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3264static int handle_wbinvd(struct kvm_vcpu *vcpu)
3121{ 3265{
3122 skip_emulated_instruction(vcpu); 3266 skip_emulated_instruction(vcpu);
3123 /* TODO: Add support for VT-d/pass-through device */ 3267 /* TODO: Add support for VT-d/pass-through device */
3124 return 1; 3268 return 1;
3125} 3269}
3126 3270
3127static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3271static int handle_apic_access(struct kvm_vcpu *vcpu)
3128{ 3272{
3129 unsigned long exit_qualification; 3273 unsigned long exit_qualification;
3130 enum emulation_result er; 3274 enum emulation_result er;
@@ -3133,7 +3277,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3133 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 3277 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
3134 offset = exit_qualification & 0xffful; 3278 offset = exit_qualification & 0xffful;
3135 3279
3136 er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); 3280 er = emulate_instruction(vcpu, 0, 0, 0);
3137 3281
3138 if (er != EMULATE_DONE) { 3282 if (er != EMULATE_DONE) {
3139 printk(KERN_ERR 3283 printk(KERN_ERR
@@ -3144,7 +3288,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3144 return 1; 3288 return 1;
3145} 3289}
3146 3290
3147static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3291static int handle_task_switch(struct kvm_vcpu *vcpu)
3148{ 3292{
3149 struct vcpu_vmx *vmx = to_vmx(vcpu); 3293 struct vcpu_vmx *vmx = to_vmx(vcpu);
3150 unsigned long exit_qualification; 3294 unsigned long exit_qualification;
@@ -3198,7 +3342,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3198 return 1; 3342 return 1;
3199} 3343}
3200 3344
3201static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3345static int handle_ept_violation(struct kvm_vcpu *vcpu)
3202{ 3346{
3203 unsigned long exit_qualification; 3347 unsigned long exit_qualification;
3204 gpa_t gpa; 3348 gpa_t gpa;
@@ -3219,8 +3363,8 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3219 vmcs_readl(GUEST_LINEAR_ADDRESS)); 3363 vmcs_readl(GUEST_LINEAR_ADDRESS));
3220 printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", 3364 printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
3221 (long unsigned int)exit_qualification); 3365 (long unsigned int)exit_qualification);
3222 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 3366 vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
3223 kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION; 3367 vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION;
3224 return 0; 3368 return 0;
3225 } 3369 }
3226 3370
@@ -3290,7 +3434,7 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,
3290 } 3434 }
3291} 3435}
3292 3436
3293static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3437static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
3294{ 3438{
3295 u64 sptes[4]; 3439 u64 sptes[4];
3296 int nr_sptes, i; 3440 int nr_sptes, i;
@@ -3306,13 +3450,13 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3306 for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i) 3450 for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i)
3307 ept_misconfig_inspect_spte(vcpu, sptes[i-1], i); 3451 ept_misconfig_inspect_spte(vcpu, sptes[i-1], i);
3308 3452
3309 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 3453 vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
3310 kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; 3454 vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
3311 3455
3312 return 0; 3456 return 0;
3313} 3457}
3314 3458
3315static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3459static int handle_nmi_window(struct kvm_vcpu *vcpu)
3316{ 3460{
3317 u32 cpu_based_vm_exec_control; 3461 u32 cpu_based_vm_exec_control;
3318 3462
@@ -3325,36 +3469,55 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3325 return 1; 3469 return 1;
3326} 3470}
3327 3471
3328static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, 3472static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
3329 struct kvm_run *kvm_run)
3330{ 3473{
3331 struct vcpu_vmx *vmx = to_vmx(vcpu); 3474 struct vcpu_vmx *vmx = to_vmx(vcpu);
3332 enum emulation_result err = EMULATE_DONE; 3475 enum emulation_result err = EMULATE_DONE;
3333 3476 int ret = 1;
3334 local_irq_enable();
3335 preempt_enable();
3336 3477
3337 while (!guest_state_valid(vcpu)) { 3478 while (!guest_state_valid(vcpu)) {
3338 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); 3479 err = emulate_instruction(vcpu, 0, 0, 0);
3339 3480
3340 if (err == EMULATE_DO_MMIO) 3481 if (err == EMULATE_DO_MMIO) {
3341 break; 3482 ret = 0;
3483 goto out;
3484 }
3342 3485
3343 if (err != EMULATE_DONE) { 3486 if (err != EMULATE_DONE) {
3344 kvm_report_emulation_failure(vcpu, "emulation failure"); 3487 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3345 break; 3488 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
3489 vcpu->run->internal.ndata = 0;
3490 ret = 0;
3491 goto out;
3346 } 3492 }
3347 3493
3348 if (signal_pending(current)) 3494 if (signal_pending(current))
3349 break; 3495 goto out;
3350 if (need_resched()) 3496 if (need_resched())
3351 schedule(); 3497 schedule();
3352 } 3498 }
3353 3499
3354 preempt_disable(); 3500 vmx->emulation_required = 0;
3355 local_irq_disable(); 3501out:
3502 return ret;
3503}
3504
3505/*
3506 * Indicate a busy-waiting vcpu in a spinlock. We do not enable plain PAUSE
3507 * exiting, so we only get here on CPUs with PAUSE-Loop-Exiting.
3508 */
3509static int handle_pause(struct kvm_vcpu *vcpu)
3510{
3511 skip_emulated_instruction(vcpu);
3512 kvm_vcpu_on_spin(vcpu);
3513
3514 return 1;
3515}
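On a PAUSE-loop exit the PAUSE itself is skipped and kvm_vcpu_on_spin() takes the vCPU off the physical CPU for a moment instead of letting it keep spinning on a lock whose holder may not be running. A loose user-space analogy of that "spin a while, then back off" pattern; the threshold and the sched_yield() call are illustrative, not what KVM does internally:

#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

/* Spin on a lock flag, but after a bounded number of failed attempts
 * yield the CPU instead of burning it. */
static void spin_then_yield(atomic_int *lock)
{
	int spins = 0;

	while (atomic_exchange(lock, 1)) {
		if (++spins > 4096) {		/* arbitrary threshold */
			sched_yield();
			spins = 0;
		}
	}
}

int main(void)
{
	atomic_int lock = 0;

	spin_then_yield(&lock);		/* uncontended: acquires immediately */
	printf("lock acquired\n");
	atomic_store(&lock, 0);
	return 0;
}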
3356 3516
3357 vmx->invalid_state_emulation_result = err; 3517static int handle_invalid_op(struct kvm_vcpu *vcpu)
3518{
3519 kvm_queue_exception(vcpu, UD_VECTOR);
3520 return 1;
3358} 3521}
3359 3522
3360/* 3523/*
@@ -3362,8 +3525,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
3362 * may resume. Otherwise they set the kvm_run parameter to indicate what needs 3525 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
3363 * to be done to userspace and return 0. 3526 * to be done to userspace and return 0.
3364 */ 3527 */
3365static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, 3528static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
3366 struct kvm_run *kvm_run) = {
3367 [EXIT_REASON_EXCEPTION_NMI] = handle_exception, 3529 [EXIT_REASON_EXCEPTION_NMI] = handle_exception,
3368 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, 3530 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
3369 [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, 3531 [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
@@ -3394,6 +3556,9 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
3394 [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, 3556 [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
3395 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, 3557 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
3396 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, 3558 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
3559 [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
3560 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op,
3561 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op,
3397}; 3562};
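kvm_vmx_exit_handlers is a sparse array of function pointers indexed by the hardware exit reason and filled with designated initializers; vmx_handle_exit() below just bounds-checks the reason and calls through it, punting to userspace when no handler exists. A self-contained sketch of that dispatch pattern; the reason values and handler bodies here are placeholders:

#include <stdio.h>

struct vcpu { int dummy; };

static int handle_hlt(struct vcpu *v)   { printf("hlt\n");   return 1; }
static int handle_cpuid(struct vcpu *v) { printf("cpuid\n"); return 1; }

/* Placeholder reason numbers for the sketch. */
#define EXIT_REASON_CPUID 10
#define EXIT_REASON_HLT   12

/* Sparse table: designated initializers leave unhandled reasons NULL. */
static int (*exit_handlers[])(struct vcpu *) = {
	[EXIT_REASON_CPUID] = handle_cpuid,
	[EXIT_REASON_HLT]   = handle_hlt,
};

static const int max_handlers =
	sizeof(exit_handlers) / sizeof(*exit_handlers);

/* 1 = handled in kernel, keep running the guest; 0 = punt to userspace. */
static int dispatch(struct vcpu *v, unsigned int reason)
{
	if (reason < max_handlers && exit_handlers[reason])
		return exit_handlers[reason](v);
	printf("unhandled exit reason %u\n", reason);
	return 0;
}

int main(void)
{
	struct vcpu v = { 0 };

	dispatch(&v, EXIT_REASON_HLT);
	dispatch(&v, 42);
	return 0;
}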
3398 3563
3399static const int kvm_vmx_max_exit_handlers = 3564static const int kvm_vmx_max_exit_handlers =
@@ -3403,7 +3568,7 @@ static const int kvm_vmx_max_exit_handlers =
3403 * The guest has exited. See if we can fix it or if we need userspace 3568 * The guest has exited. See if we can fix it or if we need userspace
3404 * assistance. 3569 * assistance.
3405 */ 3570 */
3406static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 3571static int vmx_handle_exit(struct kvm_vcpu *vcpu)
3407{ 3572{
3408 struct vcpu_vmx *vmx = to_vmx(vcpu); 3573 struct vcpu_vmx *vmx = to_vmx(vcpu);
3409 u32 exit_reason = vmx->exit_reason; 3574 u32 exit_reason = vmx->exit_reason;
@@ -3411,13 +3576,9 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3411 3576
3412 trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); 3577 trace_kvm_exit(exit_reason, kvm_rip_read(vcpu));
3413 3578
3414 /* If we need to emulate an MMIO from handle_invalid_guest_state 3579 /* If guest state is invalid, start emulating */
3415 * we just return 0 */ 3580 if (vmx->emulation_required && emulate_invalid_guest_state)
3416 if (vmx->emulation_required && emulate_invalid_guest_state) { 3581 return handle_invalid_guest_state(vcpu);
3417 if (guest_state_valid(vcpu))
3418 vmx->emulation_required = 0;
3419 return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO;
3420 }
3421 3582
3422 /* Accesses to CR3 don't cause a VM exit in paging mode, so we need 3583 /* Accesses to CR3 don't cause a VM exit in paging mode, so we need
3423 * to sync with the guest's real CR3. */ 3584 * to sync with the guest's real CR3. */
@@ -3425,8 +3586,8 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3425 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); 3586 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
3426 3587
3427 if (unlikely(vmx->fail)) { 3588 if (unlikely(vmx->fail)) {
3428 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 3589 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
3429 kvm_run->fail_entry.hardware_entry_failure_reason 3590 vcpu->run->fail_entry.hardware_entry_failure_reason
3430 = vmcs_read32(VM_INSTRUCTION_ERROR); 3591 = vmcs_read32(VM_INSTRUCTION_ERROR);
3431 return 0; 3592 return 0;
3432 } 3593 }
@@ -3459,10 +3620,10 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3459 3620
3460 if (exit_reason < kvm_vmx_max_exit_handlers 3621 if (exit_reason < kvm_vmx_max_exit_handlers
3461 && kvm_vmx_exit_handlers[exit_reason]) 3622 && kvm_vmx_exit_handlers[exit_reason])
3462 return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); 3623 return kvm_vmx_exit_handlers[exit_reason](vcpu);
3463 else { 3624 else {
3464 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 3625 vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
3465 kvm_run->hw.hardware_exit_reason = exit_reason; 3626 vcpu->run->hw.hardware_exit_reason = exit_reason;
3466 } 3627 }
3467 return 0; 3628 return 0;
3468} 3629}
@@ -3600,23 +3761,18 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx)
3600#define Q "l" 3761#define Q "l"
3601#endif 3762#endif
3602 3763
3603static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3764static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
3604{ 3765{
3605 struct vcpu_vmx *vmx = to_vmx(vcpu); 3766 struct vcpu_vmx *vmx = to_vmx(vcpu);
3606 3767
3607 if (enable_ept && is_paging(vcpu)) {
3608 vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
3609 ept_load_pdptrs(vcpu);
3610 }
3611 /* Record the guest's net vcpu time for enforced NMI injections. */ 3768 /* Record the guest's net vcpu time for enforced NMI injections. */
3612 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) 3769 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
3613 vmx->entry_time = ktime_get(); 3770 vmx->entry_time = ktime_get();
3614 3771
3615 /* Handle invalid guest state instead of entering VMX */ 3772 /* Don't enter VMX if guest state is invalid; let the exit handler
3616 if (vmx->emulation_required && emulate_invalid_guest_state) { 3773 start emulation and keep emulating until the state is valid again */
3617 handle_invalid_guest_state(vcpu, kvm_run); 3774 if (vmx->emulation_required && emulate_invalid_guest_state)
3618 return; 3775 return;
3619 }
3620 3776
3621 if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) 3777 if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
3622 vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); 3778 vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
@@ -3636,9 +3792,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3636 */ 3792 */
3637 vmcs_writel(HOST_CR0, read_cr0()); 3793 vmcs_writel(HOST_CR0, read_cr0());
3638 3794
3639 if (vcpu->arch.switch_db_regs)
3640 set_debugreg(vcpu->arch.dr6, 6);
3641
3642 asm( 3795 asm(
3643 /* Store host registers */ 3796 /* Store host registers */
3644 "push %%"R"dx; push %%"R"bp;" 3797 "push %%"R"dx; push %%"R"bp;"
@@ -3739,9 +3892,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3739 | (1 << VCPU_EXREG_PDPTR)); 3892 | (1 << VCPU_EXREG_PDPTR));
3740 vcpu->arch.regs_dirty = 0; 3893 vcpu->arch.regs_dirty = 0;
3741 3894
3742 if (vcpu->arch.switch_db_regs)
3743 get_debugreg(vcpu->arch.dr6, 6);
3744
3745 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); 3895 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
3746 if (vmx->rmode.irq.pending) 3896 if (vmx->rmode.irq.pending)
3747 fixup_rmode_irq(vmx); 3897 fixup_rmode_irq(vmx);
@@ -3775,7 +3925,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
3775 __clear_bit(vmx->vpid, vmx_vpid_bitmap); 3925 __clear_bit(vmx->vpid, vmx_vpid_bitmap);
3776 spin_unlock(&vmx_vpid_lock); 3926 spin_unlock(&vmx_vpid_lock);
3777 vmx_free_vmcs(vcpu); 3927 vmx_free_vmcs(vcpu);
3778 kfree(vmx->host_msrs);
3779 kfree(vmx->guest_msrs); 3928 kfree(vmx->guest_msrs);
3780 kvm_vcpu_uninit(vcpu); 3929 kvm_vcpu_uninit(vcpu);
3781 kmem_cache_free(kvm_vcpu_cache, vmx); 3930 kmem_cache_free(kvm_vcpu_cache, vmx);
@@ -3802,10 +3951,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
3802 goto uninit_vcpu; 3951 goto uninit_vcpu;
3803 } 3952 }
3804 3953
3805 vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
3806 if (!vmx->host_msrs)
3807 goto free_guest_msrs;
3808
3809 vmx->vmcs = alloc_vmcs(); 3954 vmx->vmcs = alloc_vmcs();
3810 if (!vmx->vmcs) 3955 if (!vmx->vmcs)
3811 goto free_msrs; 3956 goto free_msrs;
@@ -3836,8 +3981,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
3836free_vmcs: 3981free_vmcs:
3837 free_vmcs(vmx->vmcs); 3982 free_vmcs(vmx->vmcs);
3838free_msrs: 3983free_msrs:
3839 kfree(vmx->host_msrs);
3840free_guest_msrs:
3841 kfree(vmx->guest_msrs); 3984 kfree(vmx->guest_msrs);
3842uninit_vcpu: 3985uninit_vcpu:
3843 kvm_vcpu_uninit(&vmx->vcpu); 3986 kvm_vcpu_uninit(&vmx->vcpu);
@@ -3877,7 +4020,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
3877 * b. VT-d with snooping control feature: snooping control feature of 4020 * b. VT-d with snooping control feature: snooping control feature of
3878 * VT-d engine can guarantee the cache correctness. Just set it 4021 * VT-d engine can guarantee the cache correctness. Just set it
3879 * to WB to keep consistent with host. So the same as item 3. 4022 * to WB to keep consistent with host. So the same as item 3.
3880 * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep 4023 * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
3881 * consistent with host MTRR 4024 * consistent with host MTRR
3882 */ 4025 */
3883 if (is_mmio) 4026 if (is_mmio)
@@ -3888,37 +4031,88 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
3888 VMX_EPT_MT_EPTE_SHIFT; 4031 VMX_EPT_MT_EPTE_SHIFT;
3889 else 4032 else
3890 ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) 4033 ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
3891 | VMX_EPT_IGMT_BIT; 4034 | VMX_EPT_IPAT_BIT;
3892 4035
3893 return ret; 4036 return ret;
3894} 4037}
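The IGMT to IPAT rename reflects the SDM's name for the "ignore PAT" bit in an EPT entry; the helper composes the attribute bits by shifting an MTRR memory type into the type field and, for the force-write-back case shown, setting IPAT. A sketch of that composition, assuming the usual layout (type in bits 5:3, IPAT in bit 6) for the real VMX_EPT_MT_EPTE_SHIFT/VMX_EPT_IPAT_BIT constants:

#include <stdint.h>
#include <stdio.h>

/* Assumed field layout: EPT memory type in bits 5:3, "ignore PAT" in bit 6;
 * the real names are VMX_EPT_MT_EPTE_SHIFT and VMX_EPT_IPAT_BIT. */
#define EPT_MT_SHIFT     3
#define EPT_IPAT_BIT     (1ULL << 6)
#define MTRR_TYPE_WRBACK 6

int main(void)
{
	/* The "no VT-d" case above: map as write-back and set IPAT so the
	 * guest PAT is ignored for this mapping. */
	uint64_t entry_bits = ((uint64_t)MTRR_TYPE_WRBACK << EPT_MT_SHIFT)
			      | EPT_IPAT_BIT;

	printf("WB + IPAT EPT attribute bits = %#llx\n",
	       (unsigned long long)entry_bits);
	return 0;
}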
3895 4038
4039#define _ER(x) { EXIT_REASON_##x, #x }
4040
3896static const struct trace_print_flags vmx_exit_reasons_str[] = { 4041static const struct trace_print_flags vmx_exit_reasons_str[] = {
3897 { EXIT_REASON_EXCEPTION_NMI, "exception" }, 4042 _ER(EXCEPTION_NMI),
3898 { EXIT_REASON_EXTERNAL_INTERRUPT, "ext_irq" }, 4043 _ER(EXTERNAL_INTERRUPT),
3899 { EXIT_REASON_TRIPLE_FAULT, "triple_fault" }, 4044 _ER(TRIPLE_FAULT),
3900 { EXIT_REASON_NMI_WINDOW, "nmi_window" }, 4045 _ER(PENDING_INTERRUPT),
3901 { EXIT_REASON_IO_INSTRUCTION, "io_instruction" }, 4046 _ER(NMI_WINDOW),
3902 { EXIT_REASON_CR_ACCESS, "cr_access" }, 4047 _ER(TASK_SWITCH),
3903 { EXIT_REASON_DR_ACCESS, "dr_access" }, 4048 _ER(CPUID),
3904 { EXIT_REASON_CPUID, "cpuid" }, 4049 _ER(HLT),
3905 { EXIT_REASON_MSR_READ, "rdmsr" }, 4050 _ER(INVLPG),
3906 { EXIT_REASON_MSR_WRITE, "wrmsr" }, 4051 _ER(RDPMC),
3907 { EXIT_REASON_PENDING_INTERRUPT, "interrupt_window" }, 4052 _ER(RDTSC),
3908 { EXIT_REASON_HLT, "halt" }, 4053 _ER(VMCALL),
3909 { EXIT_REASON_INVLPG, "invlpg" }, 4054 _ER(VMCLEAR),
3910 { EXIT_REASON_VMCALL, "hypercall" }, 4055 _ER(VMLAUNCH),
3911 { EXIT_REASON_TPR_BELOW_THRESHOLD, "tpr_below_thres" }, 4056 _ER(VMPTRLD),
3912 { EXIT_REASON_APIC_ACCESS, "apic_access" }, 4057 _ER(VMPTRST),
3913 { EXIT_REASON_WBINVD, "wbinvd" }, 4058 _ER(VMREAD),
3914 { EXIT_REASON_TASK_SWITCH, "task_switch" }, 4059 _ER(VMRESUME),
3915 { EXIT_REASON_EPT_VIOLATION, "ept_violation" }, 4060 _ER(VMWRITE),
4061 _ER(VMOFF),
4062 _ER(VMON),
4063 _ER(CR_ACCESS),
4064 _ER(DR_ACCESS),
4065 _ER(IO_INSTRUCTION),
4066 _ER(MSR_READ),
4067 _ER(MSR_WRITE),
4068 _ER(MWAIT_INSTRUCTION),
4069 _ER(MONITOR_INSTRUCTION),
4070 _ER(PAUSE_INSTRUCTION),
4071 _ER(MCE_DURING_VMENTRY),
4072 _ER(TPR_BELOW_THRESHOLD),
4073 _ER(APIC_ACCESS),
4074 _ER(EPT_VIOLATION),
4075 _ER(EPT_MISCONFIG),
4076 _ER(WBINVD),
3916 { -1, NULL } 4077 { -1, NULL }
3917}; 4078};
3918 4079
3919static bool vmx_gb_page_enable(void) 4080#undef _ER
4081
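_ER() relies on token pasting and stringification so each exit reason is named exactly once: _ER(CPUID) expands to { EXIT_REASON_CPUID, "CPUID" }. A minimal illustration of the same preprocessor trick, with a local enum and struct standing in for the real exit-reason constants and trace_print_flags:

#include <stdio.h>

enum { EXIT_REASON_CPUID = 10, EXIT_REASON_HLT = 12 };

struct flag_name {
	long mask;
	const char *name;
};

/* Same pattern as _ER(): ## pastes the constant name, # stringifies it. */
#define _ER(x) { EXIT_REASON_##x, #x }

static const struct flag_name exit_reasons[] = {
	_ER(CPUID),
	_ER(HLT),
	{ -1, NULL }
};

#undef _ER

int main(void)
{
	for (const struct flag_name *p = exit_reasons; p->name; p++)
		printf("%ld -> %s\n", p->mask, p->name);
	return 0;
}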
4082static int vmx_get_lpage_level(void)
3920{ 4083{
3921 return false; 4084 if (enable_ept && !cpu_has_vmx_ept_1g_page())
4085 return PT_DIRECTORY_LEVEL;
4086 else
4087 /* Shadow paging and EPT both support 1GB pages */
4088 return PT_PDPE_LEVEL;
4089}
4090
4091static inline u32 bit(int bitno)
4092{
4093 return 1 << (bitno & 31);
4094}
4095
4096static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
4097{
4098 struct kvm_cpuid_entry2 *best;
4099 struct vcpu_vmx *vmx = to_vmx(vcpu);
4100 u32 exec_control;
4101
4102 vmx->rdtscp_enabled = false;
4103 if (vmx_rdtscp_supported()) {
4104 exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
4105 if (exec_control & SECONDARY_EXEC_RDTSCP) {
4106 best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
4107 if (best && (best->edx & bit(X86_FEATURE_RDTSCP)))
4108 vmx->rdtscp_enabled = true;
4109 else {
4110 exec_control &= ~SECONDARY_EXEC_RDTSCP;
4111 vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
4112 exec_control);
4113 }
4114 }
4115 }
3922} 4116}
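vmx_cpuid_update() keeps SECONDARY_EXEC_RDTSCP enabled only when the guest's CPUID actually advertises RDTSCP in leaf 0x80000001. The same feature bit can be probed on the host from user space with the compiler's cpuid helper; treating EDX bit 27 of that leaf as the RDTSCP flag is an assumption taken from the SDM:

#include <cpuid.h>
#include <stdio.h>

#define RDTSCP_BIT (1u << 27)	/* CPUID.80000001H:EDX.RDTSCP, per SDM */

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx)) {
		printf("extended CPUID leaf not available\n");
		return 1;
	}
	printf("RDTSCP %ssupported on this host\n",
	       (edx & RDTSCP_BIT) ? "" : "not ");
	return 0;
}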
3923 4117
3924static struct kvm_x86_ops vmx_x86_ops = { 4118static struct kvm_x86_ops vmx_x86_ops = {
@@ -3947,6 +4141,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
3947 .set_segment = vmx_set_segment, 4141 .set_segment = vmx_set_segment,
3948 .get_cpl = vmx_get_cpl, 4142 .get_cpl = vmx_get_cpl,
3949 .get_cs_db_l_bits = vmx_get_cs_db_l_bits, 4143 .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
4144 .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
3950 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, 4145 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
3951 .set_cr0 = vmx_set_cr0, 4146 .set_cr0 = vmx_set_cr0,
3952 .set_cr3 = vmx_set_cr3, 4147 .set_cr3 = vmx_set_cr3,
@@ -3959,6 +4154,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
3959 .cache_reg = vmx_cache_reg, 4154 .cache_reg = vmx_cache_reg,
3960 .get_rflags = vmx_get_rflags, 4155 .get_rflags = vmx_get_rflags,
3961 .set_rflags = vmx_set_rflags, 4156 .set_rflags = vmx_set_rflags,
4157 .fpu_activate = vmx_fpu_activate,
4158 .fpu_deactivate = vmx_fpu_deactivate,
3962 4159
3963 .tlb_flush = vmx_flush_tlb, 4160 .tlb_flush = vmx_flush_tlb,
3964 4161
@@ -3973,6 +4170,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
3973 .queue_exception = vmx_queue_exception, 4170 .queue_exception = vmx_queue_exception,
3974 .interrupt_allowed = vmx_interrupt_allowed, 4171 .interrupt_allowed = vmx_interrupt_allowed,
3975 .nmi_allowed = vmx_nmi_allowed, 4172 .nmi_allowed = vmx_nmi_allowed,
4173 .get_nmi_mask = vmx_get_nmi_mask,
4174 .set_nmi_mask = vmx_set_nmi_mask,
3976 .enable_nmi_window = enable_nmi_window, 4175 .enable_nmi_window = enable_nmi_window,
3977 .enable_irq_window = enable_irq_window, 4176 .enable_irq_window = enable_irq_window,
3978 .update_cr8_intercept = update_cr8_intercept, 4177 .update_cr8_intercept = update_cr8_intercept,
@@ -3982,12 +4181,21 @@ static struct kvm_x86_ops vmx_x86_ops = {
3982 .get_mt_mask = vmx_get_mt_mask, 4181 .get_mt_mask = vmx_get_mt_mask,
3983 4182
3984 .exit_reasons_str = vmx_exit_reasons_str, 4183 .exit_reasons_str = vmx_exit_reasons_str,
3985 .gb_page_enable = vmx_gb_page_enable, 4184 .get_lpage_level = vmx_get_lpage_level,
4185
4186 .cpuid_update = vmx_cpuid_update,
4187
4188 .rdtscp_supported = vmx_rdtscp_supported,
3986}; 4189};
3987 4190
3988static int __init vmx_init(void) 4191static int __init vmx_init(void)
3989{ 4192{
3990 int r; 4193 int r, i;
4194
4195 rdmsrl_safe(MSR_EFER, &host_efer);
4196
4197 for (i = 0; i < NR_VMX_MSR; ++i)
4198 kvm_define_shared_msr(i, vmx_msr_index[i]);
3991 4199
3992 vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); 4200 vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
3993 if (!vmx_io_bitmap_a) 4201 if (!vmx_io_bitmap_a)
@@ -4049,8 +4257,6 @@ static int __init vmx_init(void)
4049 if (bypass_guest_pf) 4257 if (bypass_guest_pf)
4050 kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); 4258 kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
4051 4259
4052 ept_sync_global();
4053
4054 return 0; 4260 return 0;
4055 4261
4056out3: 4262out3: