Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--	arch/x86/kvm/vmx.c	608
1 file changed, 354 insertions, 254 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3e556c68351b..feb852b04598 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -99,13 +99,15 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
 static bool __read_mostly nested = 0;
 module_param(nested, bool, S_IRUGO);
 
+static u64 __read_mostly host_xss;
+
 #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
 #define KVM_VM_CR0_ALWAYS_ON \
 	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_CR4_GUEST_OWNED_BITS \
 	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
-	 | X86_CR4_OSXMMEXCPT)
+	 | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
 
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
@@ -214,6 +216,7 @@ struct __packed vmcs12 {
 	u64 virtual_apic_page_addr;
 	u64 apic_access_addr;
 	u64 ept_pointer;
+	u64 xss_exit_bitmap;
 	u64 guest_physical_address;
 	u64 vmcs_link_pointer;
 	u64 guest_ia32_debugctl;
@@ -616,6 +619,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
 	FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
 	FIELD64(EPT_POINTER, ept_pointer),
+	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
 	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
 	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
 	FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
@@ -720,12 +724,15 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD(HOST_RSP, host_rsp),
 	FIELD(HOST_RIP, host_rip),
 };
-static const int max_vmcs_field = ARRAY_SIZE(vmcs_field_to_offset_table);
 
 static inline short vmcs_field_to_offset(unsigned long field)
 {
-	if (field >= max_vmcs_field || vmcs_field_to_offset_table[field] == 0)
-		return -1;
+	BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
+
+	if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
+	    vmcs_field_to_offset_table[field] == 0)
+		return -ENOENT;
+
 	return vmcs_field_to_offset_table[field];
 }
 
@@ -758,6 +765,7 @@ static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
 static bool vmx_mpx_supported(void);
+static bool vmx_xsaves_supported(void);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
@@ -1098,6 +1106,12 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
 }
 
+static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES) &&
+		vmx_xsaves_supported();
+}
+
 static inline bool is_exception(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -1659,12 +1673,20 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 	vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
 
 	clear_atomic_switch_msr(vmx, MSR_EFER);
-	/* On ept, can't emulate nx, and must switch nx atomically */
-	if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) {
+
+	/*
+	 * On EPT, we can't emulate NX, so we must switch EFER atomically.
+	 * On CPUs that support "load IA32_EFER", always switch EFER
+	 * atomically, since it's faster than switching it manually.
+	 */
+	if (cpu_has_load_ia32_efer ||
+	    (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
 		guest_efer = vmx->vcpu.arch.efer;
 		if (!(guest_efer & EFER_LMA))
 			guest_efer &= ~EFER_LME;
-		add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer);
+		if (guest_efer != host_efer)
+			add_atomic_switch_msr(vmx, MSR_EFER,
+					      guest_efer, host_efer);
 		return false;
 	}
 
@@ -2377,12 +2399,13 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 	nested_vmx_secondary_ctls_low = 0;
 	nested_vmx_secondary_ctls_high &=
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
-		SECONDARY_EXEC_UNRESTRICTED_GUEST |
-		SECONDARY_EXEC_WBINVD_EXITING;
+		SECONDARY_EXEC_WBINVD_EXITING |
+		SECONDARY_EXEC_XSAVES;
 
 	if (enable_ept) {
 		/* nested EPT: emulate EPT also to L1 */
-		nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
+		nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT |
+			SECONDARY_EXEC_UNRESTRICTED_GUEST;
 		nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
 			 VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
 			 VMX_EPT_INVEPT_BIT;
@@ -2558,6 +2581,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 		if (!nested_vmx_allowed(vcpu))
 			return 1;
 		return vmx_get_vmx_msr(vcpu, msr_index, pdata);
+	case MSR_IA32_XSS:
+		if (!vmx_xsaves_supported())
+			return 1;
+		data = vcpu->arch.ia32_xss;
+		break;
 	case MSR_TSC_AUX:
 		if (!to_vmx(vcpu)->rdtscp_enabled)
 			return 1;
@@ -2649,6 +2677,22 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
 		return 1; /* they are read-only */
+	case MSR_IA32_XSS:
+		if (!vmx_xsaves_supported())
+			return 1;
+		/*
+		 * The only supported bit as of Skylake is bit 8, but
+		 * it is not supported on KVM.
+		 */
+		if (data != 0)
+			return 1;
+		vcpu->arch.ia32_xss = data;
+		if (vcpu->arch.ia32_xss != host_xss)
+			add_atomic_switch_msr(vmx, MSR_IA32_XSS,
+				vcpu->arch.ia32_xss, host_xss);
+		else
+			clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
+		break;
 	case MSR_TSC_AUX:
 		if (!vmx->rdtscp_enabled)
 			return 1;
@@ -2884,7 +2928,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_ENABLE_INVPCID |
 			SECONDARY_EXEC_APIC_REGISTER_VIRT |
 			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-			SECONDARY_EXEC_SHADOW_VMCS;
+			SECONDARY_EXEC_SHADOW_VMCS |
+			SECONDARY_EXEC_XSAVES;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -3007,6 +3052,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 		}
 	}
 
+	if (cpu_has_xsaves)
+		rdmsrl(MSR_IA32_XSS, host_xss);
+
 	return 0;
 }
 
@@ -3110,76 +3158,6 @@ static __init int alloc_kvm_area(void)
 	return 0;
 }
 
-static __init int hardware_setup(void)
-{
-	if (setup_vmcs_config(&vmcs_config) < 0)
-		return -EIO;
-
-	if (boot_cpu_has(X86_FEATURE_NX))
-		kvm_enable_efer_bits(EFER_NX);
-
-	if (!cpu_has_vmx_vpid())
-		enable_vpid = 0;
-	if (!cpu_has_vmx_shadow_vmcs())
-		enable_shadow_vmcs = 0;
-	if (enable_shadow_vmcs)
-		init_vmcs_shadow_fields();
-
-	if (!cpu_has_vmx_ept() ||
-	    !cpu_has_vmx_ept_4levels()) {
-		enable_ept = 0;
-		enable_unrestricted_guest = 0;
-		enable_ept_ad_bits = 0;
-	}
-
-	if (!cpu_has_vmx_ept_ad_bits())
-		enable_ept_ad_bits = 0;
-
-	if (!cpu_has_vmx_unrestricted_guest())
-		enable_unrestricted_guest = 0;
-
-	if (!cpu_has_vmx_flexpriority()) {
-		flexpriority_enabled = 0;
-
-		/*
-		 * set_apic_access_page_addr() is used to reload apic access
-		 * page upon invalidation.  No need to do anything if the
-		 * processor does not have the APIC_ACCESS_ADDR VMCS field.
-		 */
-		kvm_x86_ops->set_apic_access_page_addr = NULL;
-	}
-
-	if (!cpu_has_vmx_tpr_shadow())
-		kvm_x86_ops->update_cr8_intercept = NULL;
-
-	if (enable_ept && !cpu_has_vmx_ept_2m_page())
-		kvm_disable_largepages();
-
-	if (!cpu_has_vmx_ple())
-		ple_gap = 0;
-
-	if (!cpu_has_vmx_apicv())
-		enable_apicv = 0;
-
-	if (enable_apicv)
-		kvm_x86_ops->update_cr8_intercept = NULL;
-	else {
-		kvm_x86_ops->hwapic_irr_update = NULL;
-		kvm_x86_ops->deliver_posted_interrupt = NULL;
-		kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
-	}
-
-	if (nested)
-		nested_vmx_setup_ctls_msrs();
-
-	return alloc_kvm_area();
-}
-
-static __exit void hardware_unsetup(void)
-{
-	free_kvm_area();
-}
-
 static bool emulation_required(struct kvm_vcpu *vcpu)
 {
 	return emulate_invalid_guest_state && !guest_state_valid(vcpu);
@@ -4396,6 +4374,7 @@ static void ept_set_mmio_spte_mask(void)
 	kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
 }
 
+#define VMX_XSS_EXIT_BITMAP 0
 /*
  * Sets up the vmcs for emulated real mode.
  */
@@ -4505,6 +4484,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
 	set_cr4_guest_host_mask(vmx);
 
+	if (vmx_xsaves_supported())
+		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
+
 	return 0;
 }
 
@@ -5163,13 +5145,20 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 static int handle_dr(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification;
-	int dr, reg;
+	int dr, dr7, reg;
+
+	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+	dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
+
+	/* First, if DR does not exist, trigger UD */
+	if (!kvm_require_dr(vcpu, dr))
+		return 1;
 
 	/* Do not handle if the CPL > 0, will trigger GP on re-entry */
 	if (!kvm_require_cpl(vcpu, 0))
 		return 1;
-	dr = vmcs_readl(GUEST_DR7);
-	if (dr & DR7_GD) {
+	dr7 = vmcs_readl(GUEST_DR7);
+	if (dr7 & DR7_GD) {
 		/*
 		 * As the vm-exit takes precedence over the debug trap, we
 		 * need to emulate the latter, either for the host or the
@@ -5177,17 +5166,14 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 		 */
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
 			vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
-			vcpu->run->debug.arch.dr7 = dr;
-			vcpu->run->debug.arch.pc =
-				vmcs_readl(GUEST_CS_BASE) +
-				vmcs_readl(GUEST_RIP);
+			vcpu->run->debug.arch.dr7 = dr7;
+			vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
 			vcpu->run->debug.arch.exception = DB_VECTOR;
 			vcpu->run->exit_reason = KVM_EXIT_DEBUG;
 			return 0;
 		} else {
-			vcpu->arch.dr7 &= ~DR7_GD;
+			vcpu->arch.dr6 &= ~15;
 			vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
-			vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
 			kvm_queue_exception(vcpu, DB_VECTOR);
 			return 1;
 		}
@@ -5209,8 +5195,6 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
-	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-	dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
 	reg = DEBUG_REG_ACCESS_REG(exit_qualification);
 	if (exit_qualification & TYPE_MOV_FROM_DR) {
 		unsigned long val;
@@ -5391,6 +5375,20 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_xsaves(struct kvm_vcpu *vcpu)
+{
+	skip_emulated_instruction(vcpu);
+	WARN(1, "this should never happen\n");
+	return 1;
+}
+
+static int handle_xrstors(struct kvm_vcpu *vcpu)
+{
+	skip_emulated_instruction(vcpu);
+	WARN(1, "this should never happen\n");
+	return 1;
+}
+
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
 	if (likely(fasteoi)) {
@@ -5492,7 +5490,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
 	}
 
 	/* clear all local breakpoint enable flags */
-	vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x55);
+	vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155);
 
 	/*
 	 * TODO: What about debug traps on tss switch?
@@ -5539,11 +5537,11 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	trace_kvm_page_fault(gpa, exit_qualification);
 
 	/* It is a write fault? */
-	error_code = exit_qualification & (1U << 1);
+	error_code = exit_qualification & PFERR_WRITE_MASK;
 	/* It is a fetch fault? */
-	error_code |= (exit_qualification & (1U << 2)) << 2;
+	error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;
 	/* ept page table is present? */
-	error_code |= (exit_qualification >> 3) & 0x1;
+	error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK;
 
 	vcpu->arch.exit_qualification = exit_qualification;
 
@@ -5785,6 +5783,204 @@ static void update_ple_window_actual_max(void)
 					    ple_window_grow, INT_MIN);
 }
 
+static __init int hardware_setup(void)
+{
+	int r = -ENOMEM, i, msr;
+
+	rdmsrl_safe(MSR_EFER, &host_efer);
+
+	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
+		kvm_define_shared_msr(i, vmx_msr_index[i]);
+
+	vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_io_bitmap_a)
+		return r;
+
+	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_io_bitmap_b)
+		goto out;
+
+	vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_legacy)
+		goto out1;
+
+	vmx_msr_bitmap_legacy_x2apic =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_legacy_x2apic)
+		goto out2;
+
+	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_longmode)
+		goto out3;
+
+	vmx_msr_bitmap_longmode_x2apic =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_longmode_x2apic)
+		goto out4;
+	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_vmread_bitmap)
+		goto out5;
+
+	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_vmwrite_bitmap)
+		goto out6;
+
+	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
+	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
+
+	/*
+	 * Allow direct access to the PC debug port (it is often used for I/O
+	 * delays, but the vmexits simply slow things down).
+	 */
+	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
+	clear_bit(0x80, vmx_io_bitmap_a);
+
+	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
+
+	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
+	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
+
+	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
+	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
+	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
+
+	memcpy(vmx_msr_bitmap_legacy_x2apic,
+			vmx_msr_bitmap_legacy, PAGE_SIZE);
+	memcpy(vmx_msr_bitmap_longmode_x2apic,
+			vmx_msr_bitmap_longmode, PAGE_SIZE);
+
+	if (enable_apicv) {
+		for (msr = 0x800; msr <= 0x8ff; msr++)
+			vmx_disable_intercept_msr_read_x2apic(msr);
+
+		/* According SDM, in x2apic mode, the whole id reg is used.
+		 * But in KVM, it only use the highest eight bits. Need to
+		 * intercept it */
+		vmx_enable_intercept_msr_read_x2apic(0x802);
+		/* TMCCT */
+		vmx_enable_intercept_msr_read_x2apic(0x839);
+		/* TPR */
+		vmx_disable_intercept_msr_write_x2apic(0x808);
+		/* EOI */
+		vmx_disable_intercept_msr_write_x2apic(0x80b);
+		/* SELF-IPI */
+		vmx_disable_intercept_msr_write_x2apic(0x83f);
+	}
+
+	if (enable_ept) {
+		kvm_mmu_set_mask_ptes(0ull,
+			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
+			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
+			0ull, VMX_EPT_EXECUTABLE_MASK);
+		ept_set_mmio_spte_mask();
+		kvm_enable_tdp();
+	} else
+		kvm_disable_tdp();
+
+	update_ple_window_actual_max();
+
+	if (setup_vmcs_config(&vmcs_config) < 0) {
+		r = -EIO;
+		goto out7;
+	}
+
+	if (boot_cpu_has(X86_FEATURE_NX))
+		kvm_enable_efer_bits(EFER_NX);
+
+	if (!cpu_has_vmx_vpid())
+		enable_vpid = 0;
+	if (!cpu_has_vmx_shadow_vmcs())
+		enable_shadow_vmcs = 0;
+	if (enable_shadow_vmcs)
+		init_vmcs_shadow_fields();
+
+	if (!cpu_has_vmx_ept() ||
+	    !cpu_has_vmx_ept_4levels()) {
+		enable_ept = 0;
+		enable_unrestricted_guest = 0;
+		enable_ept_ad_bits = 0;
+	}
+
+	if (!cpu_has_vmx_ept_ad_bits())
+		enable_ept_ad_bits = 0;
+
+	if (!cpu_has_vmx_unrestricted_guest())
+		enable_unrestricted_guest = 0;
+
+	if (!cpu_has_vmx_flexpriority()) {
+		flexpriority_enabled = 0;
+
+		/*
+		 * set_apic_access_page_addr() is used to reload apic access
+		 * page upon invalidation.  No need to do anything if the
+		 * processor does not have the APIC_ACCESS_ADDR VMCS field.
+		 */
+		kvm_x86_ops->set_apic_access_page_addr = NULL;
+	}
+
+	if (!cpu_has_vmx_tpr_shadow())
+		kvm_x86_ops->update_cr8_intercept = NULL;
+
+	if (enable_ept && !cpu_has_vmx_ept_2m_page())
+		kvm_disable_largepages();
+
+	if (!cpu_has_vmx_ple())
+		ple_gap = 0;
+
+	if (!cpu_has_vmx_apicv())
+		enable_apicv = 0;
+
+	if (enable_apicv)
+		kvm_x86_ops->update_cr8_intercept = NULL;
+	else {
+		kvm_x86_ops->hwapic_irr_update = NULL;
+		kvm_x86_ops->deliver_posted_interrupt = NULL;
+		kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
+	}
+
+	if (nested)
+		nested_vmx_setup_ctls_msrs();
+
+	return alloc_kvm_area();
+
+out7:
+	free_page((unsigned long)vmx_vmwrite_bitmap);
+out6:
+	free_page((unsigned long)vmx_vmread_bitmap);
+out5:
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+out4:
+	free_page((unsigned long)vmx_msr_bitmap_longmode);
+out3:
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+out2:
+	free_page((unsigned long)vmx_msr_bitmap_legacy);
+out1:
+	free_page((unsigned long)vmx_io_bitmap_b);
+out:
+	free_page((unsigned long)vmx_io_bitmap_a);
+
+	return r;
+}
+
+static __exit void hardware_unsetup(void)
+{
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+	free_page((unsigned long)vmx_msr_bitmap_legacy);
+	free_page((unsigned long)vmx_msr_bitmap_longmode);
+	free_page((unsigned long)vmx_io_bitmap_b);
+	free_page((unsigned long)vmx_io_bitmap_a);
+	free_page((unsigned long)vmx_vmwrite_bitmap);
+	free_page((unsigned long)vmx_vmread_bitmap);
+
+	free_kvm_area();
+}
+
 /*
  * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
  * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
@@ -6361,58 +6557,60 @@ static inline int vmcs_field_readonly(unsigned long field)
  * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
  * 64-bit fields are to be returned).
  */
-static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
+static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
 				   unsigned long field, u64 *ret)
 {
 	short offset = vmcs_field_to_offset(field);
 	char *p;
 
 	if (offset < 0)
-		return 0;
+		return offset;
 
 	p = ((char *)(get_vmcs12(vcpu))) + offset;
 
 	switch (vmcs_field_type(field)) {
 	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 		*ret = *((natural_width *)p);
-		return 1;
+		return 0;
 	case VMCS_FIELD_TYPE_U16:
 		*ret = *((u16 *)p);
-		return 1;
+		return 0;
 	case VMCS_FIELD_TYPE_U32:
 		*ret = *((u32 *)p);
-		return 1;
+		return 0;
 	case VMCS_FIELD_TYPE_U64:
 		*ret = *((u64 *)p);
-		return 1;
+		return 0;
 	default:
-		return 0; /* can never happen. */
+		WARN_ON(1);
+		return -ENOENT;
 	}
 }
 
 
-static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu,
+static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
 				    unsigned long field, u64 field_value){
 	short offset = vmcs_field_to_offset(field);
 	char *p = ((char *) get_vmcs12(vcpu)) + offset;
 	if (offset < 0)
-		return false;
+		return offset;
 
 	switch (vmcs_field_type(field)) {
 	case VMCS_FIELD_TYPE_U16:
 		*(u16 *)p = field_value;
-		return true;
+		return 0;
 	case VMCS_FIELD_TYPE_U32:
 		*(u32 *)p = field_value;
-		return true;
+		return 0;
 	case VMCS_FIELD_TYPE_U64:
 		*(u64 *)p = field_value;
-		return true;
+		return 0;
 	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 		*(natural_width *)p = field_value;
-		return true;
+		return 0;
 	default:
-		return false; /* can never happen. */
+		WARN_ON(1);
+		return -ENOENT;
 	}
 
 }
@@ -6445,6 +6643,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
 		case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 			field_value = vmcs_readl(field);
 			break;
+		default:
+			WARN_ON(1);
+			continue;
 		}
 		vmcs12_write_any(&vmx->vcpu, field, field_value);
 	}
@@ -6490,6 +6691,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
 		case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 			vmcs_writel(field, (long)field_value);
 			break;
+		default:
+			WARN_ON(1);
+			break;
 		}
 	}
 }
@@ -6528,7 +6732,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 	/* Decode instruction info and find the field to read */
 	field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
 	/* Read the field, zero-extended to a u64 field_value */
-	if (!vmcs12_read_any(vcpu, field, &field_value)) {
+	if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
 		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
 		skip_emulated_instruction(vcpu);
 		return 1;
@@ -6598,7 +6802,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
-	if (!vmcs12_write_any(vcpu, field, field_value)) {
+	if (vmcs12_write_any(vcpu, field, field_value) < 0) {
 		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
 		skip_emulated_instruction(vcpu);
 		return 1;
@@ -6802,6 +7006,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
 	[EXIT_REASON_INVEPT]                  = handle_invept,
 	[EXIT_REASON_INVVPID]                 = handle_invvpid,
+	[EXIT_REASON_XSAVES]                  = handle_xsaves,
+	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7089,6 +7295,14 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
 	case EXIT_REASON_XSETBV:
 		return 1;
+	case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
+		/*
+		 * This should never happen, since it is not possible to
+		 * set XSS to a non-zero value---neither in L1 nor in L2.
+		 * If if it were, XSS would have to be checked against
+		 * the XSS exit bitmap in vmcs12.
+		 */
+		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
 	default:
 		return 1;
 	}
@@ -7277,6 +7491,9 @@ static void vmx_set_rvi(int vector)
 	u16 status;
 	u8 old;
 
+	if (vector == -1)
+		vector = 0;
+
 	status = vmcs_read16(GUEST_INTR_STATUS);
 	old = (u8)status & 0xff;
 	if ((u8)vector != old) {
@@ -7288,22 +7505,23 @@ static void vmx_set_rvi(int vector)
 
 static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
 {
+	if (!is_guest_mode(vcpu)) {
+		vmx_set_rvi(max_irr);
+		return;
+	}
+
 	if (max_irr == -1)
 		return;
 
 	/*
-	 * If a vmexit is needed, vmx_check_nested_events handles it.
+	 * In guest mode.  If a vmexit is needed, vmx_check_nested_events
+	 * handles it.
 	 */
-	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
+	if (nested_exit_on_intr(vcpu))
 		return;
 
-	if (!is_guest_mode(vcpu)) {
-		vmx_set_rvi(max_irr);
-		return;
-	}
-
 	/*
-	 * Fall back to pre-APICv interrupt injection since L2
+	 * Else, fall back to pre-APICv interrupt injection since L2
 	 * is run without virtual interrupt delivery.
 	 */
 	if (!kvm_event_needs_reinjection(vcpu) &&
@@ -7400,6 +7618,12 @@ static bool vmx_mpx_supported(void)
 		(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
 }
 
+static bool vmx_xsaves_supported(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_XSAVES;
+}
+
 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
@@ -8135,6 +8359,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
 	vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
 
+	if (nested_cpu_has_xsaves(vmcs12))
+		vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
 	vmcs_write64(VMCS_LINK_POINTER, -1ull);
 
 	exec_control = vmcs12->pin_based_vm_exec_control;
@@ -8775,6 +9001,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
 	if (vmx_mpx_supported())
 		vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+	if (nested_cpu_has_xsaves(vmcs12))
+		vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
 
 	/* update exit information fields: */
 
@@ -9176,6 +9404,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.check_intercept = vmx_check_intercept,
 	.handle_external_intr = vmx_handle_external_intr,
 	.mpx_supported = vmx_mpx_supported,
+	.xsaves_supported = vmx_xsaves_supported,
 
 	.check_nested_events = vmx_check_nested_events,
 
@@ -9184,150 +9413,21 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 static int __init vmx_init(void)
 {
-	int r, i, msr;
-
-	rdmsrl_safe(MSR_EFER, &host_efer);
-
-	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
-		kvm_define_shared_msr(i, vmx_msr_index[i]);
-
-	vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_io_bitmap_a)
-		return -ENOMEM;
-
-	r = -ENOMEM;
-
-	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_io_bitmap_b)
-		goto out;
-
-	vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_legacy)
-		goto out1;
-
-	vmx_msr_bitmap_legacy_x2apic =
-				(unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_legacy_x2apic)
-		goto out2;
-
-	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_longmode)
-		goto out3;
-
-	vmx_msr_bitmap_longmode_x2apic =
-				(unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_longmode_x2apic)
-		goto out4;
-	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_vmread_bitmap)
-		goto out5;
-
-	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_vmwrite_bitmap)
-		goto out6;
-
-	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
-	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
-
-	/*
-	 * Allow direct access to the PC debug port (it is often used for I/O
-	 * delays, but the vmexits simply slow things down).
-	 */
-	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
-	clear_bit(0x80, vmx_io_bitmap_a);
-
-	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
-
-	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
-	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-
-	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
-
-	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
-		     __alignof__(struct vcpu_vmx), THIS_MODULE);
+	int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
+			 __alignof__(struct vcpu_vmx), THIS_MODULE);
 	if (r)
-		goto out7;
+		return r;
 
 #ifdef CONFIG_KEXEC
 	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
 			   crash_vmclear_local_loaded_vmcss);
 #endif
 
-	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
-	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
-	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
-
-	memcpy(vmx_msr_bitmap_legacy_x2apic,
-			vmx_msr_bitmap_legacy, PAGE_SIZE);
-	memcpy(vmx_msr_bitmap_longmode_x2apic,
-			vmx_msr_bitmap_longmode, PAGE_SIZE);
-
-	if (enable_apicv) {
-		for (msr = 0x800; msr <= 0x8ff; msr++)
-			vmx_disable_intercept_msr_read_x2apic(msr);
-
-		/* According SDM, in x2apic mode, the whole id reg is used.
-		 * But in KVM, it only use the highest eight bits. Need to
-		 * intercept it */
-		vmx_enable_intercept_msr_read_x2apic(0x802);
-		/* TMCCT */
-		vmx_enable_intercept_msr_read_x2apic(0x839);
-		/* TPR */
-		vmx_disable_intercept_msr_write_x2apic(0x808);
-		/* EOI */
-		vmx_disable_intercept_msr_write_x2apic(0x80b);
-		/* SELF-IPI */
-		vmx_disable_intercept_msr_write_x2apic(0x83f);
-	}
-
-	if (enable_ept) {
-		kvm_mmu_set_mask_ptes(0ull,
-			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
-			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
-			0ull, VMX_EPT_EXECUTABLE_MASK);
-		ept_set_mmio_spte_mask();
-		kvm_enable_tdp();
-	} else
-		kvm_disable_tdp();
-
-	update_ple_window_actual_max();
-
 	return 0;
-
-out7:
-	free_page((unsigned long)vmx_vmwrite_bitmap);
-out6:
-	free_page((unsigned long)vmx_vmread_bitmap);
-out5:
-	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-out4:
-	free_page((unsigned long)vmx_msr_bitmap_longmode);
-out3:
-	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-out2:
-	free_page((unsigned long)vmx_msr_bitmap_legacy);
-out1:
-	free_page((unsigned long)vmx_io_bitmap_b);
-out:
-	free_page((unsigned long)vmx_io_bitmap_a);
-	return r;
 }
 
 static void __exit vmx_exit(void)
 {
-	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-	free_page((unsigned long)vmx_msr_bitmap_legacy);
-	free_page((unsigned long)vmx_msr_bitmap_longmode);
-	free_page((unsigned long)vmx_io_bitmap_b);
-	free_page((unsigned long)vmx_io_bitmap_a);
-	free_page((unsigned long)vmx_vmwrite_bitmap);
-	free_page((unsigned long)vmx_vmread_bitmap);
-
 #ifdef CONFIG_KEXEC
 	RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
 	synchronize_rcu();