Diffstat (limited to 'arch/x86/kvm/vmx.c')

 arch/x86/kvm/vmx.c | 608 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 354 insertions(+), 254 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3e556c68351b..feb852b04598 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -99,13 +99,15 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
 static bool __read_mostly nested = 0;
 module_param(nested, bool, S_IRUGO);
 
+static u64 __read_mostly host_xss;
+
 #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
 #define KVM_VM_CR0_ALWAYS_ON \
 	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_CR4_GUEST_OWNED_BITS \
 	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
-	 | X86_CR4_OSXMMEXCPT)
+	 | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
 
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
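
The CR4 change above adds X86_CR4_TSD to the guest-owned bits, so a guest toggling CR4.TSD (e.g. to disable RDTSC in user mode) no longer takes a VM exit. As a rough sketch of what "guest-owned" means here (illustrative only, modelled on vmx_decache_cr4_guest_bits() in this file): guest-owned bits are read live from the VMCS GUEST_CR4 field, host-owned bits from KVM's cached value.

/* Illustrative sketch, not part of the patch. */
static unsigned long effective_guest_cr4(unsigned long vmcs_guest_cr4,
					 unsigned long cached_cr4,
					 unsigned long guest_owned_bits)
{
	return (vmcs_guest_cr4 & guest_owned_bits) |
	       (cached_cr4 & ~guest_owned_bits);
}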
@@ -214,6 +216,7 @@ struct __packed vmcs12 {
 	u64 virtual_apic_page_addr;
 	u64 apic_access_addr;
 	u64 ept_pointer;
+	u64 xss_exit_bitmap;
 	u64 guest_physical_address;
 	u64 vmcs_link_pointer;
 	u64 guest_ia32_debugctl;
@@ -616,6 +619,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
 	FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
 	FIELD64(EPT_POINTER, ept_pointer),
+	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
 	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
 	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
 	FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
@@ -720,12 +724,15 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD(HOST_RSP, host_rsp),
 	FIELD(HOST_RIP, host_rip),
 };
-static const int max_vmcs_field = ARRAY_SIZE(vmcs_field_to_offset_table);
 
 static inline short vmcs_field_to_offset(unsigned long field)
 {
-	if (field >= max_vmcs_field || vmcs_field_to_offset_table[field] == 0)
-		return -1;
+	BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
+
+	if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
+	    vmcs_field_to_offset_table[field] == 0)
+		return -ENOENT;
+
 	return vmcs_field_to_offset_table[field];
 }
 
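
An unknown field now yields -ENOENT instead of a bare -1, and the BUILD_BUG_ON guarantees that every valid table index fits in the function's short return type. A hypothetical caller (sketch, not from this patch) tests the sign rather than comparing against -1:

/* Hypothetical caller: a negative return now carries an errno value. */
static int example_use_of_offset(unsigned long field)
{
	short offset = vmcs_field_to_offset(field);

	if (offset < 0)
		return offset;	/* -ENOENT: unsupported VMCS field */
	/* ... otherwise offset is a byte offset into struct vmcs12 ... */
	return 0;
}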
@@ -758,6 +765,7 @@ static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
 static bool vmx_mpx_supported(void);
+static bool vmx_xsaves_supported(void);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
@@ -1098,6 +1106,12 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
 }
 
+static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES) &&
+		vmx_xsaves_supported();
+}
+
 static inline bool is_exception(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -1659,12 +1673,20 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 	vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
 
 	clear_atomic_switch_msr(vmx, MSR_EFER);
-	/* On ept, can't emulate nx, and must switch nx atomically */
-	if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) {
+
+	/*
+	 * On EPT, we can't emulate NX, so we must switch EFER atomically.
+	 * On CPUs that support "load IA32_EFER", always switch EFER
+	 * atomically, since it's faster than switching it manually.
+	 */
+	if (cpu_has_load_ia32_efer ||
+	    (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
 		guest_efer = vmx->vcpu.arch.efer;
 		if (!(guest_efer & EFER_LMA))
 			guest_efer &= ~EFER_LME;
-		add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer);
+		if (guest_efer != host_efer)
+			add_atomic_switch_msr(vmx, MSR_EFER,
+					      guest_efer, host_efer);
 		return false;
 	}
 
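
Read as a predicate, the new condition says: use the VMCS atomic MSR-switch area whenever the CPU can load IA32_EFER at entry/exit (faster than manual switching), or when EPT is enabled and EFER.NX differs between guest and host (NX cannot be emulated under EPT); even then, the autoload entry is skipped when the two values already match. A standalone restatement (sketch only):

/* Sketch of the decision above; not code from the patch. */
static bool want_atomic_efer_switch(u64 guest_efer, u64 host_efer,
				    bool has_load_ia32_efer, bool ept_on)
{
	return has_load_ia32_efer ||
	       (ept_on && ((guest_efer ^ host_efer) & EFER_NX));
}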
@@ -2377,12 +2399,13 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 	nested_vmx_secondary_ctls_low = 0;
 	nested_vmx_secondary_ctls_high &=
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
-		SECONDARY_EXEC_UNRESTRICTED_GUEST |
-		SECONDARY_EXEC_WBINVD_EXITING;
+		SECONDARY_EXEC_WBINVD_EXITING |
+		SECONDARY_EXEC_XSAVES;
 
 	if (enable_ept) {
 		/* nested EPT: emulate EPT also to L1 */
-		nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
+		nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT |
+			SECONDARY_EXEC_UNRESTRICTED_GUEST;
 		nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
 			 VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
 			 VMX_EPT_INVEPT_BIT;
@@ -2558,6 +2581,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 		if (!nested_vmx_allowed(vcpu))
 			return 1;
 		return vmx_get_vmx_msr(vcpu, msr_index, pdata);
+	case MSR_IA32_XSS:
+		if (!vmx_xsaves_supported())
+			return 1;
+		data = vcpu->arch.ia32_xss;
+		break;
 	case MSR_TSC_AUX:
 		if (!to_vmx(vcpu)->rdtscp_enabled)
 			return 1;
@@ -2649,6 +2677,22 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
 		return 1; /* they are read-only */
+	case MSR_IA32_XSS:
+		if (!vmx_xsaves_supported())
+			return 1;
+		/*
+		 * The only supported bit as of Skylake is bit 8, but
+		 * it is not supported on KVM.
+		 */
+		if (data != 0)
+			return 1;
+		vcpu->arch.ia32_xss = data;
+		if (vcpu->arch.ia32_xss != host_xss)
+			add_atomic_switch_msr(vmx, MSR_IA32_XSS,
+				vcpu->arch.ia32_xss, host_xss);
+		else
+			clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
+		break;
 	case MSR_TSC_AUX:
 		if (!vmx->rdtscp_enabled)
 			return 1;
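
The guest-visible contract implemented above: IA32_XSS exists only when XSAVES is exposed, and only the all-zero value is accepted for now, since KVM supports no XSS-managed state components; when the guest value differs from host_xss the MSR goes on the atomic switch list, otherwise the entry is removed so the common case costs nothing. A compressed sketch of the WRMSR outcome (illustration, not patch code):

/* 0 = accepted, 1 = #GP injected, mirroring vmx_set_msr()'s returns. */
static int xss_write_result(u64 data, bool xsaves_exposed)
{
	if (!xsaves_exposed)
		return 1;	/* MSR not present for this guest */
	if (data != 0)
		return 1;	/* no IA32_XSS bits supported yet */
	return 0;		/* stored; autoloaded only if != host_xss */
}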
@@ -2884,7 +2928,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_ENABLE_INVPCID |
 			SECONDARY_EXEC_APIC_REGISTER_VIRT |
 			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-			SECONDARY_EXEC_SHADOW_VMCS;
+			SECONDARY_EXEC_SHADOW_VMCS |
+			SECONDARY_EXEC_XSAVES;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -3007,6 +3052,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 		}
 	}
 
+	if (cpu_has_xsaves)
+		rdmsrl(MSR_IA32_XSS, host_xss);
+
 	return 0;
 }
 
@@ -3110,76 +3158,6 @@ static __init int alloc_kvm_area(void)
 	return 0;
 }
 
-static __init int hardware_setup(void)
-{
-	if (setup_vmcs_config(&vmcs_config) < 0)
-		return -EIO;
-
-	if (boot_cpu_has(X86_FEATURE_NX))
-		kvm_enable_efer_bits(EFER_NX);
-
-	if (!cpu_has_vmx_vpid())
-		enable_vpid = 0;
-	if (!cpu_has_vmx_shadow_vmcs())
-		enable_shadow_vmcs = 0;
-	if (enable_shadow_vmcs)
-		init_vmcs_shadow_fields();
-
-	if (!cpu_has_vmx_ept() ||
-	    !cpu_has_vmx_ept_4levels()) {
-		enable_ept = 0;
-		enable_unrestricted_guest = 0;
-		enable_ept_ad_bits = 0;
-	}
-
-	if (!cpu_has_vmx_ept_ad_bits())
-		enable_ept_ad_bits = 0;
-
-	if (!cpu_has_vmx_unrestricted_guest())
-		enable_unrestricted_guest = 0;
-
-	if (!cpu_has_vmx_flexpriority()) {
-		flexpriority_enabled = 0;
-
-		/*
-		 * set_apic_access_page_addr() is used to reload apic access
-		 * page upon invalidation.  No need to do anything if the
-		 * processor does not have the APIC_ACCESS_ADDR VMCS field.
-		 */
-		kvm_x86_ops->set_apic_access_page_addr = NULL;
-	}
-
-	if (!cpu_has_vmx_tpr_shadow())
-		kvm_x86_ops->update_cr8_intercept = NULL;
-
-	if (enable_ept && !cpu_has_vmx_ept_2m_page())
-		kvm_disable_largepages();
-
-	if (!cpu_has_vmx_ple())
-		ple_gap = 0;
-
-	if (!cpu_has_vmx_apicv())
-		enable_apicv = 0;
-
-	if (enable_apicv)
-		kvm_x86_ops->update_cr8_intercept = NULL;
-	else {
-		kvm_x86_ops->hwapic_irr_update = NULL;
-		kvm_x86_ops->deliver_posted_interrupt = NULL;
-		kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
-	}
-
-	if (nested)
-		nested_vmx_setup_ctls_msrs();
-
-	return alloc_kvm_area();
-}
-
-static __exit void hardware_unsetup(void)
-{
-	free_kvm_area();
-}
-
 static bool emulation_required(struct kvm_vcpu *vcpu)
 {
 	return emulate_invalid_guest_state && !guest_state_valid(vcpu);
@@ -4396,6 +4374,7 @@ static void ept_set_mmio_spte_mask(void)
 	kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
 }
 
+#define VMX_XSS_EXIT_BITMAP 0
 /*
  * Sets up the vmcs for emulated real mode.
  */
@@ -4505,6 +4484,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
 	set_cr4_guest_host_mask(vmx);
 
+	if (vmx_xsaves_supported())
+		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
+
 	return 0;
 }
 
@@ -5163,13 +5145,20 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 static int handle_dr(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification;
-	int dr, reg;
+	int dr, dr7, reg;
+
+	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+	dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
+
+	/* First, if DR does not exist, trigger UD */
+	if (!kvm_require_dr(vcpu, dr))
+		return 1;
 
 	/* Do not handle if the CPL > 0, will trigger GP on re-entry */
 	if (!kvm_require_cpl(vcpu, 0))
 		return 1;
-	dr = vmcs_readl(GUEST_DR7);
-	if (dr & DR7_GD) {
+	dr7 = vmcs_readl(GUEST_DR7);
+	if (dr7 & DR7_GD) {
 		/*
 		 * As the vm-exit takes precedence over the debug trap, we
 		 * need to emulate the latter, either for the host or the
@@ -5177,17 +5166,14 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 		 */
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
 			vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
-			vcpu->run->debug.arch.dr7 = dr;
-			vcpu->run->debug.arch.pc =
-				vmcs_readl(GUEST_CS_BASE) +
-				vmcs_readl(GUEST_RIP);
+			vcpu->run->debug.arch.dr7 = dr7;
+			vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
 			vcpu->run->debug.arch.exception = DB_VECTOR;
 			vcpu->run->exit_reason = KVM_EXIT_DEBUG;
 			return 0;
 		} else {
-			vcpu->arch.dr7 &= ~DR7_GD;
+			vcpu->arch.dr6 &= ~15;
 			vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
-			vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
 			kvm_queue_exception(vcpu, DB_VECTOR);
 			return 1;
 		}
@@ -5209,8 +5195,6 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
-	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-	dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
 	reg = DEBUG_REG_ACCESS_REG(exit_qualification);
 	if (exit_qualification & TYPE_MOV_FROM_DR) {
 		unsigned long val;
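
The handle_dr() hunks above move the exit-qualification decode to the top of the function so the DR4/DR5 existence check (#UD when CR4.DE is set) can run before the CPL check, matching the architectural fault priority. For reference, the decode follows the MOV-DR exit qualification layout (Intel SDM: bits 2:0 debug register number, bit 4 direction, bits 11:8 GP register), via macros defined near the top of vmx.c, approximately:

#define DEBUG_REG_ACCESS_NUM	0x7		/* 2:0, number of debug reg */
#define TYPE_MOV_FROM_DR	(1 << 4)	/* bit 4, direction of access */
#define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, GP register */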
@@ -5391,6 +5375,20 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_xsaves(struct kvm_vcpu *vcpu)
+{
+	skip_emulated_instruction(vcpu);
+	WARN(1, "this should never happen\n");
+	return 1;
+}
+
+static int handle_xrstors(struct kvm_vcpu *vcpu)
+{
+	skip_emulated_instruction(vcpu);
+	WARN(1, "this should never happen\n");
+	return 1;
+}
+
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
 	if (likely(fasteoi)) {
@@ -5492,7 +5490,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
 	}
 
 	/* clear all local breakpoint enable flags */
-	vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x55);
+	vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155);
 
 	/*
 	 * TODO: What about debug traps on tss switch?
@@ -5539,11 +5537,11 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	trace_kvm_page_fault(gpa, exit_qualification);
 
 	/* It is a write fault? */
-	error_code = exit_qualification & (1U << 1);
+	error_code = exit_qualification & PFERR_WRITE_MASK;
 	/* It is a fetch fault? */
-	error_code |= (exit_qualification & (1U << 2)) << 2;
+	error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;
 	/* ept page table is present? */
-	error_code |= (exit_qualification >> 3) & 0x1;
+	error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK;
 
 	vcpu->arch.exit_qualification = exit_qualification;
 
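
The symbolic masks make the bit shuffling verifiable: the EPT exit qualification carries write/fetch/present in bits 1, 2 and 3, while the page-fault error code keeps present in bit 0, write in bit 1 and fetch in bit 4 (the PFERR_*_MASK values). So write passes straight through, fetch shifts left by 2, and present shifts right by 3; the self-contained sketch below replays the arithmetic with the assumed mask values:

#define PFERR_PRESENT_MASK	(1U << 0)
#define PFERR_WRITE_MASK	(1U << 1)
#define PFERR_FETCH_MASK	(1U << 4)

/* Same arithmetic as handle_ept_violation() above (sketch). */
static unsigned long ept_error_code(unsigned long exit_qualification)
{
	unsigned long error_code;

	error_code  = exit_qualification & PFERR_WRITE_MASK;	      /* bit 1 -> 1 */
	error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;   /* bit 2 -> 4 */
	error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK; /* bit 3 -> 0 */
	return error_code;
}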
@@ -5785,6 +5783,204 @@ static void update_ple_window_actual_max(void)
 			                        ple_window_grow, INT_MIN);
 }
 
+static __init int hardware_setup(void)
+{
+	int r = -ENOMEM, i, msr;
+
+	rdmsrl_safe(MSR_EFER, &host_efer);
+
+	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
+		kvm_define_shared_msr(i, vmx_msr_index[i]);
+
+	vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_io_bitmap_a)
+		return r;
+
+	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_io_bitmap_b)
+		goto out;
+
+	vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_legacy)
+		goto out1;
+
+	vmx_msr_bitmap_legacy_x2apic =
+			(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_legacy_x2apic)
+		goto out2;
+
+	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_longmode)
+		goto out3;
+
+	vmx_msr_bitmap_longmode_x2apic =
+			(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_longmode_x2apic)
+		goto out4;
+	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_vmread_bitmap)
+		goto out5;
+
+	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_vmwrite_bitmap)
+		goto out6;
+
+	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
+	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
+
+	/*
+	 * Allow direct access to the PC debug port (it is often used for I/O
+	 * delays, but the vmexits simply slow things down).
+	 */
+	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
+	clear_bit(0x80, vmx_io_bitmap_a);
+
+	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
+
+	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
+	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
+
+	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
+	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
+	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
+
+	memcpy(vmx_msr_bitmap_legacy_x2apic,
+			vmx_msr_bitmap_legacy, PAGE_SIZE);
+	memcpy(vmx_msr_bitmap_longmode_x2apic,
+			vmx_msr_bitmap_longmode, PAGE_SIZE);
+
+	if (enable_apicv) {
+		for (msr = 0x800; msr <= 0x8ff; msr++)
+			vmx_disable_intercept_msr_read_x2apic(msr);
+
+		/* According SDM, in x2apic mode, the whole id reg is used.
+		 * But in KVM, it only use the highest eight bits. Need to
+		 * intercept it */
+		vmx_enable_intercept_msr_read_x2apic(0x802);
+		/* TMCCT */
+		vmx_enable_intercept_msr_read_x2apic(0x839);
+		/* TPR */
+		vmx_disable_intercept_msr_write_x2apic(0x808);
+		/* EOI */
+		vmx_disable_intercept_msr_write_x2apic(0x80b);
+		/* SELF-IPI */
+		vmx_disable_intercept_msr_write_x2apic(0x83f);
+	}
+
+	if (enable_ept) {
+		kvm_mmu_set_mask_ptes(0ull,
+			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
+			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
+			0ull, VMX_EPT_EXECUTABLE_MASK);
+		ept_set_mmio_spte_mask();
+		kvm_enable_tdp();
+	} else
+		kvm_disable_tdp();
+
+	update_ple_window_actual_max();
+
+	if (setup_vmcs_config(&vmcs_config) < 0) {
+		r = -EIO;
+		goto out7;
+	}
+
+	if (boot_cpu_has(X86_FEATURE_NX))
+		kvm_enable_efer_bits(EFER_NX);
+
+	if (!cpu_has_vmx_vpid())
+		enable_vpid = 0;
+	if (!cpu_has_vmx_shadow_vmcs())
+		enable_shadow_vmcs = 0;
+	if (enable_shadow_vmcs)
+		init_vmcs_shadow_fields();
+
+	if (!cpu_has_vmx_ept() ||
+	    !cpu_has_vmx_ept_4levels()) {
+		enable_ept = 0;
+		enable_unrestricted_guest = 0;
+		enable_ept_ad_bits = 0;
+	}
+
+	if (!cpu_has_vmx_ept_ad_bits())
+		enable_ept_ad_bits = 0;
+
+	if (!cpu_has_vmx_unrestricted_guest())
+		enable_unrestricted_guest = 0;
+
+	if (!cpu_has_vmx_flexpriority()) {
+		flexpriority_enabled = 0;
+
+		/*
+		 * set_apic_access_page_addr() is used to reload apic access
+		 * page upon invalidation.  No need to do anything if the
+		 * processor does not have the APIC_ACCESS_ADDR VMCS field.
+		 */
+		kvm_x86_ops->set_apic_access_page_addr = NULL;
+	}
+
+	if (!cpu_has_vmx_tpr_shadow())
+		kvm_x86_ops->update_cr8_intercept = NULL;
+
+	if (enable_ept && !cpu_has_vmx_ept_2m_page())
+		kvm_disable_largepages();
+
+	if (!cpu_has_vmx_ple())
+		ple_gap = 0;
+
+	if (!cpu_has_vmx_apicv())
+		enable_apicv = 0;
+
+	if (enable_apicv)
+		kvm_x86_ops->update_cr8_intercept = NULL;
+	else {
+		kvm_x86_ops->hwapic_irr_update = NULL;
+		kvm_x86_ops->deliver_posted_interrupt = NULL;
+		kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
+	}
+
+	if (nested)
+		nested_vmx_setup_ctls_msrs();
+
+	return alloc_kvm_area();
+
+out7:
+	free_page((unsigned long)vmx_vmwrite_bitmap);
+out6:
+	free_page((unsigned long)vmx_vmread_bitmap);
+out5:
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+out4:
+	free_page((unsigned long)vmx_msr_bitmap_longmode);
+out3:
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+out2:
+	free_page((unsigned long)vmx_msr_bitmap_legacy);
+out1:
+	free_page((unsigned long)vmx_io_bitmap_b);
+out:
+	free_page((unsigned long)vmx_io_bitmap_a);
+
+	return r;
+}
+
+static __exit void hardware_unsetup(void)
+{
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+	free_page((unsigned long)vmx_msr_bitmap_legacy);
+	free_page((unsigned long)vmx_msr_bitmap_longmode);
+	free_page((unsigned long)vmx_io_bitmap_b);
+	free_page((unsigned long)vmx_io_bitmap_a);
+	free_page((unsigned long)vmx_vmwrite_bitmap);
+	free_page((unsigned long)vmx_vmread_bitmap);
+
+	free_kvm_area();
+}
+
 /*
  * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
  * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
@@ -6361,58 +6557,60 @@ static inline int vmcs_field_readonly(unsigned long field)
  * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
  * 64-bit fields are to be returned).
  */
-static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
+static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
 				   unsigned long field, u64 *ret)
 {
 	short offset = vmcs_field_to_offset(field);
 	char *p;
 
 	if (offset < 0)
-		return 0;
+		return offset;
 
 	p = ((char *)(get_vmcs12(vcpu))) + offset;
 
 	switch (vmcs_field_type(field)) {
 	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 		*ret = *((natural_width *)p);
-		return 1;
+		return 0;
 	case VMCS_FIELD_TYPE_U16:
 		*ret = *((u16 *)p);
-		return 1;
+		return 0;
 	case VMCS_FIELD_TYPE_U32:
 		*ret = *((u32 *)p);
-		return 1;
+		return 0;
 	case VMCS_FIELD_TYPE_U64:
 		*ret = *((u64 *)p);
-		return 1;
+		return 0;
 	default:
-		return 0; /* can never happen. */
+		WARN_ON(1);
+		return -ENOENT;
 	}
 }
 
 
-static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu,
+static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
 				    unsigned long field, u64 field_value){
 	short offset = vmcs_field_to_offset(field);
 	char *p = ((char *) get_vmcs12(vcpu)) + offset;
 	if (offset < 0)
-		return false;
+		return offset;
 
 	switch (vmcs_field_type(field)) {
 	case VMCS_FIELD_TYPE_U16:
 		*(u16 *)p = field_value;
-		return true;
+		return 0;
 	case VMCS_FIELD_TYPE_U32:
 		*(u32 *)p = field_value;
-		return true;
+		return 0;
 	case VMCS_FIELD_TYPE_U64:
 		*(u64 *)p = field_value;
-		return true;
+		return 0;
 	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 		*(natural_width *)p = field_value;
-		return true;
+		return 0;
 	default:
-		return false; /* can never happen. */
+		WARN_ON(1);
+		return -ENOENT;
 	}
 
 }
@@ -6445,6 +6643,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
 		case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 			field_value = vmcs_readl(field);
 			break;
+		default:
+			WARN_ON(1);
+			continue;
 		}
 		vmcs12_write_any(&vmx->vcpu, field, field_value);
 	}
@@ -6490,6 +6691,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
 		case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 			vmcs_writel(field, (long)field_value);
 			break;
+		default:
+			WARN_ON(1);
+			break;
 		}
 	}
 }
@@ -6528,7 +6732,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 	/* Decode instruction info and find the field to read */
 	field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
 	/* Read the field, zero-extended to a u64 field_value */
-	if (!vmcs12_read_any(vcpu, field, &field_value)) {
+	if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
 		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
 		skip_emulated_instruction(vcpu);
 		return 1;
@@ -6598,7 +6802,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
-	if (!vmcs12_write_any(vcpu, field, field_value)) {
+	if (vmcs12_write_any(vcpu, field, field_value) < 0) {
 		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
 		skip_emulated_instruction(vcpu);
 		return 1;
@@ -6802,6 +7006,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
 	[EXIT_REASON_INVEPT]                  = handle_invept,
 	[EXIT_REASON_INVVPID]                 = handle_invvpid,
+	[EXIT_REASON_XSAVES]                  = handle_xsaves,
+	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7089,6 +7295,14 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
 	case EXIT_REASON_XSETBV:
 		return 1;
+	case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
+		/*
+		 * This should never happen, since it is not possible to
+		 * set XSS to a non-zero value---neither in L1 nor in L2.
+		 * If it were, XSS would have to be checked against
+		 * the XSS exit bitmap in vmcs12.
+		 */
+		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
 	default:
 		return 1;
 	}
@@ -7277,6 +7491,9 @@ static void vmx_set_rvi(int vector)
 	u16 status;
 	u8 old;
 
+	if (vector == -1)
+		vector = 0;
+
 	status = vmcs_read16(GUEST_INTR_STATUS);
 	old = (u8)status & 0xff;
 	if ((u8)vector != old) {
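
RVI lives in the low byte of the 16-bit GUEST_INTR_STATUS field, and max_irr == -1 is the software convention for "no interrupt pending"; without the new clamp, truncating -1 to u8 would program a bogus vector 0xff. Conceptually:

/* Sketch: normalize "no pending interrupt" before the 8-bit truncation. */
static u8 rvi_value(int max_irr)
{
	return (max_irr == -1) ? 0 : (u8)max_irr;
}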
@@ -7288,22 +7505,23 @@ static void vmx_set_rvi(int vector)
 
 static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
 {
+	if (!is_guest_mode(vcpu)) {
+		vmx_set_rvi(max_irr);
+		return;
+	}
+
 	if (max_irr == -1)
 		return;
 
 	/*
-	 * If a vmexit is needed, vmx_check_nested_events handles it.
+	 * In guest mode.  If a vmexit is needed, vmx_check_nested_events
+	 * handles it.
 	 */
-	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
+	if (nested_exit_on_intr(vcpu))
 		return;
 
-	if (!is_guest_mode(vcpu)) {
-		vmx_set_rvi(max_irr);
-		return;
-	}
-
 	/*
-	 * Fall back to pre-APICv interrupt injection since L2
+	 * Else, fall back to pre-APICv interrupt injection since L2
 	 * is run without virtual interrupt delivery.
 	 */
 	if (!kvm_event_needs_reinjection(vcpu) &&
@@ -7400,6 +7618,12 @@ static bool vmx_mpx_supported(void)
 		(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
 }
 
+static bool vmx_xsaves_supported(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_XSAVES;
+}
+
 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
@@ -8135,6 +8359,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
 	vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
 
+	if (nested_cpu_has_xsaves(vmcs12))
+		vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
 	vmcs_write64(VMCS_LINK_POINTER, -1ull);
 
 	exec_control = vmcs12->pin_based_vm_exec_control;
@@ -8775,6 +9001,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
 	if (vmx_mpx_supported())
 		vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+	if (nested_cpu_has_xsaves(vmcs12))
+		vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
 
 	/* update exit information fields: */
 
@@ -9176,6 +9404,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.check_intercept = vmx_check_intercept,
 	.handle_external_intr = vmx_handle_external_intr,
 	.mpx_supported = vmx_mpx_supported,
+	.xsaves_supported = vmx_xsaves_supported,
 
 	.check_nested_events = vmx_check_nested_events,
 
@@ -9184,150 +9413,21 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 static int __init vmx_init(void)
 {
-	int r, i, msr;
-
-	rdmsrl_safe(MSR_EFER, &host_efer);
-
-	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
-		kvm_define_shared_msr(i, vmx_msr_index[i]);
-
-	vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_io_bitmap_a)
-		return -ENOMEM;
-
-	r = -ENOMEM;
-
-	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_io_bitmap_b)
-		goto out;
-
-	vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_legacy)
-		goto out1;
-
-	vmx_msr_bitmap_legacy_x2apic =
-			(unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_legacy_x2apic)
-		goto out2;
-
-	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_longmode)
-		goto out3;
-
-	vmx_msr_bitmap_longmode_x2apic =
-			(unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_longmode_x2apic)
-		goto out4;
-	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_vmread_bitmap)
-		goto out5;
-
-	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_vmwrite_bitmap)
-		goto out6;
-
-	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
-	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
-
-	/*
-	 * Allow direct access to the PC debug port (it is often used for I/O
-	 * delays, but the vmexits simply slow things down).
-	 */
-	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
-	clear_bit(0x80, vmx_io_bitmap_a);
-
-	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
-
-	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
-	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-
-	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
-
-	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
-		     __alignof__(struct vcpu_vmx), THIS_MODULE);
+	int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
+			 __alignof__(struct vcpu_vmx), THIS_MODULE);
 	if (r)
-		goto out7;
+		return r;
 
 #ifdef CONFIG_KEXEC
 	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
 			   crash_vmclear_local_loaded_vmcss);
 #endif
 
-	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
-	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
-	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
-
-	memcpy(vmx_msr_bitmap_legacy_x2apic,
-			vmx_msr_bitmap_legacy, PAGE_SIZE);
-	memcpy(vmx_msr_bitmap_longmode_x2apic,
-			vmx_msr_bitmap_longmode, PAGE_SIZE);
-
-	if (enable_apicv) {
-		for (msr = 0x800; msr <= 0x8ff; msr++)
-			vmx_disable_intercept_msr_read_x2apic(msr);
-
-		/* According SDM, in x2apic mode, the whole id reg is used.
-		 * But in KVM, it only use the highest eight bits. Need to
-		 * intercept it */
-		vmx_enable_intercept_msr_read_x2apic(0x802);
-		/* TMCCT */
-		vmx_enable_intercept_msr_read_x2apic(0x839);
-		/* TPR */
-		vmx_disable_intercept_msr_write_x2apic(0x808);
-		/* EOI */
-		vmx_disable_intercept_msr_write_x2apic(0x80b);
-		/* SELF-IPI */
-		vmx_disable_intercept_msr_write_x2apic(0x83f);
-	}
-
-	if (enable_ept) {
-		kvm_mmu_set_mask_ptes(0ull,
-			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
-			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
-			0ull, VMX_EPT_EXECUTABLE_MASK);
-		ept_set_mmio_spte_mask();
-		kvm_enable_tdp();
-	} else
-		kvm_disable_tdp();
-
-	update_ple_window_actual_max();
-
 	return 0;
-
-out7:
-	free_page((unsigned long)vmx_vmwrite_bitmap);
-out6:
-	free_page((unsigned long)vmx_vmread_bitmap);
-out5:
-	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-out4:
-	free_page((unsigned long)vmx_msr_bitmap_longmode);
-out3:
-	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-out2:
-	free_page((unsigned long)vmx_msr_bitmap_legacy);
-out1:
-	free_page((unsigned long)vmx_io_bitmap_b);
-out:
-	free_page((unsigned long)vmx_io_bitmap_a);
-	return r;
 }
 
 static void __exit vmx_exit(void)
 {
-	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-	free_page((unsigned long)vmx_msr_bitmap_legacy);
-	free_page((unsigned long)vmx_msr_bitmap_longmode);
-	free_page((unsigned long)vmx_io_bitmap_b);
-	free_page((unsigned long)vmx_io_bitmap_a);
-	free_page((unsigned long)vmx_vmwrite_bitmap);
-	free_page((unsigned long)vmx_vmread_bitmap);
-
 #ifdef CONFIG_KEXEC
 	RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
 	synchronize_rcu();