Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--	arch/x86/kvm/vmx.c	334
1 file changed, 228 insertions(+), 106 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 392752834751..1320e0f8e611 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -31,6 +31,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/slab.h>
 #include <linux/tboot.h>
+#include <linux/hrtimer.h>
 #include "kvm_cache_regs.h"
 #include "x86.h"
 
@@ -42,6 +43,7 @@
 #include <asm/i387.h>
 #include <asm/xcr.h>
 #include <asm/perf_event.h>
+#include <asm/debugreg.h>
 #include <asm/kexec.h>
 
 #include "trace.h"
@@ -110,6 +112,8 @@ module_param(nested, bool, S_IRUGO);
 
 #define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
 
+#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
+
 /*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * ple_gap: upper bound on the amount of time between two successive
@@ -202,6 +206,7 @@ struct __packed vmcs12 {
 	u64 guest_pdptr1;
 	u64 guest_pdptr2;
 	u64 guest_pdptr3;
+	u64 guest_bndcfgs;
 	u64 host_ia32_pat;
 	u64 host_ia32_efer;
 	u64 host_ia32_perf_global_ctrl;
@@ -374,6 +379,9 @@ struct nested_vmx {
 	 */
 	struct page *apic_access_page;
 	u64 msr_ia32_feature_control;
+
+	struct hrtimer preemption_timer;
+	bool preemption_timer_expired;
 };
 
 #define POSTED_INTR_ON 0
@@ -441,6 +449,7 @@ struct vcpu_vmx {
 #endif
 		int gs_ldt_reload_needed;
 		int fs_reload_needed;
+		u64 msr_host_bndcfgs;
 	} host_state;
 	struct {
 		int vm86_active;
@@ -533,6 +542,7 @@ static const unsigned long shadow_read_write_fields[] = {
 	GUEST_CS_LIMIT,
 	GUEST_CS_BASE,
 	GUEST_ES_BASE,
+	GUEST_BNDCFGS,
 	CR0_GUEST_HOST_MASK,
 	CR0_READ_SHADOW,
 	CR4_READ_SHADOW,
@@ -588,6 +598,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD64(GUEST_PDPTR1, guest_pdptr1),
 	FIELD64(GUEST_PDPTR2, guest_pdptr2),
 	FIELD64(GUEST_PDPTR3, guest_pdptr3),
+	FIELD64(GUEST_BNDCFGS, guest_bndcfgs),
 	FIELD64(HOST_IA32_PAT, host_ia32_pat),
 	FIELD64(HOST_IA32_EFER, host_ia32_efer),
 	FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
@@ -718,6 +729,7 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
 static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
+static bool vmx_mpx_supported(void);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
@@ -728,6 +740,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
 static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
 static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
 static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
+static bool vmx_mpx_supported(void);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -1047,6 +1060,12 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
 	return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
 }
 
+static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
+{
+	return vmcs12->pin_based_vm_exec_control &
+		PIN_BASED_VMX_PREEMPTION_TIMER;
+}
+
 static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
 {
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
@@ -1710,6 +1729,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 	if (is_long_mode(&vmx->vcpu))
 		wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
 #endif
+	if (boot_cpu_has(X86_FEATURE_MPX))
+		rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
 	for (i = 0; i < vmx->save_nmsrs; ++i)
 		kvm_set_shared_msr(vmx->guest_msrs[i].index,
 				   vmx->guest_msrs[i].data,
@@ -1747,6 +1768,8 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 #ifdef CONFIG_X86_64
 	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
 #endif
+	if (vmx->host_state.msr_host_bndcfgs)
+		wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
 	/*
 	 * If the FPU is not active (through the host task or
 	 * the guest vcpu), then restore the cr0.TS bit.
@@ -2248,9 +2271,9 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 	 */
 	nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
 	nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
-		PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS |
+		PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS;
+	nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
 		PIN_BASED_VMX_PREEMPTION_TIMER;
-	nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
 
 	/*
 	 * Exit controls
@@ -2265,15 +2288,12 @@
 #ifdef CONFIG_X86_64
 		VM_EXIT_HOST_ADDR_SPACE_SIZE |
 #endif
-		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
+		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
+	nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
+		VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
 		VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
-	if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) ||
-	    !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
-		nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
-		nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
-	}
-	nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
-		VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
+	if (vmx_mpx_supported())
+		nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
 
 	/* entry controls */
 	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2287,6 +2307,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 		VM_ENTRY_LOAD_IA32_PAT;
 	nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
 		VM_ENTRY_LOAD_IA32_EFER);
+	if (vmx_mpx_supported())
+		nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
 
 	/* cpu-based controls */
 	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -2342,9 +2364,9 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 
 	/* miscellaneous data */
 	rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
-	nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
-		VMX_MISC_SAVE_EFER_LMA;
-	nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT;
+	nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
+	nested_vmx_misc_low |= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
+		VMX_MISC_ACTIVITY_HLT;
 	nested_vmx_misc_high = 0;
 }
 
@@ -2479,6 +2501,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 	case MSR_IA32_SYSENTER_ESP:
 		data = vmcs_readl(GUEST_SYSENTER_ESP);
 		break;
+	case MSR_IA32_BNDCFGS:
+		if (!vmx_mpx_supported())
+			return 1;
+		data = vmcs_read64(GUEST_BNDCFGS);
+		break;
 	case MSR_IA32_FEATURE_CONTROL:
 		if (!nested_vmx_allowed(vcpu))
 			return 1;
@@ -2547,6 +2574,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_SYSENTER_ESP:
 		vmcs_writel(GUEST_SYSENTER_ESP, data);
 		break;
+	case MSR_IA32_BNDCFGS:
+		if (!vmx_mpx_supported())
+			return 1;
+		vmcs_write64(GUEST_BNDCFGS, data);
+		break;
 	case MSR_IA32_TSC:
 		kvm_write_tsc(vcpu, msr_info);
 		break;
@@ -2832,12 +2864,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			vmx_capability.ept, vmx_capability.vpid);
 	}
 
-	min = 0;
+	min = VM_EXIT_SAVE_DEBUG_CONTROLS;
 #ifdef CONFIG_X86_64
 	min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #endif
 	opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
-		VM_EXIT_ACK_INTR_ON_EXIT;
+		VM_EXIT_ACK_INTR_ON_EXIT | VM_EXIT_CLEAR_BNDCFGS;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
 				&_vmexit_control) < 0)
 		return -EIO;
@@ -2853,8 +2885,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 		!(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
 		_pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
 
-	min = 0;
-	opt = VM_ENTRY_LOAD_IA32_PAT;
+	min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
+	opt = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
 				&_vmentry_control) < 0)
 		return -EIO;
@@ -4223,6 +4255,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
 static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 {
 	u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
+
+	if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
+		exec_control &= ~CPU_BASED_MOV_DR_EXITING;
+
 	if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) {
 		exec_control &= ~CPU_BASED_TPR_SHADOW;
 #ifdef CONFIG_X86_64
@@ -4496,39 +4532,28 @@ static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
 		PIN_BASED_NMI_EXITING;
 }
 
-static int enable_irq_window(struct kvm_vcpu *vcpu)
+static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	u32 cpu_based_vm_exec_control;
 
-	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
-		/*
-		 * We get here if vmx_interrupt_allowed() said we can't
-		 * inject to L1 now because L2 must run. The caller will have
-		 * to make L2 exit right after entry, so we can inject to L1
-		 * more promptly.
-		 */
-		return -EBUSY;
-
 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-	return 0;
 }
 
-static int enable_nmi_window(struct kvm_vcpu *vcpu)
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
 {
 	u32 cpu_based_vm_exec_control;
 
-	if (!cpu_has_virtual_nmis())
-		return enable_irq_window(vcpu);
-
-	if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI)
-		return enable_irq_window(vcpu);
+	if (!cpu_has_virtual_nmis() ||
+	    vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+		enable_irq_window(vcpu);
+		return;
+	}
 
 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-	return 0;
 }
 
 static void vmx_inject_irq(struct kvm_vcpu *vcpu)
@@ -4620,22 +4645,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 
 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 {
-	if (is_guest_mode(vcpu)) {
-		if (to_vmx(vcpu)->nested.nested_run_pending)
-			return 0;
-		if (nested_exit_on_nmi(vcpu)) {
-			nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
-					  NMI_VECTOR | INTR_TYPE_NMI_INTR |
-					  INTR_INFO_VALID_MASK, 0);
-			/*
-			 * The NMI-triggered VM exit counts as injection:
-			 * clear this one and block further NMIs.
-			 */
-			vcpu->arch.nmi_pending = 0;
-			vmx_set_nmi_mask(vcpu, true);
-			return 0;
-		}
-	}
+	if (to_vmx(vcpu)->nested.nested_run_pending)
+		return 0;
 
 	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
 		return 0;
@@ -4647,19 +4658,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
-	if (is_guest_mode(vcpu)) {
-		if (to_vmx(vcpu)->nested.nested_run_pending)
-			return 0;
-		if (nested_exit_on_intr(vcpu)) {
-			nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
-					  0, 0);
-			/*
-			 * fall through to normal code, but now in L1, not L2
-			 */
-		}
-	}
-
-	return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+	return (!to_vmx(vcpu)->nested.nested_run_pending &&
+		vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
 		!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
 			(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
 }
@@ -5102,6 +5102,22 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 		}
 	}
 
+	if (vcpu->guest_debug == 0) {
+		u32 cpu_based_vm_exec_control;
+
+		cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+		cpu_based_vm_exec_control &= ~CPU_BASED_MOV_DR_EXITING;
+		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+
+		/*
+		 * No more DR vmexits; force a reload of the debug registers
+		 * and reenter on this instruction. The next vmexit will
+		 * retrieve the full state of the debug registers.
+		 */
+		vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
+		return 1;
+	}
+
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 	dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
 	reg = DEBUG_REG_ACCESS_REG(exit_qualification);
@@ -5128,6 +5144,24 @@ static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
 {
 }
 
+static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
+
+	get_debugreg(vcpu->arch.db[0], 0);
+	get_debugreg(vcpu->arch.db[1], 1);
+	get_debugreg(vcpu->arch.db[2], 2);
+	get_debugreg(vcpu->arch.db[3], 3);
+	get_debugreg(vcpu->arch.dr6, 6);
+	vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
+
+	vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_MOV_DR_EXITING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
 static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
 {
 	vmcs_writel(GUEST_DR7, val);
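The two hunks above are easier to read as a pair: handle_dr() stops intercepting debug-register accesses while no host debugger is attached and flags the vCPU with KVM_DEBUGREG_WONT_EXIT, and vmx_sync_dirty_debug_regs() later pulls the real register contents back and re-arms the intercept. The following is only a minimal user-space model of that round trip (the struct and helper names are invented for illustration, not kernel code):

/* Toy model of the lazy MOV-DR intercept: booleans stand in for the
 * CPU_BASED_MOV_DR_EXITING exec-control bit and KVM_DEBUGREG_WONT_EXIT. */
#include <stdbool.h>
#include <stdio.h>

struct vcpu_model {
	bool mov_dr_exiting;     /* intercept MOV DR in the VMCS */
	bool debugreg_wont_exit; /* guest owns the debug registers */
	bool guest_debug;        /* host userspace is debugging the guest */
};

/* like handle_dr(): first DR access with no host debugger -> stop exiting */
static void model_handle_dr(struct vcpu_model *v)
{
	if (!v->guest_debug) {
		v->mov_dr_exiting = false;
		v->debugreg_wont_exit = true; /* reload DRs, reenter guest */
	}
}

/* like vmx_sync_dirty_debug_regs(): next exit that needs DR state resyncs */
static void model_sync_dirty_debug_regs(struct vcpu_model *v)
{
	/* here the real code copies DR0-DR3 and DR6 with get_debugreg() */
	v->debugreg_wont_exit = false;
	v->mov_dr_exiting = true; /* intercept again until the next burst */
}

int main(void)
{
	struct vcpu_model v = { .mov_dr_exiting = true };

	model_handle_dr(&v);             /* guest touches a debug register */
	printf("exiting=%d wont_exit=%d\n", v.mov_dr_exiting, v.debugreg_wont_exit);
	model_sync_dirty_debug_regs(&v); /* a later vmexit pulls state back */
	printf("exiting=%d wont_exit=%d\n", v.mov_dr_exiting, v.debugreg_wont_exit);
	return 0;
}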
@@ -5727,6 +5761,18 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
 	 */
 }
 
+static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
+{
+	struct vcpu_vmx *vmx =
+		container_of(timer, struct vcpu_vmx, nested.preemption_timer);
+
+	vmx->nested.preemption_timer_expired = true;
+	kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
+	kvm_vcpu_kick(&vmx->vcpu);
+
+	return HRTIMER_NORESTART;
+}
+
 /*
  * Emulate the VMXON instruction.
  * Currently, we just remember that VMX is active, and do not save or even
@@ -5791,6 +5837,10 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
 	vmx->nested.vmcs02_num = 0;
 
+	hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL);
+	vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
+
 	vmx->nested.vmxon = true;
 
 	skip_emulated_instruction(vcpu);
@@ -6767,9 +6817,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		 * table is L0's fault.
 		 */
 		return 0;
-	case EXIT_REASON_PREEMPTION_TIMER:
-		return vmcs12->pin_based_vm_exec_control &
-			PIN_BASED_VMX_PREEMPTION_TIMER;
 	case EXIT_REASON_WBINVD:
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
 	case EXIT_REASON_XSETBV:
@@ -6785,27 +6832,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
 	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 }
 
-static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
-{
-	u64 delta_tsc_l1;
-	u32 preempt_val_l1, preempt_val_l2, preempt_scale;
-
-	if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
-			PIN_BASED_VMX_PREEMPTION_TIMER))
-		return;
-	preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
-			MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
-	preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
-	delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
-		- vcpu->arch.last_guest_tsc;
-	preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
-	if (preempt_val_l2 <= preempt_val_l1)
-		preempt_val_l2 = 0;
-	else
-		preempt_val_l2 -= preempt_val_l1;
-	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
-}
-
 /*
  * The guest has exited. See if we can fix it or if we need userspace
  * assistance.
@@ -7052,6 +7078,12 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 	local_irq_enable();
 }
 
+static bool vmx_mpx_supported(void)
+{
+	return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
+		(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
+}
+
 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
@@ -7218,8 +7250,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	atomic_switch_perf_msrs(vmx);
 	debugctlmsr = get_debugctlmsr();
 
-	if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending)
-		nested_adjust_preemption_timer(vcpu);
 	vmx->__launched = vmx->loaded_vmcs->launched;
 	asm(
 		/* Store host registers */
@@ -7616,6 +7646,28 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
 		kvm_inject_page_fault(vcpu, fault);
 }
 
+static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
+{
+	u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (vcpu->arch.virtual_tsc_khz == 0)
+		return;
+
+	/* Make sure short timeouts reliably trigger an immediate vmexit.
+	 * hrtimer_start does not guarantee this. */
+	if (preemption_timeout <= 1) {
+		vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
+		return;
+	}
+
+	preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
+	preemption_timeout *= 1000000;
+	do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
+	hrtimer_start(&vmx->nested.preemption_timer,
+		      ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
+}
+
 /*
  * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
  * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
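The hunk above replaces the old scheme of reprogramming VMX_PREEMPTION_TIMER_VALUE in hardware with an emulated timer backed by a host hrtimer. The conversion uses the emulated rate advertised earlier in this patch (VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE = 5, i.e. one timer tick per 2^5 TSC cycles): ns = (value << 5) * 1,000,000 / virtual_tsc_khz. A standalone sketch of that arithmetic, with an example 2.4 GHz virtual TSC chosen purely for illustration:

/* Standalone check of the tick -> nanosecond conversion performed by
 * vmx_start_preemption_timer() above; not kernel code. */
#include <stdint.h>
#include <stdio.h>

#define EMULATED_PREEMPTION_TIMER_RATE 5	/* 1 tick = 2^5 TSC cycles */

static uint64_t timer_value_to_ns(uint64_t value, uint64_t virtual_tsc_khz)
{
	uint64_t ns = value << EMULATED_PREEMPTION_TIMER_RATE; /* TSC cycles */
	ns *= 1000000;			/* cycles * 1e6 ...              */
	ns /= virtual_tsc_khz;		/* ... / kHz = nanoseconds       */
	return ns;
}

int main(void)
{
	/* e.g. a guest-programmed value of 1000 ticks on a 2.4 GHz vTSC: */
	printf("%llu ns\n",
	       (unsigned long long)timer_value_to_ns(1000, 2400000));
	/* prints 13333: 1000 * 32 cycles at 2.4 cycles per nanosecond */
	return 0;
}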
@@ -7629,7 +7681,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 exec_control;
-	u32 exit_control;
 
 	vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
 	vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7687,13 +7738,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
 	vmcs_write64(VMCS_LINK_POINTER, -1ull);
 
-	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
-		(vmcs_config.pin_based_exec_ctrl |
-		 vmcs12->pin_based_vm_exec_control));
+	exec_control = vmcs12->pin_based_vm_exec_control;
+	exec_control |= vmcs_config.pin_based_exec_ctrl;
+	exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
 
-	if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
-		vmcs_write32(VMX_PREEMPTION_TIMER_VALUE,
-			     vmcs12->vmx_preemption_timer_value);
+	vmx->nested.preemption_timer_expired = false;
+	if (nested_cpu_has_preemption_timer(vmcs12))
+		vmx_start_preemption_timer(vcpu);
 
 	/*
 	 * Whether page-faults are trapped is determined by a combination of
@@ -7721,7 +7773,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		enable_ept ? vmcs12->page_fault_error_code_match : 0);
 
 	if (cpu_has_secondary_exec_ctrls()) {
-		u32 exec_control = vmx_secondary_exec_control(vmx);
+		exec_control = vmx_secondary_exec_control(vmx);
 		if (!vmx->rdtscp_enabled)
 			exec_control &= ~SECONDARY_EXEC_RDTSCP;
 		/* Take the following fields only from vmcs12 */
@@ -7808,10 +7860,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
 	 * bits are further modified by vmx_set_efer() below.
 	 */
-	exit_control = vmcs_config.vmexit_ctrl;
-	if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
-		exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
-	vm_exit_controls_init(vmx, exit_control);
+	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
 
 	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
 	 * emulated by vmx_set_efer(), below.
@@ -7830,6 +7879,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
 	set_cr4_guest_host_mask(vmx);
 
+	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
+		vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+
 	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
 		vmcs_write64(TSC_OFFSET,
 			vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
@@ -8155,6 +8207,58 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
 	}
 }
 
+static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
+	    vmx->nested.preemption_timer_expired) {
+		if (vmx->nested.nested_run_pending)
+			return -EBUSY;
+		nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
+		return 0;
+	}
+
+	if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
+		if (vmx->nested.nested_run_pending ||
+		    vcpu->arch.interrupt.pending)
+			return -EBUSY;
+		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+				  NMI_VECTOR | INTR_TYPE_NMI_INTR |
+				  INTR_INFO_VALID_MASK, 0);
+		/*
+		 * The NMI-triggered VM exit counts as injection:
+		 * clear this one and block further NMIs.
+		 */
+		vcpu->arch.nmi_pending = 0;
+		vmx_set_nmi_mask(vcpu, true);
+		return 0;
+	}
+
+	if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
+	    nested_exit_on_intr(vcpu)) {
+		if (vmx->nested.nested_run_pending)
+			return -EBUSY;
+		nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
+	}
+
+	return 0;
+}
+
+static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
+{
+	ktime_t remaining =
+		hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
+	u64 value;
+
+	if (ktime_to_ns(remaining) <= 0)
+		return 0;
+
+	value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
+	do_div(value, 1000000);
+	return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
+}
+
 /*
  * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
  * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
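vmx_get_preemption_timer_value() above is the inverse of the conversion in vmx_start_preemption_timer(): the remaining hrtimer time is turned back into timer ticks so it can be reported to L1 when VM_EXIT_SAVE_VMX_PREEMPTION_TIMER is set. A sketch mirroring the earlier example (again with an assumed 2.4 GHz virtual TSC, not kernel code):

/* Inverse of the previous sketch: remaining nanoseconds back to
 * preemption-timer ticks, as in vmx_get_preemption_timer_value(). */
#include <stdint.h>
#include <stdio.h>

#define EMULATED_PREEMPTION_TIMER_RATE 5

static uint64_t ns_to_timer_value(uint64_t ns, uint64_t virtual_tsc_khz)
{
	uint64_t value = ns * virtual_tsc_khz;	/* ns * kHz             */
	value /= 1000000;			/* = TSC cycles         */
	return value >> EMULATED_PREEMPTION_TIMER_RATE; /* cycles -> ticks */
}

int main(void)
{
	/* round-tripping the 13333 ns from the earlier example: */
	printf("%llu ticks\n",
	       (unsigned long long)ns_to_timer_value(13333, 2400000));
	/* prints 999: integer division means the value only shrinks */
	return 0;
}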
@@ -8225,10 +8329,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	else
 		vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
 
-	if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
-	    (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
-		vmcs12->vmx_preemption_timer_value =
-			vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+	if (nested_cpu_has_preemption_timer(vmcs12)) {
+		if (vmcs12->vm_exit_controls &
+		    VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
+			vmcs12->vmx_preemption_timer_value =
+				vmx_get_preemption_timer_value(vcpu);
+		hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
+	}
 
 	/*
 	 * In some cases (usually, nested EPT), L2 is allowed to change its
@@ -8260,6 +8367,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
 	vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
 	vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
+	if (vmx_mpx_supported())
+		vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
 
 	/* update exit information fields: */
 
@@ -8369,6 +8478,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
 	vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
 
+	/* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
+	if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
+		vmcs_write64(GUEST_BNDCFGS, 0);
+
 	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
 		vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
 		vcpu->arch.pat = vmcs12->host_ia32_pat;
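The comment in the hunk above is the whole story for nested MPX state on exit: when L1 set VM_EXIT_CLEAR_BNDCFGS, BNDCFGS is zeroed as it switches back to L1; otherwise whatever L2 left there is simply propagated. A toy model of that decision, with an example register value chosen for illustration (not kernel code):

/* What L1 observes in IA32_BNDCFGS after a nested vmexit, per the
 * VM_EXIT_CLEAR_BNDCFGS handling above. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t bndcfgs_after_vmexit(uint64_t l2_bndcfgs, bool clear_bndcfgs)
{
	return clear_bndcfgs ? 0 : l2_bndcfgs;	/* cleared vs. propagated */
}

int main(void)
{
	uint64_t l2 = 0x1001;	/* example: bound-table base 0x1000, EN set */

	printf("clear set:   %#llx\n",
	       (unsigned long long)bndcfgs_after_vmexit(l2, true));
	printf("clear unset: %#llx\n",
	       (unsigned long long)bndcfgs_after_vmexit(l2, false));
	return 0;
}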
@@ -8495,6 +8608,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 	nested_vmx_succeed(vcpu);
 	if (enable_shadow_vmcs)
 		vmx->nested.sync_shadow_vmcs = true;
+
+	/* in case we halted in L2 */
+	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 }
 
 /*
@@ -8573,6 +8689,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.get_dr6 = vmx_get_dr6,
 	.set_dr6 = vmx_set_dr6,
 	.set_dr7 = vmx_set_dr7,
+	.sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
 	.cache_reg = vmx_cache_reg,
 	.get_rflags = vmx_get_rflags,
 	.set_rflags = vmx_set_rflags,
@@ -8634,6 +8751,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 	.check_intercept = vmx_check_intercept,
 	.handle_external_intr = vmx_handle_external_intr,
+	.mpx_supported = vmx_mpx_supported,
+
+	.check_nested_events = vmx_check_nested_events,
 };
 
 static int __init vmx_init(void)
@@ -8721,6 +8841,8 @@ static int __init vmx_init(void)
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
+
 	memcpy(vmx_msr_bitmap_legacy_x2apic,
 			vmx_msr_bitmap_legacy, PAGE_SIZE);
 	memcpy(vmx_msr_bitmap_longmode_x2apic,