Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--   arch/x86/kvm/vmx.c   352
1 file changed, 272 insertions(+), 80 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 03f574641852..5a87a58af49d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,7 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO);
 static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
-static bool __read_mostly enable_apicv_reg_vid;
+static bool __read_mostly enable_apicv = 1;
+module_param(enable_apicv, bool, S_IRUGO);
 
 /*
  * If nested=1, nested virtualization is supported, i.e., guests may use
@@ -366,6 +367,31 @@ struct nested_vmx {
         struct page *apic_access_page;
 };
 
+#define POSTED_INTR_ON  0
+/* Posted-Interrupt Descriptor */
+struct pi_desc {
+        u32 pir[8];     /* Posted interrupt requested */
+        u32 control;    /* bit 0 of control is outstanding notification bit */
+        u32 rsvd[7];
+} __aligned(64);
+
+static bool pi_test_and_set_on(struct pi_desc *pi_desc)
+{
+        return test_and_set_bit(POSTED_INTR_ON,
+                        (unsigned long *)&pi_desc->control);
+}
+
+static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
+{
+        return test_and_clear_bit(POSTED_INTR_ON,
+                        (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
+{
+        return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
+}
+
 struct vcpu_vmx {
         struct kvm_vcpu vcpu;
         unsigned long host_rsp;
@@ -378,6 +404,7 @@ struct vcpu_vmx {
         struct shared_msr_entry *guest_msrs;
         int                   nmsrs;
         int                   save_nmsrs;
+        unsigned long         host_idt_base;
 #ifdef CONFIG_X86_64
         u64                   msr_host_kernel_gs_base;
         u64                   msr_guest_kernel_gs_base;
@@ -429,6 +456,9 @@ struct vcpu_vmx {
 
         bool rdtscp_enabled;
 
+        /* Posted interrupt descriptor */
+        struct pi_desc pi_desc;
+
         /* Support for a guest hypervisor (nested VMX) */
         struct nested_vmx nested;
 };
@@ -626,6 +656,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
                             struct kvm_segment *var, int seg);
 static bool guest_state_valid(struct kvm_vcpu *vcpu);
 static u32 vmx_segment_access_rights(struct kvm_segment *var);
+static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -784,6 +815,18 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
                 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
 }
 
+static inline bool cpu_has_vmx_posted_intr(void)
+{
+        return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
+}
+
+static inline bool cpu_has_vmx_apicv(void)
+{
+        return cpu_has_vmx_apic_register_virt() &&
+                cpu_has_vmx_virtual_intr_delivery() &&
+                cpu_has_vmx_posted_intr();
+}
+
 static inline bool cpu_has_vmx_flexpriority(void)
 {
         return cpu_has_vmx_tpr_shadow() &&
@@ -2551,12 +2594,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
         u32 _vmexit_control = 0;
         u32 _vmentry_control = 0;
 
-        min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
-        opt = PIN_BASED_VIRTUAL_NMIS;
-        if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
-                                &_pin_based_exec_control) < 0)
-                return -EIO;
-
         min = CPU_BASED_HLT_EXITING |
 #ifdef CONFIG_X86_64
               CPU_BASED_CR8_LOAD_EXITING |
@@ -2627,11 +2664,23 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 #ifdef CONFIG_X86_64
         min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #endif
-        opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
+        opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
+                VM_EXIT_ACK_INTR_ON_EXIT;
         if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
                                 &_vmexit_control) < 0)
                 return -EIO;
 
+        min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+        opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
+        if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
+                                &_pin_based_exec_control) < 0)
+                return -EIO;
+
+        if (!(_cpu_based_2nd_exec_control &
+                SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
+                !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
+                _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
+
         min = 0;
         opt = VM_ENTRY_LOAD_IA32_PAT;
         if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
@@ -2810,14 +2859,16 @@ static __init int hardware_setup(void)
         if (!cpu_has_vmx_ple())
                 ple_gap = 0;
 
-        if (!cpu_has_vmx_apic_register_virt() ||
-                                !cpu_has_vmx_virtual_intr_delivery())
-                enable_apicv_reg_vid = 0;
+        if (!cpu_has_vmx_apicv())
+                enable_apicv = 0;
 
-        if (enable_apicv_reg_vid)
+        if (enable_apicv)
                 kvm_x86_ops->update_cr8_intercept = NULL;
-        else
+        else {
                 kvm_x86_ops->hwapic_irr_update = NULL;
+                kvm_x86_ops->deliver_posted_interrupt = NULL;
+                kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
+        }
 
         if (nested)
                 nested_vmx_setup_ctls_msrs();
@@ -3873,13 +3924,57 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
                                                 msr, MSR_TYPE_W);
 }
 
+static int vmx_vm_has_apicv(struct kvm *kvm)
+{
+        return enable_apicv && irqchip_in_kernel(kvm);
+}
+
+/*
+ * Send interrupt to vcpu via posted interrupt way.
+ * 1. If target vcpu is running(non-root mode), send posted interrupt
+ * notification to vcpu and hardware will sync PIR to vIRR atomically.
+ * 2. If target vcpu isn't running(root mode), kick it to pick up the
+ * interrupt from PIR in next vmentry.
+ */
+static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
+{
+        struct vcpu_vmx *vmx = to_vmx(vcpu);
+        int r;
+
+        if (pi_test_and_set_pir(vector, &vmx->pi_desc))
+                return;
+
+        r = pi_test_and_set_on(&vmx->pi_desc);
+        kvm_make_request(KVM_REQ_EVENT, vcpu);
+        if (!r && (vcpu->mode == IN_GUEST_MODE))
+                apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
+                                POSTED_INTR_VECTOR);
+        else
+                kvm_vcpu_kick(vcpu);
+}
+
+static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
+{
+        struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+        if (!pi_test_and_clear_on(&vmx->pi_desc))
+                return;
+
+        kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
+}
+
+static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu)
+{
+        return;
+}
+
 /*
  * Set up the vmcs's constant host-state fields, i.e., host-state fields that
  * will not change in the lifetime of the guest.
  * Note that host-state that does change is set elsewhere. E.g., host-state
  * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
  */
-static void vmx_set_constant_host_state(void)
+static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 {
         u32 low32, high32;
         unsigned long tmpl;
@@ -3907,6 +4002,7 @@ static void vmx_set_constant_host_state(void)
 
         native_store_idt(&dt);
         vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
+        vmx->host_idt_base = dt.address;
 
         vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
 
@@ -3932,6 +4028,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
         vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
 }
 
+static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
+{
+        u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
+
+        if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
+                pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+        return pin_based_exec_ctrl;
+}
+
 static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 {
         u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
@@ -3949,11 +4054,6 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
         return exec_control;
 }
 
-static int vmx_vm_has_apicv(struct kvm *kvm)
-{
-        return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
-}
-
 static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 {
         u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
@@ -4009,8 +4109,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
         vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
 
         /* Control */
-        vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
-                vmcs_config.pin_based_exec_ctrl);
+        vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
 
         vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
 
@@ -4019,13 +4118,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
                                 vmx_secondary_exec_control(vmx));
         }
 
-        if (enable_apicv_reg_vid) {
+        if (vmx_vm_has_apicv(vmx->vcpu.kvm)) {
                 vmcs_write64(EOI_EXIT_BITMAP0, 0);
                 vmcs_write64(EOI_EXIT_BITMAP1, 0);
                 vmcs_write64(EOI_EXIT_BITMAP2, 0);
                 vmcs_write64(EOI_EXIT_BITMAP3, 0);
 
                 vmcs_write16(GUEST_INTR_STATUS, 0);
+
+                vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
+                vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
         }
 
         if (ple_gap) {
@@ -4039,7 +4141,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
         vmcs_write16(HOST_FS_SELECTOR, 0);            /* 22.2.4 */
         vmcs_write16(HOST_GS_SELECTOR, 0);            /* 22.2.4 */
-        vmx_set_constant_host_state();
+        vmx_set_constant_host_state(vmx);
 #ifdef CONFIG_X86_64
         rdmsrl(MSR_FS_BASE, a);
         vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
@@ -4167,6 +4269,9 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
                 vmcs_write64(APIC_ACCESS_ADDR,
                              page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
 
+        if (vmx_vm_has_apicv(vcpu->kvm))
+                memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
+
         if (vmx->vpid != 0)
                 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
 
@@ -4325,16 +4430,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
-        if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) {
+        if (is_guest_mode(vcpu)) {
                 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-                if (to_vmx(vcpu)->nested.nested_run_pending ||
-                    (vmcs12->idt_vectoring_info_field &
-                     VECTORING_INFO_VALID_MASK))
+
+                if (to_vmx(vcpu)->nested.nested_run_pending)
                         return 0;
-                nested_vmx_vmexit(vcpu);
-                vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
-                vmcs12->vm_exit_intr_info = 0;
-                /* fall through to normal code, but now in L1, not L2 */
+                if (nested_exit_on_intr(vcpu)) {
+                        nested_vmx_vmexit(vcpu);
+                        vmcs12->vm_exit_reason =
+                                EXIT_REASON_EXTERNAL_INTERRUPT;
+                        vmcs12->vm_exit_intr_info = 0;
+                        /*
+                         * fall through to normal code, but now in L1, not L2
+                         */
+                }
         }
 
         return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
@@ -5189,7 +5298,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
                 if (test_bit(KVM_REQ_EVENT, &vcpu->requests))
                         return 1;
 
-                err = emulate_instruction(vcpu, 0);
+                err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
 
                 if (err == EMULATE_DO_MMIO) {
                         ret = 0;
@@ -6112,14 +6221,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
         case EXIT_REASON_TRIPLE_FAULT:
                 return 1;
         case EXIT_REASON_PENDING_INTERRUPT:
+                return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
         case EXIT_REASON_NMI_WINDOW:
-                /*
-                 * prepare_vmcs02() set the CPU_BASED_VIRTUAL_INTR_PENDING bit
-                 * (aka Interrupt Window Exiting) only when L1 turned it on,
-                 * so if we got a PENDING_INTERRUPT exit, this must be for L1.
-                 * Same for NMI Window Exiting.
-                 */
-                return 1;
+                return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
         case EXIT_REASON_TASK_SWITCH:
                 return 1;
         case EXIT_REASON_CPUID:
@@ -6370,6 +6474,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
 
 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
+        if (!vmx_vm_has_apicv(vcpu->kvm))
+                return;
+
         vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
         vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
         vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
@@ -6400,6 +6507,52 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
         }
 }
 
+static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
+{
+        u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+        /*
+         * If external interrupt exists, IF bit is set in rflags/eflags on the
+         * interrupt stack frame, and interrupt will be enabled on a return
+         * from interrupt handler.
+         */
+        if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
+                        == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
+                unsigned int vector;
+                unsigned long entry;
+                gate_desc *desc;
+                struct vcpu_vmx *vmx = to_vmx(vcpu);
+#ifdef CONFIG_X86_64
+                unsigned long tmp;
+#endif
+
+                vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+                desc = (gate_desc *)vmx->host_idt_base + vector;
+                entry = gate_offset(*desc);
+                asm volatile(
+#ifdef CONFIG_X86_64
+                        "mov %%" _ASM_SP ", %[sp]\n\t"
+                        "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
+                        "push $%c[ss]\n\t"
+                        "push %[sp]\n\t"
+#endif
+                        "pushf\n\t"
+                        "orl $0x200, (%%" _ASM_SP ")\n\t"
+                        __ASM_SIZE(push) " $%c[cs]\n\t"
+                        "call *%[entry]\n\t"
+                        :
+#ifdef CONFIG_X86_64
+                        [sp]"=&r"(tmp)
+#endif
+                        :
+                        [entry]"r"(entry),
+                        [ss]"i"(__KERNEL_DS),
+                        [cs]"i"(__KERNEL_CS)
+                        );
+        } else
+                local_irq_enable();
+}
+
 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 {
         u32 exit_intr_info;
@@ -6498,8 +6651,6 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
 
 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 {
-        if (is_guest_mode(&vmx->vcpu))
-                return;
         __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
                                   VM_EXIT_INSTRUCTION_LEN,
                                   IDT_VECTORING_ERROR_CODE);
@@ -6507,8 +6658,6 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 
 static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
 {
-        if (is_guest_mode(vcpu))
-                return;
         __vmx_complete_interrupts(vcpu,
                                   vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
                                   VM_ENTRY_INSTRUCTION_LEN,
@@ -6540,21 +6689,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
         struct vcpu_vmx *vmx = to_vmx(vcpu);
         unsigned long debugctlmsr;
 
-        if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
-                struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-                if (vmcs12->idt_vectoring_info_field &
-                                VECTORING_INFO_VALID_MASK) {
-                        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-                                vmcs12->idt_vectoring_info_field);
-                        vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
-                                vmcs12->vm_exit_instruction_len);
-                        if (vmcs12->idt_vectoring_info_field &
-                                        VECTORING_INFO_DELIVER_CODE_MASK)
-                                vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
-                                        vmcs12->idt_vectoring_error_code);
-                }
-        }
-
         /* Record the guest's net vcpu time for enforced NMI injections. */
         if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
                 vmx->entry_time = ktime_get();
@@ -6713,17 +6847,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
         vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
-        if (is_guest_mode(vcpu)) {
-                struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-                vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info;
-                if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
-                        vmcs12->idt_vectoring_error_code =
-                                vmcs_read32(IDT_VECTORING_ERROR_CODE);
-                        vmcs12->vm_exit_instruction_len =
-                                vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
-                }
-        }
-
         vmx->loaded_vmcs->launched = 1;
 
         vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
@@ -6785,10 +6908,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
         put_cpu();
         if (err)
                 goto free_vmcs;
-        if (vm_need_virtualize_apic_accesses(kvm))
+        if (vm_need_virtualize_apic_accesses(kvm)) {
                 err = alloc_apic_access_page(kvm);
                 if (err)
                         goto free_vmcs;
+        }
 
         if (enable_ept) {
                 if (!kvm->arch.ept_identity_map_addr)
@@ -7071,7 +7195,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
          * Other fields are different per CPU, and will be set later when
          * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
          */
-        vmx_set_constant_host_state();
+        vmx_set_constant_host_state(vmx);
 
         /*
          * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
@@ -7330,6 +7454,48 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                       vcpu->arch.cr4_guest_owned_bits));
 }
 
+static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
+                                      struct vmcs12 *vmcs12)
+{
+        u32 idt_vectoring;
+        unsigned int nr;
+
+        if (vcpu->arch.exception.pending) {
+                nr = vcpu->arch.exception.nr;
+                idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
+
+                if (kvm_exception_is_soft(nr)) {
+                        vmcs12->vm_exit_instruction_len =
+                                vcpu->arch.event_exit_inst_len;
+                        idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
+                } else
+                        idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
+
+                if (vcpu->arch.exception.has_error_code) {
+                        idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
+                        vmcs12->idt_vectoring_error_code =
+                                vcpu->arch.exception.error_code;
+                }
+
+                vmcs12->idt_vectoring_info_field = idt_vectoring;
+        } else if (vcpu->arch.nmi_pending) {
+                vmcs12->idt_vectoring_info_field =
+                        INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
+        } else if (vcpu->arch.interrupt.pending) {
+                nr = vcpu->arch.interrupt.nr;
+                idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
+
+                if (vcpu->arch.interrupt.soft) {
+                        idt_vectoring |= INTR_TYPE_SOFT_INTR;
+                        vmcs12->vm_entry_instruction_len =
+                                vcpu->arch.event_exit_inst_len;
+                } else
+                        idt_vectoring |= INTR_TYPE_EXT_INTR;
+
+                vmcs12->idt_vectoring_info_field = idt_vectoring;
+        }
+}
+
 /*
  * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
  * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
@@ -7402,7 +7568,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
         /* TODO: These cannot have changed unless we have MSR bitmaps and
          * the relevant bit asks not to trap the change */
         vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
-        if (vmcs12->vm_entry_controls & VM_EXIT_SAVE_IA32_PAT)
+        if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
                 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
         vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
         vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
@@ -7414,16 +7580,34 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
         vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
         vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-        vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
-        vmcs12->idt_vectoring_info_field = to_vmx(vcpu)->idt_vectoring_info;
-        vmcs12->idt_vectoring_error_code =
-                vmcs_read32(IDT_VECTORING_ERROR_CODE);
+        if ((vmcs12->vm_exit_intr_info &
+             (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
+            (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK))
+                vmcs12->vm_exit_intr_error_code =
+                        vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+        vmcs12->idt_vectoring_info_field = 0;
         vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
         vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 
-        /* clear vm-entry fields which are to be cleared on exit */
-        if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+        if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
+                /* vm_entry_intr_info_field is cleared on exit. Emulate this
+                 * instead of reading the real value. */
                 vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
+
+                /*
+                 * Transfer the event that L0 or L1 may wanted to inject into
+                 * L2 to IDT_VECTORING_INFO_FIELD.
+                 */
+                vmcs12_save_pending_event(vcpu, vmcs12);
+        }
+
+        /*
+         * Drop what we picked up for L2 via vmx_complete_interrupts. It is
+         * preserved above and would only end up incorrectly in L1.
+         */
+        vcpu->arch.nmi_injected = false;
+        kvm_clear_exception_queue(vcpu);
+        kvm_clear_interrupt_queue(vcpu);
 }
 
 /*
@@ -7523,6 +7707,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
         int cpu;
         struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
+        /* trying to cancel vmlaunch/vmresume is a bug */
+        WARN_ON_ONCE(vmx->nested.nested_run_pending);
+
         leave_guest_mode(vcpu);
         prepare_vmcs12(vcpu, vmcs12);
 
@@ -7657,6 +7844,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
         .load_eoi_exitmap = vmx_load_eoi_exitmap,
         .hwapic_irr_update = vmx_hwapic_irr_update,
         .hwapic_isr_update = vmx_hwapic_isr_update,
+        .sync_pir_to_irr = vmx_sync_pir_to_irr,
+        .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
 
         .set_tss_addr = vmx_set_tss_addr,
         .get_tdp_level = get_ept_level,
@@ -7685,6 +7874,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
         .set_tdp_cr3 = vmx_set_cr3,
 
         .check_intercept = vmx_check_intercept,
+        .handle_external_intr = vmx_handle_external_intr,
 };
 
 static int __init vmx_init(void)
@@ -7741,7 +7931,7 @@ static int __init vmx_init(void)
         r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
                      __alignof__(struct vcpu_vmx), THIS_MODULE);
         if (r)
-                goto out3;
+                goto out5;
 
 #ifdef CONFIG_KEXEC
         rcu_assign_pointer(crash_vmclear_loaded_vmcss,
@@ -7759,7 +7949,7 @@ static int __init vmx_init(void)
         memcpy(vmx_msr_bitmap_longmode_x2apic,
                         vmx_msr_bitmap_longmode, PAGE_SIZE);
 
-        if (enable_apicv_reg_vid) {
+        if (enable_apicv) {
                 for (msr = 0x800; msr <= 0x8ff; msr++)
                         vmx_disable_intercept_msr_read_x2apic(msr);
 
@@ -7789,6 +7979,8 @@ static int __init vmx_init(void)
 
         return 0;
 
+out5:
+        free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 out4:
         free_page((unsigned long)vmx_msr_bitmap_longmode);
 out3: