author     Paolo Bonzini <pbonzini@redhat.com>    2017-07-27 06:29:32 -0400
committer  Radim Krčmář <rkrcmar@redhat.com>      2017-08-01 16:24:17 -0400
commit     b96fb439774e1bfb7d027ad324fa48606167cb52
tree       58e3ba4e92cb0e59065aa75423f44937c213ea8d
parent     7313c698050387a11c21afb0c6b4c61f21f7c042
KVM: nVMX: fixes to nested virt interrupt injection
There are three issues in nested_vmx_check_exception:

1) it is not taking PFEC_MATCH/PFEC_MASK into account, as reported by
Wanpeng Li;

2) it should rebuild the interruption info and exit qualification fields
from scratch, as reported by Jim Mattson, because the values from the
L2->L0 vmexit may be invalid (e.g. if an emulated instruction causes a
page fault, the EPT misconfig's exit qualification is incorrect).

3) CR2 and DR6 should not be written for exception intercept vmexits
(CR2 only for AMD).

This patch fixes the first two and adds a comment about the last,
outlining the fix.

Cc: Jim Mattson <jmattson@google.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r--  arch/x86/kvm/svm.c  10
-rw-r--r--  arch/x86/kvm/vmx.c  87
2 files changed, 72 insertions(+), 25 deletions(-)
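For reference, the PFEC_MATCH/PFEC_MASK test that issue 1) refers to is
applied by nested_vmx_is_page_fault_vmexit(), which this patch only
declares and calls; its body lives elsewhere in vmx.c and predates this
patch. A minimal sketch of that check, assuming the vmcs12 field names
used elsewhere in this diff:

static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
					    u16 error_code)
{
	/* Is #PF intercepted via bit PF_VECTOR of L1's exception bitmap? */
	bool bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
	/* Does the masked error code differ from PFEC_MATCH? */
	bool inequality = (error_code & vmcs12->page_fault_error_code_mask) !=
			  vmcs12->page_fault_error_code_match;

	/*
	 * Per the SDM, a PFEC match flips the exception-bitmap decision:
	 * vmexit on (intercepted && match) || (!intercepted && mismatch).
	 */
	return inequality ^ bit;
}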
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 4d8141e533c3..1107626938cc 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2430,6 +2430,16 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
 	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
 	svm->vmcb->control.exit_code_hi = 0;
 	svm->vmcb->control.exit_info_1 = error_code;
+
+	/*
+	 * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception.
+	 * The fix is to add the ancillary datum (CR2 or DR6) to structs
+	 * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be
+	 * written only when inject_pending_event runs (DR6 would be written
+	 * here too).  This should be conditional on a new capability---if the
+	 * capability is disabled, kvm_multiple_exception would write the
+	 * ancillary information to CR2 or DR6, for backwards ABI-compatibility.
+	 */
 	if (svm->vcpu.arch.exception.nested_apf)
 		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
 	else
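The FIXME above describes a plan rather than code in this patch. A purely
hypothetical sketch of the direction it outlines (the has_payload/payload
names are invented here for illustration, and the existing fields are
abbreviated):

struct kvm_queued_exception {
	bool pending;
	bool has_error_code;
	u8 nr;
	u32 error_code;
	/*
	 * Proposed ancillary datum (CR2 for #PF, DR6 for #DB), applied to
	 * vCPU state only when inject_pending_event() delivers the
	 * exception, and exposed via kvm_vcpu_events behind a new
	 * capability for backwards ABI-compatibility.
	 */
	bool has_payload;
	unsigned long payload;
};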
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 19465b73cc07..714d4364ef87 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -927,6 +927,10 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
 static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
 static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
 static int alloc_identity_pagetable(struct kvm *kvm);
+static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
+static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
+static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
+					    u16 error_code);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -2428,6 +2432,30 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	vmx_set_interrupt_shadow(vcpu, 0);
 }
 
+static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
+					       unsigned long exit_qual)
+{
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	unsigned int nr = vcpu->arch.exception.nr;
+	u32 intr_info = nr | INTR_INFO_VALID_MASK;
+
+	if (vcpu->arch.exception.has_error_code) {
+		vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
+		intr_info |= INTR_INFO_DELIVER_CODE_MASK;
+	}
+
+	if (kvm_exception_is_soft(nr))
+		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
+	else
+		intr_info |= INTR_TYPE_HARD_EXCEPTION;
+
+	if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
+	    vmx_get_nmi_mask(vcpu))
+		intr_info |= INTR_INFO_UNBLOCK_NMI;
+
+	nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
+}
+
 /*
  * KVM wants to inject page-faults which it got to the guest. This function
  * checks whether in a nested guest, we need to inject them to L1 or L2.
@@ -2437,24 +2465,38 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 	unsigned int nr = vcpu->arch.exception.nr;
 
-	if (!((vmcs12->exception_bitmap & (1u << nr)) ||
-		(nr == PF_VECTOR && vcpu->arch.exception.nested_apf)))
-		return 0;
+	if (nr == PF_VECTOR) {
+		if (vcpu->arch.exception.nested_apf) {
+			nested_vmx_inject_exception_vmexit(vcpu,
+							   vcpu->arch.apf.nested_apf_token);
+			return 1;
+		}
+		/*
+		 * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception.
+		 * The fix is to add the ancillary datum (CR2 or DR6) to structs
+		 * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
+		 * can be written only when inject_pending_event runs.  This should be
+		 * conditional on a new capability---if the capability is disabled,
+		 * kvm_multiple_exception would write the ancillary information to
+		 * CR2 or DR6, for backwards ABI-compatibility.
+		 */
+		if (nested_vmx_is_page_fault_vmexit(vmcs12,
+						    vcpu->arch.exception.error_code)) {
+			nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2);
+			return 1;
+		}
+	} else {
+		unsigned long exit_qual = 0;
+		if (nr == DB_VECTOR)
+			exit_qual = vcpu->arch.dr6;
 
-	if (vcpu->arch.exception.nested_apf) {
-		vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
-		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
-			PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
-			INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
-			vcpu->arch.apf.nested_apf_token);
-		return 1;
-	}
+		if (vmcs12->exception_bitmap & (1u << nr)) {
+			nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
+			return 1;
+		}
+	}
 
-	vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
-	nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
-			  vmcs_read32(VM_EXIT_INTR_INFO),
-			  vmcs_readl(EXIT_QUALIFICATION));
-	return 1;
+	return 0;
 }
 
 static void vmx_queue_exception(struct kvm_vcpu *vcpu)
@@ -9529,10 +9571,11 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
 	WARN_ON(!is_guest_mode(vcpu));
 
 	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) {
-		vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
-		nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
-				  vmcs_read32(VM_EXIT_INTR_INFO),
-				  vmcs_readl(EXIT_QUALIFICATION));
+		vmcs12->vm_exit_intr_error_code = fault->error_code;
+		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+				  PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
+				  INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
+				  fault->address);
 	} else {
 		kvm_inject_page_fault(vcpu, fault);
 	}
@@ -10115,12 +10158,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10115 * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept, 10158 * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
10116 * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when 10159 * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
10117 * !enable_ept, EB.PF is 1, so the "or" will always be 1. 10160 * !enable_ept, EB.PF is 1, so the "or" will always be 1.
10118 *
10119 * A problem with this approach (when !enable_ept) is that L1 may be
10120 * injected with more page faults than it asked for. This could have
10121 * caused problems, but in practice existing hypervisors don't care.
10122 * To fix this, we will need to emulate the PFEC checking (on the L1
10123 * page tables), using walk_addr(), when injecting PFs to L1.
10124 */ 10161 */
10125 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 10162 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
10126 enable_ept ? vmcs12->page_fault_error_code_mask : 0); 10163 enable_ept ? vmcs12->page_fault_error_code_mask : 0);
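
A usage note on the prepare_vmcs02 change above: now that
nested_vmx_check_exception emulates the PFEC check itself, the deleted
caveat about over-injecting page faults under !enable_ept no longer
applies. As an assumed example (PFERR_PRESENT_MASK is KVM's name for
bit 0 of the page-fault error code), L1 could ask to intercept only
faults on present pages:

	/* Illustration only: intercept #PF, but only when the P bit is set. */
	vmcs12->exception_bitmap |= 1u << PF_VECTOR;
	vmcs12->page_fault_error_code_mask  = PFERR_PRESENT_MASK;	/* 0x1 */
	vmcs12->page_fault_error_code_match = PFERR_PRESENT_MASK;

	/*
	 * error code 0x3 (present, write):  (0x3 & 0x1) == 0x1 -> vmexit to L1
	 * error code 0x2 (!present, write): (0x2 & 0x1) != 0x1 -> #PF goes to L2
	 */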