about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Wanpeng Li <wanpeng.li@hotmail.com> 2017-11-05 19:54:49 -0500
committer Paolo Bonzini <pbonzini@redhat.com> 2017-11-17 07:20:14 -0500
commit 5af4157388adad82c339e3742fb6b67840721347 (patch)
tree 273c2610dfcf1a866e11c2ebd3505fbff80dd18a
parent f1b026a3310a441f504640dd3d9765eb533386b8 (diff)
KVM: nVMX: Fix mmu context after VMLAUNCH/VMRESUME failure
Commit 4f350c6dbcb (kvm: nVMX: Handle deferred early VMLAUNCH/VMRESUME failure properly) can result in an L1 (running kvm-unit-tests/run_tests.sh vmx_controls in L1) null pointer dereference and also an L0 calltrace when EPT=0 on both L0 and L1.

In L1:

BUG: unable to handle kernel paging request at ffffffffc015bf8f
 IP: vmx_vcpu_run+0x202/0x510 [kvm_intel]
 PGD 146e13067 P4D 146e13067 PUD 146e15067 PMD 3d2686067 PTE 3d4af9161
 Oops: 0003 [#1] PREEMPT SMP
 CPU: 2 PID: 1798 Comm: qemu-system-x86 Not tainted 4.14.0-rc4+ #6
 RIP: 0010:vmx_vcpu_run+0x202/0x510 [kvm_intel]
 Call Trace:
 WARNING: kernel stack frame pointer at ffffb86f4988bc18 in qemu-system-x86:1798 has bad value 0000000000000002

In L0:

-----------[ cut here ]------------
 WARNING: CPU: 6 PID: 4460 at /home/kernel/linux/arch/x86/kvm//vmx.c:9845 vmx_inject_page_fault_nested+0x130/0x140 [kvm_intel]
 CPU: 6 PID: 4460 Comm: qemu-system-x86 Tainted: G OE 4.14.0-rc7+ #25
 RIP: 0010:vmx_inject_page_fault_nested+0x130/0x140 [kvm_intel]
 Call Trace:
  paging64_page_fault+0x500/0xde0 [kvm]
  ? paging32_gva_to_gpa_nested+0x120/0x120 [kvm]
  ? nonpaging_page_fault+0x3b0/0x3b0 [kvm]
  ? __asan_storeN+0x12/0x20
  ? paging64_gva_to_gpa+0xb0/0x120 [kvm]
  ? paging64_walk_addr_generic+0x11a0/0x11a0 [kvm]
  ? lock_acquire+0x2c0/0x2c0
  ? vmx_read_guest_seg_ar+0x97/0x100 [kvm_intel]
  ? vmx_get_segment+0x2a6/0x310 [kvm_intel]
  ? sched_clock+0x1f/0x30
  ? check_chain_key+0x137/0x1e0
  ? __lock_acquire+0x83c/0x2420
  ? kvm_multiple_exception+0xf2/0x220 [kvm]
  ? debug_check_no_locks_freed+0x240/0x240
  ? debug_smp_processor_id+0x17/0x20
  ? __lock_is_held+0x9e/0x100
  kvm_mmu_page_fault+0x90/0x180 [kvm]
  kvm_handle_page_fault+0x15c/0x310 [kvm]
  ? __lock_is_held+0x9e/0x100
  handle_exception+0x3c7/0x4d0 [kvm_intel]
  vmx_handle_exit+0x103/0x1010 [kvm_intel]
  ? kvm_arch_vcpu_ioctl_run+0x1628/0x2e20 [kvm]

That commit avoids loading the host state of vmcs12 as vmcs01's guest state, since vmcs12 is not modified (except for the VM-instruction error field) if the checking of the vmcs control area fails.
However, the mmu context is switched to the nested mmu in prepare_vmcs02() and it will not be reloaded since load_vmcs12_host_state() is skipped when nested VMLAUNCH/VMRESUME fails. This patch fixes it by reloading the mmu context when nested VMLAUNCH/VMRESUME fails.

Reviewed-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Jim Mattson <jmattson@google.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
-rw-r--r-- arch/x86/kvm/vmx.c 34
1 files changed, 22 insertions, 12 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 707aaa954b3d..10474d26a000 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11330,6 +11330,24 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
11330 kvm_clear_interrupt_queue(vcpu); 11330 kvm_clear_interrupt_queue(vcpu);
11331} 11331}
11332 11332
11333static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu,
11334 struct vmcs12 *vmcs12)
11335{
11336 u32 entry_failure_code;
11337
11338 nested_ept_uninit_mmu_context(vcpu);
11339
11340 /*
11341 * Only PDPTE load can fail as the value of cr3 was checked on entry and
11342 * couldn't have changed.
11343 */
11344 if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
11345 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
11346
11347 if (!enable_ept)
11348 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
11349}
11350
11333/* 11351/*
11334 * A part of what we need to when the nested L2 guest exits and we want to 11352 * A part of what we need to when the nested L2 guest exits and we want to
11335 * run its L1 parent, is to reset L1's guest state to the host state specified 11353 * run its L1 parent, is to reset L1's guest state to the host state specified
@@ -11343,7 +11361,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
11343 struct vmcs12 *vmcs12) 11361 struct vmcs12 *vmcs12)
11344{ 11362{
11345 struct kvm_segment seg; 11363 struct kvm_segment seg;
11346 u32 entry_failure_code;
11347 11364
11348 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) 11365 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
11349 vcpu->arch.efer = vmcs12->host_ia32_efer; 11366 vcpu->arch.efer = vmcs12->host_ia32_efer;
@@ -11370,17 +11387,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
11370 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); 11387 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
11371 vmx_set_cr4(vcpu, vmcs12->host_cr4); 11388 vmx_set_cr4(vcpu, vmcs12->host_cr4);
11372 11389
11373 nested_ept_uninit_mmu_context(vcpu); 11390 load_vmcs12_mmu_host_state(vcpu, vmcs12);
11374
11375 /*
11376 * Only PDPTE load can fail as the value of cr3 was checked on entry and
11377 * couldn't have changed.
11378 */
11379 if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
11380 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
11381
11382 if (!enable_ept)
11383 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
11384 11391
11385 if (enable_vpid) { 11392 if (enable_vpid) {
11386 /* 11393 /*
@@ -11610,6 +11617,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
11610 * accordingly. 11617 * accordingly.
11611 */ 11618 */
11612 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 11619 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
11620
11621 load_vmcs12_mmu_host_state(vcpu, vmcs12);
11622
11613 /* 11623 /*
11614 * The emulated instruction was already skipped in 11624 * The emulated instruction was already skipped in
11615 * nested_vmx_run, but the updated RIP was never 11625 * nested_vmx_run, but the updated RIP was never