diff options
author | Wanpeng Li <wanpeng.li@hotmail.com> | 2017-11-05 19:54:49 -0500 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2017-11-17 07:20:14 -0500 |
commit | 5af4157388adad82c339e3742fb6b67840721347 (patch) | |
tree | 273c2610dfcf1a866e11c2ebd3505fbff80dd18a | |
parent | f1b026a3310a441f504640dd3d9765eb533386b8 (diff) |
KVM: nVMX: Fix mmu context after VMLAUNCH/VMRESUME failure
Commit 4f350c6dbcb (kvm: nVMX: Handle deferred early VMLAUNCH/VMRESUME failure
properly) can result in a null pointer dereference in L1 (when running
kvm-unit-tests/run_tests.sh vmx_controls in L1) and also a call trace in L0 when EPT=0 on both L0 and L1.
In L1:
BUG: unable to handle kernel paging request at ffffffffc015bf8f
IP: vmx_vcpu_run+0x202/0x510 [kvm_intel]
PGD 146e13067 P4D 146e13067 PUD 146e15067 PMD 3d2686067 PTE 3d4af9161
Oops: 0003 [#1] PREEMPT SMP
CPU: 2 PID: 1798 Comm: qemu-system-x86 Not tainted 4.14.0-rc4+ #6
RIP: 0010:vmx_vcpu_run+0x202/0x510 [kvm_intel]
Call Trace:
WARNING: kernel stack frame pointer at ffffb86f4988bc18 in qemu-system-x86:1798 has bad value 0000000000000002
In L0:
-----------[ cut here ]------------
WARNING: CPU: 6 PID: 4460 at /home/kernel/linux/arch/x86/kvm//vmx.c:9845 vmx_inject_page_fault_nested+0x130/0x140 [kvm_intel]
CPU: 6 PID: 4460 Comm: qemu-system-x86 Tainted: G OE 4.14.0-rc7+ #25
RIP: 0010:vmx_inject_page_fault_nested+0x130/0x140 [kvm_intel]
Call Trace:
paging64_page_fault+0x500/0xde0 [kvm]
? paging32_gva_to_gpa_nested+0x120/0x120 [kvm]
? nonpaging_page_fault+0x3b0/0x3b0 [kvm]
? __asan_storeN+0x12/0x20
? paging64_gva_to_gpa+0xb0/0x120 [kvm]
? paging64_walk_addr_generic+0x11a0/0x11a0 [kvm]
? lock_acquire+0x2c0/0x2c0
? vmx_read_guest_seg_ar+0x97/0x100 [kvm_intel]
? vmx_get_segment+0x2a6/0x310 [kvm_intel]
? sched_clock+0x1f/0x30
? check_chain_key+0x137/0x1e0
? __lock_acquire+0x83c/0x2420
? kvm_multiple_exception+0xf2/0x220 [kvm]
? debug_check_no_locks_freed+0x240/0x240
? debug_smp_processor_id+0x17/0x20
? __lock_is_held+0x9e/0x100
kvm_mmu_page_fault+0x90/0x180 [kvm]
kvm_handle_page_fault+0x15c/0x310 [kvm]
? __lock_is_held+0x9e/0x100
handle_exception+0x3c7/0x4d0 [kvm_intel]
vmx_handle_exit+0x103/0x1010 [kvm_intel]
? kvm_arch_vcpu_ioctl_run+0x1628/0x2e20 [kvm]
That commit avoids loading the host state of vmcs12 as vmcs01's guest state,
since vmcs12 is not modified (except for the VM-instruction error field)
if the check of the VMCS control area fails. However, the mmu context is
switched to the nested mmu in prepare_vmcs02(), and it will not be reloaded
because load_vmcs12_host_state() is skipped when a nested VMLAUNCH/VMRESUME
fails. This patch fixes it by reloading the mmu context when a nested
VMLAUNCH/VMRESUME fails.
Reviewed-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Jim Mattson <jmattson@google.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
-rw-r--r-- | arch/x86/kvm/vmx.c | 34 |
1 files changed, 22 insertions, 12 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 707aaa954b3d..10474d26a000 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -11330,6 +11330,24 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
11330 | kvm_clear_interrupt_queue(vcpu); | 11330 | kvm_clear_interrupt_queue(vcpu); |
11331 | } | 11331 | } |
11332 | 11332 | ||
11333 | static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu, | ||
11334 | struct vmcs12 *vmcs12) | ||
11335 | { | ||
11336 | u32 entry_failure_code; | ||
11337 | |||
11338 | nested_ept_uninit_mmu_context(vcpu); | ||
11339 | |||
11340 | /* | ||
11341 | * Only PDPTE load can fail as the value of cr3 was checked on entry and | ||
11342 | * couldn't have changed. | ||
11343 | */ | ||
11344 | if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) | ||
11345 | nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); | ||
11346 | |||
11347 | if (!enable_ept) | ||
11348 | vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; | ||
11349 | } | ||
11350 | |||
11333 | /* | 11351 | /* |
11334 | * A part of what we need to when the nested L2 guest exits and we want to | 11352 | * A part of what we need to when the nested L2 guest exits and we want to |
11335 | * run its L1 parent, is to reset L1's guest state to the host state specified | 11353 | * run its L1 parent, is to reset L1's guest state to the host state specified |
@@ -11343,7 +11361,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
11343 | struct vmcs12 *vmcs12) | 11361 | struct vmcs12 *vmcs12) |
11344 | { | 11362 | { |
11345 | struct kvm_segment seg; | 11363 | struct kvm_segment seg; |
11346 | u32 entry_failure_code; | ||
11347 | 11364 | ||
11348 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) | 11365 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) |
11349 | vcpu->arch.efer = vmcs12->host_ia32_efer; | 11366 | vcpu->arch.efer = vmcs12->host_ia32_efer; |
@@ -11370,17 +11387,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
11370 | vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); | 11387 | vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); |
11371 | vmx_set_cr4(vcpu, vmcs12->host_cr4); | 11388 | vmx_set_cr4(vcpu, vmcs12->host_cr4); |
11372 | 11389 | ||
11373 | nested_ept_uninit_mmu_context(vcpu); | 11390 | load_vmcs12_mmu_host_state(vcpu, vmcs12); |
11374 | |||
11375 | /* | ||
11376 | * Only PDPTE load can fail as the value of cr3 was checked on entry and | ||
11377 | * couldn't have changed. | ||
11378 | */ | ||
11379 | if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) | ||
11380 | nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); | ||
11381 | |||
11382 | if (!enable_ept) | ||
11383 | vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; | ||
11384 | 11391 | ||
11385 | if (enable_vpid) { | 11392 | if (enable_vpid) { |
11386 | /* | 11393 | /* |
@@ -11610,6 +11617,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
11610 | * accordingly. | 11617 | * accordingly. |
11611 | */ | 11618 | */ |
11612 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 11619 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
11620 | |||
11621 | load_vmcs12_mmu_host_state(vcpu, vmcs12); | ||
11622 | |||
11613 | /* | 11623 | /* |
11614 | * The emulated instruction was already skipped in | 11624 | * The emulated instruction was already skipped in |
11615 | * nested_vmx_run, but the updated RIP was never | 11625 | * nested_vmx_run, but the updated RIP was never |