author		Nadav Har'El <nyh@il.ibm.com>	2011-05-25 16:12:35 -0400
committer	Avi Kivity <avi@redhat.com>	2011-07-12 04:45:16 -0400
commit		644d711aa0e16111d8aba6d289caebec013e26ea (patch)
tree		73894074e7f2f6a01033fa82f60e7ca0440cba4b /arch/x86/kvm/vmx.c
parent		7c1779384a2b2479722e90778721c40811e1b7a7 (diff)
KVM: nVMX: Deciding if L0 or L1 should handle an L2 exit
This patch contains the logic of whether an L2 exit should be handled by L0 and then L2 should be resumed, or whether L1 should be run to handle this exit (using the nested_vmx_vmexit() function of the previous patch).

The basic idea is to let L1 handle the exit only if it actually asked to trap this sort of event. For example, when L2 exits on a change to CR0, we check L1's CR0_GUEST_HOST_MASK to see if L1 expressed interest in any bit which changed; if it did, we exit to L1. But if it didn't, it means that it is we (L0) that wished to trap this event, so we handle it ourselves.

The next two patches add additional logic of what to do when an interrupt or exception is injected: does L0 need to do it, should we exit to L1 to do it, or should we resume L2 and keep the exception to be injected later?

We keep a new flag, "nested_run_pending", which can override the decision of which should run next, L1 or L2. nested_run_pending=1 means that we *must* run L2 next, not L1. This is necessary in particular when L1 did a VMLAUNCH of L2 and therefore expects L2 to be run (and perhaps be injected with an event it specified, etc.). nested_run_pending is especially intended to avoid switching to L1 in the injection decision-point described above.

Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
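As a rough illustration of the decision rule described above (not part of the patch itself), here is a minimal, self-contained C sketch of the CR0 test: L1 should handle a "mov to cr0" exit only if some bit that actually changed, relative to L1's cr0_read_shadow, is owned by L1 via cr0_guest_host_mask. The struct and field names below merely mirror the vmcs12 fields the patch uses; the values in main() are made-up examples.

#include <stdio.h>
#include <stdint.h>

/* Stand-alone sketch, not kernel code: just the mask-and-XOR test. */
struct vmcs12_sketch {
        uint64_t cr0_guest_host_mask;   /* bits L1 asked to own/intercept */
        uint64_t cr0_read_shadow;       /* CR0 value L1 expects L2 to see  */
};

static int l1_should_handle_cr0_write(const struct vmcs12_sketch *vmcs12,
                                      uint64_t new_val)
{
        /* Exit to L1 iff a bit L1 owns differs from its read shadow. */
        return (vmcs12->cr0_guest_host_mask &
                (new_val ^ vmcs12->cr0_read_shadow)) != 0;
}

int main(void)
{
        struct vmcs12_sketch v = {
                .cr0_guest_host_mask = 0x1,     /* L1 owns only CR0.PE */
                .cr0_read_shadow     = 0x1,
        };

        /* L2 sets a bit L1 does not own (CR0.MP): L0 handles it itself. */
        printf("write 0x3 -> exit to L1? %d\n",
               l1_should_handle_cr0_write(&v, 0x3));
        /* L2 clears a bit L1 owns (CR0.PE): exit to L1. */
        printf("write 0x0 -> exit to L1? %d\n",
               l1_should_handle_cr0_write(&v, 0x0));
        return 0;
}

The same XOR-and-mask pattern appears in nested_vmx_exit_handled_cr() in the diff below, for CR0, CR4, and lmsw.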
Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--	arch/x86/kvm/vmx.c | 253
1 file changed, 252 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ee25b9fdfa82..7f62dc36af9b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -346,6 +346,8 @@ struct nested_vmx {
         struct list_head vmcs02_pool;
         int vmcs02_num;
         u64 vmcs01_tsc_offset;
+        /* L2 must run next, and mustn't decide to exit to L1. */
+        bool nested_run_pending;
         /*
          * Guest pages referred to in vmcs02 with host-physical pointers, so
          * we must keep them pinned while L2 runs.
@@ -865,6 +867,19 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
                 (vmcs12->secondary_vm_exec_control & bit);
 }
 
+static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12,
+        struct kvm_vcpu *vcpu)
+{
+        return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
+}
+
+static inline bool is_exception(u32 intr_info)
+{
+        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
+                == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
+}
+
+static void nested_vmx_vmexit(struct kvm_vcpu *vcpu);
 static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
                         struct vmcs12 *vmcs12,
                         u32 reason, unsigned long qualification);
@@ -5277,6 +5292,229 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 static const int kvm_vmx_max_exit_handlers =
         ARRAY_SIZE(kvm_vmx_exit_handlers);
 
+/*
+ * Return 1 if we should exit from L2 to L1 to handle an MSR access,
+ * rather than handle it ourselves in L0. I.e., check whether L1 expressed
+ * disinterest in the current event (read or write a specific MSR) by using an
+ * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
+ */
+static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
+        struct vmcs12 *vmcs12, u32 exit_reason)
+{
+        u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX];
+        gpa_t bitmap;
+
+        if (!nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_USE_MSR_BITMAPS))
+                return 1;
+
+        /*
+         * The MSR_BITMAP page is divided into four 1024-byte bitmaps,
+         * for the four combinations of read/write and low/high MSR numbers.
+         * First we need to figure out which of the four to use:
+         */
+        bitmap = vmcs12->msr_bitmap;
+        if (exit_reason == EXIT_REASON_MSR_WRITE)
+                bitmap += 2048;
+        if (msr_index >= 0xc0000000) {
+                msr_index -= 0xc0000000;
+                bitmap += 1024;
+        }
+
+        /* Then read the msr_index'th bit from this bitmap: */
+        if (msr_index < 1024*8) {
+                unsigned char b;
+                kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1);
+                return 1 & (b >> (msr_index & 7));
+        } else
+                return 1; /* let L1 handle the wrong parameter */
+}
+
+/*
+ * Return 1 if we should exit from L2 to L1 to handle a CR access exit,
+ * rather than handle it ourselves in L0. I.e., check if L1 wanted to
+ * intercept (via guest_host_mask etc.) the current event.
+ */
+static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
+        struct vmcs12 *vmcs12)
+{
+        unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+        int cr = exit_qualification & 15;
+        int reg = (exit_qualification >> 8) & 15;
+        unsigned long val = kvm_register_read(vcpu, reg);
+
+        switch ((exit_qualification >> 4) & 3) {
+        case 0: /* mov to cr */
+                switch (cr) {
+                case 0:
+                        if (vmcs12->cr0_guest_host_mask &
+                            (val ^ vmcs12->cr0_read_shadow))
+                                return 1;
+                        break;
+                case 3:
+                        if ((vmcs12->cr3_target_count >= 1 &&
+                                        vmcs12->cr3_target_value0 == val) ||
+                                (vmcs12->cr3_target_count >= 2 &&
+                                        vmcs12->cr3_target_value1 == val) ||
+                                (vmcs12->cr3_target_count >= 3 &&
+                                        vmcs12->cr3_target_value2 == val) ||
+                                (vmcs12->cr3_target_count >= 4 &&
+                                        vmcs12->cr3_target_value3 == val))
+                                return 0;
+                        if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
+                                return 1;
+                        break;
+                case 4:
+                        if (vmcs12->cr4_guest_host_mask &
+                            (vmcs12->cr4_read_shadow ^ val))
+                                return 1;
+                        break;
+                case 8:
+                        if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
+                                return 1;
+                        break;
+                }
+                break;
+        case 2: /* clts */
+                if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
+                    (vmcs12->cr0_read_shadow & X86_CR0_TS))
+                        return 1;
+                break;
+        case 1: /* mov from cr */
+                switch (cr) {
+                case 3:
+                        if (vmcs12->cpu_based_vm_exec_control &
+                            CPU_BASED_CR3_STORE_EXITING)
+                                return 1;
+                        break;
+                case 8:
+                        if (vmcs12->cpu_based_vm_exec_control &
+                            CPU_BASED_CR8_STORE_EXITING)
+                                return 1;
+                        break;
+                }
+                break;
+        case 3: /* lmsw */
+                /*
+                 * lmsw can change bits 1..3 of cr0, and only set bit 0 of
+                 * cr0. Other attempted changes are ignored, with no exit.
+                 */
+                if (vmcs12->cr0_guest_host_mask & 0xe &
+                                (val ^ vmcs12->cr0_read_shadow))
+                        return 1;
+                if ((vmcs12->cr0_guest_host_mask & 0x1) &&
+                                !(vmcs12->cr0_read_shadow & 0x1) &&
+                                (val & 0x1))
+                        return 1;
+                break;
+        }
+        return 0;
+}
+
+/*
+ * Return 1 if we should exit from L2 to L1 to handle an exit, or 0 if we
+ * should handle it ourselves in L0 (and then continue L2). Only call this
+ * when in is_guest_mode (L2).
+ */
+static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
+{
+        u32 exit_reason = vmcs_read32(VM_EXIT_REASON);
+        u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+        struct vcpu_vmx *vmx = to_vmx(vcpu);
+        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+        if (vmx->nested.nested_run_pending)
+                return 0;
+
+        if (unlikely(vmx->fail)) {
+                printk(KERN_INFO "%s failed vm entry %x\n",
+                       __func__, vmcs_read32(VM_INSTRUCTION_ERROR));
+                return 1;
+        }
+
+        switch (exit_reason) {
+        case EXIT_REASON_EXCEPTION_NMI:
+                if (!is_exception(intr_info))
+                        return 0;
+                else if (is_page_fault(intr_info))
+                        return enable_ept;
+                return vmcs12->exception_bitmap &
+                                (1u << (intr_info & INTR_INFO_VECTOR_MASK));
+        case EXIT_REASON_EXTERNAL_INTERRUPT:
+                return 0;
+        case EXIT_REASON_TRIPLE_FAULT:
+                return 1;
+        case EXIT_REASON_PENDING_INTERRUPT:
+        case EXIT_REASON_NMI_WINDOW:
+                /*
+                 * prepare_vmcs02() set the CPU_BASED_VIRTUAL_INTR_PENDING bit
+                 * (aka Interrupt Window Exiting) only when L1 turned it on,
+                 * so if we got a PENDING_INTERRUPT exit, this must be for L1.
+                 * Same for NMI Window Exiting.
+                 */
+                return 1;
+        case EXIT_REASON_TASK_SWITCH:
+                return 1;
+        case EXIT_REASON_CPUID:
+                return 1;
+        case EXIT_REASON_HLT:
+                return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
+        case EXIT_REASON_INVD:
+                return 1;
+        case EXIT_REASON_INVLPG:
+                return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
+        case EXIT_REASON_RDPMC:
+                return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
+        case EXIT_REASON_RDTSC:
+                return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
+        case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
+        case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
+        case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
+        case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
+        case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
+                /*
+                 * VMX instructions trap unconditionally. This allows L1 to
+                 * emulate them for its L2 guest, i.e., allows 3-level nesting!
+                 */
+                return 1;
+        case EXIT_REASON_CR_ACCESS:
+                return nested_vmx_exit_handled_cr(vcpu, vmcs12);
+        case EXIT_REASON_DR_ACCESS:
+                return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
+        case EXIT_REASON_IO_INSTRUCTION:
+                /* TODO: support IO bitmaps */
+                return 1;
+        case EXIT_REASON_MSR_READ:
+        case EXIT_REASON_MSR_WRITE:
+                return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
+        case EXIT_REASON_INVALID_STATE:
+                return 1;
+        case EXIT_REASON_MWAIT_INSTRUCTION:
+                return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
+        case EXIT_REASON_MONITOR_INSTRUCTION:
+                return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
+        case EXIT_REASON_PAUSE_INSTRUCTION:
+                return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) ||
+                        nested_cpu_has2(vmcs12,
+                                SECONDARY_EXEC_PAUSE_LOOP_EXITING);
+        case EXIT_REASON_MCE_DURING_VMENTRY:
+                return 0;
+        case EXIT_REASON_TPR_BELOW_THRESHOLD:
+                return 1;
+        case EXIT_REASON_APIC_ACCESS:
+                return nested_cpu_has2(vmcs12,
+                        SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
+        case EXIT_REASON_EPT_VIOLATION:
+        case EXIT_REASON_EPT_MISCONFIG:
+                return 0;
+        case EXIT_REASON_WBINVD:
+                return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
+        case EXIT_REASON_XSETBV:
+                return 1;
+        default:
+                return 1;
+        }
+}
+
 static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
 {
         *info1 = vmcs_readl(EXIT_QUALIFICATION);
@@ -5299,6 +5537,17 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
         if (vmx->emulation_required && emulate_invalid_guest_state)
                 return handle_invalid_guest_state(vcpu);
 
+        if (exit_reason == EXIT_REASON_VMLAUNCH ||
+            exit_reason == EXIT_REASON_VMRESUME)
+                vmx->nested.nested_run_pending = 1;
+        else
+                vmx->nested.nested_run_pending = 0;
+
+        if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
+                nested_vmx_vmexit(vcpu);
+                return 1;
+        }
+
         if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
                 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
                 vcpu->run->fail_entry.hardware_entry_failure_reason
@@ -5321,7 +5570,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
                        "(0x%x) and exit reason is 0x%x\n",
                        __func__, vectoring_info, exit_reason);
 
-        if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
+        if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
+            !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
+                                        get_vmcs12(vcpu), vcpu)))) {
                 if (vmx_interrupt_allowed(vcpu)) {
                         vmx->soft_vnmi_blocked = 0;
                 } else if (vmx->vnmi_blocked_time > 1000000000LL &&