aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/vmx.c
diff options
context:
space:
mode:
authorSheng Yang <sheng.yang@intel.com>2008-04-28 00:24:45 -0400
committerAvi Kivity <avi@qumranet.com>2008-05-04 07:44:42 -0400
commit1439442c7b257b47a83aea4daed8fbf4a32cdff9 (patch)
treec85c885ef200480ab67342ddc63c8bc1d2d4e148 /arch/x86/kvm/vmx.c
parentb7ebfb0509692cd923e31650f81ed4d79c9a3e59 (diff)
KVM: VMX: Enable EPT feature for KVM
Signed-off-by: Sheng Yang <sheng.yang@intel.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--arch/x86/kvm/vmx.c229
1 files changed, 221 insertions, 8 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index de5f6150f2f7..bfe4db11989c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -42,7 +42,7 @@ module_param(enable_vpid, bool, 0);
42static int flexpriority_enabled = 1; 42static int flexpriority_enabled = 1;
43module_param(flexpriority_enabled, bool, 0); 43module_param(flexpriority_enabled, bool, 0);
44 44
45static int enable_ept; 45static int enable_ept = 1;
46module_param(enable_ept, bool, 0); 46module_param(enable_ept, bool, 0);
47 47
48struct vmcs { 48struct vmcs {
@@ -284,6 +284,18 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
284 : : "a"(&operand), "c"(ext) : "cc", "memory"); 284 : : "a"(&operand), "c"(ext) : "cc", "memory");
285} 285}
286 286
287static inline void __invept(int ext, u64 eptp, gpa_t gpa)
288{
289 struct {
290 u64 eptp, gpa;
291 } operand = {eptp, gpa};
292
293 asm volatile (ASM_VMX_INVEPT
294 /* CF==1 or ZF==1 --> rc = -1 */
295 "; ja 1f ; ud2 ; 1:\n"
296 : : "a" (&operand), "c" (ext) : "cc", "memory");
297}
298
287static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) 299static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
288{ 300{
289 int i; 301 int i;
@@ -335,6 +347,33 @@ static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
335 __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); 347 __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
336} 348}
337 349
350static inline void ept_sync_global(void)
351{
352 if (cpu_has_vmx_invept_global())
353 __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
354}
355
356static inline void ept_sync_context(u64 eptp)
357{
358 if (vm_need_ept()) {
359 if (cpu_has_vmx_invept_context())
360 __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
361 else
362 ept_sync_global();
363 }
364}
365
366static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
367{
368 if (vm_need_ept()) {
369 if (cpu_has_vmx_invept_individual_addr())
370 __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
371 eptp, gpa);
372 else
373 ept_sync_context(eptp);
374 }
375}
376
338static unsigned long vmcs_readl(unsigned long field) 377static unsigned long vmcs_readl(unsigned long field)
339{ 378{
340 unsigned long value; 379 unsigned long value;
@@ -422,6 +461,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
422 eb |= 1u << 1; 461 eb |= 1u << 1;
423 if (vcpu->arch.rmode.active) 462 if (vcpu->arch.rmode.active)
424 eb = ~0; 463 eb = ~0;
464 if (vm_need_ept())
465 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
425 vmcs_write32(EXCEPTION_BITMAP, eb); 466 vmcs_write32(EXCEPTION_BITMAP, eb);
426} 467}
427 468
@@ -1352,8 +1393,64 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
1352 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; 1393 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
1353} 1394}
1354 1395
1396static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
1397{
1398 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
1399 if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
1400 printk(KERN_ERR "EPT: Fail to load pdptrs!\n");
1401 return;
1402 }
1403 vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
1404 vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
1405 vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
1406 vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]);
1407 }
1408}
1409
1410static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
1411
1412static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1413 unsigned long cr0,
1414 struct kvm_vcpu *vcpu)
1415{
1416 if (!(cr0 & X86_CR0_PG)) {
1417 /* From paging/starting to nonpaging */
1418 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
1419 vmcs_config.cpu_based_exec_ctrl |
1420 (CPU_BASED_CR3_LOAD_EXITING |
1421 CPU_BASED_CR3_STORE_EXITING));
1422 vcpu->arch.cr0 = cr0;
1423 vmx_set_cr4(vcpu, vcpu->arch.cr4);
1424 *hw_cr0 |= X86_CR0_PE | X86_CR0_PG;
1425 *hw_cr0 &= ~X86_CR0_WP;
1426 } else if (!is_paging(vcpu)) {
1427 /* From nonpaging to paging */
1428 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
1429 vmcs_config.cpu_based_exec_ctrl &
1430 ~(CPU_BASED_CR3_LOAD_EXITING |
1431 CPU_BASED_CR3_STORE_EXITING));
1432 vcpu->arch.cr0 = cr0;
1433 vmx_set_cr4(vcpu, vcpu->arch.cr4);
1434 if (!(vcpu->arch.cr0 & X86_CR0_WP))
1435 *hw_cr0 &= ~X86_CR0_WP;
1436 }
1437}
1438
1439static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
1440 struct kvm_vcpu *vcpu)
1441{
1442 if (!is_paging(vcpu)) {
1443 *hw_cr4 &= ~X86_CR4_PAE;
1444 *hw_cr4 |= X86_CR4_PSE;
1445 } else if (!(vcpu->arch.cr4 & X86_CR4_PAE))
1446 *hw_cr4 &= ~X86_CR4_PAE;
1447}
1448
1355static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 1449static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1356{ 1450{
1451 unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) |
1452 KVM_VM_CR0_ALWAYS_ON;
1453
1357 vmx_fpu_deactivate(vcpu); 1454 vmx_fpu_deactivate(vcpu);
1358 1455
1359 if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE)) 1456 if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE))
@@ -1371,29 +1468,61 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1371 } 1468 }
1372#endif 1469#endif
1373 1470
1471 if (vm_need_ept())
1472 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
1473
1374 vmcs_writel(CR0_READ_SHADOW, cr0); 1474 vmcs_writel(CR0_READ_SHADOW, cr0);
1375 vmcs_writel(GUEST_CR0, 1475 vmcs_writel(GUEST_CR0, hw_cr0);
1376 (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
1377 vcpu->arch.cr0 = cr0; 1476 vcpu->arch.cr0 = cr0;
1378 1477
1379 if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) 1478 if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
1380 vmx_fpu_activate(vcpu); 1479 vmx_fpu_activate(vcpu);
1381} 1480}
1382 1481
1482static u64 construct_eptp(unsigned long root_hpa)
1483{
1484 u64 eptp;
1485
1486 /* TODO write the value reading from MSR */
1487 eptp = VMX_EPT_DEFAULT_MT |
1488 VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
1489 eptp |= (root_hpa & PAGE_MASK);
1490
1491 return eptp;
1492}
1493
1383static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 1494static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1384{ 1495{
1496 unsigned long guest_cr3;
1497 u64 eptp;
1498
1499 guest_cr3 = cr3;
1500 if (vm_need_ept()) {
1501 eptp = construct_eptp(cr3);
1502 vmcs_write64(EPT_POINTER, eptp);
1503 ept_sync_context(eptp);
1504 ept_load_pdptrs(vcpu);
1505 guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
1506 VMX_EPT_IDENTITY_PAGETABLE_ADDR;
1507 }
1508
1385 vmx_flush_tlb(vcpu); 1509 vmx_flush_tlb(vcpu);
1386 vmcs_writel(GUEST_CR3, cr3); 1510 vmcs_writel(GUEST_CR3, guest_cr3);
1387 if (vcpu->arch.cr0 & X86_CR0_PE) 1511 if (vcpu->arch.cr0 & X86_CR0_PE)
1388 vmx_fpu_deactivate(vcpu); 1512 vmx_fpu_deactivate(vcpu);
1389} 1513}
1390 1514
1391static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 1515static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1392{ 1516{
1393 vmcs_writel(CR4_READ_SHADOW, cr4); 1517 unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ?
1394 vmcs_writel(GUEST_CR4, cr4 | (vcpu->arch.rmode.active ? 1518 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
1395 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON)); 1519
1396 vcpu->arch.cr4 = cr4; 1520 vcpu->arch.cr4 = cr4;
1521 if (vm_need_ept())
1522 ept_update_paging_mode_cr4(&hw_cr4, vcpu);
1523
1524 vmcs_writel(CR4_READ_SHADOW, cr4);
1525 vmcs_writel(GUEST_CR4, hw_cr4);
1397} 1526}
1398 1527
1399static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) 1528static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -2116,6 +2245,9 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2116 if (intr_info & INTR_INFO_DELIVER_CODE_MASK) 2245 if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
2117 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); 2246 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
2118 if (is_page_fault(intr_info)) { 2247 if (is_page_fault(intr_info)) {
2248 /* EPT won't cause page fault directly */
2249 if (vm_need_ept())
2250 BUG();
2119 cr2 = vmcs_readl(EXIT_QUALIFICATION); 2251 cr2 = vmcs_readl(EXIT_QUALIFICATION);
2120 KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, 2252 KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
2121 (u32)((u64)cr2 >> 32), handler); 2253 (u32)((u64)cr2 >> 32), handler);
@@ -2445,6 +2577,64 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2445 return kvm_task_switch(vcpu, tss_selector, reason); 2577 return kvm_task_switch(vcpu, tss_selector, reason);
2446} 2578}
2447 2579
2580static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2581{
2582 u64 exit_qualification;
2583 enum emulation_result er;
2584 gpa_t gpa;
2585 unsigned long hva;
2586 int gla_validity;
2587 int r;
2588
2589 exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
2590
2591 if (exit_qualification & (1 << 6)) {
2592 printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
2593 return -ENOTSUPP;
2594 }
2595
2596 gla_validity = (exit_qualification >> 7) & 0x3;
2597 if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
2598 printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
2599 printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
2600 (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
2601 (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
2602 printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
2603 (long unsigned int)exit_qualification);
2604 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
2605 kvm_run->hw.hardware_exit_reason = 0;
2606 return -ENOTSUPP;
2607 }
2608
2609 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
2610 hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT);
2611 if (!kvm_is_error_hva(hva)) {
2612 r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
2613 if (r < 0) {
2614 printk(KERN_ERR "EPT: Not enough memory!\n");
2615 return -ENOMEM;
2616 }
2617 return 1;
2618 } else {
2619 /* must be MMIO */
2620 er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
2621
2622 if (er == EMULATE_FAIL) {
2623 printk(KERN_ERR
2624 "EPT: Fail to handle EPT violation vmexit!er is %d\n",
2625 er);
2626 printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
2627 (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
2628 (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
2629 printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
2630 (long unsigned int)exit_qualification);
2631 return -ENOTSUPP;
2632 } else if (er == EMULATE_DO_MMIO)
2633 return 0;
2634 }
2635 return 1;
2636}
2637
2448/* 2638/*
2449 * The exit handlers return 1 if the exit was handled fully and guest execution 2639 * The exit handlers return 1 if the exit was handled fully and guest execution
2450 * may resume. Otherwise they set the kvm_run parameter to indicate what needs 2640 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -2468,6 +2658,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
2468 [EXIT_REASON_APIC_ACCESS] = handle_apic_access, 2658 [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
2469 [EXIT_REASON_WBINVD] = handle_wbinvd, 2659 [EXIT_REASON_WBINVD] = handle_wbinvd,
2470 [EXIT_REASON_TASK_SWITCH] = handle_task_switch, 2660 [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
2661 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
2471}; 2662};
2472 2663
2473static const int kvm_vmx_max_exit_handlers = 2664static const int kvm_vmx_max_exit_handlers =
@@ -2486,6 +2677,13 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2486 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP), 2677 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP),
2487 (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit); 2678 (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit);
2488 2679
2680 /* Access CR3 don't cause VMExit in paging mode, so we need
2681 * to sync with guest real CR3. */
2682 if (vm_need_ept() && is_paging(vcpu)) {
2683 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
2684 ept_load_pdptrs(vcpu);
2685 }
2686
2489 if (unlikely(vmx->fail)) { 2687 if (unlikely(vmx->fail)) {
2490 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 2688 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
2491 kvm_run->fail_entry.hardware_entry_failure_reason 2689 kvm_run->fail_entry.hardware_entry_failure_reason
@@ -2494,7 +2692,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2494 } 2692 }
2495 2693
2496 if ((vectoring_info & VECTORING_INFO_VALID_MASK) && 2694 if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
2497 exit_reason != EXIT_REASON_EXCEPTION_NMI) 2695 (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
2696 exit_reason != EXIT_REASON_EPT_VIOLATION))
2498 printk(KERN_WARNING "%s: unexpected, valid vectoring info and " 2697 printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
2499 "exit reason is 0x%x\n", __func__, exit_reason); 2698 "exit reason is 0x%x\n", __func__, exit_reason);
2500 if (exit_reason < kvm_vmx_max_exit_handlers 2699 if (exit_reason < kvm_vmx_max_exit_handlers
@@ -2796,6 +2995,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
2796 return ERR_PTR(-ENOMEM); 2995 return ERR_PTR(-ENOMEM);
2797 2996
2798 allocate_vpid(vmx); 2997 allocate_vpid(vmx);
2998 if (id == 0 && vm_need_ept()) {
2999 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
3000 VMX_EPT_WRITABLE_MASK |
3001 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
3002 kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
3003 VMX_EPT_FAKE_DIRTY_MASK, 0ull,
3004 VMX_EPT_EXECUTABLE_MASK);
3005 kvm_enable_tdp();
3006 }
2799 3007
2800 err = kvm_vcpu_init(&vmx->vcpu, kvm, id); 3008 err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
2801 if (err) 3009 if (err)
@@ -2975,9 +3183,14 @@ static int __init vmx_init(void)
2975 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); 3183 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
2976 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); 3184 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
2977 3185
3186 if (cpu_has_vmx_ept())
3187 bypass_guest_pf = 0;
3188
2978 if (bypass_guest_pf) 3189 if (bypass_guest_pf)
2979 kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); 3190 kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
2980 3191
3192 ept_sync_global();
3193
2981 return 0; 3194 return 0;
2982 3195
2983out2: 3196out2: