diff options
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- | arch/x86/kvm/Kconfig | 1 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 107 | ||||
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 12 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 10 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 22 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 133 |
6 files changed, 200 insertions, 85 deletions
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 8d45fabc5f3b..ce3251ce5504 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -21,6 +21,7 @@ config KVM | |||
21 | tristate "Kernel-based Virtual Machine (KVM) support" | 21 | tristate "Kernel-based Virtual Machine (KVM) support" |
22 | depends on HAVE_KVM | 22 | depends on HAVE_KVM |
23 | select PREEMPT_NOTIFIERS | 23 | select PREEMPT_NOTIFIERS |
24 | select MMU_NOTIFIER | ||
24 | select ANON_INODES | 25 | select ANON_INODES |
25 | ---help--- | 26 | ---help--- |
26 | Support hosting fully virtualized guest machines using hardware | 27 | Support hosting fully virtualized guest machines using hardware |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b0e4ddca6c18..0bfe2bd305eb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -653,6 +653,84 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
653 | account_shadowed(kvm, gfn); | 653 | account_shadowed(kvm, gfn); |
654 | } | 654 | } |
655 | 655 | ||
656 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) | ||
657 | { | ||
658 | u64 *spte; | ||
659 | int need_tlb_flush = 0; | ||
660 | |||
661 | while ((spte = rmap_next(kvm, rmapp, NULL))) { | ||
662 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | ||
663 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); | ||
664 | rmap_remove(kvm, spte); | ||
665 | set_shadow_pte(spte, shadow_trap_nonpresent_pte); | ||
666 | need_tlb_flush = 1; | ||
667 | } | ||
668 | return need_tlb_flush; | ||
669 | } | ||
670 | |||
671 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | ||
672 | int (*handler)(struct kvm *kvm, unsigned long *rmapp)) | ||
673 | { | ||
674 | int i; | ||
675 | int retval = 0; | ||
676 | |||
677 | /* | ||
678 | * If mmap_sem isn't taken, we can look the memslots with only | ||
679 | * the mmu_lock by skipping over the slots with userspace_addr == 0. | ||
680 | */ | ||
681 | for (i = 0; i < kvm->nmemslots; i++) { | ||
682 | struct kvm_memory_slot *memslot = &kvm->memslots[i]; | ||
683 | unsigned long start = memslot->userspace_addr; | ||
684 | unsigned long end; | ||
685 | |||
686 | /* mmu_lock protects userspace_addr */ | ||
687 | if (!start) | ||
688 | continue; | ||
689 | |||
690 | end = start + (memslot->npages << PAGE_SHIFT); | ||
691 | if (hva >= start && hva < end) { | ||
692 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; | ||
693 | retval |= handler(kvm, &memslot->rmap[gfn_offset]); | ||
694 | retval |= handler(kvm, | ||
695 | &memslot->lpage_info[ | ||
696 | gfn_offset / | ||
697 | KVM_PAGES_PER_HPAGE].rmap_pde); | ||
698 | } | ||
699 | } | ||
700 | |||
701 | return retval; | ||
702 | } | ||
703 | |||
704 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | ||
705 | { | ||
706 | return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); | ||
707 | } | ||
708 | |||
709 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) | ||
710 | { | ||
711 | u64 *spte; | ||
712 | int young = 0; | ||
713 | |||
714 | spte = rmap_next(kvm, rmapp, NULL); | ||
715 | while (spte) { | ||
716 | int _young; | ||
717 | u64 _spte = *spte; | ||
718 | BUG_ON(!(_spte & PT_PRESENT_MASK)); | ||
719 | _young = _spte & PT_ACCESSED_MASK; | ||
720 | if (_young) { | ||
721 | young = 1; | ||
722 | clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); | ||
723 | } | ||
724 | spte = rmap_next(kvm, rmapp, spte); | ||
725 | } | ||
726 | return young; | ||
727 | } | ||
728 | |||
729 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) | ||
730 | { | ||
731 | return kvm_handle_hva(kvm, hva, kvm_age_rmapp); | ||
732 | } | ||
733 | |||
656 | #ifdef MMU_DEBUG | 734 | #ifdef MMU_DEBUG |
657 | static int is_empty_shadow_page(u64 *spt) | 735 | static int is_empty_shadow_page(u64 *spt) |
658 | { | 736 | { |
@@ -1203,6 +1281,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
1203 | int r; | 1281 | int r; |
1204 | int largepage = 0; | 1282 | int largepage = 0; |
1205 | pfn_t pfn; | 1283 | pfn_t pfn; |
1284 | unsigned long mmu_seq; | ||
1206 | 1285 | ||
1207 | down_read(¤t->mm->mmap_sem); | 1286 | down_read(¤t->mm->mmap_sem); |
1208 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { | 1287 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { |
@@ -1210,6 +1289,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
1210 | largepage = 1; | 1289 | largepage = 1; |
1211 | } | 1290 | } |
1212 | 1291 | ||
1292 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
1293 | /* implicit mb(), we'll read before PT lock is unlocked */ | ||
1213 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 1294 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
1214 | up_read(¤t->mm->mmap_sem); | 1295 | up_read(¤t->mm->mmap_sem); |
1215 | 1296 | ||
@@ -1220,6 +1301,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
1220 | } | 1301 | } |
1221 | 1302 | ||
1222 | spin_lock(&vcpu->kvm->mmu_lock); | 1303 | spin_lock(&vcpu->kvm->mmu_lock); |
1304 | if (mmu_notifier_retry(vcpu, mmu_seq)) | ||
1305 | goto out_unlock; | ||
1223 | kvm_mmu_free_some_pages(vcpu); | 1306 | kvm_mmu_free_some_pages(vcpu); |
1224 | r = __direct_map(vcpu, v, write, largepage, gfn, pfn, | 1307 | r = __direct_map(vcpu, v, write, largepage, gfn, pfn, |
1225 | PT32E_ROOT_LEVEL); | 1308 | PT32E_ROOT_LEVEL); |
@@ -1227,6 +1310,11 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
1227 | 1310 | ||
1228 | 1311 | ||
1229 | return r; | 1312 | return r; |
1313 | |||
1314 | out_unlock: | ||
1315 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
1316 | kvm_release_pfn_clean(pfn); | ||
1317 | return 0; | ||
1230 | } | 1318 | } |
1231 | 1319 | ||
1232 | 1320 | ||
@@ -1345,6 +1433,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
1345 | int r; | 1433 | int r; |
1346 | int largepage = 0; | 1434 | int largepage = 0; |
1347 | gfn_t gfn = gpa >> PAGE_SHIFT; | 1435 | gfn_t gfn = gpa >> PAGE_SHIFT; |
1436 | unsigned long mmu_seq; | ||
1348 | 1437 | ||
1349 | ASSERT(vcpu); | 1438 | ASSERT(vcpu); |
1350 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 1439 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
@@ -1358,6 +1447,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
1358 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); | 1447 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); |
1359 | largepage = 1; | 1448 | largepage = 1; |
1360 | } | 1449 | } |
1450 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
1451 | /* implicit mb(), we'll read before PT lock is unlocked */ | ||
1361 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 1452 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
1362 | up_read(¤t->mm->mmap_sem); | 1453 | up_read(¤t->mm->mmap_sem); |
1363 | if (is_error_pfn(pfn)) { | 1454 | if (is_error_pfn(pfn)) { |
@@ -1365,12 +1456,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
1365 | return 1; | 1456 | return 1; |
1366 | } | 1457 | } |
1367 | spin_lock(&vcpu->kvm->mmu_lock); | 1458 | spin_lock(&vcpu->kvm->mmu_lock); |
1459 | if (mmu_notifier_retry(vcpu, mmu_seq)) | ||
1460 | goto out_unlock; | ||
1368 | kvm_mmu_free_some_pages(vcpu); | 1461 | kvm_mmu_free_some_pages(vcpu); |
1369 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, | 1462 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, |
1370 | largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); | 1463 | largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); |
1371 | spin_unlock(&vcpu->kvm->mmu_lock); | 1464 | spin_unlock(&vcpu->kvm->mmu_lock); |
1372 | 1465 | ||
1373 | return r; | 1466 | return r; |
1467 | |||
1468 | out_unlock: | ||
1469 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
1470 | kvm_release_pfn_clean(pfn); | ||
1471 | return 0; | ||
1374 | } | 1472 | } |
1375 | 1473 | ||
1376 | static void nonpaging_free(struct kvm_vcpu *vcpu) | 1474 | static void nonpaging_free(struct kvm_vcpu *vcpu) |
@@ -1670,6 +1768,8 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1670 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); | 1768 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); |
1671 | vcpu->arch.update_pte.largepage = 1; | 1769 | vcpu->arch.update_pte.largepage = 1; |
1672 | } | 1770 | } |
1771 | vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
1772 | /* implicit mb(), we'll read before PT lock is unlocked */ | ||
1673 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 1773 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
1674 | up_read(¤t->mm->mmap_sem); | 1774 | up_read(¤t->mm->mmap_sem); |
1675 | 1775 | ||
@@ -1814,6 +1914,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
1814 | spin_unlock(&vcpu->kvm->mmu_lock); | 1914 | spin_unlock(&vcpu->kvm->mmu_lock); |
1815 | return r; | 1915 | return r; |
1816 | } | 1916 | } |
1917 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); | ||
1817 | 1918 | ||
1818 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 1919 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) |
1819 | { | 1920 | { |
@@ -1870,6 +1971,12 @@ void kvm_enable_tdp(void) | |||
1870 | } | 1971 | } |
1871 | EXPORT_SYMBOL_GPL(kvm_enable_tdp); | 1972 | EXPORT_SYMBOL_GPL(kvm_enable_tdp); |
1872 | 1973 | ||
1974 | void kvm_disable_tdp(void) | ||
1975 | { | ||
1976 | tdp_enabled = false; | ||
1977 | } | ||
1978 | EXPORT_SYMBOL_GPL(kvm_disable_tdp); | ||
1979 | |||
1873 | static void free_mmu_pages(struct kvm_vcpu *vcpu) | 1980 | static void free_mmu_pages(struct kvm_vcpu *vcpu) |
1874 | { | 1981 | { |
1875 | struct kvm_mmu_page *sp; | 1982 | struct kvm_mmu_page *sp; |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 4d918220baeb..f72ac1fa35f0 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -263,6 +263,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
263 | pfn = vcpu->arch.update_pte.pfn; | 263 | pfn = vcpu->arch.update_pte.pfn; |
264 | if (is_error_pfn(pfn)) | 264 | if (is_error_pfn(pfn)) |
265 | return; | 265 | return; |
266 | if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) | ||
267 | return; | ||
266 | kvm_get_pfn(pfn); | 268 | kvm_get_pfn(pfn); |
267 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, | 269 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, |
268 | gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), | 270 | gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), |
@@ -380,6 +382,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
380 | int r; | 382 | int r; |
381 | pfn_t pfn; | 383 | pfn_t pfn; |
382 | int largepage = 0; | 384 | int largepage = 0; |
385 | unsigned long mmu_seq; | ||
383 | 386 | ||
384 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); | 387 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); |
385 | kvm_mmu_audit(vcpu, "pre page fault"); | 388 | kvm_mmu_audit(vcpu, "pre page fault"); |
@@ -413,6 +416,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
413 | largepage = 1; | 416 | largepage = 1; |
414 | } | 417 | } |
415 | } | 418 | } |
419 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
420 | /* implicit mb(), we'll read before PT lock is unlocked */ | ||
416 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); | 421 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); |
417 | up_read(¤t->mm->mmap_sem); | 422 | up_read(¤t->mm->mmap_sem); |
418 | 423 | ||
@@ -424,6 +429,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
424 | } | 429 | } |
425 | 430 | ||
426 | spin_lock(&vcpu->kvm->mmu_lock); | 431 | spin_lock(&vcpu->kvm->mmu_lock); |
432 | if (mmu_notifier_retry(vcpu, mmu_seq)) | ||
433 | goto out_unlock; | ||
427 | kvm_mmu_free_some_pages(vcpu); | 434 | kvm_mmu_free_some_pages(vcpu); |
428 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 435 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
429 | largepage, &write_pt, pfn); | 436 | largepage, &write_pt, pfn); |
@@ -439,6 +446,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
439 | spin_unlock(&vcpu->kvm->mmu_lock); | 446 | spin_unlock(&vcpu->kvm->mmu_lock); |
440 | 447 | ||
441 | return write_pt; | 448 | return write_pt; |
449 | |||
450 | out_unlock: | ||
451 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
452 | kvm_release_pfn_clean(pfn); | ||
453 | return 0; | ||
442 | } | 454 | } |
443 | 455 | ||
444 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) | 456 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b756e876dce3..e2ee264740c7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -453,7 +453,8 @@ static __init int svm_hardware_setup(void) | |||
453 | if (npt_enabled) { | 453 | if (npt_enabled) { |
454 | printk(KERN_INFO "kvm: Nested Paging enabled\n"); | 454 | printk(KERN_INFO "kvm: Nested Paging enabled\n"); |
455 | kvm_enable_tdp(); | 455 | kvm_enable_tdp(); |
456 | } | 456 | } else |
457 | kvm_disable_tdp(); | ||
457 | 458 | ||
458 | return 0; | 459 | return 0; |
459 | 460 | ||
@@ -1007,10 +1008,13 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1007 | struct kvm *kvm = svm->vcpu.kvm; | 1008 | struct kvm *kvm = svm->vcpu.kvm; |
1008 | u64 fault_address; | 1009 | u64 fault_address; |
1009 | u32 error_code; | 1010 | u32 error_code; |
1011 | bool event_injection = false; | ||
1010 | 1012 | ||
1011 | if (!irqchip_in_kernel(kvm) && | 1013 | if (!irqchip_in_kernel(kvm) && |
1012 | is_external_interrupt(exit_int_info)) | 1014 | is_external_interrupt(exit_int_info)) { |
1015 | event_injection = true; | ||
1013 | push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); | 1016 | push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); |
1017 | } | ||
1014 | 1018 | ||
1015 | fault_address = svm->vmcb->control.exit_info_2; | 1019 | fault_address = svm->vmcb->control.exit_info_2; |
1016 | error_code = svm->vmcb->control.exit_info_1; | 1020 | error_code = svm->vmcb->control.exit_info_1; |
@@ -1024,6 +1028,8 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1024 | (u32)fault_address, (u32)(fault_address >> 32), | 1028 | (u32)fault_address, (u32)(fault_address >> 32), |
1025 | handler); | 1029 | handler); |
1026 | 1030 | ||
1031 | if (event_injection) | ||
1032 | kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); | ||
1027 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); | 1033 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); |
1028 | } | 1034 | } |
1029 | 1035 | ||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0cac63701719..2a69773e3b26 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -2298,6 +2298,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2298 | cr2 = vmcs_readl(EXIT_QUALIFICATION); | 2298 | cr2 = vmcs_readl(EXIT_QUALIFICATION); |
2299 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, | 2299 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, |
2300 | (u32)((u64)cr2 >> 32), handler); | 2300 | (u32)((u64)cr2 >> 32), handler); |
2301 | if (vect_info & VECTORING_INFO_VALID_MASK) | ||
2302 | kvm_mmu_unprotect_page_virt(vcpu, cr2); | ||
2301 | return kvm_mmu_page_fault(vcpu, cr2, error_code); | 2303 | return kvm_mmu_page_fault(vcpu, cr2, error_code); |
2302 | } | 2304 | } |
2303 | 2305 | ||
@@ -3116,15 +3118,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
3116 | return ERR_PTR(-ENOMEM); | 3118 | return ERR_PTR(-ENOMEM); |
3117 | 3119 | ||
3118 | allocate_vpid(vmx); | 3120 | allocate_vpid(vmx); |
3119 | if (id == 0 && vm_need_ept()) { | ||
3120 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | | ||
3121 | VMX_EPT_WRITABLE_MASK | | ||
3122 | VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); | ||
3123 | kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK, | ||
3124 | VMX_EPT_FAKE_DIRTY_MASK, 0ull, | ||
3125 | VMX_EPT_EXECUTABLE_MASK); | ||
3126 | kvm_enable_tdp(); | ||
3127 | } | ||
3128 | 3121 | ||
3129 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); | 3122 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); |
3130 | if (err) | 3123 | if (err) |
@@ -3303,8 +3296,17 @@ static int __init vmx_init(void) | |||
3303 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); | 3296 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); |
3304 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); | 3297 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); |
3305 | 3298 | ||
3306 | if (cpu_has_vmx_ept()) | 3299 | if (vm_need_ept()) { |
3307 | bypass_guest_pf = 0; | 3300 | bypass_guest_pf = 0; |
3301 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | | ||
3302 | VMX_EPT_WRITABLE_MASK | | ||
3303 | VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); | ||
3304 | kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK, | ||
3305 | VMX_EPT_FAKE_DIRTY_MASK, 0ull, | ||
3306 | VMX_EPT_EXECUTABLE_MASK); | ||
3307 | kvm_enable_tdp(); | ||
3308 | } else | ||
3309 | kvm_disable_tdp(); | ||
3308 | 3310 | ||
3309 | if (bypass_guest_pf) | 3311 | if (bypass_guest_pf) |
3310 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); | 3312 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9f1cdb011cff..0d682fc6aeb3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -883,6 +883,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
883 | case KVM_CAP_PIT: | 883 | case KVM_CAP_PIT: |
884 | case KVM_CAP_NOP_IO_DELAY: | 884 | case KVM_CAP_NOP_IO_DELAY: |
885 | case KVM_CAP_MP_STATE: | 885 | case KVM_CAP_MP_STATE: |
886 | case KVM_CAP_SYNC_MMU: | ||
886 | r = 1; | 887 | r = 1; |
887 | break; | 888 | break; |
888 | case KVM_CAP_COALESCED_MMIO: | 889 | case KVM_CAP_COALESCED_MMIO: |
@@ -1495,6 +1496,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
1495 | goto out; | 1496 | goto out; |
1496 | 1497 | ||
1497 | down_write(&kvm->slots_lock); | 1498 | down_write(&kvm->slots_lock); |
1499 | spin_lock(&kvm->mmu_lock); | ||
1498 | 1500 | ||
1499 | p = &kvm->arch.aliases[alias->slot]; | 1501 | p = &kvm->arch.aliases[alias->slot]; |
1500 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | 1502 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; |
@@ -1506,6 +1508,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
1506 | break; | 1508 | break; |
1507 | kvm->arch.naliases = n; | 1509 | kvm->arch.naliases = n; |
1508 | 1510 | ||
1511 | spin_unlock(&kvm->mmu_lock); | ||
1509 | kvm_mmu_zap_all(kvm); | 1512 | kvm_mmu_zap_all(kvm); |
1510 | 1513 | ||
1511 | up_write(&kvm->slots_lock); | 1514 | up_write(&kvm->slots_lock); |
@@ -3184,6 +3187,10 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, | |||
3184 | kvm_desct->base |= seg_desc->base2 << 24; | 3187 | kvm_desct->base |= seg_desc->base2 << 24; |
3185 | kvm_desct->limit = seg_desc->limit0; | 3188 | kvm_desct->limit = seg_desc->limit0; |
3186 | kvm_desct->limit |= seg_desc->limit << 16; | 3189 | kvm_desct->limit |= seg_desc->limit << 16; |
3190 | if (seg_desc->g) { | ||
3191 | kvm_desct->limit <<= 12; | ||
3192 | kvm_desct->limit |= 0xfff; | ||
3193 | } | ||
3187 | kvm_desct->selector = selector; | 3194 | kvm_desct->selector = selector; |
3188 | kvm_desct->type = seg_desc->type; | 3195 | kvm_desct->type = seg_desc->type; |
3189 | kvm_desct->present = seg_desc->p; | 3196 | kvm_desct->present = seg_desc->p; |
@@ -3223,6 +3230,7 @@ static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, | |||
3223 | static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 3230 | static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, |
3224 | struct desc_struct *seg_desc) | 3231 | struct desc_struct *seg_desc) |
3225 | { | 3232 | { |
3233 | gpa_t gpa; | ||
3226 | struct descriptor_table dtable; | 3234 | struct descriptor_table dtable; |
3227 | u16 index = selector >> 3; | 3235 | u16 index = selector >> 3; |
3228 | 3236 | ||
@@ -3232,13 +3240,16 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
3232 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); | 3240 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); |
3233 | return 1; | 3241 | return 1; |
3234 | } | 3242 | } |
3235 | return kvm_read_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); | 3243 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base); |
3244 | gpa += index * 8; | ||
3245 | return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8); | ||
3236 | } | 3246 | } |
3237 | 3247 | ||
3238 | /* allowed just for 8 bytes segments */ | 3248 | /* allowed just for 8 bytes segments */ |
3239 | static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 3249 | static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, |
3240 | struct desc_struct *seg_desc) | 3250 | struct desc_struct *seg_desc) |
3241 | { | 3251 | { |
3252 | gpa_t gpa; | ||
3242 | struct descriptor_table dtable; | 3253 | struct descriptor_table dtable; |
3243 | u16 index = selector >> 3; | 3254 | u16 index = selector >> 3; |
3244 | 3255 | ||
@@ -3246,7 +3257,9 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
3246 | 3257 | ||
3247 | if (dtable.limit < index * 8 + 7) | 3258 | if (dtable.limit < index * 8 + 7) |
3248 | return 1; | 3259 | return 1; |
3249 | return kvm_write_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); | 3260 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base); |
3261 | gpa += index * 8; | ||
3262 | return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8); | ||
3250 | } | 3263 | } |
3251 | 3264 | ||
3252 | static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, | 3265 | static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, |
@@ -3258,55 +3271,7 @@ static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, | |||
3258 | base_addr |= (seg_desc->base1 << 16); | 3271 | base_addr |= (seg_desc->base1 << 16); |
3259 | base_addr |= (seg_desc->base2 << 24); | 3272 | base_addr |= (seg_desc->base2 << 24); |
3260 | 3273 | ||
3261 | return base_addr; | 3274 | return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr); |
3262 | } | ||
3263 | |||
3264 | static int load_tss_segment32(struct kvm_vcpu *vcpu, | ||
3265 | struct desc_struct *seg_desc, | ||
3266 | struct tss_segment_32 *tss) | ||
3267 | { | ||
3268 | u32 base_addr; | ||
3269 | |||
3270 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
3271 | |||
3272 | return kvm_read_guest(vcpu->kvm, base_addr, tss, | ||
3273 | sizeof(struct tss_segment_32)); | ||
3274 | } | ||
3275 | |||
3276 | static int save_tss_segment32(struct kvm_vcpu *vcpu, | ||
3277 | struct desc_struct *seg_desc, | ||
3278 | struct tss_segment_32 *tss) | ||
3279 | { | ||
3280 | u32 base_addr; | ||
3281 | |||
3282 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
3283 | |||
3284 | return kvm_write_guest(vcpu->kvm, base_addr, tss, | ||
3285 | sizeof(struct tss_segment_32)); | ||
3286 | } | ||
3287 | |||
3288 | static int load_tss_segment16(struct kvm_vcpu *vcpu, | ||
3289 | struct desc_struct *seg_desc, | ||
3290 | struct tss_segment_16 *tss) | ||
3291 | { | ||
3292 | u32 base_addr; | ||
3293 | |||
3294 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
3295 | |||
3296 | return kvm_read_guest(vcpu->kvm, base_addr, tss, | ||
3297 | sizeof(struct tss_segment_16)); | ||
3298 | } | ||
3299 | |||
3300 | static int save_tss_segment16(struct kvm_vcpu *vcpu, | ||
3301 | struct desc_struct *seg_desc, | ||
3302 | struct tss_segment_16 *tss) | ||
3303 | { | ||
3304 | u32 base_addr; | ||
3305 | |||
3306 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
3307 | |||
3308 | return kvm_write_guest(vcpu->kvm, base_addr, tss, | ||
3309 | sizeof(struct tss_segment_16)); | ||
3310 | } | 3275 | } |
3311 | 3276 | ||
3312 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | 3277 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) |
@@ -3466,20 +3431,26 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, | |||
3466 | } | 3431 | } |
3467 | 3432 | ||
3468 | static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | 3433 | static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, |
3469 | struct desc_struct *cseg_desc, | 3434 | u32 old_tss_base, |
3470 | struct desc_struct *nseg_desc) | 3435 | struct desc_struct *nseg_desc) |
3471 | { | 3436 | { |
3472 | struct tss_segment_16 tss_segment_16; | 3437 | struct tss_segment_16 tss_segment_16; |
3473 | int ret = 0; | 3438 | int ret = 0; |
3474 | 3439 | ||
3475 | if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16)) | 3440 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16, |
3441 | sizeof tss_segment_16)) | ||
3476 | goto out; | 3442 | goto out; |
3477 | 3443 | ||
3478 | save_state_to_tss16(vcpu, &tss_segment_16); | 3444 | save_state_to_tss16(vcpu, &tss_segment_16); |
3479 | save_tss_segment16(vcpu, cseg_desc, &tss_segment_16); | ||
3480 | 3445 | ||
3481 | if (load_tss_segment16(vcpu, nseg_desc, &tss_segment_16)) | 3446 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16, |
3447 | sizeof tss_segment_16)) | ||
3448 | goto out; | ||
3449 | |||
3450 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), | ||
3451 | &tss_segment_16, sizeof tss_segment_16)) | ||
3482 | goto out; | 3452 | goto out; |
3453 | |||
3483 | if (load_state_from_tss16(vcpu, &tss_segment_16)) | 3454 | if (load_state_from_tss16(vcpu, &tss_segment_16)) |
3484 | goto out; | 3455 | goto out; |
3485 | 3456 | ||
@@ -3489,20 +3460,26 @@ out: | |||
3489 | } | 3460 | } |
3490 | 3461 | ||
3491 | static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | 3462 | static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, |
3492 | struct desc_struct *cseg_desc, | 3463 | u32 old_tss_base, |
3493 | struct desc_struct *nseg_desc) | 3464 | struct desc_struct *nseg_desc) |
3494 | { | 3465 | { |
3495 | struct tss_segment_32 tss_segment_32; | 3466 | struct tss_segment_32 tss_segment_32; |
3496 | int ret = 0; | 3467 | int ret = 0; |
3497 | 3468 | ||
3498 | if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32)) | 3469 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32, |
3470 | sizeof tss_segment_32)) | ||
3499 | goto out; | 3471 | goto out; |
3500 | 3472 | ||
3501 | save_state_to_tss32(vcpu, &tss_segment_32); | 3473 | save_state_to_tss32(vcpu, &tss_segment_32); |
3502 | save_tss_segment32(vcpu, cseg_desc, &tss_segment_32); | ||
3503 | 3474 | ||
3504 | if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32)) | 3475 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32, |
3476 | sizeof tss_segment_32)) | ||
3505 | goto out; | 3477 | goto out; |
3478 | |||
3479 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), | ||
3480 | &tss_segment_32, sizeof tss_segment_32)) | ||
3481 | goto out; | ||
3482 | |||
3506 | if (load_state_from_tss32(vcpu, &tss_segment_32)) | 3483 | if (load_state_from_tss32(vcpu, &tss_segment_32)) |
3507 | goto out; | 3484 | goto out; |
3508 | 3485 | ||
@@ -3517,16 +3494,20 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
3517 | struct desc_struct cseg_desc; | 3494 | struct desc_struct cseg_desc; |
3518 | struct desc_struct nseg_desc; | 3495 | struct desc_struct nseg_desc; |
3519 | int ret = 0; | 3496 | int ret = 0; |
3497 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); | ||
3498 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
3520 | 3499 | ||
3521 | kvm_get_segment(vcpu, &tr_seg, VCPU_SREG_TR); | 3500 | old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base); |
3522 | 3501 | ||
3502 | /* FIXME: Handle errors. Failure to read either TSS or their | ||
3503 | * descriptors should generate a pagefault. | ||
3504 | */ | ||
3523 | if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) | 3505 | if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) |
3524 | goto out; | 3506 | goto out; |
3525 | 3507 | ||
3526 | if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc)) | 3508 | if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc)) |
3527 | goto out; | 3509 | goto out; |
3528 | 3510 | ||
3529 | |||
3530 | if (reason != TASK_SWITCH_IRET) { | 3511 | if (reason != TASK_SWITCH_IRET) { |
3531 | int cpl; | 3512 | int cpl; |
3532 | 3513 | ||
@@ -3544,8 +3525,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
3544 | 3525 | ||
3545 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | 3526 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { |
3546 | cseg_desc.type &= ~(1 << 1); //clear the B flag | 3527 | cseg_desc.type &= ~(1 << 1); //clear the B flag |
3547 | save_guest_segment_descriptor(vcpu, tr_seg.selector, | 3528 | save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc); |
3548 | &cseg_desc); | ||
3549 | } | 3529 | } |
3550 | 3530 | ||
3551 | if (reason == TASK_SWITCH_IRET) { | 3531 | if (reason == TASK_SWITCH_IRET) { |
@@ -3557,10 +3537,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
3557 | kvm_x86_ops->cache_regs(vcpu); | 3537 | kvm_x86_ops->cache_regs(vcpu); |
3558 | 3538 | ||
3559 | if (nseg_desc.type & 8) | 3539 | if (nseg_desc.type & 8) |
3560 | ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc, | 3540 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, |
3561 | &nseg_desc); | 3541 | &nseg_desc); |
3562 | else | 3542 | else |
3563 | ret = kvm_task_switch_16(vcpu, tss_selector, &cseg_desc, | 3543 | ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base, |
3564 | &nseg_desc); | 3544 | &nseg_desc); |
3565 | 3545 | ||
3566 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { | 3546 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { |
@@ -3995,16 +3975,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
3995 | */ | 3975 | */ |
3996 | if (!user_alloc) { | 3976 | if (!user_alloc) { |
3997 | if (npages && !old.rmap) { | 3977 | if (npages && !old.rmap) { |
3978 | unsigned long userspace_addr; | ||
3979 | |||
3998 | down_write(¤t->mm->mmap_sem); | 3980 | down_write(¤t->mm->mmap_sem); |
3999 | memslot->userspace_addr = do_mmap(NULL, 0, | 3981 | userspace_addr = do_mmap(NULL, 0, |
4000 | npages * PAGE_SIZE, | 3982 | npages * PAGE_SIZE, |
4001 | PROT_READ | PROT_WRITE, | 3983 | PROT_READ | PROT_WRITE, |
4002 | MAP_SHARED | MAP_ANONYMOUS, | 3984 | MAP_SHARED | MAP_ANONYMOUS, |
4003 | 0); | 3985 | 0); |
4004 | up_write(¤t->mm->mmap_sem); | 3986 | up_write(¤t->mm->mmap_sem); |
4005 | 3987 | ||
4006 | if (IS_ERR((void *)memslot->userspace_addr)) | 3988 | if (IS_ERR((void *)userspace_addr)) |
4007 | return PTR_ERR((void *)memslot->userspace_addr); | 3989 | return PTR_ERR((void *)userspace_addr); |
3990 | |||
3991 | /* set userspace_addr atomically for kvm_hva_to_rmapp */ | ||
3992 | spin_lock(&kvm->mmu_lock); | ||
3993 | memslot->userspace_addr = userspace_addr; | ||
3994 | spin_unlock(&kvm->mmu_lock); | ||
4008 | } else { | 3995 | } else { |
4009 | if (!old.user_alloc && old.rmap) { | 3996 | if (!old.user_alloc && old.rmap) { |
4010 | int ret; | 3997 | int ret; |