Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--  arch/x86/kvm/Kconfig          1
-rw-r--r--  arch/x86/kvm/mmu.c          111
-rw-r--r--  arch/x86/kvm/paging_tmpl.h   14
-rw-r--r--  arch/x86/kvm/svm.c           22
-rw-r--r--  arch/x86/kvm/vmx.c           21
-rw-r--r--  arch/x86/kvm/vmx.h            2
-rw-r--r--  arch/x86/kvm/x86.c          133
7 files changed, 216 insertions, 88 deletions
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 8d45fabc5f3b..ce3251ce5504 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on HAVE_KVM
 	select PREEMPT_NOTIFIERS
+	select MMU_NOTIFIER
 	select ANON_INODES
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b0e4ddca6c18..3da2508eb22a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -653,6 +653,88 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 		account_shadowed(kvm, gfn);
 }
 
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+	u64 *spte;
+	int need_tlb_flush = 0;
+
+	while ((spte = rmap_next(kvm, rmapp, NULL))) {
+		BUG_ON(!(*spte & PT_PRESENT_MASK));
+		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
+		rmap_remove(kvm, spte);
+		set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+		need_tlb_flush = 1;
+	}
+	return need_tlb_flush;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+			  int (*handler)(struct kvm *kvm, unsigned long *rmapp))
+{
+	int i;
+	int retval = 0;
+
+	/*
+	 * If mmap_sem isn't taken, we can look at the memslots with only
+	 * the mmu_lock by skipping over the slots with userspace_addr == 0.
+	 */
+	for (i = 0; i < kvm->nmemslots; i++) {
+		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+		unsigned long start = memslot->userspace_addr;
+		unsigned long end;
+
+		/* mmu_lock protects userspace_addr */
+		if (!start)
+			continue;
+
+		end = start + (memslot->npages << PAGE_SHIFT);
+		if (hva >= start && hva < end) {
+			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+			retval |= handler(kvm, &memslot->rmap[gfn_offset]);
+			retval |= handler(kvm,
+					  &memslot->lpage_info[
+						  gfn_offset /
+						  KVM_PAGES_PER_HPAGE].rmap_pde);
+		}
+	}
+
+	return retval;
+}
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+}
+
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+	u64 *spte;
+	int young = 0;
+
+	/* always return old for EPT */
+	if (!shadow_accessed_mask)
+		return 0;
+
+	spte = rmap_next(kvm, rmapp, NULL);
+	while (spte) {
+		int _young;
+		u64 _spte = *spte;
+		BUG_ON(!(_spte & PT_PRESENT_MASK));
+		_young = _spte & PT_ACCESSED_MASK;
+		if (_young) {
+			young = 1;
+			clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
+		}
+		spte = rmap_next(kvm, rmapp, spte);
+	}
+	return young;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+}
+
 #ifdef MMU_DEBUG
 static int is_empty_shadow_page(u64 *spt)
 {
@@ -1203,6 +1285,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 	int r;
 	int largepage = 0;
 	pfn_t pfn;
+	unsigned long mmu_seq;
 
 	down_read(&current->mm->mmap_sem);
 	if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
@@ -1210,6 +1293,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 		largepage = 1;
 	}
 
+	mmu_seq = vcpu->kvm->mmu_notifier_seq;
+	/* implicit mb(), we'll read before PT lock is unlocked */
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	up_read(&current->mm->mmap_sem);
 
@@ -1220,6 +1305,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
+	if (mmu_notifier_retry(vcpu, mmu_seq))
+		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
 	r = __direct_map(vcpu, v, write, largepage, gfn, pfn,
 			 PT32E_ROOT_LEVEL);
@@ -1227,6 +1314,11 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 
 
 	return r;
+
+out_unlock:
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	kvm_release_pfn_clean(pfn);
+	return 0;
 }
 
 
@@ -1345,6 +1437,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 	int r;
 	int largepage = 0;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
+	unsigned long mmu_seq;
 
 	ASSERT(vcpu);
 	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -1358,6 +1451,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
 		largepage = 1;
 	}
+	mmu_seq = vcpu->kvm->mmu_notifier_seq;
+	/* implicit mb(), we'll read before PT lock is unlocked */
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	up_read(&current->mm->mmap_sem);
 	if (is_error_pfn(pfn)) {
@@ -1365,12 +1460,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 		return 1;
 	}
 	spin_lock(&vcpu->kvm->mmu_lock);
+	if (mmu_notifier_retry(vcpu, mmu_seq))
+		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
 	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
 			 largepage, gfn, pfn, kvm_x86_ops->get_tdp_level());
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return r;
+
+out_unlock:
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	kvm_release_pfn_clean(pfn);
+	return 0;
 }
 
 static void nonpaging_free(struct kvm_vcpu *vcpu)
@@ -1670,6 +1772,8 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
 		vcpu->arch.update_pte.largepage = 1;
 	}
+	vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
+	/* implicit mb(), we'll read before PT lock is unlocked */
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	up_read(&current->mm->mmap_sem);
 
@@ -1814,6 +1918,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 	spin_unlock(&vcpu->kvm->mmu_lock);
 	return r;
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
 
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 {
@@ -1870,6 +1975,12 @@ void kvm_enable_tdp(void)
 }
 EXPORT_SYMBOL_GPL(kvm_enable_tdp);
 
+void kvm_disable_tdp(void)
+{
+	tdp_enabled = false;
+}
+EXPORT_SYMBOL_GPL(kvm_disable_tdp);
+
 static void free_mmu_pages(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu_page *sp;
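Every fault path touched above follows the same three-step pattern: sample kvm->mmu_notifier_seq before gfn_to_pfn() (which can sleep and race with an MMU-notifier invalidate), take mmu_lock, then re-check the sequence before installing the spte and bail out through out_unlock if the page may have gone stale in between. A minimal sketch of the retry check, assuming the mmu_notifier_seq/mmu_notifier_count bookkeeping that the notifier hooks maintain elsewhere in this series (the exact helper body is an assumption, not part of this diff):

static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu,
				     unsigned long mmu_seq)
{
	/* An invalidate is in flight; the pfn we pinned may be stale. */
	if (unlikely(vcpu->kvm->mmu_notifier_count))
		return 1;
	/*
	 * Both fields are read under mmu_lock. A sequence number that
	 * changed since the fault path sampled it means a complete
	 * invalidate ran, so drop the pfn and let the guest refault.
	 */
	smp_rmb();
	if (vcpu->kvm->mmu_notifier_seq != mmu_seq)
		return 1;
	return 0;
}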
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 4d918220baeb..4a814bff21f2 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -263,6 +263,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 	pfn = vcpu->arch.update_pte.pfn;
 	if (is_error_pfn(pfn))
 		return;
+	if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
+		return;
 	kvm_get_pfn(pfn);
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
 		     gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
@@ -343,7 +345,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		shadow_addr = __pa(shadow_page->spt);
 		shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
 			| PT_WRITABLE_MASK | PT_USER_MASK;
-		*shadow_ent = shadow_pte;
+		set_shadow_pte(shadow_ent, shadow_pte);
 	}
 
 	mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
@@ -380,6 +382,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	int r;
 	pfn_t pfn;
 	int largepage = 0;
+	unsigned long mmu_seq;
 
 	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
 	kvm_mmu_audit(vcpu, "pre page fault");
@@ -413,6 +416,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 			largepage = 1;
 		}
 	}
+	mmu_seq = vcpu->kvm->mmu_notifier_seq;
+	/* implicit mb(), we'll read before PT lock is unlocked */
 	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
 	up_read(&current->mm->mmap_sem);
 
@@ -424,6 +429,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
+	if (mmu_notifier_retry(vcpu, mmu_seq))
+		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
 	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
 				  largepage, &write_pt, pfn);
@@ -439,6 +446,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return write_pt;
+
+out_unlock:
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	kvm_release_pfn_clean(pfn);
+	return 0;
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
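The one-line change in FNAME(fetch) from a plain assignment to set_shadow_pte() is load-bearing: the rmap callbacks above can now zap sptes out from under a concurrent fault, so every spte store must be atomic, including on 32-bit hosts where a u64 assignment would otherwise tear into two 32-bit stores. A sketch of what such a helper has to do (mmu.c's actual implementation may differ in detail):

static void set_shadow_pte(u64 *sptep, u64 spte)
{
#ifdef CONFIG_X86_64
	/* A single 64-bit store is naturally atomic here. */
	set_64bit((unsigned long *)sptep, spte);
#else
	/* On 32-bit hosts, publish both halves in one atomic operation. */
	set_64bit((unsigned long long *)sptep, spte);
#endif
}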
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b756e876dce3..8233b86c778c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -62,6 +62,7 @@ static int npt = 1;
 module_param(npt, int, S_IRUGO);
 
 static void kvm_reput_irq(struct vcpu_svm *svm);
+static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 
 static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
 {
@@ -453,7 +454,8 @@ static __init int svm_hardware_setup(void)
 	if (npt_enabled) {
 		printk(KERN_INFO "kvm: Nested Paging enabled\n");
 		kvm_enable_tdp();
-	}
+	} else
+		kvm_disable_tdp();
 
 	return 0;
 
@@ -877,6 +879,10 @@ set:
 static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
 	unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
+	unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
+
+	if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
+		force_new_asid(vcpu);
 
 	vcpu->arch.cr4 = cr4;
 	if (!npt_enabled)
@@ -1007,10 +1013,13 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	struct kvm *kvm = svm->vcpu.kvm;
 	u64 fault_address;
 	u32 error_code;
+	bool event_injection = false;
 
 	if (!irqchip_in_kernel(kvm) &&
-	    is_external_interrupt(exit_int_info))
+	    is_external_interrupt(exit_int_info)) {
+		event_injection = true;
 		push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
+	}
 
 	fault_address = svm->vmcb->control.exit_info_2;
 	error_code = svm->vmcb->control.exit_info_1;
@@ -1023,7 +1032,16 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code,
 		    (u32)fault_address, (u32)(fault_address >> 32),
 		    handler);
+	/*
+	 * FIXME: This shouldn't be necessary here, but there is a flush
+	 * missing in the MMU code. Until we find this bug, flush the
+	 * complete TLB here on an NPF
+	 */
+	if (npt_enabled)
+		svm_flush_tlb(&svm->vcpu);
 
+	if (event_injection)
+		kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
 	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
 }
 
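On SVM, a guest-scoped TLB flush is expressed by switching the VCPU to a fresh ASID rather than by an explicit flush, which is why the CR4.PGE hunk calls force_new_asid() and the NPF workaround calls svm_flush_tlb(). For orientation, both helpers were one-liners in svm.c of this era; a sketch consistent with their use above (assumed, not part of this patch):

static void force_new_asid(struct kvm_vcpu *vcpu)
{
	/* Invalidate the cached ASID; new_asid() runs on the next VMRUN
	 * and implicitly discards this guest's stale TLB entries. */
	to_svm(vcpu)->asid_generation--;
}

static void svm_flush_tlb(struct kvm_vcpu *vcpu)
{
	force_new_asid(vcpu);
}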
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0cac63701719..7041cc52b562 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2298,6 +2298,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
 		KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
 			    (u32)((u64)cr2 >> 32), handler);
+		if (vect_info & VECTORING_INFO_VALID_MASK)
+			kvm_mmu_unprotect_page_virt(vcpu, cr2);
 		return kvm_mmu_page_fault(vcpu, cr2, error_code);
 	}
 
@@ -3116,15 +3118,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 		return ERR_PTR(-ENOMEM);
 
 	allocate_vpid(vmx);
-	if (id == 0 && vm_need_ept()) {
-		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
-			VMX_EPT_WRITABLE_MASK |
-			VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
-		kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
-			VMX_EPT_FAKE_DIRTY_MASK, 0ull,
-			VMX_EPT_EXECUTABLE_MASK);
-		kvm_enable_tdp();
-	}
 
 	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
 	if (err)
@@ -3303,8 +3296,16 @@ static int __init vmx_init(void)
 	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
 	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
 
-	if (cpu_has_vmx_ept())
+	if (vm_need_ept()) {
 		bypass_guest_pf = 0;
+		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
+			VMX_EPT_WRITABLE_MASK |
+			VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
+		kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
+			VMX_EPT_EXECUTABLE_MASK);
+		kvm_enable_tdp();
+	} else
+		kvm_disable_tdp();
 
 	if (bypass_guest_pf)
 		kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
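Besides moving the EPT mask setup from first-vcpu creation into vmx_init(), this hunk stops passing VMX_EPT_FAKE_ACCESSED_MASK and VMX_EPT_FAKE_DIRTY_MASK (deleted from vmx.h below): this generation of EPT has no hardware accessed/dirty bits, so shadow_accessed_mask is now left at 0, which is exactly the condition kvm_age_rmapp() in mmu.c uses to report every EPT page as old. A sketch of the receiving side in mmu.c (assumed to mirror the existing kvm_mmu_set_base_ptes() plumbing):

void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
			   u64 dirty_mask, u64 nx_mask, u64 x_mask)
{
	/* Globals consulted by the shadow-MMU paths; an accessed_mask
	 * of 0 makes kvm_age_rmapp() bail out early for EPT. */
	shadow_user_mask = user_mask;
	shadow_accessed_mask = accessed_mask;
	shadow_dirty_mask = dirty_mask;
	shadow_nx_mask = nx_mask;
	shadow_x_mask = x_mask;
}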
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 425a13436b3f..23e8373507ad 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -370,8 +370,6 @@ enum vmcs_field {
 #define VMX_EPT_READABLE_MASK			0x1ull
 #define VMX_EPT_WRITABLE_MASK			0x2ull
 #define VMX_EPT_EXECUTABLE_MASK			0x4ull
-#define VMX_EPT_FAKE_ACCESSED_MASK		(1ull << 62)
-#define VMX_EPT_FAKE_DIRTY_MASK			(1ull << 63)
 
 #define VMX_EPT_IDENTITY_PAGETABLE_ADDR		0xfffbc000ul
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9f1cdb011cff..0d682fc6aeb3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -883,6 +883,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PIT:
 	case KVM_CAP_NOP_IO_DELAY:
 	case KVM_CAP_MP_STATE:
+	case KVM_CAP_SYNC_MMU:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -1495,6 +1496,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 		goto out;
 
 	down_write(&kvm->slots_lock);
+	spin_lock(&kvm->mmu_lock);
 
 	p = &kvm->arch.aliases[alias->slot];
 	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
@@ -1506,6 +1508,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 			break;
 	kvm->arch.naliases = n;
 
+	spin_unlock(&kvm->mmu_lock);
 	kvm_mmu_zap_all(kvm);
 
 	up_write(&kvm->slots_lock);
@@ -3184,6 +3187,10 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
 	kvm_desct->base |= seg_desc->base2 << 24;
 	kvm_desct->limit = seg_desc->limit0;
 	kvm_desct->limit |= seg_desc->limit << 16;
+	if (seg_desc->g) {
+		kvm_desct->limit <<= 12;
+		kvm_desct->limit |= 0xfff;
+	}
 	kvm_desct->selector = selector;
 	kvm_desct->type = seg_desc->type;
 	kvm_desct->present = seg_desc->p;
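The granularity fix matters because a descriptor's 20 raw limit bits count 4KiB pages when the g bit is set, so they must be scaled before any limit check. A worked example using a hypothetical helper with the same arithmetic as the hunk:

#include <stdint.h>

/* Scale a descriptor's 20-bit raw limit by the granularity bit. */
static uint32_t scaled_limit(uint32_t raw_limit, int g)
{
	return g ? (raw_limit << 12) | 0xfff : raw_limit;
}

/* scaled_limit(0xfffff, 1) == 0xffffffff, a flat 4GiB segment;
 * without the scaling the same descriptor decodes to a limit just
 * under 1MiB and legitimate accesses above it appear out of range. */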
@@ -3223,6 +3230,7 @@ static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
 static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 					 struct desc_struct *seg_desc)
 {
+	gpa_t gpa;
 	struct descriptor_table dtable;
 	u16 index = selector >> 3;
 
@@ -3232,13 +3240,16 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 		kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
 		return 1;
 	}
-	return kvm_read_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8);
+	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
+	gpa += index * 8;
+	return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8);
 }
 
 /* allowed just for 8 bytes segments */
 static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 					 struct desc_struct *seg_desc)
 {
+	gpa_t gpa;
 	struct descriptor_table dtable;
 	u16 index = selector >> 3;
 
@@ -3246,7 +3257,9 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 
 	if (dtable.limit < index * 8 + 7)
 		return 1;
-	return kvm_write_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8);
+	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
+	gpa += index * 8;
+	return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8);
 }
 
 static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
@@ -3258,55 +3271,7 @@ static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
 	base_addr |= (seg_desc->base1 << 16);
 	base_addr |= (seg_desc->base2 << 24);
 
-	return base_addr;
-}
-
-static int load_tss_segment32(struct kvm_vcpu *vcpu,
-			      struct desc_struct *seg_desc,
-			      struct tss_segment_32 *tss)
-{
-	u32 base_addr;
-
-	base_addr = get_tss_base_addr(vcpu, seg_desc);
-
-	return kvm_read_guest(vcpu->kvm, base_addr, tss,
-			      sizeof(struct tss_segment_32));
-}
-
-static int save_tss_segment32(struct kvm_vcpu *vcpu,
-			      struct desc_struct *seg_desc,
-			      struct tss_segment_32 *tss)
-{
-	u32 base_addr;
-
-	base_addr = get_tss_base_addr(vcpu, seg_desc);
-
-	return kvm_write_guest(vcpu->kvm, base_addr, tss,
-			       sizeof(struct tss_segment_32));
-}
-
-static int load_tss_segment16(struct kvm_vcpu *vcpu,
-			      struct desc_struct *seg_desc,
-			      struct tss_segment_16 *tss)
-{
-	u32 base_addr;
-
-	base_addr = get_tss_base_addr(vcpu, seg_desc);
-
-	return kvm_read_guest(vcpu->kvm, base_addr, tss,
-			      sizeof(struct tss_segment_16));
-}
-
-static int save_tss_segment16(struct kvm_vcpu *vcpu,
-			      struct desc_struct *seg_desc,
-			      struct tss_segment_16 *tss)
-{
-	u32 base_addr;
-
-	base_addr = get_tss_base_addr(vcpu, seg_desc);
-
-	return kvm_write_guest(vcpu->kvm, base_addr, tss,
-			       sizeof(struct tss_segment_16));
+	return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr);
 }
 
 static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
@@ -3466,20 +3431,26 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu,
 }
 
 static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
-			      struct desc_struct *cseg_desc,
+			      u32 old_tss_base,
 			      struct desc_struct *nseg_desc)
 {
 	struct tss_segment_16 tss_segment_16;
 	int ret = 0;
 
-	if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16))
+	if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
+			   sizeof tss_segment_16))
 		goto out;
 
 	save_state_to_tss16(vcpu, &tss_segment_16);
-	save_tss_segment16(vcpu, cseg_desc, &tss_segment_16);
 
-	if (load_tss_segment16(vcpu, nseg_desc, &tss_segment_16))
+	if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
+			    sizeof tss_segment_16))
+		goto out;
+
+	if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
+			   &tss_segment_16, sizeof tss_segment_16))
 		goto out;
+
 	if (load_state_from_tss16(vcpu, &tss_segment_16))
 		goto out;
 
@@ -3489,20 +3460,26 @@ out:
 }
 
 static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
-			      struct desc_struct *cseg_desc,
+			      u32 old_tss_base,
 			      struct desc_struct *nseg_desc)
 {
 	struct tss_segment_32 tss_segment_32;
 	int ret = 0;
 
-	if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32))
+	if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
+			   sizeof tss_segment_32))
 		goto out;
 
 	save_state_to_tss32(vcpu, &tss_segment_32);
-	save_tss_segment32(vcpu, cseg_desc, &tss_segment_32);
 
-	if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32))
+	if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
+			    sizeof tss_segment_32))
 		goto out;
+
+	if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
+			   &tss_segment_32, sizeof tss_segment_32))
+		goto out;
+
 	if (load_state_from_tss32(vcpu, &tss_segment_32))
 		goto out;
 
@@ -3517,16 +3494,20 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
 	struct desc_struct cseg_desc;
 	struct desc_struct nseg_desc;
 	int ret = 0;
+	u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
+	u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
 
-	kvm_get_segment(vcpu, &tr_seg, VCPU_SREG_TR);
+	old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base);
 
+	/* FIXME: Handle errors. Failure to read either TSS or their
+	 * descriptors should generate a pagefault.
+	 */
 	if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
 		goto out;
 
-	if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc))
+	if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc))
 		goto out;
 
-
 	if (reason != TASK_SWITCH_IRET) {
 		int cpl;
 
@@ -3544,8 +3525,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
 
 	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
 		cseg_desc.type &= ~(1 << 1); //clear the B flag
-		save_guest_segment_descriptor(vcpu, tr_seg.selector,
-					      &cseg_desc);
+		save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc);
 	}
 
 	if (reason == TASK_SWITCH_IRET) {
@@ -3557,10 +3537,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
 	kvm_x86_ops->cache_regs(vcpu);
 
 	if (nseg_desc.type & 8)
-		ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc,
+		ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base,
 					 &nseg_desc);
 	else
-		ret = kvm_task_switch_16(vcpu, tss_selector, &cseg_desc,
+		ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base,
 					 &nseg_desc);
 
 	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
@@ -3995,16 +3975,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	 */
 	if (!user_alloc) {
 		if (npages && !old.rmap) {
+			unsigned long userspace_addr;
+
 			down_write(&current->mm->mmap_sem);
-			memslot->userspace_addr = do_mmap(NULL, 0,
+			userspace_addr = do_mmap(NULL, 0,
 						 npages * PAGE_SIZE,
 						 PROT_READ | PROT_WRITE,
 						 MAP_SHARED | MAP_ANONYMOUS,
 						 0);
 			up_write(&current->mm->mmap_sem);
 
-			if (IS_ERR((void *)memslot->userspace_addr))
-				return PTR_ERR((void *)memslot->userspace_addr);
+			if (IS_ERR((void *)userspace_addr))
+				return PTR_ERR((void *)userspace_addr);
+
+			/* set userspace_addr atomically for kvm_handle_hva */
+			spin_lock(&kvm->mmu_lock);
+			memslot->userspace_addr = userspace_addr;
+			spin_unlock(&kvm->mmu_lock);
 		} else {
 			if (!old.user_alloc && old.rmap) {
 				int ret;
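This last hunk is the writer half of the protocol that kvm_handle_hva() in mmu.c relies on: userspace_addr is published under mmu_lock only after do_mmap() has succeeded, so the rmap walkers, which also hold mmu_lock, either see 0 and skip the slot or see a fully valid mapping, never a half-initialized one. A sketch of the reader-side test, factored out of the loop shown earlier (hypothetical helper name):

/* Must be called with mmu_lock held; a slot whose userspace_addr is
 * still 0 has not been published yet and is skipped. */
static int slot_contains_hva(struct kvm_memory_slot *memslot,
			     unsigned long hva)
{
	unsigned long start = memslot->userspace_addr;

	if (!start)
		return 0;
	return hva >= start &&
	       hva < start + (memslot->npages << PAGE_SHIFT);
}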