author		Marcelo Tosatti <mtosatti@redhat.com>	2007-12-20 19:18:26 -0500
committer	Avi Kivity <avi@qumranet.com>	2008-01-30 11:01:21 -0500
commit		aaee2c94f7a1f7726e360a6cfb40173bd552bcff (patch)
tree		e9066ae5509c349bfd6a187e85d52cc476e16a12
parent		d7824fff896a1698a07a8046dc362f4500c302f7 (diff)
KVM: MMU: Switch to mmu spinlock
Convert the synchronization of the shadow handling to a separate mmu_lock
spinlock.

Also guard fetch() by mmap_sem in read-mode to protect against alias
and memslot changes.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
-rw-r--r--	arch/x86/kvm/mmu.c	48
-rw-r--r--	arch/x86/kvm/paging_tmpl.h	10
-rw-r--r--	arch/x86/kvm/vmx.c	2
-rw-r--r--	include/linux/kvm_host.h	3
-rw-r--r--	virt/kvm/kvm_main.c	3
5 files changed, 35 insertions(+), 31 deletions(-)
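The core of the change is the lock ordering in the fault paths: mmap_sem is taken for reading around everything that can sleep (the guest-table walk and gfn_to_page()), while the new mmu_lock spinlock covers only the shadow page-table update itself. A minimal sketch of that ordering follows; handle_fault() and map_shadow() are hypothetical stand-ins for the patch's nonpaging_map()/__nonpaging_map(), not functions the patch introduces.

/*
 * Illustrative sketch only: handle_fault() and map_shadow() are
 * hypothetical stand-ins for nonpaging_map()/__nonpaging_map() below.
 */
static int map_shadow(struct kvm_vcpu *vcpu, gva_t va, int write,
		      gfn_t gfn, struct page *page);	/* hypothetical */

static int handle_fault(struct kvm_vcpu *vcpu, gva_t va, int write, gfn_t gfn)
{
	struct page *page;
	int r;

	/* mmap_sem (read) comes first: it pins alias and memslot state,
	 * and gfn_to_page() may sleep to fault the page in. */
	down_read(&current->mm->mmap_sem);
	page = gfn_to_page(vcpu->kvm, gfn);

	/* mmu_lock is a spinlock, so only the non-sleeping shadow
	 * page-table update runs under it. */
	spin_lock(&vcpu->kvm->mmu_lock);
	r = map_shadow(vcpu, va, write, gfn, page);
	spin_unlock(&vcpu->kvm->mmu_lock);

	/* Dropped last, once the shadow state is consistent. */
	up_read(&current->mm->mmap_sem);
	return r;
}

The same discipline appears in FNAME(page_fault) below, where mmap_sem stays held across the walk and is released on each exit path only after mmu_lock is dropped.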
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c0b757be7b99..834698d24595 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -971,16 +971,12 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 {
 }
 
-static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
+static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
+			   gfn_t gfn, struct page *page)
 {
 	int level = PT32E_ROOT_LEVEL;
 	hpa_t table_addr = vcpu->arch.mmu.root_hpa;
 	int pt_write = 0;
-	struct page *page;
-
-	down_read(&current->mm->mmap_sem);
-	page = gfn_to_page(vcpu->kvm, gfn);
-	up_read(&current->mm->mmap_sem);
 
 	for (; ; level--) {
 		u32 index = PT64_INDEX(v, level);
@@ -1022,9 +1018,17 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 {
 	int r;
 
-	mutex_lock(&vcpu->kvm->lock);
-	r = __nonpaging_map(vcpu, v, write, gfn);
-	mutex_unlock(&vcpu->kvm->lock);
+	struct page *page;
+
+	down_read(&current->mm->mmap_sem);
+	page = gfn_to_page(vcpu->kvm, gfn);
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	r = __nonpaging_map(vcpu, v, write, gfn, page);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+
+	up_read(&current->mm->mmap_sem);
+
 	return r;
 }
 
@@ -1045,7 +1049,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 		return;
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 #ifdef CONFIG_X86_64
 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
@@ -1053,7 +1057,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 		sp = page_header(root);
 		--sp->root_count;
 		vcpu->arch.mmu.root_hpa = INVALID_PAGE;
-		mutex_unlock(&vcpu->kvm->lock);
+		spin_unlock(&vcpu->kvm->mmu_lock);
 		return;
 	}
 #endif
@@ -1067,7 +1071,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 		}
 		vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
 	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 }
 
@@ -1270,9 +1274,9 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
 		goto out;
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 	mmu_alloc_roots(vcpu);
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
 	kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
 	kvm_mmu_flush_tlb(vcpu);
 out:
@@ -1408,7 +1412,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 
 	pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
 	mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes);
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 	++vcpu->kvm->stat.mmu_pte_write;
 	kvm_mmu_audit(vcpu, "pre pte write");
 	if (gfn == vcpu->arch.last_pt_write_gfn
@@ -1477,7 +1481,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		}
 	}
 	kvm_mmu_audit(vcpu, "post pte write");
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
 	if (vcpu->arch.update_pte.page) {
 		kvm_release_page_clean(vcpu->arch.update_pte.page);
 		vcpu->arch.update_pte.page = NULL;
@@ -1493,15 +1497,15 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
 	up_read(&current->mm->mmap_sem);
 
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
 	return r;
 }
 
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 {
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 	while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES) {
 		struct kvm_mmu_page *sp;
 
@@ -1510,7 +1514,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 		kvm_mmu_zap_page(vcpu->kvm, sp);
 		++vcpu->kvm->stat.mmu_recycled;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
 }
 
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
@@ -1642,10 +1646,10 @@ void kvm_mmu_zap_all(struct kvm *kvm)
 {
 	struct kvm_mmu_page *sp, *node;
 
-	mutex_lock(&kvm->lock);
+	spin_lock(&kvm->mmu_lock);
 	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
 		kvm_mmu_zap_page(kvm, sp);
-	mutex_unlock(&kvm->lock);
+	spin_unlock(&kvm->mmu_lock);
 
 	kvm_flush_remote_tlbs(kvm);
 }
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 3d7846ba26e1..a35b83a4fef2 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -387,7 +387,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	 */
 	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
 			     fetch_fault);
-	up_read(&current->mm->mmap_sem);
 
 	/*
 	 * The page is not mapped by the guest. Let the guest handle it.
@@ -396,12 +395,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 		pgprintk("%s: guest page fault\n", __FUNCTION__);
 		inject_page_fault(vcpu, addr, walker.error_code);
 		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
+		up_read(&current->mm->mmap_sem);
 		return 0;
 	}
 
 	page = gfn_to_page(vcpu->kvm, walker.gfn);
 
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
 				  &write_pt, page);
 	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
@@ -414,13 +414,15 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	 * mmio: emulate if accessible, otherwise its a guest fault.
 	 */
 	if (shadow_pte && is_io_pte(*shadow_pte)) {
-		mutex_unlock(&vcpu->kvm->lock);
+		spin_unlock(&vcpu->kvm->mmu_lock);
+		up_read(&current->mm->mmap_sem);
 		return 1;
 	}
 
 	++vcpu->stat.pf_fixed;
 	kvm_mmu_audit(vcpu, "post page fault (fixed)");
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	up_read(&current->mm->mmap_sem);
 
 	return write_pt;
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c39493feba46..3d251f894a8d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1477,7 +1477,6 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
-	mutex_lock(&kvm->lock);
 	down_write(&current->mm->mmap_sem);
 	if (kvm->arch.apic_access_page)
 		goto out;
@@ -1491,7 +1490,6 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
 out:
 	up_write(&current->mm->mmap_sem);
-	mutex_unlock(&kvm->lock);
 	return r;
 }
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a020fb280540..2714068ee8bc 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -104,7 +104,8 @@ struct kvm_memory_slot {
 };
 
 struct kvm {
-	struct mutex lock; /* protects everything except vcpus */
+	struct mutex lock; /* protects the vcpus array and APIC accesses */
+	spinlock_t mmu_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	int nmemslots;
 	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8d0b7c16c2f7..3c4fe26096fc 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -165,6 +165,7 @@ static struct kvm *kvm_create_vm(void)
 
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
+	spin_lock_init(&kvm->mmu_lock);
 	kvm_io_bus_init(&kvm->pio_bus);
 	mutex_init(&kvm->lock);
 	kvm_io_bus_init(&kvm->mmio_bus);
@@ -552,9 +553,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
 	addr = gfn_to_hva(kvm, gfn);
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
-	pagefault_disable();
 	r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
-	pagefault_enable();
 	if (r)
 		return -EFAULT;
 	return 0;