diff options
author | Marcelo Tosatti <mtosatti@redhat.com> | 2007-12-20 19:18:26 -0500 |
---|---|---|
committer | Avi Kivity <avi@qumranet.com> | 2008-01-30 11:01:21 -0500 |
commit | aaee2c94f7a1f7726e360a6cfb40173bd552bcff (patch) | |
tree | e9066ae5509c349bfd6a187e85d52cc476e16a12 | |
parent | d7824fff896a1698a07a8046dc362f4500c302f7 (diff) |
KVM: MMU: Switch to mmu spinlock
Convert the synchronization of the shadow handling to a separate mmu_lock
spinlock.
Also guard fetch() by mmap_sem in read-mode to protect against alias
and memslot changes.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
-rw-r--r-- | arch/x86/kvm/mmu.c | 48 | ||||
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 10 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 2 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 3 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 3 |
5 files changed, 35 insertions, 31 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index c0b757be7b99..834698d24595 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -971,16 +971,12 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | |||
971 | { | 971 | { |
972 | } | 972 | } |
973 | 973 | ||
974 | static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | 974 | static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, |
975 | gfn_t gfn, struct page *page) | ||
975 | { | 976 | { |
976 | int level = PT32E_ROOT_LEVEL; | 977 | int level = PT32E_ROOT_LEVEL; |
977 | hpa_t table_addr = vcpu->arch.mmu.root_hpa; | 978 | hpa_t table_addr = vcpu->arch.mmu.root_hpa; |
978 | int pt_write = 0; | 979 | int pt_write = 0; |
979 | struct page *page; | ||
980 | |||
981 | down_read(&current->mm->mmap_sem); | ||
982 | page = gfn_to_page(vcpu->kvm, gfn); | ||
983 | up_read(&current->mm->mmap_sem); | ||
984 | 980 | ||
985 | for (; ; level--) { | 981 | for (; ; level--) { |
986 | u32 index = PT64_INDEX(v, level); | 982 | u32 index = PT64_INDEX(v, level); |
@@ -1022,9 +1018,17 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
1022 | { | 1018 | { |
1023 | int r; | 1019 | int r; |
1024 | 1020 | ||
1025 | mutex_lock(&vcpu->kvm->lock); | 1021 | struct page *page; |
1026 | r = __nonpaging_map(vcpu, v, write, gfn); | 1022 | |
1027 | mutex_unlock(&vcpu->kvm->lock); | 1023 | down_read(&current->mm->mmap_sem); |
1024 | page = gfn_to_page(vcpu->kvm, gfn); | ||
1025 | |||
1026 | spin_lock(&vcpu->kvm->mmu_lock); | ||
1027 | r = __nonpaging_map(vcpu, v, write, gfn, page); | ||
1028 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
1029 | |||
1030 | up_read(&current->mm->mmap_sem); | ||
1031 | |||
1028 | return r; | 1032 | return r; |
1029 | } | 1033 | } |
1030 | 1034 | ||
@@ -1045,7 +1049,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
1045 | 1049 | ||
1046 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 1050 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
1047 | return; | 1051 | return; |
1048 | mutex_lock(&vcpu->kvm->lock); | 1052 | spin_lock(&vcpu->kvm->mmu_lock); |
1049 | #ifdef CONFIG_X86_64 | 1053 | #ifdef CONFIG_X86_64 |
1050 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 1054 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { |
1051 | hpa_t root = vcpu->arch.mmu.root_hpa; | 1055 | hpa_t root = vcpu->arch.mmu.root_hpa; |
@@ -1053,7 +1057,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
1053 | sp = page_header(root); | 1057 | sp = page_header(root); |
1054 | --sp->root_count; | 1058 | --sp->root_count; |
1055 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 1059 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
1056 | mutex_unlock(&vcpu->kvm->lock); | 1060 | spin_unlock(&vcpu->kvm->mmu_lock); |
1057 | return; | 1061 | return; |
1058 | } | 1062 | } |
1059 | #endif | 1063 | #endif |
@@ -1067,7 +1071,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
1067 | } | 1071 | } |
1068 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; | 1072 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; |
1069 | } | 1073 | } |
1070 | mutex_unlock(&vcpu->kvm->lock); | 1074 | spin_unlock(&vcpu->kvm->mmu_lock); |
1071 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 1075 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
1072 | } | 1076 | } |
1073 | 1077 | ||
@@ -1270,9 +1274,9 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) | |||
1270 | r = mmu_topup_memory_caches(vcpu); | 1274 | r = mmu_topup_memory_caches(vcpu); |
1271 | if (r) | 1275 | if (r) |
1272 | goto out; | 1276 | goto out; |
1273 | mutex_lock(&vcpu->kvm->lock); | 1277 | spin_lock(&vcpu->kvm->mmu_lock); |
1274 | mmu_alloc_roots(vcpu); | 1278 | mmu_alloc_roots(vcpu); |
1275 | mutex_unlock(&vcpu->kvm->lock); | 1279 | spin_unlock(&vcpu->kvm->mmu_lock); |
1276 | kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); | 1280 | kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); |
1277 | kvm_mmu_flush_tlb(vcpu); | 1281 | kvm_mmu_flush_tlb(vcpu); |
1278 | out: | 1282 | out: |
@@ -1408,7 +1412,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1408 | 1412 | ||
1409 | pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); | 1413 | pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); |
1410 | mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); | 1414 | mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); |
1411 | mutex_lock(&vcpu->kvm->lock); | 1415 | spin_lock(&vcpu->kvm->mmu_lock); |
1412 | ++vcpu->kvm->stat.mmu_pte_write; | 1416 | ++vcpu->kvm->stat.mmu_pte_write; |
1413 | kvm_mmu_audit(vcpu, "pre pte write"); | 1417 | kvm_mmu_audit(vcpu, "pre pte write"); |
1414 | if (gfn == vcpu->arch.last_pt_write_gfn | 1418 | if (gfn == vcpu->arch.last_pt_write_gfn |
@@ -1477,7 +1481,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1477 | } | 1481 | } |
1478 | } | 1482 | } |
1479 | kvm_mmu_audit(vcpu, "post pte write"); | 1483 | kvm_mmu_audit(vcpu, "post pte write"); |
1480 | mutex_unlock(&vcpu->kvm->lock); | 1484 | spin_unlock(&vcpu->kvm->mmu_lock); |
1481 | if (vcpu->arch.update_pte.page) { | 1485 | if (vcpu->arch.update_pte.page) { |
1482 | kvm_release_page_clean(vcpu->arch.update_pte.page); | 1486 | kvm_release_page_clean(vcpu->arch.update_pte.page); |
1483 | vcpu->arch.update_pte.page = NULL; | 1487 | vcpu->arch.update_pte.page = NULL; |
@@ -1493,15 +1497,15 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
1493 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); | 1497 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); |
1494 | up_read(&current->mm->mmap_sem); | 1498 | up_read(&current->mm->mmap_sem); |
1495 | 1499 | ||
1496 | mutex_lock(&vcpu->kvm->lock); | 1500 | spin_lock(&vcpu->kvm->mmu_lock); |
1497 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 1501 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
1498 | mutex_unlock(&vcpu->kvm->lock); | 1502 | spin_unlock(&vcpu->kvm->mmu_lock); |
1499 | return r; | 1503 | return r; |
1500 | } | 1504 | } |
1501 | 1505 | ||
1502 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 1506 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) |
1503 | { | 1507 | { |
1504 | mutex_lock(&vcpu->kvm->lock); | 1508 | spin_lock(&vcpu->kvm->mmu_lock); |
1505 | while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES) { | 1509 | while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES) { |
1506 | struct kvm_mmu_page *sp; | 1510 | struct kvm_mmu_page *sp; |
1507 | 1511 | ||
@@ -1510,7 +1514,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | |||
1510 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1514 | kvm_mmu_zap_page(vcpu->kvm, sp); |
1511 | ++vcpu->kvm->stat.mmu_recycled; | 1515 | ++vcpu->kvm->stat.mmu_recycled; |
1512 | } | 1516 | } |
1513 | mutex_unlock(&vcpu->kvm->lock); | 1517 | spin_unlock(&vcpu->kvm->mmu_lock); |
1514 | } | 1518 | } |
1515 | 1519 | ||
1516 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | 1520 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) |
@@ -1642,10 +1646,10 @@ void kvm_mmu_zap_all(struct kvm *kvm) | |||
1642 | { | 1646 | { |
1643 | struct kvm_mmu_page *sp, *node; | 1647 | struct kvm_mmu_page *sp, *node; |
1644 | 1648 | ||
1645 | mutex_lock(&kvm->lock); | 1649 | spin_lock(&kvm->mmu_lock); |
1646 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) | 1650 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) |
1647 | kvm_mmu_zap_page(kvm, sp); | 1651 | kvm_mmu_zap_page(kvm, sp); |
1648 | mutex_unlock(&kvm->lock); | 1652 | spin_unlock(&kvm->mmu_lock); |
1649 | 1653 | ||
1650 | kvm_flush_remote_tlbs(kvm); | 1654 | kvm_flush_remote_tlbs(kvm); |
1651 | } | 1655 | } |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 3d7846ba26e1..a35b83a4fef2 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -387,7 +387,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
387 | */ | 387 | */ |
388 | r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault, | 388 | r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault, |
389 | fetch_fault); | 389 | fetch_fault); |
390 | up_read(&current->mm->mmap_sem); | ||
391 | 390 | ||
392 | /* | 391 | /* |
393 | * The page is not mapped by the guest. Let the guest handle it. | 392 | * The page is not mapped by the guest. Let the guest handle it. |
@@ -396,12 +395,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
396 | pgprintk("%s: guest page fault\n", __FUNCTION__); | 395 | pgprintk("%s: guest page fault\n", __FUNCTION__); |
397 | inject_page_fault(vcpu, addr, walker.error_code); | 396 | inject_page_fault(vcpu, addr, walker.error_code); |
398 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ | 397 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ |
398 | up_read(&current->mm->mmap_sem); | ||
399 | return 0; | 399 | return 0; |
400 | } | 400 | } |
401 | 401 | ||
402 | page = gfn_to_page(vcpu->kvm, walker.gfn); | 402 | page = gfn_to_page(vcpu->kvm, walker.gfn); |
403 | 403 | ||
404 | mutex_lock(&vcpu->kvm->lock); | 404 | spin_lock(&vcpu->kvm->mmu_lock); |
405 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 405 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
406 | &write_pt, page); | 406 | &write_pt, page); |
407 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, | 407 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, |
@@ -414,13 +414,15 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
414 | * mmio: emulate if accessible, otherwise its a guest fault. | 414 | * mmio: emulate if accessible, otherwise its a guest fault. |
415 | */ | 415 | */ |
416 | if (shadow_pte && is_io_pte(*shadow_pte)) { | 416 | if (shadow_pte && is_io_pte(*shadow_pte)) { |
417 | mutex_unlock(&vcpu->kvm->lock); | 417 | spin_unlock(&vcpu->kvm->mmu_lock); |
418 | up_read(&current->mm->mmap_sem); | ||
418 | return 1; | 419 | return 1; |
419 | } | 420 | } |
420 | 421 | ||
421 | ++vcpu->stat.pf_fixed; | 422 | ++vcpu->stat.pf_fixed; |
422 | kvm_mmu_audit(vcpu, "post page fault (fixed)"); | 423 | kvm_mmu_audit(vcpu, "post page fault (fixed)"); |
423 | mutex_unlock(&vcpu->kvm->lock); | 424 | spin_unlock(&vcpu->kvm->mmu_lock); |
425 | up_read(&current->mm->mmap_sem); | ||
424 | 426 | ||
425 | return write_pt; | 427 | return write_pt; |
426 | } | 428 | } |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c39493feba46..3d251f894a8d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -1477,7 +1477,6 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
1477 | struct kvm_userspace_memory_region kvm_userspace_mem; | 1477 | struct kvm_userspace_memory_region kvm_userspace_mem; |
1478 | int r = 0; | 1478 | int r = 0; |
1479 | 1479 | ||
1480 | mutex_lock(&kvm->lock); | ||
1481 | down_write(&current->mm->mmap_sem); | 1480 | down_write(&current->mm->mmap_sem); |
1482 | if (kvm->arch.apic_access_page) | 1481 | if (kvm->arch.apic_access_page) |
1483 | goto out; | 1482 | goto out; |
@@ -1491,7 +1490,6 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
1491 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); | 1490 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); |
1492 | out: | 1491 | out: |
1493 | up_write(&current->mm->mmap_sem); | 1492 | up_write(&current->mm->mmap_sem); |
1494 | mutex_unlock(&kvm->lock); | ||
1495 | return r; | 1493 | return r; |
1496 | } | 1494 | } |
1497 | 1495 | ||
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a020fb280540..2714068ee8bc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -104,7 +104,8 @@ struct kvm_memory_slot { | |||
104 | }; | 104 | }; |
105 | 105 | ||
106 | struct kvm { | 106 | struct kvm { |
107 | struct mutex lock; /* protects everything except vcpus */ | 107 | struct mutex lock; /* protects the vcpus array and APIC accesses */ |
108 | spinlock_t mmu_lock; | ||
108 | struct mm_struct *mm; /* userspace tied to this vm */ | 109 | struct mm_struct *mm; /* userspace tied to this vm */ |
109 | int nmemslots; | 110 | int nmemslots; |
110 | struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + | 111 | struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8d0b7c16c2f7..3c4fe26096fc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -165,6 +165,7 @@ static struct kvm *kvm_create_vm(void) | |||
165 | 165 | ||
166 | kvm->mm = current->mm; | 166 | kvm->mm = current->mm; |
167 | atomic_inc(&kvm->mm->mm_count); | 167 | atomic_inc(&kvm->mm->mm_count); |
168 | spin_lock_init(&kvm->mmu_lock); | ||
168 | kvm_io_bus_init(&kvm->pio_bus); | 169 | kvm_io_bus_init(&kvm->pio_bus); |
169 | mutex_init(&kvm->lock); | 170 | mutex_init(&kvm->lock); |
170 | kvm_io_bus_init(&kvm->mmio_bus); | 171 | kvm_io_bus_init(&kvm->mmio_bus); |
@@ -552,9 +553,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, | |||
552 | addr = gfn_to_hva(kvm, gfn); | 553 | addr = gfn_to_hva(kvm, gfn); |
553 | if (kvm_is_error_hva(addr)) | 554 | if (kvm_is_error_hva(addr)) |
554 | return -EFAULT; | 555 | return -EFAULT; |
555 | pagefault_disable(); | ||
556 | r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); | 556 | r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); |
557 | pagefault_enable(); | ||
558 | if (r) | 557 | if (r) |
559 | return -EFAULT; | 558 | return -EFAULT; |
560 | return 0; | 559 | return 0; |