author     Marcelo Tosatti <mtosatti@redhat.com>  2009-12-23 11:35:21 -0500
committer  Marcelo Tosatti <mtosatti@redhat.com>  2010-03-01 10:35:44 -0500
commit     bc6678a33d9b952981a8e44a4f876c3ad64ca4d8 (patch)
tree       e26027179eb0d76f234509145a395dd6e5910074
parent     3ad26d8139a82b0510b1e0435ee82ae461d33401 (diff)
KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update
Use two steps for memslot deletion: mark the slot invalid (which stops
instantiation of new shadow pages for that slot, but allows destruction),
then instantiate the new empty slot.
Also simplifies kvm_handle_hva locking.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
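
[Editor's note: the following sketch is condensed from the hunks below and is
not part of the original commit message; identifiers are those used in the
patch. Readers bracket memslot access with srcu_read_lock()/srcu_read_unlock()
and fetch the pointer via rcu_dereference(); the updater publishes a modified
copy with rcu_assign_pointer() and waits out readers with
synchronize_srcu_expedited() before flushing shadow pages and freeing the old
copy.]

	/* Reader side (mmu notifiers, gfn lookups, memslot_id, ...): */
	int idx = srcu_read_lock(&kvm->srcu);
	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
	/* ... walk slots->memslots[0 .. slots->nmemslots - 1] ... */
	srcu_read_unlock(&kvm->srcu, idx);

	/* Updater side (__kvm_set_memory_region), deletion step 1:
	 * publish a copy with the slot marked invalid, wait for readers
	 * of the old copy, then zap shadow pages and free it.
	 */
	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
	slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
	old_memslots = kvm->memslots;
	rcu_assign_pointer(kvm->memslots, slots);
	synchronize_srcu_expedited(&kvm->srcu);
	kvm_arch_flush_shadow(kvm);
	kfree(old_memslots);

[Step 2 then installs the new (empty) slot the same way.]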
 arch/ia64/kvm/kvm-ia64.c |   4
 arch/x86/kvm/mmu.c       |  28
 arch/x86/kvm/vmx.c       |   6
 include/linux/kvm.h      |   2
 include/linux/kvm_host.h |   7
 virt/kvm/assigned-dev.c  |   8
 virt/kvm/iommu.c         |   4
 virt/kvm/kvm_main.c      | 141
 8 files changed, 136 insertions(+), 64 deletions(-)
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 0757c7027986..b2e4d16dd39e 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1382,7 +1382,7 @@ static void kvm_release_vm_pages(struct kvm *kvm)
 	int i, j;
 	unsigned long base_gfn;
 
-	slots = kvm->memslots;
+	slots = rcu_dereference(kvm->memslots);
 	for (i = 0; i < slots->nmemslots; i++) {
 		memslot = &slots->memslots[i];
 		base_gfn = memslot->base_gfn;
@@ -1837,6 +1837,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	struct kvm_memory_slot *memslot;
 	int is_dirty = 0;
 
+	down_write(&kvm->slots_lock);
 	spin_lock(&kvm->arch.dirty_log_lock);
 
 	r = kvm_ia64_sync_dirty_log(kvm, log);
@@ -1856,6 +1857,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	}
 	r = 0;
 out:
+	up_write(&kvm->slots_lock);
 	spin_unlock(&kvm->arch.dirty_log_lock);
 	return r;
 }
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 81f84d326a84..f8bf42a25995 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -29,6 +29,7 @@
 #include <linux/swap.h>
 #include <linux/hugetlb.h>
 #include <linux/compiler.h>
+#include <linux/srcu.h>
 
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
@@ -807,21 +808,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 {
 	int i, j;
 	int retval = 0;
-	struct kvm_memslots *slots = kvm->memslots;
+	struct kvm_memslots *slots;
+
+	slots = rcu_dereference(kvm->memslots);
 
-	/*
-	 * If mmap_sem isn't taken, we can look the memslots with only
-	 * the mmu_lock by skipping over the slots with userspace_addr == 0.
-	 */
 	for (i = 0; i < slots->nmemslots; i++) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
 		unsigned long start = memslot->userspace_addr;
 		unsigned long end;
 
-		/* mmu_lock protects userspace_addr */
-		if (!start)
-			continue;
-
 		end = start + (memslot->npages << PAGE_SHIFT);
 		if (hva >= start && hva < end) {
 			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
@@ -1617,7 +1612,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 
 static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
 {
-	int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
+	int slot = memslot_id(kvm, gfn);
 	struct kvm_mmu_page *sp = page_header(__pa(pte));
 
 	__set_bit(slot, sp->slot_bitmap);
@@ -3021,9 +3016,11 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 	int i;
 	unsigned int nr_mmu_pages;
 	unsigned int nr_pages = 0;
+	struct kvm_memslots *slots;
 
-	for (i = 0; i < kvm->memslots->nmemslots; i++)
-		nr_pages += kvm->memslots->memslots[i].npages;
+	slots = rcu_dereference(kvm->memslots);
+	for (i = 0; i < slots->nmemslots; i++)
+		nr_pages += slots->memslots[i].npages;
 
 	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
 	nr_mmu_pages = max(nr_mmu_pages,
@@ -3293,10 +3290,12 @@ static void audit_mappings(struct kvm_vcpu *vcpu)
 static int count_rmaps(struct kvm_vcpu *vcpu)
 {
 	int nmaps = 0;
-	int i, j, k;
+	int i, j, k, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
+	slots = rcu_dereference(kvm->memslots);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *m = &vcpu->kvm->memslots->memslots[i];
+		struct kvm_memory_slot *m = &slots->memslots[i];
 		struct kvm_rmap_desc *d;
 
 		for (j = 0; j < m->npages; ++j) {
@@ -3319,6 +3318,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
 			}
 		}
 	}
+	srcu_read_unlock(&kvm->srcu, idx);
 	return nmaps;
 }
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 18698799e365..f1cae7d6113d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1503,7 +1503,11 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 static gva_t rmode_tss_base(struct kvm *kvm)
 {
 	if (!kvm->arch.tss_addr) {
-		gfn_t base_gfn = kvm->memslots->memslots[0].base_gfn +
+		struct kvm_memslots *slots;
+		gfn_t base_gfn;
+
+		slots = rcu_dereference(kvm->memslots);
+		base_gfn = kvm->memslots->memslots[0].base_gfn +
 			 kvm->memslots->memslots[0].npages - 3;
 		return base_gfn << PAGE_SHIFT;
 	}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a24de0b1858e..f2feef68ffd6 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -103,7 +103,7 @@ struct kvm_userspace_memory_region {
 
 /* for kvm_memory_region::flags */
 #define KVM_MEM_LOG_DIRTY_PAGES 1UL
-
+#define KVM_MEMSLOT_INVALID (1UL << 1)
 
 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9af240387fe6..93bd30701ca7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -162,6 +162,7 @@ struct kvm {
 	struct rw_semaphore slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots *memslots;
+	struct srcu_struct srcu;
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
 	u32 bsp_vcpu_id;
 	struct kvm_vcpu *bsp_vcpu;
@@ -275,6 +276,7 @@ void kvm_set_page_accessed(struct page *page);
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
 			 struct kvm_memory_slot *slot, gfn_t gfn);
+int memslot_id(struct kvm *kvm, gfn_t gfn);
 void kvm_release_pfn_dirty(pfn_t);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
@@ -490,11 +492,6 @@ static inline void kvm_guest_exit(void)
 	current->flags &= ~PF_VCPU;
 }
 
-static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
-	return slot - kvm->memslots->memslots;
-}
-
 static inline gpa_t gfn_to_gpa(gfn_t gfn)
 {
 	return (gpa_t)gfn << PAGE_SHIFT;
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index f73de631e3ee..f51e684dd238 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -504,12 +504,12 @@ out:
 static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 				      struct kvm_assigned_pci_dev *assigned_dev)
 {
-	int r = 0;
+	int r = 0, idx;
 	struct kvm_assigned_dev_kernel *match;
 	struct pci_dev *dev;
 
 	mutex_lock(&kvm->lock);
-	down_read(&kvm->slots_lock);
+	idx = srcu_read_lock(&kvm->srcu);
 
 	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
 				      assigned_dev->assigned_dev_id);
@@ -573,7 +573,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	}
 
 out:
-	up_read(&kvm->slots_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 	mutex_unlock(&kvm->lock);
 	return r;
 out_list_del:
@@ -585,7 +585,7 @@ out_put:
 	pci_dev_put(dev);
 out_free:
 	kfree(match);
-	up_read(&kvm->slots_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 	mutex_unlock(&kvm->lock);
 	return r;
 }
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index cf567d8033db..65a51432c8e5 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -78,7 +78,7 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
 	int i, r = 0;
 	struct kvm_memslots *slots;
 
-	slots = kvm->memslots;
+	slots = rcu_dereference(kvm->memslots);
 
 	for (i = 0; i < slots->nmemslots; i++) {
 		r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
@@ -214,7 +214,7 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 	int i;
 	struct kvm_memslots *slots;
 
-	slots = kvm->memslots;
+	slots = rcu_dereference(kvm->memslots);
 
 	for (i = 0; i < slots->nmemslots; i++) {
 		kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 87d296d8b270..2bb24a814fdf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -44,6 +44,7 @@
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
 #include <linux/compat.h>
+#include <linux/srcu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -213,7 +214,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 					     unsigned long address)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int need_tlb_flush;
+	int need_tlb_flush, idx;
 
 	/*
 	 * When ->invalidate_page runs, the linux pte has been zapped
@@ -233,10 +234,12 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 	 * pte after kvm_unmap_hva returned, without noticing the page
 	 * is going to be freed.
 	 */
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	kvm->mmu_notifier_seq++;
 	need_tlb_flush = kvm_unmap_hva(kvm, address);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
@@ -250,11 +253,14 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
 					pte_t pte)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	int idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	kvm->mmu_notifier_seq++;
 	kvm_set_spte_hva(kvm, address, pte);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 }
 
 static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
@@ -263,8 +269,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 						    unsigned long end)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int need_tlb_flush = 0;
+	int need_tlb_flush = 0, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	/*
 	 * The count increase must become visible at unlock time as no
@@ -275,6 +282,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	for (; start < end; start += PAGE_SIZE)
 		need_tlb_flush |= kvm_unmap_hva(kvm, start);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
@@ -312,11 +320,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 					      unsigned long address)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int young;
+	int young, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	young = kvm_age_hva(kvm, address);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	if (young)
 		kvm_flush_remote_tlbs(kvm);
@@ -379,11 +389,15 @@ static struct kvm *kvm_create_vm(void)
 	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 	if (!kvm->memslots)
 		goto out_err;
+	if (init_srcu_struct(&kvm->srcu))
+		goto out_err;
 
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!page)
+	if (!page) {
+		cleanup_srcu_struct(&kvm->srcu);
 		goto out_err;
+	}
 
 	kvm->coalesced_mmio_ring =
 		(struct kvm_coalesced_mmio_ring *)page_address(page);
@@ -391,6 +405,7 @@ static struct kvm *kvm_create_vm(void)
 
 	r = kvm_init_mmu_notifier(kvm);
 	if (r) {
+		cleanup_srcu_struct(&kvm->srcu);
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 		put_page(page);
 #endif
@@ -480,6 +495,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 #else
 	kvm_arch_flush_shadow(kvm);
 #endif
+	cleanup_srcu_struct(&kvm->srcu);
 	kvm_arch_destroy_vm(kvm);
 	hardware_disable_all();
 	mmdrop(mm);
@@ -521,12 +537,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem,
 			    int user_alloc)
 {
-	int r;
+	int r, flush_shadow = 0;
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long i;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot old, new;
+	struct kvm_memslots *slots, *old_memslots;
 
 	r = -EINVAL;
 	/* General sanity checks */
@@ -588,15 +605,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		memset(new.rmap, 0, npages * sizeof(*new.rmap));
 
 		new.user_alloc = user_alloc;
-		/*
-		 * hva_to_rmmap() serialzies with the mmu_lock and to be
-		 * safe it has to ignore memslots with !user_alloc &&
-		 * !userspace_addr.
-		 */
-		if (user_alloc)
-			new.userspace_addr = mem->userspace_addr;
-		else
-			new.userspace_addr = 0;
+		new.userspace_addr = mem->userspace_addr;
 	}
 	if (!npages)
 		goto skip_lpage;
@@ -651,8 +660,9 @@ skip_lpage:
 		if (!new.dirty_bitmap)
 			goto out_free;
 		memset(new.dirty_bitmap, 0, dirty_bytes);
+		/* destroy any largepage mappings for dirty tracking */
 		if (old.npages)
-			kvm_arch_flush_shadow(kvm);
+			flush_shadow = 1;
 	}
 #else /* not defined CONFIG_S390 */
 	new.user_alloc = user_alloc;
@@ -660,34 +670,72 @@ skip_lpage:
 	new.userspace_addr = mem->userspace_addr;
 #endif /* not defined CONFIG_S390 */
 
-	if (!npages)
+	if (!npages) {
+		r = -ENOMEM;
+		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+		if (!slots)
+			goto out_free;
+		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+		if (mem->slot >= slots->nmemslots)
+			slots->nmemslots = mem->slot + 1;
+		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
+
+		old_memslots = kvm->memslots;
+		rcu_assign_pointer(kvm->memslots, slots);
+		synchronize_srcu_expedited(&kvm->srcu);
+		/* From this point no new shadow pages pointing to a deleted
+		 * memslot will be created.
+		 *
+		 * validation of sp->gfn happens in:
+		 * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+		 * - kvm_is_visible_gfn (mmu_check_roots)
+		 */
 		kvm_arch_flush_shadow(kvm);
+		kfree(old_memslots);
+	}
 
 	r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
 	if (r)
 		goto out_free;
 
-	spin_lock(&kvm->mmu_lock);
-	if (mem->slot >= kvm->memslots->nmemslots)
-		kvm->memslots->nmemslots = mem->slot + 1;
+#ifdef CONFIG_DMAR
+	/* map the pages in iommu page table */
+	if (npages) {
+		r = kvm_iommu_map_pages(kvm, &new);
+		if (r)
+			goto out_free;
+	}
+#endif
 
-	*memslot = new;
-	spin_unlock(&kvm->mmu_lock);
+	r = -ENOMEM;
+	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+	if (!slots)
+		goto out_free;
+	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+	if (mem->slot >= slots->nmemslots)
+		slots->nmemslots = mem->slot + 1;
+
+	/* actual memory is freed via old in kvm_free_physmem_slot below */
+	if (!npages) {
+		new.rmap = NULL;
+		new.dirty_bitmap = NULL;
+		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
+			new.lpage_info[i] = NULL;
+	}
+
+	slots->memslots[mem->slot] = new;
+	old_memslots = kvm->memslots;
+	rcu_assign_pointer(kvm->memslots, slots);
+	synchronize_srcu_expedited(&kvm->srcu);
 
 	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
 
-	kvm_free_physmem_slot(&old, npages ? &new : NULL);
-	/* Slot deletion case: we have to update the current slot */
-	spin_lock(&kvm->mmu_lock);
-	if (!npages)
-		*memslot = old;
-	spin_unlock(&kvm->mmu_lock);
-#ifdef CONFIG_DMAR
-	/* map the pages in iommu page table */
-	r = kvm_iommu_map_pages(kvm, memslot);
-	if (r)
-		goto out;
-#endif
+	kvm_free_physmem_slot(&old, &new);
+	kfree(old_memslots);
+
+	if (flush_shadow)
+		kvm_arch_flush_shadow(kvm);
+
 	return 0;
 
 out_free:
@@ -787,7 +835,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
-	struct kvm_memslots *slots = kvm->memslots;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
 	for (i = 0; i < slots->nmemslots; ++i) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
@@ -809,12 +857,15 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
-	struct kvm_memslots *slots = kvm->memslots;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
 	gfn = unalias_gfn(kvm, gfn);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
 
+		if (memslot->flags & KVM_MEMSLOT_INVALID)
+			continue;
+
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)
 			return 1;
@@ -823,13 +874,31 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
+int memslot_id(struct kvm *kvm, gfn_t gfn)
+{
+	int i;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+	struct kvm_memory_slot *memslot = NULL;
+
+	gfn = unalias_gfn(kvm, gfn);
+	for (i = 0; i < slots->nmemslots; ++i) {
+		memslot = &slots->memslots[i];
+
+		if (gfn >= memslot->base_gfn
+		    && gfn < memslot->base_gfn + memslot->npages)
+			break;
+	}
+
+	return memslot - slots->memslots;
+}
+
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *slot;
 
 	gfn = unalias_gfn(kvm, gfn);
 	slot = gfn_to_memslot_unaliased(kvm, gfn);
-	if (!slot)
+	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
 		return bad_hva();
 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
 }