author     Marcelo Tosatti <mtosatti@redhat.com>  2009-12-23 11:35:21 -0500
committer  Marcelo Tosatti <mtosatti@redhat.com>  2010-03-01 10:35:44 -0500
commit     bc6678a33d9b952981a8e44a4f876c3ad64ca4d8
tree       e26027179eb0d76f234509145a395dd6e5910074
parent     3ad26d8139a82b0510b1e0435ee82ae461d33401

KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update

Use two steps for memslot deletion: mark the slot invalid (which stops
instantiation of new shadow pages for that slot, but allows destruction),
then instantiate the new empty slot.

Also simplifies kvm_handle_hva locking.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
 arch/ia64/kvm/kvm-ia64.c |   4
 arch/x86/kvm/mmu.c       |  28
 arch/x86/kvm/vmx.c       |   6
 include/linux/kvm.h      |   2
 include/linux/kvm_host.h |   7
 virt/kvm/assigned-dev.c  |   8
 virt/kvm/iommu.c         |   4
 virt/kvm/kvm_main.c      | 141
 8 files changed, 136 insertions(+), 64 deletions(-)
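For orientation, here is the read/update pattern the patch adopts, pulled out of the diff below as a standalone sketch. The helper names my_count_pages() and my_delete_slot() are made up for illustration, error paths are trimmed, and updater serialization (via kvm->slots_lock, as in the patch) is assumed. Readers bracket every memslots lookup with srcu_read_lock()/srcu_read_unlock() and fetch the pointer through rcu_dereference(); the updater never modifies the live kvm_memslots, but copies it, publishes the copy with rcu_assign_pointer(), waits out in-flight readers with synchronize_srcu_expedited(), and only then frees the old copy.

/* Reader side: any path that walks the memslot array. */
static unsigned long my_count_pages(struct kvm *kvm)
{
        struct kvm_memslots *slots;
        unsigned long pages = 0;
        int i, idx;

        idx = srcu_read_lock(&kvm->srcu);
        slots = rcu_dereference(kvm->memslots);
        for (i = 0; i < slots->nmemslots; i++)
                pages += slots->memslots[i].npages;
        srcu_read_unlock(&kvm->srcu, idx);

        return pages;
}

/* Update side: copy, modify the copy, publish, wait, free. */
static int my_delete_slot(struct kvm *kvm, int slot_nr)
{
        struct kvm_memslots *slots, *old_memslots;

        slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
        if (!slots)
                return -ENOMEM;
        memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
        /* step one: readers now skip this slot (KVM_MEMSLOT_INVALID) */
        slots->memslots[slot_nr].flags |= KVM_MEMSLOT_INVALID;

        old_memslots = kvm->memslots;
        rcu_assign_pointer(kvm->memslots, slots);
        synchronize_srcu_expedited(&kvm->srcu);
        /* no SRCU reader can still hold old_memslots; safe to free */
        kfree(old_memslots);

        return 0;
}

SRCU is used rather than plain RCU because memslot readers (gfn_to_pfn() and friends) can sleep inside the read-side critical section.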
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 0757c7027986..b2e4d16dd39e 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1382,7 +1382,7 @@ static void kvm_release_vm_pages(struct kvm *kvm)
 	int i, j;
 	unsigned long base_gfn;
 
-	slots = kvm->memslots;
+	slots = rcu_dereference(kvm->memslots);
 	for (i = 0; i < slots->nmemslots; i++) {
 		memslot = &slots->memslots[i];
 		base_gfn = memslot->base_gfn;
@@ -1837,6 +1837,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	struct kvm_memory_slot *memslot;
 	int is_dirty = 0;
 
+	down_write(&kvm->slots_lock);
 	spin_lock(&kvm->arch.dirty_log_lock);
 
 	r = kvm_ia64_sync_dirty_log(kvm, log);
@@ -1856,6 +1857,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	}
 	r = 0;
 out:
+	up_write(&kvm->slots_lock);
 	spin_unlock(&kvm->arch.dirty_log_lock);
 	return r;
 }
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 81f84d326a84..f8bf42a25995 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -29,6 +29,7 @@
 #include <linux/swap.h>
 #include <linux/hugetlb.h>
 #include <linux/compiler.h>
+#include <linux/srcu.h>
 
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
@@ -807,21 +808,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 {
 	int i, j;
 	int retval = 0;
-	struct kvm_memslots *slots = kvm->memslots;
+	struct kvm_memslots *slots;
+
+	slots = rcu_dereference(kvm->memslots);
 
-	/*
-	 * If mmap_sem isn't taken, we can look the memslots with only
-	 * the mmu_lock by skipping over the slots with userspace_addr == 0.
-	 */
 	for (i = 0; i < slots->nmemslots; i++) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
 		unsigned long start = memslot->userspace_addr;
 		unsigned long end;
 
-		/* mmu_lock protects userspace_addr */
-		if (!start)
-			continue;
-
 		end = start + (memslot->npages << PAGE_SHIFT);
 		if (hva >= start && hva < end) {
 			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
@@ -1617,7 +1612,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 
 static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
 {
-	int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
+	int slot = memslot_id(kvm, gfn);
 	struct kvm_mmu_page *sp = page_header(__pa(pte));
 
 	__set_bit(slot, sp->slot_bitmap);
@@ -3021,9 +3016,11 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 	int i;
 	unsigned int nr_mmu_pages;
 	unsigned int nr_pages = 0;
+	struct kvm_memslots *slots;
 
-	for (i = 0; i < kvm->memslots->nmemslots; i++)
-		nr_pages += kvm->memslots->memslots[i].npages;
+	slots = rcu_dereference(kvm->memslots);
+	for (i = 0; i < slots->nmemslots; i++)
+		nr_pages += slots->memslots[i].npages;
 
 	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
 	nr_mmu_pages = max(nr_mmu_pages,
@@ -3293,10 +3290,12 @@ static void audit_mappings(struct kvm_vcpu *vcpu)
 static int count_rmaps(struct kvm_vcpu *vcpu)
 {
 	int nmaps = 0;
-	int i, j, k;
+	int i, j, k, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
+	slots = rcu_dereference(kvm->memslots);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *m = &vcpu->kvm->memslots->memslots[i];
+		struct kvm_memory_slot *m = &slots->memslots[i];
 		struct kvm_rmap_desc *d;
 
 		for (j = 0; j < m->npages; ++j) {
@@ -3319,6 +3318,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
 			}
 		}
 	}
+	srcu_read_unlock(&kvm->srcu, idx);
 	return nmaps;
 }
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 18698799e365..f1cae7d6113d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1503,7 +1503,11 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 static gva_t rmode_tss_base(struct kvm *kvm)
 {
 	if (!kvm->arch.tss_addr) {
-		gfn_t base_gfn = kvm->memslots->memslots[0].base_gfn +
+		struct kvm_memslots *slots;
+		gfn_t base_gfn;
+
+		slots = rcu_dereference(kvm->memslots);
+		base_gfn = kvm->memslots->memslots[0].base_gfn +
 			 kvm->memslots->memslots[0].npages - 3;
 		return base_gfn << PAGE_SHIFT;
 	}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a24de0b1858e..f2feef68ffd6 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -103,7 +103,7 @@ struct kvm_userspace_memory_region {
 
 /* for kvm_memory_region::flags */
 #define KVM_MEM_LOG_DIRTY_PAGES  1UL
-
+#define KVM_MEMSLOT_INVALID      (1UL << 1)
 
 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9af240387fe6..93bd30701ca7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -162,6 +162,7 @@ struct kvm {
 	struct rw_semaphore slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots *memslots;
+	struct srcu_struct srcu;
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
 	u32 bsp_vcpu_id;
 	struct kvm_vcpu *bsp_vcpu;
@@ -275,6 +276,7 @@ void kvm_set_page_accessed(struct page *page);
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
 			 struct kvm_memory_slot *slot, gfn_t gfn);
+int memslot_id(struct kvm *kvm, gfn_t gfn);
 void kvm_release_pfn_dirty(pfn_t);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
@@ -490,11 +492,6 @@ static inline void kvm_guest_exit(void)
 	current->flags &= ~PF_VCPU;
 }
 
-static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
-	return slot - kvm->memslots->memslots;
-}
-
 static inline gpa_t gfn_to_gpa(gfn_t gfn)
 {
 	return (gpa_t)gfn << PAGE_SHIFT;
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index f73de631e3ee..f51e684dd238 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -504,12 +504,12 @@ out:
 static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 				      struct kvm_assigned_pci_dev *assigned_dev)
 {
-	int r = 0;
+	int r = 0, idx;
 	struct kvm_assigned_dev_kernel *match;
 	struct pci_dev *dev;
 
 	mutex_lock(&kvm->lock);
-	down_read(&kvm->slots_lock);
+	idx = srcu_read_lock(&kvm->srcu);
 
 	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
 				      assigned_dev->assigned_dev_id);
@@ -573,7 +573,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	}
 
 out:
-	up_read(&kvm->slots_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 	mutex_unlock(&kvm->lock);
 	return r;
 out_list_del:
@@ -585,7 +585,7 @@ out_put:
 	pci_dev_put(dev);
 out_free:
 	kfree(match);
-	up_read(&kvm->slots_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 	mutex_unlock(&kvm->lock);
 	return r;
 }
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index cf567d8033db..65a51432c8e5 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -78,7 +78,7 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
 	int i, r = 0;
 	struct kvm_memslots *slots;
 
-	slots = kvm->memslots;
+	slots = rcu_dereference(kvm->memslots);
 
 	for (i = 0; i < slots->nmemslots; i++) {
 		r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
@@ -214,7 +214,7 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 	int i;
 	struct kvm_memslots *slots;
 
-	slots = kvm->memslots;
+	slots = rcu_dereference(kvm->memslots);
 
 	for (i = 0; i < slots->nmemslots; i++) {
 		kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 87d296d8b270..2bb24a814fdf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -44,6 +44,7 @@
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
 #include <linux/compat.h>
+#include <linux/srcu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -213,7 +214,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 					     unsigned long address)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int need_tlb_flush;
+	int need_tlb_flush, idx;
 
 	/*
 	 * When ->invalidate_page runs, the linux pte has been zapped
@@ -233,10 +234,12 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 	 * pte after kvm_unmap_hva returned, without noticing the page
 	 * is going to be freed.
 	 */
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	kvm->mmu_notifier_seq++;
 	need_tlb_flush = kvm_unmap_hva(kvm, address);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
@@ -250,11 +253,14 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
 					pte_t pte)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	int idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	kvm->mmu_notifier_seq++;
 	kvm_set_spte_hva(kvm, address, pte);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 }
 
 static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
@@ -263,8 +269,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 					    unsigned long end)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int need_tlb_flush = 0;
+	int need_tlb_flush = 0, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	/*
 	 * The count increase must become visible at unlock time as no
@@ -275,6 +282,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	for (; start < end; start += PAGE_SIZE)
 		need_tlb_flush |= kvm_unmap_hva(kvm, start);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
@@ -312,11 +320,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 					      unsigned long address)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int young;
+	int young, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	young = kvm_age_hva(kvm, address);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	if (young)
 		kvm_flush_remote_tlbs(kvm);
@@ -379,11 +389,15 @@ static struct kvm *kvm_create_vm(void)
 	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 	if (!kvm->memslots)
 		goto out_err;
+	if (init_srcu_struct(&kvm->srcu))
+		goto out_err;
 
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!page)
+	if (!page) {
+		cleanup_srcu_struct(&kvm->srcu);
 		goto out_err;
+	}
 
 	kvm->coalesced_mmio_ring =
 			(struct kvm_coalesced_mmio_ring *)page_address(page);
@@ -391,6 +405,7 @@ static struct kvm *kvm_create_vm(void)
 
 	r = kvm_init_mmu_notifier(kvm);
 	if (r) {
+		cleanup_srcu_struct(&kvm->srcu);
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 		put_page(page);
 #endif
@@ -480,6 +495,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 #else
 	kvm_arch_flush_shadow(kvm);
 #endif
+	cleanup_srcu_struct(&kvm->srcu);
 	kvm_arch_destroy_vm(kvm);
 	hardware_disable_all();
 	mmdrop(mm);
@@ -521,12 +537,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem,
 			    int user_alloc)
 {
-	int r;
+	int r, flush_shadow = 0;
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long i;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot old, new;
+	struct kvm_memslots *slots, *old_memslots;
 
 	r = -EINVAL;
 	/* General sanity checks */
@@ -588,15 +605,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		memset(new.rmap, 0, npages * sizeof(*new.rmap));
 
 		new.user_alloc = user_alloc;
-		/*
-		 * hva_to_rmmap() serialzies with the mmu_lock and to be
-		 * safe it has to ignore memslots with !user_alloc &&
-		 * !userspace_addr.
-		 */
-		if (user_alloc)
-			new.userspace_addr = mem->userspace_addr;
-		else
-			new.userspace_addr = 0;
+		new.userspace_addr = mem->userspace_addr;
 	}
 	if (!npages)
 		goto skip_lpage;
@@ -651,8 +660,9 @@ skip_lpage:
 		if (!new.dirty_bitmap)
 			goto out_free;
 		memset(new.dirty_bitmap, 0, dirty_bytes);
+		/* destroy any largepage mappings for dirty tracking */
 		if (old.npages)
-			kvm_arch_flush_shadow(kvm);
+			flush_shadow = 1;
 	}
 #else  /* not defined CONFIG_S390 */
 	new.user_alloc = user_alloc;
@@ -660,34 +670,72 @@ skip_lpage:
 	new.userspace_addr = mem->userspace_addr;
 #endif /* not defined CONFIG_S390 */
 
-	if (!npages)
+	if (!npages) {
+		r = -ENOMEM;
+		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+		if (!slots)
+			goto out_free;
+		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+		if (mem->slot >= slots->nmemslots)
+			slots->nmemslots = mem->slot + 1;
+		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
+
+		old_memslots = kvm->memslots;
+		rcu_assign_pointer(kvm->memslots, slots);
+		synchronize_srcu_expedited(&kvm->srcu);
+		/* From this point no new shadow pages pointing to a deleted
+		 * memslot will be created.
+		 *
+		 * validation of sp->gfn happens in:
+		 * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+		 * - kvm_is_visible_gfn (mmu_check_roots)
+		 */
 		kvm_arch_flush_shadow(kvm);
+		kfree(old_memslots);
+	}
 
 	r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
 	if (r)
 		goto out_free;
 
-	spin_lock(&kvm->mmu_lock);
-	if (mem->slot >= kvm->memslots->nmemslots)
-		kvm->memslots->nmemslots = mem->slot + 1;
+#ifdef CONFIG_DMAR
+	/* map the pages in iommu page table */
+	if (npages) {
+		r = kvm_iommu_map_pages(kvm, &new);
+		if (r)
+			goto out_free;
+	}
+#endif
 
-	*memslot = new;
-	spin_unlock(&kvm->mmu_lock);
+	r = -ENOMEM;
+	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+	if (!slots)
+		goto out_free;
+	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+	if (mem->slot >= slots->nmemslots)
+		slots->nmemslots = mem->slot + 1;
+
+	/* actual memory is freed via old in kvm_free_physmem_slot below */
+	if (!npages) {
+		new.rmap = NULL;
+		new.dirty_bitmap = NULL;
+		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
+			new.lpage_info[i] = NULL;
+	}
+
+	slots->memslots[mem->slot] = new;
+	old_memslots = kvm->memslots;
+	rcu_assign_pointer(kvm->memslots, slots);
+	synchronize_srcu_expedited(&kvm->srcu);
 
 	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
 
-	kvm_free_physmem_slot(&old, npages ? &new : NULL);
-	/* Slot deletion case: we have to update the current slot */
-	spin_lock(&kvm->mmu_lock);
-	if (!npages)
-		*memslot = old;
-	spin_unlock(&kvm->mmu_lock);
-#ifdef CONFIG_DMAR
-	/* map the pages in iommu page table */
-	r = kvm_iommu_map_pages(kvm, memslot);
-	if (r)
-		goto out;
-#endif
+	kvm_free_physmem_slot(&old, &new);
+	kfree(old_memslots);
+
+	if (flush_shadow)
+		kvm_arch_flush_shadow(kvm);
+
 	return 0;
 
 out_free:
@@ -787,7 +835,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
-	struct kvm_memslots *slots = kvm->memslots;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
 	for (i = 0; i < slots->nmemslots; ++i) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
@@ -809,12 +857,15 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
-	struct kvm_memslots *slots = kvm->memslots;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
 	gfn = unalias_gfn(kvm, gfn);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
 
+		if (memslot->flags & KVM_MEMSLOT_INVALID)
+			continue;
+
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)
 			return 1;
@@ -823,13 +874,31 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
+int memslot_id(struct kvm *kvm, gfn_t gfn)
+{
+	int i;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+	struct kvm_memory_slot *memslot = NULL;
+
+	gfn = unalias_gfn(kvm, gfn);
+	for (i = 0; i < slots->nmemslots; ++i) {
+		memslot = &slots->memslots[i];
+
+		if (gfn >= memslot->base_gfn
+		    && gfn < memslot->base_gfn + memslot->npages)
+			break;
+	}
+
+	return memslot - slots->memslots;
+}
+
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *slot;
 
 	gfn = unalias_gfn(kvm, gfn);
 	slot = gfn_to_memslot_unaliased(kvm, gfn);
-	if (!slot)
+	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
 		return bad_hva();
 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
 }