aboutsummaryrefslogtreecommitdiffstats
path: root/virt/kvm/kvm_main.c
diff options
context:
space:
mode:
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--virt/kvm/kvm_main.c141
1 files changed, 105 insertions, 36 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 87d296d8b270..2bb24a814fdf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -44,6 +44,7 @@
44#include <linux/bitops.h> 44#include <linux/bitops.h>
45#include <linux/spinlock.h> 45#include <linux/spinlock.h>
46#include <linux/compat.h> 46#include <linux/compat.h>
47#include <linux/srcu.h>
47 48
48#include <asm/processor.h> 49#include <asm/processor.h>
49#include <asm/io.h> 50#include <asm/io.h>
@@ -213,7 +214,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
213 unsigned long address) 214 unsigned long address)
214{ 215{
215 struct kvm *kvm = mmu_notifier_to_kvm(mn); 216 struct kvm *kvm = mmu_notifier_to_kvm(mn);
216 int need_tlb_flush; 217 int need_tlb_flush, idx;
217 218
218 /* 219 /*
219 * When ->invalidate_page runs, the linux pte has been zapped 220 * When ->invalidate_page runs, the linux pte has been zapped
@@ -233,10 +234,12 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
233 * pte after kvm_unmap_hva returned, without noticing the page 234 * pte after kvm_unmap_hva returned, without noticing the page
234 * is going to be freed. 235 * is going to be freed.
235 */ 236 */
237 idx = srcu_read_lock(&kvm->srcu);
236 spin_lock(&kvm->mmu_lock); 238 spin_lock(&kvm->mmu_lock);
237 kvm->mmu_notifier_seq++; 239 kvm->mmu_notifier_seq++;
238 need_tlb_flush = kvm_unmap_hva(kvm, address); 240 need_tlb_flush = kvm_unmap_hva(kvm, address);
239 spin_unlock(&kvm->mmu_lock); 241 spin_unlock(&kvm->mmu_lock);
242 srcu_read_unlock(&kvm->srcu, idx);
240 243
241 /* we've to flush the tlb before the pages can be freed */ 244 /* we've to flush the tlb before the pages can be freed */
242 if (need_tlb_flush) 245 if (need_tlb_flush)
@@ -250,11 +253,14 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
250 pte_t pte) 253 pte_t pte)
251{ 254{
252 struct kvm *kvm = mmu_notifier_to_kvm(mn); 255 struct kvm *kvm = mmu_notifier_to_kvm(mn);
256 int idx;
253 257
258 idx = srcu_read_lock(&kvm->srcu);
254 spin_lock(&kvm->mmu_lock); 259 spin_lock(&kvm->mmu_lock);
255 kvm->mmu_notifier_seq++; 260 kvm->mmu_notifier_seq++;
256 kvm_set_spte_hva(kvm, address, pte); 261 kvm_set_spte_hva(kvm, address, pte);
257 spin_unlock(&kvm->mmu_lock); 262 spin_unlock(&kvm->mmu_lock);
263 srcu_read_unlock(&kvm->srcu, idx);
258} 264}
259 265
260static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, 266static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
@@ -263,8 +269,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
263 unsigned long end) 269 unsigned long end)
264{ 270{
265 struct kvm *kvm = mmu_notifier_to_kvm(mn); 271 struct kvm *kvm = mmu_notifier_to_kvm(mn);
266 int need_tlb_flush = 0; 272 int need_tlb_flush = 0, idx;
267 273
274 idx = srcu_read_lock(&kvm->srcu);
268 spin_lock(&kvm->mmu_lock); 275 spin_lock(&kvm->mmu_lock);
269 /* 276 /*
270 * The count increase must become visible at unlock time as no 277 * The count increase must become visible at unlock time as no
@@ -275,6 +282,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
275 for (; start < end; start += PAGE_SIZE) 282 for (; start < end; start += PAGE_SIZE)
276 need_tlb_flush |= kvm_unmap_hva(kvm, start); 283 need_tlb_flush |= kvm_unmap_hva(kvm, start);
277 spin_unlock(&kvm->mmu_lock); 284 spin_unlock(&kvm->mmu_lock);
285 srcu_read_unlock(&kvm->srcu, idx);
278 286
279 /* we've to flush the tlb before the pages can be freed */ 287 /* we've to flush the tlb before the pages can be freed */
280 if (need_tlb_flush) 288 if (need_tlb_flush)
@@ -312,11 +320,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
312 unsigned long address) 320 unsigned long address)
313{ 321{
314 struct kvm *kvm = mmu_notifier_to_kvm(mn); 322 struct kvm *kvm = mmu_notifier_to_kvm(mn);
315 int young; 323 int young, idx;
316 324
325 idx = srcu_read_lock(&kvm->srcu);
317 spin_lock(&kvm->mmu_lock); 326 spin_lock(&kvm->mmu_lock);
318 young = kvm_age_hva(kvm, address); 327 young = kvm_age_hva(kvm, address);
319 spin_unlock(&kvm->mmu_lock); 328 spin_unlock(&kvm->mmu_lock);
329 srcu_read_unlock(&kvm->srcu, idx);
320 330
321 if (young) 331 if (young)
322 kvm_flush_remote_tlbs(kvm); 332 kvm_flush_remote_tlbs(kvm);
@@ -379,11 +389,15 @@ static struct kvm *kvm_create_vm(void)
379 kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 389 kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
380 if (!kvm->memslots) 390 if (!kvm->memslots)
381 goto out_err; 391 goto out_err;
392 if (init_srcu_struct(&kvm->srcu))
393 goto out_err;
382 394
383#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 395#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
384 page = alloc_page(GFP_KERNEL | __GFP_ZERO); 396 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
385 if (!page) 397 if (!page) {
398 cleanup_srcu_struct(&kvm->srcu);
386 goto out_err; 399 goto out_err;
400 }
387 401
388 kvm->coalesced_mmio_ring = 402 kvm->coalesced_mmio_ring =
389 (struct kvm_coalesced_mmio_ring *)page_address(page); 403 (struct kvm_coalesced_mmio_ring *)page_address(page);
@@ -391,6 +405,7 @@ static struct kvm *kvm_create_vm(void)
391 405
392 r = kvm_init_mmu_notifier(kvm); 406 r = kvm_init_mmu_notifier(kvm);
393 if (r) { 407 if (r) {
408 cleanup_srcu_struct(&kvm->srcu);
394#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 409#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
395 put_page(page); 410 put_page(page);
396#endif 411#endif
@@ -480,6 +495,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
480#else 495#else
481 kvm_arch_flush_shadow(kvm); 496 kvm_arch_flush_shadow(kvm);
482#endif 497#endif
498 cleanup_srcu_struct(&kvm->srcu);
483 kvm_arch_destroy_vm(kvm); 499 kvm_arch_destroy_vm(kvm);
484 hardware_disable_all(); 500 hardware_disable_all();
485 mmdrop(mm); 501 mmdrop(mm);
@@ -521,12 +537,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
521 struct kvm_userspace_memory_region *mem, 537 struct kvm_userspace_memory_region *mem,
522 int user_alloc) 538 int user_alloc)
523{ 539{
524 int r; 540 int r, flush_shadow = 0;
525 gfn_t base_gfn; 541 gfn_t base_gfn;
526 unsigned long npages; 542 unsigned long npages;
527 unsigned long i; 543 unsigned long i;
528 struct kvm_memory_slot *memslot; 544 struct kvm_memory_slot *memslot;
529 struct kvm_memory_slot old, new; 545 struct kvm_memory_slot old, new;
546 struct kvm_memslots *slots, *old_memslots;
530 547
531 r = -EINVAL; 548 r = -EINVAL;
532 /* General sanity checks */ 549 /* General sanity checks */
@@ -588,15 +605,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
588 memset(new.rmap, 0, npages * sizeof(*new.rmap)); 605 memset(new.rmap, 0, npages * sizeof(*new.rmap));
589 606
590 new.user_alloc = user_alloc; 607 new.user_alloc = user_alloc;
591 /* 608 new.userspace_addr = mem->userspace_addr;
592 * hva_to_rmmap() serialzies with the mmu_lock and to be
593 * safe it has to ignore memslots with !user_alloc &&
594 * !userspace_addr.
595 */
596 if (user_alloc)
597 new.userspace_addr = mem->userspace_addr;
598 else
599 new.userspace_addr = 0;
600 } 609 }
601 if (!npages) 610 if (!npages)
602 goto skip_lpage; 611 goto skip_lpage;
@@ -651,8 +660,9 @@ skip_lpage:
651 if (!new.dirty_bitmap) 660 if (!new.dirty_bitmap)
652 goto out_free; 661 goto out_free;
653 memset(new.dirty_bitmap, 0, dirty_bytes); 662 memset(new.dirty_bitmap, 0, dirty_bytes);
663 /* destroy any largepage mappings for dirty tracking */
654 if (old.npages) 664 if (old.npages)
655 kvm_arch_flush_shadow(kvm); 665 flush_shadow = 1;
656 } 666 }
657#else /* not defined CONFIG_S390 */ 667#else /* not defined CONFIG_S390 */
658 new.user_alloc = user_alloc; 668 new.user_alloc = user_alloc;
@@ -660,34 +670,72 @@ skip_lpage:
660 new.userspace_addr = mem->userspace_addr; 670 new.userspace_addr = mem->userspace_addr;
661#endif /* not defined CONFIG_S390 */ 671#endif /* not defined CONFIG_S390 */
662 672
663 if (!npages) 673 if (!npages) {
674 r = -ENOMEM;
675 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
676 if (!slots)
677 goto out_free;
678 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
679 if (mem->slot >= slots->nmemslots)
680 slots->nmemslots = mem->slot + 1;
681 slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
682
683 old_memslots = kvm->memslots;
684 rcu_assign_pointer(kvm->memslots, slots);
685 synchronize_srcu_expedited(&kvm->srcu);
686 /* From this point no new shadow pages pointing to a deleted
687 * memslot will be created.
688 *
689 * validation of sp->gfn happens in:
690 * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
691 * - kvm_is_visible_gfn (mmu_check_roots)
692 */
664 kvm_arch_flush_shadow(kvm); 693 kvm_arch_flush_shadow(kvm);
694 kfree(old_memslots);
695 }
665 696
666 r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); 697 r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
667 if (r) 698 if (r)
668 goto out_free; 699 goto out_free;
669 700
670 spin_lock(&kvm->mmu_lock); 701#ifdef CONFIG_DMAR
671 if (mem->slot >= kvm->memslots->nmemslots) 702 /* map the pages in iommu page table */
672 kvm->memslots->nmemslots = mem->slot + 1; 703 if (npages) {
704 r = kvm_iommu_map_pages(kvm, &new);
705 if (r)
706 goto out_free;
707 }
708#endif
673 709
674 *memslot = new; 710 r = -ENOMEM;
675 spin_unlock(&kvm->mmu_lock); 711 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
712 if (!slots)
713 goto out_free;
714 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
715 if (mem->slot >= slots->nmemslots)
716 slots->nmemslots = mem->slot + 1;
717
718 /* actual memory is freed via old in kvm_free_physmem_slot below */
719 if (!npages) {
720 new.rmap = NULL;
721 new.dirty_bitmap = NULL;
722 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
723 new.lpage_info[i] = NULL;
724 }
725
726 slots->memslots[mem->slot] = new;
727 old_memslots = kvm->memslots;
728 rcu_assign_pointer(kvm->memslots, slots);
729 synchronize_srcu_expedited(&kvm->srcu);
676 730
677 kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); 731 kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
678 732
679 kvm_free_physmem_slot(&old, npages ? &new : NULL); 733 kvm_free_physmem_slot(&old, &new);
680 /* Slot deletion case: we have to update the current slot */ 734 kfree(old_memslots);
681 spin_lock(&kvm->mmu_lock); 735
682 if (!npages) 736 if (flush_shadow)
683 *memslot = old; 737 kvm_arch_flush_shadow(kvm);
684 spin_unlock(&kvm->mmu_lock); 738
685#ifdef CONFIG_DMAR
686 /* map the pages in iommu page table */
687 r = kvm_iommu_map_pages(kvm, memslot);
688 if (r)
689 goto out;
690#endif
691 return 0; 739 return 0;
692 740
693out_free: 741out_free:
@@ -787,7 +835,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
787struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) 835struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
788{ 836{
789 int i; 837 int i;
790 struct kvm_memslots *slots = kvm->memslots; 838 struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
791 839
792 for (i = 0; i < slots->nmemslots; ++i) { 840 for (i = 0; i < slots->nmemslots; ++i) {
793 struct kvm_memory_slot *memslot = &slots->memslots[i]; 841 struct kvm_memory_slot *memslot = &slots->memslots[i];
@@ -809,12 +857,15 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
809int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) 857int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
810{ 858{
811 int i; 859 int i;
812 struct kvm_memslots *slots = kvm->memslots; 860 struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
813 861
814 gfn = unalias_gfn(kvm, gfn); 862 gfn = unalias_gfn(kvm, gfn);
815 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { 863 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
816 struct kvm_memory_slot *memslot = &slots->memslots[i]; 864 struct kvm_memory_slot *memslot = &slots->memslots[i];
817 865
866 if (memslot->flags & KVM_MEMSLOT_INVALID)
867 continue;
868
818 if (gfn >= memslot->base_gfn 869 if (gfn >= memslot->base_gfn
819 && gfn < memslot->base_gfn + memslot->npages) 870 && gfn < memslot->base_gfn + memslot->npages)
820 return 1; 871 return 1;
@@ -823,13 +874,31 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
823} 874}
824EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); 875EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
825 876
877int memslot_id(struct kvm *kvm, gfn_t gfn)
878{
879 int i;
880 struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
881 struct kvm_memory_slot *memslot = NULL;
882
883 gfn = unalias_gfn(kvm, gfn);
884 for (i = 0; i < slots->nmemslots; ++i) {
885 memslot = &slots->memslots[i];
886
887 if (gfn >= memslot->base_gfn
888 && gfn < memslot->base_gfn + memslot->npages)
889 break;
890 }
891
892 return memslot - slots->memslots;
893}
894
826unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) 895unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
827{ 896{
828 struct kvm_memory_slot *slot; 897 struct kvm_memory_slot *slot;
829 898
830 gfn = unalias_gfn(kvm, gfn); 899 gfn = unalias_gfn(kvm, gfn);
831 slot = gfn_to_memslot_unaliased(kvm, gfn); 900 slot = gfn_to_memslot_unaliased(kvm, gfn);
832 if (!slot) 901 if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
833 return bad_hva(); 902 return bad_hva();
834 return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); 903 return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
835} 904}