Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--	virt/kvm/kvm_main.c	144
1 file changed, 39 insertions(+), 105 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a91f980077d8..42b73930a6de 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -203,7 +203,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
-	int dirty_count = kvm->tlbs_dirty;
+	long dirty_count = kvm->tlbs_dirty;
 
 	smp_mb();
 	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
@@ -289,15 +289,15 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 	 */
 	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
+
 	kvm->mmu_notifier_seq++;
 	need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty;
-	spin_unlock(&kvm->mmu_lock);
-	srcu_read_unlock(&kvm->srcu, idx);
-
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
 		kvm_flush_remote_tlbs(kvm);
 
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 }
 
 static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
@@ -335,12 +335,12 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	for (; start < end; start += PAGE_SIZE)
 		need_tlb_flush |= kvm_unmap_hva(kvm, start);
 	need_tlb_flush |= kvm->tlbs_dirty;
-	spin_unlock(&kvm->mmu_lock);
-	srcu_read_unlock(&kvm->srcu, idx);
-
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
 		kvm_flush_remote_tlbs(kvm);
+
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 }
 
 static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
@@ -357,11 +357,11 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 	 * been freed.
 	 */
 	kvm->mmu_notifier_seq++;
+	smp_wmb();
 	/*
 	 * The above sequence increase must be visible before the
-	 * below count decrease but both values are read by the kvm
-	 * page fault under mmu_lock spinlock so we don't need to add
-	 * a smb_wmb() here in between the two.
+	 * below count decrease, which is ensured by the smp_wmb above
+	 * in conjunction with the smp_rmb in mmu_notifier_retry().
 	 */
 	kvm->mmu_notifier_count--;
 	spin_unlock(&kvm->mmu_lock);
@@ -378,13 +378,14 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 
 	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
-	young = kvm_age_hva(kvm, address);
-	spin_unlock(&kvm->mmu_lock);
-	srcu_read_unlock(&kvm->srcu, idx);
 
+	young = kvm_age_hva(kvm, address);
 	if (young)
 		kvm_flush_remote_tlbs(kvm);
 
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+
 	return young;
 }
 
@@ -449,7 +450,7 @@ static void kvm_init_memslots_id(struct kvm *kvm)
 		slots->id_to_index[i] = slots->memslots[i].id = i;
 }
 
-static struct kvm *kvm_create_vm(void)
+static struct kvm *kvm_create_vm(unsigned long type)
 {
 	int r, i;
 	struct kvm *kvm = kvm_arch_alloc_vm();
@@ -457,7 +458,7 @@ static struct kvm *kvm_create_vm(void)
 	if (!kvm)
 		return ERR_PTR(-ENOMEM);
 
-	r = kvm_arch_init_vm(kvm);
+	r = kvm_arch_init_vm(kvm, type);
 	if (r)
 		goto out_err_nodisable;
 
@@ -535,21 +536,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 				  struct kvm_memory_slot *dont)
 {
-	int i;
-
 	if (!dont || free->rmap != dont->rmap)
 		vfree(free->rmap);
 
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		kvm_destroy_dirty_bitmap(free);
 
-
-	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
-		if (!dont || free->lpage_info[i] != dont->lpage_info[i]) {
-			vfree(free->lpage_info[i]);
-			free->lpage_info[i] = NULL;
-		}
-	}
+	kvm_arch_free_memslot(free, dont);
 
 	free->npages = 0;
 	free->rmap = NULL;
@@ -616,7 +609,6 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-#ifndef CONFIG_S390
 /*
  * Allocation size is twice as large as the actual dirty bitmap size.
  * This makes it possible to do double buffering: see x86's
@@ -624,6 +616,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
  */
 static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
+#ifndef CONFIG_S390
 	unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
 
 	if (dirty_bytes > PAGE_SIZE)
@@ -636,21 +629,8 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 
 	memslot->dirty_bitmap_head = memslot->dirty_bitmap;
 	memslot->nr_dirty_pages = 0;
-	return 0;
-}
 #endif /* !CONFIG_S390 */
-
-static struct kvm_memory_slot *
-search_memslots(struct kvm_memslots *slots, gfn_t gfn)
-{
-	struct kvm_memory_slot *memslot;
-
-	kvm_for_each_memslot(memslot, slots)
-		if (gfn >= memslot->base_gfn &&
-		    gfn < memslot->base_gfn + memslot->npages)
-			return memslot;
-
-	return NULL;
+	return 0;
 }
 
 static int cmp_memslot(const void *slot1, const void *slot2)
@@ -778,69 +758,24 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	r = -ENOMEM;
 
 	/* Allocate if a slot is being created */
+	if (npages && !old.npages) {
+		new.user_alloc = user_alloc;
+		new.userspace_addr = mem->userspace_addr;
 #ifndef CONFIG_S390
-	if (npages && !new.rmap) {
 		new.rmap = vzalloc(npages * sizeof(*new.rmap));
-
 		if (!new.rmap)
 			goto out_free;
-
-		new.user_alloc = user_alloc;
-		new.userspace_addr = mem->userspace_addr;
-	}
-	if (!npages)
-		goto skip_lpage;
-
-	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
-		unsigned long ugfn;
-		unsigned long j;
-		int lpages;
-		int level = i + 2;
-
-		/* Avoid unused variable warning if no large pages */
-		(void)level;
-
-		if (new.lpage_info[i])
-			continue;
-
-		lpages = 1 + ((base_gfn + npages - 1)
-			      >> KVM_HPAGE_GFN_SHIFT(level));
-		lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level);
-
-		new.lpage_info[i] = vzalloc(lpages * sizeof(*new.lpage_info[i]));
-
-		if (!new.lpage_info[i])
+#endif /* not defined CONFIG_S390 */
+		if (kvm_arch_create_memslot(&new, npages))
 			goto out_free;
-
-		if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
-			new.lpage_info[i][0].write_count = 1;
-		if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
-			new.lpage_info[i][lpages - 1].write_count = 1;
-		ugfn = new.userspace_addr >> PAGE_SHIFT;
-		/*
-		 * If the gfn and userspace address are not aligned wrt each
-		 * other, or if explicitly asked to, disable large page
-		 * support for this slot
-		 */
-		if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
-		    !largepages_enabled)
-			for (j = 0; j < lpages; ++j)
-				new.lpage_info[i][j].write_count = 1;
 	}
 
-skip_lpage:
-
 	/* Allocate page dirty bitmap if needed */
 	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
 		if (kvm_create_dirty_bitmap(&new) < 0)
 			goto out_free;
 		/* destroy any largepage mappings for dirty tracking */
 	}
-#else  /* not defined CONFIG_S390 */
-	new.user_alloc = user_alloc;
-	if (user_alloc)
-		new.userspace_addr = mem->userspace_addr;
-#endif /* not defined CONFIG_S390 */
 
 	if (!npages) {
 		struct kvm_memory_slot *slot;
@@ -890,8 +825,7 @@ skip_lpage:
 	if (!npages) {
 		new.rmap = NULL;
 		new.dirty_bitmap = NULL;
-		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
-			new.lpage_info[i] = NULL;
+		memset(&new.arch, 0, sizeof(new.arch));
 	}
 
 	update_memslots(slots, &new);
@@ -978,6 +912,11 @@ out:
 	return r;
 }
 
+bool kvm_largepages_enabled(void)
+{
+	return largepages_enabled;
+}
+
 void kvm_disable_largepages(void)
 {
 	largepages_enabled = false;
@@ -1031,12 +970,6 @@ int kvm_is_error_hva(unsigned long addr)
 }
 EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 
-static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots,
-						gfn_t gfn)
-{
-	return search_memslots(slots, gfn);
-}
-
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 {
 	return __gfn_to_memslot(kvm_memslots(kvm), gfn);
@@ -1459,7 +1392,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 
 	ghc->gpa = gpa;
 	ghc->generation = slots->generation;
-	ghc->memslot = __gfn_to_memslot(slots, gfn);
+	ghc->memslot = gfn_to_memslot(kvm, gfn);
 	ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL);
 	if (!kvm_is_error_hva(ghc->hva))
 		ghc->hva += offset;
@@ -1657,7 +1590,7 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
 #endif
 	else
-		return VM_FAULT_SIGBUS;
+		return kvm_arch_vcpu_fault(vcpu, vmf);
 	get_page(page);
 	vmf->page = page;
 	return 0;
@@ -1718,6 +1651,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 		goto vcpu_destroy;
 
 	mutex_lock(&kvm->lock);
+	if (!kvm_vcpu_compatible(vcpu)) {
+		r = -EINVAL;
+		goto unlock_vcpu_destroy;
+	}
 	if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) {
 		r = -EINVAL;
 		goto unlock_vcpu_destroy;
@@ -2198,12 +2135,12 @@ static struct file_operations kvm_vm_fops = {
 	.llseek		= noop_llseek,
 };
 
-static int kvm_dev_ioctl_create_vm(void)
+static int kvm_dev_ioctl_create_vm(unsigned long type)
 {
 	int r;
 	struct kvm *kvm;
 
-	kvm = kvm_create_vm();
+	kvm = kvm_create_vm(type);
 	if (IS_ERR(kvm))
 		return PTR_ERR(kvm);
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
@@ -2254,10 +2191,7 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = KVM_API_VERSION;
 		break;
 	case KVM_CREATE_VM:
-		r = -EINVAL;
-		if (arg)
-			goto out;
-		r = kvm_dev_ioctl_create_vm();
+		r = kvm_dev_ioctl_create_vm(arg);
 		break;
 	case KVM_CHECK_EXTENSION:
 		r = kvm_dev_ioctl_check_extension_generic(arg);