Diffstat (limited to 'virt/kvm/kvm_main.c')
 virt/kvm/kvm_main.c | 144 ++++++----------
 1 file changed, 39 insertions(+), 105 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a91f980077d8..42b73930a6de 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -203,7 +203,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
-	int dirty_count = kvm->tlbs_dirty;
+	long dirty_count = kvm->tlbs_dirty;
 
 	smp_mb();
 	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
@@ -289,15 +289,15 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 	 */
 	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
+
 	kvm->mmu_notifier_seq++;
 	need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty;
-	spin_unlock(&kvm->mmu_lock);
-	srcu_read_unlock(&kvm->srcu, idx);
-
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
 		kvm_flush_remote_tlbs(kvm);
 
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 }
 
 static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
@@ -335,12 +335,12 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	for (; start < end; start += PAGE_SIZE)
 		need_tlb_flush |= kvm_unmap_hva(kvm, start);
 	need_tlb_flush |= kvm->tlbs_dirty;
-	spin_unlock(&kvm->mmu_lock);
-	srcu_read_unlock(&kvm->srcu, idx);
-
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
 		kvm_flush_remote_tlbs(kvm);
+
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 }
 
 static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
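Both invalidate_page and invalidate_range_start above are converted to the same shape, and kvm_mmu_notifier_clear_flush_young below follows: the remote TLB flush now happens while mmu_lock is still held, so the SPTE teardown and the flush appear atomic to the page fault path, which also runs under mmu_lock. A condensed sketch of the shared pattern (illustrative, using the identifiers from the hunks above):

	idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);
	/* ... zap SPTEs and accumulate need_tlb_flush, including kvm->tlbs_dirty ... */
	if (need_tlb_flush)
		kvm_flush_remote_tlbs(kvm);	/* flush before dropping the lock */
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);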
@@ -357,11 +357,11 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 	 * been freed.
 	 */
 	kvm->mmu_notifier_seq++;
+	smp_wmb();
 	/*
 	 * The above sequence increase must be visible before the
-	 * below count decrease but both values are read by the kvm
-	 * page fault under mmu_lock spinlock so we don't need to add
-	 * a smb_wmb() here in between the two.
+	 * below count decrease, which is ensured by the smp_wmb above
+	 * in conjunction with the smp_rmb in mmu_notifier_retry().
 	 */
 	kvm->mmu_notifier_count--;
 	spin_unlock(&kvm->mmu_lock);
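The rewritten comment names the consumer of the new smp_wmb(). As a hedged sketch (the helper lives in include/linux/kvm_host.h, not in this file), mmu_notifier_retry() of this era reads the two fields in the opposite order with an smp_rmb() in between, which is what makes the pairing work:

	/* sketch of the reader side that pairs with the smp_wmb() above */
	if (unlikely(kvm->mmu_notifier_count))
		return 1;	/* an invalidation is in progress: retry the fault */
	smp_rmb();		/* pairs with the smp_wmb() after mmu_notifier_seq++ */
	if (kvm->mmu_notifier_seq != mmu_seq)
		return 1;	/* an invalidation completed meanwhile: retry */
	return 0;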
@@ -378,13 +378,14 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 
 	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
-	young = kvm_age_hva(kvm, address);
-	spin_unlock(&kvm->mmu_lock);
-	srcu_read_unlock(&kvm->srcu, idx);
 
+	young = kvm_age_hva(kvm, address);
 	if (young)
 		kvm_flush_remote_tlbs(kvm);
 
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+
 	return young;
 }
 
@@ -449,7 +450,7 @@ static void kvm_init_memslots_id(struct kvm *kvm)
 		slots->id_to_index[i] = slots->memslots[i].id = i;
 }
 
-static struct kvm *kvm_create_vm(void)
+static struct kvm *kvm_create_vm(unsigned long type)
 {
 	int r, i;
 	struct kvm *kvm = kvm_arch_alloc_vm();
@@ -457,7 +458,7 @@ static struct kvm *kvm_create_vm(void)
 	if (!kvm)
 		return ERR_PTR(-ENOMEM);
 
-	r = kvm_arch_init_vm(kvm);
+	r = kvm_arch_init_vm(kvm, type);
 	if (r)
 		goto out_err_nodisable;
 
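kvm_arch_init_vm() now receives the VM type word taken from the KVM_CREATE_VM argument (see the ioctl change at the end of this diff). A minimal sketch, assuming an architecture that only knows the default type and therefore rejects everything else (not taken from this patch):

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	if (type)
		return -EINVAL;	/* only the default machine type is supported */
	/* ... existing per-arch initialisation ... */
	return 0;
}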
@@ -535,21 +536,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 				  struct kvm_memory_slot *dont)
 {
-	int i;
-
 	if (!dont || free->rmap != dont->rmap)
 		vfree(free->rmap);
 
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		kvm_destroy_dirty_bitmap(free);
 
-
-	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
-		if (!dont || free->lpage_info[i] != dont->lpage_info[i]) {
-			vfree(free->lpage_info[i]);
-			free->lpage_info[i] = NULL;
-		}
-	}
+	kvm_arch_free_memslot(free, dont);
 
 	free->npages = 0;
 	free->rmap = NULL;
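The per-level lpage_info teardown deleted here becomes the architecture's job via kvm_arch_free_memslot(). A sketch of what the hook plausibly looks like on an architecture that keeps that metadata in the new per-slot arch area (the arch.lpage_info field name is an assumption; an architecture with no such metadata can leave the body empty):

void kvm_arch_free_memslot(struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
	int i;

	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
		if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
			vfree(free->arch.lpage_info[i]);
			free->arch.lpage_info[i] = NULL;
		}
	}
}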
@@ -616,7 +609,6 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-#ifndef CONFIG_S390
 /*
  * Allocation size is twice as large as the actual dirty bitmap size.
  * This makes it possible to do double buffering: see x86's
@@ -624,6 +616,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
  */
 static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
+#ifndef CONFIG_S390
 	unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
 
 	if (dirty_bytes > PAGE_SIZE)
@@ -636,21 +629,8 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 
 	memslot->dirty_bitmap_head = memslot->dirty_bitmap;
 	memslot->nr_dirty_pages = 0;
-	return 0;
-}
 #endif /* !CONFIG_S390 */
-
-static struct kvm_memory_slot *
-search_memslots(struct kvm_memslots *slots, gfn_t gfn)
-{
-	struct kvm_memory_slot *memslot;
-
-	kvm_for_each_memslot(memslot, slots)
-		if (gfn >= memslot->base_gfn &&
-		    gfn < memslot->base_gfn + memslot->npages)
-			return memslot;
-
-	return NULL;
+	return 0;
 }
 
 static int cmp_memslot(const void *slot1, const void *slot2)
@@ -778,69 +758,24 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		r = -ENOMEM;
 
 	/* Allocate if a slot is being created */
+	if (npages && !old.npages) {
+		new.user_alloc = user_alloc;
+		new.userspace_addr = mem->userspace_addr;
 #ifndef CONFIG_S390
-	if (npages && !new.rmap) {
 		new.rmap = vzalloc(npages * sizeof(*new.rmap));
-
 		if (!new.rmap)
 			goto out_free;
-
-		new.user_alloc = user_alloc;
-		new.userspace_addr = mem->userspace_addr;
-	}
-	if (!npages)
-		goto skip_lpage;
-
-	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
-		unsigned long ugfn;
-		unsigned long j;
-		int lpages;
-		int level = i + 2;
-
-		/* Avoid unused variable warning if no large pages */
-		(void)level;
-
-		if (new.lpage_info[i])
-			continue;
-
-		lpages = 1 + ((base_gfn + npages - 1)
-			      >> KVM_HPAGE_GFN_SHIFT(level));
-		lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level);
-
-		new.lpage_info[i] = vzalloc(lpages * sizeof(*new.lpage_info[i]));
-
-		if (!new.lpage_info[i])
+#endif /* not defined CONFIG_S390 */
+		if (kvm_arch_create_memslot(&new, npages))
 			goto out_free;
-
-		if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
-			new.lpage_info[i][0].write_count = 1;
-		if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
-			new.lpage_info[i][lpages - 1].write_count = 1;
-		ugfn = new.userspace_addr >> PAGE_SHIFT;
-		/*
-		 * If the gfn and userspace address are not aligned wrt each
-		 * other, or if explicitly asked to, disable large page
-		 * support for this slot
-		 */
-		if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
-		    !largepages_enabled)
-			for (j = 0; j < lpages; ++j)
-				new.lpage_info[i][j].write_count = 1;
 	}
 
-skip_lpage:
-
 	/* Allocate page dirty bitmap if needed */
 	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
 		if (kvm_create_dirty_bitmap(&new) < 0)
 			goto out_free;
 		/* destroy any largepage mappings for dirty tracking */
 	}
-#else  /* not defined CONFIG_S390 */
-	new.user_alloc = user_alloc;
-	if (user_alloc)
-		new.userspace_addr = mem->userspace_addr;
-#endif /* not defined CONFIG_S390 */
 
 	if (!npages) {
 		struct kvm_memory_slot *slot;
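kvm_arch_create_memslot() is the allocation-side counterpart: the large-page bookkeeping removed above moves behind it, and a non-zero return value fails the slot creation. For an architecture with no per-slot metadata the hook can be a stub (sketch, assuming the signature used at the call site above):

int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
{
	return 0;	/* nothing to allocate for this architecture */
}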
@@ -890,8 +825,7 @@ skip_lpage:
 	if (!npages) {
 		new.rmap = NULL;
 		new.dirty_bitmap = NULL;
-		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
-			new.lpage_info[i] = NULL;
+		memset(&new.arch, 0, sizeof(new.arch));
 	}
 
 	update_memslots(slots, &new);
@@ -978,6 +912,11 @@ out:
 	return r;
 }
 
+bool kvm_largepages_enabled(void)
+{
+	return largepages_enabled;
+}
+
 void kvm_disable_largepages(void)
 {
 	largepages_enabled = false;
@@ -1031,12 +970,6 @@ int kvm_is_error_hva(unsigned long addr)
 }
 EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 
-static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots,
-						gfn_t gfn)
-{
-	return search_memslots(slots, gfn);
-}
-
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 {
 	return __gfn_to_memslot(kvm_memslots(kvm), gfn);
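gfn_to_memslot() still calls __gfn_to_memslot() even though the local definition is removed, so __gfn_to_memslot() and the search_memslots() helper deleted earlier have presumably become inline functions in include/linux/kvm_host.h, where arch code can use them too. A sketch of the moved lookup, matching the body removed above:

static inline struct kvm_memory_slot *
search_memslots(struct kvm_memslots *slots, gfn_t gfn)
{
	struct kvm_memory_slot *memslot;

	kvm_for_each_memslot(memslot, slots)
		if (gfn >= memslot->base_gfn &&
		    gfn < memslot->base_gfn + memslot->npages)
			return memslot;

	return NULL;
}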
@@ -1459,7 +1392,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 
 	ghc->gpa = gpa;
 	ghc->generation = slots->generation;
-	ghc->memslot = __gfn_to_memslot(slots, gfn);
+	ghc->memslot = gfn_to_memslot(kvm, gfn);
 	ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL);
 	if (!kvm_is_error_hva(ghc->hva))
 		ghc->hva += offset;
@@ -1657,7 +1590,7 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
 #endif
 	else
-		return VM_FAULT_SIGBUS;
+		return kvm_arch_vcpu_fault(vcpu, vmf);
 	get_page(page);
 	vmf->page = page;
 	return 0;
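The vcpu mmap fault handler now defers unknown offsets to the architecture instead of failing outright. A default implementation that keeps the old behaviour is simply the following sketch; an architecture with extra mappable vcpu pages would return the right page here instead:

int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}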
@@ -1718,6 +1651,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 		goto vcpu_destroy;
 
 	mutex_lock(&kvm->lock);
+	if (!kvm_vcpu_compatible(vcpu)) {
+		r = -EINVAL;
+		goto unlock_vcpu_destroy;
+	}
 	if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) {
 		r = -EINVAL;
 		goto unlock_vcpu_destroy;
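kvm_vcpu_compatible() lets an architecture veto a vcpu that does not match the VM's current configuration before it goes online. A hedged sketch of the idea, assuming the x86 definition of this era, which requires the in-kernel irqchip setting and the vcpu's in-kernel local APIC to agree (other architectures would simply return true):

static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
{
	/* assumption: x86 ties vcpu compatibility to the irqchip model */
	return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
}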
@@ -2198,12 +2135,12 @@ static struct file_operations kvm_vm_fops = {
 	.llseek = noop_llseek,
 };
 
-static int kvm_dev_ioctl_create_vm(void)
+static int kvm_dev_ioctl_create_vm(unsigned long type)
 {
 	int r;
 	struct kvm *kvm;
 
-	kvm = kvm_create_vm();
+	kvm = kvm_create_vm(type);
 	if (IS_ERR(kvm))
 		return PTR_ERR(kvm);
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
@@ -2254,10 +2191,7 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = KVM_API_VERSION;
 		break;
 	case KVM_CREATE_VM:
-		r = -EINVAL;
-		if (arg)
-			goto out;
-		r = kvm_dev_ioctl_create_vm();
+		r = kvm_dev_ioctl_create_vm(arg);
 		break;
 	case KVM_CHECK_EXTENSION:
 		r = kvm_dev_ioctl_check_extension_generic(arg);
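With the -EINVAL check on a non-zero argument gone, the KVM_CREATE_VM ioctl argument is now interpreted as a machine type and threaded through kvm_dev_ioctl_create_vm() and kvm_create_vm() into kvm_arch_init_vm(). Callers that keep passing 0 get the previous behaviour. A hedged userspace sketch:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int create_default_vm(void)
{
	int sys_fd = open("/dev/kvm", O_RDWR);

	if (sys_fd < 0)
		return -1;

	/* 0 selects the default machine type, matching the old behaviour;
	 * architectures that define VM types can pass a non-zero value. */
	return ioctl(sys_fd, KVM_CREATE_VM, 0UL);
}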