Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--  virt/kvm/kvm_main.c  392
1 file changed, 257 insertions(+), 135 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a944be392d6..548f9253c19 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -44,6 +44,8 @@
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
 #include <linux/compat.h>
+#include <linux/srcu.h>
+#include <linux/hugetlb.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -51,9 +53,7 @@
 #include <asm/pgtable.h>
 #include <asm-generic/bitops/le.h>
 
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 #include "coalesced_mmio.h"
-#endif
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
@@ -86,6 +86,8 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 static int hardware_enable_all(void);
 static void hardware_disable_all(void);
 
+static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
+
 static bool kvm_rebooting;
 
 static bool largepages_enabled = true;
@@ -136,7 +138,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
 	zalloc_cpumask_var(&cpus, GFP_ATOMIC);
 
-	spin_lock(&kvm->requests_lock);
+	raw_spin_lock(&kvm->requests_lock);
 	me = smp_processor_id();
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (test_and_set_bit(req, &vcpu->requests))
@@ -151,7 +153,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 		smp_call_function_many(cpus, ack_flush, NULL, 1);
 	else
 		called = false;
-	spin_unlock(&kvm->requests_lock);
+	raw_spin_unlock(&kvm->requests_lock);
 	free_cpumask_var(cpus);
 	return called;
 }
@@ -215,7 +217,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 					     unsigned long address)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int need_tlb_flush;
+	int need_tlb_flush, idx;
 
 	/*
 	 * When ->invalidate_page runs, the linux pte has been zapped
@@ -235,10 +237,12 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 	 * pte after kvm_unmap_hva returned, without noticing the page
 	 * is going to be freed.
 	 */
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	kvm->mmu_notifier_seq++;
 	need_tlb_flush = kvm_unmap_hva(kvm, address);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
@@ -252,11 +256,14 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
 					pte_t pte)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	int idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	kvm->mmu_notifier_seq++;
 	kvm_set_spte_hva(kvm, address, pte);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 }
 
 static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
@@ -265,8 +272,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 						    unsigned long end)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int need_tlb_flush = 0;
+	int need_tlb_flush = 0, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	/*
 	 * The count increase must become visible at unlock time as no
@@ -277,6 +285,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	for (; start < end; start += PAGE_SIZE)
 		need_tlb_flush |= kvm_unmap_hva(kvm, start);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
@@ -314,11 +323,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 					      unsigned long address)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int young;
+	int young, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	young = kvm_age_hva(kvm, address);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	if (young)
 		kvm_flush_remote_tlbs(kvm);
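
The four notifier hunks above all make the same change: the existing mmu_lock critical section is wrapped in an SRCU read-side section so the memslot array cannot be freed out from under the callback once memslots are updated via RCU. A schematic sketch of that read-side pattern, using the kernel's SRCU primitives (the callback name and body here are illustrative, not code from this patch):

static void example_notifier_callback(struct kvm *kvm, unsigned long address)
{
	int idx;

	idx = srcu_read_lock(&kvm->srcu);	/* pin the current memslots/buses copy */
	spin_lock(&kvm->mmu_lock);
	/* ... operate on the shadow MMU, e.g. kvm_unmap_hva(kvm, address) ... */
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);	/* after this, writers may free old copies */
}
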
@@ -341,15 +352,26 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
 	.change_pte = kvm_mmu_notifier_change_pte,
 	.release = kvm_mmu_notifier_release,
 };
+
+static int kvm_init_mmu_notifier(struct kvm *kvm)
+{
+	kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
+	return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
+}
+
+#else  /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
+
+static int kvm_init_mmu_notifier(struct kvm *kvm)
+{
+	return 0;
+}
+
 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
 
 static struct kvm *kvm_create_vm(void)
 {
-	int r = 0;
+	int r = 0, i;
 	struct kvm *kvm = kvm_arch_create_vm();
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-	struct page *page;
-#endif
 
 	if (IS_ERR(kvm))
 		goto out;
@@ -363,39 +385,35 @@ static struct kvm *kvm_create_vm(void)
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
 
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!page) {
-		r = -ENOMEM;
+	r = -ENOMEM;
+	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+	if (!kvm->memslots)
 		goto out_err;
-	}
-	kvm->coalesced_mmio_ring =
-			(struct kvm_coalesced_mmio_ring *)page_address(page);
-#endif
-
-#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
-	{
-		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
-		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
-		if (r) {
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-			put_page(page);
-#endif
+	if (init_srcu_struct(&kvm->srcu))
+		goto out_err;
+	for (i = 0; i < KVM_NR_BUSES; i++) {
+		kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
+					GFP_KERNEL);
+		if (!kvm->buses[i]) {
+			cleanup_srcu_struct(&kvm->srcu);
 			goto out_err;
 		}
 	}
-#endif
+
+	r = kvm_init_mmu_notifier(kvm);
+	if (r) {
+		cleanup_srcu_struct(&kvm->srcu);
+		goto out_err;
+	}
 
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
 	spin_lock_init(&kvm->mmu_lock);
-	spin_lock_init(&kvm->requests_lock);
-	kvm_io_bus_init(&kvm->pio_bus);
+	raw_spin_lock_init(&kvm->requests_lock);
 	kvm_eventfd_init(kvm);
 	mutex_init(&kvm->lock);
 	mutex_init(&kvm->irq_lock);
-	kvm_io_bus_init(&kvm->mmio_bus);
-	init_rwsem(&kvm->slots_lock);
+	mutex_init(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);
 	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
@@ -406,12 +424,12 @@ static struct kvm *kvm_create_vm(void)
 out:
 	return kvm;
 
-#if defined(KVM_COALESCED_MMIO_PAGE_OFFSET) || \
-    (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER))
 out_err:
 	hardware_disable_all();
-#endif
 out_err_nodisable:
+	for (i = 0; i < KVM_NR_BUSES; i++)
+		kfree(kvm->buses[i]);
+	kfree(kvm->memslots);
 	kfree(kvm);
 	return ERR_PTR(r);
 }
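
One ordering constraint in the kvm_create_vm() changes above is worth spelling out: init_srcu_struct() runs before kvm_init_mmu_notifier(), because as soon as the notifier is registered its callbacks (previous hunks) take srcu_read_lock(&kvm->srcu), and cleanup_srcu_struct() has to be called on every failure path past that point. A minimal sketch of that dependency (simplified error handling, illustrative helper name, not the literal patch code):

static int example_vm_srcu_setup(struct kvm *kvm)
{
	int r;

	if (init_srcu_struct(&kvm->srcu))	/* SRCU readers are legal only after this */
		return -ENOMEM;

	r = kvm_init_mmu_notifier(kvm);		/* callbacks immediately use kvm->srcu */
	if (r)
		cleanup_srcu_struct(&kvm->srcu);
	return r;
}
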
@@ -446,13 +464,17 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 void kvm_free_physmem(struct kvm *kvm)
 {
 	int i;
+	struct kvm_memslots *slots = kvm->memslots;
+
+	for (i = 0; i < slots->nmemslots; ++i)
+		kvm_free_physmem_slot(&slots->memslots[i], NULL);
 
-	for (i = 0; i < kvm->nmemslots; ++i)
-		kvm_free_physmem_slot(&kvm->memslots[i], NULL);
+	kfree(kvm->memslots);
 }
 
 static void kvm_destroy_vm(struct kvm *kvm)
 {
+	int i;
 	struct mm_struct *mm = kvm->mm;
 
 	kvm_arch_sync_events(kvm);
@@ -460,12 +482,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	list_del(&kvm->vm_list);
 	spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
-	kvm_io_bus_destroy(&kvm->pio_bus);
-	kvm_io_bus_destroy(&kvm->mmio_bus);
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-	if (kvm->coalesced_mmio_ring != NULL)
-		free_page((unsigned long)kvm->coalesced_mmio_ring);
-#endif
+	for (i = 0; i < KVM_NR_BUSES; i++)
+		kvm_io_bus_destroy(kvm->buses[i]);
+	kvm_coalesced_mmio_free(kvm);
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
 #else
@@ -512,12 +531,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem,
 			    int user_alloc)
 {
-	int r;
+	int r, flush_shadow = 0;
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long i;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot old, new;
+	struct kvm_memslots *slots, *old_memslots;
 
 	r = -EINVAL;
 	/* General sanity checks */
@@ -532,7 +552,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
 		goto out;
 
-	memslot = &kvm->memslots[mem->slot];
+	memslot = &kvm->memslots->memslots[mem->slot];
 	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
 	npages = mem->memory_size >> PAGE_SHIFT;
 
@@ -553,7 +573,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	/* Check for overlaps */
 	r = -EEXIST;
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *s = &kvm->memslots[i];
+		struct kvm_memory_slot *s = &kvm->memslots->memslots[i];
 
 		if (s == memslot || !s->npages)
 			continue;
@@ -579,15 +599,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		memset(new.rmap, 0, npages * sizeof(*new.rmap));
 
 		new.user_alloc = user_alloc;
-		/*
-		 * hva_to_rmmap() serialzies with the mmu_lock and to be
-		 * safe it has to ignore memslots with !user_alloc &&
-		 * !userspace_addr.
-		 */
-		if (user_alloc)
-			new.userspace_addr = mem->userspace_addr;
-		else
-			new.userspace_addr = 0;
+		new.userspace_addr = mem->userspace_addr;
 	}
 	if (!npages)
 		goto skip_lpage;
@@ -642,8 +654,9 @@ skip_lpage:
 		if (!new.dirty_bitmap)
 			goto out_free;
 		memset(new.dirty_bitmap, 0, dirty_bytes);
+		/* destroy any largepage mappings for dirty tracking */
 		if (old.npages)
-			kvm_arch_flush_shadow(kvm);
+			flush_shadow = 1;
 	}
 #else /* not defined CONFIG_S390 */
 	new.user_alloc = user_alloc;
@@ -651,36 +664,72 @@ skip_lpage:
 	new.userspace_addr = mem->userspace_addr;
 #endif /* not defined CONFIG_S390 */
 
-	if (!npages)
+	if (!npages) {
+		r = -ENOMEM;
+		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+		if (!slots)
+			goto out_free;
+		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+		if (mem->slot >= slots->nmemslots)
+			slots->nmemslots = mem->slot + 1;
+		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
+
+		old_memslots = kvm->memslots;
+		rcu_assign_pointer(kvm->memslots, slots);
+		synchronize_srcu_expedited(&kvm->srcu);
+		/* From this point no new shadow pages pointing to a deleted
+		 * memslot will be created.
+		 *
+		 * validation of sp->gfn happens in:
+		 * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+		 * - kvm_is_visible_gfn (mmu_check_roots)
+		 */
 		kvm_arch_flush_shadow(kvm);
+		kfree(old_memslots);
+	}
 
-	spin_lock(&kvm->mmu_lock);
-	if (mem->slot >= kvm->nmemslots)
-		kvm->nmemslots = mem->slot + 1;
-
-	*memslot = new;
-	spin_unlock(&kvm->mmu_lock);
-
-	r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
-	if (r) {
-		spin_lock(&kvm->mmu_lock);
-		*memslot = old;
-		spin_unlock(&kvm->mmu_lock);
+	r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
+	if (r)
 		goto out_free;
-	}
 
-	kvm_free_physmem_slot(&old, npages ? &new : NULL);
-	/* Slot deletion case: we have to update the current slot */
-	spin_lock(&kvm->mmu_lock);
-	if (!npages)
-		*memslot = old;
-	spin_unlock(&kvm->mmu_lock);
 #ifdef CONFIG_DMAR
 	/* map the pages in iommu page table */
-	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
-	if (r)
-		goto out;
+	if (npages) {
+		r = kvm_iommu_map_pages(kvm, &new);
+		if (r)
+			goto out_free;
+	}
 #endif
+
+	r = -ENOMEM;
+	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+	if (!slots)
+		goto out_free;
+	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+	if (mem->slot >= slots->nmemslots)
+		slots->nmemslots = mem->slot + 1;
+
+	/* actual memory is freed via old in kvm_free_physmem_slot below */
+	if (!npages) {
+		new.rmap = NULL;
+		new.dirty_bitmap = NULL;
+		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
+			new.lpage_info[i] = NULL;
+	}
+
+	slots->memslots[mem->slot] = new;
+	old_memslots = kvm->memslots;
+	rcu_assign_pointer(kvm->memslots, slots);
+	synchronize_srcu_expedited(&kvm->srcu);
+
+	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
+
+	kvm_free_physmem_slot(&old, &new);
+	kfree(old_memslots);
+
+	if (flush_shadow)
+		kvm_arch_flush_shadow(kvm);
+
 	return 0;
 
 out_free:
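
The block above is the heart of the conversion: the memslot array is no longer modified in place under mmu_lock, it is replaced wholesale with the usual RCU copy/update/publish sequence, writers serialized by slots_lock and readers protected by kvm->srcu. A schematic sketch of the writer side, using the same primitives the patch relies on (illustrative helper name; the caller is assumed to hold kvm->slots_lock):

static void example_publish_memslots(struct kvm *kvm, struct kvm_memslots *new)
{
	struct kvm_memslots *old = kvm->memslots;

	rcu_assign_pointer(kvm->memslots, new);	/* publish the new array */
	synchronize_srcu_expedited(&kvm->srcu);	/* wait for readers still using the old one */
	kfree(old);				/* nothing can reference it any more */
}

For slot deletion the hunk runs this sequence twice: once with the slot merely marked KVM_MEMSLOT_INVALID, so kvm_arch_flush_shadow() runs while no new shadow pages can be created for it, and once more to publish the final layout.
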
@@ -697,9 +746,9 @@ int kvm_set_memory_region(struct kvm *kvm,
 {
 	int r;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	r = __kvm_set_memory_region(kvm, mem, user_alloc);
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
@@ -726,7 +775,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
 	if (log->slot >= KVM_MEMORY_SLOTS)
 		goto out;
 
-	memslot = &kvm->memslots[log->slot];
+	memslot = &kvm->memslots->memslots[log->slot];
 	r = -ENOENT;
 	if (!memslot->dirty_bitmap)
 		goto out;
@@ -780,9 +829,10 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
-	for (i = 0; i < kvm->nmemslots; ++i) {
-		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+	for (i = 0; i < slots->nmemslots; ++i) {
+		struct kvm_memory_slot *memslot = &slots->memslots[i];
 
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)
@@ -801,10 +851,14 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn_instantiation(kvm, gfn);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+		struct kvm_memory_slot *memslot = &slots->memslots[i];
+
+		if (memslot->flags & KVM_MEMSLOT_INVALID)
+			continue;
 
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)
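
On the read side, lookups now go through rcu_dereference(kvm->memslots), as gfn_to_memslot_unaliased() and kvm_is_visible_gfn() do above, and slots flagged KVM_MEMSLOT_INVALID are treated as absent. A hedged sketch of such a lookup (illustrative function name; the caller is assumed to sit inside srcu_read_lock(&kvm->srcu) or to hold slots_lock):

static struct kvm_memory_slot *example_find_slot(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
	int i;

	for (i = 0; i < slots->nmemslots; ++i) {
		struct kvm_memory_slot *memslot = &slots->memslots[i];

		if (memslot->flags & KVM_MEMSLOT_INVALID)
			continue;	/* slot is in the middle of being deleted */
		if (gfn >= memslot->base_gfn &&
		    gfn < memslot->base_gfn + memslot->npages)
			return memslot;
	}
	return NULL;
}
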
@@ -814,33 +868,68 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
+unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
+{
+	struct vm_area_struct *vma;
+	unsigned long addr, size;
+
+	size = PAGE_SIZE;
+
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr))
+		return PAGE_SIZE;
+
+	down_read(&current->mm->mmap_sem);
+	vma = find_vma(current->mm, addr);
+	if (!vma)
+		goto out;
+
+	size = vma_kernel_pagesize(vma);
+
+out:
+	up_read(&current->mm->mmap_sem);
+
+	return size;
+}
+
+int memslot_id(struct kvm *kvm, gfn_t gfn)
+{
+	int i;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+	struct kvm_memory_slot *memslot = NULL;
+
+	gfn = unalias_gfn(kvm, gfn);
+	for (i = 0; i < slots->nmemslots; ++i) {
+		memslot = &slots->memslots[i];
+
+		if (gfn >= memslot->base_gfn
+		    && gfn < memslot->base_gfn + memslot->npages)
+			break;
+	}
+
+	return memslot - slots->memslots;
+}
+
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *slot;
 
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn_instantiation(kvm, gfn);
 	slot = gfn_to_memslot_unaliased(kvm, gfn);
-	if (!slot)
+	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
 		return bad_hva();
 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
 {
 	struct page *page[1];
-	unsigned long addr;
 	int npages;
 	pfn_t pfn;
 
 	might_sleep();
 
-	addr = gfn_to_hva(kvm, gfn);
-	if (kvm_is_error_hva(addr)) {
-		get_page(bad_page);
-		return page_to_pfn(bad_page);
-	}
-
 	npages = get_user_pages_fast(addr, 1, 1, page);
 
 	if (unlikely(npages != 1)) {
@@ -865,8 +954,32 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 	return pfn;
 }
 
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+{
+	unsigned long addr;
+
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr)) {
+		get_page(bad_page);
+		return page_to_pfn(bad_page);
+	}
+
+	return hva_to_pfn(kvm, addr);
+}
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
+static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
+}
+
+pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
+			 struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
+	return hva_to_pfn(kvm, addr);
+}
+
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
 	pfn_t pfn;
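
The refactor above splits the old gfn_to_pfn() into a slot lookup (gfn_to_hva()) plus a new hva_to_pfn() helper, which lets gfn_to_pfn_memslot() translate directly from a memslot the caller already holds instead of repeating the slot search for every gfn. A sketch of how a caller might use the memslot variant (the loop and function name are illustrative, not from the patch):

/* Illustrative only: walk a memslot the caller already looked up. */
static void example_touch_slot_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
{
	gfn_t gfn;

	for (gfn = slot->base_gfn; gfn < slot->base_gfn + slot->npages; ++gfn) {
		pfn_t pfn = gfn_to_pfn_memslot(kvm, slot, gfn);

		/* ... use the pfn ... */
		kvm_release_pfn_clean(pfn);	/* drop the reference taken above */
	}
}
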
@@ -1854,12 +1967,7 @@ static struct notifier_block kvm_reboot_notifier = {
 	.priority = 0,
 };
 
-void kvm_io_bus_init(struct kvm_io_bus *bus)
-{
-	memset(bus, 0, sizeof(*bus));
-}
-
-void kvm_io_bus_destroy(struct kvm_io_bus *bus)
+static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 {
 	int i;
 
@@ -1868,13 +1976,15 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 
 		kvm_iodevice_destructor(pos);
 	}
+	kfree(bus);
 }
 
 /* kvm_io_bus_write - called under kvm->slots_lock */
-int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
+int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 		     int len, const void *val)
 {
 	int i;
+	struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
 	for (i = 0; i < bus->dev_count; i++)
 		if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
 			return 0;
@@ -1882,59 +1992,71 @@ int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
 }
 
 /* kvm_io_bus_read - called under kvm->slots_lock */
-int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val)
+int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+		    int len, void *val)
 {
 	int i;
+	struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
+
 	for (i = 0; i < bus->dev_count; i++)
 		if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
 			return 0;
 	return -EOPNOTSUPP;
 }
 
-int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
-			    struct kvm_io_device *dev)
+/* Caller must hold slots_lock. */
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+			    struct kvm_io_device *dev)
 {
-	int ret;
-
-	down_write(&kvm->slots_lock);
-	ret = __kvm_io_bus_register_dev(bus, dev);
-	up_write(&kvm->slots_lock);
+	struct kvm_io_bus *new_bus, *bus;
 
-	return ret;
-}
-
-/* An unlocked version. Caller must have write lock on slots_lock. */
-int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
-			      struct kvm_io_device *dev)
-{
+	bus = kvm->buses[bus_idx];
 	if (bus->dev_count > NR_IOBUS_DEVS-1)
 		return -ENOSPC;
 
-	bus->devs[bus->dev_count++] = dev;
+	new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+	if (!new_bus)
+		return -ENOMEM;
+	memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+	new_bus->devs[new_bus->dev_count++] = dev;
+	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+	synchronize_srcu_expedited(&kvm->srcu);
+	kfree(bus);
 
 	return 0;
 }
 
-void kvm_io_bus_unregister_dev(struct kvm *kvm,
-			       struct kvm_io_bus *bus,
-			       struct kvm_io_device *dev)
+/* Caller must hold slots_lock. */
+int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+			      struct kvm_io_device *dev)
 {
-	down_write(&kvm->slots_lock);
-	__kvm_io_bus_unregister_dev(bus, dev);
-	up_write(&kvm->slots_lock);
-}
+	int i, r;
+	struct kvm_io_bus *new_bus, *bus;
 
-/* An unlocked version. Caller must have write lock on slots_lock. */
-void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
-				 struct kvm_io_device *dev)
-{
-	int i;
+	new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+	if (!new_bus)
+		return -ENOMEM;
 
-	for (i = 0; i < bus->dev_count; i++)
-		if (bus->devs[i] == dev) {
-			bus->devs[i] = bus->devs[--bus->dev_count];
+	bus = kvm->buses[bus_idx];
+	memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+
+	r = -ENOENT;
+	for (i = 0; i < new_bus->dev_count; i++)
+		if (new_bus->devs[i] == dev) {
+			r = 0;
+			new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
 			break;
 		}
+
+	if (r) {
+		kfree(new_bus);
+		return r;
+	}
+
+	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+	synchronize_srcu_expedited(&kvm->srcu);
+	kfree(bus);
+	return r;
 }
 
 static struct notifier_block kvm_cpu_notifier = {
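
The register/unregister paths above apply the same copy/publish/synchronize recipe to the per-VM I/O buses: build a new kvm_io_bus, swap it in with rcu_assign_pointer(), wait out readers with synchronize_srcu_expedited(), then free the old copy, while kvm_io_bus_read()/kvm_io_bus_write() only ever rcu_dereference() the current bus. A condensed sketch of that recipe (illustrative helper name; the caller must hold kvm->slots_lock):

static void example_replace_bus(struct kvm *kvm, enum kvm_bus bus_idx,
				struct kvm_io_bus *new_bus)
{
	struct kvm_io_bus *old_bus = kvm->buses[bus_idx];

	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
	synchronize_srcu_expedited(&kvm->srcu);	/* in-flight readers may still use old_bus */
	kfree(old_bus);
}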