Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--	virt/kvm/kvm_main.c | 392
1 file changed, 257 insertions(+), 135 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a944be392d6..548f9253c19 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -44,6 +44,8 @@
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
 #include <linux/compat.h>
+#include <linux/srcu.h>
+#include <linux/hugetlb.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -51,9 +53,7 @@
 #include <asm/pgtable.h>
 #include <asm-generic/bitops/le.h>
 
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 #include "coalesced_mmio.h"
-#endif
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
@@ -86,6 +86,8 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 static int hardware_enable_all(void);
 static void hardware_disable_all(void);
 
+static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
+
 static bool kvm_rebooting;
 
 static bool largepages_enabled = true;
@@ -136,7 +138,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
 	zalloc_cpumask_var(&cpus, GFP_ATOMIC);
 
-	spin_lock(&kvm->requests_lock);
+	raw_spin_lock(&kvm->requests_lock);
 	me = smp_processor_id();
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (test_and_set_bit(req, &vcpu->requests))
@@ -151,7 +153,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 		smp_call_function_many(cpus, ack_flush, NULL, 1);
 	else
 		called = false;
-	spin_unlock(&kvm->requests_lock);
+	raw_spin_unlock(&kvm->requests_lock);
 	free_cpumask_var(cpus);
 	return called;
 }
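The conversion above only switches the lock operations; it presumably pairs with kvm->requests_lock changing type from spinlock_t to raw_spinlock_t in include/linux/kvm_host.h, which is outside this file. A minimal sketch of what the call sites now assume (the struct fragment is an assumption, not part of this diff):

	struct kvm {
		raw_spinlock_t requests_lock;	/* assumed: was spinlock_t before this change */
		/* ... */
	};

	raw_spin_lock(&kvm->requests_lock);	/* stays a true spinning lock even on RT kernels */
	/* ... mark vcpu->requests and collect CPUs to IPI ... */
	raw_spin_unlock(&kvm->requests_lock);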
@@ -215,7 +217,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 					     unsigned long address)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int need_tlb_flush;
+	int need_tlb_flush, idx;
 
 	/*
 	 * When ->invalidate_page runs, the linux pte has been zapped
@@ -235,10 +237,12 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 	 * pte after kvm_unmap_hva returned, without noticing the page
 	 * is going to be freed.
 	 */
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	kvm->mmu_notifier_seq++;
 	need_tlb_flush = kvm_unmap_hva(kvm, address);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
@@ -252,11 +256,14 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
 					pte_t pte)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	int idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	kvm->mmu_notifier_seq++;
 	kvm_set_spte_hva(kvm, address, pte);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 }
 
 static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
@@ -265,8 +272,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 						    unsigned long end)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int need_tlb_flush = 0;
+	int need_tlb_flush = 0, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	/*
 	 * The count increase must become visible at unlock time as no
@@ -277,6 +285,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	for (; start < end; start += PAGE_SIZE)
 		need_tlb_flush |= kvm_unmap_hva(kvm, start);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
@@ -314,11 +323,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 					      unsigned long address)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int young;
+	int young, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	young = kvm_age_hva(kvm, address);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	if (young)
 		kvm_flush_remote_tlbs(kvm);
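Every MMU notifier hook above gains the same wrapper: an SRCU read-side critical section around the mmu_lock-protected work, so that the memslot array read underneath cannot be freed by a concurrent memory-region update. A minimal sketch of the pattern, with the body left as a placeholder:

	int idx;

	idx = srcu_read_lock(&kvm->srcu);	/* pin the current kvm->memslots */
	spin_lock(&kvm->mmu_lock);
	/* ... walk and zap shadow entries for the affected hva range ... */
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);	/* lets a pending memslot update finish its grace period */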
@@ -341,15 +352,26 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
 	.change_pte		= kvm_mmu_notifier_change_pte,
 	.release		= kvm_mmu_notifier_release,
 };
+
+static int kvm_init_mmu_notifier(struct kvm *kvm)
+{
+	kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
+	return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
+}
+
+#else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
+
+static int kvm_init_mmu_notifier(struct kvm *kvm)
+{
+	return 0;
+}
+
 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
 
 static struct kvm *kvm_create_vm(void)
 {
-	int r = 0;
+	int r = 0, i;
 	struct kvm *kvm = kvm_arch_create_vm();
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-	struct page *page;
-#endif
 
 	if (IS_ERR(kvm))
 		goto out;
@@ -363,39 +385,35 @@ static struct kvm *kvm_create_vm(void)
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
 
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!page) {
-		r = -ENOMEM;
+	r = -ENOMEM;
+	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+	if (!kvm->memslots)
 		goto out_err;
-	}
-	kvm->coalesced_mmio_ring =
-			(struct kvm_coalesced_mmio_ring *)page_address(page);
-#endif
-
-#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
-	{
-		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
-		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
-		if (r) {
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-			put_page(page);
-#endif
+	if (init_srcu_struct(&kvm->srcu))
+		goto out_err;
+	for (i = 0; i < KVM_NR_BUSES; i++) {
+		kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
+					GFP_KERNEL);
+		if (!kvm->buses[i]) {
+			cleanup_srcu_struct(&kvm->srcu);
 			goto out_err;
 		}
 	}
-#endif
+
+	r = kvm_init_mmu_notifier(kvm);
+	if (r) {
+		cleanup_srcu_struct(&kvm->srcu);
+		goto out_err;
+	}
 
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
 	spin_lock_init(&kvm->mmu_lock);
-	spin_lock_init(&kvm->requests_lock);
-	kvm_io_bus_init(&kvm->pio_bus);
+	raw_spin_lock_init(&kvm->requests_lock);
 	kvm_eventfd_init(kvm);
 	mutex_init(&kvm->lock);
 	mutex_init(&kvm->irq_lock);
-	kvm_io_bus_init(&kvm->mmio_bus);
-	init_rwsem(&kvm->slots_lock);
+	mutex_init(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);
 	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
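kvm_create_vm() now allocates kvm->memslots and one kvm_io_bus per bus up front; both are later replaced wholesale under SRCU rather than modified in place. struct kvm_memslots itself is defined in include/linux/kvm_host.h, not in this file; judging only from how this file indexes it (slots->nmemslots, slots->memslots[i]), its shape is roughly the following, shown purely as an assumption:

	/* assumed layout, inferred from the uses in this file */
	struct kvm_memslots {
		int nmemslots;
		struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
						KVM_PRIVATE_MEM_SLOTS];
	};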
@@ -406,12 +424,12 @@ static struct kvm *kvm_create_vm(void)
 out:
 	return kvm;
 
-#if defined(KVM_COALESCED_MMIO_PAGE_OFFSET) || \
-    (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER))
 out_err:
 	hardware_disable_all();
-#endif
 out_err_nodisable:
+	for (i = 0; i < KVM_NR_BUSES; i++)
+		kfree(kvm->buses[i]);
+	kfree(kvm->memslots);
 	kfree(kvm);
 	return ERR_PTR(r);
 }
@@ -446,13 +464,17 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 void kvm_free_physmem(struct kvm *kvm)
 {
 	int i;
+	struct kvm_memslots *slots = kvm->memslots;
+
+	for (i = 0; i < slots->nmemslots; ++i)
+		kvm_free_physmem_slot(&slots->memslots[i], NULL);
 
-	for (i = 0; i < kvm->nmemslots; ++i)
-		kvm_free_physmem_slot(&kvm->memslots[i], NULL);
+	kfree(kvm->memslots);
 }
 
 static void kvm_destroy_vm(struct kvm *kvm)
 {
+	int i;
 	struct mm_struct *mm = kvm->mm;
 
 	kvm_arch_sync_events(kvm);
@@ -460,12 +482,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	list_del(&kvm->vm_list);
 	spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
-	kvm_io_bus_destroy(&kvm->pio_bus);
-	kvm_io_bus_destroy(&kvm->mmio_bus);
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-	if (kvm->coalesced_mmio_ring != NULL)
-		free_page((unsigned long)kvm->coalesced_mmio_ring);
-#endif
+	for (i = 0; i < KVM_NR_BUSES; i++)
+		kvm_io_bus_destroy(kvm->buses[i]);
+	kvm_coalesced_mmio_free(kvm);
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
 #else
@@ -512,12 +531,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem,
 			    int user_alloc)
 {
-	int r;
+	int r, flush_shadow = 0;
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long i;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot old, new;
+	struct kvm_memslots *slots, *old_memslots;
 
 	r = -EINVAL;
 	/* General sanity checks */
@@ -532,7 +552,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
 		goto out;
 
-	memslot = &kvm->memslots[mem->slot];
+	memslot = &kvm->memslots->memslots[mem->slot];
 	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
 	npages = mem->memory_size >> PAGE_SHIFT;
 
@@ -553,7 +573,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	/* Check for overlaps */
 	r = -EEXIST;
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *s = &kvm->memslots[i];
+		struct kvm_memory_slot *s = &kvm->memslots->memslots[i];
 
 		if (s == memslot || !s->npages)
 			continue;
@@ -579,15 +599,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		memset(new.rmap, 0, npages * sizeof(*new.rmap));
 
 		new.user_alloc = user_alloc;
-		/*
-		 * hva_to_rmmap() serialzies with the mmu_lock and to be
-		 * safe it has to ignore memslots with !user_alloc &&
-		 * !userspace_addr.
-		 */
-		if (user_alloc)
-			new.userspace_addr = mem->userspace_addr;
-		else
-			new.userspace_addr = 0;
+		new.userspace_addr = mem->userspace_addr;
 	}
 	if (!npages)
 		goto skip_lpage;
@@ -642,8 +654,9 @@ skip_lpage:
 		if (!new.dirty_bitmap)
 			goto out_free;
 		memset(new.dirty_bitmap, 0, dirty_bytes);
+		/* destroy any largepage mappings for dirty tracking */
 		if (old.npages)
-			kvm_arch_flush_shadow(kvm);
+			flush_shadow = 1;
 	}
 #else /* not defined CONFIG_S390 */
 	new.user_alloc = user_alloc;
@@ -651,36 +664,72 @@ skip_lpage:
 	new.userspace_addr = mem->userspace_addr;
 #endif /* not defined CONFIG_S390 */
 
-	if (!npages)
+	if (!npages) {
+		r = -ENOMEM;
+		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+		if (!slots)
+			goto out_free;
+		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+		if (mem->slot >= slots->nmemslots)
+			slots->nmemslots = mem->slot + 1;
+		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
+
+		old_memslots = kvm->memslots;
+		rcu_assign_pointer(kvm->memslots, slots);
+		synchronize_srcu_expedited(&kvm->srcu);
+		/* From this point no new shadow pages pointing to a deleted
+		 * memslot will be created.
+		 *
+		 * validation of sp->gfn happens in:
+		 * 	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+		 * 	- kvm_is_visible_gfn (mmu_check_roots)
+		 */
 		kvm_arch_flush_shadow(kvm);
+		kfree(old_memslots);
+	}
 
-	spin_lock(&kvm->mmu_lock);
-	if (mem->slot >= kvm->nmemslots)
-		kvm->nmemslots = mem->slot + 1;
-
-	*memslot = new;
-	spin_unlock(&kvm->mmu_lock);
-
-	r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
-	if (r) {
-		spin_lock(&kvm->mmu_lock);
-		*memslot = old;
-		spin_unlock(&kvm->mmu_lock);
+	r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
+	if (r)
 		goto out_free;
-	}
 
-	kvm_free_physmem_slot(&old, npages ? &new : NULL);
-	/* Slot deletion case: we have to update the current slot */
-	spin_lock(&kvm->mmu_lock);
-	if (!npages)
-		*memslot = old;
-	spin_unlock(&kvm->mmu_lock);
 #ifdef CONFIG_DMAR
 	/* map the pages in iommu page table */
-	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
-	if (r)
-		goto out;
+	if (npages) {
+		r = kvm_iommu_map_pages(kvm, &new);
+		if (r)
+			goto out_free;
+	}
 #endif
+
+	r = -ENOMEM;
+	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+	if (!slots)
+		goto out_free;
+	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+	if (mem->slot >= slots->nmemslots)
+		slots->nmemslots = mem->slot + 1;
+
+	/* actual memory is freed via old in kvm_free_physmem_slot below */
+	if (!npages) {
+		new.rmap = NULL;
+		new.dirty_bitmap = NULL;
+		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
+			new.lpage_info[i] = NULL;
+	}
+
+	slots->memslots[mem->slot] = new;
+	old_memslots = kvm->memslots;
+	rcu_assign_pointer(kvm->memslots, slots);
+	synchronize_srcu_expedited(&kvm->srcu);
+
+	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
+
+	kvm_free_physmem_slot(&old, &new);
+	kfree(old_memslots);
+
+	if (flush_shadow)
+		kvm_arch_flush_shadow(kvm);
+
 	return 0;
 
 out_free:
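The rewritten __kvm_set_memory_region() above replaces in-place slot updates under mmu_lock with a copy/publish scheme: build a new kvm_memslots, publish it with rcu_assign_pointer(), wait out existing SRCU readers with synchronize_srcu_expedited(), then free the old array. Boiled down to its skeleton, with error handling dropped for brevity:

	struct kvm_memslots *slots, *old_memslots;

	slots = kzalloc(sizeof(*slots), GFP_KERNEL);		/* private copy */
	memcpy(slots, kvm->memslots, sizeof(*slots));
	slots->memslots[mem->slot] = new;			/* edit the copy */

	old_memslots = kvm->memslots;
	rcu_assign_pointer(kvm->memslots, slots);		/* publish */
	synchronize_srcu_expedited(&kvm->srcu);			/* wait for readers of the old array */
	kfree(old_memslots);					/* now safe to free */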
@@ -697,9 +746,9 @@ int kvm_set_memory_region(struct kvm *kvm,
 {
 	int r;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	r = __kvm_set_memory_region(kvm, mem, user_alloc);
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
@@ -726,7 +775,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
 	if (log->slot >= KVM_MEMORY_SLOTS)
 		goto out;
 
-	memslot = &kvm->memslots[log->slot];
+	memslot = &kvm->memslots->memslots[log->slot];
 	r = -ENOENT;
 	if (!memslot->dirty_bitmap)
 		goto out;
@@ -780,9 +829,10 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
-	for (i = 0; i < kvm->nmemslots; ++i) {
-		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+	for (i = 0; i < slots->nmemslots; ++i) {
+		struct kvm_memory_slot *memslot = &slots->memslots[i];
 
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)
@@ -801,10 +851,14 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn_instantiation(kvm, gfn);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+		struct kvm_memory_slot *memslot = &slots->memslots[i];
+
+		if (memslot->flags & KVM_MEMSLOT_INVALID)
+			continue;
 
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)
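gfn_to_memslot_unaliased() and kvm_is_visible_gfn() now read the slot array through rcu_dereference(), which is only safe while the caller holds the SRCU read lock (or slots_lock). A hedged sketch of how a reader is presumably expected to look from vcpu context; the caller shown here is illustrative, not part of this patch:

	int idx;
	struct kvm_memory_slot *slot;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);	/* rcu_dereference() happens inside */
	if (slot && !(slot->flags & KVM_MEMSLOT_INVALID))
		/* ... use slot->userspace_addr, slot->base_gfn, ... */;
	srcu_read_unlock(&kvm->srcu, idx);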
@@ -814,33 +868,68 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
+unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
+{
+	struct vm_area_struct *vma;
+	unsigned long addr, size;
+
+	size = PAGE_SIZE;
+
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr))
+		return PAGE_SIZE;
+
+	down_read(&current->mm->mmap_sem);
+	vma = find_vma(current->mm, addr);
+	if (!vma)
+		goto out;
+
+	size = vma_kernel_pagesize(vma);
+
+out:
+	up_read(&current->mm->mmap_sem);
+
+	return size;
+}
+
+int memslot_id(struct kvm *kvm, gfn_t gfn)
+{
+	int i;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+	struct kvm_memory_slot *memslot = NULL;
+
+	gfn = unalias_gfn(kvm, gfn);
+	for (i = 0; i < slots->nmemslots; ++i) {
+		memslot = &slots->memslots[i];
+
+		if (gfn >= memslot->base_gfn
+		    && gfn < memslot->base_gfn + memslot->npages)
+			break;
+	}
+
+	return memslot - slots->memslots;
+}
+
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *slot;
 
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn_instantiation(kvm, gfn);
 	slot = gfn_to_memslot_unaliased(kvm, gfn);
-	if (!slot)
+	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
 		return bad_hva();
 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
 {
 	struct page *page[1];
-	unsigned long addr;
 	int npages;
 	pfn_t pfn;
 
 	might_sleep();
 
-	addr = gfn_to_hva(kvm, gfn);
-	if (kvm_is_error_hva(addr)) {
-		get_page(bad_page);
-		return page_to_pfn(bad_page);
-	}
-
 	npages = get_user_pages_fast(addr, 1, 1, page);
 
 	if (unlikely(npages != 1)) {
@@ -865,8 +954,32 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 	return pfn;
 }
 
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+{
+	unsigned long addr;
+
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr)) {
+		get_page(bad_page);
+		return page_to_pfn(bad_page);
+	}
+
+	return hva_to_pfn(kvm, addr);
+}
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
+static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
+}
+
+pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
+			 struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
+	return hva_to_pfn(kvm, addr);
+}
+
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
 	pfn_t pfn;
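gfn_to_pfn() is now split into a gfn-to-hva lookup plus a common hva_to_pfn() helper, and gfn_to_pfn_memslot() lets a caller that has already resolved the memslot skip the second slot walk. A small usage sketch; the caller shown is hypothetical and not part of this patch:

	pfn_t pfn;
	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);

	if (slot)
		pfn = gfn_to_pfn_memslot(kvm, slot, gfn);	/* reuse the slot we already have */
	else
		pfn = gfn_to_pfn(kvm, gfn);			/* full lookup; returns bad_page's pfn on error */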
@@ -1854,12 +1967,7 @@ static struct notifier_block kvm_reboot_notifier = {
 	.priority = 0,
 };
 
-void kvm_io_bus_init(struct kvm_io_bus *bus)
-{
-	memset(bus, 0, sizeof(*bus));
-}
-
-void kvm_io_bus_destroy(struct kvm_io_bus *bus)
+static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 {
 	int i;
 
@@ -1868,13 +1976,15 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 
 		kvm_iodevice_destructor(pos);
 	}
+	kfree(bus);
 }
 
 /* kvm_io_bus_write - called under kvm->slots_lock */
-int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
+int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 		     int len, const void *val)
 {
 	int i;
+	struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
 	for (i = 0; i < bus->dev_count; i++)
 		if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
 			return 0;
@@ -1882,59 +1992,71 @@ int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
 }
 
 /* kvm_io_bus_read - called under kvm->slots_lock */
-int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val)
+int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+		    int len, void *val)
 {
 	int i;
+	struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
+
 	for (i = 0; i < bus->dev_count; i++)
 		if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
 			return 0;
 	return -EOPNOTSUPP;
 }
 
-int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
-			    struct kvm_io_device *dev)
+/* Caller must hold slots_lock. */
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+			    struct kvm_io_device *dev)
 {
-	int ret;
-
-	down_write(&kvm->slots_lock);
-	ret = __kvm_io_bus_register_dev(bus, dev);
-	up_write(&kvm->slots_lock);
+	struct kvm_io_bus *new_bus, *bus;
 
-	return ret;
-}
-
-/* An unlocked version. Caller must have write lock on slots_lock. */
-int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
-			      struct kvm_io_device *dev)
-{
+	bus = kvm->buses[bus_idx];
 	if (bus->dev_count > NR_IOBUS_DEVS-1)
 		return -ENOSPC;
 
-	bus->devs[bus->dev_count++] = dev;
+	new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+	if (!new_bus)
+		return -ENOMEM;
+	memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+	new_bus->devs[new_bus->dev_count++] = dev;
+	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+	synchronize_srcu_expedited(&kvm->srcu);
+	kfree(bus);
 
 	return 0;
 }
 
-void kvm_io_bus_unregister_dev(struct kvm *kvm,
-			       struct kvm_io_bus *bus,
+/* Caller must hold slots_lock. */
+int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 			      struct kvm_io_device *dev)
 {
-	down_write(&kvm->slots_lock);
-	__kvm_io_bus_unregister_dev(bus, dev);
-	up_write(&kvm->slots_lock);
-}
+	int i, r;
+	struct kvm_io_bus *new_bus, *bus;
 
-/* An unlocked version. Caller must have write lock on slots_lock. */
-void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
-				 struct kvm_io_device *dev)
-{
-	int i;
+	new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+	if (!new_bus)
+		return -ENOMEM;
 
-	for (i = 0; i < bus->dev_count; i++)
-		if (bus->devs[i] == dev) {
-			bus->devs[i] = bus->devs[--bus->dev_count];
+	bus = kvm->buses[bus_idx];
+	memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+
+	r = -ENOENT;
+	for (i = 0; i < new_bus->dev_count; i++)
+		if (new_bus->devs[i] == dev) {
+			r = 0;
+			new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
 			break;
 		}
+
+	if (r) {
+		kfree(new_bus);
+		return r;
+	}
+
+	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+	synchronize_srcu_expedited(&kvm->srcu);
+	kfree(bus);
+	return r;
 }
 
 static struct notifier_block kvm_cpu_notifier = {
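kvm_io_bus_register_dev() and kvm_io_bus_unregister_dev() now follow the same copy/publish discipline as the memslot update: the bus is never modified in place, a modified copy is published with rcu_assign_pointer(), and the old bus is freed only after synchronize_srcu_expedited(). The read side, as seen in kvm_io_bus_write()/kvm_io_bus_read() above, then only needs an SRCU read-side critical section around the dereference. A condensed sketch of a dispatch path under those assumptions (the surrounding exit-handling code is hypothetical, and KVM_MMIO_BUS is assumed to be one of the enum kvm_bus values):

	int idx, ret;

	idx = srcu_read_lock(&kvm->srcu);
	ret = kvm_io_bus_write(kvm, KVM_MMIO_BUS, gpa, len, data);	/* rcu_dereference()s kvm->buses[] */
	srcu_read_unlock(&kvm->srcu, idx);
	if (ret == -EOPNOTSUPP)
		/* ... no in-kernel device claimed the range; exit to userspace ... */;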