Diffstat (limited to 'virt/kvm/kvm_main.c')
 virt/kvm/kvm_main.c | 334 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 245 insertions(+), 89 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5225052aebc..7f686251f71 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -55,6 +55,7 @@
 #include <asm-generic/bitops/le.h>
 
 #include "coalesced_mmio.h"
+#include "async_pf.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
@@ -89,7 +90,8 @@ static void hardware_disable_all(void);
 
 static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
 
-static bool kvm_rebooting;
+bool kvm_rebooting;
+EXPORT_SYMBOL_GPL(kvm_rebooting);
 
 static bool largepages_enabled = true;
 
@@ -167,8 +169,12 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
+	int dirty_count = kvm->tlbs_dirty;
+
+	smp_mb();
 	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
 		++kvm->stat.remote_tlb_flush;
+	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
@@ -186,6 +192,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	vcpu->kvm = kvm;
 	vcpu->vcpu_id = id;
 	init_waitqueue_head(&vcpu->wq);
+	kvm_async_pf_vcpu_init(vcpu);
 
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page) {
@@ -247,7 +254,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	kvm->mmu_notifier_seq++;
-	need_tlb_flush = kvm_unmap_hva(kvm, address);
+	need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty;
 	spin_unlock(&kvm->mmu_lock);
 	srcu_read_unlock(&kvm->srcu, idx);
 
@@ -291,6 +298,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	kvm->mmu_notifier_count++;
 	for (; start < end; start += PAGE_SIZE)
 		need_tlb_flush |= kvm_unmap_hva(kvm, start);
+	need_tlb_flush |= kvm->tlbs_dirty;
 	spin_unlock(&kvm->mmu_lock);
 	srcu_read_unlock(&kvm->srcu, idx);
 
@@ -381,11 +389,15 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
 
 static struct kvm *kvm_create_vm(void)
 {
-	int r = 0, i;
-	struct kvm *kvm = kvm_arch_create_vm();
+	int r, i;
+	struct kvm *kvm = kvm_arch_alloc_vm();
 
-	if (IS_ERR(kvm))
-		goto out;
+	if (!kvm)
+		return ERR_PTR(-ENOMEM);
+
+	r = kvm_arch_init_vm(kvm);
+	if (r)
+		goto out_err_nodisable;
 
 	r = hardware_enable_all();
 	if (r)
@@ -399,23 +411,19 @@ static struct kvm *kvm_create_vm(void)
 	r = -ENOMEM;
 	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 	if (!kvm->memslots)
-		goto out_err;
+		goto out_err_nosrcu;
 	if (init_srcu_struct(&kvm->srcu))
-		goto out_err;
+		goto out_err_nosrcu;
 	for (i = 0; i < KVM_NR_BUSES; i++) {
 		kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
 					GFP_KERNEL);
-		if (!kvm->buses[i]) {
-			cleanup_srcu_struct(&kvm->srcu);
+		if (!kvm->buses[i])
 			goto out_err;
-		}
 	}
 
 	r = kvm_init_mmu_notifier(kvm);
-	if (r) {
-		cleanup_srcu_struct(&kvm->srcu);
+	if (r)
 		goto out_err;
-	}
 
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
@@ -429,19 +437,35 @@ static struct kvm *kvm_create_vm(void)
 	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
 	spin_unlock(&kvm_lock);
-out:
+
 	return kvm;
 
 out_err:
+	cleanup_srcu_struct(&kvm->srcu);
+out_err_nosrcu:
 	hardware_disable_all();
 out_err_nodisable:
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kfree(kvm->buses[i]);
 	kfree(kvm->memslots);
-	kfree(kvm);
+	kvm_arch_free_vm(kvm);
 	return ERR_PTR(r);
 }
 
+static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
+{
+	if (!memslot->dirty_bitmap)
+		return;
+
+	if (2 * kvm_dirty_bitmap_bytes(memslot) > PAGE_SIZE)
+		vfree(memslot->dirty_bitmap_head);
+	else
+		kfree(memslot->dirty_bitmap_head);
+
+	memslot->dirty_bitmap = NULL;
+	memslot->dirty_bitmap_head = NULL;
+}
+
 /*
  * Free any memory in @free but not in @dont.
  */
@@ -454,7 +478,7 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 	vfree(free->rmap);
 
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
-		vfree(free->dirty_bitmap);
+		kvm_destroy_dirty_bitmap(free);
 
 
 	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
@@ -465,7 +489,6 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 	}
 
 	free->npages = 0;
-	free->dirty_bitmap = NULL;
 	free->rmap = NULL;
 }
 
@@ -499,6 +522,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	kvm_arch_flush_shadow(kvm);
 #endif
 	kvm_arch_destroy_vm(kvm);
+	kvm_free_physmem(kvm);
+	cleanup_srcu_struct(&kvm->srcu);
+	kvm_arch_free_vm(kvm);
 	hardware_disable_all();
 	mmdrop(mm);
 }
@@ -528,6 +554,27 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
 }
 
 /*
+ * Allocation size is twice as large as the actual dirty bitmap size.
+ * This makes it possible to do double buffering: see x86's
+ * kvm_vm_ioctl_get_dirty_log().
+ */
+static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
+{
+	unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
+
+	if (dirty_bytes > PAGE_SIZE)
+		memslot->dirty_bitmap = vzalloc(dirty_bytes);
+	else
+		memslot->dirty_bitmap = kzalloc(dirty_bytes, GFP_KERNEL);
+
+	if (!memslot->dirty_bitmap)
+		return -ENOMEM;
+
+	memslot->dirty_bitmap_head = memslot->dirty_bitmap;
+	return 0;
+}
+
+/*
  * Allocate some memory and give it an address in the guest physical address
  * space.
  *
@@ -604,13 +651,11 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	/* Allocate if a slot is being created */
 #ifndef CONFIG_S390
 	if (npages && !new.rmap) {
-		new.rmap = vmalloc(npages * sizeof(*new.rmap));
+		new.rmap = vzalloc(npages * sizeof(*new.rmap));
 
 		if (!new.rmap)
 			goto out_free;
 
-		memset(new.rmap, 0, npages * sizeof(*new.rmap));
-
 		new.user_alloc = user_alloc;
 		new.userspace_addr = mem->userspace_addr;
 	}
@@ -633,14 +678,11 @@ int __kvm_set_memory_region(struct kvm *kvm,
 			      >> KVM_HPAGE_GFN_SHIFT(level));
 		lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level);
 
-		new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i]));
+		new.lpage_info[i] = vzalloc(lpages * sizeof(*new.lpage_info[i]));
 
 		if (!new.lpage_info[i])
 			goto out_free;
 
-		memset(new.lpage_info[i], 0,
-		       lpages * sizeof(*new.lpage_info[i]));
-
 		if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
 			new.lpage_info[i][0].write_count = 1;
 		if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
@@ -661,12 +703,8 @@ skip_lpage:
 
 	/* Allocate page dirty bitmap if needed */
 	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
-		unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new);
-
-		new.dirty_bitmap = vmalloc(dirty_bytes);
-		if (!new.dirty_bitmap)
+		if (kvm_create_dirty_bitmap(&new) < 0)
 			goto out_free;
-		memset(new.dirty_bitmap, 0, dirty_bytes);
 		/* destroy any largepage mappings for dirty tracking */
 		if (old.npages)
 			flush_shadow = 1;
@@ -685,6 +723,7 @@ skip_lpage:
 		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
 		if (mem->slot >= slots->nmemslots)
 			slots->nmemslots = mem->slot + 1;
+		slots->generation++;
 		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
 
 		old_memslots = kvm->memslots;
@@ -719,6 +758,7 @@ skip_lpage:
 	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
 	if (mem->slot >= slots->nmemslots)
 		slots->nmemslots = mem->slot + 1;
+	slots->generation++;
 
 	/* actual memory is freed via old in kvm_free_physmem_slot below */
 	if (!npages) {
@@ -849,10 +889,10 @@ int kvm_is_error_hva(unsigned long addr)
 }
 EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 
-struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
+static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots,
+						gfn_t gfn)
 {
 	int i;
-	struct kvm_memslots *slots = kvm_memslots(kvm);
 
 	for (i = 0; i < slots->nmemslots; ++i) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
@@ -863,6 +903,11 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 	}
 	return NULL;
 }
+
+struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
+{
+	return __gfn_to_memslot(kvm_memslots(kvm), gfn);
+}
 EXPORT_SYMBOL_GPL(gfn_to_memslot);
 
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
@@ -925,12 +970,9 @@ int memslot_id(struct kvm *kvm, gfn_t gfn)
 	return memslot - slots->memslots;
 }
 
-static unsigned long gfn_to_hva_many(struct kvm *kvm, gfn_t gfn,
+static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
 				     gfn_t *nr_pages)
 {
-	struct kvm_memory_slot *slot;
-
-	slot = gfn_to_memslot(kvm, gfn);
 	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
 		return bad_hva();
 
@@ -942,28 +984,61 @@ static unsigned long gfn_to_hva_many(struct kvm *kvm, gfn_t gfn,
 
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
-	return gfn_to_hva_many(kvm, gfn, NULL);
+	return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic)
+static pfn_t get_fault_pfn(void)
+{
+	get_page(fault_page);
+	return fault_pfn;
+}
+
+static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
+			bool *async, bool write_fault, bool *writable)
 {
 	struct page *page[1];
-	int npages;
+	int npages = 0;
 	pfn_t pfn;
 
-	if (atomic)
+	/* we can do it either atomically or asynchronously, not both */
+	BUG_ON(atomic && async);
+
+	BUG_ON(!write_fault && !writable);
+
+	if (writable)
+		*writable = true;
+
+	if (atomic || async)
 		npages = __get_user_pages_fast(addr, 1, 1, page);
-	else {
+
+	if (unlikely(npages != 1) && !atomic) {
 		might_sleep();
-		npages = get_user_pages_fast(addr, 1, 1, page);
+
+		if (writable)
+			*writable = write_fault;
+
+		npages = get_user_pages_fast(addr, 1, write_fault, page);
+
+		/* map read fault as writable if possible */
+		if (unlikely(!write_fault) && npages == 1) {
+			struct page *wpage[1];
+
+			npages = __get_user_pages_fast(addr, 1, 1, wpage);
+			if (npages == 1) {
+				*writable = true;
+				put_page(page[0]);
+				page[0] = wpage[0];
+			}
+			npages = 1;
+		}
 	}
 
 	if (unlikely(npages != 1)) {
 		struct vm_area_struct *vma;
 
 		if (atomic)
-			goto return_fault_page;
+			return get_fault_pfn();
 
 		down_read(&current->mm->mmap_sem);
 		if (is_hwpoison_address(addr)) {
@@ -972,19 +1047,20 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic)
 			return page_to_pfn(hwpoison_page);
 		}
 
-		vma = find_vma(current->mm, addr);
+		vma = find_vma_intersection(current->mm, addr, addr+1);
 
-		if (vma == NULL || addr < vma->vm_start ||
-		    !(vma->vm_flags & VM_PFNMAP)) {
-			up_read(&current->mm->mmap_sem);
-return_fault_page:
-			get_page(fault_page);
-			return page_to_pfn(fault_page);
+		if (vma == NULL)
+			pfn = get_fault_pfn();
+		else if ((vma->vm_flags & VM_PFNMAP)) {
+			pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
+				vma->vm_pgoff;
+			BUG_ON(!kvm_is_mmio_pfn(pfn));
+		} else {
+			if (async && (vma->vm_flags & VM_WRITE))
+				*async = true;
+			pfn = get_fault_pfn();
 		}
-
-		pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
 		up_read(&current->mm->mmap_sem);
-		BUG_ON(!kvm_is_mmio_pfn(pfn));
 	} else
 		pfn = page_to_pfn(page[0]);
 
@@ -993,40 +1069,58 @@ return_fault_page:
 
 pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
 {
-	return hva_to_pfn(kvm, addr, true);
+	return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
 
-static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic)
+static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
+			  bool write_fault, bool *writable)
 {
 	unsigned long addr;
 
+	if (async)
+		*async = false;
+
 	addr = gfn_to_hva(kvm, gfn);
 	if (kvm_is_error_hva(addr)) {
 		get_page(bad_page);
 		return page_to_pfn(bad_page);
 	}
 
-	return hva_to_pfn(kvm, addr, atomic);
+	return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
 }
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
 {
-	return __gfn_to_pfn(kvm, gfn, true);
+	return __gfn_to_pfn(kvm, gfn, true, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
 
+pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
+		       bool write_fault, bool *writable)
+{
+	return __gfn_to_pfn(kvm, gfn, false, async, write_fault, writable);
+}
+EXPORT_SYMBOL_GPL(gfn_to_pfn_async);
+
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 {
-	return __gfn_to_pfn(kvm, gfn, false);
+	return __gfn_to_pfn(kvm, gfn, false, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
+pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
+		      bool *writable)
+{
+	return __gfn_to_pfn(kvm, gfn, false, NULL, write_fault, writable);
+}
+EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
+
 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
 			 struct kvm_memory_slot *slot, gfn_t gfn)
 {
 	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-	return hva_to_pfn(kvm, addr, false);
+	return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
 }
 
 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
@@ -1035,7 +1129,7 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
 	unsigned long addr;
 	gfn_t entry;
 
-	addr = gfn_to_hva_many(kvm, gfn, &entry);
+	addr = gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, &entry);
 	if (kvm_is_error_hva(addr))
 		return -1;
 
@@ -1219,9 +1313,51 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
 	return 0;
 }
 
+int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+			      gpa_t gpa)
+{
+	struct kvm_memslots *slots = kvm_memslots(kvm);
+	int offset = offset_in_page(gpa);
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+
+	ghc->gpa = gpa;
+	ghc->generation = slots->generation;
+	ghc->memslot = __gfn_to_memslot(slots, gfn);
+	ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL);
+	if (!kvm_is_error_hva(ghc->hva))
+		ghc->hva += offset;
+	else
+		return -EFAULT;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init);
+
+int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+			   void *data, unsigned long len)
+{
+	struct kvm_memslots *slots = kvm_memslots(kvm);
+	int r;
+
+	if (slots->generation != ghc->generation)
+		kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa);
+
+	if (kvm_is_error_hva(ghc->hva))
+		return -EFAULT;
+
+	r = copy_to_user((void __user *)ghc->hva, data, len);
+	if (r)
+		return -EFAULT;
+	mark_page_dirty_in_slot(kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_write_guest_cached);
+
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
 {
-	return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len);
+	return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page,
+				    offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
 
@@ -1244,11 +1380,9 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest);
 
-void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
+void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			     gfn_t gfn)
 {
-	struct kvm_memory_slot *memslot;
-
-	memslot = gfn_to_memslot(kvm, gfn);
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
 
@@ -1256,6 +1390,14 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 	}
 }
 
+void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
+{
+	struct kvm_memory_slot *memslot;
+
+	memslot = gfn_to_memslot(kvm, gfn);
+	mark_page_dirty_in_slot(kvm, memslot, gfn);
+}
+
 /*
  * The vCPU has executed a HLT instruction with in-kernel mode enabled.
  */
@@ -1457,6 +1599,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
 		if (arg)
 			goto out;
 		r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
+		trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
 		break;
 	case KVM_GET_REGS: {
 		struct kvm_regs *kvm_regs;
@@ -1824,7 +1967,7 @@ static struct file_operations kvm_vm_fops = {
 
 static int kvm_dev_ioctl_create_vm(void)
 {
-	int fd, r;
+	int r;
 	struct kvm *kvm;
 
 	kvm = kvm_create_vm();
@@ -1837,11 +1980,11 @@ static int kvm_dev_ioctl_create_vm(void)
 		return r;
 	}
 #endif
-	fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
-	if (fd < 0)
+	r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
+	if (r < 0)
 		kvm_put_kvm(kvm);
 
-	return fd;
+	return r;
 }
 
 static long kvm_dev_ioctl_check_extension_generic(long arg)
@@ -1922,7 +2065,7 @@ static struct miscdevice kvm_dev = {
 	&kvm_chardev_ops,
 };
 
-static void hardware_enable(void *junk)
+static void hardware_enable_nolock(void *junk)
 {
 	int cpu = raw_smp_processor_id();
 	int r;
@@ -1942,7 +2085,14 @@ static void hardware_enable(void *junk)
 	}
 }
 
-static void hardware_disable(void *junk)
+static void hardware_enable(void *junk)
+{
+	spin_lock(&kvm_lock);
+	hardware_enable_nolock(junk);
+	spin_unlock(&kvm_lock);
+}
+
+static void hardware_disable_nolock(void *junk)
 {
 	int cpu = raw_smp_processor_id();
 
@@ -1952,13 +2102,20 @@ static void hardware_disable(void *junk)
 	kvm_arch_hardware_disable(NULL);
 }
 
+static void hardware_disable(void *junk)
+{
+	spin_lock(&kvm_lock);
+	hardware_disable_nolock(junk);
+	spin_unlock(&kvm_lock);
+}
+
 static void hardware_disable_all_nolock(void)
 {
 	BUG_ON(!kvm_usage_count);
 
 	kvm_usage_count--;
 	if (!kvm_usage_count)
-		on_each_cpu(hardware_disable, NULL, 1);
+		on_each_cpu(hardware_disable_nolock, NULL, 1);
 }
 
 static void hardware_disable_all(void)
@@ -1977,7 +2134,7 @@ static int hardware_enable_all(void)
 	kvm_usage_count++;
 	if (kvm_usage_count == 1) {
 		atomic_set(&hardware_enable_failed, 0);
-		on_each_cpu(hardware_enable, NULL, 1);
+		on_each_cpu(hardware_enable_nolock, NULL, 1);
 
 		if (atomic_read(&hardware_enable_failed)) {
 			hardware_disable_all_nolock();
@@ -2008,27 +2165,19 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 	case CPU_STARTING:
 		printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
 		       cpu);
-		spin_lock(&kvm_lock);
 		hardware_enable(NULL);
-		spin_unlock(&kvm_lock);
 		break;
 	}
 	return NOTIFY_OK;
 }
 
 
-asmlinkage void kvm_handle_fault_on_reboot(void)
+asmlinkage void kvm_spurious_fault(void)
 {
-	if (kvm_rebooting) {
-		/* spin while reset goes on */
-		local_irq_enable();
-		while (true)
-			cpu_relax();
-	}
 	/* Fault while not rebooting. We want the trace. */
 	BUG();
 }
-EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot);
+EXPORT_SYMBOL_GPL(kvm_spurious_fault);
 
 static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
 		      void *v)
@@ -2041,7 +2190,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
 	 */
 	printk(KERN_INFO "kvm: exiting hardware virtualization\n");
 	kvm_rebooting = true;
-	on_each_cpu(hardware_disable, NULL, 1);
+	on_each_cpu(hardware_disable_nolock, NULL, 1);
 	return NOTIFY_OK;
 }
 
@@ -2211,7 +2360,7 @@ static void kvm_exit_debug(void)
 static int kvm_suspend(struct sys_device *dev, pm_message_t state)
 {
 	if (kvm_usage_count)
-		hardware_disable(NULL);
+		hardware_disable_nolock(NULL);
 	return 0;
 }
 
@@ -2219,7 +2368,7 @@ static int kvm_resume(struct sys_device *dev)
 {
 	if (kvm_usage_count) {
 		WARN_ON(spin_is_locked(&kvm_lock));
-		hardware_enable(NULL);
+		hardware_enable_nolock(NULL);
 	}
 	return 0;
 }
@@ -2336,6 +2485,10 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 		goto out_free_5;
 	}
 
+	r = kvm_async_pf_init();
+	if (r)
+		goto out_free;
+
 	kvm_chardev_ops.owner = module;
 	kvm_vm_fops.owner = module;
 	kvm_vcpu_fops.owner = module;
@@ -2343,7 +2496,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 	r = misc_register(&kvm_dev);
 	if (r) {
 		printk(KERN_ERR "kvm: misc device register failed\n");
-		goto out_free;
+		goto out_unreg;
 	}
 
 	kvm_preempt_ops.sched_in = kvm_sched_in;
@@ -2353,6 +2506,8 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 
 	return 0;
 
+out_unreg:
+	kvm_async_pf_deinit();
 out_free:
 	kmem_cache_destroy(kvm_vcpu_cache);
 out_free_5:
@@ -2385,11 +2540,12 @@ void kvm_exit(void)
 	kvm_exit_debug();
 	misc_deregister(&kvm_dev);
 	kmem_cache_destroy(kvm_vcpu_cache);
+	kvm_async_pf_deinit();
 	sysdev_unregister(&kvm_sysdev);
 	sysdev_class_unregister(&kvm_sysdev_class);
 	unregister_reboot_notifier(&kvm_reboot_notifier);
 	unregister_cpu_notifier(&kvm_cpu_notifier);
-	on_each_cpu(hardware_disable, NULL, 1);
+	on_each_cpu(hardware_disable_nolock, NULL, 1);
 	kvm_arch_hardware_unsetup();
 	kvm_arch_exit();
 	free_cpumask_var(cpus_hardware_enabled);
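
For illustration only, not part of the patch: a minimal sketch of how arch code might use the cached guest-write helpers added above, kvm_gfn_to_hva_cache_init() and kvm_write_guest_cached(). The wrapper function, the cache variable and the gpa value here are hypothetical.

	/* Hypothetical caller -- illustrative sketch, not from the patch. */
	static struct gfn_to_hva_cache example_cache;

	static int example_write_guest_u64(struct kvm *kvm, gpa_t gpa, u64 val)
	{
		int r;

		/* Resolve gpa -> hva once and remember the memslot generation. */
		r = kvm_gfn_to_hva_cache_init(kvm, &example_cache, gpa);
		if (r)
			return r;

		/*
		 * Subsequent writes reuse the cached hva; kvm_write_guest_cached()
		 * re-initializes the cache itself if slots->generation has changed.
		 */
		return kvm_write_guest_cached(kvm, &example_cache, &val, sizeof(val));
	}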