path: root/virt/kvm
Diffstat (limited to 'virt/kvm')
-rw-r--r--  virt/kvm/Kconfig    |   3
-rw-r--r--  virt/kvm/async_pf.c |  22
-rw-r--r--  virt/kvm/iommu.c    |  38
-rw-r--r--  virt/kvm/kvm_main.c | 150
-rw-r--r--  virt/kvm/vfio.c     | 264
5 files changed, 359 insertions, 118 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 779262f59e25..fbe1a48bd629 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -27,3 +27,6 @@ config HAVE_KVM_MSI
 
 config HAVE_KVM_CPU_RELAX_INTERCEPT
         bool
+
+config KVM_VFIO
+        bool
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 8a39dda7a325..8631d9c14320 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -56,7 +56,6 @@ void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
 
 static void async_pf_execute(struct work_struct *work)
 {
-        struct page *page = NULL;
         struct kvm_async_pf *apf =
                 container_of(work, struct kvm_async_pf, work);
         struct mm_struct *mm = apf->mm;
@@ -68,14 +67,12 @@ static void async_pf_execute(struct work_struct *work)
 
         use_mm(mm);
         down_read(&mm->mmap_sem);
-        get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL);
+        get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
         up_read(&mm->mmap_sem);
         unuse_mm(mm);
 
         spin_lock(&vcpu->async_pf.lock);
         list_add_tail(&apf->link, &vcpu->async_pf.done);
-        apf->page = page;
-        apf->done = true;
         spin_unlock(&vcpu->async_pf.lock);
 
         /*
@@ -83,7 +80,7 @@ static void async_pf_execute(struct work_struct *work)
          * this point
          */
 
-        trace_kvm_async_pf_completed(addr, page, gva);
+        trace_kvm_async_pf_completed(addr, gva);
 
         if (waitqueue_active(&vcpu->wq))
                 wake_up_interruptible(&vcpu->wq);
@@ -99,9 +96,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
                 struct kvm_async_pf *work =
                         list_entry(vcpu->async_pf.queue.next,
                                    typeof(*work), queue);
-                cancel_work_sync(&work->work);
                 list_del(&work->queue);
-                if (!work->done) { /* work was canceled */
+                if (cancel_work_sync(&work->work)) {
                         mmdrop(work->mm);
                         kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
                         kmem_cache_free(async_pf_cache, work);
@@ -114,8 +110,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
                         list_entry(vcpu->async_pf.done.next,
                                    typeof(*work), link);
                 list_del(&work->link);
-                if (!is_error_page(work->page))
-                        kvm_release_page_clean(work->page);
                 kmem_cache_free(async_pf_cache, work);
         }
         spin_unlock(&vcpu->async_pf.lock);
@@ -135,14 +129,11 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
                 list_del(&work->link);
                 spin_unlock(&vcpu->async_pf.lock);
 
-                if (work->page)
-                        kvm_arch_async_page_ready(vcpu, work);
+                kvm_arch_async_page_ready(vcpu, work);
                 kvm_arch_async_page_present(vcpu, work);
 
                 list_del(&work->queue);
                 vcpu->async_pf.queued--;
-                if (!is_error_page(work->page))
-                        kvm_release_page_clean(work->page);
                 kmem_cache_free(async_pf_cache, work);
         }
 }
@@ -165,8 +156,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
         if (!work)
                 return 0;
 
-        work->page = NULL;
-        work->done = false;
+        work->wakeup_all = false;
         work->vcpu = vcpu;
         work->gva = gva;
         work->addr = gfn_to_hva(vcpu->kvm, gfn);
@@ -206,7 +196,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
         if (!work)
                 return -ENOMEM;
 
-        work->page = KVM_ERR_PTR_BAD_PAGE;
+        work->wakeup_all = true;
         INIT_LIST_HEAD(&work->queue); /* for list_del to work */
 
         spin_lock(&vcpu->async_pf.lock);
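
Note on the async_pf.c change: the per-request struct page is gone. async_pf_execute() no longer keeps the faulted-in page, the old done flag disappears, and the "wake up everything" request is marked with the new wakeup_all field instead of the KVM_ERR_PTR_BAD_PAGE sentinel. Cancellation now leans on cancel_work_sync() directly; the sketch below restates the ownership rule behind the new flush loop (the code is the hunk above, the comment is explanatory and not part of the patch):

        /*
         * cancel_work_sync() returns true only when the work item was still
         * pending, i.e. async_pf_execute() never ran for it.  In that case
         * the references taken when the work was queued (one on the mm, one
         * on the kvm) were never dropped by the work function, so the
         * canceller must drop them and free the item.  If it returns false,
         * the work has already run (cancel_work_sync() waits for a running
         * item to finish), has dropped those references in the unchanged
         * tail of async_pf_execute(), and has queued itself on
         * vcpu->async_pf.done, where the second loop frees it.
         */
        if (cancel_work_sync(&work->work)) {
                mmdrop(work->mm);
                kvm_put_kvm(vcpu->kvm);         /* == work->vcpu->kvm */
                kmem_cache_free(async_pf_cache, work);
        }
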
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 72a130bc448a..0df7d4b34dfe 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -79,7 +79,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
         flags = IOMMU_READ;
         if (!(slot->flags & KVM_MEM_READONLY))
                 flags |= IOMMU_WRITE;
-        if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
+        if (!kvm->arch.iommu_noncoherent)
                 flags |= IOMMU_CACHE;
 
 
@@ -103,6 +103,10 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
                 while ((gfn << PAGE_SHIFT) & (page_size - 1))
                         page_size >>= 1;
 
+                /* Make sure hva is aligned to the page size we want to map */
+                while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1))
+                        page_size >>= 1;
+
                 /*
                  * Pin all pages we are about to map in memory. This is
                  * important because we unmap and unpin in 4kb steps later.
@@ -140,6 +144,9 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
         struct kvm_memslots *slots;
         struct kvm_memory_slot *memslot;
 
+        if (kvm->arch.iommu_noncoherent)
+                kvm_arch_register_noncoherent_dma(kvm);
+
         idx = srcu_read_lock(&kvm->srcu);
         slots = kvm_memslots(kvm);
 
@@ -158,7 +165,8 @@ int kvm_assign_device(struct kvm *kvm,
 {
         struct pci_dev *pdev = NULL;
         struct iommu_domain *domain = kvm->arch.iommu_domain;
-        int r, last_flags;
+        int r;
+        bool noncoherent;
 
         /* check if iommu exists and in use */
         if (!domain)
@@ -174,15 +182,13 @@ int kvm_assign_device(struct kvm *kvm,
                 return r;
         }
 
-        last_flags = kvm->arch.iommu_flags;
-        if (iommu_domain_has_cap(kvm->arch.iommu_domain,
-                                 IOMMU_CAP_CACHE_COHERENCY))
-                kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY;
+        noncoherent = !iommu_domain_has_cap(kvm->arch.iommu_domain,
+                                            IOMMU_CAP_CACHE_COHERENCY);
 
         /* Check if need to update IOMMU page table for guest memory */
-        if ((last_flags ^ kvm->arch.iommu_flags) ==
-            KVM_IOMMU_CACHE_COHERENCY) {
+        if (noncoherent != kvm->arch.iommu_noncoherent) {
                 kvm_iommu_unmap_memslots(kvm);
+                kvm->arch.iommu_noncoherent = noncoherent;
                 r = kvm_iommu_map_memslots(kvm);
                 if (r)
                         goto out_unmap;
@@ -190,11 +196,7 @@ int kvm_assign_device(struct kvm *kvm,
 
         pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
 
-        printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
-                assigned_dev->host_segnr,
-                assigned_dev->host_busnr,
-                PCI_SLOT(assigned_dev->host_devfn),
-                PCI_FUNC(assigned_dev->host_devfn));
+        dev_info(&pdev->dev, "kvm assign device\n");
 
         return 0;
 out_unmap:
@@ -220,11 +222,7 @@ int kvm_deassign_device(struct kvm *kvm,
 
         pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
 
-        printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
-                assigned_dev->host_segnr,
-                assigned_dev->host_busnr,
-                PCI_SLOT(assigned_dev->host_devfn),
-                PCI_FUNC(assigned_dev->host_devfn));
+        dev_info(&pdev->dev, "kvm deassign device\n");
 
         return 0;
 }
@@ -336,6 +334,9 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 
         srcu_read_unlock(&kvm->srcu, idx);
 
+        if (kvm->arch.iommu_noncoherent)
+                kvm_arch_unregister_noncoherent_dma(kvm);
+
         return 0;
 }
 
@@ -350,6 +351,7 @@ int kvm_iommu_unmap_guest(struct kvm *kvm)
         mutex_lock(&kvm->slots_lock);
         kvm_iommu_unmap_memslots(kvm);
         kvm->arch.iommu_domain = NULL;
+        kvm->arch.iommu_noncoherent = false;
         mutex_unlock(&kvm->slots_lock);
 
         iommu_domain_free(domain);
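
Note on the iommu.c change: the cache-coherency state moves from the KVM_IOMMU_CACHE_COHERENCY flag bit to a dedicated kvm->arch.iommu_noncoherent bool, and is reported to the arch code through kvm_arch_register_noncoherent_dma()/kvm_arch_unregister_noncoherent_dma() when memslots are mapped and unmapped. Separately, kvm_iommu_map_pages() now requires the host virtual address, not just the gfn, to be aligned before it picks a large IOMMU page size. A sketch of the resulting alignment logic (the kvm_host_page_size() seeding reflects the surrounding, unchanged code and is shown for context only):

        /*
         * A large IOMMU mapping assumes the host memory backing
         * [gfn, gfn + page_size) is physically contiguous.  That only holds
         * when the hva is aligned to the host huge page providing it, so
         * shrink page_size until both the guest-physical side and the
         * host-virtual side are aligned.
         */
        page_size = kvm_host_page_size(kvm, gfn);

        /* guest-physical alignment */
        while ((gfn << PAGE_SHIFT) & (page_size - 1))
                page_size >>= 1;

        /* host-virtual alignment (new in this patch) */
        while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1))
                page_size >>= 1;
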
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 979bff485fb0..4f588bc94186 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -70,7 +70,8 @@ MODULE_LICENSE("GPL");
  * kvm->lock --> kvm->slots_lock --> kvm->irq_lock
  */
 
-DEFINE_RAW_SPINLOCK(kvm_lock);
+DEFINE_SPINLOCK(kvm_lock);
+static DEFINE_RAW_SPINLOCK(kvm_count_lock);
 LIST_HEAD(vm_list);
 
 static cpumask_var_t cpus_hardware_enabled;
@@ -186,6 +187,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
         ++kvm->stat.remote_tlb_flush;
         cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
+EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
@@ -490,9 +492,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
         if (r)
                 goto out_err;
 
-        raw_spin_lock(&kvm_lock);
+        spin_lock(&kvm_lock);
         list_add(&kvm->vm_list, &vm_list);
-        raw_spin_unlock(&kvm_lock);
+        spin_unlock(&kvm_lock);
 
         return kvm;
 
@@ -540,13 +542,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 /*
  * Free any memory in @free but not in @dont.
  */
-static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
+static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
                                   struct kvm_memory_slot *dont)
 {
         if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
                 kvm_destroy_dirty_bitmap(free);
 
-        kvm_arch_free_memslot(free, dont);
+        kvm_arch_free_memslot(kvm, free, dont);
 
         free->npages = 0;
 }
@@ -557,7 +559,7 @@ void kvm_free_physmem(struct kvm *kvm)
         struct kvm_memory_slot *memslot;
 
         kvm_for_each_memslot(memslot, slots)
-                kvm_free_physmem_slot(memslot, NULL);
+                kvm_free_physmem_slot(kvm, memslot, NULL);
 
         kfree(kvm->memslots);
 }
@@ -581,9 +583,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
         struct mm_struct *mm = kvm->mm;
 
         kvm_arch_sync_events(kvm);
-        raw_spin_lock(&kvm_lock);
+        spin_lock(&kvm_lock);
         list_del(&kvm->vm_list);
-        raw_spin_unlock(&kvm_lock);
+        spin_unlock(&kvm_lock);
         kvm_free_irq_routing(kvm);
         for (i = 0; i < KVM_NR_BUSES; i++)
                 kvm_io_bus_destroy(kvm->buses[i]);
@@ -821,7 +823,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
         if (change == KVM_MR_CREATE) {
                 new.userspace_addr = mem->userspace_addr;
 
-                if (kvm_arch_create_memslot(&new, npages))
+                if (kvm_arch_create_memslot(kvm, &new, npages))
                         goto out_free;
         }
 
@@ -872,6 +874,19 @@ int __kvm_set_memory_region(struct kvm *kvm,
                         goto out_free;
         }
 
+        /* actual memory is freed via old in kvm_free_physmem_slot below */
+        if (change == KVM_MR_DELETE) {
+                new.dirty_bitmap = NULL;
+                memset(&new.arch, 0, sizeof(new.arch));
+        }
+
+        old_memslots = install_new_memslots(kvm, slots, &new);
+
+        kvm_arch_commit_memory_region(kvm, mem, &old, change);
+
+        kvm_free_physmem_slot(kvm, &old, &new);
+        kfree(old_memslots);
+
         /*
          * IOMMU mapping: New slots need to be mapped. Old slots need to be
          * un-mapped and re-mapped if their base changes. Since base change
@@ -883,29 +898,15 @@ int __kvm_set_memory_region(struct kvm *kvm,
          */
         if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
                 r = kvm_iommu_map_pages(kvm, &new);
-                if (r)
-                        goto out_slots;
-        }
-
-        /* actual memory is freed via old in kvm_free_physmem_slot below */
-        if (change == KVM_MR_DELETE) {
-                new.dirty_bitmap = NULL;
-                memset(&new.arch, 0, sizeof(new.arch));
+                return r;
         }
 
-        old_memslots = install_new_memslots(kvm, slots, &new);
-
-        kvm_arch_commit_memory_region(kvm, mem, &old, change);
-
-        kvm_free_physmem_slot(&old, &new);
-        kfree(old_memslots);
-
         return 0;
 
 out_slots:
         kfree(slots);
 out_free:
-        kvm_free_physmem_slot(&new, &old);
+        kvm_free_physmem_slot(kvm, &new, &old);
 out:
         return r;
 }
@@ -964,6 +965,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
 out:
         return r;
 }
+EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
 
 bool kvm_largepages_enabled(void)
 {
@@ -1064,10 +1066,12 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
 unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
 {
         struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
-        if (writable)
+        unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
+
+        if (!kvm_is_error_hva(hva) && writable)
                 *writable = !memslot_is_readonly(slot);
 
-        return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
+        return hva;
 }
 
 static int kvm_read_hva(void *data, void __user *hva, int len)
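
Note on the gfn_to_hva_prot() hunk above: the memslot is now looked up once, and *writable is only filled in when the gfn actually resolves to a valid hva, so a gfn without a usable memslot no longer has its slot consulted for the readonly flag. Callers are expected to check the returned hva first, roughly like this (a sketch; the error handling is illustrative):

        bool writable;
        unsigned long hva = gfn_to_hva_prot(kvm, gfn, &writable);

        if (kvm_is_error_hva(hva))
                return -EFAULT;         /* no usable memslot for this gfn */

        /* writable is only meaningful once hva is known to be valid */
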
@@ -1611,8 +1615,9 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
 
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
 {
-        return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page,
-                                    offset, len);
+        const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
+
+        return kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
 
@@ -1652,6 +1657,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
         memslot = gfn_to_memslot(kvm, gfn);
         mark_page_dirty_in_slot(kvm, memslot, gfn);
 }
+EXPORT_SYMBOL_GPL(mark_page_dirty);
 
 /*
  * The vCPU has executed a HLT instruction with in-kernel mode enabled.
@@ -1677,6 +1683,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 
         finish_wait(&vcpu->wq, &wait);
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_block);
 
 #ifndef CONFIG_S390
 /*
@@ -1891,6 +1898,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
         int r;
         struct kvm_vcpu *vcpu, *v;
 
+        if (id >= KVM_MAX_VCPUS)
+                return -EINVAL;
+
         vcpu = kvm_arch_vcpu_create(kvm, id);
         if (IS_ERR(vcpu))
                 return PTR_ERR(vcpu);
@@ -2269,6 +2279,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
                 ops = &kvm_xics_ops;
                 break;
 #endif
+#ifdef CONFIG_KVM_VFIO
+        case KVM_DEV_TYPE_VFIO:
+                ops = &kvm_vfio_ops;
+                break;
+#endif
         default:
                 return -ENODEV;
         }
@@ -2517,44 +2532,12 @@ out:
 }
 #endif
 
-static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-        struct page *page[1];
-        unsigned long addr;
-        int npages;
-        gfn_t gfn = vmf->pgoff;
-        struct kvm *kvm = vma->vm_file->private_data;
-
-        addr = gfn_to_hva(kvm, gfn);
-        if (kvm_is_error_hva(addr))
-                return VM_FAULT_SIGBUS;
-
-        npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
-                                NULL);
-        if (unlikely(npages != 1))
-                return VM_FAULT_SIGBUS;
-
-        vmf->page = page[0];
-        return 0;
-}
-
-static const struct vm_operations_struct kvm_vm_vm_ops = {
-        .fault = kvm_vm_fault,
-};
-
-static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
-{
-        vma->vm_ops = &kvm_vm_vm_ops;
-        return 0;
-}
-
 static struct file_operations kvm_vm_fops = {
         .release = kvm_vm_release,
         .unlocked_ioctl = kvm_vm_ioctl,
 #ifdef CONFIG_COMPAT
         .compat_ioctl = kvm_vm_compat_ioctl,
 #endif
-        .mmap = kvm_vm_mmap,
         .llseek = noop_llseek,
 };
 
@@ -2681,11 +2664,12 @@ static void hardware_enable_nolock(void *junk)
         }
 }
 
-static void hardware_enable(void *junk)
+static void hardware_enable(void)
 {
-        raw_spin_lock(&kvm_lock);
-        hardware_enable_nolock(junk);
-        raw_spin_unlock(&kvm_lock);
+        raw_spin_lock(&kvm_count_lock);
+        if (kvm_usage_count)
+                hardware_enable_nolock(NULL);
+        raw_spin_unlock(&kvm_count_lock);
 }
 
 static void hardware_disable_nolock(void *junk)
@@ -2698,11 +2682,12 @@ static void hardware_disable_nolock(void *junk)
         kvm_arch_hardware_disable(NULL);
 }
 
-static void hardware_disable(void *junk)
+static void hardware_disable(void)
 {
-        raw_spin_lock(&kvm_lock);
-        hardware_disable_nolock(junk);
-        raw_spin_unlock(&kvm_lock);
+        raw_spin_lock(&kvm_count_lock);
+        if (kvm_usage_count)
+                hardware_disable_nolock(NULL);
+        raw_spin_unlock(&kvm_count_lock);
 }
 
 static void hardware_disable_all_nolock(void)
@@ -2716,16 +2701,16 @@ static void hardware_disable_all_nolock(void)
 
 static void hardware_disable_all(void)
 {
-        raw_spin_lock(&kvm_lock);
+        raw_spin_lock(&kvm_count_lock);
         hardware_disable_all_nolock();
-        raw_spin_unlock(&kvm_lock);
+        raw_spin_unlock(&kvm_count_lock);
 }
 
 static int hardware_enable_all(void)
 {
         int r = 0;
 
-        raw_spin_lock(&kvm_lock);
+        raw_spin_lock(&kvm_count_lock);
 
         kvm_usage_count++;
         if (kvm_usage_count == 1) {
@@ -2738,7 +2723,7 @@ static int hardware_enable_all(void)
                 }
         }
 
-        raw_spin_unlock(&kvm_lock);
+        raw_spin_unlock(&kvm_count_lock);
 
         return r;
 }
@@ -2748,20 +2733,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 {
         int cpu = (long)v;
 
-        if (!kvm_usage_count)
-                return NOTIFY_OK;
-
         val &= ~CPU_TASKS_FROZEN;
         switch (val) {
         case CPU_DYING:
                 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
                        cpu);
-                hardware_disable(NULL);
+                hardware_disable();
                 break;
         case CPU_STARTING:
                 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
                        cpu);
-                hardware_enable(NULL);
+                hardware_enable();
                 break;
         }
         return NOTIFY_OK;
@@ -3054,10 +3036,10 @@ static int vm_stat_get(void *_offset, u64 *val)
         struct kvm *kvm;
 
         *val = 0;
-        raw_spin_lock(&kvm_lock);
+        spin_lock(&kvm_lock);
         list_for_each_entry(kvm, &vm_list, vm_list)
                 *val += *(u32 *)((void *)kvm + offset);
-        raw_spin_unlock(&kvm_lock);
+        spin_unlock(&kvm_lock);
         return 0;
 }
 
@@ -3071,12 +3053,12 @@ static int vcpu_stat_get(void *_offset, u64 *val)
         int i;
 
         *val = 0;
-        raw_spin_lock(&kvm_lock);
+        spin_lock(&kvm_lock);
         list_for_each_entry(kvm, &vm_list, vm_list)
                 kvm_for_each_vcpu(i, vcpu, kvm)
                         *val += *(u32 *)((void *)vcpu + offset);
 
-        raw_spin_unlock(&kvm_lock);
+        spin_unlock(&kvm_lock);
         return 0;
 }
 
@@ -3089,7 +3071,7 @@ static const struct file_operations *stat_fops[] = {
 
 static int kvm_init_debug(void)
 {
-        int r = -EFAULT;
+        int r = -EEXIST;
         struct kvm_stats_debugfs_item *p;
 
         kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
@@ -3131,7 +3113,7 @@ static int kvm_suspend(void)
 static void kvm_resume(void)
 {
         if (kvm_usage_count) {
-                WARN_ON(raw_spin_is_locked(&kvm_lock));
+                WARN_ON(raw_spin_is_locked(&kvm_count_lock));
                 hardware_enable_nolock(NULL);
         }
 }
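
Note on the kvm_main.c locking changes as a whole: the old raw kvm_lock is split in two, and the CPU hotplug notifier no longer bails out early when kvm_usage_count is zero; instead hardware_enable() and hardware_disable() check the count themselves under the new lock. As far as it can be read off these hunks, the resulting split is:

/*
 * kvm_lock (ordinary spinlock)
 *   - protects vm_list: kvm_create_vm(), kvm_destroy_vm(), and the debugfs
 *     statistics walkers vm_stat_get() and vcpu_stat_get()
 *
 * kvm_count_lock (raw spinlock, now static)
 *   - protects kvm_usage_count and the hardware enable/disable paths:
 *     hardware_enable_all(), hardware_disable_all(), and the
 *     CPU_STARTING/CPU_DYING notifier via hardware_enable()/hardware_disable()
 *   - kept raw, presumably because it is taken from CPU hotplug callbacks
 *     where sleeping locks are not an option
 */
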
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
new file mode 100644
index 000000000000..ca4260e35037
--- /dev/null
+++ b/virt/kvm/vfio.c
@@ -0,0 +1,264 @@
+/*
+ * VFIO-KVM bridge pseudo device
+ *
+ * Copyright (C) 2013 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/kvm_host.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+
+struct kvm_vfio_group {
+        struct list_head node;
+        struct vfio_group *vfio_group;
+};
+
+struct kvm_vfio {
+        struct list_head group_list;
+        struct mutex lock;
+        bool noncoherent;
+};
+
+static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
+{
+        struct vfio_group *vfio_group;
+        struct vfio_group *(*fn)(struct file *);
+
+        fn = symbol_get(vfio_group_get_external_user);
+        if (!fn)
+                return ERR_PTR(-EINVAL);
+
+        vfio_group = fn(filep);
+
+        symbol_put(vfio_group_get_external_user);
+
+        return vfio_group;
+}
+
+static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
+{
+        void (*fn)(struct vfio_group *);
+
+        fn = symbol_get(vfio_group_put_external_user);
+        if (!fn)
+                return;
+
+        fn(vfio_group);
+
+        symbol_put(vfio_group_put_external_user);
+}
+
+/*
+ * Groups can use the same or different IOMMU domains.  If the same then
+ * adding a new group may change the coherency of groups we've previously
+ * been told about.  We don't want to care about any of that so we retest
+ * each group and bail as soon as we find one that's noncoherent.  This
+ * means we only ever [un]register_noncoherent_dma once for the whole device.
+ */
+static void kvm_vfio_update_coherency(struct kvm_device *dev)
+{
+        struct kvm_vfio *kv = dev->private;
+        bool noncoherent = false;
+        struct kvm_vfio_group *kvg;
+
+        mutex_lock(&kv->lock);
+
+        list_for_each_entry(kvg, &kv->group_list, node) {
+                /*
+                 * TODO: We need an interface to check the coherency of
+                 * the IOMMU domain this group is using.  For now, assume
+                 * it's always noncoherent.
+                 */
+                noncoherent = true;
+                break;
+        }
+
+        if (noncoherent != kv->noncoherent) {
+                kv->noncoherent = noncoherent;
+
+                if (kv->noncoherent)
+                        kvm_arch_register_noncoherent_dma(dev->kvm);
+                else
+                        kvm_arch_unregister_noncoherent_dma(dev->kvm);
+        }
+
+        mutex_unlock(&kv->lock);
+}
+
+static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
+{
+        struct kvm_vfio *kv = dev->private;
+        struct vfio_group *vfio_group;
+        struct kvm_vfio_group *kvg;
+        void __user *argp = (void __user *)arg;
+        struct fd f;
+        int32_t fd;
+        int ret;
+
+        switch (attr) {
+        case KVM_DEV_VFIO_GROUP_ADD:
+                if (get_user(fd, (int32_t __user *)argp))
+                        return -EFAULT;
+
+                f = fdget(fd);
+                if (!f.file)
+                        return -EBADF;
+
+                vfio_group = kvm_vfio_group_get_external_user(f.file);
+                fdput(f);
+
+                if (IS_ERR(vfio_group))
+                        return PTR_ERR(vfio_group);
+
+                mutex_lock(&kv->lock);
+
+                list_for_each_entry(kvg, &kv->group_list, node) {
+                        if (kvg->vfio_group == vfio_group) {
+                                mutex_unlock(&kv->lock);
+                                kvm_vfio_group_put_external_user(vfio_group);
+                                return -EEXIST;
+                        }
+                }
+
+                kvg = kzalloc(sizeof(*kvg), GFP_KERNEL);
+                if (!kvg) {
+                        mutex_unlock(&kv->lock);
+                        kvm_vfio_group_put_external_user(vfio_group);
+                        return -ENOMEM;
+                }
+
+                list_add_tail(&kvg->node, &kv->group_list);
+                kvg->vfio_group = vfio_group;
+
+                mutex_unlock(&kv->lock);
+
+                kvm_vfio_update_coherency(dev);
+
+                return 0;
+
+        case KVM_DEV_VFIO_GROUP_DEL:
+                if (get_user(fd, (int32_t __user *)argp))
+                        return -EFAULT;
+
+                f = fdget(fd);
+                if (!f.file)
+                        return -EBADF;
+
+                vfio_group = kvm_vfio_group_get_external_user(f.file);
+                fdput(f);
+
+                if (IS_ERR(vfio_group))
+                        return PTR_ERR(vfio_group);
+
+                ret = -ENOENT;
+
+                mutex_lock(&kv->lock);
+
+                list_for_each_entry(kvg, &kv->group_list, node) {
+                        if (kvg->vfio_group != vfio_group)
+                                continue;
+
+                        list_del(&kvg->node);
+                        kvm_vfio_group_put_external_user(kvg->vfio_group);
+                        kfree(kvg);
+                        ret = 0;
+                        break;
+                }
+
+                mutex_unlock(&kv->lock);
+
+                kvm_vfio_group_put_external_user(vfio_group);
+
+                kvm_vfio_update_coherency(dev);
+
+                return ret;
+        }
+
+        return -ENXIO;
+}
+
+static int kvm_vfio_set_attr(struct kvm_device *dev,
+                             struct kvm_device_attr *attr)
+{
+        switch (attr->group) {
+        case KVM_DEV_VFIO_GROUP:
+                return kvm_vfio_set_group(dev, attr->attr, attr->addr);
+        }
+
+        return -ENXIO;
+}
+
+static int kvm_vfio_has_attr(struct kvm_device *dev,
+                             struct kvm_device_attr *attr)
+{
+        switch (attr->group) {
+        case KVM_DEV_VFIO_GROUP:
+                switch (attr->attr) {
+                case KVM_DEV_VFIO_GROUP_ADD:
+                case KVM_DEV_VFIO_GROUP_DEL:
+                        return 0;
+                }
+
+                break;
+        }
+
+        return -ENXIO;
+}
+
+static void kvm_vfio_destroy(struct kvm_device *dev)
+{
+        struct kvm_vfio *kv = dev->private;
+        struct kvm_vfio_group *kvg, *tmp;
+
+        list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
+                kvm_vfio_group_put_external_user(kvg->vfio_group);
+                list_del(&kvg->node);
+                kfree(kvg);
+        }
+
+        kvm_vfio_update_coherency(dev);
+
+        kfree(kv);
+        kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
+}
+
+static int kvm_vfio_create(struct kvm_device *dev, u32 type)
+{
+        struct kvm_device *tmp;
+        struct kvm_vfio *kv;
+
+        /* Only one VFIO "device" per VM */
+        list_for_each_entry(tmp, &dev->kvm->devices, vm_node)
+                if (tmp->ops == &kvm_vfio_ops)
+                        return -EBUSY;
+
+        kv = kzalloc(sizeof(*kv), GFP_KERNEL);
+        if (!kv)
+                return -ENOMEM;
+
+        INIT_LIST_HEAD(&kv->group_list);
+        mutex_init(&kv->lock);
+
+        dev->private = kv;
+
+        return 0;
+}
+
+struct kvm_device_ops kvm_vfio_ops = {
+        .name = "kvm-vfio",
+        .create = kvm_vfio_create,
+        .destroy = kvm_vfio_destroy,
+        .set_attr = kvm_vfio_set_attr,
+        .has_attr = kvm_vfio_has_attr,
+};
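
Note on using the new device from userspace: the VFIO pseudo device is reached through the generic KVM device API, i.e. KVM_CREATE_DEVICE on the VM file descriptor with type KVM_DEV_TYPE_VFIO, followed by KVM_SET_DEVICE_ATTR calls whose addr field points at a VFIO group file descriptor (an int32_t, matching the get_user() above). A minimal sketch, assuming the struct kvm_create_device and struct kvm_device_attr layouts from <linux/kvm.h>; the group path and error handling are illustrative:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* vm_fd is the file descriptor returned by KVM_CREATE_VM. */
static int kvm_vfio_add_group(int vm_fd, const char *group_path)
{
        struct kvm_create_device cd = { .type = KVM_DEV_TYPE_VFIO };
        int group_fd;

        if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
                return -1;                      /* on success cd.fd is the device fd */

        group_fd = open(group_path, O_RDWR);    /* e.g. "/dev/vfio/26" (illustrative) */
        if (group_fd < 0)
                return -1;

        struct kvm_device_attr attr = {
                .group = KVM_DEV_VFIO_GROUP,
                .attr  = KVM_DEV_VFIO_GROUP_ADD,
                .addr  = (__u64)(unsigned long)&group_fd,   /* pointer to an int32_t fd */
        };

        return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
}

The kernel side looks the group up from the fd and takes its own reference via vfio_group_get_external_user(), so the fd is only needed for the duration of the ioctl; KVM_DEV_VFIO_GROUP_DEL with the same attribute layout removes the group again.
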