Diffstat (limited to 'virt')

 -rw-r--r--  virt/kvm/assigned-dev.c |  2
 -rw-r--r--  virt/kvm/async_pf.c     |  4
 -rw-r--r--  virt/kvm/eventfd.c      |  3
 -rw-r--r--  virt/kvm/iommu.c        | 10
 -rw-r--r--  virt/kvm/kvm_main.c     | 79
 -rw-r--r--  virt/kvm/vfio.c         |  4
 -rw-r--r--  virt/kvm/vfio.h         | 13

 7 files changed, 91 insertions, 24 deletions
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 5819a2708d7e..e05000e200d2 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -302,7 +302,7 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 	else
 		pci_restore_state(assigned_dev->dev);
 
-	assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
+	pci_clear_dev_assigned(assigned_dev->dev);
 
 	pci_release_regions(assigned_dev->dev);
 	pci_disable_device(assigned_dev->dev);
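
pci_set_dev_assigned()/pci_clear_dev_assigned() replace open-coded dev_flags updates here and in iommu.c below. Their bodies are not part of this diff; judging from the lines they replace, the accessors presumably reduce to the same bit operations:

	/* Sketch inferred from the open-coded updates being replaced. */
	static inline void pci_set_dev_assigned(struct pci_dev *pdev)
	{
		pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
	}

	static inline void pci_clear_dev_assigned(struct pci_dev *pdev)
	{
		pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
	}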
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index d6a3d0993d88..5ff7f7f2689a 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,9 +80,7 @@ static void async_pf_execute(struct work_struct *work)
 
 	might_sleep();
 
-	down_read(&mm->mmap_sem);
-	get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL);
-	up_read(&mm->mmap_sem);
+	kvm_get_user_page_io(NULL, mm, addr, 1, NULL);
 	kvm_async_page_present_sync(vcpu, apf);
 
 	spin_lock(&vcpu->async_pf.lock);
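
A note on the NULL pagep argument: the replacement helper (added to kvm_main.c below) only sets FOLL_GET when a page pointer is supplied, so this worker merely faults the page in without pinning it, which is all async_pf needs. With pagep == NULL the call is roughly equivalent to:

	/* Sketch, derived from the kvm_get_user_page_io() body below;
	 * "locked" lets gup drop mmap_sem itself when it must wait on IO. */
	int locked = 1;

	down_read(&mm->mmap_sem);
	__get_user_pages(NULL, mm, addr, 1,
			 FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE, /* no FOLL_GET */
			 NULL, NULL, &locked);
	if (locked)
		up_read(&mm->mmap_sem);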
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 0c712a779b44..b0fb390943c6 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -36,6 +36,9 @@
 #include <linux/seqlock.h>
 #include <trace/events/kvm.h>
 
+#ifdef __KVM_HAVE_IOAPIC
+#include "ioapic.h"
+#endif
 #include "iodev.h"
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 714b94932312..e51d9f9b995f 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -191,8 +191,7 @@ int kvm_assign_device(struct kvm *kvm,
 		return r;
 	}
 
-	noncoherent = !iommu_domain_has_cap(kvm->arch.iommu_domain,
-					    IOMMU_CAP_CACHE_COHERENCY);
+	noncoherent = !iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY);
 
 	/* Check if need to update IOMMU page table for guest memory */
 	if (noncoherent != kvm->arch.iommu_noncoherent) {
@@ -203,7 +202,7 @@ int kvm_assign_device(struct kvm *kvm,
 		goto out_unmap;
 	}
 
-	pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
+	pci_set_dev_assigned(pdev);
 
 	dev_info(&pdev->dev, "kvm assign device\n");
 
@@ -229,7 +228,7 @@ int kvm_deassign_device(struct kvm *kvm,
 
 	iommu_detach_device(domain, &pdev->dev);
 
-	pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
+	pci_clear_dev_assigned(pdev);
 
 	dev_info(&pdev->dev, "kvm deassign device\n");
 
@@ -254,8 +253,7 @@ int kvm_iommu_map_guest(struct kvm *kvm)
 	}
 
 	if (!allow_unsafe_assigned_interrupts &&
-	    !iommu_domain_has_cap(kvm->arch.iommu_domain,
-				  IOMMU_CAP_INTR_REMAP)) {
+	    !iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) {
 		printk(KERN_WARNING "%s: No interrupt remapping support,"
 		       " disallowing device assignment."
 		       " Re-enble with \"allow_unsafe_assigned_interrupts=1\""
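
iommu_domain_has_cap() interrogated one specific domain; its replacement iommu_capable() asks the IOMMU driver backing a whole bus type, so the kvm->arch.iommu_domain pointer is no longer needed for the query. A minimal usage sketch, assuming the 3.18-era signature bool iommu_capable(struct bus_type *bus, enum iommu_cap cap):

	#include <linux/iommu.h>
	#include <linux/pci.h>

	/* Hypothetical helper: do the IOMMUs behind the PCI bus enforce
	 * cache-coherent DMA?  Used above to pick (non)coherent mappings
	 * for guest memory. */
	static bool pci_iommu_dma_coherent(void)
	{
		return iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY);
	}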
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 278232025129..384eaa7b02fa 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -52,11 +52,13 @@
 
 #include <asm/processor.h>
 #include <asm/io.h>
+#include <asm/ioctl.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
 #include "coalesced_mmio.h"
 #include "async_pf.h"
+#include "vfio.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
@@ -108,7 +110,7 @@ static bool largepages_enabled = true;
 bool kvm_is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn))
-		return PageReserved(pfn_to_page(pfn));
+		return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
 
 	return true;
 }
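
The new is_zero_pfn() test matters because read-faulted anonymous memory is backed by the kernel's shared zero page, which is marked PageReserved even though it is ordinary RAM; without the test KVM would misclassify such frames as MMIO. A small illustration, assuming the generic zero-page helpers:

	/* The shared zero page is reserved yet is not MMIO. */
	unsigned long pfn = my_zero_pfn(addr);

	BUG_ON(!pfn_valid(pfn));
	BUG_ON(!PageReserved(pfn_to_page(pfn)));	/* looks "reserved"... */
	BUG_ON(!is_zero_pfn(pfn));			/* ...but is the zero page */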
@@ -151,7 +153,7 @@ static void ack_flush(void *_completed)
 {
 }
 
-static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
+bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 {
 	int i, cpu, me;
 	cpumask_var_t cpus;
@@ -188,7 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	long dirty_count = kvm->tlbs_dirty;
 
 	smp_mb();
-	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+	if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
 		++kvm->stat.remote_tlb_flush;
 	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
@@ -196,17 +198,17 @@ EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+	kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
 }
 
 void kvm_make_mclock_inprogress_request(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
+	kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
 }
 
 void kvm_make_scan_ioapic_request(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
+	kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
 }
 
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
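
Dropping the static qualifier and adding the kvm_ prefix turns the request broadcast into a cross-file API, so arch code can raise requests on all vCPUs as well; a matching declaration presumably lands in include/linux/kvm_host.h, outside this 'virt'-limited diff:

	/* Assumed header declaration -- not part of this excerpt. */
	bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);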
@@ -294,6 +296,9 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 		kvm_flush_remote_tlbs(kvm);
 
 	spin_unlock(&kvm->mmu_lock);
+
+	kvm_arch_mmu_notifier_invalidate_page(kvm, address);
+
 	srcu_read_unlock(&kvm->srcu, idx);
 }
 
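
The new kvm_arch_mmu_notifier_invalidate_page() hook lets an architecture react to the invalidation while still inside the SRCU read section. The arch side is not in this diff; most architectures presumably provide an empty stub along these lines, with x86 using the hook to refresh special mappings such as the APIC access page:

	/* Assumed no-op default -- the real per-arch definitions are not
	 * shown in this excerpt. */
	static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
								 unsigned long address)
	{
	}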
@@ -367,7 +372,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 
 static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 					      struct mm_struct *mm,
-					      unsigned long address)
+					      unsigned long start,
+					      unsigned long end)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
 	int young, idx;
@@ -375,7 +381,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 
-	young = kvm_age_hva(kvm, address);
+	young = kvm_age_hva(kvm, start, end);
 	if (young)
 		kvm_flush_remote_tlbs(kvm);
 
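
This follows the mmu_notifier API change that made page aging range-based: clear_flush_young now receives a [start, end) range instead of a single address. Every architecture's kvm_age_hva() has to grow the same pair of parameters; those arch changes fall outside the 'virt' diffstat, but the prototype implied by the call site is:

	/* Implied by the call above; arch implementations are assumed to
	 * age all rmap entries covering [start, end). */
	int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);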
@@ -1128,6 +1134,43 @@ static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
 	return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL);
 }
 
+int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
+			 unsigned long addr, bool write_fault,
+			 struct page **pagep)
+{
+	int npages;
+	int locked = 1;
+	int flags = FOLL_TOUCH | FOLL_HWPOISON |
+		    (pagep ? FOLL_GET : 0) |
+		    (write_fault ? FOLL_WRITE : 0);
+
+	/*
+	 * If retrying the fault, we get here *not* having allowed the filemap
+	 * to wait on the page lock. We should now allow waiting on the IO with
+	 * the mmap semaphore released.
+	 */
+	down_read(&mm->mmap_sem);
+	npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL,
+				  &locked);
+	if (!locked) {
+		VM_BUG_ON(npages);
+
+		if (!pagep)
+			return 0;
+
+		/*
+		 * The previous call has now waited on the IO. Now we can
+		 * retry and complete. Pass TRIED to ensure we do not re
+		 * schedule async IO (see e.g. filemap_fault).
+		 */
+		down_read(&mm->mmap_sem);
+		npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED,
+					  pagep, NULL, NULL);
+	}
+	up_read(&mm->mmap_sem);
+	return npages;
+}
+
 static inline int check_user_page_hwpoison(unsigned long addr)
 {
 	int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE;
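
kvm_get_user_page_io() serves two kinds of caller, selected by pagep. Both patterns appear in this series; a hypothetical composite:

	struct page *page;
	int npages;

	/* hva_to_pfn_slow() style: FOLL_GET pins the page, so the caller
	 * must release it later via kvm_release_page_clean/dirty(). */
	npages = kvm_get_user_page_io(current, current->mm, addr, true, &page);

	/* async_pf_execute() style: fault the page in only; no reference
	 * is taken, so there is nothing to release. */
	kvm_get_user_page_io(NULL, current->mm, addr, true, NULL);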
@@ -1190,9 +1233,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
 		npages = get_user_page_nowait(current, current->mm,
 					      addr, write_fault, page);
 		up_read(&current->mm->mmap_sem);
-	} else
-		npages = get_user_pages_fast(addr, 1, write_fault,
-					     page);
+	} else {
+		/*
+		 * By now we have tried gup_fast, and possibly async_pf, and we
+		 * are certainly not atomic. Time to retry the gup, allowing
+		 * mmap semaphore to be relinquished in the case of IO.
+		 */
+		npages = kvm_get_user_page_io(current, current->mm, addr,
+					      write_fault, page);
+	}
 	if (npages != 1)
 		return npages;
 
@@ -1746,7 +1795,7 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target)
 	rcu_read_lock();
 	pid = rcu_dereference(target->pid);
 	if (pid)
-		task = get_pid_task(target->pid, PIDTYPE_PID);
+		task = get_pid_task(pid, PIDTYPE_PID);
 	rcu_read_unlock();
 	if (!task)
 		return ret;
@@ -1995,6 +2044,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
 	if (vcpu->kvm->mm != current->mm)
 		return -EIO;
 
+	if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
+		return -EINVAL;
+
 #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
 	/*
 	 * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
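
The type check is sufficient because every KVM ioctl number embeds the KVMIO magic in its type byte, e.g. from include/uapi/linux/kvm.h:

	#define KVMIO 0xAE

	#define KVM_RUN		_IO(KVMIO, 0x80)
	#define KVM_GET_REGS	_IOR(KVMIO, 0x81, struct kvm_regs)

	/* _IOC_TYPE() recovers that byte, so an ioctl aimed at another
	 * subsystem (say, a stray TCGETS on a vcpu fd) now fails fast with
	 * -EINVAL instead of reaching the dispatch switch. */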
@@ -3233,6 +3285,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 		goto out_undebugfs;
 	}
 
+	r = kvm_vfio_ops_init();
+	WARN_ON(r);
+
 	return 0;
 
 out_undebugfs:
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index bb11b36ee8a2..281e7cf2b8e5 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/vfio.h>
+#include "vfio.h"
 
 struct kvm_vfio_group {
 	struct list_head node;
@@ -278,8 +279,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
 	return 0;
 }
 
-static int __init kvm_vfio_ops_init(void)
+int kvm_vfio_ops_init(void)
 {
 	return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
 }
-module_init(kvm_vfio_ops_init);
diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h
new file mode 100644
index 000000000000..92eac75d6b62
--- /dev/null
+++ b/virt/kvm/vfio.h
@@ -0,0 +1,13 @@
+#ifndef __KVM_VFIO_H
+#define __KVM_VFIO_H
+
+#ifdef CONFIG_KVM_VFIO
+int kvm_vfio_ops_init(void);
+#else
+static inline int kvm_vfio_ops_init(void)
+{
+	return 0;
+}
+#endif
+
+#endif