Diffstat (limited to 'virt')
-rw-r--r--   virt/kvm/Kconfig          |    3
-rw-r--r--   virt/kvm/assigned-dev.c   |  821
-rw-r--r--   virt/kvm/coalesced_mmio.c |   44
-rw-r--r--   virt/kvm/coalesced_mmio.h |   15
-rw-r--r--   virt/kvm/eventfd.c        |   42
-rw-r--r--   virt/kvm/ioapic.c         |  119
-rw-r--r--   virt/kvm/ioapic.h         |    7
-rw-r--r--   virt/kvm/iommu.c          |   36
-rw-r--r--   virt/kvm/irq_comm.c       |  236
-rw-r--r--   virt/kvm/kvm_main.c       | 1366
10 files changed, 1582 insertions, 1107 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index daece36c0a57..7f1178f6b839 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -12,3 +12,6 @@ config HAVE_KVM_EVENTFD | |||
12 | 12 | ||
13 | config KVM_APIC_ARCHITECTURE | 13 | config KVM_APIC_ARCHITECTURE |
14 | bool | 14 | bool |
15 | |||
16 | config KVM_MMIO | ||
17 | bool | ||
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
new file mode 100644
index 000000000000..02ff2b19dbe2
--- /dev/null
+++ b/virt/kvm/assigned-dev.c
@@ -0,0 +1,821 @@ | |||
1 | /* | ||
2 | * Kernel-based Virtual Machine - device assignment support | ||
3 | * | ||
4 | * Copyright (C) 2006-9 Red Hat, Inc | ||
5 | * | ||
6 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
7 | * the COPYING file in the top-level directory. | ||
8 | * | ||
9 | */ | ||
10 | |||
11 | #include <linux/kvm_host.h> | ||
12 | #include <linux/kvm.h> | ||
13 | #include <linux/uaccess.h> | ||
14 | #include <linux/vmalloc.h> | ||
15 | #include <linux/errno.h> | ||
16 | #include <linux/spinlock.h> | ||
17 | #include <linux/pci.h> | ||
18 | #include <linux/interrupt.h> | ||
19 | #include <linux/slab.h> | ||
20 | #include "irq.h" | ||
21 | |||
22 | static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, | ||
23 | int assigned_dev_id) | ||
24 | { | ||
25 | struct list_head *ptr; | ||
26 | struct kvm_assigned_dev_kernel *match; | ||
27 | |||
28 | list_for_each(ptr, head) { | ||
29 | match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); | ||
30 | if (match->assigned_dev_id == assigned_dev_id) | ||
31 | return match; | ||
32 | } | ||
33 | return NULL; | ||
34 | } | ||
35 | |||
36 | static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | ||
37 | *assigned_dev, int irq) | ||
38 | { | ||
39 | int i, index; | ||
40 | struct msix_entry *host_msix_entries; | ||
41 | |||
42 | host_msix_entries = assigned_dev->host_msix_entries; | ||
43 | |||
44 | index = -1; | ||
45 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
46 | if (irq == host_msix_entries[i].vector) { | ||
47 | index = i; | ||
48 | break; | ||
49 | } | ||
50 | if (index < 0) { | ||
51 | printk(KERN_WARNING "Failed to find correlated MSI-X entry!\n"); | ||
52 | return 0; | ||
53 | } | ||
54 | |||
55 | return index; | ||
56 | } | ||
57 | |||
58 | static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) | ||
59 | { | ||
60 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
61 | struct kvm *kvm; | ||
62 | int i; | ||
63 | |||
64 | assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, | ||
65 | interrupt_work); | ||
66 | kvm = assigned_dev->kvm; | ||
67 | |||
68 | spin_lock_irq(&assigned_dev->assigned_dev_lock); | ||
69 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
70 | struct kvm_guest_msix_entry *guest_entries = | ||
71 | assigned_dev->guest_msix_entries; | ||
72 | for (i = 0; i < assigned_dev->entries_nr; i++) { | ||
73 | if (!(guest_entries[i].flags & | ||
74 | KVM_ASSIGNED_MSIX_PENDING)) | ||
75 | continue; | ||
76 | guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; | ||
77 | kvm_set_irq(assigned_dev->kvm, | ||
78 | assigned_dev->irq_source_id, | ||
79 | guest_entries[i].vector, 1); | ||
80 | } | ||
81 | } else | ||
82 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | ||
83 | assigned_dev->guest_irq, 1); | ||
84 | |||
85 | spin_unlock_irq(&assigned_dev->assigned_dev_lock); | ||
86 | } | ||
87 | |||
88 | static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) | ||
89 | { | ||
90 | unsigned long flags; | ||
91 | struct kvm_assigned_dev_kernel *assigned_dev = | ||
92 | (struct kvm_assigned_dev_kernel *) dev_id; | ||
93 | |||
94 | spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); | ||
95 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
96 | int index = find_index_from_host_irq(assigned_dev, irq); | ||
97 | if (index < 0) | ||
98 | goto out; | ||
99 | assigned_dev->guest_msix_entries[index].flags |= | ||
100 | KVM_ASSIGNED_MSIX_PENDING; | ||
101 | } | ||
102 | |||
103 | schedule_work(&assigned_dev->interrupt_work); | ||
104 | |||
105 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { | ||
106 | disable_irq_nosync(irq); | ||
107 | assigned_dev->host_irq_disabled = true; | ||
108 | } | ||
109 | |||
110 | out: | ||
111 | spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); | ||
112 | return IRQ_HANDLED; | ||
113 | } | ||
114 | |||
115 | /* Ack the irq line for an assigned device */ | ||
116 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | ||
117 | { | ||
118 | struct kvm_assigned_dev_kernel *dev; | ||
119 | unsigned long flags; | ||
120 | |||
121 | if (kian->gsi == -1) | ||
122 | return; | ||
123 | |||
124 | dev = container_of(kian, struct kvm_assigned_dev_kernel, | ||
125 | ack_notifier); | ||
126 | |||
127 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); | ||
128 | |||
129 | /* The guest irq may be shared so this ack may be | ||
130 | * from another device. | ||
131 | */ | ||
132 | spin_lock_irqsave(&dev->assigned_dev_lock, flags); | ||
133 | if (dev->host_irq_disabled) { | ||
134 | enable_irq(dev->host_irq); | ||
135 | dev->host_irq_disabled = false; | ||
136 | } | ||
137 | spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); | ||
138 | } | ||
139 | |||
140 | static void deassign_guest_irq(struct kvm *kvm, | ||
141 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
142 | { | ||
143 | kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); | ||
144 | assigned_dev->ack_notifier.gsi = -1; | ||
145 | |||
146 | if (assigned_dev->irq_source_id != -1) | ||
147 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); | ||
148 | assigned_dev->irq_source_id = -1; | ||
149 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); | ||
150 | } | ||
151 | |||
152 | /* This function implicitly holds the kvm->lock mutex due to cancel_work_sync() */ | ||
153 | static void deassign_host_irq(struct kvm *kvm, | ||
154 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
155 | { | ||
156 | /* | ||
157 | * In kvm_free_device_irq, cancel_work_sync() returns true if: | ||
158 | * 1. the work was scheduled and then cancelled, or | ||
159 | * 2. the work callback has executed. | ||
160 | * | ||
161 | * The first case guarantees the irq is disabled and no more events | ||
162 | * will arrive. In the second case the irq may have been re-enabled | ||
163 | * (e.g. for MSI), so we disable it here to prevent further events. | ||
164 | * | ||
165 | * Note that this may result in a nested disable if the interrupt | ||
166 | * type is INTx, but that is fine since we are about to free it. | ||
167 | * | ||
168 | * If this function runs as part of VM destruction, ensure that the | ||
169 | * kvm state is still valid at this point, since we may also have | ||
170 | * to wait for interrupt_work to complete. | ||
171 | */ | ||
172 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
173 | int i; | ||
174 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
175 | disable_irq_nosync(assigned_dev-> | ||
176 | host_msix_entries[i].vector); | ||
177 | |||
178 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
179 | |||
180 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
181 | free_irq(assigned_dev->host_msix_entries[i].vector, | ||
182 | (void *)assigned_dev); | ||
183 | |||
184 | assigned_dev->entries_nr = 0; | ||
185 | kfree(assigned_dev->host_msix_entries); | ||
186 | kfree(assigned_dev->guest_msix_entries); | ||
187 | pci_disable_msix(assigned_dev->dev); | ||
188 | } else { | ||
189 | /* Deal with MSI and INTx */ | ||
190 | disable_irq_nosync(assigned_dev->host_irq); | ||
191 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
192 | |||
193 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); | ||
194 | |||
195 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) | ||
196 | pci_disable_msi(assigned_dev->dev); | ||
197 | } | ||
198 | |||
199 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); | ||
200 | } | ||
201 | |||
202 | static int kvm_deassign_irq(struct kvm *kvm, | ||
203 | struct kvm_assigned_dev_kernel *assigned_dev, | ||
204 | unsigned long irq_requested_type) | ||
205 | { | ||
206 | unsigned long guest_irq_type, host_irq_type; | ||
207 | |||
208 | if (!irqchip_in_kernel(kvm)) | ||
209 | return -EINVAL; | ||
210 | /* no irq assignment to deassign */ | ||
211 | if (!assigned_dev->irq_requested_type) | ||
212 | return -ENXIO; | ||
213 | |||
214 | host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; | ||
215 | guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; | ||
216 | |||
217 | if (host_irq_type) | ||
218 | deassign_host_irq(kvm, assigned_dev); | ||
219 | if (guest_irq_type) | ||
220 | deassign_guest_irq(kvm, assigned_dev); | ||
221 | |||
222 | return 0; | ||
223 | } | ||
224 | |||
225 | static void kvm_free_assigned_irq(struct kvm *kvm, | ||
226 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
227 | { | ||
228 | kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); | ||
229 | } | ||
230 | |||
231 | static void kvm_free_assigned_device(struct kvm *kvm, | ||
232 | struct kvm_assigned_dev_kernel | ||
233 | *assigned_dev) | ||
234 | { | ||
235 | kvm_free_assigned_irq(kvm, assigned_dev); | ||
236 | |||
237 | pci_reset_function(assigned_dev->dev); | ||
238 | |||
239 | pci_release_regions(assigned_dev->dev); | ||
240 | pci_disable_device(assigned_dev->dev); | ||
241 | pci_dev_put(assigned_dev->dev); | ||
242 | |||
243 | list_del(&assigned_dev->list); | ||
244 | kfree(assigned_dev); | ||
245 | } | ||
246 | |||
247 | void kvm_free_all_assigned_devices(struct kvm *kvm) | ||
248 | { | ||
249 | struct list_head *ptr, *ptr2; | ||
250 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
251 | |||
252 | list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { | ||
253 | assigned_dev = list_entry(ptr, | ||
254 | struct kvm_assigned_dev_kernel, | ||
255 | list); | ||
256 | |||
257 | kvm_free_assigned_device(kvm, assigned_dev); | ||
258 | } | ||
259 | } | ||
260 | |||
261 | static int assigned_device_enable_host_intx(struct kvm *kvm, | ||
262 | struct kvm_assigned_dev_kernel *dev) | ||
263 | { | ||
264 | dev->host_irq = dev->dev->irq; | ||
265 | /* Even though this is PCI, we don't want to use shared | ||
266 | * interrupts. Sharing host devices with guest-assigned devices | ||
267 | * on the same interrupt line is not a happy situation: there | ||
268 | * are going to be long delays in accepting, acking, etc. | ||
269 | */ | ||
270 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, | ||
271 | 0, "kvm_assigned_intx_device", (void *)dev)) | ||
272 | return -EIO; | ||
273 | return 0; | ||
274 | } | ||
275 | |||
276 | #ifdef __KVM_HAVE_MSI | ||
277 | static int assigned_device_enable_host_msi(struct kvm *kvm, | ||
278 | struct kvm_assigned_dev_kernel *dev) | ||
279 | { | ||
280 | int r; | ||
281 | |||
282 | if (!dev->dev->msi_enabled) { | ||
283 | r = pci_enable_msi(dev->dev); | ||
284 | if (r) | ||
285 | return r; | ||
286 | } | ||
287 | |||
288 | dev->host_irq = dev->dev->irq; | ||
289 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, | ||
290 | "kvm_assigned_msi_device", (void *)dev)) { | ||
291 | pci_disable_msi(dev->dev); | ||
292 | return -EIO; | ||
293 | } | ||
294 | |||
295 | return 0; | ||
296 | } | ||
297 | #endif | ||
298 | |||
299 | #ifdef __KVM_HAVE_MSIX | ||
300 | static int assigned_device_enable_host_msix(struct kvm *kvm, | ||
301 | struct kvm_assigned_dev_kernel *dev) | ||
302 | { | ||
303 | int i, r = -EINVAL; | ||
304 | |||
305 | /* host_msix_entries and guest_msix_entries should have been | ||
306 | * initialized */ | ||
307 | if (dev->entries_nr == 0) | ||
308 | return r; | ||
309 | |||
310 | r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); | ||
311 | if (r) | ||
312 | return r; | ||
313 | |||
314 | for (i = 0; i < dev->entries_nr; i++) { | ||
315 | r = request_irq(dev->host_msix_entries[i].vector, | ||
316 | kvm_assigned_dev_intr, 0, | ||
317 | "kvm_assigned_msix_device", | ||
318 | (void *)dev); | ||
319 | /* FIXME: free the already-requested irqs on failure */ | ||
320 | if (r) | ||
321 | return r; | ||
322 | } | ||
323 | |||
324 | return 0; | ||
325 | } | ||
326 | |||
327 | #endif | ||
328 | |||
329 | static int assigned_device_enable_guest_intx(struct kvm *kvm, | ||
330 | struct kvm_assigned_dev_kernel *dev, | ||
331 | struct kvm_assigned_irq *irq) | ||
332 | { | ||
333 | dev->guest_irq = irq->guest_irq; | ||
334 | dev->ack_notifier.gsi = irq->guest_irq; | ||
335 | return 0; | ||
336 | } | ||
337 | |||
338 | #ifdef __KVM_HAVE_MSI | ||
339 | static int assigned_device_enable_guest_msi(struct kvm *kvm, | ||
340 | struct kvm_assigned_dev_kernel *dev, | ||
341 | struct kvm_assigned_irq *irq) | ||
342 | { | ||
343 | dev->guest_irq = irq->guest_irq; | ||
344 | dev->ack_notifier.gsi = -1; | ||
345 | dev->host_irq_disabled = false; | ||
346 | return 0; | ||
347 | } | ||
348 | #endif | ||
349 | |||
350 | #ifdef __KVM_HAVE_MSIX | ||
351 | static int assigned_device_enable_guest_msix(struct kvm *kvm, | ||
352 | struct kvm_assigned_dev_kernel *dev, | ||
353 | struct kvm_assigned_irq *irq) | ||
354 | { | ||
355 | dev->guest_irq = irq->guest_irq; | ||
356 | dev->ack_notifier.gsi = -1; | ||
357 | dev->host_irq_disabled = false; | ||
358 | return 0; | ||
359 | } | ||
360 | #endif | ||
361 | |||
362 | static int assign_host_irq(struct kvm *kvm, | ||
363 | struct kvm_assigned_dev_kernel *dev, | ||
364 | __u32 host_irq_type) | ||
365 | { | ||
366 | int r = -EEXIST; | ||
367 | |||
368 | if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) | ||
369 | return r; | ||
370 | |||
371 | switch (host_irq_type) { | ||
372 | case KVM_DEV_IRQ_HOST_INTX: | ||
373 | r = assigned_device_enable_host_intx(kvm, dev); | ||
374 | break; | ||
375 | #ifdef __KVM_HAVE_MSI | ||
376 | case KVM_DEV_IRQ_HOST_MSI: | ||
377 | r = assigned_device_enable_host_msi(kvm, dev); | ||
378 | break; | ||
379 | #endif | ||
380 | #ifdef __KVM_HAVE_MSIX | ||
381 | case KVM_DEV_IRQ_HOST_MSIX: | ||
382 | r = assigned_device_enable_host_msix(kvm, dev); | ||
383 | break; | ||
384 | #endif | ||
385 | default: | ||
386 | r = -EINVAL; | ||
387 | } | ||
388 | |||
389 | if (!r) | ||
390 | dev->irq_requested_type |= host_irq_type; | ||
391 | |||
392 | return r; | ||
393 | } | ||
394 | |||
395 | static int assign_guest_irq(struct kvm *kvm, | ||
396 | struct kvm_assigned_dev_kernel *dev, | ||
397 | struct kvm_assigned_irq *irq, | ||
398 | unsigned long guest_irq_type) | ||
399 | { | ||
400 | int id; | ||
401 | int r = -EEXIST; | ||
402 | |||
403 | if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) | ||
404 | return r; | ||
405 | |||
406 | id = kvm_request_irq_source_id(kvm); | ||
407 | if (id < 0) | ||
408 | return id; | ||
409 | |||
410 | dev->irq_source_id = id; | ||
411 | |||
412 | switch (guest_irq_type) { | ||
413 | case KVM_DEV_IRQ_GUEST_INTX: | ||
414 | r = assigned_device_enable_guest_intx(kvm, dev, irq); | ||
415 | break; | ||
416 | #ifdef __KVM_HAVE_MSI | ||
417 | case KVM_DEV_IRQ_GUEST_MSI: | ||
418 | r = assigned_device_enable_guest_msi(kvm, dev, irq); | ||
419 | break; | ||
420 | #endif | ||
421 | #ifdef __KVM_HAVE_MSIX | ||
422 | case KVM_DEV_IRQ_GUEST_MSIX: | ||
423 | r = assigned_device_enable_guest_msix(kvm, dev, irq); | ||
424 | break; | ||
425 | #endif | ||
426 | default: | ||
427 | r = -EINVAL; | ||
428 | } | ||
429 | |||
430 | if (!r) { | ||
431 | dev->irq_requested_type |= guest_irq_type; | ||
432 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | ||
433 | } else | ||
434 | kvm_free_irq_source_id(kvm, dev->irq_source_id); | ||
435 | |||
436 | return r; | ||
437 | } | ||
438 | |||
439 | /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ | ||
440 | static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, | ||
441 | struct kvm_assigned_irq *assigned_irq) | ||
442 | { | ||
443 | int r = -EINVAL; | ||
444 | struct kvm_assigned_dev_kernel *match; | ||
445 | unsigned long host_irq_type, guest_irq_type; | ||
446 | |||
447 | if (!capable(CAP_SYS_RAWIO)) | ||
448 | return -EPERM; | ||
449 | |||
450 | if (!irqchip_in_kernel(kvm)) | ||
451 | return r; | ||
452 | |||
453 | mutex_lock(&kvm->lock); | ||
454 | r = -ENODEV; | ||
455 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
456 | assigned_irq->assigned_dev_id); | ||
457 | if (!match) | ||
458 | goto out; | ||
459 | |||
460 | host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); | ||
461 | guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); | ||
462 | |||
463 | r = -EINVAL; | ||
464 | /* can only assign one type at a time */ | ||
465 | if (hweight_long(host_irq_type) > 1) | ||
466 | goto out; | ||
467 | if (hweight_long(guest_irq_type) > 1) | ||
468 | goto out; | ||
469 | if (host_irq_type == 0 && guest_irq_type == 0) | ||
470 | goto out; | ||
471 | |||
472 | r = 0; | ||
473 | if (host_irq_type) | ||
474 | r = assign_host_irq(kvm, match, host_irq_type); | ||
475 | if (r) | ||
476 | goto out; | ||
477 | |||
478 | if (guest_irq_type) | ||
479 | r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); | ||
480 | out: | ||
481 | mutex_unlock(&kvm->lock); | ||
482 | return r; | ||
483 | } | ||
484 | |||
485 | static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, | ||
486 | struct kvm_assigned_irq | ||
487 | *assigned_irq) | ||
488 | { | ||
489 | int r = -ENODEV; | ||
490 | struct kvm_assigned_dev_kernel *match; | ||
491 | |||
492 | mutex_lock(&kvm->lock); | ||
493 | |||
494 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
495 | assigned_irq->assigned_dev_id); | ||
496 | if (!match) | ||
497 | goto out; | ||
498 | |||
499 | r = kvm_deassign_irq(kvm, match, assigned_irq->flags); | ||
500 | out: | ||
501 | mutex_unlock(&kvm->lock); | ||
502 | return r; | ||
503 | } | ||
504 | |||
505 | static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | ||
506 | struct kvm_assigned_pci_dev *assigned_dev) | ||
507 | { | ||
508 | int r = 0, idx; | ||
509 | struct kvm_assigned_dev_kernel *match; | ||
510 | struct pci_dev *dev; | ||
511 | |||
512 | mutex_lock(&kvm->lock); | ||
513 | idx = srcu_read_lock(&kvm->srcu); | ||
514 | |||
515 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
516 | assigned_dev->assigned_dev_id); | ||
517 | if (match) { | ||
518 | /* device already assigned */ | ||
519 | r = -EEXIST; | ||
520 | goto out; | ||
521 | } | ||
522 | |||
523 | match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); | ||
524 | if (match == NULL) { | ||
525 | printk(KERN_INFO "%s: Couldn't allocate memory\n", | ||
526 | __func__); | ||
527 | r = -ENOMEM; | ||
528 | goto out; | ||
529 | } | ||
530 | dev = pci_get_domain_bus_and_slot(assigned_dev->segnr, | ||
531 | assigned_dev->busnr, | ||
532 | assigned_dev->devfn); | ||
533 | if (!dev) { | ||
534 | printk(KERN_INFO "%s: host device not found\n", __func__); | ||
535 | r = -EINVAL; | ||
536 | goto out_free; | ||
537 | } | ||
538 | if (pci_enable_device(dev)) { | ||
539 | printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); | ||
540 | r = -EBUSY; | ||
541 | goto out_put; | ||
542 | } | ||
543 | r = pci_request_regions(dev, "kvm_assigned_device"); | ||
544 | if (r) { | ||
545 | printk(KERN_INFO "%s: Could not get access to device regions\n", | ||
546 | __func__); | ||
547 | goto out_disable; | ||
548 | } | ||
549 | |||
550 | pci_reset_function(dev); | ||
551 | |||
552 | match->assigned_dev_id = assigned_dev->assigned_dev_id; | ||
553 | match->host_segnr = assigned_dev->segnr; | ||
554 | match->host_busnr = assigned_dev->busnr; | ||
555 | match->host_devfn = assigned_dev->devfn; | ||
556 | match->flags = assigned_dev->flags; | ||
557 | match->dev = dev; | ||
558 | spin_lock_init(&match->assigned_dev_lock); | ||
559 | match->irq_source_id = -1; | ||
560 | match->kvm = kvm; | ||
561 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | ||
562 | INIT_WORK(&match->interrupt_work, | ||
563 | kvm_assigned_dev_interrupt_work_handler); | ||
564 | |||
565 | list_add(&match->list, &kvm->arch.assigned_dev_head); | ||
566 | |||
567 | if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { | ||
568 | if (!kvm->arch.iommu_domain) { | ||
569 | r = kvm_iommu_map_guest(kvm); | ||
570 | if (r) | ||
571 | goto out_list_del; | ||
572 | } | ||
573 | r = kvm_assign_device(kvm, match); | ||
574 | if (r) | ||
575 | goto out_list_del; | ||
576 | } | ||
577 | |||
578 | out: | ||
579 | srcu_read_unlock(&kvm->srcu, idx); | ||
580 | mutex_unlock(&kvm->lock); | ||
581 | return r; | ||
582 | out_list_del: | ||
583 | list_del(&match->list); | ||
584 | pci_release_regions(dev); | ||
585 | out_disable: | ||
586 | pci_disable_device(dev); | ||
587 | out_put: | ||
588 | pci_dev_put(dev); | ||
589 | out_free: | ||
590 | kfree(match); | ||
591 | srcu_read_unlock(&kvm->srcu, idx); | ||
592 | mutex_unlock(&kvm->lock); | ||
593 | return r; | ||
594 | } | ||
595 | |||
596 | static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, | ||
597 | struct kvm_assigned_pci_dev *assigned_dev) | ||
598 | { | ||
599 | int r = 0; | ||
600 | struct kvm_assigned_dev_kernel *match; | ||
601 | |||
602 | mutex_lock(&kvm->lock); | ||
603 | |||
604 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
605 | assigned_dev->assigned_dev_id); | ||
606 | if (!match) { | ||
607 | printk(KERN_INFO "%s: device hasn't been assigned before, " | ||
608 | "so cannot be deassigned\n", __func__); | ||
609 | r = -EINVAL; | ||
610 | goto out; | ||
611 | } | ||
612 | |||
613 | if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) | ||
614 | kvm_deassign_device(kvm, match); | ||
615 | |||
616 | kvm_free_assigned_device(kvm, match); | ||
617 | |||
618 | out: | ||
619 | mutex_unlock(&kvm->lock); | ||
620 | return r; | ||
621 | } | ||
622 | |||
623 | |||
624 | #ifdef __KVM_HAVE_MSIX | ||
625 | static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, | ||
626 | struct kvm_assigned_msix_nr *entry_nr) | ||
627 | { | ||
628 | int r = 0; | ||
629 | struct kvm_assigned_dev_kernel *adev; | ||
630 | |||
631 | mutex_lock(&kvm->lock); | ||
632 | |||
633 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
634 | entry_nr->assigned_dev_id); | ||
635 | if (!adev) { | ||
636 | r = -EINVAL; | ||
637 | goto msix_nr_out; | ||
638 | } | ||
639 | |||
640 | if (adev->entries_nr == 0) { | ||
641 | adev->entries_nr = entry_nr->entry_nr; | ||
642 | if (adev->entries_nr == 0 || | ||
643 | adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { | ||
644 | r = -EINVAL; | ||
645 | goto msix_nr_out; | ||
646 | } | ||
647 | |||
648 | adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * | ||
649 | entry_nr->entry_nr, | ||
650 | GFP_KERNEL); | ||
651 | if (!adev->host_msix_entries) { | ||
652 | r = -ENOMEM; | ||
653 | goto msix_nr_out; | ||
654 | } | ||
655 | adev->guest_msix_entries = kzalloc( | ||
656 | sizeof(struct kvm_guest_msix_entry) * | ||
657 | entry_nr->entry_nr, GFP_KERNEL); | ||
658 | if (!adev->guest_msix_entries) { | ||
659 | kfree(adev->host_msix_entries); | ||
660 | r = -ENOMEM; | ||
661 | goto msix_nr_out; | ||
662 | } | ||
663 | } else /* Setting the MSI-X entry count twice is not allowed */ | ||
664 | r = -EINVAL; | ||
665 | msix_nr_out: | ||
666 | mutex_unlock(&kvm->lock); | ||
667 | return r; | ||
668 | } | ||
669 | |||
670 | static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, | ||
671 | struct kvm_assigned_msix_entry *entry) | ||
672 | { | ||
673 | int r = 0, i; | ||
674 | struct kvm_assigned_dev_kernel *adev; | ||
675 | |||
676 | mutex_lock(&kvm->lock); | ||
677 | |||
678 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
679 | entry->assigned_dev_id); | ||
680 | |||
681 | if (!adev) { | ||
682 | r = -EINVAL; | ||
683 | goto msix_entry_out; | ||
684 | } | ||
685 | |||
686 | for (i = 0; i < adev->entries_nr; i++) | ||
687 | if (adev->guest_msix_entries[i].vector == 0 || | ||
688 | adev->guest_msix_entries[i].entry == entry->entry) { | ||
689 | adev->guest_msix_entries[i].entry = entry->entry; | ||
690 | adev->guest_msix_entries[i].vector = entry->gsi; | ||
691 | adev->host_msix_entries[i].entry = entry->entry; | ||
692 | break; | ||
693 | } | ||
694 | if (i == adev->entries_nr) { | ||
695 | r = -ENOSPC; | ||
696 | goto msix_entry_out; | ||
697 | } | ||
698 | |||
699 | msix_entry_out: | ||
700 | mutex_unlock(&kvm->lock); | ||
701 | |||
702 | return r; | ||
703 | } | ||
704 | #endif | ||
705 | |||
706 | long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | ||
707 | unsigned long arg) | ||
708 | { | ||
709 | void __user *argp = (void __user *)arg; | ||
710 | int r = -ENOTTY; | ||
711 | |||
712 | switch (ioctl) { | ||
713 | case KVM_ASSIGN_PCI_DEVICE: { | ||
714 | struct kvm_assigned_pci_dev assigned_dev; | ||
715 | |||
716 | r = -EFAULT; | ||
717 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
718 | goto out; | ||
719 | r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); | ||
720 | if (r) | ||
721 | goto out; | ||
722 | break; | ||
723 | } | ||
724 | case KVM_ASSIGN_IRQ: { | ||
725 | r = -EOPNOTSUPP; | ||
726 | break; | ||
727 | } | ||
728 | #ifdef KVM_CAP_ASSIGN_DEV_IRQ | ||
729 | case KVM_ASSIGN_DEV_IRQ: { | ||
730 | struct kvm_assigned_irq assigned_irq; | ||
731 | |||
732 | r = -EFAULT; | ||
733 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
734 | goto out; | ||
735 | r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); | ||
736 | if (r) | ||
737 | goto out; | ||
738 | break; | ||
739 | } | ||
740 | case KVM_DEASSIGN_DEV_IRQ: { | ||
741 | struct kvm_assigned_irq assigned_irq; | ||
742 | |||
743 | r = -EFAULT; | ||
744 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
745 | goto out; | ||
746 | r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); | ||
747 | if (r) | ||
748 | goto out; | ||
749 | break; | ||
750 | } | ||
751 | #endif | ||
752 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT | ||
753 | case KVM_DEASSIGN_PCI_DEVICE: { | ||
754 | struct kvm_assigned_pci_dev assigned_dev; | ||
755 | |||
756 | r = -EFAULT; | ||
757 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
758 | goto out; | ||
759 | r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); | ||
760 | if (r) | ||
761 | goto out; | ||
762 | break; | ||
763 | } | ||
764 | #endif | ||
765 | #ifdef KVM_CAP_IRQ_ROUTING | ||
766 | case KVM_SET_GSI_ROUTING: { | ||
767 | struct kvm_irq_routing routing; | ||
768 | struct kvm_irq_routing __user *urouting; | ||
769 | struct kvm_irq_routing_entry *entries; | ||
770 | |||
771 | r = -EFAULT; | ||
772 | if (copy_from_user(&routing, argp, sizeof(routing))) | ||
773 | goto out; | ||
774 | r = -EINVAL; | ||
775 | if (routing.nr >= KVM_MAX_IRQ_ROUTES) | ||
776 | goto out; | ||
777 | if (routing.flags) | ||
778 | goto out; | ||
779 | r = -ENOMEM; | ||
780 | entries = vmalloc(routing.nr * sizeof(*entries)); | ||
781 | if (!entries) | ||
782 | goto out; | ||
783 | r = -EFAULT; | ||
784 | urouting = argp; | ||
785 | if (copy_from_user(entries, urouting->entries, | ||
786 | routing.nr * sizeof(*entries))) | ||
787 | goto out_free_irq_routing; | ||
788 | r = kvm_set_irq_routing(kvm, entries, routing.nr, | ||
789 | routing.flags); | ||
790 | out_free_irq_routing: | ||
791 | vfree(entries); | ||
792 | break; | ||
793 | } | ||
794 | #endif /* KVM_CAP_IRQ_ROUTING */ | ||
795 | #ifdef __KVM_HAVE_MSIX | ||
796 | case KVM_ASSIGN_SET_MSIX_NR: { | ||
797 | struct kvm_assigned_msix_nr entry_nr; | ||
798 | r = -EFAULT; | ||
799 | if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) | ||
800 | goto out; | ||
801 | r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); | ||
802 | if (r) | ||
803 | goto out; | ||
804 | break; | ||
805 | } | ||
806 | case KVM_ASSIGN_SET_MSIX_ENTRY: { | ||
807 | struct kvm_assigned_msix_entry entry; | ||
808 | r = -EFAULT; | ||
809 | if (copy_from_user(&entry, argp, sizeof entry)) | ||
810 | goto out; | ||
811 | r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); | ||
812 | if (r) | ||
813 | goto out; | ||
814 | break; | ||
815 | } | ||
816 | #endif | ||
817 | } | ||
818 | out: | ||
819 | return r; | ||
820 | } | ||
821 | |||
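
The interrupt path above splits work between hard-irq and process context: kvm_assigned_dev_intr() only records MSI-X pending state under a spinlock and schedules the work item, while kvm_assigned_dev_interrupt_work_handler() performs the actual guest injection. A minimal standalone sketch of that deferral pattern follows; it is not part of the patch, and the demo_* names are hypothetical.

/*
 * Illustrative sketch of the hard-irq/workqueue split used above.
 * The irq handler does cheap bookkeeping under a spinlock and defers
 * the heavy lifting to process context.
 */
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct demo_dev {
	spinlock_t lock;
	bool pending;			/* set in irq context, consumed in work */
	struct work_struct work;
};

static irqreturn_t demo_intr(int irq, void *opaque)
{
	struct demo_dev *d = opaque;
	unsigned long flags;

	spin_lock_irqsave(&d->lock, flags);
	d->pending = true;		/* cheap bookkeeping only */
	spin_unlock_irqrestore(&d->lock, flags);

	schedule_work(&d->work);	/* defer the expensive part */
	return IRQ_HANDLED;
}

static void demo_work(struct work_struct *work)
{
	struct demo_dev *d = container_of(work, struct demo_dev, work);

	spin_lock_irq(&d->lock);
	if (d->pending) {
		d->pending = false;
		/* inject the interrupt into the guest here */
	}
	spin_unlock_irq(&d->lock);
}

The point of the split is that the irq-context critical section stays short, while anything that may sleep or take mutexes runs from the workqueue.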
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 04d69cd7049b..36e258029649 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -10,6 +10,7 @@ | |||
10 | #include "iodev.h" | 10 | #include "iodev.h" |
11 | 11 | ||
12 | #include <linux/kvm_host.h> | 12 | #include <linux/kvm_host.h> |
13 | #include <linux/slab.h> | ||
13 | #include <linux/kvm.h> | 14 | #include <linux/kvm.h> |
14 | 15 | ||
15 | #include "coalesced_mmio.h" | 16 | #include "coalesced_mmio.h" |
@@ -92,41 +93,64 @@ static const struct kvm_io_device_ops coalesced_mmio_ops = { | |||
92 | int kvm_coalesced_mmio_init(struct kvm *kvm) | 93 | int kvm_coalesced_mmio_init(struct kvm *kvm) |
93 | { | 94 | { |
94 | struct kvm_coalesced_mmio_dev *dev; | 95 | struct kvm_coalesced_mmio_dev *dev; |
96 | struct page *page; | ||
95 | int ret; | 97 | int ret; |
96 | 98 | ||
99 | ret = -ENOMEM; | ||
100 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
101 | if (!page) | ||
102 | goto out_err; | ||
103 | kvm->coalesced_mmio_ring = page_address(page); | ||
104 | |||
105 | ret = -ENOMEM; | ||
97 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); | 106 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); |
98 | if (!dev) | 107 | if (!dev) |
99 | return -ENOMEM; | 108 | goto out_free_page; |
100 | spin_lock_init(&dev->lock); | 109 | spin_lock_init(&dev->lock); |
101 | kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); | 110 | kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); |
102 | dev->kvm = kvm; | 111 | dev->kvm = kvm; |
103 | kvm->coalesced_mmio_dev = dev; | 112 | kvm->coalesced_mmio_dev = dev; |
104 | 113 | ||
105 | ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev); | 114 | mutex_lock(&kvm->slots_lock); |
115 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev); | ||
116 | mutex_unlock(&kvm->slots_lock); | ||
106 | if (ret < 0) | 117 | if (ret < 0) |
107 | kfree(dev); | 118 | goto out_free_dev; |
119 | |||
120 | return ret; | ||
108 | 121 | ||
122 | out_free_dev: | ||
123 | kfree(dev); | ||
124 | out_free_page: | ||
125 | __free_page(page); | ||
126 | out_err: | ||
109 | return ret; | 127 | return ret; |
110 | } | 128 | } |
111 | 129 | ||
130 | void kvm_coalesced_mmio_free(struct kvm *kvm) | ||
131 | { | ||
132 | if (kvm->coalesced_mmio_ring) | ||
133 | free_page((unsigned long)kvm->coalesced_mmio_ring); | ||
134 | } | ||
135 | |||
112 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | 136 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, |
113 | struct kvm_coalesced_mmio_zone *zone) | 137 | struct kvm_coalesced_mmio_zone *zone) |
114 | { | 138 | { |
115 | struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; | 139 | struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; |
116 | 140 | ||
117 | if (dev == NULL) | 141 | if (dev == NULL) |
118 | return -EINVAL; | 142 | return -EINVAL; |
119 | 143 | ||
120 | down_write(&kvm->slots_lock); | 144 | mutex_lock(&kvm->slots_lock); |
121 | if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { | 145 | if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { |
122 | up_write(&kvm->slots_lock); | 146 | mutex_unlock(&kvm->slots_lock); |
123 | return -ENOBUFS; | 147 | return -ENOBUFS; |
124 | } | 148 | } |
125 | 149 | ||
126 | dev->zone[dev->nb_zones] = *zone; | 150 | dev->zone[dev->nb_zones] = *zone; |
127 | dev->nb_zones++; | 151 | dev->nb_zones++; |
128 | 152 | ||
129 | up_write(&kvm->slots_lock); | 153 | mutex_unlock(&kvm->slots_lock); |
130 | return 0; | 154 | return 0; |
131 | } | 155 | } |
132 | 156 | ||
@@ -140,10 +164,10 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, | |||
140 | if (dev == NULL) | 164 | if (dev == NULL) |
141 | return -EINVAL; | 165 | return -EINVAL; |
142 | 166 | ||
143 | down_write(&kvm->slots_lock); | 167 | mutex_lock(&kvm->slots_lock); |
144 | 168 | ||
145 | i = dev->nb_zones; | 169 | i = dev->nb_zones; |
146 | while(i) { | 170 | while (i) { |
147 | z = &dev->zone[i - 1]; | 171 | z = &dev->zone[i - 1]; |
148 | 172 | ||
149 | /* unregister all zones | 173 | /* unregister all zones |
@@ -158,7 +182,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, | |||
158 | i--; | 182 | i--; |
159 | } | 183 | } |
160 | 184 | ||
161 | up_write(&kvm->slots_lock); | 185 | mutex_unlock(&kvm->slots_lock); |
162 | 186 | ||
163 | return 0; | 187 | return 0; |
164 | } | 188 | } |
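
The reworked kvm_coalesced_mmio_init() is a textbook goto-unwind ladder: resources are acquired in order and, on failure, released in reverse with one label per acquired resource. A minimal sketch of the same shape, assuming hypothetical demo_* names:

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/slab.h>

struct demo_ctx {
	void *ring;			/* page-backed ring buffer */
	int *dev;
};

static int demo_init(struct demo_ctx *c)
{
	struct page *page;
	int ret = -ENOMEM;

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
		goto out_err;
	c->ring = page_address(page);

	c->dev = kzalloc(sizeof(*c->dev), GFP_KERNEL);
	if (!c->dev)
		goto out_free_page;

	return 0;			/* success: both resources stay live */

out_free_page:
	__free_page(page);		/* undo in reverse order of acquisition */
out_err:
	return ret;
}

static void demo_free(struct demo_ctx *c)
{
	kfree(c->dev);
	free_page((unsigned long)c->ring);	/* reverse of demo_init() */
}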
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
index 4b49f27fa31e..8a5959e3535f 100644
--- a/virt/kvm/coalesced_mmio.h
+++ b/virt/kvm/coalesced_mmio.h
@@ -1,3 +1,6 @@ | |||
1 | #ifndef __KVM_COALESCED_MMIO_H__ | ||
2 | #define __KVM_COALESCED_MMIO_H__ | ||
3 | |||
1 | /* | 4 | /* |
2 | * KVM coalesced MMIO | 5 | * KVM coalesced MMIO |
3 | * | 6 | * |
@@ -7,6 +10,8 @@ | |||
7 | * | 10 | * |
8 | */ | 11 | */ |
9 | 12 | ||
13 | #ifdef CONFIG_KVM_MMIO | ||
14 | |||
10 | #define KVM_COALESCED_MMIO_ZONE_MAX 100 | 15 | #define KVM_COALESCED_MMIO_ZONE_MAX 100 |
11 | 16 | ||
12 | struct kvm_coalesced_mmio_dev { | 17 | struct kvm_coalesced_mmio_dev { |
@@ -18,7 +23,17 @@ struct kvm_coalesced_mmio_dev { | |||
18 | }; | 23 | }; |
19 | 24 | ||
20 | int kvm_coalesced_mmio_init(struct kvm *kvm); | 25 | int kvm_coalesced_mmio_init(struct kvm *kvm); |
26 | void kvm_coalesced_mmio_free(struct kvm *kvm); | ||
21 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | 27 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, |
22 | struct kvm_coalesced_mmio_zone *zone); | 28 | struct kvm_coalesced_mmio_zone *zone); |
23 | int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, | 29 | int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, |
24 | struct kvm_coalesced_mmio_zone *zone); | 30 | struct kvm_coalesced_mmio_zone *zone); |
31 | |||
32 | #else | ||
33 | |||
34 | static inline int kvm_coalesced_mmio_init(struct kvm *kvm) { return 0; } | ||
35 | static inline void kvm_coalesced_mmio_free(struct kvm *kvm) { } | ||
36 | |||
37 | #endif | ||
38 | |||
39 | #endif | ||
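
The header now follows the common Kconfig-stub idiom: real declarations when CONFIG_KVM_MMIO is set, and empty static inline stubs otherwise, so callers need no #ifdefs of their own. A hypothetical header using the same idiom might look like this (CONFIG_DEMO_FEATURE and the demo_* names are illustrative):

#ifndef __DEMO_FEATURE_H__
#define __DEMO_FEATURE_H__

struct kvm;

#ifdef CONFIG_DEMO_FEATURE

int demo_feature_init(struct kvm *kvm);
void demo_feature_free(struct kvm *kvm);

#else

/* Callers compile unchanged when the feature is configured out. */
static inline int demo_feature_init(struct kvm *kvm) { return 0; }
static inline void demo_feature_free(struct kvm *kvm) { }

#endif /* CONFIG_DEMO_FEATURE */

#endif /* __DEMO_FEATURE_H__ */

Because the stubs are static inline, the compiler discards them entirely in the disabled configuration, leaving no dead code behind.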
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index bb4ebd89b9ff..b81f0ebbaaad 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/list.h> | 30 | #include <linux/list.h> |
31 | #include <linux/eventfd.h> | 31 | #include <linux/eventfd.h> |
32 | #include <linux/kernel.h> | 32 | #include <linux/kernel.h> |
33 | #include <linux/slab.h> | ||
33 | 34 | ||
34 | #include "iodev.h" | 35 | #include "iodev.h" |
35 | 36 | ||
@@ -47,7 +48,6 @@ struct _irqfd { | |||
47 | int gsi; | 48 | int gsi; |
48 | struct list_head list; | 49 | struct list_head list; |
49 | poll_table pt; | 50 | poll_table pt; |
50 | wait_queue_head_t *wqh; | ||
51 | wait_queue_t wait; | 51 | wait_queue_t wait; |
52 | struct work_struct inject; | 52 | struct work_struct inject; |
53 | struct work_struct shutdown; | 53 | struct work_struct shutdown; |
@@ -61,10 +61,8 @@ irqfd_inject(struct work_struct *work) | |||
61 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); | 61 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); |
62 | struct kvm *kvm = irqfd->kvm; | 62 | struct kvm *kvm = irqfd->kvm; |
63 | 63 | ||
64 | mutex_lock(&kvm->irq_lock); | ||
65 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); | 64 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); |
66 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); | 65 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); |
67 | mutex_unlock(&kvm->irq_lock); | ||
68 | } | 66 | } |
69 | 67 | ||
70 | /* | 68 | /* |
@@ -74,12 +72,13 @@ static void | |||
74 | irqfd_shutdown(struct work_struct *work) | 72 | irqfd_shutdown(struct work_struct *work) |
75 | { | 73 | { |
76 | struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); | 74 | struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); |
75 | u64 cnt; | ||
77 | 76 | ||
78 | /* | 77 | /* |
79 | * Synchronize with the wait-queue and unhook ourselves to prevent | 78 | * Synchronize with the wait-queue and unhook ourselves to prevent |
80 | * further events. | 79 | * further events. |
81 | */ | 80 | */ |
82 | remove_wait_queue(irqfd->wqh, &irqfd->wait); | 81 | eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt); |
83 | 82 | ||
84 | /* | 83 | /* |
85 | * We know no new events will be scheduled at this point, so block | 84 | * We know no new events will be scheduled at this point, so block |
@@ -160,15 +159,13 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, | |||
160 | poll_table *pt) | 159 | poll_table *pt) |
161 | { | 160 | { |
162 | struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); | 161 | struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); |
163 | |||
164 | irqfd->wqh = wqh; | ||
165 | add_wait_queue(wqh, &irqfd->wait); | 162 | add_wait_queue(wqh, &irqfd->wait); |
166 | } | 163 | } |
167 | 164 | ||
168 | static int | 165 | static int |
169 | kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) | 166 | kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) |
170 | { | 167 | { |
171 | struct _irqfd *irqfd; | 168 | struct _irqfd *irqfd, *tmp; |
172 | struct file *file = NULL; | 169 | struct file *file = NULL; |
173 | struct eventfd_ctx *eventfd = NULL; | 170 | struct eventfd_ctx *eventfd = NULL; |
174 | int ret; | 171 | int ret; |
@@ -205,9 +202,20 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) | |||
205 | init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup); | 202 | init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup); |
206 | init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc); | 203 | init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc); |
207 | 204 | ||
205 | spin_lock_irq(&kvm->irqfds.lock); | ||
206 | |||
207 | ret = 0; | ||
208 | list_for_each_entry(tmp, &kvm->irqfds.items, list) { | ||
209 | if (irqfd->eventfd != tmp->eventfd) | ||
210 | continue; | ||
211 | /* This fd is used for another irq already. */ | ||
212 | ret = -EBUSY; | ||
213 | spin_unlock_irq(&kvm->irqfds.lock); | ||
214 | goto fail; | ||
215 | } | ||
216 | |||
208 | events = file->f_op->poll(file, &irqfd->pt); | 217 | events = file->f_op->poll(file, &irqfd->pt); |
209 | 218 | ||
210 | spin_lock_irq(&kvm->irqfds.lock); | ||
211 | list_add_tail(&irqfd->list, &kvm->irqfds.items); | 219 | list_add_tail(&irqfd->list, &kvm->irqfds.items); |
212 | spin_unlock_irq(&kvm->irqfds.lock); | 220 | spin_unlock_irq(&kvm->irqfds.lock); |
213 | 221 | ||
@@ -453,7 +461,7 @@ static int | |||
453 | kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | 461 | kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) |
454 | { | 462 | { |
455 | int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; | 463 | int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; |
456 | struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; | 464 | enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; |
457 | struct _ioeventfd *p; | 465 | struct _ioeventfd *p; |
458 | struct eventfd_ctx *eventfd; | 466 | struct eventfd_ctx *eventfd; |
459 | int ret; | 467 | int ret; |
@@ -498,7 +506,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
498 | else | 506 | else |
499 | p->wildcard = true; | 507 | p->wildcard = true; |
500 | 508 | ||
501 | down_write(&kvm->slots_lock); | 509 | mutex_lock(&kvm->slots_lock); |
502 | 510 | ||
503 | /* Verify that there isn't a match already */ | 511 | /* Verify that there isn't a match already */
504 | if (ioeventfd_check_collision(kvm, p)) { | 512 | if (ioeventfd_check_collision(kvm, p)) { |
@@ -508,18 +516,18 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
508 | 516 | ||
509 | kvm_iodevice_init(&p->dev, &ioeventfd_ops); | 517 | kvm_iodevice_init(&p->dev, &ioeventfd_ops); |
510 | 518 | ||
511 | ret = __kvm_io_bus_register_dev(bus, &p->dev); | 519 | ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev); |
512 | if (ret < 0) | 520 | if (ret < 0) |
513 | goto unlock_fail; | 521 | goto unlock_fail; |
514 | 522 | ||
515 | list_add_tail(&p->list, &kvm->ioeventfds); | 523 | list_add_tail(&p->list, &kvm->ioeventfds); |
516 | 524 | ||
517 | up_write(&kvm->slots_lock); | 525 | mutex_unlock(&kvm->slots_lock); |
518 | 526 | ||
519 | return 0; | 527 | return 0; |
520 | 528 | ||
521 | unlock_fail: | 529 | unlock_fail: |
522 | up_write(&kvm->slots_lock); | 530 | mutex_unlock(&kvm->slots_lock); |
523 | 531 | ||
524 | fail: | 532 | fail: |
525 | kfree(p); | 533 | kfree(p); |
@@ -532,7 +540,7 @@ static int | |||
532 | kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | 540 | kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) |
533 | { | 541 | { |
534 | int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; | 542 | int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; |
535 | struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; | 543 | enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; |
536 | struct _ioeventfd *p, *tmp; | 544 | struct _ioeventfd *p, *tmp; |
537 | struct eventfd_ctx *eventfd; | 545 | struct eventfd_ctx *eventfd; |
538 | int ret = -ENOENT; | 546 | int ret = -ENOENT; |
@@ -541,7 +549,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
541 | if (IS_ERR(eventfd)) | 549 | if (IS_ERR(eventfd)) |
542 | return PTR_ERR(eventfd); | 550 | return PTR_ERR(eventfd); |
543 | 551 | ||
544 | down_write(&kvm->slots_lock); | 552 | mutex_lock(&kvm->slots_lock); |
545 | 553 | ||
546 | list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { | 554 | list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { |
547 | bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); | 555 | bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); |
@@ -555,13 +563,13 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
555 | if (!p->wildcard && p->datamatch != args->datamatch) | 563 | if (!p->wildcard && p->datamatch != args->datamatch) |
556 | continue; | 564 | continue; |
557 | 565 | ||
558 | __kvm_io_bus_unregister_dev(bus, &p->dev); | 566 | kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); |
559 | ioeventfd_release(p); | 567 | ioeventfd_release(p); |
560 | ret = 0; | 568 | ret = 0; |
561 | break; | 569 | break; |
562 | } | 570 | } |
563 | 571 | ||
564 | up_write(&kvm->slots_lock); | 572 | mutex_unlock(&kvm->slots_lock); |
565 | 573 | ||
566 | eventfd_ctx_put(eventfd); | 574 | eventfd_ctx_put(eventfd); |
567 | 575 | ||
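
Note the race fix in kvm_irqfd_assign(): the duplicate-eventfd scan and the list insertion now happen inside a single irqfds.lock critical section, so two racing assigns of the same eventfd cannot both pass the check. A minimal sketch of that check-then-insert-under-one-lock pattern, with hypothetical demo_* names:

#include <linux/errno.h>
#include <linux/list.h>
#include <linux/spinlock.h>

struct demo_item {
	void *key;			/* e.g. the eventfd context */
	struct list_head list;
};

static DEFINE_SPINLOCK(demo_lock);
static LIST_HEAD(demo_items);

/* Reject a duplicate key and insert atomically under one lock hold. */
static int demo_add_unique(struct demo_item *new)
{
	struct demo_item *tmp;

	spin_lock_irq(&demo_lock);
	list_for_each_entry(tmp, &demo_items, list) {
		if (tmp->key == new->key) {
			spin_unlock_irq(&demo_lock);
			return -EBUSY;	/* key already registered */
		}
	}
	list_add_tail(&new->list, &demo_items);
	spin_unlock_irq(&demo_lock);
	return 0;
}

Dropping the lock between the scan and the insertion would reopen the window the patch closes.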
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 9fe140bb38ec..7c79c1d76d0c 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/smp.h> | 33 | #include <linux/smp.h> |
34 | #include <linux/hrtimer.h> | 34 | #include <linux/hrtimer.h> |
35 | #include <linux/io.h> | 35 | #include <linux/io.h> |
36 | #include <linux/slab.h> | ||
36 | #include <asm/processor.h> | 37 | #include <asm/processor.h> |
37 | #include <asm/page.h> | 38 | #include <asm/page.h> |
38 | #include <asm/current.h> | 39 | #include <asm/current.h> |
@@ -100,6 +101,19 @@ static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) | |||
100 | return injected; | 101 | return injected; |
101 | } | 102 | } |
102 | 103 | ||
104 | static void update_handled_vectors(struct kvm_ioapic *ioapic) | ||
105 | { | ||
106 | DECLARE_BITMAP(handled_vectors, 256); | ||
107 | int i; | ||
108 | |||
109 | memset(handled_vectors, 0, sizeof(handled_vectors)); | ||
110 | for (i = 0; i < IOAPIC_NUM_PINS; ++i) | ||
111 | __set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors); | ||
112 | memcpy(ioapic->handled_vectors, handled_vectors, | ||
113 | sizeof(handled_vectors)); | ||
114 | smp_wmb(); | ||
115 | } | ||
116 | |||
103 | static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | 117 | static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) |
104 | { | 118 | { |
105 | unsigned index; | 119 | unsigned index; |
@@ -134,6 +148,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | |||
134 | e->bits |= (u32) val; | 148 | e->bits |= (u32) val; |
135 | e->fields.remote_irr = 0; | 149 | e->fields.remote_irr = 0; |
136 | } | 150 | } |
151 | update_handled_vectors(ioapic); | ||
137 | mask_after = e->fields.mask; | 152 | mask_after = e->fields.mask; |
138 | if (mask_before != mask_after) | 153 | if (mask_before != mask_after) |
139 | kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); | 154 | kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); |
@@ -182,6 +197,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) | |||
182 | union kvm_ioapic_redirect_entry entry; | 197 | union kvm_ioapic_redirect_entry entry; |
183 | int ret = 1; | 198 | int ret = 1; |
184 | 199 | ||
200 | spin_lock(&ioapic->lock); | ||
185 | if (irq >= 0 && irq < IOAPIC_NUM_PINS) { | 201 | if (irq >= 0 && irq < IOAPIC_NUM_PINS) { |
186 | entry = ioapic->redirtbl[irq]; | 202 | entry = ioapic->redirtbl[irq]; |
187 | level ^= entry.fields.polarity; | 203 | level ^= entry.fields.polarity; |
@@ -198,34 +214,54 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) | |||
198 | } | 214 | } |
199 | trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); | 215 | trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); |
200 | } | 216 | } |
217 | spin_unlock(&ioapic->lock); | ||
218 | |||
201 | return ret; | 219 | return ret; |
202 | } | 220 | } |
203 | 221 | ||
204 | static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin, | 222 | static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, |
205 | int trigger_mode) | 223 | int trigger_mode) |
206 | { | 224 | { |
207 | union kvm_ioapic_redirect_entry *ent; | 225 | int i; |
226 | |||
227 | for (i = 0; i < IOAPIC_NUM_PINS; i++) { | ||
228 | union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; | ||
208 | 229 | ||
209 | ent = &ioapic->redirtbl[pin]; | 230 | if (ent->fields.vector != vector) |
231 | continue; | ||
210 | 232 | ||
211 | kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin); | 233 | /* |
234 | * We drop the lock while calling the ack notifiers because the | ||
235 | * ack notifier callbacks for assigned devices call back into the | ||
236 | * IOAPIC recursively. Since remote_irr is cleared only after the | ||
237 | * notifiers have run, if the same vector is delivered while the | ||
238 | * lock is dropped it will be put into irr and delivered once the | ||
239 | * ack notifier returns. | ||
240 | */ | ||
241 | spin_unlock(&ioapic->lock); | ||
242 | kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i); | ||
243 | spin_lock(&ioapic->lock); | ||
244 | |||
245 | if (trigger_mode != IOAPIC_LEVEL_TRIG) | ||
246 | continue; | ||
212 | 247 | ||
213 | if (trigger_mode == IOAPIC_LEVEL_TRIG) { | ||
214 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); | 248 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); |
215 | ent->fields.remote_irr = 0; | 249 | ent->fields.remote_irr = 0; |
216 | if (!ent->fields.mask && (ioapic->irr & (1 << pin))) | 250 | if (!ent->fields.mask && (ioapic->irr & (1 << i))) |
217 | ioapic_service(ioapic, pin); | 251 | ioapic_service(ioapic, i); |
218 | } | 252 | } |
219 | } | 253 | } |
220 | 254 | ||
221 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) | 255 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) |
222 | { | 256 | { |
223 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 257 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
224 | int i; | ||
225 | 258 | ||
226 | for (i = 0; i < IOAPIC_NUM_PINS; i++) | 259 | smp_rmb(); |
227 | if (ioapic->redirtbl[i].fields.vector == vector) | 260 | if (!test_bit(vector, ioapic->handled_vectors)) |
228 | __kvm_ioapic_update_eoi(ioapic, i, trigger_mode); | 261 | return; |
262 | spin_lock(&ioapic->lock); | ||
263 | __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); | ||
264 | spin_unlock(&ioapic->lock); | ||
229 | } | 265 | } |
230 | 266 | ||
231 | static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) | 267 | static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) |
@@ -250,8 +286,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, | |||
250 | ioapic_debug("addr %lx\n", (unsigned long)addr); | 286 | ioapic_debug("addr %lx\n", (unsigned long)addr); |
251 | ASSERT(!(addr & 0xf)); /* check alignment */ | 287 | ASSERT(!(addr & 0xf)); /* check alignment */ |
252 | 288 | ||
253 | mutex_lock(&ioapic->kvm->irq_lock); | ||
254 | addr &= 0xff; | 289 | addr &= 0xff; |
290 | spin_lock(&ioapic->lock); | ||
255 | switch (addr) { | 291 | switch (addr) { |
256 | case IOAPIC_REG_SELECT: | 292 | case IOAPIC_REG_SELECT: |
257 | result = ioapic->ioregsel; | 293 | result = ioapic->ioregsel; |
@@ -265,6 +301,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, | |||
265 | result = 0; | 301 | result = 0; |
266 | break; | 302 | break; |
267 | } | 303 | } |
304 | spin_unlock(&ioapic->lock); | ||
305 | |||
268 | switch (len) { | 306 | switch (len) { |
269 | case 8: | 307 | case 8: |
270 | *(u64 *) val = result; | 308 | *(u64 *) val = result; |
@@ -277,7 +315,6 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, | |||
277 | default: | 315 | default: |
278 | printk(KERN_WARNING "ioapic: wrong length %d\n", len); | 316 | printk(KERN_WARNING "ioapic: wrong length %d\n", len); |
279 | } | 317 | } |
280 | mutex_unlock(&ioapic->kvm->irq_lock); | ||
281 | return 0; | 318 | return 0; |
282 | } | 319 | } |
283 | 320 | ||
@@ -293,15 +330,15 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, | |||
293 | (void*)addr, len, val); | 330 | (void*)addr, len, val); |
294 | ASSERT(!(addr & 0xf)); /* check alignment */ | 331 | ASSERT(!(addr & 0xf)); /* check alignment */ |
295 | 332 | ||
296 | mutex_lock(&ioapic->kvm->irq_lock); | ||
297 | if (len == 4 || len == 8) | 333 | if (len == 4 || len == 8) |
298 | data = *(u32 *) val; | 334 | data = *(u32 *) val; |
299 | else { | 335 | else { |
300 | printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); | 336 | printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); |
301 | goto unlock; | 337 | return 0; |
302 | } | 338 | } |
303 | 339 | ||
304 | addr &= 0xff; | 340 | addr &= 0xff; |
341 | spin_lock(&ioapic->lock); | ||
305 | switch (addr) { | 342 | switch (addr) { |
306 | case IOAPIC_REG_SELECT: | 343 | case IOAPIC_REG_SELECT: |
307 | ioapic->ioregsel = data; | 344 | ioapic->ioregsel = data; |
@@ -312,15 +349,14 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, | |||
312 | break; | 349 | break; |
313 | #ifdef CONFIG_IA64 | 350 | #ifdef CONFIG_IA64 |
314 | case IOAPIC_REG_EOI: | 351 | case IOAPIC_REG_EOI: |
315 | kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG); | 352 | __kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG); |
316 | break; | 353 | break; |
317 | #endif | 354 | #endif |
318 | 355 | ||
319 | default: | 356 | default: |
320 | break; | 357 | break; |
321 | } | 358 | } |
322 | unlock: | 359 | spin_unlock(&ioapic->lock); |
323 | mutex_unlock(&ioapic->kvm->irq_lock); | ||
324 | return 0; | 360 | return 0; |
325 | } | 361 | } |
326 | 362 | ||
@@ -334,6 +370,7 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic) | |||
334 | ioapic->ioregsel = 0; | 370 | ioapic->ioregsel = 0; |
335 | ioapic->irr = 0; | 371 | ioapic->irr = 0; |
336 | ioapic->id = 0; | 372 | ioapic->id = 0; |
373 | update_handled_vectors(ioapic); | ||
337 | } | 374 | } |
338 | 375 | ||
339 | static const struct kvm_io_device_ops ioapic_mmio_ops = { | 376 | static const struct kvm_io_device_ops ioapic_mmio_ops = { |
@@ -349,14 +386,54 @@ int kvm_ioapic_init(struct kvm *kvm) | |||
349 | ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); | 386 | ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); |
350 | if (!ioapic) | 387 | if (!ioapic) |
351 | return -ENOMEM; | 388 | return -ENOMEM; |
389 | spin_lock_init(&ioapic->lock); | ||
352 | kvm->arch.vioapic = ioapic; | 390 | kvm->arch.vioapic = ioapic; |
353 | kvm_ioapic_reset(ioapic); | 391 | kvm_ioapic_reset(ioapic); |
354 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); | 392 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); |
355 | ioapic->kvm = kvm; | 393 | ioapic->kvm = kvm; |
356 | ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev); | 394 | mutex_lock(&kvm->slots_lock); |
357 | if (ret < 0) | 395 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); |
396 | mutex_unlock(&kvm->slots_lock); | ||
397 | if (ret < 0) { | ||
398 | kvm->arch.vioapic = NULL; | ||
358 | kfree(ioapic); | 399 | kfree(ioapic); |
400 | } | ||
359 | 401 | ||
360 | return ret; | 402 | return ret; |
361 | } | 403 | } |
362 | 404 | ||
405 | void kvm_ioapic_destroy(struct kvm *kvm) | ||
406 | { | ||
407 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | ||
408 | |||
409 | if (ioapic) { | ||
410 | kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); | ||
411 | kvm->arch.vioapic = NULL; | ||
412 | kfree(ioapic); | ||
413 | } | ||
414 | } | ||
415 | |||
416 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | ||
417 | { | ||
418 | struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); | ||
419 | if (!ioapic) | ||
420 | return -EINVAL; | ||
421 | |||
422 | spin_lock(&ioapic->lock); | ||
423 | memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); | ||
424 | spin_unlock(&ioapic->lock); | ||
425 | return 0; | ||
426 | } | ||
427 | |||
428 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | ||
429 | { | ||
430 | struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); | ||
431 | if (!ioapic) | ||
432 | return -EINVAL; | ||
433 | |||
434 | spin_lock(&ioapic->lock); | ||
435 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); | ||
436 | update_handled_vectors(ioapic); | ||
437 | spin_unlock(&ioapic->lock); | ||
438 | return 0; | ||
439 | } | ||
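
update_handled_vectors() plus the smp_wmb()/smp_rmb() pair in kvm_ioapic_update_eoi() form a cheap publish/subscribe filter: EOIs for vectors the IOAPIC never programmed can bail out without taking ioapic->lock. A standalone sketch of that pattern, with hypothetical demo_* names (not the patch's code):

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/types.h>

#define DEMO_NR_VECTORS 256

static DECLARE_BITMAP(demo_handled, DEMO_NR_VECTORS);

/* Writer: rebuild a private copy, then publish it in one shot. */
static void demo_publish(const u8 *vectors, int n)
{
	DECLARE_BITMAP(tmp, DEMO_NR_VECTORS);
	int i;

	bitmap_zero(tmp, DEMO_NR_VECTORS);
	for (i = 0; i < n; i++)
		__set_bit(vectors[i], tmp);
	memcpy(demo_handled, tmp, sizeof(tmp));
	smp_wmb();			/* publish before readers' smp_rmb() */
}

/* Reader: cheap lock-free filter before taking the expensive lock. */
static bool demo_is_handled(int vector)
{
	smp_rmb();			/* pairs with the writer's smp_wmb() */
	return test_bit(vector, demo_handled);
}

A stale bitmap is harmless here: a spurious hit only costs an unnecessary lock acquisition, and the writer republishes whenever the redirection table changes.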
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 7080b713c160..0b190c34ccc3 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -41,9 +41,12 @@ struct kvm_ioapic { | |||
41 | u32 irr; | 41 | u32 irr; |
42 | u32 pad; | 42 | u32 pad; |
43 | union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; | 43 | union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; |
44 | unsigned long irq_states[IOAPIC_NUM_PINS]; | ||
44 | struct kvm_io_device dev; | 45 | struct kvm_io_device dev; |
45 | struct kvm *kvm; | 46 | struct kvm *kvm; |
46 | void (*ack_notifier)(void *opaque, int irq); | 47 | void (*ack_notifier)(void *opaque, int irq); |
48 | spinlock_t lock; | ||
49 | DECLARE_BITMAP(handled_vectors, 256); | ||
47 | }; | 50 | }; |
48 | 51 | ||
49 | #ifdef DEBUG | 52 | #ifdef DEBUG |
@@ -69,8 +72,12 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | |||
69 | int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); | 72 | int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); |
70 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); | 73 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); |
71 | int kvm_ioapic_init(struct kvm *kvm); | 74 | int kvm_ioapic_init(struct kvm *kvm); |
75 | void kvm_ioapic_destroy(struct kvm *kvm); | ||
72 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); | 76 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); |
73 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); | 77 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); |
74 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | 78 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, |
75 | struct kvm_lapic_irq *irq); | 79 | struct kvm_lapic_irq *irq); |
80 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | ||
81 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | ||
82 | |||
76 | #endif | 83 | #endif |
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 15147583abd1..80fd3ad3b2de 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -32,10 +32,10 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm); | |||
32 | static void kvm_iommu_put_pages(struct kvm *kvm, | 32 | static void kvm_iommu_put_pages(struct kvm *kvm, |
33 | gfn_t base_gfn, unsigned long npages); | 33 | gfn_t base_gfn, unsigned long npages); |
34 | 34 | ||
35 | int kvm_iommu_map_pages(struct kvm *kvm, | 35 | int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) |
36 | gfn_t base_gfn, unsigned long npages) | ||
37 | { | 36 | { |
38 | gfn_t gfn = base_gfn; | 37 | gfn_t gfn = slot->base_gfn; |
38 | unsigned long npages = slot->npages; | ||
39 | pfn_t pfn; | 39 | pfn_t pfn; |
40 | int i, r = 0; | 40 | int i, r = 0; |
41 | struct iommu_domain *domain = kvm->arch.iommu_domain; | 41 | struct iommu_domain *domain = kvm->arch.iommu_domain; |
@@ -54,7 +54,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, | |||
54 | if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) | 54 | if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) |
55 | continue; | 55 | continue; |
56 | 56 | ||
57 | pfn = gfn_to_pfn(kvm, gfn); | 57 | pfn = gfn_to_pfn_memslot(kvm, slot, gfn); |
58 | r = iommu_map_range(domain, | 58 | r = iommu_map_range(domain, |
59 | gfn_to_gpa(gfn), | 59 | gfn_to_gpa(gfn), |
60 | pfn_to_hpa(pfn), | 60 | pfn_to_hpa(pfn), |
@@ -69,17 +69,19 @@ int kvm_iommu_map_pages(struct kvm *kvm, | |||
69 | return 0; | 69 | return 0; |
70 | 70 | ||
71 | unmap_pages: | 71 | unmap_pages: |
72 | kvm_iommu_put_pages(kvm, base_gfn, i); | 72 | kvm_iommu_put_pages(kvm, slot->base_gfn, i); |
73 | return r; | 73 | return r; |
74 | } | 74 | } |
75 | 75 | ||
76 | static int kvm_iommu_map_memslots(struct kvm *kvm) | 76 | static int kvm_iommu_map_memslots(struct kvm *kvm) |
77 | { | 77 | { |
78 | int i, r = 0; | 78 | int i, r = 0; |
79 | struct kvm_memslots *slots; | ||
80 | |||
81 | slots = rcu_dereference(kvm->memslots); | ||
79 | 82 | ||
80 | for (i = 0; i < kvm->nmemslots; i++) { | 83 | for (i = 0; i < slots->nmemslots; i++) { |
81 | r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn, | 84 | r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); |
82 | kvm->memslots[i].npages); | ||
83 | if (r) | 85 | if (r) |
84 | break; | 86 | break; |
85 | } | 87 | } |
@@ -104,7 +106,8 @@ int kvm_assign_device(struct kvm *kvm, | |||
104 | 106 | ||
105 | r = iommu_attach_device(domain, &pdev->dev); | 107 | r = iommu_attach_device(domain, &pdev->dev); |
106 | if (r) { | 108 | if (r) { |
107 | printk(KERN_ERR "assign device %x:%x.%x failed", | 109 | printk(KERN_ERR "assign device %x:%x:%x.%x failed", |
110 | pci_domain_nr(pdev->bus), | ||
108 | pdev->bus->number, | 111 | pdev->bus->number, |
109 | PCI_SLOT(pdev->devfn), | 112 | PCI_SLOT(pdev->devfn), |
110 | PCI_FUNC(pdev->devfn)); | 113 | PCI_FUNC(pdev->devfn)); |
@@ -125,7 +128,8 @@ int kvm_assign_device(struct kvm *kvm, | |||
125 | goto out_unmap; | 128 | goto out_unmap; |
126 | } | 129 | } |
127 | 130 | ||
128 | printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", | 131 | printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", |
132 | assigned_dev->host_segnr, | ||
129 | assigned_dev->host_busnr, | 133 | assigned_dev->host_busnr, |
130 | PCI_SLOT(assigned_dev->host_devfn), | 134 | PCI_SLOT(assigned_dev->host_devfn), |
131 | PCI_FUNC(assigned_dev->host_devfn)); | 135 | PCI_FUNC(assigned_dev->host_devfn)); |
@@ -152,7 +156,8 @@ int kvm_deassign_device(struct kvm *kvm, | |||
152 | 156 | ||
153 | iommu_detach_device(domain, &pdev->dev); | 157 | iommu_detach_device(domain, &pdev->dev); |
154 | 158 | ||
155 | printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n", | 159 | printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", |
160 | assigned_dev->host_segnr, | ||
156 | assigned_dev->host_busnr, | 161 | assigned_dev->host_busnr, |
157 | PCI_SLOT(assigned_dev->host_devfn), | 162 | PCI_SLOT(assigned_dev->host_devfn), |
158 | PCI_FUNC(assigned_dev->host_devfn)); | 163 | PCI_FUNC(assigned_dev->host_devfn)); |
@@ -210,10 +215,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm, | |||
210 | static int kvm_iommu_unmap_memslots(struct kvm *kvm) | 215 | static int kvm_iommu_unmap_memslots(struct kvm *kvm) |
211 | { | 216 | { |
212 | int i; | 217 | int i; |
218 | struct kvm_memslots *slots; | ||
219 | |||
220 | slots = rcu_dereference(kvm->memslots); | ||
213 | 221 | ||
214 | for (i = 0; i < kvm->nmemslots; i++) { | 222 | for (i = 0; i < slots->nmemslots; i++) { |
215 | kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn, | 223 | kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, |
216 | kvm->memslots[i].npages); | 224 | slots->memslots[i].npages); |
217 | } | 225 | } |
218 | 226 | ||
219 | return 0; | 227 | return 0; |
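[Note on the iommu.c changes] kvm_iommu_map_memslots()/kvm_iommu_unmap_memslots() now reach the slot array through rcu_dereference(kvm->memslots) instead of an embedded array, matching the RCU-published struct kvm_memslots introduced in kvm_main.c below. No rcu_read_lock() appears in these hunks, so the callers presumably pin the table by other means (these paths run with the slots lock held). A C11 userspace analogue of the read side, with illustrative names:

#include <stdatomic.h>
#include <stddef.h>

struct slot  { unsigned long base_gfn, npages; };
struct slots { size_t n; struct slot s[8]; };

static _Atomic(struct slots *) current_slots;

static unsigned long total_pages(void)
{
        /* acquire-load pairs with the updater's release store; the
         * userspace analogue of rcu_dereference()/rcu_assign_pointer() */
        struct slots *sl = atomic_load_explicit(&current_slots,
                                                memory_order_acquire);
        unsigned long sum = 0;

        if (!sl)
                return 0;
        for (size_t i = 0; i < sl->n; i++)
                sum += sl->s[i].npages;   /* table must outlive this walk */
        return sum;
}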
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 001663ff401a..a0e88809e45e 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c | |||
@@ -20,6 +20,7 @@ | |||
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/kvm_host.h> | 22 | #include <linux/kvm_host.h> |
23 | #include <linux/slab.h> | ||
23 | #include <trace/events/kvm.h> | 24 | #include <trace/events/kvm.h> |
24 | 25 | ||
25 | #include <asm/msidef.h> | 26 | #include <asm/msidef.h> |
@@ -31,20 +32,39 @@ | |||
31 | 32 | ||
32 | #include "ioapic.h" | 33 | #include "ioapic.h" |
33 | 34 | ||
35 | static inline int kvm_irq_line_state(unsigned long *irq_state, | ||
36 | int irq_source_id, int level) | ||
37 | { | ||
38 | /* Logical OR for level trig interrupt */ | ||
39 | if (level) | ||
40 | set_bit(irq_source_id, irq_state); | ||
41 | else | ||
42 | clear_bit(irq_source_id, irq_state); | ||
43 | |||
44 | return !!(*irq_state); | ||
45 | } | ||
46 | |||
34 | static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, | 47 | static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, |
35 | struct kvm *kvm, int level) | 48 | struct kvm *kvm, int irq_source_id, int level) |
36 | { | 49 | { |
37 | #ifdef CONFIG_X86 | 50 | #ifdef CONFIG_X86 |
38 | return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level); | 51 | struct kvm_pic *pic = pic_irqchip(kvm); |
52 | level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin], | ||
53 | irq_source_id, level); | ||
54 | return kvm_pic_set_irq(pic, e->irqchip.pin, level); | ||
39 | #else | 55 | #else |
40 | return -1; | 56 | return -1; |
41 | #endif | 57 | #endif |
42 | } | 58 | } |
43 | 59 | ||
44 | static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, | 60 | static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, |
45 | struct kvm *kvm, int level) | 61 | struct kvm *kvm, int irq_source_id, int level) |
46 | { | 62 | { |
47 | return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); | 63 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
64 | level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin], | ||
65 | irq_source_id, level); | ||
66 | |||
67 | return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level); | ||
48 | } | 68 | } |
49 | 69 | ||
50 | inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) | 70 | inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) |
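[Note on the hunk above] kvm_irq_line_state() is the factored-out shared-line rule: a level-triggered wire must stay asserted while any source still drives it, so each source gets one bit and the reported level is the OR of all bits. A self-contained, single-threaded sketch (the kernel uses atomic set_bit()/clear_bit() on the per-pin words added to the PIC and IOAPIC structs):

#include <assert.h>

/* one bit per interrupt source; the wire level is the OR of all bits */
static int irq_line_state(unsigned long *state, int src, int level)
{
        if (level)
                *state |= 1UL << src;
        else
                *state &= ~(1UL << src);
        return *state != 0;
}

int main(void)
{
        unsigned long line = 0;

        assert(irq_line_state(&line, 0, 1) == 1);  /* source 0 asserts    */
        assert(irq_line_state(&line, 1, 1) == 1);  /* source 1 asserts    */
        assert(irq_line_state(&line, 0, 0) == 1);  /* still held by src 1 */
        assert(irq_line_state(&line, 1, 0) == 0);  /* last source drops   */
        return 0;
}

Moving this per-line state from kvm->arch.irq_states into the PIC and IOAPIC themselves is part of what lets kvm_set_irq() below run without kvm->irq_lock.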
@@ -63,8 +83,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
63 | int i, r = -1; | 83 | int i, r = -1; |
64 | struct kvm_vcpu *vcpu, *lowest = NULL; | 84 | struct kvm_vcpu *vcpu, *lowest = NULL; |
65 | 85 | ||
66 | WARN_ON(!mutex_is_locked(&kvm->irq_lock)); | ||
67 | |||
68 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && | 86 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && |
69 | kvm_is_dm_lowest_prio(irq)) | 87 | kvm_is_dm_lowest_prio(irq)) |
70 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); | 88 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); |
@@ -96,10 +114,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
96 | } | 114 | } |
97 | 115 | ||
98 | static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | 116 | static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, |
99 | struct kvm *kvm, int level) | 117 | struct kvm *kvm, int irq_source_id, int level) |
100 | { | 118 | { |
101 | struct kvm_lapic_irq irq; | 119 | struct kvm_lapic_irq irq; |
102 | 120 | ||
121 | if (!level) | ||
122 | return -1; | ||
123 | |||
103 | trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); | 124 | trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); |
104 | 125 | ||
105 | irq.dest_id = (e->msi.address_lo & | 126 | irq.dest_id = (e->msi.address_lo & |
@@ -116,78 +137,67 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | |||
116 | return kvm_irq_delivery_to_apic(kvm, NULL, &irq); | 137 | return kvm_irq_delivery_to_apic(kvm, NULL, &irq); |
117 | } | 138 | } |
118 | 139 | ||
119 | /* This should be called with the kvm->irq_lock mutex held | 140 | /* |
120 | * Return value: | 141 | * Return value: |
121 | * < 0 Interrupt was ignored (masked or not delivered for other reasons) | 142 | * < 0 Interrupt was ignored (masked or not delivered for other reasons) |
122 | * = 0 Interrupt was coalesced (previous irq is still pending) | 143 | * = 0 Interrupt was coalesced (previous irq is still pending) |
123 | * > 0 Number of CPUs interrupt was delivered to | 144 | * > 0 Number of CPUs interrupt was delivered to |
124 | */ | 145 | */ |
125 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) | 146 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) |
126 | { | 147 | { |
127 | struct kvm_kernel_irq_routing_entry *e; | 148 | struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; |
128 | unsigned long *irq_state, sig_level; | 149 | int ret = -1, i = 0; |
129 | int ret = -1; | 150 | struct kvm_irq_routing_table *irq_rt; |
151 | struct hlist_node *n; | ||
130 | 152 | ||
131 | trace_kvm_set_irq(irq, level, irq_source_id); | 153 | trace_kvm_set_irq(irq, level, irq_source_id); |
132 | 154 | ||
133 | WARN_ON(!mutex_is_locked(&kvm->irq_lock)); | ||
134 | |||
135 | if (irq < KVM_IOAPIC_NUM_PINS) { | ||
136 | irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; | ||
137 | |||
138 | /* Logical OR for level trig interrupt */ | ||
139 | if (level) | ||
140 | set_bit(irq_source_id, irq_state); | ||
141 | else | ||
142 | clear_bit(irq_source_id, irq_state); | ||
143 | sig_level = !!(*irq_state); | ||
144 | } else if (!level) | ||
145 | return ret; | ||
146 | else /* Deal with MSI/MSI-X */ | ||
147 | sig_level = 1; | ||
148 | |||
149 | /* Not possible to detect if the guest uses the PIC or the | 155 | /* Not possible to detect if the guest uses the PIC or the |
150 | * IOAPIC. So set the bit in both. The guest will ignore | 156 | * IOAPIC. So set the bit in both. The guest will ignore |
151 | * writes to the unused one. | 157 | * writes to the unused one. |
152 | */ | 158 | */ |
153 | list_for_each_entry(e, &kvm->irq_routing, link) | 159 | rcu_read_lock(); |
154 | if (e->gsi == irq) { | 160 | irq_rt = rcu_dereference(kvm->irq_routing); |
155 | int r = e->set(e, kvm, sig_level); | 161 | if (irq < irq_rt->nr_rt_entries) |
156 | if (r < 0) | 162 | hlist_for_each_entry(e, n, &irq_rt->map[irq], link) |
157 | continue; | 163 | irq_set[i++] = *e; |
164 | rcu_read_unlock(); | ||
165 | |||
166 | while(i--) { | ||
167 | int r; | ||
168 | r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level); | ||
169 | if (r < 0) | ||
170 | continue; | ||
171 | |||
172 | ret = r + ((ret < 0) ? 0 : ret); | ||
173 | } | ||
158 | 174 | ||
159 | ret = r + ((ret < 0) ? 0 : ret); | ||
160 | } | ||
161 | return ret; | 175 | return ret; |
162 | } | 176 | } |
163 | 177 | ||
164 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | 178 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) |
165 | { | 179 | { |
166 | struct kvm_kernel_irq_routing_entry *e; | ||
167 | struct kvm_irq_ack_notifier *kian; | 180 | struct kvm_irq_ack_notifier *kian; |
168 | struct hlist_node *n; | 181 | struct hlist_node *n; |
169 | unsigned gsi = pin; | 182 | int gsi; |
170 | 183 | ||
171 | trace_kvm_ack_irq(irqchip, pin); | 184 | trace_kvm_ack_irq(irqchip, pin); |
172 | 185 | ||
173 | list_for_each_entry(e, &kvm->irq_routing, link) | 186 | rcu_read_lock(); |
174 | if (e->type == KVM_IRQ_ROUTING_IRQCHIP && | 187 | gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; |
175 | e->irqchip.irqchip == irqchip && | 188 | if (gsi != -1) |
176 | e->irqchip.pin == pin) { | 189 | hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, |
177 | gsi = e->gsi; | 190 | link) |
178 | break; | 191 | if (kian->gsi == gsi) |
179 | } | 192 | kian->irq_acked(kian); |
180 | 193 | rcu_read_unlock(); | |
181 | hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link) | ||
182 | if (kian->gsi == gsi) | ||
183 | kian->irq_acked(kian); | ||
184 | } | 194 | } |
185 | 195 | ||
186 | void kvm_register_irq_ack_notifier(struct kvm *kvm, | 196 | void kvm_register_irq_ack_notifier(struct kvm *kvm, |
187 | struct kvm_irq_ack_notifier *kian) | 197 | struct kvm_irq_ack_notifier *kian) |
188 | { | 198 | { |
189 | mutex_lock(&kvm->irq_lock); | 199 | mutex_lock(&kvm->irq_lock); |
190 | hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); | 200 | hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); |
191 | mutex_unlock(&kvm->irq_lock); | 201 | mutex_unlock(&kvm->irq_lock); |
192 | } | 202 | } |
193 | 203 | ||
@@ -195,8 +205,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, | |||
195 | struct kvm_irq_ack_notifier *kian) | 205 | struct kvm_irq_ack_notifier *kian) |
196 | { | 206 | { |
197 | mutex_lock(&kvm->irq_lock); | 207 | mutex_lock(&kvm->irq_lock); |
198 | hlist_del_init(&kian->link); | 208 | hlist_del_init_rcu(&kian->link); |
199 | mutex_unlock(&kvm->irq_lock); | 209 | mutex_unlock(&kvm->irq_lock); |
210 | synchronize_rcu(); | ||
200 | } | 211 | } |
201 | 212 | ||
202 | int kvm_request_irq_source_id(struct kvm *kvm) | 213 | int kvm_request_irq_source_id(struct kvm *kvm) |
@@ -205,16 +216,17 @@ int kvm_request_irq_source_id(struct kvm *kvm) | |||
205 | int irq_source_id; | 216 | int irq_source_id; |
206 | 217 | ||
207 | mutex_lock(&kvm->irq_lock); | 218 | mutex_lock(&kvm->irq_lock); |
208 | irq_source_id = find_first_zero_bit(bitmap, | 219 | irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG); |
209 | sizeof(kvm->arch.irq_sources_bitmap)); | ||
210 | 220 | ||
211 | if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { | 221 | if (irq_source_id >= BITS_PER_LONG) { |
212 | printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); | 222 | printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); |
213 | return -EFAULT; | 223 | irq_source_id = -EFAULT; |
224 | goto unlock; | ||
214 | } | 225 | } |
215 | 226 | ||
216 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); | 227 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); |
217 | set_bit(irq_source_id, bitmap); | 228 | set_bit(irq_source_id, bitmap); |
229 | unlock: | ||
218 | mutex_unlock(&kvm->irq_lock); | 230 | mutex_unlock(&kvm->irq_lock); |
219 | 231 | ||
220 | return irq_source_id; | 232 | return irq_source_id; |
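[Note on the hunk above] The allocator now scans BITS_PER_LONG bits; the old code passed sizeof(kvm->arch.irq_sources_bitmap), which counts bytes, not bits, so only 8 source IDs were ever usable on a 64-bit build. The error path also now releases irq_lock via the new unlock label instead of returning with it held. The same allocator in miniature, runnable:

#include <limits.h>
#include <stdio.h>

static unsigned long used;   /* one bit per allocated source id */
#define NBITS (sizeof(used) * CHAR_BIT)

static int alloc_id(void)    /* ~ find_first_zero_bit() + set_bit() */
{
        for (unsigned i = 0; i < NBITS; i++)
                if (!(used & (1UL << i))) {
                        used |= 1UL << i;
                        return (int)i;
                }
        return -1;   /* exhausted, like the -EFAULT path above */
}

static void free_id(int id)
{
        if (id >= 0 && id < (int)NBITS)
                used &= ~(1UL << id);
}

int main(void)
{
        printf("%d %d\n", alloc_id(), alloc_id());  /* 0 1     */
        free_id(0);
        printf("%d\n", alloc_id());                 /* 0 again */
        return 0;
}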
@@ -228,13 +240,23 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) | |||
228 | 240 | ||
229 | mutex_lock(&kvm->irq_lock); | 241 | mutex_lock(&kvm->irq_lock); |
230 | if (irq_source_id < 0 || | 242 | if (irq_source_id < 0 || |
231 | irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { | 243 | irq_source_id >= BITS_PER_LONG) { |
232 | printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); | 244 | printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); |
233 | return; | 245 | goto unlock; |
234 | } | 246 | } |
235 | for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) | ||
236 | clear_bit(irq_source_id, &kvm->arch.irq_states[i]); | ||
237 | clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); | 247 | clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); |
248 | if (!irqchip_in_kernel(kvm)) | ||
249 | goto unlock; | ||
250 | |||
251 | for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) { | ||
252 | clear_bit(irq_source_id, &kvm->arch.vioapic->irq_states[i]); | ||
253 | if (i >= 16) | ||
254 | continue; | ||
255 | #ifdef CONFIG_X86 | ||
256 | clear_bit(irq_source_id, &pic_irqchip(kvm)->irq_states[i]); | ||
257 | #endif | ||
258 | } | ||
259 | unlock: | ||
238 | mutex_unlock(&kvm->irq_lock); | 260 | mutex_unlock(&kvm->irq_lock); |
239 | } | 261 | } |
240 | 262 | ||
@@ -243,7 +265,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, | |||
243 | { | 265 | { |
244 | mutex_lock(&kvm->irq_lock); | 266 | mutex_lock(&kvm->irq_lock); |
245 | kimn->irq = irq; | 267 | kimn->irq = irq; |
246 | hlist_add_head(&kimn->link, &kvm->mask_notifier_list); | 268 | hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list); |
247 | mutex_unlock(&kvm->irq_lock); | 269 | mutex_unlock(&kvm->irq_lock); |
248 | } | 270 | } |
249 | 271 | ||
@@ -251,8 +273,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, | |||
251 | struct kvm_irq_mask_notifier *kimn) | 273 | struct kvm_irq_mask_notifier *kimn) |
252 | { | 274 | { |
253 | mutex_lock(&kvm->irq_lock); | 275 | mutex_lock(&kvm->irq_lock); |
254 | hlist_del(&kimn->link); | 276 | hlist_del_rcu(&kimn->link); |
255 | mutex_unlock(&kvm->irq_lock); | 277 | mutex_unlock(&kvm->irq_lock); |
278 | synchronize_rcu(); | ||
256 | } | 279 | } |
257 | 280 | ||
258 | void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) | 281 | void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) |
@@ -260,33 +283,38 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) | |||
260 | struct kvm_irq_mask_notifier *kimn; | 283 | struct kvm_irq_mask_notifier *kimn; |
261 | struct hlist_node *n; | 284 | struct hlist_node *n; |
262 | 285 | ||
263 | WARN_ON(!mutex_is_locked(&kvm->irq_lock)); | 286 | rcu_read_lock(); |
264 | 287 | hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link) | |
265 | hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link) | ||
266 | if (kimn->irq == irq) | 288 | if (kimn->irq == irq) |
267 | kimn->func(kimn, mask); | 289 | kimn->func(kimn, mask); |
268 | } | 290 | rcu_read_unlock(); |
269 | |||
270 | static void __kvm_free_irq_routing(struct list_head *irq_routing) | ||
271 | { | ||
272 | struct kvm_kernel_irq_routing_entry *e, *n; | ||
273 | |||
274 | list_for_each_entry_safe(e, n, irq_routing, link) | ||
275 | kfree(e); | ||
276 | } | 291 | } |
277 | 292 | ||
278 | void kvm_free_irq_routing(struct kvm *kvm) | 293 | void kvm_free_irq_routing(struct kvm *kvm) |
279 | { | 294 | { |
280 | mutex_lock(&kvm->irq_lock); | 295 | /* Called only during vm destruction. Nobody can use the pointer |
281 | __kvm_free_irq_routing(&kvm->irq_routing); | 296 | at this stage */ |
282 | mutex_unlock(&kvm->irq_lock); | 297 | kfree(kvm->irq_routing); |
283 | } | 298 | } |
284 | 299 | ||
285 | static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, | 300 | static int setup_routing_entry(struct kvm_irq_routing_table *rt, |
301 | struct kvm_kernel_irq_routing_entry *e, | ||
286 | const struct kvm_irq_routing_entry *ue) | 302 | const struct kvm_irq_routing_entry *ue) |
287 | { | 303 | { |
288 | int r = -EINVAL; | 304 | int r = -EINVAL; |
289 | int delta; | 305 | int delta; |
306 | unsigned max_pin; | ||
307 | struct kvm_kernel_irq_routing_entry *ei; | ||
308 | struct hlist_node *n; | ||
309 | |||
310 | /* | ||
311 | * Do not allow GSI to be mapped to the same irqchip more than once. | ||
312 | * Allow only one to one mapping between GSI and MSI. | ||
313 | */ | ||
314 | hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link) | ||
315 | if (ei->type == KVM_IRQ_ROUTING_MSI || | ||
316 | ue->u.irqchip.irqchip == ei->irqchip.irqchip) | ||
317 | return r; | ||
290 | 318 | ||
291 | e->gsi = ue->gsi; | 319 | e->gsi = ue->gsi; |
292 | e->type = ue->type; | 320 | e->type = ue->type; |
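[Note on the hunk above] Before linking the new entry into rt->map[gsi], setup_routing_entry() scans the chain already hanging off that GSI and rejects the update if the GSI is already bound to the same irqchip or if MSI is involved; per the comment, a GSI may fan out to several chips, but only once per chip, and a GSI carrying MSI is strictly one-to-one. A small sketch of that duplicate check over per-GSI chains (a simplified reading of the test; illustrative types):

#include <stdlib.h>

enum rtype { T_IRQCHIP, T_MSI };

struct entry {
        enum rtype type;
        int chip;                /* meaningful for T_IRQCHIP only */
        struct entry *next;
};

static struct entry *map[24];    /* one chain per GSI, as rt->map[] */

/* a GSI may fan out to several chips, but only once per chip,
 * and a GSI used for MSI cannot be shared at all */
static int add_route(int gsi, enum rtype type, int chip)
{
        struct entry *e;

        for (e = map[gsi]; e; e = e->next)
                if (type == T_MSI || e->type == T_MSI || e->chip == chip)
                        return -1;

        e = calloc(1, sizeof(*e));
        if (!e)
                return -1;
        e->type = type;
        e->chip = chip;
        e->next = map[gsi];
        map[gsi] = e;
        return 0;
}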
@@ -296,12 +324,15 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, | |||
296 | switch (ue->u.irqchip.irqchip) { | 324 | switch (ue->u.irqchip.irqchip) { |
297 | case KVM_IRQCHIP_PIC_MASTER: | 325 | case KVM_IRQCHIP_PIC_MASTER: |
298 | e->set = kvm_set_pic_irq; | 326 | e->set = kvm_set_pic_irq; |
327 | max_pin = 16; | ||
299 | break; | 328 | break; |
300 | case KVM_IRQCHIP_PIC_SLAVE: | 329 | case KVM_IRQCHIP_PIC_SLAVE: |
301 | e->set = kvm_set_pic_irq; | 330 | e->set = kvm_set_pic_irq; |
331 | max_pin = 16; | ||
302 | delta = 8; | 332 | delta = 8; |
303 | break; | 333 | break; |
304 | case KVM_IRQCHIP_IOAPIC: | 334 | case KVM_IRQCHIP_IOAPIC: |
335 | max_pin = KVM_IOAPIC_NUM_PINS; | ||
305 | e->set = kvm_set_ioapic_irq; | 336 | e->set = kvm_set_ioapic_irq; |
306 | break; | 337 | break; |
307 | default: | 338 | default: |
@@ -309,6 +340,9 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, | |||
309 | } | 340 | } |
310 | e->irqchip.irqchip = ue->u.irqchip.irqchip; | 341 | e->irqchip.irqchip = ue->u.irqchip.irqchip; |
311 | e->irqchip.pin = ue->u.irqchip.pin + delta; | 342 | e->irqchip.pin = ue->u.irqchip.pin + delta; |
343 | if (e->irqchip.pin >= max_pin) | ||
344 | goto out; | ||
345 | rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; | ||
312 | break; | 346 | break; |
313 | case KVM_IRQ_ROUTING_MSI: | 347 | case KVM_IRQ_ROUTING_MSI: |
314 | e->set = kvm_set_msi; | 348 | e->set = kvm_set_msi; |
@@ -319,6 +353,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, | |||
319 | default: | 353 | default: |
320 | goto out; | 354 | goto out; |
321 | } | 355 | } |
356 | |||
357 | hlist_add_head(&e->link, &rt->map[e->gsi]); | ||
322 | r = 0; | 358 | r = 0; |
323 | out: | 359 | out: |
324 | return r; | 360 | return r; |
@@ -330,43 +366,53 @@ int kvm_set_irq_routing(struct kvm *kvm, | |||
330 | unsigned nr, | 366 | unsigned nr, |
331 | unsigned flags) | 367 | unsigned flags) |
332 | { | 368 | { |
333 | struct list_head irq_list = LIST_HEAD_INIT(irq_list); | 369 | struct kvm_irq_routing_table *new, *old; |
334 | struct list_head tmp = LIST_HEAD_INIT(tmp); | 370 | u32 i, j, nr_rt_entries = 0; |
335 | struct kvm_kernel_irq_routing_entry *e = NULL; | ||
336 | unsigned i; | ||
337 | int r; | 371 | int r; |
338 | 372 | ||
339 | for (i = 0; i < nr; ++i) { | 373 | for (i = 0; i < nr; ++i) { |
374 | if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) | ||
375 | return -EINVAL; | ||
376 | nr_rt_entries = max(nr_rt_entries, ue[i].gsi); | ||
377 | } | ||
378 | |||
379 | nr_rt_entries += 1; | ||
380 | |||
381 | new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) | ||
382 | + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), | ||
383 | GFP_KERNEL); | ||
384 | |||
385 | if (!new) | ||
386 | return -ENOMEM; | ||
387 | |||
388 | new->rt_entries = (void *)&new->map[nr_rt_entries]; | ||
389 | |||
390 | new->nr_rt_entries = nr_rt_entries; | ||
391 | for (i = 0; i < 3; i++) | ||
392 | for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++) | ||
393 | new->chip[i][j] = -1; | ||
394 | |||
395 | for (i = 0; i < nr; ++i) { | ||
340 | r = -EINVAL; | 396 | r = -EINVAL; |
341 | if (ue->gsi >= KVM_MAX_IRQ_ROUTES) | ||
342 | goto out; | ||
343 | if (ue->flags) | 397 | if (ue->flags) |
344 | goto out; | 398 | goto out; |
345 | r = -ENOMEM; | 399 | r = setup_routing_entry(new, &new->rt_entries[i], ue); |
346 | e = kzalloc(sizeof(*e), GFP_KERNEL); | ||
347 | if (!e) | ||
348 | goto out; | ||
349 | r = setup_routing_entry(e, ue); | ||
350 | if (r) | 400 | if (r) |
351 | goto out; | 401 | goto out; |
352 | ++ue; | 402 | ++ue; |
353 | list_add(&e->link, &irq_list); | ||
354 | e = NULL; | ||
355 | } | 403 | } |
356 | 404 | ||
357 | mutex_lock(&kvm->irq_lock); | 405 | mutex_lock(&kvm->irq_lock); |
358 | list_splice(&kvm->irq_routing, &tmp); | 406 | old = kvm->irq_routing; |
359 | INIT_LIST_HEAD(&kvm->irq_routing); | 407 | rcu_assign_pointer(kvm->irq_routing, new); |
360 | list_splice(&irq_list, &kvm->irq_routing); | ||
361 | INIT_LIST_HEAD(&irq_list); | ||
362 | list_splice(&tmp, &irq_list); | ||
363 | mutex_unlock(&kvm->irq_lock); | 408 | mutex_unlock(&kvm->irq_lock); |
409 | synchronize_rcu(); | ||
364 | 410 | ||
411 | new = old; | ||
365 | r = 0; | 412 | r = 0; |
366 | 413 | ||
367 | out: | 414 | out: |
368 | kfree(e); | 415 | kfree(new); |
369 | __kvm_free_irq_routing(&irq_list); | ||
370 | return r; | 416 | return r; |
371 | } | 417 | } |
372 | 418 | ||
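[Note on the kvm_set_irq_routing() rework] The replacement builds the entire table in one kzalloc(): the header, nr_rt_entries hlist heads, then the nr routing entries, with new->rt_entries aimed just past the map array. The table is published with rcu_assign_pointer() under irq_lock, synchronize_rcu() waits out the readers, and the "new = old" swap lets the single kfree(new) at out: serve both the success path (freeing the retired table) and every error path (freeing the half-built one). A userspace sketch of the single-block layout, with illustrative names:

#include <stdlib.h>

struct head { void *first; };
struct rte  { int gsi; /* ... routing entry fields ... */ };

struct table {
        unsigned nr;
        struct rte *entries;     /* points into this same allocation */
        struct head map[];       /* nr list heads, then the entries  */
};

static struct table *alloc_table(unsigned nr_heads, unsigned nr_entries)
{
        struct table *t = calloc(1, sizeof(*t)
                                    + nr_heads * sizeof(struct head)
                                    + nr_entries * sizeof(struct rte));
        if (!t)
                return NULL;
        t->nr = nr_heads;
        t->entries = (struct rte *)&t->map[nr_heads];  /* new->rt_entries */
        return t;   /* one free(t) later releases heads and entries both */
}

One allocation also means one pointer to publish and one to retire, which is what keeps the RCU swap this simple.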
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7495ce347344..c82ae2492634 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
23 | #include <linux/errno.h> | 23 | #include <linux/errno.h> |
24 | #include <linux/percpu.h> | 24 | #include <linux/percpu.h> |
25 | #include <linux/gfp.h> | ||
26 | #include <linux/mm.h> | 25 | #include <linux/mm.h> |
27 | #include <linux/miscdevice.h> | 26 | #include <linux/miscdevice.h> |
28 | #include <linux/vmalloc.h> | 27 | #include <linux/vmalloc.h> |
@@ -43,21 +42,18 @@ | |||
43 | #include <linux/swap.h> | 42 | #include <linux/swap.h> |
44 | #include <linux/bitops.h> | 43 | #include <linux/bitops.h> |
45 | #include <linux/spinlock.h> | 44 | #include <linux/spinlock.h> |
45 | #include <linux/compat.h> | ||
46 | #include <linux/srcu.h> | ||
47 | #include <linux/hugetlb.h> | ||
48 | #include <linux/slab.h> | ||
46 | 49 | ||
47 | #include <asm/processor.h> | 50 | #include <asm/processor.h> |
48 | #include <asm/io.h> | 51 | #include <asm/io.h> |
49 | #include <asm/uaccess.h> | 52 | #include <asm/uaccess.h> |
50 | #include <asm/pgtable.h> | 53 | #include <asm/pgtable.h> |
54 | #include <asm-generic/bitops/le.h> | ||
51 | 55 | ||
52 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | ||
53 | #include "coalesced_mmio.h" | 56 | #include "coalesced_mmio.h" |
54 | #endif | ||
55 | |||
56 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | ||
57 | #include <linux/pci.h> | ||
58 | #include <linux/interrupt.h> | ||
59 | #include "irq.h" | ||
60 | #endif | ||
61 | 57 | ||
62 | #define CREATE_TRACE_POINTS | 58 | #define CREATE_TRACE_POINTS |
63 | #include <trace/events/kvm.h> | 59 | #include <trace/events/kvm.h> |
@@ -68,13 +64,15 @@ MODULE_LICENSE("GPL"); | |||
68 | /* | 64 | /* |
69 | * Ordering of locks: | 65 | * Ordering of locks: |
70 | * | 66 | * |
71 | * kvm->slots_lock --> kvm->lock --> kvm->irq_lock | 67 | * kvm->lock --> kvm->slots_lock --> kvm->irq_lock |
72 | */ | 68 | */ |
73 | 69 | ||
74 | DEFINE_SPINLOCK(kvm_lock); | 70 | DEFINE_SPINLOCK(kvm_lock); |
75 | LIST_HEAD(vm_list); | 71 | LIST_HEAD(vm_list); |
76 | 72 | ||
77 | static cpumask_var_t cpus_hardware_enabled; | 73 | static cpumask_var_t cpus_hardware_enabled; |
74 | static int kvm_usage_count = 0; | ||
75 | static atomic_t hardware_enable_failed; | ||
78 | 76 | ||
79 | struct kmem_cache *kvm_vcpu_cache; | 77 | struct kmem_cache *kvm_vcpu_cache; |
80 | EXPORT_SYMBOL_GPL(kvm_vcpu_cache); | 78 | EXPORT_SYMBOL_GPL(kvm_vcpu_cache); |
@@ -85,615 +83,15 @@ struct dentry *kvm_debugfs_dir; | |||
85 | 83 | ||
86 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, | 84 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, |
87 | unsigned long arg); | 85 | unsigned long arg); |
86 | static int hardware_enable_all(void); | ||
87 | static void hardware_disable_all(void); | ||
88 | |||
89 | static void kvm_io_bus_destroy(struct kvm_io_bus *bus); | ||
88 | 90 | ||
89 | static bool kvm_rebooting; | 91 | static bool kvm_rebooting; |
90 | 92 | ||
91 | static bool largepages_enabled = true; | 93 | static bool largepages_enabled = true; |
92 | 94 | ||
93 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | ||
94 | static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, | ||
95 | int assigned_dev_id) | ||
96 | { | ||
97 | struct list_head *ptr; | ||
98 | struct kvm_assigned_dev_kernel *match; | ||
99 | |||
100 | list_for_each(ptr, head) { | ||
101 | match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); | ||
102 | if (match->assigned_dev_id == assigned_dev_id) | ||
103 | return match; | ||
104 | } | ||
105 | return NULL; | ||
106 | } | ||
107 | |||
108 | static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | ||
109 | *assigned_dev, int irq) | ||
110 | { | ||
111 | int i, index; | ||
112 | struct msix_entry *host_msix_entries; | ||
113 | |||
114 | host_msix_entries = assigned_dev->host_msix_entries; | ||
115 | |||
116 | index = -1; | ||
117 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
118 | if (irq == host_msix_entries[i].vector) { | ||
119 | index = i; | ||
120 | break; | ||
121 | } | ||
122 | if (index < 0) { | ||
123 | printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); | ||
124 | return 0; | ||
125 | } | ||
126 | |||
127 | return index; | ||
128 | } | ||
129 | |||
130 | static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) | ||
131 | { | ||
132 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
133 | struct kvm *kvm; | ||
134 | int i; | ||
135 | |||
136 | assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, | ||
137 | interrupt_work); | ||
138 | kvm = assigned_dev->kvm; | ||
139 | |||
140 | mutex_lock(&kvm->irq_lock); | ||
141 | spin_lock_irq(&assigned_dev->assigned_dev_lock); | ||
142 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
143 | struct kvm_guest_msix_entry *guest_entries = | ||
144 | assigned_dev->guest_msix_entries; | ||
145 | for (i = 0; i < assigned_dev->entries_nr; i++) { | ||
146 | if (!(guest_entries[i].flags & | ||
147 | KVM_ASSIGNED_MSIX_PENDING)) | ||
148 | continue; | ||
149 | guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; | ||
150 | kvm_set_irq(assigned_dev->kvm, | ||
151 | assigned_dev->irq_source_id, | ||
152 | guest_entries[i].vector, 1); | ||
153 | } | ||
154 | } else | ||
155 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | ||
156 | assigned_dev->guest_irq, 1); | ||
157 | |||
158 | spin_unlock_irq(&assigned_dev->assigned_dev_lock); | ||
159 | mutex_unlock(&assigned_dev->kvm->irq_lock); | ||
160 | } | ||
161 | |||
162 | static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) | ||
163 | { | ||
164 | unsigned long flags; | ||
165 | struct kvm_assigned_dev_kernel *assigned_dev = | ||
166 | (struct kvm_assigned_dev_kernel *) dev_id; | ||
167 | |||
168 | spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); | ||
169 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
170 | int index = find_index_from_host_irq(assigned_dev, irq); | ||
171 | if (index < 0) | ||
172 | goto out; | ||
173 | assigned_dev->guest_msix_entries[index].flags |= | ||
174 | KVM_ASSIGNED_MSIX_PENDING; | ||
175 | } | ||
176 | |||
177 | schedule_work(&assigned_dev->interrupt_work); | ||
178 | |||
179 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { | ||
180 | disable_irq_nosync(irq); | ||
181 | assigned_dev->host_irq_disabled = true; | ||
182 | } | ||
183 | |||
184 | out: | ||
185 | spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); | ||
186 | return IRQ_HANDLED; | ||
187 | } | ||
188 | |||
189 | /* Ack the irq line for an assigned device */ | ||
190 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | ||
191 | { | ||
192 | struct kvm_assigned_dev_kernel *dev; | ||
193 | unsigned long flags; | ||
194 | |||
195 | if (kian->gsi == -1) | ||
196 | return; | ||
197 | |||
198 | dev = container_of(kian, struct kvm_assigned_dev_kernel, | ||
199 | ack_notifier); | ||
200 | |||
201 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); | ||
202 | |||
203 | /* The guest irq may be shared so this ack may be | ||
204 | * from another device. | ||
205 | */ | ||
206 | spin_lock_irqsave(&dev->assigned_dev_lock, flags); | ||
207 | if (dev->host_irq_disabled) { | ||
208 | enable_irq(dev->host_irq); | ||
209 | dev->host_irq_disabled = false; | ||
210 | } | ||
211 | spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); | ||
212 | } | ||
213 | |||
214 | static void deassign_guest_irq(struct kvm *kvm, | ||
215 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
216 | { | ||
217 | kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); | ||
218 | assigned_dev->ack_notifier.gsi = -1; | ||
219 | |||
220 | if (assigned_dev->irq_source_id != -1) | ||
221 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); | ||
222 | assigned_dev->irq_source_id = -1; | ||
223 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); | ||
224 | } | ||
225 | |||
226 | /* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ | ||
227 | static void deassign_host_irq(struct kvm *kvm, | ||
228 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
229 | { | ||
230 | /* | ||
231 | * In kvm_free_device_irq, cancel_work_sync return true if: | ||
232 | * 1. work is scheduled, and then cancelled. | ||
233 | * 2. work callback is executed. | ||
234 | * | ||
235 | * The first one ensured that the irq is disabled and no more events | ||
236 | * would happen. But for the second one, the irq may be enabled (e.g. | ||
237 | * for MSI). So we disable irq here to prevent further events. | ||
238 | * | ||
239 | * Notice this maybe result in nested disable if the interrupt type is | ||
240 | * INTx, but it's OK for we are going to free it. | ||
241 | * | ||
242 | * If this function is a part of VM destroy, please ensure that till | ||
243 | * now, the kvm state is still legal for probably we also have to wait | ||
244 | * interrupt_work done. | ||
245 | */ | ||
246 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
247 | int i; | ||
248 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
249 | disable_irq_nosync(assigned_dev-> | ||
250 | host_msix_entries[i].vector); | ||
251 | |||
252 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
253 | |||
254 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
255 | free_irq(assigned_dev->host_msix_entries[i].vector, | ||
256 | (void *)assigned_dev); | ||
257 | |||
258 | assigned_dev->entries_nr = 0; | ||
259 | kfree(assigned_dev->host_msix_entries); | ||
260 | kfree(assigned_dev->guest_msix_entries); | ||
261 | pci_disable_msix(assigned_dev->dev); | ||
262 | } else { | ||
263 | /* Deal with MSI and INTx */ | ||
264 | disable_irq_nosync(assigned_dev->host_irq); | ||
265 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
266 | |||
267 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); | ||
268 | |||
269 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) | ||
270 | pci_disable_msi(assigned_dev->dev); | ||
271 | } | ||
272 | |||
273 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); | ||
274 | } | ||
275 | |||
276 | static int kvm_deassign_irq(struct kvm *kvm, | ||
277 | struct kvm_assigned_dev_kernel *assigned_dev, | ||
278 | unsigned long irq_requested_type) | ||
279 | { | ||
280 | unsigned long guest_irq_type, host_irq_type; | ||
281 | |||
282 | if (!irqchip_in_kernel(kvm)) | ||
283 | return -EINVAL; | ||
284 | /* no irq assignment to deassign */ | ||
285 | if (!assigned_dev->irq_requested_type) | ||
286 | return -ENXIO; | ||
287 | |||
288 | host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; | ||
289 | guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; | ||
290 | |||
291 | if (host_irq_type) | ||
292 | deassign_host_irq(kvm, assigned_dev); | ||
293 | if (guest_irq_type) | ||
294 | deassign_guest_irq(kvm, assigned_dev); | ||
295 | |||
296 | return 0; | ||
297 | } | ||
298 | |||
299 | static void kvm_free_assigned_irq(struct kvm *kvm, | ||
300 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
301 | { | ||
302 | kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); | ||
303 | } | ||
304 | |||
305 | static void kvm_free_assigned_device(struct kvm *kvm, | ||
306 | struct kvm_assigned_dev_kernel | ||
307 | *assigned_dev) | ||
308 | { | ||
309 | kvm_free_assigned_irq(kvm, assigned_dev); | ||
310 | |||
311 | pci_reset_function(assigned_dev->dev); | ||
312 | |||
313 | pci_release_regions(assigned_dev->dev); | ||
314 | pci_disable_device(assigned_dev->dev); | ||
315 | pci_dev_put(assigned_dev->dev); | ||
316 | |||
317 | list_del(&assigned_dev->list); | ||
318 | kfree(assigned_dev); | ||
319 | } | ||
320 | |||
321 | void kvm_free_all_assigned_devices(struct kvm *kvm) | ||
322 | { | ||
323 | struct list_head *ptr, *ptr2; | ||
324 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
325 | |||
326 | list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { | ||
327 | assigned_dev = list_entry(ptr, | ||
328 | struct kvm_assigned_dev_kernel, | ||
329 | list); | ||
330 | |||
331 | kvm_free_assigned_device(kvm, assigned_dev); | ||
332 | } | ||
333 | } | ||
334 | |||
335 | static int assigned_device_enable_host_intx(struct kvm *kvm, | ||
336 | struct kvm_assigned_dev_kernel *dev) | ||
337 | { | ||
338 | dev->host_irq = dev->dev->irq; | ||
339 | /* Even though this is PCI, we don't want to use shared | ||
340 | * interrupts. Sharing host devices with guest-assigned devices | ||
341 | * on the same interrupt line is not a happy situation: there | ||
342 | * are going to be long delays in accepting, acking, etc. | ||
343 | */ | ||
344 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, | ||
345 | 0, "kvm_assigned_intx_device", (void *)dev)) | ||
346 | return -EIO; | ||
347 | return 0; | ||
348 | } | ||
349 | |||
350 | #ifdef __KVM_HAVE_MSI | ||
351 | static int assigned_device_enable_host_msi(struct kvm *kvm, | ||
352 | struct kvm_assigned_dev_kernel *dev) | ||
353 | { | ||
354 | int r; | ||
355 | |||
356 | if (!dev->dev->msi_enabled) { | ||
357 | r = pci_enable_msi(dev->dev); | ||
358 | if (r) | ||
359 | return r; | ||
360 | } | ||
361 | |||
362 | dev->host_irq = dev->dev->irq; | ||
363 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, | ||
364 | "kvm_assigned_msi_device", (void *)dev)) { | ||
365 | pci_disable_msi(dev->dev); | ||
366 | return -EIO; | ||
367 | } | ||
368 | |||
369 | return 0; | ||
370 | } | ||
371 | #endif | ||
372 | |||
373 | #ifdef __KVM_HAVE_MSIX | ||
374 | static int assigned_device_enable_host_msix(struct kvm *kvm, | ||
375 | struct kvm_assigned_dev_kernel *dev) | ||
376 | { | ||
377 | int i, r = -EINVAL; | ||
378 | |||
379 | /* host_msix_entries and guest_msix_entries should have been | ||
380 | * initialized */ | ||
381 | if (dev->entries_nr == 0) | ||
382 | return r; | ||
383 | |||
384 | r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); | ||
385 | if (r) | ||
386 | return r; | ||
387 | |||
388 | for (i = 0; i < dev->entries_nr; i++) { | ||
389 | r = request_irq(dev->host_msix_entries[i].vector, | ||
390 | kvm_assigned_dev_intr, 0, | ||
391 | "kvm_assigned_msix_device", | ||
392 | (void *)dev); | ||
393 | /* FIXME: free requested_irq's on failure */ | ||
394 | if (r) | ||
395 | return r; | ||
396 | } | ||
397 | |||
398 | return 0; | ||
399 | } | ||
400 | |||
401 | #endif | ||
402 | |||
403 | static int assigned_device_enable_guest_intx(struct kvm *kvm, | ||
404 | struct kvm_assigned_dev_kernel *dev, | ||
405 | struct kvm_assigned_irq *irq) | ||
406 | { | ||
407 | dev->guest_irq = irq->guest_irq; | ||
408 | dev->ack_notifier.gsi = irq->guest_irq; | ||
409 | return 0; | ||
410 | } | ||
411 | |||
412 | #ifdef __KVM_HAVE_MSI | ||
413 | static int assigned_device_enable_guest_msi(struct kvm *kvm, | ||
414 | struct kvm_assigned_dev_kernel *dev, | ||
415 | struct kvm_assigned_irq *irq) | ||
416 | { | ||
417 | dev->guest_irq = irq->guest_irq; | ||
418 | dev->ack_notifier.gsi = -1; | ||
419 | dev->host_irq_disabled = false; | ||
420 | return 0; | ||
421 | } | ||
422 | #endif | ||
423 | #ifdef __KVM_HAVE_MSIX | ||
424 | static int assigned_device_enable_guest_msix(struct kvm *kvm, | ||
425 | struct kvm_assigned_dev_kernel *dev, | ||
426 | struct kvm_assigned_irq *irq) | ||
427 | { | ||
428 | dev->guest_irq = irq->guest_irq; | ||
429 | dev->ack_notifier.gsi = -1; | ||
430 | dev->host_irq_disabled = false; | ||
431 | return 0; | ||
432 | } | ||
433 | #endif | ||
434 | |||
435 | static int assign_host_irq(struct kvm *kvm, | ||
436 | struct kvm_assigned_dev_kernel *dev, | ||
437 | __u32 host_irq_type) | ||
438 | { | ||
439 | int r = -EEXIST; | ||
440 | |||
441 | if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) | ||
442 | return r; | ||
443 | |||
444 | switch (host_irq_type) { | ||
445 | case KVM_DEV_IRQ_HOST_INTX: | ||
446 | r = assigned_device_enable_host_intx(kvm, dev); | ||
447 | break; | ||
448 | #ifdef __KVM_HAVE_MSI | ||
449 | case KVM_DEV_IRQ_HOST_MSI: | ||
450 | r = assigned_device_enable_host_msi(kvm, dev); | ||
451 | break; | ||
452 | #endif | ||
453 | #ifdef __KVM_HAVE_MSIX | ||
454 | case KVM_DEV_IRQ_HOST_MSIX: | ||
455 | r = assigned_device_enable_host_msix(kvm, dev); | ||
456 | break; | ||
457 | #endif | ||
458 | default: | ||
459 | r = -EINVAL; | ||
460 | } | ||
461 | |||
462 | if (!r) | ||
463 | dev->irq_requested_type |= host_irq_type; | ||
464 | |||
465 | return r; | ||
466 | } | ||
467 | |||
468 | static int assign_guest_irq(struct kvm *kvm, | ||
469 | struct kvm_assigned_dev_kernel *dev, | ||
470 | struct kvm_assigned_irq *irq, | ||
471 | unsigned long guest_irq_type) | ||
472 | { | ||
473 | int id; | ||
474 | int r = -EEXIST; | ||
475 | |||
476 | if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) | ||
477 | return r; | ||
478 | |||
479 | id = kvm_request_irq_source_id(kvm); | ||
480 | if (id < 0) | ||
481 | return id; | ||
482 | |||
483 | dev->irq_source_id = id; | ||
484 | |||
485 | switch (guest_irq_type) { | ||
486 | case KVM_DEV_IRQ_GUEST_INTX: | ||
487 | r = assigned_device_enable_guest_intx(kvm, dev, irq); | ||
488 | break; | ||
489 | #ifdef __KVM_HAVE_MSI | ||
490 | case KVM_DEV_IRQ_GUEST_MSI: | ||
491 | r = assigned_device_enable_guest_msi(kvm, dev, irq); | ||
492 | break; | ||
493 | #endif | ||
494 | #ifdef __KVM_HAVE_MSIX | ||
495 | case KVM_DEV_IRQ_GUEST_MSIX: | ||
496 | r = assigned_device_enable_guest_msix(kvm, dev, irq); | ||
497 | break; | ||
498 | #endif | ||
499 | default: | ||
500 | r = -EINVAL; | ||
501 | } | ||
502 | |||
503 | if (!r) { | ||
504 | dev->irq_requested_type |= guest_irq_type; | ||
505 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | ||
506 | } else | ||
507 | kvm_free_irq_source_id(kvm, dev->irq_source_id); | ||
508 | |||
509 | return r; | ||
510 | } | ||
511 | |||
512 | /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ | ||
513 | static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, | ||
514 | struct kvm_assigned_irq *assigned_irq) | ||
515 | { | ||
516 | int r = -EINVAL; | ||
517 | struct kvm_assigned_dev_kernel *match; | ||
518 | unsigned long host_irq_type, guest_irq_type; | ||
519 | |||
520 | if (!capable(CAP_SYS_RAWIO)) | ||
521 | return -EPERM; | ||
522 | |||
523 | if (!irqchip_in_kernel(kvm)) | ||
524 | return r; | ||
525 | |||
526 | mutex_lock(&kvm->lock); | ||
527 | r = -ENODEV; | ||
528 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
529 | assigned_irq->assigned_dev_id); | ||
530 | if (!match) | ||
531 | goto out; | ||
532 | |||
533 | host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); | ||
534 | guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); | ||
535 | |||
536 | r = -EINVAL; | ||
537 | /* can only assign one type at a time */ | ||
538 | if (hweight_long(host_irq_type) > 1) | ||
539 | goto out; | ||
540 | if (hweight_long(guest_irq_type) > 1) | ||
541 | goto out; | ||
542 | if (host_irq_type == 0 && guest_irq_type == 0) | ||
543 | goto out; | ||
544 | |||
545 | r = 0; | ||
546 | if (host_irq_type) | ||
547 | r = assign_host_irq(kvm, match, host_irq_type); | ||
548 | if (r) | ||
549 | goto out; | ||
550 | |||
551 | if (guest_irq_type) | ||
552 | r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); | ||
553 | out: | ||
554 | mutex_unlock(&kvm->lock); | ||
555 | return r; | ||
556 | } | ||
557 | |||
558 | static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, | ||
559 | struct kvm_assigned_irq | ||
560 | *assigned_irq) | ||
561 | { | ||
562 | int r = -ENODEV; | ||
563 | struct kvm_assigned_dev_kernel *match; | ||
564 | |||
565 | mutex_lock(&kvm->lock); | ||
566 | |||
567 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
568 | assigned_irq->assigned_dev_id); | ||
569 | if (!match) | ||
570 | goto out; | ||
571 | |||
572 | r = kvm_deassign_irq(kvm, match, assigned_irq->flags); | ||
573 | out: | ||
574 | mutex_unlock(&kvm->lock); | ||
575 | return r; | ||
576 | } | ||
577 | |||
578 | static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | ||
579 | struct kvm_assigned_pci_dev *assigned_dev) | ||
580 | { | ||
581 | int r = 0; | ||
582 | struct kvm_assigned_dev_kernel *match; | ||
583 | struct pci_dev *dev; | ||
584 | |||
585 | down_read(&kvm->slots_lock); | ||
586 | mutex_lock(&kvm->lock); | ||
587 | |||
588 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
589 | assigned_dev->assigned_dev_id); | ||
590 | if (match) { | ||
591 | /* device already assigned */ | ||
592 | r = -EEXIST; | ||
593 | goto out; | ||
594 | } | ||
595 | |||
596 | match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); | ||
597 | if (match == NULL) { | ||
598 | printk(KERN_INFO "%s: Couldn't allocate memory\n", | ||
599 | __func__); | ||
600 | r = -ENOMEM; | ||
601 | goto out; | ||
602 | } | ||
603 | dev = pci_get_bus_and_slot(assigned_dev->busnr, | ||
604 | assigned_dev->devfn); | ||
605 | if (!dev) { | ||
606 | printk(KERN_INFO "%s: host device not found\n", __func__); | ||
607 | r = -EINVAL; | ||
608 | goto out_free; | ||
609 | } | ||
610 | if (pci_enable_device(dev)) { | ||
611 | printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); | ||
612 | r = -EBUSY; | ||
613 | goto out_put; | ||
614 | } | ||
615 | r = pci_request_regions(dev, "kvm_assigned_device"); | ||
616 | if (r) { | ||
617 | printk(KERN_INFO "%s: Could not get access to device regions\n", | ||
618 | __func__); | ||
619 | goto out_disable; | ||
620 | } | ||
621 | |||
622 | pci_reset_function(dev); | ||
623 | |||
624 | match->assigned_dev_id = assigned_dev->assigned_dev_id; | ||
625 | match->host_busnr = assigned_dev->busnr; | ||
626 | match->host_devfn = assigned_dev->devfn; | ||
627 | match->flags = assigned_dev->flags; | ||
628 | match->dev = dev; | ||
629 | spin_lock_init(&match->assigned_dev_lock); | ||
630 | match->irq_source_id = -1; | ||
631 | match->kvm = kvm; | ||
632 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | ||
633 | INIT_WORK(&match->interrupt_work, | ||
634 | kvm_assigned_dev_interrupt_work_handler); | ||
635 | |||
636 | list_add(&match->list, &kvm->arch.assigned_dev_head); | ||
637 | |||
638 | if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { | ||
639 | if (!kvm->arch.iommu_domain) { | ||
640 | r = kvm_iommu_map_guest(kvm); | ||
641 | if (r) | ||
642 | goto out_list_del; | ||
643 | } | ||
644 | r = kvm_assign_device(kvm, match); | ||
645 | if (r) | ||
646 | goto out_list_del; | ||
647 | } | ||
648 | |||
649 | out: | ||
650 | mutex_unlock(&kvm->lock); | ||
651 | up_read(&kvm->slots_lock); | ||
652 | return r; | ||
653 | out_list_del: | ||
654 | list_del(&match->list); | ||
655 | pci_release_regions(dev); | ||
656 | out_disable: | ||
657 | pci_disable_device(dev); | ||
658 | out_put: | ||
659 | pci_dev_put(dev); | ||
660 | out_free: | ||
661 | kfree(match); | ||
662 | mutex_unlock(&kvm->lock); | ||
663 | up_read(&kvm->slots_lock); | ||
664 | return r; | ||
665 | } | ||
666 | #endif | ||
667 | |||
668 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT | ||
669 | static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, | ||
670 | struct kvm_assigned_pci_dev *assigned_dev) | ||
671 | { | ||
672 | int r = 0; | ||
673 | struct kvm_assigned_dev_kernel *match; | ||
674 | |||
675 | mutex_lock(&kvm->lock); | ||
676 | |||
677 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
678 | assigned_dev->assigned_dev_id); | ||
679 | if (!match) { | ||
680 | printk(KERN_INFO "%s: device hasn't been assigned before, " | ||
681 | "so cannot be deassigned\n", __func__); | ||
682 | r = -EINVAL; | ||
683 | goto out; | ||
684 | } | ||
685 | |||
686 | if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) | ||
687 | kvm_deassign_device(kvm, match); | ||
688 | |||
689 | kvm_free_assigned_device(kvm, match); | ||
690 | |||
691 | out: | ||
692 | mutex_unlock(&kvm->lock); | ||
693 | return r; | ||
694 | } | ||
695 | #endif | ||
696 | |||
697 | inline int kvm_is_mmio_pfn(pfn_t pfn) | 95 | inline int kvm_is_mmio_pfn(pfn_t pfn) |
698 | { | 96 | { |
699 | if (pfn_valid(pfn)) { | 97 | if (pfn_valid(pfn)) { |
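[Note on the large removal above] Everything from kvm_find_assigned_dev() down to kvm_vm_ioctl_deassign_device() is deleted from kvm_main.c; per the diffstat it now lives in virt/kvm/assigned-dev.c. One detail worth keeping in mind from the removed INTx path: the host interrupt handler masks the line and defers the guest injection to a work item, and it is the guest's ack notifier that unmasks it again. Schematically, as a hedged sketch (illustrative names, flag only, no real IRQ plumbing):

#include <stdbool.h>

struct adev {
        bool host_irq_disabled;
};

static void host_intr(struct adev *d)
{
        /* queue the work item that raises the guest line, then mask:
         * disable_irq_nosync() in the real code */
        d->host_irq_disabled = true;
}

static void guest_ack(struct adev *d)
{
        /* guest line lowered first (kvm_set_irq(..., 0)); the ack may
         * come from another device sharing the guest irq, hence the flag */
        if (d->host_irq_disabled) {
                d->host_irq_disabled = false;   /* enable_irq() here */
        }
}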
@@ -740,7 +138,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | |||
740 | 138 | ||
741 | zalloc_cpumask_var(&cpus, GFP_ATOMIC); | 139 | zalloc_cpumask_var(&cpus, GFP_ATOMIC); |
742 | 140 | ||
743 | spin_lock(&kvm->requests_lock); | 141 | raw_spin_lock(&kvm->requests_lock); |
744 | me = smp_processor_id(); | 142 | me = smp_processor_id(); |
745 | kvm_for_each_vcpu(i, vcpu, kvm) { | 143 | kvm_for_each_vcpu(i, vcpu, kvm) { |
746 | if (test_and_set_bit(req, &vcpu->requests)) | 144 | if (test_and_set_bit(req, &vcpu->requests)) |
@@ -755,7 +153,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | |||
755 | smp_call_function_many(cpus, ack_flush, NULL, 1); | 153 | smp_call_function_many(cpus, ack_flush, NULL, 1); |
756 | else | 154 | else |
757 | called = false; | 155 | called = false; |
758 | spin_unlock(&kvm->requests_lock); | 156 | raw_spin_unlock(&kvm->requests_lock); |
759 | free_cpumask_var(cpus); | 157 | free_cpumask_var(cpus); |
760 | return called; | 158 | return called; |
761 | } | 159 | } |
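[Note on the hunk above] requests_lock becomes a raw_spinlock_t, likely so it stays a true spinning lock in configurations where ordinary spinlocks can sleep; the surrounding logic is unchanged. The coalescing trick in this function is test_and_set_bit() on vcpu->requests: only the first poster of a given request pays for an IPI. A one-function sketch of that idea in C11:

#include <stdatomic.h>
#include <stdbool.h>

struct vcpu { _Atomic unsigned long requests; };

/* true if the request was newly posted (an IPI is worth sending);
 * false if it was already pending and this poster can coalesce */
static bool post_request(struct vcpu *v, int req)
{
        unsigned long bit = 1UL << req;

        return !(atomic_fetch_or(&v->requests, bit) & bit);
}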
@@ -819,7 +217,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, | |||
819 | unsigned long address) | 217 | unsigned long address) |
820 | { | 218 | { |
821 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | 219 | struct kvm *kvm = mmu_notifier_to_kvm(mn); |
822 | int need_tlb_flush; | 220 | int need_tlb_flush, idx; |
823 | 221 | ||
824 | /* | 222 | /* |
825 | * When ->invalidate_page runs, the linux pte has been zapped | 223 | * When ->invalidate_page runs, the linux pte has been zapped |
@@ -839,10 +237,12 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, | |||
839 | * pte after kvm_unmap_hva returned, without noticing the page | 237 | * pte after kvm_unmap_hva returned, without noticing the page |
840 | * is going to be freed. | 238 | * is going to be freed. |
841 | */ | 239 | */ |
240 | idx = srcu_read_lock(&kvm->srcu); | ||
842 | spin_lock(&kvm->mmu_lock); | 241 | spin_lock(&kvm->mmu_lock); |
843 | kvm->mmu_notifier_seq++; | 242 | kvm->mmu_notifier_seq++; |
844 | need_tlb_flush = kvm_unmap_hva(kvm, address); | 243 | need_tlb_flush = kvm_unmap_hva(kvm, address); |
845 | spin_unlock(&kvm->mmu_lock); | 244 | spin_unlock(&kvm->mmu_lock); |
245 | srcu_read_unlock(&kvm->srcu, idx); | ||
846 | 246 | ||
847 | /* we've to flush the tlb before the pages can be freed */ | 247 | /* we've to flush the tlb before the pages can be freed */ |
848 | if (need_tlb_flush) | 248 | if (need_tlb_flush) |
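[Note on this and the following mmu-notifier hunks] Each callback now brackets its work with an SRCU read-side section on kvm->srcu; unlike plain RCU, SRCU readers may sleep, and the index returned by srcu_read_lock() must be handed back to srcu_read_unlock(). A deliberately crude userspace analogue that only shows the contract (real SRCU uses per-CPU, flip-flopped counters; this one just drains a global):

#include <stdatomic.h>

static _Atomic int readers;

static int read_lock(void)          /* ~ srcu_read_lock(&kvm->srcu) */
{
        atomic_fetch_add(&readers, 1);
        return 0;                   /* stand-in for the srcu index  */
}

static void read_unlock(int idx)    /* ~ srcu_read_unlock(..., idx) */
{
        (void)idx;
        atomic_fetch_sub(&readers, 1);
}

static void wait_readers(void)      /* ~ synchronize_srcu()         */
{
        while (atomic_load(&readers) != 0)
                ;                   /* real SRCU sleeps, not spins  */
}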
@@ -856,11 +256,14 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, | |||
856 | pte_t pte) | 256 | pte_t pte) |
857 | { | 257 | { |
858 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | 258 | struct kvm *kvm = mmu_notifier_to_kvm(mn); |
259 | int idx; | ||
859 | 260 | ||
261 | idx = srcu_read_lock(&kvm->srcu); | ||
860 | spin_lock(&kvm->mmu_lock); | 262 | spin_lock(&kvm->mmu_lock); |
861 | kvm->mmu_notifier_seq++; | 263 | kvm->mmu_notifier_seq++; |
862 | kvm_set_spte_hva(kvm, address, pte); | 264 | kvm_set_spte_hva(kvm, address, pte); |
863 | spin_unlock(&kvm->mmu_lock); | 265 | spin_unlock(&kvm->mmu_lock); |
266 | srcu_read_unlock(&kvm->srcu, idx); | ||
864 | } | 267 | } |
865 | 268 | ||
866 | static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, | 269 | static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, |
@@ -869,8 +272,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, | |||
869 | unsigned long end) | 272 | unsigned long end) |
870 | { | 273 | { |
871 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | 274 | struct kvm *kvm = mmu_notifier_to_kvm(mn); |
872 | int need_tlb_flush = 0; | 275 | int need_tlb_flush = 0, idx; |
873 | 276 | ||
277 | idx = srcu_read_lock(&kvm->srcu); | ||
874 | spin_lock(&kvm->mmu_lock); | 278 | spin_lock(&kvm->mmu_lock); |
875 | /* | 279 | /* |
876 | * The count increase must become visible at unlock time as no | 280 | * The count increase must become visible at unlock time as no |
@@ -881,6 +285,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, | |||
881 | for (; start < end; start += PAGE_SIZE) | 285 | for (; start < end; start += PAGE_SIZE) |
882 | need_tlb_flush |= kvm_unmap_hva(kvm, start); | 286 | need_tlb_flush |= kvm_unmap_hva(kvm, start); |
883 | spin_unlock(&kvm->mmu_lock); | 287 | spin_unlock(&kvm->mmu_lock); |
288 | srcu_read_unlock(&kvm->srcu, idx); | ||
884 | 289 | ||
885 | /* we've to flush the tlb before the pages can be freed */ | 290 | /* we've to flush the tlb before the pages can be freed */ |
886 | if (need_tlb_flush) | 291 | if (need_tlb_flush) |
@@ -918,11 +323,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, | |||
918 | unsigned long address) | 323 | unsigned long address) |
919 | { | 324 | { |
920 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | 325 | struct kvm *kvm = mmu_notifier_to_kvm(mn); |
921 | int young; | 326 | int young, idx; |
922 | 327 | ||
328 | idx = srcu_read_lock(&kvm->srcu); | ||
923 | spin_lock(&kvm->mmu_lock); | 329 | spin_lock(&kvm->mmu_lock); |
924 | young = kvm_age_hva(kvm, address); | 330 | young = kvm_age_hva(kvm, address); |
925 | spin_unlock(&kvm->mmu_lock); | 331 | spin_unlock(&kvm->mmu_lock); |
332 | srcu_read_unlock(&kvm->srcu, idx); | ||
926 | 333 | ||
927 | if (young) | 334 | if (young) |
928 | kvm_flush_remote_tlbs(kvm); | 335 | kvm_flush_remote_tlbs(kvm); |
@@ -934,7 +341,11 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn, | |||
934 | struct mm_struct *mm) | 341 | struct mm_struct *mm) |
935 | { | 342 | { |
936 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | 343 | struct kvm *kvm = mmu_notifier_to_kvm(mn); |
344 | int idx; | ||
345 | |||
346 | idx = srcu_read_lock(&kvm->srcu); | ||
937 | kvm_arch_flush_shadow(kvm); | 347 | kvm_arch_flush_shadow(kvm); |
348 | srcu_read_unlock(&kvm->srcu, idx); | ||
938 | } | 349 | } |
939 | 350 | ||
940 | static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { | 351 | static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { |
@@ -945,57 +356,68 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { | |||
945 | .change_pte = kvm_mmu_notifier_change_pte, | 356 | .change_pte = kvm_mmu_notifier_change_pte, |
946 | .release = kvm_mmu_notifier_release, | 357 | .release = kvm_mmu_notifier_release, |
947 | }; | 358 | }; |
359 | |||
360 | static int kvm_init_mmu_notifier(struct kvm *kvm) | ||
361 | { | ||
362 | kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; | ||
363 | return mmu_notifier_register(&kvm->mmu_notifier, current->mm); | ||
364 | } | ||
365 | |||
366 | #else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */ | ||
367 | |||
368 | static int kvm_init_mmu_notifier(struct kvm *kvm) | ||
369 | { | ||
370 | return 0; | ||
371 | } | ||
372 | |||
948 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ | 373 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ |
949 | 374 | ||
950 | static struct kvm *kvm_create_vm(void) | 375 | static struct kvm *kvm_create_vm(void) |
951 | { | 376 | { |
377 | int r = 0, i; | ||
952 | struct kvm *kvm = kvm_arch_create_vm(); | 378 | struct kvm *kvm = kvm_arch_create_vm(); |
953 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | ||
954 | struct page *page; | ||
955 | #endif | ||
956 | 379 | ||
957 | if (IS_ERR(kvm)) | 380 | if (IS_ERR(kvm)) |
958 | goto out; | 381 | goto out; |
382 | |||
383 | r = hardware_enable_all(); | ||
384 | if (r) | ||
385 | goto out_err_nodisable; | ||
386 | |||
959 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 387 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
960 | INIT_LIST_HEAD(&kvm->irq_routing); | ||
961 | INIT_HLIST_HEAD(&kvm->mask_notifier_list); | 388 | INIT_HLIST_HEAD(&kvm->mask_notifier_list); |
389 | INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); | ||
962 | #endif | 390 | #endif |
963 | 391 | ||
964 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 392 | r = -ENOMEM; |
965 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 393 | kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
966 | if (!page) { | 394 | if (!kvm->memslots) |
967 | kfree(kvm); | 395 | goto out_err; |
968 | return ERR_PTR(-ENOMEM); | 396 | if (init_srcu_struct(&kvm->srcu)) |
397 | goto out_err; | ||
398 | for (i = 0; i < KVM_NR_BUSES; i++) { | ||
399 | kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), | ||
400 | GFP_KERNEL); | ||
401 | if (!kvm->buses[i]) { | ||
402 | cleanup_srcu_struct(&kvm->srcu); | ||
403 | goto out_err; | ||
404 | } | ||
969 | } | 405 | } |
970 | kvm->coalesced_mmio_ring = | ||
971 | (struct kvm_coalesced_mmio_ring *)page_address(page); | ||
972 | #endif | ||
973 | 406 | ||
974 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | 407 | r = kvm_init_mmu_notifier(kvm); |
975 | { | 408 | if (r) { |
976 | int err; | 409 | cleanup_srcu_struct(&kvm->srcu); |
977 | kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; | 410 | goto out_err; |
978 | err = mmu_notifier_register(&kvm->mmu_notifier, current->mm); | ||
979 | if (err) { | ||
980 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | ||
981 | put_page(page); | ||
982 | #endif | ||
983 | kfree(kvm); | ||
984 | return ERR_PTR(err); | ||
985 | } | ||
986 | } | 411 | } |
987 | #endif | ||
988 | 412 | ||
989 | kvm->mm = current->mm; | 413 | kvm->mm = current->mm; |
990 | atomic_inc(&kvm->mm->mm_count); | 414 | atomic_inc(&kvm->mm->mm_count); |
991 | spin_lock_init(&kvm->mmu_lock); | 415 | spin_lock_init(&kvm->mmu_lock); |
992 | spin_lock_init(&kvm->requests_lock); | 416 | raw_spin_lock_init(&kvm->requests_lock); |
993 | kvm_io_bus_init(&kvm->pio_bus); | ||
994 | kvm_eventfd_init(kvm); | 417 | kvm_eventfd_init(kvm); |
995 | mutex_init(&kvm->lock); | 418 | mutex_init(&kvm->lock); |
996 | mutex_init(&kvm->irq_lock); | 419 | mutex_init(&kvm->irq_lock); |
997 | kvm_io_bus_init(&kvm->mmio_bus); | 420 | mutex_init(&kvm->slots_lock); |
998 | init_rwsem(&kvm->slots_lock); | ||
999 | atomic_set(&kvm->users_count, 1); | 421 | atomic_set(&kvm->users_count, 1); |
1000 | spin_lock(&kvm_lock); | 422 | spin_lock(&kvm_lock); |
1001 | list_add(&kvm->vm_list, &vm_list); | 423 | list_add(&kvm->vm_list, &vm_list); |
@@ -1005,6 +427,15 @@ static struct kvm *kvm_create_vm(void) | |||
1005 | #endif | 427 | #endif |
1006 | out: | 428 | out: |
1007 | return kvm; | 429 | return kvm; |
430 | |||
431 | out_err: | ||
432 | hardware_disable_all(); | ||
433 | out_err_nodisable: | ||
434 | for (i = 0; i < KVM_NR_BUSES; i++) | ||
435 | kfree(kvm->buses[i]); | ||
436 | kfree(kvm->memslots); | ||
437 | kfree(kvm); | ||
438 | return ERR_PTR(r); | ||
1008 | } | 439 | } |
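/*
 * Editor's sketch of kvm_create_vm()'s two-label unwind above:
 * failures that happen before hardware_enable_all() succeeded jump to
 * out_err_nodisable and skip the disable, while later failures go
 * through out_err first.  Userspace model with stand-in acquire and
 * release functions (all names here are illustrative):
 */
#include <stdlib.h>

static int acquire_hw(void)  { return 0; }	/* hardware_enable_all() */
static void release_hw(void) { }		/* hardware_disable_all() */

static void *create(void)
{
	void *buses[4] = { NULL };	/* free(NULL) is a no-op below */
	void *slots = NULL;
	int i;

	if (acquire_hw())
		goto out_err_nodisable;

	slots = calloc(1, 64);
	if (!slots)
		goto out_err;
	for (i = 0; i < 4; i++) {
		buses[i] = calloc(1, 64);
		if (!buses[i])
			goto out_err;
	}
	/* success: the real code stores the buses in the VM object;
	 * the model simply lets the process exit reclaim them */
	return slots;

out_err:
	release_hw();
out_err_nodisable:
	for (i = 0; i < 4; i++)		/* partial allocation is safe */
		free(buses[i]);
	free(slots);
	return NULL;
}

int main(void) { return create() ? 0 : 1; }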
1009 | 440 | ||
1010 | /* | 441 | /* |
@@ -1037,13 +468,17 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free, | |||
1037 | void kvm_free_physmem(struct kvm *kvm) | 468 | void kvm_free_physmem(struct kvm *kvm) |
1038 | { | 469 | { |
1039 | int i; | 470 | int i; |
471 | struct kvm_memslots *slots = kvm->memslots; | ||
472 | |||
473 | for (i = 0; i < slots->nmemslots; ++i) | ||
474 | kvm_free_physmem_slot(&slots->memslots[i], NULL); | ||
1040 | 475 | ||
1041 | for (i = 0; i < kvm->nmemslots; ++i) | 476 | kfree(kvm->memslots); |
1042 | kvm_free_physmem_slot(&kvm->memslots[i], NULL); | ||
1043 | } | 477 | } |
1044 | 478 | ||
1045 | static void kvm_destroy_vm(struct kvm *kvm) | 479 | static void kvm_destroy_vm(struct kvm *kvm) |
1046 | { | 480 | { |
481 | int i; | ||
1047 | struct mm_struct *mm = kvm->mm; | 482 | struct mm_struct *mm = kvm->mm; |
1048 | 483 | ||
1049 | kvm_arch_sync_events(kvm); | 484 | kvm_arch_sync_events(kvm); |
@@ -1051,18 +486,16 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
1051 | list_del(&kvm->vm_list); | 486 | list_del(&kvm->vm_list); |
1052 | spin_unlock(&kvm_lock); | 487 | spin_unlock(&kvm_lock); |
1053 | kvm_free_irq_routing(kvm); | 488 | kvm_free_irq_routing(kvm); |
1054 | kvm_io_bus_destroy(&kvm->pio_bus); | 489 | for (i = 0; i < KVM_NR_BUSES; i++) |
1055 | kvm_io_bus_destroy(&kvm->mmio_bus); | 490 | kvm_io_bus_destroy(kvm->buses[i]); |
1056 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 491 | kvm_coalesced_mmio_free(kvm); |
1057 | if (kvm->coalesced_mmio_ring != NULL) | ||
1058 | free_page((unsigned long)kvm->coalesced_mmio_ring); | ||
1059 | #endif | ||
1060 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | 492 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) |
1061 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); | 493 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); |
1062 | #else | 494 | #else |
1063 | kvm_arch_flush_shadow(kvm); | 495 | kvm_arch_flush_shadow(kvm); |
1064 | #endif | 496 | #endif |
1065 | kvm_arch_destroy_vm(kvm); | 497 | kvm_arch_destroy_vm(kvm); |
498 | hardware_disable_all(); | ||
1066 | mmdrop(mm); | 499 | mmdrop(mm); |
1067 | } | 500 | } |
1068 | 501 | ||
@@ -1102,12 +535,13 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
1102 | struct kvm_userspace_memory_region *mem, | 535 | struct kvm_userspace_memory_region *mem, |
1103 | int user_alloc) | 536 | int user_alloc) |
1104 | { | 537 | { |
1105 | int r; | 538 | int r, flush_shadow = 0; |
1106 | gfn_t base_gfn; | 539 | gfn_t base_gfn; |
1107 | unsigned long npages; | 540 | unsigned long npages; |
1108 | unsigned long i; | 541 | unsigned long i; |
1109 | struct kvm_memory_slot *memslot; | 542 | struct kvm_memory_slot *memslot; |
1110 | struct kvm_memory_slot old, new; | 543 | struct kvm_memory_slot old, new; |
544 | struct kvm_memslots *slots, *old_memslots; | ||
1111 | 545 | ||
1112 | r = -EINVAL; | 546 | r = -EINVAL; |
1113 | /* General sanity checks */ | 547 | /* General sanity checks */ |
@@ -1122,7 +556,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
1122 | if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) | 556 | if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) |
1123 | goto out; | 557 | goto out; |
1124 | 558 | ||
1125 | memslot = &kvm->memslots[mem->slot]; | 559 | memslot = &kvm->memslots->memslots[mem->slot]; |
1126 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; | 560 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; |
1127 | npages = mem->memory_size >> PAGE_SHIFT; | 561 | npages = mem->memory_size >> PAGE_SHIFT; |
1128 | 562 | ||
@@ -1143,7 +577,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
1143 | /* Check for overlaps */ | 577 | /* Check for overlaps */ |
1144 | r = -EEXIST; | 578 | r = -EEXIST; |
1145 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 579 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
1146 | struct kvm_memory_slot *s = &kvm->memslots[i]; | 580 | struct kvm_memory_slot *s = &kvm->memslots->memslots[i]; |
1147 | 581 | ||
1148 | if (s == memslot || !s->npages) | 582 | if (s == memslot || !s->npages) |
1149 | continue; | 583 | continue; |
@@ -1169,15 +603,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
1169 | memset(new.rmap, 0, npages * sizeof(*new.rmap)); | 603 | memset(new.rmap, 0, npages * sizeof(*new.rmap)); |
1170 | 604 | ||
1171 | new.user_alloc = user_alloc; | 605 | new.user_alloc = user_alloc; |
1172 | /* | 606 | new.userspace_addr = mem->userspace_addr; |
1173 | * hva_to_rmmap() serializes with the mmu_lock and to be | ||
1174 | * safe it has to ignore memslots with !user_alloc && | ||
1175 | * !userspace_addr. | ||
1176 | */ | ||
1177 | if (user_alloc) | ||
1178 | new.userspace_addr = mem->userspace_addr; | ||
1179 | else | ||
1180 | new.userspace_addr = 0; | ||
1181 | } | 607 | } |
1182 | if (!npages) | 608 | if (!npages) |
1183 | goto skip_lpage; | 609 | goto skip_lpage; |
@@ -1226,14 +652,15 @@ skip_lpage: | |||
1226 | 652 | ||
1227 | /* Allocate page dirty bitmap if needed */ | 653 | /* Allocate page dirty bitmap if needed */ |
1228 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { | 654 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { |
1229 | unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8; | 655 | unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new); |
1230 | 656 | ||
1231 | new.dirty_bitmap = vmalloc(dirty_bytes); | 657 | new.dirty_bitmap = vmalloc(dirty_bytes); |
1232 | if (!new.dirty_bitmap) | 658 | if (!new.dirty_bitmap) |
1233 | goto out_free; | 659 | goto out_free; |
1234 | memset(new.dirty_bitmap, 0, dirty_bytes); | 660 | memset(new.dirty_bitmap, 0, dirty_bytes); |
661 | /* destroy any largepage mappings for dirty tracking */ | ||
1235 | if (old.npages) | 662 | if (old.npages) |
1236 | kvm_arch_flush_shadow(kvm); | 663 | flush_shadow = 1; |
1237 | } | 664 | } |
1238 | #else /* not defined CONFIG_S390 */ | 665 | #else /* not defined CONFIG_S390 */ |
1239 | new.user_alloc = user_alloc; | 666 | new.user_alloc = user_alloc; |
@@ -1241,36 +668,72 @@ skip_lpage: | |||
1241 | new.userspace_addr = mem->userspace_addr; | 668 | new.userspace_addr = mem->userspace_addr; |
1242 | #endif /* not defined CONFIG_S390 */ | 669 | #endif /* not defined CONFIG_S390 */ |
1243 | 670 | ||
1244 | if (!npages) | 671 | if (!npages) { |
672 | r = -ENOMEM; | ||
673 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | ||
674 | if (!slots) | ||
675 | goto out_free; | ||
676 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | ||
677 | if (mem->slot >= slots->nmemslots) | ||
678 | slots->nmemslots = mem->slot + 1; | ||
679 | slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID; | ||
680 | |||
681 | old_memslots = kvm->memslots; | ||
682 | rcu_assign_pointer(kvm->memslots, slots); | ||
683 | synchronize_srcu_expedited(&kvm->srcu); | ||
684 | /* From this point no new shadow pages pointing to a deleted | ||
685 | * memslot will be created. | ||
686 | * | ||
687 | * validation of sp->gfn happens in: | ||
688 | * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) | ||
689 | * - kvm_is_visible_gfn (mmu_check_roots) | ||
690 | */ | ||
1245 | kvm_arch_flush_shadow(kvm); | 691 | kvm_arch_flush_shadow(kvm); |
692 | kfree(old_memslots); | ||
693 | } | ||
1246 | 694 | ||
1247 | spin_lock(&kvm->mmu_lock); | 695 | r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); |
1248 | if (mem->slot >= kvm->nmemslots) | 696 | if (r) |
1249 | kvm->nmemslots = mem->slot + 1; | ||
1250 | |||
1251 | *memslot = new; | ||
1252 | spin_unlock(&kvm->mmu_lock); | ||
1253 | |||
1254 | r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc); | ||
1255 | if (r) { | ||
1256 | spin_lock(&kvm->mmu_lock); | ||
1257 | *memslot = old; | ||
1258 | spin_unlock(&kvm->mmu_lock); | ||
1259 | goto out_free; | 697 | goto out_free; |
1260 | } | ||
1261 | 698 | ||
1262 | kvm_free_physmem_slot(&old, npages ? &new : NULL); | ||
1263 | /* Slot deletion case: we have to update the current slot */ | ||
1264 | spin_lock(&kvm->mmu_lock); | ||
1265 | if (!npages) | ||
1266 | *memslot = old; | ||
1267 | spin_unlock(&kvm->mmu_lock); | ||
1268 | #ifdef CONFIG_DMAR | 699 | #ifdef CONFIG_DMAR |
1269 | /* map the pages in iommu page table */ | 700 | /* map the pages in iommu page table */ |
1270 | r = kvm_iommu_map_pages(kvm, base_gfn, npages); | 701 | if (npages) { |
1271 | if (r) | 702 | r = kvm_iommu_map_pages(kvm, &new); |
1272 | goto out; | 703 | if (r) |
704 | goto out_free; | ||
705 | } | ||
1273 | #endif | 706 | #endif |
707 | |||
708 | r = -ENOMEM; | ||
709 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | ||
710 | if (!slots) | ||
711 | goto out_free; | ||
712 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | ||
713 | if (mem->slot >= slots->nmemslots) | ||
714 | slots->nmemslots = mem->slot + 1; | ||
715 | |||
716 | /* actual memory is freed via old in kvm_free_physmem_slot below */ | ||
717 | if (!npages) { | ||
718 | new.rmap = NULL; | ||
719 | new.dirty_bitmap = NULL; | ||
720 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) | ||
721 | new.lpage_info[i] = NULL; | ||
722 | } | ||
723 | |||
724 | slots->memslots[mem->slot] = new; | ||
725 | old_memslots = kvm->memslots; | ||
726 | rcu_assign_pointer(kvm->memslots, slots); | ||
727 | synchronize_srcu_expedited(&kvm->srcu); | ||
728 | |||
729 | kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); | ||
730 | |||
731 | kvm_free_physmem_slot(&old, &new); | ||
732 | kfree(old_memslots); | ||
733 | |||
734 | if (flush_shadow) | ||
735 | kvm_arch_flush_shadow(kvm); | ||
736 | |||
1274 | return 0; | 737 | return 0; |
1275 | 738 | ||
1276 | out_free: | 739 | out_free: |
@@ -1287,9 +750,9 @@ int kvm_set_memory_region(struct kvm *kvm, | |||
1287 | { | 750 | { |
1288 | int r; | 751 | int r; |
1289 | 752 | ||
1290 | down_write(&kvm->slots_lock); | 753 | mutex_lock(&kvm->slots_lock); |
1291 | r = __kvm_set_memory_region(kvm, mem, user_alloc); | 754 | r = __kvm_set_memory_region(kvm, mem, user_alloc); |
1292 | up_write(&kvm->slots_lock); | 755 | mutex_unlock(&kvm->slots_lock); |
1293 | return r; | 756 | return r; |
1294 | } | 757 | } |
1295 | EXPORT_SYMBOL_GPL(kvm_set_memory_region); | 758 | EXPORT_SYMBOL_GPL(kvm_set_memory_region); |
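/*
 * Editor's note: __kvm_set_memory_region() above now updates memslots
 * by copy + publish: build a new kvm_memslots, rcu_assign_pointer() it
 * in, synchronize_srcu_expedited() to wait out readers, then free the
 * old copy (twice for deletion: once with KVM_MEMSLOT_INVALID set, once
 * with the slot gone).  Minimal single-threaded userspace model using a
 * C11 atomic pointer in place of the RCU primitives; the grace-period
 * wait is only a comment here:
 */
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>

struct slots { int nslots; int slot[32]; };

static _Atomic(struct slots *) current_slots;

static void update_slot(int idx, int val)
{
	struct slots *old = atomic_load(&current_slots);
	struct slots *new = malloc(sizeof(*new));

	memcpy(new, old, sizeof(*new));		/* copy */
	new->slot[idx] = val;			/* modify the copy */
	if (idx >= new->nslots)
		new->nslots = idx + 1;

	atomic_store(&current_slots, new);	/* rcu_assign_pointer() */
	/* here the kernel calls synchronize_srcu_expedited(&kvm->srcu),
	 * after which no reader can still hold a pointer to *old */
	free(old);				/* kfree(old_memslots) */
}

int main(void)
{
	atomic_store(&current_slots, calloc(1, sizeof(struct slots)));
	update_slot(3, 42);
	free(atomic_load(&current_slots));
	return 0;
}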
@@ -1309,19 +772,19 @@ int kvm_get_dirty_log(struct kvm *kvm, | |||
1309 | { | 772 | { |
1310 | struct kvm_memory_slot *memslot; | 773 | struct kvm_memory_slot *memslot; |
1311 | int r, i; | 774 | int r, i; |
1312 | int n; | 775 | unsigned long n; |
1313 | unsigned long any = 0; | 776 | unsigned long any = 0; |
1314 | 777 | ||
1315 | r = -EINVAL; | 778 | r = -EINVAL; |
1316 | if (log->slot >= KVM_MEMORY_SLOTS) | 779 | if (log->slot >= KVM_MEMORY_SLOTS) |
1317 | goto out; | 780 | goto out; |
1318 | 781 | ||
1319 | memslot = &kvm->memslots[log->slot]; | 782 | memslot = &kvm->memslots->memslots[log->slot]; |
1320 | r = -ENOENT; | 783 | r = -ENOENT; |
1321 | if (!memslot->dirty_bitmap) | 784 | if (!memslot->dirty_bitmap) |
1322 | goto out; | 785 | goto out; |
1323 | 786 | ||
1324 | n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; | 787 | n = kvm_dirty_bitmap_bytes(memslot); |
1325 | 788 | ||
1326 | for (i = 0; !any && i < n/sizeof(long); ++i) | 789 | for (i = 0; !any && i < n/sizeof(long); ++i) |
1327 | any = memslot->dirty_bitmap[i]; | 790 | any = memslot->dirty_bitmap[i]; |
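/*
 * Editor's note: judging by the removed open-coded expression above,
 * kvm_dirty_bitmap_bytes() presumably computes
 * ALIGN(npages, BITS_PER_LONG) / 8 - one bit per page, rounded up to
 * whole longs.  Note also the type fix: the byte count becomes
 * unsigned long, matching npages.  Stand-alone sketch:
 */
#include <stdio.h>

#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define ALIGN(x, a)	(((x) + (a) - 1) / (a) * (a))

static unsigned long dirty_bitmap_bytes(unsigned long npages)
{
	return ALIGN(npages, BITS_PER_LONG) / 8;
}

int main(void)
{
	/* on a 64-bit host: 1 page -> 8 bytes, 1025 pages -> 136 bytes */
	printf("%lu pages -> %lu bytes\n", 1UL, dirty_bitmap_bytes(1));
	printf("%lu pages -> %lu bytes\n", 1025UL, dirty_bitmap_bytes(1025));
	return 0;
}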
@@ -1370,9 +833,10 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva); | |||
1370 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) | 833 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) |
1371 | { | 834 | { |
1372 | int i; | 835 | int i; |
836 | struct kvm_memslots *slots = rcu_dereference(kvm->memslots); | ||
1373 | 837 | ||
1374 | for (i = 0; i < kvm->nmemslots; ++i) { | 838 | for (i = 0; i < slots->nmemslots; ++i) { |
1375 | struct kvm_memory_slot *memslot = &kvm->memslots[i]; | 839 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
1376 | 840 | ||
1377 | if (gfn >= memslot->base_gfn | 841 | if (gfn >= memslot->base_gfn |
1378 | && gfn < memslot->base_gfn + memslot->npages) | 842 | && gfn < memslot->base_gfn + memslot->npages) |
@@ -1391,10 +855,14 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | |||
1391 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) | 855 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) |
1392 | { | 856 | { |
1393 | int i; | 857 | int i; |
858 | struct kvm_memslots *slots = rcu_dereference(kvm->memslots); | ||
1394 | 859 | ||
1395 | gfn = unalias_gfn(kvm, gfn); | 860 | gfn = unalias_gfn_instantiation(kvm, gfn); |
1396 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 861 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
1397 | struct kvm_memory_slot *memslot = &kvm->memslots[i]; | 862 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
863 | |||
864 | if (memslot->flags & KVM_MEMSLOT_INVALID) | ||
865 | continue; | ||
1398 | 866 | ||
1399 | if (gfn >= memslot->base_gfn | 867 | if (gfn >= memslot->base_gfn |
1400 | && gfn < memslot->base_gfn + memslot->npages) | 868 | && gfn < memslot->base_gfn + memslot->npages) |
@@ -1404,33 +872,68 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) | |||
1404 | } | 872 | } |
1405 | EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); | 873 | EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); |
1406 | 874 | ||
875 | unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn) | ||
876 | { | ||
877 | struct vm_area_struct *vma; | ||
878 | unsigned long addr, size; | ||
879 | |||
880 | size = PAGE_SIZE; | ||
881 | |||
882 | addr = gfn_to_hva(kvm, gfn); | ||
883 | if (kvm_is_error_hva(addr)) | ||
884 | return PAGE_SIZE; | ||
885 | |||
886 | down_read(¤t->mm->mmap_sem); | ||
887 | vma = find_vma(current->mm, addr); | ||
888 | if (!vma) | ||
889 | goto out; | ||
890 | |||
891 | size = vma_kernel_pagesize(vma); | ||
892 | |||
893 | out: | ||
894 | up_read(¤t->mm->mmap_sem); | ||
895 | |||
896 | return size; | ||
897 | } | ||
898 | |||
899 | int memslot_id(struct kvm *kvm, gfn_t gfn) | ||
900 | { | ||
901 | int i; | ||
902 | struct kvm_memslots *slots = rcu_dereference(kvm->memslots); | ||
903 | struct kvm_memory_slot *memslot = NULL; | ||
904 | |||
905 | gfn = unalias_gfn(kvm, gfn); | ||
906 | for (i = 0; i < slots->nmemslots; ++i) { | ||
907 | memslot = &slots->memslots[i]; | ||
908 | |||
909 | if (gfn >= memslot->base_gfn | ||
910 | && gfn < memslot->base_gfn + memslot->npages) | ||
911 | break; | ||
912 | } | ||
913 | |||
914 | return memslot - slots->memslots; | ||
915 | } | ||
916 | |||
1407 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | 917 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) |
1408 | { | 918 | { |
1409 | struct kvm_memory_slot *slot; | 919 | struct kvm_memory_slot *slot; |
1410 | 920 | ||
1411 | gfn = unalias_gfn(kvm, gfn); | 921 | gfn = unalias_gfn_instantiation(kvm, gfn); |
1412 | slot = gfn_to_memslot_unaliased(kvm, gfn); | 922 | slot = gfn_to_memslot_unaliased(kvm, gfn); |
1413 | if (!slot) | 923 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) |
1414 | return bad_hva(); | 924 | return bad_hva(); |
1415 | return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); | 925 | return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); |
1416 | } | 926 | } |
1417 | EXPORT_SYMBOL_GPL(gfn_to_hva); | 927 | EXPORT_SYMBOL_GPL(gfn_to_hva); |
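/*
 * Editor's sketch of the gfn_to_hva() arithmetic above: a slot maps the
 * guest frame range [base_gfn, base_gfn + npages) linearly onto the
 * userspace mapping starting at userspace_addr.  Self-contained model:
 */
#include <assert.h>

#define PAGE_SIZE 4096UL

struct slot { unsigned long base_gfn, npages, userspace_addr; };

static unsigned long slot_gfn_to_hva(const struct slot *s, unsigned long gfn)
{
	assert(gfn >= s->base_gfn && gfn < s->base_gfn + s->npages);
	return s->userspace_addr + (gfn - s->base_gfn) * PAGE_SIZE;
}

int main(void)
{
	struct slot s = { .base_gfn = 0x100, .npages = 16,
			  .userspace_addr = 0x7f0000000000UL };

	/* gfn 0x104 is 4 pages into the slot */
	assert(slot_gfn_to_hva(&s, 0x104) ==
	       0x7f0000000000UL + 4 * PAGE_SIZE);
	return 0;
}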
1418 | 928 | ||
1419 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) | 929 | static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr) |
1420 | { | 930 | { |
1421 | struct page *page[1]; | 931 | struct page *page[1]; |
1422 | unsigned long addr; | ||
1423 | int npages; | 932 | int npages; |
1424 | pfn_t pfn; | 933 | pfn_t pfn; |
1425 | 934 | ||
1426 | might_sleep(); | 935 | might_sleep(); |
1427 | 936 | ||
1428 | addr = gfn_to_hva(kvm, gfn); | ||
1429 | if (kvm_is_error_hva(addr)) { | ||
1430 | get_page(bad_page); | ||
1431 | return page_to_pfn(bad_page); | ||
1432 | } | ||
1433 | |||
1434 | npages = get_user_pages_fast(addr, 1, 1, page); | 937 | npages = get_user_pages_fast(addr, 1, 1, page); |
1435 | 938 | ||
1436 | if (unlikely(npages != 1)) { | 939 | if (unlikely(npages != 1)) { |
@@ -1455,8 +958,32 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) | |||
1455 | return pfn; | 958 | return pfn; |
1456 | } | 959 | } |
1457 | 960 | ||
961 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) | ||
962 | { | ||
963 | unsigned long addr; | ||
964 | |||
965 | addr = gfn_to_hva(kvm, gfn); | ||
966 | if (kvm_is_error_hva(addr)) { | ||
967 | get_page(bad_page); | ||
968 | return page_to_pfn(bad_page); | ||
969 | } | ||
970 | |||
971 | return hva_to_pfn(kvm, addr); | ||
972 | } | ||
1458 | EXPORT_SYMBOL_GPL(gfn_to_pfn); | 973 | EXPORT_SYMBOL_GPL(gfn_to_pfn); |
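/*
 * Editor's note: gfn_to_pfn() reports failure by returning the pfn of a
 * reference-counted sentinel page (bad_page) rather than an error code,
 * so every caller can treat the result as a page it must later release.
 * Tiny userspace model of the sentinel idiom (names illustrative):
 */
#include <stdio.h>

struct page { int refcount; };

static struct page bad_page  = { .refcount = 1 };
static struct page real_page = { .refcount = 0 };

static struct page *lookup(int key)
{
	if (key < 0) {			/* kvm_is_error_hva() analogue */
		bad_page.refcount++;	/* get_page(bad_page) */
		return &bad_page;
	}
	real_page.refcount++;		/* real lookup elided */
	return &real_page;
}

static void put(struct page *p) { p->refcount--; }

int main(void)
{
	struct page *p = lookup(-1);

	printf("got %s\n", p == &bad_page ? "bad_page" : "real page");
	put(p);				/* uniform release path */
	return 0;
}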
1459 | 974 | ||
975 | static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) | ||
976 | { | ||
977 | return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); | ||
978 | } | ||
979 | |||
980 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, | ||
981 | struct kvm_memory_slot *slot, gfn_t gfn) | ||
982 | { | ||
983 | unsigned long addr = gfn_to_hva_memslot(slot, gfn); | ||
984 | return hva_to_pfn(kvm, addr); | ||
985 | } | ||
986 | |||
1460 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | 987 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) |
1461 | { | 988 | { |
1462 | pfn_t pfn; | 989 | pfn_t pfn; |
@@ -1663,10 +1190,13 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | |||
1663 | memslot = gfn_to_memslot_unaliased(kvm, gfn); | 1190 | memslot = gfn_to_memslot_unaliased(kvm, gfn); |
1664 | if (memslot && memslot->dirty_bitmap) { | 1191 | if (memslot && memslot->dirty_bitmap) { |
1665 | unsigned long rel_gfn = gfn - memslot->base_gfn; | 1192 | unsigned long rel_gfn = gfn - memslot->base_gfn; |
1193 | unsigned long *p = memslot->dirty_bitmap + | ||
1194 | rel_gfn / BITS_PER_LONG; | ||
1195 | int offset = rel_gfn % BITS_PER_LONG; | ||
1666 | 1196 | ||
1667 | /* avoid RMW */ | 1197 | /* avoid RMW */ |
1668 | if (!test_bit(rel_gfn, memslot->dirty_bitmap)) | 1198 | if (!generic_test_le_bit(offset, p)) |
1669 | set_bit(rel_gfn, memslot->dirty_bitmap); | 1199 | generic___set_le_bit(offset, p); |
1670 | } | 1200 | } |
1671 | } | 1201 | } |
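/*
 * Editor's note: mark_page_dirty() above switches from test_bit/set_bit
 * (which number bits within host-endian longs) to explicit
 * little-endian bit ops, so the bitmap layout userspace sees via
 * KVM_GET_DIRTY_LOG is identical on big- and little-endian hosts.
 * Byte-addressed sketch of that LE numbering (bit 0 = LSB of byte 0):
 */
#include <stdio.h>

static void set_le_bit(unsigned long nr, unsigned char *map)
{
	map[nr / 8] |= 1u << (nr % 8);
}

static int test_le_bit(unsigned long nr, const unsigned char *map)
{
	return (map[nr / 8] >> (nr % 8)) & 1;
}

int main(void)
{
	unsigned char bitmap[16] = { 0 };

	if (!test_le_bit(9, bitmap))	/* avoid RMW, as in the patch */
		set_le_bit(9, bitmap);
	printf("byte 1 = %#x\n", bitmap[1]);	/* prints 0x2 */
	return 0;
}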
1672 | 1202 | ||
@@ -1689,9 +1219,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
1689 | if (signal_pending(current)) | 1219 | if (signal_pending(current)) |
1690 | break; | 1220 | break; |
1691 | 1221 | ||
1692 | vcpu_put(vcpu); | ||
1693 | schedule(); | 1222 | schedule(); |
1694 | vcpu_load(vcpu); | ||
1695 | } | 1223 | } |
1696 | 1224 | ||
1697 | finish_wait(&vcpu->wq, &wait); | 1225 | finish_wait(&vcpu->wq, &wait); |
@@ -1705,6 +1233,21 @@ void kvm_resched(struct kvm_vcpu *vcpu) | |||
1705 | } | 1233 | } |
1706 | EXPORT_SYMBOL_GPL(kvm_resched); | 1234 | EXPORT_SYMBOL_GPL(kvm_resched); |
1707 | 1235 | ||
1236 | void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu) | ||
1237 | { | ||
1238 | ktime_t expires; | ||
1239 | DEFINE_WAIT(wait); | ||
1240 | |||
1241 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | ||
1242 | |||
1243 | /* Sleep for 100 us, and hope lock-holder got scheduled */ | ||
1244 | expires = ktime_add_ns(ktime_get(), 100000UL); | ||
1245 | schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); | ||
1246 | |||
1247 | finish_wait(&vcpu->wq, &wait); | ||
1248 | } | ||
1249 | EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); | ||
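/*
 * Editor's note: kvm_vcpu_on_spin() above sleeps for a fixed 100 us
 * against an absolute deadline (ktime_add_ns + HRTIMER_MODE_ABS) rather
 * than a relative one, so a preemption between computing "now" and
 * going to sleep does not stretch the nap.  Userspace analogue of the
 * same pattern with clock_nanosleep():
 */
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec expires;

	clock_gettime(CLOCK_MONOTONIC, &expires);
	expires.tv_nsec += 100000;		/* 100 us from now */
	if (expires.tv_nsec >= 1000000000L) {	/* normalize */
		expires.tv_nsec -= 1000000000L;
		expires.tv_sec++;
	}
	/* absolute deadline, like schedule_hrtimeout(HRTIMER_MODE_ABS) */
	clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &expires, NULL);
	printf("woke up\n");
	return 0;
}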
1250 | |||
1708 | static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1251 | static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
1709 | { | 1252 | { |
1710 | struct kvm_vcpu *vcpu = vma->vm_file->private_data; | 1253 | struct kvm_vcpu *vcpu = vma->vm_file->private_data; |
@@ -1757,7 +1300,7 @@ static struct file_operations kvm_vcpu_fops = { | |||
1757 | */ | 1300 | */ |
1758 | static int create_vcpu_fd(struct kvm_vcpu *vcpu) | 1301 | static int create_vcpu_fd(struct kvm_vcpu *vcpu) |
1759 | { | 1302 | { |
1760 | return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0); | 1303 | return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR); |
1761 | } | 1304 | } |
1762 | 1305 | ||
1763 | /* | 1306 | /* |
@@ -1828,88 +1371,6 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) | |||
1828 | return 0; | 1371 | return 0; |
1829 | } | 1372 | } |
1830 | 1373 | ||
1831 | #ifdef __KVM_HAVE_MSIX | ||
1832 | static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, | ||
1833 | struct kvm_assigned_msix_nr *entry_nr) | ||
1834 | { | ||
1835 | int r = 0; | ||
1836 | struct kvm_assigned_dev_kernel *adev; | ||
1837 | |||
1838 | mutex_lock(&kvm->lock); | ||
1839 | |||
1840 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
1841 | entry_nr->assigned_dev_id); | ||
1842 | if (!adev) { | ||
1843 | r = -EINVAL; | ||
1844 | goto msix_nr_out; | ||
1845 | } | ||
1846 | |||
1847 | if (adev->entries_nr == 0) { | ||
1848 | adev->entries_nr = entry_nr->entry_nr; | ||
1849 | if (adev->entries_nr == 0 || | ||
1850 | adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { | ||
1851 | r = -EINVAL; | ||
1852 | goto msix_nr_out; | ||
1853 | } | ||
1854 | |||
1855 | adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * | ||
1856 | entry_nr->entry_nr, | ||
1857 | GFP_KERNEL); | ||
1858 | if (!adev->host_msix_entries) { | ||
1859 | r = -ENOMEM; | ||
1860 | goto msix_nr_out; | ||
1861 | } | ||
1862 | adev->guest_msix_entries = kzalloc( | ||
1863 | sizeof(struct kvm_guest_msix_entry) * | ||
1864 | entry_nr->entry_nr, GFP_KERNEL); | ||
1865 | if (!adev->guest_msix_entries) { | ||
1866 | kfree(adev->host_msix_entries); | ||
1867 | r = -ENOMEM; | ||
1868 | goto msix_nr_out; | ||
1869 | } | ||
1870 | } else /* Not allowed to set MSI-X number twice */ | ||
1871 | r = -EINVAL; | ||
1872 | msix_nr_out: | ||
1873 | mutex_unlock(&kvm->lock); | ||
1874 | return r; | ||
1875 | } | ||
1876 | |||
1877 | static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, | ||
1878 | struct kvm_assigned_msix_entry *entry) | ||
1879 | { | ||
1880 | int r = 0, i; | ||
1881 | struct kvm_assigned_dev_kernel *adev; | ||
1882 | |||
1883 | mutex_lock(&kvm->lock); | ||
1884 | |||
1885 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
1886 | entry->assigned_dev_id); | ||
1887 | |||
1888 | if (!adev) { | ||
1889 | r = -EINVAL; | ||
1890 | goto msix_entry_out; | ||
1891 | } | ||
1892 | |||
1893 | for (i = 0; i < adev->entries_nr; i++) | ||
1894 | if (adev->guest_msix_entries[i].vector == 0 || | ||
1895 | adev->guest_msix_entries[i].entry == entry->entry) { | ||
1896 | adev->guest_msix_entries[i].entry = entry->entry; | ||
1897 | adev->guest_msix_entries[i].vector = entry->gsi; | ||
1898 | adev->host_msix_entries[i].entry = entry->entry; | ||
1899 | break; | ||
1900 | } | ||
1901 | if (i == adev->entries_nr) { | ||
1902 | r = -ENOSPC; | ||
1903 | goto msix_entry_out; | ||
1904 | } | ||
1905 | |||
1906 | msix_entry_out: | ||
1907 | mutex_unlock(&kvm->lock); | ||
1908 | |||
1909 | return r; | ||
1910 | } | ||
1911 | #endif | ||
1912 | |||
1913 | static long kvm_vcpu_ioctl(struct file *filp, | 1374 | static long kvm_vcpu_ioctl(struct file *filp, |
1914 | unsigned int ioctl, unsigned long arg) | 1375 | unsigned int ioctl, unsigned long arg) |
1915 | { | 1376 | { |
@@ -2168,112 +1629,6 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2168 | break; | 1629 | break; |
2169 | } | 1630 | } |
2170 | #endif | 1631 | #endif |
2171 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | ||
2172 | case KVM_ASSIGN_PCI_DEVICE: { | ||
2173 | struct kvm_assigned_pci_dev assigned_dev; | ||
2174 | |||
2175 | r = -EFAULT; | ||
2176 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
2177 | goto out; | ||
2178 | r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); | ||
2179 | if (r) | ||
2180 | goto out; | ||
2181 | break; | ||
2182 | } | ||
2183 | case KVM_ASSIGN_IRQ: { | ||
2184 | r = -EOPNOTSUPP; | ||
2185 | break; | ||
2186 | } | ||
2187 | #ifdef KVM_CAP_ASSIGN_DEV_IRQ | ||
2188 | case KVM_ASSIGN_DEV_IRQ: { | ||
2189 | struct kvm_assigned_irq assigned_irq; | ||
2190 | |||
2191 | r = -EFAULT; | ||
2192 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
2193 | goto out; | ||
2194 | r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); | ||
2195 | if (r) | ||
2196 | goto out; | ||
2197 | break; | ||
2198 | } | ||
2199 | case KVM_DEASSIGN_DEV_IRQ: { | ||
2200 | struct kvm_assigned_irq assigned_irq; | ||
2201 | |||
2202 | r = -EFAULT; | ||
2203 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
2204 | goto out; | ||
2205 | r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); | ||
2206 | if (r) | ||
2207 | goto out; | ||
2208 | break; | ||
2209 | } | ||
2210 | #endif | ||
2211 | #endif | ||
2212 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT | ||
2213 | case KVM_DEASSIGN_PCI_DEVICE: { | ||
2214 | struct kvm_assigned_pci_dev assigned_dev; | ||
2215 | |||
2216 | r = -EFAULT; | ||
2217 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
2218 | goto out; | ||
2219 | r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); | ||
2220 | if (r) | ||
2221 | goto out; | ||
2222 | break; | ||
2223 | } | ||
2224 | #endif | ||
2225 | #ifdef KVM_CAP_IRQ_ROUTING | ||
2226 | case KVM_SET_GSI_ROUTING: { | ||
2227 | struct kvm_irq_routing routing; | ||
2228 | struct kvm_irq_routing __user *urouting; | ||
2229 | struct kvm_irq_routing_entry *entries; | ||
2230 | |||
2231 | r = -EFAULT; | ||
2232 | if (copy_from_user(&routing, argp, sizeof(routing))) | ||
2233 | goto out; | ||
2234 | r = -EINVAL; | ||
2235 | if (routing.nr >= KVM_MAX_IRQ_ROUTES) | ||
2236 | goto out; | ||
2237 | if (routing.flags) | ||
2238 | goto out; | ||
2239 | r = -ENOMEM; | ||
2240 | entries = vmalloc(routing.nr * sizeof(*entries)); | ||
2241 | if (!entries) | ||
2242 | goto out; | ||
2243 | r = -EFAULT; | ||
2244 | urouting = argp; | ||
2245 | if (copy_from_user(entries, urouting->entries, | ||
2246 | routing.nr * sizeof(*entries))) | ||
2247 | goto out_free_irq_routing; | ||
2248 | r = kvm_set_irq_routing(kvm, entries, routing.nr, | ||
2249 | routing.flags); | ||
2250 | out_free_irq_routing: | ||
2251 | vfree(entries); | ||
2252 | break; | ||
2253 | } | ||
2254 | #endif /* KVM_CAP_IRQ_ROUTING */ | ||
2255 | #ifdef __KVM_HAVE_MSIX | ||
2256 | case KVM_ASSIGN_SET_MSIX_NR: { | ||
2257 | struct kvm_assigned_msix_nr entry_nr; | ||
2258 | r = -EFAULT; | ||
2259 | if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) | ||
2260 | goto out; | ||
2261 | r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); | ||
2262 | if (r) | ||
2263 | goto out; | ||
2264 | break; | ||
2265 | } | ||
2266 | case KVM_ASSIGN_SET_MSIX_ENTRY: { | ||
2267 | struct kvm_assigned_msix_entry entry; | ||
2268 | r = -EFAULT; | ||
2269 | if (copy_from_user(&entry, argp, sizeof entry)) | ||
2270 | goto out; | ||
2271 | r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); | ||
2272 | if (r) | ||
2273 | goto out; | ||
2274 | break; | ||
2275 | } | ||
2276 | #endif | ||
2277 | case KVM_IRQFD: { | 1632 | case KVM_IRQFD: { |
2278 | struct kvm_irqfd data; | 1633 | struct kvm_irqfd data; |
2279 | 1634 | ||
@@ -2305,10 +1660,58 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2305 | #endif | 1660 | #endif |
2306 | default: | 1661 | default: |
2307 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); | 1662 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); |
1663 | if (r == -ENOTTY) | ||
1664 | r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); | ||
1665 | } | ||
1666 | out: | ||
1667 | return r; | ||
1668 | } | ||
1669 | |||
1670 | #ifdef CONFIG_COMPAT | ||
1671 | struct compat_kvm_dirty_log { | ||
1672 | __u32 slot; | ||
1673 | __u32 padding1; | ||
1674 | union { | ||
1675 | compat_uptr_t dirty_bitmap; /* one bit per page */ | ||
1676 | __u64 padding2; | ||
1677 | }; | ||
1678 | }; | ||
1679 | |||
1680 | static long kvm_vm_compat_ioctl(struct file *filp, | ||
1681 | unsigned int ioctl, unsigned long arg) | ||
1682 | { | ||
1683 | struct kvm *kvm = filp->private_data; | ||
1684 | int r; | ||
1685 | |||
1686 | if (kvm->mm != current->mm) | ||
1687 | return -EIO; | ||
1688 | switch (ioctl) { | ||
1689 | case KVM_GET_DIRTY_LOG: { | ||
1690 | struct compat_kvm_dirty_log compat_log; | ||
1691 | struct kvm_dirty_log log; | ||
1692 | |||
1693 | r = -EFAULT; | ||
1694 | if (copy_from_user(&compat_log, (void __user *)arg, | ||
1695 | sizeof(compat_log))) | ||
1696 | goto out; | ||
1697 | log.slot = compat_log.slot; | ||
1698 | log.padding1 = compat_log.padding1; | ||
1699 | log.padding2 = compat_log.padding2; | ||
1700 | log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); | ||
1701 | |||
1702 | r = kvm_vm_ioctl_get_dirty_log(kvm, &log); | ||
1703 | if (r) | ||
1704 | goto out; | ||
1705 | break; | ||
1706 | } | ||
1707 | default: | ||
1708 | r = kvm_vm_ioctl(filp, ioctl, arg); | ||
2308 | } | 1709 | } |
1710 | |||
2309 | out: | 1711 | out: |
2310 | return r; | 1712 | return r; |
2311 | } | 1713 | } |
1714 | #endif | ||
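/*
 * Editor's note: the compat handler above exists because 32-bit
 * userspace passes a 4-byte dirty_bitmap pointer where the 64-bit
 * kernel expects 8 bytes; the union with a __u64 pads the 32-bit layout
 * to the same 16-byte size, so only the pointer field needs
 * translating via compat_ptr().  Sketch of both layouts, with plain
 * stdint types standing in for __u32/__u64/compat_uptr_t:
 */
#include <stdint.h>
#include <stdio.h>

struct compat_dirty_log {		/* what 32-bit userspace passes */
	uint32_t slot;
	uint32_t padding1;
	union {
		uint32_t dirty_bitmap;	/* compat_uptr_t */
		uint64_t padding2;
	};
};

struct dirty_log {			/* native 64-bit layout */
	uint32_t slot;
	uint32_t padding1;
	union {
		void *dirty_bitmap;
		uint64_t padding2;
	};
};

int main(void)
{
	/* both print 16 on a 64-bit host */
	printf("compat=%zu native=%zu\n",
	       sizeof(struct compat_dirty_log), sizeof(struct dirty_log));
	return 0;
}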
2312 | 1715 | ||
2313 | static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1716 | static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
2314 | { | 1717 | { |
@@ -2344,7 +1747,9 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) | |||
2344 | static struct file_operations kvm_vm_fops = { | 1747 | static struct file_operations kvm_vm_fops = { |
2345 | .release = kvm_vm_release, | 1748 | .release = kvm_vm_release, |
2346 | .unlocked_ioctl = kvm_vm_ioctl, | 1749 | .unlocked_ioctl = kvm_vm_ioctl, |
2347 | .compat_ioctl = kvm_vm_ioctl, | 1750 | #ifdef CONFIG_COMPAT |
1751 | .compat_ioctl = kvm_vm_compat_ioctl, | ||
1752 | #endif | ||
2348 | .mmap = kvm_vm_mmap, | 1753 | .mmap = kvm_vm_mmap, |
2349 | }; | 1754 | }; |
2350 | 1755 | ||
@@ -2356,7 +1761,7 @@ static int kvm_dev_ioctl_create_vm(void) | |||
2356 | kvm = kvm_create_vm(); | 1761 | kvm = kvm_create_vm(); |
2357 | if (IS_ERR(kvm)) | 1762 | if (IS_ERR(kvm)) |
2358 | return PTR_ERR(kvm); | 1763 | return PTR_ERR(kvm); |
2359 | fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, 0); | 1764 | fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); |
2360 | if (fd < 0) | 1765 | if (fd < 0) |
2361 | kvm_put_kvm(kvm); | 1766 | kvm_put_kvm(kvm); |
2362 | 1767 | ||
@@ -2372,6 +1777,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) | |||
2372 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | 1777 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE |
2373 | case KVM_CAP_SET_BOOT_CPU_ID: | 1778 | case KVM_CAP_SET_BOOT_CPU_ID: |
2374 | #endif | 1779 | #endif |
1780 | case KVM_CAP_INTERNAL_ERROR_DATA: | ||
2375 | return 1; | 1781 | return 1; |
2376 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 1782 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
2377 | case KVM_CAP_IRQ_ROUTING: | 1783 | case KVM_CAP_IRQ_ROUTING: |
@@ -2442,11 +1848,21 @@ static struct miscdevice kvm_dev = { | |||
2442 | static void hardware_enable(void *junk) | 1848 | static void hardware_enable(void *junk) |
2443 | { | 1849 | { |
2444 | int cpu = raw_smp_processor_id(); | 1850 | int cpu = raw_smp_processor_id(); |
1851 | int r; | ||
2445 | 1852 | ||
2446 | if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) | 1853 | if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) |
2447 | return; | 1854 | return; |
1855 | |||
2448 | cpumask_set_cpu(cpu, cpus_hardware_enabled); | 1856 | cpumask_set_cpu(cpu, cpus_hardware_enabled); |
2449 | kvm_arch_hardware_enable(NULL); | 1857 | |
1858 | r = kvm_arch_hardware_enable(NULL); | ||
1859 | |||
1860 | if (r) { | ||
1861 | cpumask_clear_cpu(cpu, cpus_hardware_enabled); | ||
1862 | atomic_inc(&hardware_enable_failed); | ||
1863 | printk(KERN_INFO "kvm: enabling virtualization on " | ||
1864 | "CPU%d failed\n", cpu); | ||
1865 | } | ||
2450 | } | 1866 | } |
2451 | 1867 | ||
2452 | static void hardware_disable(void *junk) | 1868 | static void hardware_disable(void *junk) |
@@ -2459,11 +1875,52 @@ static void hardware_disable(void *junk) | |||
2459 | kvm_arch_hardware_disable(NULL); | 1875 | kvm_arch_hardware_disable(NULL); |
2460 | } | 1876 | } |
2461 | 1877 | ||
1878 | static void hardware_disable_all_nolock(void) | ||
1879 | { | ||
1880 | BUG_ON(!kvm_usage_count); | ||
1881 | |||
1882 | kvm_usage_count--; | ||
1883 | if (!kvm_usage_count) | ||
1884 | on_each_cpu(hardware_disable, NULL, 1); | ||
1885 | } | ||
1886 | |||
1887 | static void hardware_disable_all(void) | ||
1888 | { | ||
1889 | spin_lock(&kvm_lock); | ||
1890 | hardware_disable_all_nolock(); | ||
1891 | spin_unlock(&kvm_lock); | ||
1892 | } | ||
1893 | |||
1894 | static int hardware_enable_all(void) | ||
1895 | { | ||
1896 | int r = 0; | ||
1897 | |||
1898 | spin_lock(&kvm_lock); | ||
1899 | |||
1900 | kvm_usage_count++; | ||
1901 | if (kvm_usage_count == 1) { | ||
1902 | atomic_set(&hardware_enable_failed, 0); | ||
1903 | on_each_cpu(hardware_enable, NULL, 1); | ||
1904 | |||
1905 | if (atomic_read(&hardware_enable_failed)) { | ||
1906 | hardware_disable_all_nolock(); | ||
1907 | r = -EBUSY; | ||
1908 | } | ||
1909 | } | ||
1910 | |||
1911 | spin_unlock(&kvm_lock); | ||
1912 | |||
1913 | return r; | ||
1914 | } | ||
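/*
 * Editor's sketch of the hardware_enable_all()/hardware_disable_all()
 * refcount above: virtualization is switched on when the first VM is
 * created and off when the last one dies, with a rollback if any CPU
 * fails to enable.  Single-threaded model; kvm_lock is implicit and the
 * helper names are stand-ins:
 */
#include <stdio.h>

static int usage_count;
static int enabled;

static int enable_hw(void)   { enabled = 1; return 0; /* may fail */ }
static void disable_hw(void) { enabled = 0; }

static void disable_all(void)
{
	if (--usage_count == 0)
		disable_hw();
}

static int enable_all(void)
{
	if (usage_count++ == 0) {
		if (enable_hw()) {	/* any-CPU failure in the kernel */
			disable_all();	/* roll the count back */
			return -1;	/* -EBUSY in the kernel */
		}
	}
	return 0;
}

int main(void)
{
	enable_all();			/* VM 1: 0 -> 1, enables */
	enable_all();			/* VM 2: 1 -> 2, no-op */
	disable_all();			/* VM 2 exits */
	disable_all();			/* VM 1 exits: disables */
	printf("enabled=%d count=%d\n", enabled, usage_count);
	return 0;
}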
1915 | |||
2462 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | 1916 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, |
2463 | void *v) | 1917 | void *v) |
2464 | { | 1918 | { |
2465 | int cpu = (long)v; | 1919 | int cpu = (long)v; |
2466 | 1920 | ||
1921 | if (!kvm_usage_count) | ||
1922 | return NOTIFY_OK; | ||
1923 | |||
2467 | val &= ~CPU_TASKS_FROZEN; | 1924 | val &= ~CPU_TASKS_FROZEN; |
2468 | switch (val) { | 1925 | switch (val) { |
2469 | case CPU_DYING: | 1926 | case CPU_DYING: |
@@ -2517,12 +1974,7 @@ static struct notifier_block kvm_reboot_notifier = { | |||
2517 | .priority = 0, | 1974 | .priority = 0, |
2518 | }; | 1975 | }; |
2519 | 1976 | ||
2520 | void kvm_io_bus_init(struct kvm_io_bus *bus) | 1977 | static void kvm_io_bus_destroy(struct kvm_io_bus *bus) |
2521 | { | ||
2522 | memset(bus, 0, sizeof(*bus)); | ||
2523 | } | ||
2524 | |||
2525 | void kvm_io_bus_destroy(struct kvm_io_bus *bus) | ||
2526 | { | 1978 | { |
2527 | int i; | 1979 | int i; |
2528 | 1980 | ||
@@ -2531,13 +1983,15 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus) | |||
2531 | 1983 | ||
2532 | kvm_iodevice_destructor(pos); | 1984 | kvm_iodevice_destructor(pos); |
2533 | } | 1985 | } |
1986 | kfree(bus); | ||
2534 | } | 1987 | } |
2535 | 1988 | ||
2536 | /* kvm_io_bus_write - called under kvm->slots_lock */ | 1989 | /* kvm_io_bus_write - called under kvm->slots_lock */ |
2537 | int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, | 1990 | int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
2538 | int len, const void *val) | 1991 | int len, const void *val) |
2539 | { | 1992 | { |
2540 | int i; | 1993 | int i; |
1994 | struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); | ||
2541 | for (i = 0; i < bus->dev_count; i++) | 1995 | for (i = 0; i < bus->dev_count; i++) |
2542 | if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) | 1996 | if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) |
2543 | return 0; | 1997 | return 0; |
@@ -2545,59 +1999,71 @@ int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, | |||
2545 | } | 1999 | } |
2546 | 2000 | ||
2547 | /* kvm_io_bus_read - called under kvm->slots_lock */ | 2001 | /* kvm_io_bus_read - called under kvm->slots_lock */ |
2548 | int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val) | 2002 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
2003 | int len, void *val) | ||
2549 | { | 2004 | { |
2550 | int i; | 2005 | int i; |
2006 | struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); | ||
2007 | |||
2551 | for (i = 0; i < bus->dev_count; i++) | 2008 | for (i = 0; i < bus->dev_count; i++) |
2552 | if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) | 2009 | if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) |
2553 | return 0; | 2010 | return 0; |
2554 | return -EOPNOTSUPP; | 2011 | return -EOPNOTSUPP; |
2555 | } | 2012 | } |
2556 | 2013 | ||
2557 | int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus, | 2014 | /* Caller must hold slots_lock. */ |
2558 | struct kvm_io_device *dev) | 2015 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
2016 | struct kvm_io_device *dev) | ||
2559 | { | 2017 | { |
2560 | int ret; | 2018 | struct kvm_io_bus *new_bus, *bus; |
2561 | |||
2562 | down_write(&kvm->slots_lock); | ||
2563 | ret = __kvm_io_bus_register_dev(bus, dev); | ||
2564 | up_write(&kvm->slots_lock); | ||
2565 | |||
2566 | return ret; | ||
2567 | } | ||
2568 | 2019 | ||
2569 | /* An unlocked version. Caller must have write lock on slots_lock. */ | 2020 | bus = kvm->buses[bus_idx]; |
2570 | int __kvm_io_bus_register_dev(struct kvm_io_bus *bus, | ||
2571 | struct kvm_io_device *dev) | ||
2572 | { | ||
2573 | if (bus->dev_count > NR_IOBUS_DEVS-1) | 2021 | if (bus->dev_count > NR_IOBUS_DEVS-1) |
2574 | return -ENOSPC; | 2022 | return -ENOSPC; |
2575 | 2023 | ||
2576 | bus->devs[bus->dev_count++] = dev; | 2024 | new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); |
2025 | if (!new_bus) | ||
2026 | return -ENOMEM; | ||
2027 | memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); | ||
2028 | new_bus->devs[new_bus->dev_count++] = dev; | ||
2029 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); | ||
2030 | synchronize_srcu_expedited(&kvm->srcu); | ||
2031 | kfree(bus); | ||
2577 | 2032 | ||
2578 | return 0; | 2033 | return 0; |
2579 | } | 2034 | } |
2580 | 2035 | ||
2581 | void kvm_io_bus_unregister_dev(struct kvm *kvm, | 2036 | /* Caller must hold slots_lock. */ |
2582 | struct kvm_io_bus *bus, | 2037 | int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
2583 | struct kvm_io_device *dev) | 2038 | struct kvm_io_device *dev) |
2584 | { | 2039 | { |
2585 | down_write(&kvm->slots_lock); | 2040 | int i, r; |
2586 | __kvm_io_bus_unregister_dev(bus, dev); | 2041 | struct kvm_io_bus *new_bus, *bus; |
2587 | up_write(&kvm->slots_lock); | ||
2588 | } | ||
2589 | 2042 | ||
2590 | /* An unlocked version. Caller must have write lock on slots_lock. */ | 2043 | new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); |
2591 | void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus, | 2044 | if (!new_bus) |
2592 | struct kvm_io_device *dev) | 2045 | return -ENOMEM; |
2593 | { | ||
2594 | int i; | ||
2595 | 2046 | ||
2596 | for (i = 0; i < bus->dev_count; i++) | 2047 | bus = kvm->buses[bus_idx]; |
2597 | if (bus->devs[i] == dev) { | 2048 | memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); |
2598 | bus->devs[i] = bus->devs[--bus->dev_count]; | 2049 | |
2050 | r = -ENOENT; | ||
2051 | for (i = 0; i < new_bus->dev_count; i++) | ||
2052 | if (new_bus->devs[i] == dev) { | ||
2053 | r = 0; | ||
2054 | new_bus->devs[i] = new_bus->devs[--new_bus->dev_count]; | ||
2599 | break; | 2055 | break; |
2600 | } | 2056 | } |
2057 | |||
2058 | if (r) { | ||
2059 | kfree(new_bus); | ||
2060 | return r; | ||
2061 | } | ||
2062 | |||
2063 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); | ||
2064 | synchronize_srcu_expedited(&kvm->srcu); | ||
2065 | kfree(bus); | ||
2066 | return r; | ||
2601 | } | 2067 | } |
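/*
 * Editor's note: kvm_io_bus_unregister_dev() above removes a device by
 * overwriting it with the last array entry ("swap remove"), which is
 * O(1) but does not preserve order - fine here, since bus devices are
 * searched linearly anyway.  Stand-alone sketch of the idiom:
 */
#include <stdio.h>

static int swap_remove(void **devs, int *count, void *dev)
{
	int i;

	for (i = 0; i < *count; i++)
		if (devs[i] == dev) {
			devs[i] = devs[--(*count)];
			return 0;
		}
	return -1;			/* -ENOENT in the kernel */
}

int main(void)
{
	int a, b, c;
	void *devs[3] = { &a, &b, &c };
	int count = 3;

	swap_remove(devs, &count, &b);	/* devs becomes { &a, &c } */
	printf("count=%d\n", count);
	return 0;
}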
2602 | 2068 | ||
2603 | static struct notifier_block kvm_cpu_notifier = { | 2069 | static struct notifier_block kvm_cpu_notifier = { |
@@ -2666,13 +2132,15 @@ static void kvm_exit_debug(void) | |||
2666 | 2132 | ||
2667 | static int kvm_suspend(struct sys_device *dev, pm_message_t state) | 2133 | static int kvm_suspend(struct sys_device *dev, pm_message_t state) |
2668 | { | 2134 | { |
2669 | hardware_disable(NULL); | 2135 | if (kvm_usage_count) |
2136 | hardware_disable(NULL); | ||
2670 | return 0; | 2137 | return 0; |
2671 | } | 2138 | } |
2672 | 2139 | ||
2673 | static int kvm_resume(struct sys_device *dev) | 2140 | static int kvm_resume(struct sys_device *dev) |
2674 | { | 2141 | { |
2675 | hardware_enable(NULL); | 2142 | if (kvm_usage_count) |
2143 | hardware_enable(NULL); | ||
2676 | return 0; | 2144 | return 0; |
2677 | } | 2145 | } |
2678 | 2146 | ||
@@ -2747,7 +2215,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size, | |||
2747 | goto out_free_1; | 2215 | goto out_free_1; |
2748 | } | 2216 | } |
2749 | 2217 | ||
2750 | on_each_cpu(hardware_enable, NULL, 1); | ||
2751 | r = register_cpu_notifier(&kvm_cpu_notifier); | 2218 | r = register_cpu_notifier(&kvm_cpu_notifier); |
2752 | if (r) | 2219 | if (r) |
2753 | goto out_free_2; | 2220 | goto out_free_2; |
@@ -2797,7 +2264,6 @@ out_free_3: | |||
2797 | unregister_reboot_notifier(&kvm_reboot_notifier); | 2264 | unregister_reboot_notifier(&kvm_reboot_notifier); |
2798 | unregister_cpu_notifier(&kvm_cpu_notifier); | 2265 | unregister_cpu_notifier(&kvm_cpu_notifier); |
2799 | out_free_2: | 2266 | out_free_2: |
2800 | on_each_cpu(hardware_disable, NULL, 1); | ||
2801 | out_free_1: | 2267 | out_free_1: |
2802 | kvm_arch_hardware_unsetup(); | 2268 | kvm_arch_hardware_unsetup(); |
2803 | out_free_0a: | 2269 | out_free_0a: |