diff options
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/assigned-dev.c | 818 | ||||
-rw-r--r-- | virt/kvm/eventfd.c | 2 | ||||
-rw-r--r-- | virt/kvm/ioapic.c | 80 | ||||
-rw-r--r-- | virt/kvm/ioapic.h | 5 | ||||
-rw-r--r-- | virt/kvm/irq_comm.c | 231 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 961 |
6 files changed, 1167 insertions, 930 deletions
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c new file mode 100644 index 000000000000..fd9c097b760a --- /dev/null +++ b/virt/kvm/assigned-dev.c | |||
@@ -0,0 +1,818 @@ | |||
1 | /* | ||
2 | * Kernel-based Virtual Machine - device assignment support | ||
3 | * | ||
4 | * Copyright (C) 2006-9 Red Hat, Inc | ||
5 | * | ||
6 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
7 | * the COPYING file in the top-level directory. | ||
8 | * | ||
9 | */ | ||
10 | |||
11 | #include <linux/kvm_host.h> | ||
12 | #include <linux/kvm.h> | ||
13 | #include <linux/uaccess.h> | ||
14 | #include <linux/vmalloc.h> | ||
15 | #include <linux/errno.h> | ||
16 | #include <linux/spinlock.h> | ||
17 | #include <linux/pci.h> | ||
18 | #include <linux/interrupt.h> | ||
19 | #include "irq.h" | ||
20 | |||
21 | static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, | ||
22 | int assigned_dev_id) | ||
23 | { | ||
24 | struct list_head *ptr; | ||
25 | struct kvm_assigned_dev_kernel *match; | ||
26 | |||
27 | list_for_each(ptr, head) { | ||
28 | match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); | ||
29 | if (match->assigned_dev_id == assigned_dev_id) | ||
30 | return match; | ||
31 | } | ||
32 | return NULL; | ||
33 | } | ||
34 | |||
35 | static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | ||
36 | *assigned_dev, int irq) | ||
37 | { | ||
38 | int i, index; | ||
39 | struct msix_entry *host_msix_entries; | ||
40 | |||
41 | host_msix_entries = assigned_dev->host_msix_entries; | ||
42 | |||
43 | index = -1; | ||
44 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
45 | if (irq == host_msix_entries[i].vector) { | ||
46 | index = i; | ||
47 | break; | ||
48 | } | ||
49 | if (index < 0) { | ||
50 | printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); | ||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | return index; | ||
55 | } | ||
56 | |||
57 | static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) | ||
58 | { | ||
59 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
60 | struct kvm *kvm; | ||
61 | int i; | ||
62 | |||
63 | assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, | ||
64 | interrupt_work); | ||
65 | kvm = assigned_dev->kvm; | ||
66 | |||
67 | spin_lock_irq(&assigned_dev->assigned_dev_lock); | ||
68 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
69 | struct kvm_guest_msix_entry *guest_entries = | ||
70 | assigned_dev->guest_msix_entries; | ||
71 | for (i = 0; i < assigned_dev->entries_nr; i++) { | ||
72 | if (!(guest_entries[i].flags & | ||
73 | KVM_ASSIGNED_MSIX_PENDING)) | ||
74 | continue; | ||
75 | guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; | ||
76 | kvm_set_irq(assigned_dev->kvm, | ||
77 | assigned_dev->irq_source_id, | ||
78 | guest_entries[i].vector, 1); | ||
79 | } | ||
80 | } else | ||
81 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | ||
82 | assigned_dev->guest_irq, 1); | ||
83 | |||
84 | spin_unlock_irq(&assigned_dev->assigned_dev_lock); | ||
85 | } | ||
86 | |||
87 | static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) | ||
88 | { | ||
89 | unsigned long flags; | ||
90 | struct kvm_assigned_dev_kernel *assigned_dev = | ||
91 | (struct kvm_assigned_dev_kernel *) dev_id; | ||
92 | |||
93 | spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); | ||
94 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
95 | int index = find_index_from_host_irq(assigned_dev, irq); | ||
96 | if (index < 0) | ||
97 | goto out; | ||
98 | assigned_dev->guest_msix_entries[index].flags |= | ||
99 | KVM_ASSIGNED_MSIX_PENDING; | ||
100 | } | ||
101 | |||
102 | schedule_work(&assigned_dev->interrupt_work); | ||
103 | |||
104 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { | ||
105 | disable_irq_nosync(irq); | ||
106 | assigned_dev->host_irq_disabled = true; | ||
107 | } | ||
108 | |||
109 | out: | ||
110 | spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); | ||
111 | return IRQ_HANDLED; | ||
112 | } | ||
113 | |||
114 | /* Ack the irq line for an assigned device */ | ||
115 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | ||
116 | { | ||
117 | struct kvm_assigned_dev_kernel *dev; | ||
118 | unsigned long flags; | ||
119 | |||
120 | if (kian->gsi == -1) | ||
121 | return; | ||
122 | |||
123 | dev = container_of(kian, struct kvm_assigned_dev_kernel, | ||
124 | ack_notifier); | ||
125 | |||
126 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); | ||
127 | |||
128 | /* The guest irq may be shared so this ack may be | ||
129 | * from another device. | ||
130 | */ | ||
131 | spin_lock_irqsave(&dev->assigned_dev_lock, flags); | ||
132 | if (dev->host_irq_disabled) { | ||
133 | enable_irq(dev->host_irq); | ||
134 | dev->host_irq_disabled = false; | ||
135 | } | ||
136 | spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); | ||
137 | } | ||
138 | |||
139 | static void deassign_guest_irq(struct kvm *kvm, | ||
140 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
141 | { | ||
142 | kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); | ||
143 | assigned_dev->ack_notifier.gsi = -1; | ||
144 | |||
145 | if (assigned_dev->irq_source_id != -1) | ||
146 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); | ||
147 | assigned_dev->irq_source_id = -1; | ||
148 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); | ||
149 | } | ||
150 | |||
151 | /* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ | ||
152 | static void deassign_host_irq(struct kvm *kvm, | ||
153 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
154 | { | ||
155 | /* | ||
156 | * In kvm_free_device_irq, cancel_work_sync return true if: | ||
157 | * 1. work is scheduled, and then cancelled. | ||
158 | * 2. work callback is executed. | ||
159 | * | ||
160 | * The first one ensured that the irq is disabled and no more events | ||
161 | * would happen. But for the second one, the irq may be enabled (e.g. | ||
162 | * for MSI). So we disable irq here to prevent further events. | ||
163 | * | ||
164 | * Notice this maybe result in nested disable if the interrupt type is | ||
165 | * INTx, but it's OK for we are going to free it. | ||
166 | * | ||
167 | * If this function is a part of VM destroy, please ensure that till | ||
168 | * now, the kvm state is still legal for probably we also have to wait | ||
169 | * interrupt_work done. | ||
170 | */ | ||
171 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
172 | int i; | ||
173 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
174 | disable_irq_nosync(assigned_dev-> | ||
175 | host_msix_entries[i].vector); | ||
176 | |||
177 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
178 | |||
179 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
180 | free_irq(assigned_dev->host_msix_entries[i].vector, | ||
181 | (void *)assigned_dev); | ||
182 | |||
183 | assigned_dev->entries_nr = 0; | ||
184 | kfree(assigned_dev->host_msix_entries); | ||
185 | kfree(assigned_dev->guest_msix_entries); | ||
186 | pci_disable_msix(assigned_dev->dev); | ||
187 | } else { | ||
188 | /* Deal with MSI and INTx */ | ||
189 | disable_irq_nosync(assigned_dev->host_irq); | ||
190 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
191 | |||
192 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); | ||
193 | |||
194 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) | ||
195 | pci_disable_msi(assigned_dev->dev); | ||
196 | } | ||
197 | |||
198 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); | ||
199 | } | ||
200 | |||
201 | static int kvm_deassign_irq(struct kvm *kvm, | ||
202 | struct kvm_assigned_dev_kernel *assigned_dev, | ||
203 | unsigned long irq_requested_type) | ||
204 | { | ||
205 | unsigned long guest_irq_type, host_irq_type; | ||
206 | |||
207 | if (!irqchip_in_kernel(kvm)) | ||
208 | return -EINVAL; | ||
209 | /* no irq assignment to deassign */ | ||
210 | if (!assigned_dev->irq_requested_type) | ||
211 | return -ENXIO; | ||
212 | |||
213 | host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; | ||
214 | guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; | ||
215 | |||
216 | if (host_irq_type) | ||
217 | deassign_host_irq(kvm, assigned_dev); | ||
218 | if (guest_irq_type) | ||
219 | deassign_guest_irq(kvm, assigned_dev); | ||
220 | |||
221 | return 0; | ||
222 | } | ||
223 | |||
224 | static void kvm_free_assigned_irq(struct kvm *kvm, | ||
225 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
226 | { | ||
227 | kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); | ||
228 | } | ||
229 | |||
230 | static void kvm_free_assigned_device(struct kvm *kvm, | ||
231 | struct kvm_assigned_dev_kernel | ||
232 | *assigned_dev) | ||
233 | { | ||
234 | kvm_free_assigned_irq(kvm, assigned_dev); | ||
235 | |||
236 | pci_reset_function(assigned_dev->dev); | ||
237 | |||
238 | pci_release_regions(assigned_dev->dev); | ||
239 | pci_disable_device(assigned_dev->dev); | ||
240 | pci_dev_put(assigned_dev->dev); | ||
241 | |||
242 | list_del(&assigned_dev->list); | ||
243 | kfree(assigned_dev); | ||
244 | } | ||
245 | |||
246 | void kvm_free_all_assigned_devices(struct kvm *kvm) | ||
247 | { | ||
248 | struct list_head *ptr, *ptr2; | ||
249 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
250 | |||
251 | list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { | ||
252 | assigned_dev = list_entry(ptr, | ||
253 | struct kvm_assigned_dev_kernel, | ||
254 | list); | ||
255 | |||
256 | kvm_free_assigned_device(kvm, assigned_dev); | ||
257 | } | ||
258 | } | ||
259 | |||
260 | static int assigned_device_enable_host_intx(struct kvm *kvm, | ||
261 | struct kvm_assigned_dev_kernel *dev) | ||
262 | { | ||
263 | dev->host_irq = dev->dev->irq; | ||
264 | /* Even though this is PCI, we don't want to use shared | ||
265 | * interrupts. Sharing host devices with guest-assigned devices | ||
266 | * on the same interrupt line is not a happy situation: there | ||
267 | * are going to be long delays in accepting, acking, etc. | ||
268 | */ | ||
269 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, | ||
270 | 0, "kvm_assigned_intx_device", (void *)dev)) | ||
271 | return -EIO; | ||
272 | return 0; | ||
273 | } | ||
274 | |||
275 | #ifdef __KVM_HAVE_MSI | ||
276 | static int assigned_device_enable_host_msi(struct kvm *kvm, | ||
277 | struct kvm_assigned_dev_kernel *dev) | ||
278 | { | ||
279 | int r; | ||
280 | |||
281 | if (!dev->dev->msi_enabled) { | ||
282 | r = pci_enable_msi(dev->dev); | ||
283 | if (r) | ||
284 | return r; | ||
285 | } | ||
286 | |||
287 | dev->host_irq = dev->dev->irq; | ||
288 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, | ||
289 | "kvm_assigned_msi_device", (void *)dev)) { | ||
290 | pci_disable_msi(dev->dev); | ||
291 | return -EIO; | ||
292 | } | ||
293 | |||
294 | return 0; | ||
295 | } | ||
296 | #endif | ||
297 | |||
298 | #ifdef __KVM_HAVE_MSIX | ||
299 | static int assigned_device_enable_host_msix(struct kvm *kvm, | ||
300 | struct kvm_assigned_dev_kernel *dev) | ||
301 | { | ||
302 | int i, r = -EINVAL; | ||
303 | |||
304 | /* host_msix_entries and guest_msix_entries should have been | ||
305 | * initialized */ | ||
306 | if (dev->entries_nr == 0) | ||
307 | return r; | ||
308 | |||
309 | r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); | ||
310 | if (r) | ||
311 | return r; | ||
312 | |||
313 | for (i = 0; i < dev->entries_nr; i++) { | ||
314 | r = request_irq(dev->host_msix_entries[i].vector, | ||
315 | kvm_assigned_dev_intr, 0, | ||
316 | "kvm_assigned_msix_device", | ||
317 | (void *)dev); | ||
318 | /* FIXME: free requested_irq's on failure */ | ||
319 | if (r) | ||
320 | return r; | ||
321 | } | ||
322 | |||
323 | return 0; | ||
324 | } | ||
325 | |||
326 | #endif | ||
327 | |||
328 | static int assigned_device_enable_guest_intx(struct kvm *kvm, | ||
329 | struct kvm_assigned_dev_kernel *dev, | ||
330 | struct kvm_assigned_irq *irq) | ||
331 | { | ||
332 | dev->guest_irq = irq->guest_irq; | ||
333 | dev->ack_notifier.gsi = irq->guest_irq; | ||
334 | return 0; | ||
335 | } | ||
336 | |||
337 | #ifdef __KVM_HAVE_MSI | ||
338 | static int assigned_device_enable_guest_msi(struct kvm *kvm, | ||
339 | struct kvm_assigned_dev_kernel *dev, | ||
340 | struct kvm_assigned_irq *irq) | ||
341 | { | ||
342 | dev->guest_irq = irq->guest_irq; | ||
343 | dev->ack_notifier.gsi = -1; | ||
344 | dev->host_irq_disabled = false; | ||
345 | return 0; | ||
346 | } | ||
347 | #endif | ||
348 | |||
349 | #ifdef __KVM_HAVE_MSIX | ||
350 | static int assigned_device_enable_guest_msix(struct kvm *kvm, | ||
351 | struct kvm_assigned_dev_kernel *dev, | ||
352 | struct kvm_assigned_irq *irq) | ||
353 | { | ||
354 | dev->guest_irq = irq->guest_irq; | ||
355 | dev->ack_notifier.gsi = -1; | ||
356 | dev->host_irq_disabled = false; | ||
357 | return 0; | ||
358 | } | ||
359 | #endif | ||
360 | |||
361 | static int assign_host_irq(struct kvm *kvm, | ||
362 | struct kvm_assigned_dev_kernel *dev, | ||
363 | __u32 host_irq_type) | ||
364 | { | ||
365 | int r = -EEXIST; | ||
366 | |||
367 | if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) | ||
368 | return r; | ||
369 | |||
370 | switch (host_irq_type) { | ||
371 | case KVM_DEV_IRQ_HOST_INTX: | ||
372 | r = assigned_device_enable_host_intx(kvm, dev); | ||
373 | break; | ||
374 | #ifdef __KVM_HAVE_MSI | ||
375 | case KVM_DEV_IRQ_HOST_MSI: | ||
376 | r = assigned_device_enable_host_msi(kvm, dev); | ||
377 | break; | ||
378 | #endif | ||
379 | #ifdef __KVM_HAVE_MSIX | ||
380 | case KVM_DEV_IRQ_HOST_MSIX: | ||
381 | r = assigned_device_enable_host_msix(kvm, dev); | ||
382 | break; | ||
383 | #endif | ||
384 | default: | ||
385 | r = -EINVAL; | ||
386 | } | ||
387 | |||
388 | if (!r) | ||
389 | dev->irq_requested_type |= host_irq_type; | ||
390 | |||
391 | return r; | ||
392 | } | ||
393 | |||
394 | static int assign_guest_irq(struct kvm *kvm, | ||
395 | struct kvm_assigned_dev_kernel *dev, | ||
396 | struct kvm_assigned_irq *irq, | ||
397 | unsigned long guest_irq_type) | ||
398 | { | ||
399 | int id; | ||
400 | int r = -EEXIST; | ||
401 | |||
402 | if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) | ||
403 | return r; | ||
404 | |||
405 | id = kvm_request_irq_source_id(kvm); | ||
406 | if (id < 0) | ||
407 | return id; | ||
408 | |||
409 | dev->irq_source_id = id; | ||
410 | |||
411 | switch (guest_irq_type) { | ||
412 | case KVM_DEV_IRQ_GUEST_INTX: | ||
413 | r = assigned_device_enable_guest_intx(kvm, dev, irq); | ||
414 | break; | ||
415 | #ifdef __KVM_HAVE_MSI | ||
416 | case KVM_DEV_IRQ_GUEST_MSI: | ||
417 | r = assigned_device_enable_guest_msi(kvm, dev, irq); | ||
418 | break; | ||
419 | #endif | ||
420 | #ifdef __KVM_HAVE_MSIX | ||
421 | case KVM_DEV_IRQ_GUEST_MSIX: | ||
422 | r = assigned_device_enable_guest_msix(kvm, dev, irq); | ||
423 | break; | ||
424 | #endif | ||
425 | default: | ||
426 | r = -EINVAL; | ||
427 | } | ||
428 | |||
429 | if (!r) { | ||
430 | dev->irq_requested_type |= guest_irq_type; | ||
431 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | ||
432 | } else | ||
433 | kvm_free_irq_source_id(kvm, dev->irq_source_id); | ||
434 | |||
435 | return r; | ||
436 | } | ||
437 | |||
438 | /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ | ||
439 | static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, | ||
440 | struct kvm_assigned_irq *assigned_irq) | ||
441 | { | ||
442 | int r = -EINVAL; | ||
443 | struct kvm_assigned_dev_kernel *match; | ||
444 | unsigned long host_irq_type, guest_irq_type; | ||
445 | |||
446 | if (!capable(CAP_SYS_RAWIO)) | ||
447 | return -EPERM; | ||
448 | |||
449 | if (!irqchip_in_kernel(kvm)) | ||
450 | return r; | ||
451 | |||
452 | mutex_lock(&kvm->lock); | ||
453 | r = -ENODEV; | ||
454 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
455 | assigned_irq->assigned_dev_id); | ||
456 | if (!match) | ||
457 | goto out; | ||
458 | |||
459 | host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); | ||
460 | guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); | ||
461 | |||
462 | r = -EINVAL; | ||
463 | /* can only assign one type at a time */ | ||
464 | if (hweight_long(host_irq_type) > 1) | ||
465 | goto out; | ||
466 | if (hweight_long(guest_irq_type) > 1) | ||
467 | goto out; | ||
468 | if (host_irq_type == 0 && guest_irq_type == 0) | ||
469 | goto out; | ||
470 | |||
471 | r = 0; | ||
472 | if (host_irq_type) | ||
473 | r = assign_host_irq(kvm, match, host_irq_type); | ||
474 | if (r) | ||
475 | goto out; | ||
476 | |||
477 | if (guest_irq_type) | ||
478 | r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); | ||
479 | out: | ||
480 | mutex_unlock(&kvm->lock); | ||
481 | return r; | ||
482 | } | ||
483 | |||
484 | static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, | ||
485 | struct kvm_assigned_irq | ||
486 | *assigned_irq) | ||
487 | { | ||
488 | int r = -ENODEV; | ||
489 | struct kvm_assigned_dev_kernel *match; | ||
490 | |||
491 | mutex_lock(&kvm->lock); | ||
492 | |||
493 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
494 | assigned_irq->assigned_dev_id); | ||
495 | if (!match) | ||
496 | goto out; | ||
497 | |||
498 | r = kvm_deassign_irq(kvm, match, assigned_irq->flags); | ||
499 | out: | ||
500 | mutex_unlock(&kvm->lock); | ||
501 | return r; | ||
502 | } | ||
503 | |||
504 | static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | ||
505 | struct kvm_assigned_pci_dev *assigned_dev) | ||
506 | { | ||
507 | int r = 0; | ||
508 | struct kvm_assigned_dev_kernel *match; | ||
509 | struct pci_dev *dev; | ||
510 | |||
511 | down_read(&kvm->slots_lock); | ||
512 | mutex_lock(&kvm->lock); | ||
513 | |||
514 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
515 | assigned_dev->assigned_dev_id); | ||
516 | if (match) { | ||
517 | /* device already assigned */ | ||
518 | r = -EEXIST; | ||
519 | goto out; | ||
520 | } | ||
521 | |||
522 | match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); | ||
523 | if (match == NULL) { | ||
524 | printk(KERN_INFO "%s: Couldn't allocate memory\n", | ||
525 | __func__); | ||
526 | r = -ENOMEM; | ||
527 | goto out; | ||
528 | } | ||
529 | dev = pci_get_bus_and_slot(assigned_dev->busnr, | ||
530 | assigned_dev->devfn); | ||
531 | if (!dev) { | ||
532 | printk(KERN_INFO "%s: host device not found\n", __func__); | ||
533 | r = -EINVAL; | ||
534 | goto out_free; | ||
535 | } | ||
536 | if (pci_enable_device(dev)) { | ||
537 | printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); | ||
538 | r = -EBUSY; | ||
539 | goto out_put; | ||
540 | } | ||
541 | r = pci_request_regions(dev, "kvm_assigned_device"); | ||
542 | if (r) { | ||
543 | printk(KERN_INFO "%s: Could not get access to device regions\n", | ||
544 | __func__); | ||
545 | goto out_disable; | ||
546 | } | ||
547 | |||
548 | pci_reset_function(dev); | ||
549 | |||
550 | match->assigned_dev_id = assigned_dev->assigned_dev_id; | ||
551 | match->host_busnr = assigned_dev->busnr; | ||
552 | match->host_devfn = assigned_dev->devfn; | ||
553 | match->flags = assigned_dev->flags; | ||
554 | match->dev = dev; | ||
555 | spin_lock_init(&match->assigned_dev_lock); | ||
556 | match->irq_source_id = -1; | ||
557 | match->kvm = kvm; | ||
558 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | ||
559 | INIT_WORK(&match->interrupt_work, | ||
560 | kvm_assigned_dev_interrupt_work_handler); | ||
561 | |||
562 | list_add(&match->list, &kvm->arch.assigned_dev_head); | ||
563 | |||
564 | if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { | ||
565 | if (!kvm->arch.iommu_domain) { | ||
566 | r = kvm_iommu_map_guest(kvm); | ||
567 | if (r) | ||
568 | goto out_list_del; | ||
569 | } | ||
570 | r = kvm_assign_device(kvm, match); | ||
571 | if (r) | ||
572 | goto out_list_del; | ||
573 | } | ||
574 | |||
575 | out: | ||
576 | mutex_unlock(&kvm->lock); | ||
577 | up_read(&kvm->slots_lock); | ||
578 | return r; | ||
579 | out_list_del: | ||
580 | list_del(&match->list); | ||
581 | pci_release_regions(dev); | ||
582 | out_disable: | ||
583 | pci_disable_device(dev); | ||
584 | out_put: | ||
585 | pci_dev_put(dev); | ||
586 | out_free: | ||
587 | kfree(match); | ||
588 | mutex_unlock(&kvm->lock); | ||
589 | up_read(&kvm->slots_lock); | ||
590 | return r; | ||
591 | } | ||
592 | |||
593 | static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, | ||
594 | struct kvm_assigned_pci_dev *assigned_dev) | ||
595 | { | ||
596 | int r = 0; | ||
597 | struct kvm_assigned_dev_kernel *match; | ||
598 | |||
599 | mutex_lock(&kvm->lock); | ||
600 | |||
601 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
602 | assigned_dev->assigned_dev_id); | ||
603 | if (!match) { | ||
604 | printk(KERN_INFO "%s: device hasn't been assigned before, " | ||
605 | "so cannot be deassigned\n", __func__); | ||
606 | r = -EINVAL; | ||
607 | goto out; | ||
608 | } | ||
609 | |||
610 | if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) | ||
611 | kvm_deassign_device(kvm, match); | ||
612 | |||
613 | kvm_free_assigned_device(kvm, match); | ||
614 | |||
615 | out: | ||
616 | mutex_unlock(&kvm->lock); | ||
617 | return r; | ||
618 | } | ||
619 | |||
620 | |||
621 | #ifdef __KVM_HAVE_MSIX | ||
622 | static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, | ||
623 | struct kvm_assigned_msix_nr *entry_nr) | ||
624 | { | ||
625 | int r = 0; | ||
626 | struct kvm_assigned_dev_kernel *adev; | ||
627 | |||
628 | mutex_lock(&kvm->lock); | ||
629 | |||
630 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
631 | entry_nr->assigned_dev_id); | ||
632 | if (!adev) { | ||
633 | r = -EINVAL; | ||
634 | goto msix_nr_out; | ||
635 | } | ||
636 | |||
637 | if (adev->entries_nr == 0) { | ||
638 | adev->entries_nr = entry_nr->entry_nr; | ||
639 | if (adev->entries_nr == 0 || | ||
640 | adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { | ||
641 | r = -EINVAL; | ||
642 | goto msix_nr_out; | ||
643 | } | ||
644 | |||
645 | adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * | ||
646 | entry_nr->entry_nr, | ||
647 | GFP_KERNEL); | ||
648 | if (!adev->host_msix_entries) { | ||
649 | r = -ENOMEM; | ||
650 | goto msix_nr_out; | ||
651 | } | ||
652 | adev->guest_msix_entries = kzalloc( | ||
653 | sizeof(struct kvm_guest_msix_entry) * | ||
654 | entry_nr->entry_nr, GFP_KERNEL); | ||
655 | if (!adev->guest_msix_entries) { | ||
656 | kfree(adev->host_msix_entries); | ||
657 | r = -ENOMEM; | ||
658 | goto msix_nr_out; | ||
659 | } | ||
660 | } else /* Not allowed set MSI-X number twice */ | ||
661 | r = -EINVAL; | ||
662 | msix_nr_out: | ||
663 | mutex_unlock(&kvm->lock); | ||
664 | return r; | ||
665 | } | ||
666 | |||
667 | static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, | ||
668 | struct kvm_assigned_msix_entry *entry) | ||
669 | { | ||
670 | int r = 0, i; | ||
671 | struct kvm_assigned_dev_kernel *adev; | ||
672 | |||
673 | mutex_lock(&kvm->lock); | ||
674 | |||
675 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
676 | entry->assigned_dev_id); | ||
677 | |||
678 | if (!adev) { | ||
679 | r = -EINVAL; | ||
680 | goto msix_entry_out; | ||
681 | } | ||
682 | |||
683 | for (i = 0; i < adev->entries_nr; i++) | ||
684 | if (adev->guest_msix_entries[i].vector == 0 || | ||
685 | adev->guest_msix_entries[i].entry == entry->entry) { | ||
686 | adev->guest_msix_entries[i].entry = entry->entry; | ||
687 | adev->guest_msix_entries[i].vector = entry->gsi; | ||
688 | adev->host_msix_entries[i].entry = entry->entry; | ||
689 | break; | ||
690 | } | ||
691 | if (i == adev->entries_nr) { | ||
692 | r = -ENOSPC; | ||
693 | goto msix_entry_out; | ||
694 | } | ||
695 | |||
696 | msix_entry_out: | ||
697 | mutex_unlock(&kvm->lock); | ||
698 | |||
699 | return r; | ||
700 | } | ||
701 | #endif | ||
702 | |||
703 | long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | ||
704 | unsigned long arg) | ||
705 | { | ||
706 | void __user *argp = (void __user *)arg; | ||
707 | int r = -ENOTTY; | ||
708 | |||
709 | switch (ioctl) { | ||
710 | case KVM_ASSIGN_PCI_DEVICE: { | ||
711 | struct kvm_assigned_pci_dev assigned_dev; | ||
712 | |||
713 | r = -EFAULT; | ||
714 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
715 | goto out; | ||
716 | r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); | ||
717 | if (r) | ||
718 | goto out; | ||
719 | break; | ||
720 | } | ||
721 | case KVM_ASSIGN_IRQ: { | ||
722 | r = -EOPNOTSUPP; | ||
723 | break; | ||
724 | } | ||
725 | #ifdef KVM_CAP_ASSIGN_DEV_IRQ | ||
726 | case KVM_ASSIGN_DEV_IRQ: { | ||
727 | struct kvm_assigned_irq assigned_irq; | ||
728 | |||
729 | r = -EFAULT; | ||
730 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
731 | goto out; | ||
732 | r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); | ||
733 | if (r) | ||
734 | goto out; | ||
735 | break; | ||
736 | } | ||
737 | case KVM_DEASSIGN_DEV_IRQ: { | ||
738 | struct kvm_assigned_irq assigned_irq; | ||
739 | |||
740 | r = -EFAULT; | ||
741 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
742 | goto out; | ||
743 | r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); | ||
744 | if (r) | ||
745 | goto out; | ||
746 | break; | ||
747 | } | ||
748 | #endif | ||
749 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT | ||
750 | case KVM_DEASSIGN_PCI_DEVICE: { | ||
751 | struct kvm_assigned_pci_dev assigned_dev; | ||
752 | |||
753 | r = -EFAULT; | ||
754 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
755 | goto out; | ||
756 | r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); | ||
757 | if (r) | ||
758 | goto out; | ||
759 | break; | ||
760 | } | ||
761 | #endif | ||
762 | #ifdef KVM_CAP_IRQ_ROUTING | ||
763 | case KVM_SET_GSI_ROUTING: { | ||
764 | struct kvm_irq_routing routing; | ||
765 | struct kvm_irq_routing __user *urouting; | ||
766 | struct kvm_irq_routing_entry *entries; | ||
767 | |||
768 | r = -EFAULT; | ||
769 | if (copy_from_user(&routing, argp, sizeof(routing))) | ||
770 | goto out; | ||
771 | r = -EINVAL; | ||
772 | if (routing.nr >= KVM_MAX_IRQ_ROUTES) | ||
773 | goto out; | ||
774 | if (routing.flags) | ||
775 | goto out; | ||
776 | r = -ENOMEM; | ||
777 | entries = vmalloc(routing.nr * sizeof(*entries)); | ||
778 | if (!entries) | ||
779 | goto out; | ||
780 | r = -EFAULT; | ||
781 | urouting = argp; | ||
782 | if (copy_from_user(entries, urouting->entries, | ||
783 | routing.nr * sizeof(*entries))) | ||
784 | goto out_free_irq_routing; | ||
785 | r = kvm_set_irq_routing(kvm, entries, routing.nr, | ||
786 | routing.flags); | ||
787 | out_free_irq_routing: | ||
788 | vfree(entries); | ||
789 | break; | ||
790 | } | ||
791 | #endif /* KVM_CAP_IRQ_ROUTING */ | ||
792 | #ifdef __KVM_HAVE_MSIX | ||
793 | case KVM_ASSIGN_SET_MSIX_NR: { | ||
794 | struct kvm_assigned_msix_nr entry_nr; | ||
795 | r = -EFAULT; | ||
796 | if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) | ||
797 | goto out; | ||
798 | r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); | ||
799 | if (r) | ||
800 | goto out; | ||
801 | break; | ||
802 | } | ||
803 | case KVM_ASSIGN_SET_MSIX_ENTRY: { | ||
804 | struct kvm_assigned_msix_entry entry; | ||
805 | r = -EFAULT; | ||
806 | if (copy_from_user(&entry, argp, sizeof entry)) | ||
807 | goto out; | ||
808 | r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); | ||
809 | if (r) | ||
810 | goto out; | ||
811 | break; | ||
812 | } | ||
813 | #endif | ||
814 | } | ||
815 | out: | ||
816 | return r; | ||
817 | } | ||
818 | |||
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index bb4ebd89b9ff..30f70fd511c4 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
@@ -61,10 +61,8 @@ irqfd_inject(struct work_struct *work) | |||
61 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); | 61 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); |
62 | struct kvm *kvm = irqfd->kvm; | 62 | struct kvm *kvm = irqfd->kvm; |
63 | 63 | ||
64 | mutex_lock(&kvm->irq_lock); | ||
65 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); | 64 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); |
66 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); | 65 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); |
67 | mutex_unlock(&kvm->irq_lock); | ||
68 | } | 66 | } |
69 | 67 | ||
70 | /* | 68 | /* |
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 9fe140bb38ec..38a2d20b89de 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c | |||
@@ -182,6 +182,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) | |||
182 | union kvm_ioapic_redirect_entry entry; | 182 | union kvm_ioapic_redirect_entry entry; |
183 | int ret = 1; | 183 | int ret = 1; |
184 | 184 | ||
185 | mutex_lock(&ioapic->lock); | ||
185 | if (irq >= 0 && irq < IOAPIC_NUM_PINS) { | 186 | if (irq >= 0 && irq < IOAPIC_NUM_PINS) { |
186 | entry = ioapic->redirtbl[irq]; | 187 | entry = ioapic->redirtbl[irq]; |
187 | level ^= entry.fields.polarity; | 188 | level ^= entry.fields.polarity; |
@@ -198,34 +199,51 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) | |||
198 | } | 199 | } |
199 | trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); | 200 | trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); |
200 | } | 201 | } |
202 | mutex_unlock(&ioapic->lock); | ||
203 | |||
201 | return ret; | 204 | return ret; |
202 | } | 205 | } |
203 | 206 | ||
204 | static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin, | 207 | static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, |
205 | int trigger_mode) | 208 | int trigger_mode) |
206 | { | 209 | { |
207 | union kvm_ioapic_redirect_entry *ent; | 210 | int i; |
211 | |||
212 | for (i = 0; i < IOAPIC_NUM_PINS; i++) { | ||
213 | union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; | ||
208 | 214 | ||
209 | ent = &ioapic->redirtbl[pin]; | 215 | if (ent->fields.vector != vector) |
216 | continue; | ||
210 | 217 | ||
211 | kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin); | 218 | /* |
219 | * We are dropping lock while calling ack notifiers because ack | ||
220 | * notifier callbacks for assigned devices call into IOAPIC | ||
221 | * recursively. Since remote_irr is cleared only after call | ||
222 | * to notifiers if the same vector will be delivered while lock | ||
223 | * is dropped it will be put into irr and will be delivered | ||
224 | * after ack notifier returns. | ||
225 | */ | ||
226 | mutex_unlock(&ioapic->lock); | ||
227 | kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i); | ||
228 | mutex_lock(&ioapic->lock); | ||
229 | |||
230 | if (trigger_mode != IOAPIC_LEVEL_TRIG) | ||
231 | continue; | ||
212 | 232 | ||
213 | if (trigger_mode == IOAPIC_LEVEL_TRIG) { | ||
214 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); | 233 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); |
215 | ent->fields.remote_irr = 0; | 234 | ent->fields.remote_irr = 0; |
216 | if (!ent->fields.mask && (ioapic->irr & (1 << pin))) | 235 | if (!ent->fields.mask && (ioapic->irr & (1 << i))) |
217 | ioapic_service(ioapic, pin); | 236 | ioapic_service(ioapic, i); |
218 | } | 237 | } |
219 | } | 238 | } |
220 | 239 | ||
221 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) | 240 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) |
222 | { | 241 | { |
223 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 242 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
224 | int i; | ||
225 | 243 | ||
226 | for (i = 0; i < IOAPIC_NUM_PINS; i++) | 244 | mutex_lock(&ioapic->lock); |
227 | if (ioapic->redirtbl[i].fields.vector == vector) | 245 | __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); |
228 | __kvm_ioapic_update_eoi(ioapic, i, trigger_mode); | 246 | mutex_unlock(&ioapic->lock); |
229 | } | 247 | } |
230 | 248 | ||
231 | static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) | 249 | static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) |
@@ -250,8 +268,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, | |||
250 | ioapic_debug("addr %lx\n", (unsigned long)addr); | 268 | ioapic_debug("addr %lx\n", (unsigned long)addr); |
251 | ASSERT(!(addr & 0xf)); /* check alignment */ | 269 | ASSERT(!(addr & 0xf)); /* check alignment */ |
252 | 270 | ||
253 | mutex_lock(&ioapic->kvm->irq_lock); | ||
254 | addr &= 0xff; | 271 | addr &= 0xff; |
272 | mutex_lock(&ioapic->lock); | ||
255 | switch (addr) { | 273 | switch (addr) { |
256 | case IOAPIC_REG_SELECT: | 274 | case IOAPIC_REG_SELECT: |
257 | result = ioapic->ioregsel; | 275 | result = ioapic->ioregsel; |
@@ -265,6 +283,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, | |||
265 | result = 0; | 283 | result = 0; |
266 | break; | 284 | break; |
267 | } | 285 | } |
286 | mutex_unlock(&ioapic->lock); | ||
287 | |||
268 | switch (len) { | 288 | switch (len) { |
269 | case 8: | 289 | case 8: |
270 | *(u64 *) val = result; | 290 | *(u64 *) val = result; |
@@ -277,7 +297,6 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, | |||
277 | default: | 297 | default: |
278 | printk(KERN_WARNING "ioapic: wrong length %d\n", len); | 298 | printk(KERN_WARNING "ioapic: wrong length %d\n", len); |
279 | } | 299 | } |
280 | mutex_unlock(&ioapic->kvm->irq_lock); | ||
281 | return 0; | 300 | return 0; |
282 | } | 301 | } |
283 | 302 | ||
@@ -293,15 +312,15 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, | |||
293 | (void*)addr, len, val); | 312 | (void*)addr, len, val); |
294 | ASSERT(!(addr & 0xf)); /* check alignment */ | 313 | ASSERT(!(addr & 0xf)); /* check alignment */ |
295 | 314 | ||
296 | mutex_lock(&ioapic->kvm->irq_lock); | ||
297 | if (len == 4 || len == 8) | 315 | if (len == 4 || len == 8) |
298 | data = *(u32 *) val; | 316 | data = *(u32 *) val; |
299 | else { | 317 | else { |
300 | printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); | 318 | printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); |
301 | goto unlock; | 319 | return 0; |
302 | } | 320 | } |
303 | 321 | ||
304 | addr &= 0xff; | 322 | addr &= 0xff; |
323 | mutex_lock(&ioapic->lock); | ||
305 | switch (addr) { | 324 | switch (addr) { |
306 | case IOAPIC_REG_SELECT: | 325 | case IOAPIC_REG_SELECT: |
307 | ioapic->ioregsel = data; | 326 | ioapic->ioregsel = data; |
@@ -312,15 +331,14 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, | |||
312 | break; | 331 | break; |
313 | #ifdef CONFIG_IA64 | 332 | #ifdef CONFIG_IA64 |
314 | case IOAPIC_REG_EOI: | 333 | case IOAPIC_REG_EOI: |
315 | kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG); | 334 | __kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG); |
316 | break; | 335 | break; |
317 | #endif | 336 | #endif |
318 | 337 | ||
319 | default: | 338 | default: |
320 | break; | 339 | break; |
321 | } | 340 | } |
322 | unlock: | 341 | mutex_unlock(&ioapic->lock); |
323 | mutex_unlock(&ioapic->kvm->irq_lock); | ||
324 | return 0; | 342 | return 0; |
325 | } | 343 | } |
326 | 344 | ||
@@ -349,6 +367,7 @@ int kvm_ioapic_init(struct kvm *kvm) | |||
349 | ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); | 367 | ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); |
350 | if (!ioapic) | 368 | if (!ioapic) |
351 | return -ENOMEM; | 369 | return -ENOMEM; |
370 | mutex_init(&ioapic->lock); | ||
352 | kvm->arch.vioapic = ioapic; | 371 | kvm->arch.vioapic = ioapic; |
353 | kvm_ioapic_reset(ioapic); | 372 | kvm_ioapic_reset(ioapic); |
354 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); | 373 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); |
@@ -360,3 +379,26 @@ int kvm_ioapic_init(struct kvm *kvm) | |||
360 | return ret; | 379 | return ret; |
361 | } | 380 | } |
362 | 381 | ||
382 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | ||
383 | { | ||
384 | struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); | ||
385 | if (!ioapic) | ||
386 | return -EINVAL; | ||
387 | |||
388 | mutex_lock(&ioapic->lock); | ||
389 | memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); | ||
390 | mutex_unlock(&ioapic->lock); | ||
391 | return 0; | ||
392 | } | ||
393 | |||
394 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | ||
395 | { | ||
396 | struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); | ||
397 | if (!ioapic) | ||
398 | return -EINVAL; | ||
399 | |||
400 | mutex_lock(&ioapic->lock); | ||
401 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); | ||
402 | mutex_unlock(&ioapic->lock); | ||
403 | return 0; | ||
404 | } | ||
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 7080b713c160..419c43b667ab 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h | |||
@@ -41,9 +41,11 @@ struct kvm_ioapic { | |||
41 | u32 irr; | 41 | u32 irr; |
42 | u32 pad; | 42 | u32 pad; |
43 | union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; | 43 | union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; |
44 | unsigned long irq_states[IOAPIC_NUM_PINS]; | ||
44 | struct kvm_io_device dev; | 45 | struct kvm_io_device dev; |
45 | struct kvm *kvm; | 46 | struct kvm *kvm; |
46 | void (*ack_notifier)(void *opaque, int irq); | 47 | void (*ack_notifier)(void *opaque, int irq); |
48 | struct mutex lock; | ||
47 | }; | 49 | }; |
48 | 50 | ||
49 | #ifdef DEBUG | 51 | #ifdef DEBUG |
@@ -73,4 +75,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); | |||
73 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); | 75 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); |
74 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | 76 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, |
75 | struct kvm_lapic_irq *irq); | 77 | struct kvm_lapic_irq *irq); |
78 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | ||
79 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | ||
80 | |||
76 | #endif | 81 | #endif |
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 001663ff401a..9b077342ab54 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c | |||
@@ -31,20 +31,39 @@ | |||
31 | 31 | ||
32 | #include "ioapic.h" | 32 | #include "ioapic.h" |
33 | 33 | ||
34 | static inline int kvm_irq_line_state(unsigned long *irq_state, | ||
35 | int irq_source_id, int level) | ||
36 | { | ||
37 | /* Logical OR for level trig interrupt */ | ||
38 | if (level) | ||
39 | set_bit(irq_source_id, irq_state); | ||
40 | else | ||
41 | clear_bit(irq_source_id, irq_state); | ||
42 | |||
43 | return !!(*irq_state); | ||
44 | } | ||
45 | |||
34 | static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, | 46 | static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, |
35 | struct kvm *kvm, int level) | 47 | struct kvm *kvm, int irq_source_id, int level) |
36 | { | 48 | { |
37 | #ifdef CONFIG_X86 | 49 | #ifdef CONFIG_X86 |
38 | return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level); | 50 | struct kvm_pic *pic = pic_irqchip(kvm); |
51 | level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin], | ||
52 | irq_source_id, level); | ||
53 | return kvm_pic_set_irq(pic, e->irqchip.pin, level); | ||
39 | #else | 54 | #else |
40 | return -1; | 55 | return -1; |
41 | #endif | 56 | #endif |
42 | } | 57 | } |
43 | 58 | ||
44 | static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, | 59 | static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, |
45 | struct kvm *kvm, int level) | 60 | struct kvm *kvm, int irq_source_id, int level) |
46 | { | 61 | { |
47 | return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); | 62 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
63 | level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin], | ||
64 | irq_source_id, level); | ||
65 | |||
66 | return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level); | ||
48 | } | 67 | } |
49 | 68 | ||
50 | inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) | 69 | inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) |
@@ -63,8 +82,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
63 | int i, r = -1; | 82 | int i, r = -1; |
64 | struct kvm_vcpu *vcpu, *lowest = NULL; | 83 | struct kvm_vcpu *vcpu, *lowest = NULL; |
65 | 84 | ||
66 | WARN_ON(!mutex_is_locked(&kvm->irq_lock)); | ||
67 | |||
68 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && | 85 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && |
69 | kvm_is_dm_lowest_prio(irq)) | 86 | kvm_is_dm_lowest_prio(irq)) |
70 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); | 87 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); |
@@ -96,10 +113,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
96 | } | 113 | } |
97 | 114 | ||
98 | static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | 115 | static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, |
99 | struct kvm *kvm, int level) | 116 | struct kvm *kvm, int irq_source_id, int level) |
100 | { | 117 | { |
101 | struct kvm_lapic_irq irq; | 118 | struct kvm_lapic_irq irq; |
102 | 119 | ||
120 | if (!level) | ||
121 | return -1; | ||
122 | |||
103 | trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); | 123 | trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); |
104 | 124 | ||
105 | irq.dest_id = (e->msi.address_lo & | 125 | irq.dest_id = (e->msi.address_lo & |
@@ -116,78 +136,67 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | |||
116 | return kvm_irq_delivery_to_apic(kvm, NULL, &irq); | 136 | return kvm_irq_delivery_to_apic(kvm, NULL, &irq); |
117 | } | 137 | } |
118 | 138 | ||
119 | /* This should be called with the kvm->irq_lock mutex held | 139 | /* |
120 | * Return value: | 140 | * Return value: |
121 | * < 0 Interrupt was ignored (masked or not delivered for other reasons) | 141 | * < 0 Interrupt was ignored (masked or not delivered for other reasons) |
122 | * = 0 Interrupt was coalesced (previous irq is still pending) | 142 | * = 0 Interrupt was coalesced (previous irq is still pending) |
123 | * > 0 Number of CPUs interrupt was delivered to | 143 | * > 0 Number of CPUs interrupt was delivered to |
124 | */ | 144 | */ |
125 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) | 145 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) |
126 | { | 146 | { |
127 | struct kvm_kernel_irq_routing_entry *e; | 147 | struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; |
128 | unsigned long *irq_state, sig_level; | 148 | int ret = -1, i = 0; |
129 | int ret = -1; | 149 | struct kvm_irq_routing_table *irq_rt; |
150 | struct hlist_node *n; | ||
130 | 151 | ||
131 | trace_kvm_set_irq(irq, level, irq_source_id); | 152 | trace_kvm_set_irq(irq, level, irq_source_id); |
132 | 153 | ||
133 | WARN_ON(!mutex_is_locked(&kvm->irq_lock)); | ||
134 | |||
135 | if (irq < KVM_IOAPIC_NUM_PINS) { | ||
136 | irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; | ||
137 | |||
138 | /* Logical OR for level trig interrupt */ | ||
139 | if (level) | ||
140 | set_bit(irq_source_id, irq_state); | ||
141 | else | ||
142 | clear_bit(irq_source_id, irq_state); | ||
143 | sig_level = !!(*irq_state); | ||
144 | } else if (!level) | ||
145 | return ret; | ||
146 | else /* Deal with MSI/MSI-X */ | ||
147 | sig_level = 1; | ||
148 | |||
149 | /* Not possible to detect if the guest uses the PIC or the | 154 | /* Not possible to detect if the guest uses the PIC or the |
150 | * IOAPIC. So set the bit in both. The guest will ignore | 155 | * IOAPIC. So set the bit in both. The guest will ignore |
151 | * writes to the unused one. | 156 | * writes to the unused one. |
152 | */ | 157 | */ |
153 | list_for_each_entry(e, &kvm->irq_routing, link) | 158 | rcu_read_lock(); |
154 | if (e->gsi == irq) { | 159 | irq_rt = rcu_dereference(kvm->irq_routing); |
155 | int r = e->set(e, kvm, sig_level); | 160 | if (irq < irq_rt->nr_rt_entries) |
156 | if (r < 0) | 161 | hlist_for_each_entry(e, n, &irq_rt->map[irq], link) |
157 | continue; | 162 | irq_set[i++] = *e; |
163 | rcu_read_unlock(); | ||
164 | |||
165 | while(i--) { | ||
166 | int r; | ||
167 | r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level); | ||
168 | if (r < 0) | ||
169 | continue; | ||
170 | |||
171 | ret = r + ((ret < 0) ? 0 : ret); | ||
172 | } | ||
158 | 173 | ||
159 | ret = r + ((ret < 0) ? 0 : ret); | ||
160 | } | ||
161 | return ret; | 174 | return ret; |
162 | } | 175 | } |
163 | 176 | ||
164 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | 177 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) |
165 | { | 178 | { |
166 | struct kvm_kernel_irq_routing_entry *e; | ||
167 | struct kvm_irq_ack_notifier *kian; | 179 | struct kvm_irq_ack_notifier *kian; |
168 | struct hlist_node *n; | 180 | struct hlist_node *n; |
169 | unsigned gsi = pin; | 181 | int gsi; |
170 | 182 | ||
171 | trace_kvm_ack_irq(irqchip, pin); | 183 | trace_kvm_ack_irq(irqchip, pin); |
172 | 184 | ||
173 | list_for_each_entry(e, &kvm->irq_routing, link) | 185 | rcu_read_lock(); |
174 | if (e->type == KVM_IRQ_ROUTING_IRQCHIP && | 186 | gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; |
175 | e->irqchip.irqchip == irqchip && | 187 | if (gsi != -1) |
176 | e->irqchip.pin == pin) { | 188 | hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, |
177 | gsi = e->gsi; | 189 | link) |
178 | break; | 190 | if (kian->gsi == gsi) |
179 | } | 191 | kian->irq_acked(kian); |
180 | 192 | rcu_read_unlock(); | |
181 | hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link) | ||
182 | if (kian->gsi == gsi) | ||
183 | kian->irq_acked(kian); | ||
184 | } | 193 | } |
185 | 194 | ||
186 | void kvm_register_irq_ack_notifier(struct kvm *kvm, | 195 | void kvm_register_irq_ack_notifier(struct kvm *kvm, |
187 | struct kvm_irq_ack_notifier *kian) | 196 | struct kvm_irq_ack_notifier *kian) |
188 | { | 197 | { |
189 | mutex_lock(&kvm->irq_lock); | 198 | mutex_lock(&kvm->irq_lock); |
190 | hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); | 199 | hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); |
191 | mutex_unlock(&kvm->irq_lock); | 200 | mutex_unlock(&kvm->irq_lock); |
192 | } | 201 | } |
193 | 202 | ||
@@ -195,8 +204,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, | |||
195 | struct kvm_irq_ack_notifier *kian) | 204 | struct kvm_irq_ack_notifier *kian) |
196 | { | 205 | { |
197 | mutex_lock(&kvm->irq_lock); | 206 | mutex_lock(&kvm->irq_lock); |
198 | hlist_del_init(&kian->link); | 207 | hlist_del_init_rcu(&kian->link); |
199 | mutex_unlock(&kvm->irq_lock); | 208 | mutex_unlock(&kvm->irq_lock); |
209 | synchronize_rcu(); | ||
200 | } | 210 | } |
201 | 211 | ||
202 | int kvm_request_irq_source_id(struct kvm *kvm) | 212 | int kvm_request_irq_source_id(struct kvm *kvm) |
@@ -205,16 +215,17 @@ int kvm_request_irq_source_id(struct kvm *kvm) | |||
205 | int irq_source_id; | 215 | int irq_source_id; |
206 | 216 | ||
207 | mutex_lock(&kvm->irq_lock); | 217 | mutex_lock(&kvm->irq_lock); |
208 | irq_source_id = find_first_zero_bit(bitmap, | 218 | irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG); |
209 | sizeof(kvm->arch.irq_sources_bitmap)); | ||
210 | 219 | ||
211 | if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { | 220 | if (irq_source_id >= BITS_PER_LONG) { |
212 | printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); | 221 | printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); |
213 | return -EFAULT; | 222 | irq_source_id = -EFAULT; |
223 | goto unlock; | ||
214 | } | 224 | } |
215 | 225 | ||
216 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); | 226 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); |
217 | set_bit(irq_source_id, bitmap); | 227 | set_bit(irq_source_id, bitmap); |
228 | unlock: | ||
218 | mutex_unlock(&kvm->irq_lock); | 229 | mutex_unlock(&kvm->irq_lock); |
219 | 230 | ||
220 | return irq_source_id; | 231 | return irq_source_id; |
@@ -228,13 +239,23 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) | |||
228 | 239 | ||
229 | mutex_lock(&kvm->irq_lock); | 240 | mutex_lock(&kvm->irq_lock); |
230 | if (irq_source_id < 0 || | 241 | if (irq_source_id < 0 || |
231 | irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { | 242 | irq_source_id >= BITS_PER_LONG) { |
232 | printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); | 243 | printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); |
233 | return; | 244 | goto unlock; |
234 | } | 245 | } |
235 | for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) | ||
236 | clear_bit(irq_source_id, &kvm->arch.irq_states[i]); | ||
237 | clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); | 246 | clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); |
247 | if (!irqchip_in_kernel(kvm)) | ||
248 | goto unlock; | ||
249 | |||
250 | for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) { | ||
251 | clear_bit(irq_source_id, &kvm->arch.vioapic->irq_states[i]); | ||
252 | if (i >= 16) | ||
253 | continue; | ||
254 | #ifdef CONFIG_X86 | ||
255 | clear_bit(irq_source_id, &pic_irqchip(kvm)->irq_states[i]); | ||
256 | #endif | ||
257 | } | ||
258 | unlock: | ||
238 | mutex_unlock(&kvm->irq_lock); | 259 | mutex_unlock(&kvm->irq_lock); |
239 | } | 260 | } |
240 | 261 | ||
@@ -243,7 +264,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, | |||
243 | { | 264 | { |
244 | mutex_lock(&kvm->irq_lock); | 265 | mutex_lock(&kvm->irq_lock); |
245 | kimn->irq = irq; | 266 | kimn->irq = irq; |
246 | hlist_add_head(&kimn->link, &kvm->mask_notifier_list); | 267 | hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list); |
247 | mutex_unlock(&kvm->irq_lock); | 268 | mutex_unlock(&kvm->irq_lock); |
248 | } | 269 | } |
249 | 270 | ||
@@ -251,8 +272,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, | |||
251 | struct kvm_irq_mask_notifier *kimn) | 272 | struct kvm_irq_mask_notifier *kimn) |
252 | { | 273 | { |
253 | mutex_lock(&kvm->irq_lock); | 274 | mutex_lock(&kvm->irq_lock); |
254 | hlist_del(&kimn->link); | 275 | hlist_del_rcu(&kimn->link); |
255 | mutex_unlock(&kvm->irq_lock); | 276 | mutex_unlock(&kvm->irq_lock); |
277 | synchronize_rcu(); | ||
256 | } | 278 | } |
257 | 279 | ||
258 | void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) | 280 | void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) |
@@ -260,33 +282,37 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) | |||
260 | struct kvm_irq_mask_notifier *kimn; | 282 | struct kvm_irq_mask_notifier *kimn; |
261 | struct hlist_node *n; | 283 | struct hlist_node *n; |
262 | 284 | ||
263 | WARN_ON(!mutex_is_locked(&kvm->irq_lock)); | 285 | rcu_read_lock(); |
264 | 286 | hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link) | |
265 | hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link) | ||
266 | if (kimn->irq == irq) | 287 | if (kimn->irq == irq) |
267 | kimn->func(kimn, mask); | 288 | kimn->func(kimn, mask); |
268 | } | 289 | rcu_read_unlock(); |
269 | |||
270 | static void __kvm_free_irq_routing(struct list_head *irq_routing) | ||
271 | { | ||
272 | struct kvm_kernel_irq_routing_entry *e, *n; | ||
273 | |||
274 | list_for_each_entry_safe(e, n, irq_routing, link) | ||
275 | kfree(e); | ||
276 | } | 290 | } |
277 | 291 | ||
278 | void kvm_free_irq_routing(struct kvm *kvm) | 292 | void kvm_free_irq_routing(struct kvm *kvm) |
279 | { | 293 | { |
280 | mutex_lock(&kvm->irq_lock); | 294 | /* Called only during vm destruction. Nobody can use the pointer |
281 | __kvm_free_irq_routing(&kvm->irq_routing); | 295 | at this stage */ |
282 | mutex_unlock(&kvm->irq_lock); | 296 | kfree(kvm->irq_routing); |
283 | } | 297 | } |
284 | 298 | ||
285 | static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, | 299 | static int setup_routing_entry(struct kvm_irq_routing_table *rt, |
300 | struct kvm_kernel_irq_routing_entry *e, | ||
286 | const struct kvm_irq_routing_entry *ue) | 301 | const struct kvm_irq_routing_entry *ue) |
287 | { | 302 | { |
288 | int r = -EINVAL; | 303 | int r = -EINVAL; |
289 | int delta; | 304 | int delta; |
305 | struct kvm_kernel_irq_routing_entry *ei; | ||
306 | struct hlist_node *n; | ||
307 | |||
308 | /* | ||
309 | * Do not allow GSI to be mapped to the same irqchip more than once. | ||
310 | * Allow only one to one mapping between GSI and MSI. | ||
311 | */ | ||
312 | hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link) | ||
313 | if (ei->type == KVM_IRQ_ROUTING_MSI || | ||
314 | ue->u.irqchip.irqchip == ei->irqchip.irqchip) | ||
315 | return r; | ||
290 | 316 | ||
291 | e->gsi = ue->gsi; | 317 | e->gsi = ue->gsi; |
292 | e->type = ue->type; | 318 | e->type = ue->type; |
@@ -309,6 +335,9 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, | |||
309 | } | 335 | } |
310 | e->irqchip.irqchip = ue->u.irqchip.irqchip; | 336 | e->irqchip.irqchip = ue->u.irqchip.irqchip; |
311 | e->irqchip.pin = ue->u.irqchip.pin + delta; | 337 | e->irqchip.pin = ue->u.irqchip.pin + delta; |
338 | if (e->irqchip.pin >= KVM_IOAPIC_NUM_PINS) | ||
339 | goto out; | ||
340 | rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; | ||
312 | break; | 341 | break; |
313 | case KVM_IRQ_ROUTING_MSI: | 342 | case KVM_IRQ_ROUTING_MSI: |
314 | e->set = kvm_set_msi; | 343 | e->set = kvm_set_msi; |
@@ -319,6 +348,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, | |||
319 | default: | 348 | default: |
320 | goto out; | 349 | goto out; |
321 | } | 350 | } |
351 | |||
352 | hlist_add_head(&e->link, &rt->map[e->gsi]); | ||
322 | r = 0; | 353 | r = 0; |
323 | out: | 354 | out: |
324 | return r; | 355 | return r; |
@@ -330,43 +361,53 @@ int kvm_set_irq_routing(struct kvm *kvm, | |||
330 | unsigned nr, | 361 | unsigned nr, |
331 | unsigned flags) | 362 | unsigned flags) |
332 | { | 363 | { |
333 | struct list_head irq_list = LIST_HEAD_INIT(irq_list); | 364 | struct kvm_irq_routing_table *new, *old; |
334 | struct list_head tmp = LIST_HEAD_INIT(tmp); | 365 | u32 i, j, nr_rt_entries = 0; |
335 | struct kvm_kernel_irq_routing_entry *e = NULL; | ||
336 | unsigned i; | ||
337 | int r; | 366 | int r; |
338 | 367 | ||
339 | for (i = 0; i < nr; ++i) { | 368 | for (i = 0; i < nr; ++i) { |
369 | if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) | ||
370 | return -EINVAL; | ||
371 | nr_rt_entries = max(nr_rt_entries, ue[i].gsi); | ||
372 | } | ||
373 | |||
374 | nr_rt_entries += 1; | ||
375 | |||
376 | new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) | ||
377 | + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), | ||
378 | GFP_KERNEL); | ||
379 | |||
380 | if (!new) | ||
381 | return -ENOMEM; | ||
382 | |||
383 | new->rt_entries = (void *)&new->map[nr_rt_entries]; | ||
384 | |||
385 | new->nr_rt_entries = nr_rt_entries; | ||
386 | for (i = 0; i < 3; i++) | ||
387 | for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++) | ||
388 | new->chip[i][j] = -1; | ||
389 | |||
390 | for (i = 0; i < nr; ++i) { | ||
340 | r = -EINVAL; | 391 | r = -EINVAL; |
341 | if (ue->gsi >= KVM_MAX_IRQ_ROUTES) | ||
342 | goto out; | ||
343 | if (ue->flags) | 392 | if (ue->flags) |
344 | goto out; | 393 | goto out; |
345 | r = -ENOMEM; | 394 | r = setup_routing_entry(new, &new->rt_entries[i], ue); |
346 | e = kzalloc(sizeof(*e), GFP_KERNEL); | ||
347 | if (!e) | ||
348 | goto out; | ||
349 | r = setup_routing_entry(e, ue); | ||
350 | if (r) | 395 | if (r) |
351 | goto out; | 396 | goto out; |
352 | ++ue; | 397 | ++ue; |
353 | list_add(&e->link, &irq_list); | ||
354 | e = NULL; | ||
355 | } | 398 | } |
356 | 399 | ||
357 | mutex_lock(&kvm->irq_lock); | 400 | mutex_lock(&kvm->irq_lock); |
358 | list_splice(&kvm->irq_routing, &tmp); | 401 | old = kvm->irq_routing; |
359 | INIT_LIST_HEAD(&kvm->irq_routing); | 402 | rcu_assign_pointer(kvm->irq_routing, new); |
360 | list_splice(&irq_list, &kvm->irq_routing); | ||
361 | INIT_LIST_HEAD(&irq_list); | ||
362 | list_splice(&tmp, &irq_list); | ||
363 | mutex_unlock(&kvm->irq_lock); | 403 | mutex_unlock(&kvm->irq_lock); |
404 | synchronize_rcu(); | ||
364 | 405 | ||
406 | new = old; | ||
365 | r = 0; | 407 | r = 0; |
366 | 408 | ||
367 | out: | 409 | out: |
368 | kfree(e); | 410 | kfree(new); |
369 | __kvm_free_irq_routing(&irq_list); | ||
370 | return r; | 411 | return r; |
371 | } | 412 | } |
372 | 413 | ||
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index cdca63917e77..e1f2bf8d7b1e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/swap.h> | 43 | #include <linux/swap.h> |
44 | #include <linux/bitops.h> | 44 | #include <linux/bitops.h> |
45 | #include <linux/spinlock.h> | 45 | #include <linux/spinlock.h> |
46 | #include <linux/compat.h> | ||
46 | 47 | ||
47 | #include <asm/processor.h> | 48 | #include <asm/processor.h> |
48 | #include <asm/io.h> | 49 | #include <asm/io.h> |
@@ -54,12 +55,6 @@ | |||
54 | #include "coalesced_mmio.h" | 55 | #include "coalesced_mmio.h" |
55 | #endif | 56 | #endif |
56 | 57 | ||
57 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | ||
58 | #include <linux/pci.h> | ||
59 | #include <linux/interrupt.h> | ||
60 | #include "irq.h" | ||
61 | #endif | ||
62 | |||
63 | #define CREATE_TRACE_POINTS | 58 | #define CREATE_TRACE_POINTS |
64 | #include <trace/events/kvm.h> | 59 | #include <trace/events/kvm.h> |
65 | 60 | ||
@@ -76,6 +71,8 @@ DEFINE_SPINLOCK(kvm_lock); | |||
76 | LIST_HEAD(vm_list); | 71 | LIST_HEAD(vm_list); |
77 | 72 | ||
78 | static cpumask_var_t cpus_hardware_enabled; | 73 | static cpumask_var_t cpus_hardware_enabled; |
74 | static int kvm_usage_count = 0; | ||
75 | static atomic_t hardware_enable_failed; | ||
79 | 76 | ||
80 | struct kmem_cache *kvm_vcpu_cache; | 77 | struct kmem_cache *kvm_vcpu_cache; |
81 | EXPORT_SYMBOL_GPL(kvm_vcpu_cache); | 78 | EXPORT_SYMBOL_GPL(kvm_vcpu_cache); |
@@ -86,615 +83,13 @@ struct dentry *kvm_debugfs_dir; | |||
86 | 83 | ||
87 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, | 84 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, |
88 | unsigned long arg); | 85 | unsigned long arg); |
86 | static int hardware_enable_all(void); | ||
87 | static void hardware_disable_all(void); | ||
89 | 88 | ||
90 | static bool kvm_rebooting; | 89 | static bool kvm_rebooting; |
91 | 90 | ||
92 | static bool largepages_enabled = true; | 91 | static bool largepages_enabled = true; |
93 | 92 | ||
94 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | ||
95 | static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, | ||
96 | int assigned_dev_id) | ||
97 | { | ||
98 | struct list_head *ptr; | ||
99 | struct kvm_assigned_dev_kernel *match; | ||
100 | |||
101 | list_for_each(ptr, head) { | ||
102 | match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); | ||
103 | if (match->assigned_dev_id == assigned_dev_id) | ||
104 | return match; | ||
105 | } | ||
106 | return NULL; | ||
107 | } | ||
108 | |||
109 | static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | ||
110 | *assigned_dev, int irq) | ||
111 | { | ||
112 | int i, index; | ||
113 | struct msix_entry *host_msix_entries; | ||
114 | |||
115 | host_msix_entries = assigned_dev->host_msix_entries; | ||
116 | |||
117 | index = -1; | ||
118 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
119 | if (irq == host_msix_entries[i].vector) { | ||
120 | index = i; | ||
121 | break; | ||
122 | } | ||
123 | if (index < 0) { | ||
124 | printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | return index; | ||
129 | } | ||
130 | |||
131 | static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) | ||
132 | { | ||
133 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
134 | struct kvm *kvm; | ||
135 | int i; | ||
136 | |||
137 | assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, | ||
138 | interrupt_work); | ||
139 | kvm = assigned_dev->kvm; | ||
140 | |||
141 | mutex_lock(&kvm->irq_lock); | ||
142 | spin_lock_irq(&assigned_dev->assigned_dev_lock); | ||
143 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
144 | struct kvm_guest_msix_entry *guest_entries = | ||
145 | assigned_dev->guest_msix_entries; | ||
146 | for (i = 0; i < assigned_dev->entries_nr; i++) { | ||
147 | if (!(guest_entries[i].flags & | ||
148 | KVM_ASSIGNED_MSIX_PENDING)) | ||
149 | continue; | ||
150 | guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; | ||
151 | kvm_set_irq(assigned_dev->kvm, | ||
152 | assigned_dev->irq_source_id, | ||
153 | guest_entries[i].vector, 1); | ||
154 | } | ||
155 | } else | ||
156 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | ||
157 | assigned_dev->guest_irq, 1); | ||
158 | |||
159 | spin_unlock_irq(&assigned_dev->assigned_dev_lock); | ||
160 | mutex_unlock(&assigned_dev->kvm->irq_lock); | ||
161 | } | ||
162 | |||
163 | static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) | ||
164 | { | ||
165 | unsigned long flags; | ||
166 | struct kvm_assigned_dev_kernel *assigned_dev = | ||
167 | (struct kvm_assigned_dev_kernel *) dev_id; | ||
168 | |||
169 | spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); | ||
170 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
171 | int index = find_index_from_host_irq(assigned_dev, irq); | ||
172 | if (index < 0) | ||
173 | goto out; | ||
174 | assigned_dev->guest_msix_entries[index].flags |= | ||
175 | KVM_ASSIGNED_MSIX_PENDING; | ||
176 | } | ||
177 | |||
178 | schedule_work(&assigned_dev->interrupt_work); | ||
179 | |||
180 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { | ||
181 | disable_irq_nosync(irq); | ||
182 | assigned_dev->host_irq_disabled = true; | ||
183 | } | ||
184 | |||
185 | out: | ||
186 | spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); | ||
187 | return IRQ_HANDLED; | ||
188 | } | ||
189 | |||
190 | /* Ack the irq line for an assigned device */ | ||
191 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | ||
192 | { | ||
193 | struct kvm_assigned_dev_kernel *dev; | ||
194 | unsigned long flags; | ||
195 | |||
196 | if (kian->gsi == -1) | ||
197 | return; | ||
198 | |||
199 | dev = container_of(kian, struct kvm_assigned_dev_kernel, | ||
200 | ack_notifier); | ||
201 | |||
202 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); | ||
203 | |||
204 | /* The guest irq may be shared so this ack may be | ||
205 | * from another device. | ||
206 | */ | ||
207 | spin_lock_irqsave(&dev->assigned_dev_lock, flags); | ||
208 | if (dev->host_irq_disabled) { | ||
209 | enable_irq(dev->host_irq); | ||
210 | dev->host_irq_disabled = false; | ||
211 | } | ||
212 | spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); | ||
213 | } | ||
214 | |||
215 | static void deassign_guest_irq(struct kvm *kvm, | ||
216 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
217 | { | ||
218 | kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); | ||
219 | assigned_dev->ack_notifier.gsi = -1; | ||
220 | |||
221 | if (assigned_dev->irq_source_id != -1) | ||
222 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); | ||
223 | assigned_dev->irq_source_id = -1; | ||
224 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); | ||
225 | } | ||
226 | |||
227 | /* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ | ||
228 | static void deassign_host_irq(struct kvm *kvm, | ||
229 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
230 | { | ||
231 | /* | ||
232 | * In kvm_free_device_irq, cancel_work_sync return true if: | ||
233 | * 1. work is scheduled, and then cancelled. | ||
234 | * 2. work callback is executed. | ||
235 | * | ||
236 | * The first one ensured that the irq is disabled and no more events | ||
237 | * would happen. But for the second one, the irq may be enabled (e.g. | ||
238 | * for MSI). So we disable irq here to prevent further events. | ||
239 | * | ||
240 | * Notice this maybe result in nested disable if the interrupt type is | ||
241 | * INTx, but it's OK for we are going to free it. | ||
242 | * | ||
243 | * If this function is a part of VM destroy, please ensure that till | ||
244 | * now, the kvm state is still legal for probably we also have to wait | ||
245 | * interrupt_work done. | ||
246 | */ | ||
247 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
248 | int i; | ||
249 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
250 | disable_irq_nosync(assigned_dev-> | ||
251 | host_msix_entries[i].vector); | ||
252 | |||
253 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
254 | |||
255 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
256 | free_irq(assigned_dev->host_msix_entries[i].vector, | ||
257 | (void *)assigned_dev); | ||
258 | |||
259 | assigned_dev->entries_nr = 0; | ||
260 | kfree(assigned_dev->host_msix_entries); | ||
261 | kfree(assigned_dev->guest_msix_entries); | ||
262 | pci_disable_msix(assigned_dev->dev); | ||
263 | } else { | ||
264 | /* Deal with MSI and INTx */ | ||
265 | disable_irq_nosync(assigned_dev->host_irq); | ||
266 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
267 | |||
268 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); | ||
269 | |||
270 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) | ||
271 | pci_disable_msi(assigned_dev->dev); | ||
272 | } | ||
273 | |||
274 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); | ||
275 | } | ||
276 | |||
277 | static int kvm_deassign_irq(struct kvm *kvm, | ||
278 | struct kvm_assigned_dev_kernel *assigned_dev, | ||
279 | unsigned long irq_requested_type) | ||
280 | { | ||
281 | unsigned long guest_irq_type, host_irq_type; | ||
282 | |||
283 | if (!irqchip_in_kernel(kvm)) | ||
284 | return -EINVAL; | ||
285 | /* no irq assignment to deassign */ | ||
286 | if (!assigned_dev->irq_requested_type) | ||
287 | return -ENXIO; | ||
288 | |||
289 | host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; | ||
290 | guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; | ||
291 | |||
292 | if (host_irq_type) | ||
293 | deassign_host_irq(kvm, assigned_dev); | ||
294 | if (guest_irq_type) | ||
295 | deassign_guest_irq(kvm, assigned_dev); | ||
296 | |||
297 | return 0; | ||
298 | } | ||
299 | |||
300 | static void kvm_free_assigned_irq(struct kvm *kvm, | ||
301 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
302 | { | ||
303 | kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); | ||
304 | } | ||
305 | |||
306 | static void kvm_free_assigned_device(struct kvm *kvm, | ||
307 | struct kvm_assigned_dev_kernel | ||
308 | *assigned_dev) | ||
309 | { | ||
310 | kvm_free_assigned_irq(kvm, assigned_dev); | ||
311 | |||
312 | pci_reset_function(assigned_dev->dev); | ||
313 | |||
314 | pci_release_regions(assigned_dev->dev); | ||
315 | pci_disable_device(assigned_dev->dev); | ||
316 | pci_dev_put(assigned_dev->dev); | ||
317 | |||
318 | list_del(&assigned_dev->list); | ||
319 | kfree(assigned_dev); | ||
320 | } | ||
321 | |||
322 | void kvm_free_all_assigned_devices(struct kvm *kvm) | ||
323 | { | ||
324 | struct list_head *ptr, *ptr2; | ||
325 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
326 | |||
327 | list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { | ||
328 | assigned_dev = list_entry(ptr, | ||
329 | struct kvm_assigned_dev_kernel, | ||
330 | list); | ||
331 | |||
332 | kvm_free_assigned_device(kvm, assigned_dev); | ||
333 | } | ||
334 | } | ||
335 | |||
336 | static int assigned_device_enable_host_intx(struct kvm *kvm, | ||
337 | struct kvm_assigned_dev_kernel *dev) | ||
338 | { | ||
339 | dev->host_irq = dev->dev->irq; | ||
340 | /* Even though this is PCI, we don't want to use shared | ||
341 | * interrupts. Sharing host devices with guest-assigned devices | ||
342 | * on the same interrupt line is not a happy situation: there | ||
343 | * are going to be long delays in accepting, acking, etc. | ||
344 | */ | ||
345 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, | ||
346 | 0, "kvm_assigned_intx_device", (void *)dev)) | ||
347 | return -EIO; | ||
348 | return 0; | ||
349 | } | ||
350 | |||
351 | #ifdef __KVM_HAVE_MSI | ||
352 | static int assigned_device_enable_host_msi(struct kvm *kvm, | ||
353 | struct kvm_assigned_dev_kernel *dev) | ||
354 | { | ||
355 | int r; | ||
356 | |||
357 | if (!dev->dev->msi_enabled) { | ||
358 | r = pci_enable_msi(dev->dev); | ||
359 | if (r) | ||
360 | return r; | ||
361 | } | ||
362 | |||
363 | dev->host_irq = dev->dev->irq; | ||
364 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, | ||
365 | "kvm_assigned_msi_device", (void *)dev)) { | ||
366 | pci_disable_msi(dev->dev); | ||
367 | return -EIO; | ||
368 | } | ||
369 | |||
370 | return 0; | ||
371 | } | ||
372 | #endif | ||
373 | |||
374 | #ifdef __KVM_HAVE_MSIX | ||
375 | static int assigned_device_enable_host_msix(struct kvm *kvm, | ||
376 | struct kvm_assigned_dev_kernel *dev) | ||
377 | { | ||
378 | int i, r = -EINVAL; | ||
379 | |||
380 | /* host_msix_entries and guest_msix_entries should have been | ||
381 | * initialized */ | ||
382 | if (dev->entries_nr == 0) | ||
383 | return r; | ||
384 | |||
385 | r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); | ||
386 | if (r) | ||
387 | return r; | ||
388 | |||
389 | for (i = 0; i < dev->entries_nr; i++) { | ||
390 | r = request_irq(dev->host_msix_entries[i].vector, | ||
391 | kvm_assigned_dev_intr, 0, | ||
392 | "kvm_assigned_msix_device", | ||
393 | (void *)dev); | ||
394 | /* FIXME: free requested_irq's on failure */ | ||
395 | if (r) | ||
396 | return r; | ||
397 | } | ||
398 | |||
399 | return 0; | ||
400 | } | ||
401 | |||
402 | #endif | ||
403 | |||
404 | static int assigned_device_enable_guest_intx(struct kvm *kvm, | ||
405 | struct kvm_assigned_dev_kernel *dev, | ||
406 | struct kvm_assigned_irq *irq) | ||
407 | { | ||
408 | dev->guest_irq = irq->guest_irq; | ||
409 | dev->ack_notifier.gsi = irq->guest_irq; | ||
410 | return 0; | ||
411 | } | ||
412 | |||
413 | #ifdef __KVM_HAVE_MSI | ||
414 | static int assigned_device_enable_guest_msi(struct kvm *kvm, | ||
415 | struct kvm_assigned_dev_kernel *dev, | ||
416 | struct kvm_assigned_irq *irq) | ||
417 | { | ||
418 | dev->guest_irq = irq->guest_irq; | ||
419 | dev->ack_notifier.gsi = -1; | ||
420 | dev->host_irq_disabled = false; | ||
421 | return 0; | ||
422 | } | ||
423 | #endif | ||
424 | #ifdef __KVM_HAVE_MSIX | ||
425 | static int assigned_device_enable_guest_msix(struct kvm *kvm, | ||
426 | struct kvm_assigned_dev_kernel *dev, | ||
427 | struct kvm_assigned_irq *irq) | ||
428 | { | ||
429 | dev->guest_irq = irq->guest_irq; | ||
430 | dev->ack_notifier.gsi = -1; | ||
431 | dev->host_irq_disabled = false; | ||
432 | return 0; | ||
433 | } | ||
434 | #endif | ||
435 | |||
436 | static int assign_host_irq(struct kvm *kvm, | ||
437 | struct kvm_assigned_dev_kernel *dev, | ||
438 | __u32 host_irq_type) | ||
439 | { | ||
440 | int r = -EEXIST; | ||
441 | |||
442 | if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) | ||
443 | return r; | ||
444 | |||
445 | switch (host_irq_type) { | ||
446 | case KVM_DEV_IRQ_HOST_INTX: | ||
447 | r = assigned_device_enable_host_intx(kvm, dev); | ||
448 | break; | ||
449 | #ifdef __KVM_HAVE_MSI | ||
450 | case KVM_DEV_IRQ_HOST_MSI: | ||
451 | r = assigned_device_enable_host_msi(kvm, dev); | ||
452 | break; | ||
453 | #endif | ||
454 | #ifdef __KVM_HAVE_MSIX | ||
455 | case KVM_DEV_IRQ_HOST_MSIX: | ||
456 | r = assigned_device_enable_host_msix(kvm, dev); | ||
457 | break; | ||
458 | #endif | ||
459 | default: | ||
460 | r = -EINVAL; | ||
461 | } | ||
462 | |||
463 | if (!r) | ||
464 | dev->irq_requested_type |= host_irq_type; | ||
465 | |||
466 | return r; | ||
467 | } | ||
468 | |||
469 | static int assign_guest_irq(struct kvm *kvm, | ||
470 | struct kvm_assigned_dev_kernel *dev, | ||
471 | struct kvm_assigned_irq *irq, | ||
472 | unsigned long guest_irq_type) | ||
473 | { | ||
474 | int id; | ||
475 | int r = -EEXIST; | ||
476 | |||
477 | if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) | ||
478 | return r; | ||
479 | |||
480 | id = kvm_request_irq_source_id(kvm); | ||
481 | if (id < 0) | ||
482 | return id; | ||
483 | |||
484 | dev->irq_source_id = id; | ||
485 | |||
486 | switch (guest_irq_type) { | ||
487 | case KVM_DEV_IRQ_GUEST_INTX: | ||
488 | r = assigned_device_enable_guest_intx(kvm, dev, irq); | ||
489 | break; | ||
490 | #ifdef __KVM_HAVE_MSI | ||
491 | case KVM_DEV_IRQ_GUEST_MSI: | ||
492 | r = assigned_device_enable_guest_msi(kvm, dev, irq); | ||
493 | break; | ||
494 | #endif | ||
495 | #ifdef __KVM_HAVE_MSIX | ||
496 | case KVM_DEV_IRQ_GUEST_MSIX: | ||
497 | r = assigned_device_enable_guest_msix(kvm, dev, irq); | ||
498 | break; | ||
499 | #endif | ||
500 | default: | ||
501 | r = -EINVAL; | ||
502 | } | ||
503 | |||
504 | if (!r) { | ||
505 | dev->irq_requested_type |= guest_irq_type; | ||
506 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | ||
507 | } else | ||
508 | kvm_free_irq_source_id(kvm, dev->irq_source_id); | ||
509 | |||
510 | return r; | ||
511 | } | ||
512 | |||
513 | /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ | ||
514 | static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, | ||
515 | struct kvm_assigned_irq *assigned_irq) | ||
516 | { | ||
517 | int r = -EINVAL; | ||
518 | struct kvm_assigned_dev_kernel *match; | ||
519 | unsigned long host_irq_type, guest_irq_type; | ||
520 | |||
521 | if (!capable(CAP_SYS_RAWIO)) | ||
522 | return -EPERM; | ||
523 | |||
524 | if (!irqchip_in_kernel(kvm)) | ||
525 | return r; | ||
526 | |||
527 | mutex_lock(&kvm->lock); | ||
528 | r = -ENODEV; | ||
529 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
530 | assigned_irq->assigned_dev_id); | ||
531 | if (!match) | ||
532 | goto out; | ||
533 | |||
534 | host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); | ||
535 | guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); | ||
536 | |||
537 | r = -EINVAL; | ||
538 | /* can only assign one type at a time */ | ||
539 | if (hweight_long(host_irq_type) > 1) | ||
540 | goto out; | ||
541 | if (hweight_long(guest_irq_type) > 1) | ||
542 | goto out; | ||
543 | if (host_irq_type == 0 && guest_irq_type == 0) | ||
544 | goto out; | ||
545 | |||
546 | r = 0; | ||
547 | if (host_irq_type) | ||
548 | r = assign_host_irq(kvm, match, host_irq_type); | ||
549 | if (r) | ||
550 | goto out; | ||
551 | |||
552 | if (guest_irq_type) | ||
553 | r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); | ||
554 | out: | ||
555 | mutex_unlock(&kvm->lock); | ||
556 | return r; | ||
557 | } | ||
558 | |||
559 | static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, | ||
560 | struct kvm_assigned_irq | ||
561 | *assigned_irq) | ||
562 | { | ||
563 | int r = -ENODEV; | ||
564 | struct kvm_assigned_dev_kernel *match; | ||
565 | |||
566 | mutex_lock(&kvm->lock); | ||
567 | |||
568 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
569 | assigned_irq->assigned_dev_id); | ||
570 | if (!match) | ||
571 | goto out; | ||
572 | |||
573 | r = kvm_deassign_irq(kvm, match, assigned_irq->flags); | ||
574 | out: | ||
575 | mutex_unlock(&kvm->lock); | ||
576 | return r; | ||
577 | } | ||
578 | |||
579 | static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | ||
580 | struct kvm_assigned_pci_dev *assigned_dev) | ||
581 | { | ||
582 | int r = 0; | ||
583 | struct kvm_assigned_dev_kernel *match; | ||
584 | struct pci_dev *dev; | ||
585 | |||
586 | down_read(&kvm->slots_lock); | ||
587 | mutex_lock(&kvm->lock); | ||
588 | |||
589 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
590 | assigned_dev->assigned_dev_id); | ||
591 | if (match) { | ||
592 | /* device already assigned */ | ||
593 | r = -EEXIST; | ||
594 | goto out; | ||
595 | } | ||
596 | |||
597 | match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); | ||
598 | if (match == NULL) { | ||
599 | printk(KERN_INFO "%s: Couldn't allocate memory\n", | ||
600 | __func__); | ||
601 | r = -ENOMEM; | ||
602 | goto out; | ||
603 | } | ||
604 | dev = pci_get_bus_and_slot(assigned_dev->busnr, | ||
605 | assigned_dev->devfn); | ||
606 | if (!dev) { | ||
607 | printk(KERN_INFO "%s: host device not found\n", __func__); | ||
608 | r = -EINVAL; | ||
609 | goto out_free; | ||
610 | } | ||
611 | if (pci_enable_device(dev)) { | ||
612 | printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); | ||
613 | r = -EBUSY; | ||
614 | goto out_put; | ||
615 | } | ||
616 | r = pci_request_regions(dev, "kvm_assigned_device"); | ||
617 | if (r) { | ||
618 | printk(KERN_INFO "%s: Could not get access to device regions\n", | ||
619 | __func__); | ||
620 | goto out_disable; | ||
621 | } | ||
622 | |||
623 | pci_reset_function(dev); | ||
624 | |||
625 | match->assigned_dev_id = assigned_dev->assigned_dev_id; | ||
626 | match->host_busnr = assigned_dev->busnr; | ||
627 | match->host_devfn = assigned_dev->devfn; | ||
628 | match->flags = assigned_dev->flags; | ||
629 | match->dev = dev; | ||
630 | spin_lock_init(&match->assigned_dev_lock); | ||
631 | match->irq_source_id = -1; | ||
632 | match->kvm = kvm; | ||
633 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | ||
634 | INIT_WORK(&match->interrupt_work, | ||
635 | kvm_assigned_dev_interrupt_work_handler); | ||
636 | |||
637 | list_add(&match->list, &kvm->arch.assigned_dev_head); | ||
638 | |||
639 | if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { | ||
640 | if (!kvm->arch.iommu_domain) { | ||
641 | r = kvm_iommu_map_guest(kvm); | ||
642 | if (r) | ||
643 | goto out_list_del; | ||
644 | } | ||
645 | r = kvm_assign_device(kvm, match); | ||
646 | if (r) | ||
647 | goto out_list_del; | ||
648 | } | ||
649 | |||
650 | out: | ||
651 | mutex_unlock(&kvm->lock); | ||
652 | up_read(&kvm->slots_lock); | ||
653 | return r; | ||
654 | out_list_del: | ||
655 | list_del(&match->list); | ||
656 | pci_release_regions(dev); | ||
657 | out_disable: | ||
658 | pci_disable_device(dev); | ||
659 | out_put: | ||
660 | pci_dev_put(dev); | ||
661 | out_free: | ||
662 | kfree(match); | ||
663 | mutex_unlock(&kvm->lock); | ||
664 | up_read(&kvm->slots_lock); | ||
665 | return r; | ||
666 | } | ||
667 | #endif | ||
668 | |||
669 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT | ||
670 | static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, | ||
671 | struct kvm_assigned_pci_dev *assigned_dev) | ||
672 | { | ||
673 | int r = 0; | ||
674 | struct kvm_assigned_dev_kernel *match; | ||
675 | |||
676 | mutex_lock(&kvm->lock); | ||
677 | |||
678 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
679 | assigned_dev->assigned_dev_id); | ||
680 | if (!match) { | ||
681 | printk(KERN_INFO "%s: device hasn't been assigned before, " | ||
682 | "so cannot be deassigned\n", __func__); | ||
683 | r = -EINVAL; | ||
684 | goto out; | ||
685 | } | ||
686 | |||
687 | if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) | ||
688 | kvm_deassign_device(kvm, match); | ||
689 | |||
690 | kvm_free_assigned_device(kvm, match); | ||
691 | |||
692 | out: | ||
693 | mutex_unlock(&kvm->lock); | ||
694 | return r; | ||
695 | } | ||
696 | #endif | ||
697 | |||
698 | inline int kvm_is_mmio_pfn(pfn_t pfn) | 93 | inline int kvm_is_mmio_pfn(pfn_t pfn) |
699 | { | 94 | { |
700 | if (pfn_valid(pfn)) { | 95 | if (pfn_valid(pfn)) { |
@@ -950,6 +345,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { | |||
950 | 345 | ||
951 | static struct kvm *kvm_create_vm(void) | 346 | static struct kvm *kvm_create_vm(void) |
952 | { | 347 | { |
348 | int r = 0; | ||
953 | struct kvm *kvm = kvm_arch_create_vm(); | 349 | struct kvm *kvm = kvm_arch_create_vm(); |
954 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 350 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
955 | struct page *page; | 351 | struct page *page; |
@@ -957,16 +353,21 @@ static struct kvm *kvm_create_vm(void) | |||
957 | 353 | ||
958 | if (IS_ERR(kvm)) | 354 | if (IS_ERR(kvm)) |
959 | goto out; | 355 | goto out; |
356 | |||
357 | r = hardware_enable_all(); | ||
358 | if (r) | ||
359 | goto out_err_nodisable; | ||
360 | |||
960 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 361 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
961 | INIT_LIST_HEAD(&kvm->irq_routing); | ||
962 | INIT_HLIST_HEAD(&kvm->mask_notifier_list); | 362 | INIT_HLIST_HEAD(&kvm->mask_notifier_list); |
363 | INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); | ||
963 | #endif | 364 | #endif |
964 | 365 | ||
965 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 366 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
966 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 367 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
967 | if (!page) { | 368 | if (!page) { |
968 | kfree(kvm); | 369 | r = -ENOMEM; |
969 | return ERR_PTR(-ENOMEM); | 370 | goto out_err; |
970 | } | 371 | } |
971 | kvm->coalesced_mmio_ring = | 372 | kvm->coalesced_mmio_ring = |
972 | (struct kvm_coalesced_mmio_ring *)page_address(page); | 373 | (struct kvm_coalesced_mmio_ring *)page_address(page); |
@@ -974,15 +375,13 @@ static struct kvm *kvm_create_vm(void) | |||
974 | 375 | ||
975 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | 376 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) |
976 | { | 377 | { |
977 | int err; | ||
978 | kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; | 378 | kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; |
979 | err = mmu_notifier_register(&kvm->mmu_notifier, current->mm); | 379 | r = mmu_notifier_register(&kvm->mmu_notifier, current->mm); |
980 | if (err) { | 380 | if (r) { |
981 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 381 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
982 | put_page(page); | 382 | put_page(page); |
983 | #endif | 383 | #endif |
984 | kfree(kvm); | 384 | goto out_err; |
985 | return ERR_PTR(err); | ||
986 | } | 385 | } |
987 | } | 386 | } |
988 | #endif | 387 | #endif |
@@ -1006,6 +405,12 @@ static struct kvm *kvm_create_vm(void) | |||
1006 | #endif | 405 | #endif |
1007 | out: | 406 | out: |
1008 | return kvm; | 407 | return kvm; |
408 | |||
409 | out_err: | ||
410 | hardware_disable_all(); | ||
411 | out_err_nodisable: | ||
412 | kfree(kvm); | ||
413 | return ERR_PTR(r); | ||
1009 | } | 414 | } |
1010 | 415 | ||
1011 | /* | 416 | /* |
@@ -1064,6 +469,7 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
1064 | kvm_arch_flush_shadow(kvm); | 469 | kvm_arch_flush_shadow(kvm); |
1065 | #endif | 470 | #endif |
1066 | kvm_arch_destroy_vm(kvm); | 471 | kvm_arch_destroy_vm(kvm); |
472 | hardware_disable_all(); | ||
1067 | mmdrop(mm); | 473 | mmdrop(mm); |
1068 | } | 474 | } |
1069 | 475 | ||
@@ -1690,9 +1096,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
1690 | if (signal_pending(current)) | 1096 | if (signal_pending(current)) |
1691 | break; | 1097 | break; |
1692 | 1098 | ||
1693 | vcpu_put(vcpu); | ||
1694 | schedule(); | 1099 | schedule(); |
1695 | vcpu_load(vcpu); | ||
1696 | } | 1100 | } |
1697 | 1101 | ||
1698 | finish_wait(&vcpu->wq, &wait); | 1102 | finish_wait(&vcpu->wq, &wait); |
@@ -1706,6 +1110,21 @@ void kvm_resched(struct kvm_vcpu *vcpu) | |||
1706 | } | 1110 | } |
1707 | EXPORT_SYMBOL_GPL(kvm_resched); | 1111 | EXPORT_SYMBOL_GPL(kvm_resched); |
1708 | 1112 | ||
1113 | void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu) | ||
1114 | { | ||
1115 | ktime_t expires; | ||
1116 | DEFINE_WAIT(wait); | ||
1117 | |||
1118 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | ||
1119 | |||
1120 | /* Sleep for 100 us, and hope lock-holder got scheduled */ | ||
1121 | expires = ktime_add_ns(ktime_get(), 100000UL); | ||
1122 | schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); | ||
1123 | |||
1124 | finish_wait(&vcpu->wq, &wait); | ||
1125 | } | ||
1126 | EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); | ||
1127 | |||
1709 | static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1128 | static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
1710 | { | 1129 | { |
1711 | struct kvm_vcpu *vcpu = vma->vm_file->private_data; | 1130 | struct kvm_vcpu *vcpu = vma->vm_file->private_data; |
@@ -1829,88 +1248,6 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) | |||
1829 | return 0; | 1248 | return 0; |
1830 | } | 1249 | } |
1831 | 1250 | ||
1832 | #ifdef __KVM_HAVE_MSIX | ||
1833 | static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, | ||
1834 | struct kvm_assigned_msix_nr *entry_nr) | ||
1835 | { | ||
1836 | int r = 0; | ||
1837 | struct kvm_assigned_dev_kernel *adev; | ||
1838 | |||
1839 | mutex_lock(&kvm->lock); | ||
1840 | |||
1841 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
1842 | entry_nr->assigned_dev_id); | ||
1843 | if (!adev) { | ||
1844 | r = -EINVAL; | ||
1845 | goto msix_nr_out; | ||
1846 | } | ||
1847 | |||
1848 | if (adev->entries_nr == 0) { | ||
1849 | adev->entries_nr = entry_nr->entry_nr; | ||
1850 | if (adev->entries_nr == 0 || | ||
1851 | adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { | ||
1852 | r = -EINVAL; | ||
1853 | goto msix_nr_out; | ||
1854 | } | ||
1855 | |||
1856 | adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * | ||
1857 | entry_nr->entry_nr, | ||
1858 | GFP_KERNEL); | ||
1859 | if (!adev->host_msix_entries) { | ||
1860 | r = -ENOMEM; | ||
1861 | goto msix_nr_out; | ||
1862 | } | ||
1863 | adev->guest_msix_entries = kzalloc( | ||
1864 | sizeof(struct kvm_guest_msix_entry) * | ||
1865 | entry_nr->entry_nr, GFP_KERNEL); | ||
1866 | if (!adev->guest_msix_entries) { | ||
1867 | kfree(adev->host_msix_entries); | ||
1868 | r = -ENOMEM; | ||
1869 | goto msix_nr_out; | ||
1870 | } | ||
1871 | } else /* Not allowed set MSI-X number twice */ | ||
1872 | r = -EINVAL; | ||
1873 | msix_nr_out: | ||
1874 | mutex_unlock(&kvm->lock); | ||
1875 | return r; | ||
1876 | } | ||
1877 | |||
1878 | static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, | ||
1879 | struct kvm_assigned_msix_entry *entry) | ||
1880 | { | ||
1881 | int r = 0, i; | ||
1882 | struct kvm_assigned_dev_kernel *adev; | ||
1883 | |||
1884 | mutex_lock(&kvm->lock); | ||
1885 | |||
1886 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
1887 | entry->assigned_dev_id); | ||
1888 | |||
1889 | if (!adev) { | ||
1890 | r = -EINVAL; | ||
1891 | goto msix_entry_out; | ||
1892 | } | ||
1893 | |||
1894 | for (i = 0; i < adev->entries_nr; i++) | ||
1895 | if (adev->guest_msix_entries[i].vector == 0 || | ||
1896 | adev->guest_msix_entries[i].entry == entry->entry) { | ||
1897 | adev->guest_msix_entries[i].entry = entry->entry; | ||
1898 | adev->guest_msix_entries[i].vector = entry->gsi; | ||
1899 | adev->host_msix_entries[i].entry = entry->entry; | ||
1900 | break; | ||
1901 | } | ||
1902 | if (i == adev->entries_nr) { | ||
1903 | r = -ENOSPC; | ||
1904 | goto msix_entry_out; | ||
1905 | } | ||
1906 | |||
1907 | msix_entry_out: | ||
1908 | mutex_unlock(&kvm->lock); | ||
1909 | |||
1910 | return r; | ||
1911 | } | ||
1912 | #endif | ||
1913 | |||
1914 | static long kvm_vcpu_ioctl(struct file *filp, | 1251 | static long kvm_vcpu_ioctl(struct file *filp, |
1915 | unsigned int ioctl, unsigned long arg) | 1252 | unsigned int ioctl, unsigned long arg) |
1916 | { | 1253 | { |
@@ -2169,112 +1506,6 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2169 | break; | 1506 | break; |
2170 | } | 1507 | } |
2171 | #endif | 1508 | #endif |
2172 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | ||
2173 | case KVM_ASSIGN_PCI_DEVICE: { | ||
2174 | struct kvm_assigned_pci_dev assigned_dev; | ||
2175 | |||
2176 | r = -EFAULT; | ||
2177 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
2178 | goto out; | ||
2179 | r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); | ||
2180 | if (r) | ||
2181 | goto out; | ||
2182 | break; | ||
2183 | } | ||
2184 | case KVM_ASSIGN_IRQ: { | ||
2185 | r = -EOPNOTSUPP; | ||
2186 | break; | ||
2187 | } | ||
2188 | #ifdef KVM_CAP_ASSIGN_DEV_IRQ | ||
2189 | case KVM_ASSIGN_DEV_IRQ: { | ||
2190 | struct kvm_assigned_irq assigned_irq; | ||
2191 | |||
2192 | r = -EFAULT; | ||
2193 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
2194 | goto out; | ||
2195 | r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); | ||
2196 | if (r) | ||
2197 | goto out; | ||
2198 | break; | ||
2199 | } | ||
2200 | case KVM_DEASSIGN_DEV_IRQ: { | ||
2201 | struct kvm_assigned_irq assigned_irq; | ||
2202 | |||
2203 | r = -EFAULT; | ||
2204 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
2205 | goto out; | ||
2206 | r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); | ||
2207 | if (r) | ||
2208 | goto out; | ||
2209 | break; | ||
2210 | } | ||
2211 | #endif | ||
2212 | #endif | ||
2213 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT | ||
2214 | case KVM_DEASSIGN_PCI_DEVICE: { | ||
2215 | struct kvm_assigned_pci_dev assigned_dev; | ||
2216 | |||
2217 | r = -EFAULT; | ||
2218 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
2219 | goto out; | ||
2220 | r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); | ||
2221 | if (r) | ||
2222 | goto out; | ||
2223 | break; | ||
2224 | } | ||
2225 | #endif | ||
2226 | #ifdef KVM_CAP_IRQ_ROUTING | ||
2227 | case KVM_SET_GSI_ROUTING: { | ||
2228 | struct kvm_irq_routing routing; | ||
2229 | struct kvm_irq_routing __user *urouting; | ||
2230 | struct kvm_irq_routing_entry *entries; | ||
2231 | |||
2232 | r = -EFAULT; | ||
2233 | if (copy_from_user(&routing, argp, sizeof(routing))) | ||
2234 | goto out; | ||
2235 | r = -EINVAL; | ||
2236 | if (routing.nr >= KVM_MAX_IRQ_ROUTES) | ||
2237 | goto out; | ||
2238 | if (routing.flags) | ||
2239 | goto out; | ||
2240 | r = -ENOMEM; | ||
2241 | entries = vmalloc(routing.nr * sizeof(*entries)); | ||
2242 | if (!entries) | ||
2243 | goto out; | ||
2244 | r = -EFAULT; | ||
2245 | urouting = argp; | ||
2246 | if (copy_from_user(entries, urouting->entries, | ||
2247 | routing.nr * sizeof(*entries))) | ||
2248 | goto out_free_irq_routing; | ||
2249 | r = kvm_set_irq_routing(kvm, entries, routing.nr, | ||
2250 | routing.flags); | ||
2251 | out_free_irq_routing: | ||
2252 | vfree(entries); | ||
2253 | break; | ||
2254 | } | ||
2255 | #endif /* KVM_CAP_IRQ_ROUTING */ | ||
2256 | #ifdef __KVM_HAVE_MSIX | ||
2257 | case KVM_ASSIGN_SET_MSIX_NR: { | ||
2258 | struct kvm_assigned_msix_nr entry_nr; | ||
2259 | r = -EFAULT; | ||
2260 | if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) | ||
2261 | goto out; | ||
2262 | r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); | ||
2263 | if (r) | ||
2264 | goto out; | ||
2265 | break; | ||
2266 | } | ||
2267 | case KVM_ASSIGN_SET_MSIX_ENTRY: { | ||
2268 | struct kvm_assigned_msix_entry entry; | ||
2269 | r = -EFAULT; | ||
2270 | if (copy_from_user(&entry, argp, sizeof entry)) | ||
2271 | goto out; | ||
2272 | r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); | ||
2273 | if (r) | ||
2274 | goto out; | ||
2275 | break; | ||
2276 | } | ||
2277 | #endif | ||
2278 | case KVM_IRQFD: { | 1509 | case KVM_IRQFD: { |
2279 | struct kvm_irqfd data; | 1510 | struct kvm_irqfd data; |
2280 | 1511 | ||
@@ -2306,11 +1537,59 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2306 | #endif | 1537 | #endif |
2307 | default: | 1538 | default: |
2308 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); | 1539 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); |
1540 | if (r == -ENOTTY) | ||
1541 | r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); | ||
2309 | } | 1542 | } |
2310 | out: | 1543 | out: |
2311 | return r; | 1544 | return r; |
2312 | } | 1545 | } |
2313 | 1546 | ||
1547 | #ifdef CONFIG_COMPAT | ||
1548 | struct compat_kvm_dirty_log { | ||
1549 | __u32 slot; | ||
1550 | __u32 padding1; | ||
1551 | union { | ||
1552 | compat_uptr_t dirty_bitmap; /* one bit per page */ | ||
1553 | __u64 padding2; | ||
1554 | }; | ||
1555 | }; | ||
1556 | |||
1557 | static long kvm_vm_compat_ioctl(struct file *filp, | ||
1558 | unsigned int ioctl, unsigned long arg) | ||
1559 | { | ||
1560 | struct kvm *kvm = filp->private_data; | ||
1561 | int r; | ||
1562 | |||
1563 | if (kvm->mm != current->mm) | ||
1564 | return -EIO; | ||
1565 | switch (ioctl) { | ||
1566 | case KVM_GET_DIRTY_LOG: { | ||
1567 | struct compat_kvm_dirty_log compat_log; | ||
1568 | struct kvm_dirty_log log; | ||
1569 | |||
1570 | r = -EFAULT; | ||
1571 | if (copy_from_user(&compat_log, (void __user *)arg, | ||
1572 | sizeof(compat_log))) | ||
1573 | goto out; | ||
1574 | log.slot = compat_log.slot; | ||
1575 | log.padding1 = compat_log.padding1; | ||
1576 | log.padding2 = compat_log.padding2; | ||
1577 | log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); | ||
1578 | |||
1579 | r = kvm_vm_ioctl_get_dirty_log(kvm, &log); | ||
1580 | if (r) | ||
1581 | goto out; | ||
1582 | break; | ||
1583 | } | ||
1584 | default: | ||
1585 | r = kvm_vm_ioctl(filp, ioctl, arg); | ||
1586 | } | ||
1587 | |||
1588 | out: | ||
1589 | return r; | ||
1590 | } | ||
1591 | #endif | ||
1592 | |||
2314 | static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1593 | static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
2315 | { | 1594 | { |
2316 | struct page *page[1]; | 1595 | struct page *page[1]; |
@@ -2345,7 +1624,9 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) | |||
2345 | static struct file_operations kvm_vm_fops = { | 1624 | static struct file_operations kvm_vm_fops = { |
2346 | .release = kvm_vm_release, | 1625 | .release = kvm_vm_release, |
2347 | .unlocked_ioctl = kvm_vm_ioctl, | 1626 | .unlocked_ioctl = kvm_vm_ioctl, |
2348 | .compat_ioctl = kvm_vm_ioctl, | 1627 | #ifdef CONFIG_COMPAT |
1628 | .compat_ioctl = kvm_vm_compat_ioctl, | ||
1629 | #endif | ||
2349 | .mmap = kvm_vm_mmap, | 1630 | .mmap = kvm_vm_mmap, |
2350 | }; | 1631 | }; |
2351 | 1632 | ||
@@ -2373,6 +1654,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) | |||
2373 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | 1654 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE |
2374 | case KVM_CAP_SET_BOOT_CPU_ID: | 1655 | case KVM_CAP_SET_BOOT_CPU_ID: |
2375 | #endif | 1656 | #endif |
1657 | case KVM_CAP_INTERNAL_ERROR_DATA: | ||
2376 | return 1; | 1658 | return 1; |
2377 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 1659 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
2378 | case KVM_CAP_IRQ_ROUTING: | 1660 | case KVM_CAP_IRQ_ROUTING: |
@@ -2443,11 +1725,21 @@ static struct miscdevice kvm_dev = { | |||
2443 | static void hardware_enable(void *junk) | 1725 | static void hardware_enable(void *junk) |
2444 | { | 1726 | { |
2445 | int cpu = raw_smp_processor_id(); | 1727 | int cpu = raw_smp_processor_id(); |
1728 | int r; | ||
2446 | 1729 | ||
2447 | if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) | 1730 | if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) |
2448 | return; | 1731 | return; |
1732 | |||
2449 | cpumask_set_cpu(cpu, cpus_hardware_enabled); | 1733 | cpumask_set_cpu(cpu, cpus_hardware_enabled); |
2450 | kvm_arch_hardware_enable(NULL); | 1734 | |
1735 | r = kvm_arch_hardware_enable(NULL); | ||
1736 | |||
1737 | if (r) { | ||
1738 | cpumask_clear_cpu(cpu, cpus_hardware_enabled); | ||
1739 | atomic_inc(&hardware_enable_failed); | ||
1740 | printk(KERN_INFO "kvm: enabling virtualization on " | ||
1741 | "CPU%d failed\n", cpu); | ||
1742 | } | ||
2451 | } | 1743 | } |
2452 | 1744 | ||
2453 | static void hardware_disable(void *junk) | 1745 | static void hardware_disable(void *junk) |
@@ -2460,11 +1752,52 @@ static void hardware_disable(void *junk) | |||
2460 | kvm_arch_hardware_disable(NULL); | 1752 | kvm_arch_hardware_disable(NULL); |
2461 | } | 1753 | } |
2462 | 1754 | ||
1755 | static void hardware_disable_all_nolock(void) | ||
1756 | { | ||
1757 | BUG_ON(!kvm_usage_count); | ||
1758 | |||
1759 | kvm_usage_count--; | ||
1760 | if (!kvm_usage_count) | ||
1761 | on_each_cpu(hardware_disable, NULL, 1); | ||
1762 | } | ||
1763 | |||
1764 | static void hardware_disable_all(void) | ||
1765 | { | ||
1766 | spin_lock(&kvm_lock); | ||
1767 | hardware_disable_all_nolock(); | ||
1768 | spin_unlock(&kvm_lock); | ||
1769 | } | ||
1770 | |||
1771 | static int hardware_enable_all(void) | ||
1772 | { | ||
1773 | int r = 0; | ||
1774 | |||
1775 | spin_lock(&kvm_lock); | ||
1776 | |||
1777 | kvm_usage_count++; | ||
1778 | if (kvm_usage_count == 1) { | ||
1779 | atomic_set(&hardware_enable_failed, 0); | ||
1780 | on_each_cpu(hardware_enable, NULL, 1); | ||
1781 | |||
1782 | if (atomic_read(&hardware_enable_failed)) { | ||
1783 | hardware_disable_all_nolock(); | ||
1784 | r = -EBUSY; | ||
1785 | } | ||
1786 | } | ||
1787 | |||
1788 | spin_unlock(&kvm_lock); | ||
1789 | |||
1790 | return r; | ||
1791 | } | ||
1792 | |||
2463 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | 1793 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, |
2464 | void *v) | 1794 | void *v) |
2465 | { | 1795 | { |
2466 | int cpu = (long)v; | 1796 | int cpu = (long)v; |
2467 | 1797 | ||
1798 | if (!kvm_usage_count) | ||
1799 | return NOTIFY_OK; | ||
1800 | |||
2468 | val &= ~CPU_TASKS_FROZEN; | 1801 | val &= ~CPU_TASKS_FROZEN; |
2469 | switch (val) { | 1802 | switch (val) { |
2470 | case CPU_DYING: | 1803 | case CPU_DYING: |
@@ -2667,13 +2000,15 @@ static void kvm_exit_debug(void) | |||
2667 | 2000 | ||
2668 | static int kvm_suspend(struct sys_device *dev, pm_message_t state) | 2001 | static int kvm_suspend(struct sys_device *dev, pm_message_t state) |
2669 | { | 2002 | { |
2670 | hardware_disable(NULL); | 2003 | if (kvm_usage_count) |
2004 | hardware_disable(NULL); | ||
2671 | return 0; | 2005 | return 0; |
2672 | } | 2006 | } |
2673 | 2007 | ||
2674 | static int kvm_resume(struct sys_device *dev) | 2008 | static int kvm_resume(struct sys_device *dev) |
2675 | { | 2009 | { |
2676 | hardware_enable(NULL); | 2010 | if (kvm_usage_count) |
2011 | hardware_enable(NULL); | ||
2677 | return 0; | 2012 | return 0; |
2678 | } | 2013 | } |
2679 | 2014 | ||
@@ -2748,7 +2083,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size, | |||
2748 | goto out_free_1; | 2083 | goto out_free_1; |
2749 | } | 2084 | } |
2750 | 2085 | ||
2751 | on_each_cpu(hardware_enable, NULL, 1); | ||
2752 | r = register_cpu_notifier(&kvm_cpu_notifier); | 2086 | r = register_cpu_notifier(&kvm_cpu_notifier); |
2753 | if (r) | 2087 | if (r) |
2754 | goto out_free_2; | 2088 | goto out_free_2; |
@@ -2798,7 +2132,6 @@ out_free_3: | |||
2798 | unregister_reboot_notifier(&kvm_reboot_notifier); | 2132 | unregister_reboot_notifier(&kvm_reboot_notifier); |
2799 | unregister_cpu_notifier(&kvm_cpu_notifier); | 2133 | unregister_cpu_notifier(&kvm_cpu_notifier); |
2800 | out_free_2: | 2134 | out_free_2: |
2801 | on_each_cpu(hardware_disable, NULL, 1); | ||
2802 | out_free_1: | 2135 | out_free_1: |
2803 | kvm_arch_hardware_unsetup(); | 2136 | kvm_arch_hardware_unsetup(); |
2804 | out_free_0a: | 2137 | out_free_0a: |