author     Thomas Gleixner <tglx@linutronix.de>        2010-02-21 14:17:22 -0500
committer  Thomas Gleixner <tglx@linutronix.de>        2010-02-21 14:17:22 -0500
commit     5f854cfc024622e4aae14d7cf422f6ff86278688 (patch)
tree       426e77c6f6e4939c80440bf1fabcb020e3ee145b /virt/kvm
parent     cc24da0742870f152ddf1002aa39dfcd83f7cf9c (diff)
parent     4ec62b2b2e6bd7ddef7b6cea6e5db7b5578a6532 (diff)
Forward to 2.6.33-rc8
Merge branch 'linus' into rt/head with a pile of conflicts.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'virt/kvm')
-rw-r--r--  virt/kvm/Kconfig             14
-rw-r--r--  virt/kvm/assigned-dev.c     818
-rw-r--r--  virt/kvm/coalesced_mmio.c    74
-rw-r--r--  virt/kvm/coalesced_mmio.h     1
-rw-r--r--  virt/kvm/eventfd.c          588
-rw-r--r--  virt/kvm/ioapic.c           144
-rw-r--r--  virt/kvm/ioapic.h             5
-rw-r--r--  virt/kvm/iodev.h             55
-rw-r--r--  virt/kvm/irq_comm.c         262
-rw-r--r--  virt/kvm/kvm_main.c        1231
-rw-r--r--  virt/kvm/kvm_trace.c        285
11 files changed, 2119 insertions, 1358 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
new file mode 100644
index 000000000000..daece36c0a57
--- /dev/null
+++ b/virt/kvm/Kconfig
@@ -0,0 +1,14 @@
1 | # KVM common configuration items and defaults | ||
2 | |||
3 | config HAVE_KVM | ||
4 | bool | ||
5 | |||
6 | config HAVE_KVM_IRQCHIP | ||
7 | bool | ||
8 | |||
9 | config HAVE_KVM_EVENTFD | ||
10 | bool | ||
11 | select EVENTFD | ||
12 | |||
13 | config KVM_APIC_ARCHITECTURE | ||
14 | bool | ||
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
new file mode 100644
index 000000000000..f73de631e3ee
--- /dev/null
+++ b/virt/kvm/assigned-dev.c
@@ -0,0 +1,818 @@
1 | /* | ||
2 | * Kernel-based Virtual Machine - device assignment support | ||
3 | * | ||
4 | * Copyright (C) 2006-9 Red Hat, Inc | ||
5 | * | ||
6 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
7 | * the COPYING file in the top-level directory. | ||
8 | * | ||
9 | */ | ||
10 | |||
11 | #include <linux/kvm_host.h> | ||
12 | #include <linux/kvm.h> | ||
13 | #include <linux/uaccess.h> | ||
14 | #include <linux/vmalloc.h> | ||
15 | #include <linux/errno.h> | ||
16 | #include <linux/spinlock.h> | ||
17 | #include <linux/pci.h> | ||
18 | #include <linux/interrupt.h> | ||
19 | #include "irq.h" | ||
20 | |||
21 | static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, | ||
22 | int assigned_dev_id) | ||
23 | { | ||
24 | struct list_head *ptr; | ||
25 | struct kvm_assigned_dev_kernel *match; | ||
26 | |||
27 | list_for_each(ptr, head) { | ||
28 | match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); | ||
29 | if (match->assigned_dev_id == assigned_dev_id) | ||
30 | return match; | ||
31 | } | ||
32 | return NULL; | ||
33 | } | ||
34 | |||
35 | static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | ||
36 | *assigned_dev, int irq) | ||
37 | { | ||
38 | int i, index; | ||
39 | struct msix_entry *host_msix_entries; | ||
40 | |||
41 | host_msix_entries = assigned_dev->host_msix_entries; | ||
42 | |||
43 | index = -1; | ||
44 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
45 | if (irq == host_msix_entries[i].vector) { | ||
46 | index = i; | ||
47 | break; | ||
48 | } | ||
49 | if (index < 0) { | ||
50 | printk(KERN_WARNING "Failed to find the corresponding MSI-X entry!\n"); | ||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | return index; | ||
55 | } | ||
56 | |||
57 | static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) | ||
58 | { | ||
59 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
60 | struct kvm *kvm; | ||
61 | int i; | ||
62 | |||
63 | assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, | ||
64 | interrupt_work); | ||
65 | kvm = assigned_dev->kvm; | ||
66 | |||
67 | spin_lock_irq(&assigned_dev->assigned_dev_lock); | ||
68 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
69 | struct kvm_guest_msix_entry *guest_entries = | ||
70 | assigned_dev->guest_msix_entries; | ||
71 | for (i = 0; i < assigned_dev->entries_nr; i++) { | ||
72 | if (!(guest_entries[i].flags & | ||
73 | KVM_ASSIGNED_MSIX_PENDING)) | ||
74 | continue; | ||
75 | guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; | ||
76 | kvm_set_irq(assigned_dev->kvm, | ||
77 | assigned_dev->irq_source_id, | ||
78 | guest_entries[i].vector, 1); | ||
79 | } | ||
80 | } else | ||
81 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | ||
82 | assigned_dev->guest_irq, 1); | ||
83 | |||
84 | spin_unlock_irq(&assigned_dev->assigned_dev_lock); | ||
85 | } | ||
86 | |||
87 | static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) | ||
88 | { | ||
89 | unsigned long flags; | ||
90 | struct kvm_assigned_dev_kernel *assigned_dev = | ||
91 | (struct kvm_assigned_dev_kernel *) dev_id; | ||
92 | |||
93 | spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); | ||
94 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
95 | int index = find_index_from_host_irq(assigned_dev, irq); | ||
96 | if (index < 0) | ||
97 | goto out; | ||
98 | assigned_dev->guest_msix_entries[index].flags |= | ||
99 | KVM_ASSIGNED_MSIX_PENDING; | ||
100 | } | ||
101 | |||
102 | schedule_work(&assigned_dev->interrupt_work); | ||
103 | |||
104 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { | ||
105 | disable_irq_nosync(irq); | ||
106 | assigned_dev->host_irq_disabled = true; | ||
107 | } | ||
108 | |||
109 | out: | ||
110 | spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); | ||
111 | return IRQ_HANDLED; | ||
112 | } | ||
113 | |||
114 | /* Ack the irq line for an assigned device */ | ||
115 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | ||
116 | { | ||
117 | struct kvm_assigned_dev_kernel *dev; | ||
118 | unsigned long flags; | ||
119 | |||
120 | if (kian->gsi == -1) | ||
121 | return; | ||
122 | |||
123 | dev = container_of(kian, struct kvm_assigned_dev_kernel, | ||
124 | ack_notifier); | ||
125 | |||
126 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); | ||
127 | |||
128 | /* The guest irq may be shared so this ack may be | ||
129 | * from another device. | ||
130 | */ | ||
131 | spin_lock_irqsave(&dev->assigned_dev_lock, flags); | ||
132 | if (dev->host_irq_disabled) { | ||
133 | enable_irq(dev->host_irq); | ||
134 | dev->host_irq_disabled = false; | ||
135 | } | ||
136 | spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); | ||
137 | } | ||
138 | |||
139 | static void deassign_guest_irq(struct kvm *kvm, | ||
140 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
141 | { | ||
142 | kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); | ||
143 | assigned_dev->ack_notifier.gsi = -1; | ||
144 | |||
145 | if (assigned_dev->irq_source_id != -1) | ||
146 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); | ||
147 | assigned_dev->irq_source_id = -1; | ||
148 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); | ||
149 | } | ||
150 | |||
151 | /* The function implicitly holds the kvm->lock mutex due to cancel_work_sync() */ | ||
152 | static void deassign_host_irq(struct kvm *kvm, | ||
153 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
154 | { | ||
155 | /* | ||
156 | * In kvm_free_device_irq, cancel_work_sync() returns true if: | ||
157 | * 1. the work was scheduled and has been cancelled, or | ||
158 | * 2. the work callback has been executed. | ||
159 | * | ||
160 | * The first case ensures that the irq is disabled and no more events | ||
161 | * will arrive. In the second case the irq may still be enabled (e.g. | ||
162 | * for MSI), so we disable the irq here to prevent further events. | ||
163 | * | ||
164 | * Note that this may result in a nested disable if the interrupt type | ||
165 | * is INTx, but that is fine since we are about to free it. | ||
166 | * | ||
167 | * If this function is called as part of VM destruction, make sure the | ||
168 | * kvm state is still valid at this point, because we may also have to | ||
169 | * wait for interrupt_work to complete. | ||
170 | */ | ||
171 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
172 | int i; | ||
173 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
174 | disable_irq_nosync(assigned_dev-> | ||
175 | host_msix_entries[i].vector); | ||
176 | |||
177 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
178 | |||
179 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
180 | free_irq(assigned_dev->host_msix_entries[i].vector, | ||
181 | (void *)assigned_dev); | ||
182 | |||
183 | assigned_dev->entries_nr = 0; | ||
184 | kfree(assigned_dev->host_msix_entries); | ||
185 | kfree(assigned_dev->guest_msix_entries); | ||
186 | pci_disable_msix(assigned_dev->dev); | ||
187 | } else { | ||
188 | /* Deal with MSI and INTx */ | ||
189 | disable_irq_nosync(assigned_dev->host_irq); | ||
190 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
191 | |||
192 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); | ||
193 | |||
194 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) | ||
195 | pci_disable_msi(assigned_dev->dev); | ||
196 | } | ||
197 | |||
198 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); | ||
199 | } | ||
200 | |||
201 | static int kvm_deassign_irq(struct kvm *kvm, | ||
202 | struct kvm_assigned_dev_kernel *assigned_dev, | ||
203 | unsigned long irq_requested_type) | ||
204 | { | ||
205 | unsigned long guest_irq_type, host_irq_type; | ||
206 | |||
207 | if (!irqchip_in_kernel(kvm)) | ||
208 | return -EINVAL; | ||
209 | /* no irq assignment to deassign */ | ||
210 | if (!assigned_dev->irq_requested_type) | ||
211 | return -ENXIO; | ||
212 | |||
213 | host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; | ||
214 | guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; | ||
215 | |||
216 | if (host_irq_type) | ||
217 | deassign_host_irq(kvm, assigned_dev); | ||
218 | if (guest_irq_type) | ||
219 | deassign_guest_irq(kvm, assigned_dev); | ||
220 | |||
221 | return 0; | ||
222 | } | ||
223 | |||
224 | static void kvm_free_assigned_irq(struct kvm *kvm, | ||
225 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
226 | { | ||
227 | kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); | ||
228 | } | ||
229 | |||
230 | static void kvm_free_assigned_device(struct kvm *kvm, | ||
231 | struct kvm_assigned_dev_kernel | ||
232 | *assigned_dev) | ||
233 | { | ||
234 | kvm_free_assigned_irq(kvm, assigned_dev); | ||
235 | |||
236 | pci_reset_function(assigned_dev->dev); | ||
237 | |||
238 | pci_release_regions(assigned_dev->dev); | ||
239 | pci_disable_device(assigned_dev->dev); | ||
240 | pci_dev_put(assigned_dev->dev); | ||
241 | |||
242 | list_del(&assigned_dev->list); | ||
243 | kfree(assigned_dev); | ||
244 | } | ||
245 | |||
246 | void kvm_free_all_assigned_devices(struct kvm *kvm) | ||
247 | { | ||
248 | struct list_head *ptr, *ptr2; | ||
249 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
250 | |||
251 | list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { | ||
252 | assigned_dev = list_entry(ptr, | ||
253 | struct kvm_assigned_dev_kernel, | ||
254 | list); | ||
255 | |||
256 | kvm_free_assigned_device(kvm, assigned_dev); | ||
257 | } | ||
258 | } | ||
259 | |||
260 | static int assigned_device_enable_host_intx(struct kvm *kvm, | ||
261 | struct kvm_assigned_dev_kernel *dev) | ||
262 | { | ||
263 | dev->host_irq = dev->dev->irq; | ||
264 | /* Even though this is PCI, we don't want to use shared | ||
265 | * interrupts. Sharing host devices with guest-assigned devices | ||
266 | * on the same interrupt line is not a happy situation: there | ||
267 | * are going to be long delays in accepting, acking, etc. | ||
268 | */ | ||
269 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, | ||
270 | 0, "kvm_assigned_intx_device", (void *)dev)) | ||
271 | return -EIO; | ||
272 | return 0; | ||
273 | } | ||
274 | |||
275 | #ifdef __KVM_HAVE_MSI | ||
276 | static int assigned_device_enable_host_msi(struct kvm *kvm, | ||
277 | struct kvm_assigned_dev_kernel *dev) | ||
278 | { | ||
279 | int r; | ||
280 | |||
281 | if (!dev->dev->msi_enabled) { | ||
282 | r = pci_enable_msi(dev->dev); | ||
283 | if (r) | ||
284 | return r; | ||
285 | } | ||
286 | |||
287 | dev->host_irq = dev->dev->irq; | ||
288 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, | ||
289 | "kvm_assigned_msi_device", (void *)dev)) { | ||
290 | pci_disable_msi(dev->dev); | ||
291 | return -EIO; | ||
292 | } | ||
293 | |||
294 | return 0; | ||
295 | } | ||
296 | #endif | ||
297 | |||
298 | #ifdef __KVM_HAVE_MSIX | ||
299 | static int assigned_device_enable_host_msix(struct kvm *kvm, | ||
300 | struct kvm_assigned_dev_kernel *dev) | ||
301 | { | ||
302 | int i, r = -EINVAL; | ||
303 | |||
304 | /* host_msix_entries and guest_msix_entries should have been | ||
305 | * initialized */ | ||
306 | if (dev->entries_nr == 0) | ||
307 | return r; | ||
308 | |||
309 | r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); | ||
310 | if (r) | ||
311 | return r; | ||
312 | |||
313 | for (i = 0; i < dev->entries_nr; i++) { | ||
314 | r = request_irq(dev->host_msix_entries[i].vector, | ||
315 | kvm_assigned_dev_intr, 0, | ||
316 | "kvm_assigned_msix_device", | ||
317 | (void *)dev); | ||
318 | /* FIXME: free requested_irq's on failure */ | ||
319 | if (r) | ||
320 | return r; | ||
321 | } | ||
322 | |||
323 | return 0; | ||
324 | } | ||
325 | |||
326 | #endif | ||
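
The FIXME above notes that vectors already requested are leaked when a later request_irq() fails. A minimal sketch of the unwind that comment is asking for, reusing the field names from this file; the helper itself is hypothetical and not code from this merge:

static int assigned_device_request_msix_irqs(struct kvm_assigned_dev_kernel *dev)
{
	int i, r;

	for (i = 0; i < dev->entries_nr; i++) {
		r = request_irq(dev->host_msix_entries[i].vector,
				kvm_assigned_dev_intr, 0,
				"kvm_assigned_msix_device", (void *)dev);
		if (r)
			goto unwind;
	}
	return 0;

unwind:
	/* release the vectors that were successfully requested so far */
	while (--i >= 0)
		free_irq(dev->host_msix_entries[i].vector, (void *)dev);
	return r;
}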
327 | |||
328 | static int assigned_device_enable_guest_intx(struct kvm *kvm, | ||
329 | struct kvm_assigned_dev_kernel *dev, | ||
330 | struct kvm_assigned_irq *irq) | ||
331 | { | ||
332 | dev->guest_irq = irq->guest_irq; | ||
333 | dev->ack_notifier.gsi = irq->guest_irq; | ||
334 | return 0; | ||
335 | } | ||
336 | |||
337 | #ifdef __KVM_HAVE_MSI | ||
338 | static int assigned_device_enable_guest_msi(struct kvm *kvm, | ||
339 | struct kvm_assigned_dev_kernel *dev, | ||
340 | struct kvm_assigned_irq *irq) | ||
341 | { | ||
342 | dev->guest_irq = irq->guest_irq; | ||
343 | dev->ack_notifier.gsi = -1; | ||
344 | dev->host_irq_disabled = false; | ||
345 | return 0; | ||
346 | } | ||
347 | #endif | ||
348 | |||
349 | #ifdef __KVM_HAVE_MSIX | ||
350 | static int assigned_device_enable_guest_msix(struct kvm *kvm, | ||
351 | struct kvm_assigned_dev_kernel *dev, | ||
352 | struct kvm_assigned_irq *irq) | ||
353 | { | ||
354 | dev->guest_irq = irq->guest_irq; | ||
355 | dev->ack_notifier.gsi = -1; | ||
356 | dev->host_irq_disabled = false; | ||
357 | return 0; | ||
358 | } | ||
359 | #endif | ||
360 | |||
361 | static int assign_host_irq(struct kvm *kvm, | ||
362 | struct kvm_assigned_dev_kernel *dev, | ||
363 | __u32 host_irq_type) | ||
364 | { | ||
365 | int r = -EEXIST; | ||
366 | |||
367 | if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) | ||
368 | return r; | ||
369 | |||
370 | switch (host_irq_type) { | ||
371 | case KVM_DEV_IRQ_HOST_INTX: | ||
372 | r = assigned_device_enable_host_intx(kvm, dev); | ||
373 | break; | ||
374 | #ifdef __KVM_HAVE_MSI | ||
375 | case KVM_DEV_IRQ_HOST_MSI: | ||
376 | r = assigned_device_enable_host_msi(kvm, dev); | ||
377 | break; | ||
378 | #endif | ||
379 | #ifdef __KVM_HAVE_MSIX | ||
380 | case KVM_DEV_IRQ_HOST_MSIX: | ||
381 | r = assigned_device_enable_host_msix(kvm, dev); | ||
382 | break; | ||
383 | #endif | ||
384 | default: | ||
385 | r = -EINVAL; | ||
386 | } | ||
387 | |||
388 | if (!r) | ||
389 | dev->irq_requested_type |= host_irq_type; | ||
390 | |||
391 | return r; | ||
392 | } | ||
393 | |||
394 | static int assign_guest_irq(struct kvm *kvm, | ||
395 | struct kvm_assigned_dev_kernel *dev, | ||
396 | struct kvm_assigned_irq *irq, | ||
397 | unsigned long guest_irq_type) | ||
398 | { | ||
399 | int id; | ||
400 | int r = -EEXIST; | ||
401 | |||
402 | if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) | ||
403 | return r; | ||
404 | |||
405 | id = kvm_request_irq_source_id(kvm); | ||
406 | if (id < 0) | ||
407 | return id; | ||
408 | |||
409 | dev->irq_source_id = id; | ||
410 | |||
411 | switch (guest_irq_type) { | ||
412 | case KVM_DEV_IRQ_GUEST_INTX: | ||
413 | r = assigned_device_enable_guest_intx(kvm, dev, irq); | ||
414 | break; | ||
415 | #ifdef __KVM_HAVE_MSI | ||
416 | case KVM_DEV_IRQ_GUEST_MSI: | ||
417 | r = assigned_device_enable_guest_msi(kvm, dev, irq); | ||
418 | break; | ||
419 | #endif | ||
420 | #ifdef __KVM_HAVE_MSIX | ||
421 | case KVM_DEV_IRQ_GUEST_MSIX: | ||
422 | r = assigned_device_enable_guest_msix(kvm, dev, irq); | ||
423 | break; | ||
424 | #endif | ||
425 | default: | ||
426 | r = -EINVAL; | ||
427 | } | ||
428 | |||
429 | if (!r) { | ||
430 | dev->irq_requested_type |= guest_irq_type; | ||
431 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | ||
432 | } else | ||
433 | kvm_free_irq_source_id(kvm, dev->irq_source_id); | ||
434 | |||
435 | return r; | ||
436 | } | ||
437 | |||
438 | /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ | ||
439 | static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, | ||
440 | struct kvm_assigned_irq *assigned_irq) | ||
441 | { | ||
442 | int r = -EINVAL; | ||
443 | struct kvm_assigned_dev_kernel *match; | ||
444 | unsigned long host_irq_type, guest_irq_type; | ||
445 | |||
446 | if (!capable(CAP_SYS_RAWIO)) | ||
447 | return -EPERM; | ||
448 | |||
449 | if (!irqchip_in_kernel(kvm)) | ||
450 | return r; | ||
451 | |||
452 | mutex_lock(&kvm->lock); | ||
453 | r = -ENODEV; | ||
454 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
455 | assigned_irq->assigned_dev_id); | ||
456 | if (!match) | ||
457 | goto out; | ||
458 | |||
459 | host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); | ||
460 | guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); | ||
461 | |||
462 | r = -EINVAL; | ||
463 | /* can only assign one type at a time */ | ||
464 | if (hweight_long(host_irq_type) > 1) | ||
465 | goto out; | ||
466 | if (hweight_long(guest_irq_type) > 1) | ||
467 | goto out; | ||
468 | if (host_irq_type == 0 && guest_irq_type == 0) | ||
469 | goto out; | ||
470 | |||
471 | r = 0; | ||
472 | if (host_irq_type) | ||
473 | r = assign_host_irq(kvm, match, host_irq_type); | ||
474 | if (r) | ||
475 | goto out; | ||
476 | |||
477 | if (guest_irq_type) | ||
478 | r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); | ||
479 | out: | ||
480 | mutex_unlock(&kvm->lock); | ||
481 | return r; | ||
482 | } | ||
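
The hweight_long() checks above allow at most one host type and one guest type per call. A hedged userspace-side sketch of a well-formed KVM_ASSIGN_DEV_IRQ request; the helper name and its arguments are illustrative:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Route an assigned device's host MSI to a guest MSI on 'guest_gsi'. */
static int assign_dev_msi(int vm_fd, __u32 dev_id, __u32 guest_gsi)
{
	struct kvm_assigned_irq irq = {
		.assigned_dev_id = dev_id,	/* id chosen at KVM_ASSIGN_PCI_DEVICE time */
		.guest_irq	 = guest_gsi,
		.flags		 = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_MSI,
	};

	return ioctl(vm_fd, KVM_ASSIGN_DEV_IRQ, &irq);
}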
483 | |||
484 | static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, | ||
485 | struct kvm_assigned_irq | ||
486 | *assigned_irq) | ||
487 | { | ||
488 | int r = -ENODEV; | ||
489 | struct kvm_assigned_dev_kernel *match; | ||
490 | |||
491 | mutex_lock(&kvm->lock); | ||
492 | |||
493 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
494 | assigned_irq->assigned_dev_id); | ||
495 | if (!match) | ||
496 | goto out; | ||
497 | |||
498 | r = kvm_deassign_irq(kvm, match, assigned_irq->flags); | ||
499 | out: | ||
500 | mutex_unlock(&kvm->lock); | ||
501 | return r; | ||
502 | } | ||
503 | |||
504 | static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | ||
505 | struct kvm_assigned_pci_dev *assigned_dev) | ||
506 | { | ||
507 | int r = 0; | ||
508 | struct kvm_assigned_dev_kernel *match; | ||
509 | struct pci_dev *dev; | ||
510 | |||
511 | mutex_lock(&kvm->lock); | ||
512 | down_read(&kvm->slots_lock); | ||
513 | |||
514 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
515 | assigned_dev->assigned_dev_id); | ||
516 | if (match) { | ||
517 | /* device already assigned */ | ||
518 | r = -EEXIST; | ||
519 | goto out; | ||
520 | } | ||
521 | |||
522 | match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); | ||
523 | if (match == NULL) { | ||
524 | printk(KERN_INFO "%s: Couldn't allocate memory\n", | ||
525 | __func__); | ||
526 | r = -ENOMEM; | ||
527 | goto out; | ||
528 | } | ||
529 | dev = pci_get_bus_and_slot(assigned_dev->busnr, | ||
530 | assigned_dev->devfn); | ||
531 | if (!dev) { | ||
532 | printk(KERN_INFO "%s: host device not found\n", __func__); | ||
533 | r = -EINVAL; | ||
534 | goto out_free; | ||
535 | } | ||
536 | if (pci_enable_device(dev)) { | ||
537 | printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); | ||
538 | r = -EBUSY; | ||
539 | goto out_put; | ||
540 | } | ||
541 | r = pci_request_regions(dev, "kvm_assigned_device"); | ||
542 | if (r) { | ||
543 | printk(KERN_INFO "%s: Could not get access to device regions\n", | ||
544 | __func__); | ||
545 | goto out_disable; | ||
546 | } | ||
547 | |||
548 | pci_reset_function(dev); | ||
549 | |||
550 | match->assigned_dev_id = assigned_dev->assigned_dev_id; | ||
551 | match->host_busnr = assigned_dev->busnr; | ||
552 | match->host_devfn = assigned_dev->devfn; | ||
553 | match->flags = assigned_dev->flags; | ||
554 | match->dev = dev; | ||
555 | spin_lock_init(&match->assigned_dev_lock); | ||
556 | match->irq_source_id = -1; | ||
557 | match->kvm = kvm; | ||
558 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | ||
559 | INIT_WORK(&match->interrupt_work, | ||
560 | kvm_assigned_dev_interrupt_work_handler); | ||
561 | |||
562 | list_add(&match->list, &kvm->arch.assigned_dev_head); | ||
563 | |||
564 | if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { | ||
565 | if (!kvm->arch.iommu_domain) { | ||
566 | r = kvm_iommu_map_guest(kvm); | ||
567 | if (r) | ||
568 | goto out_list_del; | ||
569 | } | ||
570 | r = kvm_assign_device(kvm, match); | ||
571 | if (r) | ||
572 | goto out_list_del; | ||
573 | } | ||
574 | |||
575 | out: | ||
576 | up_read(&kvm->slots_lock); | ||
577 | mutex_unlock(&kvm->lock); | ||
578 | return r; | ||
579 | out_list_del: | ||
580 | list_del(&match->list); | ||
581 | pci_release_regions(dev); | ||
582 | out_disable: | ||
583 | pci_disable_device(dev); | ||
584 | out_put: | ||
585 | pci_dev_put(dev); | ||
586 | out_free: | ||
587 | kfree(match); | ||
588 | up_read(&kvm->slots_lock); | ||
589 | mutex_unlock(&kvm->lock); | ||
590 | return r; | ||
591 | } | ||
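
For context, a hedged sketch of the userspace side of this ioctl. The dev_id value is an arbitrary handle picked by userspace and reused by the IRQ and MSI-X ioctls in this file; the helper is illustrative:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hand the host PCI device at bus/devfn to the VM, with IOMMU protection. */
static int assign_pci_dev(int vm_fd, __u32 dev_id, __u32 busnr, __u32 devfn)
{
	struct kvm_assigned_pci_dev dev = {
		.assigned_dev_id = dev_id,
		.busnr		 = busnr,
		.devfn		 = devfn,
		.flags		 = KVM_DEV_ASSIGN_ENABLE_IOMMU,
	};

	return ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &dev);
}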
592 | |||
593 | static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, | ||
594 | struct kvm_assigned_pci_dev *assigned_dev) | ||
595 | { | ||
596 | int r = 0; | ||
597 | struct kvm_assigned_dev_kernel *match; | ||
598 | |||
599 | mutex_lock(&kvm->lock); | ||
600 | |||
601 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
602 | assigned_dev->assigned_dev_id); | ||
603 | if (!match) { | ||
604 | printk(KERN_INFO "%s: device hasn't been assigned before, " | ||
605 | "so cannot be deassigned\n", __func__); | ||
606 | r = -EINVAL; | ||
607 | goto out; | ||
608 | } | ||
609 | |||
610 | if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) | ||
611 | kvm_deassign_device(kvm, match); | ||
612 | |||
613 | kvm_free_assigned_device(kvm, match); | ||
614 | |||
615 | out: | ||
616 | mutex_unlock(&kvm->lock); | ||
617 | return r; | ||
618 | } | ||
619 | |||
620 | |||
621 | #ifdef __KVM_HAVE_MSIX | ||
622 | static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, | ||
623 | struct kvm_assigned_msix_nr *entry_nr) | ||
624 | { | ||
625 | int r = 0; | ||
626 | struct kvm_assigned_dev_kernel *adev; | ||
627 | |||
628 | mutex_lock(&kvm->lock); | ||
629 | |||
630 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
631 | entry_nr->assigned_dev_id); | ||
632 | if (!adev) { | ||
633 | r = -EINVAL; | ||
634 | goto msix_nr_out; | ||
635 | } | ||
636 | |||
637 | if (adev->entries_nr == 0) { | ||
638 | adev->entries_nr = entry_nr->entry_nr; | ||
639 | if (adev->entries_nr == 0 || | ||
640 | adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { | ||
641 | r = -EINVAL; | ||
642 | goto msix_nr_out; | ||
643 | } | ||
644 | |||
645 | adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * | ||
646 | entry_nr->entry_nr, | ||
647 | GFP_KERNEL); | ||
648 | if (!adev->host_msix_entries) { | ||
649 | r = -ENOMEM; | ||
650 | goto msix_nr_out; | ||
651 | } | ||
652 | adev->guest_msix_entries = kzalloc( | ||
653 | sizeof(struct kvm_guest_msix_entry) * | ||
654 | entry_nr->entry_nr, GFP_KERNEL); | ||
655 | if (!adev->guest_msix_entries) { | ||
656 | kfree(adev->host_msix_entries); | ||
657 | r = -ENOMEM; | ||
658 | goto msix_nr_out; | ||
659 | } | ||
660 | } else /* Not allowed to set the MSI-X number twice */ | ||
661 | r = -EINVAL; | ||
662 | msix_nr_out: | ||
663 | mutex_unlock(&kvm->lock); | ||
664 | return r; | ||
665 | } | ||
666 | |||
667 | static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, | ||
668 | struct kvm_assigned_msix_entry *entry) | ||
669 | { | ||
670 | int r = 0, i; | ||
671 | struct kvm_assigned_dev_kernel *adev; | ||
672 | |||
673 | mutex_lock(&kvm->lock); | ||
674 | |||
675 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
676 | entry->assigned_dev_id); | ||
677 | |||
678 | if (!adev) { | ||
679 | r = -EINVAL; | ||
680 | goto msix_entry_out; | ||
681 | } | ||
682 | |||
683 | for (i = 0; i < adev->entries_nr; i++) | ||
684 | if (adev->guest_msix_entries[i].vector == 0 || | ||
685 | adev->guest_msix_entries[i].entry == entry->entry) { | ||
686 | adev->guest_msix_entries[i].entry = entry->entry; | ||
687 | adev->guest_msix_entries[i].vector = entry->gsi; | ||
688 | adev->host_msix_entries[i].entry = entry->entry; | ||
689 | break; | ||
690 | } | ||
691 | if (i == adev->entries_nr) { | ||
692 | r = -ENOSPC; | ||
693 | goto msix_entry_out; | ||
694 | } | ||
695 | |||
696 | msix_entry_out: | ||
697 | mutex_unlock(&kvm->lock); | ||
698 | |||
699 | return r; | ||
700 | } | ||
701 | #endif | ||
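
Taken together, the two handlers above expect userspace to first declare the vector count and then bind each MSI-X table entry to a guest GSI. A hedged sketch of that sequence; the helper and placeholder values are illustrative, while the struct field names match the handlers above:

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int setup_msix(int vm_fd, __u32 dev_id, __u16 nr, const __u32 *guest_gsis)
{
	struct kvm_assigned_msix_nr msix_nr = {
		.assigned_dev_id = dev_id,
		.entry_nr	 = nr,		/* must be non-zero and < KVM_MAX_MSIX_PER_DEV */
	};
	int i, r;

	r = ioctl(vm_fd, KVM_ASSIGN_SET_MSIX_NR, &msix_nr);
	if (r < 0)
		return r;

	for (i = 0; i < nr; i++) {
		struct kvm_assigned_msix_entry entry = {
			.assigned_dev_id = dev_id,
			.entry		 = i,		/* index into the device's MSI-X table */
			.gsi		 = guest_gsis[i],
		};

		r = ioctl(vm_fd, KVM_ASSIGN_SET_MSIX_ENTRY, &entry);
		if (r < 0)
			return r;
	}
	return 0;
}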
702 | |||
703 | long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | ||
704 | unsigned long arg) | ||
705 | { | ||
706 | void __user *argp = (void __user *)arg; | ||
707 | int r = -ENOTTY; | ||
708 | |||
709 | switch (ioctl) { | ||
710 | case KVM_ASSIGN_PCI_DEVICE: { | ||
711 | struct kvm_assigned_pci_dev assigned_dev; | ||
712 | |||
713 | r = -EFAULT; | ||
714 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
715 | goto out; | ||
716 | r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); | ||
717 | if (r) | ||
718 | goto out; | ||
719 | break; | ||
720 | } | ||
721 | case KVM_ASSIGN_IRQ: { | ||
722 | r = -EOPNOTSUPP; | ||
723 | break; | ||
724 | } | ||
725 | #ifdef KVM_CAP_ASSIGN_DEV_IRQ | ||
726 | case KVM_ASSIGN_DEV_IRQ: { | ||
727 | struct kvm_assigned_irq assigned_irq; | ||
728 | |||
729 | r = -EFAULT; | ||
730 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
731 | goto out; | ||
732 | r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); | ||
733 | if (r) | ||
734 | goto out; | ||
735 | break; | ||
736 | } | ||
737 | case KVM_DEASSIGN_DEV_IRQ: { | ||
738 | struct kvm_assigned_irq assigned_irq; | ||
739 | |||
740 | r = -EFAULT; | ||
741 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
742 | goto out; | ||
743 | r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); | ||
744 | if (r) | ||
745 | goto out; | ||
746 | break; | ||
747 | } | ||
748 | #endif | ||
749 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT | ||
750 | case KVM_DEASSIGN_PCI_DEVICE: { | ||
751 | struct kvm_assigned_pci_dev assigned_dev; | ||
752 | |||
753 | r = -EFAULT; | ||
754 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
755 | goto out; | ||
756 | r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); | ||
757 | if (r) | ||
758 | goto out; | ||
759 | break; | ||
760 | } | ||
761 | #endif | ||
762 | #ifdef KVM_CAP_IRQ_ROUTING | ||
763 | case KVM_SET_GSI_ROUTING: { | ||
764 | struct kvm_irq_routing routing; | ||
765 | struct kvm_irq_routing __user *urouting; | ||
766 | struct kvm_irq_routing_entry *entries; | ||
767 | |||
768 | r = -EFAULT; | ||
769 | if (copy_from_user(&routing, argp, sizeof(routing))) | ||
770 | goto out; | ||
771 | r = -EINVAL; | ||
772 | if (routing.nr >= KVM_MAX_IRQ_ROUTES) | ||
773 | goto out; | ||
774 | if (routing.flags) | ||
775 | goto out; | ||
776 | r = -ENOMEM; | ||
777 | entries = vmalloc(routing.nr * sizeof(*entries)); | ||
778 | if (!entries) | ||
779 | goto out; | ||
780 | r = -EFAULT; | ||
781 | urouting = argp; | ||
782 | if (copy_from_user(entries, urouting->entries, | ||
783 | routing.nr * sizeof(*entries))) | ||
784 | goto out_free_irq_routing; | ||
785 | r = kvm_set_irq_routing(kvm, entries, routing.nr, | ||
786 | routing.flags); | ||
787 | out_free_irq_routing: | ||
788 | vfree(entries); | ||
789 | break; | ||
790 | } | ||
791 | #endif /* KVM_CAP_IRQ_ROUTING */ | ||
792 | #ifdef __KVM_HAVE_MSIX | ||
793 | case KVM_ASSIGN_SET_MSIX_NR: { | ||
794 | struct kvm_assigned_msix_nr entry_nr; | ||
795 | r = -EFAULT; | ||
796 | if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) | ||
797 | goto out; | ||
798 | r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); | ||
799 | if (r) | ||
800 | goto out; | ||
801 | break; | ||
802 | } | ||
803 | case KVM_ASSIGN_SET_MSIX_ENTRY: { | ||
804 | struct kvm_assigned_msix_entry entry; | ||
805 | r = -EFAULT; | ||
806 | if (copy_from_user(&entry, argp, sizeof entry)) | ||
807 | goto out; | ||
808 | r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); | ||
809 | if (r) | ||
810 | goto out; | ||
811 | break; | ||
812 | } | ||
813 | #endif | ||
814 | } | ||
815 | out: | ||
816 | return r; | ||
817 | } | ||
818 | |||
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 5ae620d32fac..04d69cd7049b 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -14,32 +14,28 @@
14 | 14 | ||
15 | #include "coalesced_mmio.h" | 15 | #include "coalesced_mmio.h" |
16 | 16 | ||
17 | static int coalesced_mmio_in_range(struct kvm_io_device *this, | 17 | static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev) |
18 | gpa_t addr, int len, int is_write) | 18 | { |
19 | return container_of(dev, struct kvm_coalesced_mmio_dev, dev); | ||
20 | } | ||
21 | |||
22 | static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, | ||
23 | gpa_t addr, int len) | ||
19 | { | 24 | { |
20 | struct kvm_coalesced_mmio_dev *dev = | ||
21 | (struct kvm_coalesced_mmio_dev*)this->private; | ||
22 | struct kvm_coalesced_mmio_zone *zone; | 25 | struct kvm_coalesced_mmio_zone *zone; |
23 | int next; | 26 | struct kvm_coalesced_mmio_ring *ring; |
27 | unsigned avail; | ||
24 | int i; | 28 | int i; |
25 | 29 | ||
26 | if (!is_write) | ||
27 | return 0; | ||
28 | |||
29 | /* kvm->lock is taken by the caller and must be not released before | ||
30 | * dev.read/write | ||
31 | */ | ||
32 | |||
33 | /* Are we able to batch it ? */ | 30 | /* Are we able to batch it ? */ |
34 | 31 | ||
35 | /* last is the first free entry | 32 | /* last is the first free entry |
36 | * check if we don't meet the first used entry | 33 | * check if we don't meet the first used entry |
37 | * there is always one unused entry in the buffer | 34 | * there is always one unused entry in the buffer |
38 | */ | 35 | */ |
39 | 36 | ring = dev->kvm->coalesced_mmio_ring; | |
40 | next = (dev->kvm->coalesced_mmio_ring->last + 1) % | 37 | avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX; |
41 | KVM_COALESCED_MMIO_MAX; | 38 | if (avail < KVM_MAX_VCPUS) { |
42 | if (next == dev->kvm->coalesced_mmio_ring->first) { | ||
43 | /* full */ | 39 | /* full */ |
44 | return 0; | 40 | return 0; |
45 | } | 41 | } |
@@ -60,14 +56,15 @@ static int coalesced_mmio_in_range(struct kvm_io_device *this, | |||
60 | return 0; | 56 | return 0; |
61 | } | 57 | } |
62 | 58 | ||
63 | static void coalesced_mmio_write(struct kvm_io_device *this, | 59 | static int coalesced_mmio_write(struct kvm_io_device *this, |
64 | gpa_t addr, int len, const void *val) | 60 | gpa_t addr, int len, const void *val) |
65 | { | 61 | { |
66 | struct kvm_coalesced_mmio_dev *dev = | 62 | struct kvm_coalesced_mmio_dev *dev = to_mmio(this); |
67 | (struct kvm_coalesced_mmio_dev*)this->private; | ||
68 | struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; | 63 | struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; |
64 | if (!coalesced_mmio_in_range(dev, addr, len)) | ||
65 | return -EOPNOTSUPP; | ||
69 | 66 | ||
70 | /* kvm->lock must be taken by caller before call to in_range()*/ | 67 | spin_lock(&dev->lock); |
71 | 68 | ||
72 | /* copy data in first free entry of the ring */ | 69 | /* copy data in first free entry of the ring */ |
73 | 70 | ||
@@ -76,29 +73,40 @@ static void coalesced_mmio_write(struct kvm_io_device *this, | |||
76 | memcpy(ring->coalesced_mmio[ring->last].data, val, len); | 73 | memcpy(ring->coalesced_mmio[ring->last].data, val, len); |
77 | smp_wmb(); | 74 | smp_wmb(); |
78 | ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; | 75 | ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; |
76 | spin_unlock(&dev->lock); | ||
77 | return 0; | ||
79 | } | 78 | } |
80 | 79 | ||
81 | static void coalesced_mmio_destructor(struct kvm_io_device *this) | 80 | static void coalesced_mmio_destructor(struct kvm_io_device *this) |
82 | { | 81 | { |
83 | kfree(this); | 82 | struct kvm_coalesced_mmio_dev *dev = to_mmio(this); |
83 | |||
84 | kfree(dev); | ||
84 | } | 85 | } |
85 | 86 | ||
87 | static const struct kvm_io_device_ops coalesced_mmio_ops = { | ||
88 | .write = coalesced_mmio_write, | ||
89 | .destructor = coalesced_mmio_destructor, | ||
90 | }; | ||
91 | |||
86 | int kvm_coalesced_mmio_init(struct kvm *kvm) | 92 | int kvm_coalesced_mmio_init(struct kvm *kvm) |
87 | { | 93 | { |
88 | struct kvm_coalesced_mmio_dev *dev; | 94 | struct kvm_coalesced_mmio_dev *dev; |
95 | int ret; | ||
89 | 96 | ||
90 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); | 97 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); |
91 | if (!dev) | 98 | if (!dev) |
92 | return -ENOMEM; | 99 | return -ENOMEM; |
93 | dev->dev.write = coalesced_mmio_write; | 100 | spin_lock_init(&dev->lock); |
94 | dev->dev.in_range = coalesced_mmio_in_range; | 101 | kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); |
95 | dev->dev.destructor = coalesced_mmio_destructor; | ||
96 | dev->dev.private = dev; | ||
97 | dev->kvm = kvm; | 102 | dev->kvm = kvm; |
98 | kvm->coalesced_mmio_dev = dev; | 103 | kvm->coalesced_mmio_dev = dev; |
99 | kvm_io_bus_register_dev(&kvm->mmio_bus, &dev->dev); | ||
100 | 104 | ||
101 | return 0; | 105 | ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev); |
106 | if (ret < 0) | ||
107 | kfree(dev); | ||
108 | |||
109 | return ret; | ||
102 | } | 110 | } |
103 | 111 | ||
104 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | 112 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, |
@@ -109,16 +117,16 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | |||
109 | if (dev == NULL) | 117 | if (dev == NULL) |
110 | return -EINVAL; | 118 | return -EINVAL; |
111 | 119 | ||
112 | mutex_lock(&kvm->lock); | 120 | down_write(&kvm->slots_lock); |
113 | if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { | 121 | if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { |
114 | mutex_unlock(&kvm->lock); | 122 | up_write(&kvm->slots_lock); |
115 | return -ENOBUFS; | 123 | return -ENOBUFS; |
116 | } | 124 | } |
117 | 125 | ||
118 | dev->zone[dev->nb_zones] = *zone; | 126 | dev->zone[dev->nb_zones] = *zone; |
119 | dev->nb_zones++; | 127 | dev->nb_zones++; |
120 | 128 | ||
121 | mutex_unlock(&kvm->lock); | 129 | up_write(&kvm->slots_lock); |
122 | return 0; | 130 | return 0; |
123 | } | 131 | } |
124 | 132 | ||
@@ -132,7 +140,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, | |||
132 | if (dev == NULL) | 140 | if (dev == NULL) |
133 | return -EINVAL; | 141 | return -EINVAL; |
134 | 142 | ||
135 | mutex_lock(&kvm->lock); | 143 | down_write(&kvm->slots_lock); |
136 | 144 | ||
137 | i = dev->nb_zones; | 145 | i = dev->nb_zones; |
138 | while(i) { | 146 | while(i) { |
@@ -150,7 +158,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, | |||
150 | i--; | 158 | i--; |
151 | } | 159 | } |
152 | 160 | ||
153 | mutex_unlock(&kvm->lock); | 161 | up_write(&kvm->slots_lock); |
154 | 162 | ||
155 | return 0; | 163 | return 0; |
156 | } | 164 | } |
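
On the write side above, coalesced_mmio_in_range() is now called before dev->lock is taken, which appears to be why the availability check reserves KVM_MAX_VCPUS free slots: concurrent vcpus that all pass the check still cannot overrun the ring. Userspace consumes the other end of that ring from the page advertised by KVM_CHECK_EXTENSION(KVM_CAP_COALESCED_MMIO) (a page offset into the vcpu mmap). A hedged consumer sketch; handle_mmio_write() is a hypothetical user callback:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical callback that applies one batched write to the device model. */
extern void handle_mmio_write(__u64 addr, const void *data, __u32 len);

/* Register a guest-physical range whose writes the kernel may batch. */
static int register_coalesced_zone(int vm_fd, __u64 addr, __u32 size)
{
	struct kvm_coalesced_mmio_zone zone = { .addr = addr, .size = size };

	return ioctl(vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
}

/* Drain batched writes, typically right after a KVM_RUN exit. */
static void drain_coalesced_ring(struct kvm_coalesced_mmio_ring *ring)
{
	while (ring->first != ring->last) {
		struct kvm_coalesced_mmio *ent = &ring->coalesced_mmio[ring->first];

		handle_mmio_write(ent->phys_addr, ent->data, ent->len);
		ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
	}
}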
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
index 5ac0ec628461..4b49f27fa31e 100644
--- a/virt/kvm/coalesced_mmio.h
+++ b/virt/kvm/coalesced_mmio.h
@@ -12,6 +12,7 @@
12 | struct kvm_coalesced_mmio_dev { | 12 | struct kvm_coalesced_mmio_dev { |
13 | struct kvm_io_device dev; | 13 | struct kvm_io_device dev; |
14 | struct kvm *kvm; | 14 | struct kvm *kvm; |
15 | spinlock_t lock; | ||
15 | int nb_zones; | 16 | int nb_zones; |
16 | struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX]; | 17 | struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX]; |
17 | }; | 18 | }; |
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
new file mode 100644
index 000000000000..a9d3fc6c681c
--- /dev/null
+++ b/virt/kvm/eventfd.c
@@ -0,0 +1,588 @@
1 | /* | ||
2 | * kvm eventfd support - use eventfd objects to signal various KVM events | ||
3 | * | ||
4 | * Copyright 2009 Novell. All Rights Reserved. | ||
5 | * | ||
6 | * Author: | ||
7 | * Gregory Haskins <ghaskins@novell.com> | ||
8 | * | ||
9 | * This file is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of version 2 of the GNU General Public License | ||
11 | * as published by the Free Software Foundation. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software Foundation, | ||
20 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | ||
21 | */ | ||
22 | |||
23 | #include <linux/kvm_host.h> | ||
24 | #include <linux/kvm.h> | ||
25 | #include <linux/workqueue.h> | ||
26 | #include <linux/syscalls.h> | ||
27 | #include <linux/wait.h> | ||
28 | #include <linux/poll.h> | ||
29 | #include <linux/file.h> | ||
30 | #include <linux/list.h> | ||
31 | #include <linux/eventfd.h> | ||
32 | #include <linux/kernel.h> | ||
33 | |||
34 | #include "iodev.h" | ||
35 | |||
36 | /* | ||
37 | * -------------------------------------------------------------------- | ||
38 | * irqfd: Allows an fd to be used to inject an interrupt to the guest | ||
39 | * | ||
40 | * Credit goes to Avi Kivity for the original idea. | ||
41 | * -------------------------------------------------------------------- | ||
42 | */ | ||
43 | |||
44 | struct _irqfd { | ||
45 | struct kvm *kvm; | ||
46 | struct eventfd_ctx *eventfd; | ||
47 | int gsi; | ||
48 | struct list_head list; | ||
49 | poll_table pt; | ||
50 | wait_queue_head_t *wqh; | ||
51 | wait_queue_t wait; | ||
52 | struct work_struct inject; | ||
53 | struct work_struct shutdown; | ||
54 | }; | ||
55 | |||
56 | static struct workqueue_struct *irqfd_cleanup_wq; | ||
57 | |||
58 | static void | ||
59 | irqfd_inject(struct work_struct *work) | ||
60 | { | ||
61 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); | ||
62 | struct kvm *kvm = irqfd->kvm; | ||
63 | |||
64 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); | ||
65 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | * Race-free decouple logic (ordering is critical) | ||
70 | */ | ||
71 | static void | ||
72 | irqfd_shutdown(struct work_struct *work) | ||
73 | { | ||
74 | struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); | ||
75 | u64 cnt; | ||
76 | |||
77 | /* | ||
78 | * Synchronize with the wait-queue and unhook ourselves to prevent | ||
79 | * further events. | ||
80 | */ | ||
81 | eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt); | ||
82 | |||
83 | /* | ||
84 | * We know no new events will be scheduled at this point, so block | ||
85 | * until all previously outstanding events have completed | ||
86 | */ | ||
87 | flush_work(&irqfd->inject); | ||
88 | |||
89 | /* | ||
90 | * It is now safe to release the object's resources | ||
91 | */ | ||
92 | eventfd_ctx_put(irqfd->eventfd); | ||
93 | kfree(irqfd); | ||
94 | } | ||
95 | |||
96 | |||
97 | /* assumes kvm->irqfds.lock is held */ | ||
98 | static bool | ||
99 | irqfd_is_active(struct _irqfd *irqfd) | ||
100 | { | ||
101 | return list_empty(&irqfd->list) ? false : true; | ||
102 | } | ||
103 | |||
104 | /* | ||
105 | * Mark the irqfd as inactive and schedule it for removal | ||
106 | * | ||
107 | * assumes kvm->irqfds.lock is held | ||
108 | */ | ||
109 | static void | ||
110 | irqfd_deactivate(struct _irqfd *irqfd) | ||
111 | { | ||
112 | BUG_ON(!irqfd_is_active(irqfd)); | ||
113 | |||
114 | list_del_init(&irqfd->list); | ||
115 | |||
116 | queue_work(irqfd_cleanup_wq, &irqfd->shutdown); | ||
117 | } | ||
118 | |||
119 | /* | ||
120 | * Called with wqh->lock held and interrupts disabled | ||
121 | */ | ||
122 | static int | ||
123 | irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) | ||
124 | { | ||
125 | struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); | ||
126 | unsigned long flags = (unsigned long)key; | ||
127 | |||
128 | if (flags & POLLIN) | ||
129 | /* An event has been signaled, inject an interrupt */ | ||
130 | schedule_work(&irqfd->inject); | ||
131 | |||
132 | if (flags & POLLHUP) { | ||
133 | /* The eventfd is closing, detach from KVM */ | ||
134 | struct kvm *kvm = irqfd->kvm; | ||
135 | unsigned long flags; | ||
136 | |||
137 | spin_lock_irqsave(&kvm->irqfds.lock, flags); | ||
138 | |||
139 | /* | ||
140 | * We must check if someone deactivated the irqfd before | ||
141 | * we could acquire the irqfds.lock since the item is | ||
142 | * deactivated from the KVM side before it is unhooked from | ||
143 | * the wait-queue. If it is already deactivated, we can | ||
144 | * simply return knowing the other side will cleanup for us. | ||
145 | * We cannot race against the irqfd going away since the | ||
146 | * other side is required to acquire wqh->lock, which we hold | ||
147 | */ | ||
148 | if (irqfd_is_active(irqfd)) | ||
149 | irqfd_deactivate(irqfd); | ||
150 | |||
151 | spin_unlock_irqrestore(&kvm->irqfds.lock, flags); | ||
152 | } | ||
153 | |||
154 | return 0; | ||
155 | } | ||
156 | |||
157 | static void | ||
158 | irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, | ||
159 | poll_table *pt) | ||
160 | { | ||
161 | struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); | ||
162 | |||
163 | irqfd->wqh = wqh; | ||
164 | add_wait_queue(wqh, &irqfd->wait); | ||
165 | } | ||
166 | |||
167 | static int | ||
168 | kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) | ||
169 | { | ||
170 | struct _irqfd *irqfd, *tmp; | ||
171 | struct file *file = NULL; | ||
172 | struct eventfd_ctx *eventfd = NULL; | ||
173 | int ret; | ||
174 | unsigned int events; | ||
175 | |||
176 | irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); | ||
177 | if (!irqfd) | ||
178 | return -ENOMEM; | ||
179 | |||
180 | irqfd->kvm = kvm; | ||
181 | irqfd->gsi = gsi; | ||
182 | INIT_LIST_HEAD(&irqfd->list); | ||
183 | INIT_WORK(&irqfd->inject, irqfd_inject); | ||
184 | INIT_WORK(&irqfd->shutdown, irqfd_shutdown); | ||
185 | |||
186 | file = eventfd_fget(fd); | ||
187 | if (IS_ERR(file)) { | ||
188 | ret = PTR_ERR(file); | ||
189 | goto fail; | ||
190 | } | ||
191 | |||
192 | eventfd = eventfd_ctx_fileget(file); | ||
193 | if (IS_ERR(eventfd)) { | ||
194 | ret = PTR_ERR(eventfd); | ||
195 | goto fail; | ||
196 | } | ||
197 | |||
198 | irqfd->eventfd = eventfd; | ||
199 | |||
200 | /* | ||
201 | * Install our own custom wake-up handling so we are notified via | ||
202 | * a callback whenever someone signals the underlying eventfd | ||
203 | */ | ||
204 | init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup); | ||
205 | init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc); | ||
206 | |||
207 | spin_lock_irq(&kvm->irqfds.lock); | ||
208 | |||
209 | ret = 0; | ||
210 | list_for_each_entry(tmp, &kvm->irqfds.items, list) { | ||
211 | if (irqfd->eventfd != tmp->eventfd) | ||
212 | continue; | ||
213 | /* This fd is used for another irq already. */ | ||
214 | ret = -EBUSY; | ||
215 | spin_unlock_irq(&kvm->irqfds.lock); | ||
216 | goto fail; | ||
217 | } | ||
218 | |||
219 | events = file->f_op->poll(file, &irqfd->pt); | ||
220 | |||
221 | list_add_tail(&irqfd->list, &kvm->irqfds.items); | ||
222 | spin_unlock_irq(&kvm->irqfds.lock); | ||
223 | |||
224 | /* | ||
225 | * Check if there was an event already pending on the eventfd | ||
226 | * before we registered, and trigger it as if we didn't miss it. | ||
227 | */ | ||
228 | if (events & POLLIN) | ||
229 | schedule_work(&irqfd->inject); | ||
230 | |||
231 | /* | ||
232 | * do not drop the file until the irqfd is fully initialized, otherwise | ||
233 | * we might race against the POLLHUP | ||
234 | */ | ||
235 | fput(file); | ||
236 | |||
237 | return 0; | ||
238 | |||
239 | fail: | ||
240 | if (eventfd && !IS_ERR(eventfd)) | ||
241 | eventfd_ctx_put(eventfd); | ||
242 | |||
243 | if (!IS_ERR(file)) | ||
244 | fput(file); | ||
245 | |||
246 | kfree(irqfd); | ||
247 | return ret; | ||
248 | } | ||
249 | |||
250 | void | ||
251 | kvm_eventfd_init(struct kvm *kvm) | ||
252 | { | ||
253 | spin_lock_init(&kvm->irqfds.lock); | ||
254 | INIT_LIST_HEAD(&kvm->irqfds.items); | ||
255 | INIT_LIST_HEAD(&kvm->ioeventfds); | ||
256 | } | ||
257 | |||
258 | /* | ||
259 | * shutdown any irqfd's that match fd+gsi | ||
260 | */ | ||
261 | static int | ||
262 | kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) | ||
263 | { | ||
264 | struct _irqfd *irqfd, *tmp; | ||
265 | struct eventfd_ctx *eventfd; | ||
266 | |||
267 | eventfd = eventfd_ctx_fdget(fd); | ||
268 | if (IS_ERR(eventfd)) | ||
269 | return PTR_ERR(eventfd); | ||
270 | |||
271 | spin_lock_irq(&kvm->irqfds.lock); | ||
272 | |||
273 | list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { | ||
274 | if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) | ||
275 | irqfd_deactivate(irqfd); | ||
276 | } | ||
277 | |||
278 | spin_unlock_irq(&kvm->irqfds.lock); | ||
279 | eventfd_ctx_put(eventfd); | ||
280 | |||
281 | /* | ||
282 | * Block until we know all outstanding shutdown jobs have completed | ||
283 | * so that we guarantee there will not be any more interrupts on this | ||
284 | * gsi once this deassign function returns. | ||
285 | */ | ||
286 | flush_workqueue(irqfd_cleanup_wq); | ||
287 | |||
288 | return 0; | ||
289 | } | ||
290 | |||
291 | int | ||
292 | kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) | ||
293 | { | ||
294 | if (flags & KVM_IRQFD_FLAG_DEASSIGN) | ||
295 | return kvm_irqfd_deassign(kvm, fd, gsi); | ||
296 | |||
297 | return kvm_irqfd_assign(kvm, fd, gsi); | ||
298 | } | ||
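
A hedged userspace sketch of driving kvm_irqfd(): create an eventfd, hand it to the VM through the KVM_IRQFD uapi struct, then write to it whenever the interrupt should fire (helper name illustrative):

#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

/* Returns the eventfd on success; writing a non-zero 8-byte count to it
 * injects (and, via the 1-then-0 sequence in irqfd_inject(), immediately
 * clears) the given gsi. */
static int hook_irqfd(int vm_fd, __u32 gsi)
{
	struct kvm_irqfd irqfd = { .gsi = gsi };
	int efd = eventfd(0, 0);

	if (efd < 0)
		return -1;

	irqfd.fd = efd;
	if (ioctl(vm_fd, KVM_IRQFD, &irqfd) < 0) {
		close(efd);
		return -1;
	}
	return efd;	/* later: uint64_t one = 1; write(efd, &one, sizeof(one)); */
}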
299 | |||
300 | /* | ||
301 | * This function is called as the kvm VM fd is being released. Shut down | ||
302 | * all irqfds that still remain open | ||
303 | */ | ||
304 | void | ||
305 | kvm_irqfd_release(struct kvm *kvm) | ||
306 | { | ||
307 | struct _irqfd *irqfd, *tmp; | ||
308 | |||
309 | spin_lock_irq(&kvm->irqfds.lock); | ||
310 | |||
311 | list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) | ||
312 | irqfd_deactivate(irqfd); | ||
313 | |||
314 | spin_unlock_irq(&kvm->irqfds.lock); | ||
315 | |||
316 | /* | ||
317 | * Block until we know all outstanding shutdown jobs have completed | ||
318 | * since we do not take a kvm* reference. | ||
319 | */ | ||
320 | flush_workqueue(irqfd_cleanup_wq); | ||
321 | |||
322 | } | ||
323 | |||
324 | /* | ||
325 | * create a host-wide workqueue for issuing deferred shutdown requests | ||
326 | * aggregated from all vm* instances. We need our own isolated single-thread | ||
327 | * queue to prevent deadlock against flushing the normal work-queue. | ||
328 | */ | ||
329 | static int __init irqfd_module_init(void) | ||
330 | { | ||
331 | irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup"); | ||
332 | if (!irqfd_cleanup_wq) | ||
333 | return -ENOMEM; | ||
334 | |||
335 | return 0; | ||
336 | } | ||
337 | |||
338 | static void __exit irqfd_module_exit(void) | ||
339 | { | ||
340 | destroy_workqueue(irqfd_cleanup_wq); | ||
341 | } | ||
342 | |||
343 | module_init(irqfd_module_init); | ||
344 | module_exit(irqfd_module_exit); | ||
345 | |||
346 | /* | ||
347 | * -------------------------------------------------------------------- | ||
348 | * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal. | ||
349 | * | ||
350 | * userspace can register a PIO/MMIO address with an eventfd for receiving | ||
351 | * notification when the memory has been touched. | ||
352 | * -------------------------------------------------------------------- | ||
353 | */ | ||
354 | |||
355 | struct _ioeventfd { | ||
356 | struct list_head list; | ||
357 | u64 addr; | ||
358 | int length; | ||
359 | struct eventfd_ctx *eventfd; | ||
360 | u64 datamatch; | ||
361 | struct kvm_io_device dev; | ||
362 | bool wildcard; | ||
363 | }; | ||
364 | |||
365 | static inline struct _ioeventfd * | ||
366 | to_ioeventfd(struct kvm_io_device *dev) | ||
367 | { | ||
368 | return container_of(dev, struct _ioeventfd, dev); | ||
369 | } | ||
370 | |||
371 | static void | ||
372 | ioeventfd_release(struct _ioeventfd *p) | ||
373 | { | ||
374 | eventfd_ctx_put(p->eventfd); | ||
375 | list_del(&p->list); | ||
376 | kfree(p); | ||
377 | } | ||
378 | |||
379 | static bool | ||
380 | ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val) | ||
381 | { | ||
382 | u64 _val; | ||
383 | |||
384 | if (!(addr == p->addr && len == p->length)) | ||
385 | /* address-range must be precise for a hit */ | ||
386 | return false; | ||
387 | |||
388 | if (p->wildcard) | ||
389 | /* all else equal, wildcard is always a hit */ | ||
390 | return true; | ||
391 | |||
392 | /* otherwise, we have to actually compare the data */ | ||
393 | |||
394 | BUG_ON(!IS_ALIGNED((unsigned long)val, len)); | ||
395 | |||
396 | switch (len) { | ||
397 | case 1: | ||
398 | _val = *(u8 *)val; | ||
399 | break; | ||
400 | case 2: | ||
401 | _val = *(u16 *)val; | ||
402 | break; | ||
403 | case 4: | ||
404 | _val = *(u32 *)val; | ||
405 | break; | ||
406 | case 8: | ||
407 | _val = *(u64 *)val; | ||
408 | break; | ||
409 | default: | ||
410 | return false; | ||
411 | } | ||
412 | |||
413 | return _val == p->datamatch ? true : false; | ||
414 | } | ||
415 | |||
416 | /* MMIO/PIO writes trigger an event if the addr/val match */ | ||
417 | static int | ||
418 | ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len, | ||
419 | const void *val) | ||
420 | { | ||
421 | struct _ioeventfd *p = to_ioeventfd(this); | ||
422 | |||
423 | if (!ioeventfd_in_range(p, addr, len, val)) | ||
424 | return -EOPNOTSUPP; | ||
425 | |||
426 | eventfd_signal(p->eventfd, 1); | ||
427 | return 0; | ||
428 | } | ||
429 | |||
430 | /* | ||
431 | * This function is called as KVM is completely shutting down. We do not | ||
432 | * need to worry about locking; just nuke anything we have as quickly as possible | ||
433 | */ | ||
434 | static void | ||
435 | ioeventfd_destructor(struct kvm_io_device *this) | ||
436 | { | ||
437 | struct _ioeventfd *p = to_ioeventfd(this); | ||
438 | |||
439 | ioeventfd_release(p); | ||
440 | } | ||
441 | |||
442 | static const struct kvm_io_device_ops ioeventfd_ops = { | ||
443 | .write = ioeventfd_write, | ||
444 | .destructor = ioeventfd_destructor, | ||
445 | }; | ||
446 | |||
447 | /* assumes kvm->slots_lock held */ | ||
448 | static bool | ||
449 | ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p) | ||
450 | { | ||
451 | struct _ioeventfd *_p; | ||
452 | |||
453 | list_for_each_entry(_p, &kvm->ioeventfds, list) | ||
454 | if (_p->addr == p->addr && _p->length == p->length && | ||
455 | (_p->wildcard || p->wildcard || | ||
456 | _p->datamatch == p->datamatch)) | ||
457 | return true; | ||
458 | |||
459 | return false; | ||
460 | } | ||
461 | |||
462 | static int | ||
463 | kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | ||
464 | { | ||
465 | int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; | ||
466 | struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; | ||
467 | struct _ioeventfd *p; | ||
468 | struct eventfd_ctx *eventfd; | ||
469 | int ret; | ||
470 | |||
471 | /* must be natural-word sized */ | ||
472 | switch (args->len) { | ||
473 | case 1: | ||
474 | case 2: | ||
475 | case 4: | ||
476 | case 8: | ||
477 | break; | ||
478 | default: | ||
479 | return -EINVAL; | ||
480 | } | ||
481 | |||
482 | /* check for range overflow */ | ||
483 | if (args->addr + args->len < args->addr) | ||
484 | return -EINVAL; | ||
485 | |||
486 | /* check for extra flags that we don't understand */ | ||
487 | if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) | ||
488 | return -EINVAL; | ||
489 | |||
490 | eventfd = eventfd_ctx_fdget(args->fd); | ||
491 | if (IS_ERR(eventfd)) | ||
492 | return PTR_ERR(eventfd); | ||
493 | |||
494 | p = kzalloc(sizeof(*p), GFP_KERNEL); | ||
495 | if (!p) { | ||
496 | ret = -ENOMEM; | ||
497 | goto fail; | ||
498 | } | ||
499 | |||
500 | INIT_LIST_HEAD(&p->list); | ||
501 | p->addr = args->addr; | ||
502 | p->length = args->len; | ||
503 | p->eventfd = eventfd; | ||
504 | |||
505 | /* The datamatch feature is optional, otherwise this is a wildcard */ | ||
506 | if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH) | ||
507 | p->datamatch = args->datamatch; | ||
508 | else | ||
509 | p->wildcard = true; | ||
510 | |||
511 | down_write(&kvm->slots_lock); | ||
512 | |||
513 | /* Verify that there isn't a match already */ | ||
514 | if (ioeventfd_check_collision(kvm, p)) { | ||
515 | ret = -EEXIST; | ||
516 | goto unlock_fail; | ||
517 | } | ||
518 | |||
519 | kvm_iodevice_init(&p->dev, &ioeventfd_ops); | ||
520 | |||
521 | ret = __kvm_io_bus_register_dev(bus, &p->dev); | ||
522 | if (ret < 0) | ||
523 | goto unlock_fail; | ||
524 | |||
525 | list_add_tail(&p->list, &kvm->ioeventfds); | ||
526 | |||
527 | up_write(&kvm->slots_lock); | ||
528 | |||
529 | return 0; | ||
530 | |||
531 | unlock_fail: | ||
532 | up_write(&kvm->slots_lock); | ||
533 | |||
534 | fail: | ||
535 | kfree(p); | ||
536 | eventfd_ctx_put(eventfd); | ||
537 | |||
538 | return ret; | ||
539 | } | ||
540 | |||
541 | static int | ||
542 | kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | ||
543 | { | ||
544 | int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; | ||
545 | struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; | ||
546 | struct _ioeventfd *p, *tmp; | ||
547 | struct eventfd_ctx *eventfd; | ||
548 | int ret = -ENOENT; | ||
549 | |||
550 | eventfd = eventfd_ctx_fdget(args->fd); | ||
551 | if (IS_ERR(eventfd)) | ||
552 | return PTR_ERR(eventfd); | ||
553 | |||
554 | down_write(&kvm->slots_lock); | ||
555 | |||
556 | list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { | ||
557 | bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); | ||
558 | |||
559 | if (p->eventfd != eventfd || | ||
560 | p->addr != args->addr || | ||
561 | p->length != args->len || | ||
562 | p->wildcard != wildcard) | ||
563 | continue; | ||
564 | |||
565 | if (!p->wildcard && p->datamatch != args->datamatch) | ||
566 | continue; | ||
567 | |||
568 | __kvm_io_bus_unregister_dev(bus, &p->dev); | ||
569 | ioeventfd_release(p); | ||
570 | ret = 0; | ||
571 | break; | ||
572 | } | ||
573 | |||
574 | up_write(&kvm->slots_lock); | ||
575 | |||
576 | eventfd_ctx_put(eventfd); | ||
577 | |||
578 | return ret; | ||
579 | } | ||
580 | |||
581 | int | ||
582 | kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | ||
583 | { | ||
584 | if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN) | ||
585 | return kvm_deassign_ioeventfd(kvm, args); | ||
586 | |||
587 | return kvm_assign_ioeventfd(kvm, args); | ||
588 | } | ||
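
And the matching userspace side for the ioeventfd path above: register a doorbell address (PIO in this sketch), optionally with a datamatch value, and service guest writes by reading the eventfd rather than returning to userspace. Helper name and values are illustrative:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Signal 'efd' whenever the guest writes the 16-bit value 'val' to I/O
 * port 'port' -- a virtio-style doorbell. */
static int hook_ioeventfd(int vm_fd, int efd, __u64 port, __u16 val)
{
	struct kvm_ioeventfd io = {
		.addr	   = port,
		.len	   = 2,
		.fd	   = efd,
		.flags	   = KVM_IOEVENTFD_FLAG_PIO | KVM_IOEVENTFD_FLAG_DATAMATCH,
		.datamatch = val,
	};

	return ioctl(vm_fd, KVM_IOEVENTFD, &io);
}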
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 1150c6d5c7b8..38a2d20b89de 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -36,6 +36,7 @@
36 | #include <asm/processor.h> | 36 | #include <asm/processor.h> |
37 | #include <asm/page.h> | 37 | #include <asm/page.h> |
38 | #include <asm/current.h> | 38 | #include <asm/current.h> |
39 | #include <trace/events/kvm.h> | ||
39 | 40 | ||
40 | #include "ioapic.h" | 41 | #include "ioapic.h" |
41 | #include "lapic.h" | 42 | #include "lapic.h" |
@@ -103,6 +104,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | |||
103 | { | 104 | { |
104 | unsigned index; | 105 | unsigned index; |
105 | bool mask_before, mask_after; | 106 | bool mask_before, mask_after; |
107 | union kvm_ioapic_redirect_entry *e; | ||
106 | 108 | ||
107 | switch (ioapic->ioregsel) { | 109 | switch (ioapic->ioregsel) { |
108 | case IOAPIC_REG_VERSION: | 110 | case IOAPIC_REG_VERSION: |
@@ -122,19 +124,20 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | |||
122 | ioapic_debug("change redir index %x val %x\n", index, val); | 124 | ioapic_debug("change redir index %x val %x\n", index, val); |
123 | if (index >= IOAPIC_NUM_PINS) | 125 | if (index >= IOAPIC_NUM_PINS) |
124 | return; | 126 | return; |
125 | mask_before = ioapic->redirtbl[index].fields.mask; | 127 | e = &ioapic->redirtbl[index]; |
128 | mask_before = e->fields.mask; | ||
126 | if (ioapic->ioregsel & 1) { | 129 | if (ioapic->ioregsel & 1) { |
127 | ioapic->redirtbl[index].bits &= 0xffffffff; | 130 | e->bits &= 0xffffffff; |
128 | ioapic->redirtbl[index].bits |= (u64) val << 32; | 131 | e->bits |= (u64) val << 32; |
129 | } else { | 132 | } else { |
130 | ioapic->redirtbl[index].bits &= ~0xffffffffULL; | 133 | e->bits &= ~0xffffffffULL; |
131 | ioapic->redirtbl[index].bits |= (u32) val; | 134 | e->bits |= (u32) val; |
132 | ioapic->redirtbl[index].fields.remote_irr = 0; | 135 | e->fields.remote_irr = 0; |
133 | } | 136 | } |
134 | mask_after = ioapic->redirtbl[index].fields.mask; | 137 | mask_after = e->fields.mask; |
135 | if (mask_before != mask_after) | 138 | if (mask_before != mask_after) |
136 | kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); | 139 | kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); |
137 | if (ioapic->redirtbl[index].fields.trig_mode == IOAPIC_LEVEL_TRIG | 140 | if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG |
138 | && ioapic->irr & (1 << index)) | 141 | && ioapic->irr & (1 << index)) |
139 | ioapic_service(ioapic, index); | 142 | ioapic_service(ioapic, index); |
140 | break; | 143 | break; |
@@ -164,7 +167,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) | |||
164 | /* Always deliver the PIT interrupt to vcpu 0 */ | 167 | /* Always deliver the PIT interrupt to vcpu 0 */
165 | if (irq == 0) { | 168 | if (irq == 0) { |
166 | irqe.dest_mode = 0; /* Physical mode. */ | 169 | irqe.dest_mode = 0; /* Physical mode. */ |
167 | irqe.dest_id = ioapic->kvm->vcpus[0]->vcpu_id; | 170 | /* need to read apic_id from the apic register since
171 | * it can be rewritten */ | ||
172 | irqe.dest_id = ioapic->kvm->bsp_vcpu->vcpu_id; | ||
168 | } | 173 | } |
169 | #endif | 174 | #endif |
170 | return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); | 175 | return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); |
@@ -177,6 +182,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) | |||
177 | union kvm_ioapic_redirect_entry entry; | 182 | union kvm_ioapic_redirect_entry entry; |
178 | int ret = 1; | 183 | int ret = 1; |
179 | 184 | ||
185 | mutex_lock(&ioapic->lock); | ||
180 | if (irq >= 0 && irq < IOAPIC_NUM_PINS) { | 186 | if (irq >= 0 && irq < IOAPIC_NUM_PINS) { |
181 | entry = ioapic->redirtbl[irq]; | 187 | entry = ioapic->redirtbl[irq]; |
182 | level ^= entry.fields.polarity; | 188 | level ^= entry.fields.polarity; |
@@ -188,57 +194,82 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) | |||
188 | if ((edge && old_irr != ioapic->irr) || | 194 | if ((edge && old_irr != ioapic->irr) || |
189 | (!edge && !entry.fields.remote_irr)) | 195 | (!edge && !entry.fields.remote_irr)) |
190 | ret = ioapic_service(ioapic, irq); | 196 | ret = ioapic_service(ioapic, irq); |
197 | else | ||
198 | ret = 0; /* report coalesced interrupt */ | ||
191 | } | 199 | } |
200 | trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); | ||
192 | } | 201 | } |
202 | mutex_unlock(&ioapic->lock); | ||
203 | |||
193 | return ret; | 204 | return ret; |
194 | } | 205 | } |
195 | 206 | ||
196 | static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin, | 207 | static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, |
197 | int trigger_mode) | 208 | int trigger_mode) |
198 | { | 209 | { |
199 | union kvm_ioapic_redirect_entry *ent; | 210 | int i; |
211 | |||
212 | for (i = 0; i < IOAPIC_NUM_PINS; i++) { | ||
213 | union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; | ||
214 | |||
215 | if (ent->fields.vector != vector) | ||
216 | continue; | ||
200 | 217 | ||
201 | ent = &ioapic->redirtbl[pin]; | 218 | /* |
219 | * We drop the lock while calling the ack notifiers because ack | ||
220 | * notifier callbacks for assigned devices call back into the IOAPIC | ||
221 | * recursively. Since remote_irr is cleared only after the call | ||
222 | * to the notifiers, if the same vector is delivered while the lock | ||
223 | * is dropped, it will be put into irr and delivered once the | ||
224 | * ack notifier returns. | ||
225 | */ | ||
226 | mutex_unlock(&ioapic->lock); | ||
227 | kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i); | ||
228 | mutex_lock(&ioapic->lock); | ||
202 | 229 | ||
203 | kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin); | 230 | if (trigger_mode != IOAPIC_LEVEL_TRIG) |
231 | continue; | ||
204 | 232 | ||
205 | if (trigger_mode == IOAPIC_LEVEL_TRIG) { | ||
206 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); | 233 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); |
207 | ent->fields.remote_irr = 0; | 234 | ent->fields.remote_irr = 0; |
208 | if (!ent->fields.mask && (ioapic->irr & (1 << pin))) | 235 | if (!ent->fields.mask && (ioapic->irr & (1 << i))) |
209 | ioapic_service(ioapic, pin); | 236 | ioapic_service(ioapic, i); |
210 | } | 237 | } |
211 | } | 238 | } |
212 | 239 | ||
213 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) | 240 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) |
214 | { | 241 | { |
215 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 242 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
216 | int i; | ||
217 | 243 | ||
218 | for (i = 0; i < IOAPIC_NUM_PINS; i++) | 244 | mutex_lock(&ioapic->lock); |
219 | if (ioapic->redirtbl[i].fields.vector == vector) | 245 | __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); |
220 | __kvm_ioapic_update_eoi(ioapic, i, trigger_mode); | 246 | mutex_unlock(&ioapic->lock); |
221 | } | 247 | } |
222 | 248 | ||
223 | static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr, | 249 | static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) |
224 | int len, int is_write) | ||
225 | { | 250 | { |
226 | struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; | 251 | return container_of(dev, struct kvm_ioapic, dev); |
252 | } | ||
227 | 253 | ||
254 | static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr) | ||
255 | { | ||
228 | return ((addr >= ioapic->base_address && | 256 | return ((addr >= ioapic->base_address && |
229 | (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); | 257 | (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); |
230 | } | 258 | } |
231 | 259 | ||
232 | static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, | 260 | static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, |
233 | void *val) | 261 | void *val) |
234 | { | 262 | { |
235 | struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; | 263 | struct kvm_ioapic *ioapic = to_ioapic(this); |
236 | u32 result; | 264 | u32 result; |
265 | if (!ioapic_in_range(ioapic, addr)) | ||
266 | return -EOPNOTSUPP; | ||
237 | 267 | ||
238 | ioapic_debug("addr %lx\n", (unsigned long)addr); | 268 | ioapic_debug("addr %lx\n", (unsigned long)addr); |
239 | ASSERT(!(addr & 0xf)); /* check alignment */ | 269 | ASSERT(!(addr & 0xf)); /* check alignment */ |
240 | 270 | ||
241 | addr &= 0xff; | 271 | addr &= 0xff; |
272 | mutex_lock(&ioapic->lock); | ||
242 | switch (addr) { | 273 | switch (addr) { |
243 | case IOAPIC_REG_SELECT: | 274 | case IOAPIC_REG_SELECT: |
244 | result = ioapic->ioregsel; | 275 | result = ioapic->ioregsel; |
@@ -252,6 +283,8 @@ static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, | |||
252 | result = 0; | 283 | result = 0; |
253 | break; | 284 | break; |
254 | } | 285 | } |
286 | mutex_unlock(&ioapic->lock); | ||
287 | |||
255 | switch (len) { | 288 | switch (len) { |
256 | case 8: | 289 | case 8: |
257 | *(u64 *) val = result; | 290 | *(u64 *) val = result; |
@@ -264,25 +297,30 @@ static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, | |||
264 | default: | 297 | default: |
265 | printk(KERN_WARNING "ioapic: wrong length %d\n", len); | 298 | printk(KERN_WARNING "ioapic: wrong length %d\n", len); |
266 | } | 299 | } |
300 | return 0; | ||
267 | } | 301 | } |
268 | 302 | ||
269 | static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, | 303 | static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, |
270 | const void *val) | 304 | const void *val) |
271 | { | 305 | { |
272 | struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; | 306 | struct kvm_ioapic *ioapic = to_ioapic(this); |
273 | u32 data; | 307 | u32 data; |
308 | if (!ioapic_in_range(ioapic, addr)) | ||
309 | return -EOPNOTSUPP; | ||
274 | 310 | ||
275 | ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n", | 311 | ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n", |
276 | (void*)addr, len, val); | 312 | (void*)addr, len, val); |
277 | ASSERT(!(addr & 0xf)); /* check alignment */ | 313 | ASSERT(!(addr & 0xf)); /* check alignment */ |
314 | |||
278 | if (len == 4 || len == 8) | 315 | if (len == 4 || len == 8) |
279 | data = *(u32 *) val; | 316 | data = *(u32 *) val; |
280 | else { | 317 | else { |
281 | printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); | 318 | printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); |
282 | return; | 319 | return 0; |
283 | } | 320 | } |
284 | 321 | ||
285 | addr &= 0xff; | 322 | addr &= 0xff; |
323 | mutex_lock(&ioapic->lock); | ||
286 | switch (addr) { | 324 | switch (addr) { |
287 | case IOAPIC_REG_SELECT: | 325 | case IOAPIC_REG_SELECT: |
288 | ioapic->ioregsel = data; | 326 | ioapic->ioregsel = data; |
@@ -293,13 +331,15 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, | |||
293 | break; | 331 | break; |
294 | #ifdef CONFIG_IA64 | 332 | #ifdef CONFIG_IA64 |
295 | case IOAPIC_REG_EOI: | 333 | case IOAPIC_REG_EOI: |
296 | kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG); | 334 | __kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG); |
297 | break; | 335 | break; |
298 | #endif | 336 | #endif |
299 | 337 | ||
300 | default: | 338 | default: |
301 | break; | 339 | break; |
302 | } | 340 | } |
341 | mutex_unlock(&ioapic->lock); | ||
342 | return 0; | ||
303 | } | 343 | } |
304 | 344 | ||
305 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic) | 345 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic) |
@@ -314,21 +354,51 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic) | |||
314 | ioapic->id = 0; | 354 | ioapic->id = 0; |
315 | } | 355 | } |
316 | 356 | ||
357 | static const struct kvm_io_device_ops ioapic_mmio_ops = { | ||
358 | .read = ioapic_mmio_read, | ||
359 | .write = ioapic_mmio_write, | ||
360 | }; | ||
361 | |||
317 | int kvm_ioapic_init(struct kvm *kvm) | 362 | int kvm_ioapic_init(struct kvm *kvm) |
318 | { | 363 | { |
319 | struct kvm_ioapic *ioapic; | 364 | struct kvm_ioapic *ioapic; |
365 | int ret; | ||
320 | 366 | ||
321 | ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); | 367 | ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); |
322 | if (!ioapic) | 368 | if (!ioapic) |
323 | return -ENOMEM; | 369 | return -ENOMEM; |
370 | mutex_init(&ioapic->lock); | ||
324 | kvm->arch.vioapic = ioapic; | 371 | kvm->arch.vioapic = ioapic; |
325 | kvm_ioapic_reset(ioapic); | 372 | kvm_ioapic_reset(ioapic); |
326 | ioapic->dev.read = ioapic_mmio_read; | 373 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); |
327 | ioapic->dev.write = ioapic_mmio_write; | ||
328 | ioapic->dev.in_range = ioapic_in_range; | ||
329 | ioapic->dev.private = ioapic; | ||
330 | ioapic->kvm = kvm; | 374 | ioapic->kvm = kvm; |
331 | kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev); | 375 | ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev); |
376 | if (ret < 0) | ||
377 | kfree(ioapic); | ||
378 | |||
379 | return ret; | ||
380 | } | ||
381 | |||
382 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | ||
383 | { | ||
384 | struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); | ||
385 | if (!ioapic) | ||
386 | return -EINVAL; | ||
387 | |||
388 | mutex_lock(&ioapic->lock); | ||
389 | memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); | ||
390 | mutex_unlock(&ioapic->lock); | ||
332 | return 0; | 391 | return 0; |
333 | } | 392 | } |
334 | 393 | ||
394 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | ||
395 | { | ||
396 | struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); | ||
397 | if (!ioapic) | ||
398 | return -EINVAL; | ||
399 | |||
400 | mutex_lock(&ioapic->lock); | ||
401 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); | ||
402 | mutex_unlock(&ioapic->lock); | ||
403 | return 0; | ||
404 | } | ||
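Two things change in ioapic.c above: the IOAPIC gets its own mutex instead of relying on the outer kvm locking, and __kvm_ioapic_update_eoi() drops that mutex around kvm_notify_acked_irq() because ack notifiers for assigned devices re-enter the IOAPIC. Since remote_irr is cleared only after the notifier returns, a vector raised while the lock was dropped is parked in irr and serviced afterwards instead of being lost. A standalone sketch of that drop-the-lock-around-a-re-entrant-callback pattern, in plain pthreads with illustrative names, not a drop-in for this file:

	/* Hedged sketch; every name here is made up, nothing is taken
	 * from the KVM sources. */
	#include <pthread.h>
	#include <stdbool.h>

	struct chip {
		pthread_mutex_t lock;
		bool pending;               /* stands in for irr/remote_irr state */
		void (*ack_notifier)(void); /* may call back into the chip        */
	};

	static void handle_eoi(struct chip *c)
	{
		pthread_mutex_lock(&c->lock);

		/* The callback may re-enter the chip and try to take the lock,
		 * so release it for the duration of the call. */
		pthread_mutex_unlock(&c->lock);
		c->ack_notifier();
		pthread_mutex_lock(&c->lock);

		/* Per-interrupt state is cleared only now: anything raised while
		 * the lock was dropped is still marked pending and gets serviced
		 * here instead of being dropped. */
		if (c->pending) {
			c->pending = false;
			/* ... service the interrupt ... */
		}

		pthread_mutex_unlock(&c->lock);
	}

The same reasoning explains the IA64 EOI register change: ioapic_mmio_write() already holds ioapic->lock, so it now calls __kvm_ioapic_update_eoi() directly rather than kvm_ioapic_update_eoi(), which would try to take the lock a second time.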
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 7080b713c160..419c43b667ab 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h | |||
@@ -41,9 +41,11 @@ struct kvm_ioapic { | |||
41 | u32 irr; | 41 | u32 irr; |
42 | u32 pad; | 42 | u32 pad; |
43 | union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; | 43 | union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; |
44 | unsigned long irq_states[IOAPIC_NUM_PINS]; | ||
44 | struct kvm_io_device dev; | 45 | struct kvm_io_device dev; |
45 | struct kvm *kvm; | 46 | struct kvm *kvm; |
46 | void (*ack_notifier)(void *opaque, int irq); | 47 | void (*ack_notifier)(void *opaque, int irq); |
48 | struct mutex lock; | ||
47 | }; | 49 | }; |
48 | 50 | ||
49 | #ifdef DEBUG | 51 | #ifdef DEBUG |
@@ -73,4 +75,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); | |||
73 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); | 75 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); |
74 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | 76 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, |
75 | struct kvm_lapic_irq *irq); | 77 | struct kvm_lapic_irq *irq); |
78 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | ||
79 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | ||
80 | |||
76 | #endif | 81 | #endif |
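The irq_states array added to the header gives every IOAPIC pin a per-source bitmap, so several interrupt sources can share one level-triggered line: each source sets or clears only its own bit, and the pin stays asserted while any bit remains set. That is what kvm_irq_line_state() computes in irq_comm.c further down; here is a hedged, self-contained illustration with two sources sharing a pin (made-up names, runnable as ordinary userspace C):

	#include <stdio.h>

	/* Hedged illustration of the per-pin source bitmap. */
	static int set_line_state(unsigned long *state, int source_id, int level)
	{
		if (level)
			*state |= 1UL << source_id;    /* this source asserts the line  */
		else
			*state &= ~(1UL << source_id); /* this source releases the line */

		return *state != 0;                    /* high while any source asserts */
	}

	int main(void)
	{
		unsigned long pin = 0;

		printf("%d\n", set_line_state(&pin, 0, 1)); /* source 0 raises -> 1 */
		printf("%d\n", set_line_state(&pin, 1, 1)); /* source 1 raises -> 1 */
		printf("%d\n", set_line_state(&pin, 0, 0)); /* source 0 lowers -> 1 */
		printf("%d\n", set_line_state(&pin, 1, 0)); /* source 1 lowers -> 0 */
		return 0;
	}

This is also why kvm_free_irq_source_id() below now clears the freed source's bit on every pin of both the PIC and the IOAPIC before the ID can be handed out again.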
diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h index 55e8846ac3a6..12fd3caffd2b 100644 --- a/virt/kvm/iodev.h +++ b/virt/kvm/iodev.h | |||
@@ -17,49 +17,54 @@ | |||
17 | #define __KVM_IODEV_H__ | 17 | #define __KVM_IODEV_H__ |
18 | 18 | ||
19 | #include <linux/kvm_types.h> | 19 | #include <linux/kvm_types.h> |
20 | #include <asm/errno.h> | ||
20 | 21 | ||
21 | struct kvm_io_device { | 22 | struct kvm_io_device; |
22 | void (*read)(struct kvm_io_device *this, | 23 | |
24 | /** | ||
25 | * kvm_io_device_ops are called under kvm slots_lock. | ||
26 | * read and write handlers return 0 if the transaction has been handled, | ||
27 | * or non-zero to have it passed to the next device. | ||
28 | **/ | ||
29 | struct kvm_io_device_ops { | ||
30 | int (*read)(struct kvm_io_device *this, | ||
31 | gpa_t addr, | ||
32 | int len, | ||
33 | void *val); | ||
34 | int (*write)(struct kvm_io_device *this, | ||
23 | gpa_t addr, | 35 | gpa_t addr, |
24 | int len, | 36 | int len, |
25 | void *val); | 37 | const void *val); |
26 | void (*write)(struct kvm_io_device *this, | ||
27 | gpa_t addr, | ||
28 | int len, | ||
29 | const void *val); | ||
30 | int (*in_range)(struct kvm_io_device *this, gpa_t addr, int len, | ||
31 | int is_write); | ||
32 | void (*destructor)(struct kvm_io_device *this); | 38 | void (*destructor)(struct kvm_io_device *this); |
39 | }; | ||
33 | 40 | ||
34 | void *private; | 41 | |
42 | struct kvm_io_device { | ||
43 | const struct kvm_io_device_ops *ops; | ||
35 | }; | 44 | }; |
36 | 45 | ||
37 | static inline void kvm_iodevice_read(struct kvm_io_device *dev, | 46 | static inline void kvm_iodevice_init(struct kvm_io_device *dev, |
38 | gpa_t addr, | 47 | const struct kvm_io_device_ops *ops) |
39 | int len, | ||
40 | void *val) | ||
41 | { | 48 | { |
42 | dev->read(dev, addr, len, val); | 49 | dev->ops = ops; |
43 | } | 50 | } |
44 | 51 | ||
45 | static inline void kvm_iodevice_write(struct kvm_io_device *dev, | 52 | static inline int kvm_iodevice_read(struct kvm_io_device *dev, |
46 | gpa_t addr, | 53 | gpa_t addr, int l, void *v) |
47 | int len, | ||
48 | const void *val) | ||
49 | { | 54 | { |
50 | dev->write(dev, addr, len, val); | 55 | return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP; |
51 | } | 56 | } |
52 | 57 | ||
53 | static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, | 58 | static inline int kvm_iodevice_write(struct kvm_io_device *dev, |
54 | gpa_t addr, int len, int is_write) | 59 | gpa_t addr, int l, const void *v) |
55 | { | 60 | { |
56 | return dev->in_range(dev, addr, len, is_write); | 61 | return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP; |
57 | } | 62 | } |
58 | 63 | ||
59 | static inline void kvm_iodevice_destructor(struct kvm_io_device *dev) | 64 | static inline void kvm_iodevice_destructor(struct kvm_io_device *dev) |
60 | { | 65 | { |
61 | if (dev->destructor) | 66 | if (dev->ops->destructor) |
62 | dev->destructor(dev); | 67 | dev->ops->destructor(dev); |
63 | } | 68 | } |
64 | 69 | ||
65 | #endif /* __KVM_IODEV_H__ */ | 70 | #endif /* __KVM_IODEV_H__ */ |
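iodev.h replaces per-instance function pointers plus an opaque private pointer with a shared const ops table: a device embeds struct kvm_io_device, points it at its ops with kvm_iodevice_init(), and recovers its own state with container_of(), as to_ioapic() now does. A hedged userspace sketch of that embed-and-container_of idiom; io_device and my_dev are illustrative stand-ins, not KVM's own types:

	#include <stddef.h>
	#include <stdio.h>

	struct io_device;                       /* stands in for struct kvm_io_device */

	struct io_device_ops {
		int (*read)(struct io_device *dev, unsigned long addr, int len, void *val);
	};

	struct io_device {
		const struct io_device_ops *ops;
	};

	struct my_dev {
		unsigned long base;
		struct io_device dev;           /* embedded, not allocated separately */
	};

	#define my_container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	static int my_read(struct io_device *this, unsigned long addr, int len, void *val)
	{
		/* Recover the containing object from the embedded member. */
		struct my_dev *d = my_container_of(this, struct my_dev, dev);

		(void)val;
		printf("read %d bytes at device offset 0x%lx\n", len, addr - d->base);
		return 0;                       /* 0 = handled, per the new ops contract */
	}

	static const struct io_device_ops my_ops = { .read = my_read };

	int main(void)
	{
		struct my_dev d = { .base = 0x1000 };
		unsigned int v;

		d.dev.ops = &my_ops;            /* what kvm_iodevice_init() does */
		return d.dev.ops->read(&d.dev, 0x1004, 4, &v);
	}

The payoff is the return-value contract documented in the new header comment: a device that is not responsible for an address returns non-zero (-EOPNOTSUPP in ioapic.c above), letting the bus walk on to the next device instead of querying each one with in_range() first.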
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index ddc17f0e2f35..9fd5b3ebc517 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c | |||
@@ -20,6 +20,7 @@ | |||
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/kvm_host.h> | 22 | #include <linux/kvm_host.h> |
23 | #include <trace/events/kvm.h> | ||
23 | 24 | ||
24 | #include <asm/msidef.h> | 25 | #include <asm/msidef.h> |
25 | #ifdef CONFIG_IA64 | 26 | #ifdef CONFIG_IA64 |
@@ -30,20 +31,39 @@ | |||
30 | 31 | ||
31 | #include "ioapic.h" | 32 | #include "ioapic.h" |
32 | 33 | ||
34 | static inline int kvm_irq_line_state(unsigned long *irq_state, | ||
35 | int irq_source_id, int level) | ||
36 | { | ||
37 | /* Logical OR for level trig interrupt */ | ||
38 | if (level) | ||
39 | set_bit(irq_source_id, irq_state); | ||
40 | else | ||
41 | clear_bit(irq_source_id, irq_state); | ||
42 | |||
43 | return !!(*irq_state); | ||
44 | } | ||
45 | |||
33 | static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, | 46 | static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, |
34 | struct kvm *kvm, int level) | 47 | struct kvm *kvm, int irq_source_id, int level) |
35 | { | 48 | { |
36 | #ifdef CONFIG_X86 | 49 | #ifdef CONFIG_X86 |
37 | return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level); | 50 | struct kvm_pic *pic = pic_irqchip(kvm); |
51 | level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin], | ||
52 | irq_source_id, level); | ||
53 | return kvm_pic_set_irq(pic, e->irqchip.pin, level); | ||
38 | #else | 54 | #else |
39 | return -1; | 55 | return -1; |
40 | #endif | 56 | #endif |
41 | } | 57 | } |
42 | 58 | ||
43 | static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, | 59 | static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, |
44 | struct kvm *kvm, int level) | 60 | struct kvm *kvm, int irq_source_id, int level) |
45 | { | 61 | { |
46 | return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); | 62 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
63 | level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin], | ||
64 | irq_source_id, level); | ||
65 | |||
66 | return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level); | ||
47 | } | 67 | } |
48 | 68 | ||
49 | inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) | 69 | inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) |
@@ -66,10 +86,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
66 | kvm_is_dm_lowest_prio(irq)) | 86 | kvm_is_dm_lowest_prio(irq)) |
67 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); | 87 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); |
68 | 88 | ||
69 | for (i = 0; i < KVM_MAX_VCPUS; i++) { | 89 | kvm_for_each_vcpu(i, vcpu, kvm) { |
70 | vcpu = kvm->vcpus[i]; | 90 | if (!kvm_apic_present(vcpu)) |
71 | |||
72 | if (!vcpu || !kvm_apic_present(vcpu)) | ||
73 | continue; | 91 | continue; |
74 | 92 | ||
75 | if (!kvm_apic_match_dest(vcpu, src, irq->shorthand, | 93 | if (!kvm_apic_match_dest(vcpu, src, irq->shorthand, |
@@ -95,10 +113,15 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
95 | } | 113 | } |
96 | 114 | ||
97 | static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | 115 | static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, |
98 | struct kvm *kvm, int level) | 116 | struct kvm *kvm, int irq_source_id, int level) |
99 | { | 117 | { |
100 | struct kvm_lapic_irq irq; | 118 | struct kvm_lapic_irq irq; |
101 | 119 | ||
120 | if (!level) | ||
121 | return -1; | ||
122 | |||
123 | trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); | ||
124 | |||
102 | irq.dest_id = (e->msi.address_lo & | 125 | irq.dest_id = (e->msi.address_lo & |
103 | MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; | 126 | MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; |
104 | irq.vector = (e->msi.data & | 127 | irq.vector = (e->msi.data & |
@@ -113,90 +136,97 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | |||
113 | return kvm_irq_delivery_to_apic(kvm, NULL, &irq); | 136 | return kvm_irq_delivery_to_apic(kvm, NULL, &irq); |
114 | } | 137 | } |
115 | 138 | ||
116 | /* This should be called with the kvm->lock mutex held | 139 | /* |
117 | * Return value: | 140 | * Return value: |
118 | * < 0 Interrupt was ignored (masked or not delivered for other reasons) | 141 | * < 0 Interrupt was ignored (masked or not delivered for other reasons) |
119 | * = 0 Interrupt was coalesced (previous irq is still pending) | 142 | * = 0 Interrupt was coalesced (previous irq is still pending) |
120 | * > 0 Number of CPUs interrupt was delivered to | 143 | * > 0 Number of CPUs interrupt was delivered to |
121 | */ | 144 | */ |
122 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) | 145 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) |
123 | { | 146 | { |
124 | struct kvm_kernel_irq_routing_entry *e; | 147 | struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; |
125 | unsigned long *irq_state, sig_level; | 148 | int ret = -1, i = 0; |
126 | int ret = -1; | 149 | struct kvm_irq_routing_table *irq_rt; |
127 | 150 | struct hlist_node *n; | |
128 | if (irq < KVM_IOAPIC_NUM_PINS) { | 151 | |
129 | irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; | 152 | trace_kvm_set_irq(irq, level, irq_source_id); |
130 | |||
131 | /* Logical OR for level trig interrupt */ | ||
132 | if (level) | ||
133 | set_bit(irq_source_id, irq_state); | ||
134 | else | ||
135 | clear_bit(irq_source_id, irq_state); | ||
136 | sig_level = !!(*irq_state); | ||
137 | } else /* Deal with MSI/MSI-X */ | ||
138 | sig_level = 1; | ||
139 | 153 | ||
140 | /* Not possible to detect if the guest uses the PIC or the | 154 | /* Not possible to detect if the guest uses the PIC or the |
141 | * IOAPIC. So set the bit in both. The guest will ignore | 155 | * IOAPIC. So set the bit in both. The guest will ignore |
142 | * writes to the unused one. | 156 | * writes to the unused one. |
143 | */ | 157 | */ |
144 | list_for_each_entry(e, &kvm->irq_routing, link) | 158 | rcu_read_lock(); |
145 | if (e->gsi == irq) { | 159 | irq_rt = rcu_dereference(kvm->irq_routing); |
146 | int r = e->set(e, kvm, sig_level); | 160 | if (irq < irq_rt->nr_rt_entries) |
147 | if (r < 0) | 161 | hlist_for_each_entry(e, n, &irq_rt->map[irq], link) |
148 | continue; | 162 | irq_set[i++] = *e; |
163 | rcu_read_unlock(); | ||
164 | |||
165 | while(i--) { | ||
166 | int r; | ||
167 | r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level); | ||
168 | if (r < 0) | ||
169 | continue; | ||
170 | |||
171 | ret = r + ((ret < 0) ? 0 : ret); | ||
172 | } | ||
149 | 173 | ||
150 | ret = r + ((ret < 0) ? 0 : ret); | ||
151 | } | ||
152 | return ret; | 174 | return ret; |
153 | } | 175 | } |
154 | 176 | ||
155 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | 177 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) |
156 | { | 178 | { |
157 | struct kvm_kernel_irq_routing_entry *e; | ||
158 | struct kvm_irq_ack_notifier *kian; | 179 | struct kvm_irq_ack_notifier *kian; |
159 | struct hlist_node *n; | 180 | struct hlist_node *n; |
160 | unsigned gsi = pin; | 181 | int gsi; |
161 | 182 | ||
162 | list_for_each_entry(e, &kvm->irq_routing, link) | 183 | trace_kvm_ack_irq(irqchip, pin); |
163 | if (e->type == KVM_IRQ_ROUTING_IRQCHIP && | 184 | |
164 | e->irqchip.irqchip == irqchip && | 185 | rcu_read_lock(); |
165 | e->irqchip.pin == pin) { | 186 | gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; |
166 | gsi = e->gsi; | 187 | if (gsi != -1) |
167 | break; | 188 | hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, |
168 | } | 189 | link) |
169 | 190 | if (kian->gsi == gsi) | |
170 | hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link) | 191 | kian->irq_acked(kian); |
171 | if (kian->gsi == gsi) | 192 | rcu_read_unlock(); |
172 | kian->irq_acked(kian); | ||
173 | } | 193 | } |
174 | 194 | ||
175 | void kvm_register_irq_ack_notifier(struct kvm *kvm, | 195 | void kvm_register_irq_ack_notifier(struct kvm *kvm, |
176 | struct kvm_irq_ack_notifier *kian) | 196 | struct kvm_irq_ack_notifier *kian) |
177 | { | 197 | { |
178 | hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); | 198 | mutex_lock(&kvm->irq_lock); |
199 | hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); | ||
200 | mutex_unlock(&kvm->irq_lock); | ||
179 | } | 201 | } |
180 | 202 | ||
181 | void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian) | 203 | void kvm_unregister_irq_ack_notifier(struct kvm *kvm, |
204 | struct kvm_irq_ack_notifier *kian) | ||
182 | { | 205 | { |
183 | hlist_del_init(&kian->link); | 206 | mutex_lock(&kvm->irq_lock); |
207 | hlist_del_init_rcu(&kian->link); | ||
208 | mutex_unlock(&kvm->irq_lock); | ||
209 | synchronize_rcu(); | ||
184 | } | 210 | } |
185 | 211 | ||
186 | /* The caller must hold kvm->lock mutex */ | ||
187 | int kvm_request_irq_source_id(struct kvm *kvm) | 212 | int kvm_request_irq_source_id(struct kvm *kvm) |
188 | { | 213 | { |
189 | unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; | 214 | unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; |
190 | int irq_source_id = find_first_zero_bit(bitmap, | 215 | int irq_source_id; |
191 | sizeof(kvm->arch.irq_sources_bitmap)); | 216 | |
217 | mutex_lock(&kvm->irq_lock); | ||
218 | irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG); | ||
192 | 219 | ||
193 | if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { | 220 | if (irq_source_id >= BITS_PER_LONG) { |
194 | printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); | 221 | printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); |
195 | return -EFAULT; | 222 | irq_source_id = -EFAULT; |
223 | goto unlock; | ||
196 | } | 224 | } |
197 | 225 | ||
198 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); | 226 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); |
199 | set_bit(irq_source_id, bitmap); | 227 | set_bit(irq_source_id, bitmap); |
228 | unlock: | ||
229 | mutex_unlock(&kvm->irq_lock); | ||
200 | 230 | ||
201 | return irq_source_id; | 231 | return irq_source_id; |
202 | } | 232 | } |
@@ -207,27 +237,44 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) | |||
207 | 237 | ||
208 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); | 238 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); |
209 | 239 | ||
240 | mutex_lock(&kvm->irq_lock); | ||
210 | if (irq_source_id < 0 || | 241 | if (irq_source_id < 0 || |
211 | irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { | 242 | irq_source_id >= BITS_PER_LONG) { |
212 | printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); | 243 | printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); |
213 | return; | 244 | goto unlock; |
214 | } | 245 | } |
215 | for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) | ||
216 | clear_bit(irq_source_id, &kvm->arch.irq_states[i]); | ||
217 | clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); | 246 | clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); |
247 | if (!irqchip_in_kernel(kvm)) | ||
248 | goto unlock; | ||
249 | |||
250 | for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) { | ||
251 | clear_bit(irq_source_id, &kvm->arch.vioapic->irq_states[i]); | ||
252 | if (i >= 16) | ||
253 | continue; | ||
254 | #ifdef CONFIG_X86 | ||
255 | clear_bit(irq_source_id, &pic_irqchip(kvm)->irq_states[i]); | ||
256 | #endif | ||
257 | } | ||
258 | unlock: | ||
259 | mutex_unlock(&kvm->irq_lock); | ||
218 | } | 260 | } |
219 | 261 | ||
220 | void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, | 262 | void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, |
221 | struct kvm_irq_mask_notifier *kimn) | 263 | struct kvm_irq_mask_notifier *kimn) |
222 | { | 264 | { |
265 | mutex_lock(&kvm->irq_lock); | ||
223 | kimn->irq = irq; | 266 | kimn->irq = irq; |
224 | hlist_add_head(&kimn->link, &kvm->mask_notifier_list); | 267 | hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list); |
268 | mutex_unlock(&kvm->irq_lock); | ||
225 | } | 269 | } |
226 | 270 | ||
227 | void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, | 271 | void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, |
228 | struct kvm_irq_mask_notifier *kimn) | 272 | struct kvm_irq_mask_notifier *kimn) |
229 | { | 273 | { |
230 | hlist_del(&kimn->link); | 274 | mutex_lock(&kvm->irq_lock); |
275 | hlist_del_rcu(&kimn->link); | ||
276 | mutex_unlock(&kvm->irq_lock); | ||
277 | synchronize_rcu(); | ||
231 | } | 278 | } |
232 | 279 | ||
233 | void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) | 280 | void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) |
@@ -235,29 +282,38 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) | |||
235 | struct kvm_irq_mask_notifier *kimn; | 282 | struct kvm_irq_mask_notifier *kimn; |
236 | struct hlist_node *n; | 283 | struct hlist_node *n; |
237 | 284 | ||
238 | hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link) | 285 | rcu_read_lock(); |
286 | hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link) | ||
239 | if (kimn->irq == irq) | 287 | if (kimn->irq == irq) |
240 | kimn->func(kimn, mask); | 288 | kimn->func(kimn, mask); |
241 | } | 289 | rcu_read_unlock(); |
242 | |||
243 | static void __kvm_free_irq_routing(struct list_head *irq_routing) | ||
244 | { | ||
245 | struct kvm_kernel_irq_routing_entry *e, *n; | ||
246 | |||
247 | list_for_each_entry_safe(e, n, irq_routing, link) | ||
248 | kfree(e); | ||
249 | } | 290 | } |
250 | 291 | ||
251 | void kvm_free_irq_routing(struct kvm *kvm) | 292 | void kvm_free_irq_routing(struct kvm *kvm) |
252 | { | 293 | { |
253 | __kvm_free_irq_routing(&kvm->irq_routing); | 294 | /* Called only during vm destruction. Nobody can use the pointer |
295 | at this stage */ | ||
296 | kfree(kvm->irq_routing); | ||
254 | } | 297 | } |
255 | 298 | ||
256 | static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, | 299 | static int setup_routing_entry(struct kvm_irq_routing_table *rt, |
300 | struct kvm_kernel_irq_routing_entry *e, | ||
257 | const struct kvm_irq_routing_entry *ue) | 301 | const struct kvm_irq_routing_entry *ue) |
258 | { | 302 | { |
259 | int r = -EINVAL; | 303 | int r = -EINVAL; |
260 | int delta; | 304 | int delta; |
305 | unsigned max_pin; | ||
306 | struct kvm_kernel_irq_routing_entry *ei; | ||
307 | struct hlist_node *n; | ||
308 | |||
309 | /* | ||
310 | * Do not allow GSI to be mapped to the same irqchip more than once. | ||
311 | * Allow only one to one mapping between GSI and MSI. | ||
312 | */ | ||
313 | hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link) | ||
314 | if (ei->type == KVM_IRQ_ROUTING_MSI || | ||
315 | ue->u.irqchip.irqchip == ei->irqchip.irqchip) | ||
316 | return r; | ||
261 | 317 | ||
262 | e->gsi = ue->gsi; | 318 | e->gsi = ue->gsi; |
263 | e->type = ue->type; | 319 | e->type = ue->type; |
@@ -267,12 +323,15 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, | |||
267 | switch (ue->u.irqchip.irqchip) { | 323 | switch (ue->u.irqchip.irqchip) { |
268 | case KVM_IRQCHIP_PIC_MASTER: | 324 | case KVM_IRQCHIP_PIC_MASTER: |
269 | e->set = kvm_set_pic_irq; | 325 | e->set = kvm_set_pic_irq; |
326 | max_pin = 16; | ||
270 | break; | 327 | break; |
271 | case KVM_IRQCHIP_PIC_SLAVE: | 328 | case KVM_IRQCHIP_PIC_SLAVE: |
272 | e->set = kvm_set_pic_irq; | 329 | e->set = kvm_set_pic_irq; |
330 | max_pin = 16; | ||
273 | delta = 8; | 331 | delta = 8; |
274 | break; | 332 | break; |
275 | case KVM_IRQCHIP_IOAPIC: | 333 | case KVM_IRQCHIP_IOAPIC: |
334 | max_pin = KVM_IOAPIC_NUM_PINS; | ||
276 | e->set = kvm_set_ioapic_irq; | 335 | e->set = kvm_set_ioapic_irq; |
277 | break; | 336 | break; |
278 | default: | 337 | default: |
@@ -280,6 +339,9 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, | |||
280 | } | 339 | } |
281 | e->irqchip.irqchip = ue->u.irqchip.irqchip; | 340 | e->irqchip.irqchip = ue->u.irqchip.irqchip; |
282 | e->irqchip.pin = ue->u.irqchip.pin + delta; | 341 | e->irqchip.pin = ue->u.irqchip.pin + delta; |
342 | if (e->irqchip.pin >= max_pin) | ||
343 | goto out; | ||
344 | rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; | ||
283 | break; | 345 | break; |
284 | case KVM_IRQ_ROUTING_MSI: | 346 | case KVM_IRQ_ROUTING_MSI: |
285 | e->set = kvm_set_msi; | 347 | e->set = kvm_set_msi; |
@@ -290,6 +352,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, | |||
290 | default: | 352 | default: |
291 | goto out; | 353 | goto out; |
292 | } | 354 | } |
355 | |||
356 | hlist_add_head(&e->link, &rt->map[e->gsi]); | ||
293 | r = 0; | 357 | r = 0; |
294 | out: | 358 | out: |
295 | return r; | 359 | return r; |
@@ -301,43 +365,53 @@ int kvm_set_irq_routing(struct kvm *kvm, | |||
301 | unsigned nr, | 365 | unsigned nr, |
302 | unsigned flags) | 366 | unsigned flags) |
303 | { | 367 | { |
304 | struct list_head irq_list = LIST_HEAD_INIT(irq_list); | 368 | struct kvm_irq_routing_table *new, *old; |
305 | struct list_head tmp = LIST_HEAD_INIT(tmp); | 369 | u32 i, j, nr_rt_entries = 0; |
306 | struct kvm_kernel_irq_routing_entry *e = NULL; | ||
307 | unsigned i; | ||
308 | int r; | 370 | int r; |
309 | 371 | ||
310 | for (i = 0; i < nr; ++i) { | 372 | for (i = 0; i < nr; ++i) { |
373 | if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) | ||
374 | return -EINVAL; | ||
375 | nr_rt_entries = max(nr_rt_entries, ue[i].gsi); | ||
376 | } | ||
377 | |||
378 | nr_rt_entries += 1; | ||
379 | |||
380 | new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) | ||
381 | + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), | ||
382 | GFP_KERNEL); | ||
383 | |||
384 | if (!new) | ||
385 | return -ENOMEM; | ||
386 | |||
387 | new->rt_entries = (void *)&new->map[nr_rt_entries]; | ||
388 | |||
389 | new->nr_rt_entries = nr_rt_entries; | ||
390 | for (i = 0; i < 3; i++) | ||
391 | for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++) | ||
392 | new->chip[i][j] = -1; | ||
393 | |||
394 | for (i = 0; i < nr; ++i) { | ||
311 | r = -EINVAL; | 395 | r = -EINVAL; |
312 | if (ue->gsi >= KVM_MAX_IRQ_ROUTES) | ||
313 | goto out; | ||
314 | if (ue->flags) | 396 | if (ue->flags) |
315 | goto out; | 397 | goto out; |
316 | r = -ENOMEM; | 398 | r = setup_routing_entry(new, &new->rt_entries[i], ue); |
317 | e = kzalloc(sizeof(*e), GFP_KERNEL); | ||
318 | if (!e) | ||
319 | goto out; | ||
320 | r = setup_routing_entry(e, ue); | ||
321 | if (r) | 399 | if (r) |
322 | goto out; | 400 | goto out; |
323 | ++ue; | 401 | ++ue; |
324 | list_add(&e->link, &irq_list); | ||
325 | e = NULL; | ||
326 | } | 402 | } |
327 | 403 | ||
328 | mutex_lock(&kvm->lock); | 404 | mutex_lock(&kvm->irq_lock); |
329 | list_splice(&kvm->irq_routing, &tmp); | 405 | old = kvm->irq_routing; |
330 | INIT_LIST_HEAD(&kvm->irq_routing); | 406 | rcu_assign_pointer(kvm->irq_routing, new); |
331 | list_splice(&irq_list, &kvm->irq_routing); | 407 | mutex_unlock(&kvm->irq_lock); |
332 | INIT_LIST_HEAD(&irq_list); | 408 | synchronize_rcu(); |
333 | list_splice(&tmp, &irq_list); | ||
334 | mutex_unlock(&kvm->lock); | ||
335 | 409 | ||
410 | new = old; | ||
336 | r = 0; | 411 | r = 0; |
337 | 412 | ||
338 | out: | 413 | out: |
339 | kfree(e); | 414 | kfree(new); |
340 | __kvm_free_irq_routing(&irq_list); | ||
341 | return r; | 415 | return r; |
342 | } | 416 | } |
343 | 417 | ||
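The routing rewrite above turns the per-entry linked list into one flat table published through RCU: kvm_set_irq() snapshots the matching entries under rcu_read_lock() and calls them after dropping it, while kvm_set_irq_routing() swaps the pointer under kvm->irq_lock and frees the old table only after synchronize_rcu(). A hedged, kernel-style sketch of that publish/replace discipline with illustrative names, not a drop-in:

	#include <linux/rcupdate.h>
	#include <linux/mutex.h>
	#include <linux/slab.h>

	struct table {
		unsigned int nr_entries;
		/* ... */
	};

	static struct table *cur_table;   /* readers go through rcu_dereference() */
	static DEFINE_MUTEX(update_lock); /* serializes writers only              */

	static unsigned int read_entries(void)
	{
		unsigned int nr;

		rcu_read_lock();
		nr = rcu_dereference(cur_table)->nr_entries; /* copy what is needed */
		rcu_read_unlock();

		return nr;                /* work on the copy, never on the table */
	}

	static void replace_table(struct table *new_table)
	{
		struct table *old;

		mutex_lock(&update_lock);
		old = cur_table;
		rcu_assign_pointer(cur_table, new_table); /* publish the new table */
		mutex_unlock(&update_lock);

		synchronize_rcu();        /* wait out readers of the old table */
		kfree(old);               /* nothing can still reference it    */
	}

kvm_free_irq_routing() can get away with a plain kfree() only because it runs at VM destruction time, when no reader can still hold the pointer, as the new comment notes.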
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b24e96d5d40c..a51ba60a78b1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -43,29 +43,36 @@ | |||
43 | #include <linux/swap.h> | 43 | #include <linux/swap.h> |
44 | #include <linux/bitops.h> | 44 | #include <linux/bitops.h> |
45 | #include <linux/spinlock.h> | 45 | #include <linux/spinlock.h> |
46 | #include <linux/compat.h> | ||
46 | 47 | ||
47 | #include <asm/processor.h> | 48 | #include <asm/processor.h> |
48 | #include <asm/io.h> | 49 | #include <asm/io.h> |
49 | #include <asm/uaccess.h> | 50 | #include <asm/uaccess.h> |
50 | #include <asm/pgtable.h> | 51 | #include <asm/pgtable.h> |
52 | #include <asm-generic/bitops/le.h> | ||
51 | 53 | ||
52 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 54 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
53 | #include "coalesced_mmio.h" | 55 | #include "coalesced_mmio.h" |
54 | #endif | 56 | #endif |
55 | 57 | ||
56 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | 58 | #define CREATE_TRACE_POINTS |
57 | #include <linux/pci.h> | 59 | #include <trace/events/kvm.h> |
58 | #include <linux/interrupt.h> | ||
59 | #include "irq.h" | ||
60 | #endif | ||
61 | 60 | ||
62 | MODULE_AUTHOR("Qumranet"); | 61 | MODULE_AUTHOR("Qumranet"); |
63 | MODULE_LICENSE("GPL"); | 62 | MODULE_LICENSE("GPL"); |
64 | 63 | ||
64 | /* | ||
65 | * Ordering of locks: | ||
66 | * | ||
67 | * kvm->lock --> kvm->slots_lock --> kvm->irq_lock | ||
68 | */ | ||
69 | |||
65 | DEFINE_SPINLOCK(kvm_lock); | 70 | DEFINE_SPINLOCK(kvm_lock); |
66 | LIST_HEAD(vm_list); | 71 | LIST_HEAD(vm_list); |
67 | 72 | ||
68 | static cpumask_var_t cpus_hardware_enabled; | 73 | static cpumask_var_t cpus_hardware_enabled; |
74 | static int kvm_usage_count = 0; | ||
75 | static atomic_t hardware_enable_failed; | ||
69 | 76 | ||
70 | struct kmem_cache *kvm_vcpu_cache; | 77 | struct kmem_cache *kvm_vcpu_cache; |
71 | EXPORT_SYMBOL_GPL(kvm_vcpu_cache); | 78 | EXPORT_SYMBOL_GPL(kvm_vcpu_cache); |
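The new lock-ordering comment is the rule the rest of this patch follows: any path that needs more than one of these locks takes them in the documented order, so two paths can never end up waiting on each other. A hedged fragment spelling the nesting out, illustrative only and not a real call site:

	#include <linux/kvm_host.h>

	/* Hedged sketch of the documented nesting order. */
	static void ordered_locking_example(struct kvm *kvm)
	{
		mutex_lock(&kvm->lock);      /* outermost              */
		down_read(&kvm->slots_lock); /* only after kvm->lock   */
		mutex_lock(&kvm->irq_lock);  /* innermost              */

		/* ... touch state guarded by all three locks ... */

		mutex_unlock(&kvm->irq_lock);
		up_read(&kvm->slots_lock);
		mutex_unlock(&kvm->lock);
	}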
@@ -76,627 +83,12 @@ struct dentry *kvm_debugfs_dir; | |||
76 | 83 | ||
77 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, | 84 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, |
78 | unsigned long arg); | 85 | unsigned long arg); |
86 | static int hardware_enable_all(void); | ||
87 | static void hardware_disable_all(void); | ||
79 | 88 | ||
80 | static bool kvm_rebooting; | 89 | static bool kvm_rebooting; |
81 | 90 | ||
82 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | 91 | static bool largepages_enabled = true; |
83 | static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, | ||
84 | int assigned_dev_id) | ||
85 | { | ||
86 | struct list_head *ptr; | ||
87 | struct kvm_assigned_dev_kernel *match; | ||
88 | |||
89 | list_for_each(ptr, head) { | ||
90 | match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); | ||
91 | if (match->assigned_dev_id == assigned_dev_id) | ||
92 | return match; | ||
93 | } | ||
94 | return NULL; | ||
95 | } | ||
96 | |||
97 | static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | ||
98 | *assigned_dev, int irq) | ||
99 | { | ||
100 | int i, index; | ||
101 | struct msix_entry *host_msix_entries; | ||
102 | |||
103 | host_msix_entries = assigned_dev->host_msix_entries; | ||
104 | |||
105 | index = -1; | ||
106 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
107 | if (irq == host_msix_entries[i].vector) { | ||
108 | index = i; | ||
109 | break; | ||
110 | } | ||
111 | if (index < 0) { | ||
112 | printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); | ||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | return index; | ||
117 | } | ||
118 | |||
119 | static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) | ||
120 | { | ||
121 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
122 | struct kvm *kvm; | ||
123 | int irq, i; | ||
124 | |||
125 | assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, | ||
126 | interrupt_work); | ||
127 | kvm = assigned_dev->kvm; | ||
128 | |||
129 | /* This is taken to safely inject irq inside the guest. When | ||
130 | * the interrupt injection (or the ioapic code) uses a | ||
131 | * finer-grained lock, update this | ||
132 | */ | ||
133 | mutex_lock(&kvm->lock); | ||
134 | spin_lock_irq(&assigned_dev->assigned_dev_lock); | ||
135 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
136 | struct kvm_guest_msix_entry *guest_entries = | ||
137 | assigned_dev->guest_msix_entries; | ||
138 | for (i = 0; i < assigned_dev->entries_nr; i++) { | ||
139 | if (!(guest_entries[i].flags & | ||
140 | KVM_ASSIGNED_MSIX_PENDING)) | ||
141 | continue; | ||
142 | guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; | ||
143 | kvm_set_irq(assigned_dev->kvm, | ||
144 | assigned_dev->irq_source_id, | ||
145 | guest_entries[i].vector, 1); | ||
146 | irq = assigned_dev->host_msix_entries[i].vector; | ||
147 | if (irq != 0) | ||
148 | enable_irq(irq); | ||
149 | assigned_dev->host_irq_disabled = false; | ||
150 | } | ||
151 | } else { | ||
152 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | ||
153 | assigned_dev->guest_irq, 1); | ||
154 | if (assigned_dev->irq_requested_type & | ||
155 | KVM_DEV_IRQ_GUEST_MSI) { | ||
156 | enable_irq(assigned_dev->host_irq); | ||
157 | assigned_dev->host_irq_disabled = false; | ||
158 | } | ||
159 | } | ||
160 | |||
161 | spin_unlock_irq(&assigned_dev->assigned_dev_lock); | ||
162 | mutex_unlock(&assigned_dev->kvm->lock); | ||
163 | } | ||
164 | |||
165 | static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) | ||
166 | { | ||
167 | unsigned long flags; | ||
168 | struct kvm_assigned_dev_kernel *assigned_dev = | ||
169 | (struct kvm_assigned_dev_kernel *) dev_id; | ||
170 | |||
171 | spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); | ||
172 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
173 | int index = find_index_from_host_irq(assigned_dev, irq); | ||
174 | if (index < 0) | ||
175 | goto out; | ||
176 | assigned_dev->guest_msix_entries[index].flags |= | ||
177 | KVM_ASSIGNED_MSIX_PENDING; | ||
178 | } | ||
179 | |||
180 | schedule_work(&assigned_dev->interrupt_work); | ||
181 | |||
182 | disable_irq_nosync(irq); | ||
183 | assigned_dev->host_irq_disabled = true; | ||
184 | |||
185 | out: | ||
186 | spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); | ||
187 | return IRQ_HANDLED; | ||
188 | } | ||
189 | |||
190 | /* Ack the irq line for an assigned device */ | ||
191 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | ||
192 | { | ||
193 | struct kvm_assigned_dev_kernel *dev; | ||
194 | unsigned long flags; | ||
195 | |||
196 | if (kian->gsi == -1) | ||
197 | return; | ||
198 | |||
199 | dev = container_of(kian, struct kvm_assigned_dev_kernel, | ||
200 | ack_notifier); | ||
201 | |||
202 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); | ||
203 | |||
204 | /* The guest irq may be shared so this ack may be | ||
205 | * from another device. | ||
206 | */ | ||
207 | spin_lock_irqsave(&dev->assigned_dev_lock, flags); | ||
208 | if (dev->host_irq_disabled) { | ||
209 | enable_irq(dev->host_irq); | ||
210 | dev->host_irq_disabled = false; | ||
211 | } | ||
212 | spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); | ||
213 | } | ||
214 | |||
215 | static void deassign_guest_irq(struct kvm *kvm, | ||
216 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
217 | { | ||
218 | kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier); | ||
219 | assigned_dev->ack_notifier.gsi = -1; | ||
220 | |||
221 | if (assigned_dev->irq_source_id != -1) | ||
222 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); | ||
223 | assigned_dev->irq_source_id = -1; | ||
224 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); | ||
225 | } | ||
226 | |||
227 | /* The function implicitly holds the kvm->lock mutex due to cancel_work_sync() */ | ||
228 | static void deassign_host_irq(struct kvm *kvm, | ||
229 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
230 | { | ||
231 | /* | ||
232 | * In kvm_free_device_irq, cancel_work_sync returns true if: | ||
233 | * 1. work is scheduled, and then cancelled. | ||
234 | * 2. work callback is executed. | ||
235 | * | ||
236 | * The first case ensures that the irq is disabled and no more events | ||
237 | * will happen. But in the second case the irq may be enabled again (e.g. | ||
238 | * for MSI). So we disable the irq here to prevent further events. | ||
239 | * | ||
240 | * Note this may result in a nested disable if the interrupt type is | ||
241 | * INTx, but that's OK since we are going to free it anyway. | ||
242 | * | ||
243 | * If this function is part of VM destruction, please ensure that the | ||
244 | * kvm state is still valid up to this point, since we may also have to | ||
245 | * wait for interrupt_work to finish. | ||
246 | */ | ||
247 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
248 | int i; | ||
249 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
250 | disable_irq_nosync(assigned_dev-> | ||
251 | host_msix_entries[i].vector); | ||
252 | |||
253 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
254 | |||
255 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
256 | free_irq(assigned_dev->host_msix_entries[i].vector, | ||
257 | (void *)assigned_dev); | ||
258 | |||
259 | assigned_dev->entries_nr = 0; | ||
260 | kfree(assigned_dev->host_msix_entries); | ||
261 | kfree(assigned_dev->guest_msix_entries); | ||
262 | pci_disable_msix(assigned_dev->dev); | ||
263 | } else { | ||
264 | /* Deal with MSI and INTx */ | ||
265 | disable_irq_nosync(assigned_dev->host_irq); | ||
266 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
267 | |||
268 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); | ||
269 | |||
270 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) | ||
271 | pci_disable_msi(assigned_dev->dev); | ||
272 | } | ||
273 | |||
274 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); | ||
275 | } | ||
276 | |||
277 | static int kvm_deassign_irq(struct kvm *kvm, | ||
278 | struct kvm_assigned_dev_kernel *assigned_dev, | ||
279 | unsigned long irq_requested_type) | ||
280 | { | ||
281 | unsigned long guest_irq_type, host_irq_type; | ||
282 | |||
283 | if (!irqchip_in_kernel(kvm)) | ||
284 | return -EINVAL; | ||
285 | /* no irq assignment to deassign */ | ||
286 | if (!assigned_dev->irq_requested_type) | ||
287 | return -ENXIO; | ||
288 | |||
289 | host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; | ||
290 | guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; | ||
291 | |||
292 | if (host_irq_type) | ||
293 | deassign_host_irq(kvm, assigned_dev); | ||
294 | if (guest_irq_type) | ||
295 | deassign_guest_irq(kvm, assigned_dev); | ||
296 | |||
297 | return 0; | ||
298 | } | ||
299 | |||
300 | static void kvm_free_assigned_irq(struct kvm *kvm, | ||
301 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
302 | { | ||
303 | kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); | ||
304 | } | ||
305 | |||
306 | static void kvm_free_assigned_device(struct kvm *kvm, | ||
307 | struct kvm_assigned_dev_kernel | ||
308 | *assigned_dev) | ||
309 | { | ||
310 | kvm_free_assigned_irq(kvm, assigned_dev); | ||
311 | |||
312 | pci_reset_function(assigned_dev->dev); | ||
313 | |||
314 | pci_release_regions(assigned_dev->dev); | ||
315 | pci_disable_device(assigned_dev->dev); | ||
316 | pci_dev_put(assigned_dev->dev); | ||
317 | |||
318 | list_del(&assigned_dev->list); | ||
319 | kfree(assigned_dev); | ||
320 | } | ||
321 | |||
322 | void kvm_free_all_assigned_devices(struct kvm *kvm) | ||
323 | { | ||
324 | struct list_head *ptr, *ptr2; | ||
325 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
326 | |||
327 | list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { | ||
328 | assigned_dev = list_entry(ptr, | ||
329 | struct kvm_assigned_dev_kernel, | ||
330 | list); | ||
331 | |||
332 | kvm_free_assigned_device(kvm, assigned_dev); | ||
333 | } | ||
334 | } | ||
335 | |||
336 | static int assigned_device_enable_host_intx(struct kvm *kvm, | ||
337 | struct kvm_assigned_dev_kernel *dev) | ||
338 | { | ||
339 | dev->host_irq = dev->dev->irq; | ||
340 | /* Even though this is PCI, we don't want to use shared | ||
341 | * interrupts. Sharing host devices with guest-assigned devices | ||
342 | * on the same interrupt line is not a happy situation: there | ||
343 | * are going to be long delays in accepting, acking, etc. | ||
344 | */ | ||
345 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, | ||
346 | 0, "kvm_assigned_intx_device", (void *)dev)) | ||
347 | return -EIO; | ||
348 | return 0; | ||
349 | } | ||
350 | |||
351 | #ifdef __KVM_HAVE_MSI | ||
352 | static int assigned_device_enable_host_msi(struct kvm *kvm, | ||
353 | struct kvm_assigned_dev_kernel *dev) | ||
354 | { | ||
355 | int r; | ||
356 | |||
357 | if (!dev->dev->msi_enabled) { | ||
358 | r = pci_enable_msi(dev->dev); | ||
359 | if (r) | ||
360 | return r; | ||
361 | } | ||
362 | |||
363 | dev->host_irq = dev->dev->irq; | ||
364 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, | ||
365 | "kvm_assigned_msi_device", (void *)dev)) { | ||
366 | pci_disable_msi(dev->dev); | ||
367 | return -EIO; | ||
368 | } | ||
369 | |||
370 | return 0; | ||
371 | } | ||
372 | #endif | ||
373 | |||
374 | #ifdef __KVM_HAVE_MSIX | ||
375 | static int assigned_device_enable_host_msix(struct kvm *kvm, | ||
376 | struct kvm_assigned_dev_kernel *dev) | ||
377 | { | ||
378 | int i, r = -EINVAL; | ||
379 | |||
380 | /* host_msix_entries and guest_msix_entries should have been | ||
381 | * initialized */ | ||
382 | if (dev->entries_nr == 0) | ||
383 | return r; | ||
384 | |||
385 | r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); | ||
386 | if (r) | ||
387 | return r; | ||
388 | |||
389 | for (i = 0; i < dev->entries_nr; i++) { | ||
390 | r = request_irq(dev->host_msix_entries[i].vector, | ||
391 | kvm_assigned_dev_intr, 0, | ||
392 | "kvm_assigned_msix_device", | ||
393 | (void *)dev); | ||
394 | /* FIXME: free requested_irq's on failure */ | ||
395 | if (r) | ||
396 | return r; | ||
397 | } | ||
398 | |||
399 | return 0; | ||
400 | } | ||
401 | |||
402 | #endif | ||
403 | |||
404 | static int assigned_device_enable_guest_intx(struct kvm *kvm, | ||
405 | struct kvm_assigned_dev_kernel *dev, | ||
406 | struct kvm_assigned_irq *irq) | ||
407 | { | ||
408 | dev->guest_irq = irq->guest_irq; | ||
409 | dev->ack_notifier.gsi = irq->guest_irq; | ||
410 | return 0; | ||
411 | } | ||
412 | |||
413 | #ifdef __KVM_HAVE_MSI | ||
414 | static int assigned_device_enable_guest_msi(struct kvm *kvm, | ||
415 | struct kvm_assigned_dev_kernel *dev, | ||
416 | struct kvm_assigned_irq *irq) | ||
417 | { | ||
418 | dev->guest_irq = irq->guest_irq; | ||
419 | dev->ack_notifier.gsi = -1; | ||
420 | return 0; | ||
421 | } | ||
422 | #endif | ||
423 | #ifdef __KVM_HAVE_MSIX | ||
424 | static int assigned_device_enable_guest_msix(struct kvm *kvm, | ||
425 | struct kvm_assigned_dev_kernel *dev, | ||
426 | struct kvm_assigned_irq *irq) | ||
427 | { | ||
428 | dev->guest_irq = irq->guest_irq; | ||
429 | dev->ack_notifier.gsi = -1; | ||
430 | return 0; | ||
431 | } | ||
432 | #endif | ||
433 | |||
434 | static int assign_host_irq(struct kvm *kvm, | ||
435 | struct kvm_assigned_dev_kernel *dev, | ||
436 | __u32 host_irq_type) | ||
437 | { | ||
438 | int r = -EEXIST; | ||
439 | |||
440 | if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) | ||
441 | return r; | ||
442 | |||
443 | switch (host_irq_type) { | ||
444 | case KVM_DEV_IRQ_HOST_INTX: | ||
445 | r = assigned_device_enable_host_intx(kvm, dev); | ||
446 | break; | ||
447 | #ifdef __KVM_HAVE_MSI | ||
448 | case KVM_DEV_IRQ_HOST_MSI: | ||
449 | r = assigned_device_enable_host_msi(kvm, dev); | ||
450 | break; | ||
451 | #endif | ||
452 | #ifdef __KVM_HAVE_MSIX | ||
453 | case KVM_DEV_IRQ_HOST_MSIX: | ||
454 | r = assigned_device_enable_host_msix(kvm, dev); | ||
455 | break; | ||
456 | #endif | ||
457 | default: | ||
458 | r = -EINVAL; | ||
459 | } | ||
460 | |||
461 | if (!r) | ||
462 | dev->irq_requested_type |= host_irq_type; | ||
463 | |||
464 | return r; | ||
465 | } | ||
466 | |||
467 | static int assign_guest_irq(struct kvm *kvm, | ||
468 | struct kvm_assigned_dev_kernel *dev, | ||
469 | struct kvm_assigned_irq *irq, | ||
470 | unsigned long guest_irq_type) | ||
471 | { | ||
472 | int id; | ||
473 | int r = -EEXIST; | ||
474 | |||
475 | if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) | ||
476 | return r; | ||
477 | |||
478 | id = kvm_request_irq_source_id(kvm); | ||
479 | if (id < 0) | ||
480 | return id; | ||
481 | |||
482 | dev->irq_source_id = id; | ||
483 | |||
484 | switch (guest_irq_type) { | ||
485 | case KVM_DEV_IRQ_GUEST_INTX: | ||
486 | r = assigned_device_enable_guest_intx(kvm, dev, irq); | ||
487 | break; | ||
488 | #ifdef __KVM_HAVE_MSI | ||
489 | case KVM_DEV_IRQ_GUEST_MSI: | ||
490 | r = assigned_device_enable_guest_msi(kvm, dev, irq); | ||
491 | break; | ||
492 | #endif | ||
493 | #ifdef __KVM_HAVE_MSIX | ||
494 | case KVM_DEV_IRQ_GUEST_MSIX: | ||
495 | r = assigned_device_enable_guest_msix(kvm, dev, irq); | ||
496 | break; | ||
497 | #endif | ||
498 | default: | ||
499 | r = -EINVAL; | ||
500 | } | ||
501 | |||
502 | if (!r) { | ||
503 | dev->irq_requested_type |= guest_irq_type; | ||
504 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | ||
505 | } else | ||
506 | kvm_free_irq_source_id(kvm, dev->irq_source_id); | ||
507 | |||
508 | return r; | ||
509 | } | ||
510 | |||
511 | /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ | ||
512 | static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, | ||
513 | struct kvm_assigned_irq *assigned_irq) | ||
514 | { | ||
515 | int r = -EINVAL; | ||
516 | struct kvm_assigned_dev_kernel *match; | ||
517 | unsigned long host_irq_type, guest_irq_type; | ||
518 | |||
519 | if (!capable(CAP_SYS_RAWIO)) | ||
520 | return -EPERM; | ||
521 | |||
522 | if (!irqchip_in_kernel(kvm)) | ||
523 | return r; | ||
524 | |||
525 | mutex_lock(&kvm->lock); | ||
526 | r = -ENODEV; | ||
527 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
528 | assigned_irq->assigned_dev_id); | ||
529 | if (!match) | ||
530 | goto out; | ||
531 | |||
532 | host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); | ||
533 | guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); | ||
534 | |||
535 | r = -EINVAL; | ||
536 | /* can only assign one type at a time */ | ||
537 | if (hweight_long(host_irq_type) > 1) | ||
538 | goto out; | ||
539 | if (hweight_long(guest_irq_type) > 1) | ||
540 | goto out; | ||
541 | if (host_irq_type == 0 && guest_irq_type == 0) | ||
542 | goto out; | ||
543 | |||
544 | r = 0; | ||
545 | if (host_irq_type) | ||
546 | r = assign_host_irq(kvm, match, host_irq_type); | ||
547 | if (r) | ||
548 | goto out; | ||
549 | |||
550 | if (guest_irq_type) | ||
551 | r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); | ||
552 | out: | ||
553 | mutex_unlock(&kvm->lock); | ||
554 | return r; | ||
555 | } | ||
556 | |||
557 | static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, | ||
558 | struct kvm_assigned_irq | ||
559 | *assigned_irq) | ||
560 | { | ||
561 | int r = -ENODEV; | ||
562 | struct kvm_assigned_dev_kernel *match; | ||
563 | |||
564 | mutex_lock(&kvm->lock); | ||
565 | |||
566 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
567 | assigned_irq->assigned_dev_id); | ||
568 | if (!match) | ||
569 | goto out; | ||
570 | |||
571 | r = kvm_deassign_irq(kvm, match, assigned_irq->flags); | ||
572 | out: | ||
573 | mutex_unlock(&kvm->lock); | ||
574 | return r; | ||
575 | } | ||
576 | |||
577 | static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | ||
578 | struct kvm_assigned_pci_dev *assigned_dev) | ||
579 | { | ||
580 | int r = 0; | ||
581 | struct kvm_assigned_dev_kernel *match; | ||
582 | struct pci_dev *dev; | ||
583 | |||
584 | down_read(&kvm->slots_lock); | ||
585 | mutex_lock(&kvm->lock); | ||
586 | |||
587 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
588 | assigned_dev->assigned_dev_id); | ||
589 | if (match) { | ||
590 | /* device already assigned */ | ||
591 | r = -EEXIST; | ||
592 | goto out; | ||
593 | } | ||
594 | |||
595 | match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); | ||
596 | if (match == NULL) { | ||
597 | printk(KERN_INFO "%s: Couldn't allocate memory\n", | ||
598 | __func__); | ||
599 | r = -ENOMEM; | ||
600 | goto out; | ||
601 | } | ||
602 | dev = pci_get_bus_and_slot(assigned_dev->busnr, | ||
603 | assigned_dev->devfn); | ||
604 | if (!dev) { | ||
605 | printk(KERN_INFO "%s: host device not found\n", __func__); | ||
606 | r = -EINVAL; | ||
607 | goto out_free; | ||
608 | } | ||
609 | if (pci_enable_device(dev)) { | ||
610 | printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); | ||
611 | r = -EBUSY; | ||
612 | goto out_put; | ||
613 | } | ||
614 | r = pci_request_regions(dev, "kvm_assigned_device"); | ||
615 | if (r) { | ||
616 | printk(KERN_INFO "%s: Could not get access to device regions\n", | ||
617 | __func__); | ||
618 | goto out_disable; | ||
619 | } | ||
620 | |||
621 | pci_reset_function(dev); | ||
622 | |||
623 | match->assigned_dev_id = assigned_dev->assigned_dev_id; | ||
624 | match->host_busnr = assigned_dev->busnr; | ||
625 | match->host_devfn = assigned_dev->devfn; | ||
626 | match->flags = assigned_dev->flags; | ||
627 | match->dev = dev; | ||
628 | spin_lock_init(&match->assigned_dev_lock); | ||
629 | match->irq_source_id = -1; | ||
630 | match->kvm = kvm; | ||
631 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | ||
632 | INIT_WORK(&match->interrupt_work, | ||
633 | kvm_assigned_dev_interrupt_work_handler); | ||
634 | |||
635 | list_add(&match->list, &kvm->arch.assigned_dev_head); | ||
636 | |||
637 | if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { | ||
638 | if (!kvm->arch.iommu_domain) { | ||
639 | r = kvm_iommu_map_guest(kvm); | ||
640 | if (r) | ||
641 | goto out_list_del; | ||
642 | } | ||
643 | r = kvm_assign_device(kvm, match); | ||
644 | if (r) | ||
645 | goto out_list_del; | ||
646 | } | ||
647 | |||
648 | out: | ||
649 | mutex_unlock(&kvm->lock); | ||
650 | up_read(&kvm->slots_lock); | ||
651 | return r; | ||
652 | out_list_del: | ||
653 | list_del(&match->list); | ||
654 | pci_release_regions(dev); | ||
655 | out_disable: | ||
656 | pci_disable_device(dev); | ||
657 | out_put: | ||
658 | pci_dev_put(dev); | ||
659 | out_free: | ||
660 | kfree(match); | ||
661 | mutex_unlock(&kvm->lock); | ||
662 | up_read(&kvm->slots_lock); | ||
663 | return r; | ||
664 | } | ||
665 | #endif | ||
666 | |||
667 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT | ||
668 | static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, | ||
669 | struct kvm_assigned_pci_dev *assigned_dev) | ||
670 | { | ||
671 | int r = 0; | ||
672 | struct kvm_assigned_dev_kernel *match; | ||
673 | |||
674 | mutex_lock(&kvm->lock); | ||
675 | |||
676 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
677 | assigned_dev->assigned_dev_id); | ||
678 | if (!match) { | ||
679 | printk(KERN_INFO "%s: device hasn't been assigned before, " | ||
680 | "so cannot be deassigned\n", __func__); | ||
681 | r = -EINVAL; | ||
682 | goto out; | ||
683 | } | ||
684 | |||
685 | if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) | ||
686 | kvm_deassign_device(kvm, match); | ||
687 | |||
688 | kvm_free_assigned_device(kvm, match); | ||
689 | |||
690 | out: | ||
691 | mutex_unlock(&kvm->lock); | ||
692 | return r; | ||
693 | } | ||
694 | #endif | ||
695 | |||
696 | static inline int valid_vcpu(int n) | ||
697 | { | ||
698 | return likely(n >= 0 && n < KVM_MAX_VCPUS); | ||
699 | } | ||
700 | 92 | ||
701 | inline int kvm_is_mmio_pfn(pfn_t pfn) | 93 | inline int kvm_is_mmio_pfn(pfn_t pfn) |
702 | { | 94 | { |
@@ -742,15 +134,11 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | |||
742 | bool called = true; | 134 | bool called = true; |
743 | struct kvm_vcpu *vcpu; | 135 | struct kvm_vcpu *vcpu; |
744 | 136 | ||
745 | if (alloc_cpumask_var(&cpus, GFP_ATOMIC)) | 137 | zalloc_cpumask_var(&cpus, GFP_ATOMIC); |
746 | cpumask_clear(cpus); | ||
747 | 138 | ||
748 | spin_lock(&kvm->requests_lock); | 139 | spin_lock(&kvm->requests_lock); |
749 | me = get_cpu(); | 140 | me = raw_smp_processor_id(); |
750 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | 141 | kvm_for_each_vcpu(i, vcpu, kvm) { |
751 | vcpu = kvm->vcpus[i]; | ||
752 | if (!vcpu) | ||
753 | continue; | ||
754 | if (test_and_set_bit(req, &vcpu->requests)) | 142 | if (test_and_set_bit(req, &vcpu->requests)) |
755 | continue; | 143 | continue; |
756 | cpu = vcpu->cpu; | 144 | cpu = vcpu->cpu; |
@@ -763,7 +151,6 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | |||
763 | smp_call_function_many(cpus, ack_flush, NULL, 1); | 151 | smp_call_function_many(cpus, ack_flush, NULL, 1); |
764 | else | 152 | else |
765 | called = false; | 153 | called = false; |
766 | put_cpu(); | ||
767 | spin_unlock(&kvm->requests_lock); | 154 | spin_unlock(&kvm->requests_lock); |
768 | free_cpumask_var(cpus); | 155 | free_cpumask_var(cpus); |
769 | return called; | 156 | return called; |
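Editor's note: the hunk above keeps the key idea of make_all_cpus_request(): a per-vCPU request bit is set first, and an IPI is only sent to targets whose bit actually transitioned, skipping the requesting CPU. A user-space sketch of that idea with C11 atomics; the array and bit layout are stand-ins for the kernel structures, not the real ones.

/*
 * Sketch: set a request bit per vCPU and only "kick" (here: count)
 * targets whose bit went from 0 to 1, skipping the caller.
 */
#include <stdatomic.h>
#include <stdio.h>

#define NVCPUS 4
#define REQ_FLUSH (1u << 0)

static atomic_uint requests[NVCPUS];

static int request_all(int self, unsigned int req)
{
	int kicked = 0;

	for (int i = 0; i < NVCPUS; i++) {
		unsigned int old = atomic_fetch_or(&requests[i], req);

		if (old & req)   /* request already pending: no IPI needed */
			continue;
		if (i == self)   /* the caller services its own request */
			continue;
		kicked++;        /* would be added to the IPI cpumask */
	}
	return kicked;
}

int main(void)
{
	printf("kicked %d vcpus\n", request_all(0, REQ_FLUSH)); /* 3 */
	printf("kicked %d vcpus\n", request_all(0, REQ_FLUSH)); /* 0: already set */
	return 0;
}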
@@ -859,6 +246,19 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, | |||
859 | 246 | ||
860 | } | 247 | } |
861 | 248 | ||
249 | static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, | ||
250 | struct mm_struct *mm, | ||
251 | unsigned long address, | ||
252 | pte_t pte) | ||
253 | { | ||
254 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | ||
255 | |||
256 | spin_lock(&kvm->mmu_lock); | ||
257 | kvm->mmu_notifier_seq++; | ||
258 | kvm_set_spte_hva(kvm, address, pte); | ||
259 | spin_unlock(&kvm->mmu_lock); | ||
260 | } | ||
261 | |||
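Editor's note: the new change_pte notifier bumps mmu_notifier_seq under mmu_lock before touching the spte; elsewhere KVM samples that sequence, does work outside the lock, and retries if the counter moved. A user-space sketch of that sequence-count-and-retry idea, with a pthread mutex and illustrative names; the retry policy here is a simplification, not the kernel's exact protocol.

/*
 * Sketch: writers bump a counter under the lock when a mapping changes;
 * a reader snapshots the counter, works outside the lock, then
 * revalidates and retries if anything changed in between.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long seq;
static long mapping = 42;

static void writer_change_mapping(long new_val)
{
	pthread_mutex_lock(&lock);
	seq++;                       /* invalidate concurrent lookups */
	mapping = new_val;
	pthread_mutex_unlock(&lock);
}

static long reader_lookup(void)
{
	unsigned long snap;
	long val;

	for (;;) {
		pthread_mutex_lock(&lock);
		snap = seq;
		pthread_mutex_unlock(&lock);

		val = mapping;           /* work done outside the lock */

		pthread_mutex_lock(&lock);
		if (snap == seq) {       /* nothing changed: result usable */
			pthread_mutex_unlock(&lock);
			return val;
		}
		pthread_mutex_unlock(&lock);  /* changed: retry */
	}
}

int main(void)
{
	writer_change_mapping(7);
	printf("%ld\n", reader_lookup());
	return 0;
}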
862 | static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, | 262 | static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, |
863 | struct mm_struct *mm, | 263 | struct mm_struct *mm, |
864 | unsigned long start, | 264 | unsigned long start, |
@@ -938,12 +338,14 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { | |||
938 | .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, | 338 | .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, |
939 | .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, | 339 | .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, |
940 | .clear_flush_young = kvm_mmu_notifier_clear_flush_young, | 340 | .clear_flush_young = kvm_mmu_notifier_clear_flush_young, |
341 | .change_pte = kvm_mmu_notifier_change_pte, | ||
941 | .release = kvm_mmu_notifier_release, | 342 | .release = kvm_mmu_notifier_release, |
942 | }; | 343 | }; |
943 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ | 344 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ |
944 | 345 | ||
945 | static struct kvm *kvm_create_vm(void) | 346 | static struct kvm *kvm_create_vm(void) |
946 | { | 347 | { |
348 | int r = 0; | ||
947 | struct kvm *kvm = kvm_arch_create_vm(); | 349 | struct kvm *kvm = kvm_arch_create_vm(); |
948 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 350 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
949 | struct page *page; | 351 | struct page *page; |
@@ -951,16 +353,21 @@ static struct kvm *kvm_create_vm(void) | |||
951 | 353 | ||
952 | if (IS_ERR(kvm)) | 354 | if (IS_ERR(kvm)) |
953 | goto out; | 355 | goto out; |
356 | |||
357 | r = hardware_enable_all(); | ||
358 | if (r) | ||
359 | goto out_err_nodisable; | ||
360 | |||
954 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 361 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
955 | INIT_LIST_HEAD(&kvm->irq_routing); | ||
956 | INIT_HLIST_HEAD(&kvm->mask_notifier_list); | 362 | INIT_HLIST_HEAD(&kvm->mask_notifier_list); |
363 | INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); | ||
957 | #endif | 364 | #endif |
958 | 365 | ||
959 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 366 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
960 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 367 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
961 | if (!page) { | 368 | if (!page) { |
962 | kfree(kvm); | 369 | r = -ENOMEM; |
963 | return ERR_PTR(-ENOMEM); | 370 | goto out_err; |
964 | } | 371 | } |
965 | kvm->coalesced_mmio_ring = | 372 | kvm->coalesced_mmio_ring = |
966 | (struct kvm_coalesced_mmio_ring *)page_address(page); | 373 | (struct kvm_coalesced_mmio_ring *)page_address(page); |
@@ -968,15 +375,13 @@ static struct kvm *kvm_create_vm(void) | |||
968 | 375 | ||
969 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | 376 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) |
970 | { | 377 | { |
971 | int err; | ||
972 | kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; | 378 | kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; |
973 | err = mmu_notifier_register(&kvm->mmu_notifier, current->mm); | 379 | r = mmu_notifier_register(&kvm->mmu_notifier, current->mm); |
974 | if (err) { | 380 | if (r) { |
975 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 381 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
976 | put_page(page); | 382 | put_page(page); |
977 | #endif | 383 | #endif |
978 | kfree(kvm); | 384 | goto out_err; |
979 | return ERR_PTR(err); | ||
980 | } | 385 | } |
981 | } | 386 | } |
982 | #endif | 387 | #endif |
@@ -986,7 +391,9 @@ static struct kvm *kvm_create_vm(void) | |||
986 | spin_lock_init(&kvm->mmu_lock); | 391 | spin_lock_init(&kvm->mmu_lock); |
987 | spin_lock_init(&kvm->requests_lock); | 392 | spin_lock_init(&kvm->requests_lock); |
988 | kvm_io_bus_init(&kvm->pio_bus); | 393 | kvm_io_bus_init(&kvm->pio_bus); |
394 | kvm_eventfd_init(kvm); | ||
989 | mutex_init(&kvm->lock); | 395 | mutex_init(&kvm->lock); |
396 | mutex_init(&kvm->irq_lock); | ||
990 | kvm_io_bus_init(&kvm->mmio_bus); | 397 | kvm_io_bus_init(&kvm->mmio_bus); |
991 | init_rwsem(&kvm->slots_lock); | 398 | init_rwsem(&kvm->slots_lock); |
992 | atomic_set(&kvm->users_count, 1); | 399 | atomic_set(&kvm->users_count, 1); |
@@ -998,6 +405,15 @@ static struct kvm *kvm_create_vm(void) | |||
998 | #endif | 405 | #endif |
999 | out: | 406 | out: |
1000 | return kvm; | 407 | return kvm; |
408 | |||
409 | #if defined(KVM_COALESCED_MMIO_PAGE_OFFSET) || \ | ||
410 | (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)) | ||
411 | out_err: | ||
412 | hardware_disable_all(); | ||
413 | #endif | ||
414 | out_err_nodisable: | ||
415 | kfree(kvm); | ||
416 | return ERR_PTR(r); | ||
1001 | } | 417 | } |
1002 | 418 | ||
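Editor's note: kvm_create_vm() now unwinds through two labels, so failures before hardware_enable_all() skip the matching disable. A small user-space sketch of that two-label unwind; enable_all()/disable_all() and the NULL return are stand-ins for the kernel helpers and ERR_PTR().

/*
 * Sketch: staged init with two error labels. Early failures jump past
 * the disable step; later failures undo the global enable first.
 */
#include <stdio.h>
#include <stdlib.h>

static int enabled;

static int enable_all(void)   { enabled = 1; return 0; }
static void disable_all(void) { enabled = 0; }

static void *create_vm(void)
{
	int r;
	void *vm = malloc(128);      /* like kvm_arch_create_vm() */

	if (!vm)
		goto out_err_nodisable;

	r = enable_all();            /* like hardware_enable_all() */
	if (r)
		goto out_err_nodisable;

	if (0 /* pretend a later allocation failed */)
		goto out_err;

	return vm;

out_err:
	disable_all();
out_err_nodisable:
	free(vm);                    /* free(NULL) is a no-op */
	return NULL;
}

int main(void)
{
	void *vm = create_vm();
	printf("%s (enabled=%d)\n", vm ? "created" : "failed", enabled);
	free(vm);
	return 0;
}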
1003 | /* | 419 | /* |
@@ -1006,19 +422,25 @@ out: | |||
1006 | static void kvm_free_physmem_slot(struct kvm_memory_slot *free, | 422 | static void kvm_free_physmem_slot(struct kvm_memory_slot *free, |
1007 | struct kvm_memory_slot *dont) | 423 | struct kvm_memory_slot *dont) |
1008 | { | 424 | { |
425 | int i; | ||
426 | |||
1009 | if (!dont || free->rmap != dont->rmap) | 427 | if (!dont || free->rmap != dont->rmap) |
1010 | vfree(free->rmap); | 428 | vfree(free->rmap); |
1011 | 429 | ||
1012 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) | 430 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) |
1013 | vfree(free->dirty_bitmap); | 431 | vfree(free->dirty_bitmap); |
1014 | 432 | ||
1015 | if (!dont || free->lpage_info != dont->lpage_info) | 433 | |
1016 | vfree(free->lpage_info); | 434 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { |
435 | if (!dont || free->lpage_info[i] != dont->lpage_info[i]) { | ||
436 | vfree(free->lpage_info[i]); | ||
437 | free->lpage_info[i] = NULL; | ||
438 | } | ||
439 | } | ||
1017 | 440 | ||
1018 | free->npages = 0; | 441 | free->npages = 0; |
1019 | free->dirty_bitmap = NULL; | 442 | free->dirty_bitmap = NULL; |
1020 | free->rmap = NULL; | 443 | free->rmap = NULL; |
1021 | free->lpage_info = NULL; | ||
1022 | } | 444 | } |
1023 | 445 | ||
1024 | void kvm_free_physmem(struct kvm *kvm) | 446 | void kvm_free_physmem(struct kvm *kvm) |
@@ -1050,6 +472,7 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
1050 | kvm_arch_flush_shadow(kvm); | 472 | kvm_arch_flush_shadow(kvm); |
1051 | #endif | 473 | #endif |
1052 | kvm_arch_destroy_vm(kvm); | 474 | kvm_arch_destroy_vm(kvm); |
475 | hardware_disable_all(); | ||
1053 | mmdrop(mm); | 476 | mmdrop(mm); |
1054 | } | 477 | } |
1055 | 478 | ||
@@ -1071,6 +494,8 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) | |||
1071 | { | 494 | { |
1072 | struct kvm *kvm = filp->private_data; | 495 | struct kvm *kvm = filp->private_data; |
1073 | 496 | ||
497 | kvm_irqfd_release(kvm); | ||
498 | |||
1074 | kvm_put_kvm(kvm); | 499 | kvm_put_kvm(kvm); |
1075 | return 0; | 500 | return 0; |
1076 | } | 501 | } |
@@ -1089,8 +514,8 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
1089 | { | 514 | { |
1090 | int r; | 515 | int r; |
1091 | gfn_t base_gfn; | 516 | gfn_t base_gfn; |
1092 | unsigned long npages, ugfn; | 517 | unsigned long npages; |
1093 | unsigned long largepages, i; | 518 | unsigned long i; |
1094 | struct kvm_memory_slot *memslot; | 519 | struct kvm_memory_slot *memslot; |
1095 | struct kvm_memory_slot old, new; | 520 | struct kvm_memory_slot old, new; |
1096 | 521 | ||
@@ -1164,31 +589,51 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
1164 | else | 589 | else |
1165 | new.userspace_addr = 0; | 590 | new.userspace_addr = 0; |
1166 | } | 591 | } |
1167 | if (npages && !new.lpage_info) { | 592 | if (!npages) |
1168 | largepages = 1 + (base_gfn + npages - 1) / KVM_PAGES_PER_HPAGE; | 593 | goto skip_lpage; |
1169 | largepages -= base_gfn / KVM_PAGES_PER_HPAGE; | 594 | |
595 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | ||
596 | unsigned long ugfn; | ||
597 | unsigned long j; | ||
598 | int lpages; | ||
599 | int level = i + 2; | ||
1170 | 600 | ||
1171 | new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info)); | 601 | /* Avoid unused variable warning if no large pages */ |
602 | (void)level; | ||
1172 | 603 | ||
1173 | if (!new.lpage_info) | 604 | if (new.lpage_info[i]) |
605 | continue; | ||
606 | |||
607 | lpages = 1 + (base_gfn + npages - 1) / | ||
608 | KVM_PAGES_PER_HPAGE(level); | ||
609 | lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level); | ||
610 | |||
611 | new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i])); | ||
612 | |||
613 | if (!new.lpage_info[i]) | ||
1174 | goto out_free; | 614 | goto out_free; |
1175 | 615 | ||
1176 | memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info)); | 616 | memset(new.lpage_info[i], 0, |
617 | lpages * sizeof(*new.lpage_info[i])); | ||
1177 | 618 | ||
1178 | if (base_gfn % KVM_PAGES_PER_HPAGE) | 619 | if (base_gfn % KVM_PAGES_PER_HPAGE(level)) |
1179 | new.lpage_info[0].write_count = 1; | 620 | new.lpage_info[i][0].write_count = 1; |
1180 | if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE) | 621 | if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level)) |
1181 | new.lpage_info[largepages-1].write_count = 1; | 622 | new.lpage_info[i][lpages - 1].write_count = 1; |
1182 | ugfn = new.userspace_addr >> PAGE_SHIFT; | 623 | ugfn = new.userspace_addr >> PAGE_SHIFT; |
1183 | /* | 624 | /* |
1184 | * If the gfn and userspace address are not aligned wrt each | 625 | * If the gfn and userspace address are not aligned wrt each |
1185 | * other, disable large page support for this slot | 626 | * other, or if explicitly asked to, disable large page |
627 | * support for this slot | ||
1186 | */ | 628 | */ |
1187 | if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE - 1)) | 629 | if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || |
1188 | for (i = 0; i < largepages; ++i) | 630 | !largepages_enabled) |
1189 | new.lpage_info[i].write_count = 1; | 631 | for (j = 0; j < lpages; ++j) |
632 | new.lpage_info[i][j].write_count = 1; | ||
1190 | } | 633 | } |
1191 | 634 | ||
635 | skip_lpage: | ||
636 | |||
1192 | /* Allocate page dirty bitmap if needed */ | 637 | /* Allocate page dirty bitmap if needed */ |
1193 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { | 638 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { |
1194 | unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8; | 639 | unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8; |
@@ -1200,6 +645,10 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
1200 | if (old.npages) | 645 | if (old.npages) |
1201 | kvm_arch_flush_shadow(kvm); | 646 | kvm_arch_flush_shadow(kvm); |
1202 | } | 647 | } |
648 | #else /* not defined CONFIG_S390 */ | ||
649 | new.user_alloc = user_alloc; | ||
650 | if (user_alloc) | ||
651 | new.userspace_addr = mem->userspace_addr; | ||
1203 | #endif /* not defined CONFIG_S390 */ | 652 | #endif /* not defined CONFIG_S390 */ |
1204 | 653 | ||
1205 | if (!npages) | 654 | if (!npages) |
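Editor's note: the per-level lpage_info sizing above is easier to see with concrete numbers. A runnable arithmetic sketch; the 512-pages-per-huge-page value models a 2 MB level with 4 KB base pages and is only an example, not taken from KVM_PAGES_PER_HPAGE().

/*
 * Sketch: count how many large-page-sized regions a slot touches at one
 * level, and mark the head/tail entries when the slot is not aligned.
 */
#include <stdio.h>

#define PAGES_PER_HPAGE 512UL   /* illustrative: 2 MB / 4 KB */

int main(void)
{
	unsigned long base_gfn = 0x100;   /* slot starts mid large page */
	unsigned long npages   = 0x1000;  /* 4096 small pages */
	unsigned long lpages;

	lpages = 1 + (base_gfn + npages - 1) / PAGES_PER_HPAGE;
	lpages -= base_gfn / PAGES_PER_HPAGE;

	printf("slot spans %lu large pages\n", lpages);   /* 9 */

	if (base_gfn % PAGES_PER_HPAGE)
		printf("head entry write-protected (partial large page)\n");
	if ((base_gfn + npages) % PAGES_PER_HPAGE)
		printf("tail entry write-protected (partial large page)\n");
	return 0;
}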
@@ -1299,6 +748,12 @@ out: | |||
1299 | return r; | 748 | return r; |
1300 | } | 749 | } |
1301 | 750 | ||
751 | void kvm_disable_largepages(void) | ||
752 | { | ||
753 | largepages_enabled = false; | ||
754 | } | ||
755 | EXPORT_SYMBOL_GPL(kvm_disable_largepages); | ||
756 | |||
1302 | int is_error_page(struct page *page) | 757 | int is_error_page(struct page *page) |
1303 | { | 758 | { |
1304 | return page == bad_page; | 759 | return page == bad_page; |
@@ -1620,8 +1075,8 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | |||
1620 | unsigned long rel_gfn = gfn - memslot->base_gfn; | 1075 | unsigned long rel_gfn = gfn - memslot->base_gfn; |
1621 | 1076 | ||
1622 | /* avoid RMW */ | 1077 | /* avoid RMW */ |
1623 | if (!test_bit(rel_gfn, memslot->dirty_bitmap)) | 1078 | if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap)) |
1624 | set_bit(rel_gfn, memslot->dirty_bitmap); | 1079 | generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); |
1625 | } | 1080 | } |
1626 | } | 1081 | } |
1627 | 1082 | ||
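Editor's note: the mark_page_dirty() hunk keeps the "avoid RMW" pattern: test the bit first and only write the word if the page was not already dirty. A plain-C sketch of that pattern; the helpers below stand in for the generic little-endian bitops and ignore the endianness aspect of the change.

/*
 * Sketch: skip the store (and the cacheline dirtying) when the bit is
 * already set.
 */
#include <stdio.h>

static unsigned long bitmap[4];   /* dirty bitmap for a small slot */

static int test_bit_(unsigned long nr, const unsigned long *map)
{
	return (map[nr / (8 * sizeof(long))] >> (nr % (8 * sizeof(long)))) & 1;
}

static void set_bit_(unsigned long nr, unsigned long *map)
{
	map[nr / (8 * sizeof(long))] |= 1UL << (nr % (8 * sizeof(long)));
}

static void mark_dirty(unsigned long rel_gfn)
{
	if (!test_bit_(rel_gfn, bitmap))   /* avoid RMW */
		set_bit_(rel_gfn, bitmap);
}

int main(void)
{
	mark_dirty(3);
	mark_dirty(3);                     /* second call is a no-op */
	printf("word0 = %#lx\n", bitmap[0]);
	return 0;
}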
@@ -1635,9 +1090,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
1635 | for (;;) { | 1090 | for (;;) { |
1636 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | 1091 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); |
1637 | 1092 | ||
1638 | if ((kvm_arch_interrupt_allowed(vcpu) && | 1093 | if (kvm_arch_vcpu_runnable(vcpu)) { |
1639 | kvm_cpu_has_interrupt(vcpu)) || | ||
1640 | kvm_arch_vcpu_runnable(vcpu)) { | ||
1641 | set_bit(KVM_REQ_UNHALT, &vcpu->requests); | 1094 | set_bit(KVM_REQ_UNHALT, &vcpu->requests); |
1642 | break; | 1095 | break; |
1643 | } | 1096 | } |
@@ -1646,9 +1099,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
1646 | if (signal_pending(current)) | 1099 | if (signal_pending(current)) |
1647 | break; | 1100 | break; |
1648 | 1101 | ||
1649 | vcpu_put(vcpu); | ||
1650 | schedule(); | 1102 | schedule(); |
1651 | vcpu_load(vcpu); | ||
1652 | } | 1103 | } |
1653 | 1104 | ||
1654 | finish_wait(&vcpu->wq, &wait); | 1105 | finish_wait(&vcpu->wq, &wait); |
@@ -1662,6 +1113,21 @@ void kvm_resched(struct kvm_vcpu *vcpu) | |||
1662 | } | 1113 | } |
1663 | EXPORT_SYMBOL_GPL(kvm_resched); | 1114 | EXPORT_SYMBOL_GPL(kvm_resched); |
1664 | 1115 | ||
1116 | void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu) | ||
1117 | { | ||
1118 | ktime_t expires; | ||
1119 | DEFINE_WAIT(wait); | ||
1120 | |||
1121 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | ||
1122 | |||
1123 | /* Sleep for 100 us, and hope the lock holder gets scheduled */ | ||

1124 | expires = ktime_add_ns(ktime_get(), 100000UL); | ||
1125 | schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); | ||
1126 | |||
1127 | finish_wait(&vcpu->wq, &wait); | ||
1128 | } | ||
1129 | EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); | ||
1130 | |||
1665 | static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1131 | static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
1666 | { | 1132 | { |
1667 | struct kvm_vcpu *vcpu = vma->vm_file->private_data; | 1133 | struct kvm_vcpu *vcpu = vma->vm_file->private_data; |
@@ -1684,7 +1150,7 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1684 | return 0; | 1150 | return 0; |
1685 | } | 1151 | } |
1686 | 1152 | ||
1687 | static struct vm_operations_struct kvm_vcpu_vm_ops = { | 1153 | static const struct vm_operations_struct kvm_vcpu_vm_ops = { |
1688 | .fault = kvm_vcpu_fault, | 1154 | .fault = kvm_vcpu_fault, |
1689 | }; | 1155 | }; |
1690 | 1156 | ||
@@ -1714,24 +1180,18 @@ static struct file_operations kvm_vcpu_fops = { | |||
1714 | */ | 1180 | */ |
1715 | static int create_vcpu_fd(struct kvm_vcpu *vcpu) | 1181 | static int create_vcpu_fd(struct kvm_vcpu *vcpu) |
1716 | { | 1182 | { |
1717 | int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0); | 1183 | return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR); |
1718 | if (fd < 0) | ||
1719 | kvm_put_kvm(vcpu->kvm); | ||
1720 | return fd; | ||
1721 | } | 1184 | } |
1722 | 1185 | ||
1723 | /* | 1186 | /* |
1724 | * Creates some virtual cpus. Good luck creating more than one. | 1187 | * Creates some virtual cpus. Good luck creating more than one. |
1725 | */ | 1188 | */ |
1726 | static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | 1189 | static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) |
1727 | { | 1190 | { |
1728 | int r; | 1191 | int r; |
1729 | struct kvm_vcpu *vcpu; | 1192 | struct kvm_vcpu *vcpu, *v; |
1730 | 1193 | ||
1731 | if (!valid_vcpu(n)) | 1194 | vcpu = kvm_arch_vcpu_create(kvm, id); |
1732 | return -EINVAL; | ||
1733 | |||
1734 | vcpu = kvm_arch_vcpu_create(kvm, n); | ||
1735 | if (IS_ERR(vcpu)) | 1195 | if (IS_ERR(vcpu)) |
1736 | return PTR_ERR(vcpu); | 1196 | return PTR_ERR(vcpu); |
1737 | 1197 | ||
@@ -1742,23 +1202,38 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | |||
1742 | return r; | 1202 | return r; |
1743 | 1203 | ||
1744 | mutex_lock(&kvm->lock); | 1204 | mutex_lock(&kvm->lock); |
1745 | if (kvm->vcpus[n]) { | 1205 | if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { |
1746 | r = -EEXIST; | 1206 | r = -EINVAL; |
1747 | goto vcpu_destroy; | 1207 | goto vcpu_destroy; |
1748 | } | 1208 | } |
1749 | kvm->vcpus[n] = vcpu; | 1209 | |
1750 | mutex_unlock(&kvm->lock); | 1210 | kvm_for_each_vcpu(r, v, kvm) |
1211 | if (v->vcpu_id == id) { | ||
1212 | r = -EEXIST; | ||
1213 | goto vcpu_destroy; | ||
1214 | } | ||
1215 | |||
1216 | BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]); | ||
1751 | 1217 | ||
1752 | /* Now it's all set up, let userspace reach it */ | 1218 | /* Now it's all set up, let userspace reach it */ |
1753 | kvm_get_kvm(kvm); | 1219 | kvm_get_kvm(kvm); |
1754 | r = create_vcpu_fd(vcpu); | 1220 | r = create_vcpu_fd(vcpu); |
1755 | if (r < 0) | 1221 | if (r < 0) { |
1756 | goto unlink; | 1222 | kvm_put_kvm(kvm); |
1223 | goto vcpu_destroy; | ||
1224 | } | ||
1225 | |||
1226 | kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu; | ||
1227 | smp_wmb(); | ||
1228 | atomic_inc(&kvm->online_vcpus); | ||
1229 | |||
1230 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | ||
1231 | if (kvm->bsp_vcpu_id == id) | ||
1232 | kvm->bsp_vcpu = vcpu; | ||
1233 | #endif | ||
1234 | mutex_unlock(&kvm->lock); | ||
1757 | return r; | 1235 | return r; |
1758 | 1236 | ||
1759 | unlink: | ||
1760 | mutex_lock(&kvm->lock); | ||
1761 | kvm->vcpus[n] = NULL; | ||
1762 | vcpu_destroy: | 1237 | vcpu_destroy: |
1763 | mutex_unlock(&kvm->lock); | 1238 | mutex_unlock(&kvm->lock); |
1764 | kvm_arch_vcpu_destroy(vcpu); | 1239 | kvm_arch_vcpu_destroy(vcpu); |
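Editor's note: the vcpu-creation hunk stores the new vcpu pointer into its slot, issues smp_wmb(), and only then bumps online_vcpus, so any reader who observes the count also sees the pointer it covers. A user-space sketch of that publish order using C11 release/acquire; the array, count, and struct are simplified stand-ins.

/*
 * Sketch: fill the slot first, then publish it by bumping the count
 * with release semantics; readers pair it with an acquire load.
 */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

#define MAX_VCPUS 8

struct vcpu { int id; };

static struct vcpu *vcpus[MAX_VCPUS];
static atomic_int online_vcpus;

static void publish_vcpu(struct vcpu *v)
{
	int n = atomic_load_explicit(&online_vcpus, memory_order_relaxed);

	vcpus[n] = v;                                   /* fill the slot */
	atomic_store_explicit(&online_vcpus, n + 1,
			      memory_order_release);    /* then publish */
}

static struct vcpu *last_vcpu(void)
{
	int n = atomic_load_explicit(&online_vcpus, memory_order_acquire);

	return n ? vcpus[n - 1] : NULL;                 /* covered by n */
}

int main(void)
{
	static struct vcpu v0 = { .id = 0 };

	publish_vcpu(&v0);
	printf("last vcpu id: %d\n", last_vcpu()->id);
	return 0;
}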
@@ -1776,88 +1251,6 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) | |||
1776 | return 0; | 1251 | return 0; |
1777 | } | 1252 | } |
1778 | 1253 | ||
1779 | #ifdef __KVM_HAVE_MSIX | ||
1780 | static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, | ||
1781 | struct kvm_assigned_msix_nr *entry_nr) | ||
1782 | { | ||
1783 | int r = 0; | ||
1784 | struct kvm_assigned_dev_kernel *adev; | ||
1785 | |||
1786 | mutex_lock(&kvm->lock); | ||
1787 | |||
1788 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
1789 | entry_nr->assigned_dev_id); | ||
1790 | if (!adev) { | ||
1791 | r = -EINVAL; | ||
1792 | goto msix_nr_out; | ||
1793 | } | ||
1794 | |||
1795 | if (adev->entries_nr == 0) { | ||
1796 | adev->entries_nr = entry_nr->entry_nr; | ||
1797 | if (adev->entries_nr == 0 || | ||
1798 | adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { | ||
1799 | r = -EINVAL; | ||
1800 | goto msix_nr_out; | ||
1801 | } | ||
1802 | |||
1803 | adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * | ||
1804 | entry_nr->entry_nr, | ||
1805 | GFP_KERNEL); | ||
1806 | if (!adev->host_msix_entries) { | ||
1807 | r = -ENOMEM; | ||
1808 | goto msix_nr_out; | ||
1809 | } | ||
1810 | adev->guest_msix_entries = kzalloc( | ||
1811 | sizeof(struct kvm_guest_msix_entry) * | ||
1812 | entry_nr->entry_nr, GFP_KERNEL); | ||
1813 | if (!adev->guest_msix_entries) { | ||
1814 | kfree(adev->host_msix_entries); | ||
1815 | r = -ENOMEM; | ||
1816 | goto msix_nr_out; | ||
1817 | } | ||
1818 | } else /* Not allowed to set the MSI-X number twice */ | ||
1819 | r = -EINVAL; | ||
1820 | msix_nr_out: | ||
1821 | mutex_unlock(&kvm->lock); | ||
1822 | return r; | ||
1823 | } | ||
1824 | |||
1825 | static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, | ||
1826 | struct kvm_assigned_msix_entry *entry) | ||
1827 | { | ||
1828 | int r = 0, i; | ||
1829 | struct kvm_assigned_dev_kernel *adev; | ||
1830 | |||
1831 | mutex_lock(&kvm->lock); | ||
1832 | |||
1833 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
1834 | entry->assigned_dev_id); | ||
1835 | |||
1836 | if (!adev) { | ||
1837 | r = -EINVAL; | ||
1838 | goto msix_entry_out; | ||
1839 | } | ||
1840 | |||
1841 | for (i = 0; i < adev->entries_nr; i++) | ||
1842 | if (adev->guest_msix_entries[i].vector == 0 || | ||
1843 | adev->guest_msix_entries[i].entry == entry->entry) { | ||
1844 | adev->guest_msix_entries[i].entry = entry->entry; | ||
1845 | adev->guest_msix_entries[i].vector = entry->gsi; | ||
1846 | adev->host_msix_entries[i].entry = entry->entry; | ||
1847 | break; | ||
1848 | } | ||
1849 | if (i == adev->entries_nr) { | ||
1850 | r = -ENOSPC; | ||
1851 | goto msix_entry_out; | ||
1852 | } | ||
1853 | |||
1854 | msix_entry_out: | ||
1855 | mutex_unlock(&kvm->lock); | ||
1856 | |||
1857 | return r; | ||
1858 | } | ||
1859 | #endif | ||
1860 | |||
1861 | static long kvm_vcpu_ioctl(struct file *filp, | 1254 | static long kvm_vcpu_ioctl(struct file *filp, |
1862 | unsigned int ioctl, unsigned long arg) | 1255 | unsigned int ioctl, unsigned long arg) |
1863 | { | 1256 | { |
@@ -2116,118 +1509,89 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2116 | break; | 1509 | break; |
2117 | } | 1510 | } |
2118 | #endif | 1511 | #endif |
2119 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | 1512 | case KVM_IRQFD: { |
2120 | case KVM_ASSIGN_PCI_DEVICE: { | 1513 | struct kvm_irqfd data; |
2121 | struct kvm_assigned_pci_dev assigned_dev; | ||
2122 | 1514 | ||
2123 | r = -EFAULT; | 1515 | r = -EFAULT; |
2124 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | 1516 | if (copy_from_user(&data, argp, sizeof data)) |
2125 | goto out; | ||
2126 | r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); | ||
2127 | if (r) | ||
2128 | goto out; | 1517 | goto out; |
1518 | r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags); | ||
2129 | break; | 1519 | break; |
2130 | } | 1520 | } |
2131 | case KVM_ASSIGN_IRQ: { | 1521 | case KVM_IOEVENTFD: { |
2132 | r = -EOPNOTSUPP; | 1522 | struct kvm_ioeventfd data; |
2133 | break; | ||
2134 | } | ||
2135 | #ifdef KVM_CAP_ASSIGN_DEV_IRQ | ||
2136 | case KVM_ASSIGN_DEV_IRQ: { | ||
2137 | struct kvm_assigned_irq assigned_irq; | ||
2138 | 1523 | ||
2139 | r = -EFAULT; | 1524 | r = -EFAULT; |
2140 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | 1525 | if (copy_from_user(&data, argp, sizeof data)) |
2141 | goto out; | ||
2142 | r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); | ||
2143 | if (r) | ||
2144 | goto out; | 1526 | goto out; |
1527 | r = kvm_ioeventfd(kvm, &data); | ||
2145 | break; | 1528 | break; |
2146 | } | 1529 | } |
2147 | case KVM_DEASSIGN_DEV_IRQ: { | 1530 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE |
2148 | struct kvm_assigned_irq assigned_irq; | 1531 | case KVM_SET_BOOT_CPU_ID: |
2149 | 1532 | r = 0; | |
2150 | r = -EFAULT; | 1533 | mutex_lock(&kvm->lock); |
2151 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | 1534 | if (atomic_read(&kvm->online_vcpus) != 0) |
2152 | goto out; | 1535 | r = -EBUSY; |
2153 | r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); | 1536 | else |
2154 | if (r) | 1537 | kvm->bsp_vcpu_id = arg; |
2155 | goto out; | 1538 | mutex_unlock(&kvm->lock); |
2156 | break; | 1539 | break; |
2157 | } | ||
2158 | #endif | ||
2159 | #endif | 1540 | #endif |
2160 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT | 1541 | default: |
2161 | case KVM_DEASSIGN_PCI_DEVICE: { | 1542 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); |
2162 | struct kvm_assigned_pci_dev assigned_dev; | 1543 | if (r == -ENOTTY) |
2163 | 1544 | r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); | |
2164 | r = -EFAULT; | ||
2165 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
2166 | goto out; | ||
2167 | r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); | ||
2168 | if (r) | ||
2169 | goto out; | ||
2170 | break; | ||
2171 | } | 1545 | } |
2172 | #endif | 1546 | out: |
2173 | #ifdef KVM_CAP_IRQ_ROUTING | 1547 | return r; |
2174 | case KVM_SET_GSI_ROUTING: { | 1548 | } |
2175 | struct kvm_irq_routing routing; | 1549 | |
2176 | struct kvm_irq_routing __user *urouting; | 1550 | #ifdef CONFIG_COMPAT |
2177 | struct kvm_irq_routing_entry *entries; | 1551 | struct compat_kvm_dirty_log { |
1552 | __u32 slot; | ||
1553 | __u32 padding1; | ||
1554 | union { | ||
1555 | compat_uptr_t dirty_bitmap; /* one bit per page */ | ||
1556 | __u64 padding2; | ||
1557 | }; | ||
1558 | }; | ||
1559 | |||
1560 | static long kvm_vm_compat_ioctl(struct file *filp, | ||
1561 | unsigned int ioctl, unsigned long arg) | ||
1562 | { | ||
1563 | struct kvm *kvm = filp->private_data; | ||
1564 | int r; | ||
1565 | |||
1566 | if (kvm->mm != current->mm) | ||
1567 | return -EIO; | ||
1568 | switch (ioctl) { | ||
1569 | case KVM_GET_DIRTY_LOG: { | ||
1570 | struct compat_kvm_dirty_log compat_log; | ||
1571 | struct kvm_dirty_log log; | ||
2178 | 1572 | ||
2179 | r = -EFAULT; | 1573 | r = -EFAULT; |
2180 | if (copy_from_user(&routing, argp, sizeof(routing))) | 1574 | if (copy_from_user(&compat_log, (void __user *)arg, |
2181 | goto out; | 1575 | sizeof(compat_log))) |
2182 | r = -EINVAL; | ||
2183 | if (routing.nr >= KVM_MAX_IRQ_ROUTES) | ||
2184 | goto out; | ||
2185 | if (routing.flags) | ||
2186 | goto out; | ||
2187 | r = -ENOMEM; | ||
2188 | entries = vmalloc(routing.nr * sizeof(*entries)); | ||
2189 | if (!entries) | ||
2190 | goto out; | ||
2191 | r = -EFAULT; | ||
2192 | urouting = argp; | ||
2193 | if (copy_from_user(entries, urouting->entries, | ||
2194 | routing.nr * sizeof(*entries))) | ||
2195 | goto out_free_irq_routing; | ||
2196 | r = kvm_set_irq_routing(kvm, entries, routing.nr, | ||
2197 | routing.flags); | ||
2198 | out_free_irq_routing: | ||
2199 | vfree(entries); | ||
2200 | break; | ||
2201 | } | ||
2202 | #ifdef __KVM_HAVE_MSIX | ||
2203 | case KVM_ASSIGN_SET_MSIX_NR: { | ||
2204 | struct kvm_assigned_msix_nr entry_nr; | ||
2205 | r = -EFAULT; | ||
2206 | if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) | ||
2207 | goto out; | ||
2208 | r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); | ||
2209 | if (r) | ||
2210 | goto out; | ||
2211 | break; | ||
2212 | } | ||
2213 | case KVM_ASSIGN_SET_MSIX_ENTRY: { | ||
2214 | struct kvm_assigned_msix_entry entry; | ||
2215 | r = -EFAULT; | ||
2216 | if (copy_from_user(&entry, argp, sizeof entry)) | ||
2217 | goto out; | 1576 | goto out; |
2218 | r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); | 1577 | log.slot = compat_log.slot; |
1578 | log.padding1 = compat_log.padding1; | ||
1579 | log.padding2 = compat_log.padding2; | ||
1580 | log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); | ||
1581 | |||
1582 | r = kvm_vm_ioctl_get_dirty_log(kvm, &log); | ||
2219 | if (r) | 1583 | if (r) |
2220 | goto out; | 1584 | goto out; |
2221 | break; | 1585 | break; |
2222 | } | 1586 | } |
2223 | #endif | ||
2224 | #endif /* KVM_CAP_IRQ_ROUTING */ | ||
2225 | default: | 1587 | default: |
2226 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); | 1588 | r = kvm_vm_ioctl(filp, ioctl, arg); |
2227 | } | 1589 | } |
1590 | |||
2228 | out: | 1591 | out: |
2229 | return r; | 1592 | return r; |
2230 | } | 1593 | } |
1594 | #endif | ||
2231 | 1595 | ||
2232 | static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1596 | static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
2233 | { | 1597 | { |
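Editor's note: the new compat ioctl copies the 32-bit dirty-log layout into the native one, widening the 32-bit user pointer via compat_ptr(). A user-space sketch of that widening step; the struct names below are illustrative, not the UAPI definitions.

/*
 * Sketch: copy fixed-width fields and widen a 32-bit "pointer" field
 * into a native pointer (a uintptr_t cast here, compat_ptr() in-kernel).
 */
#include <stdint.h>
#include <stdio.h>

struct compat_dirty_log {
	uint32_t slot;
	uint32_t bitmap;     /* 32-bit user pointer */
};

struct dirty_log {
	uint32_t slot;
	void *bitmap;        /* native-width pointer */
};

static void widen(const struct compat_dirty_log *in, struct dirty_log *out)
{
	out->slot = in->slot;
	out->bitmap = (void *)(uintptr_t)in->bitmap;   /* like compat_ptr() */
}

int main(void)
{
	struct compat_dirty_log c = { .slot = 1, .bitmap = 0x1000 };
	struct dirty_log n;

	widen(&c, &n);
	printf("slot %u bitmap %p\n", n.slot, n.bitmap);
	return 0;
}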
@@ -2250,7 +1614,7 @@ static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
2250 | return 0; | 1614 | return 0; |
2251 | } | 1615 | } |
2252 | 1616 | ||
2253 | static struct vm_operations_struct kvm_vm_vm_ops = { | 1617 | static const struct vm_operations_struct kvm_vm_vm_ops = { |
2254 | .fault = kvm_vm_fault, | 1618 | .fault = kvm_vm_fault, |
2255 | }; | 1619 | }; |
2256 | 1620 | ||
@@ -2263,7 +1627,9 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) | |||
2263 | static struct file_operations kvm_vm_fops = { | 1627 | static struct file_operations kvm_vm_fops = { |
2264 | .release = kvm_vm_release, | 1628 | .release = kvm_vm_release, |
2265 | .unlocked_ioctl = kvm_vm_ioctl, | 1629 | .unlocked_ioctl = kvm_vm_ioctl, |
2266 | .compat_ioctl = kvm_vm_ioctl, | 1630 | #ifdef CONFIG_COMPAT |
1631 | .compat_ioctl = kvm_vm_compat_ioctl, | ||
1632 | #endif | ||
2267 | .mmap = kvm_vm_mmap, | 1633 | .mmap = kvm_vm_mmap, |
2268 | }; | 1634 | }; |
2269 | 1635 | ||
@@ -2275,7 +1641,7 @@ static int kvm_dev_ioctl_create_vm(void) | |||
2275 | kvm = kvm_create_vm(); | 1641 | kvm = kvm_create_vm(); |
2276 | if (IS_ERR(kvm)) | 1642 | if (IS_ERR(kvm)) |
2277 | return PTR_ERR(kvm); | 1643 | return PTR_ERR(kvm); |
2278 | fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, 0); | 1644 | fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); |
2279 | if (fd < 0) | 1645 | if (fd < 0) |
2280 | kvm_put_kvm(kvm); | 1646 | kvm_put_kvm(kvm); |
2281 | 1647 | ||
@@ -2288,6 +1654,10 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) | |||
2288 | case KVM_CAP_USER_MEMORY: | 1654 | case KVM_CAP_USER_MEMORY: |
2289 | case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: | 1655 | case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: |
2290 | case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: | 1656 | case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: |
1657 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | ||
1658 | case KVM_CAP_SET_BOOT_CPU_ID: | ||
1659 | #endif | ||
1660 | case KVM_CAP_INTERNAL_ERROR_DATA: | ||
2291 | return 1; | 1661 | return 1; |
2292 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 1662 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
2293 | case KVM_CAP_IRQ_ROUTING: | 1663 | case KVM_CAP_IRQ_ROUTING: |
@@ -2335,7 +1705,7 @@ static long kvm_dev_ioctl(struct file *filp, | |||
2335 | case KVM_TRACE_ENABLE: | 1705 | case KVM_TRACE_ENABLE: |
2336 | case KVM_TRACE_PAUSE: | 1706 | case KVM_TRACE_PAUSE: |
2337 | case KVM_TRACE_DISABLE: | 1707 | case KVM_TRACE_DISABLE: |
2338 | r = kvm_trace_ioctl(ioctl, arg); | 1708 | r = -EOPNOTSUPP; |
2339 | break; | 1709 | break; |
2340 | default: | 1710 | default: |
2341 | return kvm_arch_dev_ioctl(filp, ioctl, arg); | 1711 | return kvm_arch_dev_ioctl(filp, ioctl, arg); |
@@ -2358,11 +1728,21 @@ static struct miscdevice kvm_dev = { | |||
2358 | static void hardware_enable(void *junk) | 1728 | static void hardware_enable(void *junk) |
2359 | { | 1729 | { |
2360 | int cpu = raw_smp_processor_id(); | 1730 | int cpu = raw_smp_processor_id(); |
1731 | int r; | ||
2361 | 1732 | ||
2362 | if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) | 1733 | if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) |
2363 | return; | 1734 | return; |
1735 | |||
2364 | cpumask_set_cpu(cpu, cpus_hardware_enabled); | 1736 | cpumask_set_cpu(cpu, cpus_hardware_enabled); |
2365 | kvm_arch_hardware_enable(NULL); | 1737 | |
1738 | r = kvm_arch_hardware_enable(NULL); | ||
1739 | |||
1740 | if (r) { | ||
1741 | cpumask_clear_cpu(cpu, cpus_hardware_enabled); | ||
1742 | atomic_inc(&hardware_enable_failed); | ||
1743 | printk(KERN_INFO "kvm: enabling virtualization on " | ||
1744 | "CPU%d failed\n", cpu); | ||
1745 | } | ||
2366 | } | 1746 | } |
2367 | 1747 | ||
2368 | static void hardware_disable(void *junk) | 1748 | static void hardware_disable(void *junk) |
@@ -2375,11 +1755,52 @@ static void hardware_disable(void *junk) | |||
2375 | kvm_arch_hardware_disable(NULL); | 1755 | kvm_arch_hardware_disable(NULL); |
2376 | } | 1756 | } |
2377 | 1757 | ||
1758 | static void hardware_disable_all_nolock(void) | ||
1759 | { | ||
1760 | BUG_ON(!kvm_usage_count); | ||
1761 | |||
1762 | kvm_usage_count--; | ||
1763 | if (!kvm_usage_count) | ||
1764 | on_each_cpu(hardware_disable, NULL, 1); | ||
1765 | } | ||
1766 | |||
1767 | static void hardware_disable_all(void) | ||
1768 | { | ||
1769 | spin_lock(&kvm_lock); | ||
1770 | hardware_disable_all_nolock(); | ||
1771 | spin_unlock(&kvm_lock); | ||
1772 | } | ||
1773 | |||
1774 | static int hardware_enable_all(void) | ||
1775 | { | ||
1776 | int r = 0; | ||
1777 | |||
1778 | spin_lock(&kvm_lock); | ||
1779 | |||
1780 | kvm_usage_count++; | ||
1781 | if (kvm_usage_count == 1) { | ||
1782 | atomic_set(&hardware_enable_failed, 0); | ||
1783 | on_each_cpu(hardware_enable, NULL, 1); | ||
1784 | |||
1785 | if (atomic_read(&hardware_enable_failed)) { | ||
1786 | hardware_disable_all_nolock(); | ||
1787 | r = -EBUSY; | ||
1788 | } | ||
1789 | } | ||
1790 | |||
1791 | spin_unlock(&kvm_lock); | ||
1792 | |||
1793 | return r; | ||
1794 | } | ||
1795 | |||
2378 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | 1796 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, |
2379 | void *v) | 1797 | void *v) |
2380 | { | 1798 | { |
2381 | int cpu = (long)v; | 1799 | int cpu = (long)v; |
2382 | 1800 | ||
1801 | if (!kvm_usage_count) | ||
1802 | return NOTIFY_OK; | ||
1803 | |||
2383 | val &= ~CPU_TASKS_FROZEN; | 1804 | val &= ~CPU_TASKS_FROZEN; |
2384 | switch (val) { | 1805 | switch (val) { |
2385 | case CPU_DYING: | 1806 | case CPU_DYING: |
@@ -2449,26 +1870,71 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus) | |||
2449 | } | 1870 | } |
2450 | } | 1871 | } |
2451 | 1872 | ||
2452 | struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, | 1873 | /* kvm_io_bus_write - called under kvm->slots_lock */ |
2453 | gpa_t addr, int len, int is_write) | 1874 | int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, |
1875 | int len, const void *val) | ||
2454 | { | 1876 | { |
2455 | int i; | 1877 | int i; |
1878 | for (i = 0; i < bus->dev_count; i++) | ||
1879 | if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) | ||
1880 | return 0; | ||
1881 | return -EOPNOTSUPP; | ||
1882 | } | ||
2456 | 1883 | ||
2457 | for (i = 0; i < bus->dev_count; i++) { | 1884 | /* kvm_io_bus_read - called under kvm->slots_lock */ |
2458 | struct kvm_io_device *pos = bus->devs[i]; | 1885 | int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val) |
1886 | { | ||
1887 | int i; | ||
1888 | for (i = 0; i < bus->dev_count; i++) | ||
1889 | if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) | ||
1890 | return 0; | ||
1891 | return -EOPNOTSUPP; | ||
1892 | } | ||
2459 | 1893 | ||
2460 | if (pos->in_range(pos, addr, len, is_write)) | 1894 | int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus, |
2461 | return pos; | 1895 | struct kvm_io_device *dev) |
2462 | } | 1896 | { |
1897 | int ret; | ||
2463 | 1898 | ||
2464 | return NULL; | 1899 | down_write(&kvm->slots_lock); |
1900 | ret = __kvm_io_bus_register_dev(bus, dev); | ||
1901 | up_write(&kvm->slots_lock); | ||
1902 | |||
1903 | return ret; | ||
2465 | } | 1904 | } |
2466 | 1905 | ||
2467 | void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev) | 1906 | /* An unlocked version. Caller must have write lock on slots_lock. */ |
1907 | int __kvm_io_bus_register_dev(struct kvm_io_bus *bus, | ||
1908 | struct kvm_io_device *dev) | ||
2468 | { | 1909 | { |
2469 | BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1)); | 1910 | if (bus->dev_count > NR_IOBUS_DEVS-1) |
1911 | return -ENOSPC; | ||
2470 | 1912 | ||
2471 | bus->devs[bus->dev_count++] = dev; | 1913 | bus->devs[bus->dev_count++] = dev; |
1914 | |||
1915 | return 0; | ||
1916 | } | ||
1917 | |||
1918 | void kvm_io_bus_unregister_dev(struct kvm *kvm, | ||
1919 | struct kvm_io_bus *bus, | ||
1920 | struct kvm_io_device *dev) | ||
1921 | { | ||
1922 | down_write(&kvm->slots_lock); | ||
1923 | __kvm_io_bus_unregister_dev(bus, dev); | ||
1924 | up_write(&kvm->slots_lock); | ||
1925 | } | ||
1926 | |||
1927 | /* An unlocked version. Caller must have write lock on slots_lock. */ | ||
1928 | void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus, | ||
1929 | struct kvm_io_device *dev) | ||
1930 | { | ||
1931 | int i; | ||
1932 | |||
1933 | for (i = 0; i < bus->dev_count; i++) | ||
1934 | if (bus->devs[i] == dev) { | ||
1935 | bus->devs[i] = bus->devs[--bus->dev_count]; | ||
1936 | break; | ||
1937 | } | ||
2472 | } | 1938 | } |
2473 | 1939 | ||
2474 | static struct notifier_block kvm_cpu_notifier = { | 1940 | static struct notifier_block kvm_cpu_notifier = { |
@@ -2501,18 +1967,16 @@ static int vcpu_stat_get(void *_offset, u64 *val) | |||
2501 | *val = 0; | 1967 | *val = 0; |
2502 | spin_lock(&kvm_lock); | 1968 | spin_lock(&kvm_lock); |
2503 | list_for_each_entry(kvm, &vm_list, vm_list) | 1969 | list_for_each_entry(kvm, &vm_list, vm_list) |
2504 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | 1970 | kvm_for_each_vcpu(i, vcpu, kvm) |
2505 | vcpu = kvm->vcpus[i]; | 1971 | *val += *(u32 *)((void *)vcpu + offset); |
2506 | if (vcpu) | 1972 | |
2507 | *val += *(u32 *)((void *)vcpu + offset); | ||
2508 | } | ||
2509 | spin_unlock(&kvm_lock); | 1973 | spin_unlock(&kvm_lock); |
2510 | return 0; | 1974 | return 0; |
2511 | } | 1975 | } |
2512 | 1976 | ||
2513 | DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n"); | 1977 | DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n"); |
2514 | 1978 | ||
2515 | static struct file_operations *stat_fops[] = { | 1979 | static const struct file_operations *stat_fops[] = { |
2516 | [KVM_STAT_VCPU] = &vcpu_stat_fops, | 1980 | [KVM_STAT_VCPU] = &vcpu_stat_fops, |
2517 | [KVM_STAT_VM] = &vm_stat_fops, | 1981 | [KVM_STAT_VM] = &vm_stat_fops, |
2518 | }; | 1982 | }; |
@@ -2539,13 +2003,15 @@ static void kvm_exit_debug(void) | |||
2539 | 2003 | ||
2540 | static int kvm_suspend(struct sys_device *dev, pm_message_t state) | 2004 | static int kvm_suspend(struct sys_device *dev, pm_message_t state) |
2541 | { | 2005 | { |
2542 | hardware_disable(NULL); | 2006 | if (kvm_usage_count) |
2007 | hardware_disable(NULL); | ||
2543 | return 0; | 2008 | return 0; |
2544 | } | 2009 | } |
2545 | 2010 | ||
2546 | static int kvm_resume(struct sys_device *dev) | 2011 | static int kvm_resume(struct sys_device *dev) |
2547 | { | 2012 | { |
2548 | hardware_enable(NULL); | 2013 | if (kvm_usage_count) |
2014 | hardware_enable(NULL); | ||
2549 | return 0; | 2015 | return 0; |
2550 | } | 2016 | } |
2551 | 2017 | ||
@@ -2590,8 +2056,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size, | |||
2590 | int r; | 2056 | int r; |
2591 | int cpu; | 2057 | int cpu; |
2592 | 2058 | ||
2593 | kvm_init_debug(); | ||
2594 | |||
2595 | r = kvm_arch_init(opaque); | 2059 | r = kvm_arch_init(opaque); |
2596 | if (r) | 2060 | if (r) |
2597 | goto out_fail; | 2061 | goto out_fail; |
@@ -2622,7 +2086,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size, | |||
2622 | goto out_free_1; | 2086 | goto out_free_1; |
2623 | } | 2087 | } |
2624 | 2088 | ||
2625 | on_each_cpu(hardware_enable, NULL, 1); | ||
2626 | r = register_cpu_notifier(&kvm_cpu_notifier); | 2089 | r = register_cpu_notifier(&kvm_cpu_notifier); |
2627 | if (r) | 2090 | if (r) |
2628 | goto out_free_2; | 2091 | goto out_free_2; |
@@ -2658,6 +2121,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size, | |||
2658 | kvm_preempt_ops.sched_in = kvm_sched_in; | 2121 | kvm_preempt_ops.sched_in = kvm_sched_in; |
2659 | kvm_preempt_ops.sched_out = kvm_sched_out; | 2122 | kvm_preempt_ops.sched_out = kvm_sched_out; |
2660 | 2123 | ||
2124 | kvm_init_debug(); | ||
2125 | |||
2661 | return 0; | 2126 | return 0; |
2662 | 2127 | ||
2663 | out_free: | 2128 | out_free: |
@@ -2670,7 +2135,6 @@ out_free_3: | |||
2670 | unregister_reboot_notifier(&kvm_reboot_notifier); | 2135 | unregister_reboot_notifier(&kvm_reboot_notifier); |
2671 | unregister_cpu_notifier(&kvm_cpu_notifier); | 2136 | unregister_cpu_notifier(&kvm_cpu_notifier); |
2672 | out_free_2: | 2137 | out_free_2: |
2673 | on_each_cpu(hardware_disable, NULL, 1); | ||
2674 | out_free_1: | 2138 | out_free_1: |
2675 | kvm_arch_hardware_unsetup(); | 2139 | kvm_arch_hardware_unsetup(); |
2676 | out_free_0a: | 2140 | out_free_0a: |
@@ -2679,7 +2143,6 @@ out_free_0: | |||
2679 | __free_page(bad_page); | 2143 | __free_page(bad_page); |
2680 | out: | 2144 | out: |
2681 | kvm_arch_exit(); | 2145 | kvm_arch_exit(); |
2682 | kvm_exit_debug(); | ||
2683 | out_fail: | 2146 | out_fail: |
2684 | return r; | 2147 | return r; |
2685 | } | 2148 | } |
@@ -2687,7 +2150,8 @@ EXPORT_SYMBOL_GPL(kvm_init); | |||
2687 | 2150 | ||
2688 | void kvm_exit(void) | 2151 | void kvm_exit(void) |
2689 | { | 2152 | { |
2690 | kvm_trace_cleanup(); | 2153 | tracepoint_synchronize_unregister(); |
2154 | kvm_exit_debug(); | ||
2691 | misc_deregister(&kvm_dev); | 2155 | misc_deregister(&kvm_dev); |
2692 | kmem_cache_destroy(kvm_vcpu_cache); | 2156 | kmem_cache_destroy(kvm_vcpu_cache); |
2693 | sysdev_unregister(&kvm_sysdev); | 2157 | sysdev_unregister(&kvm_sysdev); |
@@ -2697,7 +2161,6 @@ void kvm_exit(void) | |||
2697 | on_each_cpu(hardware_disable, NULL, 1); | 2161 | on_each_cpu(hardware_disable, NULL, 1); |
2698 | kvm_arch_hardware_unsetup(); | 2162 | kvm_arch_hardware_unsetup(); |
2699 | kvm_arch_exit(); | 2163 | kvm_arch_exit(); |
2700 | kvm_exit_debug(); | ||
2701 | free_cpumask_var(cpus_hardware_enabled); | 2164 | free_cpumask_var(cpus_hardware_enabled); |
2702 | __free_page(bad_page); | 2165 | __free_page(bad_page); |
2703 | } | 2166 | } |
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c deleted file mode 100644 index f59874446440..000000000000 --- a/virt/kvm/kvm_trace.c +++ /dev/null | |||
@@ -1,285 +0,0 @@ | |||
1 | /* | ||
2 | * kvm trace | ||
3 | * | ||
4 | * It is designed to allow debugging traces of kvm to be generated | ||
5 | * on UP / SMP machines. Each trace entry can be timestamped so that | ||
6 | * it's possible to reconstruct a chronological record of trace events. | ||
7 | * The implementation refers to blktrace kernel support. | ||
8 | * | ||
9 | * Copyright (c) 2008 Intel Corporation | ||
10 | * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> | ||
11 | * | ||
12 | * Authors: Feng(Eric) Liu, eric.e.liu@intel.com | ||
13 | * | ||
14 | * Date: Feb 2008 | ||
15 | */ | ||
16 | |||
17 | #include <linux/module.h> | ||
18 | #include <linux/relay.h> | ||
19 | #include <linux/debugfs.h> | ||
20 | #include <linux/ktime.h> | ||
21 | |||
22 | #include <linux/kvm_host.h> | ||
23 | |||
24 | #define KVM_TRACE_STATE_RUNNING (1 << 0) | ||
25 | #define KVM_TRACE_STATE_PAUSE (1 << 1) | ||
26 | #define KVM_TRACE_STATE_CLEARUP (1 << 2) | ||
27 | |||
28 | struct kvm_trace { | ||
29 | int trace_state; | ||
30 | struct rchan *rchan; | ||
31 | struct dentry *lost_file; | ||
32 | atomic_t lost_records; | ||
33 | }; | ||
34 | static struct kvm_trace *kvm_trace; | ||
35 | |||
36 | struct kvm_trace_probe { | ||
37 | const char *name; | ||
38 | const char *format; | ||
39 | u32 timestamp_in; | ||
40 | marker_probe_func *probe_func; | ||
41 | }; | ||
42 | |||
43 | static inline int calc_rec_size(int timestamp, int extra) | ||
44 | { | ||
45 | int rec_size = KVM_TRC_HEAD_SIZE; | ||
46 | |||
47 | rec_size += extra; | ||
48 | return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; | ||
49 | } | ||
50 | |||
51 | static void kvm_add_trace(void *probe_private, void *call_data, | ||
52 | const char *format, va_list *args) | ||
53 | { | ||
54 | struct kvm_trace_probe *p = probe_private; | ||
55 | struct kvm_trace *kt = kvm_trace; | ||
56 | struct kvm_trace_rec rec; | ||
57 | struct kvm_vcpu *vcpu; | ||
58 | int i, size; | ||
59 | u32 extra; | ||
60 | |||
61 | if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING)) | ||
62 | return; | ||
63 | |||
64 | rec.rec_val = TRACE_REC_EVENT_ID(va_arg(*args, u32)); | ||
65 | vcpu = va_arg(*args, struct kvm_vcpu *); | ||
66 | rec.pid = current->tgid; | ||
67 | rec.vcpu_id = vcpu->vcpu_id; | ||
68 | |||
69 | extra = va_arg(*args, u32); | ||
70 | WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); | ||
71 | extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); | ||
72 | |||
73 | rec.rec_val |= TRACE_REC_TCS(p->timestamp_in) | ||
74 | | TRACE_REC_NUM_DATA_ARGS(extra); | ||
75 | |||
76 | if (p->timestamp_in) { | ||
77 | rec.u.timestamp.timestamp = ktime_to_ns(ktime_get()); | ||
78 | |||
79 | for (i = 0; i < extra; i++) | ||
80 | rec.u.timestamp.extra_u32[i] = va_arg(*args, u32); | ||
81 | } else { | ||
82 | for (i = 0; i < extra; i++) | ||
83 | rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32); | ||
84 | } | ||
85 | |||
86 | size = calc_rec_size(p->timestamp_in, extra * sizeof(u32)); | ||
87 | relay_write(kt->rchan, &rec, size); | ||
88 | } | ||
89 | |||
90 | static struct kvm_trace_probe kvm_trace_probes[] = { | ||
91 | { "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace }, | ||
92 | { "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace }, | ||
93 | }; | ||
94 | |||
95 | static int lost_records_get(void *data, u64 *val) | ||
96 | { | ||
97 | struct kvm_trace *kt = data; | ||
98 | |||
99 | *val = atomic_read(&kt->lost_records); | ||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n"); | ||
104 | |||
105 | /* | ||
106 | * The relay channel is used in "no-overwrite" mode; it keeps track of how | ||
107 | * many times we encountered a full subbuffer, so the user-space app can | ||
108 | * tell how many records were lost. | ||
109 | */ | ||
110 | static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, | ||
111 | void *prev_subbuf, size_t prev_padding) | ||
112 | { | ||
113 | struct kvm_trace *kt; | ||
114 | |||
115 | if (!relay_buf_full(buf)) { | ||
116 | if (!prev_subbuf) { | ||
117 | /* | ||
118 | * Executed only once, when the channel is opened: | ||
119 | * save the metadata as the first record. | ||
120 | */ | ||
121 | subbuf_start_reserve(buf, sizeof(u32)); | ||
122 | *(u32 *)subbuf = 0x12345678; | ||
123 | } | ||
124 | |||
125 | return 1; | ||
126 | } | ||
127 | |||
128 | kt = buf->chan->private_data; | ||
129 | atomic_inc(&kt->lost_records); | ||
130 | |||
131 | return 0; | ||
132 | } | ||
133 | |||
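Editor's note: the deleted subbuf_start callback implements a "no-overwrite" policy: when the buffer is full, new records are dropped and only a lost-record counter is bumped for the consumer to report. A user-space sketch of that policy; a fixed array stands in for the relay sub-buffers.

/*
 * Sketch: drop writes when full and count the losses instead of
 * overwriting older records.
 */
#include <stdio.h>

#define BUF_SLOTS 4

static int buf[BUF_SLOTS];
static int used;
static int lost_records;

static void trace_write(int rec)
{
	if (used == BUF_SLOTS) {     /* full: no-overwrite, count the loss */
		lost_records++;
		return;
	}
	buf[used++] = rec;
}

int main(void)
{
	for (int i = 0; i < 6; i++)
		trace_write(i);
	printf("stored %d, lost %d\n", used, lost_records); /* 4, 2 */
	return 0;
}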
134 | static struct dentry *kvm_create_buf_file_callack(const char *filename, | ||
135 | struct dentry *parent, | ||
136 | int mode, | ||
137 | struct rchan_buf *buf, | ||
138 | int *is_global) | ||
139 | { | ||
140 | return debugfs_create_file(filename, mode, parent, buf, | ||
141 | &relay_file_operations); | ||
142 | } | ||
143 | |||
144 | static int kvm_remove_buf_file_callback(struct dentry *dentry) | ||
145 | { | ||
146 | debugfs_remove(dentry); | ||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | static struct rchan_callbacks kvm_relay_callbacks = { | ||
151 | .subbuf_start = kvm_subbuf_start_callback, | ||
152 | .create_buf_file = kvm_create_buf_file_callack, | ||
153 | .remove_buf_file = kvm_remove_buf_file_callback, | ||
154 | }; | ||
155 | |||
156 | static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts) | ||
157 | { | ||
158 | struct kvm_trace *kt; | ||
159 | int i, r = -ENOMEM; | ||
160 | |||
161 | if (!kuts->buf_size || !kuts->buf_nr) | ||
162 | return -EINVAL; | ||
163 | |||
164 | kt = kzalloc(sizeof(*kt), GFP_KERNEL); | ||
165 | if (!kt) | ||
166 | goto err; | ||
167 | |||
168 | r = -EIO; | ||
169 | atomic_set(&kt->lost_records, 0); | ||
170 | kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir, | ||
171 | kt, &kvm_trace_lost_ops); | ||
172 | if (!kt->lost_file) | ||
173 | goto err; | ||
174 | |||
175 | kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size, | ||
176 | kuts->buf_nr, &kvm_relay_callbacks, kt); | ||
177 | if (!kt->rchan) | ||
178 | goto err; | ||
179 | |||
180 | kvm_trace = kt; | ||
181 | |||
182 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { | ||
183 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; | ||
184 | |||
185 | r = marker_probe_register(p->name, p->format, p->probe_func, p); | ||
186 | if (r) | ||
187 | printk(KERN_INFO "Unable to register probe %s\n", | ||
188 | p->name); | ||
189 | } | ||
190 | |||
191 | kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING; | ||
192 | |||
193 | return 0; | ||
194 | err: | ||
195 | if (kt) { | ||
196 | if (kt->lost_file) | ||
197 | debugfs_remove(kt->lost_file); | ||
198 | if (kt->rchan) | ||
199 | relay_close(kt->rchan); | ||
200 | kfree(kt); | ||
201 | } | ||
202 | return r; | ||
203 | } | ||
204 | |||
205 | static int kvm_trace_enable(char __user *arg) | ||
206 | { | ||
207 | struct kvm_user_trace_setup kuts; | ||
208 | int ret; | ||
209 | |||
210 | ret = copy_from_user(&kuts, arg, sizeof(kuts)); | ||
211 | if (ret) | ||
212 | return -EFAULT; | ||
213 | |||
214 | ret = do_kvm_trace_enable(&kuts); | ||
215 | if (ret) | ||
216 | return ret; | ||
217 | |||
218 | return 0; | ||
219 | } | ||
220 | |||
221 | static int kvm_trace_pause(void) | ||
222 | { | ||
223 | struct kvm_trace *kt = kvm_trace; | ||
224 | int r = -EINVAL; | ||
225 | |||
226 | if (kt == NULL) | ||
227 | return r; | ||
228 | |||
229 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING) { | ||
230 | kt->trace_state = KVM_TRACE_STATE_PAUSE; | ||
231 | relay_flush(kt->rchan); | ||
232 | r = 0; | ||
233 | } | ||
234 | |||
235 | return r; | ||
236 | } | ||
237 | |||
238 | void kvm_trace_cleanup(void) | ||
239 | { | ||
240 | struct kvm_trace *kt = kvm_trace; | ||
241 | int i; | ||
242 | |||
243 | if (kt == NULL) | ||
244 | return; | ||
245 | |||
246 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING || | ||
247 | kt->trace_state == KVM_TRACE_STATE_PAUSE) { | ||
248 | |||
249 | kt->trace_state = KVM_TRACE_STATE_CLEARUP; | ||
250 | |||
251 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { | ||
252 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; | ||
253 | marker_probe_unregister(p->name, p->probe_func, p); | ||
254 | } | ||
255 | marker_synchronize_unregister(); | ||
256 | |||
257 | relay_close(kt->rchan); | ||
258 | debugfs_remove(kt->lost_file); | ||
259 | kfree(kt); | ||
260 | } | ||
261 | } | ||
262 | |||
263 | int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) | ||
264 | { | ||
265 | void __user *argp = (void __user *)arg; | ||
266 | long r = -EINVAL; | ||
267 | |||
268 | if (!capable(CAP_SYS_ADMIN)) | ||
269 | return -EPERM; | ||
270 | |||
271 | switch (ioctl) { | ||
272 | case KVM_TRACE_ENABLE: | ||
273 | r = kvm_trace_enable(argp); | ||
274 | break; | ||
275 | case KVM_TRACE_PAUSE: | ||
276 | r = kvm_trace_pause(); | ||
277 | break; | ||
278 | case KVM_TRACE_DISABLE: | ||
279 | r = 0; | ||
280 | kvm_trace_cleanup(); | ||
281 | break; | ||
282 | } | ||
283 | |||
284 | return r; | ||
285 | } | ||