author | Thomas Gleixner <tglx@linutronix.de> | 2010-02-21 14:17:22 -0500 |
committer | Thomas Gleixner <tglx@linutronix.de> | 2010-02-21 14:17:22 -0500 |
commit | 5f854cfc024622e4aae14d7cf422f6ff86278688 (patch) | |
tree | 426e77c6f6e4939c80440bf1fabcb020e3ee145b /virt/kvm/kvm_main.c | |
parent | cc24da0742870f152ddf1002aa39dfcd83f7cf9c (diff) | |
parent | 4ec62b2b2e6bd7ddef7b6cea6e5db7b5578a6532 (diff) | |
Forward to 2.6.33-rc8
Merge branch 'linus' into rt/head with a pile of conflicts.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r-- | virt/kvm/kvm_main.c | 1231 |
1 files changed, 347 insertions, 884 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b24e96d5d40c..a51ba60a78b1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -43,29 +43,36 @@ | |||
43 | #include <linux/swap.h> | 43 | #include <linux/swap.h> |
44 | #include <linux/bitops.h> | 44 | #include <linux/bitops.h> |
45 | #include <linux/spinlock.h> | 45 | #include <linux/spinlock.h> |
46 | #include <linux/compat.h> | ||
46 | 47 | ||
47 | #include <asm/processor.h> | 48 | #include <asm/processor.h> |
48 | #include <asm/io.h> | 49 | #include <asm/io.h> |
49 | #include <asm/uaccess.h> | 50 | #include <asm/uaccess.h> |
50 | #include <asm/pgtable.h> | 51 | #include <asm/pgtable.h> |
52 | #include <asm-generic/bitops/le.h> | ||
51 | 53 | ||
52 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 54 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
53 | #include "coalesced_mmio.h" | 55 | #include "coalesced_mmio.h" |
54 | #endif | 56 | #endif |
55 | 57 | ||
56 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | 58 | #define CREATE_TRACE_POINTS |
57 | #include <linux/pci.h> | 59 | #include <trace/events/kvm.h> |
58 | #include <linux/interrupt.h> | ||
59 | #include "irq.h" | ||
60 | #endif | ||
61 | 60 | ||
62 | MODULE_AUTHOR("Qumranet"); | 61 | MODULE_AUTHOR("Qumranet"); |
63 | MODULE_LICENSE("GPL"); | 62 | MODULE_LICENSE("GPL"); |
64 | 63 | ||
64 | /* | ||
65 | * Ordering of locks: | ||
66 | * | ||
67 | * kvm->lock --> kvm->slots_lock --> kvm->irq_lock | ||
68 | */ | ||
69 | |||
65 | DEFINE_SPINLOCK(kvm_lock); | 70 | DEFINE_SPINLOCK(kvm_lock); |
66 | LIST_HEAD(vm_list); | 71 | LIST_HEAD(vm_list); |
67 | 72 | ||
68 | static cpumask_var_t cpus_hardware_enabled; | 73 | static cpumask_var_t cpus_hardware_enabled; |
74 | static int kvm_usage_count = 0; | ||
75 | static atomic_t hardware_enable_failed; | ||
69 | 76 | ||
70 | struct kmem_cache *kvm_vcpu_cache; | 77 | struct kmem_cache *kvm_vcpu_cache; |
71 | EXPORT_SYMBOL_GPL(kvm_vcpu_cache); | 78 | EXPORT_SYMBOL_GPL(kvm_vcpu_cache); |
@@ -76,627 +83,12 @@ struct dentry *kvm_debugfs_dir; | |||
76 | 83 | ||
77 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, | 84 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, |
78 | unsigned long arg); | 85 | unsigned long arg); |
86 | static int hardware_enable_all(void); | ||
87 | static void hardware_disable_all(void); | ||
79 | 88 | ||
80 | static bool kvm_rebooting; | 89 | static bool kvm_rebooting; |
81 | 90 | ||
82 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | 91 | static bool largepages_enabled = true; |
83 | static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, | ||
84 | int assigned_dev_id) | ||
85 | { | ||
86 | struct list_head *ptr; | ||
87 | struct kvm_assigned_dev_kernel *match; | ||
88 | |||
89 | list_for_each(ptr, head) { | ||
90 | match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); | ||
91 | if (match->assigned_dev_id == assigned_dev_id) | ||
92 | return match; | ||
93 | } | ||
94 | return NULL; | ||
95 | } | ||
96 | |||
97 | static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | ||
98 | *assigned_dev, int irq) | ||
99 | { | ||
100 | int i, index; | ||
101 | struct msix_entry *host_msix_entries; | ||
102 | |||
103 | host_msix_entries = assigned_dev->host_msix_entries; | ||
104 | |||
105 | index = -1; | ||
106 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
107 | if (irq == host_msix_entries[i].vector) { | ||
108 | index = i; | ||
109 | break; | ||
110 | } | ||
111 | if (index < 0) { | ||
112 | printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); | ||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | return index; | ||
117 | } | ||
118 | |||
119 | static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) | ||
120 | { | ||
121 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
122 | struct kvm *kvm; | ||
123 | int irq, i; | ||
124 | |||
125 | assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, | ||
126 | interrupt_work); | ||
127 | kvm = assigned_dev->kvm; | ||
128 | |||
129 | /* This is taken to safely inject irq inside the guest. When | ||
130 | * the interrupt injection (or the ioapic code) uses a | ||
131 | * finer-grained lock, update this | ||
132 | */ | ||
133 | mutex_lock(&kvm->lock); | ||
134 | spin_lock_irq(&assigned_dev->assigned_dev_lock); | ||
135 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
136 | struct kvm_guest_msix_entry *guest_entries = | ||
137 | assigned_dev->guest_msix_entries; | ||
138 | for (i = 0; i < assigned_dev->entries_nr; i++) { | ||
139 | if (!(guest_entries[i].flags & | ||
140 | KVM_ASSIGNED_MSIX_PENDING)) | ||
141 | continue; | ||
142 | guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; | ||
143 | kvm_set_irq(assigned_dev->kvm, | ||
144 | assigned_dev->irq_source_id, | ||
145 | guest_entries[i].vector, 1); | ||
146 | irq = assigned_dev->host_msix_entries[i].vector; | ||
147 | if (irq != 0) | ||
148 | enable_irq(irq); | ||
149 | assigned_dev->host_irq_disabled = false; | ||
150 | } | ||
151 | } else { | ||
152 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | ||
153 | assigned_dev->guest_irq, 1); | ||
154 | if (assigned_dev->irq_requested_type & | ||
155 | KVM_DEV_IRQ_GUEST_MSI) { | ||
156 | enable_irq(assigned_dev->host_irq); | ||
157 | assigned_dev->host_irq_disabled = false; | ||
158 | } | ||
159 | } | ||
160 | |||
161 | spin_unlock_irq(&assigned_dev->assigned_dev_lock); | ||
162 | mutex_unlock(&assigned_dev->kvm->lock); | ||
163 | } | ||
164 | |||
165 | static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) | ||
166 | { | ||
167 | unsigned long flags; | ||
168 | struct kvm_assigned_dev_kernel *assigned_dev = | ||
169 | (struct kvm_assigned_dev_kernel *) dev_id; | ||
170 | |||
171 | spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); | ||
172 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
173 | int index = find_index_from_host_irq(assigned_dev, irq); | ||
174 | if (index < 0) | ||
175 | goto out; | ||
176 | assigned_dev->guest_msix_entries[index].flags |= | ||
177 | KVM_ASSIGNED_MSIX_PENDING; | ||
178 | } | ||
179 | |||
180 | schedule_work(&assigned_dev->interrupt_work); | ||
181 | |||
182 | disable_irq_nosync(irq); | ||
183 | assigned_dev->host_irq_disabled = true; | ||
184 | |||
185 | out: | ||
186 | spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); | ||
187 | return IRQ_HANDLED; | ||
188 | } | ||
189 | |||
190 | /* Ack the irq line for an assigned device */ | ||
191 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | ||
192 | { | ||
193 | struct kvm_assigned_dev_kernel *dev; | ||
194 | unsigned long flags; | ||
195 | |||
196 | if (kian->gsi == -1) | ||
197 | return; | ||
198 | |||
199 | dev = container_of(kian, struct kvm_assigned_dev_kernel, | ||
200 | ack_notifier); | ||
201 | |||
202 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); | ||
203 | |||
204 | /* The guest irq may be shared so this ack may be | ||
205 | * from another device. | ||
206 | */ | ||
207 | spin_lock_irqsave(&dev->assigned_dev_lock, flags); | ||
208 | if (dev->host_irq_disabled) { | ||
209 | enable_irq(dev->host_irq); | ||
210 | dev->host_irq_disabled = false; | ||
211 | } | ||
212 | spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); | ||
213 | } | ||
214 | |||
215 | static void deassign_guest_irq(struct kvm *kvm, | ||
216 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
217 | { | ||
218 | kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier); | ||
219 | assigned_dev->ack_notifier.gsi = -1; | ||
220 | |||
221 | if (assigned_dev->irq_source_id != -1) | ||
222 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); | ||
223 | assigned_dev->irq_source_id = -1; | ||
224 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); | ||
225 | } | ||
226 | |||
227 | /* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ | ||
228 | static void deassign_host_irq(struct kvm *kvm, | ||
229 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
230 | { | ||
231 | /* | ||
232 | * In kvm_free_device_irq, cancel_work_sync return true if: | ||
233 | * 1. work is scheduled, and then cancelled. | ||
234 | * 2. work callback is executed. | ||
235 | * | ||
236 | * The first one ensured that the irq is disabled and no more events | ||
237 | * would happen. But for the second one, the irq may be enabled (e.g. | ||
238 | * for MSI). So we disable irq here to prevent further events. | ||
239 | * | ||
240 | * Notice this maybe result in nested disable if the interrupt type is | ||
241 | * INTx, but it's OK for we are going to free it. | ||
242 | * | ||
243 | * If this function is a part of VM destroy, please ensure that till | ||
244 | * now, the kvm state is still legal for probably we also have to wait | ||
245 | * interrupt_work done. | ||
246 | */ | ||
247 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
248 | int i; | ||
249 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
250 | disable_irq_nosync(assigned_dev-> | ||
251 | host_msix_entries[i].vector); | ||
252 | |||
253 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
254 | |||
255 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
256 | free_irq(assigned_dev->host_msix_entries[i].vector, | ||
257 | (void *)assigned_dev); | ||
258 | |||
259 | assigned_dev->entries_nr = 0; | ||
260 | kfree(assigned_dev->host_msix_entries); | ||
261 | kfree(assigned_dev->guest_msix_entries); | ||
262 | pci_disable_msix(assigned_dev->dev); | ||
263 | } else { | ||
264 | /* Deal with MSI and INTx */ | ||
265 | disable_irq_nosync(assigned_dev->host_irq); | ||
266 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
267 | |||
268 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); | ||
269 | |||
270 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) | ||
271 | pci_disable_msi(assigned_dev->dev); | ||
272 | } | ||
273 | |||
274 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); | ||
275 | } | ||
276 | |||
277 | static int kvm_deassign_irq(struct kvm *kvm, | ||
278 | struct kvm_assigned_dev_kernel *assigned_dev, | ||
279 | unsigned long irq_requested_type) | ||
280 | { | ||
281 | unsigned long guest_irq_type, host_irq_type; | ||
282 | |||
283 | if (!irqchip_in_kernel(kvm)) | ||
284 | return -EINVAL; | ||
285 | /* no irq assignment to deassign */ | ||
286 | if (!assigned_dev->irq_requested_type) | ||
287 | return -ENXIO; | ||
288 | |||
289 | host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; | ||
290 | guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; | ||
291 | |||
292 | if (host_irq_type) | ||
293 | deassign_host_irq(kvm, assigned_dev); | ||
294 | if (guest_irq_type) | ||
295 | deassign_guest_irq(kvm, assigned_dev); | ||
296 | |||
297 | return 0; | ||
298 | } | ||
299 | |||
300 | static void kvm_free_assigned_irq(struct kvm *kvm, | ||
301 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
302 | { | ||
303 | kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); | ||
304 | } | ||
305 | |||
306 | static void kvm_free_assigned_device(struct kvm *kvm, | ||
307 | struct kvm_assigned_dev_kernel | ||
308 | *assigned_dev) | ||
309 | { | ||
310 | kvm_free_assigned_irq(kvm, assigned_dev); | ||
311 | |||
312 | pci_reset_function(assigned_dev->dev); | ||
313 | |||
314 | pci_release_regions(assigned_dev->dev); | ||
315 | pci_disable_device(assigned_dev->dev); | ||
316 | pci_dev_put(assigned_dev->dev); | ||
317 | |||
318 | list_del(&assigned_dev->list); | ||
319 | kfree(assigned_dev); | ||
320 | } | ||
321 | |||
322 | void kvm_free_all_assigned_devices(struct kvm *kvm) | ||
323 | { | ||
324 | struct list_head *ptr, *ptr2; | ||
325 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
326 | |||
327 | list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { | ||
328 | assigned_dev = list_entry(ptr, | ||
329 | struct kvm_assigned_dev_kernel, | ||
330 | list); | ||
331 | |||
332 | kvm_free_assigned_device(kvm, assigned_dev); | ||
333 | } | ||
334 | } | ||
335 | |||
336 | static int assigned_device_enable_host_intx(struct kvm *kvm, | ||
337 | struct kvm_assigned_dev_kernel *dev) | ||
338 | { | ||
339 | dev->host_irq = dev->dev->irq; | ||
340 | /* Even though this is PCI, we don't want to use shared | ||
341 | * interrupts. Sharing host devices with guest-assigned devices | ||
342 | * on the same interrupt line is not a happy situation: there | ||
343 | * are going to be long delays in accepting, acking, etc. | ||
344 | */ | ||
345 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, | ||
346 | 0, "kvm_assigned_intx_device", (void *)dev)) | ||
347 | return -EIO; | ||
348 | return 0; | ||
349 | } | ||
350 | |||
351 | #ifdef __KVM_HAVE_MSI | ||
352 | static int assigned_device_enable_host_msi(struct kvm *kvm, | ||
353 | struct kvm_assigned_dev_kernel *dev) | ||
354 | { | ||
355 | int r; | ||
356 | |||
357 | if (!dev->dev->msi_enabled) { | ||
358 | r = pci_enable_msi(dev->dev); | ||
359 | if (r) | ||
360 | return r; | ||
361 | } | ||
362 | |||
363 | dev->host_irq = dev->dev->irq; | ||
364 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, | ||
365 | "kvm_assigned_msi_device", (void *)dev)) { | ||
366 | pci_disable_msi(dev->dev); | ||
367 | return -EIO; | ||
368 | } | ||
369 | |||
370 | return 0; | ||
371 | } | ||
372 | #endif | ||
373 | |||
374 | #ifdef __KVM_HAVE_MSIX | ||
375 | static int assigned_device_enable_host_msix(struct kvm *kvm, | ||
376 | struct kvm_assigned_dev_kernel *dev) | ||
377 | { | ||
378 | int i, r = -EINVAL; | ||
379 | |||
380 | /* host_msix_entries and guest_msix_entries should have been | ||
381 | * initialized */ | ||
382 | if (dev->entries_nr == 0) | ||
383 | return r; | ||
384 | |||
385 | r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); | ||
386 | if (r) | ||
387 | return r; | ||
388 | |||
389 | for (i = 0; i < dev->entries_nr; i++) { | ||
390 | r = request_irq(dev->host_msix_entries[i].vector, | ||
391 | kvm_assigned_dev_intr, 0, | ||
392 | "kvm_assigned_msix_device", | ||
393 | (void *)dev); | ||
394 | /* FIXME: free requested_irq's on failure */ | ||
395 | if (r) | ||
396 | return r; | ||
397 | } | ||
398 | |||
399 | return 0; | ||
400 | } | ||
401 | |||
402 | #endif | ||
403 | |||
404 | static int assigned_device_enable_guest_intx(struct kvm *kvm, | ||
405 | struct kvm_assigned_dev_kernel *dev, | ||
406 | struct kvm_assigned_irq *irq) | ||
407 | { | ||
408 | dev->guest_irq = irq->guest_irq; | ||
409 | dev->ack_notifier.gsi = irq->guest_irq; | ||
410 | return 0; | ||
411 | } | ||
412 | |||
413 | #ifdef __KVM_HAVE_MSI | ||
414 | static int assigned_device_enable_guest_msi(struct kvm *kvm, | ||
415 | struct kvm_assigned_dev_kernel *dev, | ||
416 | struct kvm_assigned_irq *irq) | ||
417 | { | ||
418 | dev->guest_irq = irq->guest_irq; | ||
419 | dev->ack_notifier.gsi = -1; | ||
420 | return 0; | ||
421 | } | ||
422 | #endif | ||
423 | #ifdef __KVM_HAVE_MSIX | ||
424 | static int assigned_device_enable_guest_msix(struct kvm *kvm, | ||
425 | struct kvm_assigned_dev_kernel *dev, | ||
426 | struct kvm_assigned_irq *irq) | ||
427 | { | ||
428 | dev->guest_irq = irq->guest_irq; | ||
429 | dev->ack_notifier.gsi = -1; | ||
430 | return 0; | ||
431 | } | ||
432 | #endif | ||
433 | |||
434 | static int assign_host_irq(struct kvm *kvm, | ||
435 | struct kvm_assigned_dev_kernel *dev, | ||
436 | __u32 host_irq_type) | ||
437 | { | ||
438 | int r = -EEXIST; | ||
439 | |||
440 | if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) | ||
441 | return r; | ||
442 | |||
443 | switch (host_irq_type) { | ||
444 | case KVM_DEV_IRQ_HOST_INTX: | ||
445 | r = assigned_device_enable_host_intx(kvm, dev); | ||
446 | break; | ||
447 | #ifdef __KVM_HAVE_MSI | ||
448 | case KVM_DEV_IRQ_HOST_MSI: | ||
449 | r = assigned_device_enable_host_msi(kvm, dev); | ||
450 | break; | ||
451 | #endif | ||
452 | #ifdef __KVM_HAVE_MSIX | ||
453 | case KVM_DEV_IRQ_HOST_MSIX: | ||
454 | r = assigned_device_enable_host_msix(kvm, dev); | ||
455 | break; | ||
456 | #endif | ||
457 | default: | ||
458 | r = -EINVAL; | ||
459 | } | ||
460 | |||
461 | if (!r) | ||
462 | dev->irq_requested_type |= host_irq_type; | ||
463 | |||
464 | return r; | ||
465 | } | ||
466 | |||
467 | static int assign_guest_irq(struct kvm *kvm, | ||
468 | struct kvm_assigned_dev_kernel *dev, | ||
469 | struct kvm_assigned_irq *irq, | ||
470 | unsigned long guest_irq_type) | ||
471 | { | ||
472 | int id; | ||
473 | int r = -EEXIST; | ||
474 | |||
475 | if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) | ||
476 | return r; | ||
477 | |||
478 | id = kvm_request_irq_source_id(kvm); | ||
479 | if (id < 0) | ||
480 | return id; | ||
481 | |||
482 | dev->irq_source_id = id; | ||
483 | |||
484 | switch (guest_irq_type) { | ||
485 | case KVM_DEV_IRQ_GUEST_INTX: | ||
486 | r = assigned_device_enable_guest_intx(kvm, dev, irq); | ||
487 | break; | ||
488 | #ifdef __KVM_HAVE_MSI | ||
489 | case KVM_DEV_IRQ_GUEST_MSI: | ||
490 | r = assigned_device_enable_guest_msi(kvm, dev, irq); | ||
491 | break; | ||
492 | #endif | ||
493 | #ifdef __KVM_HAVE_MSIX | ||
494 | case KVM_DEV_IRQ_GUEST_MSIX: | ||
495 | r = assigned_device_enable_guest_msix(kvm, dev, irq); | ||
496 | break; | ||
497 | #endif | ||
498 | default: | ||
499 | r = -EINVAL; | ||
500 | } | ||
501 | |||
502 | if (!r) { | ||
503 | dev->irq_requested_type |= guest_irq_type; | ||
504 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | ||
505 | } else | ||
506 | kvm_free_irq_source_id(kvm, dev->irq_source_id); | ||
507 | |||
508 | return r; | ||
509 | } | ||
510 | |||
511 | /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ | ||
512 | static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, | ||
513 | struct kvm_assigned_irq *assigned_irq) | ||
514 | { | ||
515 | int r = -EINVAL; | ||
516 | struct kvm_assigned_dev_kernel *match; | ||
517 | unsigned long host_irq_type, guest_irq_type; | ||
518 | |||
519 | if (!capable(CAP_SYS_RAWIO)) | ||
520 | return -EPERM; | ||
521 | |||
522 | if (!irqchip_in_kernel(kvm)) | ||
523 | return r; | ||
524 | |||
525 | mutex_lock(&kvm->lock); | ||
526 | r = -ENODEV; | ||
527 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
528 | assigned_irq->assigned_dev_id); | ||
529 | if (!match) | ||
530 | goto out; | ||
531 | |||
532 | host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); | ||
533 | guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); | ||
534 | |||
535 | r = -EINVAL; | ||
536 | /* can only assign one type at a time */ | ||
537 | if (hweight_long(host_irq_type) > 1) | ||
538 | goto out; | ||
539 | if (hweight_long(guest_irq_type) > 1) | ||
540 | goto out; | ||
541 | if (host_irq_type == 0 && guest_irq_type == 0) | ||
542 | goto out; | ||
543 | |||
544 | r = 0; | ||
545 | if (host_irq_type) | ||
546 | r = assign_host_irq(kvm, match, host_irq_type); | ||
547 | if (r) | ||
548 | goto out; | ||
549 | |||
550 | if (guest_irq_type) | ||
551 | r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); | ||
552 | out: | ||
553 | mutex_unlock(&kvm->lock); | ||
554 | return r; | ||
555 | } | ||
556 | |||
557 | static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, | ||
558 | struct kvm_assigned_irq | ||
559 | *assigned_irq) | ||
560 | { | ||
561 | int r = -ENODEV; | ||
562 | struct kvm_assigned_dev_kernel *match; | ||
563 | |||
564 | mutex_lock(&kvm->lock); | ||
565 | |||
566 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
567 | assigned_irq->assigned_dev_id); | ||
568 | if (!match) | ||
569 | goto out; | ||
570 | |||
571 | r = kvm_deassign_irq(kvm, match, assigned_irq->flags); | ||
572 | out: | ||
573 | mutex_unlock(&kvm->lock); | ||
574 | return r; | ||
575 | } | ||
576 | |||
577 | static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | ||
578 | struct kvm_assigned_pci_dev *assigned_dev) | ||
579 | { | ||
580 | int r = 0; | ||
581 | struct kvm_assigned_dev_kernel *match; | ||
582 | struct pci_dev *dev; | ||
583 | |||
584 | down_read(&kvm->slots_lock); | ||
585 | mutex_lock(&kvm->lock); | ||
586 | |||
587 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
588 | assigned_dev->assigned_dev_id); | ||
589 | if (match) { | ||
590 | /* device already assigned */ | ||
591 | r = -EEXIST; | ||
592 | goto out; | ||
593 | } | ||
594 | |||
595 | match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); | ||
596 | if (match == NULL) { | ||
597 | printk(KERN_INFO "%s: Couldn't allocate memory\n", | ||
598 | __func__); | ||
599 | r = -ENOMEM; | ||
600 | goto out; | ||
601 | } | ||
602 | dev = pci_get_bus_and_slot(assigned_dev->busnr, | ||
603 | assigned_dev->devfn); | ||
604 | if (!dev) { | ||
605 | printk(KERN_INFO "%s: host device not found\n", __func__); | ||
606 | r = -EINVAL; | ||
607 | goto out_free; | ||
608 | } | ||
609 | if (pci_enable_device(dev)) { | ||
610 | printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); | ||
611 | r = -EBUSY; | ||
612 | goto out_put; | ||
613 | } | ||
614 | r = pci_request_regions(dev, "kvm_assigned_device"); | ||
615 | if (r) { | ||
616 | printk(KERN_INFO "%s: Could not get access to device regions\n", | ||
617 | __func__); | ||
618 | goto out_disable; | ||
619 | } | ||
620 | |||
621 | pci_reset_function(dev); | ||
622 | |||
623 | match->assigned_dev_id = assigned_dev->assigned_dev_id; | ||
624 | match->host_busnr = assigned_dev->busnr; | ||
625 | match->host_devfn = assigned_dev->devfn; | ||
626 | match->flags = assigned_dev->flags; | ||
627 | match->dev = dev; | ||
628 | spin_lock_init(&match->assigned_dev_lock); | ||
629 | match->irq_source_id = -1; | ||
630 | match->kvm = kvm; | ||
631 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | ||
632 | INIT_WORK(&match->interrupt_work, | ||
633 | kvm_assigned_dev_interrupt_work_handler); | ||
634 | |||
635 | list_add(&match->list, &kvm->arch.assigned_dev_head); | ||
636 | |||
637 | if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { | ||
638 | if (!kvm->arch.iommu_domain) { | ||
639 | r = kvm_iommu_map_guest(kvm); | ||
640 | if (r) | ||
641 | goto out_list_del; | ||
642 | } | ||
643 | r = kvm_assign_device(kvm, match); | ||
644 | if (r) | ||
645 | goto out_list_del; | ||
646 | } | ||
647 | |||
648 | out: | ||
649 | mutex_unlock(&kvm->lock); | ||
650 | up_read(&kvm->slots_lock); | ||
651 | return r; | ||
652 | out_list_del: | ||
653 | list_del(&match->list); | ||
654 | pci_release_regions(dev); | ||
655 | out_disable: | ||
656 | pci_disable_device(dev); | ||
657 | out_put: | ||
658 | pci_dev_put(dev); | ||
659 | out_free: | ||
660 | kfree(match); | ||
661 | mutex_unlock(&kvm->lock); | ||
662 | up_read(&kvm->slots_lock); | ||
663 | return r; | ||
664 | } | ||
665 | #endif | ||
666 | |||
667 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT | ||
668 | static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, | ||
669 | struct kvm_assigned_pci_dev *assigned_dev) | ||
670 | { | ||
671 | int r = 0; | ||
672 | struct kvm_assigned_dev_kernel *match; | ||
673 | |||
674 | mutex_lock(&kvm->lock); | ||
675 | |||
676 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
677 | assigned_dev->assigned_dev_id); | ||
678 | if (!match) { | ||
679 | printk(KERN_INFO "%s: device hasn't been assigned before, " | ||
680 | "so cannot be deassigned\n", __func__); | ||
681 | r = -EINVAL; | ||
682 | goto out; | ||
683 | } | ||
684 | |||
685 | if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) | ||
686 | kvm_deassign_device(kvm, match); | ||
687 | |||
688 | kvm_free_assigned_device(kvm, match); | ||
689 | |||
690 | out: | ||
691 | mutex_unlock(&kvm->lock); | ||
692 | return r; | ||
693 | } | ||
694 | #endif | ||
695 | |||
696 | static inline int valid_vcpu(int n) | ||
697 | { | ||
698 | return likely(n >= 0 && n < KVM_MAX_VCPUS); | ||
699 | } | ||
700 | 92 | ||
701 | inline int kvm_is_mmio_pfn(pfn_t pfn) | 93 | inline int kvm_is_mmio_pfn(pfn_t pfn) |
702 | { | 94 | { |
@@ -742,15 +134,11 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | |||
742 | bool called = true; | 134 | bool called = true; |
743 | struct kvm_vcpu *vcpu; | 135 | struct kvm_vcpu *vcpu; |
744 | 136 | ||
745 | if (alloc_cpumask_var(&cpus, GFP_ATOMIC)) | 137 | zalloc_cpumask_var(&cpus, GFP_ATOMIC); |
746 | cpumask_clear(cpus); | ||
747 | 138 | ||
748 | spin_lock(&kvm->requests_lock); | 139 | spin_lock(&kvm->requests_lock); |
749 | me = get_cpu(); | 140 | me = raw_smp_processor_id(); |
750 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | 141 | kvm_for_each_vcpu(i, vcpu, kvm) { |
751 | vcpu = kvm->vcpus[i]; | ||
752 | if (!vcpu) | ||
753 | continue; | ||
754 | if (test_and_set_bit(req, &vcpu->requests)) | 142 | if (test_and_set_bit(req, &vcpu->requests)) |
755 | continue; | 143 | continue; |
756 | cpu = vcpu->cpu; | 144 | cpu = vcpu->cpu; |
@@ -763,7 +151,6 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | |||
763 | smp_call_function_many(cpus, ack_flush, NULL, 1); | 151 | smp_call_function_many(cpus, ack_flush, NULL, 1); |
764 | else | 152 | else |
765 | called = false; | 153 | called = false; |
766 | put_cpu(); | ||
767 | spin_unlock(&kvm->requests_lock); | 154 | spin_unlock(&kvm->requests_lock); |
768 | free_cpumask_var(cpus); | 155 | free_cpumask_var(cpus); |
769 | return called; | 156 | return called; |
@@ -859,6 +246,19 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, | |||
859 | 246 | ||
860 | } | 247 | } |
861 | 248 | ||
249 | static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, | ||
250 | struct mm_struct *mm, | ||
251 | unsigned long address, | ||
252 | pte_t pte) | ||
253 | { | ||
254 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | ||
255 | |||
256 | spin_lock(&kvm->mmu_lock); | ||
257 | kvm->mmu_notifier_seq++; | ||
258 | kvm_set_spte_hva(kvm, address, pte); | ||
259 | spin_unlock(&kvm->mmu_lock); | ||
260 | } | ||
261 | |||
862 | static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, | 262 | static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, |
863 | struct mm_struct *mm, | 263 | struct mm_struct *mm, |
864 | unsigned long start, | 264 | unsigned long start, |
@@ -938,12 +338,14 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { | |||
938 | .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, | 338 | .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, |
939 | .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, | 339 | .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, |
940 | .clear_flush_young = kvm_mmu_notifier_clear_flush_young, | 340 | .clear_flush_young = kvm_mmu_notifier_clear_flush_young, |
341 | .change_pte = kvm_mmu_notifier_change_pte, | ||
941 | .release = kvm_mmu_notifier_release, | 342 | .release = kvm_mmu_notifier_release, |
942 | }; | 343 | }; |
943 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ | 344 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ |
944 | 345 | ||
945 | static struct kvm *kvm_create_vm(void) | 346 | static struct kvm *kvm_create_vm(void) |
946 | { | 347 | { |
348 | int r = 0; | ||
947 | struct kvm *kvm = kvm_arch_create_vm(); | 349 | struct kvm *kvm = kvm_arch_create_vm(); |
948 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 350 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
949 | struct page *page; | 351 | struct page *page; |
@@ -951,16 +353,21 @@ static struct kvm *kvm_create_vm(void) | |||
951 | 353 | ||
952 | if (IS_ERR(kvm)) | 354 | if (IS_ERR(kvm)) |
953 | goto out; | 355 | goto out; |
356 | |||
357 | r = hardware_enable_all(); | ||
358 | if (r) | ||
359 | goto out_err_nodisable; | ||
360 | |||
954 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 361 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
955 | INIT_LIST_HEAD(&kvm->irq_routing); | ||
956 | INIT_HLIST_HEAD(&kvm->mask_notifier_list); | 362 | INIT_HLIST_HEAD(&kvm->mask_notifier_list); |
363 | INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); | ||
957 | #endif | 364 | #endif |
958 | 365 | ||
959 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 366 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
960 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 367 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
961 | if (!page) { | 368 | if (!page) { |
962 | kfree(kvm); | 369 | r = -ENOMEM; |
963 | return ERR_PTR(-ENOMEM); | 370 | goto out_err; |
964 | } | 371 | } |
965 | kvm->coalesced_mmio_ring = | 372 | kvm->coalesced_mmio_ring = |
966 | (struct kvm_coalesced_mmio_ring *)page_address(page); | 373 | (struct kvm_coalesced_mmio_ring *)page_address(page); |
@@ -968,15 +375,13 @@ static struct kvm *kvm_create_vm(void) | |||
968 | 375 | ||
969 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | 376 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) |
970 | { | 377 | { |
971 | int err; | ||
972 | kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; | 378 | kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; |
973 | err = mmu_notifier_register(&kvm->mmu_notifier, current->mm); | 379 | r = mmu_notifier_register(&kvm->mmu_notifier, current->mm); |
974 | if (err) { | 380 | if (r) { |
975 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 381 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
976 | put_page(page); | 382 | put_page(page); |
977 | #endif | 383 | #endif |
978 | kfree(kvm); | 384 | goto out_err; |
979 | return ERR_PTR(err); | ||
980 | } | 385 | } |
981 | } | 386 | } |
982 | #endif | 387 | #endif |
@@ -986,7 +391,9 @@ static struct kvm *kvm_create_vm(void) | |||
986 | spin_lock_init(&kvm->mmu_lock); | 391 | spin_lock_init(&kvm->mmu_lock); |
987 | spin_lock_init(&kvm->requests_lock); | 392 | spin_lock_init(&kvm->requests_lock); |
988 | kvm_io_bus_init(&kvm->pio_bus); | 393 | kvm_io_bus_init(&kvm->pio_bus); |
394 | kvm_eventfd_init(kvm); | ||
989 | mutex_init(&kvm->lock); | 395 | mutex_init(&kvm->lock); |
396 | mutex_init(&kvm->irq_lock); | ||
990 | kvm_io_bus_init(&kvm->mmio_bus); | 397 | kvm_io_bus_init(&kvm->mmio_bus); |
991 | init_rwsem(&kvm->slots_lock); | 398 | init_rwsem(&kvm->slots_lock); |
992 | atomic_set(&kvm->users_count, 1); | 399 | atomic_set(&kvm->users_count, 1); |
@@ -998,6 +405,15 @@ static struct kvm *kvm_create_vm(void) | |||
998 | #endif | 405 | #endif |
999 | out: | 406 | out: |
1000 | return kvm; | 407 | return kvm; |
408 | |||
409 | #if defined(KVM_COALESCED_MMIO_PAGE_OFFSET) || \ | ||
410 | (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)) | ||
411 | out_err: | ||
412 | hardware_disable_all(); | ||
413 | #endif | ||
414 | out_err_nodisable: | ||
415 | kfree(kvm); | ||
416 | return ERR_PTR(r); | ||
1001 | } | 417 | } |
1002 | 418 | ||
1003 | /* | 419 | /* |
@@ -1006,19 +422,25 @@ out: | |||
1006 | static void kvm_free_physmem_slot(struct kvm_memory_slot *free, | 422 | static void kvm_free_physmem_slot(struct kvm_memory_slot *free, |
1007 | struct kvm_memory_slot *dont) | 423 | struct kvm_memory_slot *dont) |
1008 | { | 424 | { |
425 | int i; | ||
426 | |||
1009 | if (!dont || free->rmap != dont->rmap) | 427 | if (!dont || free->rmap != dont->rmap) |
1010 | vfree(free->rmap); | 428 | vfree(free->rmap); |
1011 | 429 | ||
1012 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) | 430 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) |
1013 | vfree(free->dirty_bitmap); | 431 | vfree(free->dirty_bitmap); |
1014 | 432 | ||
1015 | if (!dont || free->lpage_info != dont->lpage_info) | 433 | |
1016 | vfree(free->lpage_info); | 434 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { |
435 | if (!dont || free->lpage_info[i] != dont->lpage_info[i]) { | ||
436 | vfree(free->lpage_info[i]); | ||
437 | free->lpage_info[i] = NULL; | ||
438 | } | ||
439 | } | ||
1017 | 440 | ||
1018 | free->npages = 0; | 441 | free->npages = 0; |
1019 | free->dirty_bitmap = NULL; | 442 | free->dirty_bitmap = NULL; |
1020 | free->rmap = NULL; | 443 | free->rmap = NULL; |
1021 | free->lpage_info = NULL; | ||
1022 | } | 444 | } |
1023 | 445 | ||
1024 | void kvm_free_physmem(struct kvm *kvm) | 446 | void kvm_free_physmem(struct kvm *kvm) |
@@ -1050,6 +472,7 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
1050 | kvm_arch_flush_shadow(kvm); | 472 | kvm_arch_flush_shadow(kvm); |
1051 | #endif | 473 | #endif |
1052 | kvm_arch_destroy_vm(kvm); | 474 | kvm_arch_destroy_vm(kvm); |
475 | hardware_disable_all(); | ||
1053 | mmdrop(mm); | 476 | mmdrop(mm); |
1054 | } | 477 | } |
1055 | 478 | ||
@@ -1071,6 +494,8 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) | |||
1071 | { | 494 | { |
1072 | struct kvm *kvm = filp->private_data; | 495 | struct kvm *kvm = filp->private_data; |
1073 | 496 | ||
497 | kvm_irqfd_release(kvm); | ||
498 | |||
1074 | kvm_put_kvm(kvm); | 499 | kvm_put_kvm(kvm); |
1075 | return 0; | 500 | return 0; |
1076 | } | 501 | } |
@@ -1089,8 +514,8 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
1089 | { | 514 | { |
1090 | int r; | 515 | int r; |
1091 | gfn_t base_gfn; | 516 | gfn_t base_gfn; |
1092 | unsigned long npages, ugfn; | 517 | unsigned long npages; |
1093 | unsigned long largepages, i; | 518 | unsigned long i; |
1094 | struct kvm_memory_slot *memslot; | 519 | struct kvm_memory_slot *memslot; |
1095 | struct kvm_memory_slot old, new; | 520 | struct kvm_memory_slot old, new; |
1096 | 521 | ||
@@ -1164,31 +589,51 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
1164 | else | 589 | else |
1165 | new.userspace_addr = 0; | 590 | new.userspace_addr = 0; |
1166 | } | 591 | } |
1167 | if (npages && !new.lpage_info) { | 592 | if (!npages) |
1168 | largepages = 1 + (base_gfn + npages - 1) / KVM_PAGES_PER_HPAGE; | 593 | goto skip_lpage; |
1169 | largepages -= base_gfn / KVM_PAGES_PER_HPAGE; | 594 | |
595 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | ||
596 | unsigned long ugfn; | ||
597 | unsigned long j; | ||
598 | int lpages; | ||
599 | int level = i + 2; | ||
1170 | 600 | ||
1171 | new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info)); | 601 | /* Avoid unused variable warning if no large pages */ |
602 | (void)level; | ||
1172 | 603 | ||
1173 | if (!new.lpage_info) | 604 | if (new.lpage_info[i]) |
605 | continue; | ||
606 | |||
607 | lpages = 1 + (base_gfn + npages - 1) / | ||
608 | KVM_PAGES_PER_HPAGE(level); | ||
609 | lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level); | ||
610 | |||
611 | new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i])); | ||
612 | |||
613 | if (!new.lpage_info[i]) | ||
1174 | goto out_free; | 614 | goto out_free; |
1175 | 615 | ||
1176 | memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info)); | 616 | memset(new.lpage_info[i], 0, |
617 | lpages * sizeof(*new.lpage_info[i])); | ||
1177 | 618 | ||
1178 | if (base_gfn % KVM_PAGES_PER_HPAGE) | 619 | if (base_gfn % KVM_PAGES_PER_HPAGE(level)) |
1179 | new.lpage_info[0].write_count = 1; | 620 | new.lpage_info[i][0].write_count = 1; |
1180 | if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE) | 621 | if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level)) |
1181 | new.lpage_info[largepages-1].write_count = 1; | 622 | new.lpage_info[i][lpages - 1].write_count = 1; |
1182 | ugfn = new.userspace_addr >> PAGE_SHIFT; | 623 | ugfn = new.userspace_addr >> PAGE_SHIFT; |
1183 | /* | 624 | /* |
1184 | * If the gfn and userspace address are not aligned wrt each | 625 | * If the gfn and userspace address are not aligned wrt each |
1185 | * other, disable large page support for this slot | 626 | * other, or if explicitly asked to, disable large page |
627 | * support for this slot | ||
1186 | */ | 628 | */ |
1187 | if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE - 1)) | 629 | if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || |
1188 | for (i = 0; i < largepages; ++i) | 630 | !largepages_enabled) |
1189 | new.lpage_info[i].write_count = 1; | 631 | for (j = 0; j < lpages; ++j) |
632 | new.lpage_info[i][j].write_count = 1; | ||
1190 | } | 633 | } |
1191 | 634 | ||
635 | skip_lpage: | ||
636 | |||
1192 | /* Allocate page dirty bitmap if needed */ | 637 | /* Allocate page dirty bitmap if needed */ |
1193 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { | 638 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { |
1194 | unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8; | 639 | unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8; |
@@ -1200,6 +645,10 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
1200 | if (old.npages) | 645 | if (old.npages) |
1201 | kvm_arch_flush_shadow(kvm); | 646 | kvm_arch_flush_shadow(kvm); |
1202 | } | 647 | } |
648 | #else /* not defined CONFIG_S390 */ | ||
649 | new.user_alloc = user_alloc; | ||
650 | if (user_alloc) | ||
651 | new.userspace_addr = mem->userspace_addr; | ||
1203 | #endif /* not defined CONFIG_S390 */ | 652 | #endif /* not defined CONFIG_S390 */ |
1204 | 653 | ||
1205 | if (!npages) | 654 | if (!npages) |
@@ -1299,6 +748,12 @@ out: | |||
1299 | return r; | 748 | return r; |
1300 | } | 749 | } |
1301 | 750 | ||
751 | void kvm_disable_largepages(void) | ||
752 | { | ||
753 | largepages_enabled = false; | ||
754 | } | ||
755 | EXPORT_SYMBOL_GPL(kvm_disable_largepages); | ||
756 | |||
1302 | int is_error_page(struct page *page) | 757 | int is_error_page(struct page *page) |
1303 | { | 758 | { |
1304 | return page == bad_page; | 759 | return page == bad_page; |
@@ -1620,8 +1075,8 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | |||
1620 | unsigned long rel_gfn = gfn - memslot->base_gfn; | 1075 | unsigned long rel_gfn = gfn - memslot->base_gfn; |
1621 | 1076 | ||
1622 | /* avoid RMW */ | 1077 | /* avoid RMW */ |
1623 | if (!test_bit(rel_gfn, memslot->dirty_bitmap)) | 1078 | if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap)) |
1624 | set_bit(rel_gfn, memslot->dirty_bitmap); | 1079 | generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); |
1625 | } | 1080 | } |
1626 | } | 1081 | } |
1627 | 1082 | ||
@@ -1635,9 +1090,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
1635 | for (;;) { | 1090 | for (;;) { |
1636 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | 1091 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); |
1637 | 1092 | ||
1638 | if ((kvm_arch_interrupt_allowed(vcpu) && | 1093 | if (kvm_arch_vcpu_runnable(vcpu)) { |
1639 | kvm_cpu_has_interrupt(vcpu)) || | ||
1640 | kvm_arch_vcpu_runnable(vcpu)) { | ||
1641 | set_bit(KVM_REQ_UNHALT, &vcpu->requests); | 1094 | set_bit(KVM_REQ_UNHALT, &vcpu->requests); |
1642 | break; | 1095 | break; |
1643 | } | 1096 | } |
@@ -1646,9 +1099,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
1646 | if (signal_pending(current)) | 1099 | if (signal_pending(current)) |
1647 | break; | 1100 | break; |
1648 | 1101 | ||
1649 | vcpu_put(vcpu); | ||
1650 | schedule(); | 1102 | schedule(); |
1651 | vcpu_load(vcpu); | ||
1652 | } | 1103 | } |
1653 | 1104 | ||
1654 | finish_wait(&vcpu->wq, &wait); | 1105 | finish_wait(&vcpu->wq, &wait); |
@@ -1662,6 +1113,21 @@ void kvm_resched(struct kvm_vcpu *vcpu) | |||
1662 | } | 1113 | } |
1663 | EXPORT_SYMBOL_GPL(kvm_resched); | 1114 | EXPORT_SYMBOL_GPL(kvm_resched); |
1664 | 1115 | ||
1116 | void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu) | ||
1117 | { | ||
1118 | ktime_t expires; | ||
1119 | DEFINE_WAIT(wait); | ||
1120 | |||
1121 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | ||
1122 | |||
1123 | /* Sleep for 100 us, and hope lock-holder got scheduled */ | ||
1124 | expires = ktime_add_ns(ktime_get(), 100000UL); | ||
1125 | schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); | ||
1126 | |||
1127 | finish_wait(&vcpu->wq, &wait); | ||
1128 | } | ||
1129 | EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); | ||
1130 | |||
1665 | static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1131 | static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
1666 | { | 1132 | { |
1667 | struct kvm_vcpu *vcpu = vma->vm_file->private_data; | 1133 | struct kvm_vcpu *vcpu = vma->vm_file->private_data; |
@@ -1684,7 +1150,7 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1684 | return 0; | 1150 | return 0; |
1685 | } | 1151 | } |
1686 | 1152 | ||
1687 | static struct vm_operations_struct kvm_vcpu_vm_ops = { | 1153 | static const struct vm_operations_struct kvm_vcpu_vm_ops = { |
1688 | .fault = kvm_vcpu_fault, | 1154 | .fault = kvm_vcpu_fault, |
1689 | }; | 1155 | }; |
1690 | 1156 | ||
@@ -1714,24 +1180,18 @@ static struct file_operations kvm_vcpu_fops = { | |||
1714 | */ | 1180 | */ |
1715 | static int create_vcpu_fd(struct kvm_vcpu *vcpu) | 1181 | static int create_vcpu_fd(struct kvm_vcpu *vcpu) |
1716 | { | 1182 | { |
1717 | int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0); | 1183 | return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR); |
1718 | if (fd < 0) | ||
1719 | kvm_put_kvm(vcpu->kvm); | ||
1720 | return fd; | ||
1721 | } | 1184 | } |
1722 | 1185 | ||
1723 | /* | 1186 | /* |
1724 | * Creates some virtual cpus. Good luck creating more than one. | 1187 | * Creates some virtual cpus. Good luck creating more than one. |
1725 | */ | 1188 | */ |
1726 | static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | 1189 | static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) |
1727 | { | 1190 | { |
1728 | int r; | 1191 | int r; |
1729 | struct kvm_vcpu *vcpu; | 1192 | struct kvm_vcpu *vcpu, *v; |
1730 | 1193 | ||
1731 | if (!valid_vcpu(n)) | 1194 | vcpu = kvm_arch_vcpu_create(kvm, id); |
1732 | return -EINVAL; | ||
1733 | |||
1734 | vcpu = kvm_arch_vcpu_create(kvm, n); | ||
1735 | if (IS_ERR(vcpu)) | 1195 | if (IS_ERR(vcpu)) |
1736 | return PTR_ERR(vcpu); | 1196 | return PTR_ERR(vcpu); |
1737 | 1197 | ||
@@ -1742,23 +1202,38 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | |||
1742 | return r; | 1202 | return r; |
1743 | 1203 | ||
1744 | mutex_lock(&kvm->lock); | 1204 | mutex_lock(&kvm->lock); |
1745 | if (kvm->vcpus[n]) { | 1205 | if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { |
1746 | r = -EEXIST; | 1206 | r = -EINVAL; |
1747 | goto vcpu_destroy; | 1207 | goto vcpu_destroy; |
1748 | } | 1208 | } |
1749 | kvm->vcpus[n] = vcpu; | 1209 | |
1750 | mutex_unlock(&kvm->lock); | 1210 | kvm_for_each_vcpu(r, v, kvm) |
1211 | if (v->vcpu_id == id) { | ||
1212 | r = -EEXIST; | ||
1213 | goto vcpu_destroy; | ||
1214 | } | ||
1215 | |||
1216 | BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]); | ||
1751 | 1217 | ||
1752 | /* Now it's all set up, let userspace reach it */ | 1218 | /* Now it's all set up, let userspace reach it */ |
1753 | kvm_get_kvm(kvm); | 1219 | kvm_get_kvm(kvm); |
1754 | r = create_vcpu_fd(vcpu); | 1220 | r = create_vcpu_fd(vcpu); |
1755 | if (r < 0) | 1221 | if (r < 0) { |
1756 | goto unlink; | 1222 | kvm_put_kvm(kvm); |
1223 | goto vcpu_destroy; | ||
1224 | } | ||
1225 | |||
1226 | kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu; | ||
1227 | smp_wmb(); | ||
1228 | atomic_inc(&kvm->online_vcpus); | ||
1229 | |||
1230 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | ||
1231 | if (kvm->bsp_vcpu_id == id) | ||
1232 | kvm->bsp_vcpu = vcpu; | ||
1233 | #endif | ||
1234 | mutex_unlock(&kvm->lock); | ||
1757 | return r; | 1235 | return r; |
1758 | 1236 | ||
1759 | unlink: | ||
1760 | mutex_lock(&kvm->lock); | ||
1761 | kvm->vcpus[n] = NULL; | ||
1762 | vcpu_destroy: | 1237 | vcpu_destroy: |
1763 | mutex_unlock(&kvm->lock); | 1238 | mutex_unlock(&kvm->lock); |
1764 | kvm_arch_vcpu_destroy(vcpu); | 1239 | kvm_arch_vcpu_destroy(vcpu); |
@@ -1776,88 +1251,6 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) | |||
1776 | return 0; | 1251 | return 0; |
1777 | } | 1252 | } |
1778 | 1253 | ||
1779 | #ifdef __KVM_HAVE_MSIX | ||
1780 | static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, | ||
1781 | struct kvm_assigned_msix_nr *entry_nr) | ||
1782 | { | ||
1783 | int r = 0; | ||
1784 | struct kvm_assigned_dev_kernel *adev; | ||
1785 | |||
1786 | mutex_lock(&kvm->lock); | ||
1787 | |||
1788 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
1789 | entry_nr->assigned_dev_id); | ||
1790 | if (!adev) { | ||
1791 | r = -EINVAL; | ||
1792 | goto msix_nr_out; | ||
1793 | } | ||
1794 | |||
1795 | if (adev->entries_nr == 0) { | ||
1796 | adev->entries_nr = entry_nr->entry_nr; | ||
1797 | if (adev->entries_nr == 0 || | ||
1798 | adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { | ||
1799 | r = -EINVAL; | ||
1800 | goto msix_nr_out; | ||
1801 | } | ||
1802 | |||
1803 | adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * | ||
1804 | entry_nr->entry_nr, | ||
1805 | GFP_KERNEL); | ||
1806 | if (!adev->host_msix_entries) { | ||
1807 | r = -ENOMEM; | ||
1808 | goto msix_nr_out; | ||
1809 | } | ||
1810 | adev->guest_msix_entries = kzalloc( | ||
1811 | sizeof(struct kvm_guest_msix_entry) * | ||
1812 | entry_nr->entry_nr, GFP_KERNEL); | ||
1813 | if (!adev->guest_msix_entries) { | ||
1814 | kfree(adev->host_msix_entries); | ||
1815 | r = -ENOMEM; | ||
1816 | goto msix_nr_out; | ||
1817 | } | ||
1818 | } else /* Not allowed set MSI-X number twice */ | ||
1819 | r = -EINVAL; | ||
1820 | msix_nr_out: | ||
1821 | mutex_unlock(&kvm->lock); | ||
1822 | return r; | ||
1823 | } | ||
1824 | |||
1825 | static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, | ||
1826 | struct kvm_assigned_msix_entry *entry) | ||
1827 | { | ||
1828 | int r = 0, i; | ||
1829 | struct kvm_assigned_dev_kernel *adev; | ||
1830 | |||
1831 | mutex_lock(&kvm->lock); | ||
1832 | |||
1833 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
1834 | entry->assigned_dev_id); | ||
1835 | |||
1836 | if (!adev) { | ||
1837 | r = -EINVAL; | ||
1838 | goto msix_entry_out; | ||
1839 | } | ||
1840 | |||
1841 | for (i = 0; i < adev->entries_nr; i++) | ||
1842 | if (adev->guest_msix_entries[i].vector == 0 || | ||
1843 | adev->guest_msix_entries[i].entry == entry->entry) { | ||
1844 | adev->guest_msix_entries[i].entry = entry->entry; | ||
1845 | adev->guest_msix_entries[i].vector = entry->gsi; | ||
1846 | adev->host_msix_entries[i].entry = entry->entry; | ||
1847 | break; | ||
1848 | } | ||
1849 | if (i == adev->entries_nr) { | ||
1850 | r = -ENOSPC; | ||
1851 | goto msix_entry_out; | ||
1852 | } | ||
1853 | |||
1854 | msix_entry_out: | ||
1855 | mutex_unlock(&kvm->lock); | ||
1856 | |||
1857 | return r; | ||
1858 | } | ||
1859 | #endif | ||
1860 | |||
1861 | static long kvm_vcpu_ioctl(struct file *filp, | 1254 | static long kvm_vcpu_ioctl(struct file *filp, |
1862 | unsigned int ioctl, unsigned long arg) | 1255 | unsigned int ioctl, unsigned long arg) |
1863 | { | 1256 | { |
@@ -2116,118 +1509,89 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2116 | break; | 1509 | break; |
2117 | } | 1510 | } |
2118 | #endif | 1511 | #endif |
2119 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | 1512 | case KVM_IRQFD: { |
2120 | case KVM_ASSIGN_PCI_DEVICE: { | 1513 | struct kvm_irqfd data; |
2121 | struct kvm_assigned_pci_dev assigned_dev; | ||
2122 | 1514 | ||
2123 | r = -EFAULT; | 1515 | r = -EFAULT; |
2124 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | 1516 | if (copy_from_user(&data, argp, sizeof data)) |
2125 | goto out; | ||
2126 | r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); | ||
2127 | if (r) | ||
2128 | goto out; | 1517 | goto out; |
1518 | r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags); | ||
2129 | break; | 1519 | break; |
2130 | } | 1520 | } |
2131 | case KVM_ASSIGN_IRQ: { | 1521 | case KVM_IOEVENTFD: { |
2132 | r = -EOPNOTSUPP; | 1522 | struct kvm_ioeventfd data; |
2133 | break; | ||
2134 | } | ||
2135 | #ifdef KVM_CAP_ASSIGN_DEV_IRQ | ||
2136 | case KVM_ASSIGN_DEV_IRQ: { | ||
2137 | struct kvm_assigned_irq assigned_irq; | ||
2138 | 1523 | ||
2139 | r = -EFAULT; | 1524 | r = -EFAULT; |
2140 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | 1525 | if (copy_from_user(&data, argp, sizeof data)) |
2141 | goto out; | ||
2142 | r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); | ||
2143 | if (r) | ||
2144 | goto out; | 1526 | goto out; |
1527 | r = kvm_ioeventfd(kvm, &data); | ||
2145 | break; | 1528 | break; |
2146 | } | 1529 | } |
2147 | case KVM_DEASSIGN_DEV_IRQ: { | 1530 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE |
2148 | struct kvm_assigned_irq assigned_irq; | 1531 | case KVM_SET_BOOT_CPU_ID: |
2149 | 1532 | r = 0; | |
2150 | r = -EFAULT; | 1533 | mutex_lock(&kvm->lock); |
2151 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | 1534 | if (atomic_read(&kvm->online_vcpus) != 0) |
2152 | goto out; | 1535 | r = -EBUSY; |
2153 | r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); | 1536 | else |
2154 | if (r) | 1537 | kvm->bsp_vcpu_id = arg; |
2155 | goto out; | 1538 | mutex_unlock(&kvm->lock); |
2156 | break; | 1539 | break; |
2157 | } | ||
2158 | #endif | ||
2159 | #endif | 1540 | #endif |
2160 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT | 1541 | default: |
2161 | case KVM_DEASSIGN_PCI_DEVICE: { | 1542 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); |
2162 | struct kvm_assigned_pci_dev assigned_dev; | 1543 | if (r == -ENOTTY) |
2163 | 1544 | r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); | |
2164 | r = -EFAULT; | ||
2165 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
2166 | goto out; | ||
2167 | r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); | ||
2168 | if (r) | ||
2169 | goto out; | ||
2170 | break; | ||
2171 | } | 1545 | } |
2172 | #endif | 1546 | out: |
2173 | #ifdef KVM_CAP_IRQ_ROUTING | 1547 | return r; |
2174 | case KVM_SET_GSI_ROUTING: { | 1548 | } |
2175 | struct kvm_irq_routing routing; | 1549 | |
2176 | struct kvm_irq_routing __user *urouting; | 1550 | #ifdef CONFIG_COMPAT |
2177 | struct kvm_irq_routing_entry *entries; | 1551 | struct compat_kvm_dirty_log { |
1552 | __u32 slot; | ||
1553 | __u32 padding1; | ||
1554 | union { | ||
1555 | compat_uptr_t dirty_bitmap; /* one bit per page */ | ||
1556 | __u64 padding2; | ||
1557 | }; | ||
1558 | }; | ||
1559 | |||
1560 | static long kvm_vm_compat_ioctl(struct file *filp, | ||
1561 | unsigned int ioctl, unsigned long arg) | ||
1562 | { | ||
1563 | struct kvm *kvm = filp->private_data; | ||
1564 | int r; | ||
1565 | |||
1566 | if (kvm->mm != current->mm) | ||
1567 | return -EIO; | ||
1568 | switch (ioctl) { | ||
1569 | case KVM_GET_DIRTY_LOG: { | ||
1570 | struct compat_kvm_dirty_log compat_log; | ||
1571 | struct kvm_dirty_log log; | ||
2178 | 1572 | ||
2179 | r = -EFAULT; | 1573 | r = -EFAULT; |
2180 | if (copy_from_user(&routing, argp, sizeof(routing))) | 1574 | if (copy_from_user(&compat_log, (void __user *)arg, |
2181 | goto out; | 1575 | sizeof(compat_log))) |
2182 | r = -EINVAL; | ||
2183 | if (routing.nr >= KVM_MAX_IRQ_ROUTES) | ||
2184 | goto out; | ||
2185 | if (routing.flags) | ||
2186 | goto out; | ||
2187 | r = -ENOMEM; | ||
2188 | entries = vmalloc(routing.nr * sizeof(*entries)); | ||
2189 | if (!entries) | ||
2190 | goto out; | ||
2191 | r = -EFAULT; | ||
2192 | urouting = argp; | ||
2193 | if (copy_from_user(entries, urouting->entries, | ||
2194 | routing.nr * sizeof(*entries))) | ||
2195 | goto out_free_irq_routing; | ||
2196 | r = kvm_set_irq_routing(kvm, entries, routing.nr, | ||
2197 | routing.flags); | ||
2198 | out_free_irq_routing: | ||
2199 | vfree(entries); | ||
2200 | break; | ||
2201 | } | ||
2202 | #ifdef __KVM_HAVE_MSIX | ||
2203 | case KVM_ASSIGN_SET_MSIX_NR: { | ||
2204 | struct kvm_assigned_msix_nr entry_nr; | ||
2205 | r = -EFAULT; | ||
2206 | if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) | ||
2207 | goto out; | ||
2208 | r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); | ||
2209 | if (r) | ||
2210 | goto out; | ||
2211 | break; | ||
2212 | } | ||
2213 | case KVM_ASSIGN_SET_MSIX_ENTRY: { | ||
2214 | struct kvm_assigned_msix_entry entry; | ||
2215 | r = -EFAULT; | ||
2216 | if (copy_from_user(&entry, argp, sizeof entry)) | ||
2217 | goto out; | 1576 | goto out; |
2218 | r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); | 1577 | log.slot = compat_log.slot; |
1578 | log.padding1 = compat_log.padding1; | ||
1579 | log.padding2 = compat_log.padding2; | ||
1580 | log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); | ||
1581 | |||
1582 | r = kvm_vm_ioctl_get_dirty_log(kvm, &log); | ||
2219 | if (r) | 1583 | if (r) |
2220 | goto out; | 1584 | goto out; |
2221 | break; | 1585 | break; |
2222 | } | 1586 | } |
2223 | #endif | ||
2224 | #endif /* KVM_CAP_IRQ_ROUTING */ | ||
2225 | default: | 1587 | default: |
2226 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); | 1588 | r = kvm_vm_ioctl(filp, ioctl, arg); |
2227 | } | 1589 | } |
1590 | |||
2228 | out: | 1591 | out: |
2229 | return r; | 1592 | return r; |
2230 | } | 1593 | } |
1594 | #endif | ||
2231 | 1595 | ||
2232 | static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1596 | static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
2233 | { | 1597 | { |
@@ -2250,7 +1614,7 @@ static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
2250 | return 0; | 1614 | return 0; |
2251 | } | 1615 | } |
2252 | 1616 | ||
2253 | static struct vm_operations_struct kvm_vm_vm_ops = { | 1617 | static const struct vm_operations_struct kvm_vm_vm_ops = { |
2254 | .fault = kvm_vm_fault, | 1618 | .fault = kvm_vm_fault, |
2255 | }; | 1619 | }; |
2256 | 1620 | ||
@@ -2263,7 +1627,9 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) | |||
2263 | static struct file_operations kvm_vm_fops = { | 1627 | static struct file_operations kvm_vm_fops = { |
2264 | .release = kvm_vm_release, | 1628 | .release = kvm_vm_release, |
2265 | .unlocked_ioctl = kvm_vm_ioctl, | 1629 | .unlocked_ioctl = kvm_vm_ioctl, |
2266 | .compat_ioctl = kvm_vm_ioctl, | 1630 | #ifdef CONFIG_COMPAT |
1631 | .compat_ioctl = kvm_vm_compat_ioctl, | ||
1632 | #endif | ||
2267 | .mmap = kvm_vm_mmap, | 1633 | .mmap = kvm_vm_mmap, |
2268 | }; | 1634 | }; |
2269 | 1635 | ||
@@ -2275,7 +1641,7 @@ static int kvm_dev_ioctl_create_vm(void) | |||
2275 | kvm = kvm_create_vm(); | 1641 | kvm = kvm_create_vm(); |
2276 | if (IS_ERR(kvm)) | 1642 | if (IS_ERR(kvm)) |
2277 | return PTR_ERR(kvm); | 1643 | return PTR_ERR(kvm); |
2278 | fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, 0); | 1644 | fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); |
2279 | if (fd < 0) | 1645 | if (fd < 0) |
2280 | kvm_put_kvm(kvm); | 1646 | kvm_put_kvm(kvm); |
2281 | 1647 | ||
@@ -2288,6 +1654,10 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) | |||
2288 | case KVM_CAP_USER_MEMORY: | 1654 | case KVM_CAP_USER_MEMORY: |
2289 | case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: | 1655 | case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: |
2290 | case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: | 1656 | case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: |
1657 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | ||
1658 | case KVM_CAP_SET_BOOT_CPU_ID: | ||
1659 | #endif | ||
1660 | case KVM_CAP_INTERNAL_ERROR_DATA: | ||
2291 | return 1; | 1661 | return 1; |
2292 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 1662 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
2293 | case KVM_CAP_IRQ_ROUTING: | 1663 | case KVM_CAP_IRQ_ROUTING: |
@@ -2335,7 +1705,7 @@ static long kvm_dev_ioctl(struct file *filp, | |||
2335 | case KVM_TRACE_ENABLE: | 1705 | case KVM_TRACE_ENABLE: |
2336 | case KVM_TRACE_PAUSE: | 1706 | case KVM_TRACE_PAUSE: |
2337 | case KVM_TRACE_DISABLE: | 1707 | case KVM_TRACE_DISABLE: |
2338 | r = kvm_trace_ioctl(ioctl, arg); | 1708 | r = -EOPNOTSUPP; |
2339 | break; | 1709 | break; |
2340 | default: | 1710 | default: |
2341 | return kvm_arch_dev_ioctl(filp, ioctl, arg); | 1711 | return kvm_arch_dev_ioctl(filp, ioctl, arg); |
@@ -2358,11 +1728,21 @@ static struct miscdevice kvm_dev = { | |||
2358 | static void hardware_enable(void *junk) | 1728 | static void hardware_enable(void *junk) |
2359 | { | 1729 | { |
2360 | int cpu = raw_smp_processor_id(); | 1730 | int cpu = raw_smp_processor_id(); |
1731 | int r; | ||
2361 | 1732 | ||
2362 | if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) | 1733 | if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) |
2363 | return; | 1734 | return; |
1735 | |||
2364 | cpumask_set_cpu(cpu, cpus_hardware_enabled); | 1736 | cpumask_set_cpu(cpu, cpus_hardware_enabled); |
2365 | kvm_arch_hardware_enable(NULL); | 1737 | |
1738 | r = kvm_arch_hardware_enable(NULL); | ||
1739 | |||
1740 | if (r) { | ||
1741 | cpumask_clear_cpu(cpu, cpus_hardware_enabled); | ||
1742 | atomic_inc(&hardware_enable_failed); | ||
1743 | printk(KERN_INFO "kvm: enabling virtualization on " | ||
1744 | "CPU%d failed\n", cpu); | ||
1745 | } | ||
2366 | } | 1746 | } |
2367 | 1747 | ||
2368 | static void hardware_disable(void *junk) | 1748 | static void hardware_disable(void *junk) |
@@ -2375,11 +1755,52 @@ static void hardware_disable(void *junk) | |||
2375 | kvm_arch_hardware_disable(NULL); | 1755 | kvm_arch_hardware_disable(NULL); |
2376 | } | 1756 | } |
2377 | 1757 | ||
1758 | static void hardware_disable_all_nolock(void) | ||
1759 | { | ||
1760 | BUG_ON(!kvm_usage_count); | ||
1761 | |||
1762 | kvm_usage_count--; | ||
1763 | if (!kvm_usage_count) | ||
1764 | on_each_cpu(hardware_disable, NULL, 1); | ||
1765 | } | ||
1766 | |||
1767 | static void hardware_disable_all(void) | ||
1768 | { | ||
1769 | spin_lock(&kvm_lock); | ||
1770 | hardware_disable_all_nolock(); | ||
1771 | spin_unlock(&kvm_lock); | ||
1772 | } | ||
1773 | |||
1774 | static int hardware_enable_all(void) | ||
1775 | { | ||
1776 | int r = 0; | ||
1777 | |||
1778 | spin_lock(&kvm_lock); | ||
1779 | |||
1780 | kvm_usage_count++; | ||
1781 | if (kvm_usage_count == 1) { | ||
1782 | atomic_set(&hardware_enable_failed, 0); | ||
1783 | on_each_cpu(hardware_enable, NULL, 1); | ||
1784 | |||
1785 | if (atomic_read(&hardware_enable_failed)) { | ||
1786 | hardware_disable_all_nolock(); | ||
1787 | r = -EBUSY; | ||
1788 | } | ||
1789 | } | ||
1790 | |||
1791 | spin_unlock(&kvm_lock); | ||
1792 | |||
1793 | return r; | ||
1794 | } | ||
1795 | |||
2378 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | 1796 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, |
2379 | void *v) | 1797 | void *v) |
2380 | { | 1798 | { |
2381 | int cpu = (long)v; | 1799 | int cpu = (long)v; |
2382 | 1800 | ||
1801 | if (!kvm_usage_count) | ||
1802 | return NOTIFY_OK; | ||
1803 | |||
2383 | val &= ~CPU_TASKS_FROZEN; | 1804 | val &= ~CPU_TASKS_FROZEN; |
2384 | switch (val) { | 1805 | switch (val) { |
2385 | case CPU_DYING: | 1806 | case CPU_DYING: |
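
hardware_enable_all()/hardware_disable_all() make per-host virtualization enabling usage-counted: the first VM created turns it on across all CPUs (backing out with -EBUSY if any CPU fails), the last VM destroyed turns it off, and the CPU hotplug notifier (and, further below, suspend/resume) bail out early while kvm_usage_count is zero. A user-space sketch of the same refcounting idea, where resource_on()/resource_off() stand in for the on_each_cpu() enable/disable calls (all names illustrative, not from the patch):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int usage_count;                    /* plays the role of kvm_usage_count */

    static void resource_on(void)  { puts("enable on all CPUs"); }
    static void resource_off(void) { puts("disable on all CPUs"); }

    static void enable_all(void)
    {
            pthread_mutex_lock(&lock);
            if (usage_count++ == 0)
                    resource_on();             /* only the first user pays the cost */
            pthread_mutex_unlock(&lock);
    }

    static void disable_all(void)
    {
            pthread_mutex_lock(&lock);
            if (--usage_count == 0)
                    resource_off();            /* last user switches it back off */
            pthread_mutex_unlock(&lock);
    }
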
@@ -2449,26 +1870,71 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus) | |||
2449 | } | 1870 | } |
2450 | } | 1871 | } |
2451 | 1872 | ||
2452 | struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, | 1873 | /* kvm_io_bus_write - called under kvm->slots_lock */ |
2453 | gpa_t addr, int len, int is_write) | 1874 | int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, |
1875 | int len, const void *val) | ||
2454 | { | 1876 | { |
2455 | int i; | 1877 | int i; |
1878 | for (i = 0; i < bus->dev_count; i++) | ||
1879 | if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) | ||
1880 | return 0; | ||
1881 | return -EOPNOTSUPP; | ||
1882 | } | ||
2456 | 1883 | ||
2457 | for (i = 0; i < bus->dev_count; i++) { | 1884 | /* kvm_io_bus_read - called under kvm->slots_lock */ |
2458 | struct kvm_io_device *pos = bus->devs[i]; | 1885 | int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val) |
1886 | { | ||
1887 | int i; | ||
1888 | for (i = 0; i < bus->dev_count; i++) | ||
1889 | if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) | ||
1890 | return 0; | ||
1891 | return -EOPNOTSUPP; | ||
1892 | } | ||
2459 | 1893 | ||
2460 | if (pos->in_range(pos, addr, len, is_write)) | 1894 | int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus, |
2461 | return pos; | 1895 | struct kvm_io_device *dev) |
2462 | } | 1896 | { |
1897 | int ret; | ||
2463 | 1898 | ||
2464 | return NULL; | 1899 | down_write(&kvm->slots_lock); |
1900 | ret = __kvm_io_bus_register_dev(bus, dev); | ||
1901 | up_write(&kvm->slots_lock); | ||
1902 | |||
1903 | return ret; | ||
2465 | } | 1904 | } |
2466 | 1905 | ||
2467 | void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev) | 1906 | /* An unlocked version. Caller must have write lock on slots_lock. */ |
1907 | int __kvm_io_bus_register_dev(struct kvm_io_bus *bus, | ||
1908 | struct kvm_io_device *dev) | ||
2468 | { | 1909 | { |
2469 | BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1)); | 1910 | if (bus->dev_count > NR_IOBUS_DEVS-1) |
1911 | return -ENOSPC; | ||
2470 | 1912 | ||
2471 | bus->devs[bus->dev_count++] = dev; | 1913 | bus->devs[bus->dev_count++] = dev; |
1914 | |||
1915 | return 0; | ||
1916 | } | ||
1917 | |||
1918 | void kvm_io_bus_unregister_dev(struct kvm *kvm, | ||
1919 | struct kvm_io_bus *bus, | ||
1920 | struct kvm_io_device *dev) | ||
1921 | { | ||
1922 | down_write(&kvm->slots_lock); | ||
1923 | __kvm_io_bus_unregister_dev(bus, dev); | ||
1924 | up_write(&kvm->slots_lock); | ||
1925 | } | ||
1926 | |||
1927 | /* An unlocked version. Caller must have write lock on slots_lock. */ | ||
1928 | void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus, | ||
1929 | struct kvm_io_device *dev) | ||
1930 | { | ||
1931 | int i; | ||
1932 | |||
1933 | for (i = 0; i < bus->dev_count; i++) | ||
1934 | if (bus->devs[i] == dev) { | ||
1935 | bus->devs[i] = bus->devs[--bus->dev_count]; | ||
1936 | break; | ||
1937 | } | ||
2472 | } | 1938 | } |
2473 | 1939 | ||
2474 | static struct notifier_block kvm_cpu_notifier = { | 1940 | static struct notifier_block kvm_cpu_notifier = { |
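
kvm_io_bus_write()/kvm_io_bus_read() replace the old kvm_io_bus_find_dev() lookup: instead of asking each device whether an address is in range and then calling it, the bus offers the access to every registered device and stops at the first one that accepts it, returning -EOPNOTSUPP when none does. A condensed sketch of that dispatch loop outside the kernel (struct io_dev, struct io_bus and bus_write are made-up names for illustration):

    #include <errno.h>

    #define MAX_DEVS 6

    struct io_dev {
            /* returns 0 if the device claims the access, nonzero otherwise */
            int (*write)(struct io_dev *dev, unsigned long addr, int len, const void *val);
    };

    struct io_bus {
            int dev_count;
            struct io_dev *devs[MAX_DEVS];
    };

    /* Mirrors kvm_io_bus_write(): the first device to accept the access wins. */
    static int bus_write(struct io_bus *bus, unsigned long addr, int len, const void *val)
    {
            int i;

            for (i = 0; i < bus->dev_count; i++)
                    if (!bus->devs[i]->write(bus->devs[i], addr, len, val))
                            return 0;
            return -EOPNOTSUPP;
    }
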
@@ -2501,18 +1967,16 @@ static int vcpu_stat_get(void *_offset, u64 *val) | |||
2501 | *val = 0; | 1967 | *val = 0; |
2502 | spin_lock(&kvm_lock); | 1968 | spin_lock(&kvm_lock); |
2503 | list_for_each_entry(kvm, &vm_list, vm_list) | 1969 | list_for_each_entry(kvm, &vm_list, vm_list) |
2504 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | 1970 | kvm_for_each_vcpu(i, vcpu, kvm) |
2505 | vcpu = kvm->vcpus[i]; | 1971 | *val += *(u32 *)((void *)vcpu + offset); |
2506 | if (vcpu) | 1972 | |
2507 | *val += *(u32 *)((void *)vcpu + offset); | ||
2508 | } | ||
2509 | spin_unlock(&kvm_lock); | 1973 | spin_unlock(&kvm_lock); |
2510 | return 0; | 1974 | return 0; |
2511 | } | 1975 | } |
2512 | 1976 | ||
2513 | DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n"); | 1977 | DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n"); |
2514 | 1978 | ||
2515 | static struct file_operations *stat_fops[] = { | 1979 | static const struct file_operations *stat_fops[] = { |
2516 | [KVM_STAT_VCPU] = &vcpu_stat_fops, | 1980 | [KVM_STAT_VCPU] = &vcpu_stat_fops, |
2517 | [KVM_STAT_VM] = &vm_stat_fops, | 1981 | [KVM_STAT_VM] = &vm_stat_fops, |
2518 | }; | 1982 | }; |
@@ -2539,13 +2003,15 @@ static void kvm_exit_debug(void) | |||
2539 | 2003 | ||
2540 | static int kvm_suspend(struct sys_device *dev, pm_message_t state) | 2004 | static int kvm_suspend(struct sys_device *dev, pm_message_t state) |
2541 | { | 2005 | { |
2542 | hardware_disable(NULL); | 2006 | if (kvm_usage_count) |
2007 | hardware_disable(NULL); | ||
2543 | return 0; | 2008 | return 0; |
2544 | } | 2009 | } |
2545 | 2010 | ||
2546 | static int kvm_resume(struct sys_device *dev) | 2011 | static int kvm_resume(struct sys_device *dev) |
2547 | { | 2012 | { |
2548 | hardware_enable(NULL); | 2013 | if (kvm_usage_count) |
2014 | hardware_enable(NULL); | ||
2549 | return 0; | 2015 | return 0; |
2550 | } | 2016 | } |
2551 | 2017 | ||
@@ -2590,8 +2056,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size, | |||
2590 | int r; | 2056 | int r; |
2591 | int cpu; | 2057 | int cpu; |
2592 | 2058 | ||
2593 | kvm_init_debug(); | ||
2594 | |||
2595 | r = kvm_arch_init(opaque); | 2059 | r = kvm_arch_init(opaque); |
2596 | if (r) | 2060 | if (r) |
2597 | goto out_fail; | 2061 | goto out_fail; |
@@ -2622,7 +2086,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size, | |||
2622 | goto out_free_1; | 2086 | goto out_free_1; |
2623 | } | 2087 | } |
2624 | 2088 | ||
2625 | on_each_cpu(hardware_enable, NULL, 1); | ||
2626 | r = register_cpu_notifier(&kvm_cpu_notifier); | 2089 | r = register_cpu_notifier(&kvm_cpu_notifier); |
2627 | if (r) | 2090 | if (r) |
2628 | goto out_free_2; | 2091 | goto out_free_2; |
@@ -2658,6 +2121,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size, | |||
2658 | kvm_preempt_ops.sched_in = kvm_sched_in; | 2121 | kvm_preempt_ops.sched_in = kvm_sched_in; |
2659 | kvm_preempt_ops.sched_out = kvm_sched_out; | 2122 | kvm_preempt_ops.sched_out = kvm_sched_out; |
2660 | 2123 | ||
2124 | kvm_init_debug(); | ||
2125 | |||
2661 | return 0; | 2126 | return 0; |
2662 | 2127 | ||
2663 | out_free: | 2128 | out_free: |
@@ -2670,7 +2135,6 @@ out_free_3: | |||
2670 | unregister_reboot_notifier(&kvm_reboot_notifier); | 2135 | unregister_reboot_notifier(&kvm_reboot_notifier); |
2671 | unregister_cpu_notifier(&kvm_cpu_notifier); | 2136 | unregister_cpu_notifier(&kvm_cpu_notifier); |
2672 | out_free_2: | 2137 | out_free_2: |
2673 | on_each_cpu(hardware_disable, NULL, 1); | ||
2674 | out_free_1: | 2138 | out_free_1: |
2675 | kvm_arch_hardware_unsetup(); | 2139 | kvm_arch_hardware_unsetup(); |
2676 | out_free_0a: | 2140 | out_free_0a: |
@@ -2679,7 +2143,6 @@ out_free_0: | |||
2679 | __free_page(bad_page); | 2143 | __free_page(bad_page); |
2680 | out: | 2144 | out: |
2681 | kvm_arch_exit(); | 2145 | kvm_arch_exit(); |
2682 | kvm_exit_debug(); | ||
2683 | out_fail: | 2146 | out_fail: |
2684 | return r; | 2147 | return r; |
2685 | } | 2148 | } |
@@ -2687,7 +2150,8 @@ EXPORT_SYMBOL_GPL(kvm_init); | |||
2687 | 2150 | ||
2688 | void kvm_exit(void) | 2151 | void kvm_exit(void) |
2689 | { | 2152 | { |
2690 | kvm_trace_cleanup(); | 2153 | tracepoint_synchronize_unregister(); |
2154 | kvm_exit_debug(); | ||
2691 | misc_deregister(&kvm_dev); | 2155 | misc_deregister(&kvm_dev); |
2692 | kmem_cache_destroy(kvm_vcpu_cache); | 2156 | kmem_cache_destroy(kvm_vcpu_cache); |
2693 | sysdev_unregister(&kvm_sysdev); | 2157 | sysdev_unregister(&kvm_sysdev); |
@@ -2697,7 +2161,6 @@ void kvm_exit(void) | |||
2697 | on_each_cpu(hardware_disable, NULL, 1); | 2161 | on_each_cpu(hardware_disable, NULL, 1); |
2698 | kvm_arch_hardware_unsetup(); | 2162 | kvm_arch_hardware_unsetup(); |
2699 | kvm_arch_exit(); | 2163 | kvm_arch_exit(); |
2700 | kvm_exit_debug(); | ||
2701 | free_cpumask_var(cpus_hardware_enabled); | 2164 | free_cpumask_var(cpus_hardware_enabled); |
2702 | __free_page(bad_page); | 2165 | __free_page(bad_page); |
2703 | } | 2166 | } |