path: root/virt/kvm/kvm_main.c
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--	virt/kvm/kvm_main.c	681
1 file changed, 489 insertions(+), 192 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1ecbe2391c8b..e21194566b71 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,8 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/bitops.h>
+#include <linux/spinlock.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -60,9 +62,6 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
-static int msi2intx = 1;
-module_param(msi2intx, bool, 0);
-
 DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
@@ -95,38 +94,96 @@ static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *h
 	return NULL;
 }
 
+static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
+				    *assigned_dev, int irq)
+{
+	int i, index;
+	struct msix_entry *host_msix_entries;
+
+	host_msix_entries = assigned_dev->host_msix_entries;
+
+	index = -1;
+	for (i = 0; i < assigned_dev->entries_nr; i++)
+		if (irq == host_msix_entries[i].vector) {
+			index = i;
+			break;
+		}
+	if (index < 0) {
+		printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
+		return 0;
+	}
+
+	return index;
+}
+
 static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 {
 	struct kvm_assigned_dev_kernel *assigned_dev;
+	struct kvm *kvm;
+	int irq, i;
 
 	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
 				    interrupt_work);
+	kvm = assigned_dev->kvm;
 
 	/* This is taken to safely inject irq inside the guest. When
 	 * the interrupt injection (or the ioapic code) uses a
 	 * finer-grained lock, update this
 	 */
-	mutex_lock(&assigned_dev->kvm->lock);
-	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
-		    assigned_dev->guest_irq, 1);
-
-	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) {
-		enable_irq(assigned_dev->host_irq);
-		assigned_dev->host_irq_disabled = false;
+	mutex_lock(&kvm->lock);
+	spin_lock_irq(&assigned_dev->assigned_dev_lock);
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
+		struct kvm_guest_msix_entry *guest_entries =
+			assigned_dev->guest_msix_entries;
+		for (i = 0; i < assigned_dev->entries_nr; i++) {
+			if (!(guest_entries[i].flags &
+					KVM_ASSIGNED_MSIX_PENDING))
+				continue;
+			guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING;
+			kvm_set_irq(assigned_dev->kvm,
+				    assigned_dev->irq_source_id,
+				    guest_entries[i].vector, 1);
+			irq = assigned_dev->host_msix_entries[i].vector;
+			if (irq != 0)
+				enable_irq(irq);
+			assigned_dev->host_irq_disabled = false;
+		}
+	} else {
+		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+			    assigned_dev->guest_irq, 1);
+		if (assigned_dev->irq_requested_type &
+				KVM_DEV_IRQ_GUEST_MSI) {
+			enable_irq(assigned_dev->host_irq);
+			assigned_dev->host_irq_disabled = false;
+		}
 	}
+
+	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
 	mutex_unlock(&assigned_dev->kvm->lock);
 }
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 {
+	unsigned long flags;
 	struct kvm_assigned_dev_kernel *assigned_dev =
 		(struct kvm_assigned_dev_kernel *) dev_id;
 
+	spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
+		int index = find_index_from_host_irq(assigned_dev, irq);
+		if (index < 0)
+			goto out;
+		assigned_dev->guest_msix_entries[index].flags |=
+			KVM_ASSIGNED_MSIX_PENDING;
+	}
+
 	schedule_work(&assigned_dev->interrupt_work);
 
 	disable_irq_nosync(irq);
 	assigned_dev->host_irq_disabled = true;
 
+out:
+	spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
 	return IRQ_HANDLED;
 }
 
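Note: the rework above splits interrupt delivery in two. The hard IRQ handler only records which MSI-X entry fired (KVM_ASSIGNED_MSIX_PENDING) under assigned_dev_lock and masks the host line; the actual kvm_set_irq() injection happens later from the workqueue under kvm->lock. A self-contained userspace analogue of that pattern, with hypothetical names and an atomic flag standing in for the spinlock (illustration only, not the kernel code itself):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define NR_ENTRIES 4

    /* one pending flag per MSI-X entry, set from "interrupt" context */
    static atomic_bool pending[NR_ENTRIES];

    /* stand-in for kvm_assigned_dev_intr(): record the entry and defer */
    static void fake_hard_irq(int entry)
    {
    	atomic_store(&pending[entry], true);
    	/* the real handler also calls disable_irq_nosync() here */
    }

    /* stand-in for the interrupt_work handler: drain and "inject" */
    static void fake_work_handler(void)
    {
    	for (int i = 0; i < NR_ENTRIES; i++) {
    		if (!atomic_exchange(&pending[i], false))
    			continue;
    		printf("inject guest vector for entry %d\n", i);
    		/* the real handler re-enables the host vector here */
    	}
    }

    int main(void)
    {
    	fake_hard_irq(2);
    	fake_hard_irq(0);
    	fake_work_handler();	/* prints entries 0 and 2, in index order */
    	return 0;
    }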
@@ -134,6 +191,7 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 {
 	struct kvm_assigned_dev_kernel *dev;
+	unsigned long flags;
 
 	if (kian->gsi == -1)
 		return;
@@ -146,28 +204,30 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 	/* The guest irq may be shared so this ack may be
 	 * from another device.
 	 */
+	spin_lock_irqsave(&dev->assigned_dev_lock, flags);
 	if (dev->host_irq_disabled) {
 		enable_irq(dev->host_irq);
 		dev->host_irq_disabled = false;
 	}
+	spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
 }
 
-/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
-static void kvm_free_assigned_irq(struct kvm *kvm,
-				  struct kvm_assigned_dev_kernel *assigned_dev)
+static void deassign_guest_irq(struct kvm *kvm,
+			       struct kvm_assigned_dev_kernel *assigned_dev)
 {
-	if (!irqchip_in_kernel(kvm))
-		return;
-
 	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
+	assigned_dev->ack_notifier.gsi = -1;
 
 	if (assigned_dev->irq_source_id != -1)
 		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
 	assigned_dev->irq_source_id = -1;
+	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
+}
 
-	if (!assigned_dev->irq_requested_type)
-		return;
-
+/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
+static void deassign_host_irq(struct kvm *kvm,
+			      struct kvm_assigned_dev_kernel *assigned_dev)
+{
 	/*
 	 * In kvm_free_device_irq, cancel_work_sync return true if:
 	 * 1. work is scheduled, and then cancelled.
@@ -184,17 +244,64 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
 	 * now, the kvm state is still legal for probably we also have to wait
 	 * interrupt_work done.
 	 */
-	disable_irq_nosync(assigned_dev->host_irq);
-	cancel_work_sync(&assigned_dev->interrupt_work);
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
+		int i;
+		for (i = 0; i < assigned_dev->entries_nr; i++)
+			disable_irq_nosync(assigned_dev->
+					   host_msix_entries[i].vector);
+
+		cancel_work_sync(&assigned_dev->interrupt_work);
+
+		for (i = 0; i < assigned_dev->entries_nr; i++)
+			free_irq(assigned_dev->host_msix_entries[i].vector,
+				 (void *)assigned_dev);
+
+		assigned_dev->entries_nr = 0;
+		kfree(assigned_dev->host_msix_entries);
+		kfree(assigned_dev->guest_msix_entries);
+		pci_disable_msix(assigned_dev->dev);
+	} else {
+		/* Deal with MSI and INTx */
+		disable_irq_nosync(assigned_dev->host_irq);
+		cancel_work_sync(&assigned_dev->interrupt_work);
 
 		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
 
-	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
+		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
 			pci_disable_msi(assigned_dev->dev);
+	}
 
-	assigned_dev->irq_requested_type = 0;
+	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
 }
 
+static int kvm_deassign_irq(struct kvm *kvm,
+			    struct kvm_assigned_dev_kernel *assigned_dev,
+			    unsigned long irq_requested_type)
+{
+	unsigned long guest_irq_type, host_irq_type;
+
+	if (!irqchip_in_kernel(kvm))
+		return -EINVAL;
+	/* no irq assignment to deassign */
+	if (!assigned_dev->irq_requested_type)
+		return -ENXIO;
+
+	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
+	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
+
+	if (host_irq_type)
+		deassign_host_irq(kvm, assigned_dev);
+	if (guest_irq_type)
+		deassign_guest_irq(kvm, assigned_dev);
+
+	return 0;
+}
+
+static void kvm_free_assigned_irq(struct kvm *kvm,
+				  struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
+}
 
 static void kvm_free_assigned_device(struct kvm *kvm,
 				     struct kvm_assigned_dev_kernel
@@ -226,190 +333,244 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
 	}
 }
 
-static int assigned_device_update_intx(struct kvm *kvm,
-				       struct kvm_assigned_dev_kernel *adev,
-				       struct kvm_assigned_irq *airq)
+static int assigned_device_enable_host_intx(struct kvm *kvm,
+					    struct kvm_assigned_dev_kernel *dev)
 {
-	adev->guest_irq = airq->guest_irq;
-	adev->ack_notifier.gsi = airq->guest_irq;
+	dev->host_irq = dev->dev->irq;
+	/* Even though this is PCI, we don't want to use shared
+	 * interrupts. Sharing host devices with guest-assigned devices
+	 * on the same interrupt line is not a happy situation: there
+	 * are going to be long delays in accepting, acking, etc.
+	 */
+	if (request_irq(dev->host_irq, kvm_assigned_dev_intr,
+			0, "kvm_assigned_intx_device", (void *)dev))
+		return -EIO;
+	return 0;
+}
 
-	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX)
-		return 0;
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_host_msi(struct kvm *kvm,
+					   struct kvm_assigned_dev_kernel *dev)
+{
+	int r;
 
-	if (irqchip_in_kernel(kvm)) {
-		if (!msi2intx &&
-		    (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) {
-			free_irq(adev->host_irq, (void *)adev);
-			pci_disable_msi(adev->dev);
-		}
+	if (!dev->dev->msi_enabled) {
+		r = pci_enable_msi(dev->dev);
+		if (r)
+			return r;
+	}
 
-		if (!capable(CAP_SYS_RAWIO))
-			return -EPERM;
+	dev->host_irq = dev->dev->irq;
+	if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0,
+			"kvm_assigned_msi_device", (void *)dev)) {
+		pci_disable_msi(dev->dev);
+		return -EIO;
+	}
 
-		if (airq->host_irq)
-			adev->host_irq = airq->host_irq;
-		else
-			adev->host_irq = adev->dev->irq;
+	return 0;
+}
+#endif
 
-		/* Even though this is PCI, we don't want to use shared
-		 * interrupts. Sharing host devices with guest-assigned devices
-		 * on the same interrupt line is not a happy situation: there
-		 * are going to be long delays in accepting, acking, etc.
-		 */
-		if (request_irq(adev->host_irq, kvm_assigned_dev_intr,
-				0, "kvm_assigned_intx_device", (void *)adev))
-			return -EIO;
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_host_msix(struct kvm *kvm,
+					    struct kvm_assigned_dev_kernel *dev)
+{
+	int i, r = -EINVAL;
+
+	/* host_msix_entries and guest_msix_entries should have been
+	 * initialized */
+	if (dev->entries_nr == 0)
+		return r;
+
+	r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
+	if (r)
+		return r;
+
+	for (i = 0; i < dev->entries_nr; i++) {
+		r = request_irq(dev->host_msix_entries[i].vector,
+				kvm_assigned_dev_intr, 0,
+				"kvm_assigned_msix_device",
+				(void *)dev);
+		/* FIXME: free requested_irq's on failure */
+		if (r)
+			return r;
 	}
 
-	adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX |
-				   KVM_ASSIGNED_DEV_HOST_INTX;
 	return 0;
 }
 
-#ifdef CONFIG_X86
-static int assigned_device_update_msi(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *adev,
-			struct kvm_assigned_irq *airq)
+#endif
+
+static int assigned_device_enable_guest_intx(struct kvm *kvm,
+				struct kvm_assigned_dev_kernel *dev,
+				struct kvm_assigned_irq *irq)
 {
-	int r;
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = irq->guest_irq;
+	return 0;
+}
 
-	adev->guest_irq = airq->guest_irq;
-	if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
-		/* x86 don't care upper address of guest msi message addr */
-		adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI;
-		adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX;
-		adev->ack_notifier.gsi = -1;
-	} else if (msi2intx) {
-		adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX;
-		adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
-		adev->ack_notifier.gsi = airq->guest_irq;
-	} else {
-		/*
-		 * Guest require to disable device MSI, we disable MSI and
-		 * re-enable INTx by default again. Notice it's only for
-		 * non-msi2intx.
-		 */
-		assigned_device_update_intx(kvm, adev, airq);
-		return 0;
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_guest_msi(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *dev,
+			struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = -1;
+	return 0;
+}
+#endif
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_guest_msix(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *dev,
+			struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = -1;
+	return 0;
+}
+#endif
+
+static int assign_host_irq(struct kvm *kvm,
+			   struct kvm_assigned_dev_kernel *dev,
+			   __u32 host_irq_type)
+{
+	int r = -EEXIST;
+
+	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
+		return r;
+
+	switch (host_irq_type) {
+	case KVM_DEV_IRQ_HOST_INTX:
+		r = assigned_device_enable_host_intx(kvm, dev);
+		break;
+#ifdef __KVM_HAVE_MSI
+	case KVM_DEV_IRQ_HOST_MSI:
+		r = assigned_device_enable_host_msi(kvm, dev);
+		break;
+#endif
+#ifdef __KVM_HAVE_MSIX
+	case KVM_DEV_IRQ_HOST_MSIX:
+		r = assigned_device_enable_host_msix(kvm, dev);
+		break;
+#endif
+	default:
+		r = -EINVAL;
 	}
 
-	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
-		return 0;
+	if (!r)
+		dev->irq_requested_type |= host_irq_type;
 
-	if (irqchip_in_kernel(kvm)) {
-		if (!msi2intx) {
-			if (adev->irq_requested_type &
-			    KVM_ASSIGNED_DEV_HOST_INTX)
-				free_irq(adev->host_irq, (void *)adev);
+	return r;
+}
 
-			r = pci_enable_msi(adev->dev);
-			if (r)
-				return r;
-		}
+static int assign_guest_irq(struct kvm *kvm,
+			    struct kvm_assigned_dev_kernel *dev,
+			    struct kvm_assigned_irq *irq,
+			    unsigned long guest_irq_type)
+{
+	int id;
+	int r = -EEXIST;
+
+	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
+		return r;
 
-	adev->host_irq = adev->dev->irq;
-	if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0,
-			"kvm_assigned_msi_device", (void *)adev))
-		return -EIO;
+	id = kvm_request_irq_source_id(kvm);
+	if (id < 0)
+		return id;
+
+	dev->irq_source_id = id;
+
+	switch (guest_irq_type) {
+	case KVM_DEV_IRQ_GUEST_INTX:
+		r = assigned_device_enable_guest_intx(kvm, dev, irq);
+		break;
+#ifdef __KVM_HAVE_MSI
+	case KVM_DEV_IRQ_GUEST_MSI:
+		r = assigned_device_enable_guest_msi(kvm, dev, irq);
+		break;
+#endif
+#ifdef __KVM_HAVE_MSIX
+	case KVM_DEV_IRQ_GUEST_MSIX:
+		r = assigned_device_enable_guest_msix(kvm, dev, irq);
+		break;
+#endif
+	default:
+		r = -EINVAL;
 	}
 
-	if (!msi2intx)
-		adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI;
+	if (!r) {
+		dev->irq_requested_type |= guest_irq_type;
+		kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
+	} else
+		kvm_free_irq_source_id(kvm, dev->irq_source_id);
 
-	adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI;
-	return 0;
+	return r;
 }
-#endif
 
+/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
-				   struct kvm_assigned_irq
-				   *assigned_irq)
+				   struct kvm_assigned_irq *assigned_irq)
 {
-	int r = 0;
+	int r = -EINVAL;
 	struct kvm_assigned_dev_kernel *match;
-	u32 current_flags = 0, changed_flags;
+	unsigned long host_irq_type, guest_irq_type;
 
-	mutex_lock(&kvm->lock);
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
 
+	if (!irqchip_in_kernel(kvm))
+		return r;
+
+	mutex_lock(&kvm->lock);
+	r = -ENODEV;
 	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
 				      assigned_irq->assigned_dev_id);
-	if (!match) {
-		mutex_unlock(&kvm->lock);
-		return -EINVAL;
-	}
-
-	if (!match->irq_requested_type) {
-		INIT_WORK(&match->interrupt_work,
-			  kvm_assigned_dev_interrupt_work_handler);
-		if (irqchip_in_kernel(kvm)) {
-			/* Register ack nofitier */
-			match->ack_notifier.gsi = -1;
-			match->ack_notifier.irq_acked =
-				kvm_assigned_dev_ack_irq;
-			kvm_register_irq_ack_notifier(kvm,
-					&match->ack_notifier);
-
-			/* Request IRQ source ID */
-			r = kvm_request_irq_source_id(kvm);
-			if (r < 0)
-				goto out_release;
-			else
-				match->irq_source_id = r;
-
-#ifdef CONFIG_X86
-			/* Determine host device irq type, we can know the
-			 * result from dev->msi_enabled */
-			if (msi2intx)
-				pci_enable_msi(match->dev);
-#endif
-		}
-	}
+	if (!match)
+		goto out;
 
-	if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
-	    (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI))
-		current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
+	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
+	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
 
-	changed_flags = assigned_irq->flags ^ current_flags;
+	r = -EINVAL;
+	/* can only assign one type at a time */
+	if (hweight_long(host_irq_type) > 1)
+		goto out;
+	if (hweight_long(guest_irq_type) > 1)
+		goto out;
+	if (host_irq_type == 0 && guest_irq_type == 0)
+		goto out;
 
-	if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) ||
-	    (msi2intx && match->dev->msi_enabled)) {
-#ifdef CONFIG_X86
-		r = assigned_device_update_msi(kvm, match, assigned_irq);
-		if (r) {
-			printk(KERN_WARNING "kvm: failed to enable "
-					"MSI device!\n");
-			goto out_release;
-		}
-#else
-		r = -ENOTTY;
-#endif
-	} else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
-		/* Host device IRQ 0 means don't support INTx */
-		if (!msi2intx) {
-			printk(KERN_WARNING
-			       "kvm: wait device to enable MSI!\n");
-			r = 0;
-		} else {
-			printk(KERN_WARNING
-			       "kvm: failed to enable MSI device!\n");
-			r = -ENOTTY;
-			goto out_release;
-		}
-	} else {
-		/* Non-sharing INTx mode */
-		r = assigned_device_update_intx(kvm, match, assigned_irq);
-		if (r) {
-			printk(KERN_WARNING "kvm: failed to enable "
-					"INTx device!\n");
-			goto out_release;
-		}
-	}
+	r = 0;
+	if (host_irq_type)
+		r = assign_host_irq(kvm, match, host_irq_type);
+	if (r)
+		goto out;
 
+	if (guest_irq_type)
+		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
+out:
 	mutex_unlock(&kvm->lock);
 	return r;
-out_release:
+}
+
+static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
+					 struct kvm_assigned_irq
+					 *assigned_irq)
+{
+	int r = -ENODEV;
+	struct kvm_assigned_dev_kernel *match;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_irq->assigned_dev_id);
+	if (!match)
+		goto out;
+
+	r = kvm_deassign_irq(kvm, match, assigned_irq->flags);
+out:
 	mutex_unlock(&kvm->lock);
-	kvm_free_assigned_device(kvm, match);
 	return r;
 }
 
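With msi2intx gone, userspace now states explicitly which host and guest interrupt types it wants: kvm_assigned_irq.flags carries at most one KVM_DEV_IRQ_HOST_* bit plus at most one KVM_DEV_IRQ_GUEST_* bit per KVM_ASSIGN_DEV_IRQ call, and either side can later be torn down with KVM_DEASSIGN_DEV_IRQ. A rough userspace sketch of the intended call sequence (assuming a VM fd from /dev/kvm and a device already registered via KVM_ASSIGN_PCI_DEVICE with the same dev_id; error handling trimmed):

    #include <linux/kvm.h>
    #include <string.h>
    #include <sys/ioctl.h>

    /* dev_id must match the assigned_dev_id used with KVM_ASSIGN_PCI_DEVICE */
    static int assign_msi_irq(int vm_fd, __u32 dev_id, __u32 guest_gsi)
    {
    	struct kvm_assigned_irq irq;

    	memset(&irq, 0, sizeof(irq));
    	irq.assigned_dev_id = dev_id;
    	irq.guest_irq = guest_gsi;
    	/* one host type and one guest type per call */
    	irq.flags = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_MSI;
    	return ioctl(vm_fd, KVM_ASSIGN_DEV_IRQ, &irq);
    }

    static int deassign_guest_side_only(int vm_fd, __u32 dev_id)
    {
    	struct kvm_assigned_irq irq;

    	memset(&irq, 0, sizeof(irq));
    	irq.assigned_dev_id = dev_id;
    	/* flags select what to tear down; the host MSI stays requested */
    	irq.flags = KVM_DEV_IRQ_GUEST_MSI;
    	return ioctl(vm_fd, KVM_DEASSIGN_DEV_IRQ, &irq);
    }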
@@ -427,7 +588,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 				      assigned_dev->assigned_dev_id);
 	if (match) {
 		/* device already assigned */
-		r = -EINVAL;
+		r = -EEXIST;
 		goto out;
 	}
 
@@ -464,8 +625,12 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	match->host_devfn = assigned_dev->devfn;
 	match->flags = assigned_dev->flags;
 	match->dev = dev;
+	spin_lock_init(&match->assigned_dev_lock);
 	match->irq_source_id = -1;
 	match->kvm = kvm;
+	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
+	INIT_WORK(&match->interrupt_work,
+		  kvm_assigned_dev_interrupt_work_handler);
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
@@ -878,6 +1043,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
 #endif
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
+#else
+	kvm_arch_flush_shadow(kvm);
 #endif
 	kvm_arch_destroy_vm(kvm);
 	mmdrop(mm);
@@ -919,9 +1086,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
 {
 	int r;
 	gfn_t base_gfn;
-	unsigned long npages;
-	int largepages;
-	unsigned long i;
+	unsigned long npages, ugfn;
+	unsigned long largepages, i;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot old, new;
 
@@ -1010,6 +1176,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		new.lpage_info[0].write_count = 1;
 		if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE)
 			new.lpage_info[largepages-1].write_count = 1;
+		ugfn = new.userspace_addr >> PAGE_SHIFT;
+		/*
+		 * If the gfn and userspace address are not aligned wrt each
+		 * other, disable large page support for this slot
+		 */
+		if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE - 1))
+			for (i = 0; i < largepages; ++i)
+				new.lpage_info[i].write_count = 1;
 	}
 
 	/* Allocate page dirty bitmap if needed */
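The check added above disables large pages for a slot whenever the guest frame number and the host userspace address land at different offsets inside a huge page, since a 2 MiB guest page could then never be backed by a single 2 MiB host page. A small standalone illustration of the arithmetic (KVM_PAGES_PER_HPAGE taken as 512, i.e. 2 MiB of 4 KiB pages on x86; the addresses are made up):

    #include <stdio.h>

    #define KVM_PAGES_PER_HPAGE 512UL	/* 2 MiB / 4 KiB, x86 example */

    int main(void)
    {
    	/* made-up example slot: base guest frame vs. userspace frame */
    	unsigned long base_gfn = 0x100000;                   /* 4 GiB >> 12 */
    	unsigned long ugfn_ok  = 0x7f0000200000UL >> 12;     /* 2 MiB-aligned hva */
    	unsigned long ugfn_bad = (0x7f0000200000UL + 0x1000) >> 12;

    	/* same test as __kvm_set_memory_region(): only the offset
    	 * *within* a huge page has to match, not the absolute value */
    	int ok  = !((base_gfn ^ ugfn_ok)  & (KVM_PAGES_PER_HPAGE - 1));
    	int bad = !((base_gfn ^ ugfn_bad) & (KVM_PAGES_PER_HPAGE - 1));

    	printf("2MiB-aligned hva: large pages %s\n", ok  ? "kept" : "disabled");
    	printf("hva off by 4KiB:  large pages %s\n", bad ? "kept" : "disabled");
    	return 0;
    }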
@@ -1043,8 +1217,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
 	kvm_free_physmem_slot(&old, npages ? &new : NULL);
 	/* Slot deletion case: we have to update the current slot */
+	spin_lock(&kvm->mmu_lock);
 	if (!npages)
 		*memslot = old;
+	spin_unlock(&kvm->mmu_lock);
 #ifdef CONFIG_DMAR
 	/* map the pages in iommu page table */
 	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
@@ -1454,12 +1630,14 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	for (;;) {
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
-		if (kvm_cpu_has_interrupt(vcpu) ||
-		    kvm_cpu_has_pending_timer(vcpu) ||
+		if ((kvm_arch_interrupt_allowed(vcpu) &&
+					kvm_cpu_has_interrupt(vcpu)) ||
 		    kvm_arch_vcpu_runnable(vcpu)) {
 			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
 			break;
 		}
+		if (kvm_cpu_has_pending_timer(vcpu))
+			break;
 		if (signal_pending(current))
 			break;
 
@@ -1593,6 +1771,88 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
 	return 0;
 }
 
+#ifdef __KVM_HAVE_MSIX
+static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
+				    struct kvm_assigned_msix_nr *entry_nr)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *adev;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      entry_nr->assigned_dev_id);
+	if (!adev) {
+		r = -EINVAL;
+		goto msix_nr_out;
+	}
+
+	if (adev->entries_nr == 0) {
+		adev->entries_nr = entry_nr->entry_nr;
+		if (adev->entries_nr == 0 ||
+		    adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) {
+			r = -EINVAL;
+			goto msix_nr_out;
+		}
+
+		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
+						entry_nr->entry_nr,
+						GFP_KERNEL);
+		if (!adev->host_msix_entries) {
+			r = -ENOMEM;
+			goto msix_nr_out;
+		}
+		adev->guest_msix_entries = kzalloc(
+				sizeof(struct kvm_guest_msix_entry) *
+				entry_nr->entry_nr, GFP_KERNEL);
+		if (!adev->guest_msix_entries) {
+			kfree(adev->host_msix_entries);
+			r = -ENOMEM;
+			goto msix_nr_out;
+		}
+	} else /* Not allowed set MSI-X number twice */
+		r = -EINVAL;
+msix_nr_out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
+				       struct kvm_assigned_msix_entry *entry)
+{
+	int r = 0, i;
+	struct kvm_assigned_dev_kernel *adev;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      entry->assigned_dev_id);
+
+	if (!adev) {
+		r = -EINVAL;
+		goto msix_entry_out;
+	}
+
+	for (i = 0; i < adev->entries_nr; i++)
+		if (adev->guest_msix_entries[i].vector == 0 ||
+		    adev->guest_msix_entries[i].entry == entry->entry) {
+			adev->guest_msix_entries[i].entry = entry->entry;
+			adev->guest_msix_entries[i].vector = entry->gsi;
+			adev->host_msix_entries[i].entry = entry->entry;
+			break;
+		}
+	if (i == adev->entries_nr) {
+		r = -ENOSPC;
+		goto msix_entry_out;
+	}
+
+msix_entry_out:
+	mutex_unlock(&kvm->lock);
+
+	return r;
+}
+#endif
+
 static long kvm_vcpu_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
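For MSI-X, userspace first sizes the vector table with KVM_ASSIGN_SET_MSIX_NR (at most KVM_MAX_MSIX_PER_DEV entries, and only once per device), then binds each device MSI-X entry to a guest GSI with KVM_ASSIGN_SET_MSIX_ENTRY, and finally arms everything with a KVM_ASSIGN_DEV_IRQ call carrying KVM_DEV_IRQ_HOST_MSIX | KVM_DEV_IRQ_GUEST_MSIX. A hedged userspace sketch, using only the struct fields the handlers above touch (error handling omitted):

    #include <linux/kvm.h>
    #include <string.h>
    #include <sys/ioctl.h>

    /* Program nvec MSI-X vectors for an already-assigned device.
     * gsis[i] is the guest GSI that MSI-X entry i should raise. */
    static int setup_msix(int vm_fd, __u32 dev_id, int nvec, const __u32 *gsis)
    {
    	struct kvm_assigned_msix_nr nr;
    	struct kvm_assigned_msix_entry ent;
    	int i, r;

    	memset(&nr, 0, sizeof(nr));
    	nr.assigned_dev_id = dev_id;
    	nr.entry_nr = nvec;		/* may be set only once per device */
    	r = ioctl(vm_fd, KVM_ASSIGN_SET_MSIX_NR, &nr);
    	if (r)
    		return r;

    	for (i = 0; i < nvec; i++) {
    		memset(&ent, 0, sizeof(ent));
    		ent.assigned_dev_id = dev_id;
    		ent.entry = i;		/* index into the device's MSI-X table */
    		ent.gsi = gsis[i];	/* guest interrupt to raise for it */
    		r = ioctl(vm_fd, KVM_ASSIGN_SET_MSIX_ENTRY, &ent);
    		if (r)
    			return r;
    	}

    	/* vectors are armed by a later KVM_ASSIGN_DEV_IRQ with
    	 * KVM_DEV_IRQ_HOST_MSIX | KVM_DEV_IRQ_GUEST_MSIX */
    	return 0;
    }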
@@ -1864,6 +2124,11 @@ static long kvm_vm_ioctl(struct file *filp,
 		break;
 	}
 	case KVM_ASSIGN_IRQ: {
+		r = -EOPNOTSUPP;
+		break;
+	}
+#ifdef KVM_CAP_ASSIGN_DEV_IRQ
+	case KVM_ASSIGN_DEV_IRQ: {
 		struct kvm_assigned_irq assigned_irq;
 
 		r = -EFAULT;
@@ -1874,6 +2139,18 @@ static long kvm_vm_ioctl(struct file *filp,
 			goto out;
 		break;
 	}
+	case KVM_DEASSIGN_DEV_IRQ: {
+		struct kvm_assigned_irq assigned_irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
+			goto out;
+		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
+		if (r)
+			goto out;
+		break;
+	}
+#endif
 #endif
 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
 	case KVM_DEASSIGN_PCI_DEVICE: {
@@ -1917,7 +2194,29 @@ static long kvm_vm_ioctl(struct file *filp,
 		vfree(entries);
 		break;
 	}
+#ifdef __KVM_HAVE_MSIX
+	case KVM_ASSIGN_SET_MSIX_NR: {
+		struct kvm_assigned_msix_nr entry_nr;
+		r = -EFAULT;
+		if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
+			goto out;
+		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_ASSIGN_SET_MSIX_ENTRY: {
+		struct kvm_assigned_msix_entry entry;
+		r = -EFAULT;
+		if (copy_from_user(&entry, argp, sizeof entry))
+			goto out;
+		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
+		if (r)
+			goto out;
+		break;
+	}
 #endif
+#endif /* KVM_CAP_IRQ_ROUTING */
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
@@ -2112,15 +2411,15 @@ EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot);
 static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
 		      void *v)
 {
-	if (val == SYS_RESTART) {
-		/*
-		 * Some (well, at least mine) BIOSes hang on reboot if
-		 * in vmx root mode.
-		 */
-		printk(KERN_INFO "kvm: exiting hardware virtualization\n");
-		kvm_rebooting = true;
-		on_each_cpu(hardware_disable, NULL, 1);
-	}
+	/*
+	 * Some (well, at least mine) BIOSes hang on reboot if
+	 * in vmx root mode.
+	 *
+	 * And Intel TXT required VMX off for all cpu when system shutdown.
+	 */
+	printk(KERN_INFO "kvm: exiting hardware virtualization\n");
+	kvm_rebooting = true;
+	on_each_cpu(hardware_disable, NULL, 1);
 	return NOTIFY_OK;
 }
 
@@ -2301,10 +2600,11 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
 	bad_pfn = page_to_pfn(bad_page);
 
-	if (!alloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
 		r = -ENOMEM;
 		goto out_free_0;
 	}
+	cpumask_clear(cpus_hardware_enabled);
 
 	r = kvm_arch_hardware_setup();
 	if (r < 0)
@@ -2353,9 +2653,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
 	kvm_preempt_ops.sched_in = kvm_sched_in;
 	kvm_preempt_ops.sched_out = kvm_sched_out;
-#ifndef CONFIG_X86
-	msi2intx = 0;
-#endif
 
 	return 0;
 