author    Sheng Yang <sheng@linux.intel.com>  2009-03-12 09:45:39 -0400
committer Avi Kivity <avi@redhat.com>         2009-06-10 04:48:29 -0400
commit    e56d532f20c890a06bbe7cd479f4201e3a03cd73
tree      6c8b1a019a77bf2081ac7482eea322f5b0a636e8
parent    386eb6e8b3caface8a0514da70a47c05cabb5b96
KVM: Device assignment framework rework
After discussion with Marcelo, we decided to rework the device assignment framework together. The main problem with the old code was that the kernel logic was unnecessarily complex, so Marcelo suggested splitting it in a more elegant way:

1. Split host IRQ assignment from guest IRQ assignment, and let userspace determine the combination. Also discard the msi2intx module parameter; userspace can specify KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_INTX in assigned_irq->flags to enable MSI-to-INTx conversion.

2. Split IRQ assignment from IRQ deassignment, and introduce two new ioctls: KVM_ASSIGN_DEV_IRQ and KVM_DEASSIGN_DEV_IRQ.

This patch also fixes the reversed _IOR vs. _IOW in the ioctl definitions (by deprecating the old interface).

[avi: replace homemade bitcount() with hweight_long()]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
-rw-r--r--  arch/x86/kvm/x86.c       |   1
-rw-r--r--  include/linux/kvm.h      |  26
-rw-r--r--  include/linux/kvm_host.h |   5
-rw-r--r--  virt/kvm/kvm_main.c      | 486
4 files changed, 276 insertions(+), 242 deletions(-)
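
For reference, a minimal userspace sketch of how the new split interface is meant to be driven (not part of the patch itself): assign a host-side MSI that is delivered to the guest as INTx (the conversion that the dropped msi2intx parameter used to control), then tear it down again. It assumes vmfd is an open VM fd, the device was already registered with KVM_ASSIGN_PCI_DEVICE, and dev_id / guest_irq are illustrative values.

/* Sketch only; error handling reduced to the ioctl return value. */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int assign_msi_as_intx(int vmfd, __u32 dev_id, __u32 guest_irq)
{
	struct kvm_assigned_irq irq;

	memset(&irq, 0, sizeof(irq));
	irq.assigned_dev_id = dev_id;
	irq.guest_irq = guest_irq;
	/* host side uses MSI, guest sees INTx: userspace picks the combination */
	irq.flags = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_INTX;

	return ioctl(vmfd, KVM_ASSIGN_DEV_IRQ, &irq);
}

static int deassign_irq(int vmfd, __u32 dev_id)
{
	struct kvm_assigned_irq irq;

	memset(&irq, 0, sizeof(irq));
	irq.assigned_dev_id = dev_id;
	/* flags select what to tear down: host side, guest side, or both */
	irq.flags = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_INTX;

	return ioctl(vmfd, KVM_DEASSIGN_DEV_IRQ, &irq);
}
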
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 43e049a2ccf4..41123fc8613e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1026,6 +1026,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_SYNC_MMU:
 	case KVM_CAP_REINJECT_CONTROL:
 	case KVM_CAP_IRQ_INJECT_STATUS:
+	case KVM_CAP_ASSIGN_DEV_IRQ:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 640835ed2708..644e3a9f47db 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -412,6 +412,7 @@ struct kvm_trace_rec {
 #ifdef __KVM_HAVE_MSIX
 #define KVM_CAP_DEVICE_MSIX 28
 #endif
+#define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
 
@@ -485,8 +486,10 @@ struct kvm_irq_routing {
 #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
 			struct kvm_assigned_pci_dev)
 #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
+/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
 #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
 			struct kvm_assigned_irq)
+#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
 #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
 #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
 			struct kvm_assigned_pci_dev)
@@ -494,6 +497,7 @@ struct kvm_irq_routing {
 			_IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr)
 #define KVM_ASSIGN_SET_MSIX_ENTRY \
 			_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
+#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
 
 /*
  * ioctls for vcpu fds
@@ -584,6 +588,8 @@ struct kvm_debug_guest {
 #define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18)
 #define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19)
 
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
+
 struct kvm_assigned_pci_dev {
 	__u32 assigned_dev_id;
 	__u32 busnr;
@@ -594,6 +600,17 @@ struct kvm_assigned_pci_dev {
 	};
 };
 
+#define KVM_DEV_IRQ_HOST_INTX    (1 << 0)
+#define KVM_DEV_IRQ_HOST_MSI     (1 << 1)
+#define KVM_DEV_IRQ_HOST_MSIX    (1 << 2)
+
+#define KVM_DEV_IRQ_GUEST_INTX   (1 << 8)
+#define KVM_DEV_IRQ_GUEST_MSI    (1 << 9)
+#define KVM_DEV_IRQ_GUEST_MSIX   (1 << 10)
+
+#define KVM_DEV_IRQ_HOST_MASK    0x00ff
+#define KVM_DEV_IRQ_GUEST_MASK   0xff00
+
 struct kvm_assigned_irq {
 	__u32 assigned_dev_id;
 	__u32 host_irq;
@@ -609,15 +626,6 @@ struct kvm_assigned_irq {
 	};
 };
 
-#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
-
-#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION	KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
-#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
-
-#define KVM_DEV_IRQ_ASSIGN_MSIX_ACTION  (KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX |\
-					 KVM_DEV_IRQ_ASSIGN_MASK_MSIX)
-#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX  (1 << 1)
-#define KVM_DEV_IRQ_ASSIGN_MASK_MSIX    (1 << 2)
 
 struct kvm_assigned_msix_nr {
 	__u32 assigned_dev_id;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index fb60f31c4fb3..40e49ede8f91 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -339,11 +339,6 @@ struct kvm_assigned_dev_kernel {
 	struct msix_entry *host_msix_entries;
 	int guest_irq;
 	struct kvm_guest_msix_entry *guest_msix_entries;
-#define KVM_ASSIGNED_DEV_GUEST_INTX	(1 << 0)
-#define KVM_ASSIGNED_DEV_GUEST_MSI	(1 << 1)
-#define KVM_ASSIGNED_DEV_HOST_INTX	(1 << 8)
-#define KVM_ASSIGNED_DEV_HOST_MSI	(1 << 9)
-#define KVM_ASSIGNED_DEV_MSIX		((1 << 2) | (1 << 10))
 	unsigned long irq_requested_type;
 	int irq_source_id;
 	int flags;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3bed82754a5d..792fb7fae0a3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/bitops.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -60,9 +61,6 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
-static int msi2intx = 1;
-module_param(msi2intx, bool, 0);
-
 DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
@@ -132,7 +130,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 	 * finer-grained lock, update this
 	 */
 	mutex_lock(&kvm->lock);
-	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		struct kvm_guest_msix_entry *guest_entries =
 			assigned_dev->guest_msix_entries;
 		for (i = 0; i < assigned_dev->entries_nr; i++) {
@@ -152,7 +150,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
 			    assigned_dev->guest_irq, 1);
 		if (assigned_dev->irq_requested_type &
-				KVM_ASSIGNED_DEV_GUEST_MSI) {
+				KVM_DEV_IRQ_GUEST_MSI) {
 			enable_irq(assigned_dev->host_irq);
 			assigned_dev->host_irq_disabled = false;
 		}
@@ -166,7 +164,7 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 	struct kvm_assigned_dev_kernel *assigned_dev =
 		(struct kvm_assigned_dev_kernel *) dev_id;
 
-	if (assigned_dev->irq_requested_type == KVM_ASSIGNED_DEV_MSIX) {
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		int index = find_index_from_host_irq(assigned_dev, irq);
 		if (index < 0)
 			return IRQ_HANDLED;
@@ -204,22 +202,22 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 	}
 }
 
-/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
-static void kvm_free_assigned_irq(struct kvm *kvm,
-				  struct kvm_assigned_dev_kernel *assigned_dev)
+static void deassign_guest_irq(struct kvm *kvm,
+			       struct kvm_assigned_dev_kernel *assigned_dev)
 {
-	if (!irqchip_in_kernel(kvm))
-		return;
-
 	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
+	assigned_dev->ack_notifier.gsi = -1;
 
 	if (assigned_dev->irq_source_id != -1)
 		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
 	assigned_dev->irq_source_id = -1;
+	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
+}
 
-	if (!assigned_dev->irq_requested_type)
-		return;
-
+/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
+static void deassign_host_irq(struct kvm *kvm,
+			      struct kvm_assigned_dev_kernel *assigned_dev)
+{
 	/*
 	 * In kvm_free_device_irq, cancel_work_sync return true if:
 	 * 1. work is scheduled, and then cancelled.
@@ -236,7 +234,7 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
 	 * now, the kvm state is still legal for probably we also have to wait
 	 * interrupt_work done.
 	 */
-	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		int i;
 		for (i = 0; i < assigned_dev->entries_nr; i++)
 			disable_irq_nosync(assigned_dev->
@@ -259,14 +257,41 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
 
 		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
 
-		if (assigned_dev->irq_requested_type &
-				KVM_ASSIGNED_DEV_HOST_MSI)
+		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
 			pci_disable_msi(assigned_dev->dev);
 	}
 
-	assigned_dev->irq_requested_type = 0;
+	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
+}
+
+static int kvm_deassign_irq(struct kvm *kvm,
+			    struct kvm_assigned_dev_kernel *assigned_dev,
+			    unsigned long irq_requested_type)
+{
+	unsigned long guest_irq_type, host_irq_type;
+
+	if (!irqchip_in_kernel(kvm))
+		return -EINVAL;
+	/* no irq assignment to deassign */
+	if (!assigned_dev->irq_requested_type)
+		return -ENXIO;
+
+	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
+	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
+
+	if (host_irq_type)
+		deassign_host_irq(kvm, assigned_dev);
+	if (guest_irq_type)
+		deassign_guest_irq(kvm, assigned_dev);
+
+	return 0;
 }
 
+static void kvm_free_assigned_irq(struct kvm *kvm,
+				  struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
+}
 
 static void kvm_free_assigned_device(struct kvm *kvm,
 				     struct kvm_assigned_dev_kernel
@@ -298,256 +323,244 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
 	}
 }
 
-static int assigned_device_update_intx(struct kvm *kvm,
-				       struct kvm_assigned_dev_kernel *adev,
-				       struct kvm_assigned_irq *airq)
+static int assigned_device_enable_host_intx(struct kvm *kvm,
+					    struct kvm_assigned_dev_kernel *dev)
 {
-	adev->guest_irq = airq->guest_irq;
-	adev->ack_notifier.gsi = airq->guest_irq;
-
-	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX)
-		return 0;
-
-	if (irqchip_in_kernel(kvm)) {
-		if (!msi2intx &&
-		    (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) {
-			free_irq(adev->host_irq, (void *)adev);
-			pci_disable_msi(adev->dev);
-		}
+	dev->host_irq = dev->dev->irq;
+	/* Even though this is PCI, we don't want to use shared
+	 * interrupts. Sharing host devices with guest-assigned devices
+	 * on the same interrupt line is not a happy situation: there
+	 * are going to be long delays in accepting, acking, etc.
+	 */
+	if (request_irq(dev->host_irq, kvm_assigned_dev_intr,
+			0, "kvm_assigned_intx_device", (void *)dev))
+		return -EIO;
+	return 0;
+}
 
-		if (!capable(CAP_SYS_RAWIO))
-			return -EPERM;
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_host_msi(struct kvm *kvm,
+					   struct kvm_assigned_dev_kernel *dev)
+{
+	int r;
 
-		if (airq->host_irq)
-			adev->host_irq = airq->host_irq;
-		else
-			adev->host_irq = adev->dev->irq;
+	if (!dev->dev->msi_enabled) {
+		r = pci_enable_msi(dev->dev);
+		if (r)
+			return r;
+	}
 
-		/* Even though this is PCI, we don't want to use shared
-		 * interrupts. Sharing host devices with guest-assigned devices
-		 * on the same interrupt line is not a happy situation: there
-		 * are going to be long delays in accepting, acking, etc.
-		 */
-		if (request_irq(adev->host_irq, kvm_assigned_dev_intr,
-				0, "kvm_assigned_intx_device", (void *)adev))
-			return -EIO;
+	dev->host_irq = dev->dev->irq;
+	if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0,
+			"kvm_assigned_msi_device", (void *)dev)) {
+		pci_disable_msi(dev->dev);
+		return -EIO;
 	}
 
-	adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX |
-				   KVM_ASSIGNED_DEV_HOST_INTX;
 	return 0;
 }
+#endif
 
-#ifdef CONFIG_X86
-static int assigned_device_update_msi(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *adev,
-			struct kvm_assigned_irq *airq)
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_host_msix(struct kvm *kvm,
+					    struct kvm_assigned_dev_kernel *dev)
 {
-	int r;
+	int i, r = -EINVAL;
 
-	adev->guest_irq = airq->guest_irq;
-	if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
-		/* x86 don't care upper address of guest msi message addr */
-		adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI;
-		adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX;
-		adev->ack_notifier.gsi = -1;
-	} else if (msi2intx) {
-		adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX;
-		adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
-		adev->ack_notifier.gsi = airq->guest_irq;
-	} else {
-		/*
-		 * Guest require to disable device MSI, we disable MSI and
-		 * re-enable INTx by default again. Notice it's only for
-		 * non-msi2intx.
-		 */
-		assigned_device_update_intx(kvm, adev, airq);
-		return 0;
-	}
+	/* host_msix_entries and guest_msix_entries should have been
+	 * initialized */
+	if (dev->entries_nr == 0)
+		return r;
 
-	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
-		return 0;
+	r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
+	if (r)
+		return r;
 
-	if (irqchip_in_kernel(kvm)) {
-		if (!msi2intx) {
-			if (adev->irq_requested_type &
-					KVM_ASSIGNED_DEV_HOST_INTX)
-				free_irq(adev->host_irq, (void *)adev);
+	for (i = 0; i < dev->entries_nr; i++) {
+		r = request_irq(dev->host_msix_entries[i].vector,
+				kvm_assigned_dev_intr, 0,
+				"kvm_assigned_msix_device",
+				(void *)dev);
+		/* FIXME: free requested_irq's on failure */
+		if (r)
+			return r;
+	}
 
-			r = pci_enable_msi(adev->dev);
-			if (r)
-				return r;
-		}
+	return 0;
+}
 
-		adev->host_irq = adev->dev->irq;
-		if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0,
-				"kvm_assigned_msi_device", (void *)adev))
-			return -EIO;
-	}
+#endif
 
-	if (!msi2intx)
-		adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI;
+static int assigned_device_enable_guest_intx(struct kvm *kvm,
+				struct kvm_assigned_dev_kernel *dev,
+				struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = irq->guest_irq;
+	return 0;
+}
 
-	adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI;
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_guest_msi(struct kvm *kvm,
+				struct kvm_assigned_dev_kernel *dev,
+				struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = -1;
 	return 0;
 }
 #endif
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_guest_msix(struct kvm *kvm,
+				struct kvm_assigned_dev_kernel *dev,
+				struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = -1;
+	return 0;
+}
+#endif
+
+static int assign_host_irq(struct kvm *kvm,
+			   struct kvm_assigned_dev_kernel *dev,
+			   __u32 host_irq_type)
+{
+	int r = -EEXIST;
+
+	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
+		return r;
 
+	switch (host_irq_type) {
+	case KVM_DEV_IRQ_HOST_INTX:
+		r = assigned_device_enable_host_intx(kvm, dev);
+		break;
+#ifdef __KVM_HAVE_MSI
+	case KVM_DEV_IRQ_HOST_MSI:
+		r = assigned_device_enable_host_msi(kvm, dev);
+		break;
+#endif
 #ifdef __KVM_HAVE_MSIX
-static int assigned_device_update_msix(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *adev,
-			struct kvm_assigned_irq *airq)
-{
-	/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
-	int i, r;
-
-	adev->ack_notifier.gsi = -1;
-
-	if (irqchip_in_kernel(kvm)) {
-		if (airq->flags & KVM_DEV_IRQ_ASSIGN_MASK_MSIX)
-			return -ENOTTY;
-
-		if (!(airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX)) {
-			/* Guest disable MSI-X */
-			kvm_free_assigned_irq(kvm, adev);
-			if (msi2intx) {
-				pci_enable_msi(adev->dev);
-				if (adev->dev->msi_enabled)
-					return assigned_device_update_msi(kvm,
-							adev, airq);
-			}
-			return assigned_device_update_intx(kvm, adev, airq);
-		}
+	case KVM_DEV_IRQ_HOST_MSIX:
+		r = assigned_device_enable_host_msix(kvm, dev);
+		break;
+#endif
+	default:
+		r = -EINVAL;
+	}
 
-		/* host_msix_entries and guest_msix_entries should have been
-		 * initialized */
-		if (adev->entries_nr == 0)
-			return -EINVAL;
+	if (!r)
+		dev->irq_requested_type |= host_irq_type;
 
-		kvm_free_assigned_irq(kvm, adev);
+	return r;
+}
 
-		r = pci_enable_msix(adev->dev, adev->host_msix_entries,
-				    adev->entries_nr);
-		if (r)
-			return r;
+static int assign_guest_irq(struct kvm *kvm,
+			    struct kvm_assigned_dev_kernel *dev,
+			    struct kvm_assigned_irq *irq,
+			    unsigned long guest_irq_type)
+{
+	int id;
+	int r = -EEXIST;
 
-		for (i = 0; i < adev->entries_nr; i++) {
-			r = request_irq((adev->host_msix_entries + i)->vector,
-					kvm_assigned_dev_intr, 0,
-					"kvm_assigned_msix_device",
-					(void *)adev);
-			if (r)
-				return r;
-		}
+	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
+		return r;
+
+	id = kvm_request_irq_source_id(kvm);
+	if (id < 0)
+		return id;
+
+	dev->irq_source_id = id;
+
+	switch (guest_irq_type) {
+	case KVM_DEV_IRQ_GUEST_INTX:
+		r = assigned_device_enable_guest_intx(kvm, dev, irq);
+		break;
+#ifdef __KVM_HAVE_MSI
+	case KVM_DEV_IRQ_GUEST_MSI:
+		r = assigned_device_enable_guest_msi(kvm, dev, irq);
+		break;
+#endif
+#ifdef __KVM_HAVE_MSIX
+	case KVM_DEV_IRQ_GUEST_MSIX:
+		r = assigned_device_enable_guest_msix(kvm, dev, irq);
+		break;
+#endif
+	default:
+		r = -EINVAL;
 	}
 
-	adev->irq_requested_type |= KVM_ASSIGNED_DEV_MSIX;
+	if (!r) {
+		dev->irq_requested_type |= guest_irq_type;
+		kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
+	} else
+		kvm_free_irq_source_id(kvm, dev->irq_source_id);
 
-	return 0;
+	return r;
 }
-#endif
 
+/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
-				   struct kvm_assigned_irq
-				   *assigned_irq)
+				   struct kvm_assigned_irq *assigned_irq)
 {
-	int r = 0;
+	int r = -EINVAL;
 	struct kvm_assigned_dev_kernel *match;
-	u32 current_flags = 0, changed_flags;
+	unsigned long host_irq_type, guest_irq_type;
 
-	mutex_lock(&kvm->lock);
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
 
+	if (!irqchip_in_kernel(kvm))
+		return r;
+
+	mutex_lock(&kvm->lock);
+	r = -ENODEV;
 	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
 				      assigned_irq->assigned_dev_id);
-	if (!match) {
-		mutex_unlock(&kvm->lock);
-		return -EINVAL;
-	}
-
-	if (!match->irq_requested_type) {
-		INIT_WORK(&match->interrupt_work,
-			  kvm_assigned_dev_interrupt_work_handler);
-		if (irqchip_in_kernel(kvm)) {
-			/* Register ack nofitier */
-			match->ack_notifier.gsi = -1;
-			match->ack_notifier.irq_acked =
-					kvm_assigned_dev_ack_irq;
-			kvm_register_irq_ack_notifier(kvm,
-					&match->ack_notifier);
-
-			/* Request IRQ source ID */
-			r = kvm_request_irq_source_id(kvm);
-			if (r < 0)
-				goto out_release;
-			else
-				match->irq_source_id = r;
-
-#ifdef CONFIG_X86
-			/* Determine host device irq type, we can know the
-			 * result from dev->msi_enabled */
-			if (msi2intx)
-				pci_enable_msi(match->dev);
-#endif
-		}
-	}
+	if (!match)
+		goto out;
 
-	if (match->irq_requested_type & KVM_ASSIGNED_DEV_MSIX)
-		current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX;
-	else if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
-		 (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI))
-		current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
+	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
+	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
 
-	changed_flags = assigned_irq->flags ^ current_flags;
+	r = -EINVAL;
+	/* can only assign one type at a time */
+	if (hweight_long(host_irq_type) > 1)
+		goto out;
+	if (hweight_long(guest_irq_type) > 1)
+		goto out;
+	if (host_irq_type == 0 && guest_irq_type == 0)
+		goto out;
 
-#ifdef __KVM_HAVE_MSIX
-	if (changed_flags & KVM_DEV_IRQ_ASSIGN_MSIX_ACTION) {
-		r = assigned_device_update_msix(kvm, match, assigned_irq);
-		if (r) {
-			printk(KERN_WARNING "kvm: failed to execute "
-					"MSI-X action!\n");
-			goto out_release;
-		}
-	} else
-#endif
-	if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) ||
-	    (msi2intx && match->dev->msi_enabled)) {
-#ifdef CONFIG_X86
-		r = assigned_device_update_msi(kvm, match, assigned_irq);
-		if (r) {
-			printk(KERN_WARNING "kvm: failed to enable "
-					"MSI device!\n");
-			goto out_release;
-		}
-#else
-		r = -ENOTTY;
-#endif
-	} else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
-		/* Host device IRQ 0 means don't support INTx */
-		if (!msi2intx) {
-			printk(KERN_WARNING
-			       "kvm: wait device to enable MSI!\n");
-			r = 0;
-		} else {
-			printk(KERN_WARNING
-			       "kvm: failed to enable MSI device!\n");
-			r = -ENOTTY;
-			goto out_release;
-		}
-	} else {
-		/* Non-sharing INTx mode */
-		r = assigned_device_update_intx(kvm, match, assigned_irq);
-		if (r) {
-			printk(KERN_WARNING "kvm: failed to enable "
-					"INTx device!\n");
-			goto out_release;
-		}
-	}
+	r = 0;
+	if (host_irq_type)
+		r = assign_host_irq(kvm, match, host_irq_type);
+	if (r)
+		goto out;
 
+	if (guest_irq_type)
+		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
+out:
 	mutex_unlock(&kvm->lock);
 	return r;
-out_release:
+}
+
+static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
+					 struct kvm_assigned_irq
+					 *assigned_irq)
+{
+	int r = -ENODEV;
+	struct kvm_assigned_dev_kernel *match;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_irq->assigned_dev_id);
+	if (!match)
+		goto out;
+
+	r = kvm_deassign_irq(kvm, match, assigned_irq->flags);
+out:
 	mutex_unlock(&kvm->lock);
-	kvm_free_assigned_device(kvm, match);
 	return r;
 }
 
@@ -565,7 +578,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 				      assigned_dev->assigned_dev_id);
 	if (match) {
 		/* device already assigned */
-		r = -EINVAL;
+		r = -EEXIST;
 		goto out;
 	}
 
@@ -604,6 +617,9 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	match->dev = dev;
 	match->irq_source_id = -1;
 	match->kvm = kvm;
+	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
+	INIT_WORK(&match->interrupt_work,
+		  kvm_assigned_dev_interrupt_work_handler);
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
@@ -2084,6 +2100,11 @@ static long kvm_vm_ioctl(struct file *filp,
 		break;
 	}
 	case KVM_ASSIGN_IRQ: {
+		r = -EOPNOTSUPP;
+		break;
+	}
+#ifdef KVM_CAP_ASSIGN_DEV_IRQ
+	case KVM_ASSIGN_DEV_IRQ: {
 		struct kvm_assigned_irq assigned_irq;
 
 		r = -EFAULT;
@@ -2094,6 +2115,18 @@ static long kvm_vm_ioctl(struct file *filp,
 			goto out;
 		break;
 	}
+	case KVM_DEASSIGN_DEV_IRQ: {
+		struct kvm_assigned_irq assigned_irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
+			goto out;
+		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
+		if (r)
+			goto out;
+		break;
+	}
+#endif
 #endif
 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
 	case KVM_DEASSIGN_PCI_DEVICE: {
@@ -2596,9 +2629,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
 	kvm_preempt_ops.sched_in = kvm_sched_in;
 	kvm_preempt_ops.sched_out = kvm_sched_out;
-#ifndef CONFIG_X86
-	msi2intx = 0;
-#endif
 
 	return 0;
 