diff options
author | Sheng Yang <sheng@linux.intel.com> | 2009-03-12 09:45:39 -0400 |
---|---|---|
committer | Avi Kivity <avi@redhat.com> | 2009-06-10 04:48:29 -0400 |
commit | e56d532f20c890a06bbe7cd479f4201e3a03cd73 (patch) | |
tree | 6c8b1a019a77bf2081ac7482eea322f5b0a636e8 | |
parent | 386eb6e8b3caface8a0514da70a47c05cabb5b96 (diff) |
KVM: Device assignment framework rework
After discussion with Marcelo, we decided to rework the device assignment framework
together. The old problem was that the kernel logic was unnecessarily complex. So Marcelo
suggested splitting it in a more elegant way:
1. Split host IRQ assignment and guest IRQ assignment, and let userspace determine the
combination. Also discard the msi2intx parameter; userspace can specify
KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_INTX in assigned_irq->flags to
enable MSI to INTx conversion.
2. Split assign IRQ and deassign IRQ. Introduce two new ioctls:
KVM_ASSIGN_DEV_IRQ and KVM_DEASSIGN_DEV_IRQ.
This patch also fixes the reversed _IOR vs _IOW in the definitions (by deprecating the
old interface).
[avi: replace homemade bitcount() by hweight_long()]
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
-rw-r--r-- | arch/x86/kvm/x86.c | 1 | ||||
-rw-r--r-- | include/linux/kvm.h | 26 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 5 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 486 |
4 files changed, 276 insertions, 242 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 43e049a2ccf4..41123fc8613e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -1026,6 +1026,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1026 | case KVM_CAP_SYNC_MMU: | 1026 | case KVM_CAP_SYNC_MMU: |
1027 | case KVM_CAP_REINJECT_CONTROL: | 1027 | case KVM_CAP_REINJECT_CONTROL: |
1028 | case KVM_CAP_IRQ_INJECT_STATUS: | 1028 | case KVM_CAP_IRQ_INJECT_STATUS: |
1029 | case KVM_CAP_ASSIGN_DEV_IRQ: | ||
1029 | r = 1; | 1030 | r = 1; |
1030 | break; | 1031 | break; |
1031 | case KVM_CAP_COALESCED_MMIO: | 1032 | case KVM_CAP_COALESCED_MMIO: |
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 640835ed2708..644e3a9f47db 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h | |||
@@ -412,6 +412,7 @@ struct kvm_trace_rec { | |||
412 | #ifdef __KVM_HAVE_MSIX | 412 | #ifdef __KVM_HAVE_MSIX |
413 | #define KVM_CAP_DEVICE_MSIX 28 | 413 | #define KVM_CAP_DEVICE_MSIX 28 |
414 | #endif | 414 | #endif |
415 | #define KVM_CAP_ASSIGN_DEV_IRQ 29 | ||
415 | /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ | 416 | /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ |
416 | #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 | 417 | #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 |
417 | 418 | ||
@@ -485,8 +486,10 @@ struct kvm_irq_routing { | |||
485 | #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ | 486 | #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ |
486 | struct kvm_assigned_pci_dev) | 487 | struct kvm_assigned_pci_dev) |
487 | #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) | 488 | #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) |
489 | /* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ | ||
488 | #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ | 490 | #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ |
489 | struct kvm_assigned_irq) | 491 | struct kvm_assigned_irq) |
492 | #define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) | ||
490 | #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) | 493 | #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) |
491 | #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ | 494 | #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ |
492 | struct kvm_assigned_pci_dev) | 495 | struct kvm_assigned_pci_dev) |
@@ -494,6 +497,7 @@ struct kvm_irq_routing { | |||
494 | _IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr) | 497 | _IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr) |
495 | #define KVM_ASSIGN_SET_MSIX_ENTRY \ | 498 | #define KVM_ASSIGN_SET_MSIX_ENTRY \ |
496 | _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry) | 499 | _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry) |
500 | #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) | ||
497 | 501 | ||
498 | /* | 502 | /* |
499 | * ioctls for vcpu fds | 503 | * ioctls for vcpu fds |
@@ -584,6 +588,8 @@ struct kvm_debug_guest { | |||
584 | #define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) | 588 | #define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) |
585 | #define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) | 589 | #define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) |
586 | 590 | ||
591 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) | ||
592 | |||
587 | struct kvm_assigned_pci_dev { | 593 | struct kvm_assigned_pci_dev { |
588 | __u32 assigned_dev_id; | 594 | __u32 assigned_dev_id; |
589 | __u32 busnr; | 595 | __u32 busnr; |
@@ -594,6 +600,17 @@ struct kvm_assigned_pci_dev { | |||
594 | }; | 600 | }; |
595 | }; | 601 | }; |
596 | 602 | ||
603 | #define KVM_DEV_IRQ_HOST_INTX (1 << 0) | ||
604 | #define KVM_DEV_IRQ_HOST_MSI (1 << 1) | ||
605 | #define KVM_DEV_IRQ_HOST_MSIX (1 << 2) | ||
606 | |||
607 | #define KVM_DEV_IRQ_GUEST_INTX (1 << 8) | ||
608 | #define KVM_DEV_IRQ_GUEST_MSI (1 << 9) | ||
609 | #define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) | ||
610 | |||
611 | #define KVM_DEV_IRQ_HOST_MASK 0x00ff | ||
612 | #define KVM_DEV_IRQ_GUEST_MASK 0xff00 | ||
613 | |||
597 | struct kvm_assigned_irq { | 614 | struct kvm_assigned_irq { |
598 | __u32 assigned_dev_id; | 615 | __u32 assigned_dev_id; |
599 | __u32 host_irq; | 616 | __u32 host_irq; |
@@ -609,15 +626,6 @@ struct kvm_assigned_irq { | |||
609 | }; | 626 | }; |
610 | }; | 627 | }; |
611 | 628 | ||
612 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) | ||
613 | |||
614 | #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI | ||
615 | #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) | ||
616 | |||
617 | #define KVM_DEV_IRQ_ASSIGN_MSIX_ACTION (KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX |\ | ||
618 | KVM_DEV_IRQ_ASSIGN_MASK_MSIX) | ||
619 | #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX (1 << 1) | ||
620 | #define KVM_DEV_IRQ_ASSIGN_MASK_MSIX (1 << 2) | ||
621 | 629 | ||
622 | struct kvm_assigned_msix_nr { | 630 | struct kvm_assigned_msix_nr { |
623 | __u32 assigned_dev_id; | 631 | __u32 assigned_dev_id; |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fb60f31c4fb3..40e49ede8f91 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -339,11 +339,6 @@ struct kvm_assigned_dev_kernel { | |||
339 | struct msix_entry *host_msix_entries; | 339 | struct msix_entry *host_msix_entries; |
340 | int guest_irq; | 340 | int guest_irq; |
341 | struct kvm_guest_msix_entry *guest_msix_entries; | 341 | struct kvm_guest_msix_entry *guest_msix_entries; |
342 | #define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) | ||
343 | #define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1) | ||
344 | #define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8) | ||
345 | #define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) | ||
346 | #define KVM_ASSIGNED_DEV_MSIX ((1 << 2) | (1 << 10)) | ||
347 | unsigned long irq_requested_type; | 342 | unsigned long irq_requested_type; |
348 | int irq_source_id; | 343 | int irq_source_id; |
349 | int flags; | 344 | int flags; |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3bed82754a5d..792fb7fae0a3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/pagemap.h> | 41 | #include <linux/pagemap.h> |
42 | #include <linux/mman.h> | 42 | #include <linux/mman.h> |
43 | #include <linux/swap.h> | 43 | #include <linux/swap.h> |
44 | #include <linux/bitops.h> | ||
44 | 45 | ||
45 | #include <asm/processor.h> | 46 | #include <asm/processor.h> |
46 | #include <asm/io.h> | 47 | #include <asm/io.h> |
@@ -60,9 +61,6 @@ | |||
60 | MODULE_AUTHOR("Qumranet"); | 61 | MODULE_AUTHOR("Qumranet"); |
61 | MODULE_LICENSE("GPL"); | 62 | MODULE_LICENSE("GPL"); |
62 | 63 | ||
63 | static int msi2intx = 1; | ||
64 | module_param(msi2intx, bool, 0); | ||
65 | |||
66 | DEFINE_SPINLOCK(kvm_lock); | 64 | DEFINE_SPINLOCK(kvm_lock); |
67 | LIST_HEAD(vm_list); | 65 | LIST_HEAD(vm_list); |
68 | 66 | ||
@@ -132,7 +130,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) | |||
132 | * finer-grained lock, update this | 130 | * finer-grained lock, update this |
133 | */ | 131 | */ |
134 | mutex_lock(&kvm->lock); | 132 | mutex_lock(&kvm->lock); |
135 | if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) { | 133 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { |
136 | struct kvm_guest_msix_entry *guest_entries = | 134 | struct kvm_guest_msix_entry *guest_entries = |
137 | assigned_dev->guest_msix_entries; | 135 | assigned_dev->guest_msix_entries; |
138 | for (i = 0; i < assigned_dev->entries_nr; i++) { | 136 | for (i = 0; i < assigned_dev->entries_nr; i++) { |
@@ -152,7 +150,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) | |||
152 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | 150 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, |
153 | assigned_dev->guest_irq, 1); | 151 | assigned_dev->guest_irq, 1); |
154 | if (assigned_dev->irq_requested_type & | 152 | if (assigned_dev->irq_requested_type & |
155 | KVM_ASSIGNED_DEV_GUEST_MSI) { | 153 | KVM_DEV_IRQ_GUEST_MSI) { |
156 | enable_irq(assigned_dev->host_irq); | 154 | enable_irq(assigned_dev->host_irq); |
157 | assigned_dev->host_irq_disabled = false; | 155 | assigned_dev->host_irq_disabled = false; |
158 | } | 156 | } |
@@ -166,7 +164,7 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) | |||
166 | struct kvm_assigned_dev_kernel *assigned_dev = | 164 | struct kvm_assigned_dev_kernel *assigned_dev = |
167 | (struct kvm_assigned_dev_kernel *) dev_id; | 165 | (struct kvm_assigned_dev_kernel *) dev_id; |
168 | 166 | ||
169 | if (assigned_dev->irq_requested_type == KVM_ASSIGNED_DEV_MSIX) { | 167 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { |
170 | int index = find_index_from_host_irq(assigned_dev, irq); | 168 | int index = find_index_from_host_irq(assigned_dev, irq); |
171 | if (index < 0) | 169 | if (index < 0) |
172 | return IRQ_HANDLED; | 170 | return IRQ_HANDLED; |
@@ -204,22 +202,22 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
204 | } | 202 | } |
205 | } | 203 | } |
206 | 204 | ||
207 | /* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ | 205 | static void deassign_guest_irq(struct kvm *kvm, |
208 | static void kvm_free_assigned_irq(struct kvm *kvm, | 206 | struct kvm_assigned_dev_kernel *assigned_dev) |
209 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
210 | { | 207 | { |
211 | if (!irqchip_in_kernel(kvm)) | ||
212 | return; | ||
213 | |||
214 | kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier); | 208 | kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier); |
209 | assigned_dev->ack_notifier.gsi = -1; | ||
215 | 210 | ||
216 | if (assigned_dev->irq_source_id != -1) | 211 | if (assigned_dev->irq_source_id != -1) |
217 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); | 212 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); |
218 | assigned_dev->irq_source_id = -1; | 213 | assigned_dev->irq_source_id = -1; |
214 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); | ||
215 | } | ||
219 | 216 | ||
220 | if (!assigned_dev->irq_requested_type) | 217 | /* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ |
221 | return; | 218 | static void deassign_host_irq(struct kvm *kvm, |
222 | 219 | struct kvm_assigned_dev_kernel *assigned_dev) | |
220 | { | ||
223 | /* | 221 | /* |
224 | * In kvm_free_device_irq, cancel_work_sync return true if: | 222 | * In kvm_free_device_irq, cancel_work_sync return true if: |
225 | * 1. work is scheduled, and then cancelled. | 223 | * 1. work is scheduled, and then cancelled. |
@@ -236,7 +234,7 @@ static void kvm_free_assigned_irq(struct kvm *kvm, | |||
236 | * now, the kvm state is still legal for probably we also have to wait | 234 | * now, the kvm state is still legal for probably we also have to wait |
237 | * interrupt_work done. | 235 | * interrupt_work done. |
238 | */ | 236 | */ |
239 | if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) { | 237 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { |
240 | int i; | 238 | int i; |
241 | for (i = 0; i < assigned_dev->entries_nr; i++) | 239 | for (i = 0; i < assigned_dev->entries_nr; i++) |
242 | disable_irq_nosync(assigned_dev-> | 240 | disable_irq_nosync(assigned_dev-> |
@@ -259,14 +257,41 @@ static void kvm_free_assigned_irq(struct kvm *kvm, | |||
259 | 257 | ||
260 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); | 258 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); |
261 | 259 | ||
262 | if (assigned_dev->irq_requested_type & | 260 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) |
263 | KVM_ASSIGNED_DEV_HOST_MSI) | ||
264 | pci_disable_msi(assigned_dev->dev); | 261 | pci_disable_msi(assigned_dev->dev); |
265 | } | 262 | } |
266 | 263 | ||
267 | assigned_dev->irq_requested_type = 0; | 264 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); |
265 | } | ||
266 | |||
267 | static int kvm_deassign_irq(struct kvm *kvm, | ||
268 | struct kvm_assigned_dev_kernel *assigned_dev, | ||
269 | unsigned long irq_requested_type) | ||
270 | { | ||
271 | unsigned long guest_irq_type, host_irq_type; | ||
272 | |||
273 | if (!irqchip_in_kernel(kvm)) | ||
274 | return -EINVAL; | ||
275 | /* no irq assignment to deassign */ | ||
276 | if (!assigned_dev->irq_requested_type) | ||
277 | return -ENXIO; | ||
278 | |||
279 | host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; | ||
280 | guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; | ||
281 | |||
282 | if (host_irq_type) | ||
283 | deassign_host_irq(kvm, assigned_dev); | ||
284 | if (guest_irq_type) | ||
285 | deassign_guest_irq(kvm, assigned_dev); | ||
286 | |||
287 | return 0; | ||
268 | } | 288 | } |
269 | 289 | ||
290 | static void kvm_free_assigned_irq(struct kvm *kvm, | ||
291 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
292 | { | ||
293 | kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); | ||
294 | } | ||
270 | 295 | ||
271 | static void kvm_free_assigned_device(struct kvm *kvm, | 296 | static void kvm_free_assigned_device(struct kvm *kvm, |
272 | struct kvm_assigned_dev_kernel | 297 | struct kvm_assigned_dev_kernel |
@@ -298,256 +323,244 @@ void kvm_free_all_assigned_devices(struct kvm *kvm) | |||
298 | } | 323 | } |
299 | } | 324 | } |
300 | 325 | ||
301 | static int assigned_device_update_intx(struct kvm *kvm, | 326 | static int assigned_device_enable_host_intx(struct kvm *kvm, |
302 | struct kvm_assigned_dev_kernel *adev, | 327 | struct kvm_assigned_dev_kernel *dev) |
303 | struct kvm_assigned_irq *airq) | ||
304 | { | 328 | { |
305 | adev->guest_irq = airq->guest_irq; | 329 | dev->host_irq = dev->dev->irq; |
306 | adev->ack_notifier.gsi = airq->guest_irq; | 330 | /* Even though this is PCI, we don't want to use shared |
307 | 331 | * interrupts. Sharing host devices with guest-assigned devices | |
308 | if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX) | 332 | * on the same interrupt line is not a happy situation: there |
309 | return 0; | 333 | * are going to be long delays in accepting, acking, etc. |
310 | 334 | */ | |
311 | if (irqchip_in_kernel(kvm)) { | 335 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, |
312 | if (!msi2intx && | 336 | 0, "kvm_assigned_intx_device", (void *)dev)) |
313 | (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) { | 337 | return -EIO; |
314 | free_irq(adev->host_irq, (void *)adev); | 338 | return 0; |
315 | pci_disable_msi(adev->dev); | 339 | } |
316 | } | ||
317 | 340 | ||
318 | if (!capable(CAP_SYS_RAWIO)) | 341 | #ifdef __KVM_HAVE_MSI |
319 | return -EPERM; | 342 | static int assigned_device_enable_host_msi(struct kvm *kvm, |
343 | struct kvm_assigned_dev_kernel *dev) | ||
344 | { | ||
345 | int r; | ||
320 | 346 | ||
321 | if (airq->host_irq) | 347 | if (!dev->dev->msi_enabled) { |
322 | adev->host_irq = airq->host_irq; | 348 | r = pci_enable_msi(dev->dev); |
323 | else | 349 | if (r) |
324 | adev->host_irq = adev->dev->irq; | 350 | return r; |
351 | } | ||
325 | 352 | ||
326 | /* Even though this is PCI, we don't want to use shared | 353 | dev->host_irq = dev->dev->irq; |
327 | * interrupts. Sharing host devices with guest-assigned devices | 354 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, |
328 | * on the same interrupt line is not a happy situation: there | 355 | "kvm_assigned_msi_device", (void *)dev)) { |
329 | * are going to be long delays in accepting, acking, etc. | 356 | pci_disable_msi(dev->dev); |
330 | */ | 357 | return -EIO; |
331 | if (request_irq(adev->host_irq, kvm_assigned_dev_intr, | ||
332 | 0, "kvm_assigned_intx_device", (void *)adev)) | ||
333 | return -EIO; | ||
334 | } | 358 | } |
335 | 359 | ||
336 | adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX | | ||
337 | KVM_ASSIGNED_DEV_HOST_INTX; | ||
338 | return 0; | 360 | return 0; |
339 | } | 361 | } |
362 | #endif | ||
340 | 363 | ||
341 | #ifdef CONFIG_X86 | 364 | #ifdef __KVM_HAVE_MSIX |
342 | static int assigned_device_update_msi(struct kvm *kvm, | 365 | static int assigned_device_enable_host_msix(struct kvm *kvm, |
343 | struct kvm_assigned_dev_kernel *adev, | 366 | struct kvm_assigned_dev_kernel *dev) |
344 | struct kvm_assigned_irq *airq) | ||
345 | { | 367 | { |
346 | int r; | 368 | int i, r = -EINVAL; |
347 | 369 | ||
348 | adev->guest_irq = airq->guest_irq; | 370 | /* host_msix_entries and guest_msix_entries should have been |
349 | if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) { | 371 | * initialized */ |
350 | /* x86 don't care upper address of guest msi message addr */ | 372 | if (dev->entries_nr == 0) |
351 | adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI; | 373 | return r; |
352 | adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX; | ||
353 | adev->ack_notifier.gsi = -1; | ||
354 | } else if (msi2intx) { | ||
355 | adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX; | ||
356 | adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI; | ||
357 | adev->ack_notifier.gsi = airq->guest_irq; | ||
358 | } else { | ||
359 | /* | ||
360 | * Guest require to disable device MSI, we disable MSI and | ||
361 | * re-enable INTx by default again. Notice it's only for | ||
362 | * non-msi2intx. | ||
363 | */ | ||
364 | assigned_device_update_intx(kvm, adev, airq); | ||
365 | return 0; | ||
366 | } | ||
367 | 374 | ||
368 | if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) | 375 | r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); |
369 | return 0; | 376 | if (r) |
377 | return r; | ||
370 | 378 | ||
371 | if (irqchip_in_kernel(kvm)) { | 379 | for (i = 0; i < dev->entries_nr; i++) { |
372 | if (!msi2intx) { | 380 | r = request_irq(dev->host_msix_entries[i].vector, |
373 | if (adev->irq_requested_type & | 381 | kvm_assigned_dev_intr, 0, |
374 | KVM_ASSIGNED_DEV_HOST_INTX) | 382 | "kvm_assigned_msix_device", |
375 | free_irq(adev->host_irq, (void *)adev); | 383 | (void *)dev); |
384 | /* FIXME: free requested_irq's on failure */ | ||
385 | if (r) | ||
386 | return r; | ||
387 | } | ||
376 | 388 | ||
377 | r = pci_enable_msi(adev->dev); | 389 | return 0; |
378 | if (r) | 390 | } |
379 | return r; | ||
380 | } | ||
381 | 391 | ||
382 | adev->host_irq = adev->dev->irq; | 392 | #endif |
383 | if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0, | ||
384 | "kvm_assigned_msi_device", (void *)adev)) | ||
385 | return -EIO; | ||
386 | } | ||
387 | 393 | ||
388 | if (!msi2intx) | 394 | static int assigned_device_enable_guest_intx(struct kvm *kvm, |
389 | adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI; | 395 | struct kvm_assigned_dev_kernel *dev, |
396 | struct kvm_assigned_irq *irq) | ||
397 | { | ||
398 | dev->guest_irq = irq->guest_irq; | ||
399 | dev->ack_notifier.gsi = irq->guest_irq; | ||
400 | return 0; | ||
401 | } | ||
390 | 402 | ||
391 | adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI; | 403 | #ifdef __KVM_HAVE_MSI |
404 | static int assigned_device_enable_guest_msi(struct kvm *kvm, | ||
405 | struct kvm_assigned_dev_kernel *dev, | ||
406 | struct kvm_assigned_irq *irq) | ||
407 | { | ||
408 | dev->guest_irq = irq->guest_irq; | ||
409 | dev->ack_notifier.gsi = -1; | ||
392 | return 0; | 410 | return 0; |
393 | } | 411 | } |
394 | #endif | 412 | #endif |
413 | #ifdef __KVM_HAVE_MSIX | ||
414 | static int assigned_device_enable_guest_msix(struct kvm *kvm, | ||
415 | struct kvm_assigned_dev_kernel *dev, | ||
416 | struct kvm_assigned_irq *irq) | ||
417 | { | ||
418 | dev->guest_irq = irq->guest_irq; | ||
419 | dev->ack_notifier.gsi = -1; | ||
420 | return 0; | ||
421 | } | ||
422 | #endif | ||
423 | |||
424 | static int assign_host_irq(struct kvm *kvm, | ||
425 | struct kvm_assigned_dev_kernel *dev, | ||
426 | __u32 host_irq_type) | ||
427 | { | ||
428 | int r = -EEXIST; | ||
429 | |||
430 | if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) | ||
431 | return r; | ||
395 | 432 | ||
433 | switch (host_irq_type) { | ||
434 | case KVM_DEV_IRQ_HOST_INTX: | ||
435 | r = assigned_device_enable_host_intx(kvm, dev); | ||
436 | break; | ||
437 | #ifdef __KVM_HAVE_MSI | ||
438 | case KVM_DEV_IRQ_HOST_MSI: | ||
439 | r = assigned_device_enable_host_msi(kvm, dev); | ||
440 | break; | ||
441 | #endif | ||
396 | #ifdef __KVM_HAVE_MSIX | 442 | #ifdef __KVM_HAVE_MSIX |
397 | static int assigned_device_update_msix(struct kvm *kvm, | 443 | case KVM_DEV_IRQ_HOST_MSIX: |
398 | struct kvm_assigned_dev_kernel *adev, | 444 | r = assigned_device_enable_host_msix(kvm, dev); |
399 | struct kvm_assigned_irq *airq) | 445 | break; |
400 | { | 446 | #endif |
401 | /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ | 447 | default: |
402 | int i, r; | 448 | r = -EINVAL; |
403 | 449 | } | |
404 | adev->ack_notifier.gsi = -1; | ||
405 | |||
406 | if (irqchip_in_kernel(kvm)) { | ||
407 | if (airq->flags & KVM_DEV_IRQ_ASSIGN_MASK_MSIX) | ||
408 | return -ENOTTY; | ||
409 | |||
410 | if (!(airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX)) { | ||
411 | /* Guest disable MSI-X */ | ||
412 | kvm_free_assigned_irq(kvm, adev); | ||
413 | if (msi2intx) { | ||
414 | pci_enable_msi(adev->dev); | ||
415 | if (adev->dev->msi_enabled) | ||
416 | return assigned_device_update_msi(kvm, | ||
417 | adev, airq); | ||
418 | } | ||
419 | return assigned_device_update_intx(kvm, adev, airq); | ||
420 | } | ||
421 | 450 | ||
422 | /* host_msix_entries and guest_msix_entries should have been | 451 | if (!r) |
423 | * initialized */ | 452 | dev->irq_requested_type |= host_irq_type; |
424 | if (adev->entries_nr == 0) | ||
425 | return -EINVAL; | ||
426 | 453 | ||
427 | kvm_free_assigned_irq(kvm, adev); | 454 | return r; |
455 | } | ||
428 | 456 | ||
429 | r = pci_enable_msix(adev->dev, adev->host_msix_entries, | 457 | static int assign_guest_irq(struct kvm *kvm, |
430 | adev->entries_nr); | 458 | struct kvm_assigned_dev_kernel *dev, |
431 | if (r) | 459 | struct kvm_assigned_irq *irq, |
432 | return r; | 460 | unsigned long guest_irq_type) |
461 | { | ||
462 | int id; | ||
463 | int r = -EEXIST; | ||
433 | 464 | ||
434 | for (i = 0; i < adev->entries_nr; i++) { | 465 | if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) |
435 | r = request_irq((adev->host_msix_entries + i)->vector, | 466 | return r; |
436 | kvm_assigned_dev_intr, 0, | 467 | |
437 | "kvm_assigned_msix_device", | 468 | id = kvm_request_irq_source_id(kvm); |
438 | (void *)adev); | 469 | if (id < 0) |
439 | if (r) | 470 | return id; |
440 | return r; | 471 | |
441 | } | 472 | dev->irq_source_id = id; |
473 | |||
474 | switch (guest_irq_type) { | ||
475 | case KVM_DEV_IRQ_GUEST_INTX: | ||
476 | r = assigned_device_enable_guest_intx(kvm, dev, irq); | ||
477 | break; | ||
478 | #ifdef __KVM_HAVE_MSI | ||
479 | case KVM_DEV_IRQ_GUEST_MSI: | ||
480 | r = assigned_device_enable_guest_msi(kvm, dev, irq); | ||
481 | break; | ||
482 | #endif | ||
483 | #ifdef __KVM_HAVE_MSIX | ||
484 | case KVM_DEV_IRQ_GUEST_MSIX: | ||
485 | r = assigned_device_enable_guest_msix(kvm, dev, irq); | ||
486 | break; | ||
487 | #endif | ||
488 | default: | ||
489 | r = -EINVAL; | ||
442 | } | 490 | } |
443 | 491 | ||
444 | adev->irq_requested_type |= KVM_ASSIGNED_DEV_MSIX; | 492 | if (!r) { |
493 | dev->irq_requested_type |= guest_irq_type; | ||
494 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | ||
495 | } else | ||
496 | kvm_free_irq_source_id(kvm, dev->irq_source_id); | ||
445 | 497 | ||
446 | return 0; | 498 | return r; |
447 | } | 499 | } |
448 | #endif | ||
449 | 500 | ||
501 | /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ | ||
450 | static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, | 502 | static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, |
451 | struct kvm_assigned_irq | 503 | struct kvm_assigned_irq *assigned_irq) |
452 | *assigned_irq) | ||
453 | { | 504 | { |
454 | int r = 0; | 505 | int r = -EINVAL; |
455 | struct kvm_assigned_dev_kernel *match; | 506 | struct kvm_assigned_dev_kernel *match; |
456 | u32 current_flags = 0, changed_flags; | 507 | unsigned long host_irq_type, guest_irq_type; |
457 | 508 | ||
458 | mutex_lock(&kvm->lock); | 509 | if (!capable(CAP_SYS_RAWIO)) |
510 | return -EPERM; | ||
459 | 511 | ||
512 | if (!irqchip_in_kernel(kvm)) | ||
513 | return r; | ||
514 | |||
515 | mutex_lock(&kvm->lock); | ||
516 | r = -ENODEV; | ||
460 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | 517 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, |
461 | assigned_irq->assigned_dev_id); | 518 | assigned_irq->assigned_dev_id); |
462 | if (!match) { | 519 | if (!match) |
463 | mutex_unlock(&kvm->lock); | 520 | goto out; |
464 | return -EINVAL; | ||
465 | } | ||
466 | |||
467 | if (!match->irq_requested_type) { | ||
468 | INIT_WORK(&match->interrupt_work, | ||
469 | kvm_assigned_dev_interrupt_work_handler); | ||
470 | if (irqchip_in_kernel(kvm)) { | ||
471 | /* Register ack nofitier */ | ||
472 | match->ack_notifier.gsi = -1; | ||
473 | match->ack_notifier.irq_acked = | ||
474 | kvm_assigned_dev_ack_irq; | ||
475 | kvm_register_irq_ack_notifier(kvm, | ||
476 | &match->ack_notifier); | ||
477 | |||
478 | /* Request IRQ source ID */ | ||
479 | r = kvm_request_irq_source_id(kvm); | ||
480 | if (r < 0) | ||
481 | goto out_release; | ||
482 | else | ||
483 | match->irq_source_id = r; | ||
484 | |||
485 | #ifdef CONFIG_X86 | ||
486 | /* Determine host device irq type, we can know the | ||
487 | * result from dev->msi_enabled */ | ||
488 | if (msi2intx) | ||
489 | pci_enable_msi(match->dev); | ||
490 | #endif | ||
491 | } | ||
492 | } | ||
493 | 521 | ||
494 | if (match->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) | 522 | host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); |
495 | current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX; | 523 | guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); |
496 | else if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) && | ||
497 | (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI)) | ||
498 | current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI; | ||
499 | 524 | ||
500 | changed_flags = assigned_irq->flags ^ current_flags; | 525 | r = -EINVAL; |
526 | /* can only assign one type at a time */ | ||
527 | if (hweight_long(host_irq_type) > 1) | ||
528 | goto out; | ||
529 | if (hweight_long(guest_irq_type) > 1) | ||
530 | goto out; | ||
531 | if (host_irq_type == 0 && guest_irq_type == 0) | ||
532 | goto out; | ||
501 | 533 | ||
502 | #ifdef __KVM_HAVE_MSIX | 534 | r = 0; |
503 | if (changed_flags & KVM_DEV_IRQ_ASSIGN_MSIX_ACTION) { | 535 | if (host_irq_type) |
504 | r = assigned_device_update_msix(kvm, match, assigned_irq); | 536 | r = assign_host_irq(kvm, match, host_irq_type); |
505 | if (r) { | 537 | if (r) |
506 | printk(KERN_WARNING "kvm: failed to execute " | 538 | goto out; |
507 | "MSI-X action!\n"); | ||
508 | goto out_release; | ||
509 | } | ||
510 | } else | ||
511 | #endif | ||
512 | if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) || | ||
513 | (msi2intx && match->dev->msi_enabled)) { | ||
514 | #ifdef CONFIG_X86 | ||
515 | r = assigned_device_update_msi(kvm, match, assigned_irq); | ||
516 | if (r) { | ||
517 | printk(KERN_WARNING "kvm: failed to enable " | ||
518 | "MSI device!\n"); | ||
519 | goto out_release; | ||
520 | } | ||
521 | #else | ||
522 | r = -ENOTTY; | ||
523 | #endif | ||
524 | } else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) { | ||
525 | /* Host device IRQ 0 means don't support INTx */ | ||
526 | if (!msi2intx) { | ||
527 | printk(KERN_WARNING | ||
528 | "kvm: wait device to enable MSI!\n"); | ||
529 | r = 0; | ||
530 | } else { | ||
531 | printk(KERN_WARNING | ||
532 | "kvm: failed to enable MSI device!\n"); | ||
533 | r = -ENOTTY; | ||
534 | goto out_release; | ||
535 | } | ||
536 | } else { | ||
537 | /* Non-sharing INTx mode */ | ||
538 | r = assigned_device_update_intx(kvm, match, assigned_irq); | ||
539 | if (r) { | ||
540 | printk(KERN_WARNING "kvm: failed to enable " | ||
541 | "INTx device!\n"); | ||
542 | goto out_release; | ||
543 | } | ||
544 | } | ||
545 | 539 | ||
540 | if (guest_irq_type) | ||
541 | r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); | ||
542 | out: | ||
546 | mutex_unlock(&kvm->lock); | 543 | mutex_unlock(&kvm->lock); |
547 | return r; | 544 | return r; |
548 | out_release: | 545 | } |
546 | |||
547 | static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, | ||
548 | struct kvm_assigned_irq | ||
549 | *assigned_irq) | ||
550 | { | ||
551 | int r = -ENODEV; | ||
552 | struct kvm_assigned_dev_kernel *match; | ||
553 | |||
554 | mutex_lock(&kvm->lock); | ||
555 | |||
556 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
557 | assigned_irq->assigned_dev_id); | ||
558 | if (!match) | ||
559 | goto out; | ||
560 | |||
561 | r = kvm_deassign_irq(kvm, match, assigned_irq->flags); | ||
562 | out: | ||
549 | mutex_unlock(&kvm->lock); | 563 | mutex_unlock(&kvm->lock); |
550 | kvm_free_assigned_device(kvm, match); | ||
551 | return r; | 564 | return r; |
552 | } | 565 | } |
553 | 566 | ||
@@ -565,7 +578,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | |||
565 | assigned_dev->assigned_dev_id); | 578 | assigned_dev->assigned_dev_id); |
566 | if (match) { | 579 | if (match) { |
567 | /* device already assigned */ | 580 | /* device already assigned */ |
568 | r = -EINVAL; | 581 | r = -EEXIST; |
569 | goto out; | 582 | goto out; |
570 | } | 583 | } |
571 | 584 | ||
@@ -604,6 +617,9 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | |||
604 | match->dev = dev; | 617 | match->dev = dev; |
605 | match->irq_source_id = -1; | 618 | match->irq_source_id = -1; |
606 | match->kvm = kvm; | 619 | match->kvm = kvm; |
620 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | ||
621 | INIT_WORK(&match->interrupt_work, | ||
622 | kvm_assigned_dev_interrupt_work_handler); | ||
607 | 623 | ||
608 | list_add(&match->list, &kvm->arch.assigned_dev_head); | 624 | list_add(&match->list, &kvm->arch.assigned_dev_head); |
609 | 625 | ||
@@ -2084,6 +2100,11 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2084 | break; | 2100 | break; |
2085 | } | 2101 | } |
2086 | case KVM_ASSIGN_IRQ: { | 2102 | case KVM_ASSIGN_IRQ: { |
2103 | r = -EOPNOTSUPP; | ||
2104 | break; | ||
2105 | } | ||
2106 | #ifdef KVM_CAP_ASSIGN_DEV_IRQ | ||
2107 | case KVM_ASSIGN_DEV_IRQ: { | ||
2087 | struct kvm_assigned_irq assigned_irq; | 2108 | struct kvm_assigned_irq assigned_irq; |
2088 | 2109 | ||
2089 | r = -EFAULT; | 2110 | r = -EFAULT; |
@@ -2094,6 +2115,18 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2094 | goto out; | 2115 | goto out; |
2095 | break; | 2116 | break; |
2096 | } | 2117 | } |
2118 | case KVM_DEASSIGN_DEV_IRQ: { | ||
2119 | struct kvm_assigned_irq assigned_irq; | ||
2120 | |||
2121 | r = -EFAULT; | ||
2122 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
2123 | goto out; | ||
2124 | r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); | ||
2125 | if (r) | ||
2126 | goto out; | ||
2127 | break; | ||
2128 | } | ||
2129 | #endif | ||
2097 | #endif | 2130 | #endif |
2098 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT | 2131 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT |
2099 | case KVM_DEASSIGN_PCI_DEVICE: { | 2132 | case KVM_DEASSIGN_PCI_DEVICE: { |
@@ -2596,9 +2629,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size, | |||
2596 | 2629 | ||
2597 | kvm_preempt_ops.sched_in = kvm_sched_in; | 2630 | kvm_preempt_ops.sched_in = kvm_sched_in; |
2598 | kvm_preempt_ops.sched_out = kvm_sched_out; | 2631 | kvm_preempt_ops.sched_out = kvm_sched_out; |
2599 | #ifndef CONFIG_X86 | ||
2600 | msi2intx = 0; | ||
2601 | #endif | ||
2602 | 2632 | ||
2603 | return 0; | 2633 | return 0; |
2604 | 2634 | ||