diff options
-rw-r--r-- | Documentation/vfio.txt | 8 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci.c | 286 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_config.c | 11 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_intrs.c | 35 | ||||
-rw-r--r-- | drivers/vfio/vfio.c | 64 | ||||
-rw-r--r-- | include/linux/vfio.h | 7 | ||||
-rw-r--r-- | include/uapi/linux/vfio.h | 38 |
7 files changed, 421 insertions, 28 deletions
diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index d7993dcf8537..b9ca02370d46 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt | |||
@@ -167,8 +167,8 @@ group and can access them as follows: | |||
167 | int container, group, device, i; | 167 | int container, group, device, i; |
168 | struct vfio_group_status group_status = | 168 | struct vfio_group_status group_status = |
169 | { .argsz = sizeof(group_status) }; | 169 | { .argsz = sizeof(group_status) }; |
170 | struct vfio_iommu_x86_info iommu_info = { .argsz = sizeof(iommu_info) }; | 170 | struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) }; |
171 | struct vfio_iommu_x86_dma_map dma_map = { .argsz = sizeof(dma_map) }; | 171 | struct vfio_iommu_type1_dma_map dma_map = { .argsz = sizeof(dma_map) }; |
172 | struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; | 172 | struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; |
173 | 173 | ||
174 | /* Create a new container */ | 174 | /* Create a new container */ |
@@ -193,7 +193,7 @@ group and can access them as follows: | |||
193 | ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); | 193 | ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); |
194 | 194 | ||
195 | /* Enable the IOMMU model we want */ | 195 | /* Enable the IOMMU model we want */ |
196 | ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) | 196 | ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU); |
197 | 197 | ||
198 | /* Get addition IOMMU info */ | 198 | /* Get addition IOMMU info */ |
199 | ioctl(container, VFIO_IOMMU_GET_INFO, &iommu_info); | 199 | ioctl(container, VFIO_IOMMU_GET_INFO, &iommu_info); |
@@ -229,7 +229,7 @@ group and can access them as follows: | |||
229 | 229 | ||
230 | irq.index = i; | 230 | irq.index = i; |
231 | 231 | ||
232 | ioctl(device, VFIO_DEVICE_GET_IRQ_INFO, &reg); | 232 | ioctl(device, VFIO_DEVICE_GET_IRQ_INFO, &irq); |
233 | 233 | ||
234 | /* Setup IRQs... eventfds, VFIO_DEVICE_SET_IRQS */ | 234 | /* Setup IRQs... eventfds, VFIO_DEVICE_SET_IRQS */ |
235 | } | 235 | } |
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index cef6002acbd4..6ab71b9fcf8d 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c | |||
@@ -13,6 +13,7 @@ | |||
13 | 13 | ||
14 | #include <linux/device.h> | 14 | #include <linux/device.h> |
15 | #include <linux/eventfd.h> | 15 | #include <linux/eventfd.h> |
16 | #include <linux/file.h> | ||
16 | #include <linux/interrupt.h> | 17 | #include <linux/interrupt.h> |
17 | #include <linux/iommu.h> | 18 | #include <linux/iommu.h> |
18 | #include <linux/module.h> | 19 | #include <linux/module.h> |
@@ -227,6 +228,110 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type) | |||
227 | return 0; | 228 | return 0; |
228 | } | 229 | } |
229 | 230 | ||
231 | static int vfio_pci_count_devs(struct pci_dev *pdev, void *data) | ||
232 | { | ||
233 | (*(int *)data)++; | ||
234 | return 0; | ||
235 | } | ||
236 | |||
237 | struct vfio_pci_fill_info { | ||
238 | int max; | ||
239 | int cur; | ||
240 | struct vfio_pci_dependent_device *devices; | ||
241 | }; | ||
242 | |||
243 | static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data) | ||
244 | { | ||
245 | struct vfio_pci_fill_info *fill = data; | ||
246 | struct iommu_group *iommu_group; | ||
247 | |||
248 | if (fill->cur == fill->max) | ||
249 | return -EAGAIN; /* Something changed, try again */ | ||
250 | |||
251 | iommu_group = iommu_group_get(&pdev->dev); | ||
252 | if (!iommu_group) | ||
253 | return -EPERM; /* Cannot reset non-isolated devices */ | ||
254 | |||
255 | fill->devices[fill->cur].group_id = iommu_group_id(iommu_group); | ||
256 | fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus); | ||
257 | fill->devices[fill->cur].bus = pdev->bus->number; | ||
258 | fill->devices[fill->cur].devfn = pdev->devfn; | ||
259 | fill->cur++; | ||
260 | iommu_group_put(iommu_group); | ||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | struct vfio_pci_group_entry { | ||
265 | struct vfio_group *group; | ||
266 | int id; | ||
267 | }; | ||
268 | |||
269 | struct vfio_pci_group_info { | ||
270 | int count; | ||
271 | struct vfio_pci_group_entry *groups; | ||
272 | }; | ||
273 | |||
274 | static int vfio_pci_validate_devs(struct pci_dev *pdev, void *data) | ||
275 | { | ||
276 | struct vfio_pci_group_info *info = data; | ||
277 | struct iommu_group *group; | ||
278 | int id, i; | ||
279 | |||
280 | group = iommu_group_get(&pdev->dev); | ||
281 | if (!group) | ||
282 | return -EPERM; | ||
283 | |||
284 | id = iommu_group_id(group); | ||
285 | |||
286 | for (i = 0; i < info->count; i++) | ||
287 | if (info->groups[i].id == id) | ||
288 | break; | ||
289 | |||
290 | iommu_group_put(group); | ||
291 | |||
292 | return (i == info->count) ? -EINVAL : 0; | ||
293 | } | ||
294 | |||
295 | static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot) | ||
296 | { | ||
297 | for (; pdev; pdev = pdev->bus->self) | ||
298 | if (pdev->bus == slot->bus) | ||
299 | return (pdev->slot == slot); | ||
300 | return false; | ||
301 | } | ||
302 | |||
303 | struct vfio_pci_walk_info { | ||
304 | int (*fn)(struct pci_dev *, void *data); | ||
305 | void *data; | ||
306 | struct pci_dev *pdev; | ||
307 | bool slot; | ||
308 | int ret; | ||
309 | }; | ||
310 | |||
311 | static int vfio_pci_walk_wrapper(struct pci_dev *pdev, void *data) | ||
312 | { | ||
313 | struct vfio_pci_walk_info *walk = data; | ||
314 | |||
315 | if (!walk->slot || vfio_pci_dev_below_slot(pdev, walk->pdev->slot)) | ||
316 | walk->ret = walk->fn(pdev, walk->data); | ||
317 | |||
318 | return walk->ret; | ||
319 | } | ||
320 | |||
321 | static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev, | ||
322 | int (*fn)(struct pci_dev *, | ||
323 | void *data), void *data, | ||
324 | bool slot) | ||
325 | { | ||
326 | struct vfio_pci_walk_info walk = { | ||
327 | .fn = fn, .data = data, .pdev = pdev, .slot = slot, .ret = 0, | ||
328 | }; | ||
329 | |||
330 | pci_walk_bus(pdev->bus, vfio_pci_walk_wrapper, &walk); | ||
331 | |||
332 | return walk.ret; | ||
333 | } | ||
334 | |||
230 | static long vfio_pci_ioctl(void *device_data, | 335 | static long vfio_pci_ioctl(void *device_data, |
231 | unsigned int cmd, unsigned long arg) | 336 | unsigned int cmd, unsigned long arg) |
232 | { | 337 | { |
@@ -407,10 +512,189 @@ static long vfio_pci_ioctl(void *device_data, | |||
407 | 512 | ||
408 | return ret; | 513 | return ret; |
409 | 514 | ||
410 | } else if (cmd == VFIO_DEVICE_RESET) | 515 | } else if (cmd == VFIO_DEVICE_RESET) { |
411 | return vdev->reset_works ? | 516 | return vdev->reset_works ? |
412 | pci_reset_function(vdev->pdev) : -EINVAL; | 517 | pci_reset_function(vdev->pdev) : -EINVAL; |
413 | 518 | ||
519 | } else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) { | ||
520 | struct vfio_pci_hot_reset_info hdr; | ||
521 | struct vfio_pci_fill_info fill = { 0 }; | ||
522 | struct vfio_pci_dependent_device *devices = NULL; | ||
523 | bool slot = false; | ||
524 | int ret = 0; | ||
525 | |||
526 | minsz = offsetofend(struct vfio_pci_hot_reset_info, count); | ||
527 | |||
528 | if (copy_from_user(&hdr, (void __user *)arg, minsz)) | ||
529 | return -EFAULT; | ||
530 | |||
531 | if (hdr.argsz < minsz) | ||
532 | return -EINVAL; | ||
533 | |||
534 | hdr.flags = 0; | ||
535 | |||
536 | /* Can we do a slot or bus reset or neither? */ | ||
537 | if (!pci_probe_reset_slot(vdev->pdev->slot)) | ||
538 | slot = true; | ||
539 | else if (pci_probe_reset_bus(vdev->pdev->bus)) | ||
540 | return -ENODEV; | ||
541 | |||
542 | /* How many devices are affected? */ | ||
543 | ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, | ||
544 | vfio_pci_count_devs, | ||
545 | &fill.max, slot); | ||
546 | if (ret) | ||
547 | return ret; | ||
548 | |||
549 | WARN_ON(!fill.max); /* Should always be at least one */ | ||
550 | |||
551 | /* | ||
552 | * If there's enough space, fill it now, otherwise return | ||
553 | * -ENOSPC and the number of devices affected. | ||
554 | */ | ||
555 | if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) { | ||
556 | ret = -ENOSPC; | ||
557 | hdr.count = fill.max; | ||
558 | goto reset_info_exit; | ||
559 | } | ||
560 | |||
561 | devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL); | ||
562 | if (!devices) | ||
563 | return -ENOMEM; | ||
564 | |||
565 | fill.devices = devices; | ||
566 | |||
567 | ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, | ||
568 | vfio_pci_fill_devs, | ||
569 | &fill, slot); | ||
570 | |||
571 | /* | ||
572 | * If a device was removed between counting and filling, | ||
573 | * we may come up short of fill.max. If a device was | ||
574 | * added, we'll have a return of -EAGAIN above. | ||
575 | */ | ||
576 | if (!ret) | ||
577 | hdr.count = fill.cur; | ||
578 | |||
579 | reset_info_exit: | ||
580 | if (copy_to_user((void __user *)arg, &hdr, minsz)) | ||
581 | ret = -EFAULT; | ||
582 | |||
583 | if (!ret) { | ||
584 | if (copy_to_user((void __user *)(arg + minsz), devices, | ||
585 | hdr.count * sizeof(*devices))) | ||
586 | ret = -EFAULT; | ||
587 | } | ||
588 | |||
589 | kfree(devices); | ||
590 | return ret; | ||
591 | |||
592 | } else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) { | ||
593 | struct vfio_pci_hot_reset hdr; | ||
594 | int32_t *group_fds; | ||
595 | struct vfio_pci_group_entry *groups; | ||
596 | struct vfio_pci_group_info info; | ||
597 | bool slot = false; | ||
598 | int i, count = 0, ret = 0; | ||
599 | |||
600 | minsz = offsetofend(struct vfio_pci_hot_reset, count); | ||
601 | |||
602 | if (copy_from_user(&hdr, (void __user *)arg, minsz)) | ||
603 | return -EFAULT; | ||
604 | |||
605 | if (hdr.argsz < minsz || hdr.flags) | ||
606 | return -EINVAL; | ||
607 | |||
608 | /* Can we do a slot or bus reset or neither? */ | ||
609 | if (!pci_probe_reset_slot(vdev->pdev->slot)) | ||
610 | slot = true; | ||
611 | else if (pci_probe_reset_bus(vdev->pdev->bus)) | ||
612 | return -ENODEV; | ||
613 | |||
614 | /* | ||
615 | * We can't let userspace give us an arbitrarily large | ||
616 | * buffer to copy, so verify how many we think there | ||
617 | * could be. Note groups can have multiple devices so | ||
618 | * one group per device is the max. | ||
619 | */ | ||
620 | ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, | ||
621 | vfio_pci_count_devs, | ||
622 | &count, slot); | ||
623 | if (ret) | ||
624 | return ret; | ||
625 | |||
626 | /* Somewhere between 1 and count is OK */ | ||
627 | if (!hdr.count || hdr.count > count) | ||
628 | return -EINVAL; | ||
629 | |||
630 | group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL); | ||
631 | groups = kcalloc(hdr.count, sizeof(*groups), GFP_KERNEL); | ||
632 | if (!group_fds || !groups) { | ||
633 | kfree(group_fds); | ||
634 | kfree(groups); | ||
635 | return -ENOMEM; | ||
636 | } | ||
637 | |||
638 | if (copy_from_user(group_fds, (void __user *)(arg + minsz), | ||
639 | hdr.count * sizeof(*group_fds))) { | ||
640 | kfree(group_fds); | ||
641 | kfree(groups); | ||
642 | return -EFAULT; | ||
643 | } | ||
644 | |||
645 | /* | ||
646 | * For each group_fd, get the group through the vfio external | ||
647 | * user interface and store the group and iommu ID. This | ||
648 | * ensures the group is held across the reset. | ||
649 | */ | ||
650 | for (i = 0; i < hdr.count; i++) { | ||
651 | struct vfio_group *group; | ||
652 | struct fd f = fdget(group_fds[i]); | ||
653 | if (!f.file) { | ||
654 | ret = -EBADF; | ||
655 | break; | ||
656 | } | ||
657 | |||
658 | group = vfio_group_get_external_user(f.file); | ||
659 | fdput(f); | ||
660 | if (IS_ERR(group)) { | ||
661 | ret = PTR_ERR(group); | ||
662 | break; | ||
663 | } | ||
664 | |||
665 | groups[i].group = group; | ||
666 | groups[i].id = vfio_external_user_iommu_id(group); | ||
667 | } | ||
668 | |||
669 | kfree(group_fds); | ||
670 | |||
671 | /* release reference to groups on error */ | ||
672 | if (ret) | ||
673 | goto hot_reset_release; | ||
674 | |||
675 | info.count = hdr.count; | ||
676 | info.groups = groups; | ||
677 | |||
678 | /* | ||
679 | * Test whether all the affected devices are contained | ||
680 | * by the set of groups provided by the user. | ||
681 | */ | ||
682 | ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, | ||
683 | vfio_pci_validate_devs, | ||
684 | &info, slot); | ||
685 | if (!ret) | ||
686 | /* User has access, do the reset */ | ||
687 | ret = slot ? pci_reset_slot(vdev->pdev->slot) : | ||
688 | pci_reset_bus(vdev->pdev->bus); | ||
689 | |||
690 | hot_reset_release: | ||
691 | for (i--; i >= 0; i--) | ||
692 | vfio_group_put_external_user(groups[i].group); | ||
693 | |||
694 | kfree(groups); | ||
695 | return ret; | ||
696 | } | ||
697 | |||
414 | return -ENOTTY; | 698 | return -ENOTTY; |
415 | } | 699 | } |
416 | 700 | ||
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index affa34745be9..ffd0632c3cbc 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c | |||
@@ -1012,6 +1012,7 @@ static int vfio_vc_cap_len(struct vfio_pci_device *vdev, u16 pos) | |||
1012 | static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos) | 1012 | static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos) |
1013 | { | 1013 | { |
1014 | struct pci_dev *pdev = vdev->pdev; | 1014 | struct pci_dev *pdev = vdev->pdev; |
1015 | u32 dword; | ||
1015 | u16 word; | 1016 | u16 word; |
1016 | u8 byte; | 1017 | u8 byte; |
1017 | int ret; | 1018 | int ret; |
@@ -1025,7 +1026,9 @@ static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos) | |||
1025 | return pcibios_err_to_errno(ret); | 1026 | return pcibios_err_to_errno(ret); |
1026 | 1027 | ||
1027 | if (PCI_X_CMD_VERSION(word)) { | 1028 | if (PCI_X_CMD_VERSION(word)) { |
1028 | vdev->extended_caps = true; | 1029 | /* Test for extended capabilities */ |
1030 | pci_read_config_dword(pdev, PCI_CFG_SPACE_SIZE, &dword); | ||
1031 | vdev->extended_caps = (dword != 0); | ||
1029 | return PCI_CAP_PCIX_SIZEOF_V2; | 1032 | return PCI_CAP_PCIX_SIZEOF_V2; |
1030 | } else | 1033 | } else |
1031 | return PCI_CAP_PCIX_SIZEOF_V0; | 1034 | return PCI_CAP_PCIX_SIZEOF_V0; |
@@ -1037,9 +1040,11 @@ static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos) | |||
1037 | 1040 | ||
1038 | return byte; | 1041 | return byte; |
1039 | case PCI_CAP_ID_EXP: | 1042 | case PCI_CAP_ID_EXP: |
1040 | /* length based on version */ | 1043 | /* Test for extended capabilities */ |
1041 | vdev->extended_caps = true; | 1044 | pci_read_config_dword(pdev, PCI_CFG_SPACE_SIZE, &dword); |
1045 | vdev->extended_caps = (dword != 0); | ||
1042 | 1046 | ||
1047 | /* length based on version */ | ||
1043 | if ((pcie_caps_reg(pdev) & PCI_EXP_FLAGS_VERS) == 1) | 1048 | if ((pcie_caps_reg(pdev) & PCI_EXP_FLAGS_VERS) == 1) |
1044 | return PCI_CAP_EXP_ENDPOINT_SIZEOF_V1; | 1049 | return PCI_CAP_EXP_ENDPOINT_SIZEOF_V1; |
1045 | else | 1050 | else |
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 4bc704e1b7c7..641bc87bdb96 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c | |||
@@ -130,8 +130,8 @@ static int virqfd_enable(struct vfio_pci_device *vdev, | |||
130 | void (*thread)(struct vfio_pci_device *, void *), | 130 | void (*thread)(struct vfio_pci_device *, void *), |
131 | void *data, struct virqfd **pvirqfd, int fd) | 131 | void *data, struct virqfd **pvirqfd, int fd) |
132 | { | 132 | { |
133 | struct file *file = NULL; | 133 | struct fd irqfd; |
134 | struct eventfd_ctx *ctx = NULL; | 134 | struct eventfd_ctx *ctx; |
135 | struct virqfd *virqfd; | 135 | struct virqfd *virqfd; |
136 | int ret = 0; | 136 | int ret = 0; |
137 | unsigned int events; | 137 | unsigned int events; |
@@ -149,16 +149,16 @@ static int virqfd_enable(struct vfio_pci_device *vdev, | |||
149 | INIT_WORK(&virqfd->shutdown, virqfd_shutdown); | 149 | INIT_WORK(&virqfd->shutdown, virqfd_shutdown); |
150 | INIT_WORK(&virqfd->inject, virqfd_inject); | 150 | INIT_WORK(&virqfd->inject, virqfd_inject); |
151 | 151 | ||
152 | file = eventfd_fget(fd); | 152 | irqfd = fdget(fd); |
153 | if (IS_ERR(file)) { | 153 | if (!irqfd.file) { |
154 | ret = PTR_ERR(file); | 154 | ret = -EBADF; |
155 | goto fail; | 155 | goto err_fd; |
156 | } | 156 | } |
157 | 157 | ||
158 | ctx = eventfd_ctx_fileget(file); | 158 | ctx = eventfd_ctx_fileget(irqfd.file); |
159 | if (IS_ERR(ctx)) { | 159 | if (IS_ERR(ctx)) { |
160 | ret = PTR_ERR(ctx); | 160 | ret = PTR_ERR(ctx); |
161 | goto fail; | 161 | goto err_ctx; |
162 | } | 162 | } |
163 | 163 | ||
164 | virqfd->eventfd = ctx; | 164 | virqfd->eventfd = ctx; |
@@ -174,7 +174,7 @@ static int virqfd_enable(struct vfio_pci_device *vdev, | |||
174 | if (*pvirqfd) { | 174 | if (*pvirqfd) { |
175 | spin_unlock_irq(&vdev->irqlock); | 175 | spin_unlock_irq(&vdev->irqlock); |
176 | ret = -EBUSY; | 176 | ret = -EBUSY; |
177 | goto fail; | 177 | goto err_busy; |
178 | } | 178 | } |
179 | *pvirqfd = virqfd; | 179 | *pvirqfd = virqfd; |
180 | 180 | ||
@@ -187,7 +187,7 @@ static int virqfd_enable(struct vfio_pci_device *vdev, | |||
187 | init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup); | 187 | init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup); |
188 | init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc); | 188 | init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc); |
189 | 189 | ||
190 | events = file->f_op->poll(file, &virqfd->pt); | 190 | events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt); |
191 | 191 | ||
192 | /* | 192 | /* |
193 | * Check if there was an event already pending on the eventfd | 193 | * Check if there was an event already pending on the eventfd |
@@ -202,17 +202,14 @@ static int virqfd_enable(struct vfio_pci_device *vdev, | |||
202 | * Do not drop the file until the irqfd is fully initialized, | 202 | * Do not drop the file until the irqfd is fully initialized, |
203 | * otherwise we might race against the POLLHUP. | 203 | * otherwise we might race against the POLLHUP. |
204 | */ | 204 | */ |
205 | fput(file); | 205 | fdput(irqfd); |
206 | 206 | ||
207 | return 0; | 207 | return 0; |
208 | 208 | err_busy: | |
209 | fail: | 209 | eventfd_ctx_put(ctx); |
210 | if (ctx && !IS_ERR(ctx)) | 210 | err_ctx: |
211 | eventfd_ctx_put(ctx); | 211 | fdput(irqfd); |
212 | 212 | err_fd: | |
213 | if (file && !IS_ERR(file)) | ||
214 | fput(file); | ||
215 | |||
216 | kfree(virqfd); | 213 | kfree(virqfd); |
217 | 214 | ||
218 | return ret; | 215 | return ret; |
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 842f4507883e..1eab4ace0671 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c | |||
@@ -1109,7 +1109,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) | |||
1109 | * We can't use anon_inode_getfd() because we need to modify | 1109 | * We can't use anon_inode_getfd() because we need to modify |
1110 | * the f_mode flags directly to allow more than just ioctls | 1110 | * the f_mode flags directly to allow more than just ioctls |
1111 | */ | 1111 | */ |
1112 | ret = get_unused_fd(); | 1112 | ret = get_unused_fd_flags(O_CLOEXEC); |
1113 | if (ret < 0) { | 1113 | if (ret < 0) { |
1114 | device->ops->release(device->device_data); | 1114 | device->ops->release(device->device_data); |
1115 | break; | 1115 | break; |
@@ -1353,6 +1353,68 @@ static const struct file_operations vfio_device_fops = { | |||
1353 | }; | 1353 | }; |
1354 | 1354 | ||
1355 | /** | 1355 | /** |
1356 | * External user API, exported by symbols to be linked dynamically. | ||
1357 | * | ||
1358 | * The protocol includes: | ||
1359 | * 1. do normal VFIO init operation: | ||
1360 | * - opening a new container; | ||
1361 | * - attaching group(s) to it; | ||
1362 | * - setting an IOMMU driver for a container. | ||
1363 | * When IOMMU is set for a container, all groups in it are | ||
1364 | * considered ready to use by an external user. | ||
1365 | * | ||
1366 | * 2. User space passes a group fd to an external user. | ||
1367 | * The external user calls vfio_group_get_external_user() | ||
1368 | * to verify that: | ||
1369 | * - the group is initialized; | ||
1370 | * - IOMMU is set for it. | ||
1371 | * If both checks passed, vfio_group_get_external_user() | ||
1372 | * increments the container user counter to prevent | ||
1373 | * the VFIO group from disposal before KVM exits. | ||
1374 | * | ||
1375 | * 3. The external user calls vfio_external_user_iommu_id() | ||
1376 | * to know an IOMMU ID. | ||
1377 | * | ||
1378 | * 4. When the external KVM finishes, it calls | ||
1379 | * vfio_group_put_external_user() to release the VFIO group. | ||
1380 | * This call decrements the container user counter. | ||
1381 | */ | ||
1382 | struct vfio_group *vfio_group_get_external_user(struct file *filep) | ||
1383 | { | ||
1384 | struct vfio_group *group = filep->private_data; | ||
1385 | |||
1386 | if (filep->f_op != &vfio_group_fops) | ||
1387 | return ERR_PTR(-EINVAL); | ||
1388 | |||
1389 | if (!atomic_inc_not_zero(&group->container_users)) | ||
1390 | return ERR_PTR(-EINVAL); | ||
1391 | |||
1392 | if (!group->container->iommu_driver || | ||
1393 | !vfio_group_viable(group)) { | ||
1394 | atomic_dec(&group->container_users); | ||
1395 | return ERR_PTR(-EINVAL); | ||
1396 | } | ||
1397 | |||
1398 | vfio_group_get(group); | ||
1399 | |||
1400 | return group; | ||
1401 | } | ||
1402 | EXPORT_SYMBOL_GPL(vfio_group_get_external_user); | ||
1403 | |||
1404 | void vfio_group_put_external_user(struct vfio_group *group) | ||
1405 | { | ||
1406 | vfio_group_put(group); | ||
1407 | vfio_group_try_dissolve_container(group); | ||
1408 | } | ||
1409 | EXPORT_SYMBOL_GPL(vfio_group_put_external_user); | ||
1410 | |||
1411 | int vfio_external_user_iommu_id(struct vfio_group *group) | ||
1412 | { | ||
1413 | return iommu_group_id(group->iommu_group); | ||
1414 | } | ||
1415 | EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id); | ||
1416 | |||
1417 | /** | ||
1356 | * Module/class support | 1418 | * Module/class support |
1357 | */ | 1419 | */ |
1358 | static char *vfio_devnode(struct device *dev, umode_t *mode) | 1420 | static char *vfio_devnode(struct device *dev, umode_t *mode) |
diff --git a/include/linux/vfio.h b/include/linux/vfio.h index ac8d488e4372..24579a0312a0 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h | |||
@@ -90,4 +90,11 @@ extern void vfio_unregister_iommu_driver( | |||
90 | TYPE tmp; \ | 90 | TYPE tmp; \ |
91 | offsetof(TYPE, MEMBER) + sizeof(tmp.MEMBER); }) \ | 91 | offsetof(TYPE, MEMBER) + sizeof(tmp.MEMBER); }) \ |
92 | 92 | ||
93 | /* | ||
94 | * External user API | ||
95 | */ | ||
96 | extern struct vfio_group *vfio_group_get_external_user(struct file *filep); | ||
97 | extern void vfio_group_put_external_user(struct vfio_group *group); | ||
98 | extern int vfio_external_user_iommu_id(struct vfio_group *group); | ||
99 | |||
93 | #endif /* VFIO_H */ | 100 | #endif /* VFIO_H */ |
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 916e444e6f74..0fd47f5bc146 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h | |||
@@ -324,6 +324,44 @@ enum { | |||
324 | VFIO_PCI_NUM_IRQS | 324 | VFIO_PCI_NUM_IRQS |
325 | }; | 325 | }; |
326 | 326 | ||
327 | /** | ||
328 | * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12, | ||
329 | * struct vfio_pci_hot_reset_info) | ||
330 | * | ||
331 | * Return: 0 on success, -errno on failure: | ||
332 | * -enospc = insufficient buffer, -enodev = unsupported for device. | ||
333 | */ | ||
334 | struct vfio_pci_dependent_device { | ||
335 | __u32 group_id; | ||
336 | __u16 segment; | ||
337 | __u8 bus; | ||
338 | __u8 devfn; /* Use PCI_SLOT/PCI_FUNC */ | ||
339 | }; | ||
340 | |||
341 | struct vfio_pci_hot_reset_info { | ||
342 | __u32 argsz; | ||
343 | __u32 flags; | ||
344 | __u32 count; | ||
345 | struct vfio_pci_dependent_device devices[]; | ||
346 | }; | ||
347 | |||
348 | #define VFIO_DEVICE_GET_PCI_HOT_RESET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) | ||
349 | |||
350 | /** | ||
351 | * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13, | ||
352 | * struct vfio_pci_hot_reset) | ||
353 | * | ||
354 | * Return: 0 on success, -errno on failure. | ||
355 | */ | ||
356 | struct vfio_pci_hot_reset { | ||
357 | __u32 argsz; | ||
358 | __u32 flags; | ||
359 | __u32 count; | ||
360 | __s32 group_fds[]; | ||
361 | }; | ||
362 | |||
363 | #define VFIO_DEVICE_PCI_HOT_RESET _IO(VFIO_TYPE, VFIO_BASE + 13) | ||
364 | |||
327 | /* -------- API for Type1 VFIO IOMMU -------- */ | 365 | /* -------- API for Type1 VFIO IOMMU -------- */ |
328 | 366 | ||
329 | /** | 367 | /** |