diff options
Diffstat (limited to 'drivers/vfio')
-rw-r--r-- | drivers/vfio/pci/vfio_pci.c | 286 |
1 files changed, 285 insertions, 1 deletions
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index cef6002acbd4..6ab71b9fcf8d 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c | |||
@@ -13,6 +13,7 @@ | |||
13 | 13 | ||
14 | #include <linux/device.h> | 14 | #include <linux/device.h> |
15 | #include <linux/eventfd.h> | 15 | #include <linux/eventfd.h> |
16 | #include <linux/file.h> | ||
16 | #include <linux/interrupt.h> | 17 | #include <linux/interrupt.h> |
17 | #include <linux/iommu.h> | 18 | #include <linux/iommu.h> |
18 | #include <linux/module.h> | 19 | #include <linux/module.h> |
@@ -227,6 +228,110 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type) | |||
227 | return 0; | 228 | return 0; |
228 | } | 229 | } |
229 | 230 | ||
/*
 * Per-device callback that counts devices: increments the int that @data
 * points to and returns 0.  @pdev itself is not examined.
 */
231 | static int vfio_pci_count_devs(struct pci_dev *pdev, void *data) | |
232 | { | |
233 | (*(int *)data)++; | |
234 | return 0; | |
235 | } | |
236 | |||
/*
 * State shared with vfio_pci_fill_devs():
 *   max     - number of entries at which filling stops with -EAGAIN
 *   cur     - index of the next entry to write (entries written so far)
 *   devices - array that receives one entry per visited device
 */
237 | struct vfio_pci_fill_info { | |
238 | int max; | |
239 | int cur; | |
240 | struct vfio_pci_dependent_device *devices; | |
241 | }; | |
242 | |||
/*
 * Per-device callback that records @pdev in the next free entry of the
 * struct vfio_pci_fill_info passed as @data: its IOMMU group ID, PCI
 * domain (segment) number, bus number and devfn.
 *
 * Returns 0 on success, -EAGAIN if fill->cur has already reached
 * fill->max, or -EPERM if the device has no IOMMU group.
 */
243 | static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data) | |
244 | { | |
245 | struct vfio_pci_fill_info *fill = data; | |
246 | struct iommu_group *iommu_group; | |
247 | |||
248 | if (fill->cur == fill->max) | |
249 | return -EAGAIN; /* Something changed, try again */ | |
250 | |||
251 | iommu_group = iommu_group_get(&pdev->dev); | |
252 | if (!iommu_group) | |
253 | return -EPERM; /* Cannot reset non-isolated devices */ | |
254 | |||
255 | fill->devices[fill->cur].group_id = iommu_group_id(iommu_group); | |
256 | fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus); | |
257 | fill->devices[fill->cur].bus = pdev->bus->number; | |
258 | fill->devices[fill->cur].devfn = pdev->devfn; | |
259 | fill->cur++; | |
260 | iommu_group_put(iommu_group); /* pairs with iommu_group_get() above */ | |
261 | return 0; | |
262 | } | |
263 | |||
/*
 * One user-supplied group as tracked during a hot reset:
 *   group - pointer returned by vfio_group_get_external_user()
 *   id    - value of vfio_external_user_iommu_id() for that group; this
 *           is what vfio_pci_validate_devs() compares against
 *           iommu_group_id() of each affected device
 */
264 | struct vfio_pci_group_entry { | |
265 | struct vfio_group *group; | |
266 | int id; | |
267 | }; | |
268 | |||
/*
 * Argument block for vfio_pci_validate_devs(): @groups is an array of
 * @count entries that the callback searches by id.
 */
269 | struct vfio_pci_group_info { | |
270 | int count; | |
271 | struct vfio_pci_group_entry *groups; | |
272 | }; | |
273 | |||
/*
 * Per-device callback that checks whether the IOMMU group ID of @pdev
 * matches the id of any entry in the struct vfio_pci_group_info passed
 * as @data.
 *
 * Returns 0 when a matching entry exists, -EINVAL when none does, or
 * -EPERM if the device has no IOMMU group.
 */
274 | static int vfio_pci_validate_devs(struct pci_dev *pdev, void *data) | |
275 | { | |
276 | struct vfio_pci_group_info *info = data; | |
277 | struct iommu_group *group; | |
278 | int id, i; | |
279 | |||
280 | group = iommu_group_get(&pdev->dev); | |
281 | if (!group) | |
282 | return -EPERM; | |
283 | |||
284 | id = iommu_group_id(group); | |
285 | |||
286 | for (i = 0; i < info->count; i++) | |
287 | if (info->groups[i].id == id) | |
288 | break; | |
289 | |||
290 | iommu_group_put(group); /* only the numeric id is needed past this point */ | |
291 | |||
292 | return (i == info->count) ? -EINVAL : 0; /* i == count: loop ended without a match */ | |
293 | } | |
294 | |||
/*
 * Starting at @pdev, step upward via pdev->bus->self until a device that
 * sits on @slot's bus is reached, then report whether that device's slot
 * is @slot.  Returns false if the walk runs out of devices (a NULL
 * bus->self) without ever reaching @slot's bus.
 */
295 | static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot) | |
296 | { | |
297 | for (; pdev; pdev = pdev->bus->self) | |
298 | if (pdev->bus == slot->bus) | |
299 | return (pdev->slot == slot); | |
300 | return false; | |
301 | } | |
302 | |||
/*
 * Context handed to vfio_pci_walk_wrapper():
 *   fn   - the real per-device callback
 *   data - opaque argument forwarded to @fn
 *   pdev - device whose slot is used for filtering when @slot is true
 *   slot - true to call @fn only for devices below @pdev's slot
 *   ret  - value returned by the most recent @fn call
 */
303 | struct vfio_pci_walk_info { | |
304 | int (*fn)(struct pci_dev *, void *data); | |
305 | void *data; | |
306 | struct pci_dev *pdev; | |
307 | bool slot; | |
308 | int ret; | |
309 | }; | |
310 | |||
/*
 * Callback given to pci_walk_bus().  @data is a struct vfio_pci_walk_info.
 * Calls walk->fn on @pdev unless slot filtering is enabled and @pdev is
 * not below walk->pdev's slot, and stores the result in walk->ret.
 *
 * Always returns walk->ret, so a filtered-out device returns the result
 * of the previous walk->fn call (0 if there has been none).
 * NOTE(review): presumably pci_walk_bus() stops on a non-zero return, in
 * which case that carried-over value is always 0 - confirm.
 */
311 | static int vfio_pci_walk_wrapper(struct pci_dev *pdev, void *data) | |
312 | { | |
313 | struct vfio_pci_walk_info *walk = data; | |
314 | |||
315 | if (!walk->slot || vfio_pci_dev_below_slot(pdev, walk->pdev->slot)) | |
316 | walk->ret = walk->fn(pdev, walk->data); | |
317 | |||
318 | return walk->ret; | |
319 | } | |
320 | |||
/*
 * Invoke @fn(dev, @data) for the devices that pci_walk_bus() visits
 * starting from @pdev's bus.  When @slot is true, only devices that
 * vfio_pci_dev_below_slot() places under @pdev's slot are passed to @fn;
 * otherwise every visited device is.
 *
 * Returns the value of the last @fn call, or 0 if @fn was never called.
 */
321 | static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev, | |
322 | int (*fn)(struct pci_dev *, | |
323 | void *data), void *data, | |
324 | bool slot) | |
325 | { | |
326 | struct vfio_pci_walk_info walk = { | |
327 | .fn = fn, .data = data, .pdev = pdev, .slot = slot, .ret = 0, | |
328 | }; | |
329 | |||
330 | pci_walk_bus(pdev->bus, vfio_pci_walk_wrapper, &walk); | |
331 | |||
332 | return walk.ret; | |
333 | } | |
334 | |||
230 | static long vfio_pci_ioctl(void *device_data, | 335 | static long vfio_pci_ioctl(void *device_data, |
231 | unsigned int cmd, unsigned long arg) | 336 | unsigned int cmd, unsigned long arg) |
232 | { | 337 | { |
@@ -407,10 +512,189 @@ static long vfio_pci_ioctl(void *device_data, | |||
407 | 512 | ||
408 | return ret; | 513 | return ret; |
409 | 514 | ||
410 | } else if (cmd == VFIO_DEVICE_RESET) | 515 | } else if (cmd == VFIO_DEVICE_RESET) { |
411 | return vdev->reset_works ? | 516 | return vdev->reset_works ? |
412 | pci_reset_function(vdev->pdev) : -EINVAL; | 517 | pci_reset_function(vdev->pdev) : -EINVAL; |
413 | 518 | ||
519 | } else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) { | ||
520 | struct vfio_pci_hot_reset_info hdr; | ||
521 | struct vfio_pci_fill_info fill = { 0 }; | ||
522 | struct vfio_pci_dependent_device *devices = NULL; | ||
523 | bool slot = false; | ||
524 | int ret = 0; | ||
525 | |||
526 | minsz = offsetofend(struct vfio_pci_hot_reset_info, count); | ||
527 | |||
528 | if (copy_from_user(&hdr, (void __user *)arg, minsz)) | ||
529 | return -EFAULT; | ||
530 | |||
531 | if (hdr.argsz < minsz) | ||
532 | return -EINVAL; | ||
533 | |||
534 | hdr.flags = 0; | ||
535 | |||
536 | /* Can we do a slot or bus reset or neither? */ | ||
537 | if (!pci_probe_reset_slot(vdev->pdev->slot)) | ||
538 | slot = true; | ||
539 | else if (pci_probe_reset_bus(vdev->pdev->bus)) | ||
540 | return -ENODEV; | ||
541 | |||
542 | /* How many devices are affected? */ | ||
543 | ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, | ||
544 | vfio_pci_count_devs, | ||
545 | &fill.max, slot); | ||
546 | if (ret) | ||
547 | return ret; | ||
548 | |||
549 | WARN_ON(!fill.max); /* Should always be at least one */ | ||
550 | |||
551 | /* | ||
552 | * If there's enough space, fill it now, otherwise return | ||
553 | * -ENOSPC and the number of devices affected. | ||
554 | */ | ||
555 | if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) { | ||
556 | ret = -ENOSPC; | ||
557 | hdr.count = fill.max; | ||
558 | goto reset_info_exit; | ||
559 | } | ||
560 | |||
561 | devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL); | ||
562 | if (!devices) | ||
563 | return -ENOMEM; | ||
564 | |||
565 | fill.devices = devices; | ||
566 | |||
567 | ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, | ||
568 | vfio_pci_fill_devs, | ||
569 | &fill, slot); | ||
570 | |||
571 | /* | ||
572 | * If a device was removed between counting and filling, | ||
573 | * we may come up short of fill.max. If a device was | ||
574 | * added, we'll have a return of -EAGAIN above. | ||
575 | */ | ||
576 | if (!ret) | ||
577 | hdr.count = fill.cur; | ||
578 | |||
579 | reset_info_exit: | ||
580 | if (copy_to_user((void __user *)arg, &hdr, minsz)) | ||
581 | ret = -EFAULT; | ||
582 | |||
583 | if (!ret) { | ||
584 | if (copy_to_user((void __user *)(arg + minsz), devices, | ||
585 | hdr.count * sizeof(*devices))) | ||
586 | ret = -EFAULT; | ||
587 | } | ||
588 | |||
589 | kfree(devices); | ||
590 | return ret; | ||
591 | |||
592 | } else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) { | ||
593 | struct vfio_pci_hot_reset hdr; | ||
594 | int32_t *group_fds; | ||
595 | struct vfio_pci_group_entry *groups; | ||
596 | struct vfio_pci_group_info info; | ||
597 | bool slot = false; | ||
598 | int i, count = 0, ret = 0; | ||
599 | |||
600 | minsz = offsetofend(struct vfio_pci_hot_reset, count); | ||
601 | |||
602 | if (copy_from_user(&hdr, (void __user *)arg, minsz)) | ||
603 | return -EFAULT; | ||
604 | |||
605 | if (hdr.argsz < minsz || hdr.flags) | ||
606 | return -EINVAL; | ||
607 | |||
608 | /* Can we do a slot or bus reset or neither? */ | ||
609 | if (!pci_probe_reset_slot(vdev->pdev->slot)) | ||
610 | slot = true; | ||
611 | else if (pci_probe_reset_bus(vdev->pdev->bus)) | ||
612 | return -ENODEV; | ||
613 | |||
614 | /* | ||
615 | * We can't let userspace give us an arbitrarily large | ||
616 | * buffer to copy, so verify how many we think there | ||
617 | * could be. Note groups can have multiple devices so | ||
618 | * one group per device is the max. | ||
619 | */ | ||
620 | ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, | ||
621 | vfio_pci_count_devs, | ||
622 | &count, slot); | ||
623 | if (ret) | ||
624 | return ret; | ||
625 | |||
626 | /* Somewhere between 1 and count is OK */ | ||
627 | if (!hdr.count || hdr.count > count) | ||
628 | return -EINVAL; | ||
629 | |||
630 | group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL); | ||
631 | groups = kcalloc(hdr.count, sizeof(*groups), GFP_KERNEL); | ||
632 | if (!group_fds || !groups) { | ||
633 | kfree(group_fds); | ||
634 | kfree(groups); | ||
635 | return -ENOMEM; | ||
636 | } | ||
637 | |||
638 | if (copy_from_user(group_fds, (void __user *)(arg + minsz), | ||
639 | hdr.count * sizeof(*group_fds))) { | ||
640 | kfree(group_fds); | ||
641 | kfree(groups); | ||
642 | return -EFAULT; | ||
643 | } | ||
644 | |||
645 | /* | ||
646 | * For each group_fd, get the group through the vfio external | ||
647 | * user interface and store the group and iommu ID. This | ||
648 | * ensures the group is held across the reset. | ||
649 | */ | ||
650 | for (i = 0; i < hdr.count; i++) { | ||
651 | struct vfio_group *group; | ||
652 | struct fd f = fdget(group_fds[i]); | ||
653 | if (!f.file) { | ||
654 | ret = -EBADF; | ||
655 | break; | ||
656 | } | ||
657 | |||
658 | group = vfio_group_get_external_user(f.file); | ||
659 | fdput(f); | ||
660 | if (IS_ERR(group)) { | ||
661 | ret = PTR_ERR(group); | ||
662 | break; | ||
663 | } | ||
664 | |||
665 | groups[i].group = group; | ||
666 | groups[i].id = vfio_external_user_iommu_id(group); | ||
667 | } | ||
668 | |||
669 | kfree(group_fds); | ||
670 | |||
671 | /* release reference to groups on error */ | ||
672 | if (ret) | ||
673 | goto hot_reset_release; | ||
674 | |||
675 | info.count = hdr.count; | ||
676 | info.groups = groups; | ||
677 | |||
678 | /* | ||
679 | * Test whether all the affected devices are contained | ||
680 | * by the set of groups provided by the user. | ||
681 | */ | ||
682 | ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, | ||
683 | vfio_pci_validate_devs, | ||
684 | &info, slot); | ||
685 | if (!ret) | ||
686 | /* User has access, do the reset */ | ||
687 | ret = slot ? pci_reset_slot(vdev->pdev->slot) : | ||
688 | pci_reset_bus(vdev->pdev->bus); | ||
689 | |||
690 | hot_reset_release: | ||
691 | for (i--; i >= 0; i--) | ||
692 | vfio_group_put_external_user(groups[i].group); | ||
693 | |||
694 | kfree(groups); | ||
695 | return ret; | ||
696 | } | ||
697 | |||
414 | return -ENOTTY; | 698 | return -ENOTTY; |
415 | } | 699 | } |
416 | 700 | ||