aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/vfio
diff options
context:
space:
mode:
author Alex Williamson <alex.williamson@redhat.com> 2013-09-04 13:28:04 -0400
committer Alex Williamson <alex.williamson@redhat.com> 2013-09-04 13:28:04 -0400
commit 8b27ee60bfd6bbb84d2df28fa706c5c5081066ca (patch)
tree 1fab334bc5bfdc157df746cda06ad328cc1a5208 /drivers/vfio
parent 3bc4f3993b93dbf1f6402e2034a2e20eb07db807 (diff)
vfio-pci: PCI hot reset interface
The current VFIO_DEVICE_RESET interface only maps to PCI use cases where we can isolate the reset to the individual PCI function. This means the device must support FLR (PCIe or AF), PM reset on D3hot->D0 transition, device specific reset, or be a singleton device on a bus for a secondary bus reset. FLR does not have widespread support, PM reset is not very reliable, and bus topology is dictated by the system and device design. We need to provide a means for a user to induce a bus reset in cases where the existing mechanisms are not available or not reliable. This device specific extension to VFIO provides the user with this ability. Two new ioctls are introduced: VFIO_DEVICE_GET_PCI_HOT_RESET_INFO and VFIO_DEVICE_PCI_HOT_RESET. The first provides the user with information about the extent of devices affected by a hot reset. This is essentially a list of devices and the IOMMU groups they belong to. The user may then initiate a hot reset by calling the second ioctl. We must be careful that the user has ownership of all the affected devices found via the first ioctl, so the second ioctl takes a list of file descriptors for the VFIO groups affected by the reset. Each group must have IOMMU protection established for the ioctl to succeed. Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Diffstat (limited to 'drivers/vfio')
-rw-r--r--drivers/vfio/pci/vfio_pci.c286
1 files changed, 285 insertions, 1 deletions
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index cef6002acbd4..6ab71b9fcf8d 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -13,6 +13,7 @@
13 13
14#include <linux/device.h> 14#include <linux/device.h>
15#include <linux/eventfd.h> 15#include <linux/eventfd.h>
16#include <linux/file.h>
16#include <linux/interrupt.h> 17#include <linux/interrupt.h>
17#include <linux/iommu.h> 18#include <linux/iommu.h>
18#include <linux/module.h> 19#include <linux/module.h>
@@ -227,6 +228,110 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
227 return 0; 228 return 0;
228} 229}
229 230
/*
 * Per-device callback used with vfio_pci_for_each_slot_or_bus() to count
 * devices: @data points to an int that is incremented once per call.
 * @pdev is not examined.  Always returns 0.
 */
static int vfio_pci_count_devs(struct pci_dev *pdev, void *data)
{
	int *counter = data;

	*counter += 1;
	return 0;
}
236
/*
 * Cursor state shared with vfio_pci_fill_devs() while the array of
 * dependent devices is being populated.
 */
struct vfio_pci_fill_info {
	int max;	/* number of entries available in @devices */
	int cur;	/* index of the next entry to write */
	struct vfio_pci_dependent_device *devices;	/* output array */
};
242
243static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
244{
245 struct vfio_pci_fill_info *fill = data;
246 struct iommu_group *iommu_group;
247
248 if (fill->cur == fill->max)
249 return -EAGAIN; /* Something changed, try again */
250
251 iommu_group = iommu_group_get(&pdev->dev);
252 if (!iommu_group)
253 return -EPERM; /* Cannot reset non-isolated devices */
254
255 fill->devices[fill->cur].group_id = iommu_group_id(iommu_group);
256 fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus);
257 fill->devices[fill->cur].bus = pdev->bus->number;
258 fill->devices[fill->cur].devfn = pdev->devfn;
259 fill->cur++;
260 iommu_group_put(iommu_group);
261 return 0;
262}
263
/*
 * One user-supplied VFIO group: the group reference obtained from the
 * group file descriptor, paired with its IOMMU group id.
 */
struct vfio_pci_group_entry {
	struct vfio_group *group;	/* held group reference */
	int id;				/* IOMMU group id of @group */
};
268
/*
 * The set of groups a device is checked against by
 * vfio_pci_validate_devs().
 */
struct vfio_pci_group_info {
	int count;				/* entries in @groups */
	struct vfio_pci_group_entry *groups;	/* array of @count entries */
};
273
274static int vfio_pci_validate_devs(struct pci_dev *pdev, void *data)
275{
276 struct vfio_pci_group_info *info = data;
277 struct iommu_group *group;
278 int id, i;
279
280 group = iommu_group_get(&pdev->dev);
281 if (!group)
282 return -EPERM;
283
284 id = iommu_group_id(group);
285
286 for (i = 0; i < info->count; i++)
287 if (info->groups[i].id == id)
288 break;
289
290 iommu_group_put(group);
291
292 return (i == info->count) ? -EINVAL : 0;
293}
294
295static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot)
296{
297 for (; pdev; pdev = pdev->bus->self)
298 if (pdev->bus == slot->bus)
299 return (pdev->slot == slot);
300 return false;
301}
302
/*
 * Context handed to vfio_pci_walk_wrapper() for a single walk over the
 * devices affected by a slot or bus reset.
 */
struct vfio_pci_walk_info {
	int (*fn)(struct pci_dev *, void *data);	/* per-device callback */
	void *data;		/* opaque argument forwarded to @fn */
	struct pci_dev *pdev;	/* device whose slot scopes the walk */
	bool slot;		/* true: only visit devices below @pdev's slot */
	int ret;		/* most recent return value of @fn */
};
310
311static int vfio_pci_walk_wrapper(struct pci_dev *pdev, void *data)
312{
313 struct vfio_pci_walk_info *walk = data;
314
315 if (!walk->slot || vfio_pci_dev_below_slot(pdev, walk->pdev->slot))
316 walk->ret = walk->fn(pdev, walk->data);
317
318 return walk->ret;
319}
320
321static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
322 int (*fn)(struct pci_dev *,
323 void *data), void *data,
324 bool slot)
325{
326 struct vfio_pci_walk_info walk = {
327 .fn = fn, .data = data, .pdev = pdev, .slot = slot, .ret = 0,
328 };
329
330 pci_walk_bus(pdev->bus, vfio_pci_walk_wrapper, &walk);
331
332 return walk.ret;
333}
334
230static long vfio_pci_ioctl(void *device_data, 335static long vfio_pci_ioctl(void *device_data,
231 unsigned int cmd, unsigned long arg) 336 unsigned int cmd, unsigned long arg)
232{ 337{
@@ -407,10 +512,189 @@ static long vfio_pci_ioctl(void *device_data,
407 512
408 return ret; 513 return ret;
409 514
410 } else if (cmd == VFIO_DEVICE_RESET) 515 } else if (cmd == VFIO_DEVICE_RESET) {
411 return vdev->reset_works ? 516 return vdev->reset_works ?
412 pci_reset_function(vdev->pdev) : -EINVAL; 517 pci_reset_function(vdev->pdev) : -EINVAL;
413 518
519 } else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) {
520 struct vfio_pci_hot_reset_info hdr;
521 struct vfio_pci_fill_info fill = { 0 };
522 struct vfio_pci_dependent_device *devices = NULL;
523 bool slot = false;
524 int ret = 0;
525
526 minsz = offsetofend(struct vfio_pci_hot_reset_info, count);
527
528 if (copy_from_user(&hdr, (void __user *)arg, minsz))
529 return -EFAULT;
530
531 if (hdr.argsz < minsz)
532 return -EINVAL;
533
534 hdr.flags = 0;
535
536 /* Can we do a slot or bus reset or neither? */
537 if (!pci_probe_reset_slot(vdev->pdev->slot))
538 slot = true;
539 else if (pci_probe_reset_bus(vdev->pdev->bus))
540 return -ENODEV;
541
542 /* How many devices are affected? */
543 ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
544 vfio_pci_count_devs,
545 &fill.max, slot);
546 if (ret)
547 return ret;
548
549 WARN_ON(!fill.max); /* Should always be at least one */
550
551 /*
552 * If there's enough space, fill it now, otherwise return
553 * -ENOSPC and the number of devices affected.
554 */
555 if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {
556 ret = -ENOSPC;
557 hdr.count = fill.max;
558 goto reset_info_exit;
559 }
560
561 devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL);
562 if (!devices)
563 return -ENOMEM;
564
565 fill.devices = devices;
566
567 ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
568 vfio_pci_fill_devs,
569 &fill, slot);
570
571 /*
572 * If a device was removed between counting and filling,
573 * we may come up short of fill.max. If a device was
574 * added, we'll have a return of -EAGAIN above.
575 */
576 if (!ret)
577 hdr.count = fill.cur;
578
579reset_info_exit:
580 if (copy_to_user((void __user *)arg, &hdr, minsz))
581 ret = -EFAULT;
582
583 if (!ret) {
584 if (copy_to_user((void __user *)(arg + minsz), devices,
585 hdr.count * sizeof(*devices)))
586 ret = -EFAULT;
587 }
588
589 kfree(devices);
590 return ret;
591
592 } else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
593 struct vfio_pci_hot_reset hdr;
594 int32_t *group_fds;
595 struct vfio_pci_group_entry *groups;
596 struct vfio_pci_group_info info;
597 bool slot = false;
598 int i, count = 0, ret = 0;
599
600 minsz = offsetofend(struct vfio_pci_hot_reset, count);
601
602 if (copy_from_user(&hdr, (void __user *)arg, minsz))
603 return -EFAULT;
604
605 if (hdr.argsz < minsz || hdr.flags)
606 return -EINVAL;
607
608 /* Can we do a slot or bus reset or neither? */
609 if (!pci_probe_reset_slot(vdev->pdev->slot))
610 slot = true;
611 else if (pci_probe_reset_bus(vdev->pdev->bus))
612 return -ENODEV;
613
614 /*
615 * We can't let userspace give us an arbitrarily large
616 * buffer to copy, so verify how many we think there
617 * could be. Note groups can have multiple devices so
618 * one group per device is the max.
619 */
620 ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
621 vfio_pci_count_devs,
622 &count, slot);
623 if (ret)
624 return ret;
625
626 /* Somewhere between 1 and count is OK */
627 if (!hdr.count || hdr.count > count)
628 return -EINVAL;
629
630 group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL);
631 groups = kcalloc(hdr.count, sizeof(*groups), GFP_KERNEL);
632 if (!group_fds || !groups) {
633 kfree(group_fds);
634 kfree(groups);
635 return -ENOMEM;
636 }
637
638 if (copy_from_user(group_fds, (void __user *)(arg + minsz),
639 hdr.count * sizeof(*group_fds))) {
640 kfree(group_fds);
641 kfree(groups);
642 return -EFAULT;
643 }
644
645 /*
646 * For each group_fd, get the group through the vfio external
647 * user interface and store the group and iommu ID. This
648 * ensures the group is held across the reset.
649 */
650 for (i = 0; i < hdr.count; i++) {
651 struct vfio_group *group;
652 struct fd f = fdget(group_fds[i]);
653 if (!f.file) {
654 ret = -EBADF;
655 break;
656 }
657
658 group = vfio_group_get_external_user(f.file);
659 fdput(f);
660 if (IS_ERR(group)) {
661 ret = PTR_ERR(group);
662 break;
663 }
664
665 groups[i].group = group;
666 groups[i].id = vfio_external_user_iommu_id(group);
667 }
668
669 kfree(group_fds);
670
671 /* release reference to groups on error */
672 if (ret)
673 goto hot_reset_release;
674
675 info.count = hdr.count;
676 info.groups = groups;
677
678 /*
679 * Test whether all the affected devices are contained
680 * by the set of groups provided by the user.
681 */
682 ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
683 vfio_pci_validate_devs,
684 &info, slot);
685 if (!ret)
686 /* User has access, do the reset */
687 ret = slot ? pci_reset_slot(vdev->pdev->slot) :
688 pci_reset_bus(vdev->pdev->bus);
689
690hot_reset_release:
691 for (i--; i >= 0; i--)
692 vfio_group_put_external_user(groups[i].group);
693
694 kfree(groups);
695 return ret;
696 }
697
414 return -ENOTTY; 698 return -ENOTTY;
415} 699}
416 700