diff options
| -rw-r--r-- | drivers/vfio/pci/vfio_pci.c | 286 | ||||
| -rw-r--r-- | include/uapi/linux/vfio.h | 38 |
2 files changed, 323 insertions, 1 deletions
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index cef6002acbd4..6ab71b9fcf8d 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | 13 | ||
| 14 | #include <linux/device.h> | 14 | #include <linux/device.h> |
| 15 | #include <linux/eventfd.h> | 15 | #include <linux/eventfd.h> |
| 16 | #include <linux/file.h> | ||
| 16 | #include <linux/interrupt.h> | 17 | #include <linux/interrupt.h> |
| 17 | #include <linux/iommu.h> | 18 | #include <linux/iommu.h> |
| 18 | #include <linux/module.h> | 19 | #include <linux/module.h> |
| @@ -227,6 +228,110 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type) | |||
| 227 | return 0; | 228 | return 0; |
| 228 | } | 229 | } |
| 229 | 230 | ||
/*
 * Per-device walk callback that counts devices.
 *
 * @pdev: device being visited (unused).
 * @data: points to the int counter to increment.
 *
 * Always returns 0.
 */
static int vfio_pci_count_devs(struct pci_dev *pdev, void *data)
{
	int *counter = data;

	*counter += 1;

	return 0;
}
| 236 | |||
/* Cursor state handed to vfio_pci_fill_devs() while filling @devices. */
struct vfio_pci_fill_info {
	int max;	/* capacity of @devices, in entries */
	int cur;	/* index of the next entry to fill */
	struct vfio_pci_dependent_device *devices;	/* output array */
};
| 242 | |||
| 243 | static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data) | ||
| 244 | { | ||
| 245 | struct vfio_pci_fill_info *fill = data; | ||
| 246 | struct iommu_group *iommu_group; | ||
| 247 | |||
| 248 | if (fill->cur == fill->max) | ||
| 249 | return -EAGAIN; /* Something changed, try again */ | ||
| 250 | |||
| 251 | iommu_group = iommu_group_get(&pdev->dev); | ||
| 252 | if (!iommu_group) | ||
| 253 | return -EPERM; /* Cannot reset non-isolated devices */ | ||
| 254 | |||
| 255 | fill->devices[fill->cur].group_id = iommu_group_id(iommu_group); | ||
| 256 | fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus); | ||
| 257 | fill->devices[fill->cur].bus = pdev->bus->number; | ||
| 258 | fill->devices[fill->cur].devfn = pdev->devfn; | ||
| 259 | fill->cur++; | ||
| 260 | iommu_group_put(iommu_group); | ||
| 261 | return 0; | ||
| 262 | } | ||
| 263 | |||
/* One user-supplied VFIO group paired with the id used to match devices. */
struct vfio_pci_group_entry {
	struct vfio_group *group;	/* group reference held by the caller */
	int id;				/* id compared against each device's IOMMU group id */
};
| 268 | |||
/* Set of groups that vfio_pci_validate_devs() checks devices against. */
struct vfio_pci_group_info {
	int count;				/* number of entries in @groups */
	struct vfio_pci_group_entry *groups;	/* array of @count entries */
};
| 273 | |||
| 274 | static int vfio_pci_validate_devs(struct pci_dev *pdev, void *data) | ||
| 275 | { | ||
| 276 | struct vfio_pci_group_info *info = data; | ||
| 277 | struct iommu_group *group; | ||
| 278 | int id, i; | ||
| 279 | |||
| 280 | group = iommu_group_get(&pdev->dev); | ||
| 281 | if (!group) | ||
| 282 | return -EPERM; | ||
| 283 | |||
| 284 | id = iommu_group_id(group); | ||
| 285 | |||
| 286 | for (i = 0; i < info->count; i++) | ||
| 287 | if (info->groups[i].id == id) | ||
| 288 | break; | ||
| 289 | |||
| 290 | iommu_group_put(group); | ||
| 291 | |||
| 292 | return (i == info->count) ? -EINVAL : 0; | ||
| 293 | } | ||
| 294 | |||
| 295 | static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot) | ||
| 296 | { | ||
| 297 | for (; pdev; pdev = pdev->bus->self) | ||
| 298 | if (pdev->bus == slot->bus) | ||
| 299 | return (pdev->slot == slot); | ||
| 300 | return false; | ||
| 301 | } | ||
| 302 | |||
/* Context passed through the bus walk to vfio_pci_walk_wrapper(). */
struct vfio_pci_walk_info {
	int (*fn)(struct pci_dev *, void *data);	/* per-device callback */
	void *data;		/* opaque argument forwarded to @fn */
	struct pci_dev *pdev;	/* device whose slot is used for filtering */
	bool slot;		/* true: only visit devices below @pdev's slot */
	int ret;		/* most recent return value of @fn */
};
| 310 | |||
| 311 | static int vfio_pci_walk_wrapper(struct pci_dev *pdev, void *data) | ||
| 312 | { | ||
| 313 | struct vfio_pci_walk_info *walk = data; | ||
| 314 | |||
| 315 | if (!walk->slot || vfio_pci_dev_below_slot(pdev, walk->pdev->slot)) | ||
| 316 | walk->ret = walk->fn(pdev, walk->data); | ||
| 317 | |||
| 318 | return walk->ret; | ||
| 319 | } | ||
| 320 | |||
| 321 | static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev, | ||
| 322 | int (*fn)(struct pci_dev *, | ||
| 323 | void *data), void *data, | ||
| 324 | bool slot) | ||
| 325 | { | ||
| 326 | struct vfio_pci_walk_info walk = { | ||
| 327 | .fn = fn, .data = data, .pdev = pdev, .slot = slot, .ret = 0, | ||
| 328 | }; | ||
| 329 | |||
| 330 | pci_walk_bus(pdev->bus, vfio_pci_walk_wrapper, &walk); | ||
| 331 | |||
| 332 | return walk.ret; | ||
| 333 | } | ||
| 334 | |||
| 230 | static long vfio_pci_ioctl(void *device_data, | 335 | static long vfio_pci_ioctl(void *device_data, |
| 231 | unsigned int cmd, unsigned long arg) | 336 | unsigned int cmd, unsigned long arg) |
| 232 | { | 337 | { |
| @@ -407,10 +512,189 @@ static long vfio_pci_ioctl(void *device_data, | |||
| 407 | 512 | ||
| 408 | return ret; | 513 | return ret; |
| 409 | 514 | ||
| 410 | } else if (cmd == VFIO_DEVICE_RESET) | 515 | } else if (cmd == VFIO_DEVICE_RESET) { |
| 411 | return vdev->reset_works ? | 516 | return vdev->reset_works ? |
| 412 | pci_reset_function(vdev->pdev) : -EINVAL; | 517 | pci_reset_function(vdev->pdev) : -EINVAL; |
| 413 | 518 | ||
| 519 | } else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) { | ||
| 520 | struct vfio_pci_hot_reset_info hdr; | ||
| 521 | struct vfio_pci_fill_info fill = { 0 }; | ||
| 522 | struct vfio_pci_dependent_device *devices = NULL; | ||
| 523 | bool slot = false; | ||
| 524 | int ret = 0; | ||
| 525 | |||
| 526 | minsz = offsetofend(struct vfio_pci_hot_reset_info, count); | ||
| 527 | |||
| 528 | if (copy_from_user(&hdr, (void __user *)arg, minsz)) | ||
| 529 | return -EFAULT; | ||
| 530 | |||
| 531 | if (hdr.argsz < minsz) | ||
| 532 | return -EINVAL; | ||
| 533 | |||
| 534 | hdr.flags = 0; | ||
| 535 | |||
| 536 | /* Can we do a slot or bus reset or neither? */ | ||
| 537 | if (!pci_probe_reset_slot(vdev->pdev->slot)) | ||
| 538 | slot = true; | ||
| 539 | else if (pci_probe_reset_bus(vdev->pdev->bus)) | ||
| 540 | return -ENODEV; | ||
| 541 | |||
| 542 | /* How many devices are affected? */ | ||
| 543 | ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, | ||
| 544 | vfio_pci_count_devs, | ||
| 545 | &fill.max, slot); | ||
| 546 | if (ret) | ||
| 547 | return ret; | ||
| 548 | |||
| 549 | WARN_ON(!fill.max); /* Should always be at least one */ | ||
| 550 | |||
| 551 | /* | ||
| 552 | * If there's enough space, fill it now, otherwise return | ||
| 553 | * -ENOSPC and the number of devices affected. | ||
| 554 | */ | ||
| 555 | if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) { | ||
| 556 | ret = -ENOSPC; | ||
| 557 | hdr.count = fill.max; | ||
| 558 | goto reset_info_exit; | ||
| 559 | } | ||
| 560 | |||
| 561 | devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL); | ||
| 562 | if (!devices) | ||
| 563 | return -ENOMEM; | ||
| 564 | |||
| 565 | fill.devices = devices; | ||
| 566 | |||
| 567 | ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, | ||
| 568 | vfio_pci_fill_devs, | ||
| 569 | &fill, slot); | ||
| 570 | |||
| 571 | /* | ||
| 572 | * If a device was removed between counting and filling, | ||
| 573 | * we may come up short of fill.max. If a device was | ||
| 574 | * added, we'll have a return of -EAGAIN above. | ||
| 575 | */ | ||
| 576 | if (!ret) | ||
| 577 | hdr.count = fill.cur; | ||
| 578 | |||
| 579 | reset_info_exit: | ||
| 580 | if (copy_to_user((void __user *)arg, &hdr, minsz)) | ||
| 581 | ret = -EFAULT; | ||
| 582 | |||
| 583 | if (!ret) { | ||
| 584 | if (copy_to_user((void __user *)(arg + minsz), devices, | ||
| 585 | hdr.count * sizeof(*devices))) | ||
| 586 | ret = -EFAULT; | ||
| 587 | } | ||
| 588 | |||
| 589 | kfree(devices); | ||
| 590 | return ret; | ||
| 591 | |||
| 592 | } else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) { | ||
| 593 | struct vfio_pci_hot_reset hdr; | ||
| 594 | int32_t *group_fds; | ||
| 595 | struct vfio_pci_group_entry *groups; | ||
| 596 | struct vfio_pci_group_info info; | ||
| 597 | bool slot = false; | ||
| 598 | int i, count = 0, ret = 0; | ||
| 599 | |||
| 600 | minsz = offsetofend(struct vfio_pci_hot_reset, count); | ||
| 601 | |||
| 602 | if (copy_from_user(&hdr, (void __user *)arg, minsz)) | ||
| 603 | return -EFAULT; | ||
| 604 | |||
| 605 | if (hdr.argsz < minsz || hdr.flags) | ||
| 606 | return -EINVAL; | ||
| 607 | |||
| 608 | /* Can we do a slot or bus reset or neither? */ | ||
| 609 | if (!pci_probe_reset_slot(vdev->pdev->slot)) | ||
| 610 | slot = true; | ||
| 611 | else if (pci_probe_reset_bus(vdev->pdev->bus)) | ||
| 612 | return -ENODEV; | ||
| 613 | |||
| 614 | /* | ||
| 615 | * We can't let userspace give us an arbitrarily large | ||
| 616 | * buffer to copy, so verify how many we think there | ||
| 617 | * could be. Note groups can have multiple devices so | ||
| 618 | * one group per device is the max. | ||
| 619 | */ | ||
| 620 | ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, | ||
| 621 | vfio_pci_count_devs, | ||
| 622 | &count, slot); | ||
| 623 | if (ret) | ||
| 624 | return ret; | ||
| 625 | |||
| 626 | /* Somewhere between 1 and count is OK */ | ||
| 627 | if (!hdr.count || hdr.count > count) | ||
| 628 | return -EINVAL; | ||
| 629 | |||
| 630 | group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL); | ||
| 631 | groups = kcalloc(hdr.count, sizeof(*groups), GFP_KERNEL); | ||
| 632 | if (!group_fds || !groups) { | ||
| 633 | kfree(group_fds); | ||
| 634 | kfree(groups); | ||
| 635 | return -ENOMEM; | ||
| 636 | } | ||
| 637 | |||
| 638 | if (copy_from_user(group_fds, (void __user *)(arg + minsz), | ||
| 639 | hdr.count * sizeof(*group_fds))) { | ||
| 640 | kfree(group_fds); | ||
| 641 | kfree(groups); | ||
| 642 | return -EFAULT; | ||
| 643 | } | ||
| 644 | |||
| 645 | /* | ||
| 646 | * For each group_fd, get the group through the vfio external | ||
| 647 | * user interface and store the group and iommu ID. This | ||
| 648 | * ensures the group is held across the reset. | ||
| 649 | */ | ||
| 650 | for (i = 0; i < hdr.count; i++) { | ||
| 651 | struct vfio_group *group; | ||
| 652 | struct fd f = fdget(group_fds[i]); | ||
| 653 | if (!f.file) { | ||
| 654 | ret = -EBADF; | ||
| 655 | break; | ||
| 656 | } | ||
| 657 | |||
| 658 | group = vfio_group_get_external_user(f.file); | ||
| 659 | fdput(f); | ||
| 660 | if (IS_ERR(group)) { | ||
| 661 | ret = PTR_ERR(group); | ||
| 662 | break; | ||
| 663 | } | ||
| 664 | |||
| 665 | groups[i].group = group; | ||
| 666 | groups[i].id = vfio_external_user_iommu_id(group); | ||
| 667 | } | ||
| 668 | |||
| 669 | kfree(group_fds); | ||
| 670 | |||
| 671 | /* release reference to groups on error */ | ||
| 672 | if (ret) | ||
| 673 | goto hot_reset_release; | ||
| 674 | |||
| 675 | info.count = hdr.count; | ||
| 676 | info.groups = groups; | ||
| 677 | |||
| 678 | /* | ||
| 679 | * Test whether all the affected devices are contained | ||
| 680 | * by the set of groups provided by the user. | ||
| 681 | */ | ||
| 682 | ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, | ||
| 683 | vfio_pci_validate_devs, | ||
| 684 | &info, slot); | ||
| 685 | if (!ret) | ||
| 686 | /* User has access, do the reset */ | ||
| 687 | ret = slot ? pci_reset_slot(vdev->pdev->slot) : | ||
| 688 | pci_reset_bus(vdev->pdev->bus); | ||
| 689 | |||
| 690 | hot_reset_release: | ||
| 691 | for (i--; i >= 0; i--) | ||
| 692 | vfio_group_put_external_user(groups[i].group); | ||
| 693 | |||
| 694 | kfree(groups); | ||
| 695 | return ret; | ||
| 696 | } | ||
| 697 | |||
| 414 | return -ENOTTY; | 698 | return -ENOTTY; |
| 415 | } | 699 | } |
| 416 | 700 | ||
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 916e444e6f74..0fd47f5bc146 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h | |||
| @@ -324,6 +324,44 @@ enum { | |||
| 324 | VFIO_PCI_NUM_IRQS | 324 | VFIO_PCI_NUM_IRQS |
| 325 | }; | 325 | }; |
| 326 | 326 | ||
| 327 | /** | ||
| 328 | * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12, | ||
| 329 | * struct vfio_pci_hot_reset_info) | ||
| 330 | * | ||
| 331 | * Return: 0 on success, -errno on failure: | ||
| 332 | * -ENOSPC = insufficient buffer, -ENODEV = unsupported for device. | ||
| 333 | */ | ||
| 334 | struct vfio_pci_dependent_device { | ||
| 335 | __u32 group_id; | ||
| 336 | __u16 segment; | ||
| 337 | __u8 bus; | ||
| 338 | __u8 devfn; /* Use PCI_SLOT/PCI_FUNC */ | ||
| 339 | }; | ||
| 340 | |||
| 341 | struct vfio_pci_hot_reset_info { | ||
| 342 | __u32 argsz; | ||
| 343 | __u32 flags; | ||
| 344 | __u32 count; | ||
| 345 | struct vfio_pci_dependent_device devices[]; | ||
| 346 | }; | ||
| 347 | |||
| 348 | #define VFIO_DEVICE_GET_PCI_HOT_RESET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) | ||
| 349 | |||
| 350 | /** | ||
| 351 | * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13, | ||
| 352 | * struct vfio_pci_hot_reset) | ||
| 353 | * | ||
| 354 | * Return: 0 on success, -errno on failure. | ||
| 355 | */ | ||
| 356 | struct vfio_pci_hot_reset { | ||
| 357 | __u32 argsz; | ||
| 358 | __u32 flags; | ||
| 359 | __u32 count; | ||
| 360 | __s32 group_fds[]; | ||
| 361 | }; | ||
| 362 | |||
| 363 | #define VFIO_DEVICE_PCI_HOT_RESET _IO(VFIO_TYPE, VFIO_BASE + 13) | ||
| 364 | |||
| 327 | /* -------- API for Type1 VFIO IOMMU -------- */ | 365 | /* -------- API for Type1 VFIO IOMMU -------- */ |
| 328 | 366 | ||
| 329 | /** | 367 | /** |
