diff options
-rw-r--r-- | drivers/vfio/pci/Kconfig | 4 | ||||
-rw-r--r-- | drivers/vfio/pci/Makefile | 1 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci.c | 175 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_config.c | 45 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_igd.c | 280 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_intrs.c | 17 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_private.h | 39 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_rdwr.c | 9 | ||||
-rw-r--r-- | drivers/vfio/vfio.c | 70 | ||||
-rw-r--r-- | include/linux/vfio.h | 11 | ||||
-rw-r--r-- | include/uapi/linux/vfio.h | 92 |
11 files changed, 706 insertions, 37 deletions
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 02912f180c6d..24ee2605b9f0 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig | |||
@@ -26,3 +26,7 @@ config VFIO_PCI_MMAP | |||
26 | config VFIO_PCI_INTX | 26 | config VFIO_PCI_INTX |
27 | depends on VFIO_PCI | 27 | depends on VFIO_PCI |
28 | def_bool y if !S390 | 28 | def_bool y if !S390 |
29 | |||
30 | config VFIO_PCI_IGD | ||
31 | depends on VFIO_PCI | ||
32 | def_bool y if X86 | ||
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index 131079255fd9..76d8ec058edd 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile | |||
@@ -1,4 +1,5 @@ | |||
1 | 1 | ||
2 | vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o | 2 | vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o |
3 | vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o | ||
3 | 4 | ||
4 | obj-$(CONFIG_VFIO_PCI) += vfio-pci.o | 5 | obj-$(CONFIG_VFIO_PCI) += vfio-pci.o |
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 8c80a48e3233..712a84978e97 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c | |||
@@ -111,6 +111,7 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev) | |||
111 | } | 111 | } |
112 | 112 | ||
113 | static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); | 113 | static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); |
114 | static void vfio_pci_disable(struct vfio_pci_device *vdev); | ||
114 | 115 | ||
115 | static int vfio_pci_enable(struct vfio_pci_device *vdev) | 116 | static int vfio_pci_enable(struct vfio_pci_device *vdev) |
116 | { | 117 | { |
@@ -169,13 +170,26 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) | |||
169 | if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev)) | 170 | if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev)) |
170 | vdev->has_vga = true; | 171 | vdev->has_vga = true; |
171 | 172 | ||
173 | |||
174 | if (vfio_pci_is_vga(pdev) && | ||
175 | pdev->vendor == PCI_VENDOR_ID_INTEL && | ||
176 | IS_ENABLED(CONFIG_VFIO_PCI_IGD)) { | ||
177 | ret = vfio_pci_igd_init(vdev); | ||
178 | if (ret) { | ||
179 | dev_warn(&vdev->pdev->dev, | ||
180 | "Failed to setup Intel IGD regions\n"); | ||
181 | vfio_pci_disable(vdev); | ||
182 | return ret; | ||
183 | } | ||
184 | } | ||
185 | |||
172 | return 0; | 186 | return 0; |
173 | } | 187 | } |
174 | 188 | ||
175 | static void vfio_pci_disable(struct vfio_pci_device *vdev) | 189 | static void vfio_pci_disable(struct vfio_pci_device *vdev) |
176 | { | 190 | { |
177 | struct pci_dev *pdev = vdev->pdev; | 191 | struct pci_dev *pdev = vdev->pdev; |
178 | int bar; | 192 | int i, bar; |
179 | 193 | ||
180 | /* Stop the device from further DMA */ | 194 | /* Stop the device from further DMA */ |
181 | pci_clear_master(pdev); | 195 | pci_clear_master(pdev); |
@@ -186,6 +200,13 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) | |||
186 | 200 | ||
187 | vdev->virq_disabled = false; | 201 | vdev->virq_disabled = false; |
188 | 202 | ||
203 | for (i = 0; i < vdev->num_regions; i++) | ||
204 | vdev->region[i].ops->release(vdev, &vdev->region[i]); | ||
205 | |||
206 | vdev->num_regions = 0; | ||
207 | kfree(vdev->region); | ||
208 | vdev->region = NULL; /* don't krealloc a freed pointer */ | ||
209 | |||
189 | vfio_config_free(vdev); | 210 | vfio_config_free(vdev); |
190 | 211 | ||
191 | for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) { | 212 | for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) { |
@@ -421,6 +442,93 @@ static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev, | |||
421 | return walk.ret; | 442 | return walk.ret; |
422 | } | 443 | } |
423 | 444 | ||
445 | static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev, | ||
446 | struct vfio_info_cap *caps) | ||
447 | { | ||
448 | struct vfio_info_cap_header *header; | ||
449 | struct vfio_region_info_cap_sparse_mmap *sparse; | ||
450 | size_t end, size; | ||
451 | int nr_areas = 2, i = 0; | ||
452 | |||
453 | end = pci_resource_len(vdev->pdev, vdev->msix_bar); | ||
454 | |||
455 | /* If MSI-X table is aligned to the start or end, only one area */ | ||
456 | if (((vdev->msix_offset & PAGE_MASK) == 0) || | ||
457 | (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end)) | ||
458 | nr_areas = 1; | ||
459 | |||
460 | size = sizeof(*sparse) + (nr_areas * sizeof(*sparse->areas)); | ||
461 | |||
462 | header = vfio_info_cap_add(caps, size, | ||
463 | VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1); | ||
464 | if (IS_ERR(header)) | ||
465 | return PTR_ERR(header); | ||
466 | |||
467 | sparse = container_of(header, | ||
468 | struct vfio_region_info_cap_sparse_mmap, header); | ||
469 | sparse->nr_areas = nr_areas; | ||
470 | |||
471 | if (vdev->msix_offset & PAGE_MASK) { | ||
472 | sparse->areas[i].offset = 0; | ||
473 | sparse->areas[i].size = vdev->msix_offset & PAGE_MASK; | ||
474 | i++; | ||
475 | } | ||
476 | |||
477 | if (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) < end) { | ||
478 | sparse->areas[i].offset = PAGE_ALIGN(vdev->msix_offset + | ||
479 | vdev->msix_size); | ||
480 | sparse->areas[i].size = end - sparse->areas[i].offset; | ||
481 | i++; | ||
482 | } | ||
483 | |||
484 | return 0; | ||
485 | } | ||
486 | |||
487 | static int region_type_cap(struct vfio_pci_device *vdev, | ||
488 | struct vfio_info_cap *caps, | ||
489 | unsigned int type, unsigned int subtype) | ||
490 | { | ||
491 | struct vfio_info_cap_header *header; | ||
492 | struct vfio_region_info_cap_type *cap; | ||
493 | |||
494 | header = vfio_info_cap_add(caps, sizeof(*cap), | ||
495 | VFIO_REGION_INFO_CAP_TYPE, 1); | ||
496 | if (IS_ERR(header)) | ||
497 | return PTR_ERR(header); | ||
498 | |||
499 | cap = container_of(header, struct vfio_region_info_cap_type, header); | ||
500 | cap->type = type; | ||
501 | cap->subtype = subtype; | ||
502 | |||
503 | return 0; | ||
504 | } | ||
505 | |||
506 | int vfio_pci_register_dev_region(struct vfio_pci_device *vdev, | ||
507 | unsigned int type, unsigned int subtype, | ||
508 | const struct vfio_pci_regops *ops, | ||
509 | size_t size, u32 flags, void *data) | ||
510 | { | ||
511 | struct vfio_pci_region *region; | ||
512 | |||
513 | region = krealloc(vdev->region, | ||
514 | (vdev->num_regions + 1) * sizeof(*region), | ||
515 | GFP_KERNEL); | ||
516 | if (!region) | ||
517 | return -ENOMEM; | ||
518 | |||
519 | vdev->region = region; | ||
520 | vdev->region[vdev->num_regions].type = type; | ||
521 | vdev->region[vdev->num_regions].subtype = subtype; | ||
522 | vdev->region[vdev->num_regions].ops = ops; | ||
523 | vdev->region[vdev->num_regions].size = size; | ||
524 | vdev->region[vdev->num_regions].flags = flags; | ||
525 | vdev->region[vdev->num_regions].data = data; | ||
526 | |||
527 | vdev->num_regions++; | ||
528 | |||
529 | return 0; | ||
530 | } | ||
531 | |||
424 | static long vfio_pci_ioctl(void *device_data, | 532 | static long vfio_pci_ioctl(void *device_data, |
425 | unsigned int cmd, unsigned long arg) | 533 | unsigned int cmd, unsigned long arg) |
426 | { | 534 | { |
@@ -443,7 +551,7 @@ static long vfio_pci_ioctl(void *device_data, | |||
443 | if (vdev->reset_works) | 551 | if (vdev->reset_works) |
444 | info.flags |= VFIO_DEVICE_FLAGS_RESET; | 552 | info.flags |= VFIO_DEVICE_FLAGS_RESET; |
445 | 553 | ||
446 | info.num_regions = VFIO_PCI_NUM_REGIONS; | 554 | info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions; |
447 | info.num_irqs = VFIO_PCI_NUM_IRQS; | 555 | info.num_irqs = VFIO_PCI_NUM_IRQS; |
448 | 556 | ||
449 | return copy_to_user((void __user *)arg, &info, minsz) ? | 557 | return copy_to_user((void __user *)arg, &info, minsz) ? |
@@ -452,6 +560,8 @@ static long vfio_pci_ioctl(void *device_data, | |||
452 | } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { | 560 | } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { |
453 | struct pci_dev *pdev = vdev->pdev; | 561 | struct pci_dev *pdev = vdev->pdev; |
454 | struct vfio_region_info info; | 562 | struct vfio_region_info info; |
563 | struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; | ||
564 | int i, ret; | ||
455 | 565 | ||
456 | minsz = offsetofend(struct vfio_region_info, offset); | 566 | minsz = offsetofend(struct vfio_region_info, offset); |
457 | 567 | ||
@@ -480,8 +590,15 @@ static long vfio_pci_ioctl(void *device_data, | |||
480 | VFIO_REGION_INFO_FLAG_WRITE; | 590 | VFIO_REGION_INFO_FLAG_WRITE; |
481 | if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) && | 591 | if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) && |
482 | pci_resource_flags(pdev, info.index) & | 592 | pci_resource_flags(pdev, info.index) & |
483 | IORESOURCE_MEM && info.size >= PAGE_SIZE) | 593 | IORESOURCE_MEM && info.size >= PAGE_SIZE) { |
484 | info.flags |= VFIO_REGION_INFO_FLAG_MMAP; | 594 | info.flags |= VFIO_REGION_INFO_FLAG_MMAP; |
595 | if (info.index == vdev->msix_bar) { | ||
596 | ret = msix_sparse_mmap_cap(vdev, &caps); | ||
597 | if (ret) | ||
598 | return ret; | ||
599 | } | ||
600 | } | ||
601 | |||
485 | break; | 602 | break; |
486 | case VFIO_PCI_ROM_REGION_INDEX: | 603 | case VFIO_PCI_ROM_REGION_INDEX: |
487 | { | 604 | { |
@@ -493,8 +610,14 @@ static long vfio_pci_ioctl(void *device_data, | |||
493 | 610 | ||
494 | /* Report the BAR size, not the ROM size */ | 611 | /* Report the BAR size, not the ROM size */ |
495 | info.size = pci_resource_len(pdev, info.index); | 612 | info.size = pci_resource_len(pdev, info.index); |
496 | if (!info.size) | 613 | if (!info.size) { |
497 | break; | 614 | /* Shadow ROMs appear as PCI option ROMs */ |
615 | if (pdev->resource[PCI_ROM_RESOURCE].flags & | ||
616 | IORESOURCE_ROM_SHADOW) | ||
617 | info.size = 0x20000; | ||
618 | else | ||
619 | break; | ||
620 | } | ||
498 | 621 | ||
499 | /* Is it really there? */ | 622 | /* Is it really there? */ |
500 | io = pci_map_rom(pdev, &size); | 623 | io = pci_map_rom(pdev, &size); |
@@ -518,7 +641,40 @@ static long vfio_pci_ioctl(void *device_data, | |||
518 | 641 | ||
519 | break; | 642 | break; |
520 | default: | 643 | default: |
521 | return -EINVAL; | 644 | if (info.index >= |
645 | VFIO_PCI_NUM_REGIONS + vdev->num_regions) | ||
646 | return -EINVAL; | ||
647 | |||
648 | i = info.index - VFIO_PCI_NUM_REGIONS; | ||
649 | |||
650 | info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); | ||
651 | info.size = vdev->region[i].size; | ||
652 | info.flags = vdev->region[i].flags; | ||
653 | |||
654 | ret = region_type_cap(vdev, &caps, | ||
655 | vdev->region[i].type, | ||
656 | vdev->region[i].subtype); | ||
657 | if (ret) | ||
658 | return ret; | ||
659 | } | ||
660 | |||
661 | if (caps.size) { | ||
662 | info.flags |= VFIO_REGION_INFO_FLAG_CAPS; | ||
663 | if (info.argsz < sizeof(info) + caps.size) { | ||
664 | info.argsz = sizeof(info) + caps.size; | ||
665 | info.cap_offset = 0; | ||
666 | } else { | ||
667 | vfio_info_cap_shift(&caps, sizeof(info)); | ||
668 | if (copy_to_user((void __user *)arg + | ||
669 | sizeof(info), caps.buf, | ||
670 | caps.size)) { | ||
671 | kfree(caps.buf); | ||
672 | return -EFAULT; | ||
673 | } | ||
674 | info.cap_offset = sizeof(info); | ||
675 | } | ||
676 | |||
677 | kfree(caps.buf); | ||
522 | } | 678 | } |
523 | 679 | ||
524 | return copy_to_user((void __user *)arg, &info, minsz) ? | 680 | return copy_to_user((void __user *)arg, &info, minsz) ? |
@@ -798,7 +954,7 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf, | |||
798 | unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); | 954 | unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); |
799 | struct vfio_pci_device *vdev = device_data; | 955 | struct vfio_pci_device *vdev = device_data; |
800 | 956 | ||
801 | if (index >= VFIO_PCI_NUM_REGIONS) | 957 | if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) |
802 | return -EINVAL; | 958 | return -EINVAL; |
803 | 959 | ||
804 | switch (index) { | 960 | switch (index) { |
@@ -815,6 +971,10 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf, | |||
815 | 971 | ||
816 | case VFIO_PCI_VGA_REGION_INDEX: | 972 | case VFIO_PCI_VGA_REGION_INDEX: |
817 | return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite); | 973 | return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite); |
974 | default: | ||
975 | index -= VFIO_PCI_NUM_REGIONS; | ||
976 | return vdev->region[index].ops->rw(vdev, buf, | ||
977 | count, ppos, iswrite); | ||
818 | } | 978 | } |
819 | 979 | ||
820 | return -EINVAL; | 980 | return -EINVAL; |
@@ -997,6 +1157,7 @@ static void vfio_pci_remove(struct pci_dev *pdev) | |||
997 | return; | 1157 | return; |
998 | 1158 | ||
999 | vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev); | 1159 | vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev); |
1160 | kfree(vdev->region); | ||
1000 | kfree(vdev); | 1161 | kfree(vdev); |
1001 | 1162 | ||
1002 | if (vfio_pci_is_vga(pdev)) { | 1163 | if (vfio_pci_is_vga(pdev)) { |
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index fe2b470d7ec6..142c533efec7 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c | |||
@@ -33,9 +33,8 @@ | |||
33 | 33 | ||
34 | #define PCI_CFG_SPACE_SIZE 256 | 34 | #define PCI_CFG_SPACE_SIZE 256 |
35 | 35 | ||
36 | /* Useful "pseudo" capabilities */ | 36 | /* Fake capability ID for standard config space */ |
37 | #define PCI_CAP_ID_BASIC 0 | 37 | #define PCI_CAP_ID_BASIC 0 |
38 | #define PCI_CAP_ID_INVALID 0xFF | ||
39 | 38 | ||
40 | #define is_bar(offset) \ | 39 | #define is_bar(offset) \ |
41 | ((offset >= PCI_BASE_ADDRESS_0 && offset < PCI_BASE_ADDRESS_5 + 4) || \ | 40 | ((offset >= PCI_BASE_ADDRESS_0 && offset < PCI_BASE_ADDRESS_5 + 4) || \ |
@@ -301,6 +300,23 @@ static int vfio_raw_config_read(struct vfio_pci_device *vdev, int pos, | |||
301 | return count; | 300 | return count; |
302 | } | 301 | } |
303 | 302 | ||
303 | /* Virt access uses only virtualization */ | ||
304 | static int vfio_virt_config_write(struct vfio_pci_device *vdev, int pos, | ||
305 | int count, struct perm_bits *perm, | ||
306 | int offset, __le32 val) | ||
307 | { | ||
308 | memcpy(vdev->vconfig + pos, &val, count); | ||
309 | return count; | ||
310 | } | ||
311 | |||
312 | static int vfio_virt_config_read(struct vfio_pci_device *vdev, int pos, | ||
313 | int count, struct perm_bits *perm, | ||
314 | int offset, __le32 *val) | ||
315 | { | ||
316 | memcpy(val, vdev->vconfig + pos, count); | ||
317 | return count; | ||
318 | } | ||
319 | |||
304 | /* Default capability regions to read-only, no-virtualization */ | 320 | /* Default capability regions to read-only, no-virtualization */ |
305 | static struct perm_bits cap_perms[PCI_CAP_ID_MAX + 1] = { | 321 | static struct perm_bits cap_perms[PCI_CAP_ID_MAX + 1] = { |
306 | [0 ... PCI_CAP_ID_MAX] = { .readfn = vfio_direct_config_read } | 322 | [0 ... PCI_CAP_ID_MAX] = { .readfn = vfio_direct_config_read } |
@@ -319,6 +335,11 @@ static struct perm_bits unassigned_perms = { | |||
319 | .writefn = vfio_raw_config_write | 335 | .writefn = vfio_raw_config_write |
320 | }; | 336 | }; |
321 | 337 | ||
338 | static struct perm_bits virt_perms = { | ||
339 | .readfn = vfio_virt_config_read, | ||
340 | .writefn = vfio_virt_config_write | ||
341 | }; | ||
342 | |||
322 | static void free_perm_bits(struct perm_bits *perm) | 343 | static void free_perm_bits(struct perm_bits *perm) |
323 | { | 344 | { |
324 | kfree(perm->virt); | 345 | kfree(perm->virt); |
@@ -454,14 +475,19 @@ static void vfio_bar_fixup(struct vfio_pci_device *vdev) | |||
454 | bar = (__le32 *)&vdev->vconfig[PCI_ROM_ADDRESS]; | 475 | bar = (__le32 *)&vdev->vconfig[PCI_ROM_ADDRESS]; |
455 | 476 | ||
456 | /* | 477 | /* |
457 | * NB. we expose the actual BAR size here, regardless of whether | 478 | * NB. REGION_INFO will have reported zero size if we weren't able |
458 | * we can read it. When we report the REGION_INFO for the ROM | 479 | * to read the ROM, but we still return the actual BAR size here if |
459 | * we report what PCI tells us is the actual ROM size. | 480 | * it exists (or the shadow ROM space). |
460 | */ | 481 | */ |
461 | if (pci_resource_start(pdev, PCI_ROM_RESOURCE)) { | 482 | if (pci_resource_start(pdev, PCI_ROM_RESOURCE)) { |
462 | mask = ~(pci_resource_len(pdev, PCI_ROM_RESOURCE) - 1); | 483 | mask = ~(pci_resource_len(pdev, PCI_ROM_RESOURCE) - 1); |
463 | mask |= PCI_ROM_ADDRESS_ENABLE; | 484 | mask |= PCI_ROM_ADDRESS_ENABLE; |
464 | *bar &= cpu_to_le32((u32)mask); | 485 | *bar &= cpu_to_le32((u32)mask); |
486 | } else if (pdev->resource[PCI_ROM_RESOURCE].flags & | ||
487 | IORESOURCE_ROM_SHADOW) { | ||
488 | mask = ~(0x20000 - 1); | ||
489 | mask |= PCI_ROM_ADDRESS_ENABLE; | ||
490 | *bar &= cpu_to_le32((u32)mask); | ||
465 | } else | 491 | } else |
466 | *bar = 0; | 492 | *bar = 0; |
467 | 493 | ||
@@ -1332,6 +1358,8 @@ static int vfio_cap_init(struct vfio_pci_device *vdev) | |||
1332 | pos + i, map[pos + i], cap); | 1358 | pos + i, map[pos + i], cap); |
1333 | } | 1359 | } |
1334 | 1360 | ||
1361 | BUILD_BUG_ON(PCI_CAP_ID_MAX >= PCI_CAP_ID_INVALID_VIRT); | ||
1362 | |||
1335 | memset(map + pos, cap, len); | 1363 | memset(map + pos, cap, len); |
1336 | ret = vfio_fill_vconfig_bytes(vdev, pos, len); | 1364 | ret = vfio_fill_vconfig_bytes(vdev, pos, len); |
1337 | if (ret) | 1365 | if (ret) |
@@ -1419,9 +1447,9 @@ static int vfio_ecap_init(struct vfio_pci_device *vdev) | |||
1419 | /* | 1447 | /* |
1420 | * Even though ecap is 2 bytes, we're currently a long way | 1448 | * Even though ecap is 2 bytes, we're currently a long way |
1421 | * from exceeding 1 byte capabilities. If we ever make it | 1449 | * from exceeding 1 byte capabilities. If we ever make it |
1422 | * up to 0xFF we'll need to up this to a two-byte, byte map. | 1450 | * up to 0xFE we'll need to up this to a two-byte, byte map. |
1423 | */ | 1451 | */ |
1424 | BUILD_BUG_ON(PCI_EXT_CAP_ID_MAX >= PCI_CAP_ID_INVALID); | 1452 | BUILD_BUG_ON(PCI_EXT_CAP_ID_MAX >= PCI_CAP_ID_INVALID_VIRT); |
1425 | 1453 | ||
1426 | memset(map + epos, ecap, len); | 1454 | memset(map + epos, ecap, len); |
1427 | ret = vfio_fill_vconfig_bytes(vdev, epos, len); | 1455 | ret = vfio_fill_vconfig_bytes(vdev, epos, len); |
@@ -1597,6 +1625,9 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf, | |||
1597 | if (cap_id == PCI_CAP_ID_INVALID) { | 1625 | if (cap_id == PCI_CAP_ID_INVALID) { |
1598 | perm = &unassigned_perms; | 1626 | perm = &unassigned_perms; |
1599 | cap_start = *ppos; | 1627 | cap_start = *ppos; |
1628 | } else if (cap_id == PCI_CAP_ID_INVALID_VIRT) { | ||
1629 | perm = &virt_perms; | ||
1630 | cap_start = *ppos; | ||
1600 | } else { | 1631 | } else { |
1601 | if (*ppos >= PCI_CFG_SPACE_SIZE) { | 1632 | if (*ppos >= PCI_CFG_SPACE_SIZE) { |
1602 | WARN_ON(cap_id > PCI_EXT_CAP_ID_MAX); | 1633 | WARN_ON(cap_id > PCI_EXT_CAP_ID_MAX); |
diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c new file mode 100644 index 000000000000..6394b168ef29 --- /dev/null +++ b/drivers/vfio/pci/vfio_pci_igd.c | |||
@@ -0,0 +1,280 @@ | |||
1 | /* | ||
2 | * VFIO PCI Intel Graphics support | ||
3 | * | ||
4 | * Copyright (C) 2016 Red Hat, Inc. All rights reserved. | ||
5 | * Author: Alex Williamson <alex.williamson@redhat.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * Register a device specific region through which to provide read-only | ||
12 | * access to the Intel IGD opregion. The register defining the opregion | ||
13 | * address is also virtualized to prevent user modification. | ||
14 | */ | ||
15 | |||
16 | #include <linux/io.h> | ||
17 | #include <linux/pci.h> | ||
18 | #include <linux/uaccess.h> | ||
19 | #include <linux/vfio.h> | ||
20 | |||
21 | #include "vfio_pci_private.h" | ||
22 | |||
23 | #define OPREGION_SIGNATURE "IntelGraphicsMem" | ||
24 | #define OPREGION_SIZE (8 * 1024) | ||
25 | #define OPREGION_PCI_ADDR 0xfc | ||
26 | |||
27 | static size_t vfio_pci_igd_rw(struct vfio_pci_device *vdev, char __user *buf, | ||
28 | size_t count, loff_t *ppos, bool iswrite) | ||
29 | { | ||
30 | unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS; | ||
31 | void *base = vdev->region[i].data; | ||
32 | loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; | ||
33 | |||
34 | if (pos >= vdev->region[i].size || iswrite) | ||
35 | return -EINVAL; | ||
36 | |||
37 | count = min(count, (size_t)(vdev->region[i].size - pos)); | ||
38 | |||
39 | if (copy_to_user(buf, base + pos, count)) | ||
40 | return -EFAULT; | ||
41 | |||
42 | *ppos += count; | ||
43 | |||
44 | return count; | ||
45 | } | ||
46 | |||
47 | static void vfio_pci_igd_release(struct vfio_pci_device *vdev, | ||
48 | struct vfio_pci_region *region) | ||
49 | { | ||
50 | memunmap(region->data); | ||
51 | } | ||
52 | |||
53 | static const struct vfio_pci_regops vfio_pci_igd_regops = { | ||
54 | .rw = vfio_pci_igd_rw, | ||
55 | .release = vfio_pci_igd_release, | ||
56 | }; | ||
57 | |||
58 | static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev) | ||
59 | { | ||
60 | __le32 *dwordp = (__le32 *)(vdev->vconfig + OPREGION_PCI_ADDR); | ||
61 | u32 addr, size; | ||
62 | void *base; | ||
63 | int ret; | ||
64 | |||
65 | ret = pci_read_config_dword(vdev->pdev, OPREGION_PCI_ADDR, &addr); | ||
66 | if (ret) | ||
67 | return ret; | ||
68 | |||
69 | if (!addr || !(~addr)) | ||
70 | return -ENODEV; | ||
71 | |||
72 | base = memremap(addr, OPREGION_SIZE, MEMREMAP_WB); | ||
73 | if (!base) | ||
74 | return -ENOMEM; | ||
75 | |||
76 | if (memcmp(base, OPREGION_SIGNATURE, 16)) { | ||
77 | memunmap(base); | ||
78 | return -EINVAL; | ||
79 | } | ||
80 | |||
81 | size = le32_to_cpu(*(__le32 *)(base + 16)); | ||
82 | if (!size) { | ||
83 | memunmap(base); | ||
84 | return -EINVAL; | ||
85 | } | ||
86 | |||
87 | size *= 1024; /* In KB */ | ||
88 | |||
89 | if (size != OPREGION_SIZE) { | ||
90 | memunmap(base); | ||
91 | base = memremap(addr, size, MEMREMAP_WB); | ||
92 | if (!base) | ||
93 | return -ENOMEM; | ||
94 | } | ||
95 | |||
96 | ret = vfio_pci_register_dev_region(vdev, | ||
97 | PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE, | ||
98 | VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, | ||
99 | &vfio_pci_igd_regops, size, VFIO_REGION_INFO_FLAG_READ, base); | ||
100 | if (ret) { | ||
101 | memunmap(base); | ||
102 | return ret; | ||
103 | } | ||
104 | |||
105 | /* Fill vconfig with the hw value and virtualize register */ | ||
106 | *dwordp = cpu_to_le32(addr); | ||
107 | memset(vdev->pci_config_map + OPREGION_PCI_ADDR, | ||
108 | PCI_CAP_ID_INVALID_VIRT, 4); | ||
109 | |||
110 | return ret; | ||
111 | } | ||
112 | |||
113 | static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev, | ||
114 | char __user *buf, size_t count, loff_t *ppos, | ||
115 | bool iswrite) | ||
116 | { | ||
117 | unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS; | ||
118 | struct pci_dev *pdev = vdev->region[i].data; | ||
119 | loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; | ||
120 | size_t size; | ||
121 | int ret; | ||
122 | |||
123 | if (pos >= vdev->region[i].size || iswrite) | ||
124 | return -EINVAL; | ||
125 | |||
126 | size = count = min(count, (size_t)(vdev->region[i].size - pos)); | ||
127 | |||
128 | if ((pos & 1) && size) { | ||
129 | u8 val; | ||
130 | |||
131 | ret = pci_user_read_config_byte(pdev, pos, &val); | ||
132 | if (ret) | ||
133 | return pcibios_err_to_errno(ret); | ||
134 | |||
135 | if (copy_to_user(buf + count - size, &val, 1)) | ||
136 | return -EFAULT; | ||
137 | |||
138 | pos++; | ||
139 | size--; | ||
140 | } | ||
141 | |||
142 | if ((pos & 3) && size > 2) { | ||
143 | u16 val; | ||
144 | |||
145 | ret = pci_user_read_config_word(pdev, pos, &val); | ||
146 | if (ret) | ||
147 | return pcibios_err_to_errno(ret); | ||
148 | |||
149 | val = cpu_to_le16(val); | ||
150 | if (copy_to_user(buf + count - size, &val, 2)) | ||
151 | return -EFAULT; | ||
152 | |||
153 | pos += 2; | ||
154 | size -= 2; | ||
155 | } | ||
156 | |||
157 | while (size > 3) { | ||
158 | u32 val; | ||
159 | |||
160 | ret = pci_user_read_config_dword(pdev, pos, &val); | ||
161 | if (ret) | ||
162 | return pcibios_err_to_errno(ret); | ||
163 | |||
164 | val = cpu_to_le32(val); | ||
165 | if (copy_to_user(buf + count - size, &val, 4)) | ||
166 | return -EFAULT; | ||
167 | |||
168 | pos += 4; | ||
169 | size -= 4; | ||
170 | } | ||
171 | |||
172 | while (size >= 2) { | ||
173 | u16 val; | ||
174 | |||
175 | ret = pci_user_read_config_word(pdev, pos, &val); | ||
176 | if (ret) | ||
177 | return pcibios_err_to_errno(ret); | ||
178 | |||
179 | val = cpu_to_le16(val); | ||
180 | if (copy_to_user(buf + count - size, &val, 2)) | ||
181 | return -EFAULT; | ||
182 | |||
183 | pos += 2; | ||
184 | size -= 2; | ||
185 | } | ||
186 | |||
187 | while (size) { | ||
188 | u8 val; | ||
189 | |||
190 | ret = pci_user_read_config_byte(pdev, pos, &val); | ||
191 | if (ret) | ||
192 | return pcibios_err_to_errno(ret); | ||
193 | |||
194 | if (copy_to_user(buf + count - size, &val, 1)) | ||
195 | return -EFAULT; | ||
196 | |||
197 | pos++; | ||
198 | size--; | ||
199 | } | ||
200 | |||
201 | *ppos += count; | ||
202 | |||
203 | return count; | ||
204 | } | ||
205 | |||
206 | static void vfio_pci_igd_cfg_release(struct vfio_pci_device *vdev, | ||
207 | struct vfio_pci_region *region) | ||
208 | { | ||
209 | struct pci_dev *pdev = region->data; | ||
210 | |||
211 | pci_dev_put(pdev); | ||
212 | } | ||
213 | |||
214 | static const struct vfio_pci_regops vfio_pci_igd_cfg_regops = { | ||
215 | .rw = vfio_pci_igd_cfg_rw, | ||
216 | .release = vfio_pci_igd_cfg_release, | ||
217 | }; | ||
218 | |||
219 | static int vfio_pci_igd_cfg_init(struct vfio_pci_device *vdev) | ||
220 | { | ||
221 | struct pci_dev *host_bridge, *lpc_bridge; | ||
222 | int ret; | ||
223 | |||
224 | host_bridge = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0)); | ||
225 | if (!host_bridge) | ||
226 | return -ENODEV; | ||
227 | |||
228 | if (host_bridge->vendor != PCI_VENDOR_ID_INTEL || | ||
229 | host_bridge->class != (PCI_CLASS_BRIDGE_HOST << 8)) { | ||
230 | pci_dev_put(host_bridge); | ||
231 | return -EINVAL; | ||
232 | } | ||
233 | |||
234 | ret = vfio_pci_register_dev_region(vdev, | ||
235 | PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE, | ||
236 | VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, | ||
237 | &vfio_pci_igd_cfg_regops, host_bridge->cfg_size, | ||
238 | VFIO_REGION_INFO_FLAG_READ, host_bridge); | ||
239 | if (ret) { | ||
240 | pci_dev_put(host_bridge); | ||
241 | return ret; | ||
242 | } | ||
243 | |||
244 | lpc_bridge = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x1f, 0)); | ||
245 | if (!lpc_bridge) | ||
246 | return -ENODEV; | ||
247 | |||
248 | if (lpc_bridge->vendor != PCI_VENDOR_ID_INTEL || | ||
249 | lpc_bridge->class != (PCI_CLASS_BRIDGE_ISA << 8)) { | ||
250 | pci_dev_put(lpc_bridge); | ||
251 | return -EINVAL; | ||
252 | } | ||
253 | |||
254 | ret = vfio_pci_register_dev_region(vdev, | ||
255 | PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE, | ||
256 | VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, | ||
257 | &vfio_pci_igd_cfg_regops, lpc_bridge->cfg_size, | ||
258 | VFIO_REGION_INFO_FLAG_READ, lpc_bridge); | ||
259 | if (ret) { | ||
260 | pci_dev_put(lpc_bridge); | ||
261 | return ret; | ||
262 | } | ||
263 | |||
264 | return 0; | ||
265 | } | ||
266 | |||
/*
 * Set up all IGD device specific regions: the opregion first, then the
 * bridge config space regions.  Stops at and returns the first error;
 * returns 0 when both steps succeed.
 */
int vfio_pci_igd_init(struct vfio_pci_device *vdev)
{
	int err = vfio_pci_igd_opregion_init(vdev);

	return err ? err : vfio_pci_igd_cfg_init(vdev);
}
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 3b3ba15558b7..e9ea3fef144a 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c | |||
@@ -309,14 +309,14 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, | |||
309 | int vector, int fd, bool msix) | 309 | int vector, int fd, bool msix) |
310 | { | 310 | { |
311 | struct pci_dev *pdev = vdev->pdev; | 311 | struct pci_dev *pdev = vdev->pdev; |
312 | int irq = msix ? vdev->msix[vector].vector : pdev->irq + vector; | ||
313 | char *name = msix ? "vfio-msix" : "vfio-msi"; | ||
314 | struct eventfd_ctx *trigger; | 312 | struct eventfd_ctx *trigger; |
315 | int ret; | 313 | int irq, ret; |
316 | 314 | ||
317 | if (vector >= vdev->num_ctx) | 315 | if (vector < 0 || vector >= vdev->num_ctx) |
318 | return -EINVAL; | 316 | return -EINVAL; |
319 | 317 | ||
318 | irq = msix ? vdev->msix[vector].vector : pdev->irq + vector; | ||
319 | |||
320 | if (vdev->ctx[vector].trigger) { | 320 | if (vdev->ctx[vector].trigger) { |
321 | free_irq(irq, vdev->ctx[vector].trigger); | 321 | free_irq(irq, vdev->ctx[vector].trigger); |
322 | irq_bypass_unregister_producer(&vdev->ctx[vector].producer); | 322 | irq_bypass_unregister_producer(&vdev->ctx[vector].producer); |
@@ -328,8 +328,9 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, | |||
328 | if (fd < 0) | 328 | if (fd < 0) |
329 | return 0; | 329 | return 0; |
330 | 330 | ||
331 | vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "%s[%d](%s)", | 331 | vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)", |
332 | name, vector, pci_name(pdev)); | 332 | msix ? "x" : "", vector, |
333 | pci_name(pdev)); | ||
333 | if (!vdev->ctx[vector].name) | 334 | if (!vdev->ctx[vector].name) |
334 | return -ENOMEM; | 335 | return -ENOMEM; |
335 | 336 | ||
@@ -379,7 +380,7 @@ static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start, | |||
379 | { | 380 | { |
380 | int i, j, ret = 0; | 381 | int i, j, ret = 0; |
381 | 382 | ||
382 | if (start + count > vdev->num_ctx) | 383 | if (start >= vdev->num_ctx || start + count > vdev->num_ctx) |
383 | return -EINVAL; | 384 | return -EINVAL; |
384 | 385 | ||
385 | for (i = 0, j = start; i < count && !ret; i++, j++) { | 386 | for (i = 0, j = start; i < count && !ret; i++, j++) { |
@@ -388,7 +389,7 @@ static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start, | |||
388 | } | 389 | } |
389 | 390 | ||
390 | if (ret) { | 391 | if (ret) { |
391 | for (--j; j >= start; j--) | 392 | for (--j; j >= (int)start; j--) |
392 | vfio_msi_set_vector_signal(vdev, j, -1, msix); | 393 | vfio_msi_set_vector_signal(vdev, j, -1, msix); |
393 | } | 394 | } |
394 | 395 | ||
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 0e7394f8f69b..8a7d546d18a0 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/mutex.h> | 14 | #include <linux/mutex.h> |
15 | #include <linux/pci.h> | 15 | #include <linux/pci.h> |
16 | #include <linux/irqbypass.h> | 16 | #include <linux/irqbypass.h> |
17 | #include <linux/types.h> | ||
17 | 18 | ||
18 | #ifndef VFIO_PCI_PRIVATE_H | 19 | #ifndef VFIO_PCI_PRIVATE_H |
19 | #define VFIO_PCI_PRIVATE_H | 20 | #define VFIO_PCI_PRIVATE_H |
@@ -24,6 +25,10 @@ | |||
24 | #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) | 25 | #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) |
25 | #define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) | 26 | #define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) |
26 | 27 | ||
28 | /* Special capability IDs predefined access */ | ||
29 | #define PCI_CAP_ID_INVALID 0xFF /* default raw access */ | ||
30 | #define PCI_CAP_ID_INVALID_VIRT 0xFE /* default virt access */ | ||
31 | |||
27 | struct vfio_pci_irq_ctx { | 32 | struct vfio_pci_irq_ctx { |
28 | struct eventfd_ctx *trigger; | 33 | struct eventfd_ctx *trigger; |
29 | struct virqfd *unmask; | 34 | struct virqfd *unmask; |
@@ -33,6 +38,25 @@ struct vfio_pci_irq_ctx { | |||
33 | struct irq_bypass_producer producer; | 38 | struct irq_bypass_producer producer; |
34 | }; | 39 | }; |
35 | 40 | ||
41 | struct vfio_pci_device; | ||
42 | struct vfio_pci_region; | ||
43 | |||
44 | struct vfio_pci_regops { | ||
45 | size_t (*rw)(struct vfio_pci_device *vdev, char __user *buf, | ||
46 | size_t count, loff_t *ppos, bool iswrite); | ||
47 | void (*release)(struct vfio_pci_device *vdev, | ||
48 | struct vfio_pci_region *region); | ||
49 | }; | ||
50 | |||
51 | struct vfio_pci_region { | ||
52 | u32 type; | ||
53 | u32 subtype; | ||
54 | const struct vfio_pci_regops *ops; | ||
55 | void *data; | ||
56 | size_t size; | ||
57 | u32 flags; | ||
58 | }; | ||
59 | |||
36 | struct vfio_pci_device { | 60 | struct vfio_pci_device { |
37 | struct pci_dev *pdev; | 61 | struct pci_dev *pdev; |
38 | void __iomem *barmap[PCI_STD_RESOURCE_END + 1]; | 62 | void __iomem *barmap[PCI_STD_RESOURCE_END + 1]; |
@@ -45,6 +69,8 @@ struct vfio_pci_device { | |||
45 | struct vfio_pci_irq_ctx *ctx; | 69 | struct vfio_pci_irq_ctx *ctx; |
46 | int num_ctx; | 70 | int num_ctx; |
47 | int irq_type; | 71 | int irq_type; |
72 | int num_regions; | ||
73 | struct vfio_pci_region *region; | ||
48 | u8 msi_qmax; | 74 | u8 msi_qmax; |
49 | u8 msix_bar; | 75 | u8 msix_bar; |
50 | u16 msix_size; | 76 | u16 msix_size; |
@@ -91,4 +117,17 @@ extern void vfio_pci_uninit_perm_bits(void); | |||
91 | 117 | ||
92 | extern int vfio_config_init(struct vfio_pci_device *vdev); | 118 | extern int vfio_config_init(struct vfio_pci_device *vdev); |
93 | extern void vfio_config_free(struct vfio_pci_device *vdev); | 119 | extern void vfio_config_free(struct vfio_pci_device *vdev); |
120 | |||
121 | extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev, | ||
122 | unsigned int type, unsigned int subtype, | ||
123 | const struct vfio_pci_regops *ops, | ||
124 | size_t size, u32 flags, void *data); | ||
125 | #ifdef CONFIG_VFIO_PCI_IGD | ||
126 | extern int vfio_pci_igd_init(struct vfio_pci_device *vdev); | ||
127 | #else | ||
128 | static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev) | ||
129 | { | ||
130 | return -ENODEV; | ||
131 | } | ||
132 | #endif | ||
94 | #endif /* VFIO_PCI_PRIVATE_H */ | 133 | #endif /* VFIO_PCI_PRIVATE_H */ |
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 210db24d2204..5ffd1d9ad4bd 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c | |||
@@ -124,11 +124,14 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, | |||
124 | void __iomem *io; | 124 | void __iomem *io; |
125 | ssize_t done; | 125 | ssize_t done; |
126 | 126 | ||
127 | if (!pci_resource_start(pdev, bar)) | 127 | if (pci_resource_start(pdev, bar)) |
128 | end = pci_resource_len(pdev, bar); | ||
129 | else if (bar == PCI_ROM_RESOURCE && | ||
130 | pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW) | ||
131 | end = 0x20000; | ||
132 | else | ||
128 | return -EINVAL; | 133 | return -EINVAL; |
129 | 134 | ||
130 | end = pci_resource_len(pdev, bar); | ||
131 | |||
132 | if (pos >= end) | 135 | if (pos >= end) |
133 | return -EINVAL; | 136 | return -EINVAL; |
134 | 137 | ||
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index ecca316386f5..6fd6fa5469de 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c | |||
@@ -1080,30 +1080,26 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container, | |||
1080 | continue; | 1080 | continue; |
1081 | } | 1081 | } |
1082 | 1082 | ||
1083 | /* module reference holds the driver we're working on */ | ||
1084 | mutex_unlock(&vfio.iommu_drivers_lock); | ||
1085 | |||
1086 | data = driver->ops->open(arg); | 1083 | data = driver->ops->open(arg); |
1087 | if (IS_ERR(data)) { | 1084 | if (IS_ERR(data)) { |
1088 | ret = PTR_ERR(data); | 1085 | ret = PTR_ERR(data); |
1089 | module_put(driver->ops->owner); | 1086 | module_put(driver->ops->owner); |
1090 | goto skip_drivers_unlock; | 1087 | continue; |
1091 | } | 1088 | } |
1092 | 1089 | ||
1093 | ret = __vfio_container_attach_groups(container, driver, data); | 1090 | ret = __vfio_container_attach_groups(container, driver, data); |
1094 | if (!ret) { | 1091 | if (ret) { |
1095 | container->iommu_driver = driver; | ||
1096 | container->iommu_data = data; | ||
1097 | } else { | ||
1098 | driver->ops->release(data); | 1092 | driver->ops->release(data); |
1099 | module_put(driver->ops->owner); | 1093 | module_put(driver->ops->owner); |
1094 | continue; | ||
1100 | } | 1095 | } |
1101 | 1096 | ||
1102 | goto skip_drivers_unlock; | 1097 | container->iommu_driver = driver; |
1098 | container->iommu_data = data; | ||
1099 | break; | ||
1103 | } | 1100 | } |
1104 | 1101 | ||
1105 | mutex_unlock(&vfio.iommu_drivers_lock); | 1102 | mutex_unlock(&vfio.iommu_drivers_lock); |
1106 | skip_drivers_unlock: | ||
1107 | up_write(&container->group_lock); | 1103 | up_write(&container->group_lock); |
1108 | 1104 | ||
1109 | return ret; | 1105 | return ret; |
@@ -1733,6 +1729,60 @@ long vfio_external_check_extension(struct vfio_group *group, unsigned long arg) | |||
1733 | EXPORT_SYMBOL_GPL(vfio_external_check_extension); | 1729 | EXPORT_SYMBOL_GPL(vfio_external_check_extension); |
1734 | 1730 | ||
1735 | /** | 1731 | /** |
1732 | * Sub-module support | ||
1733 | */ | ||
1734 | /* | ||
1735 | * Helper for managing a buffer of info chain capabilities, allocate or | ||
1736 | * reallocate a buffer with additional @size, filling in @id and @version | ||
1737 | * of the capability. A pointer to the new capability is returned. | ||
1738 | * | ||
1739 | * NB. The chain is based at the head of the buffer, so new entries are | ||
1740 | * added to the tail, vfio_info_cap_shift() should be called to fixup the | ||
1741 | * next offsets prior to copying to the user buffer. | ||
1742 | */ | ||
1743 | struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, | ||
1744 | size_t size, u16 id, u16 version) | ||
1745 | { | ||
1746 | void *buf; | ||
1747 | struct vfio_info_cap_header *header, *tmp; | ||
1748 | |||
1749 | buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); | ||
1750 | if (!buf) { | ||
1751 | kfree(caps->buf); | ||
1752 | caps->size = 0; | ||
1753 | return ERR_PTR(-ENOMEM); | ||
1754 | } | ||
1755 | |||
1756 | caps->buf = buf; | ||
1757 | header = buf + caps->size; | ||
1758 | |||
1759 | /* Eventually copied to user buffer, zero */ | ||
1760 | memset(header, 0, size); | ||
1761 | |||
1762 | header->id = id; | ||
1763 | header->version = version; | ||
1764 | |||
1765 | /* Add to the end of the capability chain */ | ||
1766 | for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next) | ||
1767 | ; /* nothing */ | ||
1768 | |||
1769 | tmp->next = caps->size; | ||
1770 | caps->size += size; | ||
1771 | |||
1772 | return header; | ||
1773 | } | ||
1774 | EXPORT_SYMBOL_GPL(vfio_info_cap_add); | ||
1775 | |||
1776 | void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) | ||
1777 | { | ||
1778 | struct vfio_info_cap_header *tmp; | ||
1779 | |||
1780 | for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next - offset) | ||
1781 | tmp->next += offset; | ||
1782 | } | ||
1783 | EXPORT_SYMBOL_GPL(vfio_info_cap_shift); | ||
1784 | |||
1785 | /** | ||
1736 | * Module/class support | 1786 | * Module/class support |
1737 | */ | 1787 | */ |
1738 | static char *vfio_devnode(struct device *dev, umode_t *mode) | 1788 | static char *vfio_devnode(struct device *dev, umode_t *mode) |
diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 610a86a892b8..0ecae0b1cd34 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h | |||
@@ -92,6 +92,17 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group); | |||
92 | extern long vfio_external_check_extension(struct vfio_group *group, | 92 | extern long vfio_external_check_extension(struct vfio_group *group, |
93 | unsigned long arg); | 93 | unsigned long arg); |
94 | 94 | ||
95 | /* | ||
96 | * Sub-module helpers | ||
97 | */ | ||
98 | struct vfio_info_cap { | ||
99 | struct vfio_info_cap_header *buf; | ||
100 | size_t size; | ||
101 | }; | ||
102 | extern struct vfio_info_cap_header *vfio_info_cap_add( | ||
103 | struct vfio_info_cap *caps, size_t size, u16 id, u16 version); | ||
104 | extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset); | ||
105 | |||
95 | struct pci_dev; | 106 | struct pci_dev; |
96 | #ifdef CONFIG_EEH | 107 | #ifdef CONFIG_EEH |
97 | extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); | 108 | extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); |
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 7d7a4c6f2090..255a2113f53c 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h | |||
@@ -59,6 +59,33 @@ | |||
59 | #define VFIO_TYPE (';') | 59 | #define VFIO_TYPE (';') |
60 | #define VFIO_BASE 100 | 60 | #define VFIO_BASE 100 |
61 | 61 | ||
62 | /* | ||
63 | * For extension of INFO ioctls, VFIO makes use of a capability chain | ||
64 | * designed after PCI/e capabilities. A flag bit indicates whether | ||
65 | * this capability chain is supported and a field defined in the fixed | ||
66 | * structure defines the offset of the first capability in the chain. | ||
67 | * This field is only valid when the corresponding bit in the flags | ||
68 | * bitmap is set. This offset field is relative to the start of the | ||
69 | * INFO buffer, as is the next field within each capability header. | ||
70 | * The id within the header is a shared address space per INFO ioctl, | ||
71 | * while the version field is specific to the capability id. The | ||
72 | * contents following the header are specific to the capability id. | ||
73 | */ | ||
74 | struct vfio_info_cap_header { | ||
75 | __u16 id; /* Identifies capability */ | ||
76 | __u16 version; /* Version specific to the capability ID */ | ||
77 | __u32 next; /* Offset of next capability */ | ||
78 | }; | ||
79 | |||
80 | /* | ||
81 | * Callers of INFO ioctls passing insufficiently sized buffers will see | ||
82 | * the capability chain flag bit set, a zero value for the first capability | ||
83 | * offset (if available within the provided argsz), and argsz will be | ||
84 | * updated to report the necessary buffer size. For compatibility, the | ||
85 | * INFO ioctl will not report error in this case, but the capability chain | ||
86 | * will not be available. | ||
87 | */ | ||
88 | |||
62 | /* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */ | 89 | /* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */ |
63 | 90 | ||
64 | /** | 91 | /** |
@@ -194,13 +221,73 @@ struct vfio_region_info { | |||
194 | #define VFIO_REGION_INFO_FLAG_READ (1 << 0) /* Region supports read */ | 221 | #define VFIO_REGION_INFO_FLAG_READ (1 << 0) /* Region supports read */ |
195 | #define VFIO_REGION_INFO_FLAG_WRITE (1 << 1) /* Region supports write */ | 222 | #define VFIO_REGION_INFO_FLAG_WRITE (1 << 1) /* Region supports write */ |
196 | #define VFIO_REGION_INFO_FLAG_MMAP (1 << 2) /* Region supports mmap */ | 223 | #define VFIO_REGION_INFO_FLAG_MMAP (1 << 2) /* Region supports mmap */ |
224 | #define VFIO_REGION_INFO_FLAG_CAPS (1 << 3) /* Info supports caps */ | ||
197 | __u32 index; /* Region index */ | 225 | __u32 index; /* Region index */ |
198 | __u32 resv; /* Reserved for alignment */ | 226 | __u32 cap_offset; /* Offset within info struct of first cap */ |
199 | __u64 size; /* Region size (bytes) */ | 227 | __u64 size; /* Region size (bytes) */ |
200 | __u64 offset; /* Region offset from start of device fd */ | 228 | __u64 offset; /* Region offset from start of device fd */ |
201 | }; | 229 | }; |
202 | #define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8) | 230 | #define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8) |
203 | 231 | ||
232 | /* | ||
233 | * The sparse mmap capability allows finer granularity of specifying areas | ||
234 | * within a region with mmap support. When specified, the user should only | ||
235 | * mmap the offset ranges specified by the areas array. mmaps outside of the | ||
236 | * areas specified may fail (such as the range covering a PCI MSI-X table) or | ||
237 | * may result in improper device behavior. | ||
238 | * | ||
239 | * The structures below define version 1 of this capability. | ||
240 | */ | ||
241 | #define VFIO_REGION_INFO_CAP_SPARSE_MMAP 1 | ||
242 | |||
243 | struct vfio_region_sparse_mmap_area { | ||
244 | __u64 offset; /* Offset of mmap'able area within region */ | ||
245 | __u64 size; /* Size of mmap'able area */ | ||
246 | }; | ||
247 | |||
248 | struct vfio_region_info_cap_sparse_mmap { | ||
249 | struct vfio_info_cap_header header; | ||
250 | __u32 nr_areas; | ||
251 | __u32 reserved; | ||
252 | struct vfio_region_sparse_mmap_area areas[]; | ||
253 | }; | ||
254 | |||
255 | /* | ||
256 | * The device specific type capability allows regions unique to a specific | ||
257 | * device or class of devices to be exposed. This helps solve the problem for | ||
258 | * vfio bus drivers of defining which region indexes correspond to which region | ||
259 | * on the device, without needing to resort to static indexes, as done by | ||
260 | * vfio-pci. For instance, if we were to go back in time, we might remove | ||
261 | * VFIO_PCI_VGA_REGION_INDEX and let vfio-pci simply define that all indexes | ||
262 | * greater than or equal to VFIO_PCI_NUM_REGIONS are device specific and we'd | ||
263 | * make a "VGA" device specific type to describe the VGA access space. This | ||
264 | * means that non-VGA devices wouldn't need to waste this index, and thus the | ||
265 | * address space associated with it due to implementation of device file | ||
266 | * descriptor offsets in vfio-pci. | ||
267 | * | ||
268 | * The current implementation is now part of the user ABI, so we can't use this | ||
269 | * for VGA, but there are other upcoming use cases, such as opregions for Intel | ||
270 | * IGD devices and framebuffers for vGPU devices. We missed VGA, but we'll | ||
271 | * use this for future additions. | ||
272 | * | ||
273 | * The structure below defines version 1 of this capability. | ||
274 | */ | ||
275 | #define VFIO_REGION_INFO_CAP_TYPE 2 | ||
276 | |||
277 | struct vfio_region_info_cap_type { | ||
278 | struct vfio_info_cap_header header; | ||
279 | __u32 type; /* global per bus driver */ | ||
280 | __u32 subtype; /* type specific */ | ||
281 | }; | ||
282 | |||
283 | #define VFIO_REGION_TYPE_PCI_VENDOR_TYPE (1 << 31) | ||
284 | #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) | ||
285 | |||
286 | /* 8086 Vendor sub-types */ | ||
287 | #define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1) | ||
288 | #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2) | ||
289 | #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3) | ||
290 | |||
204 | /** | 291 | /** |
205 | * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9, | 292 | * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9, |
206 | * struct vfio_irq_info) | 293 | * struct vfio_irq_info) |
@@ -336,7 +423,8 @@ enum { | |||
336 | * between described ranges are unimplemented. | 423 | * between described ranges are unimplemented. |
337 | */ | 424 | */ |
338 | VFIO_PCI_VGA_REGION_INDEX, | 425 | VFIO_PCI_VGA_REGION_INDEX, |
339 | VFIO_PCI_NUM_REGIONS | 426 | VFIO_PCI_NUM_REGIONS = 9 /* Fixed user ABI, region indexes >=9 use */ |
427 | /* device specific cap to define content. */ | ||
340 | }; | 428 | }; |
341 | 429 | ||
342 | enum { | 430 | enum { |