summaryrefslogtreecommitdiffstats
path: root/drivers/vfio/pci
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-03-17 16:05:09 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-03-17 16:05:09 -0400
commit45cb5230f862d10209b83e488b20916555d70c55 (patch)
tree3501acce3ea2d96a3d84f8310001d2e81e581fb9 /drivers/vfio/pci
parent3c0b8d1c5f334d1fd0c7eafc18bfb4ab0087b165 (diff)
parentc4aec3101319f84363a57e09086c2aff6c60a3c3 (diff)
Merge tag 'vfio-v4.6-rc1' of git://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson: "Various enablers for assignment of Intel graphics devices and future support of vGPU devices (Alex Williamson). This includes - Handling the vfio type1 interface as an API rather than a specific implementation, allowing multiple type1 providers. - Capability chains, similar to PCI device capabilities, that allow extending ioctls. Extensions here include device specific regions and sparse mmap descriptions. The former is used to expose non-PCI regions for IGD, including the OpRegion (particularly the Video BIOS Table), and read only PCI config access to the host and LPC bridge as drivers often depend on identifying those devices. Sparse mmaps here are used to describe the MSIx vector table, which vfio has always protected from mmap, but never had an API to explicitly define that protection. In future vGPU support this is expected to allow the description of PCI BARs that may mix direct access and emulated access within a single region. - The ability to expose the shadow ROM as an option ROM as IGD use cases may rely on the ROM even though the physical device does not make use of a PCI option ROM BAR" * tag 'vfio-v4.6-rc1' of git://github.com/awilliam/linux-vfio: vfio/pci: return -EFAULT if copy_to_user fails vfio/pci: Expose shadow ROM as PCI option ROM vfio/pci: Intel IGD host and LCP bridge config space access vfio/pci: Intel IGD OpRegion support vfio/pci: Enable virtual register in PCI config space vfio/pci: Add infrastructure for additional device specific regions vfio: Define device specific region type capability vfio/pci: Include sparse mmap capability for MSI-X table regions vfio: Define sparse mmap capability for regions vfio: Add capability chain helpers vfio: Define capability chains vfio: If an IOMMU backend fails, keep looking vfio/pci: Fix unsigned comparison overflow
Diffstat (limited to 'drivers/vfio/pci')
-rw-r--r--drivers/vfio/pci/Kconfig4
-rw-r--r--drivers/vfio/pci/Makefile1
-rw-r--r--drivers/vfio/pci/vfio_pci.c175
-rw-r--r--drivers/vfio/pci/vfio_pci_config.c45
-rw-r--r--drivers/vfio/pci/vfio_pci_igd.c280
-rw-r--r--drivers/vfio/pci/vfio_pci_intrs.c17
-rw-r--r--drivers/vfio/pci/vfio_pci_private.h39
-rw-r--r--drivers/vfio/pci/vfio_pci_rdwr.c9
8 files changed, 545 insertions, 25 deletions
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 02912f180c6d..24ee2605b9f0 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -26,3 +26,7 @@ config VFIO_PCI_MMAP
26config VFIO_PCI_INTX 26config VFIO_PCI_INTX
27 depends on VFIO_PCI 27 depends on VFIO_PCI
28 def_bool y if !S390 28 def_bool y if !S390
29
30config VFIO_PCI_IGD
31 depends on VFIO_PCI
32 def_bool y if X86
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index 131079255fd9..76d8ec058edd 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -1,4 +1,5 @@
1 1
2vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o 2vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
3vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
3 4
4obj-$(CONFIG_VFIO_PCI) += vfio-pci.o 5obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 8c80a48e3233..712a84978e97 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -111,6 +111,7 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
111} 111}
112 112
113static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); 113static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
114static void vfio_pci_disable(struct vfio_pci_device *vdev);
114 115
115static int vfio_pci_enable(struct vfio_pci_device *vdev) 116static int vfio_pci_enable(struct vfio_pci_device *vdev)
116{ 117{
@@ -169,13 +170,26 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
169 if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev)) 170 if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
170 vdev->has_vga = true; 171 vdev->has_vga = true;
171 172
173
174 if (vfio_pci_is_vga(pdev) &&
175 pdev->vendor == PCI_VENDOR_ID_INTEL &&
176 IS_ENABLED(CONFIG_VFIO_PCI_IGD)) {
177 ret = vfio_pci_igd_init(vdev);
178 if (ret) {
179 dev_warn(&vdev->pdev->dev,
180 "Failed to setup Intel IGD regions\n");
181 vfio_pci_disable(vdev);
182 return ret;
183 }
184 }
185
172 return 0; 186 return 0;
173} 187}
174 188
175static void vfio_pci_disable(struct vfio_pci_device *vdev) 189static void vfio_pci_disable(struct vfio_pci_device *vdev)
176{ 190{
177 struct pci_dev *pdev = vdev->pdev; 191 struct pci_dev *pdev = vdev->pdev;
178 int bar; 192 int i, bar;
179 193
180 /* Stop the device from further DMA */ 194 /* Stop the device from further DMA */
181 pci_clear_master(pdev); 195 pci_clear_master(pdev);
@@ -186,6 +200,13 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
186 200
187 vdev->virq_disabled = false; 201 vdev->virq_disabled = false;
188 202
203 for (i = 0; i < vdev->num_regions; i++)
204 vdev->region[i].ops->release(vdev, &vdev->region[i]);
205
206 vdev->num_regions = 0;
207 kfree(vdev->region);
208 vdev->region = NULL; /* don't krealloc a freed pointer */
209
189 vfio_config_free(vdev); 210 vfio_config_free(vdev);
190 211
191 for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) { 212 for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) {
@@ -421,6 +442,93 @@ static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
421 return walk.ret; 442 return walk.ret;
422} 443}
423 444
445static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
446 struct vfio_info_cap *caps)
447{
448 struct vfio_info_cap_header *header;
449 struct vfio_region_info_cap_sparse_mmap *sparse;
450 size_t end, size;
451 int nr_areas = 2, i = 0;
452
453 end = pci_resource_len(vdev->pdev, vdev->msix_bar);
454
455 /* If MSI-X table is aligned to the start or end, only one area */
456 if (((vdev->msix_offset & PAGE_MASK) == 0) ||
457 (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end))
458 nr_areas = 1;
459
460 size = sizeof(*sparse) + (nr_areas * sizeof(*sparse->areas));
461
462 header = vfio_info_cap_add(caps, size,
463 VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
464 if (IS_ERR(header))
465 return PTR_ERR(header);
466
467 sparse = container_of(header,
468 struct vfio_region_info_cap_sparse_mmap, header);
469 sparse->nr_areas = nr_areas;
470
471 if (vdev->msix_offset & PAGE_MASK) {
472 sparse->areas[i].offset = 0;
473 sparse->areas[i].size = vdev->msix_offset & PAGE_MASK;
474 i++;
475 }
476
477 if (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) < end) {
478 sparse->areas[i].offset = PAGE_ALIGN(vdev->msix_offset +
479 vdev->msix_size);
480 sparse->areas[i].size = end - sparse->areas[i].offset;
481 i++;
482 }
483
484 return 0;
485}
486
487static int region_type_cap(struct vfio_pci_device *vdev,
488 struct vfio_info_cap *caps,
489 unsigned int type, unsigned int subtype)
490{
491 struct vfio_info_cap_header *header;
492 struct vfio_region_info_cap_type *cap;
493
494 header = vfio_info_cap_add(caps, sizeof(*cap),
495 VFIO_REGION_INFO_CAP_TYPE, 1);
496 if (IS_ERR(header))
497 return PTR_ERR(header);
498
499 cap = container_of(header, struct vfio_region_info_cap_type, header);
500 cap->type = type;
501 cap->subtype = subtype;
502
503 return 0;
504}
505
506int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
507 unsigned int type, unsigned int subtype,
508 const struct vfio_pci_regops *ops,
509 size_t size, u32 flags, void *data)
510{
511 struct vfio_pci_region *region;
512
513 region = krealloc(vdev->region,
514 (vdev->num_regions + 1) * sizeof(*region),
515 GFP_KERNEL);
516 if (!region)
517 return -ENOMEM;
518
519 vdev->region = region;
520 vdev->region[vdev->num_regions].type = type;
521 vdev->region[vdev->num_regions].subtype = subtype;
522 vdev->region[vdev->num_regions].ops = ops;
523 vdev->region[vdev->num_regions].size = size;
524 vdev->region[vdev->num_regions].flags = flags;
525 vdev->region[vdev->num_regions].data = data;
526
527 vdev->num_regions++;
528
529 return 0;
530}
531
424static long vfio_pci_ioctl(void *device_data, 532static long vfio_pci_ioctl(void *device_data,
425 unsigned int cmd, unsigned long arg) 533 unsigned int cmd, unsigned long arg)
426{ 534{
@@ -443,7 +551,7 @@ static long vfio_pci_ioctl(void *device_data,
443 if (vdev->reset_works) 551 if (vdev->reset_works)
444 info.flags |= VFIO_DEVICE_FLAGS_RESET; 552 info.flags |= VFIO_DEVICE_FLAGS_RESET;
445 553
446 info.num_regions = VFIO_PCI_NUM_REGIONS; 554 info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
447 info.num_irqs = VFIO_PCI_NUM_IRQS; 555 info.num_irqs = VFIO_PCI_NUM_IRQS;
448 556
449 return copy_to_user((void __user *)arg, &info, minsz) ? 557 return copy_to_user((void __user *)arg, &info, minsz) ?
@@ -452,6 +560,8 @@ static long vfio_pci_ioctl(void *device_data,
452 } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { 560 } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
453 struct pci_dev *pdev = vdev->pdev; 561 struct pci_dev *pdev = vdev->pdev;
454 struct vfio_region_info info; 562 struct vfio_region_info info;
563 struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
564 int i, ret;
455 565
456 minsz = offsetofend(struct vfio_region_info, offset); 566 minsz = offsetofend(struct vfio_region_info, offset);
457 567
@@ -480,8 +590,15 @@ static long vfio_pci_ioctl(void *device_data,
480 VFIO_REGION_INFO_FLAG_WRITE; 590 VFIO_REGION_INFO_FLAG_WRITE;
481 if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) && 591 if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) &&
482 pci_resource_flags(pdev, info.index) & 592 pci_resource_flags(pdev, info.index) &
483 IORESOURCE_MEM && info.size >= PAGE_SIZE) 593 IORESOURCE_MEM && info.size >= PAGE_SIZE) {
484 info.flags |= VFIO_REGION_INFO_FLAG_MMAP; 594 info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
595 if (info.index == vdev->msix_bar) {
596 ret = msix_sparse_mmap_cap(vdev, &caps);
597 if (ret)
598 return ret;
599 }
600 }
601
485 break; 602 break;
486 case VFIO_PCI_ROM_REGION_INDEX: 603 case VFIO_PCI_ROM_REGION_INDEX:
487 { 604 {
@@ -493,8 +610,14 @@ static long vfio_pci_ioctl(void *device_data,
493 610
494 /* Report the BAR size, not the ROM size */ 611 /* Report the BAR size, not the ROM size */
495 info.size = pci_resource_len(pdev, info.index); 612 info.size = pci_resource_len(pdev, info.index);
496 if (!info.size) 613 if (!info.size) {
497 break; 614 /* Shadow ROMs appear as PCI option ROMs */
615 if (pdev->resource[PCI_ROM_RESOURCE].flags &
616 IORESOURCE_ROM_SHADOW)
617 info.size = 0x20000;
618 else
619 break;
620 }
498 621
499 /* Is it really there? */ 622 /* Is it really there? */
500 io = pci_map_rom(pdev, &size); 623 io = pci_map_rom(pdev, &size);
@@ -518,7 +641,40 @@ static long vfio_pci_ioctl(void *device_data,
518 641
519 break; 642 break;
520 default: 643 default:
521 return -EINVAL; 644 if (info.index >=
645 VFIO_PCI_NUM_REGIONS + vdev->num_regions)
646 return -EINVAL;
647
648 i = info.index - VFIO_PCI_NUM_REGIONS;
649
650 info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
651 info.size = vdev->region[i].size;
652 info.flags = vdev->region[i].flags;
653
654 ret = region_type_cap(vdev, &caps,
655 vdev->region[i].type,
656 vdev->region[i].subtype);
657 if (ret)
658 return ret;
659 }
660
661 if (caps.size) {
662 info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
663 if (info.argsz < sizeof(info) + caps.size) {
664 info.argsz = sizeof(info) + caps.size;
665 info.cap_offset = 0;
666 } else {
667 vfio_info_cap_shift(&caps, sizeof(info));
668 if (copy_to_user((void __user *)arg +
669 sizeof(info), caps.buf,
670 caps.size)) {
671 kfree(caps.buf);
672 return -EFAULT;
673 }
674 info.cap_offset = sizeof(info);
675 }
676
677 kfree(caps.buf);
522 } 678 }
523 679
524 return copy_to_user((void __user *)arg, &info, minsz) ? 680 return copy_to_user((void __user *)arg, &info, minsz) ?
@@ -798,7 +954,7 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
798 unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); 954 unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
799 struct vfio_pci_device *vdev = device_data; 955 struct vfio_pci_device *vdev = device_data;
800 956
801 if (index >= VFIO_PCI_NUM_REGIONS) 957 if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
802 return -EINVAL; 958 return -EINVAL;
803 959
804 switch (index) { 960 switch (index) {
@@ -815,6 +971,10 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
815 971
816 case VFIO_PCI_VGA_REGION_INDEX: 972 case VFIO_PCI_VGA_REGION_INDEX:
817 return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite); 973 return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite);
974 default:
975 index -= VFIO_PCI_NUM_REGIONS;
976 return vdev->region[index].ops->rw(vdev, buf,
977 count, ppos, iswrite);
818 } 978 }
819 979
820 return -EINVAL; 980 return -EINVAL;
@@ -997,6 +1157,7 @@ static void vfio_pci_remove(struct pci_dev *pdev)
997 return; 1157 return;
998 1158
999 vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev); 1159 vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
1160 kfree(vdev->region);
1000 kfree(vdev); 1161 kfree(vdev);
1001 1162
1002 if (vfio_pci_is_vga(pdev)) { 1163 if (vfio_pci_is_vga(pdev)) {
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index fe2b470d7ec6..142c533efec7 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -33,9 +33,8 @@
33 33
34#define PCI_CFG_SPACE_SIZE 256 34#define PCI_CFG_SPACE_SIZE 256
35 35
36/* Useful "pseudo" capabilities */ 36/* Fake capability ID for standard config space */
37#define PCI_CAP_ID_BASIC 0 37#define PCI_CAP_ID_BASIC 0
38#define PCI_CAP_ID_INVALID 0xFF
39 38
40#define is_bar(offset) \ 39#define is_bar(offset) \
41 ((offset >= PCI_BASE_ADDRESS_0 && offset < PCI_BASE_ADDRESS_5 + 4) || \ 40 ((offset >= PCI_BASE_ADDRESS_0 && offset < PCI_BASE_ADDRESS_5 + 4) || \
@@ -301,6 +300,23 @@ static int vfio_raw_config_read(struct vfio_pci_device *vdev, int pos,
301 return count; 300 return count;
302} 301}
303 302
303/* Virt access uses only virtualization */
304static int vfio_virt_config_write(struct vfio_pci_device *vdev, int pos,
305 int count, struct perm_bits *perm,
306 int offset, __le32 val)
307{
308 memcpy(vdev->vconfig + pos, &val, count);
309 return count;
310}
311
312static int vfio_virt_config_read(struct vfio_pci_device *vdev, int pos,
313 int count, struct perm_bits *perm,
314 int offset, __le32 *val)
315{
316 memcpy(val, vdev->vconfig + pos, count);
317 return count;
318}
319
304/* Default capability regions to read-only, no-virtualization */ 320/* Default capability regions to read-only, no-virtualization */
305static struct perm_bits cap_perms[PCI_CAP_ID_MAX + 1] = { 321static struct perm_bits cap_perms[PCI_CAP_ID_MAX + 1] = {
306 [0 ... PCI_CAP_ID_MAX] = { .readfn = vfio_direct_config_read } 322 [0 ... PCI_CAP_ID_MAX] = { .readfn = vfio_direct_config_read }
@@ -319,6 +335,11 @@ static struct perm_bits unassigned_perms = {
319 .writefn = vfio_raw_config_write 335 .writefn = vfio_raw_config_write
320}; 336};
321 337
/* Both directions served purely from vconfig (see PCI_CAP_ID_INVALID_VIRT) */
static struct perm_bits virt_perms = {
	.readfn = vfio_virt_config_read,
	.writefn = vfio_virt_config_write
};
342
322static void free_perm_bits(struct perm_bits *perm) 343static void free_perm_bits(struct perm_bits *perm)
323{ 344{
324 kfree(perm->virt); 345 kfree(perm->virt);
@@ -454,14 +475,19 @@ static void vfio_bar_fixup(struct vfio_pci_device *vdev)
454 bar = (__le32 *)&vdev->vconfig[PCI_ROM_ADDRESS]; 475 bar = (__le32 *)&vdev->vconfig[PCI_ROM_ADDRESS];
455 476
456 /* 477 /*
457 * NB. we expose the actual BAR size here, regardless of whether 478 * NB. REGION_INFO will have reported zero size if we weren't able
458 * we can read it. When we report the REGION_INFO for the ROM 479 * to read the ROM, but we still return the actual BAR size here if
459 * we report what PCI tells us is the actual ROM size. 480 * it exists (or the shadow ROM space).
460 */ 481 */
461 if (pci_resource_start(pdev, PCI_ROM_RESOURCE)) { 482 if (pci_resource_start(pdev, PCI_ROM_RESOURCE)) {
462 mask = ~(pci_resource_len(pdev, PCI_ROM_RESOURCE) - 1); 483 mask = ~(pci_resource_len(pdev, PCI_ROM_RESOURCE) - 1);
463 mask |= PCI_ROM_ADDRESS_ENABLE; 484 mask |= PCI_ROM_ADDRESS_ENABLE;
464 *bar &= cpu_to_le32((u32)mask); 485 *bar &= cpu_to_le32((u32)mask);
486 } else if (pdev->resource[PCI_ROM_RESOURCE].flags &
487 IORESOURCE_ROM_SHADOW) {
488 mask = ~(0x20000 - 1);
489 mask |= PCI_ROM_ADDRESS_ENABLE;
490 *bar &= cpu_to_le32((u32)mask);
465 } else 491 } else
466 *bar = 0; 492 *bar = 0;
467 493
@@ -1332,6 +1358,8 @@ static int vfio_cap_init(struct vfio_pci_device *vdev)
1332 pos + i, map[pos + i], cap); 1358 pos + i, map[pos + i], cap);
1333 } 1359 }
1334 1360
1361 BUILD_BUG_ON(PCI_CAP_ID_MAX >= PCI_CAP_ID_INVALID_VIRT);
1362
1335 memset(map + pos, cap, len); 1363 memset(map + pos, cap, len);
1336 ret = vfio_fill_vconfig_bytes(vdev, pos, len); 1364 ret = vfio_fill_vconfig_bytes(vdev, pos, len);
1337 if (ret) 1365 if (ret)
@@ -1419,9 +1447,9 @@ static int vfio_ecap_init(struct vfio_pci_device *vdev)
1419 /* 1447 /*
1420 * Even though ecap is 2 bytes, we're currently a long way 1448 * Even though ecap is 2 bytes, we're currently a long way
1421 * from exceeding 1 byte capabilities. If we ever make it 1449 * from exceeding 1 byte capabilities. If we ever make it
1422 * up to 0xFF we'll need to up this to a two-byte, byte map. 1450 * up to 0xFE we'll need to up this to a two-byte, byte map.
1423 */ 1451 */
1424 BUILD_BUG_ON(PCI_EXT_CAP_ID_MAX >= PCI_CAP_ID_INVALID); 1452 BUILD_BUG_ON(PCI_EXT_CAP_ID_MAX >= PCI_CAP_ID_INVALID_VIRT);
1425 1453
1426 memset(map + epos, ecap, len); 1454 memset(map + epos, ecap, len);
1427 ret = vfio_fill_vconfig_bytes(vdev, epos, len); 1455 ret = vfio_fill_vconfig_bytes(vdev, epos, len);
@@ -1597,6 +1625,9 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf,
1597 if (cap_id == PCI_CAP_ID_INVALID) { 1625 if (cap_id == PCI_CAP_ID_INVALID) {
1598 perm = &unassigned_perms; 1626 perm = &unassigned_perms;
1599 cap_start = *ppos; 1627 cap_start = *ppos;
1628 } else if (cap_id == PCI_CAP_ID_INVALID_VIRT) {
1629 perm = &virt_perms;
1630 cap_start = *ppos;
1600 } else { 1631 } else {
1601 if (*ppos >= PCI_CFG_SPACE_SIZE) { 1632 if (*ppos >= PCI_CFG_SPACE_SIZE) {
1602 WARN_ON(cap_id > PCI_EXT_CAP_ID_MAX); 1633 WARN_ON(cap_id > PCI_EXT_CAP_ID_MAX);
diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c
new file mode 100644
index 000000000000..6394b168ef29
--- /dev/null
+++ b/drivers/vfio/pci/vfio_pci_igd.c
@@ -0,0 +1,280 @@
1/*
2 * VFIO PCI Intel Graphics support
3 *
4 * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
5 * Author: Alex Williamson <alex.williamson@redhat.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * Register a device specific region through which to provide read-only
12 * access to the Intel IGD opregion. The register defining the opregion
13 * address is also virtualized to prevent user modification.
14 */
15
16#include <linux/io.h>
17#include <linux/pci.h>
18#include <linux/uaccess.h>
19#include <linux/vfio.h>
20
21#include "vfio_pci_private.h"
22
23#define OPREGION_SIGNATURE "IntelGraphicsMem"
24#define OPREGION_SIZE (8 * 1024)
25#define OPREGION_PCI_ADDR 0xfc
26
/*
 * Read handler for the IGD OpRegion device-specific region: copies from
 * the memremap'd OpRegion (region->data) into the user buffer.  Writes
 * are rejected; the region is registered read-only.
 *
 * NOTE(review): the regops ->rw contract returns size_t, yet negative
 * errno values (-EINVAL/-EFAULT) are returned here — callers must treat
 * the unsigned return as signed; ssize_t would express this. TODO confirm.
 */
static size_t vfio_pci_igd_rw(struct vfio_pci_device *vdev, char __user *buf,
			      size_t count, loff_t *ppos, bool iswrite)
{
	/* Device-specific regions are indexed after the fixed PCI regions */
	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
	void *base = vdev->region[i].data;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;

	if (pos >= vdev->region[i].size || iswrite)
		return -EINVAL;

	/* Clamp the transfer to the end of the region */
	count = min(count, (size_t)(vdev->region[i].size - pos));

	if (copy_to_user(buf, base + pos, count))
		return -EFAULT;

	*ppos += count;

	return count;
}
46
47static void vfio_pci_igd_release(struct vfio_pci_device *vdev,
48 struct vfio_pci_region *region)
49{
50 memunmap(region->data);
51}
52
/* Read-only accessors backing the exported OpRegion region */
static const struct vfio_pci_regops vfio_pci_igd_regops = {
	.rw = vfio_pci_igd_rw,
	.release = vfio_pci_igd_release,
};
57
58static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev)
59{
60 __le32 *dwordp = (__le32 *)(vdev->vconfig + OPREGION_PCI_ADDR);
61 u32 addr, size;
62 void *base;
63 int ret;
64
65 ret = pci_read_config_dword(vdev->pdev, OPREGION_PCI_ADDR, &addr);
66 if (ret)
67 return ret;
68
69 if (!addr || !(~addr))
70 return -ENODEV;
71
72 base = memremap(addr, OPREGION_SIZE, MEMREMAP_WB);
73 if (!base)
74 return -ENOMEM;
75
76 if (memcmp(base, OPREGION_SIGNATURE, 16)) {
77 memunmap(base);
78 return -EINVAL;
79 }
80
81 size = le32_to_cpu(*(__le32 *)(base + 16));
82 if (!size) {
83 memunmap(base);
84 return -EINVAL;
85 }
86
87 size *= 1024; /* In KB */
88
89 if (size != OPREGION_SIZE) {
90 memunmap(base);
91 base = memremap(addr, size, MEMREMAP_WB);
92 if (!base)
93 return -ENOMEM;
94 }
95
96 ret = vfio_pci_register_dev_region(vdev,
97 PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
98 VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
99 &vfio_pci_igd_regops, size, VFIO_REGION_INFO_FLAG_READ, base);
100 if (ret) {
101 memunmap(base);
102 return ret;
103 }
104
105 /* Fill vconfig with the hw value and virtualize register */
106 *dwordp = cpu_to_le32(addr);
107 memset(vdev->pci_config_map + OPREGION_PCI_ADDR,
108 PCI_CAP_ID_INVALID_VIRT, 4);
109
110 return ret;
111}
112
113static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
114 char __user *buf, size_t count, loff_t *ppos,
115 bool iswrite)
116{
117 unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
118 struct pci_dev *pdev = vdev->region[i].data;
119 loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
120 size_t size;
121 int ret;
122
123 if (pos >= vdev->region[i].size || iswrite)
124 return -EINVAL;
125
126 size = count = min(count, (size_t)(vdev->region[i].size - pos));
127
128 if ((pos & 1) && size) {
129 u8 val;
130
131 ret = pci_user_read_config_byte(pdev, pos, &val);
132 if (ret)
133 return pcibios_err_to_errno(ret);
134
135 if (copy_to_user(buf + count - size, &val, 1))
136 return -EFAULT;
137
138 pos++;
139 size--;
140 }
141
142 if ((pos & 3) && size > 2) {
143 u16 val;
144
145 ret = pci_user_read_config_word(pdev, pos, &val);
146 if (ret)
147 return pcibios_err_to_errno(ret);
148
149 val = cpu_to_le16(val);
150 if (copy_to_user(buf + count - size, &val, 2))
151 return -EFAULT;
152
153 pos += 2;
154 size -= 2;
155 }
156
157 while (size > 3) {
158 u32 val;
159
160 ret = pci_user_read_config_dword(pdev, pos, &val);
161 if (ret)
162 return pcibios_err_to_errno(ret);
163
164 val = cpu_to_le32(val);
165 if (copy_to_user(buf + count - size, &val, 4))
166 return -EFAULT;
167
168 pos += 4;
169 size -= 4;
170 }
171
172 while (size >= 2) {
173 u16 val;
174
175 ret = pci_user_read_config_word(pdev, pos, &val);
176 if (ret)
177 return pcibios_err_to_errno(ret);
178
179 val = cpu_to_le16(val);
180 if (copy_to_user(buf + count - size, &val, 2))
181 return -EFAULT;
182
183 pos += 2;
184 size -= 2;
185 }
186
187 while (size) {
188 u8 val;
189
190 ret = pci_user_read_config_byte(pdev, pos, &val);
191 if (ret)
192 return pcibios_err_to_errno(ret);
193
194 if (copy_to_user(buf + count - size, &val, 1))
195 return -EFAULT;
196
197 pos++;
198 size--;
199 }
200
201 *ppos += count;
202
203 return count;
204}
205
/*
 * Release callback for the host/LPC bridge config regions: drop the
 * pci_dev reference taken by vfio_pci_igd_cfg_init().
 */
static void vfio_pci_igd_cfg_release(struct vfio_pci_device *vdev,
				     struct vfio_pci_region *region)
{
	struct pci_dev *pdev = region->data;

	pci_dev_put(pdev);
}
213
/* Read-only accessors backing the bridge config mirror regions */
static const struct vfio_pci_regops vfio_pci_igd_cfg_regops = {
	.rw = vfio_pci_igd_cfg_rw,
	.release = vfio_pci_igd_cfg_release,
};
218
/*
 * Expose read-only mirrors of the host bridge (00:00.0) and LPC/ISA
 * bridge (00:1f.0) config space as device-specific regions; IGD drivers
 * identify the platform by probing these devices.
 *
 * Ownership: once vfio_pci_register_dev_region() succeeds, the pci_dev
 * reference belongs to the region and is dropped in
 * vfio_pci_igd_cfg_release().  If a later step fails, the caller's
 * error path (vfio_pci_enable -> vfio_pci_disable) releases any regions
 * already registered, so no reference leaks here.
 */
static int vfio_pci_igd_cfg_init(struct vfio_pci_device *vdev)
{
	struct pci_dev *host_bridge, *lpc_bridge;
	int ret;

	/* assumes IGD lives in PCI domain 0 -- TODO confirm */
	host_bridge = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0));
	if (!host_bridge)
		return -ENODEV;

	if (host_bridge->vendor != PCI_VENDOR_ID_INTEL ||
	    host_bridge->class != (PCI_CLASS_BRIDGE_HOST << 8)) {
		pci_dev_put(host_bridge);
		return -EINVAL;
	}

	ret = vfio_pci_register_dev_region(vdev,
		PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
		VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG,
		&vfio_pci_igd_cfg_regops, host_bridge->cfg_size,
		VFIO_REGION_INFO_FLAG_READ, host_bridge);
	if (ret) {
		pci_dev_put(host_bridge);
		return ret;
	}

	lpc_bridge = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x1f, 0));
	if (!lpc_bridge)
		return -ENODEV;

	if (lpc_bridge->vendor != PCI_VENDOR_ID_INTEL ||
	    lpc_bridge->class != (PCI_CLASS_BRIDGE_ISA << 8)) {
		pci_dev_put(lpc_bridge);
		return -EINVAL;
	}

	ret = vfio_pci_register_dev_region(vdev,
		PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
		VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG,
		&vfio_pci_igd_cfg_regops, lpc_bridge->cfg_size,
		VFIO_REGION_INFO_FLAG_READ, lpc_bridge);
	if (ret) {
		pci_dev_put(lpc_bridge);
		return ret;
	}

	return 0;
}
266
/*
 * Set up all Intel IGD device-specific regions: the OpRegion plus the
 * host and LPC bridge config-space mirrors.
 */
int vfio_pci_igd_init(struct vfio_pci_device *vdev)
{
	int ret = vfio_pci_igd_opregion_init(vdev);

	if (ret)
		return ret;

	return vfio_pci_igd_cfg_init(vdev);
}
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 3b3ba15558b7..e9ea3fef144a 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -309,14 +309,14 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
309 int vector, int fd, bool msix) 309 int vector, int fd, bool msix)
310{ 310{
311 struct pci_dev *pdev = vdev->pdev; 311 struct pci_dev *pdev = vdev->pdev;
312 int irq = msix ? vdev->msix[vector].vector : pdev->irq + vector;
313 char *name = msix ? "vfio-msix" : "vfio-msi";
314 struct eventfd_ctx *trigger; 312 struct eventfd_ctx *trigger;
315 int ret; 313 int irq, ret;
316 314
317 if (vector >= vdev->num_ctx) 315 if (vector < 0 || vector >= vdev->num_ctx)
318 return -EINVAL; 316 return -EINVAL;
319 317
318 irq = msix ? vdev->msix[vector].vector : pdev->irq + vector;
319
320 if (vdev->ctx[vector].trigger) { 320 if (vdev->ctx[vector].trigger) {
321 free_irq(irq, vdev->ctx[vector].trigger); 321 free_irq(irq, vdev->ctx[vector].trigger);
322 irq_bypass_unregister_producer(&vdev->ctx[vector].producer); 322 irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
@@ -328,8 +328,9 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
328 if (fd < 0) 328 if (fd < 0)
329 return 0; 329 return 0;
330 330
331 vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "%s[%d](%s)", 331 vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
332 name, vector, pci_name(pdev)); 332 msix ? "x" : "", vector,
333 pci_name(pdev));
333 if (!vdev->ctx[vector].name) 334 if (!vdev->ctx[vector].name)
334 return -ENOMEM; 335 return -ENOMEM;
335 336
@@ -379,7 +380,7 @@ static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start,
379{ 380{
380 int i, j, ret = 0; 381 int i, j, ret = 0;
381 382
382 if (start + count > vdev->num_ctx) 383 if (start >= vdev->num_ctx || start + count > vdev->num_ctx)
383 return -EINVAL; 384 return -EINVAL;
384 385
385 for (i = 0, j = start; i < count && !ret; i++, j++) { 386 for (i = 0, j = start; i < count && !ret; i++, j++) {
@@ -388,7 +389,7 @@ static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start,
388 } 389 }
389 390
390 if (ret) { 391 if (ret) {
391 for (--j; j >= start; j--) 392 for (--j; j >= (int)start; j--)
392 vfio_msi_set_vector_signal(vdev, j, -1, msix); 393 vfio_msi_set_vector_signal(vdev, j, -1, msix);
393 } 394 }
394 395
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 0e7394f8f69b..8a7d546d18a0 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -14,6 +14,7 @@
14#include <linux/mutex.h> 14#include <linux/mutex.h>
15#include <linux/pci.h> 15#include <linux/pci.h>
16#include <linux/irqbypass.h> 16#include <linux/irqbypass.h>
17#include <linux/types.h>
17 18
18#ifndef VFIO_PCI_PRIVATE_H 19#ifndef VFIO_PCI_PRIVATE_H
19#define VFIO_PCI_PRIVATE_H 20#define VFIO_PCI_PRIVATE_H
@@ -24,6 +25,10 @@
24#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) 25#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
25#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) 26#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
26 27
28/* Special capability IDs predefined access */
29#define PCI_CAP_ID_INVALID 0xFF /* default raw access */
30#define PCI_CAP_ID_INVALID_VIRT 0xFE /* default virt access */
31
27struct vfio_pci_irq_ctx { 32struct vfio_pci_irq_ctx {
28 struct eventfd_ctx *trigger; 33 struct eventfd_ctx *trigger;
29 struct virqfd *unmask; 34 struct virqfd *unmask;
@@ -33,6 +38,25 @@ struct vfio_pci_irq_ctx {
33 struct irq_bypass_producer producer; 38 struct irq_bypass_producer producer;
34}; 39};
35 40
41struct vfio_pci_device;
42struct vfio_pci_region;
43
/*
 * Callbacks backing a device-specific region.
 *
 * NOTE(review): implementations (e.g. vfio_pci_igd_rw) return negative
 * errno values through this size_t return — ssize_t would express that;
 * confirm how callers interpret it.
 */
struct vfio_pci_regops {
	/* Transfer data to/from the region; returns bytes transferred */
	size_t	(*rw)(struct vfio_pci_device *vdev, char __user *buf,
		      size_t count, loff_t *ppos, bool iswrite);
	/* Tear down region state (mappings, device references) */
	void	(*release)(struct vfio_pci_device *vdev,
			   struct vfio_pci_region *region);
};
50
/* A device-specific (non-standard-PCI) region exported to userspace */
struct vfio_pci_region {
	u32				type;	 /* VFIO_REGION_TYPE_* */
	u32				subtype; /* VFIO_REGION_SUBTYPE_* */
	const struct vfio_pci_regops	*ops;	 /* access/teardown callbacks */
	void				*data;	 /* provider-owned opaque state */
	size_t				size;	 /* region length in bytes */
	u32				flags;	 /* VFIO_REGION_INFO_FLAG_* */
};
59
36struct vfio_pci_device { 60struct vfio_pci_device {
37 struct pci_dev *pdev; 61 struct pci_dev *pdev;
38 void __iomem *barmap[PCI_STD_RESOURCE_END + 1]; 62 void __iomem *barmap[PCI_STD_RESOURCE_END + 1];
@@ -45,6 +69,8 @@ struct vfio_pci_device {
45 struct vfio_pci_irq_ctx *ctx; 69 struct vfio_pci_irq_ctx *ctx;
46 int num_ctx; 70 int num_ctx;
47 int irq_type; 71 int irq_type;
72 int num_regions;
73 struct vfio_pci_region *region;
48 u8 msi_qmax; 74 u8 msi_qmax;
49 u8 msix_bar; 75 u8 msix_bar;
50 u16 msix_size; 76 u16 msix_size;
@@ -91,4 +117,17 @@ extern void vfio_pci_uninit_perm_bits(void);
91 117
92extern int vfio_config_init(struct vfio_pci_device *vdev); 118extern int vfio_config_init(struct vfio_pci_device *vdev);
93extern void vfio_config_free(struct vfio_pci_device *vdev); 119extern void vfio_config_free(struct vfio_pci_device *vdev);
120
121extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
122 unsigned int type, unsigned int subtype,
123 const struct vfio_pci_regops *ops,
124 size_t size, u32 flags, void *data);
125#ifdef CONFIG_VFIO_PCI_IGD
126extern int vfio_pci_igd_init(struct vfio_pci_device *vdev);
127#else
128static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
129{
130 return -ENODEV;
131}
132#endif
94#endif /* VFIO_PCI_PRIVATE_H */ 133#endif /* VFIO_PCI_PRIVATE_H */
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 210db24d2204..5ffd1d9ad4bd 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -124,11 +124,14 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
124 void __iomem *io; 124 void __iomem *io;
125 ssize_t done; 125 ssize_t done;
126 126
127 if (!pci_resource_start(pdev, bar)) 127 if (pci_resource_start(pdev, bar))
128 end = pci_resource_len(pdev, bar);
129 else if (bar == PCI_ROM_RESOURCE &&
130 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
131 end = 0x20000;
132 else
128 return -EINVAL; 133 return -EINVAL;
129 134
130 end = pci_resource_len(pdev, bar);
131
132 if (pos >= end) 135 if (pos >= end)
133 return -EINVAL; 136 return -EINVAL;
134 137