aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/vfio/pci/Kconfig4
-rw-r--r--drivers/vfio/pci/Makefile1
-rw-r--r--drivers/vfio/pci/vfio_pci.c175
-rw-r--r--drivers/vfio/pci/vfio_pci_config.c45
-rw-r--r--drivers/vfio/pci/vfio_pci_igd.c280
-rw-r--r--drivers/vfio/pci/vfio_pci_intrs.c17
-rw-r--r--drivers/vfio/pci/vfio_pci_private.h39
-rw-r--r--drivers/vfio/pci/vfio_pci_rdwr.c9
-rw-r--r--drivers/vfio/vfio.c70
-rw-r--r--include/linux/vfio.h11
-rw-r--r--include/uapi/linux/vfio.h92
11 files changed, 706 insertions, 37 deletions
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 02912f180c6d..24ee2605b9f0 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -26,3 +26,7 @@ config VFIO_PCI_MMAP
26config VFIO_PCI_INTX 26config VFIO_PCI_INTX
27 depends on VFIO_PCI 27 depends on VFIO_PCI
28 def_bool y if !S390 28 def_bool y if !S390
29
30config VFIO_PCI_IGD
31 depends on VFIO_PCI
32 def_bool y if X86
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index 131079255fd9..76d8ec058edd 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -1,4 +1,5 @@
1 1
2vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o 2vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
3vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
3 4
4obj-$(CONFIG_VFIO_PCI) += vfio-pci.o 5obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 8c80a48e3233..712a84978e97 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -111,6 +111,7 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
111} 111}
112 112
113static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); 113static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
114static void vfio_pci_disable(struct vfio_pci_device *vdev);
114 115
115static int vfio_pci_enable(struct vfio_pci_device *vdev) 116static int vfio_pci_enable(struct vfio_pci_device *vdev)
116{ 117{
@@ -169,13 +170,26 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
169 if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev)) 170 if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
170 vdev->has_vga = true; 171 vdev->has_vga = true;
171 172
173
174 if (vfio_pci_is_vga(pdev) &&
175 pdev->vendor == PCI_VENDOR_ID_INTEL &&
176 IS_ENABLED(CONFIG_VFIO_PCI_IGD)) {
177 ret = vfio_pci_igd_init(vdev);
178 if (ret) {
179 dev_warn(&vdev->pdev->dev,
180 "Failed to setup Intel IGD regions\n");
181 vfio_pci_disable(vdev);
182 return ret;
183 }
184 }
185
172 return 0; 186 return 0;
173} 187}
174 188
175static void vfio_pci_disable(struct vfio_pci_device *vdev) 189static void vfio_pci_disable(struct vfio_pci_device *vdev)
176{ 190{
177 struct pci_dev *pdev = vdev->pdev; 191 struct pci_dev *pdev = vdev->pdev;
178 int bar; 192 int i, bar;
179 193
180 /* Stop the device from further DMA */ 194 /* Stop the device from further DMA */
181 pci_clear_master(pdev); 195 pci_clear_master(pdev);
@@ -186,6 +200,13 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
186 200
187 vdev->virq_disabled = false; 201 vdev->virq_disabled = false;
188 202
203 for (i = 0; i < vdev->num_regions; i++)
204 vdev->region[i].ops->release(vdev, &vdev->region[i]);
205
206 vdev->num_regions = 0;
207 kfree(vdev->region);
208 vdev->region = NULL; /* don't krealloc a freed pointer */
209
189 vfio_config_free(vdev); 210 vfio_config_free(vdev);
190 211
191 for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) { 212 for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) {
@@ -421,6 +442,93 @@ static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
421 return walk.ret; 442 return walk.ret;
422} 443}
423 444
445static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
446 struct vfio_info_cap *caps)
447{
448 struct vfio_info_cap_header *header;
449 struct vfio_region_info_cap_sparse_mmap *sparse;
450 size_t end, size;
451 int nr_areas = 2, i = 0;
452
453 end = pci_resource_len(vdev->pdev, vdev->msix_bar);
454
455 /* If MSI-X table is aligned to the start or end, only one area */
456 if (((vdev->msix_offset & PAGE_MASK) == 0) ||
457 (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end))
458 nr_areas = 1;
459
460 size = sizeof(*sparse) + (nr_areas * sizeof(*sparse->areas));
461
462 header = vfio_info_cap_add(caps, size,
463 VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
464 if (IS_ERR(header))
465 return PTR_ERR(header);
466
467 sparse = container_of(header,
468 struct vfio_region_info_cap_sparse_mmap, header);
469 sparse->nr_areas = nr_areas;
470
471 if (vdev->msix_offset & PAGE_MASK) {
472 sparse->areas[i].offset = 0;
473 sparse->areas[i].size = vdev->msix_offset & PAGE_MASK;
474 i++;
475 }
476
477 if (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) < end) {
478 sparse->areas[i].offset = PAGE_ALIGN(vdev->msix_offset +
479 vdev->msix_size);
480 sparse->areas[i].size = end - sparse->areas[i].offset;
481 i++;
482 }
483
484 return 0;
485}
486
487static int region_type_cap(struct vfio_pci_device *vdev,
488 struct vfio_info_cap *caps,
489 unsigned int type, unsigned int subtype)
490{
491 struct vfio_info_cap_header *header;
492 struct vfio_region_info_cap_type *cap;
493
494 header = vfio_info_cap_add(caps, sizeof(*cap),
495 VFIO_REGION_INFO_CAP_TYPE, 1);
496 if (IS_ERR(header))
497 return PTR_ERR(header);
498
499 cap = container_of(header, struct vfio_region_info_cap_type, header);
500 cap->type = type;
501 cap->subtype = subtype;
502
503 return 0;
504}
505
506int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
507 unsigned int type, unsigned int subtype,
508 const struct vfio_pci_regops *ops,
509 size_t size, u32 flags, void *data)
510{
511 struct vfio_pci_region *region;
512
513 region = krealloc(vdev->region,
514 (vdev->num_regions + 1) * sizeof(*region),
515 GFP_KERNEL);
516 if (!region)
517 return -ENOMEM;
518
519 vdev->region = region;
520 vdev->region[vdev->num_regions].type = type;
521 vdev->region[vdev->num_regions].subtype = subtype;
522 vdev->region[vdev->num_regions].ops = ops;
523 vdev->region[vdev->num_regions].size = size;
524 vdev->region[vdev->num_regions].flags = flags;
525 vdev->region[vdev->num_regions].data = data;
526
527 vdev->num_regions++;
528
529 return 0;
530}
531
424static long vfio_pci_ioctl(void *device_data, 532static long vfio_pci_ioctl(void *device_data,
425 unsigned int cmd, unsigned long arg) 533 unsigned int cmd, unsigned long arg)
426{ 534{
@@ -443,7 +551,7 @@ static long vfio_pci_ioctl(void *device_data,
443 if (vdev->reset_works) 551 if (vdev->reset_works)
444 info.flags |= VFIO_DEVICE_FLAGS_RESET; 552 info.flags |= VFIO_DEVICE_FLAGS_RESET;
445 553
446 info.num_regions = VFIO_PCI_NUM_REGIONS; 554 info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
447 info.num_irqs = VFIO_PCI_NUM_IRQS; 555 info.num_irqs = VFIO_PCI_NUM_IRQS;
448 556
449 return copy_to_user((void __user *)arg, &info, minsz) ? 557 return copy_to_user((void __user *)arg, &info, minsz) ?
@@ -452,6 +560,8 @@ static long vfio_pci_ioctl(void *device_data,
452 } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { 560 } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
453 struct pci_dev *pdev = vdev->pdev; 561 struct pci_dev *pdev = vdev->pdev;
454 struct vfio_region_info info; 562 struct vfio_region_info info;
563 struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
564 int i, ret;
455 565
456 minsz = offsetofend(struct vfio_region_info, offset); 566 minsz = offsetofend(struct vfio_region_info, offset);
457 567
@@ -480,8 +590,15 @@ static long vfio_pci_ioctl(void *device_data,
480 VFIO_REGION_INFO_FLAG_WRITE; 590 VFIO_REGION_INFO_FLAG_WRITE;
481 if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) && 591 if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) &&
482 pci_resource_flags(pdev, info.index) & 592 pci_resource_flags(pdev, info.index) &
483 IORESOURCE_MEM && info.size >= PAGE_SIZE) 593 IORESOURCE_MEM && info.size >= PAGE_SIZE) {
484 info.flags |= VFIO_REGION_INFO_FLAG_MMAP; 594 info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
595 if (info.index == vdev->msix_bar) {
596 ret = msix_sparse_mmap_cap(vdev, &caps);
597 if (ret)
598 return ret;
599 }
600 }
601
485 break; 602 break;
486 case VFIO_PCI_ROM_REGION_INDEX: 603 case VFIO_PCI_ROM_REGION_INDEX:
487 { 604 {
@@ -493,8 +610,14 @@ static long vfio_pci_ioctl(void *device_data,
493 610
494 /* Report the BAR size, not the ROM size */ 611 /* Report the BAR size, not the ROM size */
495 info.size = pci_resource_len(pdev, info.index); 612 info.size = pci_resource_len(pdev, info.index);
496 if (!info.size) 613 if (!info.size) {
497 break; 614 /* Shadow ROMs appear as PCI option ROMs */
615 if (pdev->resource[PCI_ROM_RESOURCE].flags &
616 IORESOURCE_ROM_SHADOW)
617 info.size = 0x20000;
618 else
619 break;
620 }
498 621
499 /* Is it really there? */ 622 /* Is it really there? */
500 io = pci_map_rom(pdev, &size); 623 io = pci_map_rom(pdev, &size);
@@ -518,7 +641,40 @@ static long vfio_pci_ioctl(void *device_data,
518 641
519 break; 642 break;
520 default: 643 default:
521 return -EINVAL; 644 if (info.index >=
645 VFIO_PCI_NUM_REGIONS + vdev->num_regions)
646 return -EINVAL;
647
648 i = info.index - VFIO_PCI_NUM_REGIONS;
649
650 info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
651 info.size = vdev->region[i].size;
652 info.flags = vdev->region[i].flags;
653
654 ret = region_type_cap(vdev, &caps,
655 vdev->region[i].type,
656 vdev->region[i].subtype);
657 if (ret)
658 return ret;
659 }
660
661 if (caps.size) {
662 info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
663 if (info.argsz < sizeof(info) + caps.size) {
664 info.argsz = sizeof(info) + caps.size;
665 info.cap_offset = 0;
666 } else {
667 vfio_info_cap_shift(&caps, sizeof(info));
668 if (copy_to_user((void __user *)arg +
669 sizeof(info), caps.buf,
670 caps.size)) {
671 kfree(caps.buf);
672 return -EFAULT;
673 }
674 info.cap_offset = sizeof(info);
675 }
676
677 kfree(caps.buf);
522 } 678 }
523 679
524 return copy_to_user((void __user *)arg, &info, minsz) ? 680 return copy_to_user((void __user *)arg, &info, minsz) ?
@@ -798,7 +954,7 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
798 unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); 954 unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
799 struct vfio_pci_device *vdev = device_data; 955 struct vfio_pci_device *vdev = device_data;
800 956
801 if (index >= VFIO_PCI_NUM_REGIONS) 957 if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
802 return -EINVAL; 958 return -EINVAL;
803 959
804 switch (index) { 960 switch (index) {
@@ -815,6 +971,10 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
815 971
816 case VFIO_PCI_VGA_REGION_INDEX: 972 case VFIO_PCI_VGA_REGION_INDEX:
817 return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite); 973 return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite);
974 default:
975 index -= VFIO_PCI_NUM_REGIONS;
976 return vdev->region[index].ops->rw(vdev, buf,
977 count, ppos, iswrite);
818 } 978 }
819 979
820 return -EINVAL; 980 return -EINVAL;
@@ -997,6 +1157,7 @@ static void vfio_pci_remove(struct pci_dev *pdev)
997 return; 1157 return;
998 1158
999 vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev); 1159 vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
1160 kfree(vdev->region);
1000 kfree(vdev); 1161 kfree(vdev);
1001 1162
1002 if (vfio_pci_is_vga(pdev)) { 1163 if (vfio_pci_is_vga(pdev)) {
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index fe2b470d7ec6..142c533efec7 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -33,9 +33,8 @@
33 33
34#define PCI_CFG_SPACE_SIZE 256 34#define PCI_CFG_SPACE_SIZE 256
35 35
36/* Useful "pseudo" capabilities */ 36/* Fake capability ID for standard config space */
37#define PCI_CAP_ID_BASIC 0 37#define PCI_CAP_ID_BASIC 0
38#define PCI_CAP_ID_INVALID 0xFF
39 38
40#define is_bar(offset) \ 39#define is_bar(offset) \
41 ((offset >= PCI_BASE_ADDRESS_0 && offset < PCI_BASE_ADDRESS_5 + 4) || \ 40 ((offset >= PCI_BASE_ADDRESS_0 && offset < PCI_BASE_ADDRESS_5 + 4) || \
@@ -301,6 +300,23 @@ static int vfio_raw_config_read(struct vfio_pci_device *vdev, int pos,
301 return count; 300 return count;
302} 301}
303 302
303/* Virt access uses only virtualization */
304static int vfio_virt_config_write(struct vfio_pci_device *vdev, int pos,
305 int count, struct perm_bits *perm,
306 int offset, __le32 val)
307{
308 memcpy(vdev->vconfig + pos, &val, count);
309 return count;
310}
311
312static int vfio_virt_config_read(struct vfio_pci_device *vdev, int pos,
313 int count, struct perm_bits *perm,
314 int offset, __le32 *val)
315{
316 memcpy(val, vdev->vconfig + pos, count);
317 return count;
318}
319
304/* Default capability regions to read-only, no-virtualization */ 320/* Default capability regions to read-only, no-virtualization */
305static struct perm_bits cap_perms[PCI_CAP_ID_MAX + 1] = { 321static struct perm_bits cap_perms[PCI_CAP_ID_MAX + 1] = {
306 [0 ... PCI_CAP_ID_MAX] = { .readfn = vfio_direct_config_read } 322 [0 ... PCI_CAP_ID_MAX] = { .readfn = vfio_direct_config_read }
@@ -319,6 +335,11 @@ static struct perm_bits unassigned_perms = {
319 .writefn = vfio_raw_config_write 335 .writefn = vfio_raw_config_write
320}; 336};
321 337
338static struct perm_bits virt_perms = {
339 .readfn = vfio_virt_config_read,
340 .writefn = vfio_virt_config_write
341};
342
322static void free_perm_bits(struct perm_bits *perm) 343static void free_perm_bits(struct perm_bits *perm)
323{ 344{
324 kfree(perm->virt); 345 kfree(perm->virt);
@@ -454,14 +475,19 @@ static void vfio_bar_fixup(struct vfio_pci_device *vdev)
454 bar = (__le32 *)&vdev->vconfig[PCI_ROM_ADDRESS]; 475 bar = (__le32 *)&vdev->vconfig[PCI_ROM_ADDRESS];
455 476
456 /* 477 /*
457 * NB. we expose the actual BAR size here, regardless of whether 478 * NB. REGION_INFO will have reported zero size if we weren't able
458 * we can read it. When we report the REGION_INFO for the ROM 479 * to read the ROM, but we still return the actual BAR size here if
459 * we report what PCI tells us is the actual ROM size. 480 * it exists (or the shadow ROM space).
460 */ 481 */
461 if (pci_resource_start(pdev, PCI_ROM_RESOURCE)) { 482 if (pci_resource_start(pdev, PCI_ROM_RESOURCE)) {
462 mask = ~(pci_resource_len(pdev, PCI_ROM_RESOURCE) - 1); 483 mask = ~(pci_resource_len(pdev, PCI_ROM_RESOURCE) - 1);
463 mask |= PCI_ROM_ADDRESS_ENABLE; 484 mask |= PCI_ROM_ADDRESS_ENABLE;
464 *bar &= cpu_to_le32((u32)mask); 485 *bar &= cpu_to_le32((u32)mask);
486 } else if (pdev->resource[PCI_ROM_RESOURCE].flags &
487 IORESOURCE_ROM_SHADOW) {
488 mask = ~(0x20000 - 1);
489 mask |= PCI_ROM_ADDRESS_ENABLE;
490 *bar &= cpu_to_le32((u32)mask);
465 } else 491 } else
466 *bar = 0; 492 *bar = 0;
467 493
@@ -1332,6 +1358,8 @@ static int vfio_cap_init(struct vfio_pci_device *vdev)
1332 pos + i, map[pos + i], cap); 1358 pos + i, map[pos + i], cap);
1333 } 1359 }
1334 1360
1361 BUILD_BUG_ON(PCI_CAP_ID_MAX >= PCI_CAP_ID_INVALID_VIRT);
1362
1335 memset(map + pos, cap, len); 1363 memset(map + pos, cap, len);
1336 ret = vfio_fill_vconfig_bytes(vdev, pos, len); 1364 ret = vfio_fill_vconfig_bytes(vdev, pos, len);
1337 if (ret) 1365 if (ret)
@@ -1419,9 +1447,9 @@ static int vfio_ecap_init(struct vfio_pci_device *vdev)
1419 /* 1447 /*
1420 * Even though ecap is 2 bytes, we're currently a long way 1448 * Even though ecap is 2 bytes, we're currently a long way
1421 * from exceeding 1 byte capabilities. If we ever make it 1449 * from exceeding 1 byte capabilities. If we ever make it
1422 * up to 0xFF we'll need to up this to a two-byte, byte map. 1450 * up to 0xFE we'll need to up this to a two-byte, byte map.
1423 */ 1451 */
1424 BUILD_BUG_ON(PCI_EXT_CAP_ID_MAX >= PCI_CAP_ID_INVALID); 1452 BUILD_BUG_ON(PCI_EXT_CAP_ID_MAX >= PCI_CAP_ID_INVALID_VIRT);
1425 1453
1426 memset(map + epos, ecap, len); 1454 memset(map + epos, ecap, len);
1427 ret = vfio_fill_vconfig_bytes(vdev, epos, len); 1455 ret = vfio_fill_vconfig_bytes(vdev, epos, len);
@@ -1597,6 +1625,9 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf,
1597 if (cap_id == PCI_CAP_ID_INVALID) { 1625 if (cap_id == PCI_CAP_ID_INVALID) {
1598 perm = &unassigned_perms; 1626 perm = &unassigned_perms;
1599 cap_start = *ppos; 1627 cap_start = *ppos;
1628 } else if (cap_id == PCI_CAP_ID_INVALID_VIRT) {
1629 perm = &virt_perms;
1630 cap_start = *ppos;
1600 } else { 1631 } else {
1601 if (*ppos >= PCI_CFG_SPACE_SIZE) { 1632 if (*ppos >= PCI_CFG_SPACE_SIZE) {
1602 WARN_ON(cap_id > PCI_EXT_CAP_ID_MAX); 1633 WARN_ON(cap_id > PCI_EXT_CAP_ID_MAX);
diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c
new file mode 100644
index 000000000000..6394b168ef29
--- /dev/null
+++ b/drivers/vfio/pci/vfio_pci_igd.c
@@ -0,0 +1,280 @@
1/*
2 * VFIO PCI Intel Graphics support
3 *
4 * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
5 * Author: Alex Williamson <alex.williamson@redhat.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * Register a device specific region through which to provide read-only
12 * access to the Intel IGD opregion. The register defining the opregion
13 * address is also virtualized to prevent user modification.
14 */
15
16#include <linux/io.h>
17#include <linux/pci.h>
18#include <linux/uaccess.h>
19#include <linux/vfio.h>
20
21#include "vfio_pci_private.h"
22
23#define OPREGION_SIGNATURE "IntelGraphicsMem"
24#define OPREGION_SIZE (8 * 1024)
25#define OPREGION_PCI_ADDR 0xfc
26
27static size_t vfio_pci_igd_rw(struct vfio_pci_device *vdev, char __user *buf,
28 size_t count, loff_t *ppos, bool iswrite)
29{
30 unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
31 void *base = vdev->region[i].data;
32 loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
33
34 if (pos >= vdev->region[i].size || iswrite)
35 return -EINVAL;
36
37 count = min(count, (size_t)(vdev->region[i].size - pos));
38
39 if (copy_to_user(buf, base + pos, count))
40 return -EFAULT;
41
42 *ppos += count;
43
44 return count;
45}
46
47static void vfio_pci_igd_release(struct vfio_pci_device *vdev,
48 struct vfio_pci_region *region)
49{
50 memunmap(region->data);
51}
52
53static const struct vfio_pci_regops vfio_pci_igd_regops = {
54 .rw = vfio_pci_igd_rw,
55 .release = vfio_pci_igd_release,
56};
57
58static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev)
59{
60 __le32 *dwordp = (__le32 *)(vdev->vconfig + OPREGION_PCI_ADDR);
61 u32 addr, size;
62 void *base;
63 int ret;
64
65 ret = pci_read_config_dword(vdev->pdev, OPREGION_PCI_ADDR, &addr);
66 if (ret)
67 return ret;
68
69 if (!addr || !(~addr))
70 return -ENODEV;
71
72 base = memremap(addr, OPREGION_SIZE, MEMREMAP_WB);
73 if (!base)
74 return -ENOMEM;
75
76 if (memcmp(base, OPREGION_SIGNATURE, 16)) {
77 memunmap(base);
78 return -EINVAL;
79 }
80
81 size = le32_to_cpu(*(__le32 *)(base + 16));
82 if (!size) {
83 memunmap(base);
84 return -EINVAL;
85 }
86
87 size *= 1024; /* In KB */
88
89 if (size != OPREGION_SIZE) {
90 memunmap(base);
91 base = memremap(addr, size, MEMREMAP_WB);
92 if (!base)
93 return -ENOMEM;
94 }
95
96 ret = vfio_pci_register_dev_region(vdev,
97 PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
98 VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
99 &vfio_pci_igd_regops, size, VFIO_REGION_INFO_FLAG_READ, base);
100 if (ret) {
101 memunmap(base);
102 return ret;
103 }
104
105 /* Fill vconfig with the hw value and virtualize register */
106 *dwordp = cpu_to_le32(addr);
107 memset(vdev->pci_config_map + OPREGION_PCI_ADDR,
108 PCI_CAP_ID_INVALID_VIRT, 4);
109
110 return ret;
111}
112
113static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
114 char __user *buf, size_t count, loff_t *ppos,
115 bool iswrite)
116{
117 unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
118 struct pci_dev *pdev = vdev->region[i].data;
119 loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
120 size_t size;
121 int ret;
122
123 if (pos >= vdev->region[i].size || iswrite)
124 return -EINVAL;
125
126 size = count = min(count, (size_t)(vdev->region[i].size - pos));
127
128 if ((pos & 1) && size) {
129 u8 val;
130
131 ret = pci_user_read_config_byte(pdev, pos, &val);
132 if (ret)
133 return pcibios_err_to_errno(ret);
134
135 if (copy_to_user(buf + count - size, &val, 1))
136 return -EFAULT;
137
138 pos++;
139 size--;
140 }
141
142 if ((pos & 3) && size > 2) {
143 u16 val;
144
145 ret = pci_user_read_config_word(pdev, pos, &val);
146 if (ret)
147 return pcibios_err_to_errno(ret);
148
149 val = cpu_to_le16(val);
150 if (copy_to_user(buf + count - size, &val, 2))
151 return -EFAULT;
152
153 pos += 2;
154 size -= 2;
155 }
156
157 while (size > 3) {
158 u32 val;
159
160 ret = pci_user_read_config_dword(pdev, pos, &val);
161 if (ret)
162 return pcibios_err_to_errno(ret);
163
164 val = cpu_to_le32(val);
165 if (copy_to_user(buf + count - size, &val, 4))
166 return -EFAULT;
167
168 pos += 4;
169 size -= 4;
170 }
171
172 while (size >= 2) {
173 u16 val;
174
175 ret = pci_user_read_config_word(pdev, pos, &val);
176 if (ret)
177 return pcibios_err_to_errno(ret);
178
179 val = cpu_to_le16(val);
180 if (copy_to_user(buf + count - size, &val, 2))
181 return -EFAULT;
182
183 pos += 2;
184 size -= 2;
185 }
186
187 while (size) {
188 u8 val;
189
190 ret = pci_user_read_config_byte(pdev, pos, &val);
191 if (ret)
192 return pcibios_err_to_errno(ret);
193
194 if (copy_to_user(buf + count - size, &val, 1))
195 return -EFAULT;
196
197 pos++;
198 size--;
199 }
200
201 *ppos += count;
202
203 return count;
204}
205
206static void vfio_pci_igd_cfg_release(struct vfio_pci_device *vdev,
207 struct vfio_pci_region *region)
208{
209 struct pci_dev *pdev = region->data;
210
211 pci_dev_put(pdev);
212}
213
214static const struct vfio_pci_regops vfio_pci_igd_cfg_regops = {
215 .rw = vfio_pci_igd_cfg_rw,
216 .release = vfio_pci_igd_cfg_release,
217};
218
219static int vfio_pci_igd_cfg_init(struct vfio_pci_device *vdev)
220{
221 struct pci_dev *host_bridge, *lpc_bridge;
222 int ret;
223
224 host_bridge = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0));
225 if (!host_bridge)
226 return -ENODEV;
227
228 if (host_bridge->vendor != PCI_VENDOR_ID_INTEL ||
229 host_bridge->class != (PCI_CLASS_BRIDGE_HOST << 8)) {
230 pci_dev_put(host_bridge);
231 return -EINVAL;
232 }
233
234 ret = vfio_pci_register_dev_region(vdev,
235 PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
236 VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG,
237 &vfio_pci_igd_cfg_regops, host_bridge->cfg_size,
238 VFIO_REGION_INFO_FLAG_READ, host_bridge);
239 if (ret) {
240 pci_dev_put(host_bridge);
241 return ret;
242 }
243
244 lpc_bridge = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x1f, 0));
245 if (!lpc_bridge)
246 return -ENODEV;
247
248 if (lpc_bridge->vendor != PCI_VENDOR_ID_INTEL ||
249 lpc_bridge->class != (PCI_CLASS_BRIDGE_ISA << 8)) {
250 pci_dev_put(lpc_bridge);
251 return -EINVAL;
252 }
253
254 ret = vfio_pci_register_dev_region(vdev,
255 PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
256 VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG,
257 &vfio_pci_igd_cfg_regops, lpc_bridge->cfg_size,
258 VFIO_REGION_INFO_FLAG_READ, lpc_bridge);
259 if (ret) {
260 pci_dev_put(lpc_bridge);
261 return ret;
262 }
263
264 return 0;
265}
266
/*
 * Set up all IGD specific regions: the OpRegion first, then the bridge
 * config space regions.  Returns 0 on success or the first error hit.
 */
int vfio_pci_igd_init(struct vfio_pci_device *vdev)
{
	int ret = vfio_pci_igd_opregion_init(vdev);

	return ret ? ret : vfio_pci_igd_cfg_init(vdev);
}
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 3b3ba15558b7..e9ea3fef144a 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -309,14 +309,14 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
309 int vector, int fd, bool msix) 309 int vector, int fd, bool msix)
310{ 310{
311 struct pci_dev *pdev = vdev->pdev; 311 struct pci_dev *pdev = vdev->pdev;
312 int irq = msix ? vdev->msix[vector].vector : pdev->irq + vector;
313 char *name = msix ? "vfio-msix" : "vfio-msi";
314 struct eventfd_ctx *trigger; 312 struct eventfd_ctx *trigger;
315 int ret; 313 int irq, ret;
316 314
317 if (vector >= vdev->num_ctx) 315 if (vector < 0 || vector >= vdev->num_ctx)
318 return -EINVAL; 316 return -EINVAL;
319 317
318 irq = msix ? vdev->msix[vector].vector : pdev->irq + vector;
319
320 if (vdev->ctx[vector].trigger) { 320 if (vdev->ctx[vector].trigger) {
321 free_irq(irq, vdev->ctx[vector].trigger); 321 free_irq(irq, vdev->ctx[vector].trigger);
322 irq_bypass_unregister_producer(&vdev->ctx[vector].producer); 322 irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
@@ -328,8 +328,9 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
328 if (fd < 0) 328 if (fd < 0)
329 return 0; 329 return 0;
330 330
331 vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "%s[%d](%s)", 331 vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
332 name, vector, pci_name(pdev)); 332 msix ? "x" : "", vector,
333 pci_name(pdev));
333 if (!vdev->ctx[vector].name) 334 if (!vdev->ctx[vector].name)
334 return -ENOMEM; 335 return -ENOMEM;
335 336
@@ -379,7 +380,7 @@ static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start,
379{ 380{
380 int i, j, ret = 0; 381 int i, j, ret = 0;
381 382
382 if (start + count > vdev->num_ctx) 383 if (start >= vdev->num_ctx || start + count > vdev->num_ctx)
383 return -EINVAL; 384 return -EINVAL;
384 385
385 for (i = 0, j = start; i < count && !ret; i++, j++) { 386 for (i = 0, j = start; i < count && !ret; i++, j++) {
@@ -388,7 +389,7 @@ static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start,
388 } 389 }
389 390
390 if (ret) { 391 if (ret) {
391 for (--j; j >= start; j--) 392 for (--j; j >= (int)start; j--)
392 vfio_msi_set_vector_signal(vdev, j, -1, msix); 393 vfio_msi_set_vector_signal(vdev, j, -1, msix);
393 } 394 }
394 395
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 0e7394f8f69b..8a7d546d18a0 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -14,6 +14,7 @@
14#include <linux/mutex.h> 14#include <linux/mutex.h>
15#include <linux/pci.h> 15#include <linux/pci.h>
16#include <linux/irqbypass.h> 16#include <linux/irqbypass.h>
17#include <linux/types.h>
17 18
18#ifndef VFIO_PCI_PRIVATE_H 19#ifndef VFIO_PCI_PRIVATE_H
19#define VFIO_PCI_PRIVATE_H 20#define VFIO_PCI_PRIVATE_H
@@ -24,6 +25,10 @@
24#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) 25#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
25#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) 26#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
26 27
28/* Special capability IDs predefined access */
29#define PCI_CAP_ID_INVALID 0xFF /* default raw access */
30#define PCI_CAP_ID_INVALID_VIRT 0xFE /* default virt access */
31
27struct vfio_pci_irq_ctx { 32struct vfio_pci_irq_ctx {
28 struct eventfd_ctx *trigger; 33 struct eventfd_ctx *trigger;
29 struct virqfd *unmask; 34 struct virqfd *unmask;
@@ -33,6 +38,25 @@ struct vfio_pci_irq_ctx {
33 struct irq_bypass_producer producer; 38 struct irq_bypass_producer producer;
34}; 39};
35 40
41struct vfio_pci_device;
42struct vfio_pci_region;
43
44struct vfio_pci_regops {
45 size_t (*rw)(struct vfio_pci_device *vdev, char __user *buf,
46 size_t count, loff_t *ppos, bool iswrite);
47 void (*release)(struct vfio_pci_device *vdev,
48 struct vfio_pci_region *region);
49};
50
51struct vfio_pci_region {
52 u32 type;
53 u32 subtype;
54 const struct vfio_pci_regops *ops;
55 void *data;
56 size_t size;
57 u32 flags;
58};
59
36struct vfio_pci_device { 60struct vfio_pci_device {
37 struct pci_dev *pdev; 61 struct pci_dev *pdev;
38 void __iomem *barmap[PCI_STD_RESOURCE_END + 1]; 62 void __iomem *barmap[PCI_STD_RESOURCE_END + 1];
@@ -45,6 +69,8 @@ struct vfio_pci_device {
45 struct vfio_pci_irq_ctx *ctx; 69 struct vfio_pci_irq_ctx *ctx;
46 int num_ctx; 70 int num_ctx;
47 int irq_type; 71 int irq_type;
72 int num_regions;
73 struct vfio_pci_region *region;
48 u8 msi_qmax; 74 u8 msi_qmax;
49 u8 msix_bar; 75 u8 msix_bar;
50 u16 msix_size; 76 u16 msix_size;
@@ -91,4 +117,17 @@ extern void vfio_pci_uninit_perm_bits(void);
91 117
92extern int vfio_config_init(struct vfio_pci_device *vdev); 118extern int vfio_config_init(struct vfio_pci_device *vdev);
93extern void vfio_config_free(struct vfio_pci_device *vdev); 119extern void vfio_config_free(struct vfio_pci_device *vdev);
120
121extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
122 unsigned int type, unsigned int subtype,
123 const struct vfio_pci_regops *ops,
124 size_t size, u32 flags, void *data);
125#ifdef CONFIG_VFIO_PCI_IGD
126extern int vfio_pci_igd_init(struct vfio_pci_device *vdev);
127#else
128static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
129{
130 return -ENODEV;
131}
132#endif
94#endif /* VFIO_PCI_PRIVATE_H */ 133#endif /* VFIO_PCI_PRIVATE_H */
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 210db24d2204..5ffd1d9ad4bd 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -124,11 +124,14 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
124 void __iomem *io; 124 void __iomem *io;
125 ssize_t done; 125 ssize_t done;
126 126
127 if (!pci_resource_start(pdev, bar)) 127 if (pci_resource_start(pdev, bar))
128 end = pci_resource_len(pdev, bar);
129 else if (bar == PCI_ROM_RESOURCE &&
130 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
131 end = 0x20000;
132 else
128 return -EINVAL; 133 return -EINVAL;
129 134
130 end = pci_resource_len(pdev, bar);
131
132 if (pos >= end) 135 if (pos >= end)
133 return -EINVAL; 136 return -EINVAL;
134 137
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index ecca316386f5..6fd6fa5469de 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1080,30 +1080,26 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container,
1080 continue; 1080 continue;
1081 } 1081 }
1082 1082
1083 /* module reference holds the driver we're working on */
1084 mutex_unlock(&vfio.iommu_drivers_lock);
1085
1086 data = driver->ops->open(arg); 1083 data = driver->ops->open(arg);
1087 if (IS_ERR(data)) { 1084 if (IS_ERR(data)) {
1088 ret = PTR_ERR(data); 1085 ret = PTR_ERR(data);
1089 module_put(driver->ops->owner); 1086 module_put(driver->ops->owner);
1090 goto skip_drivers_unlock; 1087 continue;
1091 } 1088 }
1092 1089
1093 ret = __vfio_container_attach_groups(container, driver, data); 1090 ret = __vfio_container_attach_groups(container, driver, data);
1094 if (!ret) { 1091 if (ret) {
1095 container->iommu_driver = driver;
1096 container->iommu_data = data;
1097 } else {
1098 driver->ops->release(data); 1092 driver->ops->release(data);
1099 module_put(driver->ops->owner); 1093 module_put(driver->ops->owner);
1094 continue;
1100 } 1095 }
1101 1096
1102 goto skip_drivers_unlock; 1097 container->iommu_driver = driver;
1098 container->iommu_data = data;
1099 break;
1103 } 1100 }
1104 1101
1105 mutex_unlock(&vfio.iommu_drivers_lock); 1102 mutex_unlock(&vfio.iommu_drivers_lock);
1106skip_drivers_unlock:
1107 up_write(&container->group_lock); 1103 up_write(&container->group_lock);
1108 1104
1109 return ret; 1105 return ret;
@@ -1733,6 +1729,60 @@ long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
1733EXPORT_SYMBOL_GPL(vfio_external_check_extension); 1729EXPORT_SYMBOL_GPL(vfio_external_check_extension);
1734 1730
1735/** 1731/**
1732 * Sub-module support
1733 */
1734/*
1735 * Helper for managing a buffer of info chain capabilities, allocate or
1736 * reallocate a buffer with additional @size, filling in @id and @version
1737 * of the capability. A pointer to the new capability is returned.
1738 *
1739 * NB. The chain is based at the head of the buffer, so new entries are
1740 * added to the tail, vfio_info_cap_shift() should be called to fixup the
1741 * next offsets prior to copying to the user buffer.
1742 */
1743struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
1744 size_t size, u16 id, u16 version)
1745{
1746 void *buf;
1747 struct vfio_info_cap_header *header, *tmp;
1748
1749 buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
1750 if (!buf) {
1751 kfree(caps->buf);
1752 caps->size = 0;
1753 return ERR_PTR(-ENOMEM);
1754 }
1755
1756 caps->buf = buf;
1757 header = buf + caps->size;
1758
1759 /* Eventually copied to user buffer, zero */
1760 memset(header, 0, size);
1761
1762 header->id = id;
1763 header->version = version;
1764
1765 /* Add to the end of the capability chain */
1766 for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next)
1767 ; /* nothing */
1768
1769 tmp->next = caps->size;
1770 caps->size += size;
1771
1772 return header;
1773}
1774EXPORT_SYMBOL_GPL(vfio_info_cap_add);
1775
1776void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
1777{
1778 struct vfio_info_cap_header *tmp;
1779
1780 for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next - offset)
1781 tmp->next += offset;
1782}
1783EXPORT_SYMBOL_GPL(vfio_info_cap_shift);
1784
1785/**
1736 * Module/class support 1786 * Module/class support
1737 */ 1787 */
1738static char *vfio_devnode(struct device *dev, umode_t *mode) 1788static char *vfio_devnode(struct device *dev, umode_t *mode)
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 610a86a892b8..0ecae0b1cd34 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -92,6 +92,17 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group);
92extern long vfio_external_check_extension(struct vfio_group *group, 92extern long vfio_external_check_extension(struct vfio_group *group,
93 unsigned long arg); 93 unsigned long arg);
94 94
95/*
96 * Sub-module helpers
97 */
/* State for a capability chain under construction, grown by vfio_info_cap_add() */
98struct vfio_info_cap {
99 struct vfio_info_cap_header *buf; /* chain buffer; the chain is based at its head */
100 size_t size; /* bytes of buf occupied by capabilities added so far */
101};
102extern struct vfio_info_cap_header *vfio_info_cap_add(
103 struct vfio_info_cap *caps, size_t size, u16 id, u16 version);
104extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
105
95struct pci_dev; 106struct pci_dev;
96#ifdef CONFIG_EEH 107#ifdef CONFIG_EEH
97extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); 108extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 7d7a4c6f2090..255a2113f53c 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -59,6 +59,33 @@
59#define VFIO_TYPE (';') 59#define VFIO_TYPE (';')
60#define VFIO_BASE 100 60#define VFIO_BASE 100
61 61
62/*
63 * For extension of INFO ioctls, VFIO makes use of a capability chain
64 * designed after PCI/e capabilities. A flag bit indicates whether
65 * this capability chain is supported and a field defined in the fixed
66 * structure defines the offset of the first capability in the chain.
67 * This field is only valid when the corresponding bit in the flags
68 * bitmap is set. This offset field is relative to the start of the
69 * INFO buffer, as is the next field within each capability header.
70 * The id within the header is a shared address space per INFO ioctl,
71 * while the version field is specific to the capability id. The
72 * contents following the header are specific to the capability id.
73 */
74struct vfio_info_cap_header {
75 __u16 id; /* Identifies capability */
76 __u16 version; /* Version specific to the capability ID */
77 __u32 next; /* Offset of next capability from start of INFO buffer, 0 ends the chain */
78};
79
80/*
81 * Callers of INFO ioctls passing insufficiently sized buffers will see
82 * the capability chain flag bit set, a zero value for the first capability
83 * offset (if available within the provided argsz), and argsz will be
84 * updated to report the necessary buffer size. For compatibility, the
85 * INFO ioctl will not report error in this case, but the capability chain
86 * will not be available.
87 */
88
62/* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */ 89/* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */
63 90
64/** 91/**
@@ -194,13 +221,73 @@ struct vfio_region_info {
194#define VFIO_REGION_INFO_FLAG_READ (1 << 0) /* Region supports read */ 221#define VFIO_REGION_INFO_FLAG_READ (1 << 0) /* Region supports read */
195#define VFIO_REGION_INFO_FLAG_WRITE (1 << 1) /* Region supports write */ 222#define VFIO_REGION_INFO_FLAG_WRITE (1 << 1) /* Region supports write */
196#define VFIO_REGION_INFO_FLAG_MMAP (1 << 2) /* Region supports mmap */ 223#define VFIO_REGION_INFO_FLAG_MMAP (1 << 2) /* Region supports mmap */
224#define VFIO_REGION_INFO_FLAG_CAPS (1 << 3) /* Info supports caps */
197 __u32 index; /* Region index */ 225 __u32 index; /* Region index */
198 __u32 resv; /* Reserved for alignment */ 226 __u32 cap_offset; /* Offset within info struct of first cap */
199 __u64 size; /* Region size (bytes) */ 227 __u64 size; /* Region size (bytes) */
200 __u64 offset; /* Region offset from start of device fd */ 228 __u64 offset; /* Region offset from start of device fd */
201}; 229};
202#define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8) 230#define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8)
203 231
232/*
233 * The sparse mmap capability allows finer granularity of specifying areas
234 * within a region with mmap support. When specified, the user should only
235 * mmap the offset ranges specified by the areas array. mmaps outside of the
236 * areas specified may fail (such as the range covering a PCI MSI-X table) or
237 * may result in improper device behavior.
238 *
239 * The structures below define version 1 of this capability.
240 */
241#define VFIO_REGION_INFO_CAP_SPARSE_MMAP 1
242
243struct vfio_region_sparse_mmap_area {
244 __u64 offset; /* Offset of mmap'able area within region */
245 __u64 size; /* Size of mmap'able area */
246};
247
248struct vfio_region_info_cap_sparse_mmap {
249 struct vfio_info_cap_header header;
250 __u32 nr_areas;
251 __u32 reserved;
252 struct vfio_region_sparse_mmap_area areas[];
253};
254
255/*
256 * The device specific type capability allows regions unique to a specific
257 * device or class of devices to be exposed. This helps solve the problem for
258 * vfio bus drivers of defining which region indexes correspond to which region
259 * on the device, without needing to resort to static indexes, as done by
260 * vfio-pci. For instance, if we were to go back in time, we might remove
261 * VFIO_PCI_VGA_REGION_INDEX and let vfio-pci simply define that all indexes
262 * greater than or equal to VFIO_PCI_NUM_REGIONS are device specific and we'd
263 * make a "VGA" device specific type to describe the VGA access space. This
264 * means that non-VGA devices wouldn't need to waste this index, and thus the
265 * address space associated with it due to implementation of device file
266 * descriptor offsets in vfio-pci.
267 *
268 * The current implementation is now part of the user ABI, so we can't use this
269 * for VGA, but there are other upcoming use cases, such as opregions for Intel
270 * IGD devices and framebuffers for vGPU devices. We missed VGA, but we'll
271 * use this for future additions.
272 *
273 * The structure below defines version 1 of this capability.
274 */
275#define VFIO_REGION_INFO_CAP_TYPE 2
276
277struct vfio_region_info_cap_type {
278 struct vfio_info_cap_header header;
279 __u32 type; /* global per bus driver */
280 __u32 subtype; /* type specific */
281};
282
/*
 * Bit 31 flags a PCI vendor specific region type.  The constant is unsigned
 * because shifting 1 into the sign bit of an int is undefined and the value
 * would sign-extend when widened.
 * NOTE(review): the mask presumably selects a 16-bit PCI vendor ID held in
 * the low bits of the type -- confirm against the users of these macros.
 */
#define VFIO_REGION_TYPE_PCI_VENDOR_TYPE	(1U << 31)
#define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
285
286/* 8086 Vendor sub-types */
287#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
288#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
289#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
290
204/** 291/**
205 * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9, 292 * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
206 * struct vfio_irq_info) 293 * struct vfio_irq_info)
@@ -336,7 +423,8 @@ enum {
336 * between described ranges are unimplemented. 423 * between described ranges are unimplemented.
337 */ 424 */
338 VFIO_PCI_VGA_REGION_INDEX, 425 VFIO_PCI_VGA_REGION_INDEX,
339 VFIO_PCI_NUM_REGIONS 426 VFIO_PCI_NUM_REGIONS = 9 /* Fixed user ABI, region indexes >=9 use */
427 /* device specific cap to define content. */
340}; 428};
341 429
342enum { 430enum {