aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYongji Xie <xyjxie@linux.vnet.ibm.com>2016-06-30 03:21:24 -0400
committerAlex Williamson <alex.williamson@redhat.com>2016-07-08 12:06:04 -0400
commit05f0c03fbac1819e86c9d5db4e208b68fc1b9b5e (patch)
tree9bd1c755a90f8f2cc6d34b7e191f3b6b92207a49
parent9698cbf0bea6b9f5c3190ce97bdf8963c0148671 (diff)
vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive
The current vfio-pci implementation does not allow sub-page (size < PAGE_SIZE) MMIO BARs to be mmapped, because the MMIO page backing such a BAR may be shared with other BARs. This causes performance problems when we pass through a PCI device that has BARs of this kind: the guest cannot handle MMIO accesses to these BARs directly, so every access is emulated in the host. However, not every sub-page BAR shares its page with other BARs. We should allow mmapping the sub-page MMIO BARs that we can guarantee do not share a page with any other BAR. This patch adds support for that case. It also tries to add a dummy resource that reserves the remainder of the page, so that a hot-added device's BAR cannot be assigned into it. The case where the BAR is not page aligned is deliberately not handled, because we cannot expect the BAR to be assigned to the same location within a page in the guest when we pass it through, and it would be hard to access the BAR from userspace because there is no way to obtain the BAR's location within its page. Signed-off-by: Yongji Xie <xyjxie@linux.vnet.ibm.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
-rw-r--r--drivers/vfio/pci/vfio_pci.c88
-rw-r--r--drivers/vfio/pci/vfio_pci_private.h8
2 files changed, 90 insertions, 6 deletions
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 188b1ff03f5f..d624a527777f 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -110,6 +110,74 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
110 return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; 110 return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
111} 111}
112 112
113static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev)
114{
115 struct resource *res;
116 int bar;
117 struct vfio_pci_dummy_resource *dummy_res;
118
119 INIT_LIST_HEAD(&vdev->dummy_resources_list);
120
121 for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) {
122 res = vdev->pdev->resource + bar;
123
124 if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP))
125 goto no_mmap;
126
127 if (!(res->flags & IORESOURCE_MEM))
128 goto no_mmap;
129
130 /*
131 * The PCI core shouldn't set up a resource with a
132 * type but zero size. But there may be bugs that
133 * cause us to do that.
134 */
135 if (!resource_size(res))
136 goto no_mmap;
137
138 if (resource_size(res) >= PAGE_SIZE) {
139 vdev->bar_mmap_supported[bar] = true;
140 continue;
141 }
142
143 if (!(res->start & ~PAGE_MASK)) {
144 /*
145 * Add a dummy resource to reserve the remainder
146 * of the exclusive page in case that hot-add
147 * device's bar is assigned into it.
148 */
149 dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL);
150 if (dummy_res == NULL)
151 goto no_mmap;
152
153 dummy_res->resource.name = "vfio sub-page reserved";
154 dummy_res->resource.start = res->end + 1;
155 dummy_res->resource.end = res->start + PAGE_SIZE - 1;
156 dummy_res->resource.flags = res->flags;
157 if (request_resource(res->parent,
158 &dummy_res->resource)) {
159 kfree(dummy_res);
160 goto no_mmap;
161 }
162 dummy_res->index = bar;
163 list_add(&dummy_res->res_next,
164 &vdev->dummy_resources_list);
165 vdev->bar_mmap_supported[bar] = true;
166 continue;
167 }
168 /*
169 * Here we don't handle the case when the BAR is not page
170 * aligned because we can't expect the BAR will be
171 * assigned into the same location in a page in guest
172 * when we passthrough the BAR. And it's hard to access
173 * this BAR in userspace because we have no way to get
174 * the BAR's location in a page.
175 */
176no_mmap:
177 vdev->bar_mmap_supported[bar] = false;
178 }
179}
180
113static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); 181static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
114static void vfio_pci_disable(struct vfio_pci_device *vdev); 182static void vfio_pci_disable(struct vfio_pci_device *vdev);
115 183
@@ -218,12 +286,15 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
218 } 286 }
219 } 287 }
220 288
289 vfio_pci_probe_mmaps(vdev);
290
221 return 0; 291 return 0;
222} 292}
223 293
224static void vfio_pci_disable(struct vfio_pci_device *vdev) 294static void vfio_pci_disable(struct vfio_pci_device *vdev)
225{ 295{
226 struct pci_dev *pdev = vdev->pdev; 296 struct pci_dev *pdev = vdev->pdev;
297 struct vfio_pci_dummy_resource *dummy_res, *tmp;
227 int i, bar; 298 int i, bar;
228 299
229 /* Stop the device from further DMA */ 300 /* Stop the device from further DMA */
@@ -252,6 +323,13 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
252 vdev->barmap[bar] = NULL; 323 vdev->barmap[bar] = NULL;
253 } 324 }
254 325
326 list_for_each_entry_safe(dummy_res, tmp,
327 &vdev->dummy_resources_list, res_next) {
328 list_del(&dummy_res->res_next);
329 release_resource(&dummy_res->resource);
330 kfree(dummy_res);
331 }
332
255 vdev->needs_reset = true; 333 vdev->needs_reset = true;
256 334
257 /* 335 /*
@@ -623,9 +701,7 @@ static long vfio_pci_ioctl(void *device_data,
623 701
624 info.flags = VFIO_REGION_INFO_FLAG_READ | 702 info.flags = VFIO_REGION_INFO_FLAG_READ |
625 VFIO_REGION_INFO_FLAG_WRITE; 703 VFIO_REGION_INFO_FLAG_WRITE;
626 if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) && 704 if (vdev->bar_mmap_supported[info.index]) {
627 pci_resource_flags(pdev, info.index) &
628 IORESOURCE_MEM && info.size >= PAGE_SIZE) {
629 info.flags |= VFIO_REGION_INFO_FLAG_MMAP; 705 info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
630 if (info.index == vdev->msix_bar) { 706 if (info.index == vdev->msix_bar) {
631 ret = msix_sparse_mmap_cap(vdev, &caps); 707 ret = msix_sparse_mmap_cap(vdev, &caps);
@@ -1049,16 +1125,16 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
1049 return -EINVAL; 1125 return -EINVAL;
1050 if (index >= VFIO_PCI_ROM_REGION_INDEX) 1126 if (index >= VFIO_PCI_ROM_REGION_INDEX)
1051 return -EINVAL; 1127 return -EINVAL;
1052 if (!(pci_resource_flags(pdev, index) & IORESOURCE_MEM)) 1128 if (!vdev->bar_mmap_supported[index])
1053 return -EINVAL; 1129 return -EINVAL;
1054 1130
1055 phys_len = pci_resource_len(pdev, index); 1131 phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
1056 req_len = vma->vm_end - vma->vm_start; 1132 req_len = vma->vm_end - vma->vm_start;
1057 pgoff = vma->vm_pgoff & 1133 pgoff = vma->vm_pgoff &
1058 ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); 1134 ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
1059 req_start = pgoff << PAGE_SHIFT; 1135 req_start = pgoff << PAGE_SHIFT;
1060 1136
1061 if (phys_len < PAGE_SIZE || req_start + req_len > phys_len) 1137 if (req_start + req_len > phys_len)
1062 return -EINVAL; 1138 return -EINVAL;
1063 1139
1064 if (index == vdev->msix_bar) { 1140 if (index == vdev->msix_bar) {
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 016c14a1b454..2128de86c80d 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -57,9 +57,16 @@ struct vfio_pci_region {
57 u32 flags; 57 u32 flags;
58}; 58};
59 59
/*
 * Placeholder resource claimed by vfio_pci_probe_mmaps() to reserve the
 * unused remainder of a page whose start holds a page-aligned, sub-page
 * MMIO BAR.  Entries are linked on vfio_pci_device.dummy_resources_list
 * and are released and freed in vfio_pci_disable().
 */
60struct vfio_pci_dummy_resource {
61 struct resource resource; /* reserved range: BAR end + 1 up to the end of that page */
62 int index; /* number of the BAR this reservation was made for */
63 struct list_head res_next; /* link in vdev->dummy_resources_list */
64};
65
60struct vfio_pci_device { 66struct vfio_pci_device {
61 struct pci_dev *pdev; 67 struct pci_dev *pdev;
62 void __iomem *barmap[PCI_STD_RESOURCE_END + 1]; 68 void __iomem *barmap[PCI_STD_RESOURCE_END + 1];
69 bool bar_mmap_supported[PCI_STD_RESOURCE_END + 1];
63 u8 *pci_config_map; 70 u8 *pci_config_map;
64 u8 *vconfig; 71 u8 *vconfig;
65 struct perm_bits *msi_perm; 72 struct perm_bits *msi_perm;
@@ -88,6 +95,7 @@ struct vfio_pci_device {
88 int refcnt; 95 int refcnt;
89 struct eventfd_ctx *err_trigger; 96 struct eventfd_ctx *err_trigger;
90 struct eventfd_ctx *req_trigger; 97 struct eventfd_ctx *req_trigger;
98 struct list_head dummy_resources_list;
91}; 99};
92 100
93#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX) 101#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)