summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Alexey Kardashevskiy <aik@ozlabs.ru> 2015-06-05 02:35:23 -0400
committer Michael Ellerman <mpe@ellerman.id.au> 2015-06-11 01:16:54 -0400
commit 46d3e1e16294c587a74093b1f5474c1b33b72381 (patch)
tree 6d10c41510f2d4f5c809d0838860f9b9d6a881c2
parent 0054719386d96984153ad31d714a8be4ec7eba80 (diff)
vfio: powerpc/spapr: powerpc/powernv/ioda2: Use DMA windows API in ownership control
Previously, the IOMMU user (VFIO) would take control over the IOMMU table belonging to a specific IOMMU group. This approach did not allow sharing tables between IOMMU groups attached to the same container. This introduces a new IOMMU ownership flavour in which the user can not only control the existing IOMMU table but also remove/create tables on demand. If an IOMMU implements take/release_ownership() callbacks, this lets the user have full control over the IOMMU group. When the ownership is taken, the platform code removes all the windows so the caller must create them. Before returning the ownership back to the platform code, VFIO unprograms and removes all the tables it created. This changes IODA2's ownership handler to remove the existing table rather than manipulating the existing one. From now on, iommu_take_ownership() and iommu_release_ownership() are only called from the vfio_iommu_spapr_tce driver. Old-style ownership is still supported, allowing VFIO to run on older P5IOC2 and IODA IO controllers. No change in userspace-visible behaviour is expected. Since it recreates TCE tables on each ownership change, related kernel traces will appear more often. This adds a pnv_pci_ioda2_setup_default_config() which is called when a PE is being configured at boot time and when the ownership is passed from VFIO to the platform code. Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> [aw: for the vfio related changes] Acked-by: Alex Williamson <alex.williamson@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r-- arch/powerpc/platforms/powernv/pci-ioda.c 101
-rw-r--r-- drivers/vfio/vfio_iommu_spapr_tce.c 88
2 files changed, 141 insertions, 48 deletions
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index a7e098dba23d..b9f0f430e249 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2073,6 +2073,49 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
2073 return 0; 2073 return 0;
2074} 2074}
2075 2075
2076static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
2077{
2078 struct iommu_table *tbl = NULL;
2079 long rc;
2080
2081 rc = pnv_pci_ioda2_create_table(&pe->table_group, 0,
2082 IOMMU_PAGE_SHIFT_4K,
2083 pe->table_group.tce32_size,
2084 POWERNV_IOMMU_DEFAULT_LEVELS, &tbl);
2085 if (rc) {
2086 pe_err(pe, "Failed to create 32-bit TCE table, err %ld",
2087 rc);
2088 return rc;
2089 }
2090
2091 iommu_init_table(tbl, pe->phb->hose->node);
2092
2093 rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
2094 if (rc) {
2095 pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n",
2096 rc);
2097 pnv_ioda2_table_free(tbl);
2098 return rc;
2099 }
2100
2101 if (!pnv_iommu_bypass_disabled)
2102 pnv_pci_ioda2_set_bypass(pe, true);
2103
2104 /* OPAL variant of PHB3 invalidated TCEs */
2105 if (pe->phb->ioda.tce_inval_reg)
2106 tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
2107
2108 /*
2109 * Setting table base here only for carrying iommu_group
2110 * further down to let iommu_add_device() do the job.
2111 * pnv_pci_ioda_dma_dev_setup will override it later anyway.
2112 */
2113 if (pe->flags & PNV_IODA_PE_DEV)
2114 set_iommu_table_base(&pe->pdev->dev, tbl);
2115
2116 return 0;
2117}
2118
2076#ifdef CONFIG_IOMMU_API 2119#ifdef CONFIG_IOMMU_API
2077static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, 2120static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
2078 __u64 window_size, __u32 levels) 2121 __u64 window_size, __u32 levels)
@@ -2134,9 +2177,12 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
2134{ 2177{
2135 struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, 2178 struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
2136 table_group); 2179 table_group);
2180 /* Store @tbl as pnv_pci_ioda2_unset_window() resets it */
2181 struct iommu_table *tbl = pe->table_group.tables[0];
2137 2182
2138 iommu_take_ownership(table_group->tables[0]);
2139 pnv_pci_ioda2_set_bypass(pe, false); 2183 pnv_pci_ioda2_set_bypass(pe, false);
2184 pnv_pci_ioda2_unset_window(&pe->table_group, 0);
2185 pnv_ioda2_table_free(tbl);
2140} 2186}
2141 2187
2142static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) 2188static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
@@ -2144,8 +2190,7 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
2144 struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, 2190 struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
2145 table_group); 2191 table_group);
2146 2192
2147 iommu_release_ownership(table_group->tables[0]); 2193 pnv_pci_ioda2_setup_default_config(pe);
2148 pnv_pci_ioda2_set_bypass(pe, true);
2149} 2194}
2150 2195
2151static struct iommu_table_group_ops pnv_pci_ioda2_ops = { 2196static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
@@ -2308,7 +2353,6 @@ static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
2308static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, 2353static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
2309 struct pnv_ioda_pe *pe) 2354 struct pnv_ioda_pe *pe)
2310{ 2355{
2311 struct iommu_table *tbl = NULL;
2312 int64_t rc; 2356 int64_t rc;
2313 2357
2314 /* We shouldn't already have a 32-bit DMA associated */ 2358 /* We shouldn't already have a 32-bit DMA associated */
@@ -2333,58 +2377,21 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
2333 IOMMU_TABLE_GROUP_MAX_TABLES; 2377 IOMMU_TABLE_GROUP_MAX_TABLES;
2334 pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS; 2378 pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS;
2335 pe->table_group.pgsizes = SZ_4K | SZ_64K | SZ_16M; 2379 pe->table_group.pgsizes = SZ_4K | SZ_64K | SZ_16M;
2336
2337 rc = pnv_pci_ioda2_create_table(&pe->table_group, 0,
2338 IOMMU_PAGE_SHIFT_4K,
2339 pe->table_group.tce32_size,
2340 POWERNV_IOMMU_DEFAULT_LEVELS, &tbl);
2341 if (rc) {
2342 pe_err(pe, "Failed to create 32-bit TCE table, err %ld", rc);
2343 goto fail;
2344 }
2345 pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
2346
2347 tbl->it_ops = &pnv_ioda2_iommu_ops;
2348 iommu_init_table(tbl, phb->hose->node);
2349#ifdef CONFIG_IOMMU_API 2380#ifdef CONFIG_IOMMU_API
2350 pe->table_group.ops = &pnv_pci_ioda2_ops; 2381 pe->table_group.ops = &pnv_pci_ioda2_ops;
2351#endif 2382#endif
2352 2383
2353 rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); 2384 rc = pnv_pci_ioda2_setup_default_config(pe);
2354 if (rc) { 2385 if (rc) {
2355 pe_err(pe, "Failed to configure 32-bit TCE table," 2386 if (pe->tce32_seg >= 0)
2356 " err %ld\n", rc); 2387 pe->tce32_seg = -1;
2357 goto fail; 2388 return;
2358 } 2389 }
2359 2390
2360 /* OPAL variant of PHB3 invalidated TCEs */ 2391 if (pe->flags & PNV_IODA_PE_DEV)
2361 if (phb->ioda.tce_inval_reg)
2362 tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
2363
2364 if (pe->flags & PNV_IODA_PE_DEV) {
2365 /*
2366 * Setting table base here only for carrying iommu_group
2367 * further down to let iommu_add_device() do the job.
2368 * pnv_pci_ioda_dma_dev_setup will override it later anyway.
2369 */
2370 set_iommu_table_base(&pe->pdev->dev, tbl);
2371 iommu_add_device(&pe->pdev->dev); 2392 iommu_add_device(&pe->pdev->dev);
2372 } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) 2393 else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
2373 pnv_ioda_setup_bus_dma(pe, pe->pbus); 2394 pnv_ioda_setup_bus_dma(pe, pe->pbus);
2374
2375 /* Also create a bypass window */
2376 if (!pnv_iommu_bypass_disabled)
2377 pnv_pci_ioda2_set_bypass(pe, true);
2378
2379 return;
2380fail:
2381 if (pe->tce32_seg >= 0)
2382 pe->tce32_seg = -1;
2383 if (tbl) {
2384 pnv_pci_ioda2_table_free_pages(tbl);
2385 pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
2386 iommu_free_table(tbl, "pnv");
2387 }
2388} 2395}
2389 2396
2390static void pnv_ioda_setup_dma(struct pnv_phb *phb) 2397static void pnv_ioda_setup_dma(struct pnv_phb *phb)
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 6d919eb4251f..203caacf2242 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -333,6 +333,45 @@ static long tce_iommu_build(struct tce_container *container,
333 return ret; 333 return ret;
334} 334}
335 335
336static long tce_iommu_create_table(struct tce_container *container,
337 struct iommu_table_group *table_group,
338 int num,
339 __u32 page_shift,
340 __u64 window_size,
341 __u32 levels,
342 struct iommu_table **ptbl)
343{
344 long ret, table_size;
345
346 table_size = table_group->ops->get_table_size(page_shift, window_size,
347 levels);
348 if (!table_size)
349 return -EINVAL;
350
351 ret = try_increment_locked_vm(table_size >> PAGE_SHIFT);
352 if (ret)
353 return ret;
354
355 ret = table_group->ops->create_table(table_group, num,
356 page_shift, window_size, levels, ptbl);
357
358 WARN_ON(!ret && !(*ptbl)->it_ops->free);
359 WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));
360
361 if (ret)
362 decrement_locked_vm(table_size >> PAGE_SHIFT);
363
364 return ret;
365}
366
367static void tce_iommu_free_table(struct iommu_table *tbl)
368{
369 unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
370
371 tbl->it_ops->free(tbl);
372 decrement_locked_vm(pages);
373}
374
336static long tce_iommu_ioctl(void *iommu_data, 375static long tce_iommu_ioctl(void *iommu_data,
337 unsigned int cmd, unsigned long arg) 376 unsigned int cmd, unsigned long arg)
338{ 377{
@@ -546,15 +585,62 @@ static int tce_iommu_take_ownership(struct tce_container *container,
546static void tce_iommu_release_ownership_ddw(struct tce_container *container, 585static void tce_iommu_release_ownership_ddw(struct tce_container *container,
547 struct iommu_table_group *table_group) 586 struct iommu_table_group *table_group)
548{ 587{
588 long i;
589
590 if (!table_group->ops->unset_window) {
591 WARN_ON_ONCE(1);
592 return;
593 }
594
595 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
596 /* Store table pointer as unset_window resets it */
597 struct iommu_table *tbl = table_group->tables[i];
598
599 if (!tbl)
600 continue;
601
602 table_group->ops->unset_window(table_group, i);
603 tce_iommu_clear(container, tbl,
604 tbl->it_offset, tbl->it_size);
605 tce_iommu_free_table(tbl);
606 }
607
549 table_group->ops->release_ownership(table_group); 608 table_group->ops->release_ownership(table_group);
550} 609}
551 610
552static long tce_iommu_take_ownership_ddw(struct tce_container *container, 611static long tce_iommu_take_ownership_ddw(struct tce_container *container,
553 struct iommu_table_group *table_group) 612 struct iommu_table_group *table_group)
554{ 613{
614 long ret;
615 struct iommu_table *tbl = NULL;
616
617 if (!table_group->ops->create_table || !table_group->ops->set_window ||
618 !table_group->ops->release_ownership) {
619 WARN_ON_ONCE(1);
620 return -EFAULT;
621 }
622
555 table_group->ops->take_ownership(table_group); 623 table_group->ops->take_ownership(table_group);
556 624
557 return 0; 625 ret = tce_iommu_create_table(container,
626 table_group,
627 0, /* window number */
628 IOMMU_PAGE_SHIFT_4K,
629 table_group->tce32_size,
630 1, /* default levels */
631 &tbl);
632 if (!ret) {
633 ret = table_group->ops->set_window(table_group, 0, tbl);
634 if (ret)
635 tce_iommu_free_table(tbl);
636 else
637 table_group->tables[0] = tbl;
638 }
639
640 if (ret)
641 table_group->ops->release_ownership(table_group);
642
643 return ret;
558} 644}
559 645
560static int tce_iommu_attach_group(void *iommu_data, 646static int tce_iommu_attach_group(void *iommu_data,