author		Robin Murphy <robin.murphy@arm.com>	2017-03-31 10:46:07 -0400
committer	Joerg Roedel <jroedel@suse.de>		2017-04-03 06:45:03 -0400
commit		bb65a64c7285e7105c1a6c8a33b37770343a4e96
tree		2efc271de7785fa95fa91c6385c6fa56a76ce7be
parent		a44e6657585b15eeebf5681bfcc7ce0b002429c2
iommu/dma: Plumb in the per-CPU IOVA caches
With IOVA allocation suitably tidied up, we are finally free to opt in to the per-CPU caching mechanism. The caching alone can provide a modest improvement over walking the rbtree for weedier systems (iperf3 shows ~10% more ethernet throughput on an ARM Juno r1 constrained to a single 650MHz Cortex-A53), but the real gain will be in sidestepping the rbtree lock contention which larger ARM-based systems with lots of parallel I/O are starting to feel the pain of.

Reviewed-by: Nate Watterson <nwatters@codeaurora.org>
Tested-by: Nate Watterson <nwatters@codeaurora.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
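As a rough illustration of the mechanism the message refers to, the following standalone userspace sketch shows how a small per-CPU (here, per-thread) cache layered in front of a lock-protected global allocator keeps the hot alloc/free path away from the shared lock. This is not the kernel's iova_rcache code; the names, the cache depth and the thread count are illustrative assumptions only.

/*
 * Conceptual sketch of a per-CPU cache in front of a contended allocator.
 * NOT the kernel implementation; build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>

#define CACHE_DEPTH 8				/* assumed, illustrative */

struct percpu_cache {
	unsigned long slots[CACHE_DEPTH];
	int count;
};

static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long global_next = 1;		/* stand-in for the rbtree allocator */

static __thread struct percpu_cache cache;	/* one cache per thread ("CPU") */

static unsigned long cached_alloc(void)
{
	if (cache.count > 0)			/* fast path: no lock taken */
		return cache.slots[--cache.count];

	pthread_mutex_lock(&global_lock);	/* slow path: contended resource */
	unsigned long v = global_next++;
	pthread_mutex_unlock(&global_lock);
	return v;
}

static void cached_free(unsigned long v)
{
	if (cache.count < CACHE_DEPTH) {	/* fast path: keep it local */
		cache.slots[cache.count++] = v;
		return;
	}
	/* A real implementation would hand overflow back to the global pool;
	 * this sketch simply drops it. */
}

static void *worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < 1000000; i++) {
		unsigned long v = cached_alloc();
		cached_free(v);			/* alloc/free ping-pong stays lock-free */
	}
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	printf("allocations taken under the global lock: %lu\n", global_next - 1);
	return 0;
}

Each worker hits the global lock only for its very first allocation; every later alloc/free pair is served from the thread-local cache. That is the same effect the switch to alloc_iova_fast()/free_iova_fast() below is after, with the rbtree lock playing the role of the contended resource.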
-rw-r--r--	drivers/iommu/dma-iommu.c	37
1 file changed, 17 insertions(+), 20 deletions(-)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 1b94beb43036..8348f366ddd1 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -361,8 +361,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 {
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
-	unsigned long shift, iova_len;
-	struct iova *iova = NULL;
+	unsigned long shift, iova_len, iova = 0;
 
 	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
 		cookie->msi_iova += size;
@@ -371,41 +370,39 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 
 	shift = iova_shift(iovad);
 	iova_len = size >> shift;
+	/*
+	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
+	 * will come back to bite us badly, so we have to waste a bit of space
+	 * rounding up anything cacheable to make sure that can't happen. The
+	 * order of the unadjusted size will still match upon freeing.
+	 */
+	if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
+		iova_len = roundup_pow_of_two(iova_len);
 
 	if (domain->geometry.force_aperture)
 		dma_limit = min(dma_limit, domain->geometry.aperture_end);
 
 	/* Try to get PCI devices a SAC address */
 	if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
-		iova = alloc_iova(iovad, iova_len, DMA_BIT_MASK(32) >> shift,
-				  true);
-	/*
-	 * Enforce size-alignment to be safe - there could perhaps be an
-	 * attribute to control this per-device, or at least per-domain...
-	 */
+		iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift);
+
 	if (!iova)
-		iova = alloc_iova(iovad, iova_len, dma_limit >> shift, true);
+		iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift);
 
-	return (dma_addr_t)iova->pfn_lo << shift;
+	return (dma_addr_t)iova << shift;
 }
 
 static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 		dma_addr_t iova, size_t size)
 {
 	struct iova_domain *iovad = &cookie->iovad;
-	struct iova *iova_rbnode;
+	unsigned long shift = iova_shift(iovad);
 
 	/* The MSI case is only ever cleaning up its most recent allocation */
-	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
+	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
 		cookie->msi_iova -= size;
-		return;
-	}
-
-	iova_rbnode = find_iova(iovad, iova_pfn(iovad, iova));
-	if (WARN_ON(!iova_rbnode))
-		return;
-
-	__free_iova(iovad, iova_rbnode);
+	else
+		free_iova_fast(iovad, iova >> shift, size >> shift);
 }
 
 static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
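The power-of-two rounding introduced by the comment in the second hunk can be checked in isolation with a trivial standalone program. The IOVA_RANGE_CACHE_MAX_SIZE value of 6 is an assumption taken from the iova allocator headers of that era, and the helper below merely mimics the kernel's roundup_pow_of_two().

/* Standalone sketch (not kernel code): the size-rounding rule applied to
 * allocations small enough to be handled by the per-CPU IOVA caches. */
#include <stdio.h>

#define IOVA_RANGE_CACHE_MAX_SIZE 6	/* assumed: log2 of max cached size + 1 */

/* Round n up to the next power of two (spirit of roundup_pow_of_two()). */
static unsigned long roundup_pow_of_two_ul(unsigned long n)
{
	unsigned long p = 1;
	while (p < n)
		p <<= 1;
	return p;
}

int main(void)
{
	unsigned long sizes[] = { 1, 3, 5, 8, 33, 100 };
	for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		unsigned long iova_len = sizes[i];
		/* Only cacheable sizes get rounded up; larger requests fall
		 * back to the rbtree and keep their exact length. */
		if (iova_len < (1UL << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
			iova_len = roundup_pow_of_two_ul(iova_len);
		printf("requested %lu pages -> allocated %lu pages\n",
		       sizes[i], iova_len);
	}
	return 0;
}

With the assumed value of 6, requests below 32 pages are rounded up to the next power of two so that a later free always lands in one of the cache's power-of-two size classes, while larger requests bypass the caches entirely and keep their exact size.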