author		Robin Murphy <robin.murphy@arm.com>	2017-03-31 10:46:07 -0400
committer	Joerg Roedel <jroedel@suse.de>	2017-04-03 06:45:03 -0400
commit		bb65a64c7285e7105c1a6c8a33b37770343a4e96 (patch)
tree		2efc271de7785fa95fa91c6385c6fa56a76ce7be
parent		a44e6657585b15eeebf5681bfcc7ce0b002429c2 (diff)
iommu/dma: Plumb in the per-CPU IOVA caches
With IOVA allocation suitably tidied up, we are finally free to opt in
to the per-CPU caching mechanism. The caching alone can provide a modest
improvement over walking the rbtree for weedier systems (iperf3 shows
~10% more ethernet throughput on an ARM Juno r1 constrained to a single
650MHz Cortex-A53), but the real gain will be in sidestepping the rbtree
lock contention which larger ARM-based systems with lots of parallel I/O
are starting to feel the pain of.
Reviewed-by: Nate Watterson <nwatters@codeaurora.org>
Tested-by: Nate Watterson <nwatters@codeaurora.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
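For context, the mechanism being opted into fronts the rbtree allocator with alloc_iova_fast()/free_iova_fast(): most allocations are satisfied from per-CPU stashes of recently freed ranges, and the rbtree lock is only taken on a cache miss. The program below is a minimal, single-threaded sketch of that pattern, for illustration only; every identifier in it (toy_cache, toy_alloc_fast, slow_alloc) is made up and none of it is the real drivers/iommu/iova.c code.

/*
 * Toy sketch (not kernel code): a per-CPU-style cache of freed entries in
 * front of a slow, lock-protected allocator. Alloc/free pairs that hit the
 * cache never reach the slow path, which is the effect this patch is after.
 */
#include <stdio.h>

#define CACHE_DEPTH 4

struct toy_cache {
	unsigned long pfns[CACHE_DEPTH];
	int depth;
};

static struct toy_cache cache;		/* one of these per CPU in the real code */
static unsigned long next_pfn = 0x1000;	/* stand-in for the rbtree allocator */

/* Slow path: in the kernel this walks the rbtree under a spinlock. */
static unsigned long slow_alloc(void)
{
	return next_pfn++;
}

static unsigned long toy_alloc_fast(void)
{
	if (cache.depth)			/* cache hit: no lock, no rbtree */
		return cache.pfns[--cache.depth];
	return slow_alloc();			/* cache miss: fall back */
}

static void toy_free_fast(unsigned long pfn)
{
	if (cache.depth < CACHE_DEPTH)		/* stash for reuse */
		cache.pfns[cache.depth++] = pfn;
	/* else: the real code flushes a full magazine back to the rbtree */
}

int main(void)
{
	unsigned long pfn = toy_alloc_fast();	/* first call takes the slow path */

	toy_free_fast(pfn);
	printf("second allocation reused the freed range: %s\n",
	       toy_alloc_fast() == pfn ? "yes" : "no");
	return 0;
}

In the kernel the caches are additionally bucketed by allocation order, which is why the second hunk below has to round cacheable sizes up to a power of two; see the sketch after the diff.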
 drivers/iommu/dma-iommu.c | 37 +++++++++++++++++--------------------
 1 file changed, 17 insertions(+), 20 deletions(-)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 1b94beb43036..8348f366ddd1 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -361,8 +361,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 {
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
-	unsigned long shift, iova_len;
-	struct iova *iova = NULL;
+	unsigned long shift, iova_len, iova = 0;
 
 	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
 		cookie->msi_iova += size;
@@ -371,41 +370,39 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 
 	shift = iova_shift(iovad);
 	iova_len = size >> shift;
+	/*
+	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
+	 * will come back to bite us badly, so we have to waste a bit of space
+	 * rounding up anything cacheable to make sure that can't happen. The
+	 * order of the unadjusted size will still match upon freeing.
+	 */
+	if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
+		iova_len = roundup_pow_of_two(iova_len);
 
 	if (domain->geometry.force_aperture)
 		dma_limit = min(dma_limit, domain->geometry.aperture_end);
 
 	/* Try to get PCI devices a SAC address */
 	if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
-		iova = alloc_iova(iovad, iova_len, DMA_BIT_MASK(32) >> shift,
-				  true);
-	/*
-	 * Enforce size-alignment to be safe - there could perhaps be an
-	 * attribute to control this per-device, or at least per-domain...
-	 */
+		iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift);
+
 	if (!iova)
-		iova = alloc_iova(iovad, iova_len, dma_limit >> shift, true);
+		iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift);
 
-	return (dma_addr_t)iova->pfn_lo << shift;
+	return (dma_addr_t)iova << shift;
 }
 
 static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 		dma_addr_t iova, size_t size)
 {
 	struct iova_domain *iovad = &cookie->iovad;
-	struct iova *iova_rbnode;
+	unsigned long shift = iova_shift(iovad);
 
 	/* The MSI case is only ever cleaning up its most recent allocation */
-	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
+	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
 		cookie->msi_iova -= size;
-		return;
-	}
-
-	iova_rbnode = find_iova(iovad, iova_pfn(iovad, iova));
-	if (WARN_ON(!iova_rbnode))
-		return;
-
-	__free_iova(iovad, iova_rbnode);
+	else
+		free_iova_fast(iovad, iova >> shift, size >> shift);
 }
 
 static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
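The rounding logic added in the second hunk follows from that order-based bucketing: a cacheable request is rounded up to a power of two before allocation, while the unrounded size passed at free time still maps to the same order and therefore the same bucket. Below is a small userspace sketch of that arithmetic; IOVA_RANGE_CACHE_MAX_SIZE is assumed to be 6 (its value in drivers/iommu/iova.h in this era), and roundup_pow_of_two()/order_base_2() are re-implemented locally rather than taken from kernel headers.

#include <stdio.h>

#define IOVA_RANGE_CACHE_MAX_SIZE 6	/* assumed: orders 0..5 are cached */

/* Local stand-in for the kernel helper: smallest power of two >= n. */
static unsigned long roundup_pow_of_two(unsigned long n)
{
	unsigned long p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

/* Local stand-in for order_base_2(): ceil(log2(n)), with order_base_2(1) == 0. */
static unsigned int order_base_2(unsigned long n)
{
	unsigned int order = 0;

	while ((1UL << order) < n)
		order++;
	return order;
}

int main(void)
{
	unsigned long sizes[] = { 3, 5, 16, 100 };	/* request sizes in IOVA pages */

	for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		unsigned long len = sizes[i];
		unsigned long alloc_len = len;

		/* The rounding rule added by this patch. */
		if (len < (1UL << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
			alloc_len = roundup_pow_of_two(len);

		printf("request %3lu pages -> allocate %3lu (order %u); "
		       "free of %3lu maps to order %u\n",
		       len, alloc_len, order_base_2(alloc_len),
		       len, order_base_2(len));
	}
	return 0;
}

A 5-page request, for example, allocates 8 pages and is cached at order 3, and the later free of the original 5-page size also resolves to order 3, so the entry comes back out of the same bucket it went into as a genuine power-of-two range.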