author		Sebastian Ott <sebott@linux.vnet.ibm.com>	2016-09-08 07:25:01 -0400
committer	Martin Schwidefsky <schwidefsky@de.ibm.com>	2016-09-22 07:42:33 -0400
commit		13954fd6913acff8f8b8c21612074b57051ba457 (patch)
tree		5f2b8e7c89df80c25923fcfcdf252e24f9434ad0 /arch/s390/pci
parent		1f166e9e5c7cd5d1fe2a5da7c97c1688d4c93fbb (diff)
s390/pci_dma: improve lazy flush for unmap
Lazy unmap (defer tlb flush after unmap until dma address reuse) can greatly
reduce the number of RPCIT instructions in the best case. In reality we are
often far away from the best case scenario because our implementation suffers
from the following problem:

To create dma addresses we maintain an iommu bitmap and a pointer into that
bitmap to mark the start of the next search. That pointer moves from the start
to the end of that bitmap and we issue a global tlb flush once that pointer
wraps around. To prevent address reuse before we issue the tlb flush we even
have to move the next pointer during unmaps - when clearing a bit > next.
This could lead to a situation where we only use the rear part of that bitmap
and issue more tlb flushes than expected.

To fix this we no longer clear bits during unmap but maintain a 2nd bitmap
which we use to mark addresses that can't be reused until we issue the global
tlb flush after wrap around.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
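The scheme is easiest to see in a small stand-alone sketch. The following C program is a simplification for illustration only, not the kernel code: the fixed-size bool arrays, find_free(), refresh_global() and the single-threaded flow are stand-ins for the vzalloc'ed bitmaps, __dma_alloc_iommu(), zpci_refresh_global() and the spinlock-protected paths in pci_dma.c.

/*
 * Sketch of the two-bitmap lazy flush (simplified, user space only).
 */
#include <stdbool.h>
#include <stdio.h>

#define IOMMU_PAGES 16

static bool iommu_bitmap[IOMMU_PAGES];	/* in use, or freed but not yet flushed */
static bool lazy_bitmap[IOMMU_PAGES];	/* freed since the last global flush */
static int next_bit;			/* start of the next search */
static int flushes;			/* counts the "RPCIT" equivalents */

static void refresh_global(void)	/* stand-in for zpci_refresh_global() */
{
	flushes++;
}

static int find_free(int start)
{
	for (int i = start; i < IOMMU_PAGES; i++)
		if (!iommu_bitmap[i])
			return i;
	return -1;
}

static int alloc_address(void)
{
	int i = find_free(next_bit);

	if (i == -1) {
		/* wrap-around: one global flush, then recycle lazily freed bits */
		refresh_global();
		for (int j = 0; j < IOMMU_PAGES; j++) {
			if (lazy_bitmap[j])
				iommu_bitmap[j] = false;
			lazy_bitmap[j] = false;
		}
		i = find_free(0);
		if (i == -1)
			return -1;	/* address space exhausted */
	}
	iommu_bitmap[i] = true;
	next_bit = i + 1;
	return i;
}

static void free_address(int addr)
{
	/*
	 * Do not clear iommu_bitmap here: the address stays blocked until
	 * the next global flush, so a stale TLB entry can do no harm.
	 */
	lazy_bitmap[addr] = true;
}

int main(void)
{
	for (int i = 0; i < 64; i++) {
		int addr = alloc_address();

		if (addr >= 0)
			free_address(addr);
	}
	printf("64 map/unmap cycles, %d global flushes\n", flushes);
	return 0;
}

Because unmap only marks the lazy bitmap, next_bit never has to be pushed forward on free; addresses are handed back only at wrap-around, right after the single global flush, which is the behaviour the patch introduces.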
Diffstat (limited to 'arch/s390/pci')
-rw-r--r--	arch/s390/pci/pci_dma.c	47
1 file changed, 33 insertions(+), 14 deletions(-)
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 9e5f2ecf7f25..7350c8bc13a2 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -257,20 +257,28 @@ static dma_addr_t dma_alloc_address(struct device *dev, int size)
 	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
 	offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
 	if (offset == -1) {
+		if (!zdev->tlb_refresh && !s390_iommu_strict) {
+			/* global flush before DMA addresses are reused */
+			if (zpci_refresh_global(zdev))
+				goto out_error;
+
+			bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
+				      zdev->lazy_bitmap, zdev->iommu_pages);
+			bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
+		}
 		/* wrap-around */
 		offset = __dma_alloc_iommu(dev, 0, size);
-		if (offset == -1) {
-			spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-			return DMA_ERROR_CODE;
-		}
-		if (!zdev->tlb_refresh && !s390_iommu_strict)
-			/* global flush after wrap-around with lazy unmap */
-			zpci_refresh_global(zdev);
+		if (offset == -1)
+			goto out_error;
 	}
 	zdev->next_bit = offset + size;
 	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
 
 	return zdev->start_dma + offset * PAGE_SIZE;
+
+out_error:
+	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
+	return DMA_ERROR_CODE;
 }
 
 static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
@@ -283,13 +291,12 @@ static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
 	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
 	if (!zdev->iommu_bitmap)
 		goto out;
-	bitmap_clear(zdev->iommu_bitmap, offset, size);
-	/*
-	 * Lazy flush for unmap: need to move next_bit to avoid address re-use
-	 * until wrap-around.
-	 */
-	if (!s390_iommu_strict && offset >= zdev->next_bit)
-		zdev->next_bit = offset + size;
+
+	if (zdev->tlb_refresh || s390_iommu_strict)
+		bitmap_clear(zdev->iommu_bitmap, offset, size);
+	else
+		bitmap_set(zdev->lazy_bitmap, offset, size);
+
 out:
 	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
 }
@@ -557,7 +564,14 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
 		rc = -ENOMEM;
 		goto free_dma_table;
 	}
+	if (!zdev->tlb_refresh && !s390_iommu_strict) {
+		zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8);
+		if (!zdev->lazy_bitmap) {
+			rc = -ENOMEM;
+			goto free_bitmap;
+		}
 
+	}
 	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
 				(u64) zdev->dma_table);
 	if (rc)
@@ -567,6 +581,8 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
 free_bitmap:
 	vfree(zdev->iommu_bitmap);
 	zdev->iommu_bitmap = NULL;
+	vfree(zdev->lazy_bitmap);
+	zdev->lazy_bitmap = NULL;
 free_dma_table:
 	dma_free_cpu_table(zdev->dma_table);
 	zdev->dma_table = NULL;
@@ -588,6 +604,9 @@ void zpci_dma_exit_device(struct zpci_dev *zdev)
 	zdev->dma_table = NULL;
 	vfree(zdev->iommu_bitmap);
 	zdev->iommu_bitmap = NULL;
+	vfree(zdev->lazy_bitmap);
+	zdev->lazy_bitmap = NULL;
+
 	zdev->next_bit = 0;
 }
 