author		Gerald Schaefer <gerald.schaefer@de.ibm.com>	2014-07-18 11:37:08 -0400
committer	Martin Schwidefsky <schwidefsky@de.ibm.com>	2014-07-22 03:26:24 -0400
commit		c60d1ae4efcb5790f7d085369baf66c167a6484f (patch)
tree		7555d2b2e44a6ad6f07e16afaf15492b862d8f99 /arch/s390/pci
parent		29b8dd9d4274bca6526e4bb8d4f46dec1f4c15c9 (diff)
s390/pci: introduce lazy IOTLB flushing for DMA unmap
This changes the default IOTLB flushing method to lazy flushing, which
means that there will be no direct flush after each DMA unmap operation.
Instead, the iommu bitmap pointer will be adjusted after unmap, so that
no DMA address will be re-used until after an iommu bitmap wrap-around.
The only IOTLB flush will then happen after each wrap-around.

A new kernel parameter "s390_iommu=" is also introduced, to allow
changing the flushing behaviour to the old strict method.

Reviewed-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
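Before the diff, a minimal sketch of the lazy-flush idea in plain C may help.
This is a toy userspace model under assumptions of my own (IOMMU_PAGES, bitmap,
next_bit, alloc_range, lazy_alloc, lazy_free and refresh_global are all
illustrative stand-ins; locking, device state and the strict-mode path are
omitted); it is not the kernel code itself:

#include <stdio.h>

#define IOMMU_PAGES 8	/* toy map size; real iommu bitmaps are far larger */

/*
 * Illustrative stand-ins for the zpci state: bitmap marks DMA pages in
 * use, next_bit is where the next allocation starts scanning.
 */
static unsigned char bitmap[IOMMU_PAGES];
static unsigned long next_bit;

static void refresh_global(void)
{
	/* stands in for the global rpcit (zpci_refresh_global) */
	printf("global IOTLB flush\n");
}

/* scan from 'start' for 'size' consecutive free pages; -1 if none */
static long alloc_range(unsigned long start, int size)
{
	unsigned long i;
	int n;

	for (i = start; i + size <= IOMMU_PAGES; i++) {
		for (n = 0; n < size && !bitmap[i + n]; n++)
			;
		if (n == size) {
			for (n = 0; n < size; n++)
				bitmap[i + n] = 1;
			return i;
		}
	}
	return -1;
}

static long lazy_alloc(int size)
{
	long offset = alloc_range(next_bit, size);

	if (offset == -1) {
		/* wrap-around: addresses below next_bit may still sit in
		 * the IOTLB, so flush once before re-using any of them */
		offset = alloc_range(0, size);
		if (offset != -1)
			refresh_global();
	}
	if (offset != -1)
		next_bit = offset + size;
	return offset;
}

static void lazy_free(unsigned long offset, int size)
{
	int n;

	for (n = 0; n < size; n++)
		bitmap[offset + n] = 0;
	/* no flush here: just keep next_bit past the freed range so the
	 * address is not handed out again before a wrap-around */
	if (offset >= next_bit)
		next_bit = offset + size;
}

int main(void)
{
	long a = lazy_alloc(4);
	long b = lazy_alloc(4);

	lazy_free(a, 4);				/* cheap: no flush */
	printf("%ld %ld %ld\n", a, b, lazy_alloc(4));	/* wrap: one flush */
	return 0;
}

The asymmetry is the whole point: frees only advance next_bit, and the single
expensive flush is deferred until lazy_alloc wraps around, which is what the
dma_alloc_iommu/dma_free_iommu changes below implement.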
Diffstat (limited to 'arch/s390/pci')
-rw-r--r--	arch/s390/pci/pci_dma.c	50
1 file changed, 36 insertions(+), 14 deletions(-)
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index f91c03119804..4cbb29a4d615 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -16,6 +16,13 @@
 
 static struct kmem_cache *dma_region_table_cache;
 static struct kmem_cache *dma_page_table_cache;
+static int s390_iommu_strict;
+
+static int zpci_refresh_global(struct zpci_dev *zdev)
+{
+	return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
+				  zdev->iommu_pages * PAGE_SIZE);
+}
 
 static unsigned long *dma_alloc_cpu_table(void)
 {
@@ -155,18 +162,15 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
 	}
 
 	/*
-	 * rpcit is not required to establish new translations when previously
-	 * invalid translation-table entries are validated, however it is
-	 * required when altering previously valid entries.
+	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
+	 * translations when previously invalid translation-table entries are
+	 * validated. With lazy unmap, it also is skipped for previously valid
+	 * entries, but a global rpcit is then required before any address can
+	 * be re-used, i.e. after each iommu bitmap wrap-around.
 	 */
 	if (!zdev->tlb_refresh &&
-	    ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
-		/*
-		 * TODO: also need to check that the old entry is indeed INVALID
-		 * and not only for one page but for the whole range...
-		 * -> now we WARN_ON in that case but with lazy unmap that
-		 * needs to be redone!
-		 */
+	    (!s390_iommu_strict ||
+	     ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
 		goto no_refresh;
 
 	rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
@@ -220,16 +224,21 @@ static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
 static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
 {
 	unsigned long offset, flags;
+	int wrap = 0;
 
 	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
 	offset = __dma_alloc_iommu(zdev, zdev->next_bit, size);
-	if (offset == -1)
+	if (offset == -1) {
+		/* wrap-around */
 		offset = __dma_alloc_iommu(zdev, 0, size);
+		wrap = 1;
+	}
 
 	if (offset != -1) {
 		zdev->next_bit = offset + size;
-		if (zdev->next_bit >= zdev->iommu_pages)
-			zdev->next_bit = 0;
+		if (!zdev->tlb_refresh && !s390_iommu_strict && wrap)
+			/* global flush after wrap-around with lazy unmap */
+			zpci_refresh_global(zdev);
 	}
 	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
 	return offset;
@@ -243,7 +252,11 @@ static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size
 	if (!zdev->iommu_bitmap)
 		goto out;
 	bitmap_clear(zdev->iommu_bitmap, offset, size);
-	if (offset >= zdev->next_bit)
+	/*
+	 * Lazy flush for unmap: need to move next_bit to avoid address re-use
+	 * until wrap-around.
+	 */
+	if (!s390_iommu_strict && offset >= zdev->next_bit)
 		zdev->next_bit = offset + size;
 out:
 	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
@@ -504,3 +517,12 @@ struct dma_map_ops s390_dma_ops = {
 	/* dma_supported is unconditionally true without a callback */
 };
 EXPORT_SYMBOL_GPL(s390_dma_ops);
+
+static int __init s390_iommu_setup(char *str)
+{
+	if (!strncmp(str, "strict", 6))
+		s390_iommu_strict = 1;
+	return 0;
+}
+
+__setup("s390_iommu=", s390_iommu_setup);