aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Chris Hyser <chris.hyser@oracle.com> 2016-09-28 15:19:50 -0400
committer: David S. Miller <davem@davemloft.net> 2016-10-06 01:44:51 -0400
commitaa7bde1a8b49391d34f17905a04c3acf7770283d (patch)
tree8211bfaa3f0a1079db760f96f9d7ff277f996f62
parent8914391b4e6517ca3dbbb975fc38ce13b0c5ee45 (diff)
sparc64: Enable setting "relaxed ordering" in IOMMU mappings
Enable relaxed ordering for memory writes in IOMMU TSB entry from dma_4v_alloc_coherent(), dma_4v_map_page() and dma_4v_map_sg() when dma_attrs DMA_ATTR_WEAK_ORDERING is set. This requires PCI IOMMU I/O Translation Services version 2.0 API. Many PCIe devices allow enabling relaxed-ordering (memory writes bypassing other memory writes) for various DMA buffers. A notable exception is the Mellanox mlx4 IB adapter. Due to the nature of x86 HW this appears to have little performance impact there. On SPARC HW however, this results in major performance degradation getting only about 3Gbps. Enabling RO in the IOMMU entries corresponding to mlx4 data buffers increases the throughput to about 13 Gbps. Orabug: 19245907 Signed-off-by: Chris Hyser <chris.hyser@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--arch/sparc/include/asm/hypervisor.h1
-rw-r--r--arch/sparc/kernel/pci_sun4v.c16
2 files changed, 16 insertions, 1 deletion
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
index f5b6537306f0..666d5ba230d2 100644
--- a/arch/sparc/include/asm/hypervisor.h
+++ b/arch/sparc/include/asm/hypervisor.h
@@ -1744,6 +1744,7 @@ unsigned long sun4v_vintr_set_target(unsigned long dev_handle,
1744 1744
1745#define HV_PCI_MAP_ATTR_READ 0x01 1745#define HV_PCI_MAP_ATTR_READ 0x01
1746#define HV_PCI_MAP_ATTR_WRITE 0x02 1746#define HV_PCI_MAP_ATTR_WRITE 0x02
1747#define HV_PCI_MAP_ATTR_RELAXED_ORDER 0x04
1747 1748
1748#define HV_PCI_DEVICE_BUILD(b,d,f) \ 1749#define HV_PCI_DEVICE_BUILD(b,d,f) \
1749 ((((b) & 0xff) << 16) | \ 1750 ((((b) & 0xff) << 16) | \
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index c5c819daf800..db57d8acdc01 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -78,6 +78,10 @@ static long iommu_batch_flush(struct iommu_batch *p)
78 u64 *pglist = p->pglist; 78 u64 *pglist = p->pglist;
79 unsigned long npages = p->npages; 79 unsigned long npages = p->npages;
80 80
81 /* VPCI maj=1, min=[0,1] only supports read and write */
82 if (vpci_major < 2)
83 prot &= (HV_PCI_MAP_ATTR_READ | HV_PCI_MAP_ATTR_WRITE);
84
81 while (npages != 0) { 85 while (npages != 0) {
82 long num; 86 long num;
83 87
@@ -144,6 +148,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
144 unsigned long attrs) 148 unsigned long attrs)
145{ 149{
146 unsigned long flags, order, first_page, npages, n; 150 unsigned long flags, order, first_page, npages, n;
151 unsigned long prot = 0;
147 struct iommu *iommu; 152 struct iommu *iommu;
148 struct page *page; 153 struct page *page;
149 void *ret; 154 void *ret;
@@ -157,6 +162,9 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
157 162
158 npages = size >> IO_PAGE_SHIFT; 163 npages = size >> IO_PAGE_SHIFT;
159 164
165 if (attrs & DMA_ATTR_WEAK_ORDERING)
166 prot = HV_PCI_MAP_ATTR_RELAXED_ORDER;
167
160 nid = dev->archdata.numa_node; 168 nid = dev->archdata.numa_node;
161 page = alloc_pages_node(nid, gfp, order); 169 page = alloc_pages_node(nid, gfp, order);
162 if (unlikely(!page)) 170 if (unlikely(!page))
@@ -180,7 +188,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
180 local_irq_save(flags); 188 local_irq_save(flags);
181 189
182 iommu_batch_start(dev, 190 iommu_batch_start(dev,
183 (HV_PCI_MAP_ATTR_READ | 191 (HV_PCI_MAP_ATTR_READ | prot |
184 HV_PCI_MAP_ATTR_WRITE), 192 HV_PCI_MAP_ATTR_WRITE),
185 entry); 193 entry);
186 194
@@ -277,6 +285,9 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
277 if (direction != DMA_TO_DEVICE) 285 if (direction != DMA_TO_DEVICE)
278 prot |= HV_PCI_MAP_ATTR_WRITE; 286 prot |= HV_PCI_MAP_ATTR_WRITE;
279 287
288 if (attrs & DMA_ATTR_WEAK_ORDERING)
289 prot |= HV_PCI_MAP_ATTR_RELAXED_ORDER;
290
280 local_irq_save(flags); 291 local_irq_save(flags);
281 292
282 iommu_batch_start(dev, prot, entry); 293 iommu_batch_start(dev, prot, entry);
@@ -355,6 +366,9 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
355 if (direction != DMA_TO_DEVICE) 366 if (direction != DMA_TO_DEVICE)
356 prot |= HV_PCI_MAP_ATTR_WRITE; 367 prot |= HV_PCI_MAP_ATTR_WRITE;
357 368
369 if (attrs & DMA_ATTR_WEAK_ORDERING)
370 prot |= HV_PCI_MAP_ATTR_RELAXED_ORDER;
371
358 outs = s = segstart = &sglist[0]; 372 outs = s = segstart = &sglist[0];
359 outcount = 1; 373 outcount = 1;
360 incount = nelems; 374 incount = nelems;