aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Sowmini Varadhan <sowmini.varadhan@oracle.com> 2015-04-09 15:33:31 -0400
committer: David S. Miller <davem@davemloft.net> 2015-04-18 15:32:59 -0400
commit: bb620c3d3925aec0ed4f21010c86df08ec18a8c7 (patch)
tree: f3adfa12c95f9aa6dd844fd47fd486332bbd0a2e
parent: ff7d37a502022149655c18035b99a53391be0383 (diff)
sparc: Make sparc64 use scalable lib/iommu-common.c functions
In iperf experiments, running Linux as the Tx side (TCP client) with 10 threads results in a severe performance drop when TSO is disabled, indicating a weakness in the software that can be avoided by using the scalable IOMMU arena DMA allocation. Baseline numbers before this patch: with default settings (TSO enabled): 9-9.5 Gbps; with TSO disabled using ethtool, throughput drops badly to 2-3 Gbps. After this patch, an iperf client with 10 threads can give a throughput of at least 8.5 Gbps, even when TSO is disabled. Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- arch/sparc/include/asm/iommu_64.h 7
-rw-r--r-- arch/sparc/kernel/iommu.c 172
-rw-r--r-- arch/sparc/kernel/iommu_common.h 8
-rw-r--r-- arch/sparc/kernel/pci_sun4v.c 183
4 files changed, 128 insertions, 242 deletions
diff --git a/arch/sparc/include/asm/iommu_64.h b/arch/sparc/include/asm/iommu_64.h
index 2b9321ab064d..cd0d69fa7592 100644
--- a/arch/sparc/include/asm/iommu_64.h
+++ b/arch/sparc/include/asm/iommu_64.h
@@ -16,6 +16,7 @@
16#define IOPTE_WRITE 0x0000000000000002UL 16#define IOPTE_WRITE 0x0000000000000002UL
17 17
18#define IOMMU_NUM_CTXS 4096 18#define IOMMU_NUM_CTXS 4096
19#include <linux/iommu-common.h>
19 20
20struct iommu_arena { 21struct iommu_arena {
21 unsigned long *map; 22 unsigned long *map;
@@ -24,11 +25,10 @@ struct iommu_arena {
24}; 25};
25 26
26struct iommu { 27struct iommu {
28 struct iommu_map_table tbl;
27 spinlock_t lock; 29 spinlock_t lock;
28 struct iommu_arena arena; 30 u32 dma_addr_mask;
29 void (*flush_all)(struct iommu *);
30 iopte_t *page_table; 31 iopte_t *page_table;
31 u32 page_table_map_base;
32 unsigned long iommu_control; 32 unsigned long iommu_control;
33 unsigned long iommu_tsbbase; 33 unsigned long iommu_tsbbase;
34 unsigned long iommu_flush; 34 unsigned long iommu_flush;
@@ -40,7 +40,6 @@ struct iommu {
40 unsigned long dummy_page_pa; 40 unsigned long dummy_page_pa;
41 unsigned long ctx_lowest_free; 41 unsigned long ctx_lowest_free;
42 DECLARE_BITMAP(ctx_bitmap, IOMMU_NUM_CTXS); 42 DECLARE_BITMAP(ctx_bitmap, IOMMU_NUM_CTXS);
43 u32 dma_addr_mask;
44}; 43};
45 44
46struct strbuf { 45struct strbuf {
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index bfa4d0c2df42..5320689c06e9 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -13,6 +13,7 @@
13#include <linux/errno.h> 13#include <linux/errno.h>
14#include <linux/iommu-helper.h> 14#include <linux/iommu-helper.h>
15#include <linux/bitmap.h> 15#include <linux/bitmap.h>
16#include <linux/iommu-common.h>
16 17
17#ifdef CONFIG_PCI 18#ifdef CONFIG_PCI
18#include <linux/pci.h> 19#include <linux/pci.h>
@@ -45,8 +46,9 @@
45 "i" (ASI_PHYS_BYPASS_EC_E)) 46 "i" (ASI_PHYS_BYPASS_EC_E))
46 47
47/* Must be invoked under the IOMMU lock. */ 48/* Must be invoked under the IOMMU lock. */
48static void iommu_flushall(struct iommu *iommu) 49static void iommu_flushall(struct iommu_map_table *iommu_map_table)
49{ 50{
51 struct iommu *iommu = container_of(iommu_map_table, struct iommu, tbl);
50 if (iommu->iommu_flushinv) { 52 if (iommu->iommu_flushinv) {
51 iommu_write(iommu->iommu_flushinv, ~(u64)0); 53 iommu_write(iommu->iommu_flushinv, ~(u64)0);
52 } else { 54 } else {
@@ -87,94 +89,6 @@ static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte)
87 iopte_val(*iopte) = val; 89 iopte_val(*iopte) = val;
88} 90}
89 91
90/* Based almost entirely upon the ppc64 iommu allocator. If you use the 'handle'
91 * facility it must all be done in one pass while under the iommu lock.
92 *
93 * On sun4u platforms, we only flush the IOMMU once every time we've passed
94 * over the entire page table doing allocations. Therefore we only ever advance
95 * the hint and cannot backtrack it.
96 */
97unsigned long iommu_range_alloc(struct device *dev,
98 struct iommu *iommu,
99 unsigned long npages,
100 unsigned long *handle)
101{
102 unsigned long n, end, start, limit, boundary_size;
103 struct iommu_arena *arena = &iommu->arena;
104 int pass = 0;
105
106 /* This allocator was derived from x86_64's bit string search */
107
108 /* Sanity check */
109 if (unlikely(npages == 0)) {
110 if (printk_ratelimit())
111 WARN_ON(1);
112 return DMA_ERROR_CODE;
113 }
114
115 if (handle && *handle)
116 start = *handle;
117 else
118 start = arena->hint;
119
120 limit = arena->limit;
121
122 /* The case below can happen if we have a small segment appended
123 * to a large, or when the previous alloc was at the very end of
124 * the available space. If so, go back to the beginning and flush.
125 */
126 if (start >= limit) {
127 start = 0;
128 if (iommu->flush_all)
129 iommu->flush_all(iommu);
130 }
131
132 again:
133
134 if (dev)
135 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
136 1 << IO_PAGE_SHIFT);
137 else
138 boundary_size = ALIGN(1UL << 32, 1 << IO_PAGE_SHIFT);
139
140 n = iommu_area_alloc(arena->map, limit, start, npages,
141 iommu->page_table_map_base >> IO_PAGE_SHIFT,
142 boundary_size >> IO_PAGE_SHIFT, 0);
143 if (n == -1) {
144 if (likely(pass < 1)) {
145 /* First failure, rescan from the beginning. */
146 start = 0;
147 if (iommu->flush_all)
148 iommu->flush_all(iommu);
149 pass++;
150 goto again;
151 } else {
152 /* Second failure, give up */
153 return DMA_ERROR_CODE;
154 }
155 }
156
157 end = n + npages;
158
159 arena->hint = end;
160
161 /* Update handle for SG allocations */
162 if (handle)
163 *handle = end;
164
165 return n;
166}
167
168void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long npages)
169{
170 struct iommu_arena *arena = &iommu->arena;
171 unsigned long entry;
172
173 entry = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
174
175 bitmap_clear(arena->map, entry, npages);
176}
177
178int iommu_table_init(struct iommu *iommu, int tsbsize, 92int iommu_table_init(struct iommu *iommu, int tsbsize,
179 u32 dma_offset, u32 dma_addr_mask, 93 u32 dma_offset, u32 dma_addr_mask,
180 int numa_node) 94 int numa_node)
@@ -187,22 +101,20 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
187 /* Setup initial software IOMMU state. */ 101 /* Setup initial software IOMMU state. */
188 spin_lock_init(&iommu->lock); 102 spin_lock_init(&iommu->lock);
189 iommu->ctx_lowest_free = 1; 103 iommu->ctx_lowest_free = 1;
190 iommu->page_table_map_base = dma_offset; 104 iommu->tbl.table_map_base = dma_offset;
191 iommu->dma_addr_mask = dma_addr_mask; 105 iommu->dma_addr_mask = dma_addr_mask;
192 106
193 /* Allocate and initialize the free area map. */ 107 /* Allocate and initialize the free area map. */
194 sz = num_tsb_entries / 8; 108 sz = num_tsb_entries / 8;
195 sz = (sz + 7UL) & ~7UL; 109 sz = (sz + 7UL) & ~7UL;
196 iommu->arena.map = kmalloc_node(sz, GFP_KERNEL, numa_node); 110 iommu->tbl.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
197 if (!iommu->arena.map) { 111 if (!iommu->tbl.map)
198 printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n");
199 return -ENOMEM; 112 return -ENOMEM;
200 } 113 memset(iommu->tbl.map, 0, sz);
201 memset(iommu->arena.map, 0, sz);
202 iommu->arena.limit = num_tsb_entries;
203 114
204 if (tlb_type != hypervisor) 115 iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
205 iommu->flush_all = iommu_flushall; 116 (tlb_type != hypervisor ? iommu_flushall : NULL),
117 false, 1, false);
206 118
207 /* Allocate and initialize the dummy page which we 119 /* Allocate and initialize the dummy page which we
208 * set inactive IO PTEs to point to. 120 * set inactive IO PTEs to point to.
@@ -235,18 +147,20 @@ out_free_dummy_page:
235 iommu->dummy_page = 0UL; 147 iommu->dummy_page = 0UL;
236 148
237out_free_map: 149out_free_map:
238 kfree(iommu->arena.map); 150 kfree(iommu->tbl.map);
239 iommu->arena.map = NULL; 151 iommu->tbl.map = NULL;
240 152
241 return -ENOMEM; 153 return -ENOMEM;
242} 154}
243 155
244static inline iopte_t *alloc_npages(struct device *dev, struct iommu *iommu, 156static inline iopte_t *alloc_npages(struct device *dev,
157 struct iommu *iommu,
245 unsigned long npages) 158 unsigned long npages)
246{ 159{
247 unsigned long entry; 160 unsigned long entry;
248 161
249 entry = iommu_range_alloc(dev, iommu, npages, NULL); 162 entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
163 (unsigned long)(-1), 0);
250 if (unlikely(entry == DMA_ERROR_CODE)) 164 if (unlikely(entry == DMA_ERROR_CODE))
251 return NULL; 165 return NULL;
252 166
@@ -284,7 +198,7 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
284 dma_addr_t *dma_addrp, gfp_t gfp, 198 dma_addr_t *dma_addrp, gfp_t gfp,
285 struct dma_attrs *attrs) 199 struct dma_attrs *attrs)
286{ 200{
287 unsigned long flags, order, first_page; 201 unsigned long order, first_page;
288 struct iommu *iommu; 202 struct iommu *iommu;
289 struct page *page; 203 struct page *page;
290 int npages, nid; 204 int npages, nid;
@@ -306,16 +220,14 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
306 220
307 iommu = dev->archdata.iommu; 221 iommu = dev->archdata.iommu;
308 222
309 spin_lock_irqsave(&iommu->lock, flags);
310 iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT); 223 iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
311 spin_unlock_irqrestore(&iommu->lock, flags);
312 224
313 if (unlikely(iopte == NULL)) { 225 if (unlikely(iopte == NULL)) {
314 free_pages(first_page, order); 226 free_pages(first_page, order);
315 return NULL; 227 return NULL;
316 } 228 }
317 229
318 *dma_addrp = (iommu->page_table_map_base + 230 *dma_addrp = (iommu->tbl.table_map_base +
319 ((iopte - iommu->page_table) << IO_PAGE_SHIFT)); 231 ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
320 ret = (void *) first_page; 232 ret = (void *) first_page;
321 npages = size >> IO_PAGE_SHIFT; 233 npages = size >> IO_PAGE_SHIFT;
@@ -336,16 +248,12 @@ static void dma_4u_free_coherent(struct device *dev, size_t size,
336 struct dma_attrs *attrs) 248 struct dma_attrs *attrs)
337{ 249{
338 struct iommu *iommu; 250 struct iommu *iommu;
339 unsigned long flags, order, npages; 251 unsigned long order, npages;
340 252
341 npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; 253 npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
342 iommu = dev->archdata.iommu; 254 iommu = dev->archdata.iommu;
343 255
344 spin_lock_irqsave(&iommu->lock, flags); 256 iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE);
345
346 iommu_range_free(iommu, dvma, npages);
347
348 spin_unlock_irqrestore(&iommu->lock, flags);
349 257
350 order = get_order(size); 258 order = get_order(size);
351 if (order < 10) 259 if (order < 10)
@@ -375,8 +283,8 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
375 npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); 283 npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
376 npages >>= IO_PAGE_SHIFT; 284 npages >>= IO_PAGE_SHIFT;
377 285
378 spin_lock_irqsave(&iommu->lock, flags);
379 base = alloc_npages(dev, iommu, npages); 286 base = alloc_npages(dev, iommu, npages);
287 spin_lock_irqsave(&iommu->lock, flags);
380 ctx = 0; 288 ctx = 0;
381 if (iommu->iommu_ctxflush) 289 if (iommu->iommu_ctxflush)
382 ctx = iommu_alloc_ctx(iommu); 290 ctx = iommu_alloc_ctx(iommu);
@@ -385,7 +293,7 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
385 if (unlikely(!base)) 293 if (unlikely(!base))
386 goto bad; 294 goto bad;
387 295
388 bus_addr = (iommu->page_table_map_base + 296 bus_addr = (iommu->tbl.table_map_base +
389 ((base - iommu->page_table) << IO_PAGE_SHIFT)); 297 ((base - iommu->page_table) << IO_PAGE_SHIFT));
390 ret = bus_addr | (oaddr & ~IO_PAGE_MASK); 298 ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
391 base_paddr = __pa(oaddr & IO_PAGE_MASK); 299 base_paddr = __pa(oaddr & IO_PAGE_MASK);
@@ -496,7 +404,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
496 npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); 404 npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
497 npages >>= IO_PAGE_SHIFT; 405 npages >>= IO_PAGE_SHIFT;
498 base = iommu->page_table + 406 base = iommu->page_table +
499 ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); 407 ((bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
500 bus_addr &= IO_PAGE_MASK; 408 bus_addr &= IO_PAGE_MASK;
501 409
502 spin_lock_irqsave(&iommu->lock, flags); 410 spin_lock_irqsave(&iommu->lock, flags);
@@ -515,11 +423,10 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
515 for (i = 0; i < npages; i++) 423 for (i = 0; i < npages; i++)
516 iopte_make_dummy(iommu, base + i); 424 iopte_make_dummy(iommu, base + i);
517 425
518 iommu_range_free(iommu, bus_addr, npages);
519
520 iommu_free_ctx(iommu, ctx); 426 iommu_free_ctx(iommu, ctx);
521
522 spin_unlock_irqrestore(&iommu->lock, flags); 427 spin_unlock_irqrestore(&iommu->lock, flags);
428
429 iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
523} 430}
524 431
525static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, 432static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -567,7 +474,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
567 max_seg_size = dma_get_max_seg_size(dev); 474 max_seg_size = dma_get_max_seg_size(dev);
568 seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, 475 seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
569 IO_PAGE_SIZE) >> IO_PAGE_SHIFT; 476 IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
570 base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT; 477 base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT;
571 for_each_sg(sglist, s, nelems, i) { 478 for_each_sg(sglist, s, nelems, i) {
572 unsigned long paddr, npages, entry, out_entry = 0, slen; 479 unsigned long paddr, npages, entry, out_entry = 0, slen;
573 iopte_t *base; 480 iopte_t *base;
@@ -581,7 +488,8 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
581 /* Allocate iommu entries for that segment */ 488 /* Allocate iommu entries for that segment */
582 paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s); 489 paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
583 npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE); 490 npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
584 entry = iommu_range_alloc(dev, iommu, npages, &handle); 491 entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages,
492 &handle, (unsigned long)(-1), 0);
585 493
586 /* Handle failure */ 494 /* Handle failure */
587 if (unlikely(entry == DMA_ERROR_CODE)) { 495 if (unlikely(entry == DMA_ERROR_CODE)) {
@@ -594,7 +502,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
594 base = iommu->page_table + entry; 502 base = iommu->page_table + entry;
595 503
596 /* Convert entry to a dma_addr_t */ 504 /* Convert entry to a dma_addr_t */
597 dma_addr = iommu->page_table_map_base + 505 dma_addr = iommu->tbl.table_map_base +
598 (entry << IO_PAGE_SHIFT); 506 (entry << IO_PAGE_SHIFT);
599 dma_addr |= (s->offset & ~IO_PAGE_MASK); 507 dma_addr |= (s->offset & ~IO_PAGE_MASK);
600 508
@@ -654,15 +562,17 @@ iommu_map_failed:
654 vaddr = s->dma_address & IO_PAGE_MASK; 562 vaddr = s->dma_address & IO_PAGE_MASK;
655 npages = iommu_num_pages(s->dma_address, s->dma_length, 563 npages = iommu_num_pages(s->dma_address, s->dma_length,
656 IO_PAGE_SIZE); 564 IO_PAGE_SIZE);
657 iommu_range_free(iommu, vaddr, npages);
658 565
659 entry = (vaddr - iommu->page_table_map_base) 566 entry = (vaddr - iommu->tbl.table_map_base)
660 >> IO_PAGE_SHIFT; 567 >> IO_PAGE_SHIFT;
661 base = iommu->page_table + entry; 568 base = iommu->page_table + entry;
662 569
663 for (j = 0; j < npages; j++) 570 for (j = 0; j < npages; j++)
664 iopte_make_dummy(iommu, base + j); 571 iopte_make_dummy(iommu, base + j);
665 572
573 iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
574 DMA_ERROR_CODE);
575
666 s->dma_address = DMA_ERROR_CODE; 576 s->dma_address = DMA_ERROR_CODE;
667 s->dma_length = 0; 577 s->dma_length = 0;
668 } 578 }
@@ -684,10 +594,11 @@ static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg)
684 if (iommu->iommu_ctxflush) { 594 if (iommu->iommu_ctxflush) {
685 iopte_t *base; 595 iopte_t *base;
686 u32 bus_addr; 596 u32 bus_addr;
597 struct iommu_map_table *tbl = &iommu->tbl;
687 598
688 bus_addr = sg->dma_address & IO_PAGE_MASK; 599 bus_addr = sg->dma_address & IO_PAGE_MASK;
689 base = iommu->page_table + 600 base = iommu->page_table +
690 ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); 601 ((bus_addr - tbl->table_map_base) >> IO_PAGE_SHIFT);
691 602
692 ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL; 603 ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
693 } 604 }
@@ -723,9 +634,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
723 if (!len) 634 if (!len)
724 break; 635 break;
725 npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE); 636 npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
726 iommu_range_free(iommu, dma_handle, npages);
727 637
728 entry = ((dma_handle - iommu->page_table_map_base) 638 entry = ((dma_handle - iommu->tbl.table_map_base)
729 >> IO_PAGE_SHIFT); 639 >> IO_PAGE_SHIFT);
730 base = iommu->page_table + entry; 640 base = iommu->page_table + entry;
731 641
@@ -737,6 +647,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
737 for (i = 0; i < npages; i++) 647 for (i = 0; i < npages; i++)
738 iopte_make_dummy(iommu, base + i); 648 iopte_make_dummy(iommu, base + i);
739 649
650 iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
651 DMA_ERROR_CODE);
740 sg = sg_next(sg); 652 sg = sg_next(sg);
741 } 653 }
742 654
@@ -770,9 +682,10 @@ static void dma_4u_sync_single_for_cpu(struct device *dev,
770 if (iommu->iommu_ctxflush && 682 if (iommu->iommu_ctxflush &&
771 strbuf->strbuf_ctxflush) { 683 strbuf->strbuf_ctxflush) {
772 iopte_t *iopte; 684 iopte_t *iopte;
685 struct iommu_map_table *tbl = &iommu->tbl;
773 686
774 iopte = iommu->page_table + 687 iopte = iommu->page_table +
775 ((bus_addr - iommu->page_table_map_base)>>IO_PAGE_SHIFT); 688 ((bus_addr - tbl->table_map_base)>>IO_PAGE_SHIFT);
776 ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL; 689 ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
777 } 690 }
778 691
@@ -805,9 +718,10 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev,
805 if (iommu->iommu_ctxflush && 718 if (iommu->iommu_ctxflush &&
806 strbuf->strbuf_ctxflush) { 719 strbuf->strbuf_ctxflush) {
807 iopte_t *iopte; 720 iopte_t *iopte;
721 struct iommu_map_table *tbl = &iommu->tbl;
808 722
809 iopte = iommu->page_table + 723 iopte = iommu->page_table + ((sglist[0].dma_address -
810 ((sglist[0].dma_address - iommu->page_table_map_base) >> IO_PAGE_SHIFT); 724 tbl->table_map_base) >> IO_PAGE_SHIFT);
811 ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL; 725 ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
812 } 726 }
813 727
diff --git a/arch/sparc/kernel/iommu_common.h b/arch/sparc/kernel/iommu_common.h
index 1ec0de4156e7..f4be0d724fc6 100644
--- a/arch/sparc/kernel/iommu_common.h
+++ b/arch/sparc/kernel/iommu_common.h
@@ -48,12 +48,4 @@ static inline int is_span_boundary(unsigned long entry,
48 return iommu_is_span_boundary(entry, nr, shift, boundary_size); 48 return iommu_is_span_boundary(entry, nr, shift, boundary_size);
49} 49}
50 50
51unsigned long iommu_range_alloc(struct device *dev,
52 struct iommu *iommu,
53 unsigned long npages,
54 unsigned long *handle);
55void iommu_range_free(struct iommu *iommu,
56 dma_addr_t dma_addr,
57 unsigned long npages);
58
59#endif /* _IOMMU_COMMON_H */ 51#endif /* _IOMMU_COMMON_H */
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index 47ddbd496a1e..d2fe57dad433 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -15,6 +15,7 @@
15#include <linux/export.h> 15#include <linux/export.h>
16#include <linux/log2.h> 16#include <linux/log2.h>
17#include <linux/of_device.h> 17#include <linux/of_device.h>
18#include <linux/iommu-common.h>
18 19
19#include <asm/iommu.h> 20#include <asm/iommu.h>
20#include <asm/irq.h> 21#include <asm/irq.h>
@@ -155,15 +156,13 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
155 156
156 iommu = dev->archdata.iommu; 157 iommu = dev->archdata.iommu;
157 158
158 spin_lock_irqsave(&iommu->lock, flags); 159 entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
159 entry = iommu_range_alloc(dev, iommu, npages, NULL); 160 (unsigned long)(-1), 0);
160 spin_unlock_irqrestore(&iommu->lock, flags);
161 161
162 if (unlikely(entry == DMA_ERROR_CODE)) 162 if (unlikely(entry == DMA_ERROR_CODE))
163 goto range_alloc_fail; 163 goto range_alloc_fail;
164 164
165 *dma_addrp = (iommu->page_table_map_base + 165 *dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
166 (entry << IO_PAGE_SHIFT));
167 ret = (void *) first_page; 166 ret = (void *) first_page;
168 first_page = __pa(first_page); 167 first_page = __pa(first_page);
169 168
@@ -188,45 +187,46 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
188 return ret; 187 return ret;
189 188
190iommu_map_fail: 189iommu_map_fail:
191 /* Interrupts are disabled. */ 190 iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, DMA_ERROR_CODE);
192 spin_lock(&iommu->lock);
193 iommu_range_free(iommu, *dma_addrp, npages);
194 spin_unlock_irqrestore(&iommu->lock, flags);
195 191
196range_alloc_fail: 192range_alloc_fail:
197 free_pages(first_page, order); 193 free_pages(first_page, order);
198 return NULL; 194 return NULL;
199} 195}
200 196
197static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry,
198 unsigned long npages)
199{
200 u32 devhandle = *(u32 *)demap_arg;
201 unsigned long num, flags;
202
203 local_irq_save(flags);
204 do {
205 num = pci_sun4v_iommu_demap(devhandle,
206 HV_PCI_TSBID(0, entry),
207 npages);
208
209 entry += num;
210 npages -= num;
211 } while (npages != 0);
212 local_irq_restore(flags);
213}
214
201static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, 215static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
202 dma_addr_t dvma, struct dma_attrs *attrs) 216 dma_addr_t dvma, struct dma_attrs *attrs)
203{ 217{
204 struct pci_pbm_info *pbm; 218 struct pci_pbm_info *pbm;
205 struct iommu *iommu; 219 struct iommu *iommu;
206 unsigned long flags, order, npages, entry; 220 unsigned long order, npages, entry;
207 u32 devhandle; 221 u32 devhandle;
208 222
209 npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; 223 npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
210 iommu = dev->archdata.iommu; 224 iommu = dev->archdata.iommu;
211 pbm = dev->archdata.host_controller; 225 pbm = dev->archdata.host_controller;
212 devhandle = pbm->devhandle; 226 devhandle = pbm->devhandle;
213 entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT); 227 entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
214 228 dma_4v_iommu_demap(&devhandle, entry, npages);
215 spin_lock_irqsave(&iommu->lock, flags); 229 iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE);
216
217 iommu_range_free(iommu, dvma, npages);
218
219 do {
220 unsigned long num;
221
222 num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
223 npages);
224 entry += num;
225 npages -= num;
226 } while (npages != 0);
227
228 spin_unlock_irqrestore(&iommu->lock, flags);
229
230 order = get_order(size); 230 order = get_order(size);
231 if (order < 10) 231 if (order < 10)
232 free_pages((unsigned long)cpu, order); 232 free_pages((unsigned long)cpu, order);
@@ -253,15 +253,13 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
253 npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); 253 npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
254 npages >>= IO_PAGE_SHIFT; 254 npages >>= IO_PAGE_SHIFT;
255 255
256 spin_lock_irqsave(&iommu->lock, flags); 256 entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
257 entry = iommu_range_alloc(dev, iommu, npages, NULL); 257 (unsigned long)(-1), 0);
258 spin_unlock_irqrestore(&iommu->lock, flags);
259 258
260 if (unlikely(entry == DMA_ERROR_CODE)) 259 if (unlikely(entry == DMA_ERROR_CODE))
261 goto bad; 260 goto bad;
262 261
263 bus_addr = (iommu->page_table_map_base + 262 bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
264 (entry << IO_PAGE_SHIFT));
265 ret = bus_addr | (oaddr & ~IO_PAGE_MASK); 263 ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
266 base_paddr = __pa(oaddr & IO_PAGE_MASK); 264 base_paddr = __pa(oaddr & IO_PAGE_MASK);
267 prot = HV_PCI_MAP_ATTR_READ; 265 prot = HV_PCI_MAP_ATTR_READ;
@@ -290,11 +288,7 @@ bad:
290 return DMA_ERROR_CODE; 288 return DMA_ERROR_CODE;
291 289
292iommu_map_fail: 290iommu_map_fail:
293 /* Interrupts are disabled. */ 291 iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
294 spin_lock(&iommu->lock);
295 iommu_range_free(iommu, bus_addr, npages);
296 spin_unlock_irqrestore(&iommu->lock, flags);
297
298 return DMA_ERROR_CODE; 292 return DMA_ERROR_CODE;
299} 293}
300 294
@@ -304,7 +298,7 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
304{ 298{
305 struct pci_pbm_info *pbm; 299 struct pci_pbm_info *pbm;
306 struct iommu *iommu; 300 struct iommu *iommu;
307 unsigned long flags, npages; 301 unsigned long npages;
308 long entry; 302 long entry;
309 u32 devhandle; 303 u32 devhandle;
310 304
@@ -321,22 +315,9 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
321 npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); 315 npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
322 npages >>= IO_PAGE_SHIFT; 316 npages >>= IO_PAGE_SHIFT;
323 bus_addr &= IO_PAGE_MASK; 317 bus_addr &= IO_PAGE_MASK;
324 318 entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT;
325 spin_lock_irqsave(&iommu->lock, flags); 319 dma_4v_iommu_demap(&devhandle, entry, npages);
326 320 iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
327 iommu_range_free(iommu, bus_addr, npages);
328
329 entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
330 do {
331 unsigned long num;
332
333 num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
334 npages);
335 entry += num;
336 npages -= num;
337 } while (npages != 0);
338
339 spin_unlock_irqrestore(&iommu->lock, flags);
340} 321}
341 322
342static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, 323static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -371,14 +352,14 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
371 /* Init first segment length for backout at failure */ 352 /* Init first segment length for backout at failure */
372 outs->dma_length = 0; 353 outs->dma_length = 0;
373 354
374 spin_lock_irqsave(&iommu->lock, flags); 355 local_irq_save(flags);
375 356
376 iommu_batch_start(dev, prot, ~0UL); 357 iommu_batch_start(dev, prot, ~0UL);
377 358
378 max_seg_size = dma_get_max_seg_size(dev); 359 max_seg_size = dma_get_max_seg_size(dev);
379 seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, 360 seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
380 IO_PAGE_SIZE) >> IO_PAGE_SHIFT; 361 IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
381 base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT; 362 base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT;
382 for_each_sg(sglist, s, nelems, i) { 363 for_each_sg(sglist, s, nelems, i) {
383 unsigned long paddr, npages, entry, out_entry = 0, slen; 364 unsigned long paddr, npages, entry, out_entry = 0, slen;
384 365
@@ -391,7 +372,8 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
391 /* Allocate iommu entries for that segment */ 372 /* Allocate iommu entries for that segment */
392 paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s); 373 paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
393 npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE); 374 npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
394 entry = iommu_range_alloc(dev, iommu, npages, &handle); 375 entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages,
376 &handle, (unsigned long)(-1), 0);
395 377
396 /* Handle failure */ 378 /* Handle failure */
397 if (unlikely(entry == DMA_ERROR_CODE)) { 379 if (unlikely(entry == DMA_ERROR_CODE)) {
@@ -404,8 +386,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
404 iommu_batch_new_entry(entry); 386 iommu_batch_new_entry(entry);
405 387
406 /* Convert entry to a dma_addr_t */ 388 /* Convert entry to a dma_addr_t */
407 dma_addr = iommu->page_table_map_base + 389 dma_addr = iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT);
408 (entry << IO_PAGE_SHIFT);
409 dma_addr |= (s->offset & ~IO_PAGE_MASK); 390 dma_addr |= (s->offset & ~IO_PAGE_MASK);
410 391
411 /* Insert into HW table */ 392 /* Insert into HW table */
@@ -451,7 +432,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
451 if (unlikely(err < 0L)) 432 if (unlikely(err < 0L))
452 goto iommu_map_failed; 433 goto iommu_map_failed;
453 434
454 spin_unlock_irqrestore(&iommu->lock, flags); 435 local_irq_restore(flags);
455 436
456 if (outcount < incount) { 437 if (outcount < incount) {
457 outs = sg_next(outs); 438 outs = sg_next(outs);
@@ -469,7 +450,8 @@ iommu_map_failed:
469 vaddr = s->dma_address & IO_PAGE_MASK; 450 vaddr = s->dma_address & IO_PAGE_MASK;
470 npages = iommu_num_pages(s->dma_address, s->dma_length, 451 npages = iommu_num_pages(s->dma_address, s->dma_length,
471 IO_PAGE_SIZE); 452 IO_PAGE_SIZE);
472 iommu_range_free(iommu, vaddr, npages); 453 iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
454 DMA_ERROR_CODE);
473 /* XXX demap? XXX */ 455 /* XXX demap? XXX */
474 s->dma_address = DMA_ERROR_CODE; 456 s->dma_address = DMA_ERROR_CODE;
475 s->dma_length = 0; 457 s->dma_length = 0;
@@ -477,7 +459,7 @@ iommu_map_failed:
477 if (s == outs) 459 if (s == outs)
478 break; 460 break;
479 } 461 }
480 spin_unlock_irqrestore(&iommu->lock, flags); 462 local_irq_restore(flags);
481 463
482 return 0; 464 return 0;
483} 465}
@@ -489,7 +471,7 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
489 struct pci_pbm_info *pbm; 471 struct pci_pbm_info *pbm;
490 struct scatterlist *sg; 472 struct scatterlist *sg;
491 struct iommu *iommu; 473 struct iommu *iommu;
492 unsigned long flags; 474 unsigned long flags, entry;
493 u32 devhandle; 475 u32 devhandle;
494 476
495 BUG_ON(direction == DMA_NONE); 477 BUG_ON(direction == DMA_NONE);
@@ -498,33 +480,27 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
498 pbm = dev->archdata.host_controller; 480 pbm = dev->archdata.host_controller;
499 devhandle = pbm->devhandle; 481 devhandle = pbm->devhandle;
500 482
501 spin_lock_irqsave(&iommu->lock, flags); 483 local_irq_save(flags);
502 484
503 sg = sglist; 485 sg = sglist;
504 while (nelems--) { 486 while (nelems--) {
505 dma_addr_t dma_handle = sg->dma_address; 487 dma_addr_t dma_handle = sg->dma_address;
506 unsigned int len = sg->dma_length; 488 unsigned int len = sg->dma_length;
507 unsigned long npages, entry; 489 unsigned long npages;
490 struct iommu_map_table *tbl = &iommu->tbl;
491 unsigned long shift = IO_PAGE_SHIFT;
508 492
509 if (!len) 493 if (!len)
510 break; 494 break;
511 npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE); 495 npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
512 iommu_range_free(iommu, dma_handle, npages); 496 entry = ((dma_handle - tbl->table_map_base) >> shift);
513 497 dma_4v_iommu_demap(&devhandle, entry, npages);
514 entry = ((dma_handle - iommu->page_table_map_base) >> IO_PAGE_SHIFT); 498 iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
515 while (npages) { 499 DMA_ERROR_CODE);
516 unsigned long num;
517
518 num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
519 npages);
520 entry += num;
521 npages -= num;
522 }
523
524 sg = sg_next(sg); 500 sg = sg_next(sg);
525 } 501 }
526 502
527 spin_unlock_irqrestore(&iommu->lock, flags); 503 local_irq_restore(flags);
528} 504}
529 505
530static struct dma_map_ops sun4v_dma_ops = { 506static struct dma_map_ops sun4v_dma_ops = {
@@ -550,30 +526,33 @@ static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent)
550} 526}
551 527
552static unsigned long probe_existing_entries(struct pci_pbm_info *pbm, 528static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
553 struct iommu *iommu) 529 struct iommu_map_table *iommu)
554{ 530{
555 struct iommu_arena *arena = &iommu->arena; 531 struct iommu_pool *pool;
556 unsigned long i, cnt = 0; 532 unsigned long i, pool_nr, cnt = 0;
557 u32 devhandle; 533 u32 devhandle;
558 534
559 devhandle = pbm->devhandle; 535 devhandle = pbm->devhandle;
560 for (i = 0; i < arena->limit; i++) { 536 for (pool_nr = 0; pool_nr < iommu->nr_pools; pool_nr++) {
561 unsigned long ret, io_attrs, ra; 537 pool = &(iommu->pools[pool_nr]);
562 538 for (i = pool->start; i <= pool->end; i++) {
563 ret = pci_sun4v_iommu_getmap(devhandle, 539 unsigned long ret, io_attrs, ra;
564 HV_PCI_TSBID(0, i), 540
565 &io_attrs, &ra); 541 ret = pci_sun4v_iommu_getmap(devhandle,
566 if (ret == HV_EOK) { 542 HV_PCI_TSBID(0, i),
567 if (page_in_phys_avail(ra)) { 543 &io_attrs, &ra);
568 pci_sun4v_iommu_demap(devhandle, 544 if (ret == HV_EOK) {
569 HV_PCI_TSBID(0, i), 1); 545 if (page_in_phys_avail(ra)) {
570 } else { 546 pci_sun4v_iommu_demap(devhandle,
571 cnt++; 547 HV_PCI_TSBID(0,
572 __set_bit(i, arena->map); 548 i), 1);
549 } else {
550 cnt++;
551 __set_bit(i, iommu->map);
552 }
573 } 553 }
574 } 554 }
575 } 555 }
576
577 return cnt; 556 return cnt;
578} 557}
579 558
@@ -603,20 +582,22 @@ static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
603 /* Setup initial software IOMMU state. */ 582 /* Setup initial software IOMMU state. */
604 spin_lock_init(&iommu->lock); 583 spin_lock_init(&iommu->lock);
605 iommu->ctx_lowest_free = 1; 584 iommu->ctx_lowest_free = 1;
606 iommu->page_table_map_base = dma_offset; 585 iommu->tbl.table_map_base = dma_offset;
607 iommu->dma_addr_mask = dma_mask; 586 iommu->dma_addr_mask = dma_mask;
608 587
609 /* Allocate and initialize the free area map. */ 588 /* Allocate and initialize the free area map. */
610 sz = (num_tsb_entries + 7) / 8; 589 sz = (num_tsb_entries + 7) / 8;
611 sz = (sz + 7UL) & ~7UL; 590 sz = (sz + 7UL) & ~7UL;
612 iommu->arena.map = kzalloc(sz, GFP_KERNEL); 591 iommu->tbl.map = kzalloc(sz, GFP_KERNEL);
613 if (!iommu->arena.map) { 592 if (!iommu->tbl.map) {
614 printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n"); 593 printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n");
615 return -ENOMEM; 594 return -ENOMEM;
616 } 595 }
617 iommu->arena.limit = num_tsb_entries; 596 iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
618 597 NULL, false /* no large_pool */,
619 sz = probe_existing_entries(pbm, iommu); 598 0 /* default npools */,
599 false /* want span boundary checking */);
600 sz = probe_existing_entries(pbm, &iommu->tbl);
620 if (sz) 601 if (sz)
621 printk("%s: Imported %lu TSB entries from OBP\n", 602 printk("%s: Imported %lu TSB entries from OBP\n",
622 pbm->name, sz); 603 pbm->name, sz);