aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc/kernel/pci_sun4v.c
diff options
context:
space:
mode:
author Sowmini Varadhan <sowmini.varadhan@oracle.com> 2015-03-12 20:02:36 -0400
committer David S. Miller <davem@davemloft.net> 2015-04-16 15:44:56 -0400
commit f1600e549b948a32ad7672e069b2915314637ae3 (patch)
tree 1386e80c8654c0a88b80c56f605a2705e0f4a115 /arch/sparc/kernel/pci_sun4v.c
parent 10b88a4b17d31a7409494b179dcb76e7ab2fcaea (diff)
sparc: Make sparc64 use scalable lib/iommu-common.c functions
In iperf experiments, running Linux as the Tx side (TCP client) with 10 threads results in a severe performance drop when TSO is disabled, indicating a weakness in the software that can be avoided by using the scalable IOMMU arena DMA allocation.

Baseline numbers before this patch:
  with default settings (TSO enabled): 9-9.5 Gbps
  with TSO disabled using ethtool, throughput drops badly: 2-3 Gbps

After this patch, an iperf client with 10 threads can give a throughput of at least 8.5 Gbps, even when TSO is disabled.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/kernel/pci_sun4v.c')
-rw-r--r-- arch/sparc/kernel/pci_sun4v.c 193
1 files changed, 94 insertions, 99 deletions
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index 47ddbd496a1e..9b76b9d639e1 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -15,6 +15,8 @@
15#include <linux/export.h> 15#include <linux/export.h>
16#include <linux/log2.h> 16#include <linux/log2.h>
17#include <linux/of_device.h> 17#include <linux/of_device.h>
18#include <linux/hash.h>
19#include <linux/iommu-common.h>
18 20
19#include <asm/iommu.h> 21#include <asm/iommu.h>
20#include <asm/irq.h> 22#include <asm/irq.h>
@@ -28,6 +30,7 @@
28 30
29#define DRIVER_NAME "pci_sun4v" 31#define DRIVER_NAME "pci_sun4v"
30#define PFX DRIVER_NAME ": " 32#define PFX DRIVER_NAME ": "
33static DEFINE_PER_CPU(unsigned int, iommu_pool_hash);
31 34
32static unsigned long vpci_major = 1; 35static unsigned long vpci_major = 1;
33static unsigned long vpci_minor = 1; 36static unsigned long vpci_minor = 1;
@@ -155,14 +158,13 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
155 158
156 iommu = dev->archdata.iommu; 159 iommu = dev->archdata.iommu;
157 160
158 spin_lock_irqsave(&iommu->lock, flags); 161 entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
159 entry = iommu_range_alloc(dev, iommu, npages, NULL); 162 __this_cpu_read(iommu_pool_hash));
160 spin_unlock_irqrestore(&iommu->lock, flags);
161 163
162 if (unlikely(entry == DMA_ERROR_CODE)) 164 if (unlikely(entry == DMA_ERROR_CODE))
163 goto range_alloc_fail; 165 goto range_alloc_fail;
164 166
165 *dma_addrp = (iommu->page_table_map_base + 167 *dma_addrp = (iommu->tbl.page_table_map_base +
166 (entry << IO_PAGE_SHIFT)); 168 (entry << IO_PAGE_SHIFT));
167 ret = (void *) first_page; 169 ret = (void *) first_page;
168 first_page = __pa(first_page); 170 first_page = __pa(first_page);
@@ -188,45 +190,46 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
188 return ret; 190 return ret;
189 191
190iommu_map_fail: 192iommu_map_fail:
191 /* Interrupts are disabled. */ 193 iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, false, NULL);
192 spin_lock(&iommu->lock);
193 iommu_range_free(iommu, *dma_addrp, npages);
194 spin_unlock_irqrestore(&iommu->lock, flags);
195 194
196range_alloc_fail: 195range_alloc_fail:
197 free_pages(first_page, order); 196 free_pages(first_page, order);
198 return NULL; 197 return NULL;
199} 198}
200 199
200static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry,
201 unsigned long npages)
202{
203 u32 devhandle = *(u32 *)demap_arg;
204 unsigned long num, flags;
205
206 local_irq_save(flags);
207 do {
208 num = pci_sun4v_iommu_demap(devhandle,
209 HV_PCI_TSBID(0, entry),
210 npages);
211
212 entry += num;
213 npages -= num;
214 } while (npages != 0);
215 local_irq_restore(flags);
216}
217
201static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, 218static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
202 dma_addr_t dvma, struct dma_attrs *attrs) 219 dma_addr_t dvma, struct dma_attrs *attrs)
203{ 220{
204 struct pci_pbm_info *pbm; 221 struct pci_pbm_info *pbm;
205 struct iommu *iommu; 222 struct iommu *iommu;
206 unsigned long flags, order, npages, entry; 223 unsigned long order, npages, entry;
207 u32 devhandle; 224 u32 devhandle;
208 225
209 npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; 226 npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
210 iommu = dev->archdata.iommu; 227 iommu = dev->archdata.iommu;
211 pbm = dev->archdata.host_controller; 228 pbm = dev->archdata.host_controller;
212 devhandle = pbm->devhandle; 229 devhandle = pbm->devhandle;
213 entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT); 230 entry = ((dvma - iommu->tbl.page_table_map_base) >> IO_PAGE_SHIFT);
214 231 dma_4v_iommu_demap(&devhandle, entry, npages);
215 spin_lock_irqsave(&iommu->lock, flags); 232 iommu_tbl_range_free(&iommu->tbl, dvma, npages, false, NULL);
216
217 iommu_range_free(iommu, dvma, npages);
218
219 do {
220 unsigned long num;
221
222 num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
223 npages);
224 entry += num;
225 npages -= num;
226 } while (npages != 0);
227
228 spin_unlock_irqrestore(&iommu->lock, flags);
229
230 order = get_order(size); 233 order = get_order(size);
231 if (order < 10) 234 if (order < 10)
232 free_pages((unsigned long)cpu, order); 235 free_pages((unsigned long)cpu, order);
@@ -253,14 +256,13 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
253 npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); 256 npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
254 npages >>= IO_PAGE_SHIFT; 257 npages >>= IO_PAGE_SHIFT;
255 258
256 spin_lock_irqsave(&iommu->lock, flags); 259 entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
257 entry = iommu_range_alloc(dev, iommu, npages, NULL); 260 __this_cpu_read(iommu_pool_hash));
258 spin_unlock_irqrestore(&iommu->lock, flags);
259 261
260 if (unlikely(entry == DMA_ERROR_CODE)) 262 if (unlikely(entry == DMA_ERROR_CODE))
261 goto bad; 263 goto bad;
262 264
263 bus_addr = (iommu->page_table_map_base + 265 bus_addr = (iommu->tbl.page_table_map_base +
264 (entry << IO_PAGE_SHIFT)); 266 (entry << IO_PAGE_SHIFT));
265 ret = bus_addr | (oaddr & ~IO_PAGE_MASK); 267 ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
266 base_paddr = __pa(oaddr & IO_PAGE_MASK); 268 base_paddr = __pa(oaddr & IO_PAGE_MASK);
@@ -290,11 +292,7 @@ bad:
290 return DMA_ERROR_CODE; 292 return DMA_ERROR_CODE;
291 293
292iommu_map_fail: 294iommu_map_fail:
293 /* Interrupts are disabled. */ 295 iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, false, NULL);
294 spin_lock(&iommu->lock);
295 iommu_range_free(iommu, bus_addr, npages);
296 spin_unlock_irqrestore(&iommu->lock, flags);
297
298 return DMA_ERROR_CODE; 296 return DMA_ERROR_CODE;
299} 297}
300 298
@@ -304,7 +302,7 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
304{ 302{
305 struct pci_pbm_info *pbm; 303 struct pci_pbm_info *pbm;
306 struct iommu *iommu; 304 struct iommu *iommu;
307 unsigned long flags, npages; 305 unsigned long npages;
308 long entry; 306 long entry;
309 u32 devhandle; 307 u32 devhandle;
310 308
@@ -321,22 +319,9 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
321 npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); 319 npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
322 npages >>= IO_PAGE_SHIFT; 320 npages >>= IO_PAGE_SHIFT;
323 bus_addr &= IO_PAGE_MASK; 321 bus_addr &= IO_PAGE_MASK;
324 322 entry = (bus_addr - iommu->tbl.page_table_map_base) >> IO_PAGE_SHIFT;
325 spin_lock_irqsave(&iommu->lock, flags); 323 dma_4v_iommu_demap(&devhandle, entry, npages);
326 324 iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, false, NULL);
327 iommu_range_free(iommu, bus_addr, npages);
328
329 entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
330 do {
331 unsigned long num;
332
333 num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
334 npages);
335 entry += num;
336 npages -= num;
337 } while (npages != 0);
338
339 spin_unlock_irqrestore(&iommu->lock, flags);
340} 325}
341 326
342static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, 327static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -371,14 +356,14 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
371 /* Init first segment length for backout at failure */ 356 /* Init first segment length for backout at failure */
372 outs->dma_length = 0; 357 outs->dma_length = 0;
373 358
374 spin_lock_irqsave(&iommu->lock, flags); 359 local_irq_save(flags);
375 360
376 iommu_batch_start(dev, prot, ~0UL); 361 iommu_batch_start(dev, prot, ~0UL);
377 362
378 max_seg_size = dma_get_max_seg_size(dev); 363 max_seg_size = dma_get_max_seg_size(dev);
379 seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, 364 seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
380 IO_PAGE_SIZE) >> IO_PAGE_SHIFT; 365 IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
381 base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT; 366 base_shift = iommu->tbl.page_table_map_base >> IO_PAGE_SHIFT;
382 for_each_sg(sglist, s, nelems, i) { 367 for_each_sg(sglist, s, nelems, i) {
383 unsigned long paddr, npages, entry, out_entry = 0, slen; 368 unsigned long paddr, npages, entry, out_entry = 0, slen;
384 369
@@ -391,7 +376,8 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
391 /* Allocate iommu entries for that segment */ 376 /* Allocate iommu entries for that segment */
392 paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s); 377 paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
393 npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE); 378 npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
394 entry = iommu_range_alloc(dev, iommu, npages, &handle); 379 entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, &handle,
380 __this_cpu_read(iommu_pool_hash));
395 381
396 /* Handle failure */ 382 /* Handle failure */
397 if (unlikely(entry == DMA_ERROR_CODE)) { 383 if (unlikely(entry == DMA_ERROR_CODE)) {
@@ -404,7 +390,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
404 iommu_batch_new_entry(entry); 390 iommu_batch_new_entry(entry);
405 391
406 /* Convert entry to a dma_addr_t */ 392 /* Convert entry to a dma_addr_t */
407 dma_addr = iommu->page_table_map_base + 393 dma_addr = iommu->tbl.page_table_map_base +
408 (entry << IO_PAGE_SHIFT); 394 (entry << IO_PAGE_SHIFT);
409 dma_addr |= (s->offset & ~IO_PAGE_MASK); 395 dma_addr |= (s->offset & ~IO_PAGE_MASK);
410 396
@@ -451,7 +437,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
451 if (unlikely(err < 0L)) 437 if (unlikely(err < 0L))
452 goto iommu_map_failed; 438 goto iommu_map_failed;
453 439
454 spin_unlock_irqrestore(&iommu->lock, flags); 440 local_irq_restore(flags);
455 441
456 if (outcount < incount) { 442 if (outcount < incount) {
457 outs = sg_next(outs); 443 outs = sg_next(outs);
@@ -469,7 +455,8 @@ iommu_map_failed:
469 vaddr = s->dma_address & IO_PAGE_MASK; 455 vaddr = s->dma_address & IO_PAGE_MASK;
470 npages = iommu_num_pages(s->dma_address, s->dma_length, 456 npages = iommu_num_pages(s->dma_address, s->dma_length,
471 IO_PAGE_SIZE); 457 IO_PAGE_SIZE);
472 iommu_range_free(iommu, vaddr, npages); 458 iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
459 false, NULL);
473 /* XXX demap? XXX */ 460 /* XXX demap? XXX */
474 s->dma_address = DMA_ERROR_CODE; 461 s->dma_address = DMA_ERROR_CODE;
475 s->dma_length = 0; 462 s->dma_length = 0;
@@ -477,7 +464,7 @@ iommu_map_failed:
477 if (s == outs) 464 if (s == outs)
478 break; 465 break;
479 } 466 }
480 spin_unlock_irqrestore(&iommu->lock, flags); 467 local_irq_restore(flags);
481 468
482 return 0; 469 return 0;
483} 470}
@@ -489,7 +476,7 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
489 struct pci_pbm_info *pbm; 476 struct pci_pbm_info *pbm;
490 struct scatterlist *sg; 477 struct scatterlist *sg;
491 struct iommu *iommu; 478 struct iommu *iommu;
492 unsigned long flags; 479 unsigned long flags, entry;
493 u32 devhandle; 480 u32 devhandle;
494 481
495 BUG_ON(direction == DMA_NONE); 482 BUG_ON(direction == DMA_NONE);
@@ -498,33 +485,27 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
498 pbm = dev->archdata.host_controller; 485 pbm = dev->archdata.host_controller;
499 devhandle = pbm->devhandle; 486 devhandle = pbm->devhandle;
500 487
501 spin_lock_irqsave(&iommu->lock, flags); 488 local_irq_save(flags);
502 489
503 sg = sglist; 490 sg = sglist;
504 while (nelems--) { 491 while (nelems--) {
505 dma_addr_t dma_handle = sg->dma_address; 492 dma_addr_t dma_handle = sg->dma_address;
506 unsigned int len = sg->dma_length; 493 unsigned int len = sg->dma_length;
507 unsigned long npages, entry; 494 unsigned long npages;
495 struct iommu_table *tbl = &iommu->tbl;
496 unsigned long shift = IO_PAGE_SHIFT;
508 497
509 if (!len) 498 if (!len)
510 break; 499 break;
511 npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE); 500 npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
512 iommu_range_free(iommu, dma_handle, npages); 501 entry = ((dma_handle - tbl->page_table_map_base) >> shift);
513 502 dma_4v_iommu_demap(&devhandle, entry, npages);
514 entry = ((dma_handle - iommu->page_table_map_base) >> IO_PAGE_SHIFT); 503 iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
515 while (npages) { 504 false, NULL);
516 unsigned long num;
517
518 num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
519 npages);
520 entry += num;
521 npages -= num;
522 }
523
524 sg = sg_next(sg); 505 sg = sg_next(sg);
525 } 506 }
526 507
527 spin_unlock_irqrestore(&iommu->lock, flags); 508 local_irq_restore(flags);
528} 509}
529 510
530static struct dma_map_ops sun4v_dma_ops = { 511static struct dma_map_ops sun4v_dma_ops = {
@@ -536,6 +517,8 @@ static struct dma_map_ops sun4v_dma_ops = {
536 .unmap_sg = dma_4v_unmap_sg, 517 .unmap_sg = dma_4v_unmap_sg,
537}; 518};
538 519
520static struct iommu_tbl_ops dma_4v_iommu_ops;
521
539static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent) 522static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent)
540{ 523{
541 struct property *prop; 524 struct property *prop;
@@ -550,30 +533,33 @@ static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent)
550} 533}
551 534
552static unsigned long probe_existing_entries(struct pci_pbm_info *pbm, 535static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
553 struct iommu *iommu) 536 struct iommu_table *iommu)
554{ 537{
555 struct iommu_arena *arena = &iommu->arena; 538 struct iommu_pool *pool;
556 unsigned long i, cnt = 0; 539 unsigned long i, pool_nr, cnt = 0;
557 u32 devhandle; 540 u32 devhandle;
558 541
559 devhandle = pbm->devhandle; 542 devhandle = pbm->devhandle;
560 for (i = 0; i < arena->limit; i++) { 543 for (pool_nr = 0; pool_nr < iommu->nr_pools; pool_nr++) {
561 unsigned long ret, io_attrs, ra; 544 pool = &(iommu->arena_pool[pool_nr]);
562 545 for (i = pool->start; i <= pool->end; i++) {
563 ret = pci_sun4v_iommu_getmap(devhandle, 546 unsigned long ret, io_attrs, ra;
564 HV_PCI_TSBID(0, i), 547
565 &io_attrs, &ra); 548 ret = pci_sun4v_iommu_getmap(devhandle,
566 if (ret == HV_EOK) { 549 HV_PCI_TSBID(0, i),
567 if (page_in_phys_avail(ra)) { 550 &io_attrs, &ra);
568 pci_sun4v_iommu_demap(devhandle, 551 if (ret == HV_EOK) {
569 HV_PCI_TSBID(0, i), 1); 552 if (page_in_phys_avail(ra)) {
570 } else { 553 pci_sun4v_iommu_demap(devhandle,
571 cnt++; 554 HV_PCI_TSBID(0,
572 __set_bit(i, arena->map); 555 i), 1);
556 } else {
557 cnt++;
558 __set_bit(i, iommu->map);
559 }
573 } 560 }
574 } 561 }
575 } 562 }
576
577 return cnt; 563 return cnt;
578} 564}
579 565
@@ -601,22 +587,22 @@ static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
601 dma_offset = vdma[0]; 587 dma_offset = vdma[0];
602 588
603 /* Setup initial software IOMMU state. */ 589 /* Setup initial software IOMMU state. */
604 spin_lock_init(&iommu->lock);
605 iommu->ctx_lowest_free = 1; 590 iommu->ctx_lowest_free = 1;
606 iommu->page_table_map_base = dma_offset; 591 iommu->tbl.page_table_map_base = dma_offset;
607 iommu->dma_addr_mask = dma_mask; 592 iommu->dma_addr_mask = dma_mask;
608 593
609 /* Allocate and initialize the free area map. */ 594 /* Allocate and initialize the free area map. */
610 sz = (num_tsb_entries + 7) / 8; 595 sz = (num_tsb_entries + 7) / 8;
611 sz = (sz + 7UL) & ~7UL; 596 sz = (sz + 7UL) & ~7UL;
612 iommu->arena.map = kzalloc(sz, GFP_KERNEL); 597 iommu->tbl.map = kzalloc(sz, GFP_KERNEL);
613 if (!iommu->arena.map) { 598 if (!iommu->tbl.map) {
614 printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n"); 599 printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n");
615 return -ENOMEM; 600 return -ENOMEM;
616 } 601 }
617 iommu->arena.limit = num_tsb_entries; 602 iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
618 603 &dma_4v_iommu_ops, false /* no large_pool */,
619 sz = probe_existing_entries(pbm, iommu); 604 0 /* default npools */);
605 sz = probe_existing_entries(pbm, &iommu->tbl);
620 if (sz) 606 if (sz)
621 printk("%s: Imported %lu TSB entries from OBP\n", 607 printk("%s: Imported %lu TSB entries from OBP\n",
622 pbm->name, sz); 608 pbm->name, sz);
@@ -1015,8 +1001,17 @@ static struct platform_driver pci_sun4v_driver = {
1015 .probe = pci_sun4v_probe, 1001 .probe = pci_sun4v_probe,
1016}; 1002};
1017 1003
1004static void setup_iommu_pool_hash(void)
1005{
1006 unsigned int i;
1007
1008 for_each_possible_cpu(i)
1009 per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
1010}
1011
1018static int __init pci_sun4v_init(void) 1012static int __init pci_sun4v_init(void)
1019{ 1013{
1014 setup_iommu_pool_hash();
1020 return platform_driver_register(&pci_sun4v_driver); 1015 return platform_driver_register(&pci_sun4v_driver);
1021} 1016}
1022 1017