author	Roland Dreier <rolandd@cisco.com>	2008-05-06 18:56:22 -0400
committer	Roland Dreier <rolandd@cisco.com>	2008-05-06 18:56:22 -0400
commit	273748cc908a901d082b4da5a16b2541c9d78a02 (patch)
tree	b573f1f3f583acf2309a10e86f99a1842a8ccdc5 /drivers/infiniband/hw/cxgb3/iwch_provider.c
parent	0e9913362a967377eb886bbdf305ec58aa07a878 (diff)
RDMA/cxgb3: Fix severe limit on userspace memory registration size
Currently, iw_cxgb3 is severely limited on the amount of userspace memory
that can be registered in a single memory region, which causes big problems
for applications that expect to be able to register 100s of MB.

The problem is that the driver uses a single kmalloc()ed buffer to hold the
physical buffer list (PBL) for the entire memory region during registration,
which means that 8 bytes of contiguous memory are required for each page of
memory being registered.  For example, a 64 MB registration will require
128 KB of contiguous memory with 4 KB pages, and it is unlikely that such an
allocation will succeed on a busy system.

This is purely a driver problem: the temporary page list buffer is not
needed by the hardware, so we can fix this by writing the PBL to the
hardware in page-sized chunks rather than all at once.  We do this by
splitting the memory registration operation up into several steps:

 - Allocate PBL space in adapter memory for the full registration
 - Copy PBL to adapter memory in chunks
 - Allocate STag and enable memory region

This also allows several other cleanups to the __cxio_tpt_op() interface
and related parts of the driver.

This change leaves the reregister memory region and memory window
operations broken, but they already didn't work due to other longstanding
bugs, so fixing them will be left to a later patch.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
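The core of the fix is the chunked PBL copy.  Condensed from the new
iwch_reg_user_mr() logic in the diff below, the pattern looks roughly like
this (a sketch, not the verbatim driver code: the ib_umem chunk walk is
abbreviated as a hypothetical for_each_dma_addr() helper, and the unwinding
to err_pbl is omitted):

	__be64 *pages;
	int err = 0, i = 0, n = 0;

	/* A single page of staging buffer is all we ever allocate. */
	pages = (__be64 *) __get_free_page(GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	/* Hypothetical stand-in for the real umem chunk/page walk. */
	for_each_dma_addr(addr, mhp->umem) {
		pages[i++] = cpu_to_be64(addr);
		if (i == PAGE_SIZE / sizeof *pages) {
			/* Staging page full: push one chunk to adapter memory. */
			err = iwch_write_pbl(mhp, pages, i, n);
			if (err)
				break;
			n += i;		/* n is the running offset into the PBL */
			i = 0;
		}
	}
	if (!err && i)			/* flush the final partial chunk */
		err = iwch_write_pbl(mhp, pages, i, n);

	free_page((unsigned long) pages);

Since the adapter only ever sees the PBL in its own memory, nothing larger
than PAGE_SIZE needs to be contiguous on the host, no matter how large the
registration is.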
Diffstat (limited to 'drivers/infiniband/hw/cxgb3/iwch_provider.c')
-rw-r--r--	drivers/infiniband/hw/cxgb3/iwch_provider.c	68
1 file changed, 53 insertions(+), 15 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index d07d3a377b5f..8934178a23ee 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -442,6 +442,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
 	mmid = mhp->attr.stag >> 8;
 	cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
 		       mhp->attr.pbl_addr);
+	iwch_free_pbl(mhp);
 	remove_handle(rhp, &rhp->mmidr, mmid);
 	if (mhp->kva)
 		kfree((void *) (unsigned long) mhp->kva);
@@ -475,6 +476,8 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
 	if (!mhp)
 		return ERR_PTR(-ENOMEM);
 
+	mhp->rhp = rhp;
+
 	/* First check that we have enough alignment */
 	if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
 		ret = -EINVAL;
@@ -492,7 +495,17 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
 	if (ret)
 		goto err;
 
-	mhp->rhp = rhp;
+	ret = iwch_alloc_pbl(mhp, npages);
+	if (ret) {
+		kfree(page_list);
+		goto err_pbl;
+	}
+
+	ret = iwch_write_pbl(mhp, page_list, npages, 0);
+	kfree(page_list);
+	if (ret)
+		goto err_pbl;
+
 	mhp->attr.pdid = php->pdid;
 	mhp->attr.zbva = 0;
 
@@ -502,12 +515,15 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
 
 	mhp->attr.len = (u32) total_size;
 	mhp->attr.pbl_size = npages;
-	ret = iwch_register_mem(rhp, php, mhp, shift, page_list);
-	kfree(page_list);
-	if (ret) {
-		goto err;
-	}
+	ret = iwch_register_mem(rhp, php, mhp, shift);
+	if (ret)
+		goto err_pbl;
+
 	return &mhp->ibmr;
+
+err_pbl:
+	iwch_free_pbl(mhp);
+
 err:
 	kfree(mhp);
 	return ERR_PTR(ret);
@@ -560,7 +576,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
 		return ret;
 	}
 
-	ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages);
+	ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
 	kfree(page_list);
 	if (ret) {
 		return ret;
@@ -602,6 +618,8 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	if (!mhp)
 		return ERR_PTR(-ENOMEM);
 
+	mhp->rhp = rhp;
+
 	mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
 	if (IS_ERR(mhp->umem)) {
 		err = PTR_ERR(mhp->umem);
@@ -615,10 +633,14 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
 		n += chunk->nents;
 
-	pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
+	err = iwch_alloc_pbl(mhp, n);
+	if (err)
+		goto err;
+
+	pages = (__be64 *) __get_free_page(GFP_KERNEL);
 	if (!pages) {
 		err = -ENOMEM;
-		goto err;
+		goto err_pbl;
 	}
 
 	i = n = 0;
@@ -630,25 +652,38 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				pages[i++] = cpu_to_be64(sg_dma_address(
 					&chunk->page_list[j]) +
 					mhp->umem->page_size * k);
+				if (i == PAGE_SIZE / sizeof *pages) {
+					err = iwch_write_pbl(mhp, pages, i, n);
+					if (err)
+						goto pbl_done;
+					n += i;
+					i = 0;
+				}
 		}
 	}
 
-	mhp->rhp = rhp;
+	if (i)
+		err = iwch_write_pbl(mhp, pages, i, n);
+
+pbl_done:
+	free_page((unsigned long) pages);
+	if (err)
+		goto err_pbl;
+
 	mhp->attr.pdid = php->pdid;
 	mhp->attr.zbva = 0;
 	mhp->attr.perms = iwch_ib_to_tpt_access(acc);
 	mhp->attr.va_fbo = virt;
 	mhp->attr.page_size = shift - 12;
 	mhp->attr.len = (u32) length;
-	mhp->attr.pbl_size = i;
-	err = iwch_register_mem(rhp, php, mhp, shift, pages);
-	kfree(pages);
+
+	err = iwch_register_mem(rhp, php, mhp, shift);
 	if (err)
-		goto err;
+		goto err_pbl;
 
 	if (udata && !t3a_device(rhp)) {
 		uresp.pbl_addr = (mhp->attr.pbl_addr -
 				  rhp->rdev.rnic_info.pbl_base) >> 3;
 		PDBG("%s user resp pbl_addr 0x%x\n", __func__,
 		     uresp.pbl_addr);
 
@@ -661,6 +696,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	return &mhp->ibmr;
 
+err_pbl:
+	iwch_free_pbl(mhp);
+
 err:
 	ib_umem_release(mhp->umem);
 	kfree(mhp);