diff options
author | Roland Dreier <rolandd@cisco.com> | 2008-05-06 18:56:22 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2008-05-06 18:56:22 -0400 |
commit | 273748cc908a901d082b4da5a16b2541c9d78a02 (patch) | |
tree | b573f1f3f583acf2309a10e86f99a1842a8ccdc5 /drivers/infiniband/hw/cxgb3/iwch_provider.c | |
parent | 0e9913362a967377eb886bbdf305ec58aa07a878 (diff) |
RDMA/cxgb3: Fix severe limit on userspace memory registration size
Currently, iw_cxgb3 is severely limited on the amount of userspace
memory that can be registered in in a single memory region, which
causes big problems for applications that expect to be able to
register 100s of MB.
The problem is that the driver uses a single kmalloc()ed buffer to
hold the physical buffer list (PBL) for the entire memory region
during registration, which means that 8 bytes of contiguous memory are
required for each page of memory being registered. For example, a 64
MB registration will require 128 KB of contiguous memory with 4 KB
pages, and it unlikely that such an allocation will succeed on a busy
system.
This is purely a driver problem: the temporary page list buffer is not
needed by the hardware, so we can fix this by writing the PBL to the
hardware in page-sized chunks rather than all at once. We do this by
splitting the memory registration operation up into several steps:
- Allocate PBL space in adapter memory for the full registration
- Copy PBL to adapter memory in chunks
- Allocate STag and enable memory region
This also allows several other cleanups to the __cxio_tpt_op()
interface and related parts of the driver.
This change leaves the reregister memory region and memory window
operations broken, but they already didn't work due to other
longstanding bugs, so fixing them will be left to a later patch.
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/cxgb3/iwch_provider.c')
-rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_provider.c | 68 |
1 files changed, 53 insertions, 15 deletions
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index d07d3a377b5f..8934178a23ee 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c | |||
@@ -442,6 +442,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr) | |||
442 | mmid = mhp->attr.stag >> 8; | 442 | mmid = mhp->attr.stag >> 8; |
443 | cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, | 443 | cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, |
444 | mhp->attr.pbl_addr); | 444 | mhp->attr.pbl_addr); |
445 | iwch_free_pbl(mhp); | ||
445 | remove_handle(rhp, &rhp->mmidr, mmid); | 446 | remove_handle(rhp, &rhp->mmidr, mmid); |
446 | if (mhp->kva) | 447 | if (mhp->kva) |
447 | kfree((void *) (unsigned long) mhp->kva); | 448 | kfree((void *) (unsigned long) mhp->kva); |
@@ -475,6 +476,8 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, | |||
475 | if (!mhp) | 476 | if (!mhp) |
476 | return ERR_PTR(-ENOMEM); | 477 | return ERR_PTR(-ENOMEM); |
477 | 478 | ||
479 | mhp->rhp = rhp; | ||
480 | |||
478 | /* First check that we have enough alignment */ | 481 | /* First check that we have enough alignment */ |
479 | if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) { | 482 | if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) { |
480 | ret = -EINVAL; | 483 | ret = -EINVAL; |
@@ -492,7 +495,17 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, | |||
492 | if (ret) | 495 | if (ret) |
493 | goto err; | 496 | goto err; |
494 | 497 | ||
495 | mhp->rhp = rhp; | 498 | ret = iwch_alloc_pbl(mhp, npages); |
499 | if (ret) { | ||
500 | kfree(page_list); | ||
501 | goto err_pbl; | ||
502 | } | ||
503 | |||
504 | ret = iwch_write_pbl(mhp, page_list, npages, 0); | ||
505 | kfree(page_list); | ||
506 | if (ret) | ||
507 | goto err_pbl; | ||
508 | |||
496 | mhp->attr.pdid = php->pdid; | 509 | mhp->attr.pdid = php->pdid; |
497 | mhp->attr.zbva = 0; | 510 | mhp->attr.zbva = 0; |
498 | 511 | ||
@@ -502,12 +515,15 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, | |||
502 | 515 | ||
503 | mhp->attr.len = (u32) total_size; | 516 | mhp->attr.len = (u32) total_size; |
504 | mhp->attr.pbl_size = npages; | 517 | mhp->attr.pbl_size = npages; |
505 | ret = iwch_register_mem(rhp, php, mhp, shift, page_list); | 518 | ret = iwch_register_mem(rhp, php, mhp, shift); |
506 | kfree(page_list); | 519 | if (ret) |
507 | if (ret) { | 520 | goto err_pbl; |
508 | goto err; | 521 | |
509 | } | ||
510 | return &mhp->ibmr; | 522 | return &mhp->ibmr; |
523 | |||
524 | err_pbl: | ||
525 | iwch_free_pbl(mhp); | ||
526 | |||
511 | err: | 527 | err: |
512 | kfree(mhp); | 528 | kfree(mhp); |
513 | return ERR_PTR(ret); | 529 | return ERR_PTR(ret); |
@@ -560,7 +576,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr, | |||
560 | return ret; | 576 | return ret; |
561 | } | 577 | } |
562 | 578 | ||
563 | ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages); | 579 | ret = iwch_reregister_mem(rhp, php, &mh, shift, npages); |
564 | kfree(page_list); | 580 | kfree(page_list); |
565 | if (ret) { | 581 | if (ret) { |
566 | return ret; | 582 | return ret; |
@@ -602,6 +618,8 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, | |||
602 | if (!mhp) | 618 | if (!mhp) |
603 | return ERR_PTR(-ENOMEM); | 619 | return ERR_PTR(-ENOMEM); |
604 | 620 | ||
621 | mhp->rhp = rhp; | ||
622 | |||
605 | mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); | 623 | mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); |
606 | if (IS_ERR(mhp->umem)) { | 624 | if (IS_ERR(mhp->umem)) { |
607 | err = PTR_ERR(mhp->umem); | 625 | err = PTR_ERR(mhp->umem); |
@@ -615,10 +633,14 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, | |||
615 | list_for_each_entry(chunk, &mhp->umem->chunk_list, list) | 633 | list_for_each_entry(chunk, &mhp->umem->chunk_list, list) |
616 | n += chunk->nents; | 634 | n += chunk->nents; |
617 | 635 | ||
618 | pages = kmalloc(n * sizeof(u64), GFP_KERNEL); | 636 | err = iwch_alloc_pbl(mhp, n); |
637 | if (err) | ||
638 | goto err; | ||
639 | |||
640 | pages = (__be64 *) __get_free_page(GFP_KERNEL); | ||
619 | if (!pages) { | 641 | if (!pages) { |
620 | err = -ENOMEM; | 642 | err = -ENOMEM; |
621 | goto err; | 643 | goto err_pbl; |
622 | } | 644 | } |
623 | 645 | ||
624 | i = n = 0; | 646 | i = n = 0; |
@@ -630,25 +652,38 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, | |||
630 | pages[i++] = cpu_to_be64(sg_dma_address( | 652 | pages[i++] = cpu_to_be64(sg_dma_address( |
631 | &chunk->page_list[j]) + | 653 | &chunk->page_list[j]) + |
632 | mhp->umem->page_size * k); | 654 | mhp->umem->page_size * k); |
655 | if (i == PAGE_SIZE / sizeof *pages) { | ||
656 | err = iwch_write_pbl(mhp, pages, i, n); | ||
657 | if (err) | ||
658 | goto pbl_done; | ||
659 | n += i; | ||
660 | i = 0; | ||
661 | } | ||
633 | } | 662 | } |
634 | } | 663 | } |
635 | 664 | ||
636 | mhp->rhp = rhp; | 665 | if (i) |
666 | err = iwch_write_pbl(mhp, pages, i, n); | ||
667 | |||
668 | pbl_done: | ||
669 | free_page((unsigned long) pages); | ||
670 | if (err) | ||
671 | goto err_pbl; | ||
672 | |||
637 | mhp->attr.pdid = php->pdid; | 673 | mhp->attr.pdid = php->pdid; |
638 | mhp->attr.zbva = 0; | 674 | mhp->attr.zbva = 0; |
639 | mhp->attr.perms = iwch_ib_to_tpt_access(acc); | 675 | mhp->attr.perms = iwch_ib_to_tpt_access(acc); |
640 | mhp->attr.va_fbo = virt; | 676 | mhp->attr.va_fbo = virt; |
641 | mhp->attr.page_size = shift - 12; | 677 | mhp->attr.page_size = shift - 12; |
642 | mhp->attr.len = (u32) length; | 678 | mhp->attr.len = (u32) length; |
643 | mhp->attr.pbl_size = i; | 679 | |
644 | err = iwch_register_mem(rhp, php, mhp, shift, pages); | 680 | err = iwch_register_mem(rhp, php, mhp, shift); |
645 | kfree(pages); | ||
646 | if (err) | 681 | if (err) |
647 | goto err; | 682 | goto err_pbl; |
648 | 683 | ||
649 | if (udata && !t3a_device(rhp)) { | 684 | if (udata && !t3a_device(rhp)) { |
650 | uresp.pbl_addr = (mhp->attr.pbl_addr - | 685 | uresp.pbl_addr = (mhp->attr.pbl_addr - |
651 | rhp->rdev.rnic_info.pbl_base) >> 3; | 686 | rhp->rdev.rnic_info.pbl_base) >> 3; |
652 | PDBG("%s user resp pbl_addr 0x%x\n", __func__, | 687 | PDBG("%s user resp pbl_addr 0x%x\n", __func__, |
653 | uresp.pbl_addr); | 688 | uresp.pbl_addr); |
654 | 689 | ||
@@ -661,6 +696,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, | |||
661 | 696 | ||
662 | return &mhp->ibmr; | 697 | return &mhp->ibmr; |
663 | 698 | ||
699 | err_pbl: | ||
700 | iwch_free_pbl(mhp); | ||
701 | |||
664 | err: | 702 | err: |
665 | ib_umem_release(mhp->umem); | 703 | ib_umem_release(mhp->umem); |
666 | kfree(mhp); | 704 | kfree(mhp); |