aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
authorRoland Dreier <rolandd@cisco.com>2008-05-06 18:56:22 -0400
committerRoland Dreier <rolandd@cisco.com>2008-05-06 18:56:22 -0400
commit273748cc908a901d082b4da5a16b2541c9d78a02 (patch)
treeb573f1f3f583acf2309a10e86f99a1842a8ccdc5 /drivers/infiniband/hw
parent0e9913362a967377eb886bbdf305ec58aa07a878 (diff)
RDMA/cxgb3: Fix severe limit on userspace memory registration size
Currently, iw_cxgb3 is severely limited on the amount of userspace memory that can be registered in in a single memory region, which causes big problems for applications that expect to be able to register 100s of MB. The problem is that the driver uses a single kmalloc()ed buffer to hold the physical buffer list (PBL) for the entire memory region during registration, which means that 8 bytes of contiguous memory are required for each page of memory being registered. For example, a 64 MB registration will require 128 KB of contiguous memory with 4 KB pages, and it unlikely that such an allocation will succeed on a busy system. This is purely a driver problem: the temporary page list buffer is not needed by the hardware, so we can fix this by writing the PBL to the hardware in page-sized chunks rather than all at once. We do this by splitting the memory registration operation up into several steps: - Allocate PBL space in adapter memory for the full registration - Copy PBL to adapter memory in chunks - Allocate STag and enable memory region This also allows several other cleanups to the __cxio_tpt_op() interface and related parts of the driver. This change leaves the reregister memory region and memory window operations broken, but they already didn't work due to other longstanding bugs, so fixing them will be left to a later patch. Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c90
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.h8
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_mem.c75
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c68
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h8
5 files changed, 155 insertions, 94 deletions
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 5fd8506a8657..ebf9d3043f80 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -588,7 +588,7 @@ static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
588 * caller aquires the ctrl_qp lock before the call 588 * caller aquires the ctrl_qp lock before the call
589 */ 589 */
590static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, 590static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
591 u32 len, void *data, int completion) 591 u32 len, void *data)
592{ 592{
593 u32 i, nr_wqe, copy_len; 593 u32 i, nr_wqe, copy_len;
594 u8 *copy_data; 594 u8 *copy_data;
@@ -624,7 +624,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
624 flag = 0; 624 flag = 0;
625 if (i == (nr_wqe - 1)) { 625 if (i == (nr_wqe - 1)) {
626 /* last WQE */ 626 /* last WQE */
627 flag = completion ? T3_COMPLETION_FLAG : 0; 627 flag = T3_COMPLETION_FLAG;
628 if (len % 32) 628 if (len % 32)
629 utx_len = len / 32 + 1; 629 utx_len = len / 32 + 1;
630 else 630 else
@@ -683,21 +683,20 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
683 return 0; 683 return 0;
684} 684}
685 685
686/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl, and pbl_size 686/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr
687 * OUT: stag index, actual pbl_size, pbl_addr allocated. 687 * OUT: stag index
688 * TBD: shared memory region support 688 * TBD: shared memory region support
689 */ 689 */
690static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, 690static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
691 u32 *stag, u8 stag_state, u32 pdid, 691 u32 *stag, u8 stag_state, u32 pdid,
692 enum tpt_mem_type type, enum tpt_mem_perm perm, 692 enum tpt_mem_type type, enum tpt_mem_perm perm,
693 u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl, 693 u32 zbva, u64 to, u32 len, u8 page_size,
694 u32 *pbl_size, u32 *pbl_addr) 694 u32 pbl_size, u32 pbl_addr)
695{ 695{
696 int err; 696 int err;
697 struct tpt_entry tpt; 697 struct tpt_entry tpt;
698 u32 stag_idx; 698 u32 stag_idx;
699 u32 wptr; 699 u32 wptr;
700 int rereg = (*stag != T3_STAG_UNSET);
701 700
702 stag_state = stag_state > 0; 701 stag_state = stag_state > 0;
703 stag_idx = (*stag) >> 8; 702 stag_idx = (*stag) >> 8;
@@ -711,30 +710,8 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
711 PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", 710 PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
712 __func__, stag_state, type, pdid, stag_idx); 711 __func__, stag_state, type, pdid, stag_idx);
713 712
714 if (reset_tpt_entry)
715 cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3);
716 else if (!rereg) {
717 *pbl_addr = cxio_hal_pblpool_alloc(rdev_p, *pbl_size << 3);
718 if (!*pbl_addr) {
719 return -ENOMEM;
720 }
721 }
722
723 mutex_lock(&rdev_p->ctrl_qp.lock); 713 mutex_lock(&rdev_p->ctrl_qp.lock);
724 714
725 /* write PBL first if any - update pbl only if pbl list exist */
726 if (pbl) {
727
728 PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
729 __func__, *pbl_addr, rdev_p->rnic_info.pbl_base,
730 *pbl_size);
731 err = cxio_hal_ctrl_qp_write_mem(rdev_p,
732 (*pbl_addr >> 5),
733 (*pbl_size << 3), pbl, 0);
734 if (err)
735 goto ret;
736 }
737
738 /* write TPT entry */ 715 /* write TPT entry */
739 if (reset_tpt_entry) 716 if (reset_tpt_entry)
740 memset(&tpt, 0, sizeof(tpt)); 717 memset(&tpt, 0, sizeof(tpt));
@@ -749,23 +726,23 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
749 V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) | 726 V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
750 V_TPT_PAGE_SIZE(page_size)); 727 V_TPT_PAGE_SIZE(page_size));
751 tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 : 728 tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
752 cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, *pbl_addr)>>3)); 729 cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
753 tpt.len = cpu_to_be32(len); 730 tpt.len = cpu_to_be32(len);
754 tpt.va_hi = cpu_to_be32((u32) (to >> 32)); 731 tpt.va_hi = cpu_to_be32((u32) (to >> 32));
755 tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL)); 732 tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
756 tpt.rsvd_bind_cnt_or_pstag = 0; 733 tpt.rsvd_bind_cnt_or_pstag = 0;
757 tpt.rsvd_pbl_size = reset_tpt_entry ? 0 : 734 tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
758 cpu_to_be32(V_TPT_PBL_SIZE((*pbl_size) >> 2)); 735 cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
759 } 736 }
760 err = cxio_hal_ctrl_qp_write_mem(rdev_p, 737 err = cxio_hal_ctrl_qp_write_mem(rdev_p,
761 stag_idx + 738 stag_idx +
762 (rdev_p->rnic_info.tpt_base >> 5), 739 (rdev_p->rnic_info.tpt_base >> 5),
763 sizeof(tpt), &tpt, 1); 740 sizeof(tpt), &tpt);
764 741
765 /* release the stag index to free pool */ 742 /* release the stag index to free pool */
766 if (reset_tpt_entry) 743 if (reset_tpt_entry)
767 cxio_hal_put_stag(rdev_p->rscp, stag_idx); 744 cxio_hal_put_stag(rdev_p->rscp, stag_idx);
768ret: 745
769 wptr = rdev_p->ctrl_qp.wptr; 746 wptr = rdev_p->ctrl_qp.wptr;
770 mutex_unlock(&rdev_p->ctrl_qp.lock); 747 mutex_unlock(&rdev_p->ctrl_qp.lock);
771 if (!err) 748 if (!err)
@@ -776,44 +753,67 @@ ret:
776 return err; 753 return err;
777} 754}
778 755
756int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
757 u32 pbl_addr, u32 pbl_size)
758{
759 u32 wptr;
760 int err;
761
762 PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
763 __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
764 pbl_size);
765
766 mutex_lock(&rdev_p->ctrl_qp.lock);
767 err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
768 pbl);
769 wptr = rdev_p->ctrl_qp.wptr;
770 mutex_unlock(&rdev_p->ctrl_qp.lock);
771 if (err)
772 return err;
773
774 if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
775 SEQ32_GE(rdev_p->ctrl_qp.rptr,
776 wptr)))
777 return -ERESTARTSYS;
778
779 return 0;
780}
781
779int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, 782int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
780 enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, 783 enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
781 u8 page_size, __be64 *pbl, u32 *pbl_size, 784 u8 page_size, u32 pbl_size, u32 pbl_addr)
782 u32 *pbl_addr)
783{ 785{
784 *stag = T3_STAG_UNSET; 786 *stag = T3_STAG_UNSET;
785 return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, 787 return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
786 zbva, to, len, page_size, pbl, pbl_size, pbl_addr); 788 zbva, to, len, page_size, pbl_size, pbl_addr);
787} 789}
788 790
789int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, 791int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
790 enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, 792 enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
791 u8 page_size, __be64 *pbl, u32 *pbl_size, 793 u8 page_size, u32 pbl_size, u32 pbl_addr)
792 u32 *pbl_addr)
793{ 794{
794 return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, 795 return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
795 zbva, to, len, page_size, pbl, pbl_size, pbl_addr); 796 zbva, to, len, page_size, pbl_size, pbl_addr);
796} 797}
797 798
798int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size, 799int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
799 u32 pbl_addr) 800 u32 pbl_addr)
800{ 801{
801 return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL, 802 return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
802 &pbl_size, &pbl_addr); 803 pbl_size, pbl_addr);
803} 804}
804 805
805int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid) 806int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
806{ 807{
807 u32 pbl_size = 0;
808 *stag = T3_STAG_UNSET; 808 *stag = T3_STAG_UNSET;
809 return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0, 809 return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
810 NULL, &pbl_size, NULL); 810 0, 0);
811} 811}
812 812
813int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag) 813int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
814{ 814{
815 return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL, 815 return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
816 NULL, NULL); 816 0, 0);
817} 817}
818 818
819int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) 819int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 69ab08ebc680..6e128f6bab05 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -154,14 +154,14 @@ int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
154int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq, 154int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
155 struct cxio_ucontext *uctx); 155 struct cxio_ucontext *uctx);
156int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode); 156int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
157int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
158 u32 pbl_addr, u32 pbl_size);
157int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, 159int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
158 enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, 160 enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
159 u8 page_size, __be64 *pbl, u32 *pbl_size, 161 u8 page_size, u32 pbl_size, u32 pbl_addr);
160 u32 *pbl_addr);
161int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, 162int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
162 enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, 163 enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
163 u8 page_size, __be64 *pbl, u32 *pbl_size, 164 u8 page_size, u32 pbl_size, u32 pbl_addr);
164 u32 *pbl_addr);
165int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size, 165int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
166 u32 pbl_addr); 166 u32 pbl_addr);
167int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid); 167int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index 58c3d61bcd14..ec49a5cbdebb 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -35,17 +35,26 @@
35#include <rdma/ib_verbs.h> 35#include <rdma/ib_verbs.h>
36 36
37#include "cxio_hal.h" 37#include "cxio_hal.h"
38#include "cxio_resource.h"
38#include "iwch.h" 39#include "iwch.h"
39#include "iwch_provider.h" 40#include "iwch_provider.h"
40 41
41int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, 42static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
42 struct iwch_mr *mhp,
43 int shift,
44 __be64 *page_list)
45{ 43{
46 u32 stag;
47 u32 mmid; 44 u32 mmid;
48 45
46 mhp->attr.state = 1;
47 mhp->attr.stag = stag;
48 mmid = stag >> 8;
49 mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
50 insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
51 PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
52}
53
54int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
55 struct iwch_mr *mhp, int shift)
56{
57 u32 stag;
49 58
50 if (cxio_register_phys_mem(&rhp->rdev, 59 if (cxio_register_phys_mem(&rhp->rdev,
51 &stag, mhp->attr.pdid, 60 &stag, mhp->attr.pdid,
@@ -53,28 +62,21 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
53 mhp->attr.zbva, 62 mhp->attr.zbva,
54 mhp->attr.va_fbo, 63 mhp->attr.va_fbo,
55 mhp->attr.len, 64 mhp->attr.len,
56 shift-12, 65 shift - 12,
57 page_list, 66 mhp->attr.pbl_size, mhp->attr.pbl_addr))
58 &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
59 return -ENOMEM; 67 return -ENOMEM;
60 mhp->attr.state = 1; 68
61 mhp->attr.stag = stag; 69 iwch_finish_mem_reg(mhp, stag);
62 mmid = stag >> 8; 70
63 mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
64 insert_handle(rhp, &rhp->mmidr, mhp, mmid);
65 PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
66 return 0; 71 return 0;
67} 72}
68 73
69int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, 74int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
70 struct iwch_mr *mhp, 75 struct iwch_mr *mhp,
71 int shift, 76 int shift,
72 __be64 *page_list,
73 int npages) 77 int npages)
74{ 78{
75 u32 stag; 79 u32 stag;
76 u32 mmid;
77
78 80
79 /* We could support this... */ 81 /* We could support this... */
80 if (npages > mhp->attr.pbl_size) 82 if (npages > mhp->attr.pbl_size)
@@ -87,19 +89,40 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
87 mhp->attr.zbva, 89 mhp->attr.zbva,
88 mhp->attr.va_fbo, 90 mhp->attr.va_fbo,
89 mhp->attr.len, 91 mhp->attr.len,
90 shift-12, 92 shift - 12,
91 page_list, 93 mhp->attr.pbl_size, mhp->attr.pbl_addr))
92 &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
93 return -ENOMEM; 94 return -ENOMEM;
94 mhp->attr.state = 1; 95
95 mhp->attr.stag = stag; 96 iwch_finish_mem_reg(mhp, stag);
96 mmid = stag >> 8; 97
97 mhp->ibmr.rkey = mhp->ibmr.lkey = stag; 98 return 0;
98 insert_handle(rhp, &rhp->mmidr, mhp, mmid); 99}
99 PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); 100
101int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
102{
103 mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
104 npages << 3);
105
106 if (!mhp->attr.pbl_addr)
107 return -ENOMEM;
108
109 mhp->attr.pbl_size = npages;
110
100 return 0; 111 return 0;
101} 112}
102 113
114void iwch_free_pbl(struct iwch_mr *mhp)
115{
116 cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
117 mhp->attr.pbl_size << 3);
118}
119
120int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
121{
122 return cxio_write_pbl(&mhp->rhp->rdev, pages,
123 mhp->attr.pbl_addr + (offset << 3), npages);
124}
125
103int build_phys_page_list(struct ib_phys_buf *buffer_list, 126int build_phys_page_list(struct ib_phys_buf *buffer_list,
104 int num_phys_buf, 127 int num_phys_buf,
105 u64 *iova_start, 128 u64 *iova_start,
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index d07d3a377b5f..8934178a23ee 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -442,6 +442,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
442 mmid = mhp->attr.stag >> 8; 442 mmid = mhp->attr.stag >> 8;
443 cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, 443 cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
444 mhp->attr.pbl_addr); 444 mhp->attr.pbl_addr);
445 iwch_free_pbl(mhp);
445 remove_handle(rhp, &rhp->mmidr, mmid); 446 remove_handle(rhp, &rhp->mmidr, mmid);
446 if (mhp->kva) 447 if (mhp->kva)
447 kfree((void *) (unsigned long) mhp->kva); 448 kfree((void *) (unsigned long) mhp->kva);
@@ -475,6 +476,8 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
475 if (!mhp) 476 if (!mhp)
476 return ERR_PTR(-ENOMEM); 477 return ERR_PTR(-ENOMEM);
477 478
479 mhp->rhp = rhp;
480
478 /* First check that we have enough alignment */ 481 /* First check that we have enough alignment */
479 if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) { 482 if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
480 ret = -EINVAL; 483 ret = -EINVAL;
@@ -492,7 +495,17 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
492 if (ret) 495 if (ret)
493 goto err; 496 goto err;
494 497
495 mhp->rhp = rhp; 498 ret = iwch_alloc_pbl(mhp, npages);
499 if (ret) {
500 kfree(page_list);
501 goto err_pbl;
502 }
503
504 ret = iwch_write_pbl(mhp, page_list, npages, 0);
505 kfree(page_list);
506 if (ret)
507 goto err_pbl;
508
496 mhp->attr.pdid = php->pdid; 509 mhp->attr.pdid = php->pdid;
497 mhp->attr.zbva = 0; 510 mhp->attr.zbva = 0;
498 511
@@ -502,12 +515,15 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
502 515
503 mhp->attr.len = (u32) total_size; 516 mhp->attr.len = (u32) total_size;
504 mhp->attr.pbl_size = npages; 517 mhp->attr.pbl_size = npages;
505 ret = iwch_register_mem(rhp, php, mhp, shift, page_list); 518 ret = iwch_register_mem(rhp, php, mhp, shift);
506 kfree(page_list); 519 if (ret)
507 if (ret) { 520 goto err_pbl;
508 goto err; 521
509 }
510 return &mhp->ibmr; 522 return &mhp->ibmr;
523
524err_pbl:
525 iwch_free_pbl(mhp);
526
511err: 527err:
512 kfree(mhp); 528 kfree(mhp);
513 return ERR_PTR(ret); 529 return ERR_PTR(ret);
@@ -560,7 +576,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
560 return ret; 576 return ret;
561 } 577 }
562 578
563 ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages); 579 ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
564 kfree(page_list); 580 kfree(page_list);
565 if (ret) { 581 if (ret) {
566 return ret; 582 return ret;
@@ -602,6 +618,8 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
602 if (!mhp) 618 if (!mhp)
603 return ERR_PTR(-ENOMEM); 619 return ERR_PTR(-ENOMEM);
604 620
621 mhp->rhp = rhp;
622
605 mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); 623 mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
606 if (IS_ERR(mhp->umem)) { 624 if (IS_ERR(mhp->umem)) {
607 err = PTR_ERR(mhp->umem); 625 err = PTR_ERR(mhp->umem);
@@ -615,10 +633,14 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
615 list_for_each_entry(chunk, &mhp->umem->chunk_list, list) 633 list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
616 n += chunk->nents; 634 n += chunk->nents;
617 635
618 pages = kmalloc(n * sizeof(u64), GFP_KERNEL); 636 err = iwch_alloc_pbl(mhp, n);
637 if (err)
638 goto err;
639
640 pages = (__be64 *) __get_free_page(GFP_KERNEL);
619 if (!pages) { 641 if (!pages) {
620 err = -ENOMEM; 642 err = -ENOMEM;
621 goto err; 643 goto err_pbl;
622 } 644 }
623 645
624 i = n = 0; 646 i = n = 0;
@@ -630,25 +652,38 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
630 pages[i++] = cpu_to_be64(sg_dma_address( 652 pages[i++] = cpu_to_be64(sg_dma_address(
631 &chunk->page_list[j]) + 653 &chunk->page_list[j]) +
632 mhp->umem->page_size * k); 654 mhp->umem->page_size * k);
655 if (i == PAGE_SIZE / sizeof *pages) {
656 err = iwch_write_pbl(mhp, pages, i, n);
657 if (err)
658 goto pbl_done;
659 n += i;
660 i = 0;
661 }
633 } 662 }
634 } 663 }
635 664
636 mhp->rhp = rhp; 665 if (i)
666 err = iwch_write_pbl(mhp, pages, i, n);
667
668pbl_done:
669 free_page((unsigned long) pages);
670 if (err)
671 goto err_pbl;
672
637 mhp->attr.pdid = php->pdid; 673 mhp->attr.pdid = php->pdid;
638 mhp->attr.zbva = 0; 674 mhp->attr.zbva = 0;
639 mhp->attr.perms = iwch_ib_to_tpt_access(acc); 675 mhp->attr.perms = iwch_ib_to_tpt_access(acc);
640 mhp->attr.va_fbo = virt; 676 mhp->attr.va_fbo = virt;
641 mhp->attr.page_size = shift - 12; 677 mhp->attr.page_size = shift - 12;
642 mhp->attr.len = (u32) length; 678 mhp->attr.len = (u32) length;
643 mhp->attr.pbl_size = i; 679
644 err = iwch_register_mem(rhp, php, mhp, shift, pages); 680 err = iwch_register_mem(rhp, php, mhp, shift);
645 kfree(pages);
646 if (err) 681 if (err)
647 goto err; 682 goto err_pbl;
648 683
649 if (udata && !t3a_device(rhp)) { 684 if (udata && !t3a_device(rhp)) {
650 uresp.pbl_addr = (mhp->attr.pbl_addr - 685 uresp.pbl_addr = (mhp->attr.pbl_addr -
651 rhp->rdev.rnic_info.pbl_base) >> 3; 686 rhp->rdev.rnic_info.pbl_base) >> 3;
652 PDBG("%s user resp pbl_addr 0x%x\n", __func__, 687 PDBG("%s user resp pbl_addr 0x%x\n", __func__,
653 uresp.pbl_addr); 688 uresp.pbl_addr);
654 689
@@ -661,6 +696,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
661 696
662 return &mhp->ibmr; 697 return &mhp->ibmr;
663 698
699err_pbl:
700 iwch_free_pbl(mhp);
701
664err: 702err:
665 ib_umem_release(mhp->umem); 703 ib_umem_release(mhp->umem);
666 kfree(mhp); 704 kfree(mhp);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index db5100d27ca2..836163fc5429 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -340,14 +340,14 @@ int iwch_quiesce_qps(struct iwch_cq *chp);
340int iwch_resume_qps(struct iwch_cq *chp); 340int iwch_resume_qps(struct iwch_cq *chp);
341void stop_read_rep_timer(struct iwch_qp *qhp); 341void stop_read_rep_timer(struct iwch_qp *qhp);
342int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, 342int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
343 struct iwch_mr *mhp, 343 struct iwch_mr *mhp, int shift);
344 int shift,
345 __be64 *page_list);
346int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, 344int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
347 struct iwch_mr *mhp, 345 struct iwch_mr *mhp,
348 int shift, 346 int shift,
349 __be64 *page_list,
350 int npages); 347 int npages);
348int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
349void iwch_free_pbl(struct iwch_mr *mhp);
350int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
351int build_phys_page_list(struct ib_phys_buf *buffer_list, 351int build_phys_page_list(struct ib_phys_buf *buffer_list,
352 int num_phys_buf, 352 int num_phys_buf,
353 u64 *iova_start, 353 u64 *iova_start,