diff options
author | Roland Dreier <rolandd@cisco.com> | 2008-05-06 18:56:22 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2008-05-06 18:56:22 -0400 |
commit | 273748cc908a901d082b4da5a16b2541c9d78a02 (patch) | |
tree | b573f1f3f583acf2309a10e86f99a1842a8ccdc5 /drivers/infiniband/hw/cxgb3/iwch_mem.c | |
parent | 0e9913362a967377eb886bbdf305ec58aa07a878 (diff) |
RDMA/cxgb3: Fix severe limit on userspace memory registration size
Currently, iw_cxgb3 is severely limited on the amount of userspace
memory that can be registered in in a single memory region, which
causes big problems for applications that expect to be able to
register 100s of MB.
The problem is that the driver uses a single kmalloc()ed buffer to
hold the physical buffer list (PBL) for the entire memory region
during registration, which means that 8 bytes of contiguous memory are
required for each page of memory being registered. For example, a 64
MB registration will require 128 KB of contiguous memory with 4 KB
pages, and it unlikely that such an allocation will succeed on a busy
system.
This is purely a driver problem: the temporary page list buffer is not
needed by the hardware, so we can fix this by writing the PBL to the
hardware in page-sized chunks rather than all at once. We do this by
splitting the memory registration operation up into several steps:
- Allocate PBL space in adapter memory for the full registration
- Copy PBL to adapter memory in chunks
- Allocate STag and enable memory region
This also allows several other cleanups to the __cxio_tpt_op()
interface and related parts of the driver.
This change leaves the reregister memory region and memory window
operations broken, but they already didn't work due to other
longstanding bugs, so fixing them will be left to a later patch.
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/cxgb3/iwch_mem.c')
-rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_mem.c | 75 |
1 files changed, 49 insertions, 26 deletions
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c index 58c3d61bcd1..ec49a5cbdeb 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c | |||
@@ -35,17 +35,26 @@ | |||
35 | #include <rdma/ib_verbs.h> | 35 | #include <rdma/ib_verbs.h> |
36 | 36 | ||
37 | #include "cxio_hal.h" | 37 | #include "cxio_hal.h" |
38 | #include "cxio_resource.h" | ||
38 | #include "iwch.h" | 39 | #include "iwch.h" |
39 | #include "iwch_provider.h" | 40 | #include "iwch_provider.h" |
40 | 41 | ||
41 | int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, | 42 | static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag) |
42 | struct iwch_mr *mhp, | ||
43 | int shift, | ||
44 | __be64 *page_list) | ||
45 | { | 43 | { |
46 | u32 stag; | ||
47 | u32 mmid; | 44 | u32 mmid; |
48 | 45 | ||
46 | mhp->attr.state = 1; | ||
47 | mhp->attr.stag = stag; | ||
48 | mmid = stag >> 8; | ||
49 | mhp->ibmr.rkey = mhp->ibmr.lkey = stag; | ||
50 | insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid); | ||
51 | PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); | ||
52 | } | ||
53 | |||
54 | int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, | ||
55 | struct iwch_mr *mhp, int shift) | ||
56 | { | ||
57 | u32 stag; | ||
49 | 58 | ||
50 | if (cxio_register_phys_mem(&rhp->rdev, | 59 | if (cxio_register_phys_mem(&rhp->rdev, |
51 | &stag, mhp->attr.pdid, | 60 | &stag, mhp->attr.pdid, |
@@ -53,28 +62,21 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, | |||
53 | mhp->attr.zbva, | 62 | mhp->attr.zbva, |
54 | mhp->attr.va_fbo, | 63 | mhp->attr.va_fbo, |
55 | mhp->attr.len, | 64 | mhp->attr.len, |
56 | shift-12, | 65 | shift - 12, |
57 | page_list, | 66 | mhp->attr.pbl_size, mhp->attr.pbl_addr)) |
58 | &mhp->attr.pbl_size, &mhp->attr.pbl_addr)) | ||
59 | return -ENOMEM; | 67 | return -ENOMEM; |
60 | mhp->attr.state = 1; | 68 | |
61 | mhp->attr.stag = stag; | 69 | iwch_finish_mem_reg(mhp, stag); |
62 | mmid = stag >> 8; | 70 | |
63 | mhp->ibmr.rkey = mhp->ibmr.lkey = stag; | ||
64 | insert_handle(rhp, &rhp->mmidr, mhp, mmid); | ||
65 | PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); | ||
66 | return 0; | 71 | return 0; |
67 | } | 72 | } |
68 | 73 | ||
69 | int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, | 74 | int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, |
70 | struct iwch_mr *mhp, | 75 | struct iwch_mr *mhp, |
71 | int shift, | 76 | int shift, |
72 | __be64 *page_list, | ||
73 | int npages) | 77 | int npages) |
74 | { | 78 | { |
75 | u32 stag; | 79 | u32 stag; |
76 | u32 mmid; | ||
77 | |||
78 | 80 | ||
79 | /* We could support this... */ | 81 | /* We could support this... */ |
80 | if (npages > mhp->attr.pbl_size) | 82 | if (npages > mhp->attr.pbl_size) |
@@ -87,19 +89,40 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, | |||
87 | mhp->attr.zbva, | 89 | mhp->attr.zbva, |
88 | mhp->attr.va_fbo, | 90 | mhp->attr.va_fbo, |
89 | mhp->attr.len, | 91 | mhp->attr.len, |
90 | shift-12, | 92 | shift - 12, |
91 | page_list, | 93 | mhp->attr.pbl_size, mhp->attr.pbl_addr)) |
92 | &mhp->attr.pbl_size, &mhp->attr.pbl_addr)) | ||
93 | return -ENOMEM; | 94 | return -ENOMEM; |
94 | mhp->attr.state = 1; | 95 | |
95 | mhp->attr.stag = stag; | 96 | iwch_finish_mem_reg(mhp, stag); |
96 | mmid = stag >> 8; | 97 | |
97 | mhp->ibmr.rkey = mhp->ibmr.lkey = stag; | 98 | return 0; |
98 | insert_handle(rhp, &rhp->mmidr, mhp, mmid); | 99 | } |
99 | PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); | 100 | |
101 | int iwch_alloc_pbl(struct iwch_mr *mhp, int npages) | ||
102 | { | ||
103 | mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev, | ||
104 | npages << 3); | ||
105 | |||
106 | if (!mhp->attr.pbl_addr) | ||
107 | return -ENOMEM; | ||
108 | |||
109 | mhp->attr.pbl_size = npages; | ||
110 | |||
100 | return 0; | 111 | return 0; |
101 | } | 112 | } |
102 | 113 | ||
114 | void iwch_free_pbl(struct iwch_mr *mhp) | ||
115 | { | ||
116 | cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, | ||
117 | mhp->attr.pbl_size << 3); | ||
118 | } | ||
119 | |||
120 | int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset) | ||
121 | { | ||
122 | return cxio_write_pbl(&mhp->rhp->rdev, pages, | ||
123 | mhp->attr.pbl_addr + (offset << 3), npages); | ||
124 | } | ||
125 | |||
103 | int build_phys_page_list(struct ib_phys_buf *buffer_list, | 126 | int build_phys_page_list(struct ib_phys_buf *buffer_list, |
104 | int num_phys_buf, | 127 | int num_phys_buf, |
105 | u64 *iova_start, | 128 | u64 *iova_start, |