about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Tom Tucker <tom@opengridcomputing.com> 2008-07-02 15:56:13 -0400
committer Tom Tucker <tom@opengridcomputing.com> 2008-07-02 16:01:54 -0400
commit 34d16e42a6ab74a4a4389c061dfa3c6609e08fa0 (patch)
tree 85b1939f8096ea89bf7c2ccb569ae3b5e0667905
parent ab96dddbedf4bb8a7a0fe44012efc1d99598c36f (diff)
svcrdma: Use RPC reply map for RDMA_WRITE processing
Use the new svc_rdma_req_map data type for mapping the client side memory to the server side memory. Move the DMA mapping to the context pointed to by each WR individually so that it is unmapped after the WR completes.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_sendto.c 163
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_transport.c 5
2 files changed, 80 insertions, 88 deletions
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index fb82b1b683f8..bdc11a30e937 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -63,52 +63,44 @@
63 * SGE[2..sge_count-2] data from xdr->pages[] 63 * SGE[2..sge_count-2] data from xdr->pages[]
64 * SGE[sge_count-1] data from xdr->tail. 64 * SGE[sge_count-1] data from xdr->tail.
65 * 65 *
66 * The max SGE we need is the length of the XDR / pagesize + one for
67 * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES
68 * reserves a page for both the request and the reply header, and this
69 * array is only concerned with the reply we are assured that we have
70 * on extra page for the RPCRMDA header.
66 */ 71 */
67static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt, 72static void xdr_to_sge(struct svcxprt_rdma *xprt,
68 struct xdr_buf *xdr, 73 struct xdr_buf *xdr,
69 struct ib_sge *sge, 74 struct svc_rdma_req_map *vec)
70 int *sge_count)
71{ 75{
72 /* Max we need is the length of the XDR / pagesize + one for
73 * head + one for tail + one for RPCRDMA header
74 */
75 int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; 76 int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
76 int sge_no; 77 int sge_no;
77 u32 byte_count = xdr->len;
78 u32 sge_bytes; 78 u32 sge_bytes;
79 u32 page_bytes; 79 u32 page_bytes;
80 int page_off; 80 u32 page_off;
81 int page_no; 81 int page_no;
82 82
83 BUG_ON(xdr->len !=
84 (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
85
83 /* Skip the first sge, this is for the RPCRDMA header */ 86 /* Skip the first sge, this is for the RPCRDMA header */
84 sge_no = 1; 87 sge_no = 1;
85 88
86 /* Head SGE */ 89 /* Head SGE */
87 sge[sge_no].addr = ib_dma_map_single(xprt->sc_cm_id->device, 90 vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
88 xdr->head[0].iov_base, 91 vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
89 xdr->head[0].iov_len,
90 DMA_TO_DEVICE);
91 sge_bytes = min_t(u32, byte_count, xdr->head[0].iov_len);
92 byte_count -= sge_bytes;
93 sge[sge_no].length = sge_bytes;
94 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
95 sge_no++; 92 sge_no++;
96 93
97 /* pages SGE */ 94 /* pages SGE */
98 page_no = 0; 95 page_no = 0;
99 page_bytes = xdr->page_len; 96 page_bytes = xdr->page_len;
100 page_off = xdr->page_base; 97 page_off = xdr->page_base;
101 while (byte_count && page_bytes) { 98 while (page_bytes) {
102 sge_bytes = min_t(u32, byte_count, (PAGE_SIZE-page_off)); 99 vec->sge[sge_no].iov_base =
103 sge[sge_no].addr = 100 page_address(xdr->pages[page_no]) + page_off;
104 ib_dma_map_page(xprt->sc_cm_id->device, 101 sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
105 xdr->pages[page_no], page_off,
106 sge_bytes, DMA_TO_DEVICE);
107 sge_bytes = min(sge_bytes, page_bytes);
108 byte_count -= sge_bytes;
109 page_bytes -= sge_bytes; 102 page_bytes -= sge_bytes;
110 sge[sge_no].length = sge_bytes; 103 vec->sge[sge_no].iov_len = sge_bytes;
111 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
112 104
113 sge_no++; 105 sge_no++;
114 page_no++; 106 page_no++;
@@ -116,36 +108,24 @@ static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt,
116 } 108 }
117 109
118 /* Tail SGE */ 110 /* Tail SGE */
119 if (byte_count && xdr->tail[0].iov_len) { 111 if (xdr->tail[0].iov_len) {
120 sge[sge_no].addr = 112 vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
121 ib_dma_map_single(xprt->sc_cm_id->device, 113 vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
122 xdr->tail[0].iov_base,
123 xdr->tail[0].iov_len,
124 DMA_TO_DEVICE);
125 sge_bytes = min_t(u32, byte_count, xdr->tail[0].iov_len);
126 byte_count -= sge_bytes;
127 sge[sge_no].length = sge_bytes;
128 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
129 sge_no++; 114 sge_no++;
130 } 115 }
131 116
132 BUG_ON(sge_no > sge_max); 117 BUG_ON(sge_no > sge_max);
133 BUG_ON(byte_count != 0); 118 vec->count = sge_no;
134
135 *sge_count = sge_no;
136 return sge;
137} 119}
138 120
139
140/* Assumptions: 121/* Assumptions:
141 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE 122 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
142 */ 123 */
143static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, 124static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
144 u32 rmr, u64 to, 125 u32 rmr, u64 to,
145 u32 xdr_off, int write_len, 126 u32 xdr_off, int write_len,
146 struct ib_sge *xdr_sge, int sge_count) 127 struct svc_rdma_req_map *vec)
147{ 128{
148 struct svc_rdma_op_ctxt *tmp_sge_ctxt;
149 struct ib_send_wr write_wr; 129 struct ib_send_wr write_wr;
150 struct ib_sge *sge; 130 struct ib_sge *sge;
151 int xdr_sge_no; 131 int xdr_sge_no;
@@ -154,25 +134,23 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
154 int sge_off; 134 int sge_off;
155 int bc; 135 int bc;
156 struct svc_rdma_op_ctxt *ctxt; 136 struct svc_rdma_op_ctxt *ctxt;
157 int ret = 0;
158 137
159 BUG_ON(sge_count > RPCSVC_MAXPAGES); 138 BUG_ON(vec->count > RPCSVC_MAXPAGES);
160 dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " 139 dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
161 "write_len=%d, xdr_sge=%p, sge_count=%d\n", 140 "write_len=%d, vec->sge=%p, vec->count=%lu\n",
162 rmr, (unsigned long long)to, xdr_off, 141 rmr, (unsigned long long)to, xdr_off,
163 write_len, xdr_sge, sge_count); 142 write_len, vec->sge, vec->count);
164 143
165 ctxt = svc_rdma_get_context(xprt); 144 ctxt = svc_rdma_get_context(xprt);
166 ctxt->count = 0; 145 ctxt->direction = DMA_TO_DEVICE;
167 tmp_sge_ctxt = svc_rdma_get_context(xprt); 146 sge = ctxt->sge;
168 sge = tmp_sge_ctxt->sge;
169 147
170 /* Find the SGE associated with xdr_off */ 148 /* Find the SGE associated with xdr_off */
171 for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < sge_count; 149 for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count;
172 xdr_sge_no++) { 150 xdr_sge_no++) {
173 if (xdr_sge[xdr_sge_no].length > bc) 151 if (vec->sge[xdr_sge_no].iov_len > bc)
174 break; 152 break;
175 bc -= xdr_sge[xdr_sge_no].length; 153 bc -= vec->sge[xdr_sge_no].iov_len;
176 } 154 }
177 155
178 sge_off = bc; 156 sge_off = bc;
@@ -180,21 +158,27 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
180 sge_no = 0; 158 sge_no = 0;
181 159
182 /* Copy the remaining SGE */ 160 /* Copy the remaining SGE */
183 while (bc != 0 && xdr_sge_no < sge_count) { 161 while (bc != 0 && xdr_sge_no < vec->count) {
184 sge[sge_no].addr = xdr_sge[xdr_sge_no].addr + sge_off; 162 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
185 sge[sge_no].lkey = xdr_sge[xdr_sge_no].lkey;
186 sge_bytes = min((size_t)bc, 163 sge_bytes = min((size_t)bc,
187 (size_t)(xdr_sge[xdr_sge_no].length-sge_off)); 164 (size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
188 sge[sge_no].length = sge_bytes; 165 sge[sge_no].length = sge_bytes;
189 166 sge[sge_no].addr =
167 ib_dma_map_single(xprt->sc_cm_id->device,
168 (void *)
169 vec->sge[xdr_sge_no].iov_base + sge_off,
170 sge_bytes, DMA_TO_DEVICE);
171 if (dma_mapping_error(sge[sge_no].addr))
172 goto err;
190 sge_off = 0; 173 sge_off = 0;
191 sge_no++; 174 sge_no++;
175 ctxt->count++;
192 xdr_sge_no++; 176 xdr_sge_no++;
193 bc -= sge_bytes; 177 bc -= sge_bytes;
194 } 178 }
195 179
196 BUG_ON(bc != 0); 180 BUG_ON(bc != 0);
197 BUG_ON(xdr_sge_no > sge_count); 181 BUG_ON(xdr_sge_no > vec->count);
198 182
199 /* Prepare WRITE WR */ 183 /* Prepare WRITE WR */
200 memset(&write_wr, 0, sizeof write_wr); 184 memset(&write_wr, 0, sizeof write_wr);
@@ -209,21 +193,20 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
209 193
210 /* Post It */ 194 /* Post It */
211 atomic_inc(&rdma_stat_write); 195 atomic_inc(&rdma_stat_write);
212 if (svc_rdma_send(xprt, &write_wr)) { 196 if (svc_rdma_send(xprt, &write_wr))
213 svc_rdma_put_context(ctxt, 1); 197 goto err;
214 /* Fatal error, close transport */ 198 return 0;
215 ret = -EIO; 199 err:
216 } 200 svc_rdma_put_context(ctxt, 0);
217 svc_rdma_put_context(tmp_sge_ctxt, 0); 201 /* Fatal error, close transport */
218 return ret; 202 return -EIO;
219} 203}
220 204
221static int send_write_chunks(struct svcxprt_rdma *xprt, 205static int send_write_chunks(struct svcxprt_rdma *xprt,
222 struct rpcrdma_msg *rdma_argp, 206 struct rpcrdma_msg *rdma_argp,
223 struct rpcrdma_msg *rdma_resp, 207 struct rpcrdma_msg *rdma_resp,
224 struct svc_rqst *rqstp, 208 struct svc_rqst *rqstp,
225 struct ib_sge *sge, 209 struct svc_rdma_req_map *vec)
226 int sge_count)
227{ 210{
228 u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; 211 u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
229 int write_len; 212 int write_len;
@@ -269,8 +252,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
269 rs_offset + chunk_off, 252 rs_offset + chunk_off,
270 xdr_off, 253 xdr_off,
271 this_write, 254 this_write,
272 sge, 255 vec);
273 sge_count);
274 if (ret) { 256 if (ret) {
275 dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", 257 dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
276 ret); 258 ret);
@@ -292,8 +274,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
292 struct rpcrdma_msg *rdma_argp, 274 struct rpcrdma_msg *rdma_argp,
293 struct rpcrdma_msg *rdma_resp, 275 struct rpcrdma_msg *rdma_resp,
294 struct svc_rqst *rqstp, 276 struct svc_rqst *rqstp,
295 struct ib_sge *sge, 277 struct svc_rdma_req_map *vec)
296 int sge_count)
297{ 278{
298 u32 xfer_len = rqstp->rq_res.len; 279 u32 xfer_len = rqstp->rq_res.len;
299 int write_len; 280 int write_len;
@@ -341,8 +322,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
341 rs_offset + chunk_off, 322 rs_offset + chunk_off,
342 xdr_off, 323 xdr_off,
343 this_write, 324 this_write,
344 sge, 325 vec);
345 sge_count);
346 if (ret) { 326 if (ret) {
347 dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", 327 dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
348 ret); 328 ret);
@@ -380,7 +360,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
380 struct page *page, 360 struct page *page,
381 struct rpcrdma_msg *rdma_resp, 361 struct rpcrdma_msg *rdma_resp,
382 struct svc_rdma_op_ctxt *ctxt, 362 struct svc_rdma_op_ctxt *ctxt,
383 int sge_count, 363 struct svc_rdma_req_map *vec,
384 int byte_count) 364 int byte_count)
385{ 365{
386 struct ib_send_wr send_wr; 366 struct ib_send_wr send_wr;
@@ -413,10 +393,15 @@ static int send_reply(struct svcxprt_rdma *rdma,
413 ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey; 393 ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;
414 394
415 /* Determine how many of our SGE are to be transmitted */ 395 /* Determine how many of our SGE are to be transmitted */
416 for (sge_no = 1; byte_count && sge_no < sge_count; sge_no++) { 396 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
417 sge_bytes = min((size_t)ctxt->sge[sge_no].length, 397 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
418 (size_t)byte_count);
419 byte_count -= sge_bytes; 398 byte_count -= sge_bytes;
399 ctxt->sge[sge_no].addr =
400 ib_dma_map_single(rdma->sc_cm_id->device,
401 vec->sge[sge_no].iov_base,
402 sge_bytes, DMA_TO_DEVICE);
403 ctxt->sge[sge_no].length = sge_bytes;
404 ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
420 } 405 }
421 BUG_ON(byte_count != 0); 406 BUG_ON(byte_count != 0);
422 407
@@ -428,8 +413,10 @@ static int send_reply(struct svcxprt_rdma *rdma,
428 ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; 413 ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
429 ctxt->count++; 414 ctxt->count++;
430 rqstp->rq_respages[page_no] = NULL; 415 rqstp->rq_respages[page_no] = NULL;
416 /* If there are more pages than SGE, terminate SGE list */
417 if (page_no+1 >= sge_no)
418 ctxt->sge[page_no+1].length = 0;
431 } 419 }
432
433 BUG_ON(sge_no > rdma->sc_max_sge); 420 BUG_ON(sge_no > rdma->sc_max_sge);
434 memset(&send_wr, 0, sizeof send_wr); 421 memset(&send_wr, 0, sizeof send_wr);
435 ctxt->wr_op = IB_WR_SEND; 422 ctxt->wr_op = IB_WR_SEND;
@@ -473,20 +460,20 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
473 enum rpcrdma_proc reply_type; 460 enum rpcrdma_proc reply_type;
474 int ret; 461 int ret;
475 int inline_bytes; 462 int inline_bytes;
476 struct ib_sge *sge;
477 int sge_count = 0;
478 struct page *res_page; 463 struct page *res_page;
479 struct svc_rdma_op_ctxt *ctxt; 464 struct svc_rdma_op_ctxt *ctxt;
465 struct svc_rdma_req_map *vec;
480 466
481 dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); 467 dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
482 468
483 /* Get the RDMA request header. */ 469 /* Get the RDMA request header. */
484 rdma_argp = xdr_start(&rqstp->rq_arg); 470 rdma_argp = xdr_start(&rqstp->rq_arg);
485 471
486 /* Build an SGE for the XDR */ 472 /* Build an req vec for the XDR */
487 ctxt = svc_rdma_get_context(rdma); 473 ctxt = svc_rdma_get_context(rdma);
488 ctxt->direction = DMA_TO_DEVICE; 474 ctxt->direction = DMA_TO_DEVICE;
489 sge = xdr_to_sge(rdma, &rqstp->rq_res, ctxt->sge, &sge_count); 475 vec = svc_rdma_get_req_map();
476 xdr_to_sge(rdma, &rqstp->rq_res, vec);
490 477
491 inline_bytes = rqstp->rq_res.len; 478 inline_bytes = rqstp->rq_res.len;
492 479
@@ -503,7 +490,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
503 490
504 /* Send any write-chunk data and build resp write-list */ 491 /* Send any write-chunk data and build resp write-list */
505 ret = send_write_chunks(rdma, rdma_argp, rdma_resp, 492 ret = send_write_chunks(rdma, rdma_argp, rdma_resp,
506 rqstp, sge, sge_count); 493 rqstp, vec);
507 if (ret < 0) { 494 if (ret < 0) {
508 printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", 495 printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
509 ret); 496 ret);
@@ -513,7 +500,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
513 500
514 /* Send any reply-list data and update resp reply-list */ 501 /* Send any reply-list data and update resp reply-list */
515 ret = send_reply_chunks(rdma, rdma_argp, rdma_resp, 502 ret = send_reply_chunks(rdma, rdma_argp, rdma_resp,
516 rqstp, sge, sge_count); 503 rqstp, vec);
517 if (ret < 0) { 504 if (ret < 0) {
518 printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", 505 printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
519 ret); 506 ret);
@@ -521,11 +508,13 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
521 } 508 }
522 inline_bytes -= ret; 509 inline_bytes -= ret;
523 510
524 ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, sge_count, 511 ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
525 inline_bytes); 512 inline_bytes);
513 svc_rdma_put_req_map(vec);
526 dprintk("svcrdma: send_reply returns %d\n", ret); 514 dprintk("svcrdma: send_reply returns %d\n", ret);
527 return ret; 515 return ret;
528 error: 516 error:
517 svc_rdma_put_req_map(vec);
529 svc_rdma_put_context(ctxt, 0); 518 svc_rdma_put_context(ctxt, 0);
530 put_page(res_page); 519 put_page(res_page);
531 return ret; 520 return ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index ae90758d8e9b..fc86338bcbb2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -387,10 +387,13 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
387 387
388 switch (ctxt->wr_op) { 388 switch (ctxt->wr_op) {
389 case IB_WR_SEND: 389 case IB_WR_SEND:
390 case IB_WR_RDMA_WRITE:
391 svc_rdma_put_context(ctxt, 1); 390 svc_rdma_put_context(ctxt, 1);
392 break; 391 break;
393 392
393 case IB_WR_RDMA_WRITE:
394 svc_rdma_put_context(ctxt, 0);
395 break;
396
394 case IB_WR_RDMA_READ: 397 case IB_WR_RDMA_READ:
395 if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { 398 if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
396 struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; 399 struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;