author	Tom Tucker <tom@opengridcomputing.com>	2008-04-30 20:50:56 -0400
committer	Tom Tucker <tom@opengridcomputing.com>	2008-05-19 08:33:51 -0400
commit	02e7452de74d308ca642f54f7e5ef801ced60a92 (patch)
tree	9300b5d12be31a6d48689a4260abb1b81e8a5551
parent	10a38c33f46d128d11e299acba744bc325cde420 (diff)
svcrdma: Simplify RDMA_READ deferral buffer management
An NFS_WRITE requires a set of RDMA_READ requests to fetch the write data from the client. There are two principal pieces of data that need to be tracked: the list of pages that comprise the completed RPC and the SGE of DMA-mapped pages used to refer to this list of pages. Previously, all of this was managed as a linked list of contexts, with the context containing the page list buried in that list.

This patch simplifies the processing by keeping no list at all, only a pointer from the last submitted RDMA_READ's context to the context that maps the set of pages that describe the RPC. This significantly simplifies the code path. SGE contexts are now cleaned up inline in the DTO path instead of at read completion time.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
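The shape of the change is easier to see outside the kernel. Below is a minimal user-space sketch of the new bookkeeping, not the kernel code itself: op_ctxt, get_ctxt(), put_ctxt(), reap() and post_reads() are hypothetical stand-ins for svc_rdma_op_ctxt, svc_rdma_get_context(), svc_rdma_put_context(), sq_cq_reap() and rdma_read_xdr(). The point it illustrates is that no context chains to any other; only the final read's context records where the RPC's page list lives, and every SGE context is released as soon as its own completion is reaped.

#include <stdio.h>
#include <stdlib.h>

struct op_ctxt {
	struct op_ctxt *read_hdr;	/* models svc_rdma_op_ctxt::read_hdr */
	int last;			/* models RDMACTXT_F_LAST_CTXT */
};

static struct op_ctxt *get_ctxt(void)
{
	return calloc(1, sizeof(struct op_ctxt));
}

static void put_ctxt(struct op_ctxt *ctxt)
{
	free(ctxt);
}

/*
 * Completion (DTO) side: each SGE context is freed inline as its read
 * completes; only the last one carries the pointer back to the header
 * context that owns the RPC's page list.
 */
static void reap(struct op_ctxt *ctxt)
{
	if (ctxt->last)
		printf("enqueue RPC via header ctxt %p\n",
		       (void *)ctxt->read_hdr);
	put_ctxt(ctxt);	/* freed inline, not deferred to read completion */
}

/*
 * Submission side: one context per RDMA_READ, with nothing linking the
 * reads together; the final context alone records where the RPC lives.
 */
static void post_reads(struct op_ctxt *hdr_ctxt, int nreads)
{
	for (int i = 0; i < nreads; i++) {
		struct op_ctxt *ctxt = get_ctxt();

		if (i == nreads - 1) {
			ctxt->last = 1;
			ctxt->read_hdr = hdr_ctxt;
		}
		/* post the RDMA_READ; its completion would call reap() */
		reap(ctxt);
	}
}

int main(void)
{
	struct op_ctxt *hdr = get_ctxt();	/* maps the RPC's pages */

	post_reads(hdr, 3);
	put_ctxt(hdr);	/* the kernel frees this in rdma_read_complete() */
	return 0;
}

Dropping the list also removes the error-path walk that previously had to break a circular list before freeing it; on a failed post, the submitter now just closes the transport and puts the one context it holds.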
-rw-r--r--	include/linux/sunrpc/svc_rdma.h	1
-rw-r--r--	net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	58
-rw-r--r--	net/sunrpc/xprtrdma/svc_rdma_transport.c	5
3 files changed, 16 insertions, 48 deletions
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index d0011f3db90c..c447c417b37b 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -71,6 +71,7 @@ extern atomic_t rdma_stat_sq_prod;
  * completes.
  */
 struct svc_rdma_op_ctxt {
+	struct svc_rdma_op_ctxt *read_hdr;
 	struct svc_rdma_op_ctxt *next;
 	struct xdr_buf arg;
 	struct list_head dto_q;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 80c6ee82c34b..21a1e625ef03 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -289,7 +289,6 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	u64 sgl_offset;
 	struct rpcrdma_read_chunk *ch;
 	struct svc_rdma_op_ctxt *ctxt = NULL;
-	struct svc_rdma_op_ctxt *head;
 	struct svc_rdma_op_ctxt *tmp_sge_ctxt;
 	struct svc_rdma_op_ctxt *tmp_ch_ctxt;
 	struct chunk_sge *ch_sge_ary;
@@ -310,20 +309,13 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
 				    sge, ch_sge_ary,
 				    ch_count, byte_count);
-	head = svc_rdma_get_context(xprt);
 	sgl_offset = 0;
 	ch_no = 0;
 
 	for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
 	     ch->rc_discrim != 0; ch++, ch_no++) {
 next_sge:
-		if (!ctxt)
-			ctxt = head;
-		else {
-			ctxt->next = svc_rdma_get_context(xprt);
-			ctxt = ctxt->next;
-		}
-		ctxt->next = NULL;
+		ctxt = svc_rdma_get_context(xprt);
 		ctxt->direction = DMA_FROM_DEVICE;
 		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
 
@@ -351,20 +343,15 @@ next_sge:
 			 * the client and the RPC needs to be enqueued.
 			 */
 			set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-			ctxt->next = hdr_ctxt;
-			hdr_ctxt->next = head;
+			ctxt->read_hdr = hdr_ctxt;
 		}
 		/* Post the read */
 		err = svc_rdma_send(xprt, &read_wr);
 		if (err) {
-			printk(KERN_ERR "svcrdma: Error posting send = %d\n",
+			printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
 			       err);
-			/*
-			 * Break the circular list so free knows when
-			 * to stop if the error happened to occur on
-			 * the last read
-			 */
-			ctxt->next = NULL;
+			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+			svc_rdma_put_context(ctxt, 0);
 			goto out;
 		}
 		atomic_inc(&rdma_stat_read);
@@ -375,7 +362,7 @@ next_sge:
 			goto next_sge;
 		}
 		sgl_offset = 0;
-		err = 0;
+		err = 1;
 	}
 
  out:
@@ -393,25 +380,12 @@ next_sge:
 	while (rqstp->rq_resused)
 		rqstp->rq_respages[--rqstp->rq_resused] = NULL;
 
-	if (err) {
-		printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err);
-		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-		/* Free the linked list of read contexts */
-		while (head != NULL) {
-			ctxt = head->next;
-			svc_rdma_put_context(head, 1);
-			head = ctxt;
-		}
-		return err;
-	}
-
-	return 1;
+	return err;
 }
 
 static int rdma_read_complete(struct svc_rqst *rqstp,
-			      struct svc_rdma_op_ctxt *data)
+			      struct svc_rdma_op_ctxt *head)
 {
-	struct svc_rdma_op_ctxt *head = data->next;
 	int page_no;
 	int ret;
 
@@ -437,22 +411,12 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
 	rqstp->rq_arg.len = head->arg.len;
 	rqstp->rq_arg.buflen = head->arg.buflen;
 
+	/* Free the context */
+	svc_rdma_put_context(head, 0);
+
 	/* XXX: What should this be? */
 	rqstp->rq_prot = IPPROTO_MAX;
 
-	/*
-	 * Free the contexts we used to build the RDMA_READ. We have
-	 * to be careful here because the context list uses the same
-	 * next pointer used to chain the contexts associated with the
-	 * RDMA_READ
-	 */
-	data->next = NULL; /* terminate circular list */
-	do {
-		data = head->next;
-		svc_rdma_put_context(head, 0);
-		head = data;
-	} while (head != NULL);
-
 	ret = rqstp->rq_arg.head[0].iov_len
 		+ rqstp->rq_arg.page_len
 		+ rqstp->rq_arg.tail[0].iov_len;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 4a79dfda1465..34141eaf25a0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -352,13 +352,16 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
 
 		case IB_WR_RDMA_READ:
 			if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
+				struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
+				BUG_ON(!read_hdr);
 				set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
 				spin_lock_bh(&xprt->sc_read_complete_lock);
-				list_add_tail(&ctxt->dto_q,
+				list_add_tail(&read_hdr->dto_q,
 					      &xprt->sc_read_complete_q);
 				spin_unlock_bh(&xprt->sc_read_complete_lock);
 				svc_xprt_enqueue(&xprt->sc_xprt);
 			}
+			svc_rdma_put_context(ctxt, 0);
 			break;
 
 		default: