author:    Tom Tucker <tom@opengridcomputing.com>  2008-04-30 20:50:56 -0400
committer: Tom Tucker <tom@opengridcomputing.com>  2008-05-19 08:33:51 -0400
commit:    02e7452de74d308ca642f54f7e5ef801ced60a92
tree:      9300b5d12be31a6d48689a4260abb1b81e8a5551
parent:    10a38c33f46d128d11e299acba744bc325cde420
svcrdma: Simplify RDMA_READ deferral buffer management
An NFS_WRITE requires a set of RDMA_READ requests to fetch the write
data from the client. Two principal pieces of data need to be tracked:
the list of pages that comprise the completed RPC, and the SGEs of
DMA-mapped pages that refer to that page list. Previously, all of this
was managed as a linked list of contexts, with the context containing
the page list buried somewhere in the list. This patch simplifies the
processing by keeping, instead of a linked list, only a pointer from
the last submitted RDMA_READ's context to the context that maps the
set of pages describing the RPC. This significantly simplifies the
code path. SGE contexts are now cleaned up inline in the DTO path
instead of at read completion time.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
-rw-r--r--  include/linux/sunrpc/svc_rdma.h          |  1
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 58
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c |  5
3 files changed, 16 insertions(+), 48 deletions(-)
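
As an illustration of the bookkeeping change, here is a toy model (not
the kernel code: struct op_ctxt, read_done(), and main() are invented
for the example; only the read_hdr field and the inline-cleanup idea
mirror the patch below):

#include <stdio.h>
#include <stdlib.h>

struct op_ctxt {
	struct op_ctxt *read_hdr;	/* set only on the last READ ctxt */
	int is_last;
};

/* Completion (DTO) path after the patch: every READ context is freed
 * inline; only the last one hands the page-list context onward for
 * the RPC, so no chain of contexts ever has to be walked and freed. */
static struct op_ctxt *read_done(struct op_ctxt *ctxt)
{
	struct op_ctxt *hdr = ctxt->is_last ? ctxt->read_hdr : NULL;

	free(ctxt);	/* cleaned up inline, no list traversal */
	return hdr;	/* non-NULL means the RPC can be enqueued */
}

int main(void)
{
	struct op_ctxt *hdr = calloc(1, sizeof(*hdr));	/* page-list ctxt */
	struct op_ctxt *last = calloc(1, sizeof(*last));/* final READ ctxt */

	last->is_last = 1;
	last->read_hdr = hdr;	/* the one pointer the patch adds */

	struct op_ctxt *ready = read_done(last);
	printf("RPC ready to enqueue: %s\n", ready == hdr ? "yes" : "no");
	free(hdr);
	return 0;
}

Before the patch, read_done() would instead have walked a ->next chain
of contexts (with the page-list context buried in it) to free them all.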
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index d0011f3db90c..c447c417b37b 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -71,6 +71,7 @@ extern atomic_t rdma_stat_sq_prod;
  * completes.
  */
 struct svc_rdma_op_ctxt {
+	struct svc_rdma_op_ctxt *read_hdr;
 	struct svc_rdma_op_ctxt *next;
 	struct xdr_buf arg;
 	struct list_head dto_q;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 80c6ee82c34b..21a1e625ef03 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -289,7 +289,6 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	u64 sgl_offset;
 	struct rpcrdma_read_chunk *ch;
 	struct svc_rdma_op_ctxt *ctxt = NULL;
-	struct svc_rdma_op_ctxt *head;
 	struct svc_rdma_op_ctxt *tmp_sge_ctxt;
 	struct svc_rdma_op_ctxt *tmp_ch_ctxt;
 	struct chunk_sge *ch_sge_ary;
@@ -310,20 +309,13 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
 				    sge, ch_sge_ary,
 				    ch_count, byte_count);
-	head = svc_rdma_get_context(xprt);
 	sgl_offset = 0;
 	ch_no = 0;
 
 	for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
 	     ch->rc_discrim != 0; ch++, ch_no++) {
 next_sge:
-		if (!ctxt)
-			ctxt = head;
-		else {
-			ctxt->next = svc_rdma_get_context(xprt);
-			ctxt = ctxt->next;
-		}
-		ctxt->next = NULL;
+		ctxt = svc_rdma_get_context(xprt);
 		ctxt->direction = DMA_FROM_DEVICE;
 		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
 
@@ -351,20 +343,15 @@ next_sge:
 			 * the client and the RPC needs to be enqueued.
 			 */
 			set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-			ctxt->next = hdr_ctxt;
-			hdr_ctxt->next = head;
+			ctxt->read_hdr = hdr_ctxt;
 		}
 		/* Post the read */
 		err = svc_rdma_send(xprt, &read_wr);
 		if (err) {
-			printk(KERN_ERR "svcrdma: Error posting send = %d\n",
+			printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
 			       err);
-			/*
-			 * Break the circular list so free knows when
-			 * to stop if the error happened to occur on
-			 * the last read
-			 */
-			ctxt->next = NULL;
+			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+			svc_rdma_put_context(ctxt, 0);
 			goto out;
 		}
 		atomic_inc(&rdma_stat_read);
@@ -375,7 +362,7 @@ next_sge:
 			goto next_sge;
 		}
 		sgl_offset = 0;
-		err = 0;
+		err = 1;
 	}
 
  out:
@@ -393,25 +380,12 @@ next_sge:
 	while (rqstp->rq_resused)
 		rqstp->rq_respages[--rqstp->rq_resused] = NULL;
 
-	if (err) {
-		printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err);
-		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-		/* Free the linked list of read contexts */
-		while (head != NULL) {
-			ctxt = head->next;
-			svc_rdma_put_context(head, 1);
-			head = ctxt;
-		}
-		return err;
-	}
-
-	return 1;
+	return err;
 }
 
 static int rdma_read_complete(struct svc_rqst *rqstp,
-			      struct svc_rdma_op_ctxt *data)
+			      struct svc_rdma_op_ctxt *head)
 {
-	struct svc_rdma_op_ctxt *head = data->next;
 	int page_no;
 	int ret;
 
@@ -437,22 +411,12 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
 	rqstp->rq_arg.len = head->arg.len;
 	rqstp->rq_arg.buflen = head->arg.buflen;
 
+	/* Free the context */
+	svc_rdma_put_context(head, 0);
+
 	/* XXX: What should this be? */
 	rqstp->rq_prot = IPPROTO_MAX;
 
-	/*
-	 * Free the contexts we used to build the RDMA_READ. We have
-	 * to be careful here because the context list uses the same
-	 * next pointer used to chain the contexts associated with the
-	 * RDMA_READ
-	 */
-	data->next = NULL; /* terminate circular list */
-	do {
-		data = head->next;
-		svc_rdma_put_context(head, 0);
-		head = data;
-	} while (head != NULL);
-
 	ret = rqstp->rq_arg.head[0].iov_len
 		+ rqstp->rq_arg.page_len
 		+ rqstp->rq_arg.tail[0].iov_len;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 4a79dfda1465..34141eaf25a0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -352,13 +352,16 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
 
 		case IB_WR_RDMA_READ:
 			if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
+				struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
+				BUG_ON(!read_hdr);
 				set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
 				spin_lock_bh(&xprt->sc_read_complete_lock);
-				list_add_tail(&ctxt->dto_q,
+				list_add_tail(&read_hdr->dto_q,
 					      &xprt->sc_read_complete_q);
 				spin_unlock_bh(&xprt->sc_read_complete_lock);
 				svc_xprt_enqueue(&xprt->sc_xprt);
 			}
+			svc_rdma_put_context(ctxt, 0);
 			break;
 
 		default: