diff options
| author | Chuck Lever <chuck.lever@oracle.com> | 2018-05-04 15:35:04 -0400 |
|---|---|---|
| committer | Anna Schumaker <Anna.Schumaker@Netapp.com> | 2018-05-07 09:20:03 -0400 |
| commit | 48be539dd44a3a010a6a330d09610d60ad42758a (patch) | |
| tree | b5d79c71dad7cc122d779f68efb520cd3ab3332b | |
| parent | a9cde23ab7cdf5e4e93432dffd0e734267f2b745 (diff) | |
xprtrdma: Introduce ->alloc_slot call-out for xprtrdma
rpcrdma_buffer_get acquires an rpcrdma_req and an rpcrdma_rep for each RPC.
Currently this is done in the call_allocate action, and sometimes it
can fail if there are many outstanding RPCs.
When call_allocate fails, the RPC task is put on the delayq. It is
awoken a few milliseconds later, but there's no guarantee it will
get a buffer at that time. The RPC task can be repeatedly put back
to sleep or even starved.
The call_allocate action should rarely fail. The delayq mechanism is
not meant to deal with transport congestion.
In the current sunrpc stack, there is a friendlier way to deal with
this situation. These objects are actually tantamount to an RPC
slot (rpc_rqst) and there is a separate FSM action, distinct from
call_allocate, for allocating slot resources. This is the
call_reserve action.
When allocation fails during this action, the RPC is placed on the
transport's backlog queue. The backlog mechanism provides a stronger
guarantee that when the RPC is awoken, a buffer will be available
for it; and backlogged RPCs are awoken one-at-a-time.
To make slot resource allocation occur in the call_reserve action,
create special ->alloc_slot and ->free_slot call-outs for xprtrdma.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
| -rw-r--r-- | net/sunrpc/xprtrdma/transport.c | 52 |
1 file changed, 50 insertions(+), 2 deletions(-)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index cf5e866ee969..8f9338e98c4f 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
| @@ -538,6 +538,54 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) | |||
| 538 | } | 538 | } |
| 539 | } | 539 | } |
| 540 | 540 | ||
| 541 | /** | ||
| 542 | * xprt_rdma_alloc_slot - allocate an rpc_rqst | ||
| 543 | * @xprt: controlling RPC transport | ||
| 544 | * @task: RPC task requesting a fresh rpc_rqst | ||
| 545 | * | ||
| 546 | * tk_status values: | ||
| 547 | * %0 if task->tk_rqstp points to a fresh rpc_rqst | ||
| 548 | * %-EAGAIN if no rpc_rqst is available; queued on backlog | ||
| 549 | */ | ||
| 550 | static void | ||
| 551 | xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) | ||
| 552 | { | ||
| 553 | struct rpc_rqst *rqst; | ||
| 554 | |||
| 555 | spin_lock(&xprt->reserve_lock); | ||
| 556 | if (list_empty(&xprt->free)) | ||
| 557 | goto out_sleep; | ||
| 558 | rqst = list_first_entry(&xprt->free, struct rpc_rqst, rq_list); | ||
| 559 | list_del(&rqst->rq_list); | ||
| 560 | spin_unlock(&xprt->reserve_lock); | ||
| 561 | |||
| 562 | task->tk_rqstp = rqst; | ||
| 563 | task->tk_status = 0; | ||
| 564 | return; | ||
| 565 | |||
| 566 | out_sleep: | ||
| 567 | rpc_sleep_on(&xprt->backlog, task, NULL); | ||
| 568 | spin_unlock(&xprt->reserve_lock); | ||
| 569 | task->tk_status = -EAGAIN; | ||
| 570 | } | ||
| 571 | |||
| 572 | /** | ||
| 573 | * xprt_rdma_free_slot - release an rpc_rqst | ||
| 574 | * @xprt: controlling RPC transport | ||
| 575 | * @rqst: rpc_rqst to release | ||
| 576 | * | ||
| 577 | */ | ||
| 578 | static void | ||
| 579 | xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst) | ||
| 580 | { | ||
| 581 | memset(rqst, 0, sizeof(*rqst)); | ||
| 582 | |||
| 583 | spin_lock(&xprt->reserve_lock); | ||
| 584 | list_add(&rqst->rq_list, &xprt->free); | ||
| 585 | rpc_wake_up_next(&xprt->backlog); | ||
| 586 | spin_unlock(&xprt->reserve_lock); | ||
| 587 | } | ||
| 588 | |||
| 541 | static bool | 589 | static bool |
| 542 | rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | 590 | rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, |
| 543 | size_t size, gfp_t flags) | 591 | size_t size, gfp_t flags) |
| @@ -780,8 +828,8 @@ xprt_rdma_disable_swap(struct rpc_xprt *xprt) | |||
| 780 | static const struct rpc_xprt_ops xprt_rdma_procs = { | 828 | static const struct rpc_xprt_ops xprt_rdma_procs = { |
| 781 | .reserve_xprt = xprt_reserve_xprt_cong, | 829 | .reserve_xprt = xprt_reserve_xprt_cong, |
| 782 | .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ | 830 | .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ |
| 783 | .alloc_slot = xprt_alloc_slot, | 831 | .alloc_slot = xprt_rdma_alloc_slot, |
| 784 | .free_slot = xprt_free_slot, | 832 | .free_slot = xprt_rdma_free_slot, |
| 785 | .release_request = xprt_release_rqst_cong, /* ditto */ | 833 | .release_request = xprt_release_rqst_cong, /* ditto */ |
| 786 | .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ | 834 | .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ |
| 787 | .timer = xprt_rdma_timer, | 835 | .timer = xprt_rdma_timer, |
