author		Chuck Lever <chuck.lever@oracle.com>		2019-06-19 10:33:15 -0400
committer	Anna Schumaker <Anna.Schumaker@Netapp.com>	2019-07-09 10:30:25 -0400
commit		0ab115237025f5e379620bbcd56a02697d07b002 (patch)
tree		670d6d24149aa7f2b1e3ab254e6658c2a297441a /net/sunrpc/xprtrdma
parent		d8099feda4833bab96b1bf312e9e6aad6b771570 (diff)
xprtrdma: Wake RPCs directly in rpcrdma_wc_send path
Eliminate a context switch in the path that handles RPC wake-ups when a
Receive completion has to wait for a Send completion.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Diffstat (limited to 'net/sunrpc/xprtrdma')
-rw-r--r--	net/sunrpc/xprtrdma/rpc_rdma.c	| 61
-rw-r--r--	net/sunrpc/xprtrdma/transport.c	| 10
-rw-r--r--	net/sunrpc/xprtrdma/verbs.c	|  3
-rw-r--r--	net/sunrpc/xprtrdma/xprt_rdma.h	| 12
4 files changed, 36 insertions(+), 50 deletions(-)
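For illustration only (not part of the commit): a minimal userspace C sketch of the completion scheme this patch introduces, with C11 atomics standing in for struct kref. All names below are hypothetical; the point is that the last of the Send and Receive completions to drop its reference invokes the "done" callback directly, with no wait queue and no extra context switch.

/*
 * Hypothetical userspace model of the kref-based RPC completion:
 * one reference is held for the Receive path and one for an
 * in-flight (DMA-mapped) Send; whichever completion runs last
 * wakes the RPC.
 */
#include <stdatomic.h>
#include <stdio.h>

struct fake_req {
	atomic_int refcount;	/* stands in for req->rl_kref */
	const char *reply;	/* stands in for req->rl_reply */
};

/* Runs in whichever completion context drops the final reference. */
static void fake_reply_done(struct fake_req *req)
{
	printf("RPC completed with reply: %s\n", req->reply);
}

static void fake_put(struct fake_req *req)
{
	/* atomic_fetch_sub returns the old value: 1 means last put. */
	if (atomic_fetch_sub(&req->refcount, 1) == 1)
		fake_reply_done(req);
}

int main(void)
{
	struct fake_req req = { .reply = "pong" };

	/* Like kref_init() plus kref_get(): Receive ref + Send ref. */
	atomic_init(&req.refcount, 2);

	fake_put(&req);	/* Send completion fires: just drops its ref. */
	fake_put(&req);	/* Receive completion fires: last put wakes RPC. */
	return 0;
}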
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 33b6e6a03f68..caf0b1950d76 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -511,6 +511,16 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 	return 0;
 }
 
+static void rpcrdma_sendctx_done(struct kref *kref)
+{
+	struct rpcrdma_req *req =
+		container_of(kref, struct rpcrdma_req, rl_kref);
+	struct rpcrdma_rep *rep = req->rl_reply;
+
+	rpcrdma_complete_rqst(rep);
+	rep->rr_rxprt->rx_stats.reply_waits_for_send++;
+}
+
 /**
  * rpcrdma_sendctx_unmap - DMA-unmap Send buffer
  * @sc: sendctx containing SGEs to unmap
@@ -520,6 +530,9 @@ void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
 {
 	struct ib_sge *sge;
 
+	if (!sc->sc_unmap_count)
+		return;
+
 	/* The first two SGEs contain the transport header and
 	 * the inline buffer. These are always left mapped so
 	 * they can be cheaply re-used.
@@ -529,9 +542,7 @@ void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
 		ib_dma_unmap_page(sc->sc_device, sge->addr, sge->length,
 				  DMA_TO_DEVICE);
 
-	if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES,
-			       &sc->sc_req->rl_flags))
-		wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES);
+	kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done);
 }
 
 /* Prepare an SGE for the RPC-over-RDMA transport header.
@@ -666,7 +677,7 @@ map_tail:
 out:
 	sc->sc_wr.num_sge += sge_no;
 	if (sc->sc_unmap_count)
-		__set_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
+		kref_get(&req->rl_kref);
 	return true;
 
 out_regbuf:
@@ -708,7 +719,7 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
 	req->rl_sendctx->sc_wr.num_sge = 0;
 	req->rl_sendctx->sc_unmap_count = 0;
 	req->rl_sendctx->sc_req = req;
-	__clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
+	kref_init(&req->rl_kref);
 
 	ret = -EIO;
 	if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen))
@@ -1268,36 +1279,12 @@ out_badheader:
 	goto out;
 }
 
-/* Ensure that any DMA mapped pages associated with
- * the Send of the RPC Call have been unmapped before
- * allowing the RPC to complete. This protects argument
- * memory not controlled by the RPC client from being
- * re-used before we're done with it.
- */
-static void rpcrdma_release_tx(struct rpcrdma_xprt *r_xprt,
-			       struct rpcrdma_req *req)
+static void rpcrdma_reply_done(struct kref *kref)
 {
-	if (test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
-		r_xprt->rx_stats.reply_waits_for_send++;
-		out_of_line_wait_on_bit(&req->rl_flags,
-					RPCRDMA_REQ_F_TX_RESOURCES,
-					bit_wait,
-					TASK_UNINTERRUPTIBLE);
-	}
-}
+	struct rpcrdma_req *req =
+		container_of(kref, struct rpcrdma_req, rl_kref);
 
-/**
- * rpcrdma_release_rqst - Release hardware resources
- * @r_xprt: controlling transport instance
- * @req: request with resources to release
- *
- */
-void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
-{
-	if (!list_empty(&req->rl_registered))
-		frwr_unmap_sync(r_xprt, req);
-
-	rpcrdma_release_tx(r_xprt, req);
+	rpcrdma_complete_rqst(req->rl_reply);
 }
 
 /**
@@ -1367,13 +1354,11 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 
 	if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
 		frwr_reminv(rep, &req->rl_registered);
-	if (!list_empty(&req->rl_registered)) {
+	if (!list_empty(&req->rl_registered))
 		frwr_unmap_async(r_xprt, req);
 		/* LocalInv completion will complete the RPC */
-	} else {
-		rpcrdma_release_tx(r_xprt, req);
-		rpcrdma_complete_rqst(rep);
-	}
+	else
+		kref_put(&req->rl_kref, rpcrdma_reply_done);
 	return;
 
 out_badversion:
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index f84375ddbb4d..9575f1d8db07 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -618,8 +618,16 @@ xprt_rdma_free(struct rpc_task *task)
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
 
-	rpcrdma_release_rqst(r_xprt, req);
 	trace_xprtrdma_op_free(task, req);
+
+	if (!list_empty(&req->rl_registered))
+		frwr_unmap_sync(r_xprt, req);
+
+	/* XXX: If the RPC is completing because of a signal and
+	 * not because a reply was received, we ought to ensure
+	 * that the Send completion has fired, so that memory
+	 * involved with the Send is not still visible to the NIC.
+	 */
 }
 
 /**
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index c50a4b295bd7..4e22cc244149 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1462,8 +1462,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
 	struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
 	int rc;
 
-	if (!ep->rep_send_count ||
-	    test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
+	if (!ep->rep_send_count || kref_read(&req->rl_kref) > 1) {
 		send_wr->send_flags |= IB_SEND_SIGNALED;
 		ep->rep_send_count = ep->rep_send_batch;
 	} else {
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index e465221c9c96..5475f0dff22a 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -44,7 +44,8 @@
 
 #include <linux/wait.h>		/* wait_queue_head_t, etc */
 #include <linux/spinlock.h>		/* spinlock_t, etc */
 #include <linux/atomic.h>		/* atomic_t, etc */
+#include <linux/kref.h>			/* struct kref */
 #include <linux/workqueue.h>		/* struct work_struct */
 
 #include <rdma/rdma_cm.h>		/* RDMA connection api */
@@ -329,17 +330,12 @@ struct rpcrdma_req {
 	struct rpcrdma_regbuf	*rl_recvbuf;	/* rq_rcv_buf */
 
 	struct list_head	rl_all;
-	unsigned long		rl_flags;
+	struct kref		rl_kref;
 
 	struct list_head	rl_registered;	/* registered segments */
 	struct rpcrdma_mr_seg	rl_segments[RPCRDMA_MAX_SEGS];
 };
 
-/* rl_flags */
-enum {
-	RPCRDMA_REQ_F_TX_RESOURCES,
-};
-
 static inline struct rpcrdma_req *
 rpcr_to_rdmar(const struct rpc_rqst *rqst)
 {
@@ -584,8 +580,6 @@ int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
 void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
 void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
 void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
-void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt,
-			  struct rpcrdma_req *req);
 
 static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
 {