author	Chuck Lever <chuck.lever@oracle.com>	2016-05-02 14:41:05 -0400
committer	Anna Schumaker <Anna.Schumaker@Netapp.com>	2016-05-17 15:47:58 -0400
commit	302d3deb20682a076e1ab551821cacfdc81c5e4f (patch)
tree	f379f2c56120ba7d6f970306d4520c4cc5517726 /net
parent	949317464bc2baca0ccc69e35a7b5cd3715633a6 (diff)
xprtrdma: Prevent inline overflow
When deciding whether to send a Call inline, rpcrdma_marshal_req
doesn't take into account header bytes consumed by chunk lists.
This results in Call messages on the wire that are sometimes larger
than the inline threshold.

Likewise, when a Write list or Reply chunk is in play, the server's
reply has to emit an RDMA Send that includes a larger-than-minimal
RPC-over-RDMA header.

The actual size of a Call message cannot be estimated until after
the chunk lists have been registered. Thus the size of each
RPC-over-RDMA header can be estimated only after chunks are
registered; but the decision to register chunks is based on the size
of that header. Chicken, meet egg.

The best a client can do is estimate header size based on the
largest header that might occur, and then ensure that inline content
is always smaller than that.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
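To make the worst-case arithmetic concrete, here is a minimal user-space
sketch of how the new helpers derive the inline limits. This is an
editorial illustration, not part of the patch: the 1024-byte inline
thresholds and the single data segment (the typical FRWR result of
RPCRDMA_MAX_DATA_SEGS / ri_max_frmr_depth) are assumed values, while
the 24-byte read chunk and 16-byte segment follow the RPC-over-RDMA
XDR layout. Note that both new helpers initialize size to
RPCRDMA_HDRLEN_MIN and then assign (rather than add) the chunk-list
size, so the fixed header bytes drop out of the final estimate; the
sketch reproduces that final arithmetic.

/* Editorial sketch (not from the patch): worst-case RPC-over-RDMA
 * header sizes and the resulting client inline limits, assumed values.
 */
#include <stdio.h>

enum {
	INLINE_WSIZE  = 1024,		/* assumed cdata->inline_wsize */
	INLINE_RSIZE  = 1024,		/* assumed cdata->inline_rsize */
	SEGMENT_SZ    = 16,		/* rpcrdma_segment: handle + length + offset */
	READ_CHUNK_SZ = 8 + SEGMENT_SZ,	/* discriminator + position + segment */
};

int main(void)
{
	unsigned int maxsegs = 1 + 2;	/* one data segment + head and tail buffers */

	/* Largest Read list a Call header might carry */
	unsigned int call_hdr = maxsegs * READ_CHUNK_SZ;

	/* Largest Write list a Reply header might carry:
	 * segment count + segments + list discriminator
	 */
	unsigned int reply_hdr = 4 + maxsegs * SEGMENT_SZ + 4;

	printf("ri_max_inline_write = %u\n", INLINE_WSIZE - call_hdr);
	printf("ri_max_inline_read  = %u\n", INLINE_RSIZE - reply_hdr);
	return 0;
}

With these assumptions the program prints ri_max_inline_write = 952
and ri_max_inline_read = 968. Any RPC whose send buffer fits under
ri_max_inline_write is then guaranteed to fit on the wire even after
the largest possible chunk list is marshaled, which is the invariant
the new overflow check in rpcrdma_marshal_req enforces.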
Diffstat (limited to 'net')
-rw-r--r--	net/sunrpc/xprtrdma/fmr_ops.c		3
-rw-r--r--	net/sunrpc/xprtrdma/frwr_ops.c		3
-rw-r--r--	net/sunrpc/xprtrdma/physical_ops.c	5
-rw-r--r--	net/sunrpc/xprtrdma/rpc_rdma.c		85
-rw-r--r--	net/sunrpc/xprtrdma/xprt_rdma.h		5
5 files changed, 90 insertions, 11 deletions
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 4aeb104d0696..009694b0c56e 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -39,6 +39,9 @@ static int
 fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
 	    struct rpcrdma_create_data_internal *cdata)
 {
+	rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
+						      RPCRDMA_MAX_DATA_SEGS /
+						      RPCRDMA_MAX_FMR_SGES));
 	return 0;
 }
 
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 2f375982abf4..41e02e7d9b4c 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -231,6 +231,9 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
 					       depth;
 	}
 
+	rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
+						      RPCRDMA_MAX_DATA_SEGS /
+						      ia->ri_max_frmr_depth));
 	return 0;
 }
 
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index e16ed54d24ed..2dc6ec2b006a 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -36,8 +36,11 @@ physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
 			__func__, PTR_ERR(mr));
 		return -ENOMEM;
 	}
-
 	ia->ri_dma_mr = mr;
+
+	rpcrdma_set_max_header_sizes(ia, cdata, min_t(unsigned int,
+						      RPCRDMA_MAX_DATA_SEGS,
+						      RPCRDMA_MAX_HDR_SEGS));
 	return 0;
 }
 
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 888823bb6dae..205b81b5ca9e 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -61,7 +61,6 @@ enum rpcrdma_chunktype {
 	rpcrdma_replych
 };
 
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 static const char transfertypes[][12] = {
 	"pure inline",	/* no chunks */
 	" read chunk",	/* some argument via rdma read */
@@ -69,18 +68,72 @@ static const char transfertypes[][12] = {
69 "write chunk", /* some result via rdma write */ 68 "write chunk", /* some result via rdma write */
70 "reply chunk" /* entire reply via rdma write */ 69 "reply chunk" /* entire reply via rdma write */
71}; 70};
72#endif 71
72/* Returns size of largest RPC-over-RDMA header in a Call message
73 *
74 * The client marshals only one chunk list per Call message.
75 * The largest list is the Read list.
76 */
77static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
78{
79 unsigned int size;
80
81 /* Fixed header fields and list discriminators */
82 size = RPCRDMA_HDRLEN_MIN;
83
84 /* Maximum Read list size */
85 maxsegs += 2; /* segment for head and tail buffers */
86 size = maxsegs * sizeof(struct rpcrdma_read_chunk);
87
88 dprintk("RPC: %s: max call header size = %u\n",
89 __func__, size);
90 return size;
91}
92
93/* Returns size of largest RPC-over-RDMA header in a Reply message
94 *
95 * There is only one Write list or one Reply chunk per Reply
96 * message. The larger list is the Write list.
97 */
98static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
99{
100 unsigned int size;
101
102 /* Fixed header fields and list discriminators */
103 size = RPCRDMA_HDRLEN_MIN;
104
105 /* Maximum Write list size */
106 maxsegs += 2; /* segment for head and tail buffers */
107 size = sizeof(__be32); /* segment count */
108 size += maxsegs * sizeof(struct rpcrdma_segment);
109 size += sizeof(__be32); /* list discriminator */
110
111 dprintk("RPC: %s: max reply header size = %u\n",
112 __func__, size);
113 return size;
114}
115
116void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *ia,
117 struct rpcrdma_create_data_internal *cdata,
118 unsigned int maxsegs)
119{
120 ia->ri_max_inline_write = cdata->inline_wsize -
121 rpcrdma_max_call_header_size(maxsegs);
122 ia->ri_max_inline_read = cdata->inline_rsize -
123 rpcrdma_max_reply_header_size(maxsegs);
124}
73 125
74/* The client can send a request inline as long as the RPCRDMA header 126/* The client can send a request inline as long as the RPCRDMA header
75 * plus the RPC call fit under the transport's inline limit. If the 127 * plus the RPC call fit under the transport's inline limit. If the
76 * combined call message size exceeds that limit, the client must use 128 * combined call message size exceeds that limit, the client must use
77 * the read chunk list for this operation. 129 * the read chunk list for this operation.
78 */ 130 */
79static bool rpcrdma_args_inline(struct rpc_rqst *rqst) 131static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
132 struct rpc_rqst *rqst)
80{ 133{
81 unsigned int callsize = RPCRDMA_HDRLEN_MIN + rqst->rq_snd_buf.len; 134 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
82 135
83 return callsize <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst); 136 return rqst->rq_snd_buf.len <= ia->ri_max_inline_write;
84} 137}
85 138
86/* The client can't know how large the actual reply will be. Thus it 139/* The client can't know how large the actual reply will be. Thus it
@@ -89,11 +142,12 @@ static bool rpcrdma_args_inline(struct rpc_rqst *rqst)
  * limit, the client must provide a write list or a reply chunk for
  * this request.
  */
-static bool rpcrdma_results_inline(struct rpc_rqst *rqst)
+static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
+				   struct rpc_rqst *rqst)
 {
-	unsigned int repsize = RPCRDMA_HDRLEN_MIN + rqst->rq_rcv_buf.buflen;
-
-	return repsize <= RPCRDMA_INLINE_READ_THRESHOLD(rqst);
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+	return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read;
 }
 
 static int
@@ -492,7 +546,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	 */
 	if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
 		wtype = rpcrdma_writech;
-	else if (rpcrdma_results_inline(rqst))
+	else if (rpcrdma_results_inline(r_xprt, rqst))
 		wtype = rpcrdma_noch;
 	else
 		wtype = rpcrdma_replych;
@@ -511,7 +565,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	 * that both has a data payload, and whose non-data arguments
 	 * by themselves are larger than the inline threshold.
 	 */
-	if (rpcrdma_args_inline(rqst)) {
+	if (rpcrdma_args_inline(r_xprt, rqst)) {
 		rtype = rpcrdma_noch;
 	} else if (rqst->rq_snd_buf.flags & XDRBUF_WRITE) {
 		rtype = rpcrdma_readch;
@@ -561,6 +615,9 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	if (hdrlen < 0)
 		return hdrlen;
 
+	if (hdrlen + rpclen > RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
+		goto out_overflow;
+
 	dprintk("RPC:       %s: %s: hdrlen %zd rpclen %zd"
 		" headerp 0x%p base 0x%p lkey 0x%x\n",
 		__func__, transfertypes[wtype], hdrlen, rpclen,
@@ -587,6 +644,14 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 
 	req->rl_niovs = 2;
 	return 0;
+
+out_overflow:
+	pr_err("rpcrdma: send overflow: hdrlen %zd rpclen %zu %s\n",
+	       hdrlen, rpclen, transfertypes[wtype]);
+	/* Terminate this RPC. Chunks registered above will be
+	 * released by xprt_release -> xprt_rdma_free.
+	 */
+	return -EIO;
 }
 
 /*
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 00287486c62c..4349e03069c7 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -73,6 +73,8 @@ struct rpcrdma_ia {
 	struct completion	ri_done;
 	int			ri_async_rc;
 	unsigned int		ri_max_frmr_depth;
+	unsigned int		ri_max_inline_write;
+	unsigned int		ri_max_inline_read;
 	struct ib_qp_attr	ri_qp_attr;
 	struct ib_qp_init_attr	ri_qp_init_attr;
 };
@@ -538,6 +540,9 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
  * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
  */
 int rpcrdma_marshal_req(struct rpc_rqst *);
+void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *,
+				  struct rpcrdma_create_data_internal *,
+				  unsigned int);
 
 /* RPC/RDMA module init - xprtrdma/transport.c
  */