diff options
author | Chuck Lever <chuck.lever@oracle.com> | 2016-05-02 14:41:05 -0400 |
---|---|---|
committer | Anna Schumaker <Anna.Schumaker@Netapp.com> | 2016-05-17 15:47:58 -0400 |
commit | 302d3deb20682a076e1ab551821cacfdc81c5e4f (patch) | |
tree | f379f2c56120ba7d6f970306d4520c4cc5517726 /net | |
parent | 949317464bc2baca0ccc69e35a7b5cd3715633a6 (diff) |
xprtrdma: Prevent inline overflow
When deciding whether to send a Call inline, rpcrdma_marshal_req
doesn't take into account header bytes consumed by chunk lists.
This results in Call messages on the wire that are sometimes larger
than the inline threshold.
Likewise, when a Write list or Reply chunk is in play, the server's
reply has to emit an RDMA Send that includes a larger-than-minimal
RPC-over-RDMA header.
The actual size of a Call message cannot be estimated until after
the chunk lists have been registered. Thus the size of each
RPC-over-RDMA header can be estimated only after chunks are
registered; but the decision to register chunks is based on the size
of that header. Chicken, meet egg.
The best a client can do is estimate header size based on the
largest header that might occur, and then ensure that inline content
always fits in the space that remains under the inline threshold.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Diffstat (limited to 'net')
-rw-r--r-- | net/sunrpc/xprtrdma/fmr_ops.c | 3 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/frwr_ops.c | 3 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/physical_ops.c | 5 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/rpc_rdma.c | 85 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/xprt_rdma.h | 5 |
5 files changed, 90 insertions, 11 deletions
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 4aeb104d0696..009694b0c56e 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c | |||
@@ -39,6 +39,9 @@ static int | |||
39 | fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | 39 | fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, |
40 | struct rpcrdma_create_data_internal *cdata) | 40 | struct rpcrdma_create_data_internal *cdata) |
41 | { | 41 | { |
42 | rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1, | ||
43 | RPCRDMA_MAX_DATA_SEGS / | ||
44 | RPCRDMA_MAX_FMR_SGES)); | ||
42 | return 0; | 45 | return 0; |
43 | } | 46 | } |
44 | 47 | ||
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 2f375982abf4..41e02e7d9b4c 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c | |||
@@ -231,6 +231,9 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | |||
231 | depth; | 231 | depth; |
232 | } | 232 | } |
233 | 233 | ||
234 | rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1, | ||
235 | RPCRDMA_MAX_DATA_SEGS / | ||
236 | ia->ri_max_frmr_depth)); | ||
234 | return 0; | 237 | return 0; |
235 | } | 238 | } |
236 | 239 | ||
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c index e16ed54d24ed..2dc6ec2b006a 100644 --- a/net/sunrpc/xprtrdma/physical_ops.c +++ b/net/sunrpc/xprtrdma/physical_ops.c | |||
@@ -36,8 +36,11 @@ physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | |||
36 | __func__, PTR_ERR(mr)); | 36 | __func__, PTR_ERR(mr)); |
37 | return -ENOMEM; | 37 | return -ENOMEM; |
38 | } | 38 | } |
39 | |||
40 | ia->ri_dma_mr = mr; | 39 | ia->ri_dma_mr = mr; |
40 | |||
41 | rpcrdma_set_max_header_sizes(ia, cdata, min_t(unsigned int, | ||
42 | RPCRDMA_MAX_DATA_SEGS, | ||
43 | RPCRDMA_MAX_HDR_SEGS)); | ||
41 | return 0; | 44 | return 0; |
42 | } | 45 | } |
43 | 46 | ||
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 888823bb6dae..205b81b5ca9e 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
@@ -61,7 +61,6 @@ enum rpcrdma_chunktype { | |||
61 | rpcrdma_replych | 61 | rpcrdma_replych |
62 | }; | 62 | }; |
63 | 63 | ||
64 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
65 | static const char transfertypes[][12] = { | 64 | static const char transfertypes[][12] = { |
66 | "pure inline", /* no chunks */ | 65 | "pure inline", /* no chunks */ |
67 | " read chunk", /* some argument via rdma read */ | 66 | " read chunk", /* some argument via rdma read */ |
@@ -69,18 +68,72 @@ static const char transfertypes[][12] = { | |||
69 | "write chunk", /* some result via rdma write */ | 68 | "write chunk", /* some result via rdma write */ |
70 | "reply chunk" /* entire reply via rdma write */ | 69 | "reply chunk" /* entire reply via rdma write */ |
71 | }; | 70 | }; |
72 | #endif | 71 | |
72 | /* Returns size of largest RPC-over-RDMA header in a Call message | ||
73 | * | ||
74 | * The client marshals only one chunk list per Call message. | ||
75 | * The largest list is the Read list. | ||
76 | */ | ||
77 | static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs) | ||
78 | { | ||
79 | unsigned int size; | ||
80 | |||
81 | /* Fixed header fields and list discriminators */ | ||
82 | size = RPCRDMA_HDRLEN_MIN; | ||
83 | |||
84 | /* Maximum Read list size */ | ||
85 | maxsegs += 2; /* segment for head and tail buffers */ | ||
86 | size = maxsegs * sizeof(struct rpcrdma_read_chunk); | ||
87 | |||
88 | dprintk("RPC: %s: max call header size = %u\n", | ||
89 | __func__, size); | ||
90 | return size; | ||
91 | } | ||
92 | |||
93 | /* Returns size of largest RPC-over-RDMA header in a Reply message | ||
94 | * | ||
95 | * There is only one Write list or one Reply chunk per Reply | ||
96 | * message. The larger list is the Write list. | ||
97 | */ | ||
98 | static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs) | ||
99 | { | ||
100 | unsigned int size; | ||
101 | |||
102 | /* Fixed header fields and list discriminators */ | ||
103 | size = RPCRDMA_HDRLEN_MIN; | ||
104 | |||
105 | /* Maximum Write list size */ | ||
106 | maxsegs += 2; /* segment for head and tail buffers */ | ||
107 | size = sizeof(__be32); /* segment count */ | ||
108 | size += maxsegs * sizeof(struct rpcrdma_segment); | ||
109 | size += sizeof(__be32); /* list discriminator */ | ||
110 | |||
111 | dprintk("RPC: %s: max reply header size = %u\n", | ||
112 | __func__, size); | ||
113 | return size; | ||
114 | } | ||
115 | |||
116 | void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *ia, | ||
117 | struct rpcrdma_create_data_internal *cdata, | ||
118 | unsigned int maxsegs) | ||
119 | { | ||
120 | ia->ri_max_inline_write = cdata->inline_wsize - | ||
121 | rpcrdma_max_call_header_size(maxsegs); | ||
122 | ia->ri_max_inline_read = cdata->inline_rsize - | ||
123 | rpcrdma_max_reply_header_size(maxsegs); | ||
124 | } | ||
73 | 125 | ||
74 | /* The client can send a request inline as long as the RPCRDMA header | 126 | /* The client can send a request inline as long as the RPCRDMA header |
75 | * plus the RPC call fit under the transport's inline limit. If the | 127 | * plus the RPC call fit under the transport's inline limit. If the |
76 | * combined call message size exceeds that limit, the client must use | 128 | * combined call message size exceeds that limit, the client must use |
77 | * the read chunk list for this operation. | 129 | * the read chunk list for this operation. |
78 | */ | 130 | */ |
79 | static bool rpcrdma_args_inline(struct rpc_rqst *rqst) | 131 | static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, |
132 | struct rpc_rqst *rqst) | ||
80 | { | 133 | { |
81 | unsigned int callsize = RPCRDMA_HDRLEN_MIN + rqst->rq_snd_buf.len; | 134 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
82 | 135 | ||
83 | return callsize <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst); | 136 | return rqst->rq_snd_buf.len <= ia->ri_max_inline_write; |
84 | } | 137 | } |
85 | 138 | ||
86 | /* The client can't know how large the actual reply will be. Thus it | 139 | /* The client can't know how large the actual reply will be. Thus it |
@@ -89,11 +142,12 @@ static bool rpcrdma_args_inline(struct rpc_rqst *rqst) | |||
89 | * limit, the client must provide a write list or a reply chunk for | 142 | * limit, the client must provide a write list or a reply chunk for |
90 | * this request. | 143 | * this request. |
91 | */ | 144 | */ |
92 | static bool rpcrdma_results_inline(struct rpc_rqst *rqst) | 145 | static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt, |
146 | struct rpc_rqst *rqst) | ||
93 | { | 147 | { |
94 | unsigned int repsize = RPCRDMA_HDRLEN_MIN + rqst->rq_rcv_buf.buflen; | 148 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
95 | 149 | ||
96 | return repsize <= RPCRDMA_INLINE_READ_THRESHOLD(rqst); | 150 | return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read; |
97 | } | 151 | } |
98 | 152 | ||
99 | static int | 153 | static int |
@@ -492,7 +546,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
492 | */ | 546 | */ |
493 | if (rqst->rq_rcv_buf.flags & XDRBUF_READ) | 547 | if (rqst->rq_rcv_buf.flags & XDRBUF_READ) |
494 | wtype = rpcrdma_writech; | 548 | wtype = rpcrdma_writech; |
495 | else if (rpcrdma_results_inline(rqst)) | 549 | else if (rpcrdma_results_inline(r_xprt, rqst)) |
496 | wtype = rpcrdma_noch; | 550 | wtype = rpcrdma_noch; |
497 | else | 551 | else |
498 | wtype = rpcrdma_replych; | 552 | wtype = rpcrdma_replych; |
@@ -511,7 +565,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
511 | * that both has a data payload, and whose non-data arguments | 565 | * that both has a data payload, and whose non-data arguments |
512 | * by themselves are larger than the inline threshold. | 566 | * by themselves are larger than the inline threshold. |
513 | */ | 567 | */ |
514 | if (rpcrdma_args_inline(rqst)) { | 568 | if (rpcrdma_args_inline(r_xprt, rqst)) { |
515 | rtype = rpcrdma_noch; | 569 | rtype = rpcrdma_noch; |
516 | } else if (rqst->rq_snd_buf.flags & XDRBUF_WRITE) { | 570 | } else if (rqst->rq_snd_buf.flags & XDRBUF_WRITE) { |
517 | rtype = rpcrdma_readch; | 571 | rtype = rpcrdma_readch; |
@@ -561,6 +615,9 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
561 | if (hdrlen < 0) | 615 | if (hdrlen < 0) |
562 | return hdrlen; | 616 | return hdrlen; |
563 | 617 | ||
618 | if (hdrlen + rpclen > RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) | ||
619 | goto out_overflow; | ||
620 | |||
564 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd" | 621 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd" |
565 | " headerp 0x%p base 0x%p lkey 0x%x\n", | 622 | " headerp 0x%p base 0x%p lkey 0x%x\n", |
566 | __func__, transfertypes[wtype], hdrlen, rpclen, | 623 | __func__, transfertypes[wtype], hdrlen, rpclen, |
@@ -587,6 +644,14 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
587 | 644 | ||
588 | req->rl_niovs = 2; | 645 | req->rl_niovs = 2; |
589 | return 0; | 646 | return 0; |
647 | |||
648 | out_overflow: | ||
649 | pr_err("rpcrdma: send overflow: hdrlen %zd rpclen %zu %s\n", | ||
650 | hdrlen, rpclen, transfertypes[wtype]); | ||
651 | /* Terminate this RPC. Chunks registered above will be | ||
652 | * released by xprt_release -> xprt_rdma_free . | ||
653 | */ | ||
654 | return -EIO; | ||
590 | } | 655 | } |
591 | 656 | ||
592 | /* | 657 | /* |
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 00287486c62c..4349e03069c7 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -73,6 +73,8 @@ struct rpcrdma_ia { | |||
73 | struct completion ri_done; | 73 | struct completion ri_done; |
74 | int ri_async_rc; | 74 | int ri_async_rc; |
75 | unsigned int ri_max_frmr_depth; | 75 | unsigned int ri_max_frmr_depth; |
76 | unsigned int ri_max_inline_write; | ||
77 | unsigned int ri_max_inline_read; | ||
76 | struct ib_qp_attr ri_qp_attr; | 78 | struct ib_qp_attr ri_qp_attr; |
77 | struct ib_qp_init_attr ri_qp_init_attr; | 79 | struct ib_qp_init_attr ri_qp_init_attr; |
78 | }; | 80 | }; |
@@ -538,6 +540,9 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *); | |||
538 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c | 540 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c |
539 | */ | 541 | */ |
540 | int rpcrdma_marshal_req(struct rpc_rqst *); | 542 | int rpcrdma_marshal_req(struct rpc_rqst *); |
543 | void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *, | ||
544 | struct rpcrdma_create_data_internal *, | ||
545 | unsigned int); | ||
541 | 546 | ||
542 | /* RPC/RDMA module init - xprtrdma/transport.c | 547 | /* RPC/RDMA module init - xprtrdma/transport.c |
543 | */ | 548 | */ |