diff options
Diffstat (limited to 'net/sunrpc/xprtrdma/rpc_rdma.c')
-rw-r--r-- | net/sunrpc/xprtrdma/rpc_rdma.c | 108 |
1 files changed, 78 insertions, 30 deletions
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 0f28f2d743ed..888823bb6dae 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
@@ -132,6 +132,33 @@ rpcrdma_tail_pullup(struct xdr_buf *buf) | |||
132 | return tlen; | 132 | return tlen; |
133 | } | 133 | } |
134 | 134 | ||
135 | /* Split "vec" on page boundaries into segments. FMR registers pages, | ||
136 | * not a byte range. Other modes coalesce these segments into a single | ||
137 | * MR when they can. | ||
138 | */ | ||
139 | static int | ||
140 | rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, | ||
141 | int n, int nsegs) | ||
142 | { | ||
143 | size_t page_offset; | ||
144 | u32 remaining; | ||
145 | char *base; | ||
146 | |||
147 | base = vec->iov_base; | ||
148 | page_offset = offset_in_page(base); | ||
149 | remaining = vec->iov_len; | ||
150 | while (remaining && n < nsegs) { | ||
151 | seg[n].mr_page = NULL; | ||
152 | seg[n].mr_offset = base; | ||
153 | seg[n].mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining); | ||
154 | remaining -= seg[n].mr_len; | ||
155 | base += seg[n].mr_len; | ||
156 | ++n; | ||
157 | page_offset = 0; | ||
158 | } | ||
159 | return n; | ||
160 | } | ||
161 | |||
135 | /* | 162 | /* |
136 | * Chunk assembly from upper layer xdr_buf. | 163 | * Chunk assembly from upper layer xdr_buf. |
137 | * | 164 | * |
@@ -150,11 +177,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
150 | int page_base; | 177 | int page_base; |
151 | struct page **ppages; | 178 | struct page **ppages; |
152 | 179 | ||
153 | if (pos == 0 && xdrbuf->head[0].iov_len) { | 180 | if (pos == 0) { |
154 | seg[n].mr_page = NULL; | 181 | n = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, n, nsegs); |
155 | seg[n].mr_offset = xdrbuf->head[0].iov_base; | 182 | if (n == nsegs) |
156 | seg[n].mr_len = xdrbuf->head[0].iov_len; | 183 | return -EIO; |
157 | ++n; | ||
158 | } | 184 | } |
159 | 185 | ||
160 | len = xdrbuf->page_len; | 186 | len = xdrbuf->page_len; |
@@ -192,13 +218,9 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
192 | * xdr pad bytes, saving the server an RDMA operation. */ | 218 | * xdr pad bytes, saving the server an RDMA operation. */ |
193 | if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) | 219 | if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) |
194 | return n; | 220 | return n; |
221 | n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n, nsegs); | ||
195 | if (n == nsegs) | 222 | if (n == nsegs) |
196 | /* Tail remains, but we're out of segments */ | ||
197 | return -EIO; | 223 | return -EIO; |
198 | seg[n].mr_page = NULL; | ||
199 | seg[n].mr_offset = xdrbuf->tail[0].iov_base; | ||
200 | seg[n].mr_len = xdrbuf->tail[0].iov_len; | ||
201 | ++n; | ||
202 | } | 224 | } |
203 | 225 | ||
204 | return n; | 226 | return n; |
@@ -773,20 +795,17 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
773 | struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; | 795 | struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; |
774 | struct rpc_xprt *xprt = &r_xprt->rx_xprt; | 796 | struct rpc_xprt *xprt = &r_xprt->rx_xprt; |
775 | __be32 *iptr; | 797 | __be32 *iptr; |
776 | int rdmalen, status; | 798 | int rdmalen, status, rmerr; |
777 | unsigned long cwnd; | 799 | unsigned long cwnd; |
778 | u32 credits; | ||
779 | 800 | ||
780 | dprintk("RPC: %s: incoming rep %p\n", __func__, rep); | 801 | dprintk("RPC: %s: incoming rep %p\n", __func__, rep); |
781 | 802 | ||
782 | if (rep->rr_len == RPCRDMA_BAD_LEN) | 803 | if (rep->rr_len == RPCRDMA_BAD_LEN) |
783 | goto out_badstatus; | 804 | goto out_badstatus; |
784 | if (rep->rr_len < RPCRDMA_HDRLEN_MIN) | 805 | if (rep->rr_len < RPCRDMA_HDRLEN_ERR) |
785 | goto out_shortreply; | 806 | goto out_shortreply; |
786 | 807 | ||
787 | headerp = rdmab_to_msg(rep->rr_rdmabuf); | 808 | headerp = rdmab_to_msg(rep->rr_rdmabuf); |
788 | if (headerp->rm_vers != rpcrdma_version) | ||
789 | goto out_badversion; | ||
790 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | 809 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) |
791 | if (rpcrdma_is_bcall(headerp)) | 810 | if (rpcrdma_is_bcall(headerp)) |
792 | goto out_bcall; | 811 | goto out_bcall; |
@@ -809,15 +828,16 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
809 | */ | 828 | */ |
810 | list_del_init(&rqst->rq_list); | 829 | list_del_init(&rqst->rq_list); |
811 | spin_unlock_bh(&xprt->transport_lock); | 830 | spin_unlock_bh(&xprt->transport_lock); |
812 | dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" | 831 | dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", |
813 | " RPC request 0x%p xid 0x%08x\n", | 832 | __func__, rep, req, be32_to_cpu(headerp->rm_xid)); |
814 | __func__, rep, req, rqst, | ||
815 | be32_to_cpu(headerp->rm_xid)); | ||
816 | 833 | ||
817 | /* from here on, the reply is no longer an orphan */ | 834 | /* from here on, the reply is no longer an orphan */ |
818 | req->rl_reply = rep; | 835 | req->rl_reply = rep; |
819 | xprt->reestablish_timeout = 0; | 836 | xprt->reestablish_timeout = 0; |
820 | 837 | ||
838 | if (headerp->rm_vers != rpcrdma_version) | ||
839 | goto out_badversion; | ||
840 | |||
821 | /* check for expected message types */ | 841 | /* check for expected message types */ |
822 | /* The order of some of these tests is important. */ | 842 | /* The order of some of these tests is important. */ |
823 | switch (headerp->rm_type) { | 843 | switch (headerp->rm_type) { |
@@ -878,6 +898,9 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
878 | status = rdmalen; | 898 | status = rdmalen; |
879 | break; | 899 | break; |
880 | 900 | ||
901 | case rdma_error: | ||
902 | goto out_rdmaerr; | ||
903 | |||
881 | badheader: | 904 | badheader: |
882 | default: | 905 | default: |
883 | dprintk("%s: invalid rpcrdma reply header (type %d):" | 906 | dprintk("%s: invalid rpcrdma reply header (type %d):" |
@@ -893,6 +916,7 @@ badheader: | |||
893 | break; | 916 | break; |
894 | } | 917 | } |
895 | 918 | ||
919 | out: | ||
896 | /* Invalidate and flush the data payloads before waking the | 920 | /* Invalidate and flush the data payloads before waking the |
897 | * waiting application. This guarantees the memory region is | 921 | * waiting application. This guarantees the memory region is |
898 | * properly fenced from the server before the application | 922 | * properly fenced from the server before the application |
@@ -903,15 +927,9 @@ badheader: | |||
903 | if (req->rl_nchunks) | 927 | if (req->rl_nchunks) |
904 | r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req); | 928 | r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req); |
905 | 929 | ||
906 | credits = be32_to_cpu(headerp->rm_credit); | ||
907 | if (credits == 0) | ||
908 | credits = 1; /* don't deadlock */ | ||
909 | else if (credits > r_xprt->rx_buf.rb_max_requests) | ||
910 | credits = r_xprt->rx_buf.rb_max_requests; | ||
911 | |||
912 | spin_lock_bh(&xprt->transport_lock); | 930 | spin_lock_bh(&xprt->transport_lock); |
913 | cwnd = xprt->cwnd; | 931 | cwnd = xprt->cwnd; |
914 | xprt->cwnd = credits << RPC_CWNDSHIFT; | 932 | xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT; |
915 | if (xprt->cwnd > cwnd) | 933 | if (xprt->cwnd > cwnd) |
916 | xprt_release_rqst_cong(rqst->rq_task); | 934 | xprt_release_rqst_cong(rqst->rq_task); |
917 | 935 | ||
@@ -935,13 +953,43 @@ out_bcall: | |||
935 | return; | 953 | return; |
936 | #endif | 954 | #endif |
937 | 955 | ||
938 | out_shortreply: | 956 | /* If the incoming reply terminated a pending RPC, the next |
939 | dprintk("RPC: %s: short/invalid reply\n", __func__); | 957 | * RPC call will post a replacement receive buffer as it is |
940 | goto repost; | 958 | * being marshaled. |
941 | 959 | */ | |
942 | out_badversion: | 960 | out_badversion: |
943 | dprintk("RPC: %s: invalid version %d\n", | 961 | dprintk("RPC: %s: invalid version %d\n", |
944 | __func__, be32_to_cpu(headerp->rm_vers)); | 962 | __func__, be32_to_cpu(headerp->rm_vers)); |
963 | status = -EIO; | ||
964 | r_xprt->rx_stats.bad_reply_count++; | ||
965 | goto out; | ||
966 | |||
967 | out_rdmaerr: | ||
968 | rmerr = be32_to_cpu(headerp->rm_body.rm_error.rm_err); | ||
969 | switch (rmerr) { | ||
970 | case ERR_VERS: | ||
971 | pr_err("%s: server reports header version error (%u-%u)\n", | ||
972 | __func__, | ||
973 | be32_to_cpu(headerp->rm_body.rm_error.rm_vers_low), | ||
974 | be32_to_cpu(headerp->rm_body.rm_error.rm_vers_high)); | ||
975 | break; | ||
976 | case ERR_CHUNK: | ||
977 | pr_err("%s: server reports header decoding error\n", | ||
978 | __func__); | ||
979 | break; | ||
980 | default: | ||
981 | pr_err("%s: server reports unknown error %d\n", | ||
982 | __func__, rmerr); | ||
983 | } | ||
984 | status = -EREMOTEIO; | ||
985 | r_xprt->rx_stats.bad_reply_count++; | ||
986 | goto out; | ||
987 | |||
988 | /* If no pending RPC transaction was matched, post a replacement | ||
989 | * receive buffer before returning. | ||
990 | */ | ||
991 | out_shortreply: | ||
992 | dprintk("RPC: %s: short/invalid reply\n", __func__); | ||
945 | goto repost; | 993 | goto repost; |
946 | 994 | ||
947 | out_nomatch: | 995 | out_nomatch: |