Diffstat (limited to 'net/sunrpc/xprtrdma/rpc_rdma.c')
 net/sunrpc/xprtrdma/rpc_rdma.c | 108
 1 file changed, 78 insertions(+), 30 deletions(-)
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 0f28f2d743ed..888823bb6dae 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -132,6 +132,33 @@ rpcrdma_tail_pullup(struct xdr_buf *buf)
 	return tlen;
 }
 
+/* Split "vec" on page boundaries into segments. FMR registers pages,
+ * not a byte range. Other modes coalesce these segments into a single
+ * MR when they can.
+ */
+static int
+rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
+		     int n, int nsegs)
+{
+	size_t page_offset;
+	u32 remaining;
+	char *base;
+
+	base = vec->iov_base;
+	page_offset = offset_in_page(base);
+	remaining = vec->iov_len;
+	while (remaining && n < nsegs) {
+		seg[n].mr_page = NULL;
+		seg[n].mr_offset = base;
+		seg[n].mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
+		remaining -= seg[n].mr_len;
+		base += seg[n].mr_len;
+		++n;
+		page_offset = 0;
+	}
+	return n;
+}
+
 /*
  * Chunk assembly from upper layer xdr_buf.
  *
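The new helper never lets a segment cross a page boundary: only the first segment can start mid-page, and every later segment begins page-aligned, which is what FMR's page-based registration requires. A minimal userspace sketch of the same splitting arithmetic (base address and length are hypothetical; 4 KiB pages assumed):

#include <stdio.h>

#define PAGE_SIZE 4096ul

int main(void)
{
	unsigned long base = 0x1000ec8;	/* hypothetical iov_base */
	unsigned long remaining = 9000;	/* hypothetical iov_len */
	unsigned long page_offset = base & (PAGE_SIZE - 1);

	while (remaining) {
		unsigned long len = PAGE_SIZE - page_offset;

		if (len > remaining)
			len = remaining;
		printf("segment at 0x%lx, %lu bytes\n", base, len);
		remaining -= len;
		base += len;
		page_offset = 0;	/* later segments start page-aligned */
	}
	return 0;
}

For these values the 9000-byte kvec splits into four segments of 312, 4096, 4096, and 496 bytes; registration modes other than FMR may coalesce them back into a single MR, as the comment above notes.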
@@ -150,11 +177,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
 	int page_base;
 	struct page **ppages;
 
-	if (pos == 0 && xdrbuf->head[0].iov_len) {
-		seg[n].mr_page = NULL;
-		seg[n].mr_offset = xdrbuf->head[0].iov_base;
-		seg[n].mr_len = xdrbuf->head[0].iov_len;
-		++n;
+	if (pos == 0) {
+		n = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, n, nsegs);
+		if (n == nsegs)
+			return -EIO;
 	}
 
 	len = xdrbuf->page_len;
@@ -192,13 +218,9 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
 		 * xdr pad bytes, saving the server an RDMA operation. */
 		if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
 			return n;
+		n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n, nsegs);
 		if (n == nsegs)
-			/* Tail remains, but we're out of segments */
 			return -EIO;
-		seg[n].mr_page = NULL;
-		seg[n].mr_offset = xdrbuf->tail[0].iov_base;
-		seg[n].mr_len = xdrbuf->tail[0].iov_len;
-		++n;
 	}
 
 	return n;
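Routing the head and tail kvecs through rpcrdma_convert_kvec() means a kvec now consumes one segment per page it touches instead of one segment total, so the seg[] array can fill sooner; the n == nsegs checks convert that exhaustion into -EIO rather than writing past the array. The worst case per kvec is easy to state (illustrative helper; the name and its use are not part of this patch):

/* Upper bound on mr_segs consumed by one kvec under the new
 * scheme: one per page touched by [iov_base, iov_base + iov_len).
 * Assumes iov_len > 0; an empty kvec consumes no segments.
 */
static unsigned int rpcrdma_kvec_max_segs(const struct kvec *vec)
{
	return (offset_in_page(vec->iov_base) + vec->iov_len +
		PAGE_SIZE - 1) >> PAGE_SHIFT;
}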
@@ -773,20 +795,17 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
 	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
 	__be32 *iptr;
-	int rdmalen, status;
+	int rdmalen, status, rmerr;
 	unsigned long cwnd;
-	u32 credits;
 
 	dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
 
 	if (rep->rr_len == RPCRDMA_BAD_LEN)
 		goto out_badstatus;
-	if (rep->rr_len < RPCRDMA_HDRLEN_MIN)
+	if (rep->rr_len < RPCRDMA_HDRLEN_ERR)
 		goto out_shortreply;
 
 	headerp = rdmab_to_msg(rep->rr_rdmabuf);
-	if (headerp->rm_vers != rpcrdma_version)
-		goto out_badversion;
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	if (rpcrdma_is_bcall(headerp))
 		goto out_bcall;
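The minimum-length floor drops from RPCRDMA_HDRLEN_MIN to RPCRDMA_HDRLEN_ERR because an RDMA_ERROR reply carries no chunk lists and is therefore shorter than the smallest well-formed rdma_msg header; with the old floor, a server's ERR_VERS reply would have been discarded as a short reply before it could be parsed. Assuming the usual shape of these constants (their definitions are not part of this diff):

/* xid, vers, credit, and proc are always present; a normal header
 * adds three empty chunk-list discriminators, while an error header
 * adds only the rm_err code.
 */
#define RPCRDMA_HDRLEN_MIN	(sizeof(__be32) * 7)	/* 28 bytes */
#define RPCRDMA_HDRLEN_ERR	(sizeof(__be32) * 5)	/* 20 bytes */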
@@ -809,15 +828,16 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 	 */
 	list_del_init(&rqst->rq_list);
 	spin_unlock_bh(&xprt->transport_lock);
-	dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
-		" RPC request 0x%p xid 0x%08x\n",
-		__func__, rep, req, rqst,
-		be32_to_cpu(headerp->rm_xid));
+	dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
+		__func__, rep, req, be32_to_cpu(headerp->rm_xid));
 
 	/* from here on, the reply is no longer an orphan */
 	req->rl_reply = rep;
 	xprt->reestablish_timeout = 0;
 
+	if (headerp->rm_vers != rpcrdma_version)
+		goto out_badversion;
+
 	/* check for expected message types */
 	/* The order of some of these tests is important. */
 	switch (headerp->rm_type) {
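Moving the version check below the XID match is safe because rm_xid occupies the first word of the transport header in every version; the payoff is that a reply carrying a bogus rm_vers now completes the matched RPC with -EIO (see out_badversion below) instead of being silently reposted while the caller waits out a retransmit timeout. Condensed ordering, paraphrasing the surrounding code:

/* Match first on the version-independent XID, then validate. */
rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);	/* word 0 */
if (!rqst)
	goto out_nomatch;
/* ... */
if (headerp->rm_vers != rpcrdma_version)	/* word 1 */
	goto out_badversion;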
@@ -878,6 +898,9 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 		status = rdmalen;
 		break;
 
+	case rdma_error:
+		goto out_rdmaerr;
+
 badheader:
 	default:
 		dprintk("%s: invalid rpcrdma reply header (type %d):"
@@ -893,6 +916,7 @@ badheader:
 		break;
 	}
 
+out:
 	/* Invalidate and flush the data payloads before waking the
 	 * waiting application. This guarantees the memory region is
 	 * properly fenced from the server before the application
@@ -903,15 +927,9 @@ badheader:
 	if (req->rl_nchunks)
 		r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
 
-	credits = be32_to_cpu(headerp->rm_credit);
-	if (credits == 0)
-		credits = 1;	/* don't deadlock */
-	else if (credits > r_xprt->rx_buf.rb_max_requests)
-		credits = r_xprt->rx_buf.rb_max_requests;
-
 	spin_lock_bh(&xprt->transport_lock);
 	cwnd = xprt->cwnd;
-	xprt->cwnd = credits << RPC_CWNDSHIFT;
+	xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
 	if (xprt->cwnd > cwnd)
 		xprt_release_rqst_cong(rqst->rq_task);
 
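The rm_credit clamping leaves the reply handler because credit accounting now lives behind rb_credits, an atomic that is presumably updated when the receive completes, before this handler runs; the handler simply folds the current grant into the congestion window. A sketch of what that updater plausibly looks like (name and placement assumed; it is not shown in this diff):

static void rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buffer = &rep->rr_rxprt->rx_buf;
	u32 credits;

	credits = be32_to_cpu(rdmab_to_msg(rep->rr_rdmabuf)->rm_credit);
	if (credits == 0)
		credits = 1;	/* don't deadlock */
	else if (credits > buffer->rb_max_requests)
		credits = buffer->rb_max_requests;

	atomic_set(&buffer->rb_credits, credits);
}

Performing the update at completion time keeps credit grants ordered with receive processing rather than with reply-handler scheduling.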
@@ -935,13 +953,43 @@ out_bcall:
 	return;
 #endif
 
-out_shortreply:
-	dprintk("RPC: %s: short/invalid reply\n", __func__);
-	goto repost;
-
+/* If the incoming reply terminated a pending RPC, the next
+ * RPC call will post a replacement receive buffer as it is
+ * being marshaled.
+ */
 out_badversion:
 	dprintk("RPC: %s: invalid version %d\n",
 		__func__, be32_to_cpu(headerp->rm_vers));
+	status = -EIO;
+	r_xprt->rx_stats.bad_reply_count++;
+	goto out;
+
+out_rdmaerr:
+	rmerr = be32_to_cpu(headerp->rm_body.rm_error.rm_err);
+	switch (rmerr) {
+	case ERR_VERS:
+		pr_err("%s: server reports header version error (%u-%u)\n",
+		       __func__,
+		       be32_to_cpu(headerp->rm_body.rm_error.rm_vers_low),
+		       be32_to_cpu(headerp->rm_body.rm_error.rm_vers_high));
+		break;
+	case ERR_CHUNK:
+		pr_err("%s: server reports header decoding error\n",
+		       __func__);
+		break;
+	default:
+		pr_err("%s: server reports unknown error %d\n",
+		       __func__, rmerr);
+	}
+	status = -EREMOTEIO;
+	r_xprt->rx_stats.bad_reply_count++;
+	goto out;
+
+/* If no pending RPC transaction was matched, post a replacement
+ * receive buffer before returning.
+ */
+out_shortreply:
+	dprintk("RPC: %s: short/invalid reply\n", __func__);
 	goto repost;
 
 out_nomatch:
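For reference, the fields decoded under out_rdmaerr come from the RDMA_ERROR message body defined by RFC 5666: ERR_VERS carries the range of protocol versions the server will accept, and ERR_CHUNK means the server could not decode the chunk lists. A simplified sketch of the header layout being read (not the verbatim kernel definition):

struct rpcrdma_msg {
	__be32 rm_xid;
	__be32 rm_vers;
	__be32 rm_credit;
	__be32 rm_type;			/* rdma_msg, ..., rdma_error */
	union {
		/* chunk lists for non-error messages omitted */
		struct {
			__be32 rm_err;		/* ERR_VERS or ERR_CHUNK */
			__be32 rm_vers_low;	/* ERR_VERS only */
			__be32 rm_vers_high;	/* ERR_VERS only */
		} rm_error;
	} rm_body;
};

Both error paths bump bad_reply_count and complete the RPC with -EREMOTEIO, signalling that the failure came from the remote peer rather than the local transport.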