aboutsummaryrefslogtreecommitdiffstats
path: root/net/sunrpc/xprtrdma
diff options
context:
space:
mode:
author Tom Talpey <talpey@netapp.com> 2008-10-09 15:01:11 -0400
committer Trond Myklebust <Trond.Myklebust@netapp.com> 2008-10-10 15:12:33 -0400
commit 9191ca3b381b15b9a88785a8ae2fa4db8e553b0c (patch)
tree 7357b4009fefff106adab972f0572b3d4e105fa0 /net/sunrpc/xprtrdma
parent fee08caf943e8ed3446ce42fa085b5e7e5f08d92 (diff)
RPC/RDMA: adhere to protocol for unpadded client trailing write chunks.
The RPC/RDMA protocol allows clients and servers to avoid RDMA operations for data which is purely the result of XDR padding. On the client, automatically insert the necessary padding for such server replies, and optionally don't marshal such chunks.

Signed-off-by: Tom Talpey <talpey@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'net/sunrpc/xprtrdma')
-rw-r--r-- net/sunrpc/xprtrdma/rpc_rdma.c 21
-rw-r--r-- net/sunrpc/xprtrdma/transport.c 9
-rw-r--r-- net/sunrpc/xprtrdma/xprt_rdma.h 5
3 files changed, 33 insertions, 2 deletions
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 721dae795d68..d245c0bf7873 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
118 } 118 }
119 119
120 if (xdrbuf->tail[0].iov_len) { 120 if (xdrbuf->tail[0].iov_len) {
121 /* the rpcrdma protocol allows us to omit any trailing
122 * xdr pad bytes, saving the server an RDMA operation. */
123 if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
124 return n;
121 if (n == nsegs) 125 if (n == nsegs)
122 return 0; 126 return 0;
123 seg[n].mr_page = NULL; 127 seg[n].mr_page = NULL;
@@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
594 * Scatter inline received data back into provided iov's. 598 * Scatter inline received data back into provided iov's.
595 */ 599 */
596static void 600static void
597rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) 601rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
598{ 602{
599 int i, npages, curlen, olen; 603 int i, npages, curlen, olen;
600 char *destp; 604 char *destp;
@@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
660 } else 664 } else
661 rqst->rq_rcv_buf.tail[0].iov_len = 0; 665 rqst->rq_rcv_buf.tail[0].iov_len = 0;
662 666
667 if (pad) {
668 /* implicit padding on terminal chunk */
669 unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base;
670 while (pad--)
671 p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0;
672 }
673
663 if (copy_len) 674 if (copy_len)
664 dprintk("RPC: %s: %d bytes in" 675 dprintk("RPC: %s: %d bytes in"
665 " %d extra segments (%d lost)\n", 676 " %d extra segments (%d lost)\n",
@@ -794,14 +805,20 @@ repost:
794 ((unsigned char *)iptr - (unsigned char *)headerp); 805 ((unsigned char *)iptr - (unsigned char *)headerp);
795 status = rep->rr_len + rdmalen; 806 status = rep->rr_len + rdmalen;
796 r_xprt->rx_stats.total_rdma_reply += rdmalen; 807 r_xprt->rx_stats.total_rdma_reply += rdmalen;
808 /* special case - last chunk may omit padding */
809 if (rdmalen &= 3) {
810 rdmalen = 4 - rdmalen;
811 status += rdmalen;
812 }
797 } else { 813 } else {
798 /* else ordinary inline */ 814 /* else ordinary inline */
815 rdmalen = 0;
799 iptr = (__be32 *)((unsigned char *)headerp + 28); 816 iptr = (__be32 *)((unsigned char *)headerp + 28);
800 rep->rr_len -= 28; /*sizeof *headerp;*/ 817 rep->rr_len -= 28; /*sizeof *headerp;*/
801 status = rep->rr_len; 818 status = rep->rr_len;
802 } 819 }
803 /* Fix up the rpc results for upper layer */ 820 /* Fix up the rpc results for upper layer */
804 rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len); 821 rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen);
805 break; 822 break;
806 823
807 case __constant_htonl(RDMA_NOMSG): 824 case __constant_htonl(RDMA_NOMSG):
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index ec6d1e7a1941..c7d2380bb5e3 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -71,6 +71,7 @@ static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
71static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; 71static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
72static unsigned int xprt_rdma_inline_write_padding; 72static unsigned int xprt_rdma_inline_write_padding;
73static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; 73static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
74 int xprt_rdma_pad_optimize = 0;
74 75
75#ifdef RPC_DEBUG 76#ifdef RPC_DEBUG
76 77
@@ -136,6 +137,14 @@ static ctl_table xr_tunables_table[] = {
136 .extra2 = &max_memreg, 137 .extra2 = &max_memreg,
137 }, 138 },
138 { 139 {
140 .ctl_name = CTL_UNNUMBERED,
141 .procname = "rdma_pad_optimize",
142 .data = &xprt_rdma_pad_optimize,
143 .maxlen = sizeof(unsigned int),
144 .mode = 0644,
145 .proc_handler = &proc_dointvec,
146 },
147 {
139 .ctl_name = 0, 148 .ctl_name = 0,
140 }, 149 },
141}; 150};
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 2db2344d487e..fde6499a53b2 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -280,6 +280,11 @@ struct rpcrdma_xprt {
280#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) 280#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
281#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) 281#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
282 282
283/* Setting this to 0 ensures interoperability with early servers.
284 * Setting this to 1 enhances certain unaligned read/write performance.
285 * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
286extern int xprt_rdma_pad_optimize;
287
283/* 288/*
284 * Interface Adapter calls - xprtrdma/verbs.c 289 * Interface Adapter calls - xprtrdma/verbs.c
285 */ 290 */