aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chuck Lever <cel@netapp.com> 2005-08-25 19:25:49 -0400
committer Trond Myklebust <Trond.Myklebust@netapp.com> 2005-09-23 12:38:33 -0400
commit 808012fbb23a52ec59352445d2076d175ad4ab26 (patch)
tree 7175edb3917bc35f7f5484f567e91d7303a17663
parent 262965f53defd312a294b45366ea17907b6a616b (diff)
[PATCH] RPC: skip over transport-specific heads automatically
Add a generic mechanism for skipping over transport-specific headers when constructing an RPC request. This removes another "xprt->stream" dependency.

Test-plan: Write-intensive workload on a single mount point (try both UDP and TCP).

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r-- include/linux/sunrpc/msg_prot.h 25
-rw-r--r-- include/linux/sunrpc/xprt.h 7
-rw-r--r-- net/sunrpc/auth_gss/auth_gss.c 6
-rw-r--r-- net/sunrpc/clnt.c 5
-rw-r--r-- net/sunrpc/xprtsock.c 24
5 files changed, 53 insertions, 14 deletions
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h
index 15f115332389..f43f237360ae 100644
--- a/include/linux/sunrpc/msg_prot.h
+++ b/include/linux/sunrpc/msg_prot.h
@@ -76,5 +76,30 @@ enum rpc_auth_stat {
76 76
77#define RPC_MAXNETNAMELEN 256 77#define RPC_MAXNETNAMELEN 256
78 78
79/*
80 * From RFC 1831:
81 *
82 * "A record is composed of one or more record fragments. A record
83 * fragment is a four-byte header followed by 0 to (2**31) - 1 bytes of
84 * fragment data. The bytes encode an unsigned binary number; as with
85 * XDR integers, the byte order is from highest to lowest. The number
86 * encodes two values -- a boolean which indicates whether the fragment
87 * is the last fragment of the record (bit value 1 implies the fragment
88 * is the last fragment) and a 31-bit unsigned binary value which is the
89 * length in bytes of the fragment's data. The boolean value is the
90 * highest-order bit of the header; the length is the 31 low-order bits.
91 * (Note that this record specification is NOT in XDR standard form!)"
92 *
93 * The Linux RPC client always sends its requests in a single record
94 * fragment, limiting the maximum payload size for stream transports to
95 * 2GB.
96 */
97
98typedef u32 rpc_fraghdr;
99
100#define RPC_LAST_STREAM_FRAGMENT (1U << 31)
101#define RPC_FRAGMENT_SIZE_MASK (~RPC_LAST_STREAM_FRAGMENT)
102#define RPC_MAX_FRAGMENT_SIZE ((1U << 31) - 1)
103
79#endif /* __KERNEL__ */ 104#endif /* __KERNEL__ */
80#endif /* _LINUX_SUNRPC_MSGPROT_H_ */ 105#endif /* _LINUX_SUNRPC_MSGPROT_H_ */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index e73174c7e450..966c456a0f6d 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -155,6 +155,8 @@ struct rpc_xprt {
155 155
156 size_t max_payload; /* largest RPC payload size, 156 size_t max_payload; /* largest RPC payload size,
157 in bytes */ 157 in bytes */
158 unsigned int tsh_size; /* size of transport specific
159 header */
158 160
159 struct rpc_wait_queue sending; /* requests waiting to send */ 161 struct rpc_wait_queue sending; /* requests waiting to send */
160 struct rpc_wait_queue resend; /* requests waiting to resend */ 162 struct rpc_wait_queue resend; /* requests waiting to resend */
@@ -236,6 +238,11 @@ int xprt_adjust_timeout(struct rpc_rqst *req);
236void xprt_release(struct rpc_task *task); 238void xprt_release(struct rpc_task *task);
237int xprt_destroy(struct rpc_xprt *xprt); 239int xprt_destroy(struct rpc_xprt *xprt);
238 240
241static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p)
242{
243 return p + xprt->tsh_size;
244}
245
239/* 246/*
240 * Transport switch helper functions 247 * Transport switch helper functions
241 */ 248 */
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 53a030acdf75..d2b08f16c257 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -844,10 +844,8 @@ gss_marshal(struct rpc_task *task, u32 *p)
844 844
845 /* We compute the checksum for the verifier over the xdr-encoded bytes 845 /* We compute the checksum for the verifier over the xdr-encoded bytes
846 * starting with the xid and ending at the end of the credential: */ 846 * starting with the xid and ending at the end of the credential: */
847 iov.iov_base = req->rq_snd_buf.head[0].iov_base; 847 iov.iov_base = xprt_skip_transport_header(task->tk_xprt,
848 if (task->tk_client->cl_xprt->stream) 848 req->rq_snd_buf.head[0].iov_base);
849 /* See clnt.c:call_header() */
850 iov.iov_base += 4;
851 iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; 849 iov.iov_len = (u8 *)p - (u8 *)iov.iov_base;
852 xdr_buf_from_iov(&iov, &verf_buf); 850 xdr_buf_from_iov(&iov, &verf_buf);
853 851
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 4677959d2834..cc1b773a79d3 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1075,13 +1075,12 @@ static u32 *
1075call_header(struct rpc_task *task) 1075call_header(struct rpc_task *task)
1076{ 1076{
1077 struct rpc_clnt *clnt = task->tk_client; 1077 struct rpc_clnt *clnt = task->tk_client;
1078 struct rpc_xprt *xprt = clnt->cl_xprt;
1079 struct rpc_rqst *req = task->tk_rqstp; 1078 struct rpc_rqst *req = task->tk_rqstp;
1080 u32 *p = req->rq_svec[0].iov_base; 1079 u32 *p = req->rq_svec[0].iov_base;
1081 1080
1082 /* FIXME: check buffer size? */ 1081 /* FIXME: check buffer size? */
1083 if (xprt->stream) 1082
1084 *p++ = 0; /* fill in later */ 1083 p = xprt_skip_transport_header(task->tk_xprt, p);
1085 *p++ = req->rq_xid; /* XID */ 1084 *p++ = req->rq_xid; /* XID */
1086 *p++ = htonl(RPC_CALL); /* CALL */ 1085 *p++ = htonl(RPC_CALL); /* CALL */
1087 *p++ = htonl(RPC_VERSION); /* RPC version */ 1086 *p++ = htonl(RPC_VERSION); /* RPC version */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 57988300640a..aaf053b1a0c4 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -282,6 +282,13 @@ static int xs_udp_send_request(struct rpc_task *task)
282 return status; 282 return status;
283} 283}
284 284
285static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf)
286{
287 u32 reclen = buf->len - sizeof(rpc_fraghdr);
288 rpc_fraghdr *base = buf->head[0].iov_base;
289 *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen);
290}
291
285/** 292/**
286 * xs_tcp_send_request - write an RPC request to a TCP socket 293 * xs_tcp_send_request - write an RPC request to a TCP socket
287 * @task: address of RPC task that manages the state of an RPC request 294 * @task: address of RPC task that manages the state of an RPC request
@@ -301,11 +308,9 @@ static int xs_tcp_send_request(struct rpc_task *task)
301 struct rpc_rqst *req = task->tk_rqstp; 308 struct rpc_rqst *req = task->tk_rqstp;
302 struct rpc_xprt *xprt = req->rq_xprt; 309 struct rpc_xprt *xprt = req->rq_xprt;
303 struct xdr_buf *xdr = &req->rq_snd_buf; 310 struct xdr_buf *xdr = &req->rq_snd_buf;
304 u32 *marker = req->rq_svec[0].iov_base;
305 int status, retry = 0; 311 int status, retry = 0;
306 312
307 /* Write the record marker */ 313 xs_encode_tcp_record_marker(&req->rq_snd_buf);
308 *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker)));
309 314
310 xs_pktdump("packet data:", 315 xs_pktdump("packet data:",
311 req->rq_svec->iov_base, 316 req->rq_svec->iov_base,
@@ -503,16 +508,19 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc
503 xprt->tcp_offset += used; 508 xprt->tcp_offset += used;
504 if (used != len) 509 if (used != len)
505 return; 510 return;
511
506 xprt->tcp_reclen = ntohl(xprt->tcp_recm); 512 xprt->tcp_reclen = ntohl(xprt->tcp_recm);
507 if (xprt->tcp_reclen & 0x80000000) 513 if (xprt->tcp_reclen & RPC_LAST_STREAM_FRAGMENT)
508 xprt->tcp_flags |= XPRT_LAST_FRAG; 514 xprt->tcp_flags |= XPRT_LAST_FRAG;
509 else 515 else
510 xprt->tcp_flags &= ~XPRT_LAST_FRAG; 516 xprt->tcp_flags &= ~XPRT_LAST_FRAG;
511 xprt->tcp_reclen &= 0x7fffffff; 517 xprt->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK;
518
512 xprt->tcp_flags &= ~XPRT_COPY_RECM; 519 xprt->tcp_flags &= ~XPRT_COPY_RECM;
513 xprt->tcp_offset = 0; 520 xprt->tcp_offset = 0;
521
514 /* Sanity check of the record length */ 522 /* Sanity check of the record length */
515 if (xprt->tcp_reclen < 4) { 523 if (unlikely(xprt->tcp_reclen < 4)) {
516 dprintk("RPC: invalid TCP record fragment length\n"); 524 dprintk("RPC: invalid TCP record fragment length\n");
517 xprt_disconnect(xprt); 525 xprt_disconnect(xprt);
518 return; 526 return;
@@ -1065,6 +1073,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
1065 1073
1066 xprt->prot = IPPROTO_UDP; 1074 xprt->prot = IPPROTO_UDP;
1067 xprt->port = XS_MAX_RESVPORT; 1075 xprt->port = XS_MAX_RESVPORT;
1076 xprt->tsh_size = 0;
1068 xprt->stream = 0; 1077 xprt->stream = 0;
1069 xprt->nocong = 0; 1078 xprt->nocong = 0;
1070 xprt->cwnd = RPC_INITCWND; 1079 xprt->cwnd = RPC_INITCWND;
@@ -1105,11 +1114,12 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
1105 1114
1106 xprt->prot = IPPROTO_TCP; 1115 xprt->prot = IPPROTO_TCP;
1107 xprt->port = XS_MAX_RESVPORT; 1116 xprt->port = XS_MAX_RESVPORT;
1117 xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
1108 xprt->stream = 1; 1118 xprt->stream = 1;
1109 xprt->nocong = 1; 1119 xprt->nocong = 1;
1110 xprt->cwnd = RPC_MAXCWND(xprt); 1120 xprt->cwnd = RPC_MAXCWND(xprt);
1111 xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; 1121 xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
1112 xprt->max_payload = (1U << 31) - 1; 1122 xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
1113 1123
1114 INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); 1124 INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt);
1115 1125