diff options
author | Chuck Lever <cel@netapp.com> | 2005-08-25 19:25:49 -0400 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2005-09-23 12:38:33 -0400 |
commit | 808012fbb23a52ec59352445d2076d175ad4ab26 (patch) | |
tree | 7175edb3917bc35f7f5484f567e91d7303a17663 | |
parent | 262965f53defd312a294b45366ea17907b6a616b (diff) |
[PATCH] RPC: skip over transport-specific headers automatically
Add a generic mechanism for skipping over transport-specific headers
when constructing an RPC request. This removes another "xprt->stream"
dependency.
Test-plan:
Write-intensive workload on a single mount point (try both UDP and
TCP).
Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r-- | include/linux/sunrpc/msg_prot.h | 25 | ||||
-rw-r--r-- | include/linux/sunrpc/xprt.h | 7 | ||||
-rw-r--r-- | net/sunrpc/auth_gss/auth_gss.c | 6 | ||||
-rw-r--r-- | net/sunrpc/clnt.c | 5 | ||||
-rw-r--r-- | net/sunrpc/xprtsock.c | 24 |
5 files changed, 53 insertions, 14 deletions
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 15f115332389..f43f237360ae 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h | |||
@@ -76,5 +76,30 @@ enum rpc_auth_stat { | |||
76 | 76 | ||
77 | #define RPC_MAXNETNAMELEN 256 | 77 | #define RPC_MAXNETNAMELEN 256 |
78 | 78 | ||
79 | /* | ||
80 | * From RFC 1831: | ||
81 | * | ||
82 | * "A record is composed of one or more record fragments. A record | ||
83 | * fragment is a four-byte header followed by 0 to (2**31) - 1 bytes of | ||
84 | * fragment data. The bytes encode an unsigned binary number; as with | ||
85 | * XDR integers, the byte order is from highest to lowest. The number | ||
86 | * encodes two values -- a boolean which indicates whether the fragment | ||
87 | * is the last fragment of the record (bit value 1 implies the fragment | ||
88 | * is the last fragment) and a 31-bit unsigned binary value which is the | ||
89 | * length in bytes of the fragment's data. The boolean value is the | ||
90 | * highest-order bit of the header; the length is the 31 low-order bits. | ||
91 | * (Note that this record specification is NOT in XDR standard form!)" | ||
92 | * | ||
93 | * The Linux RPC client always sends its requests in a single record | ||
94 | * fragment, limiting the maximum payload size for stream transports to | ||
95 | * 2GB. | ||
96 | */ | ||
97 | |||
98 | typedef u32 rpc_fraghdr; | ||
99 | |||
100 | #define RPC_LAST_STREAM_FRAGMENT (1U << 31) | ||
101 | #define RPC_FRAGMENT_SIZE_MASK (~RPC_LAST_STREAM_FRAGMENT) | ||
102 | #define RPC_MAX_FRAGMENT_SIZE ((1U << 31) - 1) | ||
103 | |||
79 | #endif /* __KERNEL__ */ | 104 | #endif /* __KERNEL__ */ |
80 | #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ | 105 | #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ |
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index e73174c7e450..966c456a0f6d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h | |||
@@ -155,6 +155,8 @@ struct rpc_xprt { | |||
155 | 155 | ||
156 | size_t max_payload; /* largest RPC payload size, | 156 | size_t max_payload; /* largest RPC payload size, |
157 | in bytes */ | 157 | in bytes */ |
158 | unsigned int tsh_size; /* size of transport specific | ||
159 | header */ | ||
158 | 160 | ||
159 | struct rpc_wait_queue sending; /* requests waiting to send */ | 161 | struct rpc_wait_queue sending; /* requests waiting to send */ |
160 | struct rpc_wait_queue resend; /* requests waiting to resend */ | 162 | struct rpc_wait_queue resend; /* requests waiting to resend */ |
@@ -236,6 +238,11 @@ int xprt_adjust_timeout(struct rpc_rqst *req); | |||
236 | void xprt_release(struct rpc_task *task); | 238 | void xprt_release(struct rpc_task *task); |
237 | int xprt_destroy(struct rpc_xprt *xprt); | 239 | int xprt_destroy(struct rpc_xprt *xprt); |
238 | 240 | ||
241 | static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p) | ||
242 | { | ||
243 | return p + xprt->tsh_size; | ||
244 | } | ||
245 | |||
239 | /* | 246 | /* |
240 | * Transport switch helper functions | 247 | * Transport switch helper functions |
241 | */ | 248 | */ |
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 53a030acdf75..d2b08f16c257 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c | |||
@@ -844,10 +844,8 @@ gss_marshal(struct rpc_task *task, u32 *p) | |||
844 | 844 | ||
845 | /* We compute the checksum for the verifier over the xdr-encoded bytes | 845 | /* We compute the checksum for the verifier over the xdr-encoded bytes |
846 | * starting with the xid and ending at the end of the credential: */ | 846 | * starting with the xid and ending at the end of the credential: */ |
847 | iov.iov_base = req->rq_snd_buf.head[0].iov_base; | 847 | iov.iov_base = xprt_skip_transport_header(task->tk_xprt, |
848 | if (task->tk_client->cl_xprt->stream) | 848 | req->rq_snd_buf.head[0].iov_base); |
849 | /* See clnt.c:call_header() */ | ||
850 | iov.iov_base += 4; | ||
851 | iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; | 849 | iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; |
852 | xdr_buf_from_iov(&iov, &verf_buf); | 850 | xdr_buf_from_iov(&iov, &verf_buf); |
853 | 851 | ||
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4677959d2834..cc1b773a79d3 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
@@ -1075,13 +1075,12 @@ static u32 * | |||
1075 | call_header(struct rpc_task *task) | 1075 | call_header(struct rpc_task *task) |
1076 | { | 1076 | { |
1077 | struct rpc_clnt *clnt = task->tk_client; | 1077 | struct rpc_clnt *clnt = task->tk_client; |
1078 | struct rpc_xprt *xprt = clnt->cl_xprt; | ||
1079 | struct rpc_rqst *req = task->tk_rqstp; | 1078 | struct rpc_rqst *req = task->tk_rqstp; |
1080 | u32 *p = req->rq_svec[0].iov_base; | 1079 | u32 *p = req->rq_svec[0].iov_base; |
1081 | 1080 | ||
1082 | /* FIXME: check buffer size? */ | 1081 | /* FIXME: check buffer size? */ |
1083 | if (xprt->stream) | 1082 | |
1084 | *p++ = 0; /* fill in later */ | 1083 | p = xprt_skip_transport_header(task->tk_xprt, p); |
1085 | *p++ = req->rq_xid; /* XID */ | 1084 | *p++ = req->rq_xid; /* XID */ |
1086 | *p++ = htonl(RPC_CALL); /* CALL */ | 1085 | *p++ = htonl(RPC_CALL); /* CALL */ |
1087 | *p++ = htonl(RPC_VERSION); /* RPC version */ | 1086 | *p++ = htonl(RPC_VERSION); /* RPC version */ |
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 57988300640a..aaf053b1a0c4 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
@@ -282,6 +282,13 @@ static int xs_udp_send_request(struct rpc_task *task) | |||
282 | return status; | 282 | return status; |
283 | } | 283 | } |
284 | 284 | ||
285 | static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) | ||
286 | { | ||
287 | u32 reclen = buf->len - sizeof(rpc_fraghdr); | ||
288 | rpc_fraghdr *base = buf->head[0].iov_base; | ||
289 | *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen); | ||
290 | } | ||
291 | |||
285 | /** | 292 | /** |
286 | * xs_tcp_send_request - write an RPC request to a TCP socket | 293 | * xs_tcp_send_request - write an RPC request to a TCP socket |
287 | * @task: address of RPC task that manages the state of an RPC request | 294 | * @task: address of RPC task that manages the state of an RPC request |
@@ -301,11 +308,9 @@ static int xs_tcp_send_request(struct rpc_task *task) | |||
301 | struct rpc_rqst *req = task->tk_rqstp; | 308 | struct rpc_rqst *req = task->tk_rqstp; |
302 | struct rpc_xprt *xprt = req->rq_xprt; | 309 | struct rpc_xprt *xprt = req->rq_xprt; |
303 | struct xdr_buf *xdr = &req->rq_snd_buf; | 310 | struct xdr_buf *xdr = &req->rq_snd_buf; |
304 | u32 *marker = req->rq_svec[0].iov_base; | ||
305 | int status, retry = 0; | 311 | int status, retry = 0; |
306 | 312 | ||
307 | /* Write the record marker */ | 313 | xs_encode_tcp_record_marker(&req->rq_snd_buf); |
308 | *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker))); | ||
309 | 314 | ||
310 | xs_pktdump("packet data:", | 315 | xs_pktdump("packet data:", |
311 | req->rq_svec->iov_base, | 316 | req->rq_svec->iov_base, |
@@ -503,16 +508,19 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc | |||
503 | xprt->tcp_offset += used; | 508 | xprt->tcp_offset += used; |
504 | if (used != len) | 509 | if (used != len) |
505 | return; | 510 | return; |
511 | |||
506 | xprt->tcp_reclen = ntohl(xprt->tcp_recm); | 512 | xprt->tcp_reclen = ntohl(xprt->tcp_recm); |
507 | if (xprt->tcp_reclen & 0x80000000) | 513 | if (xprt->tcp_reclen & RPC_LAST_STREAM_FRAGMENT) |
508 | xprt->tcp_flags |= XPRT_LAST_FRAG; | 514 | xprt->tcp_flags |= XPRT_LAST_FRAG; |
509 | else | 515 | else |
510 | xprt->tcp_flags &= ~XPRT_LAST_FRAG; | 516 | xprt->tcp_flags &= ~XPRT_LAST_FRAG; |
511 | xprt->tcp_reclen &= 0x7fffffff; | 517 | xprt->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK; |
518 | |||
512 | xprt->tcp_flags &= ~XPRT_COPY_RECM; | 519 | xprt->tcp_flags &= ~XPRT_COPY_RECM; |
513 | xprt->tcp_offset = 0; | 520 | xprt->tcp_offset = 0; |
521 | |||
514 | /* Sanity check of the record length */ | 522 | /* Sanity check of the record length */ |
515 | if (xprt->tcp_reclen < 4) { | 523 | if (unlikely(xprt->tcp_reclen < 4)) { |
516 | dprintk("RPC: invalid TCP record fragment length\n"); | 524 | dprintk("RPC: invalid TCP record fragment length\n"); |
517 | xprt_disconnect(xprt); | 525 | xprt_disconnect(xprt); |
518 | return; | 526 | return; |
@@ -1065,6 +1073,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) | |||
1065 | 1073 | ||
1066 | xprt->prot = IPPROTO_UDP; | 1074 | xprt->prot = IPPROTO_UDP; |
1067 | xprt->port = XS_MAX_RESVPORT; | 1075 | xprt->port = XS_MAX_RESVPORT; |
1076 | xprt->tsh_size = 0; | ||
1068 | xprt->stream = 0; | 1077 | xprt->stream = 0; |
1069 | xprt->nocong = 0; | 1078 | xprt->nocong = 0; |
1070 | xprt->cwnd = RPC_INITCWND; | 1079 | xprt->cwnd = RPC_INITCWND; |
@@ -1105,11 +1114,12 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) | |||
1105 | 1114 | ||
1106 | xprt->prot = IPPROTO_TCP; | 1115 | xprt->prot = IPPROTO_TCP; |
1107 | xprt->port = XS_MAX_RESVPORT; | 1116 | xprt->port = XS_MAX_RESVPORT; |
1117 | xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); | ||
1108 | xprt->stream = 1; | 1118 | xprt->stream = 1; |
1109 | xprt->nocong = 1; | 1119 | xprt->nocong = 1; |
1110 | xprt->cwnd = RPC_MAXCWND(xprt); | 1120 | xprt->cwnd = RPC_MAXCWND(xprt); |
1111 | xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; | 1121 | xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; |
1112 | xprt->max_payload = (1U << 31) - 1; | 1122 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; |
1113 | 1123 | ||
1114 | INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); | 1124 | INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); |
1115 | 1125 | ||