diff options
Diffstat (limited to 'net/sunrpc/xprtsock.c')
-rw-r--r-- | net/sunrpc/xprtsock.c | 1252 |
1 files changed, 1252 insertions, 0 deletions
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c new file mode 100644 index 000000000000..2e1529217e65 --- /dev/null +++ b/net/sunrpc/xprtsock.c | |||
@@ -0,0 +1,1252 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/xprtsock.c | ||
3 | * | ||
4 | * Client-side transport implementation for sockets. | ||
5 | * | ||
6 | * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com> | ||
7 | * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com> | ||
8 | * TCP NFS related read + write fixes | ||
9 | * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> | ||
10 | * | ||
11 | * Rewrite of larges part of the code in order to stabilize TCP stuff. | ||
12 | * Fix behaviour when socket buffer is full. | ||
13 | * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> | ||
14 | * | ||
15 | * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com> | ||
16 | */ | ||
17 | |||
18 | #include <linux/types.h> | ||
19 | #include <linux/slab.h> | ||
20 | #include <linux/capability.h> | ||
21 | #include <linux/sched.h> | ||
22 | #include <linux/pagemap.h> | ||
23 | #include <linux/errno.h> | ||
24 | #include <linux/socket.h> | ||
25 | #include <linux/in.h> | ||
26 | #include <linux/net.h> | ||
27 | #include <linux/mm.h> | ||
28 | #include <linux/udp.h> | ||
29 | #include <linux/tcp.h> | ||
30 | #include <linux/sunrpc/clnt.h> | ||
31 | #include <linux/file.h> | ||
32 | |||
33 | #include <net/sock.h> | ||
34 | #include <net/checksum.h> | ||
35 | #include <net/udp.h> | ||
36 | #include <net/tcp.h> | ||
37 | |||
38 | /* | ||
39 | * How many times to try sending a request on a socket before waiting | ||
40 | * for the socket buffer to clear. | ||
41 | */ | ||
42 | #define XS_SENDMSG_RETRY (10U) | ||
43 | |||
44 | /* | ||
45 | * Time out for an RPC UDP socket connect. UDP socket connects are | ||
46 | * synchronous, but we set a timeout anyway in case of resource | ||
47 | * exhaustion on the local host. | ||
48 | */ | ||
49 | #define XS_UDP_CONN_TO (5U * HZ) | ||
50 | |||
51 | /* | ||
52 | * Wait duration for an RPC TCP connection to be established. Solaris | ||
53 | * NFS over TCP uses 60 seconds, for example, which is in line with how | ||
54 | * long a server takes to reboot. | ||
55 | */ | ||
56 | #define XS_TCP_CONN_TO (60U * HZ) | ||
57 | |||
58 | /* | ||
59 | * Wait duration for a reply from the RPC portmapper. | ||
60 | */ | ||
61 | #define XS_BIND_TO (60U * HZ) | ||
62 | |||
63 | /* | ||
64 | * Delay if a UDP socket connect error occurs. This is most likely some | ||
65 | * kind of resource problem on the local host. | ||
66 | */ | ||
67 | #define XS_UDP_REEST_TO (2U * HZ) | ||
68 | |||
69 | /* | ||
70 | * The reestablish timeout allows clients to delay for a bit before attempting | ||
71 | * to reconnect to a server that just dropped our connection. | ||
72 | * | ||
73 | * We implement an exponential backoff when trying to reestablish a TCP | ||
74 | * transport connection with the server. Some servers like to drop a TCP | ||
75 | * connection when they are overworked, so we start with a short timeout and | ||
76 | * increase over time if the server is down or not responding. | ||
77 | */ | ||
78 | #define XS_TCP_INIT_REEST_TO (3U * HZ) | ||
79 | #define XS_TCP_MAX_REEST_TO (5U * 60 * HZ) | ||
80 | |||
81 | /* | ||
82 | * TCP idle timeout; client drops the transport socket if it is idle | ||
83 | * for this long. Note that we also timeout UDP sockets to prevent | ||
84 | * holding port numbers when there is no RPC traffic. | ||
85 | */ | ||
86 | #define XS_IDLE_DISC_TO (5U * 60 * HZ) | ||
87 | |||
88 | #ifdef RPC_DEBUG | ||
89 | # undef RPC_DEBUG_DATA | ||
90 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
91 | #endif | ||
92 | |||
93 | #ifdef RPC_DEBUG_DATA | ||
94 | static void xs_pktdump(char *msg, u32 *packet, unsigned int count) | ||
95 | { | ||
96 | u8 *buf = (u8 *) packet; | ||
97 | int j; | ||
98 | |||
99 | dprintk("RPC: %s\n", msg); | ||
100 | for (j = 0; j < count && j < 128; j += 4) { | ||
101 | if (!(j & 31)) { | ||
102 | if (j) | ||
103 | dprintk("\n"); | ||
104 | dprintk("0x%04x ", j); | ||
105 | } | ||
106 | dprintk("%02x%02x%02x%02x ", | ||
107 | buf[j], buf[j+1], buf[j+2], buf[j+3]); | ||
108 | } | ||
109 | dprintk("\n"); | ||
110 | } | ||
111 | #else | ||
112 | static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) | ||
113 | { | ||
114 | /* NOP */ | ||
115 | } | ||
116 | #endif | ||
117 | |||
118 | #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) | ||
119 | |||
120 | static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len) | ||
121 | { | ||
122 | struct kvec iov = { | ||
123 | .iov_base = xdr->head[0].iov_base + base, | ||
124 | .iov_len = len - base, | ||
125 | }; | ||
126 | struct msghdr msg = { | ||
127 | .msg_name = addr, | ||
128 | .msg_namelen = addrlen, | ||
129 | .msg_flags = XS_SENDMSG_FLAGS, | ||
130 | }; | ||
131 | |||
132 | if (xdr->len > len) | ||
133 | msg.msg_flags |= MSG_MORE; | ||
134 | |||
135 | if (likely(iov.iov_len)) | ||
136 | return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); | ||
137 | return kernel_sendmsg(sock, &msg, NULL, 0, 0); | ||
138 | } | ||
139 | |||
140 | static int xs_send_tail(struct socket *sock, struct xdr_buf *xdr, unsigned int base, unsigned int len) | ||
141 | { | ||
142 | struct kvec iov = { | ||
143 | .iov_base = xdr->tail[0].iov_base + base, | ||
144 | .iov_len = len - base, | ||
145 | }; | ||
146 | struct msghdr msg = { | ||
147 | .msg_flags = XS_SENDMSG_FLAGS, | ||
148 | }; | ||
149 | |||
150 | return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); | ||
151 | } | ||
152 | |||
153 | /** | ||
154 | * xs_sendpages - write pages directly to a socket | ||
155 | * @sock: socket to send on | ||
156 | * @addr: UDP only -- address of destination | ||
157 | * @addrlen: UDP only -- length of destination address | ||
158 | * @xdr: buffer containing this request | ||
159 | * @base: starting position in the buffer | ||
160 | * | ||
161 | */ | ||
162 | static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base) | ||
163 | { | ||
164 | struct page **ppage = xdr->pages; | ||
165 | unsigned int len, pglen = xdr->page_len; | ||
166 | int err, ret = 0; | ||
167 | ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int); | ||
168 | |||
169 | if (unlikely(!sock)) | ||
170 | return -ENOTCONN; | ||
171 | |||
172 | clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); | ||
173 | |||
174 | len = xdr->head[0].iov_len; | ||
175 | if (base < len || (addr != NULL && base == 0)) { | ||
176 | err = xs_send_head(sock, addr, addrlen, xdr, base, len); | ||
177 | if (ret == 0) | ||
178 | ret = err; | ||
179 | else if (err > 0) | ||
180 | ret += err; | ||
181 | if (err != (len - base)) | ||
182 | goto out; | ||
183 | base = 0; | ||
184 | } else | ||
185 | base -= len; | ||
186 | |||
187 | if (unlikely(pglen == 0)) | ||
188 | goto copy_tail; | ||
189 | if (unlikely(base >= pglen)) { | ||
190 | base -= pglen; | ||
191 | goto copy_tail; | ||
192 | } | ||
193 | if (base || xdr->page_base) { | ||
194 | pglen -= base; | ||
195 | base += xdr->page_base; | ||
196 | ppage += base >> PAGE_CACHE_SHIFT; | ||
197 | base &= ~PAGE_CACHE_MASK; | ||
198 | } | ||
199 | |||
200 | sendpage = sock->ops->sendpage ? : sock_no_sendpage; | ||
201 | do { | ||
202 | int flags = XS_SENDMSG_FLAGS; | ||
203 | |||
204 | len = PAGE_CACHE_SIZE; | ||
205 | if (base) | ||
206 | len -= base; | ||
207 | if (pglen < len) | ||
208 | len = pglen; | ||
209 | |||
210 | if (pglen != len || xdr->tail[0].iov_len != 0) | ||
211 | flags |= MSG_MORE; | ||
212 | |||
213 | /* Hmm... We might be dealing with highmem pages */ | ||
214 | if (PageHighMem(*ppage)) | ||
215 | sendpage = sock_no_sendpage; | ||
216 | err = sendpage(sock, *ppage, base, len, flags); | ||
217 | if (ret == 0) | ||
218 | ret = err; | ||
219 | else if (err > 0) | ||
220 | ret += err; | ||
221 | if (err != len) | ||
222 | goto out; | ||
223 | base = 0; | ||
224 | ppage++; | ||
225 | } while ((pglen -= len) != 0); | ||
226 | copy_tail: | ||
227 | len = xdr->tail[0].iov_len; | ||
228 | if (base < len) { | ||
229 | err = xs_send_tail(sock, xdr, base, len); | ||
230 | if (ret == 0) | ||
231 | ret = err; | ||
232 | else if (err > 0) | ||
233 | ret += err; | ||
234 | } | ||
235 | out: | ||
236 | return ret; | ||
237 | } | ||
238 | |||
239 | /** | ||
240 | * xs_nospace - place task on wait queue if transmit was incomplete | ||
241 | * @task: task to put to sleep | ||
242 | * | ||
243 | */ | ||
244 | static void xs_nospace(struct rpc_task *task) | ||
245 | { | ||
246 | struct rpc_rqst *req = task->tk_rqstp; | ||
247 | struct rpc_xprt *xprt = req->rq_xprt; | ||
248 | |||
249 | dprintk("RPC: %4d xmit incomplete (%u left of %u)\n", | ||
250 | task->tk_pid, req->rq_slen - req->rq_bytes_sent, | ||
251 | req->rq_slen); | ||
252 | |||
253 | if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { | ||
254 | /* Protect against races with write_space */ | ||
255 | spin_lock_bh(&xprt->transport_lock); | ||
256 | |||
257 | /* Don't race with disconnect */ | ||
258 | if (!xprt_connected(xprt)) | ||
259 | task->tk_status = -ENOTCONN; | ||
260 | else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) | ||
261 | xprt_wait_for_buffer_space(task); | ||
262 | |||
263 | spin_unlock_bh(&xprt->transport_lock); | ||
264 | } else | ||
265 | /* Keep holding the socket if it is blocked */ | ||
266 | rpc_delay(task, HZ>>4); | ||
267 | } | ||
268 | |||
269 | /** | ||
270 | * xs_udp_send_request - write an RPC request to a UDP socket | ||
271 | * @task: address of RPC task that manages the state of an RPC request | ||
272 | * | ||
273 | * Return values: | ||
274 | * 0: The request has been sent | ||
275 | * EAGAIN: The socket was blocked, please call again later to | ||
276 | * complete the request | ||
277 | * ENOTCONN: Caller needs to invoke connect logic then call again | ||
278 | * other: Some other error occured, the request was not sent | ||
279 | */ | ||
280 | static int xs_udp_send_request(struct rpc_task *task) | ||
281 | { | ||
282 | struct rpc_rqst *req = task->tk_rqstp; | ||
283 | struct rpc_xprt *xprt = req->rq_xprt; | ||
284 | struct xdr_buf *xdr = &req->rq_snd_buf; | ||
285 | int status; | ||
286 | |||
287 | xs_pktdump("packet data:", | ||
288 | req->rq_svec->iov_base, | ||
289 | req->rq_svec->iov_len); | ||
290 | |||
291 | req->rq_xtime = jiffies; | ||
292 | status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr, | ||
293 | sizeof(xprt->addr), xdr, req->rq_bytes_sent); | ||
294 | |||
295 | dprintk("RPC: xs_udp_send_request(%u) = %d\n", | ||
296 | xdr->len - req->rq_bytes_sent, status); | ||
297 | |||
298 | if (likely(status >= (int) req->rq_slen)) | ||
299 | return 0; | ||
300 | |||
301 | /* Still some bytes left; set up for a retry later. */ | ||
302 | if (status > 0) | ||
303 | status = -EAGAIN; | ||
304 | |||
305 | switch (status) { | ||
306 | case -ENETUNREACH: | ||
307 | case -EPIPE: | ||
308 | case -ECONNREFUSED: | ||
309 | /* When the server has died, an ICMP port unreachable message | ||
310 | * prompts ECONNREFUSED. */ | ||
311 | break; | ||
312 | case -EAGAIN: | ||
313 | xs_nospace(task); | ||
314 | break; | ||
315 | default: | ||
316 | dprintk("RPC: sendmsg returned unrecognized error %d\n", | ||
317 | -status); | ||
318 | break; | ||
319 | } | ||
320 | |||
321 | return status; | ||
322 | } | ||
323 | |||
324 | static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) | ||
325 | { | ||
326 | u32 reclen = buf->len - sizeof(rpc_fraghdr); | ||
327 | rpc_fraghdr *base = buf->head[0].iov_base; | ||
328 | *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen); | ||
329 | } | ||
330 | |||
331 | /** | ||
332 | * xs_tcp_send_request - write an RPC request to a TCP socket | ||
333 | * @task: address of RPC task that manages the state of an RPC request | ||
334 | * | ||
335 | * Return values: | ||
336 | * 0: The request has been sent | ||
337 | * EAGAIN: The socket was blocked, please call again later to | ||
338 | * complete the request | ||
339 | * ENOTCONN: Caller needs to invoke connect logic then call again | ||
340 | * other: Some other error occured, the request was not sent | ||
341 | * | ||
342 | * XXX: In the case of soft timeouts, should we eventually give up | ||
343 | * if sendmsg is not able to make progress? | ||
344 | */ | ||
345 | static int xs_tcp_send_request(struct rpc_task *task) | ||
346 | { | ||
347 | struct rpc_rqst *req = task->tk_rqstp; | ||
348 | struct rpc_xprt *xprt = req->rq_xprt; | ||
349 | struct xdr_buf *xdr = &req->rq_snd_buf; | ||
350 | int status, retry = 0; | ||
351 | |||
352 | xs_encode_tcp_record_marker(&req->rq_snd_buf); | ||
353 | |||
354 | xs_pktdump("packet data:", | ||
355 | req->rq_svec->iov_base, | ||
356 | req->rq_svec->iov_len); | ||
357 | |||
358 | /* Continue transmitting the packet/record. We must be careful | ||
359 | * to cope with writespace callbacks arriving _after_ we have | ||
360 | * called sendmsg(). */ | ||
361 | while (1) { | ||
362 | req->rq_xtime = jiffies; | ||
363 | status = xs_sendpages(xprt->sock, NULL, 0, xdr, | ||
364 | req->rq_bytes_sent); | ||
365 | |||
366 | dprintk("RPC: xs_tcp_send_request(%u) = %d\n", | ||
367 | xdr->len - req->rq_bytes_sent, status); | ||
368 | |||
369 | if (unlikely(status < 0)) | ||
370 | break; | ||
371 | |||
372 | /* If we've sent the entire packet, immediately | ||
373 | * reset the count of bytes sent. */ | ||
374 | req->rq_bytes_sent += status; | ||
375 | if (likely(req->rq_bytes_sent >= req->rq_slen)) { | ||
376 | req->rq_bytes_sent = 0; | ||
377 | return 0; | ||
378 | } | ||
379 | |||
380 | status = -EAGAIN; | ||
381 | if (retry++ > XS_SENDMSG_RETRY) | ||
382 | break; | ||
383 | } | ||
384 | |||
385 | switch (status) { | ||
386 | case -EAGAIN: | ||
387 | xs_nospace(task); | ||
388 | break; | ||
389 | case -ECONNREFUSED: | ||
390 | case -ECONNRESET: | ||
391 | case -ENOTCONN: | ||
392 | case -EPIPE: | ||
393 | status = -ENOTCONN; | ||
394 | break; | ||
395 | default: | ||
396 | dprintk("RPC: sendmsg returned unrecognized error %d\n", | ||
397 | -status); | ||
398 | xprt_disconnect(xprt); | ||
399 | break; | ||
400 | } | ||
401 | |||
402 | return status; | ||
403 | } | ||
404 | |||
405 | /** | ||
406 | * xs_close - close a socket | ||
407 | * @xprt: transport | ||
408 | * | ||
409 | * This is used when all requests are complete; ie, no DRC state remains | ||
410 | * on the server we want to save. | ||
411 | */ | ||
412 | static void xs_close(struct rpc_xprt *xprt) | ||
413 | { | ||
414 | struct socket *sock = xprt->sock; | ||
415 | struct sock *sk = xprt->inet; | ||
416 | |||
417 | if (!sk) | ||
418 | return; | ||
419 | |||
420 | dprintk("RPC: xs_close xprt %p\n", xprt); | ||
421 | |||
422 | write_lock_bh(&sk->sk_callback_lock); | ||
423 | xprt->inet = NULL; | ||
424 | xprt->sock = NULL; | ||
425 | |||
426 | sk->sk_user_data = NULL; | ||
427 | sk->sk_data_ready = xprt->old_data_ready; | ||
428 | sk->sk_state_change = xprt->old_state_change; | ||
429 | sk->sk_write_space = xprt->old_write_space; | ||
430 | write_unlock_bh(&sk->sk_callback_lock); | ||
431 | |||
432 | sk->sk_no_check = 0; | ||
433 | |||
434 | sock_release(sock); | ||
435 | } | ||
436 | |||
437 | /** | ||
438 | * xs_destroy - prepare to shutdown a transport | ||
439 | * @xprt: doomed transport | ||
440 | * | ||
441 | */ | ||
442 | static void xs_destroy(struct rpc_xprt *xprt) | ||
443 | { | ||
444 | dprintk("RPC: xs_destroy xprt %p\n", xprt); | ||
445 | |||
446 | cancel_delayed_work(&xprt->connect_worker); | ||
447 | flush_scheduled_work(); | ||
448 | |||
449 | xprt_disconnect(xprt); | ||
450 | xs_close(xprt); | ||
451 | kfree(xprt->slot); | ||
452 | } | ||
453 | |||
454 | static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) | ||
455 | { | ||
456 | return (struct rpc_xprt *) sk->sk_user_data; | ||
457 | } | ||
458 | |||
459 | /** | ||
460 | * xs_udp_data_ready - "data ready" callback for UDP sockets | ||
461 | * @sk: socket with data to read | ||
462 | * @len: how much data to read | ||
463 | * | ||
464 | */ | ||
465 | static void xs_udp_data_ready(struct sock *sk, int len) | ||
466 | { | ||
467 | struct rpc_task *task; | ||
468 | struct rpc_xprt *xprt; | ||
469 | struct rpc_rqst *rovr; | ||
470 | struct sk_buff *skb; | ||
471 | int err, repsize, copied; | ||
472 | u32 _xid, *xp; | ||
473 | |||
474 | read_lock(&sk->sk_callback_lock); | ||
475 | dprintk("RPC: xs_udp_data_ready...\n"); | ||
476 | if (!(xprt = xprt_from_sock(sk))) | ||
477 | goto out; | ||
478 | |||
479 | if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) | ||
480 | goto out; | ||
481 | |||
482 | if (xprt->shutdown) | ||
483 | goto dropit; | ||
484 | |||
485 | repsize = skb->len - sizeof(struct udphdr); | ||
486 | if (repsize < 4) { | ||
487 | dprintk("RPC: impossible RPC reply size %d!\n", repsize); | ||
488 | goto dropit; | ||
489 | } | ||
490 | |||
491 | /* Copy the XID from the skb... */ | ||
492 | xp = skb_header_pointer(skb, sizeof(struct udphdr), | ||
493 | sizeof(_xid), &_xid); | ||
494 | if (xp == NULL) | ||
495 | goto dropit; | ||
496 | |||
497 | /* Look up and lock the request corresponding to the given XID */ | ||
498 | spin_lock(&xprt->transport_lock); | ||
499 | rovr = xprt_lookup_rqst(xprt, *xp); | ||
500 | if (!rovr) | ||
501 | goto out_unlock; | ||
502 | task = rovr->rq_task; | ||
503 | |||
504 | if ((copied = rovr->rq_private_buf.buflen) > repsize) | ||
505 | copied = repsize; | ||
506 | |||
507 | /* Suck it into the iovec, verify checksum if not done by hw. */ | ||
508 | if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) | ||
509 | goto out_unlock; | ||
510 | |||
511 | /* Something worked... */ | ||
512 | dst_confirm(skb->dst); | ||
513 | |||
514 | xprt_adjust_cwnd(task, copied); | ||
515 | xprt_update_rtt(task); | ||
516 | xprt_complete_rqst(task, copied); | ||
517 | |||
518 | out_unlock: | ||
519 | spin_unlock(&xprt->transport_lock); | ||
520 | dropit: | ||
521 | skb_free_datagram(sk, skb); | ||
522 | out: | ||
523 | read_unlock(&sk->sk_callback_lock); | ||
524 | } | ||
525 | |||
526 | static inline size_t xs_tcp_copy_data(skb_reader_t *desc, void *p, size_t len) | ||
527 | { | ||
528 | if (len > desc->count) | ||
529 | len = desc->count; | ||
530 | if (skb_copy_bits(desc->skb, desc->offset, p, len)) { | ||
531 | dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n", | ||
532 | len, desc->count); | ||
533 | return 0; | ||
534 | } | ||
535 | desc->offset += len; | ||
536 | desc->count -= len; | ||
537 | dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n", | ||
538 | len, desc->count); | ||
539 | return len; | ||
540 | } | ||
541 | |||
542 | static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
543 | { | ||
544 | size_t len, used; | ||
545 | char *p; | ||
546 | |||
547 | p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; | ||
548 | len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; | ||
549 | used = xs_tcp_copy_data(desc, p, len); | ||
550 | xprt->tcp_offset += used; | ||
551 | if (used != len) | ||
552 | return; | ||
553 | |||
554 | xprt->tcp_reclen = ntohl(xprt->tcp_recm); | ||
555 | if (xprt->tcp_reclen & RPC_LAST_STREAM_FRAGMENT) | ||
556 | xprt->tcp_flags |= XPRT_LAST_FRAG; | ||
557 | else | ||
558 | xprt->tcp_flags &= ~XPRT_LAST_FRAG; | ||
559 | xprt->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK; | ||
560 | |||
561 | xprt->tcp_flags &= ~XPRT_COPY_RECM; | ||
562 | xprt->tcp_offset = 0; | ||
563 | |||
564 | /* Sanity check of the record length */ | ||
565 | if (unlikely(xprt->tcp_reclen < 4)) { | ||
566 | dprintk("RPC: invalid TCP record fragment length\n"); | ||
567 | xprt_disconnect(xprt); | ||
568 | return; | ||
569 | } | ||
570 | dprintk("RPC: reading TCP record fragment of length %d\n", | ||
571 | xprt->tcp_reclen); | ||
572 | } | ||
573 | |||
574 | static void xs_tcp_check_recm(struct rpc_xprt *xprt) | ||
575 | { | ||
576 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n", | ||
577 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags); | ||
578 | if (xprt->tcp_offset == xprt->tcp_reclen) { | ||
579 | xprt->tcp_flags |= XPRT_COPY_RECM; | ||
580 | xprt->tcp_offset = 0; | ||
581 | if (xprt->tcp_flags & XPRT_LAST_FRAG) { | ||
582 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
583 | xprt->tcp_flags |= XPRT_COPY_XID; | ||
584 | xprt->tcp_copied = 0; | ||
585 | } | ||
586 | } | ||
587 | } | ||
588 | |||
589 | static inline void xs_tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
590 | { | ||
591 | size_t len, used; | ||
592 | char *p; | ||
593 | |||
594 | len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; | ||
595 | dprintk("RPC: reading XID (%Zu bytes)\n", len); | ||
596 | p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; | ||
597 | used = xs_tcp_copy_data(desc, p, len); | ||
598 | xprt->tcp_offset += used; | ||
599 | if (used != len) | ||
600 | return; | ||
601 | xprt->tcp_flags &= ~XPRT_COPY_XID; | ||
602 | xprt->tcp_flags |= XPRT_COPY_DATA; | ||
603 | xprt->tcp_copied = 4; | ||
604 | dprintk("RPC: reading reply for XID %08x\n", | ||
605 | ntohl(xprt->tcp_xid)); | ||
606 | xs_tcp_check_recm(xprt); | ||
607 | } | ||
608 | |||
609 | static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
610 | { | ||
611 | struct rpc_rqst *req; | ||
612 | struct xdr_buf *rcvbuf; | ||
613 | size_t len; | ||
614 | ssize_t r; | ||
615 | |||
616 | /* Find and lock the request corresponding to this xid */ | ||
617 | spin_lock(&xprt->transport_lock); | ||
618 | req = xprt_lookup_rqst(xprt, xprt->tcp_xid); | ||
619 | if (!req) { | ||
620 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
621 | dprintk("RPC: XID %08x request not found!\n", | ||
622 | ntohl(xprt->tcp_xid)); | ||
623 | spin_unlock(&xprt->transport_lock); | ||
624 | return; | ||
625 | } | ||
626 | |||
627 | rcvbuf = &req->rq_private_buf; | ||
628 | len = desc->count; | ||
629 | if (len > xprt->tcp_reclen - xprt->tcp_offset) { | ||
630 | skb_reader_t my_desc; | ||
631 | |||
632 | len = xprt->tcp_reclen - xprt->tcp_offset; | ||
633 | memcpy(&my_desc, desc, sizeof(my_desc)); | ||
634 | my_desc.count = len; | ||
635 | r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, | ||
636 | &my_desc, xs_tcp_copy_data); | ||
637 | desc->count -= r; | ||
638 | desc->offset += r; | ||
639 | } else | ||
640 | r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, | ||
641 | desc, xs_tcp_copy_data); | ||
642 | |||
643 | if (r > 0) { | ||
644 | xprt->tcp_copied += r; | ||
645 | xprt->tcp_offset += r; | ||
646 | } | ||
647 | if (r != len) { | ||
648 | /* Error when copying to the receive buffer, | ||
649 | * usually because we weren't able to allocate | ||
650 | * additional buffer pages. All we can do now | ||
651 | * is turn off XPRT_COPY_DATA, so the request | ||
652 | * will not receive any additional updates, | ||
653 | * and time out. | ||
654 | * Any remaining data from this record will | ||
655 | * be discarded. | ||
656 | */ | ||
657 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
658 | dprintk("RPC: XID %08x truncated request\n", | ||
659 | ntohl(xprt->tcp_xid)); | ||
660 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", | ||
661 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); | ||
662 | goto out; | ||
663 | } | ||
664 | |||
665 | dprintk("RPC: XID %08x read %Zd bytes\n", | ||
666 | ntohl(xprt->tcp_xid), r); | ||
667 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", | ||
668 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); | ||
669 | |||
670 | if (xprt->tcp_copied == req->rq_private_buf.buflen) | ||
671 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
672 | else if (xprt->tcp_offset == xprt->tcp_reclen) { | ||
673 | if (xprt->tcp_flags & XPRT_LAST_FRAG) | ||
674 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
675 | } | ||
676 | |||
677 | out: | ||
678 | if (!(xprt->tcp_flags & XPRT_COPY_DATA)) | ||
679 | xprt_complete_rqst(req->rq_task, xprt->tcp_copied); | ||
680 | spin_unlock(&xprt->transport_lock); | ||
681 | xs_tcp_check_recm(xprt); | ||
682 | } | ||
683 | |||
684 | static inline void xs_tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
685 | { | ||
686 | size_t len; | ||
687 | |||
688 | len = xprt->tcp_reclen - xprt->tcp_offset; | ||
689 | if (len > desc->count) | ||
690 | len = desc->count; | ||
691 | desc->count -= len; | ||
692 | desc->offset += len; | ||
693 | xprt->tcp_offset += len; | ||
694 | dprintk("RPC: discarded %Zu bytes\n", len); | ||
695 | xs_tcp_check_recm(xprt); | ||
696 | } | ||
697 | |||
698 | static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len) | ||
699 | { | ||
700 | struct rpc_xprt *xprt = rd_desc->arg.data; | ||
701 | skb_reader_t desc = { | ||
702 | .skb = skb, | ||
703 | .offset = offset, | ||
704 | .count = len, | ||
705 | .csum = 0 | ||
706 | }; | ||
707 | |||
708 | dprintk("RPC: xs_tcp_data_recv started\n"); | ||
709 | do { | ||
710 | /* Read in a new fragment marker if necessary */ | ||
711 | /* Can we ever really expect to get completely empty fragments? */ | ||
712 | if (xprt->tcp_flags & XPRT_COPY_RECM) { | ||
713 | xs_tcp_read_fraghdr(xprt, &desc); | ||
714 | continue; | ||
715 | } | ||
716 | /* Read in the xid if necessary */ | ||
717 | if (xprt->tcp_flags & XPRT_COPY_XID) { | ||
718 | xs_tcp_read_xid(xprt, &desc); | ||
719 | continue; | ||
720 | } | ||
721 | /* Read in the request data */ | ||
722 | if (xprt->tcp_flags & XPRT_COPY_DATA) { | ||
723 | xs_tcp_read_request(xprt, &desc); | ||
724 | continue; | ||
725 | } | ||
726 | /* Skip over any trailing bytes on short reads */ | ||
727 | xs_tcp_read_discard(xprt, &desc); | ||
728 | } while (desc.count); | ||
729 | dprintk("RPC: xs_tcp_data_recv done\n"); | ||
730 | return len - desc.count; | ||
731 | } | ||
732 | |||
733 | /** | ||
734 | * xs_tcp_data_ready - "data ready" callback for TCP sockets | ||
735 | * @sk: socket with data to read | ||
736 | * @bytes: how much data to read | ||
737 | * | ||
738 | */ | ||
739 | static void xs_tcp_data_ready(struct sock *sk, int bytes) | ||
740 | { | ||
741 | struct rpc_xprt *xprt; | ||
742 | read_descriptor_t rd_desc; | ||
743 | |||
744 | read_lock(&sk->sk_callback_lock); | ||
745 | dprintk("RPC: xs_tcp_data_ready...\n"); | ||
746 | if (!(xprt = xprt_from_sock(sk))) | ||
747 | goto out; | ||
748 | if (xprt->shutdown) | ||
749 | goto out; | ||
750 | |||
751 | /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ | ||
752 | rd_desc.arg.data = xprt; | ||
753 | rd_desc.count = 65536; | ||
754 | tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); | ||
755 | out: | ||
756 | read_unlock(&sk->sk_callback_lock); | ||
757 | } | ||
758 | |||
759 | /** | ||
760 | * xs_tcp_state_change - callback to handle TCP socket state changes | ||
761 | * @sk: socket whose state has changed | ||
762 | * | ||
763 | */ | ||
764 | static void xs_tcp_state_change(struct sock *sk) | ||
765 | { | ||
766 | struct rpc_xprt *xprt; | ||
767 | |||
768 | read_lock(&sk->sk_callback_lock); | ||
769 | if (!(xprt = xprt_from_sock(sk))) | ||
770 | goto out; | ||
771 | dprintk("RPC: xs_tcp_state_change client %p...\n", xprt); | ||
772 | dprintk("RPC: state %x conn %d dead %d zapped %d\n", | ||
773 | sk->sk_state, xprt_connected(xprt), | ||
774 | sock_flag(sk, SOCK_DEAD), | ||
775 | sock_flag(sk, SOCK_ZAPPED)); | ||
776 | |||
777 | switch (sk->sk_state) { | ||
778 | case TCP_ESTABLISHED: | ||
779 | spin_lock_bh(&xprt->transport_lock); | ||
780 | if (!xprt_test_and_set_connected(xprt)) { | ||
781 | /* Reset TCP record info */ | ||
782 | xprt->tcp_offset = 0; | ||
783 | xprt->tcp_reclen = 0; | ||
784 | xprt->tcp_copied = 0; | ||
785 | xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; | ||
786 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; | ||
787 | xprt_wake_pending_tasks(xprt, 0); | ||
788 | } | ||
789 | spin_unlock_bh(&xprt->transport_lock); | ||
790 | break; | ||
791 | case TCP_SYN_SENT: | ||
792 | case TCP_SYN_RECV: | ||
793 | break; | ||
794 | default: | ||
795 | xprt_disconnect(xprt); | ||
796 | break; | ||
797 | } | ||
798 | out: | ||
799 | read_unlock(&sk->sk_callback_lock); | ||
800 | } | ||
801 | |||
802 | /** | ||
803 | * xs_udp_write_space - callback invoked when socket buffer space | ||
804 | * becomes available | ||
805 | * @sk: socket whose state has changed | ||
806 | * | ||
807 | * Called when more output buffer space is available for this socket. | ||
808 | * We try not to wake our writers until they can make "significant" | ||
809 | * progress, otherwise we'll waste resources thrashing kernel_sendmsg | ||
810 | * with a bunch of small requests. | ||
811 | */ | ||
812 | static void xs_udp_write_space(struct sock *sk) | ||
813 | { | ||
814 | read_lock(&sk->sk_callback_lock); | ||
815 | |||
816 | /* from net/core/sock.c:sock_def_write_space */ | ||
817 | if (sock_writeable(sk)) { | ||
818 | struct socket *sock; | ||
819 | struct rpc_xprt *xprt; | ||
820 | |||
821 | if (unlikely(!(sock = sk->sk_socket))) | ||
822 | goto out; | ||
823 | if (unlikely(!(xprt = xprt_from_sock(sk)))) | ||
824 | goto out; | ||
825 | if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) | ||
826 | goto out; | ||
827 | |||
828 | xprt_write_space(xprt); | ||
829 | } | ||
830 | |||
831 | out: | ||
832 | read_unlock(&sk->sk_callback_lock); | ||
833 | } | ||
834 | |||
835 | /** | ||
836 | * xs_tcp_write_space - callback invoked when socket buffer space | ||
837 | * becomes available | ||
838 | * @sk: socket whose state has changed | ||
839 | * | ||
840 | * Called when more output buffer space is available for this socket. | ||
841 | * We try not to wake our writers until they can make "significant" | ||
842 | * progress, otherwise we'll waste resources thrashing kernel_sendmsg | ||
843 | * with a bunch of small requests. | ||
844 | */ | ||
845 | static void xs_tcp_write_space(struct sock *sk) | ||
846 | { | ||
847 | read_lock(&sk->sk_callback_lock); | ||
848 | |||
849 | /* from net/core/stream.c:sk_stream_write_space */ | ||
850 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { | ||
851 | struct socket *sock; | ||
852 | struct rpc_xprt *xprt; | ||
853 | |||
854 | if (unlikely(!(sock = sk->sk_socket))) | ||
855 | goto out; | ||
856 | if (unlikely(!(xprt = xprt_from_sock(sk)))) | ||
857 | goto out; | ||
858 | if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) | ||
859 | goto out; | ||
860 | |||
861 | xprt_write_space(xprt); | ||
862 | } | ||
863 | |||
864 | out: | ||
865 | read_unlock(&sk->sk_callback_lock); | ||
866 | } | ||
867 | |||
868 | static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) | ||
869 | { | ||
870 | struct sock *sk = xprt->inet; | ||
871 | |||
872 | if (xprt->rcvsize) { | ||
873 | sk->sk_userlocks |= SOCK_RCVBUF_LOCK; | ||
874 | sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; | ||
875 | } | ||
876 | if (xprt->sndsize) { | ||
877 | sk->sk_userlocks |= SOCK_SNDBUF_LOCK; | ||
878 | sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; | ||
879 | sk->sk_write_space(sk); | ||
880 | } | ||
881 | } | ||
882 | |||
883 | /** | ||
884 | * xs_udp_set_buffer_size - set send and receive limits | ||
885 | * @xprt: generic transport | ||
886 | * @sndsize: requested size of send buffer, in bytes | ||
887 | * @rcvsize: requested size of receive buffer, in bytes | ||
888 | * | ||
889 | * Set socket send and receive buffer size limits. | ||
890 | */ | ||
891 | static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize) | ||
892 | { | ||
893 | xprt->sndsize = 0; | ||
894 | if (sndsize) | ||
895 | xprt->sndsize = sndsize + 1024; | ||
896 | xprt->rcvsize = 0; | ||
897 | if (rcvsize) | ||
898 | xprt->rcvsize = rcvsize + 1024; | ||
899 | |||
900 | xs_udp_do_set_buffer_size(xprt); | ||
901 | } | ||
902 | |||
903 | /** | ||
904 | * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport | ||
905 | * @task: task that timed out | ||
906 | * | ||
907 | * Adjust the congestion window after a retransmit timeout has occurred. | ||
908 | */ | ||
909 | static void xs_udp_timer(struct rpc_task *task) | ||
910 | { | ||
911 | xprt_adjust_cwnd(task, -ETIMEDOUT); | ||
912 | } | ||
913 | |||
914 | static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) | ||
915 | { | ||
916 | struct sockaddr_in myaddr = { | ||
917 | .sin_family = AF_INET, | ||
918 | }; | ||
919 | int err; | ||
920 | unsigned short port = xprt->port; | ||
921 | |||
922 | do { | ||
923 | myaddr.sin_port = htons(port); | ||
924 | err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, | ||
925 | sizeof(myaddr)); | ||
926 | if (err == 0) { | ||
927 | xprt->port = port; | ||
928 | dprintk("RPC: xs_bindresvport bound to port %u\n", | ||
929 | port); | ||
930 | return 0; | ||
931 | } | ||
932 | if (port <= xprt_min_resvport) | ||
933 | port = xprt_max_resvport; | ||
934 | else | ||
935 | port--; | ||
936 | } while (err == -EADDRINUSE && port != xprt->port); | ||
937 | |||
938 | dprintk("RPC: can't bind to reserved port (%d).\n", -err); | ||
939 | return err; | ||
940 | } | ||
941 | |||
942 | /** | ||
943 | * xs_udp_connect_worker - set up a UDP socket | ||
944 | * @args: RPC transport to connect | ||
945 | * | ||
946 | * Invoked by a work queue tasklet. | ||
947 | */ | ||
948 | static void xs_udp_connect_worker(void *args) | ||
949 | { | ||
950 | struct rpc_xprt *xprt = (struct rpc_xprt *) args; | ||
951 | struct socket *sock = xprt->sock; | ||
952 | int err, status = -EIO; | ||
953 | |||
954 | if (xprt->shutdown || xprt->addr.sin_port == 0) | ||
955 | goto out; | ||
956 | |||
957 | dprintk("RPC: xs_udp_connect_worker for xprt %p\n", xprt); | ||
958 | |||
959 | /* Start by resetting any existing state */ | ||
960 | xs_close(xprt); | ||
961 | |||
962 | if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { | ||
963 | dprintk("RPC: can't create UDP transport socket (%d).\n", -err); | ||
964 | goto out; | ||
965 | } | ||
966 | |||
967 | if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { | ||
968 | sock_release(sock); | ||
969 | goto out; | ||
970 | } | ||
971 | |||
972 | if (!xprt->inet) { | ||
973 | struct sock *sk = sock->sk; | ||
974 | |||
975 | write_lock_bh(&sk->sk_callback_lock); | ||
976 | |||
977 | sk->sk_user_data = xprt; | ||
978 | xprt->old_data_ready = sk->sk_data_ready; | ||
979 | xprt->old_state_change = sk->sk_state_change; | ||
980 | xprt->old_write_space = sk->sk_write_space; | ||
981 | sk->sk_data_ready = xs_udp_data_ready; | ||
982 | sk->sk_write_space = xs_udp_write_space; | ||
983 | sk->sk_no_check = UDP_CSUM_NORCV; | ||
984 | |||
985 | xprt_set_connected(xprt); | ||
986 | |||
987 | /* Reset to new socket */ | ||
988 | xprt->sock = sock; | ||
989 | xprt->inet = sk; | ||
990 | |||
991 | write_unlock_bh(&sk->sk_callback_lock); | ||
992 | } | ||
993 | xs_udp_do_set_buffer_size(xprt); | ||
994 | status = 0; | ||
995 | out: | ||
996 | xprt_wake_pending_tasks(xprt, status); | ||
997 | xprt_clear_connecting(xprt); | ||
998 | } | ||
999 | |||
1000 | /* | ||
1001 | * We need to preserve the port number so the reply cache on the server can | ||
1002 | * find our cached RPC replies when we get around to reconnecting. | ||
1003 | */ | ||
1004 | static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) | ||
1005 | { | ||
1006 | int result; | ||
1007 | struct socket *sock = xprt->sock; | ||
1008 | struct sockaddr any; | ||
1009 | |||
1010 | dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); | ||
1011 | |||
1012 | /* | ||
1013 | * Disconnect the transport socket by doing a connect operation | ||
1014 | * with AF_UNSPEC. This should return immediately... | ||
1015 | */ | ||
1016 | memset(&any, 0, sizeof(any)); | ||
1017 | any.sa_family = AF_UNSPEC; | ||
1018 | result = sock->ops->connect(sock, &any, sizeof(any), 0); | ||
1019 | if (result) | ||
1020 | dprintk("RPC: AF_UNSPEC connect return code %d\n", | ||
1021 | result); | ||
1022 | } | ||
1023 | |||
1024 | /** | ||
1025 | * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint | ||
1026 | * @args: RPC transport to connect | ||
1027 | * | ||
1028 | * Invoked by a work queue tasklet. | ||
1029 | */ | ||
1030 | static void xs_tcp_connect_worker(void *args) | ||
1031 | { | ||
1032 | struct rpc_xprt *xprt = (struct rpc_xprt *)args; | ||
1033 | struct socket *sock = xprt->sock; | ||
1034 | int err, status = -EIO; | ||
1035 | |||
1036 | if (xprt->shutdown || xprt->addr.sin_port == 0) | ||
1037 | goto out; | ||
1038 | |||
1039 | dprintk("RPC: xs_tcp_connect_worker for xprt %p\n", xprt); | ||
1040 | |||
1041 | if (!xprt->sock) { | ||
1042 | /* start from scratch */ | ||
1043 | if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { | ||
1044 | dprintk("RPC: can't create TCP transport socket (%d).\n", -err); | ||
1045 | goto out; | ||
1046 | } | ||
1047 | |||
1048 | if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { | ||
1049 | sock_release(sock); | ||
1050 | goto out; | ||
1051 | } | ||
1052 | } else | ||
1053 | /* "close" the socket, preserving the local port */ | ||
1054 | xs_tcp_reuse_connection(xprt); | ||
1055 | |||
1056 | if (!xprt->inet) { | ||
1057 | struct sock *sk = sock->sk; | ||
1058 | |||
1059 | write_lock_bh(&sk->sk_callback_lock); | ||
1060 | |||
1061 | sk->sk_user_data = xprt; | ||
1062 | xprt->old_data_ready = sk->sk_data_ready; | ||
1063 | xprt->old_state_change = sk->sk_state_change; | ||
1064 | xprt->old_write_space = sk->sk_write_space; | ||
1065 | sk->sk_data_ready = xs_tcp_data_ready; | ||
1066 | sk->sk_state_change = xs_tcp_state_change; | ||
1067 | sk->sk_write_space = xs_tcp_write_space; | ||
1068 | |||
1069 | /* socket options */ | ||
1070 | sk->sk_userlocks |= SOCK_BINDPORT_LOCK; | ||
1071 | sock_reset_flag(sk, SOCK_LINGER); | ||
1072 | tcp_sk(sk)->linger2 = 0; | ||
1073 | tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; | ||
1074 | |||
1075 | xprt_clear_connected(xprt); | ||
1076 | |||
1077 | /* Reset to new socket */ | ||
1078 | xprt->sock = sock; | ||
1079 | xprt->inet = sk; | ||
1080 | |||
1081 | write_unlock_bh(&sk->sk_callback_lock); | ||
1082 | } | ||
1083 | |||
1084 | /* Tell the socket layer to start connecting... */ | ||
1085 | status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, | ||
1086 | sizeof(xprt->addr), O_NONBLOCK); | ||
1087 | dprintk("RPC: %p connect status %d connected %d sock state %d\n", | ||
1088 | xprt, -status, xprt_connected(xprt), sock->sk->sk_state); | ||
1089 | if (status < 0) { | ||
1090 | switch (status) { | ||
1091 | case -EINPROGRESS: | ||
1092 | case -EALREADY: | ||
1093 | goto out_clear; | ||
1094 | case -ECONNREFUSED: | ||
1095 | case -ECONNRESET: | ||
1096 | /* retry with existing socket, after a delay */ | ||
1097 | break; | ||
1098 | default: | ||
1099 | /* get rid of existing socket, and retry */ | ||
1100 | xs_close(xprt); | ||
1101 | break; | ||
1102 | } | ||
1103 | } | ||
1104 | out: | ||
1105 | xprt_wake_pending_tasks(xprt, status); | ||
1106 | out_clear: | ||
1107 | xprt_clear_connecting(xprt); | ||
1108 | } | ||
1109 | |||
1110 | /** | ||
1111 | * xs_connect - connect a socket to a remote endpoint | ||
1112 | * @task: address of RPC task that manages state of connect request | ||
1113 | * | ||
1114 | * TCP: If the remote end dropped the connection, delay reconnecting. | ||
1115 | * | ||
1116 | * UDP socket connects are synchronous, but we use a work queue anyway | ||
1117 | * to guarantee that even unprivileged user processes can set up a | ||
1118 | * socket on a privileged port. | ||
1119 | * | ||
1120 | * If a UDP socket connect fails, the delay behavior here prevents | ||
1121 | * retry floods (hard mounts). | ||
1122 | */ | ||
1123 | static void xs_connect(struct rpc_task *task) | ||
1124 | { | ||
1125 | struct rpc_xprt *xprt = task->tk_xprt; | ||
1126 | |||
1127 | if (xprt_test_and_set_connecting(xprt)) | ||
1128 | return; | ||
1129 | |||
1130 | if (xprt->sock != NULL) { | ||
1131 | dprintk("RPC: xs_connect delayed xprt %p for %lu seconds\n", | ||
1132 | xprt, xprt->reestablish_timeout / HZ); | ||
1133 | schedule_delayed_work(&xprt->connect_worker, | ||
1134 | xprt->reestablish_timeout); | ||
1135 | xprt->reestablish_timeout <<= 1; | ||
1136 | if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) | ||
1137 | xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; | ||
1138 | } else { | ||
1139 | dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); | ||
1140 | schedule_work(&xprt->connect_worker); | ||
1141 | |||
1142 | /* flush_scheduled_work can sleep... */ | ||
1143 | if (!RPC_IS_ASYNC(task)) | ||
1144 | flush_scheduled_work(); | ||
1145 | } | ||
1146 | } | ||
1147 | |||
1148 | static struct rpc_xprt_ops xs_udp_ops = { | ||
1149 | .set_buffer_size = xs_udp_set_buffer_size, | ||
1150 | .reserve_xprt = xprt_reserve_xprt_cong, | ||
1151 | .release_xprt = xprt_release_xprt_cong, | ||
1152 | .connect = xs_connect, | ||
1153 | .send_request = xs_udp_send_request, | ||
1154 | .set_retrans_timeout = xprt_set_retrans_timeout_rtt, | ||
1155 | .timer = xs_udp_timer, | ||
1156 | .release_request = xprt_release_rqst_cong, | ||
1157 | .close = xs_close, | ||
1158 | .destroy = xs_destroy, | ||
1159 | }; | ||
1160 | |||
1161 | static struct rpc_xprt_ops xs_tcp_ops = { | ||
1162 | .reserve_xprt = xprt_reserve_xprt, | ||
1163 | .release_xprt = xprt_release_xprt, | ||
1164 | .connect = xs_connect, | ||
1165 | .send_request = xs_tcp_send_request, | ||
1166 | .set_retrans_timeout = xprt_set_retrans_timeout_def, | ||
1167 | .close = xs_close, | ||
1168 | .destroy = xs_destroy, | ||
1169 | }; | ||
1170 | |||
1171 | /** | ||
1172 | * xs_setup_udp - Set up transport to use a UDP socket | ||
1173 | * @xprt: transport to set up | ||
1174 | * @to: timeout parameters | ||
1175 | * | ||
1176 | */ | ||
1177 | int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) | ||
1178 | { | ||
1179 | size_t slot_table_size; | ||
1180 | |||
1181 | dprintk("RPC: setting up udp-ipv4 transport...\n"); | ||
1182 | |||
1183 | xprt->max_reqs = xprt_udp_slot_table_entries; | ||
1184 | slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); | ||
1185 | xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); | ||
1186 | if (xprt->slot == NULL) | ||
1187 | return -ENOMEM; | ||
1188 | memset(xprt->slot, 0, slot_table_size); | ||
1189 | |||
1190 | xprt->prot = IPPROTO_UDP; | ||
1191 | xprt->port = xprt_max_resvport; | ||
1192 | xprt->tsh_size = 0; | ||
1193 | xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; | ||
1194 | /* XXX: header size can vary due to auth type, IPv6, etc. */ | ||
1195 | xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); | ||
1196 | |||
1197 | INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt); | ||
1198 | xprt->bind_timeout = XS_BIND_TO; | ||
1199 | xprt->connect_timeout = XS_UDP_CONN_TO; | ||
1200 | xprt->reestablish_timeout = XS_UDP_REEST_TO; | ||
1201 | xprt->idle_timeout = XS_IDLE_DISC_TO; | ||
1202 | |||
1203 | xprt->ops = &xs_udp_ops; | ||
1204 | |||
1205 | if (to) | ||
1206 | xprt->timeout = *to; | ||
1207 | else | ||
1208 | xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); | ||
1209 | |||
1210 | return 0; | ||
1211 | } | ||
1212 | |||
1213 | /** | ||
1214 | * xs_setup_tcp - Set up transport to use a TCP socket | ||
1215 | * @xprt: transport to set up | ||
1216 | * @to: timeout parameters | ||
1217 | * | ||
1218 | */ | ||
1219 | int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) | ||
1220 | { | ||
1221 | size_t slot_table_size; | ||
1222 | |||
1223 | dprintk("RPC: setting up tcp-ipv4 transport...\n"); | ||
1224 | |||
1225 | xprt->max_reqs = xprt_tcp_slot_table_entries; | ||
1226 | slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); | ||
1227 | xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); | ||
1228 | if (xprt->slot == NULL) | ||
1229 | return -ENOMEM; | ||
1230 | memset(xprt->slot, 0, slot_table_size); | ||
1231 | |||
1232 | xprt->prot = IPPROTO_TCP; | ||
1233 | xprt->port = xprt_max_resvport; | ||
1234 | xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); | ||
1235 | xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; | ||
1236 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; | ||
1237 | |||
1238 | INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); | ||
1239 | xprt->bind_timeout = XS_BIND_TO; | ||
1240 | xprt->connect_timeout = XS_TCP_CONN_TO; | ||
1241 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; | ||
1242 | xprt->idle_timeout = XS_IDLE_DISC_TO; | ||
1243 | |||
1244 | xprt->ops = &xs_tcp_ops; | ||
1245 | |||
1246 | if (to) | ||
1247 | xprt->timeout = *to; | ||
1248 | else | ||
1249 | xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); | ||
1250 | |||
1251 | return 0; | ||
1252 | } | ||