Diffstat (limited to 'net/sunrpc/svcsock.c')
-rw-r--r--  net/sunrpc/svcsock.c  1311
1 files changed, 316 insertions, 995 deletions
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index c75bffeb89eb..1d3e5fcc2cc4 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * The server scheduling algorithm does not always distribute the load 6 * The server scheduling algorithm does not always distribute the load
7 * evenly when servicing a single client. May need to modify the 7 * evenly when servicing a single client. May need to modify the
8 * svc_sock_enqueue procedure... 8 * svc_xprt_enqueue procedure...
9 * 9 *
10 * TCP support is largely untested and may be a little slow. The problem 10 * TCP support is largely untested and may be a little slow. The problem
11 * is that we currently do two separate recvfrom's, one for the 4-byte 11 * is that we currently do two separate recvfrom's, one for the 4-byte
@@ -48,72 +48,40 @@
48#include <linux/sunrpc/svcsock.h> 48#include <linux/sunrpc/svcsock.h>
49#include <linux/sunrpc/stats.h> 49#include <linux/sunrpc/stats.h>
50 50
51/* SMP locking strategy: 51#define RPCDBG_FACILITY RPCDBG_SVCXPRT
52 *
53 * svc_pool->sp_lock protects most of the fields of that pool.
54 * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
55 * when both need to be taken (rare), svc_serv->sv_lock is first.
56 * BKL protects svc_serv->sv_nrthread.
57 * svc_sock->sk_lock protects the svc_sock->sk_deferred list
58 * and the ->sk_info_authunix cache.
59 * svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply.
60 *
61 * Some flags can be set to certain values at any time
62 * providing that certain rules are followed:
63 *
64 * SK_CONN, SK_DATA, can be set or cleared at any time.
65 * after a set, svc_sock_enqueue must be called.
66 * after a clear, the socket must be read/accepted
67 * if this succeeds, it must be set again.
68 * SK_CLOSE can set at any time. It is never cleared.
69 * sk_inuse contains a bias of '1' until SK_DEAD is set.
70 * so when sk_inuse hits zero, we know the socket is dead
71 * and no-one is using it.
72 * SK_DEAD can only be set while SK_BUSY is held which ensures
73 * no other thread will be using the socket or will try to
74 * set SK_DEAD.
75 *
76 */
77
78#define RPCDBG_FACILITY RPCDBG_SVCSOCK
79 52
80 53
81static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, 54static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
82 int *errp, int flags); 55 int *errp, int flags);
83static void svc_delete_socket(struct svc_sock *svsk);
84static void svc_udp_data_ready(struct sock *, int); 56static void svc_udp_data_ready(struct sock *, int);
85static int svc_udp_recvfrom(struct svc_rqst *); 57static int svc_udp_recvfrom(struct svc_rqst *);
86static int svc_udp_sendto(struct svc_rqst *); 58static int svc_udp_sendto(struct svc_rqst *);
87static void svc_close_socket(struct svc_sock *svsk); 59static void svc_sock_detach(struct svc_xprt *);
88 60static void svc_sock_free(struct svc_xprt *);
89static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
90static int svc_deferred_recv(struct svc_rqst *rqstp);
91static struct cache_deferred_req *svc_defer(struct cache_req *req);
92
93/* apparently the "standard" is that clients close
94 * idle connections after 5 minutes, servers after
95 * 6 minutes
96 * http://www.connectathon.org/talks96/nfstcp.pdf
97 */
98static int svc_conn_age_period = 6*60;
99 61
62static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
63 struct sockaddr *, int, int);
100#ifdef CONFIG_DEBUG_LOCK_ALLOC 64#ifdef CONFIG_DEBUG_LOCK_ALLOC
101static struct lock_class_key svc_key[2]; 65static struct lock_class_key svc_key[2];
102static struct lock_class_key svc_slock_key[2]; 66static struct lock_class_key svc_slock_key[2];
103 67
104static inline void svc_reclassify_socket(struct socket *sock) 68static void svc_reclassify_socket(struct socket *sock)
105{ 69{
106 struct sock *sk = sock->sk; 70 struct sock *sk = sock->sk;
107 BUG_ON(sock_owned_by_user(sk)); 71 BUG_ON(sock_owned_by_user(sk));
108 switch (sk->sk_family) { 72 switch (sk->sk_family) {
109 case AF_INET: 73 case AF_INET:
110 sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", 74 sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD",
111 &svc_slock_key[0], "sk_lock-AF_INET-NFSD", &svc_key[0]); 75 &svc_slock_key[0],
76 "sk_xprt.xpt_lock-AF_INET-NFSD",
77 &svc_key[0]);
112 break; 78 break;
113 79
114 case AF_INET6: 80 case AF_INET6:
115 sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD", 81 sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD",
116 &svc_slock_key[1], "sk_lock-AF_INET6-NFSD", &svc_key[1]); 82 &svc_slock_key[1],
83 "sk_xprt.xpt_lock-AF_INET6-NFSD",
84 &svc_key[1]);
117 break; 85 break;
118 86
119 default: 87 default:
@@ -121,81 +89,26 @@ static inline void svc_reclassify_socket(struct socket *sock)
121 } 89 }
122} 90}
123#else 91#else
124static inline void svc_reclassify_socket(struct socket *sock) 92static void svc_reclassify_socket(struct socket *sock)
125{ 93{
126} 94}
127#endif 95#endif
128 96
129static char *__svc_print_addr(struct sockaddr *addr, char *buf, size_t len)
130{
131 switch (addr->sa_family) {
132 case AF_INET:
133 snprintf(buf, len, "%u.%u.%u.%u, port=%u",
134 NIPQUAD(((struct sockaddr_in *) addr)->sin_addr),
135 ntohs(((struct sockaddr_in *) addr)->sin_port));
136 break;
137
138 case AF_INET6:
139 snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u",
140 NIP6(((struct sockaddr_in6 *) addr)->sin6_addr),
141 ntohs(((struct sockaddr_in6 *) addr)->sin6_port));
142 break;
143
144 default:
145 snprintf(buf, len, "unknown address type: %d", addr->sa_family);
146 break;
147 }
148 return buf;
149}
150
151/**
152 * svc_print_addr - Format rq_addr field for printing
153 * @rqstp: svc_rqst struct containing address to print
154 * @buf: target buffer for formatted address
155 * @len: length of target buffer
156 *
157 */
158char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
159{
160 return __svc_print_addr(svc_addr(rqstp), buf, len);
161}
162EXPORT_SYMBOL_GPL(svc_print_addr);
163
164/*
165 * Queue up an idle server thread. Must have pool->sp_lock held.
166 * Note: this is really a stack rather than a queue, so that we only
167 * use as many different threads as we need, and the rest don't pollute
168 * the cache.
169 */
170static inline void
171svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp)
172{
173 list_add(&rqstp->rq_list, &pool->sp_threads);
174}
175
176/*
177 * Dequeue an nfsd thread. Must have pool->sp_lock held.
178 */
179static inline void
180svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
181{
182 list_del(&rqstp->rq_list);
183}
184
185/* 97/*
186 * Release an skbuff after use 98 * Release an skbuff after use
187 */ 99 */
188static inline void 100static void svc_release_skb(struct svc_rqst *rqstp)
189svc_release_skb(struct svc_rqst *rqstp)
190{ 101{
191 struct sk_buff *skb = rqstp->rq_skbuff; 102 struct sk_buff *skb = rqstp->rq_xprt_ctxt;
192 struct svc_deferred_req *dr = rqstp->rq_deferred; 103 struct svc_deferred_req *dr = rqstp->rq_deferred;
193 104
194 if (skb) { 105 if (skb) {
195 rqstp->rq_skbuff = NULL; 106 struct svc_sock *svsk =
107 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
108 rqstp->rq_xprt_ctxt = NULL;
196 109
197 dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); 110 dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
198 skb_free_datagram(rqstp->rq_sock->sk_sk, skb); 111 skb_free_datagram(svsk->sk_sk, skb);
199 } 112 }
200 if (dr) { 113 if (dr) {
201 rqstp->rq_deferred = NULL; 114 rqstp->rq_deferred = NULL;
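
The hunk above sets the pattern for the whole conversion: struct svc_sock now embeds the generic struct svc_xprt as its sk_xprt member, per-request transport state moves from rq_skbuff to the opaque rq_xprt_ctxt, and socket code recovers its private structure with container_of(). A minimal sketch of that embedding (illustrative field layout, not the exact kernel definition; svc_sock_of() is a hypothetical helper, the patch open-codes the container_of() at each call site):

    /* Sketch only: the real definitions live in the sunrpc headers
     * that accompany this patch series. */
    struct svc_sock {
            struct svc_xprt sk_xprt;   /* generic transport part */
            struct socket   *sk_sock;  /* BSD socket layer */
            struct sock     *sk_sk;    /* INET layer */
    };

    static inline struct svc_sock *svc_sock_of(struct svc_xprt *xprt)
    {
            return container_of(xprt, struct svc_sock, sk_xprt);
    }
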
@@ -203,253 +116,6 @@ svc_release_skb(struct svc_rqst *rqstp)
203 } 116 }
204} 117}
205 118
206/*
207 * Any space to write?
208 */
209static inline unsigned long
210svc_sock_wspace(struct svc_sock *svsk)
211{
212 int wspace;
213
214 if (svsk->sk_sock->type == SOCK_STREAM)
215 wspace = sk_stream_wspace(svsk->sk_sk);
216 else
217 wspace = sock_wspace(svsk->sk_sk);
218
219 return wspace;
220}
221
222/*
223 * Queue up a socket with data pending. If there are idle nfsd
224 * processes, wake 'em up.
225 *
226 */
227static void
228svc_sock_enqueue(struct svc_sock *svsk)
229{
230 struct svc_serv *serv = svsk->sk_server;
231 struct svc_pool *pool;
232 struct svc_rqst *rqstp;
233 int cpu;
234
235 if (!(svsk->sk_flags &
236 ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) ))
237 return;
238 if (test_bit(SK_DEAD, &svsk->sk_flags))
239 return;
240
241 cpu = get_cpu();
242 pool = svc_pool_for_cpu(svsk->sk_server, cpu);
243 put_cpu();
244
245 spin_lock_bh(&pool->sp_lock);
246
247 if (!list_empty(&pool->sp_threads) &&
248 !list_empty(&pool->sp_sockets))
249 printk(KERN_ERR
250 "svc_sock_enqueue: threads and sockets both waiting??\n");
251
252 if (test_bit(SK_DEAD, &svsk->sk_flags)) {
253 /* Don't enqueue dead sockets */
254 dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk);
255 goto out_unlock;
256 }
257
258 /* Mark socket as busy. It will remain in this state until the
259 * server has processed all pending data and put the socket back
260 * on the idle list. We update SK_BUSY atomically because
261 * it also guards against trying to enqueue the svc_sock twice.
262 */
263 if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) {
264 /* Don't enqueue socket while already enqueued */
265 dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
266 goto out_unlock;
267 }
268 BUG_ON(svsk->sk_pool != NULL);
269 svsk->sk_pool = pool;
270
271 set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
272 if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2
273 > svc_sock_wspace(svsk))
274 && !test_bit(SK_CLOSE, &svsk->sk_flags)
275 && !test_bit(SK_CONN, &svsk->sk_flags)) {
276 /* Don't enqueue while not enough space for reply */
277 dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n",
278 svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg,
279 svc_sock_wspace(svsk));
280 svsk->sk_pool = NULL;
281 clear_bit(SK_BUSY, &svsk->sk_flags);
282 goto out_unlock;
283 }
284 clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
285
286
287 if (!list_empty(&pool->sp_threads)) {
288 rqstp = list_entry(pool->sp_threads.next,
289 struct svc_rqst,
290 rq_list);
291 dprintk("svc: socket %p served by daemon %p\n",
292 svsk->sk_sk, rqstp);
293 svc_thread_dequeue(pool, rqstp);
294 if (rqstp->rq_sock)
295 printk(KERN_ERR
296 "svc_sock_enqueue: server %p, rq_sock=%p!\n",
297 rqstp, rqstp->rq_sock);
298 rqstp->rq_sock = svsk;
299 atomic_inc(&svsk->sk_inuse);
300 rqstp->rq_reserved = serv->sv_max_mesg;
301 atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
302 BUG_ON(svsk->sk_pool != pool);
303 wake_up(&rqstp->rq_wait);
304 } else {
305 dprintk("svc: socket %p put into queue\n", svsk->sk_sk);
306 list_add_tail(&svsk->sk_ready, &pool->sp_sockets);
307 BUG_ON(svsk->sk_pool != pool);
308 }
309
310out_unlock:
311 spin_unlock_bh(&pool->sp_lock);
312}
313
314/*
315 * Dequeue the first socket. Must be called with the pool->sp_lock held.
316 */
317static inline struct svc_sock *
318svc_sock_dequeue(struct svc_pool *pool)
319{
320 struct svc_sock *svsk;
321
322 if (list_empty(&pool->sp_sockets))
323 return NULL;
324
325 svsk = list_entry(pool->sp_sockets.next,
326 struct svc_sock, sk_ready);
327 list_del_init(&svsk->sk_ready);
328
329 dprintk("svc: socket %p dequeued, inuse=%d\n",
330 svsk->sk_sk, atomic_read(&svsk->sk_inuse));
331
332 return svsk;
333}
334
335/*
336 * Having read something from a socket, check whether it
337 * needs to be re-enqueued.
338 * Note: SK_DATA only gets cleared when a read-attempt finds
339 * no (or insufficient) data.
340 */
341static inline void
342svc_sock_received(struct svc_sock *svsk)
343{
344 svsk->sk_pool = NULL;
345 clear_bit(SK_BUSY, &svsk->sk_flags);
346 svc_sock_enqueue(svsk);
347}
348
349
350/**
351 * svc_reserve - change the space reserved for the reply to a request.
352 * @rqstp: The request in question
353 * @space: new max space to reserve
354 *
355 * Each request reserves some space on the output queue of the socket
356 * to make sure the reply fits. This function reduces that reserved
357 * space to be the amount of space used already, plus @space.
358 *
359 */
360void svc_reserve(struct svc_rqst *rqstp, int space)
361{
362 space += rqstp->rq_res.head[0].iov_len;
363
364 if (space < rqstp->rq_reserved) {
365 struct svc_sock *svsk = rqstp->rq_sock;
366 atomic_sub((rqstp->rq_reserved - space), &svsk->sk_reserved);
367 rqstp->rq_reserved = space;
368
369 svc_sock_enqueue(svsk);
370 }
371}
372
373/*
374 * Release a socket after use.
375 */
376static inline void
377svc_sock_put(struct svc_sock *svsk)
378{
379 if (atomic_dec_and_test(&svsk->sk_inuse)) {
380 BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags));
381
382 dprintk("svc: releasing dead socket\n");
383 if (svsk->sk_sock->file)
384 sockfd_put(svsk->sk_sock);
385 else
386 sock_release(svsk->sk_sock);
387 if (svsk->sk_info_authunix != NULL)
388 svcauth_unix_info_release(svsk->sk_info_authunix);
389 kfree(svsk);
390 }
391}
392
393static void
394svc_sock_release(struct svc_rqst *rqstp)
395{
396 struct svc_sock *svsk = rqstp->rq_sock;
397
398 svc_release_skb(rqstp);
399
400 svc_free_res_pages(rqstp);
401 rqstp->rq_res.page_len = 0;
402 rqstp->rq_res.page_base = 0;
403
404
405 /* Reset response buffer and release
406 * the reservation.
407 * But first, check that enough space was reserved
408 * for the reply, otherwise we have a bug!
409 */
410 if ((rqstp->rq_res.len) > rqstp->rq_reserved)
411 printk(KERN_ERR "RPC request reserved %d but used %d\n",
412 rqstp->rq_reserved,
413 rqstp->rq_res.len);
414
415 rqstp->rq_res.head[0].iov_len = 0;
416 svc_reserve(rqstp, 0);
417 rqstp->rq_sock = NULL;
418
419 svc_sock_put(svsk);
420}
421
422/*
423 * External function to wake up a server waiting for data
424 * This really only makes sense for services like lockd
425 * which have exactly one thread anyway.
426 */
427void
428svc_wake_up(struct svc_serv *serv)
429{
430 struct svc_rqst *rqstp;
431 unsigned int i;
432 struct svc_pool *pool;
433
434 for (i = 0; i < serv->sv_nrpools; i++) {
435 pool = &serv->sv_pools[i];
436
437 spin_lock_bh(&pool->sp_lock);
438 if (!list_empty(&pool->sp_threads)) {
439 rqstp = list_entry(pool->sp_threads.next,
440 struct svc_rqst,
441 rq_list);
442 dprintk("svc: daemon %p woken up.\n", rqstp);
443 /*
444 svc_thread_dequeue(pool, rqstp);
445 rqstp->rq_sock = NULL;
446 */
447 wake_up(&rqstp->rq_wait);
448 }
449 spin_unlock_bh(&pool->sp_lock);
450 }
451}
452
453union svc_pktinfo_u { 119union svc_pktinfo_u {
454 struct in_pktinfo pkti; 120 struct in_pktinfo pkti;
455 struct in6_pktinfo pkti6; 121 struct in6_pktinfo pkti6;
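
This large removal is the point of the patch: thread queueing, svc_sock_enqueue()/svc_sock_dequeue(), the write-space test, svc_reserve(), the sk_inuse reference counting and svc_wake_up() all leave svcsock.c for the transport-independent layer, which the remaining socket code reaches through svc_xprt_enqueue(), svc_xprt_received() and friends. The protocol from the deleted locking comment survives under the new XPT_* flag names: whoever sets a readiness flag must then enqueue the transport, exactly as each callback below does. A sketch of the rule with the new names (assumed from the calls made elsewhere in this patch):

    static void example_mark_ready(struct svc_sock *svsk)
    {
            set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* publish state */
            svc_xprt_enqueue(&svsk->sk_xprt);            /* then wake a thread */
    }
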
@@ -459,7 +125,9 @@ union svc_pktinfo_u {
459 125
460static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) 126static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
461{ 127{
462 switch (rqstp->rq_sock->sk_sk->sk_family) { 128 struct svc_sock *svsk =
129 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
130 switch (svsk->sk_sk->sk_family) {
463 case AF_INET: { 131 case AF_INET: {
464 struct in_pktinfo *pki = CMSG_DATA(cmh); 132 struct in_pktinfo *pki = CMSG_DATA(cmh);
465 133
@@ -489,10 +157,10 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
489/* 157/*
490 * Generic sendto routine 158 * Generic sendto routine
491 */ 159 */
492static int 160static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
493svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
494{ 161{
495 struct svc_sock *svsk = rqstp->rq_sock; 162 struct svc_sock *svsk =
163 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
496 struct socket *sock = svsk->sk_sock; 164 struct socket *sock = svsk->sk_sock;
497 int slen; 165 int slen;
498 union { 166 union {
@@ -565,7 +233,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
565 } 233 }
566out: 234out:
567 dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", 235 dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n",
568 rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, 236 svsk, xdr->head[0].iov_base, xdr->head[0].iov_len,
569 xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf))); 237 xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf)));
570 238
571 return len; 239 return len;
@@ -602,7 +270,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
602 if (!serv) 270 if (!serv)
603 return 0; 271 return 0;
604 spin_lock_bh(&serv->sv_lock); 272 spin_lock_bh(&serv->sv_lock);
605 list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) { 273 list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) {
606 int onelen = one_sock_name(buf+len, svsk); 274 int onelen = one_sock_name(buf+len, svsk);
607 if (toclose && strcmp(toclose, buf+len) == 0) 275 if (toclose && strcmp(toclose, buf+len) == 0)
608 closesk = svsk; 276 closesk = svsk;
@@ -614,7 +282,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
614 /* Should unregister with portmap, but you cannot 282 /* Should unregister with portmap, but you cannot
615 * unregister just one protocol... 283 * unregister just one protocol...
616 */ 284 */
617 svc_close_socket(closesk); 285 svc_close_xprt(&closesk->sk_xprt);
618 else if (toclose) 286 else if (toclose)
619 return -ENOENT; 287 return -ENOENT;
620 return len; 288 return len;
@@ -624,8 +292,7 @@ EXPORT_SYMBOL(svc_sock_names);
624/* 292/*
625 * Check input queue length 293 * Check input queue length
626 */ 294 */
627static int 295static int svc_recv_available(struct svc_sock *svsk)
628svc_recv_available(struct svc_sock *svsk)
629{ 296{
630 struct socket *sock = svsk->sk_sock; 297 struct socket *sock = svsk->sk_sock;
631 int avail, err; 298 int avail, err;
@@ -638,48 +305,31 @@ svc_recv_available(struct svc_sock *svsk)
638/* 305/*
639 * Generic recvfrom routine. 306 * Generic recvfrom routine.
640 */ 307 */
641static int 308static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
642svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) 309 int buflen)
643{ 310{
644 struct svc_sock *svsk = rqstp->rq_sock; 311 struct svc_sock *svsk =
312 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
645 struct msghdr msg = { 313 struct msghdr msg = {
646 .msg_flags = MSG_DONTWAIT, 314 .msg_flags = MSG_DONTWAIT,
647 }; 315 };
648 struct sockaddr *sin;
649 int len; 316 int len;
650 317
318 rqstp->rq_xprt_hlen = 0;
319
651 len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, 320 len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
652 msg.msg_flags); 321 msg.msg_flags);
653 322
654 /* sock_recvmsg doesn't fill in the name/namelen, so we must..
655 */
656 memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen);
657 rqstp->rq_addrlen = svsk->sk_remotelen;
658
659 /* Destination address in request is needed for binding the
660 * source address in RPC callbacks later.
661 */
662 sin = (struct sockaddr *)&svsk->sk_local;
663 switch (sin->sa_family) {
664 case AF_INET:
665 rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr;
666 break;
667 case AF_INET6:
668 rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr;
669 break;
670 }
671
672 dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", 323 dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
673 svsk, iov[0].iov_base, iov[0].iov_len, len); 324 svsk, iov[0].iov_base, iov[0].iov_len, len);
674
675 return len; 325 return len;
676} 326}
677 327
678/* 328/*
679 * Set socket snd and rcv buffer lengths 329 * Set socket snd and rcv buffer lengths
680 */ 330 */
681static inline void 331static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
682svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv) 332 unsigned int rcv)
683{ 333{
684#if 0 334#if 0
685 mm_segment_t oldfs; 335 mm_segment_t oldfs;
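
Two things happen to svc_recvfrom() above. The new rqstp->rq_xprt_hlen = 0 records that socket transports put no transport-level header in the receive buffer (presumably left adjustable for transports in this series that do). And the copying of peer and local addresses into the request drops out of the fast path; that duty reappears later in this patch as svc_xprt_copy_addrs() in svc_tcp_recvfrom(). For reference, the deleted switch picked the datagram's destination IP out of the saved local address so that RPC callbacks could later bind the same source address (condensed, old field names kept):

    switch (sin->sa_family) {
    case AF_INET:
            rqstp->rq_daddr.addr  = ((struct sockaddr_in *)sin)->sin_addr;
            break;
    case AF_INET6:
            rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr;
            break;
    }
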
@@ -704,16 +354,16 @@ svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv)
704/* 354/*
705 * INET callback when data has been received on the socket. 355 * INET callback when data has been received on the socket.
706 */ 356 */
707static void 357static void svc_udp_data_ready(struct sock *sk, int count)
708svc_udp_data_ready(struct sock *sk, int count)
709{ 358{
710 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 359 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
711 360
712 if (svsk) { 361 if (svsk) {
713 dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", 362 dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
714 svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags)); 363 svsk, sk, count,
715 set_bit(SK_DATA, &svsk->sk_flags); 364 test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
716 svc_sock_enqueue(svsk); 365 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
366 svc_xprt_enqueue(&svsk->sk_xprt);
717 } 367 }
718 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 368 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
719 wake_up_interruptible(sk->sk_sleep); 369 wake_up_interruptible(sk->sk_sleep);
@@ -722,15 +372,14 @@ svc_udp_data_ready(struct sock *sk, int count)
722/* 372/*
723 * INET callback when space is newly available on the socket. 373 * INET callback when space is newly available on the socket.
724 */ 374 */
725static void 375static void svc_write_space(struct sock *sk)
726svc_write_space(struct sock *sk)
727{ 376{
728 struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); 377 struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
729 378
730 if (svsk) { 379 if (svsk) {
731 dprintk("svc: socket %p(inet %p), write_space busy=%d\n", 380 dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
732 svsk, sk, test_bit(SK_BUSY, &svsk->sk_flags)); 381 svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
733 svc_sock_enqueue(svsk); 382 svc_xprt_enqueue(&svsk->sk_xprt);
734 } 383 }
735 384
736 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { 385 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) {
@@ -740,10 +389,19 @@ svc_write_space(struct sock *sk)
740 } 389 }
741} 390}
742 391
743static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp, 392/*
744 struct cmsghdr *cmh) 393 * Copy the UDP datagram's destination address to the rqstp structure.
394 * The 'destination' address in this case is the address to which the
395 * peer sent the datagram, i.e. our local address. For multihomed
396 * hosts, this can change from msg to msg. Note that only the IP
397 * address changes, the port number should remain the same.
398 */
399static void svc_udp_get_dest_address(struct svc_rqst *rqstp,
400 struct cmsghdr *cmh)
745{ 401{
746 switch (rqstp->rq_sock->sk_sk->sk_family) { 402 struct svc_sock *svsk =
403 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
404 switch (svsk->sk_sk->sk_family) {
747 case AF_INET: { 405 case AF_INET: {
748 struct in_pktinfo *pki = CMSG_DATA(cmh); 406 struct in_pktinfo *pki = CMSG_DATA(cmh);
749 rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; 407 rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
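
The new comment deserves a concrete picture. With IP_PKTINFO enabled (svc_udp_init() below prepares exactly this, the int one = 1 plus set_fs(KERNEL_DS) dance, with the setsockopt itself falling outside the visible hunks), every receive yields a control message carrying the address the datagram was sent to. svc_udp_recvfrom() hands kernel_recvmsg() a buffer shaped roughly like this; the SVC_PKTINFO_SPACE sizing is an assumption based on the union svc_pktinfo_u declared earlier in the file:

    union {
            struct cmsghdr hdr;
            long           all[SVC_PKTINFO_SPACE / sizeof(long)];
    } buffer;
    struct cmsghdr *cmh = &buffer.hdr;
    /* After a successful receive, CMSG_DATA(cmh) points at a
     * struct in_pktinfo whose ipi_spec_dst field holds the local
     * address that svc_udp_get_dest_address() copies into rq_daddr. */
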
@@ -760,11 +418,11 @@ static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp,
760/* 418/*
761 * Receive a datagram from a UDP socket. 419 * Receive a datagram from a UDP socket.
762 */ 420 */
763static int 421static int svc_udp_recvfrom(struct svc_rqst *rqstp)
764svc_udp_recvfrom(struct svc_rqst *rqstp)
765{ 422{
766 struct svc_sock *svsk = rqstp->rq_sock; 423 struct svc_sock *svsk =
767 struct svc_serv *serv = svsk->sk_server; 424 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
425 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
768 struct sk_buff *skb; 426 struct sk_buff *skb;
769 union { 427 union {
770 struct cmsghdr hdr; 428 struct cmsghdr hdr;
@@ -779,7 +437,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
779 .msg_flags = MSG_DONTWAIT, 437 .msg_flags = MSG_DONTWAIT,
780 }; 438 };
781 439
782 if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) 440 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
783 /* udp sockets need large rcvbuf as all pending 441 /* udp sockets need large rcvbuf as all pending
784 * requests are still in that buffer. sndbuf must 442 * requests are still in that buffer. sndbuf must
785 * also be large enough that there is enough space 443 * also be large enough that there is enough space
@@ -792,17 +450,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
792 (serv->sv_nrthreads+3) * serv->sv_max_mesg, 450 (serv->sv_nrthreads+3) * serv->sv_max_mesg,
793 (serv->sv_nrthreads+3) * serv->sv_max_mesg); 451 (serv->sv_nrthreads+3) * serv->sv_max_mesg);
794 452
795 if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { 453 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
796 svc_sock_received(svsk);
797 return svc_deferred_recv(rqstp);
798 }
799
800 if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
801 svc_delete_socket(svsk);
802 return 0;
803 }
804
805 clear_bit(SK_DATA, &svsk->sk_flags);
806 skb = NULL; 454 skb = NULL;
807 err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, 455 err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
808 0, 0, MSG_PEEK | MSG_DONTWAIT); 456 0, 0, MSG_PEEK | MSG_DONTWAIT);
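
The buffer sizing earlier in this hunk is worth making concrete. UDP keeps every request that has not yet been processed in the socket's receive buffer, so both rcvbuf and sndbuf are sized to (sv_nrthreads + 3) * sv_max_mesg: with, say, 8 server threads and a 1 MB maximum message, that is 11 MB each way. The XPT_CHNGBUF test re-applies the figure whenever the thread count changes, which is what svc_sock_update_bufs() below arranges.
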
@@ -813,24 +461,27 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
813 if (err != -EAGAIN) { 461 if (err != -EAGAIN) {
814 /* possibly an icmp error */ 462 /* possibly an icmp error */
815 dprintk("svc: recvfrom returned error %d\n", -err); 463 dprintk("svc: recvfrom returned error %d\n", -err);
816 set_bit(SK_DATA, &svsk->sk_flags); 464 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
817 } 465 }
818 svc_sock_received(svsk); 466 svc_xprt_received(&svsk->sk_xprt);
819 return -EAGAIN; 467 return -EAGAIN;
820 } 468 }
821 rqstp->rq_addrlen = sizeof(rqstp->rq_addr); 469 len = svc_addr_len(svc_addr(rqstp));
470 if (len < 0)
471 return len;
472 rqstp->rq_addrlen = len;
822 if (skb->tstamp.tv64 == 0) { 473 if (skb->tstamp.tv64 == 0) {
823 skb->tstamp = ktime_get_real(); 474 skb->tstamp = ktime_get_real();
824 /* Don't enable netstamp, sunrpc doesn't 475 /* Don't enable netstamp, sunrpc doesn't
825 need that much accuracy */ 476 need that much accuracy */
826 } 477 }
827 svsk->sk_sk->sk_stamp = skb->tstamp; 478 svsk->sk_sk->sk_stamp = skb->tstamp;
828 set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ 479 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
829 480
830 /* 481 /*
831 * Maybe more packets - kick another thread ASAP. 482 * Maybe more packets - kick another thread ASAP.
832 */ 483 */
833 svc_sock_received(svsk); 484 svc_xprt_received(&svsk->sk_xprt);
834 485
835 len = skb->len - sizeof(struct udphdr); 486 len = skb->len - sizeof(struct udphdr);
836 rqstp->rq_arg.len = len; 487 rqstp->rq_arg.len = len;
@@ -861,13 +512,14 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
861 skb_free_datagram(svsk->sk_sk, skb); 512 skb_free_datagram(svsk->sk_sk, skb);
862 } else { 513 } else {
863 /* we can use it in-place */ 514 /* we can use it in-place */
864 rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); 515 rqstp->rq_arg.head[0].iov_base = skb->data +
516 sizeof(struct udphdr);
865 rqstp->rq_arg.head[0].iov_len = len; 517 rqstp->rq_arg.head[0].iov_len = len;
866 if (skb_checksum_complete(skb)) { 518 if (skb_checksum_complete(skb)) {
867 skb_free_datagram(svsk->sk_sk, skb); 519 skb_free_datagram(svsk->sk_sk, skb);
868 return 0; 520 return 0;
869 } 521 }
870 rqstp->rq_skbuff = skb; 522 rqstp->rq_xprt_ctxt = skb;
871 } 523 }
872 524
873 rqstp->rq_arg.page_base = 0; 525 rqstp->rq_arg.page_base = 0;
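
The in-place branch above is why svc_release_skb() survives as the xpo_release_rqst method in both ops vectors below: when the checksum is already verified the datagram is used where it lies, and the skb is parked in rq_xprt_ctxt until the request completes. The lifetime, condensed (names from this patch):

    rqstp->rq_xprt_ctxt = skb;           /* svc_udp_recvfrom(): keep it */
    /* ... the RPC request is processed ... */
    skb = rqstp->rq_xprt_ctxt;           /* svc_release_skb(): take it back, */
    rqstp->rq_xprt_ctxt = NULL;          /* clear to prevent a double free,  */
    skb_free_datagram(svsk->sk_sk, skb); /* and free it exactly once */
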
@@ -900,27 +552,81 @@ svc_udp_sendto(struct svc_rqst *rqstp)
900 return error; 552 return error;
901} 553}
902 554
903static void 555static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp)
904svc_udp_init(struct svc_sock *svsk) 556{
557}
558
559static int svc_udp_has_wspace(struct svc_xprt *xprt)
560{
561 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
562 struct svc_serv *serv = xprt->xpt_server;
563 unsigned long required;
564
565 /*
566 * Set the SOCK_NOSPACE flag before checking the available
567 * sock space.
568 */
569 set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
570 required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg;
571 if (required*2 > sock_wspace(svsk->sk_sk))
572 return 0;
573 clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
574 return 1;
575}
576
577static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
578{
579 BUG();
580 return NULL;
581}
582
583static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
584 struct sockaddr *sa, int salen,
585 int flags)
586{
587 return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags);
588}
589
590static struct svc_xprt_ops svc_udp_ops = {
591 .xpo_create = svc_udp_create,
592 .xpo_recvfrom = svc_udp_recvfrom,
593 .xpo_sendto = svc_udp_sendto,
594 .xpo_release_rqst = svc_release_skb,
595 .xpo_detach = svc_sock_detach,
596 .xpo_free = svc_sock_free,
597 .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
598 .xpo_has_wspace = svc_udp_has_wspace,
599 .xpo_accept = svc_udp_accept,
600};
601
602static struct svc_xprt_class svc_udp_class = {
603 .xcl_name = "udp",
604 .xcl_owner = THIS_MODULE,
605 .xcl_ops = &svc_udp_ops,
606 .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
607};
608
609static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
905{ 610{
906 int one = 1; 611 int one = 1;
907 mm_segment_t oldfs; 612 mm_segment_t oldfs;
908 613
614 svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv);
615 clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
909 svsk->sk_sk->sk_data_ready = svc_udp_data_ready; 616 svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
910 svsk->sk_sk->sk_write_space = svc_write_space; 617 svsk->sk_sk->sk_write_space = svc_write_space;
911 svsk->sk_recvfrom = svc_udp_recvfrom;
912 svsk->sk_sendto = svc_udp_sendto;
913 618
914 /* initialise setting must have enough space to 619 /* initialise setting must have enough space to
915 * receive and respond to one request. 620 * receive and respond to one request.
916 * svc_udp_recvfrom will re-adjust if necessary 621 * svc_udp_recvfrom will re-adjust if necessary
917 */ 622 */
918 svc_sock_setbufsize(svsk->sk_sock, 623 svc_sock_setbufsize(svsk->sk_sock,
919 3 * svsk->sk_server->sv_max_mesg, 624 3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
920 3 * svsk->sk_server->sv_max_mesg); 625 3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
921 626
922 set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */ 627 /* data might have come in before data_ready set up */
923 set_bit(SK_CHNGBUF, &svsk->sk_flags); 628 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
629 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
924 630
925 oldfs = get_fs(); 631 oldfs = get_fs();
926 set_fs(KERNEL_DS); 632 set_fs(KERNEL_DS);
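
This hunk is the heart of the UDP conversion: everything the server used to reach through per-socket function pointers is now an svc_xprt_ops method, and svc_udp_class binds the ops to a name ("udp") and a payload ceiling. Two details are easy to miss. svc_udp_accept() can be a BUG() stub because only transports marked as listeners are ever asked to accept, and a datagram socket never is. And svc_udp_has_wspace() keeps the old conservative send-space rule, which reads:

    /* Writable only while twice the worst-case demand still fits: */
    required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg;
    if (required * 2 > sock_wspace(svsk->sk_sk))
            return 0;  /* caller backs off; SOCK_NOSPACE stays set */
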
@@ -934,8 +640,7 @@ svc_udp_init(struct svc_sock *svsk)
934 * A data_ready event on a listening socket means there's a connection 640 * A data_ready event on a listening socket means there's a connection
935 * pending. Do not use state_change as a substitute for it. 641 * pending. Do not use state_change as a substitute for it.
936 */ 642 */
937static void 643static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
938svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
939{ 644{
940 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 645 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
941 646
@@ -954,8 +659,8 @@ svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
954 */ 659 */
955 if (sk->sk_state == TCP_LISTEN) { 660 if (sk->sk_state == TCP_LISTEN) {
956 if (svsk) { 661 if (svsk) {
957 set_bit(SK_CONN, &svsk->sk_flags); 662 set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
958 svc_sock_enqueue(svsk); 663 svc_xprt_enqueue(&svsk->sk_xprt);
959 } else 664 } else
960 printk("svc: socket %p: no user data\n", sk); 665 printk("svc: socket %p: no user data\n", sk);
961 } 666 }
@@ -967,8 +672,7 @@ svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
967/* 672/*
968 * A state change on a connected socket means it's dying or dead. 673 * A state change on a connected socket means it's dying or dead.
969 */ 674 */
970static void 675static void svc_tcp_state_change(struct sock *sk)
971svc_tcp_state_change(struct sock *sk)
972{ 676{
973 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 677 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
974 678
@@ -978,51 +682,36 @@ svc_tcp_state_change(struct sock *sk)
978 if (!svsk) 682 if (!svsk)
979 printk("svc: socket %p: no user data\n", sk); 683 printk("svc: socket %p: no user data\n", sk);
980 else { 684 else {
981 set_bit(SK_CLOSE, &svsk->sk_flags); 685 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
982 svc_sock_enqueue(svsk); 686 svc_xprt_enqueue(&svsk->sk_xprt);
983 } 687 }
984 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 688 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
985 wake_up_interruptible_all(sk->sk_sleep); 689 wake_up_interruptible_all(sk->sk_sleep);
986} 690}
987 691
988static void 692static void svc_tcp_data_ready(struct sock *sk, int count)
989svc_tcp_data_ready(struct sock *sk, int count)
990{ 693{
991 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 694 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
992 695
993 dprintk("svc: socket %p TCP data ready (svsk %p)\n", 696 dprintk("svc: socket %p TCP data ready (svsk %p)\n",
994 sk, sk->sk_user_data); 697 sk, sk->sk_user_data);
995 if (svsk) { 698 if (svsk) {
996 set_bit(SK_DATA, &svsk->sk_flags); 699 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
997 svc_sock_enqueue(svsk); 700 svc_xprt_enqueue(&svsk->sk_xprt);
998 } 701 }
999 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 702 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1000 wake_up_interruptible(sk->sk_sleep); 703 wake_up_interruptible(sk->sk_sleep);
1001} 704}
1002 705
1003static inline int svc_port_is_privileged(struct sockaddr *sin)
1004{
1005 switch (sin->sa_family) {
1006 case AF_INET:
1007 return ntohs(((struct sockaddr_in *)sin)->sin_port)
1008 < PROT_SOCK;
1009 case AF_INET6:
1010 return ntohs(((struct sockaddr_in6 *)sin)->sin6_port)
1011 < PROT_SOCK;
1012 default:
1013 return 0;
1014 }
1015}
1016
1017/* 706/*
1018 * Accept a TCP connection 707 * Accept a TCP connection
1019 */ 708 */
1020static void 709static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
1021svc_tcp_accept(struct svc_sock *svsk)
1022{ 710{
711 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
1023 struct sockaddr_storage addr; 712 struct sockaddr_storage addr;
1024 struct sockaddr *sin = (struct sockaddr *) &addr; 713 struct sockaddr *sin = (struct sockaddr *) &addr;
1025 struct svc_serv *serv = svsk->sk_server; 714 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
1026 struct socket *sock = svsk->sk_sock; 715 struct socket *sock = svsk->sk_sock;
1027 struct socket *newsock; 716 struct socket *newsock;
1028 struct svc_sock *newsvsk; 717 struct svc_sock *newsvsk;
@@ -1031,9 +720,9 @@ svc_tcp_accept(struct svc_sock *svsk)
1031 720
1032 dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); 721 dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
1033 if (!sock) 722 if (!sock)
1034 return; 723 return NULL;
1035 724
1036 clear_bit(SK_CONN, &svsk->sk_flags); 725 clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
1037 err = kernel_accept(sock, &newsock, O_NONBLOCK); 726 err = kernel_accept(sock, &newsock, O_NONBLOCK);
1038 if (err < 0) { 727 if (err < 0) {
1039 if (err == -ENOMEM) 728 if (err == -ENOMEM)
@@ -1042,11 +731,9 @@ svc_tcp_accept(struct svc_sock *svsk)
1042 else if (err != -EAGAIN && net_ratelimit()) 731 else if (err != -EAGAIN && net_ratelimit())
1043 printk(KERN_WARNING "%s: accept failed (err %d)!\n", 732 printk(KERN_WARNING "%s: accept failed (err %d)!\n",
1044 serv->sv_name, -err); 733 serv->sv_name, -err);
1045 return; 734 return NULL;
1046 } 735 }
1047 736 set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
1048 set_bit(SK_CONN, &svsk->sk_flags);
1049 svc_sock_enqueue(svsk);
1050 737
1051 err = kernel_getpeername(newsock, sin, &slen); 738 err = kernel_getpeername(newsock, sin, &slen);
1052 if (err < 0) { 739 if (err < 0) {
@@ -1077,106 +764,42 @@ svc_tcp_accept(struct svc_sock *svsk)
1077 if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 764 if (!(newsvsk = svc_setup_socket(serv, newsock, &err,
1078 (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)))) 765 (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY))))
1079 goto failed; 766 goto failed;
1080 memcpy(&newsvsk->sk_remote, sin, slen); 767 svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
1081 newsvsk->sk_remotelen = slen;
1082 err = kernel_getsockname(newsock, sin, &slen); 768 err = kernel_getsockname(newsock, sin, &slen);
1083 if (unlikely(err < 0)) { 769 if (unlikely(err < 0)) {
1084 dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); 770 dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
1085 slen = offsetof(struct sockaddr, sa_data); 771 slen = offsetof(struct sockaddr, sa_data);
1086 } 772 }
1087 memcpy(&newsvsk->sk_local, sin, slen); 773 svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
1088
1089 svc_sock_received(newsvsk);
1090
1091 /* make sure that we don't have too many active connections.
1092 * If we have, something must be dropped.
1093 *
1094 * There's no point in trying to do random drop here for
1095 * DoS prevention. The NFS clients does 1 reconnect in 15
1096 * seconds. An attacker can easily beat that.
1097 *
1098 * The only somewhat efficient mechanism would be if drop
1099 * old connections from the same IP first. But right now
1100 * we don't even record the client IP in svc_sock.
1101 */
1102 if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
1103 struct svc_sock *svsk = NULL;
1104 spin_lock_bh(&serv->sv_lock);
1105 if (!list_empty(&serv->sv_tempsocks)) {
1106 if (net_ratelimit()) {
1107 /* Try to help the admin */
1108 printk(KERN_NOTICE "%s: too many open TCP "
1109 "sockets, consider increasing the "
1110 "number of nfsd threads\n",
1111 serv->sv_name);
1112 printk(KERN_NOTICE
1113 "%s: last TCP connect from %s\n",
1114 serv->sv_name, __svc_print_addr(sin,
1115 buf, sizeof(buf)));
1116 }
1117 /*
1118 * Always select the oldest socket. It's not fair,
1119 * but so is life
1120 */
1121 svsk = list_entry(serv->sv_tempsocks.prev,
1122 struct svc_sock,
1123 sk_list);
1124 set_bit(SK_CLOSE, &svsk->sk_flags);
1125 atomic_inc(&svsk->sk_inuse);
1126 }
1127 spin_unlock_bh(&serv->sv_lock);
1128
1129 if (svsk) {
1130 svc_sock_enqueue(svsk);
1131 svc_sock_put(svsk);
1132 }
1133
1134 }
1135 774
1136 if (serv->sv_stats) 775 if (serv->sv_stats)
1137 serv->sv_stats->nettcpconn++; 776 serv->sv_stats->nettcpconn++;
1138 777
1139 return; 778 return &newsvsk->sk_xprt;
1140 779
1141failed: 780failed:
1142 sock_release(newsock); 781 sock_release(newsock);
1143 return; 782 return NULL;
1144} 783}
1145 784
1146/* 785/*
1147 * Receive data from a TCP socket. 786 * Receive data from a TCP socket.
1148 */ 787 */
1149static int 788static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
1150svc_tcp_recvfrom(struct svc_rqst *rqstp)
1151{ 789{
1152 struct svc_sock *svsk = rqstp->rq_sock; 790 struct svc_sock *svsk =
1153 struct svc_serv *serv = svsk->sk_server; 791 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
792 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
1154 int len; 793 int len;
1155 struct kvec *vec; 794 struct kvec *vec;
1156 int pnum, vlen; 795 int pnum, vlen;
1157 796
1158 dprintk("svc: tcp_recv %p data %d conn %d close %d\n", 797 dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
1159 svsk, test_bit(SK_DATA, &svsk->sk_flags), 798 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
1160 test_bit(SK_CONN, &svsk->sk_flags), 799 test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
1161 test_bit(SK_CLOSE, &svsk->sk_flags)); 800 test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
1162 801
1163 if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { 802 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
1164 svc_sock_received(svsk);
1165 return svc_deferred_recv(rqstp);
1166 }
1167
1168 if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
1169 svc_delete_socket(svsk);
1170 return 0;
1171 }
1172
1173 if (svsk->sk_sk->sk_state == TCP_LISTEN) {
1174 svc_tcp_accept(svsk);
1175 svc_sock_received(svsk);
1176 return 0;
1177 }
1178
1179 if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
1180 /* sndbuf needs to have room for one request 803 /* sndbuf needs to have room for one request
1181 * per thread, otherwise we can stall even when the 804 * per thread, otherwise we can stall even when the
1182 * network isn't a bottleneck. 805 * network isn't a bottleneck.
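
Beyond the svc_xprt_set_remote()/svc_xprt_set_local() bookkeeping, the striking change in svc_tcp_accept() is what disappears: the defense against connection exhaustion. The deleted policy, condensed (old flag and field names kept), closed the oldest temporary socket once the count outgrew the thread pool; presumably the generic transport layer takes this duty over elsewhere in the series:

    if (serv->sv_tmpcnt > (serv->sv_nrthreads + 3) * 20) {
            svsk = list_entry(serv->sv_tempsocks.prev,   /* oldest entry */
                              struct svc_sock, sk_list);
            set_bit(SK_CLOSE, &svsk->sk_flags);
            svc_sock_enqueue(svsk);   /* a server thread reaps it */
    }
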
@@ -1193,7 +816,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
1193 (serv->sv_nrthreads+3) * serv->sv_max_mesg, 816 (serv->sv_nrthreads+3) * serv->sv_max_mesg,
1194 3 * serv->sv_max_mesg); 817 3 * serv->sv_max_mesg);
1195 818
1196 clear_bit(SK_DATA, &svsk->sk_flags); 819 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1197 820
1198 /* Receive data. If we haven't got the record length yet, get 821 /* Receive data. If we haven't got the record length yet, get
1199 * the next four bytes. Otherwise try to gobble up as much as 822 * the next four bytes. Otherwise try to gobble up as much as
@@ -1212,7 +835,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
1212 if (len < want) { 835 if (len < want) {
1213 dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", 836 dprintk("svc: short recvfrom while reading record length (%d of %lu)\n",
1214 len, want); 837 len, want);
1215 svc_sock_received(svsk); 838 svc_xprt_received(&svsk->sk_xprt);
1216 return -EAGAIN; /* record header not complete */ 839 return -EAGAIN; /* record header not complete */
1217 } 840 }
1218 841
@@ -1248,11 +871,11 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
1248 if (len < svsk->sk_reclen) { 871 if (len < svsk->sk_reclen) {
1249 dprintk("svc: incomplete TCP record (%d of %d)\n", 872 dprintk("svc: incomplete TCP record (%d of %d)\n",
1250 len, svsk->sk_reclen); 873 len, svsk->sk_reclen);
1251 svc_sock_received(svsk); 874 svc_xprt_received(&svsk->sk_xprt);
1252 return -EAGAIN; /* record not complete */ 875 return -EAGAIN; /* record not complete */
1253 } 876 }
1254 len = svsk->sk_reclen; 877 len = svsk->sk_reclen;
1255 set_bit(SK_DATA, &svsk->sk_flags); 878 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1256 879
1257 vec = rqstp->rq_vec; 880 vec = rqstp->rq_vec;
1258 vec[0] = rqstp->rq_arg.head[0]; 881 vec[0] = rqstp->rq_arg.head[0];
@@ -1281,30 +904,31 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
1281 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; 904 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
1282 } 905 }
1283 906
1284 rqstp->rq_skbuff = NULL; 907 rqstp->rq_xprt_ctxt = NULL;
1285 rqstp->rq_prot = IPPROTO_TCP; 908 rqstp->rq_prot = IPPROTO_TCP;
1286 909
1287 /* Reset TCP read info */ 910 /* Reset TCP read info */
1288 svsk->sk_reclen = 0; 911 svsk->sk_reclen = 0;
1289 svsk->sk_tcplen = 0; 912 svsk->sk_tcplen = 0;
1290 913
1291 svc_sock_received(svsk); 914 svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
915 svc_xprt_received(&svsk->sk_xprt);
1292 if (serv->sv_stats) 916 if (serv->sv_stats)
1293 serv->sv_stats->nettcpcnt++; 917 serv->sv_stats->nettcpcnt++;
1294 918
1295 return len; 919 return len;
1296 920
1297 err_delete: 921 err_delete:
1298 svc_delete_socket(svsk); 922 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
1299 return -EAGAIN; 923 return -EAGAIN;
1300 924
1301 error: 925 error:
1302 if (len == -EAGAIN) { 926 if (len == -EAGAIN) {
1303 dprintk("RPC: TCP recvfrom got EAGAIN\n"); 927 dprintk("RPC: TCP recvfrom got EAGAIN\n");
1304 svc_sock_received(svsk); 928 svc_xprt_received(&svsk->sk_xprt);
1305 } else { 929 } else {
1306 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", 930 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
1307 svsk->sk_server->sv_name, -len); 931 svsk->sk_xprt.xpt_server->sv_name, -len);
1308 goto err_delete; 932 goto err_delete;
1309 } 933 }
1310 934
@@ -1314,8 +938,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
1314/* 938/*
1315 * Send out data on TCP socket. 939 * Send out data on TCP socket.
1316 */ 940 */
1317static int 941static int svc_tcp_sendto(struct svc_rqst *rqstp)
1318svc_tcp_sendto(struct svc_rqst *rqstp)
1319{ 942{
1320 struct xdr_buf *xbufp = &rqstp->rq_res; 943 struct xdr_buf *xbufp = &rqstp->rq_res;
1321 int sent; 944 int sent;
@@ -1328,35 +951,109 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
1328 reclen = htonl(0x80000000|((xbufp->len ) - 4)); 951 reclen = htonl(0x80000000|((xbufp->len ) - 4));
1329 memcpy(xbufp->head[0].iov_base, &reclen, 4); 952 memcpy(xbufp->head[0].iov_base, &reclen, 4);
1330 953
1331 if (test_bit(SK_DEAD, &rqstp->rq_sock->sk_flags)) 954 if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags))
1332 return -ENOTCONN; 955 return -ENOTCONN;
1333 956
1334 sent = svc_sendto(rqstp, &rqstp->rq_res); 957 sent = svc_sendto(rqstp, &rqstp->rq_res);
1335 if (sent != xbufp->len) { 958 if (sent != xbufp->len) {
1336 printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", 959 printk(KERN_NOTICE
1337 rqstp->rq_sock->sk_server->sv_name, 960 "rpc-srv/tcp: %s: %s %d when sending %d bytes "
961 "- shutting down socket\n",
962 rqstp->rq_xprt->xpt_server->sv_name,
1338 (sent<0)?"got error":"sent only", 963 (sent<0)?"got error":"sent only",
1339 sent, xbufp->len); 964 sent, xbufp->len);
1340 set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags); 965 set_bit(XPT_CLOSE, &rqstp->rq_xprt->xpt_flags);
1341 svc_sock_enqueue(rqstp->rq_sock); 966 svc_xprt_enqueue(rqstp->rq_xprt);
1342 sent = -EAGAIN; 967 sent = -EAGAIN;
1343 } 968 }
1344 return sent; 969 return sent;
1345} 970}
1346 971
1347static void 972/*
1348svc_tcp_init(struct svc_sock *svsk) 973 * Setup response header. TCP has a 4B record length field.
974 */
975static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
976{
977 struct kvec *resv = &rqstp->rq_res.head[0];
978
979 /* tcp needs a space for the record length... */
980 svc_putnl(resv, 0);
981}
982
983static int svc_tcp_has_wspace(struct svc_xprt *xprt)
984{
985 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
986 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
987 int required;
988 int wspace;
989
990 /*
991 * Set the SOCK_NOSPACE flag before checking the available
992 * sock space.
993 */
994 set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
995 required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg;
996 wspace = sk_stream_wspace(svsk->sk_sk);
997
998 if (wspace < sk_stream_min_wspace(svsk->sk_sk))
999 return 0;
1000 if (required * 2 > wspace)
1001 return 0;
1002
1003 clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
1004 return 1;
1005}
1006
1007static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
1008 struct sockaddr *sa, int salen,
1009 int flags)
1010{
1011 return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags);
1012}
1013
1014static struct svc_xprt_ops svc_tcp_ops = {
1015 .xpo_create = svc_tcp_create,
1016 .xpo_recvfrom = svc_tcp_recvfrom,
1017 .xpo_sendto = svc_tcp_sendto,
1018 .xpo_release_rqst = svc_release_skb,
1019 .xpo_detach = svc_sock_detach,
1020 .xpo_free = svc_sock_free,
1021 .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
1022 .xpo_has_wspace = svc_tcp_has_wspace,
1023 .xpo_accept = svc_tcp_accept,
1024};
1025
1026static struct svc_xprt_class svc_tcp_class = {
1027 .xcl_name = "tcp",
1028 .xcl_owner = THIS_MODULE,
1029 .xcl_ops = &svc_tcp_ops,
1030 .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
1031};
1032
1033void svc_init_xprt_sock(void)
1034{
1035 svc_reg_xprt_class(&svc_tcp_class);
1036 svc_reg_xprt_class(&svc_udp_class);
1037}
1038
1039void svc_cleanup_xprt_sock(void)
1040{
1041 svc_unreg_xprt_class(&svc_tcp_class);
1042 svc_unreg_xprt_class(&svc_udp_class);
1043}
1044
1045static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
1349{ 1046{
1350 struct sock *sk = svsk->sk_sk; 1047 struct sock *sk = svsk->sk_sk;
1351 struct tcp_sock *tp = tcp_sk(sk); 1048 struct tcp_sock *tp = tcp_sk(sk);
1352 1049
1353 svsk->sk_recvfrom = svc_tcp_recvfrom; 1050 svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv);
1354 svsk->sk_sendto = svc_tcp_sendto; 1051 set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
1355
1356 if (sk->sk_state == TCP_LISTEN) { 1052 if (sk->sk_state == TCP_LISTEN) {
1357 dprintk("setting up TCP socket for listening\n"); 1053 dprintk("setting up TCP socket for listening\n");
1054 set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
1358 sk->sk_data_ready = svc_tcp_listen_data_ready; 1055 sk->sk_data_ready = svc_tcp_listen_data_ready;
1359 set_bit(SK_CONN, &svsk->sk_flags); 1056 set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
1360 } else { 1057 } else {
1361 dprintk("setting up TCP socket for reading\n"); 1058 dprintk("setting up TCP socket for reading\n");
1362 sk->sk_state_change = svc_tcp_state_change; 1059 sk->sk_state_change = svc_tcp_state_change;
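
svc_tcp_prep_reply_hdr() and svc_tcp_sendto() cooperate on the RPC record marker: the prep routine reserves four bytes at the head of the reply with svc_putnl(resv, 0), and the send path overwrites them with the real marker, top bit meaning "last fragment" and the low 31 bits the fragment length. Worked through with the line from the hunk above:

    reclen = htonl(0x80000000 | ((xbufp->len) - 4));
    /* e.g. a 132-byte reply buffer (4-byte marker + 128 bytes of RPC
     * data) yields 0x80000080: last fragment, 128 bytes follow. */

svc_init_xprt_sock() and svc_cleanup_xprt_sock() then register both classes with the generic layer, so services select a transport by class name ("tcp", "udp") instead of hard-wiring protocol numbers.
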
@@ -1373,18 +1070,17 @@ svc_tcp_init(struct svc_sock *svsk)
1373 * svc_tcp_recvfrom will re-adjust if necessary 1070 * svc_tcp_recvfrom will re-adjust if necessary
1374 */ 1071 */
1375 svc_sock_setbufsize(svsk->sk_sock, 1072 svc_sock_setbufsize(svsk->sk_sock,
1376 3 * svsk->sk_server->sv_max_mesg, 1073 3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
1377 3 * svsk->sk_server->sv_max_mesg); 1074 3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
1378 1075
1379 set_bit(SK_CHNGBUF, &svsk->sk_flags); 1076 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1380 set_bit(SK_DATA, &svsk->sk_flags); 1077 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1381 if (sk->sk_state != TCP_ESTABLISHED) 1078 if (sk->sk_state != TCP_ESTABLISHED)
1382 set_bit(SK_CLOSE, &svsk->sk_flags); 1079 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
1383 } 1080 }
1384} 1081}
1385 1082
1386void 1083void svc_sock_update_bufs(struct svc_serv *serv)
1387svc_sock_update_bufs(struct svc_serv *serv)
1388{ 1084{
1389 /* 1085 /*
1390 * The number of server threads has changed. Update 1086 * The number of server threads has changed. Update
@@ -1395,232 +1091,18 @@ svc_sock_update_bufs(struct svc_serv *serv)
1395 spin_lock_bh(&serv->sv_lock); 1091 spin_lock_bh(&serv->sv_lock);
1396 list_for_each(le, &serv->sv_permsocks) { 1092 list_for_each(le, &serv->sv_permsocks) {
1397 struct svc_sock *svsk = 1093 struct svc_sock *svsk =
1398 list_entry(le, struct svc_sock, sk_list); 1094 list_entry(le, struct svc_sock, sk_xprt.xpt_list);
1399 set_bit(SK_CHNGBUF, &svsk->sk_flags); 1095 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1400 } 1096 }
1401 list_for_each(le, &serv->sv_tempsocks) { 1097 list_for_each(le, &serv->sv_tempsocks) {
1402 struct svc_sock *svsk = 1098 struct svc_sock *svsk =
1403 list_entry(le, struct svc_sock, sk_list); 1099 list_entry(le, struct svc_sock, sk_xprt.xpt_list);
1404 set_bit(SK_CHNGBUF, &svsk->sk_flags); 1100 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1405 } 1101 }
1406 spin_unlock_bh(&serv->sv_lock); 1102 spin_unlock_bh(&serv->sv_lock);
1407} 1103}
1408 1104
1409/* 1105/*
1410 * Receive the next request on any socket. This code is carefully
1411 * organised not to touch any cachelines in the shared svc_serv
1412 * structure, only cachelines in the local svc_pool.
1413 */
1414int
1415svc_recv(struct svc_rqst *rqstp, long timeout)
1416{
1417 struct svc_sock *svsk = NULL;
1418 struct svc_serv *serv = rqstp->rq_server;
1419 struct svc_pool *pool = rqstp->rq_pool;
1420 int len, i;
1421 int pages;
1422 struct xdr_buf *arg;
1423 DECLARE_WAITQUEUE(wait, current);
1424
1425 dprintk("svc: server %p waiting for data (to = %ld)\n",
1426 rqstp, timeout);
1427
1428 if (rqstp->rq_sock)
1429 printk(KERN_ERR
1430 "svc_recv: service %p, socket not NULL!\n",
1431 rqstp);
1432 if (waitqueue_active(&rqstp->rq_wait))
1433 printk(KERN_ERR
1434 "svc_recv: service %p, wait queue active!\n",
1435 rqstp);
1436
1437
1438 /* now allocate needed pages. If we get a failure, sleep briefly */
1439 pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE;
1440 for (i=0; i < pages ; i++)
1441 while (rqstp->rq_pages[i] == NULL) {
1442 struct page *p = alloc_page(GFP_KERNEL);
1443 if (!p)
1444 schedule_timeout_uninterruptible(msecs_to_jiffies(500));
1445 rqstp->rq_pages[i] = p;
1446 }
1447 rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
1448 BUG_ON(pages >= RPCSVC_MAXPAGES);
1449
1450 /* Make arg->head point to first page and arg->pages point to rest */
1451 arg = &rqstp->rq_arg;
1452 arg->head[0].iov_base = page_address(rqstp->rq_pages[0]);
1453 arg->head[0].iov_len = PAGE_SIZE;
1454 arg->pages = rqstp->rq_pages + 1;
1455 arg->page_base = 0;
1456 /* save at least one page for response */
1457 arg->page_len = (pages-2)*PAGE_SIZE;
1458 arg->len = (pages-1)*PAGE_SIZE;
1459 arg->tail[0].iov_len = 0;
1460
1461 try_to_freeze();
1462 cond_resched();
1463 if (signalled())
1464 return -EINTR;
1465
1466 spin_lock_bh(&pool->sp_lock);
1467 if ((svsk = svc_sock_dequeue(pool)) != NULL) {
1468 rqstp->rq_sock = svsk;
1469 atomic_inc(&svsk->sk_inuse);
1470 rqstp->rq_reserved = serv->sv_max_mesg;
1471 atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
1472 } else {
1473 /* No data pending. Go to sleep */
1474 svc_thread_enqueue(pool, rqstp);
1475
1476 /*
1477 * We have to be able to interrupt this wait
1478 * to bring down the daemons ...
1479 */
1480 set_current_state(TASK_INTERRUPTIBLE);
1481 add_wait_queue(&rqstp->rq_wait, &wait);
1482 spin_unlock_bh(&pool->sp_lock);
1483
1484 schedule_timeout(timeout);
1485
1486 try_to_freeze();
1487
1488 spin_lock_bh(&pool->sp_lock);
1489 remove_wait_queue(&rqstp->rq_wait, &wait);
1490
1491 if (!(svsk = rqstp->rq_sock)) {
1492 svc_thread_dequeue(pool, rqstp);
1493 spin_unlock_bh(&pool->sp_lock);
1494 dprintk("svc: server %p, no data yet\n", rqstp);
1495 return signalled()? -EINTR : -EAGAIN;
1496 }
1497 }
1498 spin_unlock_bh(&pool->sp_lock);
1499
1500 dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
1501 rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));
1502 len = svsk->sk_recvfrom(rqstp);
1503 dprintk("svc: got len=%d\n", len);
1504
1505 /* No data, incomplete (TCP) read, or accept() */
1506 if (len == 0 || len == -EAGAIN) {
1507 rqstp->rq_res.len = 0;
1508 svc_sock_release(rqstp);
1509 return -EAGAIN;
1510 }
1511 svsk->sk_lastrecv = get_seconds();
1512 clear_bit(SK_OLD, &svsk->sk_flags);
1513
1514 rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
1515 rqstp->rq_chandle.defer = svc_defer;
1516
1517 if (serv->sv_stats)
1518 serv->sv_stats->netcnt++;
1519 return len;
1520}
1521
1522/*
1523 * Drop request
1524 */
1525void
1526svc_drop(struct svc_rqst *rqstp)
1527{
1528 dprintk("svc: socket %p dropped request\n", rqstp->rq_sock);
1529 svc_sock_release(rqstp);
1530}
1531
1532/*
1533 * Return reply to client.
1534 */
1535int
1536svc_send(struct svc_rqst *rqstp)
1537{
1538 struct svc_sock *svsk;
1539 int len;
1540 struct xdr_buf *xb;
1541
1542 if ((svsk = rqstp->rq_sock) == NULL) {
1543 printk(KERN_WARNING "NULL socket pointer in %s:%d\n",
1544 __FILE__, __LINE__);
1545 return -EFAULT;
1546 }
1547
1548 /* release the receive skb before sending the reply */
1549 svc_release_skb(rqstp);
1550
1551 /* calculate over-all length */
1552 xb = & rqstp->rq_res;
1553 xb->len = xb->head[0].iov_len +
1554 xb->page_len +
1555 xb->tail[0].iov_len;
1556
1557 /* Grab svsk->sk_mutex to serialize outgoing data. */
1558 mutex_lock(&svsk->sk_mutex);
1559 if (test_bit(SK_DEAD, &svsk->sk_flags))
1560 len = -ENOTCONN;
1561 else
1562 len = svsk->sk_sendto(rqstp);
1563 mutex_unlock(&svsk->sk_mutex);
1564 svc_sock_release(rqstp);
1565
1566 if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
1567 return 0;
1568 return len;
1569}
1570
1571/*
1572 * Timer function to close old temporary sockets, using
1573 * a mark-and-sweep algorithm.
1574 */
1575static void
1576svc_age_temp_sockets(unsigned long closure)
1577{
1578 struct svc_serv *serv = (struct svc_serv *)closure;
1579 struct svc_sock *svsk;
1580 struct list_head *le, *next;
1581 LIST_HEAD(to_be_aged);
1582
1583 dprintk("svc_age_temp_sockets\n");
1584
1585 if (!spin_trylock_bh(&serv->sv_lock)) {
1586 /* busy, try again 1 sec later */
1587 dprintk("svc_age_temp_sockets: busy\n");
1588 mod_timer(&serv->sv_temptimer, jiffies + HZ);
1589 return;
1590 }
1591
1592 list_for_each_safe(le, next, &serv->sv_tempsocks) {
1593 svsk = list_entry(le, struct svc_sock, sk_list);
1594
1595 if (!test_and_set_bit(SK_OLD, &svsk->sk_flags))
1596 continue;
1597 if (atomic_read(&svsk->sk_inuse) > 1 || test_bit(SK_BUSY, &svsk->sk_flags))
1598 continue;
1599 atomic_inc(&svsk->sk_inuse);
1600 list_move(le, &to_be_aged);
1601 set_bit(SK_CLOSE, &svsk->sk_flags);
1602 set_bit(SK_DETACHED, &svsk->sk_flags);
1603 }
1604 spin_unlock_bh(&serv->sv_lock);
1605
1606 while (!list_empty(&to_be_aged)) {
1607 le = to_be_aged.next;
1608		/* fiddling with the sk_list node is safe because SK_DETACHED is set */
1609 list_del_init(le);
1610 svsk = list_entry(le, struct svc_sock, sk_list);
1611
1612 dprintk("queuing svsk %p for closing, %lu seconds old\n",
1613 svsk, get_seconds() - svsk->sk_lastrecv);
1614
1615 /* a thread will dequeue and close it soon */
1616 svc_sock_enqueue(svsk);
1617 svc_sock_put(svsk);
1618 }
1619
1620 mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
1621}
1622
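Stripped to its essentials, the mark-and-sweep above is a two-state aging scheme: SK_OLD is cleared on every successful receive, so a socket found still marked here has been idle for a whole svc_conn_age_period. A generic sketch, with queue_for_close() standing in for the enqueue-and-put sequence (and the in-use/busy checks omitted):

	list_for_each_entry_safe(svsk, next, &serv->sv_tempsocks, sk_list) {
		if (!test_and_set_bit(SK_OLD, &svsk->sk_flags))
			continue;		/* first pass: mark only */
		queue_for_close(svsk);		/* second pass: sweep */
	}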
1623/*
1624 * Initialize socket for RPC use and create svc_sock struct 1106 * Initialize socket for RPC use and create svc_sock struct
1625 * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. 1107 * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
1626 */ 1108 */
@@ -1631,7 +1113,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1631 struct svc_sock *svsk; 1113 struct svc_sock *svsk;
1632 struct sock *inet; 1114 struct sock *inet;
1633 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); 1115 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
1634 int is_temporary = flags & SVC_SOCK_TEMPORARY;
1635 1116
1636 dprintk("svc: svc_setup_socket %p\n", sock); 1117 dprintk("svc: svc_setup_socket %p\n", sock);
1637 if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { 1118 if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
@@ -1651,44 +1132,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1651 return NULL; 1132 return NULL;
1652 } 1133 }
1653 1134
1654 set_bit(SK_BUSY, &svsk->sk_flags);
1655 inet->sk_user_data = svsk; 1135 inet->sk_user_data = svsk;
1656 svsk->sk_sock = sock; 1136 svsk->sk_sock = sock;
1657 svsk->sk_sk = inet; 1137 svsk->sk_sk = inet;
1658 svsk->sk_ostate = inet->sk_state_change; 1138 svsk->sk_ostate = inet->sk_state_change;
1659 svsk->sk_odata = inet->sk_data_ready; 1139 svsk->sk_odata = inet->sk_data_ready;
1660 svsk->sk_owspace = inet->sk_write_space; 1140 svsk->sk_owspace = inet->sk_write_space;
1661 svsk->sk_server = serv;
1662 atomic_set(&svsk->sk_inuse, 1);
1663 svsk->sk_lastrecv = get_seconds();
1664 spin_lock_init(&svsk->sk_lock);
1665 INIT_LIST_HEAD(&svsk->sk_deferred);
1666 INIT_LIST_HEAD(&svsk->sk_ready);
1667 mutex_init(&svsk->sk_mutex);
1668 1141
1669 /* Initialize the socket */ 1142 /* Initialize the socket */
1670 if (sock->type == SOCK_DGRAM) 1143 if (sock->type == SOCK_DGRAM)
1671 svc_udp_init(svsk); 1144 svc_udp_init(svsk, serv);
1672 else 1145 else
1673 svc_tcp_init(svsk); 1146 svc_tcp_init(svsk, serv);
1674
1675 spin_lock_bh(&serv->sv_lock);
1676 if (is_temporary) {
1677 set_bit(SK_TEMP, &svsk->sk_flags);
1678 list_add(&svsk->sk_list, &serv->sv_tempsocks);
1679 serv->sv_tmpcnt++;
1680 if (serv->sv_temptimer.function == NULL) {
1681 /* setup timer to age temp sockets */
1682 setup_timer(&serv->sv_temptimer, svc_age_temp_sockets,
1683 (unsigned long)serv);
1684 mod_timer(&serv->sv_temptimer,
1685 jiffies + svc_conn_age_period * HZ);
1686 }
1687 } else {
1688 clear_bit(SK_TEMP, &svsk->sk_flags);
1689 list_add(&svsk->sk_list, &serv->sv_permsocks);
1690 }
1691 spin_unlock_bh(&serv->sv_lock);
1692 1147
1693 dprintk("svc: svc_setup_socket created %p (inet %p)\n", 1148 dprintk("svc: svc_setup_socket created %p (inet %p)\n",
1694 svsk, svsk->sk_sk); 1149 svsk, svsk->sk_sk);
@@ -1717,7 +1172,16 @@ int svc_addsock(struct svc_serv *serv,
1717 else { 1172 else {
1718 svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS); 1173 svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS);
1719 if (svsk) { 1174 if (svsk) {
1720 svc_sock_received(svsk); 1175 struct sockaddr_storage addr;
1176 struct sockaddr *sin = (struct sockaddr *)&addr;
1177 int salen;
1178 if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0)
1179 svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
1180 clear_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags);
1181 spin_lock_bh(&serv->sv_lock);
1182 list_add(&svsk->sk_xprt.xpt_list, &serv->sv_permsocks);
1183 spin_unlock_bh(&serv->sv_lock);
1184 svc_xprt_received(&svsk->sk_xprt);
1721 err = 0; 1185 err = 0;
1722 } 1186 }
1723 } 1187 }
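The added lines record the bound local address on the new transport; in isolation, the kernel_getsockname() idiom they rely on looks like this (sketch; sock and svsk assumed already set up, error path elided):

	struct sockaddr_storage addr;
	struct sockaddr *sin = (struct sockaddr *)&addr;
	int salen = sizeof(addr);	/* getname overwrites this with the real length */

	if (kernel_getsockname(sock, sin, &salen) == 0)
		svc_xprt_set_local(&svsk->sk_xprt, sin, salen);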
@@ -1733,14 +1197,19 @@ EXPORT_SYMBOL_GPL(svc_addsock);
1733/* 1197/*
1734 * Create socket for RPC service. 1198 * Create socket for RPC service.
1735 */ 1199 */
1736static int svc_create_socket(struct svc_serv *serv, int protocol, 1200static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1737 struct sockaddr *sin, int len, int flags) 1201 int protocol,
1202 struct sockaddr *sin, int len,
1203 int flags)
1738{ 1204{
1739 struct svc_sock *svsk; 1205 struct svc_sock *svsk;
1740 struct socket *sock; 1206 struct socket *sock;
1741 int error; 1207 int error;
1742 int type; 1208 int type;
1743 char buf[RPC_MAX_ADDRBUFLEN]; 1209 char buf[RPC_MAX_ADDRBUFLEN];
1210 struct sockaddr_storage addr;
1211 struct sockaddr *newsin = (struct sockaddr *)&addr;
1212 int newlen;
1744 1213
1745 dprintk("svc: svc_create_socket(%s, %d, %s)\n", 1214 dprintk("svc: svc_create_socket(%s, %d, %s)\n",
1746 serv->sv_program->pg_name, protocol, 1215 serv->sv_program->pg_name, protocol,
@@ -1749,13 +1218,13 @@ static int svc_create_socket(struct svc_serv *serv, int protocol,
1749 if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { 1218 if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
1750 printk(KERN_WARNING "svc: only UDP and TCP " 1219 printk(KERN_WARNING "svc: only UDP and TCP "
1751 "sockets supported\n"); 1220 "sockets supported\n");
1752 return -EINVAL; 1221 return ERR_PTR(-EINVAL);
1753 } 1222 }
1754 type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; 1223 type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
1755 1224
1756 error = sock_create_kern(sin->sa_family, type, protocol, &sock); 1225 error = sock_create_kern(sin->sa_family, type, protocol, &sock);
1757 if (error < 0) 1226 if (error < 0)
1758 return error; 1227 return ERR_PTR(error);
1759 1228
1760 svc_reclassify_socket(sock); 1229 svc_reclassify_socket(sock);
1761 1230
@@ -1765,203 +1234,55 @@ static int svc_create_socket(struct svc_serv *serv, int protocol,
1765 if (error < 0) 1234 if (error < 0)
1766 goto bummer; 1235 goto bummer;
1767 1236
1237 newlen = len;
1238 error = kernel_getsockname(sock, newsin, &newlen);
1239 if (error < 0)
1240 goto bummer;
1241
1768 if (protocol == IPPROTO_TCP) { 1242 if (protocol == IPPROTO_TCP) {
1769 if ((error = kernel_listen(sock, 64)) < 0) 1243 if ((error = kernel_listen(sock, 64)) < 0)
1770 goto bummer; 1244 goto bummer;
1771 } 1245 }
1772 1246
1773 if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) { 1247 if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) {
1774 svc_sock_received(svsk); 1248 svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen);
1775 return ntohs(inet_sk(svsk->sk_sk)->sport); 1249 return (struct svc_xprt *)svsk;
1776 } 1250 }
1777 1251
1778bummer: 1252bummer:
1779 dprintk("svc: svc_create_socket error = %d\n", -error); 1253 dprintk("svc: svc_create_socket error = %d\n", -error);
1780 sock_release(sock); 1254 sock_release(sock);
1781 return error; 1255 return ERR_PTR(error);
1782} 1256}
1783 1257
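With svc_create_socket() now returning a pointer, errors travel via the kernel's usual ERR_PTR()/IS_ERR() convention rather than a plain errno return; a hedged caller sketch:

	#include <linux/err.h>

	struct svc_xprt *xprt;

	xprt = svc_create_socket(serv, IPPROTO_TCP, sin, len, flags);
	if (IS_ERR(xprt))
		return PTR_ERR(xprt);	/* negative errno decoded from the pointer */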
1784/* 1258/*
1785 * Remove a dead socket 1259 * Detach the svc_sock from the socket so that no
1260 * more callbacks occur.
1786 */ 1261 */
1787static void 1262static void svc_sock_detach(struct svc_xprt *xprt)
1788svc_delete_socket(struct svc_sock *svsk)
1789{ 1263{
1790 struct svc_serv *serv; 1264 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
1791 struct sock *sk; 1265 struct sock *sk = svsk->sk_sk;
1792
1793 dprintk("svc: svc_delete_socket(%p)\n", svsk);
1794 1266
1795 serv = svsk->sk_server; 1267 dprintk("svc: svc_sock_detach(%p)\n", svsk);
1796 sk = svsk->sk_sk;
1797 1268
1269 /* put back the old socket callbacks */
1798 sk->sk_state_change = svsk->sk_ostate; 1270 sk->sk_state_change = svsk->sk_ostate;
1799 sk->sk_data_ready = svsk->sk_odata; 1271 sk->sk_data_ready = svsk->sk_odata;
1800 sk->sk_write_space = svsk->sk_owspace; 1272 sk->sk_write_space = svsk->sk_owspace;
1801
1802 spin_lock_bh(&serv->sv_lock);
1803
1804 if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags))
1805 list_del_init(&svsk->sk_list);
1806 /*
1807 * We used to delete the svc_sock from whichever list
1808	 * its sk_ready node was on, but we don't actually
1809	 * need to: the only time we are called while still
1810	 * attached to a queue is when that queue is about
1811	 * to be destroyed (in svc_destroy).
1812 */
1813 if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) {
1814 BUG_ON(atomic_read(&svsk->sk_inuse)<2);
1815 atomic_dec(&svsk->sk_inuse);
1816 if (test_bit(SK_TEMP, &svsk->sk_flags))
1817 serv->sv_tmpcnt--;
1818 }
1819
1820 spin_unlock_bh(&serv->sv_lock);
1821}
1822
1823static void svc_close_socket(struct svc_sock *svsk)
1824{
1825 set_bit(SK_CLOSE, &svsk->sk_flags);
1826 if (test_and_set_bit(SK_BUSY, &svsk->sk_flags))
1827 /* someone else will have to effect the close */
1828 return;
1829
1830 atomic_inc(&svsk->sk_inuse);
1831 svc_delete_socket(svsk);
1832 clear_bit(SK_BUSY, &svsk->sk_flags);
1833 svc_sock_put(svsk);
1834}
1835
1836void svc_force_close_socket(struct svc_sock *svsk)
1837{
1838 set_bit(SK_CLOSE, &svsk->sk_flags);
1839 if (test_bit(SK_BUSY, &svsk->sk_flags)) {
1840		/* Waiting to be processed, but no threads are left;
1841		 * just remove it from the waiting list.
1842 */
1843 list_del_init(&svsk->sk_ready);
1844 clear_bit(SK_BUSY, &svsk->sk_flags);
1845 }
1846 svc_close_socket(svsk);
1847}
1848
1849/**
1850 * svc_makesock - Make a socket for nfsd and lockd
1851 * @serv: RPC server structure
1852 * @protocol: transport protocol to use
1853 * @port: port to use
1854 * @flags: requested socket characteristics
1855 *
1856 */
1857int svc_makesock(struct svc_serv *serv, int protocol, unsigned short port,
1858 int flags)
1859{
1860 struct sockaddr_in sin = {
1861 .sin_family = AF_INET,
1862 .sin_addr.s_addr = INADDR_ANY,
1863 .sin_port = htons(port),
1864 };
1865
1866 dprintk("svc: creating socket proto = %d\n", protocol);
1867 return svc_create_socket(serv, protocol, (struct sockaddr *) &sin,
1868 sizeof(sin), flags);
1869} 1273}
1870 1274
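svc_makesock(), removed above, was the convenience wrapper nfsd and lockd used at startup; on success it returned the bound port number. A pre-patch, lockd-style call might have looked like this (the port value is hypothetical):

	int err = svc_makesock(serv, IPPROTO_UDP, 4045, SVC_SOCK_DEFAULTS);
	if (err < 0)
		printk(KERN_WARNING "lockd: cannot create UDP socket (%d)\n", err);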
1871/* 1275/*
1872 * Handle defer and revisit of requests 1276 * Free the svc_sock's socket resources and the svc_sock itself.
1873 */ 1277 */
1874 1278static void svc_sock_free(struct svc_xprt *xprt)
1875static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
1876{ 1279{
1877 struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle); 1280 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
1878 struct svc_sock *svsk; 1281 dprintk("svc: svc_sock_free(%p)\n", svsk);
1879 1282
1880 if (too_many) { 1283 if (svsk->sk_sock->file)
1881 svc_sock_put(dr->svsk); 1284 sockfd_put(svsk->sk_sock);
1882 kfree(dr); 1285 else
1883 return; 1286 sock_release(svsk->sk_sock);
1884 } 1287 kfree(svsk);
1885 dprintk("revisit queued\n");
1886 svsk = dr->svsk;
1887 dr->svsk = NULL;
1888 spin_lock(&svsk->sk_lock);
1889 list_add(&dr->handle.recent, &svsk->sk_deferred);
1890 spin_unlock(&svsk->sk_lock);
1891 set_bit(SK_DEFERRED, &svsk->sk_flags);
1892 svc_sock_enqueue(svsk);
1893 svc_sock_put(svsk);
1894}
1895
1896static struct cache_deferred_req *
1897svc_defer(struct cache_req *req)
1898{
1899 struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
1900 int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.len);
1901 struct svc_deferred_req *dr;
1902
1903 if (rqstp->rq_arg.page_len)
1904		return NULL; /* FIXME: give up if the request spans more than a page */
1905 if (rqstp->rq_deferred) {
1906 dr = rqstp->rq_deferred;
1907 rqstp->rq_deferred = NULL;
1908 } else {
1909 int skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
1910		/* FIXME: maybe discard if size is too large */
1911 dr = kmalloc(size, GFP_KERNEL);
1912 if (dr == NULL)
1913 return NULL;
1914
1915 dr->handle.owner = rqstp->rq_server;
1916 dr->prot = rqstp->rq_prot;
1917 memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen);
1918 dr->addrlen = rqstp->rq_addrlen;
1919 dr->daddr = rqstp->rq_daddr;
1920 dr->argslen = rqstp->rq_arg.len >> 2;
1921 memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2);
1922 }
1923 atomic_inc(&rqstp->rq_sock->sk_inuse);
1924 dr->svsk = rqstp->rq_sock;
1925
1926 dr->handle.revisit = svc_revisit;
1927 return &dr->handle;
1928}
1929
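svc_defer() is not called directly by the transport code; it is installed in rq_chandle by the receive path and invoked from the cache layer when an upcall answer is not yet available. A hedged sketch of that call site (the error handling is hypothetical):

	/* req points at rqstp->rq_chandle inside the cache code. */
	struct cache_deferred_req *dreq = req->defer(req);

	if (dreq == NULL)
		return -ETIMEDOUT;	/* could not defer; caller drops the request */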
1930/*
1931 * recv data from a deferred request into an active one
1932 */
1933static int svc_deferred_recv(struct svc_rqst *rqstp)
1934{
1935 struct svc_deferred_req *dr = rqstp->rq_deferred;
1936
1937 rqstp->rq_arg.head[0].iov_base = dr->args;
1938 rqstp->rq_arg.head[0].iov_len = dr->argslen<<2;
1939 rqstp->rq_arg.page_len = 0;
1940 rqstp->rq_arg.len = dr->argslen<<2;
1941 rqstp->rq_prot = dr->prot;
1942 memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen);
1943 rqstp->rq_addrlen = dr->addrlen;
1944 rqstp->rq_daddr = dr->daddr;
1945 rqstp->rq_respages = rqstp->rq_pages;
1946 return dr->argslen<<2;
1947}
1948
1949
1950static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk)
1951{
1952 struct svc_deferred_req *dr = NULL;
1953
1954 if (!test_bit(SK_DEFERRED, &svsk->sk_flags))
1955 return NULL;
1956 spin_lock(&svsk->sk_lock);
1957 clear_bit(SK_DEFERRED, &svsk->sk_flags);
1958 if (!list_empty(&svsk->sk_deferred)) {
1959 dr = list_entry(svsk->sk_deferred.next,
1960 struct svc_deferred_req,
1961 handle.recent);
1962 list_del_init(&dr->handle.recent);
1963 set_bit(SK_DEFERRED, &svsk->sk_flags);
1964 }
1965 spin_unlock(&svsk->sk_lock);
1966 return dr;
1967} 1288}
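One detail of svc_deferred_dequeue() worth spelling out: SK_DEFERRED is cleared before the list is inspected and re-set after a successful dequeue, so an entry queued concurrently by svc_revisit() is never lost; at worst a later pass finds the list empty. The same idiom with generic, hypothetical names:

	struct work_item *item = NULL;

	if (!test_bit(HAS_WORK, &flags))
		return NULL;		/* fast path: hint says queue is empty */
	spin_lock(&lock);
	clear_bit(HAS_WORK, &flags);
	if (!list_empty(&queue)) {
		item = list_entry(queue.next, struct work_item, node);
		list_del_init(&item->node);
		set_bit(HAS_WORK, &flags);	/* conservatively re-arm the hint */
	}
	spin_unlock(&lock);
	return item;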