diff options
Diffstat (limited to 'net/sunrpc/svcsock.c')
-rw-r--r-- | net/sunrpc/svcsock.c | 1311 |
1 files changed, 316 insertions, 995 deletions
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c75bffeb89eb..1d3e5fcc2cc4 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * | 5 | * |
6 | * The server scheduling algorithm does not always distribute the load | 6 | * The server scheduling algorithm does not always distribute the load |
7 | * evenly when servicing a single client. May need to modify the | 7 | * evenly when servicing a single client. May need to modify the |
8 | * svc_sock_enqueue procedure... | 8 | * svc_xprt_enqueue procedure... |
9 | * | 9 | * |
10 | * TCP support is largely untested and may be a little slow. The problem | 10 | * TCP support is largely untested and may be a little slow. The problem |
11 | * is that we currently do two separate recvfrom's, one for the 4-byte | 11 | * is that we currently do two separate recvfrom's, one for the 4-byte |
@@ -48,72 +48,40 @@ | |||
48 | #include <linux/sunrpc/svcsock.h> | 48 | #include <linux/sunrpc/svcsock.h> |
49 | #include <linux/sunrpc/stats.h> | 49 | #include <linux/sunrpc/stats.h> |
50 | 50 | ||
51 | /* SMP locking strategy: | 51 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT |
52 | * | ||
53 | * svc_pool->sp_lock protects most of the fields of that pool. | ||
54 | * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. | ||
55 | * when both need to be taken (rare), svc_serv->sv_lock is first. | ||
56 | * BKL protects svc_serv->sv_nrthread. | ||
57 | * svc_sock->sk_lock protects the svc_sock->sk_deferred list | ||
58 | * and the ->sk_info_authunix cache. | ||
59 | * svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply. | ||
60 | * | ||
61 | * Some flags can be set to certain values at any time | ||
62 | * providing that certain rules are followed: | ||
63 | * | ||
64 | * SK_CONN, SK_DATA, can be set or cleared at any time. | ||
65 | * after a set, svc_sock_enqueue must be called. | ||
66 | * after a clear, the socket must be read/accepted | ||
67 | * if this succeeds, it must be set again. | ||
68 | * SK_CLOSE can set at any time. It is never cleared. | ||
69 | * sk_inuse contains a bias of '1' until SK_DEAD is set. | ||
70 | * so when sk_inuse hits zero, we know the socket is dead | ||
71 | * and no-one is using it. | ||
72 | * SK_DEAD can only be set while SK_BUSY is held which ensures | ||
73 | * no other thread will be using the socket or will try to | ||
74 | * set SK_DEAD. | ||
75 | * | ||
76 | */ | ||
77 | |||
78 | #define RPCDBG_FACILITY RPCDBG_SVCSOCK | ||
79 | 52 | ||
80 | 53 | ||
81 | static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, | 54 | static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, |
82 | int *errp, int flags); | 55 | int *errp, int flags); |
83 | static void svc_delete_socket(struct svc_sock *svsk); | ||
84 | static void svc_udp_data_ready(struct sock *, int); | 56 | static void svc_udp_data_ready(struct sock *, int); |
85 | static int svc_udp_recvfrom(struct svc_rqst *); | 57 | static int svc_udp_recvfrom(struct svc_rqst *); |
86 | static int svc_udp_sendto(struct svc_rqst *); | 58 | static int svc_udp_sendto(struct svc_rqst *); |
87 | static void svc_close_socket(struct svc_sock *svsk); | 59 | static void svc_sock_detach(struct svc_xprt *); |
88 | 60 | static void svc_sock_free(struct svc_xprt *); | |
89 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk); | ||
90 | static int svc_deferred_recv(struct svc_rqst *rqstp); | ||
91 | static struct cache_deferred_req *svc_defer(struct cache_req *req); | ||
92 | |||
93 | /* apparently the "standard" is that clients close | ||
94 | * idle connections after 5 minutes, servers after | ||
95 | * 6 minutes | ||
96 | * http://www.connectathon.org/talks96/nfstcp.pdf | ||
97 | */ | ||
98 | static int svc_conn_age_period = 6*60; | ||
99 | 61 | ||
62 | static struct svc_xprt *svc_create_socket(struct svc_serv *, int, | ||
63 | struct sockaddr *, int, int); | ||
100 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 64 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
101 | static struct lock_class_key svc_key[2]; | 65 | static struct lock_class_key svc_key[2]; |
102 | static struct lock_class_key svc_slock_key[2]; | 66 | static struct lock_class_key svc_slock_key[2]; |
103 | 67 | ||
104 | static inline void svc_reclassify_socket(struct socket *sock) | 68 | static void svc_reclassify_socket(struct socket *sock) |
105 | { | 69 | { |
106 | struct sock *sk = sock->sk; | 70 | struct sock *sk = sock->sk; |
107 | BUG_ON(sock_owned_by_user(sk)); | 71 | BUG_ON(sock_owned_by_user(sk)); |
108 | switch (sk->sk_family) { | 72 | switch (sk->sk_family) { |
109 | case AF_INET: | 73 | case AF_INET: |
110 | sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", | 74 | sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", |
111 | &svc_slock_key[0], "sk_lock-AF_INET-NFSD", &svc_key[0]); | 75 | &svc_slock_key[0], |
76 | "sk_xprt.xpt_lock-AF_INET-NFSD", | ||
77 | &svc_key[0]); | ||
112 | break; | 78 | break; |
113 | 79 | ||
114 | case AF_INET6: | 80 | case AF_INET6: |
115 | sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD", | 81 | sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD", |
116 | &svc_slock_key[1], "sk_lock-AF_INET6-NFSD", &svc_key[1]); | 82 | &svc_slock_key[1], |
83 | "sk_xprt.xpt_lock-AF_INET6-NFSD", | ||
84 | &svc_key[1]); | ||
117 | break; | 85 | break; |
118 | 86 | ||
119 | default: | 87 | default: |
@@ -121,81 +89,26 @@ static inline void svc_reclassify_socket(struct socket *sock) | |||
121 | } | 89 | } |
122 | } | 90 | } |
123 | #else | 91 | #else |
124 | static inline void svc_reclassify_socket(struct socket *sock) | 92 | static void svc_reclassify_socket(struct socket *sock) |
125 | { | 93 | { |
126 | } | 94 | } |
127 | #endif | 95 | #endif |
128 | 96 | ||
129 | static char *__svc_print_addr(struct sockaddr *addr, char *buf, size_t len) | ||
130 | { | ||
131 | switch (addr->sa_family) { | ||
132 | case AF_INET: | ||
133 | snprintf(buf, len, "%u.%u.%u.%u, port=%u", | ||
134 | NIPQUAD(((struct sockaddr_in *) addr)->sin_addr), | ||
135 | ntohs(((struct sockaddr_in *) addr)->sin_port)); | ||
136 | break; | ||
137 | |||
138 | case AF_INET6: | ||
139 | snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u", | ||
140 | NIP6(((struct sockaddr_in6 *) addr)->sin6_addr), | ||
141 | ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); | ||
142 | break; | ||
143 | |||
144 | default: | ||
145 | snprintf(buf, len, "unknown address type: %d", addr->sa_family); | ||
146 | break; | ||
147 | } | ||
148 | return buf; | ||
149 | } | ||
150 | |||
151 | /** | ||
152 | * svc_print_addr - Format rq_addr field for printing | ||
153 | * @rqstp: svc_rqst struct containing address to print | ||
154 | * @buf: target buffer for formatted address | ||
155 | * @len: length of target buffer | ||
156 | * | ||
157 | */ | ||
158 | char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len) | ||
159 | { | ||
160 | return __svc_print_addr(svc_addr(rqstp), buf, len); | ||
161 | } | ||
162 | EXPORT_SYMBOL_GPL(svc_print_addr); | ||
163 | |||
164 | /* | ||
165 | * Queue up an idle server thread. Must have pool->sp_lock held. | ||
166 | * Note: this is really a stack rather than a queue, so that we only | ||
167 | * use as many different threads as we need, and the rest don't pollute | ||
168 | * the cache. | ||
169 | */ | ||
170 | static inline void | ||
171 | svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp) | ||
172 | { | ||
173 | list_add(&rqstp->rq_list, &pool->sp_threads); | ||
174 | } | ||
175 | |||
176 | /* | ||
177 | * Dequeue an nfsd thread. Must have pool->sp_lock held. | ||
178 | */ | ||
179 | static inline void | ||
180 | svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) | ||
181 | { | ||
182 | list_del(&rqstp->rq_list); | ||
183 | } | ||
184 | |||
185 | /* | 97 | /* |
186 | * Release an skbuff after use | 98 | * Release an skbuff after use |
187 | */ | 99 | */ |
188 | static inline void | 100 | static void svc_release_skb(struct svc_rqst *rqstp) |
189 | svc_release_skb(struct svc_rqst *rqstp) | ||
190 | { | 101 | { |
191 | struct sk_buff *skb = rqstp->rq_skbuff; | 102 | struct sk_buff *skb = rqstp->rq_xprt_ctxt; |
192 | struct svc_deferred_req *dr = rqstp->rq_deferred; | 103 | struct svc_deferred_req *dr = rqstp->rq_deferred; |
193 | 104 | ||
194 | if (skb) { | 105 | if (skb) { |
195 | rqstp->rq_skbuff = NULL; | 106 | struct svc_sock *svsk = |
107 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
108 | rqstp->rq_xprt_ctxt = NULL; | ||
196 | 109 | ||
197 | dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); | 110 | dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); |
198 | skb_free_datagram(rqstp->rq_sock->sk_sk, skb); | 111 | skb_free_datagram(svsk->sk_sk, skb); |
199 | } | 112 | } |
200 | if (dr) { | 113 | if (dr) { |
201 | rqstp->rq_deferred = NULL; | 114 | rqstp->rq_deferred = NULL; |
@@ -203,253 +116,6 @@ svc_release_skb(struct svc_rqst *rqstp) | |||
203 | } | 116 | } |
204 | } | 117 | } |
205 | 118 | ||
206 | /* | ||
207 | * Any space to write? | ||
208 | */ | ||
209 | static inline unsigned long | ||
210 | svc_sock_wspace(struct svc_sock *svsk) | ||
211 | { | ||
212 | int wspace; | ||
213 | |||
214 | if (svsk->sk_sock->type == SOCK_STREAM) | ||
215 | wspace = sk_stream_wspace(svsk->sk_sk); | ||
216 | else | ||
217 | wspace = sock_wspace(svsk->sk_sk); | ||
218 | |||
219 | return wspace; | ||
220 | } | ||
221 | |||
222 | /* | ||
223 | * Queue up a socket with data pending. If there are idle nfsd | ||
224 | * processes, wake 'em up. | ||
225 | * | ||
226 | */ | ||
227 | static void | ||
228 | svc_sock_enqueue(struct svc_sock *svsk) | ||
229 | { | ||
230 | struct svc_serv *serv = svsk->sk_server; | ||
231 | struct svc_pool *pool; | ||
232 | struct svc_rqst *rqstp; | ||
233 | int cpu; | ||
234 | |||
235 | if (!(svsk->sk_flags & | ||
236 | ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) )) | ||
237 | return; | ||
238 | if (test_bit(SK_DEAD, &svsk->sk_flags)) | ||
239 | return; | ||
240 | |||
241 | cpu = get_cpu(); | ||
242 | pool = svc_pool_for_cpu(svsk->sk_server, cpu); | ||
243 | put_cpu(); | ||
244 | |||
245 | spin_lock_bh(&pool->sp_lock); | ||
246 | |||
247 | if (!list_empty(&pool->sp_threads) && | ||
248 | !list_empty(&pool->sp_sockets)) | ||
249 | printk(KERN_ERR | ||
250 | "svc_sock_enqueue: threads and sockets both waiting??\n"); | ||
251 | |||
252 | if (test_bit(SK_DEAD, &svsk->sk_flags)) { | ||
253 | /* Don't enqueue dead sockets */ | ||
254 | dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk); | ||
255 | goto out_unlock; | ||
256 | } | ||
257 | |||
258 | /* Mark socket as busy. It will remain in this state until the | ||
259 | * server has processed all pending data and put the socket back | ||
260 | * on the idle list. We update SK_BUSY atomically because | ||
261 | * it also guards against trying to enqueue the svc_sock twice. | ||
262 | */ | ||
263 | if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) { | ||
264 | /* Don't enqueue socket while already enqueued */ | ||
265 | dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk); | ||
266 | goto out_unlock; | ||
267 | } | ||
268 | BUG_ON(svsk->sk_pool != NULL); | ||
269 | svsk->sk_pool = pool; | ||
270 | |||
271 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
272 | if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2 | ||
273 | > svc_sock_wspace(svsk)) | ||
274 | && !test_bit(SK_CLOSE, &svsk->sk_flags) | ||
275 | && !test_bit(SK_CONN, &svsk->sk_flags)) { | ||
276 | /* Don't enqueue while not enough space for reply */ | ||
277 | dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n", | ||
278 | svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg, | ||
279 | svc_sock_wspace(svsk)); | ||
280 | svsk->sk_pool = NULL; | ||
281 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
282 | goto out_unlock; | ||
283 | } | ||
284 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
285 | |||
286 | |||
287 | if (!list_empty(&pool->sp_threads)) { | ||
288 | rqstp = list_entry(pool->sp_threads.next, | ||
289 | struct svc_rqst, | ||
290 | rq_list); | ||
291 | dprintk("svc: socket %p served by daemon %p\n", | ||
292 | svsk->sk_sk, rqstp); | ||
293 | svc_thread_dequeue(pool, rqstp); | ||
294 | if (rqstp->rq_sock) | ||
295 | printk(KERN_ERR | ||
296 | "svc_sock_enqueue: server %p, rq_sock=%p!\n", | ||
297 | rqstp, rqstp->rq_sock); | ||
298 | rqstp->rq_sock = svsk; | ||
299 | atomic_inc(&svsk->sk_inuse); | ||
300 | rqstp->rq_reserved = serv->sv_max_mesg; | ||
301 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); | ||
302 | BUG_ON(svsk->sk_pool != pool); | ||
303 | wake_up(&rqstp->rq_wait); | ||
304 | } else { | ||
305 | dprintk("svc: socket %p put into queue\n", svsk->sk_sk); | ||
306 | list_add_tail(&svsk->sk_ready, &pool->sp_sockets); | ||
307 | BUG_ON(svsk->sk_pool != pool); | ||
308 | } | ||
309 | |||
310 | out_unlock: | ||
311 | spin_unlock_bh(&pool->sp_lock); | ||
312 | } | ||
313 | |||
314 | /* | ||
315 | * Dequeue the first socket. Must be called with the pool->sp_lock held. | ||
316 | */ | ||
317 | static inline struct svc_sock * | ||
318 | svc_sock_dequeue(struct svc_pool *pool) | ||
319 | { | ||
320 | struct svc_sock *svsk; | ||
321 | |||
322 | if (list_empty(&pool->sp_sockets)) | ||
323 | return NULL; | ||
324 | |||
325 | svsk = list_entry(pool->sp_sockets.next, | ||
326 | struct svc_sock, sk_ready); | ||
327 | list_del_init(&svsk->sk_ready); | ||
328 | |||
329 | dprintk("svc: socket %p dequeued, inuse=%d\n", | ||
330 | svsk->sk_sk, atomic_read(&svsk->sk_inuse)); | ||
331 | |||
332 | return svsk; | ||
333 | } | ||
334 | |||
335 | /* | ||
336 | * Having read something from a socket, check whether it | ||
337 | * needs to be re-enqueued. | ||
338 | * Note: SK_DATA only gets cleared when a read-attempt finds | ||
339 | * no (or insufficient) data. | ||
340 | */ | ||
341 | static inline void | ||
342 | svc_sock_received(struct svc_sock *svsk) | ||
343 | { | ||
344 | svsk->sk_pool = NULL; | ||
345 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
346 | svc_sock_enqueue(svsk); | ||
347 | } | ||
348 | |||
349 | |||
350 | /** | ||
351 | * svc_reserve - change the space reserved for the reply to a request. | ||
352 | * @rqstp: The request in question | ||
353 | * @space: new max space to reserve | ||
354 | * | ||
355 | * Each request reserves some space on the output queue of the socket | ||
356 | * to make sure the reply fits. This function reduces that reserved | ||
357 | * space to be the amount of space used already, plus @space. | ||
358 | * | ||
359 | */ | ||
360 | void svc_reserve(struct svc_rqst *rqstp, int space) | ||
361 | { | ||
362 | space += rqstp->rq_res.head[0].iov_len; | ||
363 | |||
364 | if (space < rqstp->rq_reserved) { | ||
365 | struct svc_sock *svsk = rqstp->rq_sock; | ||
366 | atomic_sub((rqstp->rq_reserved - space), &svsk->sk_reserved); | ||
367 | rqstp->rq_reserved = space; | ||
368 | |||
369 | svc_sock_enqueue(svsk); | ||
370 | } | ||
371 | } | ||
372 | |||
373 | /* | ||
374 | * Release a socket after use. | ||
375 | */ | ||
376 | static inline void | ||
377 | svc_sock_put(struct svc_sock *svsk) | ||
378 | { | ||
379 | if (atomic_dec_and_test(&svsk->sk_inuse)) { | ||
380 | BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags)); | ||
381 | |||
382 | dprintk("svc: releasing dead socket\n"); | ||
383 | if (svsk->sk_sock->file) | ||
384 | sockfd_put(svsk->sk_sock); | ||
385 | else | ||
386 | sock_release(svsk->sk_sock); | ||
387 | if (svsk->sk_info_authunix != NULL) | ||
388 | svcauth_unix_info_release(svsk->sk_info_authunix); | ||
389 | kfree(svsk); | ||
390 | } | ||
391 | } | ||
392 | |||
393 | static void | ||
394 | svc_sock_release(struct svc_rqst *rqstp) | ||
395 | { | ||
396 | struct svc_sock *svsk = rqstp->rq_sock; | ||
397 | |||
398 | svc_release_skb(rqstp); | ||
399 | |||
400 | svc_free_res_pages(rqstp); | ||
401 | rqstp->rq_res.page_len = 0; | ||
402 | rqstp->rq_res.page_base = 0; | ||
403 | |||
404 | |||
405 | /* Reset response buffer and release | ||
406 | * the reservation. | ||
407 | * But first, check that enough space was reserved | ||
408 | * for the reply, otherwise we have a bug! | ||
409 | */ | ||
410 | if ((rqstp->rq_res.len) > rqstp->rq_reserved) | ||
411 | printk(KERN_ERR "RPC request reserved %d but used %d\n", | ||
412 | rqstp->rq_reserved, | ||
413 | rqstp->rq_res.len); | ||
414 | |||
415 | rqstp->rq_res.head[0].iov_len = 0; | ||
416 | svc_reserve(rqstp, 0); | ||
417 | rqstp->rq_sock = NULL; | ||
418 | |||
419 | svc_sock_put(svsk); | ||
420 | } | ||
421 | |||
422 | /* | ||
423 | * External function to wake up a server waiting for data | ||
424 | * This really only makes sense for services like lockd | ||
425 | * which have exactly one thread anyway. | ||
426 | */ | ||
427 | void | ||
428 | svc_wake_up(struct svc_serv *serv) | ||
429 | { | ||
430 | struct svc_rqst *rqstp; | ||
431 | unsigned int i; | ||
432 | struct svc_pool *pool; | ||
433 | |||
434 | for (i = 0; i < serv->sv_nrpools; i++) { | ||
435 | pool = &serv->sv_pools[i]; | ||
436 | |||
437 | spin_lock_bh(&pool->sp_lock); | ||
438 | if (!list_empty(&pool->sp_threads)) { | ||
439 | rqstp = list_entry(pool->sp_threads.next, | ||
440 | struct svc_rqst, | ||
441 | rq_list); | ||
442 | dprintk("svc: daemon %p woken up.\n", rqstp); | ||
443 | /* | ||
444 | svc_thread_dequeue(pool, rqstp); | ||
445 | rqstp->rq_sock = NULL; | ||
446 | */ | ||
447 | wake_up(&rqstp->rq_wait); | ||
448 | } | ||
449 | spin_unlock_bh(&pool->sp_lock); | ||
450 | } | ||
451 | } | ||
452 | |||
453 | union svc_pktinfo_u { | 119 | union svc_pktinfo_u { |
454 | struct in_pktinfo pkti; | 120 | struct in_pktinfo pkti; |
455 | struct in6_pktinfo pkti6; | 121 | struct in6_pktinfo pkti6; |
@@ -459,7 +125,9 @@ union svc_pktinfo_u { | |||
459 | 125 | ||
460 | static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) | 126 | static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) |
461 | { | 127 | { |
462 | switch (rqstp->rq_sock->sk_sk->sk_family) { | 128 | struct svc_sock *svsk = |
129 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
130 | switch (svsk->sk_sk->sk_family) { | ||
463 | case AF_INET: { | 131 | case AF_INET: { |
464 | struct in_pktinfo *pki = CMSG_DATA(cmh); | 132 | struct in_pktinfo *pki = CMSG_DATA(cmh); |
465 | 133 | ||
@@ -489,10 +157,10 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) | |||
489 | /* | 157 | /* |
490 | * Generic sendto routine | 158 | * Generic sendto routine |
491 | */ | 159 | */ |
492 | static int | 160 | static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) |
493 | svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) | ||
494 | { | 161 | { |
495 | struct svc_sock *svsk = rqstp->rq_sock; | 162 | struct svc_sock *svsk = |
163 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
496 | struct socket *sock = svsk->sk_sock; | 164 | struct socket *sock = svsk->sk_sock; |
497 | int slen; | 165 | int slen; |
498 | union { | 166 | union { |
@@ -565,7 +233,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) | |||
565 | } | 233 | } |
566 | out: | 234 | out: |
567 | dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", | 235 | dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", |
568 | rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, | 236 | svsk, xdr->head[0].iov_base, xdr->head[0].iov_len, |
569 | xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf))); | 237 | xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf))); |
570 | 238 | ||
571 | return len; | 239 | return len; |
@@ -602,7 +270,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) | |||
602 | if (!serv) | 270 | if (!serv) |
603 | return 0; | 271 | return 0; |
604 | spin_lock_bh(&serv->sv_lock); | 272 | spin_lock_bh(&serv->sv_lock); |
605 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) { | 273 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) { |
606 | int onelen = one_sock_name(buf+len, svsk); | 274 | int onelen = one_sock_name(buf+len, svsk); |
607 | if (toclose && strcmp(toclose, buf+len) == 0) | 275 | if (toclose && strcmp(toclose, buf+len) == 0) |
608 | closesk = svsk; | 276 | closesk = svsk; |
@@ -614,7 +282,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) | |||
614 | /* Should unregister with portmap, but you cannot | 282 | /* Should unregister with portmap, but you cannot |
615 | * unregister just one protocol... | 283 | * unregister just one protocol... |
616 | */ | 284 | */ |
617 | svc_close_socket(closesk); | 285 | svc_close_xprt(&closesk->sk_xprt); |
618 | else if (toclose) | 286 | else if (toclose) |
619 | return -ENOENT; | 287 | return -ENOENT; |
620 | return len; | 288 | return len; |
@@ -624,8 +292,7 @@ EXPORT_SYMBOL(svc_sock_names); | |||
624 | /* | 292 | /* |
625 | * Check input queue length | 293 | * Check input queue length |
626 | */ | 294 | */ |
627 | static int | 295 | static int svc_recv_available(struct svc_sock *svsk) |
628 | svc_recv_available(struct svc_sock *svsk) | ||
629 | { | 296 | { |
630 | struct socket *sock = svsk->sk_sock; | 297 | struct socket *sock = svsk->sk_sock; |
631 | int avail, err; | 298 | int avail, err; |
@@ -638,48 +305,31 @@ svc_recv_available(struct svc_sock *svsk) | |||
638 | /* | 305 | /* |
639 | * Generic recvfrom routine. | 306 | * Generic recvfrom routine. |
640 | */ | 307 | */ |
641 | static int | 308 | static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, |
642 | svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) | 309 | int buflen) |
643 | { | 310 | { |
644 | struct svc_sock *svsk = rqstp->rq_sock; | 311 | struct svc_sock *svsk = |
312 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
645 | struct msghdr msg = { | 313 | struct msghdr msg = { |
646 | .msg_flags = MSG_DONTWAIT, | 314 | .msg_flags = MSG_DONTWAIT, |
647 | }; | 315 | }; |
648 | struct sockaddr *sin; | ||
649 | int len; | 316 | int len; |
650 | 317 | ||
318 | rqstp->rq_xprt_hlen = 0; | ||
319 | |||
651 | len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, | 320 | len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, |
652 | msg.msg_flags); | 321 | msg.msg_flags); |
653 | 322 | ||
654 | /* sock_recvmsg doesn't fill in the name/namelen, so we must.. | ||
655 | */ | ||
656 | memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen); | ||
657 | rqstp->rq_addrlen = svsk->sk_remotelen; | ||
658 | |||
659 | /* Destination address in request is needed for binding the | ||
660 | * source address in RPC callbacks later. | ||
661 | */ | ||
662 | sin = (struct sockaddr *)&svsk->sk_local; | ||
663 | switch (sin->sa_family) { | ||
664 | case AF_INET: | ||
665 | rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr; | ||
666 | break; | ||
667 | case AF_INET6: | ||
668 | rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr; | ||
669 | break; | ||
670 | } | ||
671 | |||
672 | dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", | 323 | dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", |
673 | svsk, iov[0].iov_base, iov[0].iov_len, len); | 324 | svsk, iov[0].iov_base, iov[0].iov_len, len); |
674 | |||
675 | return len; | 325 | return len; |
676 | } | 326 | } |
677 | 327 | ||
678 | /* | 328 | /* |
679 | * Set socket snd and rcv buffer lengths | 329 | * Set socket snd and rcv buffer lengths |
680 | */ | 330 | */ |
681 | static inline void | 331 | static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, |
682 | svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv) | 332 | unsigned int rcv) |
683 | { | 333 | { |
684 | #if 0 | 334 | #if 0 |
685 | mm_segment_t oldfs; | 335 | mm_segment_t oldfs; |
@@ -704,16 +354,16 @@ svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv) | |||
704 | /* | 354 | /* |
705 | * INET callback when data has been received on the socket. | 355 | * INET callback when data has been received on the socket. |
706 | */ | 356 | */ |
707 | static void | 357 | static void svc_udp_data_ready(struct sock *sk, int count) |
708 | svc_udp_data_ready(struct sock *sk, int count) | ||
709 | { | 358 | { |
710 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 359 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
711 | 360 | ||
712 | if (svsk) { | 361 | if (svsk) { |
713 | dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", | 362 | dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", |
714 | svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags)); | 363 | svsk, sk, count, |
715 | set_bit(SK_DATA, &svsk->sk_flags); | 364 | test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); |
716 | svc_sock_enqueue(svsk); | 365 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
366 | svc_xprt_enqueue(&svsk->sk_xprt); | ||
717 | } | 367 | } |
718 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 368 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
719 | wake_up_interruptible(sk->sk_sleep); | 369 | wake_up_interruptible(sk->sk_sleep); |
@@ -722,15 +372,14 @@ svc_udp_data_ready(struct sock *sk, int count) | |||
722 | /* | 372 | /* |
723 | * INET callback when space is newly available on the socket. | 373 | * INET callback when space is newly available on the socket. |
724 | */ | 374 | */ |
725 | static void | 375 | static void svc_write_space(struct sock *sk) |
726 | svc_write_space(struct sock *sk) | ||
727 | { | 376 | { |
728 | struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); | 377 | struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); |
729 | 378 | ||
730 | if (svsk) { | 379 | if (svsk) { |
731 | dprintk("svc: socket %p(inet %p), write_space busy=%d\n", | 380 | dprintk("svc: socket %p(inet %p), write_space busy=%d\n", |
732 | svsk, sk, test_bit(SK_BUSY, &svsk->sk_flags)); | 381 | svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); |
733 | svc_sock_enqueue(svsk); | 382 | svc_xprt_enqueue(&svsk->sk_xprt); |
734 | } | 383 | } |
735 | 384 | ||
736 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { | 385 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { |
@@ -740,10 +389,19 @@ svc_write_space(struct sock *sk) | |||
740 | } | 389 | } |
741 | } | 390 | } |
742 | 391 | ||
743 | static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp, | 392 | /* |
744 | struct cmsghdr *cmh) | 393 | * Copy the UDP datagram's destination address to the rqstp structure. |
394 | * The 'destination' address in this case is the address to which the | ||
395 | * peer sent the datagram, i.e. our local address. For multihomed | ||
396 | * hosts, this can change from msg to msg. Note that only the IP | ||
397 | * address changes, the port number should remain the same. | ||
398 | */ | ||
399 | static void svc_udp_get_dest_address(struct svc_rqst *rqstp, | ||
400 | struct cmsghdr *cmh) | ||
745 | { | 401 | { |
746 | switch (rqstp->rq_sock->sk_sk->sk_family) { | 402 | struct svc_sock *svsk = |
403 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
404 | switch (svsk->sk_sk->sk_family) { | ||
747 | case AF_INET: { | 405 | case AF_INET: { |
748 | struct in_pktinfo *pki = CMSG_DATA(cmh); | 406 | struct in_pktinfo *pki = CMSG_DATA(cmh); |
749 | rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; | 407 | rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; |
@@ -760,11 +418,11 @@ static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp, | |||
760 | /* | 418 | /* |
761 | * Receive a datagram from a UDP socket. | 419 | * Receive a datagram from a UDP socket. |
762 | */ | 420 | */ |
763 | static int | 421 | static int svc_udp_recvfrom(struct svc_rqst *rqstp) |
764 | svc_udp_recvfrom(struct svc_rqst *rqstp) | ||
765 | { | 422 | { |
766 | struct svc_sock *svsk = rqstp->rq_sock; | 423 | struct svc_sock *svsk = |
767 | struct svc_serv *serv = svsk->sk_server; | 424 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); |
425 | struct svc_serv *serv = svsk->sk_xprt.xpt_server; | ||
768 | struct sk_buff *skb; | 426 | struct sk_buff *skb; |
769 | union { | 427 | union { |
770 | struct cmsghdr hdr; | 428 | struct cmsghdr hdr; |
@@ -779,7 +437,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
779 | .msg_flags = MSG_DONTWAIT, | 437 | .msg_flags = MSG_DONTWAIT, |
780 | }; | 438 | }; |
781 | 439 | ||
782 | if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) | 440 | if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) |
783 | /* udp sockets need large rcvbuf as all pending | 441 | /* udp sockets need large rcvbuf as all pending |
784 | * requests are still in that buffer. sndbuf must | 442 | * requests are still in that buffer. sndbuf must |
785 | * also be large enough that there is enough space | 443 | * also be large enough that there is enough space |
@@ -792,17 +450,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
792 | (serv->sv_nrthreads+3) * serv->sv_max_mesg, | 450 | (serv->sv_nrthreads+3) * serv->sv_max_mesg, |
793 | (serv->sv_nrthreads+3) * serv->sv_max_mesg); | 451 | (serv->sv_nrthreads+3) * serv->sv_max_mesg); |
794 | 452 | ||
795 | if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { | 453 | clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
796 | svc_sock_received(svsk); | ||
797 | return svc_deferred_recv(rqstp); | ||
798 | } | ||
799 | |||
800 | if (test_bit(SK_CLOSE, &svsk->sk_flags)) { | ||
801 | svc_delete_socket(svsk); | ||
802 | return 0; | ||
803 | } | ||
804 | |||
805 | clear_bit(SK_DATA, &svsk->sk_flags); | ||
806 | skb = NULL; | 454 | skb = NULL; |
807 | err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, | 455 | err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, |
808 | 0, 0, MSG_PEEK | MSG_DONTWAIT); | 456 | 0, 0, MSG_PEEK | MSG_DONTWAIT); |
@@ -813,24 +461,27 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
813 | if (err != -EAGAIN) { | 461 | if (err != -EAGAIN) { |
814 | /* possibly an icmp error */ | 462 | /* possibly an icmp error */ |
815 | dprintk("svc: recvfrom returned error %d\n", -err); | 463 | dprintk("svc: recvfrom returned error %d\n", -err); |
816 | set_bit(SK_DATA, &svsk->sk_flags); | 464 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
817 | } | 465 | } |
818 | svc_sock_received(svsk); | 466 | svc_xprt_received(&svsk->sk_xprt); |
819 | return -EAGAIN; | 467 | return -EAGAIN; |
820 | } | 468 | } |
821 | rqstp->rq_addrlen = sizeof(rqstp->rq_addr); | 469 | len = svc_addr_len(svc_addr(rqstp)); |
470 | if (len < 0) | ||
471 | return len; | ||
472 | rqstp->rq_addrlen = len; | ||
822 | if (skb->tstamp.tv64 == 0) { | 473 | if (skb->tstamp.tv64 == 0) { |
823 | skb->tstamp = ktime_get_real(); | 474 | skb->tstamp = ktime_get_real(); |
824 | /* Don't enable netstamp, sunrpc doesn't | 475 | /* Don't enable netstamp, sunrpc doesn't |
825 | need that much accuracy */ | 476 | need that much accuracy */ |
826 | } | 477 | } |
827 | svsk->sk_sk->sk_stamp = skb->tstamp; | 478 | svsk->sk_sk->sk_stamp = skb->tstamp; |
828 | set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ | 479 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ |
829 | 480 | ||
830 | /* | 481 | /* |
831 | * Maybe more packets - kick another thread ASAP. | 482 | * Maybe more packets - kick another thread ASAP. |
832 | */ | 483 | */ |
833 | svc_sock_received(svsk); | 484 | svc_xprt_received(&svsk->sk_xprt); |
834 | 485 | ||
835 | len = skb->len - sizeof(struct udphdr); | 486 | len = skb->len - sizeof(struct udphdr); |
836 | rqstp->rq_arg.len = len; | 487 | rqstp->rq_arg.len = len; |
@@ -861,13 +512,14 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
861 | skb_free_datagram(svsk->sk_sk, skb); | 512 | skb_free_datagram(svsk->sk_sk, skb); |
862 | } else { | 513 | } else { |
863 | /* we can use it in-place */ | 514 | /* we can use it in-place */ |
864 | rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); | 515 | rqstp->rq_arg.head[0].iov_base = skb->data + |
516 | sizeof(struct udphdr); | ||
865 | rqstp->rq_arg.head[0].iov_len = len; | 517 | rqstp->rq_arg.head[0].iov_len = len; |
866 | if (skb_checksum_complete(skb)) { | 518 | if (skb_checksum_complete(skb)) { |
867 | skb_free_datagram(svsk->sk_sk, skb); | 519 | skb_free_datagram(svsk->sk_sk, skb); |
868 | return 0; | 520 | return 0; |
869 | } | 521 | } |
870 | rqstp->rq_skbuff = skb; | 522 | rqstp->rq_xprt_ctxt = skb; |
871 | } | 523 | } |
872 | 524 | ||
873 | rqstp->rq_arg.page_base = 0; | 525 | rqstp->rq_arg.page_base = 0; |
@@ -900,27 +552,81 @@ svc_udp_sendto(struct svc_rqst *rqstp) | |||
900 | return error; | 552 | return error; |
901 | } | 553 | } |
902 | 554 | ||
903 | static void | 555 | static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp) |
904 | svc_udp_init(struct svc_sock *svsk) | 556 | { |
557 | } | ||
558 | |||
559 | static int svc_udp_has_wspace(struct svc_xprt *xprt) | ||
560 | { | ||
561 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); | ||
562 | struct svc_serv *serv = xprt->xpt_server; | ||
563 | unsigned long required; | ||
564 | |||
565 | /* | ||
566 | * Set the SOCK_NOSPACE flag before checking the available | ||
567 | * sock space. | ||
568 | */ | ||
569 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
570 | required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg; | ||
571 | if (required*2 > sock_wspace(svsk->sk_sk)) | ||
572 | return 0; | ||
573 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
574 | return 1; | ||
575 | } | ||
576 | |||
577 | static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt) | ||
578 | { | ||
579 | BUG(); | ||
580 | return NULL; | ||
581 | } | ||
582 | |||
583 | static struct svc_xprt *svc_udp_create(struct svc_serv *serv, | ||
584 | struct sockaddr *sa, int salen, | ||
585 | int flags) | ||
586 | { | ||
587 | return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags); | ||
588 | } | ||
589 | |||
590 | static struct svc_xprt_ops svc_udp_ops = { | ||
591 | .xpo_create = svc_udp_create, | ||
592 | .xpo_recvfrom = svc_udp_recvfrom, | ||
593 | .xpo_sendto = svc_udp_sendto, | ||
594 | .xpo_release_rqst = svc_release_skb, | ||
595 | .xpo_detach = svc_sock_detach, | ||
596 | .xpo_free = svc_sock_free, | ||
597 | .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr, | ||
598 | .xpo_has_wspace = svc_udp_has_wspace, | ||
599 | .xpo_accept = svc_udp_accept, | ||
600 | }; | ||
601 | |||
602 | static struct svc_xprt_class svc_udp_class = { | ||
603 | .xcl_name = "udp", | ||
604 | .xcl_owner = THIS_MODULE, | ||
605 | .xcl_ops = &svc_udp_ops, | ||
606 | .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP, | ||
607 | }; | ||
608 | |||
609 | static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) | ||
905 | { | 610 | { |
906 | int one = 1; | 611 | int one = 1; |
907 | mm_segment_t oldfs; | 612 | mm_segment_t oldfs; |
908 | 613 | ||
614 | svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv); | ||
615 | clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); | ||
909 | svsk->sk_sk->sk_data_ready = svc_udp_data_ready; | 616 | svsk->sk_sk->sk_data_ready = svc_udp_data_ready; |
910 | svsk->sk_sk->sk_write_space = svc_write_space; | 617 | svsk->sk_sk->sk_write_space = svc_write_space; |
911 | svsk->sk_recvfrom = svc_udp_recvfrom; | ||
912 | svsk->sk_sendto = svc_udp_sendto; | ||
913 | 618 | ||
914 | /* initialise setting must have enough space to | 619 | /* initialise setting must have enough space to |
915 | * receive and respond to one request. | 620 | * receive and respond to one request. |
916 | * svc_udp_recvfrom will re-adjust if necessary | 621 | * svc_udp_recvfrom will re-adjust if necessary |
917 | */ | 622 | */ |
918 | svc_sock_setbufsize(svsk->sk_sock, | 623 | svc_sock_setbufsize(svsk->sk_sock, |
919 | 3 * svsk->sk_server->sv_max_mesg, | 624 | 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, |
920 | 3 * svsk->sk_server->sv_max_mesg); | 625 | 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); |
921 | 626 | ||
922 | set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */ | 627 | /* data might have come in before data_ready set up */ |
923 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 628 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
629 | set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); | ||
924 | 630 | ||
925 | oldfs = get_fs(); | 631 | oldfs = get_fs(); |
926 | set_fs(KERNEL_DS); | 632 | set_fs(KERNEL_DS); |
@@ -934,8 +640,7 @@ svc_udp_init(struct svc_sock *svsk) | |||
934 | * A data_ready event on a listening socket means there's a connection | 640 | * A data_ready event on a listening socket means there's a connection |
935 | * pending. Do not use state_change as a substitute for it. | 641 | * pending. Do not use state_change as a substitute for it. |
936 | */ | 642 | */ |
937 | static void | 643 | static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) |
938 | svc_tcp_listen_data_ready(struct sock *sk, int count_unused) | ||
939 | { | 644 | { |
940 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 645 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
941 | 646 | ||
@@ -954,8 +659,8 @@ svc_tcp_listen_data_ready(struct sock *sk, int count_unused) | |||
954 | */ | 659 | */ |
955 | if (sk->sk_state == TCP_LISTEN) { | 660 | if (sk->sk_state == TCP_LISTEN) { |
956 | if (svsk) { | 661 | if (svsk) { |
957 | set_bit(SK_CONN, &svsk->sk_flags); | 662 | set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); |
958 | svc_sock_enqueue(svsk); | 663 | svc_xprt_enqueue(&svsk->sk_xprt); |
959 | } else | 664 | } else |
960 | printk("svc: socket %p: no user data\n", sk); | 665 | printk("svc: socket %p: no user data\n", sk); |
961 | } | 666 | } |
@@ -967,8 +672,7 @@ svc_tcp_listen_data_ready(struct sock *sk, int count_unused) | |||
967 | /* | 672 | /* |
968 | * A state change on a connected socket means it's dying or dead. | 673 | * A state change on a connected socket means it's dying or dead. |
969 | */ | 674 | */ |
970 | static void | 675 | static void svc_tcp_state_change(struct sock *sk) |
971 | svc_tcp_state_change(struct sock *sk) | ||
972 | { | 676 | { |
973 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 677 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
974 | 678 | ||
@@ -978,51 +682,36 @@ svc_tcp_state_change(struct sock *sk) | |||
978 | if (!svsk) | 682 | if (!svsk) |
979 | printk("svc: socket %p: no user data\n", sk); | 683 | printk("svc: socket %p: no user data\n", sk); |
980 | else { | 684 | else { |
981 | set_bit(SK_CLOSE, &svsk->sk_flags); | 685 | set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); |
982 | svc_sock_enqueue(svsk); | 686 | svc_xprt_enqueue(&svsk->sk_xprt); |
983 | } | 687 | } |
984 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 688 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
985 | wake_up_interruptible_all(sk->sk_sleep); | 689 | wake_up_interruptible_all(sk->sk_sleep); |
986 | } | 690 | } |
987 | 691 | ||
988 | static void | 692 | static void svc_tcp_data_ready(struct sock *sk, int count) |
989 | svc_tcp_data_ready(struct sock *sk, int count) | ||
990 | { | 693 | { |
991 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 694 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
992 | 695 | ||
993 | dprintk("svc: socket %p TCP data ready (svsk %p)\n", | 696 | dprintk("svc: socket %p TCP data ready (svsk %p)\n", |
994 | sk, sk->sk_user_data); | 697 | sk, sk->sk_user_data); |
995 | if (svsk) { | 698 | if (svsk) { |
996 | set_bit(SK_DATA, &svsk->sk_flags); | 699 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
997 | svc_sock_enqueue(svsk); | 700 | svc_xprt_enqueue(&svsk->sk_xprt); |
998 | } | 701 | } |
999 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 702 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
1000 | wake_up_interruptible(sk->sk_sleep); | 703 | wake_up_interruptible(sk->sk_sleep); |
1001 | } | 704 | } |
1002 | 705 | ||
1003 | static inline int svc_port_is_privileged(struct sockaddr *sin) | ||
1004 | { | ||
1005 | switch (sin->sa_family) { | ||
1006 | case AF_INET: | ||
1007 | return ntohs(((struct sockaddr_in *)sin)->sin_port) | ||
1008 | < PROT_SOCK; | ||
1009 | case AF_INET6: | ||
1010 | return ntohs(((struct sockaddr_in6 *)sin)->sin6_port) | ||
1011 | < PROT_SOCK; | ||
1012 | default: | ||
1013 | return 0; | ||
1014 | } | ||
1015 | } | ||
1016 | |||
1017 | /* | 706 | /* |
1018 | * Accept a TCP connection | 707 | * Accept a TCP connection |
1019 | */ | 708 | */ |
1020 | static void | 709 | static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) |
1021 | svc_tcp_accept(struct svc_sock *svsk) | ||
1022 | { | 710 | { |
711 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); | ||
1023 | struct sockaddr_storage addr; | 712 | struct sockaddr_storage addr; |
1024 | struct sockaddr *sin = (struct sockaddr *) &addr; | 713 | struct sockaddr *sin = (struct sockaddr *) &addr; |
1025 | struct svc_serv *serv = svsk->sk_server; | 714 | struct svc_serv *serv = svsk->sk_xprt.xpt_server; |
1026 | struct socket *sock = svsk->sk_sock; | 715 | struct socket *sock = svsk->sk_sock; |
1027 | struct socket *newsock; | 716 | struct socket *newsock; |
1028 | struct svc_sock *newsvsk; | 717 | struct svc_sock *newsvsk; |
@@ -1031,9 +720,9 @@ svc_tcp_accept(struct svc_sock *svsk) | |||
1031 | 720 | ||
1032 | dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); | 721 | dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); |
1033 | if (!sock) | 722 | if (!sock) |
1034 | return; | 723 | return NULL; |
1035 | 724 | ||
1036 | clear_bit(SK_CONN, &svsk->sk_flags); | 725 | clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); |
1037 | err = kernel_accept(sock, &newsock, O_NONBLOCK); | 726 | err = kernel_accept(sock, &newsock, O_NONBLOCK); |
1038 | if (err < 0) { | 727 | if (err < 0) { |
1039 | if (err == -ENOMEM) | 728 | if (err == -ENOMEM) |
@@ -1042,11 +731,9 @@ svc_tcp_accept(struct svc_sock *svsk) | |||
1042 | else if (err != -EAGAIN && net_ratelimit()) | 731 | else if (err != -EAGAIN && net_ratelimit()) |
1043 | printk(KERN_WARNING "%s: accept failed (err %d)!\n", | 732 | printk(KERN_WARNING "%s: accept failed (err %d)!\n", |
1044 | serv->sv_name, -err); | 733 | serv->sv_name, -err); |
1045 | return; | 734 | return NULL; |
1046 | } | 735 | } |
1047 | 736 | set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); | |
1048 | set_bit(SK_CONN, &svsk->sk_flags); | ||
1049 | svc_sock_enqueue(svsk); | ||
1050 | 737 | ||
1051 | err = kernel_getpeername(newsock, sin, &slen); | 738 | err = kernel_getpeername(newsock, sin, &slen); |
1052 | if (err < 0) { | 739 | if (err < 0) { |
@@ -1077,106 +764,42 @@ svc_tcp_accept(struct svc_sock *svsk) | |||
1077 | if (!(newsvsk = svc_setup_socket(serv, newsock, &err, | 764 | if (!(newsvsk = svc_setup_socket(serv, newsock, &err, |
1078 | (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)))) | 765 | (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)))) |
1079 | goto failed; | 766 | goto failed; |
1080 | memcpy(&newsvsk->sk_remote, sin, slen); | 767 | svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen); |
1081 | newsvsk->sk_remotelen = slen; | ||
1082 | err = kernel_getsockname(newsock, sin, &slen); | 768 | err = kernel_getsockname(newsock, sin, &slen); |
1083 | if (unlikely(err < 0)) { | 769 | if (unlikely(err < 0)) { |
1084 | dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); | 770 | dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); |
1085 | slen = offsetof(struct sockaddr, sa_data); | 771 | slen = offsetof(struct sockaddr, sa_data); |
1086 | } | 772 | } |
1087 | memcpy(&newsvsk->sk_local, sin, slen); | 773 | svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen); |
1088 | |||
1089 | svc_sock_received(newsvsk); | ||
1090 | |||
1091 | /* make sure that we don't have too many active connections. | ||
1092 | * If we have, something must be dropped. | ||
1093 | * | ||
1094 | * There's no point in trying to do random drop here for | ||
1095 | * DoS prevention. The NFS clients does 1 reconnect in 15 | ||
1096 | * seconds. An attacker can easily beat that. | ||
1097 | * | ||
1098 | * The only somewhat efficient mechanism would be if drop | ||
1099 | * old connections from the same IP first. But right now | ||
1100 | * we don't even record the client IP in svc_sock. | ||
1101 | */ | ||
1102 | if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) { | ||
1103 | struct svc_sock *svsk = NULL; | ||
1104 | spin_lock_bh(&serv->sv_lock); | ||
1105 | if (!list_empty(&serv->sv_tempsocks)) { | ||
1106 | if (net_ratelimit()) { | ||
1107 | /* Try to help the admin */ | ||
1108 | printk(KERN_NOTICE "%s: too many open TCP " | ||
1109 | "sockets, consider increasing the " | ||
1110 | "number of nfsd threads\n", | ||
1111 | serv->sv_name); | ||
1112 | printk(KERN_NOTICE | ||
1113 | "%s: last TCP connect from %s\n", | ||
1114 | serv->sv_name, __svc_print_addr(sin, | ||
1115 | buf, sizeof(buf))); | ||
1116 | } | ||
1117 | /* | ||
1118 | * Always select the oldest socket. It's not fair, | ||
1119 | * but so is life | ||
1120 | */ | ||
1121 | svsk = list_entry(serv->sv_tempsocks.prev, | ||
1122 | struct svc_sock, | ||
1123 | sk_list); | ||
1124 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
1125 | atomic_inc(&svsk->sk_inuse); | ||
1126 | } | ||
1127 | spin_unlock_bh(&serv->sv_lock); | ||
1128 | |||
1129 | if (svsk) { | ||
1130 | svc_sock_enqueue(svsk); | ||
1131 | svc_sock_put(svsk); | ||
1132 | } | ||
1133 | |||
1134 | } | ||
1135 | 774 | ||
1136 | if (serv->sv_stats) | 775 | if (serv->sv_stats) |
1137 | serv->sv_stats->nettcpconn++; | 776 | serv->sv_stats->nettcpconn++; |
1138 | 777 | ||
1139 | return; | 778 | return &newsvsk->sk_xprt; |
1140 | 779 | ||
1141 | failed: | 780 | failed: |
1142 | sock_release(newsock); | 781 | sock_release(newsock); |
1143 | return; | 782 | return NULL; |
1144 | } | 783 | } |
1145 | 784 | ||
1146 | /* | 785 | /* |
1147 | * Receive data from a TCP socket. | 786 | * Receive data from a TCP socket. |
1148 | */ | 787 | */ |
1149 | static int | 788 | static int svc_tcp_recvfrom(struct svc_rqst *rqstp) |
1150 | svc_tcp_recvfrom(struct svc_rqst *rqstp) | ||
1151 | { | 789 | { |
1152 | struct svc_sock *svsk = rqstp->rq_sock; | 790 | struct svc_sock *svsk = |
1153 | struct svc_serv *serv = svsk->sk_server; | 791 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); |
792 | struct svc_serv *serv = svsk->sk_xprt.xpt_server; | ||
1154 | int len; | 793 | int len; |
1155 | struct kvec *vec; | 794 | struct kvec *vec; |
1156 | int pnum, vlen; | 795 | int pnum, vlen; |
1157 | 796 | ||
1158 | dprintk("svc: tcp_recv %p data %d conn %d close %d\n", | 797 | dprintk("svc: tcp_recv %p data %d conn %d close %d\n", |
1159 | svsk, test_bit(SK_DATA, &svsk->sk_flags), | 798 | svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), |
1160 | test_bit(SK_CONN, &svsk->sk_flags), | 799 | test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), |
1161 | test_bit(SK_CLOSE, &svsk->sk_flags)); | 800 | test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); |
1162 | 801 | ||
1163 | if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { | 802 | if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) |
1164 | svc_sock_received(svsk); | ||
1165 | return svc_deferred_recv(rqstp); | ||
1166 | } | ||
1167 | |||
1168 | if (test_bit(SK_CLOSE, &svsk->sk_flags)) { | ||
1169 | svc_delete_socket(svsk); | ||
1170 | return 0; | ||
1171 | } | ||
1172 | |||
1173 | if (svsk->sk_sk->sk_state == TCP_LISTEN) { | ||
1174 | svc_tcp_accept(svsk); | ||
1175 | svc_sock_received(svsk); | ||
1176 | return 0; | ||
1177 | } | ||
1178 | |||
1179 | if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) | ||
1180 | /* sndbuf needs to have room for one request | 803 | /* sndbuf needs to have room for one request |
1181 | * per thread, otherwise we can stall even when the | 804 | * per thread, otherwise we can stall even when the |
1182 | * network isn't a bottleneck. | 805 | * network isn't a bottleneck. |
@@ -1193,7 +816,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
1193 | (serv->sv_nrthreads+3) * serv->sv_max_mesg, | 816 | (serv->sv_nrthreads+3) * serv->sv_max_mesg, |
1194 | 3 * serv->sv_max_mesg); | 817 | 3 * serv->sv_max_mesg); |
1195 | 818 | ||
1196 | clear_bit(SK_DATA, &svsk->sk_flags); | 819 | clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
1197 | 820 | ||
1198 | /* Receive data. If we haven't got the record length yet, get | 821 | /* Receive data. If we haven't got the record length yet, get |
1199 | * the next four bytes. Otherwise try to gobble up as much as | 822 | * the next four bytes. Otherwise try to gobble up as much as |
@@ -1212,7 +835,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
1212 | if (len < want) { | 835 | if (len < want) { |
1213 | dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", | 836 | dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", |
1214 | len, want); | 837 | len, want); |
1215 | svc_sock_received(svsk); | 838 | svc_xprt_received(&svsk->sk_xprt); |
1216 | return -EAGAIN; /* record header not complete */ | 839 | return -EAGAIN; /* record header not complete */ |
1217 | } | 840 | } |
1218 | 841 | ||
@@ -1248,11 +871,11 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
1248 | if (len < svsk->sk_reclen) { | 871 | if (len < svsk->sk_reclen) { |
1249 | dprintk("svc: incomplete TCP record (%d of %d)\n", | 872 | dprintk("svc: incomplete TCP record (%d of %d)\n", |
1250 | len, svsk->sk_reclen); | 873 | len, svsk->sk_reclen); |
1251 | svc_sock_received(svsk); | 874 | svc_xprt_received(&svsk->sk_xprt); |
1252 | return -EAGAIN; /* record not complete */ | 875 | return -EAGAIN; /* record not complete */ |
1253 | } | 876 | } |
1254 | len = svsk->sk_reclen; | 877 | len = svsk->sk_reclen; |
1255 | set_bit(SK_DATA, &svsk->sk_flags); | 878 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
1256 | 879 | ||
1257 | vec = rqstp->rq_vec; | 880 | vec = rqstp->rq_vec; |
1258 | vec[0] = rqstp->rq_arg.head[0]; | 881 | vec[0] = rqstp->rq_arg.head[0]; |
@@ -1281,30 +904,31 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
1281 | rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; | 904 | rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; |
1282 | } | 905 | } |
1283 | 906 | ||
1284 | rqstp->rq_skbuff = NULL; | 907 | rqstp->rq_xprt_ctxt = NULL; |
1285 | rqstp->rq_prot = IPPROTO_TCP; | 908 | rqstp->rq_prot = IPPROTO_TCP; |
1286 | 909 | ||
1287 | /* Reset TCP read info */ | 910 | /* Reset TCP read info */ |
1288 | svsk->sk_reclen = 0; | 911 | svsk->sk_reclen = 0; |
1289 | svsk->sk_tcplen = 0; | 912 | svsk->sk_tcplen = 0; |
1290 | 913 | ||
1291 | svc_sock_received(svsk); | 914 | svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); |
915 | svc_xprt_received(&svsk->sk_xprt); | ||
1292 | if (serv->sv_stats) | 916 | if (serv->sv_stats) |
1293 | serv->sv_stats->nettcpcnt++; | 917 | serv->sv_stats->nettcpcnt++; |
1294 | 918 | ||
1295 | return len; | 919 | return len; |
1296 | 920 | ||
1297 | err_delete: | 921 | err_delete: |
1298 | svc_delete_socket(svsk); | 922 | set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); |
1299 | return -EAGAIN; | 923 | return -EAGAIN; |
1300 | 924 | ||
1301 | error: | 925 | error: |
1302 | if (len == -EAGAIN) { | 926 | if (len == -EAGAIN) { |
1303 | dprintk("RPC: TCP recvfrom got EAGAIN\n"); | 927 | dprintk("RPC: TCP recvfrom got EAGAIN\n"); |
1304 | svc_sock_received(svsk); | 928 | svc_xprt_received(&svsk->sk_xprt); |
1305 | } else { | 929 | } else { |
1306 | printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", | 930 | printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", |
1307 | svsk->sk_server->sv_name, -len); | 931 | svsk->sk_xprt.xpt_server->sv_name, -len); |
1308 | goto err_delete; | 932 | goto err_delete; |
1309 | } | 933 | } |
1310 | 934 | ||
@@ -1314,8 +938,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
1314 | /* | 938 | /* |
1315 | * Send out data on TCP socket. | 939 | * Send out data on TCP socket. |
1316 | */ | 940 | */ |
1317 | static int | 941 | static int svc_tcp_sendto(struct svc_rqst *rqstp) |
1318 | svc_tcp_sendto(struct svc_rqst *rqstp) | ||
1319 | { | 942 | { |
1320 | struct xdr_buf *xbufp = &rqstp->rq_res; | 943 | struct xdr_buf *xbufp = &rqstp->rq_res; |
1321 | int sent; | 944 | int sent; |
@@ -1328,35 +951,109 @@ svc_tcp_sendto(struct svc_rqst *rqstp) | |||
1328 | reclen = htonl(0x80000000|((xbufp->len ) - 4)); | 951 | reclen = htonl(0x80000000|((xbufp->len ) - 4)); |
1329 | memcpy(xbufp->head[0].iov_base, &reclen, 4); | 952 | memcpy(xbufp->head[0].iov_base, &reclen, 4); |
1330 | 953 | ||
1331 | if (test_bit(SK_DEAD, &rqstp->rq_sock->sk_flags)) | 954 | if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags)) |
1332 | return -ENOTCONN; | 955 | return -ENOTCONN; |
1333 | 956 | ||
1334 | sent = svc_sendto(rqstp, &rqstp->rq_res); | 957 | sent = svc_sendto(rqstp, &rqstp->rq_res); |
1335 | if (sent != xbufp->len) { | 958 | if (sent != xbufp->len) { |
1336 | printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", | 959 | printk(KERN_NOTICE |
1337 | rqstp->rq_sock->sk_server->sv_name, | 960 | "rpc-srv/tcp: %s: %s %d when sending %d bytes " |
961 | "- shutting down socket\n", | ||
962 | rqstp->rq_xprt->xpt_server->sv_name, | ||
1338 | (sent<0)?"got error":"sent only", | 963 | (sent<0)?"got error":"sent only", |
1339 | sent, xbufp->len); | 964 | sent, xbufp->len); |
1340 | set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags); | 965 | set_bit(XPT_CLOSE, &rqstp->rq_xprt->xpt_flags); |
1341 | svc_sock_enqueue(rqstp->rq_sock); | 966 | svc_xprt_enqueue(rqstp->rq_xprt); |
1342 | sent = -EAGAIN; | 967 | sent = -EAGAIN; |
1343 | } | 968 | } |
1344 | return sent; | 969 | return sent; |
1345 | } | 970 | } |
1346 | 971 | ||
1347 | static void | 972 | /* |
1348 | svc_tcp_init(struct svc_sock *svsk) | 973 | * Setup response header. TCP has a 4B record length field. |
974 | */ | ||
975 | static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) | ||
976 | { | ||
977 | struct kvec *resv = &rqstp->rq_res.head[0]; | ||
978 | |||
979 | /* tcp needs a space for the record length... */ | ||
980 | svc_putnl(resv, 0); | ||
981 | } | ||
982 | |||
983 | static int svc_tcp_has_wspace(struct svc_xprt *xprt) | ||
984 | { | ||
985 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); | ||
986 | struct svc_serv *serv = svsk->sk_xprt.xpt_server; | ||
987 | int required; | ||
988 | int wspace; | ||
989 | |||
990 | /* | ||
991 | * Set the SOCK_NOSPACE flag before checking the available | ||
992 | * sock space. | ||
993 | */ | ||
994 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
995 | required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg; | ||
996 | wspace = sk_stream_wspace(svsk->sk_sk); | ||
997 | |||
998 | if (wspace < sk_stream_min_wspace(svsk->sk_sk)) | ||
999 | return 0; | ||
1000 | if (required * 2 > wspace) | ||
1001 | return 0; | ||
1002 | |||
1003 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
1004 | return 1; | ||
1005 | } | ||
1006 | |||
1007 | static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, | ||
1008 | struct sockaddr *sa, int salen, | ||
1009 | int flags) | ||
1010 | { | ||
1011 | return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags); | ||
1012 | } | ||
1013 | |||
1014 | static struct svc_xprt_ops svc_tcp_ops = { | ||
1015 | .xpo_create = svc_tcp_create, | ||
1016 | .xpo_recvfrom = svc_tcp_recvfrom, | ||
1017 | .xpo_sendto = svc_tcp_sendto, | ||
1018 | .xpo_release_rqst = svc_release_skb, | ||
1019 | .xpo_detach = svc_sock_detach, | ||
1020 | .xpo_free = svc_sock_free, | ||
1021 | .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, | ||
1022 | .xpo_has_wspace = svc_tcp_has_wspace, | ||
1023 | .xpo_accept = svc_tcp_accept, | ||
1024 | }; | ||
1025 | |||
1026 | static struct svc_xprt_class svc_tcp_class = { | ||
1027 | .xcl_name = "tcp", | ||
1028 | .xcl_owner = THIS_MODULE, | ||
1029 | .xcl_ops = &svc_tcp_ops, | ||
1030 | .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, | ||
1031 | }; | ||
1032 | |||
1033 | void svc_init_xprt_sock(void) | ||
1034 | { | ||
1035 | svc_reg_xprt_class(&svc_tcp_class); | ||
1036 | svc_reg_xprt_class(&svc_udp_class); | ||
1037 | } | ||
1038 | |||
1039 | void svc_cleanup_xprt_sock(void) | ||
1040 | { | ||
1041 | svc_unreg_xprt_class(&svc_tcp_class); | ||
1042 | svc_unreg_xprt_class(&svc_udp_class); | ||
1043 | } | ||
1044 | |||
1045 | static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) | ||
1349 | { | 1046 | { |
1350 | struct sock *sk = svsk->sk_sk; | 1047 | struct sock *sk = svsk->sk_sk; |
1351 | struct tcp_sock *tp = tcp_sk(sk); | 1048 | struct tcp_sock *tp = tcp_sk(sk); |
1352 | 1049 | ||
1353 | svsk->sk_recvfrom = svc_tcp_recvfrom; | 1050 | svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv); |
1354 | svsk->sk_sendto = svc_tcp_sendto; | 1051 | set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); |
1355 | |||
1356 | if (sk->sk_state == TCP_LISTEN) { | 1052 | if (sk->sk_state == TCP_LISTEN) { |
1357 | dprintk("setting up TCP socket for listening\n"); | 1053 | dprintk("setting up TCP socket for listening\n"); |
1054 | set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); | ||
1358 | sk->sk_data_ready = svc_tcp_listen_data_ready; | 1055 | sk->sk_data_ready = svc_tcp_listen_data_ready; |
1359 | set_bit(SK_CONN, &svsk->sk_flags); | 1056 | set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); |
1360 | } else { | 1057 | } else { |
1361 | dprintk("setting up TCP socket for reading\n"); | 1058 | dprintk("setting up TCP socket for reading\n"); |
1362 | sk->sk_state_change = svc_tcp_state_change; | 1059 | sk->sk_state_change = svc_tcp_state_change; |
@@ -1373,18 +1070,17 @@ svc_tcp_init(struct svc_sock *svsk) | |||
1373 | * svc_tcp_recvfrom will re-adjust if necessary | 1070 | * svc_tcp_recvfrom will re-adjust if necessary |
1374 | */ | 1071 | */ |
1375 | svc_sock_setbufsize(svsk->sk_sock, | 1072 | svc_sock_setbufsize(svsk->sk_sock, |
1376 | 3 * svsk->sk_server->sv_max_mesg, | 1073 | 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, |
1377 | 3 * svsk->sk_server->sv_max_mesg); | 1074 | 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); |
1378 | 1075 | ||
1379 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 1076 | set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); |
1380 | set_bit(SK_DATA, &svsk->sk_flags); | 1077 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
1381 | if (sk->sk_state != TCP_ESTABLISHED) | 1078 | if (sk->sk_state != TCP_ESTABLISHED) |
1382 | set_bit(SK_CLOSE, &svsk->sk_flags); | 1079 | set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); |
1383 | } | 1080 | } |
1384 | } | 1081 | } |
1385 | 1082 | ||
1386 | void | 1083 | void svc_sock_update_bufs(struct svc_serv *serv) |
1387 | svc_sock_update_bufs(struct svc_serv *serv) | ||
1388 | { | 1084 | { |
1389 | /* | 1085 | /* |
1390 | * The number of server threads has changed. Update | 1086 | * The number of server threads has changed. Update |
@@ -1395,232 +1091,18 @@ svc_sock_update_bufs(struct svc_serv *serv) | |||
1395 | spin_lock_bh(&serv->sv_lock); | 1091 | spin_lock_bh(&serv->sv_lock); |
1396 | list_for_each(le, &serv->sv_permsocks) { | 1092 | list_for_each(le, &serv->sv_permsocks) { |
1397 | struct svc_sock *svsk = | 1093 | struct svc_sock *svsk = |
1398 | list_entry(le, struct svc_sock, sk_list); | 1094 | list_entry(le, struct svc_sock, sk_xprt.xpt_list); |
1399 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 1095 | set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); |
1400 | } | 1096 | } |
1401 | list_for_each(le, &serv->sv_tempsocks) { | 1097 | list_for_each(le, &serv->sv_tempsocks) { |
1402 | struct svc_sock *svsk = | 1098 | struct svc_sock *svsk = |
1403 | list_entry(le, struct svc_sock, sk_list); | 1099 | list_entry(le, struct svc_sock, sk_xprt.xpt_list); |
1404 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 1100 | set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); |
1405 | } | 1101 | } |
1406 | spin_unlock_bh(&serv->sv_lock); | 1102 | spin_unlock_bh(&serv->sv_lock); |
1407 | } | 1103 | } |
1408 | 1104 | ||
1409 | /* | 1105 | /* |
1410 | * Receive the next request on any socket. This code is carefully | ||
1411 | * organised not to touch any cachelines in the shared svc_serv | ||
1412 | * structure, only cachelines in the local svc_pool. | ||
1413 | */ | ||
1414 | int | ||
1415 | svc_recv(struct svc_rqst *rqstp, long timeout) | ||
1416 | { | ||
1417 | struct svc_sock *svsk = NULL; | ||
1418 | struct svc_serv *serv = rqstp->rq_server; | ||
1419 | struct svc_pool *pool = rqstp->rq_pool; | ||
1420 | int len, i; | ||
1421 | int pages; | ||
1422 | struct xdr_buf *arg; | ||
1423 | DECLARE_WAITQUEUE(wait, current); | ||
1424 | |||
1425 | dprintk("svc: server %p waiting for data (to = %ld)\n", | ||
1426 | rqstp, timeout); | ||
1427 | |||
1428 | if (rqstp->rq_sock) | ||
1429 | printk(KERN_ERR | ||
1430 | "svc_recv: service %p, socket not NULL!\n", | ||
1431 | rqstp); | ||
1432 | if (waitqueue_active(&rqstp->rq_wait)) | ||
1433 | printk(KERN_ERR | ||
1434 | "svc_recv: service %p, wait queue active!\n", | ||
1435 | rqstp); | ||
1436 | |||
1437 | |||
1438 | /* now allocate needed pages. If we get a failure, sleep briefly */ | ||
1439 | pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE; | ||
1440 | for (i=0; i < pages ; i++) | ||
1441 | while (rqstp->rq_pages[i] == NULL) { | ||
1442 | struct page *p = alloc_page(GFP_KERNEL); | ||
1443 | if (!p) | ||
1444 | schedule_timeout_uninterruptible(msecs_to_jiffies(500)); | ||
1445 | rqstp->rq_pages[i] = p; | ||
1446 | } | ||
1447 | rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ | ||
1448 | BUG_ON(pages >= RPCSVC_MAXPAGES); | ||
1449 | |||
1450 | /* Make arg->head point to first page and arg->pages point to rest */ | ||
1451 | arg = &rqstp->rq_arg; | ||
1452 | arg->head[0].iov_base = page_address(rqstp->rq_pages[0]); | ||
1453 | arg->head[0].iov_len = PAGE_SIZE; | ||
1454 | arg->pages = rqstp->rq_pages + 1; | ||
1455 | arg->page_base = 0; | ||
1456 | /* save at least one page for response */ | ||
1457 | arg->page_len = (pages-2)*PAGE_SIZE; | ||
1458 | arg->len = (pages-1)*PAGE_SIZE; | ||
1459 | arg->tail[0].iov_len = 0; | ||
1460 | |||
1461 | try_to_freeze(); | ||
1462 | cond_resched(); | ||
1463 | if (signalled()) | ||
1464 | return -EINTR; | ||
1465 | |||
1466 | spin_lock_bh(&pool->sp_lock); | ||
1467 | if ((svsk = svc_sock_dequeue(pool)) != NULL) { | ||
1468 | rqstp->rq_sock = svsk; | ||
1469 | atomic_inc(&svsk->sk_inuse); | ||
1470 | rqstp->rq_reserved = serv->sv_max_mesg; | ||
1471 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); | ||
1472 | } else { | ||
1473 | /* No data pending. Go to sleep */ | ||
1474 | svc_thread_enqueue(pool, rqstp); | ||
1475 | |||
1476 | /* | ||
1477 | * We have to be able to interrupt this wait | ||
1478 | * to bring down the daemons ... | ||
1479 | */ | ||
1480 | set_current_state(TASK_INTERRUPTIBLE); | ||
1481 | add_wait_queue(&rqstp->rq_wait, &wait); | ||
1482 | spin_unlock_bh(&pool->sp_lock); | ||
1483 | |||
1484 | schedule_timeout(timeout); | ||
1485 | |||
1486 | try_to_freeze(); | ||
1487 | |||
1488 | spin_lock_bh(&pool->sp_lock); | ||
1489 | remove_wait_queue(&rqstp->rq_wait, &wait); | ||
1490 | |||
1491 | if (!(svsk = rqstp->rq_sock)) { | ||
1492 | svc_thread_dequeue(pool, rqstp); | ||
1493 | spin_unlock_bh(&pool->sp_lock); | ||
1494 | dprintk("svc: server %p, no data yet\n", rqstp); | ||
1495 | return signalled()? -EINTR : -EAGAIN; | ||
1496 | } | ||
1497 | } | ||
1498 | spin_unlock_bh(&pool->sp_lock); | ||
1499 | |||
1500 | dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n", | ||
1501 | rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse)); | ||
1502 | len = svsk->sk_recvfrom(rqstp); | ||
1503 | dprintk("svc: got len=%d\n", len); | ||
1504 | |||
1505 | /* No data, incomplete (TCP) read, or accept() */ | ||
1506 | if (len == 0 || len == -EAGAIN) { | ||
1507 | rqstp->rq_res.len = 0; | ||
1508 | svc_sock_release(rqstp); | ||
1509 | return -EAGAIN; | ||
1510 | } | ||
1511 | svsk->sk_lastrecv = get_seconds(); | ||
1512 | clear_bit(SK_OLD, &svsk->sk_flags); | ||
1513 | |||
1514 | rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); | ||
1515 | rqstp->rq_chandle.defer = svc_defer; | ||
1516 | |||
1517 | if (serv->sv_stats) | ||
1518 | serv->sv_stats->netcnt++; | ||
1519 | return len; | ||
1520 | } | ||
1521 | |||
1522 | /* | ||
1523 | * Drop request | ||
1524 | */ | ||
1525 | void | ||
1526 | svc_drop(struct svc_rqst *rqstp) | ||
1527 | { | ||
1528 | dprintk("svc: socket %p dropped request\n", rqstp->rq_sock); | ||
1529 | svc_sock_release(rqstp); | ||
1530 | } | ||
1531 | |||
1532 | /* | ||
1533 | * Return reply to client. | ||
1534 | */ | ||
1535 | int | ||
1536 | svc_send(struct svc_rqst *rqstp) | ||
1537 | { | ||
1538 | struct svc_sock *svsk; | ||
1539 | int len; | ||
1540 | struct xdr_buf *xb; | ||
1541 | |||
1542 | if ((svsk = rqstp->rq_sock) == NULL) { | ||
1543 | printk(KERN_WARNING "NULL socket pointer in %s:%d\n", | ||
1544 | __FILE__, __LINE__); | ||
1545 | return -EFAULT; | ||
1546 | } | ||
1547 | |||
1548 | /* release the receive skb before sending the reply */ | ||
1549 | svc_release_skb(rqstp); | ||
1550 | |||
1551 | /* calculate over-all length */ | ||
1552 | xb = & rqstp->rq_res; | ||
1553 | xb->len = xb->head[0].iov_len + | ||
1554 | xb->page_len + | ||
1555 | xb->tail[0].iov_len; | ||
1556 | |||
1557 | /* Grab svsk->sk_mutex to serialize outgoing data. */ | ||
1558 | mutex_lock(&svsk->sk_mutex); | ||
1559 | if (test_bit(SK_DEAD, &svsk->sk_flags)) | ||
1560 | len = -ENOTCONN; | ||
1561 | else | ||
1562 | len = svsk->sk_sendto(rqstp); | ||
1563 | mutex_unlock(&svsk->sk_mutex); | ||
1564 | svc_sock_release(rqstp); | ||
1565 | |||
1566 | if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) | ||
1567 | return 0; | ||
1568 | return len; | ||
1569 | } | ||
1570 | |||
1571 | /* | ||
1572 | * Timer function to close old temporary sockets, using | ||
1573 | * a mark-and-sweep algorithm. | ||
1574 | */ | ||
1575 | static void | ||
1576 | svc_age_temp_sockets(unsigned long closure) | ||
1577 | { | ||
1578 | struct svc_serv *serv = (struct svc_serv *)closure; | ||
1579 | struct svc_sock *svsk; | ||
1580 | struct list_head *le, *next; | ||
1581 | LIST_HEAD(to_be_aged); | ||
1582 | |||
1583 | dprintk("svc_age_temp_sockets\n"); | ||
1584 | |||
1585 | if (!spin_trylock_bh(&serv->sv_lock)) { | ||
1586 | /* busy, try again 1 sec later */ | ||
1587 | dprintk("svc_age_temp_sockets: busy\n"); | ||
1588 | mod_timer(&serv->sv_temptimer, jiffies + HZ); | ||
1589 | return; | ||
1590 | } | ||
1591 | |||
1592 | list_for_each_safe(le, next, &serv->sv_tempsocks) { | ||
1593 | svsk = list_entry(le, struct svc_sock, sk_list); | ||
1594 | |||
1595 | if (!test_and_set_bit(SK_OLD, &svsk->sk_flags)) | ||
1596 | continue; | ||
1597 | if (atomic_read(&svsk->sk_inuse) > 1 || test_bit(SK_BUSY, &svsk->sk_flags)) | ||
1598 | continue; | ||
1599 | atomic_inc(&svsk->sk_inuse); | ||
1600 | list_move(le, &to_be_aged); | ||
1601 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
1602 | set_bit(SK_DETACHED, &svsk->sk_flags); | ||
1603 | } | ||
1604 | spin_unlock_bh(&serv->sv_lock); | ||
1605 | |||
1606 | while (!list_empty(&to_be_aged)) { | ||
1607 | le = to_be_aged.next; | ||
1608 | /* fiddling the sk_list node is safe 'cos we're SK_DETACHED */ | ||
1609 | list_del_init(le); | ||
1610 | svsk = list_entry(le, struct svc_sock, sk_list); | ||
1611 | |||
1612 | dprintk("queuing svsk %p for closing, %lu seconds old\n", | ||
1613 | svsk, get_seconds() - svsk->sk_lastrecv); | ||
1614 | |||
1615 | /* a thread will dequeue and close it soon */ | ||
1616 | svc_sock_enqueue(svsk); | ||
1617 | svc_sock_put(svsk); | ||
1618 | } | ||
1619 | |||
1620 | mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); | ||
1621 | } | ||
1622 | |||
1623 | /* | ||
1624 | * Initialize socket for RPC use and create svc_sock struct | 1106 | * Initialize socket for RPC use and create svc_sock struct |
1625 | * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. | 1107 | * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. |
1626 | */ | 1108 | */ |
@@ -1631,7 +1113,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, | |||
1631 | struct svc_sock *svsk; | 1113 | struct svc_sock *svsk; |
1632 | struct sock *inet; | 1114 | struct sock *inet; |
1633 | int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); | 1115 | int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); |
1634 | int is_temporary = flags & SVC_SOCK_TEMPORARY; | ||
1635 | 1116 | ||
1636 | dprintk("svc: svc_setup_socket %p\n", sock); | 1117 | dprintk("svc: svc_setup_socket %p\n", sock); |
1637 | if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { | 1118 | if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { |
@@ -1651,44 +1132,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, | |||
1651 | return NULL; | 1132 | return NULL; |
1652 | } | 1133 | } |
1653 | 1134 | ||
1654 | set_bit(SK_BUSY, &svsk->sk_flags); | ||
1655 | inet->sk_user_data = svsk; | 1135 | inet->sk_user_data = svsk; |
1656 | svsk->sk_sock = sock; | 1136 | svsk->sk_sock = sock; |
1657 | svsk->sk_sk = inet; | 1137 | svsk->sk_sk = inet; |
1658 | svsk->sk_ostate = inet->sk_state_change; | 1138 | svsk->sk_ostate = inet->sk_state_change; |
1659 | svsk->sk_odata = inet->sk_data_ready; | 1139 | svsk->sk_odata = inet->sk_data_ready; |
1660 | svsk->sk_owspace = inet->sk_write_space; | 1140 | svsk->sk_owspace = inet->sk_write_space; |
1661 | svsk->sk_server = serv; | ||
1662 | atomic_set(&svsk->sk_inuse, 1); | ||
1663 | svsk->sk_lastrecv = get_seconds(); | ||
1664 | spin_lock_init(&svsk->sk_lock); | ||
1665 | INIT_LIST_HEAD(&svsk->sk_deferred); | ||
1666 | INIT_LIST_HEAD(&svsk->sk_ready); | ||
1667 | mutex_init(&svsk->sk_mutex); | ||
1668 | 1141 | ||
1669 | /* Initialize the socket */ | 1142 | /* Initialize the socket */ |
1670 | if (sock->type == SOCK_DGRAM) | 1143 | if (sock->type == SOCK_DGRAM) |
1671 | svc_udp_init(svsk); | 1144 | svc_udp_init(svsk, serv); |
1672 | else | 1145 | else |
1673 | svc_tcp_init(svsk); | 1146 | svc_tcp_init(svsk, serv); |
1674 | |||
1675 | spin_lock_bh(&serv->sv_lock); | ||
1676 | if (is_temporary) { | ||
1677 | set_bit(SK_TEMP, &svsk->sk_flags); | ||
1678 | list_add(&svsk->sk_list, &serv->sv_tempsocks); | ||
1679 | serv->sv_tmpcnt++; | ||
1680 | if (serv->sv_temptimer.function == NULL) { | ||
1681 | /* setup timer to age temp sockets */ | ||
1682 | setup_timer(&serv->sv_temptimer, svc_age_temp_sockets, | ||
1683 | (unsigned long)serv); | ||
1684 | mod_timer(&serv->sv_temptimer, | ||
1685 | jiffies + svc_conn_age_period * HZ); | ||
1686 | } | ||
1687 | } else { | ||
1688 | clear_bit(SK_TEMP, &svsk->sk_flags); | ||
1689 | list_add(&svsk->sk_list, &serv->sv_permsocks); | ||
1690 | } | ||
1691 | spin_unlock_bh(&serv->sv_lock); | ||
1692 | 1147 | ||
1693 | dprintk("svc: svc_setup_socket created %p (inet %p)\n", | 1148 | dprintk("svc: svc_setup_socket created %p (inet %p)\n", |
1694 | svsk, svsk->sk_sk); | 1149 | svsk, svsk->sk_sk); |
@@ -1717,7 +1172,16 @@ int svc_addsock(struct svc_serv *serv, | |||
1717 | else { | 1172 | else { |
1718 | svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS); | 1173 | svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS); |
1719 | if (svsk) { | 1174 | if (svsk) { |
1720 | svc_sock_received(svsk); | 1175 | struct sockaddr_storage addr; |
1176 | struct sockaddr *sin = (struct sockaddr *)&addr; | ||
1177 | int salen; | ||
1178 | if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0) | ||
1179 | svc_xprt_set_local(&svsk->sk_xprt, sin, salen); | ||
1180 | clear_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags); | ||
1181 | spin_lock_bh(&serv->sv_lock); | ||
1182 | list_add(&svsk->sk_xprt.xpt_list, &serv->sv_permsocks); | ||
1183 | spin_unlock_bh(&serv->sv_lock); | ||
1184 | svc_xprt_received(&svsk->sk_xprt); | ||
1721 | err = 0; | 1185 | err = 0; |
1722 | } | 1186 | } |
1723 | } | 1187 | } |
@@ -1733,14 +1197,19 @@ EXPORT_SYMBOL_GPL(svc_addsock); | |||
1733 | /* | 1197 | /* |
1734 | * Create socket for RPC service. | 1198 | * Create socket for RPC service. |
1735 | */ | 1199 | */ |
1736 | static int svc_create_socket(struct svc_serv *serv, int protocol, | 1200 | static struct svc_xprt *svc_create_socket(struct svc_serv *serv, |
1737 | struct sockaddr *sin, int len, int flags) | 1201 | int protocol, |
1202 | struct sockaddr *sin, int len, | ||
1203 | int flags) | ||
1738 | { | 1204 | { |
1739 | struct svc_sock *svsk; | 1205 | struct svc_sock *svsk; |
1740 | struct socket *sock; | 1206 | struct socket *sock; |
1741 | int error; | 1207 | int error; |
1742 | int type; | 1208 | int type; |
1743 | char buf[RPC_MAX_ADDRBUFLEN]; | 1209 | char buf[RPC_MAX_ADDRBUFLEN]; |
1210 | struct sockaddr_storage addr; | ||
1211 | struct sockaddr *newsin = (struct sockaddr *)&addr; | ||
1212 | int newlen; | ||
1744 | 1213 | ||
1745 | dprintk("svc: svc_create_socket(%s, %d, %s)\n", | 1214 | dprintk("svc: svc_create_socket(%s, %d, %s)\n", |
1746 | serv->sv_program->pg_name, protocol, | 1215 | serv->sv_program->pg_name, protocol, |
@@ -1749,13 +1218,13 @@ static int svc_create_socket(struct svc_serv *serv, int protocol, | |||
1749 | if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { | 1218 | if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { |
1750 | printk(KERN_WARNING "svc: only UDP and TCP " | 1219 | printk(KERN_WARNING "svc: only UDP and TCP " |
1751 | "sockets supported\n"); | 1220 | "sockets supported\n"); |
1752 | return -EINVAL; | 1221 | return ERR_PTR(-EINVAL); |
1753 | } | 1222 | } |
1754 | type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; | 1223 | type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; |
1755 | 1224 | ||
1756 | error = sock_create_kern(sin->sa_family, type, protocol, &sock); | 1225 | error = sock_create_kern(sin->sa_family, type, protocol, &sock); |
1757 | if (error < 0) | 1226 | if (error < 0) |
1758 | return error; | 1227 | return ERR_PTR(error); |
1759 | 1228 | ||
1760 | svc_reclassify_socket(sock); | 1229 | svc_reclassify_socket(sock); |
1761 | 1230 | ||
@@ -1765,203 +1234,55 @@ static int svc_create_socket(struct svc_serv *serv, int protocol, | |||
1765 | if (error < 0) | 1234 | if (error < 0) |
1766 | goto bummer; | 1235 | goto bummer; |
1767 | 1236 | ||
1237 | newlen = len; | ||
1238 | error = kernel_getsockname(sock, newsin, &newlen); | ||
1239 | if (error < 0) | ||
1240 | goto bummer; | ||
1241 | |||
1768 | if (protocol == IPPROTO_TCP) { | 1242 | if (protocol == IPPROTO_TCP) { |
1769 | if ((error = kernel_listen(sock, 64)) < 0) | 1243 | if ((error = kernel_listen(sock, 64)) < 0) |
1770 | goto bummer; | 1244 | goto bummer; |
1771 | } | 1245 | } |
1772 | 1246 | ||
1773 | if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) { | 1247 | if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) { |
1774 | svc_sock_received(svsk); | 1248 | svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen); |
1775 | return ntohs(inet_sk(svsk->sk_sk)->sport); | 1249 | return (struct svc_xprt *)svsk; |
1776 | } | 1250 | } |
1777 | 1251 | ||
1778 | bummer: | 1252 | bummer: |
1779 | dprintk("svc: svc_create_socket error = %d\n", -error); | 1253 | dprintk("svc: svc_create_socket error = %d\n", -error); |
1780 | sock_release(sock); | 1254 | sock_release(sock); |
1781 | return error; | 1255 | return ERR_PTR(error); |
1782 | } | 1256 | } |
1783 | 1257 | ||
1784 | /* | 1258 | /* |
1785 | * Remove a dead socket | 1259 | * Detach the svc_sock from the socket so that no |
1260 | * more callbacks occur. | ||
1786 | */ | 1261 | */ |
1787 | static void | 1262 | static void svc_sock_detach(struct svc_xprt *xprt) |
1788 | svc_delete_socket(struct svc_sock *svsk) | ||
1789 | { | 1263 | { |
1790 | struct svc_serv *serv; | 1264 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); |
1791 | struct sock *sk; | 1265 | struct sock *sk = svsk->sk_sk; |
1792 | |||
1793 | dprintk("svc: svc_delete_socket(%p)\n", svsk); | ||
1794 | 1266 | ||
1795 | serv = svsk->sk_server; | 1267 | dprintk("svc: svc_sock_detach(%p)\n", svsk); |
1796 | sk = svsk->sk_sk; | ||
1797 | 1268 | ||
1269 | /* put back the old socket callbacks */ | ||
1798 | sk->sk_state_change = svsk->sk_ostate; | 1270 | sk->sk_state_change = svsk->sk_ostate; |
1799 | sk->sk_data_ready = svsk->sk_odata; | 1271 | sk->sk_data_ready = svsk->sk_odata; |
1800 | sk->sk_write_space = svsk->sk_owspace; | 1272 | sk->sk_write_space = svsk->sk_owspace; |
1801 | |||
1802 | spin_lock_bh(&serv->sv_lock); | ||
1803 | |||
1804 | if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags)) | ||
1805 | list_del_init(&svsk->sk_list); | ||
1806 | /* | ||
1807 | * We used to delete the svc_sock from whichever list | ||
1808 | * it's sk_ready node was on, but we don't actually | ||
1809 | * need to. This is because the only time we're called | ||
1810 | * while still attached to a queue, the queue itself | ||
1811 | * is about to be destroyed (in svc_destroy). | ||
1812 | */ | ||
1813 | if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) { | ||
1814 | BUG_ON(atomic_read(&svsk->sk_inuse)<2); | ||
1815 | atomic_dec(&svsk->sk_inuse); | ||
1816 | if (test_bit(SK_TEMP, &svsk->sk_flags)) | ||
1817 | serv->sv_tmpcnt--; | ||
1818 | } | ||
1819 | |||
1820 | spin_unlock_bh(&serv->sv_lock); | ||
1821 | } | ||
1822 | |||
1823 | static void svc_close_socket(struct svc_sock *svsk) | ||
1824 | { | ||
1825 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
1826 | if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) | ||
1827 | /* someone else will have to effect the close */ | ||
1828 | return; | ||
1829 | |||
1830 | atomic_inc(&svsk->sk_inuse); | ||
1831 | svc_delete_socket(svsk); | ||
1832 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
1833 | svc_sock_put(svsk); | ||
1834 | } | ||
1835 | |||
1836 | void svc_force_close_socket(struct svc_sock *svsk) | ||
1837 | { | ||
1838 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
1839 | if (test_bit(SK_BUSY, &svsk->sk_flags)) { | ||
1840 | /* Waiting to be processed, but no threads left, | ||
1841 | * So just remove it from the waiting list | ||
1842 | */ | ||
1843 | list_del_init(&svsk->sk_ready); | ||
1844 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
1845 | } | ||
1846 | svc_close_socket(svsk); | ||
1847 | } | ||
1848 | |||
1849 | /** | ||
1850 | * svc_makesock - Make a socket for nfsd and lockd | ||
1851 | * @serv: RPC server structure | ||
1852 | * @protocol: transport protocol to use | ||
1853 | * @port: port to use | ||
1854 | * @flags: requested socket characteristics | ||
1855 | * | ||
1856 | */ | ||
1857 | int svc_makesock(struct svc_serv *serv, int protocol, unsigned short port, | ||
1858 | int flags) | ||
1859 | { | ||
1860 | struct sockaddr_in sin = { | ||
1861 | .sin_family = AF_INET, | ||
1862 | .sin_addr.s_addr = INADDR_ANY, | ||
1863 | .sin_port = htons(port), | ||
1864 | }; | ||
1865 | |||
1866 | dprintk("svc: creating socket proto = %d\n", protocol); | ||
1867 | return svc_create_socket(serv, protocol, (struct sockaddr *) &sin, | ||
1868 | sizeof(sin), flags); | ||
1869 | } | 1273 | } |
1870 | 1274 | ||
1871 | /* | 1275 | /* |
1872 | * Handle defer and revisit of requests | 1276 | * Free the svc_sock's socket resources and the svc_sock itself. |
1873 | */ | 1277 | */ |
1874 | 1278 | static void svc_sock_free(struct svc_xprt *xprt) | |
1875 | static void svc_revisit(struct cache_deferred_req *dreq, int too_many) | ||
1876 | { | 1279 | { |
1877 | struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle); | 1280 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); |
1878 | struct svc_sock *svsk; | 1281 | dprintk("svc: svc_sock_free(%p)\n", svsk); |
1879 | 1282 | ||
1880 | if (too_many) { | 1283 | if (svsk->sk_sock->file) |
1881 | svc_sock_put(dr->svsk); | 1284 | sockfd_put(svsk->sk_sock); |
1882 | kfree(dr); | 1285 | else |
1883 | return; | 1286 | sock_release(svsk->sk_sock); |
1884 | } | 1287 | kfree(svsk); |
1885 | dprintk("revisit queued\n"); | ||
1886 | svsk = dr->svsk; | ||
1887 | dr->svsk = NULL; | ||
1888 | spin_lock(&svsk->sk_lock); | ||
1889 | list_add(&dr->handle.recent, &svsk->sk_deferred); | ||
1890 | spin_unlock(&svsk->sk_lock); | ||
1891 | set_bit(SK_DEFERRED, &svsk->sk_flags); | ||
1892 | svc_sock_enqueue(svsk); | ||
1893 | svc_sock_put(svsk); | ||
1894 | } | ||
1895 | |||
1896 | static struct cache_deferred_req * | ||
1897 | svc_defer(struct cache_req *req) | ||
1898 | { | ||
1899 | struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); | ||
1900 | int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.len); | ||
1901 | struct svc_deferred_req *dr; | ||
1902 | |||
1903 | if (rqstp->rq_arg.page_len) | ||
1904 | return NULL; /* if more than a page, give up FIXME */ | ||
1905 | if (rqstp->rq_deferred) { | ||
1906 | dr = rqstp->rq_deferred; | ||
1907 | rqstp->rq_deferred = NULL; | ||
1908 | } else { | ||
1909 | int skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; | ||
1910 | /* FIXME maybe discard if size too large */ | ||
1911 | dr = kmalloc(size, GFP_KERNEL); | ||
1912 | if (dr == NULL) | ||
1913 | return NULL; | ||
1914 | |||
1915 | dr->handle.owner = rqstp->rq_server; | ||
1916 | dr->prot = rqstp->rq_prot; | ||
1917 | memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen); | ||
1918 | dr->addrlen = rqstp->rq_addrlen; | ||
1919 | dr->daddr = rqstp->rq_daddr; | ||
1920 | dr->argslen = rqstp->rq_arg.len >> 2; | ||
1921 | memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2); | ||
1922 | } | ||
1923 | atomic_inc(&rqstp->rq_sock->sk_inuse); | ||
1924 | dr->svsk = rqstp->rq_sock; | ||
1925 | |||
1926 | dr->handle.revisit = svc_revisit; | ||
1927 | return &dr->handle; | ||
1928 | } | ||
1929 | |||
1930 | /* | ||
1931 | * recv data from a deferred request into an active one | ||
1932 | */ | ||
1933 | static int svc_deferred_recv(struct svc_rqst *rqstp) | ||
1934 | { | ||
1935 | struct svc_deferred_req *dr = rqstp->rq_deferred; | ||
1936 | |||
1937 | rqstp->rq_arg.head[0].iov_base = dr->args; | ||
1938 | rqstp->rq_arg.head[0].iov_len = dr->argslen<<2; | ||
1939 | rqstp->rq_arg.page_len = 0; | ||
1940 | rqstp->rq_arg.len = dr->argslen<<2; | ||
1941 | rqstp->rq_prot = dr->prot; | ||
1942 | memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen); | ||
1943 | rqstp->rq_addrlen = dr->addrlen; | ||
1944 | rqstp->rq_daddr = dr->daddr; | ||
1945 | rqstp->rq_respages = rqstp->rq_pages; | ||
1946 | return dr->argslen<<2; | ||
1947 | } | ||
1948 | |||
1949 | |||
1950 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk) | ||
1951 | { | ||
1952 | struct svc_deferred_req *dr = NULL; | ||
1953 | |||
1954 | if (!test_bit(SK_DEFERRED, &svsk->sk_flags)) | ||
1955 | return NULL; | ||
1956 | spin_lock(&svsk->sk_lock); | ||
1957 | clear_bit(SK_DEFERRED, &svsk->sk_flags); | ||
1958 | if (!list_empty(&svsk->sk_deferred)) { | ||
1959 | dr = list_entry(svsk->sk_deferred.next, | ||
1960 | struct svc_deferred_req, | ||
1961 | handle.recent); | ||
1962 | list_del_init(&dr->handle.recent); | ||
1963 | set_bit(SK_DEFERRED, &svsk->sk_flags); | ||
1964 | } | ||
1965 | spin_unlock(&svsk->sk_lock); | ||
1966 | return dr; | ||
1967 | } | 1288 | } |