Diffstat (limited to 'net/ipv4/inet_connection_sock.c')
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 641
1 file changed, 641 insertions, 0 deletions
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
new file mode 100644
index 000000000000..fe3c6d3d0c91
--- /dev/null
+++ b/net/ipv4/inet_connection_sock.c
@@ -0,0 +1,641 @@
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Support for INET connection oriented protocols. | ||
7 | * | ||
8 | * Authors: See the TCP sources | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | */ | ||
15 | |||
16 | #include <linux/config.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/jhash.h> | ||
19 | |||
20 | #include <net/inet_connection_sock.h> | ||
21 | #include <net/inet_hashtables.h> | ||
22 | #include <net/inet_timewait_sock.h> | ||
23 | #include <net/ip.h> | ||
24 | #include <net/route.h> | ||
25 | #include <net/tcp_states.h> | ||
26 | #include <net/xfrm.h> | ||
27 | |||
28 | #ifdef INET_CSK_DEBUG | ||
29 | const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; | ||
30 | EXPORT_SYMBOL(inet_csk_timer_bug_msg); | ||
31 | #endif | ||
32 | |||
33 | /* | ||
34 | * This array holds the first and last local port number. | ||
35 | * For high-usage systems, use sysctl to change this to | ||
36 | * 32768-61000 | ||
37 | */ | ||
38 | int sysctl_local_port_range[2] = { 1024, 4999 }; | ||
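
As an aside, the two-element array above is what the ip_local_port_range sysctl exposes, so a small user-space sketch (illustrative only, not part of the patch) can read the effective range from procfs:

#include <stdio.h>

int main(void)
{
        int low, high;
        FILE *f = fopen("/proc/sys/net/ipv4/ip_local_port_range", "r");

        if (f && fscanf(f, "%d %d", &low, &high) == 2)
                printf("ephemeral port range: %d-%d\n", low, high);
        if (f)
                fclose(f);
        return 0;
}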
39 | |||
40 | static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) | ||
41 | { | ||
42 | const u32 sk_rcv_saddr = inet_rcv_saddr(sk); | ||
43 | struct sock *sk2; | ||
44 | struct hlist_node *node; | ||
45 | int reuse = sk->sk_reuse; | ||
46 | |||
47 | sk_for_each_bound(sk2, node, &tb->owners) { | ||
48 | if (sk != sk2 && | ||
49 | !inet_v6_ipv6only(sk2) && | ||
50 | (!sk->sk_bound_dev_if || | ||
51 | !sk2->sk_bound_dev_if || | ||
52 | sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { | ||
53 | if (!reuse || !sk2->sk_reuse || | ||
54 | sk2->sk_state == TCP_LISTEN) { | ||
55 | const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); | ||
56 | if (!sk2_rcv_saddr || !sk_rcv_saddr || | ||
57 | sk2_rcv_saddr == sk_rcv_saddr) | ||
58 | break; | ||
59 | } | ||
60 | } | ||
61 | } | ||
62 | return node != NULL; | ||
63 | } | ||
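
For context, the rules inet_csk_bind_conflict() implements are visible from user space. A hedged sketch (not part of the patch; port 5555 is an arbitrary example): two sockets may bind the same address/port only if both set SO_REUSEADDR and neither is listening.

#include <stdio.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        struct sockaddr_in a = { .sin_family = AF_INET,
                                 .sin_port = htons(5555) }; /* addr = INADDR_ANY */
        int one = 1;
        int s1 = socket(AF_INET, SOCK_STREAM, 0);
        int s2 = socket(AF_INET, SOCK_STREAM, 0);

        setsockopt(s1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
        setsockopt(s2, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));

        /* Both binds succeed: both owners have sk_reuse set and neither listens. */
        printf("bind #1: %d\n", bind(s1, (struct sockaddr *)&a, sizeof(a)));
        printf("bind #2: %d\n", bind(s2, (struct sockaddr *)&a, sizeof(a)));

        /* Had s1 called listen() first, the second bind would fail with
         * EADDRINUSE, mirroring the TCP_LISTEN test in the loop above. */
        close(s1);
        close(s2);
        return 0;
}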
64 | |||
65 | /* Obtain a reference to a local port for the given sock. | ||
66 | * If snum is zero, any available local port will be selected. | ||
67 | */ | ||
68 | int inet_csk_get_port(struct inet_hashinfo *hashinfo, | ||
69 | struct sock *sk, unsigned short snum) | ||
70 | { | ||
71 | struct inet_bind_hashbucket *head; | ||
72 | struct hlist_node *node; | ||
73 | struct inet_bind_bucket *tb; | ||
74 | int ret; | ||
75 | |||
76 | local_bh_disable(); | ||
77 | if (!snum) { | ||
78 | int low = sysctl_local_port_range[0]; | ||
79 | int high = sysctl_local_port_range[1]; | ||
80 | int remaining = (high - low) + 1; | ||
81 | int rover; | ||
82 | |||
83 | spin_lock(&hashinfo->portalloc_lock); | ||
84 | if (hashinfo->port_rover < low) | ||
85 | rover = low; | ||
86 | else | ||
87 | rover = hashinfo->port_rover; | ||
88 | do { | ||
89 | rover++; | ||
90 | if (rover > high) | ||
91 | rover = low; | ||
92 | head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; | ||
93 | spin_lock(&head->lock); | ||
94 | inet_bind_bucket_for_each(tb, node, &head->chain) | ||
95 | if (tb->port == rover) | ||
96 | goto next; | ||
97 | break; | ||
98 | next: | ||
99 | spin_unlock(&head->lock); | ||
100 | } while (--remaining > 0); | ||
101 | hashinfo->port_rover = rover; | ||
102 | spin_unlock(&hashinfo->portalloc_lock); | ||
103 | |||
104 | /* Exhausted local port range during search? It is not | ||
105 | * possible for us to be holding one of the bind hash | ||
106 | * locks if this test triggers, because if 'remaining' | ||
107 | * drops to zero, we broke out of the do/while loop at | ||
108 | * the top level, not from the 'break;' statement. | ||
109 | */ | ||
110 | ret = 1; | ||
111 | if (remaining <= 0) | ||
112 | goto fail; | ||
113 | |||
114 | /* OK, here is the one we will use. HEAD is | ||
115 | * non-NULL and we hold its lock. | ||
116 | */ | ||
117 | snum = rover; | ||
118 | } else { | ||
119 | head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; | ||
120 | spin_lock(&head->lock); | ||
121 | inet_bind_bucket_for_each(tb, node, &head->chain) | ||
122 | if (tb->port == snum) | ||
123 | goto tb_found; | ||
124 | } | ||
125 | tb = NULL; | ||
126 | goto tb_not_found; | ||
127 | tb_found: | ||
128 | if (!hlist_empty(&tb->owners)) { | ||
129 | if (sk->sk_reuse > 1) | ||
130 | goto success; | ||
131 | if (tb->fastreuse > 0 && | ||
132 | sk->sk_reuse && sk->sk_state != TCP_LISTEN) { | ||
133 | goto success; | ||
134 | } else { | ||
135 | ret = 1; | ||
136 | if (inet_csk_bind_conflict(sk, tb)) | ||
137 | goto fail_unlock; | ||
138 | } | ||
139 | } | ||
140 | tb_not_found: | ||
141 | ret = 1; | ||
142 | if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) | ||
143 | goto fail_unlock; | ||
144 | if (hlist_empty(&tb->owners)) { | ||
145 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) | ||
146 | tb->fastreuse = 1; | ||
147 | else | ||
148 | tb->fastreuse = 0; | ||
149 | } else if (tb->fastreuse && | ||
150 | (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) | ||
151 | tb->fastreuse = 0; | ||
152 | success: | ||
153 | if (!inet_csk(sk)->icsk_bind_hash) | ||
154 | inet_bind_hash(sk, tb, snum); | ||
155 | BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); | ||
156 | ret = 0; | ||
157 | |||
158 | fail_unlock: | ||
159 | spin_unlock(&head->lock); | ||
160 | fail: | ||
161 | local_bh_enable(); | ||
162 | return ret; | ||
163 | } | ||
164 | |||
165 | EXPORT_SYMBOL_GPL(inet_csk_get_port); | ||
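
A brief user-space view of the snum == 0 path (illustrative sketch only, not part of the patch): binding with port 0 asks the kernel to pick a port from the local port range, and getsockname() reveals what was chosen.

#include <stdio.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        struct sockaddr_in a = { .sin_family = AF_INET }; /* port 0 = "pick one" */
        socklen_t alen = sizeof(a);
        int s = socket(AF_INET, SOCK_STREAM, 0);

        if (bind(s, (struct sockaddr *)&a, sizeof(a)) == 0 &&
            getsockname(s, (struct sockaddr *)&a, &alen) == 0)
                printf("kernel picked local port %u\n", ntohs(a.sin_port));
        close(s);
        return 0;
}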
166 | |||
167 | /* | ||
168 | * Wait for an incoming connection, avoiding race conditions. This must be called | ||
169 | * with the socket locked. | ||
170 | */ | ||
171 | static int inet_csk_wait_for_connect(struct sock *sk, long timeo) | ||
172 | { | ||
173 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
174 | DEFINE_WAIT(wait); | ||
175 | int err; | ||
176 | |||
177 | /* | ||
178 | * True wake-one mechanism for incoming connections: only | ||
179 | * one process gets woken up, not the 'whole herd'. | ||
180 | * Since we do not 'race & poll' for established sockets | ||
181 | * anymore, the common case will execute the loop only once. | ||
182 | * | ||
183 | * Subtle issue: a waiter added with "add_wait_queue_exclusive()" is queued | ||
184 | * after any current non-exclusive waiters, and we know that | ||
185 | * it will always _stay_ after any new non-exclusive waiters | ||
186 | * because all non-exclusive waiters are added at the | ||
187 | * beginning of the wait-queue. As such, it's ok to "drop" | ||
188 | * our exclusiveness temporarily when we get woken up without | ||
189 | * having to remove and re-insert us on the wait queue. | ||
190 | */ | ||
191 | for (;;) { | ||
192 | prepare_to_wait_exclusive(sk->sk_sleep, &wait, | ||
193 | TASK_INTERRUPTIBLE); | ||
194 | release_sock(sk); | ||
195 | if (reqsk_queue_empty(&icsk->icsk_accept_queue)) | ||
196 | timeo = schedule_timeout(timeo); | ||
197 | lock_sock(sk); | ||
198 | err = 0; | ||
199 | if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) | ||
200 | break; | ||
201 | err = -EINVAL; | ||
202 | if (sk->sk_state != TCP_LISTEN) | ||
203 | break; | ||
204 | err = sock_intr_errno(timeo); | ||
205 | if (signal_pending(current)) | ||
206 | break; | ||
207 | err = -EAGAIN; | ||
208 | if (!timeo) | ||
209 | break; | ||
210 | } | ||
211 | finish_wait(sk->sk_sleep, &wait); | ||
212 | return err; | ||
213 | } | ||
214 | |||
215 | /* | ||
216 | * This will accept the next outstanding connection. | ||
217 | */ | ||
218 | struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) | ||
219 | { | ||
220 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
221 | struct sock *newsk; | ||
222 | int error; | ||
223 | |||
224 | lock_sock(sk); | ||
225 | |||
226 | /* We need to make sure that this socket is listening, | ||
227 | * and that it has something pending. | ||
228 | */ | ||
229 | error = -EINVAL; | ||
230 | if (sk->sk_state != TCP_LISTEN) | ||
231 | goto out_err; | ||
232 | |||
233 | /* Find already established connection */ | ||
234 | if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { | ||
235 | long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); | ||
236 | |||
237 | /* If this is a non-blocking socket, don't sleep */ | ||
238 | error = -EAGAIN; | ||
239 | if (!timeo) | ||
240 | goto out_err; | ||
241 | |||
242 | error = inet_csk_wait_for_connect(sk, timeo); | ||
243 | if (error) | ||
244 | goto out_err; | ||
245 | } | ||
246 | |||
247 | newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); | ||
248 | BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); | ||
249 | out: | ||
250 | release_sock(sk); | ||
251 | return newsk; | ||
252 | out_err: | ||
253 | newsk = NULL; | ||
254 | *err = error; | ||
255 | goto out; | ||
256 | } | ||
257 | |||
258 | EXPORT_SYMBOL(inet_csk_accept); | ||
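
The error paths above map directly onto user-visible errno values. A minimal sketch of the -EAGAIN case (illustrative only, not part of the patch): on a non-blocking listener with an empty accept queue, accept() fails immediately instead of sleeping in inet_csk_wait_for_connect().

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        struct sockaddr_in a = { .sin_family = AF_INET }; /* any addr, any port */
        int s = socket(AF_INET, SOCK_STREAM, 0);

        bind(s, (struct sockaddr *)&a, sizeof(a));
        listen(s, 8);
        fcntl(s, F_SETFL, fcntl(s, F_GETFL, 0) | O_NONBLOCK);

        if (accept(s, NULL, NULL) < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
                printf("accept queue empty, would block\n");
        close(s);
        return 0;
}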
259 | |||
260 | /* | ||
261 | * Using different timers for retransmit, delayed acks and probes. | ||
262 | * We may wish to use just one timer maintaining a list of expiry jiffies | ||
263 | * to optimize. | ||
264 | */ | ||
265 | void inet_csk_init_xmit_timers(struct sock *sk, | ||
266 | void (*retransmit_handler)(unsigned long), | ||
267 | void (*delack_handler)(unsigned long), | ||
268 | void (*keepalive_handler)(unsigned long)) | ||
269 | { | ||
270 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
271 | |||
272 | init_timer(&icsk->icsk_retransmit_timer); | ||
273 | init_timer(&icsk->icsk_delack_timer); | ||
274 | init_timer(&sk->sk_timer); | ||
275 | |||
276 | icsk->icsk_retransmit_timer.function = retransmit_handler; | ||
277 | icsk->icsk_delack_timer.function = delack_handler; | ||
278 | sk->sk_timer.function = keepalive_handler; | ||
279 | |||
280 | icsk->icsk_retransmit_timer.data = | ||
281 | icsk->icsk_delack_timer.data = | ||
282 | sk->sk_timer.data = (unsigned long)sk; | ||
283 | |||
284 | icsk->icsk_pending = icsk->icsk_ack.pending = 0; | ||
285 | } | ||
286 | |||
287 | EXPORT_SYMBOL(inet_csk_init_xmit_timers); | ||
288 | |||
289 | void inet_csk_clear_xmit_timers(struct sock *sk) | ||
290 | { | ||
291 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
292 | |||
293 | icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; | ||
294 | |||
295 | sk_stop_timer(sk, &icsk->icsk_retransmit_timer); | ||
296 | sk_stop_timer(sk, &icsk->icsk_delack_timer); | ||
297 | sk_stop_timer(sk, &sk->sk_timer); | ||
298 | } | ||
299 | |||
300 | EXPORT_SYMBOL(inet_csk_clear_xmit_timers); | ||
301 | |||
302 | void inet_csk_delete_keepalive_timer(struct sock *sk) | ||
303 | { | ||
304 | sk_stop_timer(sk, &sk->sk_timer); | ||
305 | } | ||
306 | |||
307 | EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); | ||
308 | |||
309 | void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) | ||
310 | { | ||
311 | sk_reset_timer(sk, &sk->sk_timer, jiffies + len); | ||
312 | } | ||
313 | |||
314 | EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); | ||
315 | |||
316 | struct dst_entry* inet_csk_route_req(struct sock *sk, | ||
317 | const struct request_sock *req) | ||
318 | { | ||
319 | struct rtable *rt; | ||
320 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
321 | struct ip_options *opt = inet_rsk(req)->opt; | ||
322 | struct flowi fl = { .oif = sk->sk_bound_dev_if, | ||
323 | .nl_u = { .ip4_u = | ||
324 | { .daddr = ((opt && opt->srr) ? | ||
325 | opt->faddr : | ||
326 | ireq->rmt_addr), | ||
327 | .saddr = ireq->loc_addr, | ||
328 | .tos = RT_CONN_FLAGS(sk) } }, | ||
329 | .proto = sk->sk_protocol, | ||
330 | .uli_u = { .ports = | ||
331 | { .sport = inet_sk(sk)->sport, | ||
332 | .dport = ireq->rmt_port } } }; | ||
333 | |||
334 | if (ip_route_output_flow(&rt, &fl, sk, 0)) { | ||
335 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
336 | return NULL; | ||
337 | } | ||
338 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { | ||
339 | ip_rt_put(rt); | ||
340 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
341 | return NULL; | ||
342 | } | ||
343 | return &rt->u.dst; | ||
344 | } | ||
345 | |||
346 | EXPORT_SYMBOL_GPL(inet_csk_route_req); | ||
347 | |||
348 | static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, | ||
349 | const u32 rnd, const u16 synq_hsize) | ||
350 | { | ||
351 | return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1); | ||
352 | } | ||
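
Two properties of this hash are worth noting: nr_table_entries must be a power of two for the final mask to behave like a modulo, and hash_rnd keeps the bucket choice unpredictable to remote hosts. A tiny stand-alone sketch of the masking step (illustrative only, not part of the patch):

#include <stdio.h>

/* For a power-of-two table size, "h & (size - 1)" equals "h % size"
 * but avoids a division on the fast path. */
static unsigned int synq_bucket(unsigned int h, unsigned int size_pow2)
{
        return h & (size_pow2 - 1);
}

int main(void)
{
        unsigned int h = 0xdeadbeefu;

        printf("%u %u\n", synq_bucket(h, 512), h % 512u);
        return 0;
}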
353 | |||
354 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
355 | #define AF_INET_FAMILY(fam) ((fam) == AF_INET) | ||
356 | #else | ||
357 | #define AF_INET_FAMILY(fam) 1 | ||
358 | #endif | ||
359 | |||
360 | struct request_sock *inet_csk_search_req(const struct sock *sk, | ||
361 | struct request_sock ***prevp, | ||
362 | const __u16 rport, const __u32 raddr, | ||
363 | const __u32 laddr) | ||
364 | { | ||
365 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
366 | struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; | ||
367 | struct request_sock *req, **prev; | ||
368 | |||
369 | for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, | ||
370 | lopt->nr_table_entries)]; | ||
371 | (req = *prev) != NULL; | ||
372 | prev = &req->dl_next) { | ||
373 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
374 | |||
375 | if (ireq->rmt_port == rport && | ||
376 | ireq->rmt_addr == raddr && | ||
377 | ireq->loc_addr == laddr && | ||
378 | AF_INET_FAMILY(req->rsk_ops->family)) { | ||
379 | BUG_TRAP(!req->sk); | ||
380 | *prevp = prev; | ||
381 | break; | ||
382 | } | ||
383 | } | ||
384 | |||
385 | return req; | ||
386 | } | ||
387 | |||
388 | EXPORT_SYMBOL_GPL(inet_csk_search_req); | ||
389 | |||
390 | void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, | ||
391 | const unsigned timeout) | ||
392 | { | ||
393 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
394 | struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; | ||
395 | const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, | ||
396 | lopt->hash_rnd, lopt->nr_table_entries); | ||
397 | |||
398 | reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); | ||
399 | inet_csk_reqsk_queue_added(sk, timeout); | ||
400 | } | ||
401 | |||
402 | /* The only thing we need from tcp.h */ | ||
403 | extern int sysctl_tcp_synack_retries; | ||
404 | |||
405 | EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); | ||
406 | |||
407 | void inet_csk_reqsk_queue_prune(struct sock *parent, | ||
408 | const unsigned long interval, | ||
409 | const unsigned long timeout, | ||
410 | const unsigned long max_rto) | ||
411 | { | ||
412 | struct inet_connection_sock *icsk = inet_csk(parent); | ||
413 | struct request_sock_queue *queue = &icsk->icsk_accept_queue; | ||
414 | struct listen_sock *lopt = queue->listen_opt; | ||
415 | int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; | ||
416 | int thresh = max_retries; | ||
417 | unsigned long now = jiffies; | ||
418 | struct request_sock **reqp, *req; | ||
419 | int i, budget; | ||
420 | |||
421 | if (lopt == NULL || lopt->qlen == 0) | ||
422 | return; | ||
423 | |||
424 | /* Normally all the openreqs are young and become mature | ||
425 | * (i.e. converted to an established socket) within the first timeout. | ||
426 | * If the synack was not acknowledged within 3 seconds, it means | ||
427 | * one of the following things: the synack was lost, the ack was lost, | ||
428 | * the rtt is high, or nobody planned to ack (i.e. a synflood). | ||
429 | * When the server is a bit loaded, the queue is populated with old | ||
430 | * open requests, reducing the effective size of the queue. | ||
431 | * When the server is heavily loaded, the queue size drops to zero | ||
432 | * after several minutes of work. That is not a synflood, | ||
433 | * it is normal operation. The solution is to prune | ||
434 | * entries that are too old, overriding the normal timeout, when | ||
435 | * the situation becomes dangerous. | ||
436 | * | ||
437 | * Essentially, we reserve half of the room for young | ||
438 | * embryos and abort old ones without pity if the old | ||
439 | * ones are about to clog our table. | ||
440 | */ | ||
441 | if (lopt->qlen >> (lopt->max_qlen_log - 1)) { | ||
442 | int young = lopt->qlen_young << 1; | ||
443 | |||
444 | while (thresh > 2) { | ||
445 | if (lopt->qlen < young) | ||
446 | break; | ||
447 | thresh--; | ||
448 | young <<= 1; | ||
449 | } | ||
450 | } | ||
451 | |||
452 | if (queue->rskq_defer_accept) | ||
453 | max_retries = queue->rskq_defer_accept; | ||
454 | |||
455 | budget = 2 * (lopt->nr_table_entries / (timeout / interval)); | ||
456 | i = lopt->clock_hand; | ||
457 | |||
458 | do { | ||
459 | reqp = &lopt->syn_table[i]; | ||
460 | while ((req = *reqp) != NULL) { | ||
461 | if (time_after_eq(now, req->expires)) { | ||
462 | if ((req->retrans < thresh || | ||
463 | (inet_rsk(req)->acked && req->retrans < max_retries)) | ||
464 | && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) { | ||
465 | unsigned long timeo; | ||
466 | |||
467 | if (req->retrans++ == 0) | ||
468 | lopt->qlen_young--; | ||
469 | timeo = min((timeout << req->retrans), max_rto); | ||
470 | req->expires = now + timeo; | ||
471 | reqp = &req->dl_next; | ||
472 | continue; | ||
473 | } | ||
474 | |||
475 | /* Drop this request */ | ||
476 | inet_csk_reqsk_queue_unlink(parent, req, reqp); | ||
477 | reqsk_queue_removed(queue, req); | ||
478 | reqsk_free(req); | ||
479 | continue; | ||
480 | } | ||
481 | reqp = &req->dl_next; | ||
482 | } | ||
483 | |||
484 | i = (i + 1) & (lopt->nr_table_entries - 1); | ||
485 | |||
486 | } while (--budget > 0); | ||
487 | |||
488 | lopt->clock_hand = i; | ||
489 | |||
490 | if (lopt->qlen) | ||
491 | inet_csk_reset_keepalive_timer(parent, interval); | ||
492 | } | ||
493 | |||
494 | EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); | ||
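
The thresh computation above can be read as a stand-alone function. Here is a hedged re-statement (illustrative only, not part of the patch, with hypothetical example numbers) showing how a table that is over half full of mostly old requests drives thresh down toward 2, so stale entries get fewer retries before being dropped:

#include <stdio.h>

static int prune_thresh(int max_retries, int qlen, int qlen_young, int max_qlen_log)
{
        int thresh = max_retries;

        if (qlen >> (max_qlen_log - 1)) {       /* queue more than half full */
                int young = qlen_young << 1;

                while (thresh > 2) {
                        if (qlen < young)
                                break;
                        thresh--;
                        young <<= 1;
                }
        }
        return thresh;
}

int main(void)
{
        /* Hypothetical numbers: table of 256 (log 8), 200 queued, only 10 young. */
        printf("thresh = %d\n", prune_thresh(5, 200, 10, 8));  /* prints 2 */
        return 0;
}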
495 | |||
496 | struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, | ||
497 | const unsigned int __nocast priority) | ||
498 | { | ||
499 | struct sock *newsk = sk_clone(sk, priority); | ||
500 | |||
501 | if (newsk != NULL) { | ||
502 | struct inet_connection_sock *newicsk = inet_csk(newsk); | ||
503 | |||
504 | newsk->sk_state = TCP_SYN_RECV; | ||
505 | newicsk->icsk_bind_hash = NULL; | ||
506 | |||
507 | inet_sk(newsk)->dport = inet_rsk(req)->rmt_port; | ||
508 | newsk->sk_write_space = sk_stream_write_space; | ||
509 | |||
510 | newicsk->icsk_retransmits = 0; | ||
511 | newicsk->icsk_backoff = 0; | ||
512 | newicsk->icsk_probes_out = 0; | ||
513 | |||
514 | /* Deinitialize accept_queue to trap illegal accesses. */ | ||
515 | memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); | ||
516 | } | ||
517 | return newsk; | ||
518 | } | ||
519 | |||
520 | EXPORT_SYMBOL_GPL(inet_csk_clone); | ||
521 | |||
522 | /* | ||
523 | * At this point, there should be no process reference to this | ||
524 | * socket, and thus no user references at all. Therefore we | ||
525 | * can assume the socket waitqueue is inactive and nobody will | ||
526 | * try to jump onto it. | ||
527 | */ | ||
528 | void inet_csk_destroy_sock(struct sock *sk) | ||
529 | { | ||
530 | BUG_TRAP(sk->sk_state == TCP_CLOSE); | ||
531 | BUG_TRAP(sock_flag(sk, SOCK_DEAD)); | ||
532 | |||
533 | /* It cannot be in the hash table! */ | ||
534 | BUG_TRAP(sk_unhashed(sk)); | ||
535 | |||
536 | /* If inet_sk(sk)->num is non-zero, the socket must be bound */ | ||
537 | BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash); | ||
538 | |||
539 | sk->sk_prot->destroy(sk); | ||
540 | |||
541 | sk_stream_kill_queues(sk); | ||
542 | |||
543 | xfrm_sk_free_policy(sk); | ||
544 | |||
545 | sk_refcnt_debug_release(sk); | ||
546 | |||
547 | atomic_dec(sk->sk_prot->orphan_count); | ||
548 | sock_put(sk); | ||
549 | } | ||
550 | |||
551 | EXPORT_SYMBOL(inet_csk_destroy_sock); | ||
552 | |||
553 | int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) | ||
554 | { | ||
555 | struct inet_sock *inet = inet_sk(sk); | ||
556 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
557 | int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries); | ||
558 | |||
559 | if (rc != 0) | ||
560 | return rc; | ||
561 | |||
562 | sk->sk_max_ack_backlog = 0; | ||
563 | sk->sk_ack_backlog = 0; | ||
564 | inet_csk_delack_init(sk); | ||
565 | |||
566 | /* There is a race window here: we announce ourselves listening, | ||
567 | * but this transition is still not validated by get_port(). | ||
568 | * It is OK because this socket enters the hash table only | ||
569 | * after validation is complete. | ||
570 | */ | ||
571 | sk->sk_state = TCP_LISTEN; | ||
572 | if (!sk->sk_prot->get_port(sk, inet->num)) { | ||
573 | inet->sport = htons(inet->num); | ||
574 | |||
575 | sk_dst_reset(sk); | ||
576 | sk->sk_prot->hash(sk); | ||
577 | |||
578 | return 0; | ||
579 | } | ||
580 | |||
581 | sk->sk_state = TCP_CLOSE; | ||
582 | __reqsk_queue_destroy(&icsk->icsk_accept_queue); | ||
583 | return -EADDRINUSE; | ||
584 | } | ||
585 | |||
586 | EXPORT_SYMBOL_GPL(inet_csk_listen_start); | ||
587 | |||
588 | /* | ||
589 | * This routine closes sockets which have been at least partially | ||
590 | * opened, but not yet accepted. | ||
591 | */ | ||
592 | void inet_csk_listen_stop(struct sock *sk) | ||
593 | { | ||
594 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
595 | struct request_sock *acc_req; | ||
596 | struct request_sock *req; | ||
597 | |||
598 | inet_csk_delete_keepalive_timer(sk); | ||
599 | |||
600 | /* make all the listen_opt local to us */ | ||
601 | acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue); | ||
602 | |||
603 | /* Following the specs, it would be better either to send a FIN | ||
604 | * (and enter FIN-WAIT-1; that is the normal close) | ||
605 | * or to send an active reset (abort). | ||
606 | * Certainly, it is pretty dangerous during a synflood, but that is | ||
607 | * a bad justification for our negligence 8) | ||
608 | * To be honest, we are not able to implement either | ||
609 | * of the variants now. --ANK | ||
610 | */ | ||
611 | reqsk_queue_destroy(&icsk->icsk_accept_queue); | ||
612 | |||
613 | while ((req = acc_req) != NULL) { | ||
614 | struct sock *child = req->sk; | ||
615 | |||
616 | acc_req = req->dl_next; | ||
617 | |||
618 | local_bh_disable(); | ||
619 | bh_lock_sock(child); | ||
620 | BUG_TRAP(!sock_owned_by_user(child)); | ||
621 | sock_hold(child); | ||
622 | |||
623 | sk->sk_prot->disconnect(child, O_NONBLOCK); | ||
624 | |||
625 | sock_orphan(child); | ||
626 | |||
627 | atomic_inc(sk->sk_prot->orphan_count); | ||
628 | |||
629 | inet_csk_destroy_sock(child); | ||
630 | |||
631 | bh_unlock_sock(child); | ||
632 | local_bh_enable(); | ||
633 | sock_put(child); | ||
634 | |||
635 | sk_acceptq_removed(sk); | ||
636 | __reqsk_free(req); | ||
637 | } | ||
638 | BUG_TRAP(!sk->sk_ack_backlog); | ||
639 | } | ||
640 | |||
641 | EXPORT_SYMBOL_GPL(inet_csk_listen_stop); | ||