aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/net/inet_connection_sock.h7
-rw-r--r--include/net/request_sock.h4
-rw-r--r--include/net/tcp.h6
-rw-r--r--net/dccp/timer.c6
-rw-r--r--net/ipv4/inet_connection_sock.c214
-rw-r--r--net/ipv4/tcp.c120
-rw-r--r--net/ipv4/tcp_timer.c93
7 files changed, 224 insertions, 226 deletions
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 692825fc8135..bec19d5cff26 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -239,6 +239,13 @@ static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
239 reqsk_free(req); 239 reqsk_free(req);
240} 240}
241 241
242extern void inet_csk_reqsk_queue_prune(struct sock *parent,
243 const unsigned long interval,
244 const unsigned long timeout,
245 const unsigned long max_rto);
246
247extern void inet_csk_destroy_sock(struct sock *sk);
248extern int inet_csk_listen_start(struct sock *sk, const int nr_table_entries);
242extern void inet_csk_listen_stop(struct sock *sk); 249extern void inet_csk_listen_stop(struct sock *sk);
243 250
244#endif /* _INET_CONNECTION_SOCK_H */ 251#endif /* _INET_CONNECTION_SOCK_H */
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 447d287a38fd..b52cc52ffe39 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -258,8 +258,4 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
258 write_unlock(&queue->syn_wait_lock); 258 write_unlock(&queue->syn_wait_lock);
259} 259}
260 260
261extern void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
262 const unsigned long interval, const unsigned long timeout,
263 const unsigned long max_rto, int max_retries);
264
265#endif /* _REQUEST_SOCK_H */ 261#endif /* _REQUEST_SOCK_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 2423f059b62b..077db859ae0d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -423,9 +423,6 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
423 size_t len, int nonblock, 423 size_t len, int nonblock,
424 int flags, int *addr_len); 424 int flags, int *addr_len);
425 425
426extern int inet_csk_listen_start(struct sock *sk,
427 const int nr_table_entries);
428
429extern void tcp_parse_options(struct sk_buff *skb, 426extern void tcp_parse_options(struct sk_buff *skb,
430 struct tcp_options_received *opt_rx, 427 struct tcp_options_received *opt_rx,
431 int estab); 428 int estab);
@@ -861,9 +858,6 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
861 tp->snd_wl1 = seq; 858 tp->snd_wl1 = seq;
862} 859}
863 860
864extern void inet_csk_destroy_sock(struct sock *sk);
865
866
867/* 861/*
868 * Calculate(/check) TCP checksum 862 * Calculate(/check) TCP checksum
869 */ 863 */
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 8c396ee01aac..9f1f1ab9e2b4 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -220,11 +220,7 @@ out:
220 */ 220 */
221static void dccp_response_timer(struct sock *sk) 221static void dccp_response_timer(struct sock *sk)
222{ 222{
223 struct inet_connection_sock *icsk = inet_csk(sk); 223 inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
224 const int max_retries = icsk->icsk_syn_retries ? : TCP_SYNACK_RETRIES /* FIXME sysctl_tcp_synack_retries */;
225
226 reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
227 DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, max_retries);
228} 224}
229 225
230static void dccp_keepalive_timer(unsigned long data) 226static void dccp_keepalive_timer(unsigned long data)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 136ada050b63..026630a15ea0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -23,6 +23,7 @@
23#include <net/ip.h> 23#include <net/ip.h>
24#include <net/route.h> 24#include <net/route.h>
25#include <net/tcp_states.h> 25#include <net/tcp_states.h>
26#include <net/xfrm.h>
26 27
27#ifdef INET_CSK_DEBUG 28#ifdef INET_CSK_DEBUG
28const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; 29const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -398,8 +399,100 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
398 inet_csk_reqsk_queue_added(sk, timeout); 399 inet_csk_reqsk_queue_added(sk, timeout);
399} 400}
400 401
402/* Only thing we need from tcp.h */
403extern int sysctl_tcp_synack_retries;
404
401EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); 405EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
402 406
407void inet_csk_reqsk_queue_prune(struct sock *parent,
408 const unsigned long interval,
409 const unsigned long timeout,
410 const unsigned long max_rto)
411{
412 struct inet_connection_sock *icsk = inet_csk(parent);
413 struct request_sock_queue *queue = &icsk->icsk_accept_queue;
414 struct listen_sock *lopt = queue->listen_opt;
415 int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
416 int thresh = max_retries;
417 unsigned long now = jiffies;
418 struct request_sock **reqp, *req;
419 int i, budget;
420
421 if (lopt == NULL || lopt->qlen == 0)
422 return;
423
424 /* Normally all the openreqs are young and become mature
425 * (i.e. converted to established socket) for first timeout.
426 * If synack was not acknowledged for 3 seconds, it means
427 * one of the following things: synack was lost, ack was lost,
428 * rtt is high or nobody planned to ack (i.e. synflood).
429 * When server is a bit loaded, queue is populated with old
430 * open requests, reducing effective size of queue.
431 * When server is well loaded, queue size reduces to zero
432 * after several minutes of work. It is not synflood,
433 * it is normal operation. The solution is pruning
434 * too old entries overriding normal timeout, when
435 * situation becomes dangerous.
436 *
437 * Essentially, we reserve half of room for young
438 * embrions; and abort old ones without pity, if old
439 * ones are about to clog our table.
440 */
441 if (lopt->qlen>>(lopt->max_qlen_log-1)) {
442 int young = (lopt->qlen_young<<1);
443
444 while (thresh > 2) {
445 if (lopt->qlen < young)
446 break;
447 thresh--;
448 young <<= 1;
449 }
450 }
451
452 if (queue->rskq_defer_accept)
453 max_retries = queue->rskq_defer_accept;
454
455 budget = 2 * (lopt->nr_table_entries / (timeout / interval));
456 i = lopt->clock_hand;
457
458 do {
459 reqp=&lopt->syn_table[i];
460 while ((req = *reqp) != NULL) {
461 if (time_after_eq(now, req->expires)) {
462 if ((req->retrans < thresh ||
463 (inet_rsk(req)->acked && req->retrans < max_retries))
464 && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
465 unsigned long timeo;
466
467 if (req->retrans++ == 0)
468 lopt->qlen_young--;
469 timeo = min((timeout << req->retrans), max_rto);
470 req->expires = now + timeo;
471 reqp = &req->dl_next;
472 continue;
473 }
474
475 /* Drop this request */
476 inet_csk_reqsk_queue_unlink(parent, req, reqp);
477 reqsk_queue_removed(queue, req);
478 reqsk_free(req);
479 continue;
480 }
481 reqp = &req->dl_next;
482 }
483
484 i = (i + 1) & (lopt->nr_table_entries - 1);
485
486 } while (--budget > 0);
487
488 lopt->clock_hand = i;
489
490 if (lopt->qlen)
491 inet_csk_reset_keepalive_timer(parent, interval);
492}
493
494EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
495
403struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, 496struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
404 const unsigned int __nocast priority) 497 const unsigned int __nocast priority)
405{ 498{
@@ -424,3 +517,124 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
424} 517}
425 518
426EXPORT_SYMBOL_GPL(inet_csk_clone); 519EXPORT_SYMBOL_GPL(inet_csk_clone);
520
521/*
522 * At this point, there should be no process reference to this
523 * socket, and thus no user references at all. Therefore we
524 * can assume the socket waitqueue is inactive and nobody will
525 * try to jump onto it.
 *
 * Preconditions, each checked with BUG_TRAP below: sk is in TCP_CLOSE,
 * carries the SOCK_DEAD flag, is not hashed, and a non-zero
 * inet_sk(sk)->num is accompanied by a bind-hash entry.
 *
 * Teardown order: the protocol's ->destroy() hook, the stream queues,
 * the xfrm policy, the refcount-debug accounting; finally the protocol's
 * orphan counter is decremented and one reference to sk is dropped.
526 */
527void inet_csk_destroy_sock(struct sock *sk)
528{
529 BUG_TRAP(sk->sk_state == TCP_CLOSE);
530 BUG_TRAP(sock_flag(sk, SOCK_DEAD));
531
532 /* It cannot be in hash table! */
533 BUG_TRAP(sk_unhashed(sk));
534
535 /* If inet_sk(sk)->num is non-zero, the socket must be bound */
536 BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash);
537
 /* Protocol-specific hook, reached through sk->sk_prot */
538 sk->sk_prot->destroy(sk);
539
540 sk_stream_kill_queues(sk);
541
542 xfrm_sk_free_policy(sk);
543
544 sk_refcnt_debug_release(sk);
545
 /* NOTE(review): inet_csk_listen_stop() does atomic_inc() on this same
  * counter right before calling here; presumably every caller orphans
  * the socket first -- confirm for callers outside this file.
  */
546 atomic_dec(sk->sk_prot->orphan_count);
 /* Drops one reference; whether it is the last depends on the caller */
547 sock_put(sk);
548}
549
550EXPORT_SYMBOL(inet_csk_destroy_sock);
551
552int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
553{
554 struct inet_sock *inet = inet_sk(sk);
555 struct inet_connection_sock *icsk = inet_csk(sk);
556 int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
557
558 if (rc != 0)
559 return rc;
560
561 sk->sk_max_ack_backlog = 0;
562 sk->sk_ack_backlog = 0;
563 inet_csk_delack_init(sk);
564
565 /* There is race window here: we announce ourselves listening,
566 * but this transition is still not validated by get_port().
567 * It is OK, because this socket enters to hash table only
568 * after validation is complete.
569 */
570 sk->sk_state = TCP_LISTEN;
571 if (!sk->sk_prot->get_port(sk, inet->num)) {
572 inet->sport = htons(inet->num);
573
574 sk_dst_reset(sk);
575 sk->sk_prot->hash(sk);
576
577 return 0;
578 }
579
580 sk->sk_state = TCP_CLOSE;
581 __reqsk_queue_destroy(&icsk->icsk_accept_queue);
582 return -EADDRINUSE;
583}
584
585EXPORT_SYMBOL_GPL(inet_csk_listen_start);
586
587/*
588 * This routine closes sockets which have been at least partially
589 * opened, but not yet accepted.
590 */
591void inet_csk_listen_stop(struct sock *sk)
592{
593 struct inet_connection_sock *icsk = inet_csk(sk);
594 struct request_sock *acc_req;
595 struct request_sock *req;
596
597 inet_csk_delete_keepalive_timer(sk);
598
599 /* make all the listen_opt local to us */
600 acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
601
602 /* Following specs, it would be better either to send FIN
603 * (and enter FIN-WAIT-1, it is normal close)
604 * or to send active reset (abort).
605 * Certainly, it is pretty dangerous while synflood, but it is
606 * bad justification for our negligence 8)
607 * To be honest, we are not able to make either
608 * of the variants now. --ANK
609 */
610 reqsk_queue_destroy(&icsk->icsk_accept_queue);
611
612 while ((req = acc_req) != NULL) {
613 struct sock *child = req->sk;
614
615 acc_req = req->dl_next;
616
617 local_bh_disable();
618 bh_lock_sock(child);
619 BUG_TRAP(!sock_owned_by_user(child));
620 sock_hold(child);
621
622 sk->sk_prot->disconnect(child, O_NONBLOCK);
623
624 sock_orphan(child);
625
626 atomic_inc(sk->sk_prot->orphan_count);
627
628 inet_csk_destroy_sock(child);
629
630 bh_unlock_sock(child);
631 local_bh_enable();
632 sock_put(child);
633
634 sk_acceptq_removed(sk);
635 __reqsk_free(req);
636 }
637 BUG_TRAP(!sk->sk_ack_backlog);
638}
639
640EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a4e9eec44895..4bda522d25cf 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -456,96 +456,6 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
456 return put_user(answ, (int __user *)arg); 456 return put_user(answ, (int __user *)arg);
457} 457}
458 458
459int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
460{
461 struct inet_sock *inet = inet_sk(sk);
462 struct inet_connection_sock *icsk = inet_csk(sk);
463 int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
464
465 if (rc != 0)
466 return rc;
467
468 sk->sk_max_ack_backlog = 0;
469 sk->sk_ack_backlog = 0;
470 inet_csk_delack_init(sk);
471
472 /* There is race window here: we announce ourselves listening,
473 * but this transition is still not validated by get_port().
474 * It is OK, because this socket enters to hash table only
475 * after validation is complete.
476 */
477 sk->sk_state = TCP_LISTEN;
478 if (!sk->sk_prot->get_port(sk, inet->num)) {
479 inet->sport = htons(inet->num);
480
481 sk_dst_reset(sk);
482 sk->sk_prot->hash(sk);
483
484 return 0;
485 }
486
487 sk->sk_state = TCP_CLOSE;
488 __reqsk_queue_destroy(&icsk->icsk_accept_queue);
489 return -EADDRINUSE;
490}
491
492EXPORT_SYMBOL_GPL(inet_csk_listen_start);
493
494/*
495 * This routine closes sockets which have been at least partially
496 * opened, but not yet accepted.
497 */
498void inet_csk_listen_stop(struct sock *sk)
499{
500 struct inet_connection_sock *icsk = inet_csk(sk);
501 struct request_sock *acc_req;
502 struct request_sock *req;
503
504 inet_csk_delete_keepalive_timer(sk);
505
506 /* make all the listen_opt local to us */
507 acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
508
509 /* Following specs, it would be better either to send FIN
510 * (and enter FIN-WAIT-1, it is normal close)
511 * or to send active reset (abort).
512 * Certainly, it is pretty dangerous while synflood, but it is
513 * bad justification for our negligence 8)
514 * To be honest, we are not able to make either
515 * of the variants now. --ANK
516 */
517 reqsk_queue_destroy(&icsk->icsk_accept_queue);
518
519 while ((req = acc_req) != NULL) {
520 struct sock *child = req->sk;
521
522 acc_req = req->dl_next;
523
524 local_bh_disable();
525 bh_lock_sock(child);
526 BUG_TRAP(!sock_owned_by_user(child));
527 sock_hold(child);
528
529 sk->sk_prot->disconnect(child, O_NONBLOCK);
530
531 sock_orphan(child);
532
533 atomic_inc(sk->sk_prot->orphan_count);
534
535 inet_csk_destroy_sock(child);
536
537 bh_unlock_sock(child);
538 local_bh_enable();
539 sock_put(child);
540
541 sk_acceptq_removed(sk);
542 __reqsk_free(req);
543 }
544 BUG_TRAP(!sk->sk_ack_backlog);
545}
546
547EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
548
549static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) 459static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
550{ 460{
551 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; 461 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
@@ -1559,35 +1469,6 @@ void tcp_shutdown(struct sock *sk, int how)
1559 } 1469 }
1560} 1470}
1561 1471
1562/*
1563 * At this point, there should be no process reference to this
1564 * socket, and thus no user references at all. Therefore we
1565 * can assume the socket waitqueue is inactive and nobody will
1566 * try to jump onto it.
1567 */
1568void inet_csk_destroy_sock(struct sock *sk)
1569{
1570 BUG_TRAP(sk->sk_state == TCP_CLOSE);
1571 BUG_TRAP(sock_flag(sk, SOCK_DEAD));
1572
1573 /* It cannot be in hash table! */
1574 BUG_TRAP(sk_unhashed(sk));
1575
1576 /* If it has not 0 inet_sk(sk)->num, it must be bound */
1577 BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash);
1578
1579 sk->sk_prot->destroy(sk);
1580
1581 sk_stream_kill_queues(sk);
1582
1583 xfrm_sk_free_policy(sk);
1584
1585 sk_refcnt_debug_release(sk);
1586
1587 atomic_dec(sk->sk_prot->orphan_count);
1588 sock_put(sk);
1589}
1590
1591void tcp_close(struct sock *sk, long timeout) 1472void tcp_close(struct sock *sk, long timeout)
1592{ 1473{
1593 struct sk_buff *skb; 1474 struct sk_buff *skb;
@@ -2258,7 +2139,6 @@ void __init tcp_init(void)
2258} 2139}
2259 2140
2260EXPORT_SYMBOL(tcp_close); 2141EXPORT_SYMBOL(tcp_close);
2261EXPORT_SYMBOL(inet_csk_destroy_sock);
2262EXPORT_SYMBOL(tcp_disconnect); 2142EXPORT_SYMBOL(tcp_disconnect);
2263EXPORT_SYMBOL(tcp_getsockopt); 2143EXPORT_SYMBOL(tcp_getsockopt);
2264EXPORT_SYMBOL(tcp_ioctl); 2144EXPORT_SYMBOL(tcp_ioctl);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b614ad4d30c9..72cec6981830 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -424,103 +424,14 @@ out_unlock:
424 sock_put(sk); 424 sock_put(sk);
425} 425}
426 426
427void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
428 const unsigned long interval, const unsigned long timeout,
429 const unsigned long max_rto, int max_retries)
430{
431 struct inet_connection_sock *icsk = inet_csk(parent);
432 struct listen_sock *lopt = queue->listen_opt;
433 int thresh = max_retries;
434 unsigned long now = jiffies;
435 struct request_sock **reqp, *req;
436 int i, budget;
437
438 if (lopt == NULL || lopt->qlen == 0)
439 return;
440
441 /* Normally all the openreqs are young and become mature
442 * (i.e. converted to established socket) for first timeout.
443 * If synack was not acknowledged for 3 seconds, it means
444 * one of the following things: synack was lost, ack was lost,
445 * rtt is high or nobody planned to ack (i.e. synflood).
446 * When server is a bit loaded, queue is populated with old
447 * open requests, reducing effective size of queue.
448 * When server is well loaded, queue size reduces to zero
449 * after several minutes of work. It is not synflood,
450 * it is normal operation. The solution is pruning
451 * too old entries overriding normal timeout, when
452 * situation becomes dangerous.
453 *
454 * Essentially, we reserve half of room for young
455 * embrions; and abort old ones without pity, if old
456 * ones are about to clog our table.
457 */
458 if (lopt->qlen>>(lopt->max_qlen_log-1)) {
459 int young = (lopt->qlen_young<<1);
460
461 while (thresh > 2) {
462 if (lopt->qlen < young)
463 break;
464 thresh--;
465 young <<= 1;
466 }
467 }
468
469 if (queue->rskq_defer_accept)
470 max_retries = queue->rskq_defer_accept;
471
472 budget = 2 * (lopt->nr_table_entries / (timeout / interval));
473 i = lopt->clock_hand;
474
475 do {
476 reqp=&lopt->syn_table[i];
477 while ((req = *reqp) != NULL) {
478 if (time_after_eq(now, req->expires)) {
479 if ((req->retrans < thresh ||
480 (inet_rsk(req)->acked && req->retrans < max_retries))
481 && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
482 unsigned long timeo;
483
484 if (req->retrans++ == 0)
485 lopt->qlen_young--;
486 timeo = min((timeout << req->retrans), max_rto);
487 req->expires = now + timeo;
488 reqp = &req->dl_next;
489 continue;
490 }
491
492 /* Drop this request */
493 inet_csk_reqsk_queue_unlink(parent, req, reqp);
494 reqsk_queue_removed(&icsk->icsk_accept_queue, req);
495 reqsk_free(req);
496 continue;
497 }
498 reqp = &req->dl_next;
499 }
500
501 i = (i + 1) & (lopt->nr_table_entries - 1);
502
503 } while (--budget > 0);
504
505 lopt->clock_hand = i;
506
507 if (lopt->qlen)
508 inet_csk_reset_keepalive_timer(parent, interval);
509}
510
511EXPORT_SYMBOL_GPL(reqsk_queue_prune);
512
513/* 427/*
514 * Timer for listening sockets 428 * Timer for listening sockets
515 */ 429 */
516 430
517static void tcp_synack_timer(struct sock *sk) 431static void tcp_synack_timer(struct sock *sk)
518{ 432{
519 struct inet_connection_sock *icsk = inet_csk(sk); 433 inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL,
520 const int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; 434 TCP_TIMEOUT_INIT, TCP_RTO_MAX);
521
522 reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
523 TCP_TIMEOUT_INIT, TCP_RTO_MAX, max_retries);
524} 435}
525 436
526void tcp_set_keepalive(struct sock *sk, int val) 437void tcp_set_keepalive(struct sock *sk, int val)