about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- include/linux/tcp.h 2
-rw-r--r-- include/net/inet_connection_sock.h 2
-rw-r--r-- include/net/request_sock.h 7
-rw-r--r-- include/net/tcp.h 3
-rw-r--r-- net/core/request_sock.c 1
-rw-r--r-- net/ipv4/af_inet.c 1
-rw-r--r-- net/ipv4/tcp.c 21
-rw-r--r-- net/ipv4/tcp_input.c 11
-rw-r--r-- net/ipv4/tcp_minisocks.c 10
-rw-r--r-- net/ipv4/tcp_timer.c 46
10 files changed, 67 insertions, 37 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 800930fac388..620096840744 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -270,7 +270,7 @@ struct tcp_sock {
270 __u8 frto_counter; /* Number of new acks after RTO */ 270 __u8 frto_counter; /* Number of new acks after RTO */
271 271
272 __u8 nonagle; /* Disable Nagle algorithm? */ 272 __u8 nonagle; /* Disable Nagle algorithm? */
273 __u8 defer_accept; /* User waits for some data after accept() */ 273 /* ONE BYTE HOLE, TRY TO PACK */
274 274
275/* RTT measurement */ 275/* RTT measurement */
276 __u32 srtt; /* smoothed round trip time << 3 */ 276 __u32 srtt; /* smoothed round trip time << 3 */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index a50f4a4b7b4b..692825fc8135 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -239,4 +239,6 @@ static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
239 reqsk_free(req); 239 reqsk_free(req);
240} 240}
241 241
242extern void inet_csk_listen_stop(struct sock *sk);
243
242#endif /* _INET_CONNECTION_SOCK_H */ 244#endif /* _INET_CONNECTION_SOCK_H */
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index b7c7eecbe64d..447d287a38fd 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -97,6 +97,7 @@ struct listen_sock {
97 * 97 *
98 * @rskq_accept_head - FIFO head of established children 98 * @rskq_accept_head - FIFO head of established children
99 * @rskq_accept_tail - FIFO tail of established children 99 * @rskq_accept_tail - FIFO tail of established children
100 * @rskq_defer_accept - User waits for some data after accept()
100 * @syn_wait_lock - serializer 101 * @syn_wait_lock - serializer
101 * 102 *
102 * %syn_wait_lock is necessary only to avoid proc interface having to grab the main 103 * %syn_wait_lock is necessary only to avoid proc interface having to grab the main
@@ -112,6 +113,8 @@ struct request_sock_queue {
112 struct request_sock *rskq_accept_head; 113 struct request_sock *rskq_accept_head;
113 struct request_sock *rskq_accept_tail; 114 struct request_sock *rskq_accept_tail;
114 rwlock_t syn_wait_lock; 115 rwlock_t syn_wait_lock;
116 u8 rskq_defer_accept;
117 /* 3 bytes hole, try to pack */
115 struct listen_sock *listen_opt; 118 struct listen_sock *listen_opt;
116}; 119};
117 120
@@ -255,4 +258,8 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
255 write_unlock(&queue->syn_wait_lock); 258 write_unlock(&queue->syn_wait_lock);
256} 259}
257 260
261extern void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
262 const unsigned long interval, const unsigned long timeout,
263 const unsigned long max_rto, int max_retries);
264
258#endif /* _REQUEST_SOCK_H */ 265#endif /* _REQUEST_SOCK_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 68f1ec1c583a..2423f059b62b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -423,7 +423,8 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
423 size_t len, int nonblock, 423 size_t len, int nonblock,
424 int flags, int *addr_len); 424 int flags, int *addr_len);
425 425
426extern int tcp_listen_start(struct sock *sk); 426extern int inet_csk_listen_start(struct sock *sk,
427 const int nr_table_entries);
427 428
428extern void tcp_parse_options(struct sk_buff *skb, 429extern void tcp_parse_options(struct sk_buff *skb,
429 struct tcp_options_received *opt_rx, 430 struct tcp_options_received *opt_rx,
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 98f0fc923f91..b8203de5ff07 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -52,6 +52,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
52 get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); 52 get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
53 rwlock_init(&queue->syn_wait_lock); 53 rwlock_init(&queue->syn_wait_lock);
54 queue->rskq_accept_head = queue->rskq_accept_head = NULL; 54 queue->rskq_accept_head = queue->rskq_accept_head = NULL;
55 queue->rskq_defer_accept = 0;
55 lopt->nr_table_entries = nr_table_entries; 56 lopt->nr_table_entries = nr_table_entries;
56 57
57 write_lock_bh(&queue->syn_wait_lock); 58 write_lock_bh(&queue->syn_wait_lock);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f691058cf599..52f5ecc58c46 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -99,6 +99,7 @@
99#include <net/arp.h> 99#include <net/arp.h>
100#include <net/route.h> 100#include <net/route.h>
101#include <net/ip_fib.h> 101#include <net/ip_fib.h>
102#include <net/inet_connection_sock.h>
102#include <net/tcp.h> 103#include <net/tcp.h>
103#include <net/udp.h> 104#include <net/udp.h>
104#include <linux/skbuff.h> 105#include <linux/skbuff.h>
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a1f812159ced..a4e9eec44895 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -495,7 +495,7 @@ EXPORT_SYMBOL_GPL(inet_csk_listen_start);
495 * This routine closes sockets which have been at least partially 495 * This routine closes sockets which have been at least partially
496 * opened, but not yet accepted. 496 * opened, but not yet accepted.
497 */ 497 */
498static void inet_csk_listen_stop(struct sock *sk) 498void inet_csk_listen_stop(struct sock *sk)
499{ 499{
500 struct inet_connection_sock *icsk = inet_csk(sk); 500 struct inet_connection_sock *icsk = inet_csk(sk);
501 struct request_sock *acc_req; 501 struct request_sock *acc_req;
@@ -1947,15 +1947,15 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
1947 break; 1947 break;
1948 1948
1949 case TCP_DEFER_ACCEPT: 1949 case TCP_DEFER_ACCEPT:
1950 tp->defer_accept = 0; 1950 icsk->icsk_accept_queue.rskq_defer_accept = 0;
1951 if (val > 0) { 1951 if (val > 0) {
1952 /* Translate value in seconds to number of 1952 /* Translate value in seconds to number of
1953 * retransmits */ 1953 * retransmits */
1954 while (tp->defer_accept < 32 && 1954 while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
1955 val > ((TCP_TIMEOUT_INIT / HZ) << 1955 val > ((TCP_TIMEOUT_INIT / HZ) <<
1956 tp->defer_accept)) 1956 icsk->icsk_accept_queue.rskq_defer_accept))
1957 tp->defer_accept++; 1957 icsk->icsk_accept_queue.rskq_defer_accept++;
1958 tp->defer_accept++; 1958 icsk->icsk_accept_queue.rskq_defer_accept++;
1959 } 1959 }
1960 break; 1960 break;
1961 1961
@@ -2058,6 +2058,7 @@ EXPORT_SYMBOL_GPL(tcp_get_info);
2058int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, 2058int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2059 int __user *optlen) 2059 int __user *optlen)
2060{ 2060{
2061 struct inet_connection_sock *icsk = inet_csk(sk);
2061 struct tcp_sock *tp = tcp_sk(sk); 2062 struct tcp_sock *tp = tcp_sk(sk);
2062 int val, len; 2063 int val, len;
2063 2064
@@ -2095,7 +2096,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2095 val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; 2096 val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
2096 break; 2097 break;
2097 case TCP_SYNCNT: 2098 case TCP_SYNCNT:
2098 val = inet_csk(sk)->icsk_syn_retries ? : sysctl_tcp_syn_retries; 2099 val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
2099 break; 2100 break;
2100 case TCP_LINGER2: 2101 case TCP_LINGER2:
2101 val = tp->linger2; 2102 val = tp->linger2;
@@ -2103,8 +2104,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2103 val = (val ? : sysctl_tcp_fin_timeout) / HZ; 2104 val = (val ? : sysctl_tcp_fin_timeout) / HZ;
2104 break; 2105 break;
2105 case TCP_DEFER_ACCEPT: 2106 case TCP_DEFER_ACCEPT:
2106 val = !tp->defer_accept ? 0 : ((TCP_TIMEOUT_INIT / HZ) << 2107 val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
2107 (tp->defer_accept - 1)); 2108 ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
2108 break; 2109 break;
2109 case TCP_WINDOW_CLAMP: 2110 case TCP_WINDOW_CLAMP:
2110 val = tp->window_clamp; 2111 val = tp->window_clamp;
@@ -2125,7 +2126,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2125 return 0; 2126 return 0;
2126 } 2127 }
2127 case TCP_QUICKACK: 2128 case TCP_QUICKACK:
2128 val = !inet_csk(sk)->icsk_ack.pingpong; 2129 val = !icsk->icsk_ack.pingpong;
2129 break; 2130 break;
2130 2131
2131 case TCP_CONGESTION: 2132 case TCP_CONGESTION:
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b35badf53aa5..71d456148de7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3831,6 +3831,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
3831 tcp_parse_options(skb, &tp->rx_opt, 0); 3831 tcp_parse_options(skb, &tp->rx_opt, 0);
3832 3832
3833 if (th->ack) { 3833 if (th->ack) {
3834 struct inet_connection_sock *icsk;
3834 /* rfc793: 3835 /* rfc793:
3835 * "If the state is SYN-SENT then 3836 * "If the state is SYN-SENT then
3836 * first check the ACK bit 3837 * first check the ACK bit
@@ -3956,7 +3957,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
3956 sk_wake_async(sk, 0, POLL_OUT); 3957 sk_wake_async(sk, 0, POLL_OUT);
3957 } 3958 }
3958 3959
3959 if (sk->sk_write_pending || tp->defer_accept || inet_csk(sk)->icsk_ack.pingpong) { 3960 icsk = inet_csk(sk);
3961
3962 if (sk->sk_write_pending ||
3963 icsk->icsk_accept_queue.rskq_defer_accept ||
3964 icsk->icsk_ack.pingpong) {
3960 /* Save one ACK. Data will be ready after 3965 /* Save one ACK. Data will be ready after
3961 * several ticks, if write_pending is set. 3966 * several ticks, if write_pending is set.
3962 * 3967 *
@@ -3965,8 +3970,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
3965 * to stand against the temptation 8) --ANK 3970 * to stand against the temptation 8) --ANK
3966 */ 3971 */
3967 inet_csk_schedule_ack(sk); 3972 inet_csk_schedule_ack(sk);
3968 inet_csk(sk)->icsk_ack.lrcvtime = tcp_time_stamp; 3973 icsk->icsk_ack.lrcvtime = tcp_time_stamp;
3969 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; 3974 icsk->icsk_ack.ato = TCP_ATO_MIN;
3970 tcp_incr_quickack(sk); 3975 tcp_incr_quickack(sk);
3971 tcp_enter_quickack_mode(sk); 3976 tcp_enter_quickack_mode(sk);
3972 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 3977 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4cfbe1d1c920..2d95afe5b393 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -787,9 +787,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
787 does sequence test, SYN is truncated, and thus we consider 787 does sequence test, SYN is truncated, and thus we consider
788 it a bare ACK. 788 it a bare ACK.
789 789
790 If tp->defer_accept, we silently drop this bare ACK. Otherwise, 790 If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
791 we create an established connection. Both ends (listening sockets) 791 bare ACK. Otherwise, we create an established connection. Both
792 accept the new incoming connection and try to talk to each other. 8-) 792 ends (listening sockets) accept the new incoming connection and try
793 to talk to each other. 8-)
793 794
794 Note: This case is both harmless, and rare. Possibility is about the 795 Note: This case is both harmless, and rare. Possibility is about the
795 same as us discovering intelligent life on another plant tomorrow. 796 same as us discovering intelligent life on another plant tomorrow.
@@ -856,7 +857,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
856 return NULL; 857 return NULL;
857 858
858 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ 859 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
859 if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { 860 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
861 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
860 inet_rsk(req)->acked = 1; 862 inet_rsk(req)->acked = 1;
861 return NULL; 863 return NULL;
862 } 864 }
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c03930c48f42..b614ad4d30c9 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -424,16 +424,12 @@ out_unlock:
424 sock_put(sk); 424 sock_put(sk);
425} 425}
426 426
427/* 427void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
428 * Timer for listening sockets 428 const unsigned long interval, const unsigned long timeout,
429 */ 429 const unsigned long max_rto, int max_retries)
430
431static void tcp_synack_timer(struct sock *sk)
432{ 430{
433 struct tcp_sock *tp = tcp_sk(sk); 431 struct inet_connection_sock *icsk = inet_csk(parent);
434 struct inet_connection_sock *icsk = inet_csk(sk); 432 struct listen_sock *lopt = queue->listen_opt;
435 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
436 int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
437 int thresh = max_retries; 433 int thresh = max_retries;
438 unsigned long now = jiffies; 434 unsigned long now = jiffies;
439 struct request_sock **reqp, *req; 435 struct request_sock **reqp, *req;
@@ -470,10 +466,10 @@ static void tcp_synack_timer(struct sock *sk)
470 } 466 }
471 } 467 }
472 468
473 if (tp->defer_accept) 469 if (queue->rskq_defer_accept)
474 max_retries = tp->defer_accept; 470 max_retries = queue->rskq_defer_accept;
475 471
476 budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL)); 472 budget = 2 * (lopt->nr_table_entries / (timeout / interval));
477 i = lopt->clock_hand; 473 i = lopt->clock_hand;
478 474
479 do { 475 do {
@@ -482,20 +478,19 @@ static void tcp_synack_timer(struct sock *sk)
482 if (time_after_eq(now, req->expires)) { 478 if (time_after_eq(now, req->expires)) {
483 if ((req->retrans < thresh || 479 if ((req->retrans < thresh ||
484 (inet_rsk(req)->acked && req->retrans < max_retries)) 480 (inet_rsk(req)->acked && req->retrans < max_retries))
485 && !req->rsk_ops->rtx_syn_ack(sk, req, NULL)) { 481 && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
486 unsigned long timeo; 482 unsigned long timeo;
487 483
488 if (req->retrans++ == 0) 484 if (req->retrans++ == 0)
489 lopt->qlen_young--; 485 lopt->qlen_young--;
490 timeo = min((TCP_TIMEOUT_INIT << req->retrans), 486 timeo = min((timeout << req->retrans), max_rto);
491 TCP_RTO_MAX);
492 req->expires = now + timeo; 487 req->expires = now + timeo;
493 reqp = &req->dl_next; 488 reqp = &req->dl_next;
494 continue; 489 continue;
495 } 490 }
496 491
497 /* Drop this request */ 492 /* Drop this request */
498 inet_csk_reqsk_queue_unlink(sk, req, reqp); 493 inet_csk_reqsk_queue_unlink(parent, req, reqp);
499 reqsk_queue_removed(&icsk->icsk_accept_queue, req); 494 reqsk_queue_removed(&icsk->icsk_accept_queue, req);
500 reqsk_free(req); 495 reqsk_free(req);
501 continue; 496 continue;
@@ -503,14 +498,29 @@ static void tcp_synack_timer(struct sock *sk)
503 reqp = &req->dl_next; 498 reqp = &req->dl_next;
504 } 499 }
505 500
506 i = (i+1)&(TCP_SYNQ_HSIZE-1); 501 i = (i + 1) & (lopt->nr_table_entries - 1);
507 502
508 } while (--budget > 0); 503 } while (--budget > 0);
509 504
510 lopt->clock_hand = i; 505 lopt->clock_hand = i;
511 506
512 if (lopt->qlen) 507 if (lopt->qlen)
513 inet_csk_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); 508 inet_csk_reset_keepalive_timer(parent, interval);
509}
510
511EXPORT_SYMBOL_GPL(reqsk_queue_prune);
512
513/*
514 * Timer for listening sockets
515 */
516
517static void tcp_synack_timer(struct sock *sk)
518{
519 struct inet_connection_sock *icsk = inet_csk(sk);
520 const int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
521
522 reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
523 TCP_TIMEOUT_INIT, TCP_RTO_MAX, max_retries);
514} 524}
515 525
516void tcp_set_keepalive(struct sock *sk, int val) 526void tcp_set_keepalive(struct sock *sk, int val)