aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPatrick McManus <mcmanus@ducksong.com>2008-03-21 19:33:01 -0400
committerDavid S. Miller <davem@davemloft.net>2008-03-21 19:33:01 -0400
commitec3c0982a2dd1e671bad8e9d26c28dcba0039d87 (patch)
tree11a3cd7c530e4225a4c3d4c3f3cc54eb7d2e0e4f
parente4c78840284f3f51b1896cf3936d60a6033c4d2c (diff)
[TCP]: TCP_DEFER_ACCEPT updates - process as established
Change the TCP_DEFER_ACCEPT implementation so that it transitions a connection to ESTABLISHED after the handshake is complete, instead of leaving it in SYN-RECV until some data arrives. The connection is placed in the accept queue when the first data packet arrives, from the slow path.

Benefits:
- an established connection is now reset if it never makes it to the accept queue
- the diagnostic state of established matches the packet traces showing a completed handshake
- TCP_DEFER_ACCEPT timeouts are expressed in seconds and can now be enforced with reasonable accuracy, instead of rounding up to the next exponential back-off of the syn-ack retry

Signed-off-by: Patrick McManus <mcmanus@ducksong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/tcp.h7
-rw-r--r--include/net/request_sock.h4
-rw-r--r--include/net/tcp.h1
-rw-r--r--net/ipv4/inet_connection_sock.c11
-rw-r--r--net/ipv4/tcp.c18
-rw-r--r--net/ipv4/tcp_input.c46
-rw-r--r--net/ipv4/tcp_ipv4.c8
-rw-r--r--net/ipv4/tcp_minisocks.c32
-rw-r--r--net/ipv4/tcp_timer.c5
9 files changed, 99 insertions, 33 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 08027f1d7f31..d96d9b122304 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -239,6 +239,11 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
239 return (struct tcp_request_sock *)req; 239 return (struct tcp_request_sock *)req;
240} 240}
241 241
242struct tcp_deferred_accept_info {
243 struct sock *listen_sk;
244 struct request_sock *request;
245};
246
242struct tcp_sock { 247struct tcp_sock {
243 /* inet_connection_sock has to be the first member of tcp_sock */ 248 /* inet_connection_sock has to be the first member of tcp_sock */
244 struct inet_connection_sock inet_conn; 249 struct inet_connection_sock inet_conn;
@@ -374,6 +379,8 @@ struct tcp_sock {
374 unsigned int keepalive_intvl; /* time interval between keep alive probes */ 379 unsigned int keepalive_intvl; /* time interval between keep alive probes */
375 int linger2; 380 int linger2;
376 381
382 struct tcp_deferred_accept_info defer_tcp_accept;
383
377 unsigned long last_synq_overflow; 384 unsigned long last_synq_overflow;
378 385
379 u32 tso_deferred; 386 u32 tso_deferred;
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 040780add355..0369f98e9f3a 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -115,8 +115,8 @@ struct request_sock_queue {
115 struct request_sock *rskq_accept_head; 115 struct request_sock *rskq_accept_head;
116 struct request_sock *rskq_accept_tail; 116 struct request_sock *rskq_accept_tail;
117 rwlock_t syn_wait_lock; 117 rwlock_t syn_wait_lock;
118 u8 rskq_defer_accept; 118 u16 rskq_defer_accept;
119 /* 3 bytes hole, try to pack */ 119 /* 2 bytes hole, try to pack */
120 struct listen_sock *listen_opt; 120 struct listen_sock *listen_opt;
121}; 121};
122 122
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 847e1634e1f4..67cc3956d29c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -139,6 +139,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
139#define MAX_TCP_KEEPINTVL 32767 139#define MAX_TCP_KEEPINTVL 32767
140#define MAX_TCP_KEEPCNT 127 140#define MAX_TCP_KEEPCNT 127
141#define MAX_TCP_SYNCNT 127 141#define MAX_TCP_SYNCNT 127
142#define MAX_TCP_ACCEPT_DEFERRED 65535
142 143
143#define TCP_SYNQ_INTERVAL (HZ/5) /* Period of SYNACK timer */ 144#define TCP_SYNQ_INTERVAL (HZ/5) /* Period of SYNACK timer */
144 145
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8a45be988709..cc1a1859a61b 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -414,8 +414,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
414 struct inet_connection_sock *icsk = inet_csk(parent); 414 struct inet_connection_sock *icsk = inet_csk(parent);
415 struct request_sock_queue *queue = &icsk->icsk_accept_queue; 415 struct request_sock_queue *queue = &icsk->icsk_accept_queue;
416 struct listen_sock *lopt = queue->listen_opt; 416 struct listen_sock *lopt = queue->listen_opt;
417 int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; 417 int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
418 int thresh = max_retries;
419 unsigned long now = jiffies; 418 unsigned long now = jiffies;
420 struct request_sock **reqp, *req; 419 struct request_sock **reqp, *req;
421 int i, budget; 420 int i, budget;
@@ -451,9 +450,6 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
451 } 450 }
452 } 451 }
453 452
454 if (queue->rskq_defer_accept)
455 max_retries = queue->rskq_defer_accept;
456
457 budget = 2 * (lopt->nr_table_entries / (timeout / interval)); 453 budget = 2 * (lopt->nr_table_entries / (timeout / interval));
458 i = lopt->clock_hand; 454 i = lopt->clock_hand;
459 455
@@ -461,9 +457,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
461 reqp=&lopt->syn_table[i]; 457 reqp=&lopt->syn_table[i];
462 while ((req = *reqp) != NULL) { 458 while ((req = *reqp) != NULL) {
463 if (time_after_eq(now, req->expires)) { 459 if (time_after_eq(now, req->expires)) {
464 if ((req->retrans < (inet_rsk(req)->acked ? max_retries : thresh)) && 460 if (req->retrans < thresh &&
465 (inet_rsk(req)->acked || 461 !req->rsk_ops->rtx_syn_ack(parent, req)) {
466 !req->rsk_ops->rtx_syn_ack(parent, req))) {
467 unsigned long timeo; 462 unsigned long timeo;
468 463
469 if (req->retrans++ == 0) 464 if (req->retrans++ == 0)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 071e83a894ad..e0fbc25ca816 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2105,15 +2105,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2105 break; 2105 break;
2106 2106
2107 case TCP_DEFER_ACCEPT: 2107 case TCP_DEFER_ACCEPT:
2108 icsk->icsk_accept_queue.rskq_defer_accept = 0; 2108 if (val < 0) {
2109 if (val > 0) { 2109 err = -EINVAL;
2110 /* Translate value in seconds to number of 2110 } else {
2111 * retransmits */ 2111 if (val > MAX_TCP_ACCEPT_DEFERRED)
2112 while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && 2112 val = MAX_TCP_ACCEPT_DEFERRED;
2113 val > ((TCP_TIMEOUT_INIT / HZ) << 2113 icsk->icsk_accept_queue.rskq_defer_accept = val;
2114 icsk->icsk_accept_queue.rskq_defer_accept))
2115 icsk->icsk_accept_queue.rskq_defer_accept++;
2116 icsk->icsk_accept_queue.rskq_defer_accept++;
2117 } 2114 }
2118 break; 2115 break;
2119 2116
@@ -2295,8 +2292,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2295 val = (val ? : sysctl_tcp_fin_timeout) / HZ; 2292 val = (val ? : sysctl_tcp_fin_timeout) / HZ;
2296 break; 2293 break;
2297 case TCP_DEFER_ACCEPT: 2294 case TCP_DEFER_ACCEPT:
2298 val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 : 2295 val = icsk->icsk_accept_queue.rskq_defer_accept;
2299 ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
2300 break; 2296 break;
2301 case TCP_WINDOW_CLAMP: 2297 case TCP_WINDOW_CLAMP:
2302 val = tp->window_clamp; 2298 val = tp->window_clamp;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9cf446427cc2..6e46b4c0f28c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4451,6 +4451,49 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
4451 } 4451 }
4452} 4452}
4453 4453
4454static int tcp_defer_accept_check(struct sock *sk)
4455{
4456 struct tcp_sock *tp = tcp_sk(sk);
4457
4458 if (tp->defer_tcp_accept.request) {
4459 int queued_data = tp->rcv_nxt - tp->copied_seq;
4460 int hasfin = !skb_queue_empty(&sk->sk_receive_queue) ?
4461 tcp_hdr((struct sk_buff *)
4462 sk->sk_receive_queue.prev)->fin : 0;
4463
4464 if (queued_data && hasfin)
4465 queued_data--;
4466
4467 if (queued_data &&
4468 tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) {
4469 if (sock_flag(sk, SOCK_KEEPOPEN)) {
4470 inet_csk_reset_keepalive_timer(sk,
4471 keepalive_time_when(tp));
4472 } else {
4473 inet_csk_delete_keepalive_timer(sk);
4474 }
4475
4476 inet_csk_reqsk_queue_add(
4477 tp->defer_tcp_accept.listen_sk,
4478 tp->defer_tcp_accept.request,
4479 sk);
4480
4481 tp->defer_tcp_accept.listen_sk->sk_data_ready(
4482 tp->defer_tcp_accept.listen_sk, 0);
4483
4484 sock_put(tp->defer_tcp_accept.listen_sk);
4485 sock_put(sk);
4486 tp->defer_tcp_accept.listen_sk = NULL;
4487 tp->defer_tcp_accept.request = NULL;
4488 } else if (hasfin ||
4489 tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) {
4490 tcp_reset(sk);
4491 return -1;
4492 }
4493 }
4494 return 0;
4495}
4496
4454static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) 4497static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
4455{ 4498{
4456 struct tcp_sock *tp = tcp_sk(sk); 4499 struct tcp_sock *tp = tcp_sk(sk);
@@ -4811,6 +4854,9 @@ step5:
4811 4854
4812 tcp_data_snd_check(sk); 4855 tcp_data_snd_check(sk);
4813 tcp_ack_snd_check(sk); 4856 tcp_ack_snd_check(sk);
4857
4858 if (tcp_defer_accept_check(sk))
4859 return -1;
4814 return 0; 4860 return 0;
4815 4861
4816csum_error: 4862csum_error:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0ba6e911c979..167a0f557531 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1920,6 +1920,14 @@ int tcp_v4_destroy_sock(struct sock *sk)
1920 sk->sk_sndmsg_page = NULL; 1920 sk->sk_sndmsg_page = NULL;
1921 } 1921 }
1922 1922
1923 if (tp->defer_tcp_accept.request) {
1924 reqsk_free(tp->defer_tcp_accept.request);
1925 sock_put(tp->defer_tcp_accept.listen_sk);
1926 sock_put(sk);
1927 tp->defer_tcp_accept.listen_sk = NULL;
1928 tp->defer_tcp_accept.request = NULL;
1929 }
1930
1923 atomic_dec(&tcp_sockets_allocated); 1931 atomic_dec(&tcp_sockets_allocated);
1924 1932
1925 return 0; 1933 return 0;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 8245247a6ceb..019c8c16e5cc 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -571,10 +571,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
571 does sequence test, SYN is truncated, and thus we consider 571 does sequence test, SYN is truncated, and thus we consider
572 it a bare ACK. 572 it a bare ACK.
573 573
574 If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this 574 Both ends (listening sockets) accept the new incoming
575 bare ACK. Otherwise, we create an established connection. Both 575 connection and try to talk to each other. 8-)
576 ends (listening sockets) accept the new incoming connection and try
577 to talk to each other. 8-)
578 576
579 Note: This case is both harmless, and rare. Possibility is about the 577 Note: This case is both harmless, and rare. Possibility is about the
580 same as us discovering intelligent life on another plant tomorrow. 578 same as us discovering intelligent life on another plant tomorrow.
@@ -642,13 +640,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
642 if (!(flg & TCP_FLAG_ACK)) 640 if (!(flg & TCP_FLAG_ACK))
643 return NULL; 641 return NULL;
644 642
645 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
646 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
647 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
648 inet_rsk(req)->acked = 1;
649 return NULL;
650 }
651
652 /* OK, ACK is valid, create big socket and 643 /* OK, ACK is valid, create big socket and
653 * feed this segment to it. It will repeat all 644 * feed this segment to it. It will repeat all
654 * the tests. THIS SEGMENT MUST MOVE SOCKET TO 645 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
@@ -687,7 +678,24 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
687 inet_csk_reqsk_queue_unlink(sk, req, prev); 678 inet_csk_reqsk_queue_unlink(sk, req, prev);
688 inet_csk_reqsk_queue_removed(sk, req); 679 inet_csk_reqsk_queue_removed(sk, req);
689 680
690 inet_csk_reqsk_queue_add(sk, req, child); 681 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
682 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
683
684 /* the accept queue handling is done is est recv slow
685 * path so lets make sure to start there
686 */
687 tcp_sk(child)->pred_flags = 0;
688 sock_hold(sk);
689 sock_hold(child);
690 tcp_sk(child)->defer_tcp_accept.listen_sk = sk;
691 tcp_sk(child)->defer_tcp_accept.request = req;
692
693 inet_csk_reset_keepalive_timer(child,
694 inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ);
695 } else {
696 inet_csk_reqsk_queue_add(sk, req, child);
697 }
698
691 return child; 699 return child;
692 700
693 listen_overflow: 701 listen_overflow:
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 803d758a2b12..160d16f9f4fc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -481,6 +481,11 @@ static void tcp_keepalive_timer (unsigned long data)
481 goto death; 481 goto death;
482 } 482 }
483 483
484 if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
485 tcp_send_active_reset(sk, GFP_ATOMIC);
486 goto death;
487 }
488
484 if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) 489 if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
485 goto out; 490 goto out;
486 491