author		Eric Dumazet <edumazet@google.com>	2012-10-27 19:16:46 -0400
committer	David S. Miller <davem@davemloft.net>	2012-11-03 14:45:00 -0400
commit		e6c022a4fa2d2d9ca9d0a7ac3b05ad988f39fc30 (patch)
tree		6dcd4e2dfc7895f33329fb16653a93f7d52f4bd7
parent		25b1e67921f448cdddf70042ba233ffe43d33a9c (diff)
tcp: better retrans tracking for defer-accept
For passive TCP connections using the TCP_DEFER_ACCEPT facility, we
incorrectly increment req->retrans each time the timeout triggers,
even though no SYNACK is sent.

SYNACKs are not sent for TCP_DEFER_ACCEPT requests that were already
established (for which we received the ACK from the client). Only the
last SYNACK is sent, so that we can receive the ACK from the client
again and move the req into the accept queue. We plan to change this
later to avoid the useless retransmit (and the potential problem of
that SYNACK being lost).

TCP_INFO then gives wrong information to the user, claiming imaginary
retransmits.

Decouple the req->retrans field into two independent fields:

num_retrans : number of retransmits
num_timeout : number of timeouts

num_timeout is the counter that is incremented at each timeout,
regardless of whether a SYNACK is actually sent, and is used to
compute the exponential backoff.

Introduce the inet_rtx_syn_ack() helper to increment num_retrans only
if ->rtx_syn_ack() succeeded.

Use inet_rtx_syn_ack() from tcp_check_req() to increment num_retrans
when we re-send a SYNACK in answer to a (retransmitted) SYN. Prior to
this patch, we were not counting these retransmits.

Change tcp_v[46]_rtx_synack() to increment TCP_MIB_RETRANSSEGS only if
a SYNACK packet was successfully queued.

Reported-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Julian Anastasov <ja@ssi.bg>
Cc: Vijay Subramanian <subramanian.vijay@gmail.com>
Cc: Elliott Hughes <enh@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
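For illustration, the standalone sketch below (not kernel code; the struct
and helpers are simplified stand-ins, with the counter semantics taken from
this patch) shows the intended split: num_retrans advances only when a
SYNACK was actually queued, while num_timeout advances on every timer
expiry and drives the exponential backoff.

/* Simplified illustration of the counter split; everything except the
 * two field names and the backoff rule is illustrative scaffolding. */
#include <stdio.h>

struct fake_req {
	unsigned num_retrans;	/* SYNACKs actually queued */
	unsigned num_timeout;	/* timer expirations; drives backoff */
};

/* Like inet_rtx_syn_ack(): count a retransmit only if the send succeeded. */
static int fake_rtx_syn_ack(struct fake_req *req, int send_err)
{
	if (!send_err)
		req->num_retrans++;
	return send_err;
}

/* Like the prune path: every expiry bumps num_timeout and recomputes the
 * next timeout as min(timeout << num_timeout, max_rto). */
static unsigned long on_timeout(struct fake_req *req, int resend, int send_err,
				unsigned long timeout, unsigned long max_rto)
{
	unsigned long timeo;

	if (resend)
		fake_rtx_syn_ack(req, send_err);
	req->num_timeout++;
	timeo = timeout << req->num_timeout;
	return timeo < max_rto ? timeo : max_rto;
}

int main(void)
{
	struct fake_req req = { 0, 0 };
	unsigned long timeo = 0;
	int i;

	/* Defer-accept style: three expiries with no SYNACK sent, one with. */
	for (i = 0; i < 3; i++)
		timeo = on_timeout(&req, 0, 0, 1000, 120000);
	timeo = on_timeout(&req, 1, 0, 1000, 120000);

	printf("num_retrans=%u num_timeout=%u next timeout=%lums\n",
	       req.num_retrans, req.num_timeout, timeo);
	/* Prints: num_retrans=1 num_timeout=4 next timeout=16000ms */
	return 0;
}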
-rw-r--r--	include/net/request_sock.h	12
-rw-r--r--	net/dccp/minisocks.c	3
-rw-r--r--	net/ipv4/inet_connection_sock.c	25
-rw-r--r--	net/ipv4/inet_diag.c	2
-rw-r--r--	net/ipv4/syncookies.c	2
-rw-r--r--	net/ipv4/tcp_input.c	2
-rw-r--r--	net/ipv4/tcp_ipv4.c	16
-rw-r--r--	net/ipv4/tcp_minisocks.c	8
-rw-r--r--	net/ipv4/tcp_timer.c	8
-rw-r--r--	net/ipv6/syncookies.c	2
-rw-r--r--	net/ipv6/tcp_ipv6.c	11
11 files changed, 56 insertions, 35 deletions
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index b01d8dd9ee7c..a51dbd17c2de 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -49,13 +49,16 @@ struct request_sock_ops {
 			       struct request_sock *req);
 };
 
+extern int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req);
+
 /* struct request_sock - mini sock to represent a connection request
  */
 struct request_sock {
 	struct request_sock		*dl_next; /* Must be first member! */
 	u16				mss;
-	u8				retrans;
-	u8				cookie_ts; /* syncookie: encode tcpopts in timestamp */
+	u8				num_retrans; /* number of retransmits */
+	u8				cookie_ts:1; /* syncookie: encode tcpopts in timestamp */
+	u8				num_timeout:7; /* number of timeouts */
 	/* The following two fields can be easily recomputed I think -AK */
 	u32				window_clamp; /* window clamp at creation time */
 	u32				rcv_wnd;	  /* rcv_wnd offered first time */
@@ -231,7 +234,7 @@ static inline int reqsk_queue_removed(struct request_sock_queue *queue,
 {
 	struct listen_sock *lopt = queue->listen_opt;
 
-	if (req->retrans == 0)
+	if (req->num_timeout == 0)
 		--lopt->qlen_young;
 
 	return --lopt->qlen;
@@ -269,7 +272,8 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
 	struct listen_sock *lopt = queue->listen_opt;
 
 	req->expires = jiffies + timeout;
-	req->retrans = 0;
+	req->num_retrans = 0;
+	req->num_timeout = 0;
 	req->sk = NULL;
 	req->dl_next = lopt->syn_table[hash];
 
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index ea850ce35d4a..662071b249cc 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -174,8 +174,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 		 * To protect against Request floods, increment retrans
 		 * counter (backoff, monitored by dccp_response_timer).
 		 */
-		req->retrans++;
-		req->rsk_ops->rtx_syn_ack(sk, req, NULL);
+		inet_rtx_syn_ack(sk, req);
 	}
 	/* Network Duplicate, discard packet */
 	return NULL;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d34ce2972c8f..2026542d6836 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -521,21 +521,31 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
 			       int *expire, int *resend)
 {
 	if (!rskq_defer_accept) {
-		*expire = req->retrans >= thresh;
+		*expire = req->num_timeout >= thresh;
 		*resend = 1;
 		return;
 	}
-	*expire = req->retrans >= thresh &&
-		  (!inet_rsk(req)->acked || req->retrans >= max_retries);
+	*expire = req->num_timeout >= thresh &&
+		  (!inet_rsk(req)->acked || req->num_timeout >= max_retries);
 	/*
 	 * Do not resend while waiting for data after ACK,
 	 * start to resend on end of deferring period to give
 	 * last chance for data or ACK to create established socket.
 	 */
 	*resend = !inet_rsk(req)->acked ||
-		  req->retrans >= rskq_defer_accept - 1;
+		  req->num_timeout >= rskq_defer_accept - 1;
 }
 
+int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
+{
+	int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL);
+
+	if (!err)
+		req->num_retrans++;
+	return err;
+}
+EXPORT_SYMBOL(inet_rtx_syn_ack);
+
 void inet_csk_reqsk_queue_prune(struct sock *parent,
 				const unsigned long interval,
 				const unsigned long timeout,
@@ -599,13 +609,14 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 			req->rsk_ops->syn_ack_timeout(parent, req);
 			if (!expire &&
 			    (!resend ||
-			     !req->rsk_ops->rtx_syn_ack(parent, req, NULL) ||
+			     !inet_rtx_syn_ack(parent, req) ||
 			     inet_rsk(req)->acked)) {
 				unsigned long timeo;
 
-				if (req->retrans++ == 0)
+				if (req->num_timeout++ == 0)
 					lopt->qlen_young--;
-				timeo = min((timeout << req->retrans), max_rto);
+				timeo = min(timeout << req->num_timeout,
+					    max_rto);
 				req->expires = now + timeo;
 				reqp = &req->dl_next;
 				continue;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index e5bad82d3584..b5e781b529aa 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -620,7 +620,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 	r->idiag_family = sk->sk_family;
 	r->idiag_state = TCP_SYN_RECV;
 	r->idiag_timer = 1;
-	r->idiag_retrans = req->retrans;
+	r->idiag_retrans = req->num_retrans;
 
 	r->id.idiag_if = sk->sk_bound_dev_if;
 	sock_diag_save_cookie(req, r->id.idiag_cookie);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index ba48e799b031..b236ef04914f 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -340,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	}
 
 	req->expires	= 0UL;
-	req->retrans	= 0;
+	req->num_retrans = 0;
 
 	/*
 	 * We need to lookup the route here to get at the correct
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 60cf836120a1..e95b4e508afe 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5991,7 +5991,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		 */
 		if (req) {
 			tcp_synack_rtt_meas(sk, req);
-			tp->total_retrans = req->retrans;
+			tp->total_retrans = req->num_retrans;
 
 			reqsk_fastopen_remove(sk, req, false);
 		} else {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 60e2e5d3ce29..e3607669064e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -877,10 +877,13 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 }
 
 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
 			     struct request_values *rvp)
 {
-	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
-	return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
+	int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
+
+	if (!res)
+		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+	return res;
 }
 
 /*
@@ -1386,7 +1389,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
 	struct sock *child;
 	int err;
 
-	req->retrans = 0;
+	req->num_retrans = 0;
+	req->num_timeout = 0;
 	req->sk = NULL;
 
 	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
@@ -1740,7 +1744,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	tcp_initialize_rcv_mss(newsk);
 	tcp_synack_rtt_meas(newsk, req);
-	newtp->total_retrans = req->retrans;
+	newtp->total_retrans = req->num_retrans;
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Copy over the MD5 key from the original socket */
@@ -2638,7 +2642,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
 		0, 0, /* could print option size, but that is af dependent. */
 		1,    /* timers active (only the expire timer) */
 		jiffies_delta_to_clock_t(delta),
-		req->retrans,
+		req->num_timeout,
 		from_kuid_munged(seq_user_ns(f), uid),
 		0,  /* non standard timer */
 		0, /* open_requests have no inode */
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 27536ba16c9d..0404b3f4c959 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -552,7 +552,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			 * it can be estimated (approximately)
 			 * from another data.
 			 */
-			tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
+			tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
 			paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
 		}
 	}
@@ -581,7 +581,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 		 * Note that even if there is new data in the SYN packet
 		 * they will be thrown away too.
 		 */
-		req->rsk_ops->rtx_syn_ack(sk, req, NULL);
+		inet_rtx_syn_ack(sk, req);
 		return NULL;
 	}
 
@@ -695,7 +695,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	/* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */
 	if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
 		tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
-	else if (req->retrans) /* don't take RTT sample if retrans && ~TS */
+	else if (req->num_retrans) /* don't take RTT sample if retrans && ~TS */
 		tcp_rsk(req)->snt_synack = 0;
 
 	/* For Fast Open no more processing is needed (sk is the
@@ -705,7 +705,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 		return sk;
 
 	/* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
-	if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+	if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
 	    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
 		inet_rsk(req)->acked = 1;
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index fc04711e80c8..62c69ab19fdf 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -318,7 +318,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
 	req = tcp_sk(sk)->fastopen_rsk;
 	req->rsk_ops->syn_ack_timeout(sk, req);
 
-	if (req->retrans >= max_retries) {
+	if (req->num_timeout >= max_retries) {
 		tcp_write_err(sk);
 		return;
 	}
@@ -327,10 +327,10 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
 	 * regular retransmit because if the child socket has been accepted
 	 * it's not good to give up too easily.
 	 */
-	req->rsk_ops->rtx_syn_ack(sk, req, NULL);
-	req->retrans++;
+	inet_rtx_syn_ack(sk, req);
+	req->num_timeout++;
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
-			  TCP_TIMEOUT_INIT << req->retrans, TCP_RTO_MAX);
+			  TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
 }
 
 /*
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 182ab9a85d6c..40161977f7cf 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -214,7 +214,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	ireq6->iif = inet6_iif(skb);
 
 	req->expires = 0UL;
-	req->retrans = 0;
+	req->num_retrans = 0;
 	ireq->ecn_ok		= ecn_ok;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index bb6782e84996..c73d0ebde9c8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -495,9 +495,12 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
 			     struct request_values *rvp)
 {
 	struct flowi6 fl6;
+	int res;
 
-	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
-	return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
+	res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
+	if (!res)
+		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+	return res;
 }
 
 static void tcp_v6_reqsk_destructor(struct request_sock *req)
@@ -1364,7 +1367,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	tcp_initialize_rcv_mss(newsk);
 	tcp_synack_rtt_meas(newsk, req);
-	newtp->total_retrans = req->retrans;
+	newtp->total_retrans = req->num_retrans;
 
 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
@@ -1866,7 +1869,7 @@ static void get_openreq6(struct seq_file *seq,
 		   0,0, /* could print option size, but that is af dependent. */
 		   1,   /* timers active (only the expire timer) */
 		   jiffies_to_clock_t(ttd),
-		   req->retrans,
+		   req->num_timeout,
 		   from_kuid_munged(seq_user_ns(seq), uid),
 		   0,  /* non standard timer */
 		   0, /* open_requests have no inode */