author	Yuchung Cheng <ycheng@google.com>	2014-08-22 17:15:22 -0400
committer	David S. Miller <davem@davemloft.net>	2014-08-23 00:28:02 -0400
commit	989e04c5bc3ff77d65e1f0d87bf7904dfa30d41c (patch)
tree	f9e12506a3e88dd1a74088d2137c4126108d5efd /net
parent	a7d5f58d7dfb2f342ef7d676f58d1ec762ebb8ad (diff)
tcp: improve undo on timeout
Upon timeout, undo (via both timestamps/Eifel and DSACKs) was disabled if any retransmits were still in flight. The concern was perhaps that spurious retransmissions sent in a previous recovery episode might trigger DSACKs that falsely undo the current recovery.

However, this inadvertently misses undo opportunities (using either TCP timestamps or DSACKs) when a timeout occurs during a loss episode, i.e. recurring timeouts or a timeout during fast recovery. In these cases some retransmissions will be in flight, but we should still allow undo.

Furthermore, we should only reset undo_marker and undo_retrans upon timeout if we are starting a new recovery episode. Finally, when we do reset our undo state, we now do so in a manner similar to tcp_enter_recovery(), so that we require a DSACK for each of the outstanding retransmissions. This achieves the original goal by requiring that we receive the same number of DSACKs as retransmissions.

This patch increases the undo events by 50% on Google servers.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
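The undo accounting described above can be pictured with a small, self-contained sketch. This is an illustration of the idea only, not the kernel code: the struct, field names, and helper names below are simplified stand-ins modeled loosely on struct tcp_sock. Undo is armed when a loss or recovery episode starts, one DSACK is expected per retransmission still outstanding at that point, and undo becomes possible once every such retransmission has been reported as a duplicate, or once a timestamp/Eifel-style check shows the original segment was merely delayed.

/* Simplified sketch of the undo bookkeeping (NOT the kernel implementation). */
#include <stdbool.h>

struct undo_state {
	unsigned int snd_una;      /* first unacknowledged sequence number */
	unsigned int retrans_out;  /* retransmissions currently in flight */
	unsigned int undo_marker;  /* snd_una when the episode started; 0 = undo disarmed */
	int undo_retrans;          /* DSACKs still needed; -1 = rely on timestamps only */
};

/* Arm undo, requiring one DSACK per retransmission already outstanding
 * (mirrors the intent of the new tcp_init_undo() helper). */
static void init_undo(struct undo_state *s)
{
	s->undo_marker = s->snd_una;
	s->undo_retrans = s->retrans_out ? (int)s->retrans_out : -1;
}

/* Called for each DSACK that reports one of our retransmissions as spurious. */
static void on_dsack(struct undo_state *s)
{
	if (s->undo_marker && s->undo_retrans > 0)
		s->undo_retrans--;
}

/* Undo is allowed once all outstanding retransmissions were DSACKed, or when
 * a timestamp check proves the original packet was delayed rather than lost. */
static bool may_undo(const struct undo_state *s, bool packet_delayed)
{
	return s->undo_marker && (s->undo_retrans == 0 || packet_delayed);
}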
Diffstat (limited to 'net')
-rw-r--r--  net/ipv4/tcp_input.c  26
1 file changed, 11 insertions, 15 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a906e0200ff2..aba4926ca095 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1888,21 +1888,21 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
 	tp->sacked_out = 0;
 }
 
-static void tcp_clear_retrans_partial(struct tcp_sock *tp)
+void tcp_clear_retrans(struct tcp_sock *tp)
 {
 	tp->retrans_out = 0;
 	tp->lost_out = 0;
-
 	tp->undo_marker = 0;
 	tp->undo_retrans = -1;
+	tp->fackets_out = 0;
+	tp->sacked_out = 0;
 }
 
-void tcp_clear_retrans(struct tcp_sock *tp)
+static inline void tcp_init_undo(struct tcp_sock *tp)
 {
-	tcp_clear_retrans_partial(tp);
-
-	tp->fackets_out = 0;
-	tp->sacked_out = 0;
+	tp->undo_marker = tp->snd_una;
+	/* Retransmission still in flight may cause DSACKs later. */
+	tp->undo_retrans = tp->retrans_out ? : -1;
 }
 
 /* Enter Loss state. If we detect SACK reneging, forget all SACK information
@@ -1925,18 +1925,18 @@ void tcp_enter_loss(struct sock *sk)
 		tp->prior_ssthresh = tcp_current_ssthresh(sk);
 		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 		tcp_ca_event(sk, CA_EVENT_LOSS);
+		tcp_init_undo(tp);
 	}
 	tp->snd_cwnd = 1;
 	tp->snd_cwnd_cnt = 0;
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 
-	tcp_clear_retrans_partial(tp);
+	tp->retrans_out = 0;
+	tp->lost_out = 0;
 
 	if (tcp_is_reno(tp))
 		tcp_reset_reno_sack(tp);
 
-	tp->undo_marker = tp->snd_una;
-
 	skb = tcp_write_queue_head(sk);
 	is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
 	if (is_reneg) {
@@ -1950,9 +1950,6 @@ void tcp_enter_loss(struct sock *sk)
 		if (skb == tcp_send_head(sk))
 			break;
 
-		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
-			tp->undo_marker = 0;
-
 		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
 		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
@@ -2671,8 +2668,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 	NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
 	tp->prior_ssthresh = 0;
-	tp->undo_marker = tp->snd_una;
-	tp->undo_retrans = tp->retrans_out ? : -1;
+	tcp_init_undo(tp);
 
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		if (!ece_ack)