author	Yuchung Cheng <ycheng@google.com>	2014-08-22 17:15:22 -0400
committer	David S. Miller <davem@davemloft.net>	2014-08-23 00:28:02 -0400
commit	989e04c5bc3ff77d65e1f0d87bf7904dfa30d41c (patch)
tree	f9e12506a3e88dd1a74088d2137c4126108d5efd /net
parent	a7d5f58d7dfb2f342ef7d676f58d1ec762ebb8ad (diff)
tcp: improve undo on timeout
Upon timeout, undo (via both timestamps/Eifel and DSACKs) was disabled if any retransmits were still in flight. The concern was perhaps that spurious retransmissions sent in a previous recovery episode might trigger DSACKs that falsely undo the current recovery.

However, this inadvertently misses undo opportunities (using either TCP timestamps or DSACKs) when a timeout occurs during a loss episode, i.e. recurring timeouts or a timeout during fast recovery. In these cases some retransmissions will be in flight, but we should still allow undo.

Furthermore, we should only reset undo_marker and undo_retrans upon timeout if we are starting a new recovery episode. Finally, when we do reset our undo state, we now do so in a manner similar to tcp_enter_recovery(), so that we require a DSACK for each of the outstanding retransmissions. This achieves the original goal by requiring that we receive the same number of DSACKs as retransmissions.

This patch increases the undo events by 50% on Google servers.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
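The undo accounting described above can be pictured with a small, self-contained sketch. This is an illustration of the idea only, not the kernel code: the struct, field names, and helper names below are simplified stand-ins modeled loosely on struct tcp_sock. Undo is armed when a loss or recovery episode starts, one DSACK is expected per retransmission still outstanding at that point, and undo becomes possible once every such retransmission has been reported as a duplicate, or once a timestamp/Eifel-style check shows the original segment was merely delayed.

/* Simplified sketch of the undo bookkeeping (NOT the kernel implementation). */
#include <stdbool.h>

struct undo_state {
	unsigned int snd_una;      /* first unacknowledged sequence number */
	unsigned int retrans_out;  /* retransmissions currently in flight */
	unsigned int undo_marker;  /* snd_una when the episode started; 0 = undo disarmed */
	int undo_retrans;          /* DSACKs still needed; -1 = rely on timestamps only */
};

/* Arm undo, requiring one DSACK per retransmission already outstanding
 * (mirrors the intent of the new tcp_init_undo() helper). */
static void init_undo(struct undo_state *s)
{
	s->undo_marker = s->snd_una;
	s->undo_retrans = s->retrans_out ? (int)s->retrans_out : -1;
}

/* Called for each DSACK that reports one of our retransmissions as spurious. */
static void on_dsack(struct undo_state *s)
{
	if (s->undo_marker && s->undo_retrans > 0)
		s->undo_retrans--;
}

/* Undo is allowed once all outstanding retransmissions were DSACKed, or when
 * a timestamp check proves the original packet was delayed rather than lost. */
static bool may_undo(const struct undo_state *s, bool packet_delayed)
{
	return s->undo_marker && (s->undo_retrans == 0 || packet_delayed);
}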
Diffstat (limited to 'net')
-rw-r--r--  net/ipv4/tcp_input.c  26
1 file changed, 11 insertions, 15 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a906e0200ff2..aba4926ca095 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1888,21 +1888,21 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
 	tp->sacked_out = 0;
 }
 
-static void tcp_clear_retrans_partial(struct tcp_sock *tp)
+void tcp_clear_retrans(struct tcp_sock *tp)
 {
 	tp->retrans_out = 0;
 	tp->lost_out = 0;
-
 	tp->undo_marker = 0;
 	tp->undo_retrans = -1;
+	tp->fackets_out = 0;
+	tp->sacked_out = 0;
 }
 
-void tcp_clear_retrans(struct tcp_sock *tp)
+static inline void tcp_init_undo(struct tcp_sock *tp)
 {
-	tcp_clear_retrans_partial(tp);
-
-	tp->fackets_out = 0;
-	tp->sacked_out = 0;
+	tp->undo_marker = tp->snd_una;
+	/* Retransmission still in flight may cause DSACKs later. */
+	tp->undo_retrans = tp->retrans_out ? : -1;
 }
 
 /* Enter Loss state. If we detect SACK reneging, forget all SACK information
@@ -1925,18 +1925,18 @@ void tcp_enter_loss(struct sock *sk)
 		tp->prior_ssthresh = tcp_current_ssthresh(sk);
 		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 		tcp_ca_event(sk, CA_EVENT_LOSS);
+		tcp_init_undo(tp);
 	}
 	tp->snd_cwnd = 1;
 	tp->snd_cwnd_cnt = 0;
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 
-	tcp_clear_retrans_partial(tp);
+	tp->retrans_out = 0;
+	tp->lost_out = 0;
 
 	if (tcp_is_reno(tp))
 		tcp_reset_reno_sack(tp);
 
-	tp->undo_marker = tp->snd_una;
-
 	skb = tcp_write_queue_head(sk);
 	is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
 	if (is_reneg) {
@@ -1950,9 +1950,6 @@ void tcp_enter_loss(struct sock *sk)
 		if (skb == tcp_send_head(sk))
 			break;
 
-		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
-			tp->undo_marker = 0;
-
 		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
 		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
@@ -2671,8 +2668,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 	NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
 	tp->prior_ssthresh = 0;
-	tp->undo_marker = tp->snd_una;
-	tp->undo_retrans = tp->retrans_out ? : -1;
+	tcp_init_undo(tp);
 
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		if (!ece_ack)