Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 676 |
1 files changed, 176 insertions, 500 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0d9bdacce99f..08bbe6096528 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -93,12 +93,11 @@ int sysctl_tcp_stdurg __read_mostly; | |||
93 | int sysctl_tcp_rfc1337 __read_mostly; | 93 | int sysctl_tcp_rfc1337 __read_mostly; |
94 | int sysctl_tcp_max_orphans __read_mostly = NR_FILE; | 94 | int sysctl_tcp_max_orphans __read_mostly = NR_FILE; |
95 | int sysctl_tcp_frto __read_mostly = 2; | 95 | int sysctl_tcp_frto __read_mostly = 2; |
96 | int sysctl_tcp_frto_response __read_mostly; | ||
97 | 96 | ||
98 | int sysctl_tcp_thin_dupack __read_mostly; | 97 | int sysctl_tcp_thin_dupack __read_mostly; |
99 | 98 | ||
100 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; | 99 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; |
101 | int sysctl_tcp_early_retrans __read_mostly = 2; | 100 | int sysctl_tcp_early_retrans __read_mostly = 3; |
102 | 101 | ||
103 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ | 102 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ |
104 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ | 103 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ |
@@ -108,17 +107,16 @@ int sysctl_tcp_early_retrans __read_mostly = 2; | |||
108 | #define FLAG_DATA_SACKED 0x20 /* New SACK. */ | 107 | #define FLAG_DATA_SACKED 0x20 /* New SACK. */ |
109 | #define FLAG_ECE 0x40 /* ECE in this ACK */ | 108 | #define FLAG_ECE 0x40 /* ECE in this ACK */ |
110 | #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ | 109 | #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ |
111 | #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ | 110 | #define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ |
112 | #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ | 111 | #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ |
113 | #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ | 112 | #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ |
114 | #define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ | ||
115 | #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ | 113 | #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ |
114 | #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ | ||
116 | 115 | ||
117 | #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) | 116 | #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) |
118 | #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) | 117 | #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) |
119 | #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) | 118 | #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) |
120 | #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) | 119 | #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) |
121 | #define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) | ||
122 | 120 | ||
123 | #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) | 121 | #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) |
124 | #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) | 122 | #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) |
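These flag bits summarize what each incoming ACK did, and the composites below them are what the ACK-processing code actually tests. As a rough illustrative sketch (not part of the patch, but mirroring the is_dupack test used later in tcp_ack()), a duplicate ACK is one that neither carries data, updates the window, acknowledges new data, nor advances SND.UNA:

/* illustrative helper only */
static inline bool ack_is_duplicate(int flag)
{
	return !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
}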
@@ -1159,10 +1157,8 @@ static u8 tcp_sacktag_one(struct sock *sk, | |||
1159 | tcp_highest_sack_seq(tp))) | 1157 | tcp_highest_sack_seq(tp))) |
1160 | state->reord = min(fack_count, | 1158 | state->reord = min(fack_count, |
1161 | state->reord); | 1159 | state->reord); |
1162 | 1160 | if (!after(end_seq, tp->high_seq)) | |
1163 | /* SACK enhanced F-RTO (RFC4138; Appendix B) */ | 1161 | state->flag |= FLAG_ORIG_SACK_ACKED; |
1164 | if (!after(end_seq, tp->frto_highmark)) | ||
1165 | state->flag |= FLAG_ONLY_ORIG_SACKED; | ||
1166 | } | 1162 | } |
1167 | 1163 | ||
1168 | if (sacked & TCPCB_LOST) { | 1164 | if (sacked & TCPCB_LOST) { |
@@ -1555,7 +1551,6 @@ static int | |||
1555 | tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, | 1551 | tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, |
1556 | u32 prior_snd_una) | 1552 | u32 prior_snd_una) |
1557 | { | 1553 | { |
1558 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
1559 | struct tcp_sock *tp = tcp_sk(sk); | 1554 | struct tcp_sock *tp = tcp_sk(sk); |
1560 | const unsigned char *ptr = (skb_transport_header(ack_skb) + | 1555 | const unsigned char *ptr = (skb_transport_header(ack_skb) + |
1561 | TCP_SKB_CB(ack_skb)->sacked); | 1556 | TCP_SKB_CB(ack_skb)->sacked); |
@@ -1728,12 +1723,6 @@ walk: | |||
1728 | start_seq, end_seq, dup_sack); | 1723 | start_seq, end_seq, dup_sack); |
1729 | 1724 | ||
1730 | advance_sp: | 1725 | advance_sp: |
1731 | /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct | ||
1732 | * due to in-order walk | ||
1733 | */ | ||
1734 | if (after(end_seq, tp->frto_highmark)) | ||
1735 | state.flag &= ~FLAG_ONLY_ORIG_SACKED; | ||
1736 | |||
1737 | i++; | 1726 | i++; |
1738 | } | 1727 | } |
1739 | 1728 | ||
@@ -1750,8 +1739,7 @@ advance_sp: | |||
1750 | tcp_verify_left_out(tp); | 1739 | tcp_verify_left_out(tp); |
1751 | 1740 | ||
1752 | if ((state.reord < tp->fackets_out) && | 1741 | if ((state.reord < tp->fackets_out) && |
1753 | ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) && | 1742 | ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) |
1754 | (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) | ||
1755 | tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); | 1743 | tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); |
1756 | 1744 | ||
1757 | out: | 1745 | out: |
@@ -1825,197 +1813,6 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) | |||
1825 | tp->sacked_out = 0; | 1813 | tp->sacked_out = 0; |
1826 | } | 1814 | } |
1827 | 1815 | ||
1828 | static int tcp_is_sackfrto(const struct tcp_sock *tp) | ||
1829 | { | ||
1830 | return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp); | ||
1831 | } | ||
1832 | |||
1833 | /* F-RTO can only be used if TCP has never retransmitted anything other than | ||
1834 | * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) | ||
1835 | */ | ||
1836 | bool tcp_use_frto(struct sock *sk) | ||
1837 | { | ||
1838 | const struct tcp_sock *tp = tcp_sk(sk); | ||
1839 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
1840 | struct sk_buff *skb; | ||
1841 | |||
1842 | if (!sysctl_tcp_frto) | ||
1843 | return false; | ||
1844 | |||
1845 | /* MTU probe and F-RTO currently won't really play nicely together */ | ||
1846 | if (icsk->icsk_mtup.probe_size) | ||
1847 | return false; | ||
1848 | |||
1849 | if (tcp_is_sackfrto(tp)) | ||
1850 | return true; | ||
1851 | |||
1852 | /* Avoid expensive walking of rexmit queue if possible */ | ||
1853 | if (tp->retrans_out > 1) | ||
1854 | return false; | ||
1855 | |||
1856 | skb = tcp_write_queue_head(sk); | ||
1857 | if (tcp_skb_is_last(sk, skb)) | ||
1858 | return true; | ||
1859 | skb = tcp_write_queue_next(sk, skb); /* Skips head */ | ||
1860 | tcp_for_write_queue_from(skb, sk) { | ||
1861 | if (skb == tcp_send_head(sk)) | ||
1862 | break; | ||
1863 | if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) | ||
1864 | return false; | ||
1865 | /* Short-circuit when first non-SACKed skb has been checked */ | ||
1866 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) | ||
1867 | break; | ||
1868 | } | ||
1869 | return true; | ||
1870 | } | ||
1871 | |||
1872 | /* RTO occurred, but do not yet enter Loss state. Instead, defer RTO | ||
1873 | * recovery a bit and use heuristics in tcp_process_frto() to detect if | ||
1874 | * the RTO was spurious. Only clear SACKED_RETRANS of the head here to | ||
1875 | * keep retrans_out counting accurate (with SACK F-RTO, other than head | ||
1876 | * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS | ||
1877 | * bits are handled if the Loss state is really to be entered (in | ||
1878 | * tcp_enter_frto_loss). | ||
1879 | * | ||
1880 | * Do like tcp_enter_loss() would; when RTO expires the second time it | ||
1881 | * does: | ||
1882 | * "Reduce ssthresh if it has not yet been made inside this window." | ||
1883 | */ | ||
1884 | void tcp_enter_frto(struct sock *sk) | ||
1885 | { | ||
1886 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
1887 | struct tcp_sock *tp = tcp_sk(sk); | ||
1888 | struct sk_buff *skb; | ||
1889 | |||
1890 | if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) || | ||
1891 | tp->snd_una == tp->high_seq || | ||
1892 | ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) && | ||
1893 | !icsk->icsk_retransmits)) { | ||
1894 | tp->prior_ssthresh = tcp_current_ssthresh(sk); | ||
1895 | /* Our state is too optimistic in ssthresh() call because cwnd | ||
1896 | * is not reduced until tcp_enter_frto_loss() when previous F-RTO | ||
1897 | * recovery has not yet completed. Pattern would be this: RTO, | ||
1898 | * Cumulative ACK, RTO (2xRTO for the same segment does not end | ||
1899 | * up here twice). | ||
1900 | * RFC4138 should be more specific on what to do, even though | ||
1901 | * RTO is quite unlikely to occur after the first Cumulative ACK | ||
1902 | * due to back-off and complexity of triggering events ... | ||
1903 | */ | ||
1904 | if (tp->frto_counter) { | ||
1905 | u32 stored_cwnd; | ||
1906 | stored_cwnd = tp->snd_cwnd; | ||
1907 | tp->snd_cwnd = 2; | ||
1908 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); | ||
1909 | tp->snd_cwnd = stored_cwnd; | ||
1910 | } else { | ||
1911 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); | ||
1912 | } | ||
1913 | /* ... in theory, cong.control module could do "any tricks" in | ||
1914 | * ssthresh(), which means that ca_state, lost bits and lost_out | ||
1915 | * counter would have to be faked before the call occurs. We | ||
1916 | * consider that too expensive, unlikely and hacky, so modules | ||
1917 | * using these in ssthresh() must deal with these incompatibility | ||
1918 | * issues if they receive CA_EVENT_FRTO and frto_counter != 0 | ||
1919 | */ | ||
1920 | tcp_ca_event(sk, CA_EVENT_FRTO); | ||
1921 | } | ||
1922 | |||
1923 | tp->undo_marker = tp->snd_una; | ||
1924 | tp->undo_retrans = 0; | ||
1925 | |||
1926 | skb = tcp_write_queue_head(sk); | ||
1927 | if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) | ||
1928 | tp->undo_marker = 0; | ||
1929 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { | ||
1930 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | ||
1931 | tp->retrans_out -= tcp_skb_pcount(skb); | ||
1932 | } | ||
1933 | tcp_verify_left_out(tp); | ||
1934 | |||
1935 | /* Too bad if TCP was application limited */ | ||
1936 | tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1); | ||
1937 | |||
1938 | /* Earlier loss recovery underway (see RFC4138; Appendix B). | ||
1939 | * The last condition is necessary at least in tp->frto_counter case. | ||
1940 | */ | ||
1941 | if (tcp_is_sackfrto(tp) && (tp->frto_counter || | ||
1942 | ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) && | ||
1943 | after(tp->high_seq, tp->snd_una)) { | ||
1944 | tp->frto_highmark = tp->high_seq; | ||
1945 | } else { | ||
1946 | tp->frto_highmark = tp->snd_nxt; | ||
1947 | } | ||
1948 | tcp_set_ca_state(sk, TCP_CA_Disorder); | ||
1949 | tp->high_seq = tp->snd_nxt; | ||
1950 | tp->frto_counter = 1; | ||
1951 | } | ||
1952 | |||
1953 | /* Enter Loss state after F-RTO was applied. Dupack arrived after RTO, | ||
1954 | * which indicates that we should follow the traditional RTO recovery, | ||
1955 | * i.e. mark everything lost and do go-back-N retransmission. | ||
1956 | */ | ||
1957 | static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) | ||
1958 | { | ||
1959 | struct tcp_sock *tp = tcp_sk(sk); | ||
1960 | struct sk_buff *skb; | ||
1961 | |||
1962 | tp->lost_out = 0; | ||
1963 | tp->retrans_out = 0; | ||
1964 | if (tcp_is_reno(tp)) | ||
1965 | tcp_reset_reno_sack(tp); | ||
1966 | |||
1967 | tcp_for_write_queue(skb, sk) { | ||
1968 | if (skb == tcp_send_head(sk)) | ||
1969 | break; | ||
1970 | |||
1971 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; | ||
1972 | /* | ||
1973 | * Count the retransmission made on RTO correctly (only when | ||
1974 | * waiting for the first ACK and did not get it)... | ||
1975 | */ | ||
1976 | if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) { | ||
1977 | /* For some reason this R-bit might get cleared? */ | ||
1978 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) | ||
1979 | tp->retrans_out += tcp_skb_pcount(skb); | ||
1980 | /* ...enter this if branch just for the first segment */ | ||
1981 | flag |= FLAG_DATA_ACKED; | ||
1982 | } else { | ||
1983 | if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) | ||
1984 | tp->undo_marker = 0; | ||
1985 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | ||
1986 | } | ||
1987 | |||
1988 | /* Marking forward transmissions that were made after RTO lost | ||
1989 | * can cause unnecessary retransmissions in some scenarios, | ||
1990 | * SACK blocks will mitigate that in some but not in all cases. | ||
1991 | * We used to not mark them but it was causing break-ups with | ||
1992 | * receivers that only do in-order reception. | ||
1993 | * | ||
1994 | * TODO: we could detect presence of such a receiver and select | ||
1995 | * different behavior per flow. | ||
1996 | */ | ||
1997 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { | ||
1998 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
1999 | tp->lost_out += tcp_skb_pcount(skb); | ||
2000 | tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; | ||
2001 | } | ||
2002 | } | ||
2003 | tcp_verify_left_out(tp); | ||
2004 | |||
2005 | tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments; | ||
2006 | tp->snd_cwnd_cnt = 0; | ||
2007 | tp->snd_cwnd_stamp = tcp_time_stamp; | ||
2008 | tp->frto_counter = 0; | ||
2009 | |||
2010 | tp->reordering = min_t(unsigned int, tp->reordering, | ||
2011 | sysctl_tcp_reordering); | ||
2012 | tcp_set_ca_state(sk, TCP_CA_Loss); | ||
2013 | tp->high_seq = tp->snd_nxt; | ||
2014 | TCP_ECN_queue_cwr(tp); | ||
2015 | |||
2016 | tcp_clear_all_retrans_hints(tp); | ||
2017 | } | ||
2018 | |||
2019 | static void tcp_clear_retrans_partial(struct tcp_sock *tp) | 1816 | static void tcp_clear_retrans_partial(struct tcp_sock *tp) |
2020 | { | 1817 | { |
2021 | tp->retrans_out = 0; | 1818 | tp->retrans_out = 0; |
@@ -2042,10 +1839,13 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
2042 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1839 | const struct inet_connection_sock *icsk = inet_csk(sk); |
2043 | struct tcp_sock *tp = tcp_sk(sk); | 1840 | struct tcp_sock *tp = tcp_sk(sk); |
2044 | struct sk_buff *skb; | 1841 | struct sk_buff *skb; |
1842 | bool new_recovery = false; | ||
2045 | 1843 | ||
2046 | /* Reduce ssthresh if it has not yet been made inside this window. */ | 1844 | /* Reduce ssthresh if it has not yet been made inside this window. */ |
2047 | if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || | 1845 | if (icsk->icsk_ca_state <= TCP_CA_Disorder || |
1846 | !after(tp->high_seq, tp->snd_una) || | ||
2048 | (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { | 1847 | (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { |
1848 | new_recovery = true; | ||
2049 | tp->prior_ssthresh = tcp_current_ssthresh(sk); | 1849 | tp->prior_ssthresh = tcp_current_ssthresh(sk); |
2050 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); | 1850 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); |
2051 | tcp_ca_event(sk, CA_EVENT_LOSS); | 1851 | tcp_ca_event(sk, CA_EVENT_LOSS); |
@@ -2059,11 +1859,8 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
2059 | if (tcp_is_reno(tp)) | 1859 | if (tcp_is_reno(tp)) |
2060 | tcp_reset_reno_sack(tp); | 1860 | tcp_reset_reno_sack(tp); |
2061 | 1861 | ||
2062 | if (!how) { | 1862 | tp->undo_marker = tp->snd_una; |
2063 | /* Push undo marker, if it was plain RTO and nothing | 1863 | if (how) { |
2064 | * was retransmitted. */ | ||
2065 | tp->undo_marker = tp->snd_una; | ||
2066 | } else { | ||
2067 | tp->sacked_out = 0; | 1864 | tp->sacked_out = 0; |
2068 | tp->fackets_out = 0; | 1865 | tp->fackets_out = 0; |
2069 | } | 1866 | } |
@@ -2090,8 +1887,14 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
2090 | tcp_set_ca_state(sk, TCP_CA_Loss); | 1887 | tcp_set_ca_state(sk, TCP_CA_Loss); |
2091 | tp->high_seq = tp->snd_nxt; | 1888 | tp->high_seq = tp->snd_nxt; |
2092 | TCP_ECN_queue_cwr(tp); | 1889 | TCP_ECN_queue_cwr(tp); |
2093 | /* Abort F-RTO algorithm if one is in progress */ | 1890 | |
2094 | tp->frto_counter = 0; | 1891 | /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous |
1892 | * loss recovery is underway except recurring timeout(s) on | ||
1893 | * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing | ||
1894 | */ | ||
1895 | tp->frto = sysctl_tcp_frto && | ||
1896 | (new_recovery || icsk->icsk_retransmits) && | ||
1897 | !inet_csk(sk)->icsk_mtup.probe_size; | ||
2095 | } | 1898 | } |
2096 | 1899 | ||
2097 | /* If ACK arrived pointing to a remembered SACK, it means that our | 1900 | /* If ACK arrived pointing to a remembered SACK, it means that our |
@@ -2150,15 +1953,16 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag) | |||
2150 | * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples | 1953 | * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples |
2151 | * available, or RTO is scheduled to fire first. | 1954 | * available, or RTO is scheduled to fire first. |
2152 | */ | 1955 | */ |
2153 | if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt) | 1956 | if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 || |
1957 | (flag & FLAG_ECE) || !tp->srtt) | ||
2154 | return false; | 1958 | return false; |
2155 | 1959 | ||
2156 | delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2)); | 1960 | delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2)); |
2157 | if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) | 1961 | if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) |
2158 | return false; | 1962 | return false; |
2159 | 1963 | ||
2160 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX); | 1964 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay, |
2161 | tp->early_retrans_delayed = 1; | 1965 | TCP_RTO_MAX); |
2162 | return true; | 1966 | return true; |
2163 | } | 1967 | } |
2164 | 1968 | ||
@@ -2274,10 +2078,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) | |||
2274 | struct tcp_sock *tp = tcp_sk(sk); | 2078 | struct tcp_sock *tp = tcp_sk(sk); |
2275 | __u32 packets_out; | 2079 | __u32 packets_out; |
2276 | 2080 | ||
2277 | /* Do not perform any recovery during F-RTO algorithm */ | ||
2278 | if (tp->frto_counter) | ||
2279 | return false; | ||
2280 | |||
2281 | /* Trick#1: The loss is proven. */ | 2081 | /* Trick#1: The loss is proven. */ |
2282 | if (tp->lost_out) | 2082 | if (tp->lost_out) |
2283 | return true; | 2083 | return true; |
@@ -2321,7 +2121,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) | |||
2321 | * interval if appropriate. | 2121 | * interval if appropriate. |
2322 | */ | 2122 | */ |
2323 | if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && | 2123 | if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && |
2324 | (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) && | 2124 | (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) && |
2325 | !tcp_may_send_now(sk)) | 2125 | !tcp_may_send_now(sk)) |
2326 | return !tcp_pause_early_retransmit(sk, flag); | 2126 | return !tcp_pause_early_retransmit(sk, flag); |
2327 | 2127 | ||
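The condition just above is the early-retransmit trigger (in the spirit of RFC 5827): with only a few segments outstanding and everything but one of them already SACKed, fast retransmit may start without waiting for three duplicate ACKs. For example, with three segments in flight of which the second and third have been SACKed (tp->packets_out = 3, tp->sacked_out = 2, tp->retrans_out = 0) and no new data eligible to send, the test passes and recovery begins after the short delay computed by tcp_pause_early_retransmit().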
@@ -2638,12 +2438,12 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) | |||
2638 | return failed; | 2438 | return failed; |
2639 | } | 2439 | } |
2640 | 2440 | ||
2641 | /* Undo during loss recovery after partial ACK. */ | 2441 | /* Undo during loss recovery after partial ACK or using F-RTO. */ |
2642 | static bool tcp_try_undo_loss(struct sock *sk) | 2442 | static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) |
2643 | { | 2443 | { |
2644 | struct tcp_sock *tp = tcp_sk(sk); | 2444 | struct tcp_sock *tp = tcp_sk(sk); |
2645 | 2445 | ||
2646 | if (tcp_may_undo(tp)) { | 2446 | if (frto_undo || tcp_may_undo(tp)) { |
2647 | struct sk_buff *skb; | 2447 | struct sk_buff *skb; |
2648 | tcp_for_write_queue(skb, sk) { | 2448 | tcp_for_write_queue(skb, sk) { |
2649 | if (skb == tcp_send_head(sk)) | 2449 | if (skb == tcp_send_head(sk)) |
@@ -2657,9 +2457,12 @@ static bool tcp_try_undo_loss(struct sock *sk) | |||
2657 | tp->lost_out = 0; | 2457 | tp->lost_out = 0; |
2658 | tcp_undo_cwr(sk, true); | 2458 | tcp_undo_cwr(sk, true); |
2659 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); | 2459 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); |
2460 | if (frto_undo) | ||
2461 | NET_INC_STATS_BH(sock_net(sk), | ||
2462 | LINUX_MIB_TCPSPURIOUSRTOS); | ||
2660 | inet_csk(sk)->icsk_retransmits = 0; | 2463 | inet_csk(sk)->icsk_retransmits = 0; |
2661 | tp->undo_marker = 0; | 2464 | tp->undo_marker = 0; |
2662 | if (tcp_is_sack(tp)) | 2465 | if (frto_undo || tcp_is_sack(tp)) |
2663 | tcp_set_ca_state(sk, TCP_CA_Open); | 2466 | tcp_set_ca_state(sk, TCP_CA_Open); |
2664 | return true; | 2467 | return true; |
2665 | } | 2468 | } |
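Note what the new frto_undo argument buys: when F-RTO has judged the timeout spurious, the undo above is performed even without the timestamp or D-SACK evidence that tcp_may_undo() normally requires, LINUX_MIB_TCPSPURIOUSRTOS is counted, and the connection returns to CA_Open even on a non-SACK (Reno) flow.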
@@ -2681,6 +2484,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) | |||
2681 | struct tcp_sock *tp = tcp_sk(sk); | 2484 | struct tcp_sock *tp = tcp_sk(sk); |
2682 | 2485 | ||
2683 | tp->high_seq = tp->snd_nxt; | 2486 | tp->high_seq = tp->snd_nxt; |
2487 | tp->tlp_high_seq = 0; | ||
2684 | tp->snd_cwnd_cnt = 0; | 2488 | tp->snd_cwnd_cnt = 0; |
2685 | tp->prior_cwnd = tp->snd_cwnd; | 2489 | tp->prior_cwnd = tp->snd_cwnd; |
2686 | tp->prr_delivered = 0; | 2490 | tp->prr_delivered = 0; |
@@ -2758,7 +2562,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) | |||
2758 | 2562 | ||
2759 | tcp_verify_left_out(tp); | 2563 | tcp_verify_left_out(tp); |
2760 | 2564 | ||
2761 | if (!tp->frto_counter && !tcp_any_retrans_done(sk)) | 2565 | if (!tcp_any_retrans_done(sk)) |
2762 | tp->retrans_stamp = 0; | 2566 | tp->retrans_stamp = 0; |
2763 | 2567 | ||
2764 | if (flag & FLAG_ECE) | 2568 | if (flag & FLAG_ECE) |
@@ -2875,6 +2679,58 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) | |||
2875 | tcp_set_ca_state(sk, TCP_CA_Recovery); | 2679 | tcp_set_ca_state(sk, TCP_CA_Recovery); |
2876 | } | 2680 | } |
2877 | 2681 | ||
2682 | /* Process an ACK in CA_Loss state. Move to CA_Open if lost data are | ||
2683 | * recovered or spurious. Otherwise retransmits more on partial ACKs. | ||
2684 | */ | ||
2685 | static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) | ||
2686 | { | ||
2687 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
2688 | struct tcp_sock *tp = tcp_sk(sk); | ||
2689 | bool recovered = !before(tp->snd_una, tp->high_seq); | ||
2690 | |||
2691 | if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */ | ||
2692 | if (flag & FLAG_ORIG_SACK_ACKED) { | ||
2693 | /* Step 3.b. A timeout is spurious if not all data are | ||
2694 | * lost, i.e., never-retransmitted data are (s)acked. | ||
2695 | */ | ||
2696 | tcp_try_undo_loss(sk, true); | ||
2697 | return; | ||
2698 | } | ||
2699 | if (after(tp->snd_nxt, tp->high_seq) && | ||
2700 | (flag & FLAG_DATA_SACKED || is_dupack)) { | ||
2701 | tp->frto = 0; /* Loss was real: 2nd part of step 3.a */ | ||
2702 | } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) { | ||
2703 | tp->high_seq = tp->snd_nxt; | ||
2704 | __tcp_push_pending_frames(sk, tcp_current_mss(sk), | ||
2705 | TCP_NAGLE_OFF); | ||
2706 | if (after(tp->snd_nxt, tp->high_seq)) | ||
2707 | return; /* Step 2.b */ | ||
2708 | tp->frto = 0; | ||
2709 | } | ||
2710 | } | ||
2711 | |||
2712 | if (recovered) { | ||
2713 | /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */ | ||
2714 | icsk->icsk_retransmits = 0; | ||
2715 | tcp_try_undo_recovery(sk); | ||
2716 | return; | ||
2717 | } | ||
2718 | if (flag & FLAG_DATA_ACKED) | ||
2719 | icsk->icsk_retransmits = 0; | ||
2720 | if (tcp_is_reno(tp)) { | ||
2721 | /* A Reno DUPACK means new data in F-RTO step 2.b above are | ||
2722 | * delivered. Lower inflight to clock out (re)transmissions. | ||
2723 | */ | ||
2724 | if (after(tp->snd_nxt, tp->high_seq) && is_dupack) | ||
2725 | tcp_add_reno_sack(sk); | ||
2726 | else if (flag & FLAG_SND_UNA_ADVANCED) | ||
2727 | tcp_reset_reno_sack(tp); | ||
2728 | } | ||
2729 | if (tcp_try_undo_loss(sk, false)) | ||
2730 | return; | ||
2731 | tcp_xmit_retransmit_queue(sk); | ||
2732 | } | ||
2733 | |||
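Condensed, the per-ACK decisions tcp_process_loss() makes above while tp->frto is set follow RFC 5682 sec 3.1: an ACK that (s)acks data sent before the RTO and never retransmitted (FLAG_ORIG_SACK_ACKED) proves the timeout spurious and is undone (step 3.b); a SACK for data sent after the RTO, or a duplicate ACK once such data is out, means the loss was real, so tp->frto is cleared and conventional recovery continues (step 3.a); an in-between partial cumulative ACK causes one round of new, previously unsent data to be pushed so that the next ACK can decide (step 2.b). For instance, if segments A..D were in flight when the RTO fired and only A was retransmitted, a later SACK covering B (never retransmitted, at or below tp->high_seq) sets FLAG_ORIG_SACK_ACKED and triggers the spurious-timeout undo.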
2878 | /* Process an event, which can update packets-in-flight not trivially. | 2734 | /* Process an event, which can update packets-in-flight not trivially. |
2879 | * Main goal of this function is to calculate new estimate for left_out, | 2735 | * Main goal of this function is to calculate new estimate for left_out, |
2880 | * taking into account both packets sitting in receiver's buffer and | 2736 | * taking into account both packets sitting in receiver's buffer and |
@@ -2921,12 +2777,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
2921 | tp->retrans_stamp = 0; | 2777 | tp->retrans_stamp = 0; |
2922 | } else if (!before(tp->snd_una, tp->high_seq)) { | 2778 | } else if (!before(tp->snd_una, tp->high_seq)) { |
2923 | switch (icsk->icsk_ca_state) { | 2779 | switch (icsk->icsk_ca_state) { |
2924 | case TCP_CA_Loss: | ||
2925 | icsk->icsk_retransmits = 0; | ||
2926 | if (tcp_try_undo_recovery(sk)) | ||
2927 | return; | ||
2928 | break; | ||
2929 | |||
2930 | case TCP_CA_CWR: | 2780 | case TCP_CA_CWR: |
2931 | /* CWR is to be held something *above* high_seq | 2781 | /* CWR is to be held something *above* high_seq |
2932 | * is ACKed for CWR bit to reach receiver. */ | 2782 | * is ACKed for CWR bit to reach receiver. */ |
@@ -2957,18 +2807,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
2957 | newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; | 2807 | newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; |
2958 | break; | 2808 | break; |
2959 | case TCP_CA_Loss: | 2809 | case TCP_CA_Loss: |
2960 | if (flag & FLAG_DATA_ACKED) | 2810 | tcp_process_loss(sk, flag, is_dupack); |
2961 | icsk->icsk_retransmits = 0; | ||
2962 | if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED) | ||
2963 | tcp_reset_reno_sack(tp); | ||
2964 | if (!tcp_try_undo_loss(sk)) { | ||
2965 | tcp_moderate_cwnd(tp); | ||
2966 | tcp_xmit_retransmit_queue(sk); | ||
2967 | return; | ||
2968 | } | ||
2969 | if (icsk->icsk_ca_state != TCP_CA_Open) | 2811 | if (icsk->icsk_ca_state != TCP_CA_Open) |
2970 | return; | 2812 | return; |
2971 | /* Loss is undone; fall through to processing in Open state. */ | 2813 | /* Fall through to processing in Open state. */ |
2972 | default: | 2814 | default: |
2973 | if (tcp_is_reno(tp)) { | 2815 | if (tcp_is_reno(tp)) { |
2974 | if (flag & FLAG_SND_UNA_ADVANCED) | 2816 | if (flag & FLAG_SND_UNA_ADVANCED) |
@@ -3081,6 +2923,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
3081 | */ | 2923 | */ |
3082 | void tcp_rearm_rto(struct sock *sk) | 2924 | void tcp_rearm_rto(struct sock *sk) |
3083 | { | 2925 | { |
2926 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
3084 | struct tcp_sock *tp = tcp_sk(sk); | 2927 | struct tcp_sock *tp = tcp_sk(sk); |
3085 | 2928 | ||
3086 | /* If the retrans timer is currently being used by Fast Open | 2929 | /* If the retrans timer is currently being used by Fast Open |
@@ -3094,12 +2937,13 @@ void tcp_rearm_rto(struct sock *sk) | |||
3094 | } else { | 2937 | } else { |
3095 | u32 rto = inet_csk(sk)->icsk_rto; | 2938 | u32 rto = inet_csk(sk)->icsk_rto; |
3096 | /* Offset the time elapsed after installing regular RTO */ | 2939 | /* Offset the time elapsed after installing regular RTO */ |
3097 | if (tp->early_retrans_delayed) { | 2940 | if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || |
2941 | icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { | ||
3098 | struct sk_buff *skb = tcp_write_queue_head(sk); | 2942 | struct sk_buff *skb = tcp_write_queue_head(sk); |
3099 | const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; | 2943 | const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; |
3100 | s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); | 2944 | s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); |
3101 | /* delta may not be positive if the socket is locked | 2945 | /* delta may not be positive if the socket is locked |
3102 | * when the delayed ER timer fires and is rescheduled. | 2946 | * when the retrans timer fires and is rescheduled. |
3103 | */ | 2947 | */ |
3104 | if (delta > 0) | 2948 | if (delta > 0) |
3105 | rto = delta; | 2949 | rto = delta; |
@@ -3107,7 +2951,6 @@ void tcp_rearm_rto(struct sock *sk) | |||
3107 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, | 2951 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, |
3108 | TCP_RTO_MAX); | 2952 | TCP_RTO_MAX); |
3109 | } | 2953 | } |
3110 | tp->early_retrans_delayed = 0; | ||
3111 | } | 2954 | } |
3112 | 2955 | ||
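A worked example of the offset applied above (numbers illustrative): if the head of the write queue was first transmitted 300 ms ago (its TCP_SKB_CB(skb)->when) and icsk_rto is 1000 ms, then delta = 700 ms and the retransmission timer is re-armed for 700 ms from now, so the RTO still expires roughly one RTO after the head segment was originally sent rather than a full RTO after the early-retransmit or loss-probe timer fired.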
3113 | /* This function is called when the delayed ER timer fires. TCP enters | 2956 | /* This function is called when the delayed ER timer fires. TCP enters |
@@ -3195,8 +3038,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3195 | flag |= FLAG_RETRANS_DATA_ACKED; | 3038 | flag |= FLAG_RETRANS_DATA_ACKED; |
3196 | ca_seq_rtt = -1; | 3039 | ca_seq_rtt = -1; |
3197 | seq_rtt = -1; | 3040 | seq_rtt = -1; |
3198 | if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1)) | ||
3199 | flag |= FLAG_NONHEAD_RETRANS_ACKED; | ||
3200 | } else { | 3041 | } else { |
3201 | ca_seq_rtt = now - scb->when; | 3042 | ca_seq_rtt = now - scb->when; |
3202 | last_ackt = skb->tstamp; | 3043 | last_ackt = skb->tstamp; |
@@ -3205,6 +3046,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3205 | } | 3046 | } |
3206 | if (!(sacked & TCPCB_SACKED_ACKED)) | 3047 | if (!(sacked & TCPCB_SACKED_ACKED)) |
3207 | reord = min(pkts_acked, reord); | 3048 | reord = min(pkts_acked, reord); |
3049 | if (!after(scb->end_seq, tp->high_seq)) | ||
3050 | flag |= FLAG_ORIG_SACK_ACKED; | ||
3208 | } | 3051 | } |
3209 | 3052 | ||
3210 | if (sacked & TCPCB_SACKED_ACKED) | 3053 | if (sacked & TCPCB_SACKED_ACKED) |
@@ -3405,165 +3248,74 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 | |||
3405 | return flag; | 3248 | return flag; |
3406 | } | 3249 | } |
3407 | 3250 | ||
3408 | /* A very conservative spurious RTO response algorithm: reduce cwnd and | 3251 | /* RFC 5961 7 [ACK Throttling] */ |
3409 | * continue in congestion avoidance. | 3252 | static void tcp_send_challenge_ack(struct sock *sk) |
3410 | */ | ||
3411 | static void tcp_conservative_spur_to_response(struct tcp_sock *tp) | ||
3412 | { | 3253 | { |
3413 | tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); | 3254 | /* unprotected vars, we don't care about overwrites */
3414 | tp->snd_cwnd_cnt = 0; | 3255 | static u32 challenge_timestamp; |
3415 | TCP_ECN_queue_cwr(tp); | 3256 | static unsigned int challenge_count; |
3416 | tcp_moderate_cwnd(tp); | 3257 | u32 now = jiffies / HZ; |
3417 | } | ||
3418 | 3258 | ||
3419 | /* A conservative spurious RTO response algorithm: reduce cwnd using | 3259 | if (now != challenge_timestamp) { |
3420 | * PRR and continue in congestion avoidance. | 3260 | challenge_timestamp = now; |
3421 | */ | 3261 | challenge_count = 0; |
3422 | static void tcp_cwr_spur_to_response(struct sock *sk) | 3262 | } |
3423 | { | 3263 | if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { |
3424 | tcp_enter_cwr(sk, 0); | 3264 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); |
3265 | tcp_send_ack(sk); | ||
3266 | } | ||
3425 | } | 3267 | } |
3426 | 3268 | ||
3427 | static void tcp_undo_spur_to_response(struct sock *sk, int flag) | 3269 | static void tcp_store_ts_recent(struct tcp_sock *tp) |
3428 | { | 3270 | { |
3429 | if (flag & FLAG_ECE) | 3271 | tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; |
3430 | tcp_cwr_spur_to_response(sk); | 3272 | tp->rx_opt.ts_recent_stamp = get_seconds(); |
3431 | else | ||
3432 | tcp_undo_cwr(sk, true); | ||
3433 | } | 3273 | } |
3434 | 3274 | ||
3435 | /* F-RTO spurious RTO detection algorithm (RFC4138) | 3275 | static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) |
3436 | * | ||
3437 | * F-RTO affects during two new ACKs following RTO (well, almost, see inline | ||
3438 | * comments). State (ACK number) is kept in frto_counter. When ACK advances | ||
3439 | * window (but not to or beyond highest sequence sent before RTO): | ||
3440 | * On First ACK, send two new segments out. | ||
3441 | * On Second ACK, RTO was likely spurious. Do spurious response (response | ||
3442 | * algorithm is not part of the F-RTO detection algorithm | ||
3443 | * given in RFC4138 but can be selected separately). | ||
3444 | * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss | ||
3445 | * and TCP falls back to conventional RTO recovery. F-RTO allows overriding | ||
3446 | * of Nagle, this is done using frto_counter states 2 and 3, when a new data | ||
3447 | * segment of any size sent during F-RTO, state 2 is upgraded to 3. | ||
3448 | * | ||
3449 | * Rationale: if the RTO was spurious, new ACKs should arrive from the | ||
3450 | * original window even after we transmit two new data segments. | ||
3451 | * | ||
3452 | * SACK version: | ||
3453 | * on first step, wait until first cumulative ACK arrives, then move to | ||
3454 | * the second step. In second step, the next ACK decides. | ||
3455 | * | ||
3456 | * F-RTO is implemented (mainly) in four functions: | ||
3457 | * - tcp_use_frto() is used to determine if TCP is can use F-RTO | ||
3458 | * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is | ||
3459 | * called when tcp_use_frto() showed green light | ||
3460 | * - tcp_process_frto() handles incoming ACKs during F-RTO algorithm | ||
3461 | * - tcp_enter_frto_loss() is called if there is not enough evidence | ||
3462 | * to prove that the RTO is indeed spurious. It transfers the control | ||
3463 | * from F-RTO to the conventional RTO recovery | ||
3464 | */ | ||
3465 | static bool tcp_process_frto(struct sock *sk, int flag) | ||
3466 | { | 3276 | { |
3467 | struct tcp_sock *tp = tcp_sk(sk); | 3277 | if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { |
3468 | 3278 | /* PAWS bug workaround wrt. ACK frames, the PAWS discard | |
3469 | tcp_verify_left_out(tp); | 3279 | * extra check below makes sure this can only happen |
3470 | 3280 | * for pure ACK frames. -DaveM | |
3471 | /* Duplicate the behavior from Loss state (fastretrans_alert) */ | 3281 | * |
3472 | if (flag & FLAG_DATA_ACKED) | 3282 | * Not only, also it occurs for expired timestamps. |
3473 | inet_csk(sk)->icsk_retransmits = 0; | ||
3474 | |||
3475 | if ((flag & FLAG_NONHEAD_RETRANS_ACKED) || | ||
3476 | ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED))) | ||
3477 | tp->undo_marker = 0; | ||
3478 | |||
3479 | if (!before(tp->snd_una, tp->frto_highmark)) { | ||
3480 | tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); | ||
3481 | return true; | ||
3482 | } | ||
3483 | |||
3484 | if (!tcp_is_sackfrto(tp)) { | ||
3485 | /* RFC4138 shortcoming in step 2; should also have case c): | ||
3486 | * ACK isn't duplicate nor advances window, e.g., opposite dir | ||
3487 | * data, winupdate | ||
3488 | */ | 3283 | */ |
3489 | if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) | ||
3490 | return true; | ||
3491 | |||
3492 | if (!(flag & FLAG_DATA_ACKED)) { | ||
3493 | tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), | ||
3494 | flag); | ||
3495 | return true; | ||
3496 | } | ||
3497 | } else { | ||
3498 | if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { | ||
3499 | if (!tcp_packets_in_flight(tp)) { | ||
3500 | tcp_enter_frto_loss(sk, 2, flag); | ||
3501 | return true; | ||
3502 | } | ||
3503 | |||
3504 | /* Prevent sending of new data. */ | ||
3505 | tp->snd_cwnd = min(tp->snd_cwnd, | ||
3506 | tcp_packets_in_flight(tp)); | ||
3507 | return true; | ||
3508 | } | ||
3509 | |||
3510 | if ((tp->frto_counter >= 2) && | ||
3511 | (!(flag & FLAG_FORWARD_PROGRESS) || | ||
3512 | ((flag & FLAG_DATA_SACKED) && | ||
3513 | !(flag & FLAG_ONLY_ORIG_SACKED)))) { | ||
3514 | /* RFC4138 shortcoming (see comment above) */ | ||
3515 | if (!(flag & FLAG_FORWARD_PROGRESS) && | ||
3516 | (flag & FLAG_NOT_DUP)) | ||
3517 | return true; | ||
3518 | |||
3519 | tcp_enter_frto_loss(sk, 3, flag); | ||
3520 | return true; | ||
3521 | } | ||
3522 | } | ||
3523 | |||
3524 | if (tp->frto_counter == 1) { | ||
3525 | /* tcp_may_send_now needs to see updated state */ | ||
3526 | tp->snd_cwnd = tcp_packets_in_flight(tp) + 2; | ||
3527 | tp->frto_counter = 2; | ||
3528 | |||
3529 | if (!tcp_may_send_now(sk)) | ||
3530 | tcp_enter_frto_loss(sk, 2, flag); | ||
3531 | 3284 | ||
3532 | return true; | 3285 | if (tcp_paws_check(&tp->rx_opt, 0)) |
3533 | } else { | 3286 | tcp_store_ts_recent(tp); |
3534 | switch (sysctl_tcp_frto_response) { | ||
3535 | case 2: | ||
3536 | tcp_undo_spur_to_response(sk, flag); | ||
3537 | break; | ||
3538 | case 1: | ||
3539 | tcp_conservative_spur_to_response(tp); | ||
3540 | break; | ||
3541 | default: | ||
3542 | tcp_cwr_spur_to_response(sk); | ||
3543 | break; | ||
3544 | } | ||
3545 | tp->frto_counter = 0; | ||
3546 | tp->undo_marker = 0; | ||
3547 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); | ||
3548 | } | 3287 | } |
3549 | return false; | ||
3550 | } | 3288 | } |
3551 | 3289 | ||
3552 | /* RFC 5961 7 [ACK Throttling] */ | 3290 | /* This routine deals with acks during a TLP episode. |
3553 | static void tcp_send_challenge_ack(struct sock *sk) | 3291 | * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. |
3292 | */ | ||
3293 | static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) | ||
3554 | { | 3294 | { |
3555 | /* unprotected vars, we don't care about overwrites */ | 3295 | struct tcp_sock *tp = tcp_sk(sk);
3556 | static u32 challenge_timestamp; | 3296 | bool is_tlp_dupack = (ack == tp->tlp_high_seq) && |
3557 | static unsigned int challenge_count; | 3297 | !(flag & (FLAG_SND_UNA_ADVANCED | |
3558 | u32 now = jiffies / HZ; | 3298 | FLAG_NOT_DUP | FLAG_DATA_SACKED)); |
3559 | 3299 | ||
3560 | if (now != challenge_timestamp) { | 3300 | /* Mark the end of TLP episode on receiving TLP dupack or when |
3561 | challenge_timestamp = now; | 3301 | * ack is after tlp_high_seq. |
3562 | challenge_count = 0; | 3302 | */ |
3303 | if (is_tlp_dupack) { | ||
3304 | tp->tlp_high_seq = 0; | ||
3305 | return; | ||
3563 | } | 3306 | } |
3564 | if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { | 3307 | |
3565 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); | 3308 | if (after(ack, tp->tlp_high_seq)) { |
3566 | tcp_send_ack(sk); | 3309 | tp->tlp_high_seq = 0; |
3310 | /* Don't reduce cwnd if DSACK arrives for TLP retrans. */ | ||
3311 | if (!(flag & FLAG_DSACKING_ACK)) { | ||
3312 | tcp_init_cwnd_reduction(sk, true); | ||
3313 | tcp_set_ca_state(sk, TCP_CA_CWR); | ||
3314 | tcp_end_cwnd_reduction(sk); | ||
3315 | tcp_set_ca_state(sk, TCP_CA_Open); | ||
3316 | NET_INC_STATS_BH(sock_net(sk), | ||
3317 | LINUX_MIB_TCPLOSSPROBERECOVERY); | ||
3318 | } | ||
3567 | } | 3319 | } |
3568 | } | 3320 | } |
3569 | 3321 | ||
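The challenge-ACK throttle moved above works on one-second buckets: challenge_timestamp records the current second (jiffies / HZ), the counter resets whenever that second changes, and a challenge ACK is sent only while the per-second count stays within sysctl_tcp_challenge_ack_limit. Assuming the sysctl is left at its default of 100, the host emits at most about 100 challenge ACKs per second in total, however many out-of-window segments arrive across all connections.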
@@ -3581,7 +3333,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3581 | int prior_packets; | 3333 | int prior_packets; |
3582 | int prior_sacked = tp->sacked_out; | 3334 | int prior_sacked = tp->sacked_out; |
3583 | int pkts_acked = 0; | 3335 | int pkts_acked = 0; |
3584 | bool frto_cwnd = false; | ||
3585 | 3336 | ||
3586 | /* If the ack is older than previous acks | 3337 | /* If the ack is older than previous acks |
3587 | * then we can probably ignore it. | 3338 | * then we can probably ignore it. |
@@ -3601,7 +3352,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3601 | if (after(ack, tp->snd_nxt)) | 3352 | if (after(ack, tp->snd_nxt)) |
3602 | goto invalid_ack; | 3353 | goto invalid_ack; |
3603 | 3354 | ||
3604 | if (tp->early_retrans_delayed) | 3355 | if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || |
3356 | icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) | ||
3605 | tcp_rearm_rto(sk); | 3357 | tcp_rearm_rto(sk); |
3606 | 3358 | ||
3607 | if (after(ack, prior_snd_una)) | 3359 | if (after(ack, prior_snd_una)) |
@@ -3610,6 +3362,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3610 | prior_fackets = tp->fackets_out; | 3362 | prior_fackets = tp->fackets_out; |
3611 | prior_in_flight = tcp_packets_in_flight(tp); | 3363 | prior_in_flight = tcp_packets_in_flight(tp); |
3612 | 3364 | ||
3365 | /* ts_recent update must be made after we are sure that the packet | ||
3366 | * is in window. | ||
3367 | */ | ||
3368 | if (flag & FLAG_UPDATE_TS_RECENT) | ||
3369 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
3370 | |||
3613 | if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) { | 3371 | if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) { |
3614 | /* Window is constant, pure forward advance. | 3372 | /* Window is constant, pure forward advance. |
3615 | * No more checks are required. | 3373 | * No more checks are required. |
@@ -3654,30 +3412,29 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3654 | 3412 | ||
3655 | pkts_acked = prior_packets - tp->packets_out; | 3413 | pkts_acked = prior_packets - tp->packets_out; |
3656 | 3414 | ||
3657 | if (tp->frto_counter) | ||
3658 | frto_cwnd = tcp_process_frto(sk, flag); | ||
3659 | /* Guarantee sacktag reordering detection against wrap-arounds */ | ||
3660 | if (before(tp->frto_highmark, tp->snd_una)) | ||
3661 | tp->frto_highmark = 0; | ||
3662 | |||
3663 | if (tcp_ack_is_dubious(sk, flag)) { | 3415 | if (tcp_ack_is_dubious(sk, flag)) { |
3664 | /* Advance CWND, if state allows this. */ | 3416 | /* Advance CWND, if state allows this. */ |
3665 | if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && | 3417 | if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) |
3666 | tcp_may_raise_cwnd(sk, flag)) | ||
3667 | tcp_cong_avoid(sk, ack, prior_in_flight); | 3418 | tcp_cong_avoid(sk, ack, prior_in_flight); |
3668 | is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); | 3419 | is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); |
3669 | tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, | 3420 | tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, |
3670 | is_dupack, flag); | 3421 | is_dupack, flag); |
3671 | } else { | 3422 | } else { |
3672 | if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) | 3423 | if (flag & FLAG_DATA_ACKED) |
3673 | tcp_cong_avoid(sk, ack, prior_in_flight); | 3424 | tcp_cong_avoid(sk, ack, prior_in_flight); |
3674 | } | 3425 | } |
3675 | 3426 | ||
3427 | if (tp->tlp_high_seq) | ||
3428 | tcp_process_tlp_ack(sk, ack, flag); | ||
3429 | |||
3676 | if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { | 3430 | if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { |
3677 | struct dst_entry *dst = __sk_dst_get(sk); | 3431 | struct dst_entry *dst = __sk_dst_get(sk); |
3678 | if (dst) | 3432 | if (dst) |
3679 | dst_confirm(dst); | 3433 | dst_confirm(dst); |
3680 | } | 3434 | } |
3435 | |||
3436 | if (icsk->icsk_pending == ICSK_TIME_RETRANS) | ||
3437 | tcp_schedule_loss_probe(sk); | ||
3681 | return 1; | 3438 | return 1; |
3682 | 3439 | ||
3683 | no_queue: | 3440 | no_queue: |
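A short trace of the TLP handling wired in above, assuming a tail loss probe episode is in progress (tp->tlp_high_seq set): if the returning ACK equals tlp_high_seq and is a pure duplicate (no data, no window update, no SACK, SND.UNA unchanged), the original tail segment evidently arrived and the probe was a needless retransmission, so the episode just ends with no cwnd change; if the ACK instead advances beyond tlp_high_seq and carries no D-SACK for the probed data, the probe repaired a real loss, so a one-shot PRR-style reduction is applied (CWR entered and immediately completed) and LINUX_MIB_TCPLOSSPROBERECOVERY is counted. The new call at the tail of tcp_ack() re-arms the probe via tcp_schedule_loss_probe() whenever a plain retransmission timer is currently pending.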
@@ -3691,6 +3448,9 @@ no_queue: | |||
3691 | */ | 3448 | */ |
3692 | if (tcp_send_head(sk)) | 3449 | if (tcp_send_head(sk)) |
3693 | tcp_ack_probe(sk); | 3450 | tcp_ack_probe(sk); |
3451 | |||
3452 | if (tp->tlp_high_seq) | ||
3453 | tcp_process_tlp_ack(sk, ack, flag); | ||
3694 | return 1; | 3454 | return 1; |
3695 | 3455 | ||
3696 | invalid_ack: | 3456 | invalid_ack: |
@@ -3715,8 +3475,8 @@ old_ack: | |||
3715 | * But, this can also be called on packets in the established flow when | 3475 | * But, this can also be called on packets in the established flow when |
3716 | * the fast version below fails. | 3476 | * the fast version below fails. |
3717 | */ | 3477 | */ |
3718 | void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, | 3478 | void tcp_parse_options(const struct sk_buff *skb, |
3719 | const u8 **hvpp, int estab, | 3479 | struct tcp_options_received *opt_rx, int estab, |
3720 | struct tcp_fastopen_cookie *foc) | 3480 | struct tcp_fastopen_cookie *foc) |
3721 | { | 3481 | { |
3722 | const unsigned char *ptr; | 3482 | const unsigned char *ptr; |
@@ -3800,31 +3560,6 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o | |||
3800 | */ | 3560 | */ |
3801 | break; | 3561 | break; |
3802 | #endif | 3562 | #endif |
3803 | case TCPOPT_COOKIE: | ||
3804 | /* This option is variable length. | ||
3805 | */ | ||
3806 | switch (opsize) { | ||
3807 | case TCPOLEN_COOKIE_BASE: | ||
3808 | /* not yet implemented */ | ||
3809 | break; | ||
3810 | case TCPOLEN_COOKIE_PAIR: | ||
3811 | /* not yet implemented */ | ||
3812 | break; | ||
3813 | case TCPOLEN_COOKIE_MIN+0: | ||
3814 | case TCPOLEN_COOKIE_MIN+2: | ||
3815 | case TCPOLEN_COOKIE_MIN+4: | ||
3816 | case TCPOLEN_COOKIE_MIN+6: | ||
3817 | case TCPOLEN_COOKIE_MAX: | ||
3818 | /* 16-bit multiple */ | ||
3819 | opt_rx->cookie_plus = opsize; | ||
3820 | *hvpp = ptr; | ||
3821 | break; | ||
3822 | default: | ||
3823 | /* ignore option */ | ||
3824 | break; | ||
3825 | } | ||
3826 | break; | ||
3827 | |||
3828 | case TCPOPT_EXP: | 3563 | case TCPOPT_EXP: |
3829 | /* Fast Open option shares code 254 using a | 3564 | /* Fast Open option shares code 254 using a |
3830 | * 16 bits magic number. It's valid only in | 3565 | * 16 bits magic number. It's valid only in |
@@ -3870,8 +3605,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr | |||
3870 | * If it is wrong it falls back on tcp_parse_options(). | 3605 | * If it is wrong it falls back on tcp_parse_options(). |
3871 | */ | 3606 | */ |
3872 | static bool tcp_fast_parse_options(const struct sk_buff *skb, | 3607 | static bool tcp_fast_parse_options(const struct sk_buff *skb, |
3873 | const struct tcphdr *th, | 3608 | const struct tcphdr *th, struct tcp_sock *tp) |
3874 | struct tcp_sock *tp, const u8 **hvpp) | ||
3875 | { | 3609 | { |
3876 | /* In the spirit of fast parsing, compare doff directly to constant | 3610 | /* In the spirit of fast parsing, compare doff directly to constant |
3877 | * values. Because equality is used, short doff can be ignored here. | 3611 | * values. Because equality is used, short doff can be ignored here. |
@@ -3885,7 +3619,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, | |||
3885 | return true; | 3619 | return true; |
3886 | } | 3620 | } |
3887 | 3621 | ||
3888 | tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); | 3622 | tcp_parse_options(skb, &tp->rx_opt, 1, NULL); |
3889 | if (tp->rx_opt.saw_tstamp) | 3623 | if (tp->rx_opt.saw_tstamp) |
3890 | tp->rx_opt.rcv_tsecr -= tp->tsoffset; | 3624 | tp->rx_opt.rcv_tsecr -= tp->tsoffset; |
3891 | 3625 | ||
@@ -3930,27 +3664,6 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) | |||
3930 | EXPORT_SYMBOL(tcp_parse_md5sig_option); | 3664 | EXPORT_SYMBOL(tcp_parse_md5sig_option); |
3931 | #endif | 3665 | #endif |
3932 | 3666 | ||
3933 | static inline void tcp_store_ts_recent(struct tcp_sock *tp) | ||
3934 | { | ||
3935 | tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; | ||
3936 | tp->rx_opt.ts_recent_stamp = get_seconds(); | ||
3937 | } | ||
3938 | |||
3939 | static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) | ||
3940 | { | ||
3941 | if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { | ||
3942 | /* PAWS bug workaround wrt. ACK frames, the PAWS discard | ||
3943 | * extra check below makes sure this can only happen | ||
3944 | * for pure ACK frames. -DaveM | ||
3945 | * | ||
3946 | * Not only, also it occurs for expired timestamps. | ||
3947 | */ | ||
3948 | |||
3949 | if (tcp_paws_check(&tp->rx_opt, 0)) | ||
3950 | tcp_store_ts_recent(tp); | ||
3951 | } | ||
3952 | } | ||
3953 | |||
3954 | /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM | 3667 | /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM |
3955 | * | 3668 | * |
3956 | * It is not fatal. If this ACK does _not_ change critical state (seqs, window) | 3669 | * It is not fatal. If this ACK does _not_ change critical state (seqs, window) |
@@ -5266,12 +4979,10 @@ out: | |||
5266 | static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, | 4979 | static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, |
5267 | const struct tcphdr *th, int syn_inerr) | 4980 | const struct tcphdr *th, int syn_inerr) |
5268 | { | 4981 | { |
5269 | const u8 *hash_location; | ||
5270 | struct tcp_sock *tp = tcp_sk(sk); | 4982 | struct tcp_sock *tp = tcp_sk(sk); |
5271 | 4983 | ||
5272 | /* RFC1323: H1. Apply PAWS check first. */ | 4984 | /* RFC1323: H1. Apply PAWS check first. */ |
5273 | if (tcp_fast_parse_options(skb, th, tp, &hash_location) && | 4985 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && |
5274 | tp->rx_opt.saw_tstamp && | ||
5275 | tcp_paws_discard(sk, skb)) { | 4986 | tcp_paws_discard(sk, skb)) { |
5276 | if (!th->rst) { | 4987 | if (!th->rst) { |
5277 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); | 4988 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); |
@@ -5546,14 +5257,9 @@ slow_path: | |||
5546 | return 0; | 5257 | return 0; |
5547 | 5258 | ||
5548 | step5: | 5259 | step5: |
5549 | if (tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) | 5260 | if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) |
5550 | goto discard; | 5261 | goto discard; |
5551 | 5262 | ||
5552 | /* ts_recent update must be made after we are sure that the packet | ||
5553 | * is in window. | ||
5554 | */ | ||
5555 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
5556 | |||
5557 | tcp_rcv_rtt_measure_ts(sk, skb); | 5263 | tcp_rcv_rtt_measure_ts(sk, skb); |
5558 | 5264 | ||
5559 | /* Process urgent data. */ | 5265 | /* Process urgent data. */ |
@@ -5567,6 +5273,7 @@ step5: | |||
5567 | return 0; | 5273 | return 0; |
5568 | 5274 | ||
5569 | csum_error: | 5275 | csum_error: |
5276 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); | ||
5570 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); | 5277 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); |
5571 | 5278 | ||
5572 | discard: | 5279 | discard: |
@@ -5625,12 +5332,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, | |||
5625 | 5332 | ||
5626 | if (mss == tp->rx_opt.user_mss) { | 5333 | if (mss == tp->rx_opt.user_mss) { |
5627 | struct tcp_options_received opt; | 5334 | struct tcp_options_received opt; |
5628 | const u8 *hash_location; | ||
5629 | 5335 | ||
5630 | /* Get original SYNACK MSS value if user MSS sets mss_clamp */ | 5336 | /* Get original SYNACK MSS value if user MSS sets mss_clamp */ |
5631 | tcp_clear_options(&opt); | 5337 | tcp_clear_options(&opt); |
5632 | opt.user_mss = opt.mss_clamp = 0; | 5338 | opt.user_mss = opt.mss_clamp = 0; |
5633 | tcp_parse_options(synack, &opt, &hash_location, 0, NULL); | 5339 | tcp_parse_options(synack, &opt, 0, NULL); |
5634 | mss = opt.mss_clamp; | 5340 | mss = opt.mss_clamp; |
5635 | } | 5341 | } |
5636 | 5342 | ||
@@ -5661,14 +5367,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, | |||
5661 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | 5367 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, |
5662 | const struct tcphdr *th, unsigned int len) | 5368 | const struct tcphdr *th, unsigned int len) |
5663 | { | 5369 | { |
5664 | const u8 *hash_location; | ||
5665 | struct inet_connection_sock *icsk = inet_csk(sk); | 5370 | struct inet_connection_sock *icsk = inet_csk(sk); |
5666 | struct tcp_sock *tp = tcp_sk(sk); | 5371 | struct tcp_sock *tp = tcp_sk(sk); |
5667 | struct tcp_cookie_values *cvp = tp->cookie_values; | ||
5668 | struct tcp_fastopen_cookie foc = { .len = -1 }; | 5372 | struct tcp_fastopen_cookie foc = { .len = -1 }; |
5669 | int saved_clamp = tp->rx_opt.mss_clamp; | 5373 | int saved_clamp = tp->rx_opt.mss_clamp; |
5670 | 5374 | ||
5671 | tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc); | 5375 | tcp_parse_options(skb, &tp->rx_opt, 0, &foc); |
5672 | if (tp->rx_opt.saw_tstamp) | 5376 | if (tp->rx_opt.saw_tstamp) |
5673 | tp->rx_opt.rcv_tsecr -= tp->tsoffset; | 5377 | tp->rx_opt.rcv_tsecr -= tp->tsoffset; |
5674 | 5378 | ||
@@ -5765,30 +5469,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5765 | * is initialized. */ | 5469 | * is initialized. */ |
5766 | tp->copied_seq = tp->rcv_nxt; | 5470 | tp->copied_seq = tp->rcv_nxt; |
5767 | 5471 | ||
5768 | if (cvp != NULL && | ||
5769 | cvp->cookie_pair_size > 0 && | ||
5770 | tp->rx_opt.cookie_plus > 0) { | ||
5771 | int cookie_size = tp->rx_opt.cookie_plus | ||
5772 | - TCPOLEN_COOKIE_BASE; | ||
5773 | int cookie_pair_size = cookie_size | ||
5774 | + cvp->cookie_desired; | ||
5775 | |||
5776 | /* A cookie extension option was sent and returned. | ||
5777 | * Note that each incoming SYNACK replaces the | ||
5778 | * Responder cookie. The initial exchange is most | ||
5779 | * fragile, as protection against spoofing relies | ||
5780 | * entirely upon the sequence and timestamp (above). | ||
5781 | * This replacement strategy allows the correct pair to | ||
5782 | * pass through, while any others will be filtered via | ||
5783 | * Responder verification later. | ||
5784 | */ | ||
5785 | if (sizeof(cvp->cookie_pair) >= cookie_pair_size) { | ||
5786 | memcpy(&cvp->cookie_pair[cvp->cookie_desired], | ||
5787 | hash_location, cookie_size); | ||
5788 | cvp->cookie_pair_size = cookie_pair_size; | ||
5789 | } | ||
5790 | } | ||
5791 | |||
5792 | smp_mb(); | 5472 | smp_mb(); |
5793 | 5473 | ||
5794 | tcp_finish_connect(sk, skb); | 5474 | tcp_finish_connect(sk, skb); |
@@ -5989,7 +5669,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5989 | 5669 | ||
5990 | /* step 5: check the ACK field */ | 5670 | /* step 5: check the ACK field */ |
5991 | if (true) { | 5671 | if (true) { |
5992 | int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; | 5672 | int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | |
5673 | FLAG_UPDATE_TS_RECENT) > 0; | ||
5993 | 5674 | ||
5994 | switch (sk->sk_state) { | 5675 | switch (sk->sk_state) { |
5995 | case TCP_SYN_RECV: | 5676 | case TCP_SYN_RECV: |
@@ -6140,11 +5821,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
6140 | } | 5821 | } |
6141 | } | 5822 | } |
6142 | 5823 | ||
6143 | /* ts_recent update must be made after we are sure that the packet | ||
6144 | * is in window. | ||
6145 | */ | ||
6146 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
6147 | |||
6148 | /* step 6: check the URG bit */ | 5824 | /* step 6: check the URG bit */ |
6149 | tcp_urg(sk, skb, th); | 5825 | tcp_urg(sk, skb, th); |
6150 | 5826 | ||