author    Yuchung Cheng <ycheng@google.com>    2013-03-20 09:32:58 -0400
committer David S. Miller <davem@davemloft.net>    2013-03-21 11:47:50 -0400
commit    9b44190dc114c1720b34975b5bfc65aece112ced
tree      c1202e05d6a04fa1d31be2ad2942fbe32ffa3f76
parent    e306e2c13b8c214618af0c61acf62a6e42d486de
tcp: refactor F-RTO
This patch series refactors the F-RTO feature (RFC4138/5682) to simplify loss recovery processing. The existing F-RTO implementation was developed during the experimental stage (RFC4138) and carries many experimental features. It takes a separate code path from the traditional timeout processing by overloading CA_Disorder instead of using the CA_Loss state. This complicates CA_Disorder state handling, because that state is also used for handling dubious ACKs and undos. While the algorithm in the RFC does not change the congestion control, the implementation intercepts congestion control in various places (e.g., frto_cwnd in tcp_ack()).

The new code implements the newer F-RTO specification (RFC5682) on the CA_Loss processing path. F-RTO becomes a small extension of the timeout processing and interfaces with the congestion control and Eifel undo modules. It lets the congestion control (module) determine independently how much to send; F-RTO only chooses what to send, in order to detect spurious retransmission. If the timeout is found spurious, it invokes the existing Eifel undo algorithms, such as DSACK- or TCP-timestamp-based detection.

This first patch removes all F-RTO code except sysctl_tcp_frto, which is left for the new implementation. Since CA_EVENT_FRTO is removed, TCP Westwood now computes ssthresh on the regular timeout (CA_EVENT_LOSS) event.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
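As a rough guide to the new design, the RFC5682 detection that the series later attaches to the CA_Loss path boils down to a small state machine: arm it when the RTO fires, probe with new (never-retransmitted) data, and declare the timeout spurious only if ACKs keep arriving from the original window. A deliberately simplified, standalone model follows; the types and helpers (frto_conn, frto_on_rto(), frto_on_ack()) are illustrative and are not the identifiers used by the kernel patches.

/* Standalone model of RFC5682 F-RTO detection as described above.
 * Illustrative only: this is a simplification, not the kernel code
 * added later in the series.
 */
#include <stdbool.h>
#include <stdint.h>

enum frto_state { FRTO_INACTIVE, FRTO_AFTER_RTO, FRTO_SENT_NEW };

struct frto_conn {
	enum frto_state state;
	uint32_t high_seq;	/* snd_nxt when the RTO fired */
};

/* RTO fired: retransmit only the head segment and arm detection. */
static void frto_on_rto(struct frto_conn *c, uint32_t snd_nxt)
{
	c->state = FRTO_AFTER_RTO;
	c->high_seq = snd_nxt;
}

/* Process one ACK while detection is armed.  Returns true when the
 * timeout is judged spurious, i.e. when the Eifel undo should run.
 */
static bool frto_on_ack(struct frto_conn *c, uint32_t ack_seq, bool is_dupack)
{
	switch (c->state) {
	case FRTO_AFTER_RTO:
		if (is_dupack) {
			/* A dup ACK suggests real loss: fall back to the
			 * conventional go-back-N RTO recovery. */
			c->state = FRTO_INACTIVE;
		} else {
			/* Window advanced: transmit new (never-retransmitted)
			 * segments and let the next ACK decide. */
			c->state = FRTO_SENT_NEW;
		}
		return false;
	case FRTO_SENT_NEW:
		c->state = FRTO_INACTIVE;
		/* If the next ACK still comes from the original window
		 * (below high_seq), the original transmissions were
		 * received, so the RTO was spurious. */
		return !is_dupack && ack_seq < c->high_seq;
	default:
		return false;
	}
}

Note what the model deliberately does not do: it never adjusts cwnd or ssthresh. That separation is what lets the congestion control module decide how much to send while F-RTO only picks what to send.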
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c | 375
1 file changed, 4 insertions(+), 371 deletions(-)
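The Eifel undo machinery that a spurious verdict hands off to makes its own check from packet timing or SACK information. A minimal standalone illustration of the timestamp flavor (RFC3522-style) is below; the struct and field names are hypothetical, not the kernel's:

/* Minimal illustration of timestamp-based spurious-RTO detection
 * (the Eifel idea, RFC3522).  Names are hypothetical; this is not
 * the kernel's implementation.
 */
#include <stdbool.h>
#include <stdint.h>

struct eifel_sample {
	uint32_t ts_echo;	/* TSecr echoed by the first ACK after RTO */
	uint32_t ts_retrans;	/* TSval we stamped on the retransmission */
};

static bool rto_was_spurious(const struct eifel_sample *s)
{
	/* Signed subtraction copes with timestamp wrap-around: if the
	 * echoed timestamp predates the retransmission, the ACK was
	 * triggered by the original segment, so the RTO was spurious.
	 */
	return (int32_t)(s->ts_echo - s->ts_retrans) < 0;
}

The DSACK flavor reaches the same verdict when the receiver explicitly reports the retransmitted segment as a duplicate.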
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 19f0149fb6a2..231c79fe91f3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -93,7 +93,6 @@ int sysctl_tcp_stdurg __read_mostly;
 int sysctl_tcp_rfc1337 __read_mostly;
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 int sysctl_tcp_frto __read_mostly = 2;
-int sysctl_tcp_frto_response __read_mostly;
 
 int sysctl_tcp_thin_dupack __read_mostly;
 
@@ -108,17 +107,14 @@ int sysctl_tcp_early_retrans __read_mostly = 3;
 #define FLAG_DATA_SACKED	0x20 /* New SACK. */
 #define FLAG_ECE		0x40 /* ECE in this ACK */
 #define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
-#define FLAG_ONLY_ORIG_SACKED	0x200 /* SACKs only non-rexmit sent before RTO */
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
 #define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
-#define FLAG_NONHEAD_RETRANS_ACKED	0x1000 /* Non-head rexmitted data was ACKed */
 #define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */
 
 #define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
 #define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
 #define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)
 #define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)
-#define FLAG_ANY_PROGRESS	(FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)
 
 #define TCP_REMNANT		(TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
 #define TCP_HP_BITS		(~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
@@ -1159,10 +1155,6 @@ static u8 tcp_sacktag_one(struct sock *sk,
 					   tcp_highest_sack_seq(tp)))
 					state->reord = min(fack_count,
 							   state->reord);
-
-				/* SACK enhanced F-RTO (RFC4138; Appendix B) */
-				if (!after(end_seq, tp->frto_highmark))
-					state->flag |= FLAG_ONLY_ORIG_SACKED;
 			}
 
 		if (sacked & TCPCB_LOST) {
@@ -1555,7 +1547,6 @@ static int
 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 			u32 prior_snd_una)
 {
-	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	const unsigned char *ptr = (skb_transport_header(ack_skb) +
 				    TCP_SKB_CB(ack_skb)->sacked);
@@ -1728,12 +1719,6 @@ walk:
 				       start_seq, end_seq, dup_sack);
 
 advance_sp:
-		/* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
-		 * due to in-order walk
-		 */
-		if (after(end_seq, tp->frto_highmark))
-			state.flag &= ~FLAG_ONLY_ORIG_SACKED;
-
 		i++;
 	}
 
@@ -1750,8 +1735,7 @@ advance_sp:
 	tcp_verify_left_out(tp);
 
 	if ((state.reord < tp->fackets_out) &&
-	    ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
-	    (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
+	    ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
 		tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
 
 out:
@@ -1825,197 +1809,6 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
 	tp->sacked_out = 0;
 }
 
-static int tcp_is_sackfrto(const struct tcp_sock *tp)
-{
-	return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
-}
-
-/* F-RTO can only be used if TCP has never retransmitted anything other than
- * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
- */
-bool tcp_use_frto(struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct sk_buff *skb;
-
-	if (!sysctl_tcp_frto)
-		return false;
-
-	/* MTU probe and F-RTO won't really play nicely along currently */
-	if (icsk->icsk_mtup.probe_size)
-		return false;
-
-	if (tcp_is_sackfrto(tp))
-		return true;
-
-	/* Avoid expensive walking of rexmit queue if possible */
-	if (tp->retrans_out > 1)
-		return false;
-
-	skb = tcp_write_queue_head(sk);
-	if (tcp_skb_is_last(sk, skb))
-		return true;
-	skb = tcp_write_queue_next(sk, skb);	/* Skips head */
-	tcp_for_write_queue_from(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
-		if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
-			return false;
-		/* Short-circuit when first non-SACKed skb has been checked */
-		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
-			break;
-	}
-	return true;
-}
-
-/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
- * recovery a bit and use heuristics in tcp_process_frto() to detect if
- * the RTO was spurious. Only clear SACKED_RETRANS of the head here to
- * keep retrans_out counting accurate (with SACK F-RTO, other than head
- * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS
- * bits are handled if the Loss state is really to be entered (in
- * tcp_enter_frto_loss).
- *
- * Do like tcp_enter_loss() would; when RTO expires the second time it
- * does:
- *  "Reduce ssthresh if it has not yet been made inside this window."
- */
-void tcp_enter_frto(struct sock *sk)
-{
-	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
-
-	if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
-	    tp->snd_una == tp->high_seq ||
-	    ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
-	     !icsk->icsk_retransmits)) {
-		tp->prior_ssthresh = tcp_current_ssthresh(sk);
-		/* Our state is too optimistic in ssthresh() call because cwnd
-		 * is not reduced until tcp_enter_frto_loss() when previous F-RTO
-		 * recovery has not yet completed. Pattern would be this: RTO,
-		 * Cumulative ACK, RTO (2xRTO for the same segment does not end
-		 * up here twice).
-		 * RFC4138 should be more specific on what to do, even though
-		 * RTO is quite unlikely to occur after the first Cumulative ACK
-		 * due to back-off and complexity of triggering events ...
-		 */
-		if (tp->frto_counter) {
-			u32 stored_cwnd;
-			stored_cwnd = tp->snd_cwnd;
-			tp->snd_cwnd = 2;
-			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
-			tp->snd_cwnd = stored_cwnd;
-		} else {
-			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
-		}
-		/* ... in theory, cong.control module could do "any tricks" in
-		 * ssthresh(), which means that ca_state, lost bits and lost_out
-		 * counter would have to be faked before the call occurs. We
-		 * consider that too expensive, unlikely and hacky, so modules
-		 * using these in ssthresh() must deal these incompatibility
-		 * issues if they receives CA_EVENT_FRTO and frto_counter != 0
-		 */
-		tcp_ca_event(sk, CA_EVENT_FRTO);
-	}
-
-	tp->undo_marker = tp->snd_una;
-	tp->undo_retrans = 0;
-
-	skb = tcp_write_queue_head(sk);
-	if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
-		tp->undo_marker = 0;
-	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
-		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-		tp->retrans_out -= tcp_skb_pcount(skb);
-	}
-	tcp_verify_left_out(tp);
-
-	/* Too bad if TCP was application limited */
-	tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
-
-	/* Earlier loss recovery underway (see RFC4138; Appendix B).
-	 * The last condition is necessary at least in tp->frto_counter case.
-	 */
-	if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
-	    ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
-	    after(tp->high_seq, tp->snd_una)) {
-		tp->frto_highmark = tp->high_seq;
-	} else {
-		tp->frto_highmark = tp->snd_nxt;
-	}
-	tcp_set_ca_state(sk, TCP_CA_Disorder);
-	tp->high_seq = tp->snd_nxt;
-	tp->frto_counter = 1;
-}
-
-/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
- * which indicates that we should follow the traditional RTO recovery,
- * i.e. mark everything lost and do go-back-N retransmission.
- */
-static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
-
-	tp->lost_out = 0;
-	tp->retrans_out = 0;
-	if (tcp_is_reno(tp))
-		tcp_reset_reno_sack(tp);
-
-	tcp_for_write_queue(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
-
-		TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
-		/*
-		 * Count the retransmission made on RTO correctly (only when
-		 * waiting for the first ACK and did not get it)...
-		 */
-		if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
-			/* For some reason this R-bit might get cleared? */
-			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
-				tp->retrans_out += tcp_skb_pcount(skb);
-			/* ...enter this if branch just for the first segment */
-			flag |= FLAG_DATA_ACKED;
-		} else {
-			if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
-				tp->undo_marker = 0;
-			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-		}
-
-		/* Marking forward transmissions that were made after RTO lost
-		 * can cause unnecessary retransmissions in some scenarios,
-		 * SACK blocks will mitigate that in some but not in all cases.
-		 * We used to not mark them but it was causing break-ups with
-		 * receivers that do only in-order receival.
-		 *
-		 * TODO: we could detect presence of such receiver and select
-		 * different behavior per flow.
-		 */
-		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
-			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-			tp->lost_out += tcp_skb_pcount(skb);
-			tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
-		}
-	}
-	tcp_verify_left_out(tp);
-
-	tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
-	tp->snd_cwnd_cnt = 0;
-	tp->snd_cwnd_stamp = tcp_time_stamp;
-	tp->frto_counter = 0;
-
-	tp->reordering = min_t(unsigned int, tp->reordering,
-			       sysctl_tcp_reordering);
-	tcp_set_ca_state(sk, TCP_CA_Loss);
-	tp->high_seq = tp->snd_nxt;
-	TCP_ECN_queue_cwr(tp);
-
-	tcp_clear_all_retrans_hints(tp);
-}
-
 static void tcp_clear_retrans_partial(struct tcp_sock *tp)
 {
 	tp->retrans_out = 0;
@@ -2090,8 +1883,6 @@ void tcp_enter_loss(struct sock *sk, int how)
 	tcp_set_ca_state(sk, TCP_CA_Loss);
 	tp->high_seq = tp->snd_nxt;
 	TCP_ECN_queue_cwr(tp);
-	/* Abort F-RTO algorithm if one is in progress */
-	tp->frto_counter = 0;
 }
 
 /* If ACK arrived pointing to a remembered SACK, it means that our
@@ -2275,10 +2066,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 packets_out;
 
-	/* Do not perform any recovery during F-RTO algorithm */
-	if (tp->frto_counter)
-		return false;
-
 	/* Trick#1: The loss is proven. */
 	if (tp->lost_out)
 		return true;
@@ -2760,7 +2547,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
 
 	tcp_verify_left_out(tp);
 
-	if (!tp->frto_counter && !tcp_any_retrans_done(sk))
+	if (!tcp_any_retrans_done(sk))
 		tp->retrans_stamp = 0;
 
 	if (flag & FLAG_ECE)
@@ -3198,8 +2985,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			flag |= FLAG_RETRANS_DATA_ACKED;
 			ca_seq_rtt = -1;
 			seq_rtt = -1;
-			if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
-				flag |= FLAG_NONHEAD_RETRANS_ACKED;
 		} else {
 			ca_seq_rtt = now - scb->when;
 			last_ackt = skb->tstamp;
@@ -3408,150 +3193,6 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 	return flag;
 }
 
-/* A very conservative spurious RTO response algorithm: reduce cwnd and
- * continue in congestion avoidance.
- */
-static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
-{
-	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-	tp->snd_cwnd_cnt = 0;
-	TCP_ECN_queue_cwr(tp);
-	tcp_moderate_cwnd(tp);
-}
-
-/* A conservative spurious RTO response algorithm: reduce cwnd using
- * PRR and continue in congestion avoidance.
- */
-static void tcp_cwr_spur_to_response(struct sock *sk)
-{
-	tcp_enter_cwr(sk, 0);
-}
-
-static void tcp_undo_spur_to_response(struct sock *sk, int flag)
-{
-	if (flag & FLAG_ECE)
-		tcp_cwr_spur_to_response(sk);
-	else
-		tcp_undo_cwr(sk, true);
-}
-
-/* F-RTO spurious RTO detection algorithm (RFC4138)
- *
- * F-RTO affects during two new ACKs following RTO (well, almost, see inline
- * comments). State (ACK number) is kept in frto_counter. When ACK advances
- * window (but not to or beyond highest sequence sent before RTO):
- *   On First ACK,  send two new segments out.
- *   On Second ACK, RTO was likely spurious. Do spurious response (response
- *                  algorithm is not part of the F-RTO detection algorithm
- *                  given in RFC4138 but can be selected separately).
- * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss
- * and TCP falls back to conventional RTO recovery. F-RTO allows overriding
- * of Nagle, this is done using frto_counter states 2 and 3, when a new data
- * segment of any size sent during F-RTO, state 2 is upgraded to 3.
- *
- * Rationale: if the RTO was spurious, new ACKs should arrive from the
- * original window even after we transmit two new data segments.
- *
- * SACK version:
- *   on first step, wait until first cumulative ACK arrives, then move to
- *   the second step. In second step, the next ACK decides.
- *
- * F-RTO is implemented (mainly) in four functions:
- *   - tcp_use_frto() is used to determine if TCP is can use F-RTO
- *   - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is
- *     called when tcp_use_frto() showed green light
- *   - tcp_process_frto() handles incoming ACKs during F-RTO algorithm
- *   - tcp_enter_frto_loss() is called if there is not enough evidence
- *     to prove that the RTO is indeed spurious. It transfers the control
- *     from F-RTO to the conventional RTO recovery
- */
-static bool tcp_process_frto(struct sock *sk, int flag)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	tcp_verify_left_out(tp);
-
-	/* Duplicate the behavior from Loss state (fastretrans_alert) */
-	if (flag & FLAG_DATA_ACKED)
-		inet_csk(sk)->icsk_retransmits = 0;
-
-	if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
-	    ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
-		tp->undo_marker = 0;
-
-	if (!before(tp->snd_una, tp->frto_highmark)) {
-		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
-		return true;
-	}
-
-	if (!tcp_is_sackfrto(tp)) {
-		/* RFC4138 shortcoming in step 2; should also have case c):
-		 * ACK isn't duplicate nor advances window, e.g., opposite dir
-		 * data, winupdate
-		 */
-		if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
-			return true;
-
-		if (!(flag & FLAG_DATA_ACKED)) {
-			tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
-					    flag);
-			return true;
-		}
-	} else {
-		if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
-			if (!tcp_packets_in_flight(tp)) {
-				tcp_enter_frto_loss(sk, 2, flag);
-				return true;
-			}
-
-			/* Prevent sending of new data. */
-			tp->snd_cwnd = min(tp->snd_cwnd,
-					   tcp_packets_in_flight(tp));
-			return true;
-		}
-
-		if ((tp->frto_counter >= 2) &&
-		    (!(flag & FLAG_FORWARD_PROGRESS) ||
-		     ((flag & FLAG_DATA_SACKED) &&
-		      !(flag & FLAG_ONLY_ORIG_SACKED)))) {
-			/* RFC4138 shortcoming (see comment above) */
-			if (!(flag & FLAG_FORWARD_PROGRESS) &&
-			    (flag & FLAG_NOT_DUP))
-				return true;
-
-			tcp_enter_frto_loss(sk, 3, flag);
-			return true;
-		}
-	}
-
-	if (tp->frto_counter == 1) {
-		/* tcp_may_send_now needs to see updated state */
-		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
-		tp->frto_counter = 2;
-
-		if (!tcp_may_send_now(sk))
-			tcp_enter_frto_loss(sk, 2, flag);
-
-		return true;
-	} else {
-		switch (sysctl_tcp_frto_response) {
-		case 2:
-			tcp_undo_spur_to_response(sk, flag);
-			break;
-		case 1:
-			tcp_conservative_spur_to_response(tp);
-			break;
-		default:
-			tcp_cwr_spur_to_response(sk);
-			break;
-		}
-		tp->frto_counter = 0;
-		tp->undo_marker = 0;
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
-	}
-	return false;
-}
-
 /* RFC 5961 7 [ACK Throttling] */
 static void tcp_send_challenge_ack(struct sock *sk)
 {
@@ -3616,7 +3257,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	int prior_packets;
 	int prior_sacked = tp->sacked_out;
 	int pkts_acked = 0;
-	bool frto_cwnd = false;
 
 	/* If the ack is older than previous acks
 	 * then we can probably ignore it.
@@ -3690,22 +3330,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	pkts_acked = prior_packets - tp->packets_out;
 
-	if (tp->frto_counter)
-		frto_cwnd = tcp_process_frto(sk, flag);
-	/* Guarantee sacktag reordering detection against wrap-arounds */
-	if (before(tp->frto_highmark, tp->snd_una))
-		tp->frto_highmark = 0;
-
 	if (tcp_ack_is_dubious(sk, flag)) {
 		/* Advance CWND, if state allows this. */
-		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
-		    tcp_may_raise_cwnd(sk, flag))
+		if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
 			tcp_cong_avoid(sk, ack, prior_in_flight);
 		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
 		tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
 				      is_dupack, flag);
 	} else {
-		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
+		if (flag & FLAG_DATA_ACKED)
 			tcp_cong_avoid(sk, ack, prior_in_flight);
 	}
 