author    Yuchung Cheng <ycheng@google.com>    2013-03-20 09:32:58 -0400
committer David S. Miller <davem@davemloft.net>    2013-03-21 11:47:50 -0400
commit    9b44190dc114c1720b34975b5bfc65aece112ced
tree      c1202e05d6a04fa1d31be2ad2942fbe32ffa3f76
parent    e306e2c13b8c214618af0c61acf62a6e42d486de
tcp: refactor F-RTO
This patch series refactors the F-RTO feature (RFC4138/5682) to simplify loss recovery processing. The existing F-RTO implementation was developed during the experimental stage (RFC4138) and carries many experimental features. It takes a separate code path from the traditional timeout processing by overloading CA_Disorder instead of using the CA_Loss state. This complicates CA_Disorder state handling, because that state is also used for handling dubious ACKs and undos. While the algorithm in the RFC does not change the congestion control, the implementation intercepts congestion control in various places (e.g., frto_cwnd in tcp_ack()).

The new code implements the newer F-RTO specification (RFC5682) on the CA_Loss processing path. F-RTO becomes a small extension of the timeout processing and interfaces with the congestion control and Eifel undo modules. It lets the congestion control (module) determine independently how much to send; F-RTO only chooses what to send, in order to detect spurious retransmission. If the timeout is found spurious, it invokes the existing Eifel undo algorithms, such as DSACK- or TCP-timestamp-based detection.

This first patch removes all F-RTO code except sysctl_tcp_frto, which is left for the new implementation. Since CA_EVENT_FRTO is removed, TCP Westwood now computes ssthresh on the regular timeout (CA_EVENT_LOSS) event.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
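As a rough guide to the new design, the RFC5682 detection that the series later attaches to the CA_Loss path boils down to a small state machine: arm it when the RTO fires, probe with new (never-retransmitted) data, and declare the timeout spurious only if ACKs keep arriving from the original window. A deliberately simplified, standalone model follows; the types and helpers (frto_conn, frto_on_rto(), frto_on_ack()) are illustrative and are not the identifiers used by the kernel patches.

/* Standalone model of RFC5682 F-RTO detection as described above.
 * Illustrative only: this is a simplification, not the kernel code
 * added later in the series.
 */
#include <stdbool.h>
#include <stdint.h>

enum frto_state { FRTO_INACTIVE, FRTO_AFTER_RTO, FRTO_SENT_NEW };

struct frto_conn {
	enum frto_state state;
	uint32_t high_seq;	/* snd_nxt when the RTO fired */
};

/* RTO fired: retransmit only the head segment and arm detection. */
static void frto_on_rto(struct frto_conn *c, uint32_t snd_nxt)
{
	c->state = FRTO_AFTER_RTO;
	c->high_seq = snd_nxt;
}

/* Process one ACK while detection is armed.  Returns true when the
 * timeout is judged spurious, i.e. when the Eifel undo should run.
 */
static bool frto_on_ack(struct frto_conn *c, uint32_t ack_seq, bool is_dupack)
{
	switch (c->state) {
	case FRTO_AFTER_RTO:
		if (is_dupack) {
			/* A dup ACK suggests real loss: fall back to the
			 * conventional go-back-N RTO recovery. */
			c->state = FRTO_INACTIVE;
		} else {
			/* Window advanced: transmit new (never-retransmitted)
			 * segments and let the next ACK decide. */
			c->state = FRTO_SENT_NEW;
		}
		return false;
	case FRTO_SENT_NEW:
		c->state = FRTO_INACTIVE;
		/* If the next ACK still comes from the original window
		 * (below high_seq), the original transmissions were
		 * received, so the RTO was spurious. */
		return !is_dupack && ack_seq < c->high_seq;
	default:
		return false;
	}
}

Note what the model deliberately does not do: it never adjusts cwnd or ssthresh. That separation is what lets the congestion control module decide how much to send while F-RTO only picks what to send.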
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c | 375
1 file changed, 4 insertions(+), 371 deletions(-)
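The Eifel undo machinery that a spurious verdict hands off to makes its own check from packet timing or SACK information. A minimal standalone illustration of the timestamp flavor (RFC3522-style) is below; the struct and field names are hypothetical, not the kernel's:

/* Minimal illustration of timestamp-based spurious-RTO detection
 * (the Eifel idea, RFC3522).  Names are hypothetical; this is not
 * the kernel's implementation.
 */
#include <stdbool.h>
#include <stdint.h>

struct eifel_sample {
	uint32_t ts_echo;	/* TSecr echoed by the first ACK after RTO */
	uint32_t ts_retrans;	/* TSval we stamped on the retransmission */
};

static bool rto_was_spurious(const struct eifel_sample *s)
{
	/* Signed subtraction copes with timestamp wrap-around: if the
	 * echoed timestamp predates the retransmission, the ACK was
	 * triggered by the original segment, so the RTO was spurious.
	 */
	return (int32_t)(s->ts_echo - s->ts_retrans) < 0;
}

The DSACK flavor reaches the same verdict when the receiver explicitly reports the retransmitted segment as a duplicate.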
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 19f0149fb6a2..231c79fe91f3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -93,7 +93,6 @@ int sysctl_tcp_stdurg __read_mostly;
 int sysctl_tcp_rfc1337 __read_mostly;
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 int sysctl_tcp_frto __read_mostly = 2;
-int sysctl_tcp_frto_response __read_mostly;
 
 int sysctl_tcp_thin_dupack __read_mostly;
 
@@ -108,17 +107,14 @@ int sysctl_tcp_early_retrans __read_mostly = 3;
 #define FLAG_DATA_SACKED	0x20 /* New SACK. */
 #define FLAG_ECE		0x40 /* ECE in this ACK */
 #define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
-#define FLAG_ONLY_ORIG_SACKED	0x200 /* SACKs only non-rexmit sent before RTO */
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
 #define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
-#define FLAG_NONHEAD_RETRANS_ACKED	0x1000 /* Non-head rexmitted data was ACKed */
 #define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */
 
 #define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
 #define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
 #define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)
 #define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)
-#define FLAG_ANY_PROGRESS	(FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)
 
 #define TCP_REMNANT		(TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
 #define TCP_HP_BITS		(~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
@@ -1159,10 +1155,6 @@ static u8 tcp_sacktag_one(struct sock *sk,
 					   tcp_highest_sack_seq(tp)))
 					state->reord = min(fack_count,
 							   state->reord);
-
-				/* SACK enhanced F-RTO (RFC4138; Appendix B) */
-				if (!after(end_seq, tp->frto_highmark))
-					state->flag |= FLAG_ONLY_ORIG_SACKED;
 			}
 
 		if (sacked & TCPCB_LOST) {
@@ -1555,7 +1547,6 @@ static int
 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 			u32 prior_snd_una)
 {
-	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	const unsigned char *ptr = (skb_transport_header(ack_skb) +
 				    TCP_SKB_CB(ack_skb)->sacked);
@@ -1728,12 +1719,6 @@ walk:
 				       start_seq, end_seq, dup_sack);
 
 advance_sp:
-		/* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
-		 * due to in-order walk
-		 */
-		if (after(end_seq, tp->frto_highmark))
-			state.flag &= ~FLAG_ONLY_ORIG_SACKED;
-
 		i++;
 	}
 
@@ -1750,8 +1735,7 @@ advance_sp:
 	tcp_verify_left_out(tp);
 
 	if ((state.reord < tp->fackets_out) &&
-	    ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
-	    (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
+	    ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
 		tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
 
 out:
@@ -1825,197 +1809,6 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
 	tp->sacked_out = 0;
 }
 
-static int tcp_is_sackfrto(const struct tcp_sock *tp)
-{
-	return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
-}
-
-/* F-RTO can only be used if TCP has never retransmitted anything other than
- * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
- */
-bool tcp_use_frto(struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct sk_buff *skb;
-
-	if (!sysctl_tcp_frto)
-		return false;
-
-	/* MTU probe and F-RTO won't really play nicely along currently */
-	if (icsk->icsk_mtup.probe_size)
-		return false;
-
-	if (tcp_is_sackfrto(tp))
-		return true;
-
-	/* Avoid expensive walking of rexmit queue if possible */
-	if (tp->retrans_out > 1)
-		return false;
-
-	skb = tcp_write_queue_head(sk);
-	if (tcp_skb_is_last(sk, skb))
-		return true;
-	skb = tcp_write_queue_next(sk, skb);	/* Skips head */
-	tcp_for_write_queue_from(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
-		if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
-			return false;
-		/* Short-circuit when first non-SACKed skb has been checked */
-		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
-			break;
-	}
-	return true;
-}
-
-/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
- * recovery a bit and use heuristics in tcp_process_frto() to detect if
- * the RTO was spurious. Only clear SACKED_RETRANS of the head here to
- * keep retrans_out counting accurate (with SACK F-RTO, other than head
- * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS
- * bits are handled if the Loss state is really to be entered (in
- * tcp_enter_frto_loss).
- *
- * Do like tcp_enter_loss() would; when RTO expires the second time it
- * does:
- *  "Reduce ssthresh if it has not yet been made inside this window."
- */
-void tcp_enter_frto(struct sock *sk)
-{
-	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
-
-	if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
-	    tp->snd_una == tp->high_seq ||
-	    ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
-	     !icsk->icsk_retransmits)) {
-		tp->prior_ssthresh = tcp_current_ssthresh(sk);
-		/* Our state is too optimistic in ssthresh() call because cwnd
-		 * is not reduced until tcp_enter_frto_loss() when previous F-RTO
-		 * recovery has not yet completed. Pattern would be this: RTO,
-		 * Cumulative ACK, RTO (2xRTO for the same segment does not end
-		 * up here twice).
-		 * RFC4138 should be more specific on what to do, even though
-		 * RTO is quite unlikely to occur after the first Cumulative ACK
-		 * due to back-off and complexity of triggering events ...
-		 */
-		if (tp->frto_counter) {
-			u32 stored_cwnd;
-			stored_cwnd = tp->snd_cwnd;
-			tp->snd_cwnd = 2;
-			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
-			tp->snd_cwnd = stored_cwnd;
-		} else {
-			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
-		}
-		/* ... in theory, cong.control module could do "any tricks" in
-		 * ssthresh(), which means that ca_state, lost bits and lost_out
-		 * counter would have to be faked before the call occurs. We
-		 * consider that too expensive, unlikely and hacky, so modules
-		 * using these in ssthresh() must deal these incompatibility
-		 * issues if they receives CA_EVENT_FRTO and frto_counter != 0
-		 */
-		tcp_ca_event(sk, CA_EVENT_FRTO);
-	}
-
-	tp->undo_marker = tp->snd_una;
-	tp->undo_retrans = 0;
-
-	skb = tcp_write_queue_head(sk);
-	if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
-		tp->undo_marker = 0;
-	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
-		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-		tp->retrans_out -= tcp_skb_pcount(skb);
-	}
-	tcp_verify_left_out(tp);
-
-	/* Too bad if TCP was application limited */
-	tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
-
-	/* Earlier loss recovery underway (see RFC4138; Appendix B).
-	 * The last condition is necessary at least in tp->frto_counter case.
-	 */
-	if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
-	    ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
-	    after(tp->high_seq, tp->snd_una)) {
-		tp->frto_highmark = tp->high_seq;
-	} else {
-		tp->frto_highmark = tp->snd_nxt;
-	}
-	tcp_set_ca_state(sk, TCP_CA_Disorder);
-	tp->high_seq = tp->snd_nxt;
-	tp->frto_counter = 1;
-}
-
-/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
- * which indicates that we should follow the traditional RTO recovery,
- * i.e. mark everything lost and do go-back-N retransmission.
- */
-static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
-
-	tp->lost_out = 0;
-	tp->retrans_out = 0;
-	if (tcp_is_reno(tp))
-		tcp_reset_reno_sack(tp);
-
-	tcp_for_write_queue(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
-
-		TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
-		/*
-		 * Count the retransmission made on RTO correctly (only when
-		 * waiting for the first ACK and did not get it)...
-		 */
-		if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
-			/* For some reason this R-bit might get cleared? */
-			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
-				tp->retrans_out += tcp_skb_pcount(skb);
-			/* ...enter this if branch just for the first segment */
-			flag |= FLAG_DATA_ACKED;
-		} else {
-			if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
-				tp->undo_marker = 0;
-			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-		}
-
-		/* Marking forward transmissions that were made after RTO lost
-		 * can cause unnecessary retransmissions in some scenarios,
-		 * SACK blocks will mitigate that in some but not in all cases.
-		 * We used to not mark them but it was causing break-ups with
-		 * receivers that do only in-order receival.
-		 *
-		 * TODO: we could detect presence of such receiver and select
-		 * different behavior per flow.
-		 */
-		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
-			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-			tp->lost_out += tcp_skb_pcount(skb);
-			tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
-		}
-	}
-	tcp_verify_left_out(tp);
-
-	tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
-	tp->snd_cwnd_cnt = 0;
-	tp->snd_cwnd_stamp = tcp_time_stamp;
-	tp->frto_counter = 0;
-
-	tp->reordering = min_t(unsigned int, tp->reordering,
-			       sysctl_tcp_reordering);
-	tcp_set_ca_state(sk, TCP_CA_Loss);
-	tp->high_seq = tp->snd_nxt;
-	TCP_ECN_queue_cwr(tp);
-
-	tcp_clear_all_retrans_hints(tp);
-}
-
 static void tcp_clear_retrans_partial(struct tcp_sock *tp)
 {
 	tp->retrans_out = 0;
@@ -2090,8 +1883,6 @@ void tcp_enter_loss(struct sock *sk, int how)
 	tcp_set_ca_state(sk, TCP_CA_Loss);
 	tp->high_seq = tp->snd_nxt;
 	TCP_ECN_queue_cwr(tp);
-	/* Abort F-RTO algorithm if one is in progress */
-	tp->frto_counter = 0;
 }
 
 /* If ACK arrived pointing to a remembered SACK, it means that our
@@ -2275,10 +2066,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 packets_out;
 
-	/* Do not perform any recovery during F-RTO algorithm */
-	if (tp->frto_counter)
-		return false;
-
 	/* Trick#1: The loss is proven. */
 	if (tp->lost_out)
 		return true;
@@ -2760,7 +2547,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
 
 	tcp_verify_left_out(tp);
 
-	if (!tp->frto_counter && !tcp_any_retrans_done(sk))
+	if (!tcp_any_retrans_done(sk))
 		tp->retrans_stamp = 0;
 
 	if (flag & FLAG_ECE)
@@ -3198,8 +2985,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			flag |= FLAG_RETRANS_DATA_ACKED;
 			ca_seq_rtt = -1;
 			seq_rtt = -1;
-			if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
-				flag |= FLAG_NONHEAD_RETRANS_ACKED;
 		} else {
 			ca_seq_rtt = now - scb->when;
 			last_ackt = skb->tstamp;
@@ -3408,150 +3193,6 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 	return flag;
 }
 
-/* A very conservative spurious RTO response algorithm: reduce cwnd and
- * continue in congestion avoidance.
- */
-static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
-{
-	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-	tp->snd_cwnd_cnt = 0;
-	TCP_ECN_queue_cwr(tp);
-	tcp_moderate_cwnd(tp);
-}
-
-/* A conservative spurious RTO response algorithm: reduce cwnd using
- * PRR and continue in congestion avoidance.
- */
-static void tcp_cwr_spur_to_response(struct sock *sk)
-{
-	tcp_enter_cwr(sk, 0);
-}
-
-static void tcp_undo_spur_to_response(struct sock *sk, int flag)
-{
-	if (flag & FLAG_ECE)
-		tcp_cwr_spur_to_response(sk);
-	else
-		tcp_undo_cwr(sk, true);
-}
-
-/* F-RTO spurious RTO detection algorithm (RFC4138)
- *
- * F-RTO affects during two new ACKs following RTO (well, almost, see inline
- * comments). State (ACK number) is kept in frto_counter. When ACK advances
- * window (but not to or beyond highest sequence sent before RTO):
- *   On First ACK,  send two new segments out.
- *   On Second ACK, RTO was likely spurious. Do spurious response (response
- *                  algorithm is not part of the F-RTO detection algorithm
- *                  given in RFC4138 but can be selected separately).
- * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss
- * and TCP falls back to conventional RTO recovery. F-RTO allows overriding
- * of Nagle, this is done using frto_counter states 2 and 3, when a new data
- * segment of any size sent during F-RTO, state 2 is upgraded to 3.
- *
- * Rationale: if the RTO was spurious, new ACKs should arrive from the
- * original window even after we transmit two new data segments.
- *
- * SACK version:
- *   on first step, wait until first cumulative ACK arrives, then move to
- *   the second step. In second step, the next ACK decides.
- *
- * F-RTO is implemented (mainly) in four functions:
- *   - tcp_use_frto() is used to determine if TCP is can use F-RTO
- *   - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is
- *     called when tcp_use_frto() showed green light
- *   - tcp_process_frto() handles incoming ACKs during F-RTO algorithm
- *   - tcp_enter_frto_loss() is called if there is not enough evidence
- *     to prove that the RTO is indeed spurious. It transfers the control
- *     from F-RTO to the conventional RTO recovery
- */
-static bool tcp_process_frto(struct sock *sk, int flag)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	tcp_verify_left_out(tp);
-
-	/* Duplicate the behavior from Loss state (fastretrans_alert) */
-	if (flag & FLAG_DATA_ACKED)
-		inet_csk(sk)->icsk_retransmits = 0;
-
-	if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
-	    ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
-		tp->undo_marker = 0;
-
-	if (!before(tp->snd_una, tp->frto_highmark)) {
-		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
-		return true;
-	}
-
-	if (!tcp_is_sackfrto(tp)) {
-		/* RFC4138 shortcoming in step 2; should also have case c):
-		 * ACK isn't duplicate nor advances window, e.g., opposite dir
-		 * data, winupdate
-		 */
-		if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
-			return true;
-
-		if (!(flag & FLAG_DATA_ACKED)) {
-			tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
-					    flag);
-			return true;
-		}
-	} else {
-		if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
-			if (!tcp_packets_in_flight(tp)) {
-				tcp_enter_frto_loss(sk, 2, flag);
-				return true;
-			}
-
-			/* Prevent sending of new data. */
-			tp->snd_cwnd = min(tp->snd_cwnd,
-					   tcp_packets_in_flight(tp));
-			return true;
-		}
-
-		if ((tp->frto_counter >= 2) &&
-		    (!(flag & FLAG_FORWARD_PROGRESS) ||
-		     ((flag & FLAG_DATA_SACKED) &&
-		      !(flag & FLAG_ONLY_ORIG_SACKED)))) {
-			/* RFC4138 shortcoming (see comment above) */
-			if (!(flag & FLAG_FORWARD_PROGRESS) &&
-			    (flag & FLAG_NOT_DUP))
-				return true;
-
-			tcp_enter_frto_loss(sk, 3, flag);
-			return true;
-		}
-	}
-
-	if (tp->frto_counter == 1) {
-		/* tcp_may_send_now needs to see updated state */
-		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
-		tp->frto_counter = 2;
-
-		if (!tcp_may_send_now(sk))
-			tcp_enter_frto_loss(sk, 2, flag);
-
-		return true;
-	} else {
-		switch (sysctl_tcp_frto_response) {
-		case 2:
-			tcp_undo_spur_to_response(sk, flag);
-			break;
-		case 1:
-			tcp_conservative_spur_to_response(tp);
-			break;
-		default:
-			tcp_cwr_spur_to_response(sk);
-			break;
-		}
-		tp->frto_counter = 0;
-		tp->undo_marker = 0;
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
-	}
-	return false;
-}
-
 /* RFC 5961 7 [ACK Throttling] */
 static void tcp_send_challenge_ack(struct sock *sk)
 {
@@ -3616,7 +3257,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	int prior_packets;
 	int prior_sacked = tp->sacked_out;
 	int pkts_acked = 0;
-	bool frto_cwnd = false;
 
 	/* If the ack is older than previous acks
 	 * then we can probably ignore it.
@@ -3690,22 +3330,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	pkts_acked = prior_packets - tp->packets_out;
 
-	if (tp->frto_counter)
-		frto_cwnd = tcp_process_frto(sk, flag);
-	/* Guarantee sacktag reordering detection against wrap-arounds */
-	if (before(tp->frto_highmark, tp->snd_una))
-		tp->frto_highmark = 0;
-
 	if (tcp_ack_is_dubious(sk, flag)) {
 		/* Advance CWND, if state allows this. */
-		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
-		    tcp_may_raise_cwnd(sk, flag))
+		if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
 			tcp_cong_avoid(sk, ack, prior_in_flight);
 		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
 		tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
 				      is_dupack, flag);
 	} else {
-		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
+		if (flag & FLAG_DATA_ACKED)
 			tcp_cong_avoid(sk, ack, prior_in_flight);
 	}
 