author    Jonathan Herman <hermanjl@cs.unc.edu>  2013-01-17 16:15:55 -0500
committer Jonathan Herman <hermanjl@cs.unc.edu>  2013-01-17 16:15:55 -0500
commit    8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
tree      a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /net/ipv4/tcp_input.c
parent    406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c | 1805
1 file changed, 775 insertions, 1030 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 18f97ca76b0..d73aab3fbfc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -61,8 +61,6 @@ | |||
61 | * Pasi Sarolahti: F-RTO for dealing with spurious RTOs | 61 | * Pasi Sarolahti: F-RTO for dealing with spurious RTOs |
62 | */ | 62 | */ |
63 | 63 | ||
64 | #define pr_fmt(fmt) "TCP: " fmt | ||
65 | |||
66 | #include <linux/mm.h> | 64 | #include <linux/mm.h> |
67 | #include <linux/slab.h> | 65 | #include <linux/slab.h> |
68 | #include <linux/module.h> | 66 | #include <linux/module.h> |
@@ -85,23 +83,20 @@ int sysctl_tcp_ecn __read_mostly = 2; | |||
85 | EXPORT_SYMBOL(sysctl_tcp_ecn); | 83 | EXPORT_SYMBOL(sysctl_tcp_ecn); |
86 | int sysctl_tcp_dsack __read_mostly = 1; | 84 | int sysctl_tcp_dsack __read_mostly = 1; |
87 | int sysctl_tcp_app_win __read_mostly = 31; | 85 | int sysctl_tcp_app_win __read_mostly = 31; |
88 | int sysctl_tcp_adv_win_scale __read_mostly = 1; | 86 | int sysctl_tcp_adv_win_scale __read_mostly = 2; |
89 | EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); | 87 | EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); |
90 | 88 | ||
91 | /* rfc5961 challenge ack rate limiting */ | ||
92 | int sysctl_tcp_challenge_ack_limit = 100; | ||
93 | |||
94 | int sysctl_tcp_stdurg __read_mostly; | 89 | int sysctl_tcp_stdurg __read_mostly; |
95 | int sysctl_tcp_rfc1337 __read_mostly; | 90 | int sysctl_tcp_rfc1337 __read_mostly; |
96 | int sysctl_tcp_max_orphans __read_mostly = NR_FILE; | 91 | int sysctl_tcp_max_orphans __read_mostly = NR_FILE; |
97 | int sysctl_tcp_frto __read_mostly = 2; | 92 | int sysctl_tcp_frto __read_mostly = 2; |
98 | int sysctl_tcp_frto_response __read_mostly; | 93 | int sysctl_tcp_frto_response __read_mostly; |
94 | int sysctl_tcp_nometrics_save __read_mostly; | ||
99 | 95 | ||
100 | int sysctl_tcp_thin_dupack __read_mostly; | 96 | int sysctl_tcp_thin_dupack __read_mostly; |
101 | 97 | ||
102 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; | 98 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; |
103 | int sysctl_tcp_abc __read_mostly; | 99 | int sysctl_tcp_abc __read_mostly; |
104 | int sysctl_tcp_early_retrans __read_mostly = 2; | ||
105 | 100 | ||
106 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ | 101 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ |
107 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ | 102 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ |
@@ -110,6 +105,7 @@ int sysctl_tcp_early_retrans __read_mostly = 2; | |||
110 | #define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ | 105 | #define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ |
111 | #define FLAG_DATA_SACKED 0x20 /* New SACK. */ | 106 | #define FLAG_DATA_SACKED 0x20 /* New SACK. */ |
112 | #define FLAG_ECE 0x40 /* ECE in this ACK */ | 107 | #define FLAG_ECE 0x40 /* ECE in this ACK */ |
108 | #define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ | ||
113 | #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ | 109 | #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ |
114 | #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ | 110 | #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ |
115 | #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ | 111 | #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ |
@@ -178,7 +174,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) | |||
178 | static void tcp_incr_quickack(struct sock *sk) | 174 | static void tcp_incr_quickack(struct sock *sk) |
179 | { | 175 | { |
180 | struct inet_connection_sock *icsk = inet_csk(sk); | 176 | struct inet_connection_sock *icsk = inet_csk(sk); |
181 | unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); | 177 | unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); |
182 | 178 | ||
183 | if (quickacks == 0) | 179 | if (quickacks == 0) |
184 | quickacks = 2; | 180 | quickacks = 2; |
@@ -198,10 +194,9 @@ static void tcp_enter_quickack_mode(struct sock *sk) | |||
198 | * and the session is not interactive. | 194 | * and the session is not interactive. |
199 | */ | 195 | */ |
200 | 196 | ||
201 | static inline bool tcp_in_quickack_mode(const struct sock *sk) | 197 | static inline int tcp_in_quickack_mode(const struct sock *sk) |
202 | { | 198 | { |
203 | const struct inet_connection_sock *icsk = inet_csk(sk); | 199 | const struct inet_connection_sock *icsk = inet_csk(sk); |
204 | |||
205 | return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; | 200 | return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; |
206 | } | 201 | } |
207 | 202 | ||
@@ -211,7 +206,7 @@ static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp) | |||
211 | tp->ecn_flags |= TCP_ECN_QUEUE_CWR; | 206 | tp->ecn_flags |= TCP_ECN_QUEUE_CWR; |
212 | } | 207 | } |
213 | 208 | ||
214 | static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb) | 209 | static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb) |
215 | { | 210 | { |
216 | if (tcp_hdr(skb)->cwr) | 211 | if (tcp_hdr(skb)->cwr) |
217 | tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; | 212 | tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; |
@@ -222,49 +217,36 @@ static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp) | |||
222 | tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; | 217 | tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; |
223 | } | 218 | } |
224 | 219 | ||
225 | static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) | 220 | static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) |
226 | { | 221 | { |
227 | if (!(tp->ecn_flags & TCP_ECN_OK)) | 222 | if (tp->ecn_flags & TCP_ECN_OK) { |
228 | return; | 223 | if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) |
229 | 224 | tp->ecn_flags |= TCP_ECN_DEMAND_CWR; | |
230 | switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) { | ||
231 | case INET_ECN_NOT_ECT: | ||
232 | /* Funny extension: if ECT is not set on a segment, | 225 | /* Funny extension: if ECT is not set on a segment, |
233 | * and we already seen ECT on a previous segment, | 226 | * it is surely retransmit. It is not in ECN RFC, |
234 | * it is probably a retransmit. | 227 | * but Linux follows this rule. */ |
235 | */ | 228 | else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) |
236 | if (tp->ecn_flags & TCP_ECN_SEEN) | ||
237 | tcp_enter_quickack_mode((struct sock *)tp); | ||
238 | break; | ||
239 | case INET_ECN_CE: | ||
240 | if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { | ||
241 | /* Better not delay acks, sender can have a very low cwnd */ | ||
242 | tcp_enter_quickack_mode((struct sock *)tp); | 229 | tcp_enter_quickack_mode((struct sock *)tp); |
243 | tp->ecn_flags |= TCP_ECN_DEMAND_CWR; | ||
244 | } | ||
245 | /* fallinto */ | ||
246 | default: | ||
247 | tp->ecn_flags |= TCP_ECN_SEEN; | ||
248 | } | 230 | } |
249 | } | 231 | } |
250 | 232 | ||
251 | static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th) | 233 | static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th) |
252 | { | 234 | { |
253 | if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr)) | 235 | if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr)) |
254 | tp->ecn_flags &= ~TCP_ECN_OK; | 236 | tp->ecn_flags &= ~TCP_ECN_OK; |
255 | } | 237 | } |
256 | 238 | ||
257 | static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th) | 239 | static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th) |
258 | { | 240 | { |
259 | if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr)) | 241 | if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr)) |
260 | tp->ecn_flags &= ~TCP_ECN_OK; | 242 | tp->ecn_flags &= ~TCP_ECN_OK; |
261 | } | 243 | } |
262 | 244 | ||
263 | static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th) | 245 | static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) |
264 | { | 246 | { |
265 | if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK)) | 247 | if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK)) |
266 | return true; | 248 | return 1; |
267 | return false; | 249 | return 0; |
268 | } | 250 | } |
269 | 251 | ||
270 | /* Buffer size and advertised window tuning. | 252 | /* Buffer size and advertised window tuning. |
@@ -274,11 +256,14 @@ static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr | |||
274 | 256 | ||
275 | static void tcp_fixup_sndbuf(struct sock *sk) | 257 | static void tcp_fixup_sndbuf(struct sock *sk) |
276 | { | 258 | { |
277 | int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER); | 259 | int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 + |
260 | sizeof(struct sk_buff); | ||
278 | 261 | ||
279 | sndmem *= TCP_INIT_CWND; | 262 | if (sk->sk_sndbuf < 3 * sndmem) { |
280 | if (sk->sk_sndbuf < sndmem) | 263 | sk->sk_sndbuf = 3 * sndmem; |
281 | sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); | 264 | if (sk->sk_sndbuf > sysctl_tcp_wmem[2]) |
265 | sk->sk_sndbuf = sysctl_tcp_wmem[2]; | ||
266 | } | ||
282 | } | 267 | } |
283 | 268 | ||
284 | /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) | 269 | /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) |
@@ -324,14 +309,14 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) | |||
324 | return 0; | 309 | return 0; |
325 | } | 310 | } |
326 | 311 | ||
327 | static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) | 312 | static void tcp_grow_window(struct sock *sk, struct sk_buff *skb) |
328 | { | 313 | { |
329 | struct tcp_sock *tp = tcp_sk(sk); | 314 | struct tcp_sock *tp = tcp_sk(sk); |
330 | 315 | ||
331 | /* Check #1 */ | 316 | /* Check #1 */ |
332 | if (tp->rcv_ssthresh < tp->window_clamp && | 317 | if (tp->rcv_ssthresh < tp->window_clamp && |
333 | (int)tp->rcv_ssthresh < tcp_space(sk) && | 318 | (int)tp->rcv_ssthresh < tcp_space(sk) && |
334 | !sk_under_memory_pressure(sk)) { | 319 | !tcp_memory_pressure) { |
335 | int incr; | 320 | int incr; |
336 | 321 | ||
337 | /* Check #2. Increase window, if skb with such overhead | 322 | /* Check #2. Increase window, if skb with such overhead |
@@ -343,7 +328,6 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) | |||
343 | incr = __tcp_grow_window(sk, skb); | 328 | incr = __tcp_grow_window(sk, skb); |
344 | 329 | ||
345 | if (incr) { | 330 | if (incr) { |
346 | incr = max_t(int, incr, 2 * skb->len); | ||
347 | tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, | 331 | tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, |
348 | tp->window_clamp); | 332 | tp->window_clamp); |
349 | inet_csk(sk)->icsk_ack.quick |= 1; | 333 | inet_csk(sk)->icsk_ack.quick |= 1; |
@@ -355,30 +339,23 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) | |||
355 | 339 | ||
356 | static void tcp_fixup_rcvbuf(struct sock *sk) | 340 | static void tcp_fixup_rcvbuf(struct sock *sk) |
357 | { | 341 | { |
358 | u32 mss = tcp_sk(sk)->advmss; | 342 | struct tcp_sock *tp = tcp_sk(sk); |
359 | u32 icwnd = TCP_DEFAULT_INIT_RCVWND; | 343 | int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); |
360 | int rcvmem; | ||
361 | 344 | ||
362 | /* Limit to 10 segments if mss <= 1460, | 345 | /* Try to select rcvbuf so that 4 mss-sized segments |
363 | * or 14600/mss segments, with a minimum of two segments. | 346 | * will fit to window and corresponding skbs will fit to our rcvbuf. |
347 | * (was 3; 4 is minimum to allow fast retransmit to work.) | ||
364 | */ | 348 | */ |
365 | if (mss > 1460) | 349 | while (tcp_win_from_space(rcvmem) < tp->advmss) |
366 | icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); | ||
367 | |||
368 | rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER); | ||
369 | while (tcp_win_from_space(rcvmem) < mss) | ||
370 | rcvmem += 128; | 350 | rcvmem += 128; |
371 | 351 | if (sk->sk_rcvbuf < 4 * rcvmem) | |
372 | rcvmem *= icwnd; | 352 | sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]); |
373 | |||
374 | if (sk->sk_rcvbuf < rcvmem) | ||
375 | sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); | ||
376 | } | 353 | } |
377 | 354 | ||
378 | /* 4. Try to fixup all. It is made immediately after connection enters | 355 | /* 4. Try to fixup all. It is made immediately after connection enters |
379 | * established state. | 356 | * established state. |
380 | */ | 357 | */ |
381 | void tcp_init_buffer_space(struct sock *sk) | 358 | static void tcp_init_buffer_space(struct sock *sk) |
382 | { | 359 | { |
383 | struct tcp_sock *tp = tcp_sk(sk); | 360 | struct tcp_sock *tp = tcp_sk(sk); |
384 | int maxwin; | 361 | int maxwin; |
@@ -421,8 +398,8 @@ static void tcp_clamp_window(struct sock *sk) | |||
421 | 398 | ||
422 | if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && | 399 | if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && |
423 | !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && | 400 | !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && |
424 | !sk_under_memory_pressure(sk) && | 401 | !tcp_memory_pressure && |
425 | sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { | 402 | atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { |
426 | sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), | 403 | sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), |
427 | sysctl_tcp_rmem[2]); | 404 | sysctl_tcp_rmem[2]); |
428 | } | 405 | } |
@@ -439,7 +416,7 @@ static void tcp_clamp_window(struct sock *sk) | |||
439 | */ | 416 | */ |
440 | void tcp_initialize_rcv_mss(struct sock *sk) | 417 | void tcp_initialize_rcv_mss(struct sock *sk) |
441 | { | 418 | { |
442 | const struct tcp_sock *tp = tcp_sk(sk); | 419 | struct tcp_sock *tp = tcp_sk(sk); |
443 | unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); | 420 | unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); |
444 | 421 | ||
445 | hint = min(hint, tp->rcv_wnd / 2); | 422 | hint = min(hint, tp->rcv_wnd / 2); |
@@ -483,11 +460,8 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep) | |||
483 | if (!win_dep) { | 460 | if (!win_dep) { |
484 | m -= (new_sample >> 3); | 461 | m -= (new_sample >> 3); |
485 | new_sample += m; | 462 | new_sample += m; |
486 | } else { | 463 | } else if (m < new_sample) |
487 | m <<= 3; | 464 | new_sample = m << 3; |
488 | if (m < new_sample) | ||
489 | new_sample = m; | ||
490 | } | ||
491 | } else { | 465 | } else { |
492 | /* No previous measure. */ | 466 | /* No previous measure. */ |
493 | new_sample = m << 3; | 467 | new_sample = m << 3; |
@@ -503,7 +477,7 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp) | |||
503 | goto new_measure; | 477 | goto new_measure; |
504 | if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) | 478 | if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) |
505 | return; | 479 | return; |
506 | tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1); | 480 | tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1); |
507 | 481 | ||
508 | new_measure: | 482 | new_measure: |
509 | tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd; | 483 | tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd; |
@@ -557,7 +531,8 @@ void tcp_rcv_space_adjust(struct sock *sk) | |||
557 | space /= tp->advmss; | 531 | space /= tp->advmss; |
558 | if (!space) | 532 | if (!space) |
559 | space = 1; | 533 | space = 1; |
560 | rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); | 534 | rcvmem = (tp->advmss + MAX_TCP_HEADER + |
535 | 16 + sizeof(struct sk_buff)); | ||
561 | while (tcp_win_from_space(rcvmem) < tp->advmss) | 536 | while (tcp_win_from_space(rcvmem) < tp->advmss) |
562 | rcvmem += 128; | 537 | rcvmem += 128; |
563 | space *= rcvmem; | 538 | space *= rcvmem; |
@@ -707,7 +682,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) | |||
707 | /* Calculate rto without backoff. This is the second half of Van Jacobson's | 682 | /* Calculate rto without backoff. This is the second half of Van Jacobson's |
708 | * routine referred to above. | 683 | * routine referred to above. |
709 | */ | 684 | */ |
710 | void tcp_set_rto(struct sock *sk) | 685 | static inline void tcp_set_rto(struct sock *sk) |
711 | { | 686 | { |
712 | const struct tcp_sock *tp = tcp_sk(sk); | 687 | const struct tcp_sock *tp = tcp_sk(sk); |
713 | /* Old crap is replaced with new one. 8) | 688 | /* Old crap is replaced with new one. 8) |
@@ -734,7 +709,110 @@ void tcp_set_rto(struct sock *sk) | |||
734 | tcp_bound_rto(sk); | 709 | tcp_bound_rto(sk); |
735 | } | 710 | } |
736 | 711 | ||
737 | __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst) | 712 | /* Save metrics learned by this TCP session. |
713 | This function is called only, when TCP finishes successfully | ||
714 | i.e. when it enters TIME-WAIT or goes from LAST-ACK to CLOSE. | ||
715 | */ | ||
716 | void tcp_update_metrics(struct sock *sk) | ||
717 | { | ||
718 | struct tcp_sock *tp = tcp_sk(sk); | ||
719 | struct dst_entry *dst = __sk_dst_get(sk); | ||
720 | |||
721 | if (sysctl_tcp_nometrics_save) | ||
722 | return; | ||
723 | |||
724 | dst_confirm(dst); | ||
725 | |||
726 | if (dst && (dst->flags & DST_HOST)) { | ||
727 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
728 | int m; | ||
729 | unsigned long rtt; | ||
730 | |||
731 | if (icsk->icsk_backoff || !tp->srtt) { | ||
732 | /* This session failed to estimate rtt. Why? | ||
733 | * Probably, no packets returned in time. | ||
734 | * Reset our results. | ||
735 | */ | ||
736 | if (!(dst_metric_locked(dst, RTAX_RTT))) | ||
737 | dst_metric_set(dst, RTAX_RTT, 0); | ||
738 | return; | ||
739 | } | ||
740 | |||
741 | rtt = dst_metric_rtt(dst, RTAX_RTT); | ||
742 | m = rtt - tp->srtt; | ||
743 | |||
744 | /* If newly calculated rtt larger than stored one, | ||
745 | * store new one. Otherwise, use EWMA. Remember, | ||
746 | * rtt overestimation is always better than underestimation. | ||
747 | */ | ||
748 | if (!(dst_metric_locked(dst, RTAX_RTT))) { | ||
749 | if (m <= 0) | ||
750 | set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt); | ||
751 | else | ||
752 | set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3)); | ||
753 | } | ||
754 | |||
755 | if (!(dst_metric_locked(dst, RTAX_RTTVAR))) { | ||
756 | unsigned long var; | ||
757 | if (m < 0) | ||
758 | m = -m; | ||
759 | |||
760 | /* Scale deviation to rttvar fixed point */ | ||
761 | m >>= 1; | ||
762 | if (m < tp->mdev) | ||
763 | m = tp->mdev; | ||
764 | |||
765 | var = dst_metric_rtt(dst, RTAX_RTTVAR); | ||
766 | if (m >= var) | ||
767 | var = m; | ||
768 | else | ||
769 | var -= (var - m) >> 2; | ||
770 | |||
771 | set_dst_metric_rtt(dst, RTAX_RTTVAR, var); | ||
772 | } | ||
773 | |||
774 | if (tcp_in_initial_slowstart(tp)) { | ||
775 | /* Slow start still did not finish. */ | ||
776 | if (dst_metric(dst, RTAX_SSTHRESH) && | ||
777 | !dst_metric_locked(dst, RTAX_SSTHRESH) && | ||
778 | (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH)) | ||
779 | dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1); | ||
780 | if (!dst_metric_locked(dst, RTAX_CWND) && | ||
781 | tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) | ||
782 | dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd); | ||
783 | } else if (tp->snd_cwnd > tp->snd_ssthresh && | ||
784 | icsk->icsk_ca_state == TCP_CA_Open) { | ||
785 | /* Cong. avoidance phase, cwnd is reliable. */ | ||
786 | if (!dst_metric_locked(dst, RTAX_SSTHRESH)) | ||
787 | dst_metric_set(dst, RTAX_SSTHRESH, | ||
788 | max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); | ||
789 | if (!dst_metric_locked(dst, RTAX_CWND)) | ||
790 | dst_metric_set(dst, RTAX_CWND, | ||
791 | (dst_metric(dst, RTAX_CWND) + | ||
792 | tp->snd_cwnd) >> 1); | ||
793 | } else { | ||
794 | /* Else slow start did not finish, cwnd is non-sense, | ||
795 | ssthresh may be also invalid. | ||
796 | */ | ||
797 | if (!dst_metric_locked(dst, RTAX_CWND)) | ||
798 | dst_metric_set(dst, RTAX_CWND, | ||
799 | (dst_metric(dst, RTAX_CWND) + | ||
800 | tp->snd_ssthresh) >> 1); | ||
801 | if (dst_metric(dst, RTAX_SSTHRESH) && | ||
802 | !dst_metric_locked(dst, RTAX_SSTHRESH) && | ||
803 | tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH)) | ||
804 | dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh); | ||
805 | } | ||
806 | |||
807 | if (!dst_metric_locked(dst, RTAX_REORDERING)) { | ||
808 | if (dst_metric(dst, RTAX_REORDERING) < tp->reordering && | ||
809 | tp->reordering != sysctl_tcp_reordering) | ||
810 | dst_metric_set(dst, RTAX_REORDERING, tp->reordering); | ||
811 | } | ||
812 | } | ||
813 | } | ||
814 | |||
815 | __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) | ||
738 | { | 816 | { |
739 | __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); | 817 | __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); |
740 | 818 | ||
@@ -743,22 +821,124 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst) | |||
743 | return min_t(__u32, cwnd, tp->snd_cwnd_clamp); | 821 | return min_t(__u32, cwnd, tp->snd_cwnd_clamp); |
744 | } | 822 | } |
745 | 823 | ||
824 | /* Set slow start threshold and cwnd not falling to slow start */ | ||
825 | void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) | ||
826 | { | ||
827 | struct tcp_sock *tp = tcp_sk(sk); | ||
828 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
829 | |||
830 | tp->prior_ssthresh = 0; | ||
831 | tp->bytes_acked = 0; | ||
832 | if (icsk->icsk_ca_state < TCP_CA_CWR) { | ||
833 | tp->undo_marker = 0; | ||
834 | if (set_ssthresh) | ||
835 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); | ||
836 | tp->snd_cwnd = min(tp->snd_cwnd, | ||
837 | tcp_packets_in_flight(tp) + 1U); | ||
838 | tp->snd_cwnd_cnt = 0; | ||
839 | tp->high_seq = tp->snd_nxt; | ||
840 | tp->snd_cwnd_stamp = tcp_time_stamp; | ||
841 | TCP_ECN_queue_cwr(tp); | ||
842 | |||
843 | tcp_set_ca_state(sk, TCP_CA_CWR); | ||
844 | } | ||
845 | } | ||
846 | |||
746 | /* | 847 | /* |
747 | * Packet counting of FACK is based on in-order assumptions, therefore TCP | 848 | * Packet counting of FACK is based on in-order assumptions, therefore TCP |
748 | * disables it when reordering is detected | 849 | * disables it when reordering is detected |
749 | */ | 850 | */ |
750 | void tcp_disable_fack(struct tcp_sock *tp) | 851 | static void tcp_disable_fack(struct tcp_sock *tp) |
751 | { | 852 | { |
752 | /* RFC3517 uses different metric in lost marker => reset on change */ | 853 | /* RFC3517 uses different metric in lost marker => reset on change */ |
753 | if (tcp_is_fack(tp)) | 854 | if (tcp_is_fack(tp)) |
754 | tp->lost_skb_hint = NULL; | 855 | tp->lost_skb_hint = NULL; |
755 | tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED; | 856 | tp->rx_opt.sack_ok &= ~2; |
756 | } | 857 | } |
757 | 858 | ||
758 | /* Take a notice that peer is sending D-SACKs */ | 859 | /* Take a notice that peer is sending D-SACKs */ |
759 | static void tcp_dsack_seen(struct tcp_sock *tp) | 860 | static void tcp_dsack_seen(struct tcp_sock *tp) |
760 | { | 861 | { |
761 | tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; | 862 | tp->rx_opt.sack_ok |= 4; |
863 | } | ||
864 | |||
865 | /* Initialize metrics on socket. */ | ||
866 | |||
867 | static void tcp_init_metrics(struct sock *sk) | ||
868 | { | ||
869 | struct tcp_sock *tp = tcp_sk(sk); | ||
870 | struct dst_entry *dst = __sk_dst_get(sk); | ||
871 | |||
872 | if (dst == NULL) | ||
873 | goto reset; | ||
874 | |||
875 | dst_confirm(dst); | ||
876 | |||
877 | if (dst_metric_locked(dst, RTAX_CWND)) | ||
878 | tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND); | ||
879 | if (dst_metric(dst, RTAX_SSTHRESH)) { | ||
880 | tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); | ||
881 | if (tp->snd_ssthresh > tp->snd_cwnd_clamp) | ||
882 | tp->snd_ssthresh = tp->snd_cwnd_clamp; | ||
883 | } else { | ||
884 | /* ssthresh may have been reduced unnecessarily during. | ||
885 | * 3WHS. Restore it back to its initial default. | ||
886 | */ | ||
887 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | ||
888 | } | ||
889 | if (dst_metric(dst, RTAX_REORDERING) && | ||
890 | tp->reordering != dst_metric(dst, RTAX_REORDERING)) { | ||
891 | tcp_disable_fack(tp); | ||
892 | tp->reordering = dst_metric(dst, RTAX_REORDERING); | ||
893 | } | ||
894 | |||
895 | if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0) | ||
896 | goto reset; | ||
897 | |||
898 | /* Initial rtt is determined from SYN,SYN-ACK. | ||
899 | * The segment is small and rtt may appear much | ||
900 | * less than real one. Use per-dst memory | ||
901 | * to make it more realistic. | ||
902 | * | ||
903 | * A bit of theory. RTT is time passed after "normal" sized packet | ||
904 | * is sent until it is ACKed. In normal circumstances sending small | ||
905 | * packets force peer to delay ACKs and calculation is correct too. | ||
906 | * The algorithm is adaptive and, provided we follow specs, it | ||
907 | * NEVER underestimate RTT. BUT! If peer tries to make some clever | ||
908 | * tricks sort of "quick acks" for time long enough to decrease RTT | ||
909 | * to low value, and then abruptly stops to do it and starts to delay | ||
910 | * ACKs, wait for troubles. | ||
911 | */ | ||
912 | if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) { | ||
913 | tp->srtt = dst_metric_rtt(dst, RTAX_RTT); | ||
914 | tp->rtt_seq = tp->snd_nxt; | ||
915 | } | ||
916 | if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) { | ||
917 | tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR); | ||
918 | tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); | ||
919 | } | ||
920 | tcp_set_rto(sk); | ||
921 | reset: | ||
922 | if (tp->srtt == 0) { | ||
923 | /* RFC2988bis: We've failed to get a valid RTT sample from | ||
924 | * 3WHS. This is most likely due to retransmission, | ||
925 | * including spurious one. Reset the RTO back to 3secs | ||
926 | * from the more aggressive 1sec to avoid more spurious | ||
927 | * retransmission. | ||
928 | */ | ||
929 | tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; | ||
930 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; | ||
931 | } | ||
932 | /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been | ||
933 | * retransmitted. In light of RFC2988bis' more aggressive 1sec | ||
934 | * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK | ||
935 | * retransmission has occurred. | ||
936 | */ | ||
937 | if (tp->total_retrans > 1) | ||
938 | tp->snd_cwnd = 1; | ||
939 | else | ||
940 | tp->snd_cwnd = tcp_init_cwnd(tp, dst); | ||
941 | tp->snd_cwnd_stamp = tcp_time_stamp; | ||
762 | } | 942 | } |
763 | 943 | ||
764 | static void tcp_update_reordering(struct sock *sk, const int metric, | 944 | static void tcp_update_reordering(struct sock *sk, const int metric, |
@@ -782,18 +962,15 @@ static void tcp_update_reordering(struct sock *sk, const int metric, | |||
782 | 962 | ||
783 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | 963 | NET_INC_STATS_BH(sock_net(sk), mib_idx); |
784 | #if FASTRETRANS_DEBUG > 1 | 964 | #if FASTRETRANS_DEBUG > 1 |
785 | pr_debug("Disorder%d %d %u f%u s%u rr%d\n", | 965 | printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", |
786 | tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, | 966 | tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, |
787 | tp->reordering, | 967 | tp->reordering, |
788 | tp->fackets_out, | 968 | tp->fackets_out, |
789 | tp->sacked_out, | 969 | tp->sacked_out, |
790 | tp->undo_marker ? tp->undo_retrans : 0); | 970 | tp->undo_marker ? tp->undo_retrans : 0); |
791 | #endif | 971 | #endif |
792 | tcp_disable_fack(tp); | 972 | tcp_disable_fack(tp); |
793 | } | 973 | } |
794 | |||
795 | if (metric > 0) | ||
796 | tcp_disable_early_retrans(tp); | ||
797 | } | 974 | } |
798 | 975 | ||
799 | /* This must be called before lost_out is incremented */ | 976 | /* This must be called before lost_out is incremented */ |
@@ -851,11 +1028,13 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, | |||
851 | * These 6 states form finite state machine, controlled by the following events: | 1028 | * These 6 states form finite state machine, controlled by the following events: |
852 | * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue()) | 1029 | * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue()) |
853 | * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue()) | 1030 | * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue()) |
854 | * 3. Loss detection event of two flavors: | 1031 | * 3. Loss detection event of one of three flavors: |
855 | * A. Scoreboard estimator decided the packet is lost. | 1032 | * A. Scoreboard estimator decided the packet is lost. |
856 | * A'. Reno "three dupacks" marks head of queue lost. | 1033 | * A'. Reno "three dupacks" marks head of queue lost. |
857 | * A''. Its FACK modification, head until snd.fack is lost. | 1034 | * A''. Its FACK modfication, head until snd.fack is lost. |
858 | * B. SACK arrives sacking SND.NXT at the moment, when the | 1035 | * B. SACK arrives sacking data transmitted after never retransmitted |
1036 | * hole was sent out. | ||
1037 | * C. SACK arrives sacking SND.NXT at the moment, when the | ||
859 | * segment was retransmitted. | 1038 | * segment was retransmitted. |
860 | * 4. D-SACK added new rule: D-SACK changes any tag to S. | 1039 | * 4. D-SACK added new rule: D-SACK changes any tag to S. |
861 | * | 1040 | * |
@@ -924,36 +1103,36 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, | |||
924 | * the exact amount is rather hard to quantify. However, tp->max_window can | 1103 | * the exact amount is rather hard to quantify. However, tp->max_window can |
925 | * be used as an exaggerated estimate. | 1104 | * be used as an exaggerated estimate. |
926 | */ | 1105 | */ |
927 | static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack, | 1106 | static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, |
928 | u32 start_seq, u32 end_seq) | 1107 | u32 start_seq, u32 end_seq) |
929 | { | 1108 | { |
930 | /* Too far in future, or reversed (interpretation is ambiguous) */ | 1109 | /* Too far in future, or reversed (interpretation is ambiguous) */ |
931 | if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq)) | 1110 | if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq)) |
932 | return false; | 1111 | return 0; |
933 | 1112 | ||
934 | /* Nasty start_seq wrap-around check (see comments above) */ | 1113 | /* Nasty start_seq wrap-around check (see comments above) */ |
935 | if (!before(start_seq, tp->snd_nxt)) | 1114 | if (!before(start_seq, tp->snd_nxt)) |
936 | return false; | 1115 | return 0; |
937 | 1116 | ||
938 | /* In outstanding window? ...This is valid exit for D-SACKs too. | 1117 | /* In outstanding window? ...This is valid exit for D-SACKs too. |
939 | * start_seq == snd_una is non-sensical (see comments above) | 1118 | * start_seq == snd_una is non-sensical (see comments above) |
940 | */ | 1119 | */ |
941 | if (after(start_seq, tp->snd_una)) | 1120 | if (after(start_seq, tp->snd_una)) |
942 | return true; | 1121 | return 1; |
943 | 1122 | ||
944 | if (!is_dsack || !tp->undo_marker) | 1123 | if (!is_dsack || !tp->undo_marker) |
945 | return false; | 1124 | return 0; |
946 | 1125 | ||
947 | /* ...Then it's D-SACK, and must reside below snd_una completely */ | 1126 | /* ...Then it's D-SACK, and must reside below snd_una completely */ |
948 | if (after(end_seq, tp->snd_una)) | 1127 | if (after(end_seq, tp->snd_una)) |
949 | return false; | 1128 | return 0; |
950 | 1129 | ||
951 | if (!before(start_seq, tp->undo_marker)) | 1130 | if (!before(start_seq, tp->undo_marker)) |
952 | return true; | 1131 | return 1; |
953 | 1132 | ||
954 | /* Too old */ | 1133 | /* Too old */ |
955 | if (!after(end_seq, tp->undo_marker)) | 1134 | if (!after(end_seq, tp->undo_marker)) |
956 | return false; | 1135 | return 0; |
957 | 1136 | ||
958 | /* Undo_marker boundary crossing (overestimates a lot). Known already: | 1137 | /* Undo_marker boundary crossing (overestimates a lot). Known already: |
959 | * start_seq < undo_marker and end_seq >= undo_marker. | 1138 | * start_seq < undo_marker and end_seq >= undo_marker. |
@@ -962,7 +1141,7 @@ static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack, | |||
962 | } | 1141 | } |
963 | 1142 | ||
964 | /* Check for lost retransmit. This superb idea is borrowed from "ratehalving". | 1143 | /* Check for lost retransmit. This superb idea is borrowed from "ratehalving". |
965 | * Event "B". Later note: FACK people cheated me again 8), we have to account | 1144 | * Event "C". Later note: FACK people cheated me again 8), we have to account |
966 | * for reordering! Ugly, but should help. | 1145 | * for reordering! Ugly, but should help. |
967 | * | 1146 | * |
968 | * Search retransmitted skbs from write_queue that were sent when snd_nxt was | 1147 | * Search retransmitted skbs from write_queue that were sent when snd_nxt was |
@@ -1025,17 +1204,17 @@ static void tcp_mark_lost_retrans(struct sock *sk) | |||
1025 | tp->lost_retrans_low = new_low_seq; | 1204 | tp->lost_retrans_low = new_low_seq; |
1026 | } | 1205 | } |
1027 | 1206 | ||
1028 | static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, | 1207 | static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, |
1029 | struct tcp_sack_block_wire *sp, int num_sacks, | 1208 | struct tcp_sack_block_wire *sp, int num_sacks, |
1030 | u32 prior_snd_una) | 1209 | u32 prior_snd_una) |
1031 | { | 1210 | { |
1032 | struct tcp_sock *tp = tcp_sk(sk); | 1211 | struct tcp_sock *tp = tcp_sk(sk); |
1033 | u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq); | 1212 | u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq); |
1034 | u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq); | 1213 | u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq); |
1035 | bool dup_sack = false; | 1214 | int dup_sack = 0; |
1036 | 1215 | ||
1037 | if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { | 1216 | if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { |
1038 | dup_sack = true; | 1217 | dup_sack = 1; |
1039 | tcp_dsack_seen(tp); | 1218 | tcp_dsack_seen(tp); |
1040 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV); | 1219 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV); |
1041 | } else if (num_sacks > 1) { | 1220 | } else if (num_sacks > 1) { |
@@ -1044,7 +1223,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, | |||
1044 | 1223 | ||
1045 | if (!after(end_seq_0, end_seq_1) && | 1224 | if (!after(end_seq_0, end_seq_1) && |
1046 | !before(start_seq_0, start_seq_1)) { | 1225 | !before(start_seq_0, start_seq_1)) { |
1047 | dup_sack = true; | 1226 | dup_sack = 1; |
1048 | tcp_dsack_seen(tp); | 1227 | tcp_dsack_seen(tp); |
1049 | NET_INC_STATS_BH(sock_net(sk), | 1228 | NET_INC_STATS_BH(sock_net(sk), |
1050 | LINUX_MIB_TCPDSACKOFORECV); | 1229 | LINUX_MIB_TCPDSACKOFORECV); |
@@ -1075,10 +1254,9 @@ struct tcp_sacktag_state { | |||
1075 | * FIXME: this could be merged to shift decision code | 1254 | * FIXME: this could be merged to shift decision code |
1076 | */ | 1255 | */ |
1077 | static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, | 1256 | static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, |
1078 | u32 start_seq, u32 end_seq) | 1257 | u32 start_seq, u32 end_seq) |
1079 | { | 1258 | { |
1080 | int err; | 1259 | int in_sack, err; |
1081 | bool in_sack; | ||
1082 | unsigned int pkt_len; | 1260 | unsigned int pkt_len; |
1083 | unsigned int mss; | 1261 | unsigned int mss; |
1084 | 1262 | ||
@@ -1120,26 +1298,25 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, | |||
1120 | return in_sack; | 1298 | return in_sack; |
1121 | } | 1299 | } |
1122 | 1300 | ||
1123 | /* Mark the given newly-SACKed range as such, adjusting counters and hints. */ | 1301 | static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, |
1124 | static u8 tcp_sacktag_one(struct sock *sk, | 1302 | struct tcp_sacktag_state *state, |
1125 | struct tcp_sacktag_state *state, u8 sacked, | 1303 | int dup_sack, int pcount) |
1126 | u32 start_seq, u32 end_seq, | ||
1127 | bool dup_sack, int pcount) | ||
1128 | { | 1304 | { |
1129 | struct tcp_sock *tp = tcp_sk(sk); | 1305 | struct tcp_sock *tp = tcp_sk(sk); |
1306 | u8 sacked = TCP_SKB_CB(skb)->sacked; | ||
1130 | int fack_count = state->fack_count; | 1307 | int fack_count = state->fack_count; |
1131 | 1308 | ||
1132 | /* Account D-SACK for retransmitted packet. */ | 1309 | /* Account D-SACK for retransmitted packet. */ |
1133 | if (dup_sack && (sacked & TCPCB_RETRANS)) { | 1310 | if (dup_sack && (sacked & TCPCB_RETRANS)) { |
1134 | if (tp->undo_marker && tp->undo_retrans && | 1311 | if (tp->undo_marker && tp->undo_retrans && |
1135 | after(end_seq, tp->undo_marker)) | 1312 | after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) |
1136 | tp->undo_retrans--; | 1313 | tp->undo_retrans--; |
1137 | if (sacked & TCPCB_SACKED_ACKED) | 1314 | if (sacked & TCPCB_SACKED_ACKED) |
1138 | state->reord = min(fack_count, state->reord); | 1315 | state->reord = min(fack_count, state->reord); |
1139 | } | 1316 | } |
1140 | 1317 | ||
1141 | /* Nothing to do; acked frame is about to be dropped (was ACKed). */ | 1318 | /* Nothing to do; acked frame is about to be dropped (was ACKed). */ |
1142 | if (!after(end_seq, tp->snd_una)) | 1319 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) |
1143 | return sacked; | 1320 | return sacked; |
1144 | 1321 | ||
1145 | if (!(sacked & TCPCB_SACKED_ACKED)) { | 1322 | if (!(sacked & TCPCB_SACKED_ACKED)) { |
@@ -1158,13 +1335,13 @@ static u8 tcp_sacktag_one(struct sock *sk, | |||
1158 | /* New sack for not retransmitted frame, | 1335 | /* New sack for not retransmitted frame, |
1159 | * which was in hole. It is reordering. | 1336 | * which was in hole. It is reordering. |
1160 | */ | 1337 | */ |
1161 | if (before(start_seq, | 1338 | if (before(TCP_SKB_CB(skb)->seq, |
1162 | tcp_highest_sack_seq(tp))) | 1339 | tcp_highest_sack_seq(tp))) |
1163 | state->reord = min(fack_count, | 1340 | state->reord = min(fack_count, |
1164 | state->reord); | 1341 | state->reord); |
1165 | 1342 | ||
1166 | /* SACK enhanced F-RTO (RFC4138; Appendix B) */ | 1343 | /* SACK enhanced F-RTO (RFC4138; Appendix B) */ |
1167 | if (!after(end_seq, tp->frto_highmark)) | 1344 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) |
1168 | state->flag |= FLAG_ONLY_ORIG_SACKED; | 1345 | state->flag |= FLAG_ONLY_ORIG_SACKED; |
1169 | } | 1346 | } |
1170 | 1347 | ||
@@ -1182,7 +1359,8 @@ static u8 tcp_sacktag_one(struct sock *sk, | |||
1182 | 1359 | ||
1183 | /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ | 1360 | /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ |
1184 | if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && | 1361 | if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && |
1185 | before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) | 1362 | before(TCP_SKB_CB(skb)->seq, |
1363 | TCP_SKB_CB(tp->lost_skb_hint)->seq)) | ||
1186 | tp->lost_cnt_hint += pcount; | 1364 | tp->lost_cnt_hint += pcount; |
1187 | 1365 | ||
1188 | if (fack_count > tp->fackets_out) | 1366 | if (fack_count > tp->fackets_out) |
@@ -1201,30 +1379,16 @@ static u8 tcp_sacktag_one(struct sock *sk, | |||
1201 | return sacked; | 1379 | return sacked; |
1202 | } | 1380 | } |
1203 | 1381 | ||
1204 | /* Shift newly-SACKed bytes from this skb to the immediately previous | 1382 | static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, |
1205 | * already-SACKed sk_buff. Mark the newly-SACKed bytes as such. | 1383 | struct tcp_sacktag_state *state, |
1206 | */ | 1384 | unsigned int pcount, int shifted, int mss, |
1207 | static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | 1385 | int dup_sack) |
1208 | struct tcp_sacktag_state *state, | ||
1209 | unsigned int pcount, int shifted, int mss, | ||
1210 | bool dup_sack) | ||
1211 | { | 1386 | { |
1212 | struct tcp_sock *tp = tcp_sk(sk); | 1387 | struct tcp_sock *tp = tcp_sk(sk); |
1213 | struct sk_buff *prev = tcp_write_queue_prev(sk, skb); | 1388 | struct sk_buff *prev = tcp_write_queue_prev(sk, skb); |
1214 | u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */ | ||
1215 | u32 end_seq = start_seq + shifted; /* end of newly-SACKed */ | ||
1216 | 1389 | ||
1217 | BUG_ON(!pcount); | 1390 | BUG_ON(!pcount); |
1218 | 1391 | ||
1219 | /* Adjust counters and hints for the newly sacked sequence | ||
1220 | * range but discard the return value since prev is already | ||
1221 | * marked. We must tag the range first because the seq | ||
1222 | * advancement below implicitly advances | ||
1223 | * tcp_highest_sack_seq() when skb is highest_sack. | ||
1224 | */ | ||
1225 | tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, | ||
1226 | start_seq, end_seq, dup_sack, pcount); | ||
1227 | |||
1228 | if (skb == tp->lost_skb_hint) | 1392 | if (skb == tp->lost_skb_hint) |
1229 | tp->lost_cnt_hint += pcount; | 1393 | tp->lost_cnt_hint += pcount; |
1230 | 1394 | ||
@@ -1251,13 +1415,16 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | |||
1251 | skb_shinfo(skb)->gso_type = 0; | 1415 | skb_shinfo(skb)->gso_type = 0; |
1252 | } | 1416 | } |
1253 | 1417 | ||
1418 | /* We discard results */ | ||
1419 | tcp_sacktag_one(skb, sk, state, dup_sack, pcount); | ||
1420 | |||
1254 | /* Difference in this won't matter, both ACKed by the same cumul. ACK */ | 1421 | /* Difference in this won't matter, both ACKed by the same cumul. ACK */ |
1255 | TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); | 1422 | TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); |
1256 | 1423 | ||
1257 | if (skb->len > 0) { | 1424 | if (skb->len > 0) { |
1258 | BUG_ON(!tcp_skb_pcount(skb)); | 1425 | BUG_ON(!tcp_skb_pcount(skb)); |
1259 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED); | 1426 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED); |
1260 | return false; | 1427 | return 0; |
1261 | } | 1428 | } |
1262 | 1429 | ||
1263 | /* Whole SKB was eaten :-) */ | 1430 | /* Whole SKB was eaten :-) */ |
@@ -1271,7 +1438,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | |||
1271 | tp->lost_cnt_hint -= tcp_skb_pcount(prev); | 1438 | tp->lost_cnt_hint -= tcp_skb_pcount(prev); |
1272 | } | 1439 | } |
1273 | 1440 | ||
1274 | TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags; | 1441 | TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags; |
1275 | if (skb == tcp_highest_sack(sk)) | 1442 | if (skb == tcp_highest_sack(sk)) |
1276 | tcp_advance_highest_sack(sk, skb); | 1443 | tcp_advance_highest_sack(sk, skb); |
1277 | 1444 | ||
@@ -1280,19 +1447,19 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | |||
1280 | 1447 | ||
1281 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED); | 1448 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED); |
1282 | 1449 | ||
1283 | return true; | 1450 | return 1; |
1284 | } | 1451 | } |
1285 | 1452 | ||
1286 | /* I wish gso_size would have a bit more sane initialization than | 1453 | /* I wish gso_size would have a bit more sane initialization than |
1287 | * something-or-zero which complicates things | 1454 | * something-or-zero which complicates things |
1288 | */ | 1455 | */ |
1289 | static int tcp_skb_seglen(const struct sk_buff *skb) | 1456 | static int tcp_skb_seglen(struct sk_buff *skb) |
1290 | { | 1457 | { |
1291 | return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb); | 1458 | return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb); |
1292 | } | 1459 | } |
1293 | 1460 | ||
1294 | /* Shifting pages past head area doesn't work */ | 1461 | /* Shifting pages past head area doesn't work */ |
1295 | static int skb_can_shift(const struct sk_buff *skb) | 1462 | static int skb_can_shift(struct sk_buff *skb) |
1296 | { | 1463 | { |
1297 | return !skb_headlen(skb) && skb_is_nonlinear(skb); | 1464 | return !skb_headlen(skb) && skb_is_nonlinear(skb); |
1298 | } | 1465 | } |
@@ -1303,7 +1470,7 @@ static int skb_can_shift(const struct sk_buff *skb) | |||
1303 | static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, | 1470 | static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, |
1304 | struct tcp_sacktag_state *state, | 1471 | struct tcp_sacktag_state *state, |
1305 | u32 start_seq, u32 end_seq, | 1472 | u32 start_seq, u32 end_seq, |
1306 | bool dup_sack) | 1473 | int dup_sack) |
1307 | { | 1474 | { |
1308 | struct tcp_sock *tp = tcp_sk(sk); | 1475 | struct tcp_sock *tp = tcp_sk(sk); |
1309 | struct sk_buff *prev; | 1476 | struct sk_buff *prev; |
@@ -1398,10 +1565,6 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, | |||
1398 | } | 1565 | } |
1399 | } | 1566 | } |
1400 | 1567 | ||
1401 | /* tcp_sacktag_one() won't SACK-tag ranges below snd_una */ | ||
1402 | if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una)) | ||
1403 | goto fallback; | ||
1404 | |||
1405 | if (!skb_shift(prev, skb, len)) | 1568 | if (!skb_shift(prev, skb, len)) |
1406 | goto fallback; | 1569 | goto fallback; |
1407 | if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack)) | 1570 | if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack)) |
@@ -1442,14 +1605,14 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, | |||
1442 | struct tcp_sack_block *next_dup, | 1605 | struct tcp_sack_block *next_dup, |
1443 | struct tcp_sacktag_state *state, | 1606 | struct tcp_sacktag_state *state, |
1444 | u32 start_seq, u32 end_seq, | 1607 | u32 start_seq, u32 end_seq, |
1445 | bool dup_sack_in) | 1608 | int dup_sack_in) |
1446 | { | 1609 | { |
1447 | struct tcp_sock *tp = tcp_sk(sk); | 1610 | struct tcp_sock *tp = tcp_sk(sk); |
1448 | struct sk_buff *tmp; | 1611 | struct sk_buff *tmp; |
1449 | 1612 | ||
1450 | tcp_for_write_queue_from(skb, sk) { | 1613 | tcp_for_write_queue_from(skb, sk) { |
1451 | int in_sack = 0; | 1614 | int in_sack = 0; |
1452 | bool dup_sack = dup_sack_in; | 1615 | int dup_sack = dup_sack_in; |
1453 | 1616 | ||
1454 | if (skb == tcp_send_head(sk)) | 1617 | if (skb == tcp_send_head(sk)) |
1455 | break; | 1618 | break; |
@@ -1464,7 +1627,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, | |||
1464 | next_dup->start_seq, | 1627 | next_dup->start_seq, |
1465 | next_dup->end_seq); | 1628 | next_dup->end_seq); |
1466 | if (in_sack > 0) | 1629 | if (in_sack > 0) |
1467 | dup_sack = true; | 1630 | dup_sack = 1; |
1468 | } | 1631 | } |
1469 | 1632 | ||
1470 | /* skb reference here is a bit tricky to get right, since | 1633 | /* skb reference here is a bit tricky to get right, since |
@@ -1492,14 +1655,10 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, | |||
1492 | break; | 1655 | break; |
1493 | 1656 | ||
1494 | if (in_sack) { | 1657 | if (in_sack) { |
1495 | TCP_SKB_CB(skb)->sacked = | 1658 | TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk, |
1496 | tcp_sacktag_one(sk, | 1659 | state, |
1497 | state, | 1660 | dup_sack, |
1498 | TCP_SKB_CB(skb)->sacked, | 1661 | tcp_skb_pcount(skb)); |
1499 | TCP_SKB_CB(skb)->seq, | ||
1500 | TCP_SKB_CB(skb)->end_seq, | ||
1501 | dup_sack, | ||
1502 | tcp_skb_pcount(skb)); | ||
1503 | 1662 | ||
1504 | if (!before(TCP_SKB_CB(skb)->seq, | 1663 | if (!before(TCP_SKB_CB(skb)->seq, |
1505 | tcp_highest_sack_seq(tp))) | 1664 | tcp_highest_sack_seq(tp))) |
@@ -1549,19 +1708,19 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, | |||
1549 | return skb; | 1708 | return skb; |
1550 | } | 1709 | } |
1551 | 1710 | ||
1552 | static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache) | 1711 | static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache) |
1553 | { | 1712 | { |
1554 | return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache); | 1713 | return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache); |
1555 | } | 1714 | } |
1556 | 1715 | ||
1557 | static int | 1716 | static int |
1558 | tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, | 1717 | tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, |
1559 | u32 prior_snd_una) | 1718 | u32 prior_snd_una) |
1560 | { | 1719 | { |
1561 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1720 | const struct inet_connection_sock *icsk = inet_csk(sk); |
1562 | struct tcp_sock *tp = tcp_sk(sk); | 1721 | struct tcp_sock *tp = tcp_sk(sk); |
1563 | const unsigned char *ptr = (skb_transport_header(ack_skb) + | 1722 | unsigned char *ptr = (skb_transport_header(ack_skb) + |
1564 | TCP_SKB_CB(ack_skb)->sacked); | 1723 | TCP_SKB_CB(ack_skb)->sacked); |
1565 | struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); | 1724 | struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); |
1566 | struct tcp_sack_block sp[TCP_NUM_SACKS]; | 1725 | struct tcp_sack_block sp[TCP_NUM_SACKS]; |
1567 | struct tcp_sack_block *cache; | 1726 | struct tcp_sack_block *cache; |
@@ -1569,7 +1728,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, | |||
1569 | struct sk_buff *skb; | 1728 | struct sk_buff *skb; |
1570 | int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3); | 1729 | int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3); |
1571 | int used_sacks; | 1730 | int used_sacks; |
1572 | bool found_dup_sack = false; | 1731 | int found_dup_sack = 0; |
1573 | int i, j; | 1732 | int i, j; |
1574 | int first_sack_index; | 1733 | int first_sack_index; |
1575 | 1734 | ||
@@ -1600,7 +1759,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, | |||
1600 | used_sacks = 0; | 1759 | used_sacks = 0; |
1601 | first_sack_index = 0; | 1760 | first_sack_index = 0; |
1602 | for (i = 0; i < num_sacks; i++) { | 1761 | for (i = 0; i < num_sacks; i++) { |
1603 | bool dup_sack = !i && found_dup_sack; | 1762 | int dup_sack = !i && found_dup_sack; |
1604 | 1763 | ||
1605 | sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq); | 1764 | sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq); |
1606 | sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq); | 1765 | sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq); |
@@ -1667,12 +1826,16 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, | |||
1667 | while (i < used_sacks) { | 1826 | while (i < used_sacks) { |
1668 | u32 start_seq = sp[i].start_seq; | 1827 | u32 start_seq = sp[i].start_seq; |
1669 | u32 end_seq = sp[i].end_seq; | 1828 | u32 end_seq = sp[i].end_seq; |
1670 | bool dup_sack = (found_dup_sack && (i == first_sack_index)); | 1829 | int dup_sack = (found_dup_sack && (i == first_sack_index)); |
1671 | struct tcp_sack_block *next_dup = NULL; | 1830 | struct tcp_sack_block *next_dup = NULL; |
1672 | 1831 | ||
1673 | if (found_dup_sack && ((i + 1) == first_sack_index)) | 1832 | if (found_dup_sack && ((i + 1) == first_sack_index)) |
1674 | next_dup = &sp[i + 1]; | 1833 | next_dup = &sp[i + 1]; |
1675 | 1834 | ||
1835 | /* Event "B" in the comment above. */ | ||
1836 | if (after(end_seq, tp->high_seq)) | ||
1837 | state.flag |= FLAG_DATA_LOST; | ||
1838 | |||
1676 | /* Skip too early cached blocks */ | 1839 | /* Skip too early cached blocks */ |
1677 | while (tcp_sack_cache_ok(tp, cache) && | 1840 | while (tcp_sack_cache_ok(tp, cache) && |
1678 | !before(start_seq, cache->end_seq)) | 1841 | !before(start_seq, cache->end_seq)) |
@@ -1769,9 +1932,9 @@ out: | |||
1769 | } | 1932 | } |
1770 | 1933 | ||
1771 | /* Limits sacked_out so that sum with lost_out isn't ever larger than | 1934 | /* Limits sacked_out so that sum with lost_out isn't ever larger than |
1772 | * packets_out. Returns false if sacked_out adjustement wasn't necessary. | 1935 | * packets_out. Returns zero if sacked_out adjustement wasn't necessary. |
1773 | */ | 1936 | */ |
1774 | static bool tcp_limit_reno_sacked(struct tcp_sock *tp) | 1937 | static int tcp_limit_reno_sacked(struct tcp_sock *tp) |
1775 | { | 1938 | { |
1776 | u32 holes; | 1939 | u32 holes; |
1777 | 1940 | ||
@@ -1780,9 +1943,9 @@ static bool tcp_limit_reno_sacked(struct tcp_sock *tp) | |||
1780 | 1943 | ||
1781 | if ((tp->sacked_out + holes) > tp->packets_out) { | 1944 | if ((tp->sacked_out + holes) > tp->packets_out) { |
1782 | tp->sacked_out = tp->packets_out - holes; | 1945 | tp->sacked_out = tp->packets_out - holes; |
1783 | return true; | 1946 | return 1; |
1784 | } | 1947 | } |
1785 | return false; | 1948 | return 0; |
1786 | } | 1949 | } |
1787 | 1950 | ||
1788 | /* If we receive more dupacks than we expected counting segments | 1951 | /* If we receive more dupacks than we expected counting segments |
@@ -1836,40 +1999,40 @@ static int tcp_is_sackfrto(const struct tcp_sock *tp) | |||
1836 | /* F-RTO can only be used if TCP has never retransmitted anything other than | 1999 | /* F-RTO can only be used if TCP has never retransmitted anything other than |
1837 | * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) | 2000 | * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) |
1838 | */ | 2001 | */ |
1839 | bool tcp_use_frto(struct sock *sk) | 2002 | int tcp_use_frto(struct sock *sk) |
1840 | { | 2003 | { |
1841 | const struct tcp_sock *tp = tcp_sk(sk); | 2004 | const struct tcp_sock *tp = tcp_sk(sk); |
1842 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2005 | const struct inet_connection_sock *icsk = inet_csk(sk); |
1843 | struct sk_buff *skb; | 2006 | struct sk_buff *skb; |
1844 | 2007 | ||
1845 | if (!sysctl_tcp_frto) | 2008 | if (!sysctl_tcp_frto) |
1846 | return false; | 2009 | return 0; |
1847 | 2010 | ||
1848 | /* MTU probe and F-RTO won't really play nicely along currently */ | 2011 | /* MTU probe and F-RTO won't really play nicely along currently */ |
1849 | if (icsk->icsk_mtup.probe_size) | 2012 | if (icsk->icsk_mtup.probe_size) |
1850 | return false; | 2013 | return 0; |
1851 | 2014 | ||
1852 | if (tcp_is_sackfrto(tp)) | 2015 | if (tcp_is_sackfrto(tp)) |
1853 | return true; | 2016 | return 1; |
1854 | 2017 | ||
1855 | /* Avoid expensive walking of rexmit queue if possible */ | 2018 | /* Avoid expensive walking of rexmit queue if possible */ |
1856 | if (tp->retrans_out > 1) | 2019 | if (tp->retrans_out > 1) |
1857 | return false; | 2020 | return 0; |
1858 | 2021 | ||
1859 | skb = tcp_write_queue_head(sk); | 2022 | skb = tcp_write_queue_head(sk); |
1860 | if (tcp_skb_is_last(sk, skb)) | 2023 | if (tcp_skb_is_last(sk, skb)) |
1861 | return true; | 2024 | return 1; |
1862 | skb = tcp_write_queue_next(sk, skb); /* Skips head */ | 2025 | skb = tcp_write_queue_next(sk, skb); /* Skips head */ |
1863 | tcp_for_write_queue_from(skb, sk) { | 2026 | tcp_for_write_queue_from(skb, sk) { |
1864 | if (skb == tcp_send_head(sk)) | 2027 | if (skb == tcp_send_head(sk)) |
1865 | break; | 2028 | break; |
1866 | if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) | 2029 | if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) |
1867 | return false; | 2030 | return 0; |
1868 | /* Short-circuit when first non-SACKed skb has been checked */ | 2031 | /* Short-circuit when first non-SACKed skb has been checked */ |
1869 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) | 2032 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) |
1870 | break; | 2033 | break; |
1871 | } | 2034 | } |
1872 | return true; | 2035 | return 1; |
1873 | } | 2036 | } |
1874 | 2037 | ||
1875 | /* RTO occurred, but do not yet enter Loss state. Instead, defer RTO | 2038 | /* RTO occurred, but do not yet enter Loss state. Instead, defer RTO |
@@ -2105,7 +2268,7 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
2105 | * | 2268 | * |
2106 | * Do processing similar to RTO timeout. | 2269 | * Do processing similar to RTO timeout. |
2107 | */ | 2270 | */ |
2108 | static bool tcp_check_sack_reneging(struct sock *sk, int flag) | 2271 | static int tcp_check_sack_reneging(struct sock *sk, int flag) |
2109 | { | 2272 | { |
2110 | if (flag & FLAG_SACK_RENEGING) { | 2273 | if (flag & FLAG_SACK_RENEGING) { |
2111 | struct inet_connection_sock *icsk = inet_csk(sk); | 2274 | struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -2116,12 +2279,12 @@ static bool tcp_check_sack_reneging(struct sock *sk, int flag) | |||
2116 | tcp_retransmit_skb(sk, tcp_write_queue_head(sk)); | 2279 | tcp_retransmit_skb(sk, tcp_write_queue_head(sk)); |
2117 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 2280 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
2118 | icsk->icsk_rto, TCP_RTO_MAX); | 2281 | icsk->icsk_rto, TCP_RTO_MAX); |
2119 | return true; | 2282 | return 1; |
2120 | } | 2283 | } |
2121 | return false; | 2284 | return 0; |
2122 | } | 2285 | } |
2123 | 2286 | ||
2124 | static inline int tcp_fackets_out(const struct tcp_sock *tp) | 2287 | static inline int tcp_fackets_out(struct tcp_sock *tp) |
2125 | { | 2288 | { |
2126 | return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out; | 2289 | return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out; |
2127 | } | 2290 | } |
@@ -2141,41 +2304,19 @@ static inline int tcp_fackets_out(const struct tcp_sock *tp) | |||
2141 | * they differ. Since neither occurs due to loss, TCP should really | 2304 | * they differ. Since neither occurs due to loss, TCP should really |
2142 | * ignore them. | 2305 | * ignore them. |
2143 | */ | 2306 | */ |
2144 | static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) | 2307 | static inline int tcp_dupack_heuristics(struct tcp_sock *tp) |
2145 | { | 2308 | { |
2146 | return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; | 2309 | return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; |
2147 | } | 2310 | } |
2148 | 2311 | ||
2149 | static bool tcp_pause_early_retransmit(struct sock *sk, int flag) | 2312 | static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) |
2150 | { | ||
2151 | struct tcp_sock *tp = tcp_sk(sk); | ||
2152 | unsigned long delay; | ||
2153 | |||
2154 | /* Delay early retransmit and entering fast recovery for | ||
2155 | * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples | ||
2156 | * available, or RTO is scheduled to fire first. | ||
2157 | */ | ||
2158 | if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt) | ||
2159 | return false; | ||
2160 | |||
2161 | delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2)); | ||
2162 | if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) | ||
2163 | return false; | ||
2164 | |||
2165 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX); | ||
2166 | tp->early_retrans_delayed = 1; | ||
2167 | return true; | ||
2168 | } | ||
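The helper removed above defers early retransmit by max(RTT/4, 2 msec); since tp->srtt in this code holds the smoothed RTT scaled by 8 (in jiffies), srtt >> 5 works out to RTT/4. A minimal user-space sketch of just that arithmetic, assuming HZ == 1000 so jiffies and milliseconds coincide (an illustrative assumption, not taken from this patch):

/* Hedged sketch: reproduces the max(RTT/4, 2ms) delay computation used by
 * the removed tcp_pause_early_retransmit().  srtt8 is the smoothed RTT
 * scaled by 8, mirroring the tp->srtt convention; HZ == 1000 is assumed.
 */
#include <stdio.h>

static unsigned long er_delay_ms(unsigned long srtt8)
{
    unsigned long quarter_rtt = srtt8 >> 5;  /* (8 * RTT) / 32 == RTT / 4 */
    const unsigned long floor_ms = 2;        /* never wait less than 2 msec */

    return quarter_rtt > floor_ms ? quarter_rtt : floor_ms;
}

int main(void)
{
    /* Smoothed RTT of 40 ms -> srtt8 == 320 -> delay 10 ms;
     * smoothed RTT of 4 ms  -> srtt8 == 32  -> delay clamped to 2 ms.
     */
    printf("%lu %lu\n", er_delay_ms(320), er_delay_ms(32));
    return 0;
}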
2169 | |||
2170 | static inline int tcp_skb_timedout(const struct sock *sk, | ||
2171 | const struct sk_buff *skb) | ||
2172 | { | 2313 | { |
2173 | return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto; | 2314 | return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto; |
2174 | } | 2315 | } |
2175 | 2316 | ||
2176 | static inline int tcp_head_timedout(const struct sock *sk) | 2317 | static inline int tcp_head_timedout(struct sock *sk) |
2177 | { | 2318 | { |
2178 | const struct tcp_sock *tp = tcp_sk(sk); | 2319 | struct tcp_sock *tp = tcp_sk(sk); |
2179 | 2320 | ||
2180 | return tp->packets_out && | 2321 | return tp->packets_out && |
2181 | tcp_skb_timedout(sk, tcp_write_queue_head(sk)); | 2322 | tcp_skb_timedout(sk, tcp_write_queue_head(sk)); |
@@ -2274,28 +2415,28 @@ static inline int tcp_head_timedout(const struct sock *sk) | |||
2274 | * Main question: may we further continue forward transmission | 2415 | * Main question: may we further continue forward transmission |
2275 | * with the same cwnd? | 2416 | * with the same cwnd? |
2276 | */ | 2417 | */ |
2277 | static bool tcp_time_to_recover(struct sock *sk, int flag) | 2418 | static int tcp_time_to_recover(struct sock *sk) |
2278 | { | 2419 | { |
2279 | struct tcp_sock *tp = tcp_sk(sk); | 2420 | struct tcp_sock *tp = tcp_sk(sk); |
2280 | __u32 packets_out; | 2421 | __u32 packets_out; |
2281 | 2422 | ||
2282 | /* Do not perform any recovery during F-RTO algorithm */ | 2423 | /* Do not perform any recovery during F-RTO algorithm */ |
2283 | if (tp->frto_counter) | 2424 | if (tp->frto_counter) |
2284 | return false; | 2425 | return 0; |
2285 | 2426 | ||
2286 | /* Trick#1: The loss is proven. */ | 2427 | /* Trick#1: The loss is proven. */ |
2287 | if (tp->lost_out) | 2428 | if (tp->lost_out) |
2288 | return true; | 2429 | return 1; |
2289 | 2430 | ||
2290 | /* Not-A-Trick#2 : Classic rule... */ | 2431 | /* Not-A-Trick#2 : Classic rule... */ |
2291 | if (tcp_dupack_heuristics(tp) > tp->reordering) | 2432 | if (tcp_dupack_heuristics(tp) > tp->reordering) |
2292 | return true; | 2433 | return 1; |
2293 | 2434 | ||
2294 | /* Trick#3 : when we use RFC2988 timer restart, fast | 2435 | /* Trick#3 : when we use RFC2988 timer restart, fast |
2295 | * retransmit can be triggered by timeout of queue head. | 2436 | * retransmit can be triggered by timeout of queue head. |
2296 | */ | 2437 | */ |
2297 | if (tcp_is_fack(tp) && tcp_head_timedout(sk)) | 2438 | if (tcp_is_fack(tp) && tcp_head_timedout(sk)) |
2298 | return true; | 2439 | return 1; |
2299 | 2440 | ||
2300 | /* Trick#4: It is still not OK... But will it be useful to delay | 2441 | /* Trick#4: It is still not OK... But will it be useful to delay |
2301 | * recovery more? | 2442 | * recovery more? |
@@ -2307,7 +2448,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) | |||
2307 | /* We have nothing to send. This connection is limited | 2448 | /* We have nothing to send. This connection is limited |
2308 | * either by receiver window or by application. | 2449 | * either by receiver window or by application. |
2309 | */ | 2450 | */ |
2310 | return true; | 2451 | return 1; |
2311 | } | 2452 | } |
2312 | 2453 | ||
2313 | /* If a thin stream is detected, retransmit after first | 2454 | /* If a thin stream is detected, retransmit after first |
@@ -2318,19 +2459,9 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) | |||
2318 | if ((tp->thin_dupack || sysctl_tcp_thin_dupack) && | 2459 | if ((tp->thin_dupack || sysctl_tcp_thin_dupack) && |
2319 | tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 && | 2460 | tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 && |
2320 | tcp_is_sack(tp) && !tcp_send_head(sk)) | 2461 | tcp_is_sack(tp) && !tcp_send_head(sk)) |
2321 | return true; | 2462 | return 1; |
2322 | 2463 | ||
2323 | /* Trick#6: TCP early retransmit, per RFC5827. To avoid spurious | 2464 | return 0; |
2324 | * retransmissions due to small network reorderings, we implement | ||
2325 | * Mitigation A.3 in the RFC and delay the retransmission for a short | ||
2326 | * interval if appropriate. | ||
2327 | */ | ||
2328 | if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && | ||
2329 | (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) && | ||
2330 | !tcp_may_send_now(sk)) | ||
2331 | return !tcp_pause_early_retransmit(sk, flag); | ||
2332 | |||
2333 | return false; | ||
2334 | } | 2465 | } |
2335 | 2466 | ||
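The recovery-entry checks above reduce to a handful of ordered predicates. The sketch below models only the first two, the "loss is proven" rule and the classic dupack-versus-reordering rule, in user space; the reordering value of 3 is just the conventional default used for illustration:

/* Hedged sketch of the first two tcp_time_to_recover() rules: enter
 * recovery when a loss is already marked, or when the duplicate-ACK style
 * heuristic exceeds the current reordering estimate.  Values are made up;
 * the kernel tracks them per socket.
 */
#include <stdbool.h>
#include <stdio.h>

static bool time_to_recover(unsigned int lost_out,
                            unsigned int dupack_heuristics,
                            unsigned int reordering)
{
    if (lost_out)                            /* Trick#1: loss is proven */
        return true;
    return dupack_heuristics > reordering;   /* classic dupack threshold */
}

int main(void)
{
    /* With a reordering metric of 3, only the fourth duplicate signal
     * tips the sender into recovery.
     */
    printf("%d %d\n", time_to_recover(0, 3, 3), time_to_recover(0, 4, 3));
    return 0;
}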
2336 | /* New heuristics: it is possible only after we switched to restart timer | 2467 | /* New heuristics: it is possible only after we switched to restart timer |
@@ -2371,11 +2502,8 @@ static void tcp_timeout_skbs(struct sock *sk) | |||
2371 | tcp_verify_left_out(tp); | 2502 | tcp_verify_left_out(tp); |
2372 | } | 2503 | } |
2373 | 2504 | ||
2374 | /* Detect loss in event "A" above by marking head of queue up as lost. | 2505 | /* Mark head of queue up as lost. With RFC3517 SACK, "packets" is counted |
2375 | * For FACK or non-SACK(Reno) senders, the first "packets" number of segments | 2506 | * against sacked "cnt", otherwise it's against facked "cnt" |
2376 | * are considered lost. For RFC3517 SACK, a segment is considered lost if it | ||
2377 | * has at least tp->reordering SACKed segments above it; "packets" refers to | ||
2378 | * the maximum SACKed segments to pass before reaching this limit. | ||
2379 | */ | 2507 | */ |
2380 | static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | 2508 | static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) |
2381 | { | 2509 | { |
@@ -2384,8 +2512,6 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | |||
2384 | int cnt, oldcnt; | 2512 | int cnt, oldcnt; |
2385 | int err; | 2513 | int err; |
2386 | unsigned int mss; | 2514 | unsigned int mss; |
2387 | /* Use SACK to deduce losses of new sequences sent during recovery */ | ||
2388 | const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; | ||
2389 | 2515 | ||
2390 | WARN_ON(packets > tp->packets_out); | 2516 | WARN_ON(packets > tp->packets_out); |
2391 | if (tp->lost_skb_hint) { | 2517 | if (tp->lost_skb_hint) { |
@@ -2407,7 +2533,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | |||
2407 | tp->lost_skb_hint = skb; | 2533 | tp->lost_skb_hint = skb; |
2408 | tp->lost_cnt_hint = cnt; | 2534 | tp->lost_cnt_hint = cnt; |
2409 | 2535 | ||
2410 | if (after(TCP_SKB_CB(skb)->end_seq, loss_high)) | 2536 | if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) |
2411 | break; | 2537 | break; |
2412 | 2538 | ||
2413 | oldcnt = cnt; | 2539 | oldcnt = cnt; |
@@ -2417,7 +2543,6 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | |||
2417 | 2543 | ||
2418 | if (cnt > packets) { | 2544 | if (cnt > packets) { |
2419 | if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || | 2545 | if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || |
2420 | (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || | ||
2421 | (oldcnt >= packets)) | 2546 | (oldcnt >= packets)) |
2422 | break; | 2547 | break; |
2423 | 2548 | ||
@@ -2470,10 +2595,39 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp) | |||
2470 | tp->snd_cwnd_stamp = tcp_time_stamp; | 2595 | tp->snd_cwnd_stamp = tcp_time_stamp; |
2471 | } | 2596 | } |
2472 | 2597 | ||
2598 | /* Lower bound on congestion window is slow start threshold | ||
2599 | * unless congestion avoidance choice decides to override it. | ||
2600 | */ | ||
2601 | static inline u32 tcp_cwnd_min(const struct sock *sk) | ||
2602 | { | ||
2603 | const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; | ||
2604 | |||
2605 | return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh; | ||
2606 | } | ||
2607 | |||
2608 | /* Decrease cwnd each second ack. */ | ||
2609 | static void tcp_cwnd_down(struct sock *sk, int flag) | ||
2610 | { | ||
2611 | struct tcp_sock *tp = tcp_sk(sk); | ||
2612 | int decr = tp->snd_cwnd_cnt + 1; | ||
2613 | |||
2614 | if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) || | ||
2615 | (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) { | ||
2616 | tp->snd_cwnd_cnt = decr & 1; | ||
2617 | decr >>= 1; | ||
2618 | |||
2619 | if (decr && tp->snd_cwnd > tcp_cwnd_min(sk)) | ||
2620 | tp->snd_cwnd -= decr; | ||
2621 | |||
2622 | tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1); | ||
2623 | tp->snd_cwnd_stamp = tcp_time_stamp; | ||
2624 | } | ||
2625 | } | ||
2626 | |||
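tcp_cwnd_down(), restored by this hunk, implements rate halving: roughly every second qualifying ACK shaves one segment off cwnd until the floor is reached. A user-space sketch of that "decrement every other ACK" pattern; the in-flight clamp and the ca_ops->min_cwnd hook are deliberately left out, and the floor is passed in directly:

/* Hedged sketch of rate halving as in tcp_cwnd_down(): snd_cwnd_cnt
 * accumulates qualifying ACKs, and every time it reaches 2 one segment is
 * removed from cwnd, never going below the floor.
 */
#include <stdio.h>

struct mini_tp {
    unsigned int snd_cwnd;
    unsigned int snd_cwnd_cnt;
};

static void cwnd_down_on_ack(struct mini_tp *tp, unsigned int floor)
{
    unsigned int decr = tp->snd_cwnd_cnt + 1;

    tp->snd_cwnd_cnt = decr & 1;   /* keep the leftover half-step */
    decr >>= 1;                    /* 0 on odd ACKs, 1 on even ACKs */

    if (decr && tp->snd_cwnd > floor)
        tp->snd_cwnd -= decr;
}

int main(void)
{
    struct mini_tp tp = { .snd_cwnd = 10, .snd_cwnd_cnt = 0 };
    int i;

    for (i = 0; i < 10; i++) {
        cwnd_down_on_ack(&tp, 5);
        printf("ack %d: cwnd %u\n", i + 1, tp.snd_cwnd);
    }
    /* cwnd steps 10 -> 9 -> 8 -> 7 -> 6 -> 5 over ten ACKs and then stops. */
    return 0;
}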
2473 | /* Nothing was retransmitted or returned timestamp is less | 2627 | /* Nothing was retransmitted or returned timestamp is less |
2474 | * than timestamp of the first retransmission. | 2628 | * than timestamp of the first retransmission. |
2475 | */ | 2629 | */ |
2476 | static inline bool tcp_packet_delayed(const struct tcp_sock *tp) | 2630 | static inline int tcp_packet_delayed(struct tcp_sock *tp) |
2477 | { | 2631 | { |
2478 | return !tp->retrans_stamp || | 2632 | return !tp->retrans_stamp || |
2479 | (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && | 2633 | (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && |
@@ -2489,22 +2643,22 @@ static void DBGUNDO(struct sock *sk, const char *msg) | |||
2489 | struct inet_sock *inet = inet_sk(sk); | 2643 | struct inet_sock *inet = inet_sk(sk); |
2490 | 2644 | ||
2491 | if (sk->sk_family == AF_INET) { | 2645 | if (sk->sk_family == AF_INET) { |
2492 | pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", | 2646 | printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", |
2493 | msg, | 2647 | msg, |
2494 | &inet->inet_daddr, ntohs(inet->inet_dport), | 2648 | &inet->inet_daddr, ntohs(inet->inet_dport), |
2495 | tp->snd_cwnd, tcp_left_out(tp), | 2649 | tp->snd_cwnd, tcp_left_out(tp), |
2496 | tp->snd_ssthresh, tp->prior_ssthresh, | 2650 | tp->snd_ssthresh, tp->prior_ssthresh, |
2497 | tp->packets_out); | 2651 | tp->packets_out); |
2498 | } | 2652 | } |
2499 | #if IS_ENABLED(CONFIG_IPV6) | 2653 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
2500 | else if (sk->sk_family == AF_INET6) { | 2654 | else if (sk->sk_family == AF_INET6) { |
2501 | struct ipv6_pinfo *np = inet6_sk(sk); | 2655 | struct ipv6_pinfo *np = inet6_sk(sk); |
2502 | pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", | 2656 | printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", |
2503 | msg, | 2657 | msg, |
2504 | &np->daddr, ntohs(inet->inet_dport), | 2658 | &np->daddr, ntohs(inet->inet_dport), |
2505 | tp->snd_cwnd, tcp_left_out(tp), | 2659 | tp->snd_cwnd, tcp_left_out(tp), |
2506 | tp->snd_ssthresh, tp->prior_ssthresh, | 2660 | tp->snd_ssthresh, tp->prior_ssthresh, |
2507 | tp->packets_out); | 2661 | tp->packets_out); |
2508 | } | 2662 | } |
2509 | #endif | 2663 | #endif |
2510 | } | 2664 | } |
@@ -2534,13 +2688,13 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) | |||
2534 | tp->snd_cwnd_stamp = tcp_time_stamp; | 2688 | tp->snd_cwnd_stamp = tcp_time_stamp; |
2535 | } | 2689 | } |
2536 | 2690 | ||
2537 | static inline bool tcp_may_undo(const struct tcp_sock *tp) | 2691 | static inline int tcp_may_undo(struct tcp_sock *tp) |
2538 | { | 2692 | { |
2539 | return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp)); | 2693 | return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp)); |
2540 | } | 2694 | } |
2541 | 2695 | ||
2542 | /* People celebrate: "We love our President!" */ | 2696 | /* People celebrate: "We love our President!" */ |
2543 | static bool tcp_try_undo_recovery(struct sock *sk) | 2697 | static int tcp_try_undo_recovery(struct sock *sk) |
2544 | { | 2698 | { |
2545 | struct tcp_sock *tp = tcp_sk(sk); | 2699 | struct tcp_sock *tp = tcp_sk(sk); |
2546 | 2700 | ||
@@ -2565,10 +2719,10 @@ static bool tcp_try_undo_recovery(struct sock *sk) | |||
2565 | * is ACKed. For Reno it is a MUST to prevent false | 2719 | * is ACKed. For Reno it is a MUST to prevent false |
2566 | * fast retransmits (RFC2582). SACK TCP is safe. */ | 2720 | * fast retransmits (RFC2582). SACK TCP is safe. */ |
2567 | tcp_moderate_cwnd(tp); | 2721 | tcp_moderate_cwnd(tp); |
2568 | return true; | 2722 | return 1; |
2569 | } | 2723 | } |
2570 | tcp_set_ca_state(sk, TCP_CA_Open); | 2724 | tcp_set_ca_state(sk, TCP_CA_Open); |
2571 | return false; | 2725 | return 0; |
2572 | } | 2726 | } |
2573 | 2727 | ||
2574 | /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ | 2728 | /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ |
@@ -2598,19 +2752,19 @@ static void tcp_try_undo_dsack(struct sock *sk) | |||
2598 | * that successive retransmissions of a segment must not advance | 2752 | * that successive retransmissions of a segment must not advance |
2599 | * retrans_stamp under any conditions. | 2753 | * retrans_stamp under any conditions. |
2600 | */ | 2754 | */ |
2601 | static bool tcp_any_retrans_done(const struct sock *sk) | 2755 | static int tcp_any_retrans_done(struct sock *sk) |
2602 | { | 2756 | { |
2603 | const struct tcp_sock *tp = tcp_sk(sk); | 2757 | struct tcp_sock *tp = tcp_sk(sk); |
2604 | struct sk_buff *skb; | 2758 | struct sk_buff *skb; |
2605 | 2759 | ||
2606 | if (tp->retrans_out) | 2760 | if (tp->retrans_out) |
2607 | return true; | 2761 | return 1; |
2608 | 2762 | ||
2609 | skb = tcp_write_queue_head(sk); | 2763 | skb = tcp_write_queue_head(sk); |
2610 | if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) | 2764 | if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) |
2611 | return true; | 2765 | return 1; |
2612 | 2766 | ||
2613 | return false; | 2767 | return 0; |
2614 | } | 2768 | } |
2615 | 2769 | ||
2616 | /* Undo during fast recovery after partial ACK. */ | 2770 | /* Undo during fast recovery after partial ACK. */ |
@@ -2644,7 +2798,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) | |||
2644 | } | 2798 | } |
2645 | 2799 | ||
2646 | /* Undo during loss recovery after partial ACK. */ | 2800 | /* Undo during loss recovery after partial ACK. */ |
2647 | static bool tcp_try_undo_loss(struct sock *sk) | 2801 | static int tcp_try_undo_loss(struct sock *sk) |
2648 | { | 2802 | { |
2649 | struct tcp_sock *tp = tcp_sk(sk); | 2803 | struct tcp_sock *tp = tcp_sk(sk); |
2650 | 2804 | ||
@@ -2666,91 +2820,28 @@ static bool tcp_try_undo_loss(struct sock *sk) | |||
2666 | tp->undo_marker = 0; | 2820 | tp->undo_marker = 0; |
2667 | if (tcp_is_sack(tp)) | 2821 | if (tcp_is_sack(tp)) |
2668 | tcp_set_ca_state(sk, TCP_CA_Open); | 2822 | tcp_set_ca_state(sk, TCP_CA_Open); |
2669 | return true; | 2823 | return 1; |
2670 | } | ||
2671 | return false; | ||
2672 | } | ||
2673 | |||
2674 | /* The cwnd reduction in CWR and Recovery use the PRR algorithm | ||
2675 | * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/ | ||
2676 | * It computes the number of packets to send (sndcnt) based on packets newly | ||
2677 | * delivered: | ||
2678 | * 1) If the packets in flight is larger than ssthresh, PRR spreads the | ||
2679 | * cwnd reductions across a full RTT. | ||
2680 | * 2) If packets in flight is lower than ssthresh (such as due to excess | ||
2681 | * losses and/or application stalls), do not perform any further cwnd | ||
2682 | * reductions, but instead slow start up to ssthresh. | ||
2683 | */ | ||
2684 | static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) | ||
2685 | { | ||
2686 | struct tcp_sock *tp = tcp_sk(sk); | ||
2687 | |||
2688 | tp->high_seq = tp->snd_nxt; | ||
2689 | tp->bytes_acked = 0; | ||
2690 | tp->snd_cwnd_cnt = 0; | ||
2691 | tp->prior_cwnd = tp->snd_cwnd; | ||
2692 | tp->prr_delivered = 0; | ||
2693 | tp->prr_out = 0; | ||
2694 | if (set_ssthresh) | ||
2695 | tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); | ||
2696 | TCP_ECN_queue_cwr(tp); | ||
2697 | } | ||
2698 | |||
2699 | static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, | ||
2700 | int fast_rexmit) | ||
2701 | { | ||
2702 | struct tcp_sock *tp = tcp_sk(sk); | ||
2703 | int sndcnt = 0; | ||
2704 | int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); | ||
2705 | |||
2706 | tp->prr_delivered += newly_acked_sacked; | ||
2707 | if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { | ||
2708 | u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + | ||
2709 | tp->prior_cwnd - 1; | ||
2710 | sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; | ||
2711 | } else { | ||
2712 | sndcnt = min_t(int, delta, | ||
2713 | max_t(int, tp->prr_delivered - tp->prr_out, | ||
2714 | newly_acked_sacked) + 1); | ||
2715 | } | 2824 | } |
2716 | 2825 | return 0; | |
2717 | sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); | ||
2718 | tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; | ||
2719 | } | 2826 | } |
2720 | 2827 | ||
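The block deleted above is the PRR (proportional rate reduction) logic: while flight size stays above ssthresh, the cwnd cut is spread across one RTT in proportion to newly delivered data. A minimal user-space sketch of only that proportional branch; the slow-start-back branch and the fast_rexmit floor are omitted, and the numbers in main() are illustrative, not real socket state:

/* Hedged sketch of the PRR send quota: sndcnt is chosen so that, summed
 * over an RTT, roughly (prior_cwnd - ssthresh) segments are withheld in
 * proportion to how much data the ACKs have delivered so far.
 */
#include <stdio.h>

struct prr_state {
    unsigned long long prr_delivered;  /* data delivered since loss */
    unsigned long long prr_out;        /* data sent since loss      */
};

static long long prr_sndcnt(struct prr_state *s, unsigned int ssthresh,
                            unsigned int prior_cwnd, unsigned int newly_delivered)
{
    unsigned long long dividend;

    s->prr_delivered += newly_delivered;
    /* ceil(ssthresh * prr_delivered / prior_cwnd) - prr_out */
    dividend = (unsigned long long)ssthresh * s->prr_delivered + prior_cwnd - 1;
    return (long long)(dividend / prior_cwnd) - (long long)s->prr_out;
}

int main(void)
{
    struct prr_state s = { 0, 0 };
    unsigned int ssthresh = 5, prior_cwnd = 10;
    int i;

    /* Ten ACKs each delivering one segment: the sender gets to put out one
     * new segment for every two delivered, so about ssthresh segments go
     * out while a full prior_cwnd worth of data is delivered.
     */
    for (i = 0; i < 10; i++) {
        long long quota = prr_sndcnt(&s, ssthresh, prior_cwnd, 1);
        s.prr_out += quota;
        printf("ack %d: send %lld\n", i + 1, quota);
    }
    return 0;
}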
2721 | static inline void tcp_end_cwnd_reduction(struct sock *sk) | 2828 | static inline void tcp_complete_cwr(struct sock *sk) |
2722 | { | 2829 | { |
2723 | struct tcp_sock *tp = tcp_sk(sk); | 2830 | struct tcp_sock *tp = tcp_sk(sk); |
2724 | 2831 | /* Do not moderate cwnd if it's already undone in cwr or recovery */ | |
2725 | /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ | 2832 | if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) { |
2726 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || | ||
2727 | (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { | ||
2728 | tp->snd_cwnd = tp->snd_ssthresh; | 2833 | tp->snd_cwnd = tp->snd_ssthresh; |
2729 | tp->snd_cwnd_stamp = tcp_time_stamp; | 2834 | tp->snd_cwnd_stamp = tcp_time_stamp; |
2730 | } | 2835 | } |
2731 | tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); | 2836 | tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); |
2732 | } | 2837 | } |
2733 | 2838 | ||
2734 | /* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */ | ||
2735 | void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) | ||
2736 | { | ||
2737 | struct tcp_sock *tp = tcp_sk(sk); | ||
2738 | |||
2739 | tp->prior_ssthresh = 0; | ||
2740 | tp->bytes_acked = 0; | ||
2741 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { | ||
2742 | tp->undo_marker = 0; | ||
2743 | tcp_init_cwnd_reduction(sk, set_ssthresh); | ||
2744 | tcp_set_ca_state(sk, TCP_CA_CWR); | ||
2745 | } | ||
2746 | } | ||
2747 | |||
2748 | static void tcp_try_keep_open(struct sock *sk) | 2839 | static void tcp_try_keep_open(struct sock *sk) |
2749 | { | 2840 | { |
2750 | struct tcp_sock *tp = tcp_sk(sk); | 2841 | struct tcp_sock *tp = tcp_sk(sk); |
2751 | int state = TCP_CA_Open; | 2842 | int state = TCP_CA_Open; |
2752 | 2843 | ||
2753 | if (tcp_left_out(tp) || tcp_any_retrans_done(sk)) | 2844 | if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker) |
2754 | state = TCP_CA_Disorder; | 2845 | state = TCP_CA_Disorder; |
2755 | 2846 | ||
2756 | if (inet_csk(sk)->icsk_ca_state != state) { | 2847 | if (inet_csk(sk)->icsk_ca_state != state) { |
@@ -2759,7 +2850,7 @@ static void tcp_try_keep_open(struct sock *sk) | |||
2759 | } | 2850 | } |
2760 | } | 2851 | } |
2761 | 2852 | ||
2762 | static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) | 2853 | static void tcp_try_to_open(struct sock *sk, int flag) |
2763 | { | 2854 | { |
2764 | struct tcp_sock *tp = tcp_sk(sk); | 2855 | struct tcp_sock *tp = tcp_sk(sk); |
2765 | 2856 | ||
@@ -2773,10 +2864,9 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) | |||
2773 | 2864 | ||
2774 | if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { | 2865 | if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { |
2775 | tcp_try_keep_open(sk); | 2866 | tcp_try_keep_open(sk); |
2776 | if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open) | 2867 | tcp_moderate_cwnd(tp); |
2777 | tcp_moderate_cwnd(tp); | ||
2778 | } else { | 2868 | } else { |
2779 | tcp_cwnd_reduction(sk, newly_acked_sacked, 0); | 2869 | tcp_cwnd_down(sk, flag); |
2780 | } | 2870 | } |
2781 | } | 2871 | } |
2782 | 2872 | ||
@@ -2858,30 +2948,6 @@ void tcp_simple_retransmit(struct sock *sk) | |||
2858 | } | 2948 | } |
2859 | EXPORT_SYMBOL(tcp_simple_retransmit); | 2949 | EXPORT_SYMBOL(tcp_simple_retransmit); |
2860 | 2950 | ||
2861 | static void tcp_enter_recovery(struct sock *sk, bool ece_ack) | ||
2862 | { | ||
2863 | struct tcp_sock *tp = tcp_sk(sk); | ||
2864 | int mib_idx; | ||
2865 | |||
2866 | if (tcp_is_reno(tp)) | ||
2867 | mib_idx = LINUX_MIB_TCPRENORECOVERY; | ||
2868 | else | ||
2869 | mib_idx = LINUX_MIB_TCPSACKRECOVERY; | ||
2870 | |||
2871 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | ||
2872 | |||
2873 | tp->prior_ssthresh = 0; | ||
2874 | tp->undo_marker = tp->snd_una; | ||
2875 | tp->undo_retrans = tp->retrans_out; | ||
2876 | |||
2877 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { | ||
2878 | if (!ece_ack) | ||
2879 | tp->prior_ssthresh = tcp_current_ssthresh(sk); | ||
2880 | tcp_init_cwnd_reduction(sk, true); | ||
2881 | } | ||
2882 | tcp_set_ca_state(sk, TCP_CA_Recovery); | ||
2883 | } | ||
2884 | |||
2885 | /* Process an event, which can update packets-in-flight not trivially. | 2951 | /* Process an event, which can update packets-in-flight not trivially. |
2886 | * Main goal of this function is to calculate new estimate for left_out, | 2952 | * Main goal of this function is to calculate new estimate for left_out, |
2887 | * taking into account both packets sitting in receiver's buffer and | 2953 | * taking into account both packets sitting in receiver's buffer and |
@@ -2893,16 +2959,14 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) | |||
2893 | * It does _not_ decide what to send, it is made in function | 2959 | * It does _not_ decide what to send, it is made in function |
2894 | * tcp_xmit_retransmit_queue(). | 2960 | * tcp_xmit_retransmit_queue(). |
2895 | */ | 2961 | */ |
2896 | static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | 2962 | static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) |
2897 | int prior_sacked, bool is_dupack, | ||
2898 | int flag) | ||
2899 | { | 2963 | { |
2900 | struct inet_connection_sock *icsk = inet_csk(sk); | 2964 | struct inet_connection_sock *icsk = inet_csk(sk); |
2901 | struct tcp_sock *tp = tcp_sk(sk); | 2965 | struct tcp_sock *tp = tcp_sk(sk); |
2966 | int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); | ||
2902 | int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && | 2967 | int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && |
2903 | (tcp_fackets_out(tp) > tp->reordering)); | 2968 | (tcp_fackets_out(tp) > tp->reordering)); |
2904 | int newly_acked_sacked = 0; | 2969 | int fast_rexmit = 0, mib_idx; |
2905 | int fast_rexmit = 0; | ||
2906 | 2970 | ||
2907 | if (WARN_ON(!tp->packets_out && tp->sacked_out)) | 2971 | if (WARN_ON(!tp->packets_out && tp->sacked_out)) |
2908 | tp->sacked_out = 0; | 2972 | tp->sacked_out = 0; |
@@ -2918,10 +2982,19 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
2918 | if (tcp_check_sack_reneging(sk, flag)) | 2982 | if (tcp_check_sack_reneging(sk, flag)) |
2919 | return; | 2983 | return; |
2920 | 2984 | ||
2921 | /* C. Check consistency of the current state. */ | 2985 | /* C. Process data loss notification, provided it is valid. */ |
2986 | if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) && | ||
2987 | before(tp->snd_una, tp->high_seq) && | ||
2988 | icsk->icsk_ca_state != TCP_CA_Open && | ||
2989 | tp->fackets_out > tp->reordering) { | ||
2990 | tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0); | ||
2991 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS); | ||
2992 | } | ||
2993 | |||
2994 | /* D. Check consistency of the current state. */ | ||
2922 | tcp_verify_left_out(tp); | 2995 | tcp_verify_left_out(tp); |
2923 | 2996 | ||
2924 | /* D. Check state exit conditions. State can be terminated | 2997 | /* E. Check state exit conditions. State can be terminated |
2925 | * when high_seq is ACKed. */ | 2998 | * when high_seq is ACKed. */ |
2926 | if (icsk->icsk_ca_state == TCP_CA_Open) { | 2999 | if (icsk->icsk_ca_state == TCP_CA_Open) { |
2927 | WARN_ON(tp->retrans_out != 0); | 3000 | WARN_ON(tp->retrans_out != 0); |
@@ -2938,7 +3011,18 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
2938 | /* CWR is to be held something *above* high_seq | 3011 | /* CWR is to be held something *above* high_seq |
2939 | * is ACKed for CWR bit to reach receiver. */ | 3012 | * is ACKed for CWR bit to reach receiver. */ |
2940 | if (tp->snd_una != tp->high_seq) { | 3013 | if (tp->snd_una != tp->high_seq) { |
2941 | tcp_end_cwnd_reduction(sk); | 3014 | tcp_complete_cwr(sk); |
3015 | tcp_set_ca_state(sk, TCP_CA_Open); | ||
3016 | } | ||
3017 | break; | ||
3018 | |||
3019 | case TCP_CA_Disorder: | ||
3020 | tcp_try_undo_dsack(sk); | ||
3021 | if (!tp->undo_marker || | ||
3022 | /* For the SACK case do not go to Open, so that undo | ||
3023 | * can still catch all duplicate ACKs. */ | ||
3024 | tcp_is_reno(tp) || tp->snd_una != tp->high_seq) { | ||
3025 | tp->undo_marker = 0; | ||
2942 | tcp_set_ca_state(sk, TCP_CA_Open); | 3026 | tcp_set_ca_state(sk, TCP_CA_Open); |
2943 | } | 3027 | } |
2944 | break; | 3028 | break; |
@@ -2948,12 +3032,12 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
2948 | tcp_reset_reno_sack(tp); | 3032 | tcp_reset_reno_sack(tp); |
2949 | if (tcp_try_undo_recovery(sk)) | 3033 | if (tcp_try_undo_recovery(sk)) |
2950 | return; | 3034 | return; |
2951 | tcp_end_cwnd_reduction(sk); | 3035 | tcp_complete_cwr(sk); |
2952 | break; | 3036 | break; |
2953 | } | 3037 | } |
2954 | } | 3038 | } |
2955 | 3039 | ||
2956 | /* E. Process state. */ | 3040 | /* F. Process state. */ |
2957 | switch (icsk->icsk_ca_state) { | 3041 | switch (icsk->icsk_ca_state) { |
2958 | case TCP_CA_Recovery: | 3042 | case TCP_CA_Recovery: |
2959 | if (!(flag & FLAG_SND_UNA_ADVANCED)) { | 3043 | if (!(flag & FLAG_SND_UNA_ADVANCED)) { |
@@ -2961,7 +3045,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
2961 | tcp_add_reno_sack(sk); | 3045 | tcp_add_reno_sack(sk); |
2962 | } else | 3046 | } else |
2963 | do_lost = tcp_try_undo_partial(sk, pkts_acked); | 3047 | do_lost = tcp_try_undo_partial(sk, pkts_acked); |
2964 | newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; | ||
2965 | break; | 3048 | break; |
2966 | case TCP_CA_Loss: | 3049 | case TCP_CA_Loss: |
2967 | if (flag & FLAG_DATA_ACKED) | 3050 | if (flag & FLAG_DATA_ACKED) |
@@ -2983,13 +3066,12 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
2983 | if (is_dupack) | 3066 | if (is_dupack) |
2984 | tcp_add_reno_sack(sk); | 3067 | tcp_add_reno_sack(sk); |
2985 | } | 3068 | } |
2986 | newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; | ||
2987 | 3069 | ||
2988 | if (icsk->icsk_ca_state <= TCP_CA_Disorder) | 3070 | if (icsk->icsk_ca_state == TCP_CA_Disorder) |
2989 | tcp_try_undo_dsack(sk); | 3071 | tcp_try_undo_dsack(sk); |
2990 | 3072 | ||
2991 | if (!tcp_time_to_recover(sk, flag)) { | 3073 | if (!tcp_time_to_recover(sk)) { |
2992 | tcp_try_to_open(sk, flag, newly_acked_sacked); | 3074 | tcp_try_to_open(sk, flag); |
2993 | return; | 3075 | return; |
2994 | } | 3076 | } |
2995 | 3077 | ||
@@ -3005,13 +3087,35 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3005 | } | 3087 | } |
3006 | 3088 | ||
3007 | /* Otherwise enter Recovery state */ | 3089 | /* Otherwise enter Recovery state */ |
3008 | tcp_enter_recovery(sk, (flag & FLAG_ECE)); | 3090 | |
3091 | if (tcp_is_reno(tp)) | ||
3092 | mib_idx = LINUX_MIB_TCPRENORECOVERY; | ||
3093 | else | ||
3094 | mib_idx = LINUX_MIB_TCPSACKRECOVERY; | ||
3095 | |||
3096 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | ||
3097 | |||
3098 | tp->high_seq = tp->snd_nxt; | ||
3099 | tp->prior_ssthresh = 0; | ||
3100 | tp->undo_marker = tp->snd_una; | ||
3101 | tp->undo_retrans = tp->retrans_out; | ||
3102 | |||
3103 | if (icsk->icsk_ca_state < TCP_CA_CWR) { | ||
3104 | if (!(flag & FLAG_ECE)) | ||
3105 | tp->prior_ssthresh = tcp_current_ssthresh(sk); | ||
3106 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); | ||
3107 | TCP_ECN_queue_cwr(tp); | ||
3108 | } | ||
3109 | |||
3110 | tp->bytes_acked = 0; | ||
3111 | tp->snd_cwnd_cnt = 0; | ||
3112 | tcp_set_ca_state(sk, TCP_CA_Recovery); | ||
3009 | fast_rexmit = 1; | 3113 | fast_rexmit = 1; |
3010 | } | 3114 | } |
3011 | 3115 | ||
3012 | if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) | 3116 | if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) |
3013 | tcp_update_scoreboard(sk, fast_rexmit); | 3117 | tcp_update_scoreboard(sk, fast_rexmit); |
3014 | tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit); | 3118 | tcp_cwnd_down(sk, flag); |
3015 | tcp_xmit_retransmit_queue(sk); | 3119 | tcp_xmit_retransmit_queue(sk); |
3016 | } | 3120 | } |
3017 | 3121 | ||
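tcp_fastretrans_alert() is the heart of the recovery state machine: it decides when to leave Open, when to enter Recovery, and when an ACK covering high_seq lets the connection come back out. A heavily reduced user-space sketch of just the Open -> Recovery -> Open path for a Reno-style sender; SACK, CWR, Disorder, Loss, undo and the actual retransmissions are all omitted, and the threshold handling is simplified for illustration:

/* Hedged sketch of the Open -> Recovery -> Open portion of the congestion
 * state machine: enter Recovery once more than "reordering" duplicate ACKs
 * are seen, remember snd_nxt as the recovery point, and return to Open when
 * the cumulative ACK reaches it.
 */
#include <stdio.h>

enum ca_state { CA_OPEN, CA_RECOVERY };

struct mini_sk {
    enum ca_state state;
    unsigned int dup_acks;
    unsigned int reordering;   /* dupack threshold, e.g. 3 */
    unsigned int snd_una;
    unsigned int high_seq;     /* snd_nxt snapshot at entry */
    unsigned int snd_nxt;
};

static void on_ack(struct mini_sk *sk, unsigned int ack, int is_dupack)
{
    if (ack > sk->snd_una)
        sk->snd_una = ack;

    switch (sk->state) {
    case CA_OPEN:
        sk->dup_acks = is_dupack ? sk->dup_acks + 1 : 0;
        if (sk->dup_acks > sk->reordering) {
            sk->high_seq = sk->snd_nxt;   /* remember the recovery point */
            sk->state = CA_RECOVERY;
            printf("enter Recovery, high_seq=%u\n", sk->high_seq);
        }
        break;
    case CA_RECOVERY:
        if (sk->snd_una >= sk->high_seq) {   /* recovery point ACKed */
            sk->state = CA_OPEN;
            sk->dup_acks = 0;
            printf("back to Open at una=%u\n", sk->snd_una);
        }
        break;
    }
}

int main(void)
{
    struct mini_sk sk = { CA_OPEN, 0, 3, 100, 0, 200 };
    int i;

    for (i = 0; i < 4; i++)   /* four duplicate ACKs for seq 100 */
        on_ack(&sk, 100, 1);
    on_ack(&sk, 200, 0);      /* cumulative ACK past high_seq */
    return 0;
}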
@@ -3086,53 +3190,16 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
3086 | /* Restart timer after forward progress on connection. | 3190 | /* Restart timer after forward progress on connection. |
3087 | * RFC2988 recommends to restart timer to now+rto. | 3191 | * RFC2988 recommends to restart timer to now+rto. |
3088 | */ | 3192 | */ |
3089 | void tcp_rearm_rto(struct sock *sk) | 3193 | static void tcp_rearm_rto(struct sock *sk) |
3090 | { | 3194 | { |
3091 | struct tcp_sock *tp = tcp_sk(sk); | 3195 | struct tcp_sock *tp = tcp_sk(sk); |
3092 | 3196 | ||
3093 | /* If the retrans timer is currently being used by Fast Open | ||
3094 | * for SYN-ACK retrans purpose, stay put. | ||
3095 | */ | ||
3096 | if (tp->fastopen_rsk) | ||
3097 | return; | ||
3098 | |||
3099 | if (!tp->packets_out) { | 3197 | if (!tp->packets_out) { |
3100 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); | 3198 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); |
3101 | } else { | 3199 | } else { |
3102 | u32 rto = inet_csk(sk)->icsk_rto; | 3200 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
3103 | /* Offset the time elapsed after installing regular RTO */ | 3201 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); |
3104 | if (tp->early_retrans_delayed) { | ||
3105 | struct sk_buff *skb = tcp_write_queue_head(sk); | ||
3106 | const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; | ||
3107 | s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); | ||
3108 | /* delta may not be positive if the socket is locked | ||
3109 | * when the delayed ER timer fires and is rescheduled. | ||
3110 | */ | ||
3111 | if (delta > 0) | ||
3112 | rto = delta; | ||
3113 | } | ||
3114 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, | ||
3115 | TCP_RTO_MAX); | ||
3116 | } | 3202 | } |
3117 | tp->early_retrans_delayed = 0; | ||
3118 | } | ||
3119 | |||
3120 | /* This function is called when the delayed ER timer fires. TCP enters | ||
3121 | * fast recovery and performs fast-retransmit. | ||
3122 | */ | ||
3123 | void tcp_resume_early_retransmit(struct sock *sk) | ||
3124 | { | ||
3125 | struct tcp_sock *tp = tcp_sk(sk); | ||
3126 | |||
3127 | tcp_rearm_rto(sk); | ||
3128 | |||
3129 | /* Stop if ER is disabled after the delayed ER timer is scheduled */ | ||
3130 | if (!tp->do_early_retrans) | ||
3131 | return; | ||
3132 | |||
3133 | tcp_enter_recovery(sk, false); | ||
3134 | tcp_update_scoreboard(sk, 1); | ||
3135 | tcp_xmit_retransmit_queue(sk); | ||
3136 | } | 3203 | } |
3137 | 3204 | ||
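In the replaced tcp_rearm_rto(), when a delayed early-retransmit timer had been armed, re-arming the RTO has to subtract the time already elapsed since the head skb was stamped. A small sketch of that offset arithmetic with a plain integer clock; the names are illustrative, not kernel API:

/* Hedged sketch of the "offset the elapsed time" step: the effective
 * timeout is (send_stamp + rto) - now, falling back to the full rto when
 * that difference is not positive (the original deadline already passed).
 */
#include <stdio.h>

static long effective_rto(long send_stamp, long rto, long now)
{
    long delta = (send_stamp + rto) - now;   /* time left of the original RTO */

    return delta > 0 ? delta : rto;
}

int main(void)
{
    /* Head sent at t=1000 with rto=300: at t=1100 only 200 remain;
     * at t=1400 the window has passed, so a fresh full rto is used.
     */
    printf("%ld %ld\n", effective_rto(1000, 300, 1100),
           effective_rto(1000, 300, 1400));
    return 0;
}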
3138 | /* If we get here, the whole TSO packet has not been acked. */ | 3205 | /* If we get here, the whole TSO packet has not been acked. */ |
@@ -3167,7 +3234,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3167 | const struct inet_connection_sock *icsk = inet_csk(sk); | 3234 | const struct inet_connection_sock *icsk = inet_csk(sk); |
3168 | struct sk_buff *skb; | 3235 | struct sk_buff *skb; |
3169 | u32 now = tcp_time_stamp; | 3236 | u32 now = tcp_time_stamp; |
3170 | int fully_acked = true; | 3237 | int fully_acked = 1; |
3171 | int flag = 0; | 3238 | int flag = 0; |
3172 | u32 pkts_acked = 0; | 3239 | u32 pkts_acked = 0; |
3173 | u32 reord = tp->packets_out; | 3240 | u32 reord = tp->packets_out; |
@@ -3191,7 +3258,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3191 | if (!acked_pcount) | 3258 | if (!acked_pcount) |
3192 | break; | 3259 | break; |
3193 | 3260 | ||
3194 | fully_acked = false; | 3261 | fully_acked = 0; |
3195 | } else { | 3262 | } else { |
3196 | acked_pcount = tcp_skb_pcount(skb); | 3263 | acked_pcount = tcp_skb_pcount(skb); |
3197 | } | 3264 | } |
@@ -3229,7 +3296,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3229 | * connection startup slow start one packet too | 3296 | * connection startup slow start one packet too |
3230 | * quickly. This is severely frowned upon behavior. | 3297 | * quickly. This is severely frowned upon behavior. |
3231 | */ | 3298 | */ |
3232 | if (!(scb->tcp_flags & TCPHDR_SYN)) { | 3299 | if (!(scb->flags & TCPHDR_SYN)) { |
3233 | flag |= FLAG_DATA_ACKED; | 3300 | flag |= FLAG_DATA_ACKED; |
3234 | } else { | 3301 | } else { |
3235 | flag |= FLAG_SYN_ACKED; | 3302 | flag |= FLAG_SYN_ACKED; |
@@ -3308,18 +3375,18 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3308 | if (!tp->packets_out && tcp_is_sack(tp)) { | 3375 | if (!tp->packets_out && tcp_is_sack(tp)) { |
3309 | icsk = inet_csk(sk); | 3376 | icsk = inet_csk(sk); |
3310 | if (tp->lost_out) { | 3377 | if (tp->lost_out) { |
3311 | pr_debug("Leak l=%u %d\n", | 3378 | printk(KERN_DEBUG "Leak l=%u %d\n", |
3312 | tp->lost_out, icsk->icsk_ca_state); | 3379 | tp->lost_out, icsk->icsk_ca_state); |
3313 | tp->lost_out = 0; | 3380 | tp->lost_out = 0; |
3314 | } | 3381 | } |
3315 | if (tp->sacked_out) { | 3382 | if (tp->sacked_out) { |
3316 | pr_debug("Leak s=%u %d\n", | 3383 | printk(KERN_DEBUG "Leak s=%u %d\n", |
3317 | tp->sacked_out, icsk->icsk_ca_state); | 3384 | tp->sacked_out, icsk->icsk_ca_state); |
3318 | tp->sacked_out = 0; | 3385 | tp->sacked_out = 0; |
3319 | } | 3386 | } |
3320 | if (tp->retrans_out) { | 3387 | if (tp->retrans_out) { |
3321 | pr_debug("Leak r=%u %d\n", | 3388 | printk(KERN_DEBUG "Leak r=%u %d\n", |
3322 | tp->retrans_out, icsk->icsk_ca_state); | 3389 | tp->retrans_out, icsk->icsk_ca_state); |
3323 | tp->retrans_out = 0; | 3390 | tp->retrans_out = 0; |
3324 | } | 3391 | } |
3325 | } | 3392 | } |
@@ -3347,23 +3414,23 @@ static void tcp_ack_probe(struct sock *sk) | |||
3347 | } | 3414 | } |
3348 | } | 3415 | } |
3349 | 3416 | ||
3350 | static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag) | 3417 | static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag) |
3351 | { | 3418 | { |
3352 | return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) || | 3419 | return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) || |
3353 | inet_csk(sk)->icsk_ca_state != TCP_CA_Open; | 3420 | inet_csk(sk)->icsk_ca_state != TCP_CA_Open; |
3354 | } | 3421 | } |
3355 | 3422 | ||
3356 | static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) | 3423 | static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag) |
3357 | { | 3424 | { |
3358 | const struct tcp_sock *tp = tcp_sk(sk); | 3425 | const struct tcp_sock *tp = tcp_sk(sk); |
3359 | return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && | 3426 | return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && |
3360 | !tcp_in_cwnd_reduction(sk); | 3427 | !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR)); |
3361 | } | 3428 | } |
3362 | 3429 | ||
3363 | /* Check that window update is acceptable. | 3430 | /* Check that window update is acceptable. |
3364 | * The function assumes that snd_una<=ack<=snd_next. | 3431 | * The function assumes that snd_una<=ack<=snd_next. |
3365 | */ | 3432 | */ |
3366 | static inline bool tcp_may_update_window(const struct tcp_sock *tp, | 3433 | static inline int tcp_may_update_window(const struct tcp_sock *tp, |
3367 | const u32 ack, const u32 ack_seq, | 3434 | const u32 ack, const u32 ack_seq, |
3368 | const u32 nwin) | 3435 | const u32 nwin) |
3369 | { | 3436 | { |
@@ -3377,7 +3444,7 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp, | |||
3377 | * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2 | 3444 | * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2 |
3378 | * and in FreeBSD. NetBSD's one is even worse.) is wrong. | 3445 | * and in FreeBSD. NetBSD's one is even worse.) is wrong. |
3379 | */ | 3446 | */ |
3380 | static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack, | 3447 | static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack, |
3381 | u32 ack_seq) | 3448 | u32 ack_seq) |
3382 | { | 3449 | { |
3383 | struct tcp_sock *tp = tcp_sk(sk); | 3450 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -3425,9 +3492,9 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp) | |||
3425 | } | 3492 | } |
3426 | 3493 | ||
3427 | /* A conservative spurious RTO response algorithm: reduce cwnd using | 3494 | /* A conservative spurious RTO response algorithm: reduce cwnd using |
3428 | * PRR and continue in congestion avoidance. | 3495 | * rate halving and continue in congestion avoidance. |
3429 | */ | 3496 | */ |
3430 | static void tcp_cwr_spur_to_response(struct sock *sk) | 3497 | static void tcp_ratehalving_spur_to_response(struct sock *sk) |
3431 | { | 3498 | { |
3432 | tcp_enter_cwr(sk, 0); | 3499 | tcp_enter_cwr(sk, 0); |
3433 | } | 3500 | } |
@@ -3435,7 +3502,7 @@ static void tcp_cwr_spur_to_response(struct sock *sk) | |||
3435 | static void tcp_undo_spur_to_response(struct sock *sk, int flag) | 3502 | static void tcp_undo_spur_to_response(struct sock *sk, int flag) |
3436 | { | 3503 | { |
3437 | if (flag & FLAG_ECE) | 3504 | if (flag & FLAG_ECE) |
3438 | tcp_cwr_spur_to_response(sk); | 3505 | tcp_ratehalving_spur_to_response(sk); |
3439 | else | 3506 | else |
3440 | tcp_undo_cwr(sk, true); | 3507 | tcp_undo_cwr(sk, true); |
3441 | } | 3508 | } |
@@ -3470,7 +3537,7 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag) | |||
3470 | * to prove that the RTO is indeed spurious. It transfers the control | 3537 | * to prove that the RTO is indeed spurious. It transfers the control |
3471 | * from F-RTO to the conventional RTO recovery | 3538 | * from F-RTO to the conventional RTO recovery |
3472 | */ | 3539 | */ |
3473 | static bool tcp_process_frto(struct sock *sk, int flag) | 3540 | static int tcp_process_frto(struct sock *sk, int flag) |
3474 | { | 3541 | { |
3475 | struct tcp_sock *tp = tcp_sk(sk); | 3542 | struct tcp_sock *tp = tcp_sk(sk); |
3476 | 3543 | ||
@@ -3486,7 +3553,7 @@ static bool tcp_process_frto(struct sock *sk, int flag) | |||
3486 | 3553 | ||
3487 | if (!before(tp->snd_una, tp->frto_highmark)) { | 3554 | if (!before(tp->snd_una, tp->frto_highmark)) { |
3488 | tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); | 3555 | tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); |
3489 | return true; | 3556 | return 1; |
3490 | } | 3557 | } |
3491 | 3558 | ||
3492 | if (!tcp_is_sackfrto(tp)) { | 3559 | if (!tcp_is_sackfrto(tp)) { |
@@ -3495,19 +3562,19 @@ static bool tcp_process_frto(struct sock *sk, int flag) | |||
3495 | * data, winupdate | 3562 | * data, winupdate |
3496 | */ | 3563 | */ |
3497 | if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) | 3564 | if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) |
3498 | return true; | 3565 | return 1; |
3499 | 3566 | ||
3500 | if (!(flag & FLAG_DATA_ACKED)) { | 3567 | if (!(flag & FLAG_DATA_ACKED)) { |
3501 | tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), | 3568 | tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), |
3502 | flag); | 3569 | flag); |
3503 | return true; | 3570 | return 1; |
3504 | } | 3571 | } |
3505 | } else { | 3572 | } else { |
3506 | if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { | 3573 | if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { |
3507 | /* Prevent sending of new data. */ | 3574 | /* Prevent sending of new data. */ |
3508 | tp->snd_cwnd = min(tp->snd_cwnd, | 3575 | tp->snd_cwnd = min(tp->snd_cwnd, |
3509 | tcp_packets_in_flight(tp)); | 3576 | tcp_packets_in_flight(tp)); |
3510 | return true; | 3577 | return 1; |
3511 | } | 3578 | } |
3512 | 3579 | ||
3513 | if ((tp->frto_counter >= 2) && | 3580 | if ((tp->frto_counter >= 2) && |
@@ -3517,10 +3584,10 @@ static bool tcp_process_frto(struct sock *sk, int flag) | |||
3517 | /* RFC4138 shortcoming (see comment above) */ | 3584 | /* RFC4138 shortcoming (see comment above) */ |
3518 | if (!(flag & FLAG_FORWARD_PROGRESS) && | 3585 | if (!(flag & FLAG_FORWARD_PROGRESS) && |
3519 | (flag & FLAG_NOT_DUP)) | 3586 | (flag & FLAG_NOT_DUP)) |
3520 | return true; | 3587 | return 1; |
3521 | 3588 | ||
3522 | tcp_enter_frto_loss(sk, 3, flag); | 3589 | tcp_enter_frto_loss(sk, 3, flag); |
3523 | return true; | 3590 | return 1; |
3524 | } | 3591 | } |
3525 | } | 3592 | } |
3526 | 3593 | ||
@@ -3532,7 +3599,7 @@ static bool tcp_process_frto(struct sock *sk, int flag) | |||
3532 | if (!tcp_may_send_now(sk)) | 3599 | if (!tcp_may_send_now(sk)) |
3533 | tcp_enter_frto_loss(sk, 2, flag); | 3600 | tcp_enter_frto_loss(sk, 2, flag); |
3534 | 3601 | ||
3535 | return true; | 3602 | return 1; |
3536 | } else { | 3603 | } else { |
3537 | switch (sysctl_tcp_frto_response) { | 3604 | switch (sysctl_tcp_frto_response) { |
3538 | case 2: | 3605 | case 2: |
@@ -3542,61 +3609,34 @@ static bool tcp_process_frto(struct sock *sk, int flag) | |||
3542 | tcp_conservative_spur_to_response(tp); | 3609 | tcp_conservative_spur_to_response(tp); |
3543 | break; | 3610 | break; |
3544 | default: | 3611 | default: |
3545 | tcp_cwr_spur_to_response(sk); | 3612 | tcp_ratehalving_spur_to_response(sk); |
3546 | break; | 3613 | break; |
3547 | } | 3614 | } |
3548 | tp->frto_counter = 0; | 3615 | tp->frto_counter = 0; |
3549 | tp->undo_marker = 0; | 3616 | tp->undo_marker = 0; |
3550 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); | 3617 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); |
3551 | } | 3618 | } |
3552 | return false; | 3619 | return 0; |
3553 | } | ||
3554 | |||
3555 | /* RFC 5961 7 [ACK Throttling] */ | ||
3556 | static void tcp_send_challenge_ack(struct sock *sk) | ||
3557 | { | ||
3558 | /* unprotected vars, we dont care of overwrites */ | ||
3559 | static u32 challenge_timestamp; | ||
3560 | static unsigned int challenge_count; | ||
3561 | u32 now = jiffies / HZ; | ||
3562 | |||
3563 | if (now != challenge_timestamp) { | ||
3564 | challenge_timestamp = now; | ||
3565 | challenge_count = 0; | ||
3566 | } | ||
3567 | if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { | ||
3568 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); | ||
3569 | tcp_send_ack(sk); | ||
3570 | } | ||
3571 | } | 3620 | } |
3572 | 3621 | ||
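The deleted tcp_send_challenge_ack() throttles RFC 5961 challenge ACKs with a global per-second counter. A user-space sketch of the same bucket-per-second idea, with a limit of 100 per second chosen here purely for illustration and the actual ACK transmission reduced to a counter:

/* Hedged sketch of challenge-ACK throttling: the counter resets whenever
 * the current one-second bucket changes, and a challenge ACK is allowed
 * only while the counter stays within the limit.
 */
#include <stdbool.h>
#include <stdio.h>

#define CHALLENGE_ACK_LIMIT 100u   /* illustrative per-second limit */

static bool challenge_ack_allowed(unsigned long now_sec)
{
    static unsigned long bucket_sec;
    static unsigned int count;

    if (now_sec != bucket_sec) {   /* new one-second bucket */
        bucket_sec = now_sec;
        count = 0;
    }
    return ++count <= CHALLENGE_ACK_LIMIT;
}

int main(void)
{
    unsigned int sent = 0, i;

    for (i = 0; i < 250; i++)      /* 250 attempts inside one second */
        if (challenge_ack_allowed(42))
            sent++;
    printf("sent %u of 250 in second 42\n", sent);   /* prints 100 */
    return 0;
}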
3573 | /* This routine deals with incoming acks, but not outgoing ones. */ | 3622 | /* This routine deals with incoming acks, but not outgoing ones. */ |
3574 | static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | 3623 | static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) |
3575 | { | 3624 | { |
3576 | struct inet_connection_sock *icsk = inet_csk(sk); | 3625 | struct inet_connection_sock *icsk = inet_csk(sk); |
3577 | struct tcp_sock *tp = tcp_sk(sk); | 3626 | struct tcp_sock *tp = tcp_sk(sk); |
3578 | u32 prior_snd_una = tp->snd_una; | 3627 | u32 prior_snd_una = tp->snd_una; |
3579 | u32 ack_seq = TCP_SKB_CB(skb)->seq; | 3628 | u32 ack_seq = TCP_SKB_CB(skb)->seq; |
3580 | u32 ack = TCP_SKB_CB(skb)->ack_seq; | 3629 | u32 ack = TCP_SKB_CB(skb)->ack_seq; |
3581 | bool is_dupack = false; | ||
3582 | u32 prior_in_flight; | 3630 | u32 prior_in_flight; |
3583 | u32 prior_fackets; | 3631 | u32 prior_fackets; |
3584 | int prior_packets; | 3632 | int prior_packets; |
3585 | int prior_sacked = tp->sacked_out; | 3633 | int frto_cwnd = 0; |
3586 | int pkts_acked = 0; | ||
3587 | bool frto_cwnd = false; | ||
3588 | 3634 | ||
3589 | /* If the ack is older than previous acks | 3635 | /* If the ack is older than previous acks |
3590 | * then we can probably ignore it. | 3636 | * then we can probably ignore it. |
3591 | */ | 3637 | */ |
3592 | if (before(ack, prior_snd_una)) { | 3638 | if (before(ack, prior_snd_una)) |
3593 | /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ | ||
3594 | if (before(ack, prior_snd_una - tp->max_window)) { | ||
3595 | tcp_send_challenge_ack(sk); | ||
3596 | return -1; | ||
3597 | } | ||
3598 | goto old_ack; | 3639 | goto old_ack; |
3599 | } | ||
3600 | 3640 | ||
3601 | /* If the ack includes data we haven't sent yet, discard | 3641 | /* If the ack includes data we haven't sent yet, discard |
3602 | * this segment (RFC793 Section 3.9). | 3642 | * this segment (RFC793 Section 3.9). |
@@ -3604,9 +3644,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3604 | if (after(ack, tp->snd_nxt)) | 3644 | if (after(ack, tp->snd_nxt)) |
3605 | goto invalid_ack; | 3645 | goto invalid_ack; |
3606 | 3646 | ||
3607 | if (tp->early_retrans_delayed) | ||
3608 | tcp_rearm_rto(sk); | ||
3609 | |||
3610 | if (after(ack, prior_snd_una)) | 3647 | if (after(ack, prior_snd_una)) |
3611 | flag |= FLAG_SND_UNA_ADVANCED; | 3648 | flag |= FLAG_SND_UNA_ADVANCED; |
3612 | 3649 | ||
@@ -3664,8 +3701,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3664 | /* See if we can take anything off of the retransmit queue. */ | 3701 | /* See if we can take anything off of the retransmit queue. */ |
3665 | flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); | 3702 | flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); |
3666 | 3703 | ||
3667 | pkts_acked = prior_packets - tp->packets_out; | ||
3668 | |||
3669 | if (tp->frto_counter) | 3704 | if (tp->frto_counter) |
3670 | frto_cwnd = tcp_process_frto(sk, flag); | 3705 | frto_cwnd = tcp_process_frto(sk, flag); |
3671 | /* Guarantee sacktag reordering detection against wrap-arounds */ | 3706 | /* Guarantee sacktag reordering detection against wrap-arounds */ |
@@ -3677,26 +3712,19 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3677 | if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && | 3712 | if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && |
3678 | tcp_may_raise_cwnd(sk, flag)) | 3713 | tcp_may_raise_cwnd(sk, flag)) |
3679 | tcp_cong_avoid(sk, ack, prior_in_flight); | 3714 | tcp_cong_avoid(sk, ack, prior_in_flight); |
3680 | is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); | 3715 | tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, |
3681 | tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, | 3716 | flag); |
3682 | is_dupack, flag); | ||
3683 | } else { | 3717 | } else { |
3684 | if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) | 3718 | if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) |
3685 | tcp_cong_avoid(sk, ack, prior_in_flight); | 3719 | tcp_cong_avoid(sk, ack, prior_in_flight); |
3686 | } | 3720 | } |
3687 | 3721 | ||
3688 | if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { | 3722 | if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) |
3689 | struct dst_entry *dst = __sk_dst_get(sk); | 3723 | dst_confirm(__sk_dst_get(sk)); |
3690 | if (dst) | 3724 | |
3691 | dst_confirm(dst); | ||
3692 | } | ||
3693 | return 1; | 3725 | return 1; |
3694 | 3726 | ||
3695 | no_queue: | 3727 | no_queue: |
3696 | /* If data was DSACKed, see if we can undo a cwnd reduction. */ | ||
3697 | if (flag & FLAG_DSACKING_ACK) | ||
3698 | tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, | ||
3699 | is_dupack, flag); | ||
3700 | /* If this ack opens up a zero window, clear backoff. It was | 3728 | /* If this ack opens up a zero window, clear backoff. It was |
3701 | * being used to time the probes, and is probably far higher than | 3729 | * being used to time the probes, and is probably far higher than |
3702 | * it needs to be for normal retransmission. | 3730 | * it needs to be for normal retransmission. |
@@ -3710,13 +3738,10 @@ invalid_ack: | |||
3710 | return -1; | 3738 | return -1; |
3711 | 3739 | ||
3712 | old_ack: | 3740 | old_ack: |
3713 | /* If data was SACKed, tag it and see if we should send more data. | ||
3714 | * If data was DSACKed, see if we can undo a cwnd reduction. | ||
3715 | */ | ||
3716 | if (TCP_SKB_CB(skb)->sacked) { | 3741 | if (TCP_SKB_CB(skb)->sacked) { |
3717 | flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); | 3742 | tcp_sacktag_write_queue(sk, skb, prior_snd_una); |
3718 | tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, | 3743 | if (icsk->icsk_ca_state == TCP_CA_Open) |
3719 | is_dupack, flag); | 3744 | tcp_try_keep_open(sk); |
3720 | } | 3745 | } |
3721 | 3746 | ||
3722 | SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); | 3747 | SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); |
@@ -3727,15 +3752,14 @@ old_ack: | |||
3727 | * But, this can also be called on packets in the established flow when | 3752 | * But, this can also be called on packets in the established flow when |
3728 | * the fast version below fails. | 3753 | * the fast version below fails. |
3729 | */ | 3754 | */ |
3730 | void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, | 3755 | void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, |
3731 | const u8 **hvpp, int estab, | 3756 | u8 **hvpp, int estab) |
3732 | struct tcp_fastopen_cookie *foc) | ||
3733 | { | 3757 | { |
3734 | const unsigned char *ptr; | 3758 | unsigned char *ptr; |
3735 | const struct tcphdr *th = tcp_hdr(skb); | 3759 | struct tcphdr *th = tcp_hdr(skb); |
3736 | int length = (th->doff * 4) - sizeof(struct tcphdr); | 3760 | int length = (th->doff * 4) - sizeof(struct tcphdr); |
3737 | 3761 | ||
3738 | ptr = (const unsigned char *)(th + 1); | 3762 | ptr = (unsigned char *)(th + 1); |
3739 | opt_rx->saw_tstamp = 0; | 3763 | opt_rx->saw_tstamp = 0; |
3740 | 3764 | ||
3741 | while (length > 0) { | 3765 | while (length > 0) { |
@@ -3772,9 +3796,10 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o | |||
3772 | __u8 snd_wscale = *(__u8 *)ptr; | 3796 | __u8 snd_wscale = *(__u8 *)ptr; |
3773 | opt_rx->wscale_ok = 1; | 3797 | opt_rx->wscale_ok = 1; |
3774 | if (snd_wscale > 14) { | 3798 | if (snd_wscale > 14) { |
3775 | net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n", | 3799 | if (net_ratelimit()) |
3776 | __func__, | 3800 | printk(KERN_INFO "tcp_parse_options: Illegal window " |
3777 | snd_wscale); | 3801 | "scaling value %d >14 received.\n", |
3802 | snd_wscale); | ||
3778 | snd_wscale = 14; | 3803 | snd_wscale = 14; |
3779 | } | 3804 | } |
3780 | opt_rx->snd_wscale = snd_wscale; | 3805 | opt_rx->snd_wscale = snd_wscale; |
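The clamp to 14 above exists because RFC 1323 caps the window-scale shift: with a 16-bit window field, a shift of 14 already allows windows of roughly 1 GiB, and a larger shift could not be represented unambiguously. A tiny arithmetic check, purely for illustration:

/* Hedged sketch: the largest receive window representable with a 16-bit
 * window field and a scale shift clamped to 14, as enforced above.
 */
#include <stdio.h>

int main(void)
{
    unsigned int max_raw_window = 65535;   /* 16-bit window field       */
    unsigned int max_wscale = 14;          /* RFC 1323 / RFC 7323 cap   */
    unsigned long long max_window =
        (unsigned long long)max_raw_window << max_wscale;

    /* 65535 << 14 == 1073725440 bytes, just under 1 GiB. */
    printf("%llu\n", max_window);
    return 0;
}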
@@ -3792,7 +3817,7 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o | |||
3792 | case TCPOPT_SACK_PERM: | 3817 | case TCPOPT_SACK_PERM: |
3793 | if (opsize == TCPOLEN_SACK_PERM && th->syn && | 3818 | if (opsize == TCPOLEN_SACK_PERM && th->syn && |
3794 | !estab && sysctl_tcp_sack) { | 3819 | !estab && sysctl_tcp_sack) { |
3795 | opt_rx->sack_ok = TCP_SACK_SEEN; | 3820 | opt_rx->sack_ok = 1; |
3796 | tcp_sack_reset(opt_rx); | 3821 | tcp_sack_reset(opt_rx); |
3797 | } | 3822 | } |
3798 | break; | 3823 | break; |
@@ -3836,25 +3861,8 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o | |||
3836 | break; | 3861 | break; |
3837 | } | 3862 | } |
3838 | break; | 3863 | break; |
3839 | |||
3840 | case TCPOPT_EXP: | ||
3841 | /* Fast Open option shares code 254 using a | ||
3842 | * 16 bits magic number. It's valid only in | ||
3843 | * SYN or SYN-ACK with an even size. | ||
3844 | */ | ||
3845 | if (opsize < TCPOLEN_EXP_FASTOPEN_BASE || | ||
3846 | get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC || | ||
3847 | foc == NULL || !th->syn || (opsize & 1)) | ||
3848 | break; | ||
3849 | foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE; | ||
3850 | if (foc->len >= TCP_FASTOPEN_COOKIE_MIN && | ||
3851 | foc->len <= TCP_FASTOPEN_COOKIE_MAX) | ||
3852 | memcpy(foc->val, ptr + 2, foc->len); | ||
3853 | else if (foc->len != 0) | ||
3854 | foc->len = -1; | ||
3855 | break; | ||
3856 | |||
3857 | } | 3864 | } |
3865 | |||
3858 | ptr += opsize-2; | 3866 | ptr += opsize-2; |
3859 | length -= opsize; | 3867 | length -= opsize; |
3860 | } | 3868 | } |
@@ -3862,9 +3870,9 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o | |||
3862 | } | 3870 | } |
3863 | EXPORT_SYMBOL(tcp_parse_options); | 3871 | EXPORT_SYMBOL(tcp_parse_options); |
3864 | 3872 | ||
3865 | static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th) | 3873 | static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) |
3866 | { | 3874 | { |
3867 | const __be32 *ptr = (const __be32 *)(th + 1); | 3875 | __be32 *ptr = (__be32 *)(th + 1); |
3868 | 3876 | ||
3869 | if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 3877 | if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
3870 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { | 3878 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { |
@@ -3873,41 +3881,40 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr | |||
3873 | tp->rx_opt.rcv_tsval = ntohl(*ptr); | 3881 | tp->rx_opt.rcv_tsval = ntohl(*ptr); |
3874 | ++ptr; | 3882 | ++ptr; |
3875 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); | 3883 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); |
3876 | return true; | 3884 | return 1; |
3877 | } | 3885 | } |
3878 | return false; | 3886 | return 0; |
3879 | } | 3887 | } |
3880 | 3888 | ||
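tcp_parse_aligned_timestamp() speeds up the common case by comparing the first 32-bit option word against the canonical "NOP, NOP, TIMESTAMP, length 10" layout as one constant. A sketch of how that constant is built and tested, using the standard option numbers (NOP = 1, TIMESTAMP = 8, length 10); the unaligned-access handling of the real code is not modelled:

/* Hedged sketch of the aligned-timestamp fast path: build the expected
 * first option word once and compare it, in network byte order, against
 * the word that follows the TCP header.
 */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TCPOPT_NOP        1
#define TCPOPT_TIMESTAMP  8
#define TCPOLEN_TIMESTAMP 10

int main(void)
{
    uint32_t expected = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                              (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
    /* A hand-built option area: 01 01 08 0a, then the two timestamps. */
    uint8_t opts[12] = { 0x01, 0x01, 0x08, 0x0a };
    uint32_t first_word;

    /* memcpy sidesteps alignment/aliasing issues; this only illustrates
     * the single-word comparison itself.
     */
    memcpy(&first_word, opts, sizeof(first_word));
    printf("fast path taken: %s\n",
           first_word == expected ? "yes" : "no");   /* prints "yes" */
    return 0;
}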
3881 | /* Fast parse options. This hopes to only see timestamps. | 3889 | /* Fast parse options. This hopes to only see timestamps. |
3882 | * If it is wrong it falls back on tcp_parse_options(). | 3890 | * If it is wrong it falls back on tcp_parse_options(). |
3883 | */ | 3891 | */ |
3884 | static bool tcp_fast_parse_options(const struct sk_buff *skb, | 3892 | static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, |
3885 | const struct tcphdr *th, | 3893 | struct tcp_sock *tp, u8 **hvpp) |
3886 | struct tcp_sock *tp, const u8 **hvpp) | ||
3887 | { | 3894 | { |
3888 | /* In the spirit of fast parsing, compare doff directly to constant | 3895 | /* In the spirit of fast parsing, compare doff directly to constant |
3889 | * values. Because equality is used, short doff can be ignored here. | 3896 | * values. Because equality is used, short doff can be ignored here. |
3890 | */ | 3897 | */ |
3891 | if (th->doff == (sizeof(*th) / 4)) { | 3898 | if (th->doff == (sizeof(*th) / 4)) { |
3892 | tp->rx_opt.saw_tstamp = 0; | 3899 | tp->rx_opt.saw_tstamp = 0; |
3893 | return false; | 3900 | return 0; |
3894 | } else if (tp->rx_opt.tstamp_ok && | 3901 | } else if (tp->rx_opt.tstamp_ok && |
3895 | th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) { | 3902 | th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) { |
3896 | if (tcp_parse_aligned_timestamp(tp, th)) | 3903 | if (tcp_parse_aligned_timestamp(tp, th)) |
3897 | return true; | 3904 | return 1; |
3898 | } | 3905 | } |
3899 | tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); | 3906 | tcp_parse_options(skb, &tp->rx_opt, hvpp, 1); |
3900 | return true; | 3907 | return 1; |
3901 | } | 3908 | } |
3902 | 3909 | ||
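The two functions above implement the timestamp fast path: when the TCP header length shows either no options at all or exactly the 12-byte aligned timestamp block, the parser skips the generic option walk and compares a single 32-bit word. The standalone userspace sketch below illustrates that word-compare trick; helper names and the main() harness are invented here, so treat it as a simplified illustration rather than the kernel code.

/* Illustrative userspace sketch of the aligned-timestamp fast path
 * (hypothetical helper, not the kernel implementation). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

#define TCPOPT_NOP        1
#define TCPOPT_TIMESTAMP  8
#define TCPOLEN_TIMESTAMP 10

/* Returns 1 and fills tsval/tsecr when the 12 option bytes are exactly
 * NOP, NOP, TIMESTAMP, len=10, tsval, tsecr -- the layout the fast path
 * expects; returns 0 so the caller can fall back to the slow walker. */
static int parse_aligned_timestamp(const uint8_t *opt, uint32_t *tsval,
                                   uint32_t *tsecr)
{
    uint32_t word;
    const uint32_t pattern = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                   (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);

    memcpy(&word, opt, 4);            /* avoid unaligned access */
    if (word != pattern)
        return 0;

    memcpy(&word, opt + 4, 4);
    *tsval = ntohl(word);
    memcpy(&word, opt + 8, 4);
    *tsecr = ntohl(word);
    return 1;
}

int main(void)
{
    uint8_t opt[12] = { 1, 1, 8, 10,           /* NOP NOP TS len */
                        0, 0, 0x30, 0x39,      /* tsval = 12345  */
                        0, 0, 0x01, 0xc8 };    /* tsecr = 456    */
    uint32_t tsval, tsecr;

    if (parse_aligned_timestamp(opt, &tsval, &tsecr))
        printf("tsval=%u tsecr=%u\n", tsval, tsecr);
    return 0;
}

The single-word compare works because well-behaved stacks pad the timestamp option as NOP, NOP, kind 8, length 10, which makes the whole block 32-bit aligned.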
3903 | #ifdef CONFIG_TCP_MD5SIG | 3910 | #ifdef CONFIG_TCP_MD5SIG |
3904 | /* | 3911 | /* |
3905 | * Parse MD5 Signature option | 3912 | * Parse MD5 Signature option |
3906 | */ | 3913 | */ |
3907 | const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) | 3914 | u8 *tcp_parse_md5sig_option(struct tcphdr *th) |
3908 | { | 3915 | { |
3909 | int length = (th->doff << 2) - sizeof(*th); | 3916 | int length = (th->doff << 2) - sizeof (*th); |
3910 | const u8 *ptr = (const u8 *)(th + 1); | 3917 | u8 *ptr = (u8*)(th + 1); |
3911 | 3918 | ||
3912 | /* If the TCP option is too short, we can short cut */ | 3919 | /* If the TCP option is too short, we can short cut */ |
3913 | if (length < TCPOLEN_MD5SIG) | 3920 | if (length < TCPOLEN_MD5SIG) |
@@ -3984,8 +3991,8 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) | |||
3984 | 3991 | ||
3985 | static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) | 3992 | static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) |
3986 | { | 3993 | { |
3987 | const struct tcp_sock *tp = tcp_sk(sk); | 3994 | struct tcp_sock *tp = tcp_sk(sk); |
3988 | const struct tcphdr *th = tcp_hdr(skb); | 3995 | struct tcphdr *th = tcp_hdr(skb); |
3989 | u32 seq = TCP_SKB_CB(skb)->seq; | 3996 | u32 seq = TCP_SKB_CB(skb)->seq; |
3990 | u32 ack = TCP_SKB_CB(skb)->ack_seq; | 3997 | u32 ack = TCP_SKB_CB(skb)->ack_seq; |
3991 | 3998 | ||
@@ -4002,7 +4009,7 @@ static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) | |||
4002 | (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ); | 4009 | (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ); |
4003 | } | 4010 | } |
4004 | 4011 | ||
4005 | static inline bool tcp_paws_discard(const struct sock *sk, | 4012 | static inline int tcp_paws_discard(const struct sock *sk, |
4006 | const struct sk_buff *skb) | 4013 | const struct sk_buff *skb) |
4007 | { | 4014 | { |
4008 | const struct tcp_sock *tp = tcp_sk(sk); | 4015 | const struct tcp_sock *tp = tcp_sk(sk); |
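Both tcp_disordered_ack() and tcp_paws_discard() in the hunks above lean on wrap-safe timestamp arithmetic: a segment whose tsval is older than the remembered ts_recent is suspect. The tiny sketch below (illustrative helper name, plain userspace C) shows the signed-difference comparison that keeps this robust across 32-bit wraparound; the kernel applies further conditions (retransmission window, RST handling) that are omitted here.

/* Sketch of the wrap-safe timestamp comparison that PAWS-style checks
 * rely on; illustrative only. */
#include <stdint.h>
#include <stdio.h>

/* A received tsval is "older" than the remembered ts_recent when the
 * signed 32-bit difference is negative; this stays correct when the
 * timestamp clock wraps around. */
static int tsval_is_stale(uint32_t ts_recent, uint32_t rcv_tsval)
{
    return (int32_t)(rcv_tsval - ts_recent) < 0;
}

int main(void)
{
    printf("%d\n", tsval_is_stale(100, 90));          /* 1: stale            */
    printf("%d\n", tsval_is_stale(100, 150));         /* 0: fresh            */
    printf("%d\n", tsval_is_stale(0xfffffff0u, 5));   /* 0: fresh across wrap */
    return 0;
}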
@@ -4024,14 +4031,14 @@ static inline bool tcp_paws_discard(const struct sock *sk, | |||
4024 | * (borrowed from freebsd) | 4031 | * (borrowed from freebsd) |
4025 | */ | 4032 | */ |
4026 | 4033 | ||
4027 | static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) | 4034 | static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq) |
4028 | { | 4035 | { |
4029 | return !before(end_seq, tp->rcv_wup) && | 4036 | return !before(end_seq, tp->rcv_wup) && |
4030 | !after(seq, tp->rcv_nxt + tcp_receive_window(tp)); | 4037 | !after(seq, tp->rcv_nxt + tcp_receive_window(tp)); |
4031 | } | 4038 | } |
4032 | 4039 | ||
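tcp_sequence() above is the core in-window test from RFC 793: a segment is acceptable only if it ends at or past the left window edge and starts no later than the right edge. A minimal userspace sketch follows, with invented names and the wrap-safe before()/after() helpers spelled out.

/* Userspace sketch of the in-window test performed by tcp_sequence();
 * simplified, names illustrative. */
#include <stdint.h>
#include <stdio.h>

static int seq_before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
static int seq_after(uint32_t a, uint32_t b)  { return seq_before(b, a); }

/* A segment [seq, end_seq) is acceptable when it ends at or beyond the
 * left edge (rcv_wup) and starts no further than the right edge
 * (rcv_nxt + rcv_wnd). */
static int in_window(uint32_t seq, uint32_t end_seq,
                     uint32_t rcv_wup, uint32_t rcv_nxt, uint32_t rcv_wnd)
{
    return !seq_before(end_seq, rcv_wup) &&
           !seq_after(seq, rcv_nxt + rcv_wnd);
}

int main(void)
{
    /* window is [1000, 1000 + 65535) */
    printf("%d\n", in_window(1000, 1100, 1000, 1000, 65535)); /* 1           */
    printf("%d\n", in_window(900,  950,  1000, 1000, 65535)); /* 0: old data */
    return 0;
}

The unsigned subtraction cast to a signed 32-bit value is what keeps both helpers correct when sequence numbers wrap past zero.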
4033 | /* When we get a reset we do this. */ | 4040 | /* When we get a reset we do this. */ |
4034 | void tcp_reset(struct sock *sk) | 4041 | static void tcp_reset(struct sock *sk) |
4035 | { | 4042 | { |
4036 | /* We want the right error as BSD sees it (and indeed as we do). */ | 4043 | /* We want the right error as BSD sees it (and indeed as we do). */ |
4037 | switch (sk->sk_state) { | 4044 | switch (sk->sk_state) { |
@@ -4069,7 +4076,7 @@ void tcp_reset(struct sock *sk) | |||
4069 | * | 4076 | * |
4070 | * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT. | 4077 | * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT. |
4071 | */ | 4078 | */ |
4072 | static void tcp_fin(struct sock *sk) | 4079 | static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) |
4073 | { | 4080 | { |
4074 | struct tcp_sock *tp = tcp_sk(sk); | 4081 | struct tcp_sock *tp = tcp_sk(sk); |
4075 | 4082 | ||
@@ -4113,7 +4120,7 @@ static void tcp_fin(struct sock *sk) | |||
4113 | /* Only TCP_LISTEN and TCP_CLOSE are left, in these | 4120 | /* Only TCP_LISTEN and TCP_CLOSE are left, in these |
4114 | * cases we should never reach this piece of code. | 4121 | * cases we should never reach this piece of code. |
4115 | */ | 4122 | */ |
4116 | pr_err("%s: Impossible, sk->sk_state=%d\n", | 4123 | printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n", |
4117 | __func__, sk->sk_state); | 4124 | __func__, sk->sk_state); |
4118 | break; | 4125 | break; |
4119 | } | 4126 | } |
@@ -4138,7 +4145,7 @@ static void tcp_fin(struct sock *sk) | |||
4138 | } | 4145 | } |
4139 | } | 4146 | } |
4140 | 4147 | ||
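tcp_fin() above drives the receiver-side half-close transitions. The sketch below models only the transitions relevant to this code path, using a hypothetical enum rather than the kernel's sk_state values, and assumes the FIN arrives in order.

/* Simplified sketch of the receiver-side FIN transitions handled by
 * tcp_fin(); the enum is purely illustrative. */
#include <stdio.h>

enum state { ESTABLISHED, FIN_WAIT1, FIN_WAIT2, CLOSE_WAIT, CLOSING, TIME_WAIT };

/* State to move to when the peer's FIN is received in the given state
 * (per RFC 793: FIN_WAIT1 -> CLOSING, FIN_WAIT2 -> TIME_WAIT, and
 * CLOSE_WAIT for a connection that was still fully open). */
static enum state on_fin(enum state s)
{
    switch (s) {
    case ESTABLISHED: return CLOSE_WAIT;
    case FIN_WAIT1:   return CLOSING;   /* our own FIN not yet ACKed */
    case FIN_WAIT2:   return TIME_WAIT;
    default:          return s;         /* already closing: no change */
    }
}

int main(void)
{
    printf("%d\n", on_fin(ESTABLISHED) == CLOSE_WAIT); /* 1 */
    printf("%d\n", on_fin(FIN_WAIT2) == TIME_WAIT);    /* 1 */
    return 0;
}

FIN_WAIT1 moves to CLOSING rather than TIME_WAIT because our own FIN has not been acknowledged yet; once that ACK arrives the socket proceeds to TIME_WAIT.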
4141 | static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, | 4148 | static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, |
4142 | u32 end_seq) | 4149 | u32 end_seq) |
4143 | { | 4150 | { |
4144 | if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { | 4151 | if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { |
@@ -4146,9 +4153,9 @@ static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, | |||
4146 | sp->start_seq = seq; | 4153 | sp->start_seq = seq; |
4147 | if (after(end_seq, sp->end_seq)) | 4154 | if (after(end_seq, sp->end_seq)) |
4148 | sp->end_seq = end_seq; | 4155 | sp->end_seq = end_seq; |
4149 | return true; | 4156 | return 1; |
4150 | } | 4157 | } |
4151 | return false; | 4158 | return 0; |
4152 | } | 4159 | } |
4153 | 4160 | ||
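tcp_sack_extend() above widens an existing SACK block when a new range touches or overlaps it, and reports failure otherwise so the caller can start a new block. The same logic in a self-contained userspace form, with a toy struct and invented test values:

/* Standalone sketch of the SACK-block merge performed by
 * tcp_sack_extend(). */
#include <stdint.h>
#include <stdio.h>

static int seq_before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
static int seq_after(uint32_t a, uint32_t b)  { return seq_before(b, a); }

struct sack_block { uint32_t start_seq, end_seq; };

/* Returns 1 and extends *sp when [seq, end_seq] is adjacent to or
 * overlaps the existing block, 0 when the ranges are disjoint. */
static int sack_extend(struct sack_block *sp, uint32_t seq, uint32_t end_seq)
{
    if (!seq_after(seq, sp->end_seq) && !seq_after(sp->start_seq, end_seq)) {
        if (seq_before(seq, sp->start_seq))
            sp->start_seq = seq;
        if (seq_after(end_seq, sp->end_seq))
            sp->end_seq = end_seq;
        return 1;
    }
    return 0;
}

int main(void)
{
    struct sack_block b = { .start_seq = 100, .end_seq = 200 };

    printf("%d [%u,%u]\n", sack_extend(&b, 200, 300), b.start_seq, b.end_seq);
    /* 1 [100,300]: contiguous range merged */
    printf("%d [%u,%u]\n", sack_extend(&b, 500, 600), b.start_seq, b.end_seq);
    /* 0 [100,300]: disjoint range rejected */
    return 0;
}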
4154 | static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) | 4161 | static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) |
@@ -4181,7 +4188,7 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq) | |||
4181 | tcp_sack_extend(tp->duplicate_sack, seq, end_seq); | 4188 | tcp_sack_extend(tp->duplicate_sack, seq, end_seq); |
4182 | } | 4189 | } |
4183 | 4190 | ||
4184 | static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) | 4191 | static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) |
4185 | { | 4192 | { |
4186 | struct tcp_sock *tp = tcp_sk(sk); | 4193 | struct tcp_sock *tp = tcp_sk(sk); |
4187 | 4194 | ||
@@ -4340,258 +4347,37 @@ static void tcp_ofo_queue(struct sock *sk) | |||
4340 | __skb_queue_tail(&sk->sk_receive_queue, skb); | 4347 | __skb_queue_tail(&sk->sk_receive_queue, skb); |
4341 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 4348 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; |
4342 | if (tcp_hdr(skb)->fin) | 4349 | if (tcp_hdr(skb)->fin) |
4343 | tcp_fin(sk); | 4350 | tcp_fin(skb, sk, tcp_hdr(skb)); |
4344 | } | 4351 | } |
4345 | } | 4352 | } |
4346 | 4353 | ||
4347 | static bool tcp_prune_ofo_queue(struct sock *sk); | 4354 | static int tcp_prune_ofo_queue(struct sock *sk); |
4348 | static int tcp_prune_queue(struct sock *sk); | 4355 | static int tcp_prune_queue(struct sock *sk); |
4349 | 4356 | ||
4350 | static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, | 4357 | static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) |
4351 | unsigned int size) | ||
4352 | { | 4358 | { |
4353 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || | 4359 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || |
4354 | !sk_rmem_schedule(sk, skb, size)) { | 4360 | !sk_rmem_schedule(sk, size)) { |
4355 | 4361 | ||
4356 | if (tcp_prune_queue(sk) < 0) | 4362 | if (tcp_prune_queue(sk) < 0) |
4357 | return -1; | 4363 | return -1; |
4358 | 4364 | ||
4359 | if (!sk_rmem_schedule(sk, skb, size)) { | 4365 | if (!sk_rmem_schedule(sk, size)) { |
4360 | if (!tcp_prune_ofo_queue(sk)) | 4366 | if (!tcp_prune_ofo_queue(sk)) |
4361 | return -1; | 4367 | return -1; |
4362 | 4368 | ||
4363 | if (!sk_rmem_schedule(sk, skb, size)) | 4369 | if (!sk_rmem_schedule(sk, size)) |
4364 | return -1; | 4370 | return -1; |
4365 | } | 4371 | } |
4366 | } | 4372 | } |
4367 | return 0; | 4373 | return 0; |
4368 | } | 4374 | } |
4369 | 4375 | ||
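tcp_try_rmem_schedule() above is an admission check with escalating fallbacks: charge the segment against the receive budget, and if that fails collapse the queues, then as a last resort purge the out-of-order queue before giving up. A loose userspace sketch of that control flow follows; the prune helpers are stand-ins that only pretend to reclaim memory, not the kernel functions.

/* Illustrative admission logic with prune fallbacks. */
#include <stdio.h>

struct rcv_budget {
    unsigned int used;   /* bytes currently charged       */
    unsigned int limit;  /* receive buffer limit (rcvbuf) */
};

static int try_charge(struct rcv_budget *b, unsigned int size)
{
    if (b->used + size > b->limit)
        return 0;
    b->used += size;
    return 1;
}

/* Stand-ins for tcp_prune_queue()/tcp_prune_ofo_queue(). */
static int prune_queue(struct rcv_budget *b)     { b->used /= 2; return 0; }
static int prune_ofo_queue(struct rcv_budget *b) { b->used = 0;  return 1; }

static int try_rmem_schedule(struct rcv_budget *b, unsigned int size)
{
    if (try_charge(b, size))
        return 0;
    if (prune_queue(b) < 0)
        return -1;                 /* could not reclaim anything */
    if (try_charge(b, size))
        return 0;
    if (!prune_ofo_queue(b))       /* last resort: drop out-of-order data */
        return -1;
    return try_charge(b, size) ? 0 : -1;
}

int main(void)
{
    struct rcv_budget b = { .used = 60000, .limit = 65536 };
    printf("%d\n", try_rmem_schedule(&b, 9000));  /* 0: fits after pruning */
    return 0;
}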
4370 | /** | ||
4371 | * tcp_try_coalesce - try to merge skb to prior one | ||
4372 | * @sk: socket | ||
4373 | * @to: prior buffer | ||
4374 | * @from: buffer to add in queue | ||
4375 | * @fragstolen: pointer to boolean | ||
4376 | * | ||
4377 | * Before queueing skb @from after @to, try to merge them | ||
4378 | * to reduce overall memory use and queue lengths, if cost is small. | ||
4379 | * Packets in ofo or receive queues can stay a long time. | ||
4380 | * Better try to coalesce them right now to avoid future collapses. | ||
4381 | * Returns true if caller should free @from instead of queueing it | ||
4382 | */ | ||
4383 | static bool tcp_try_coalesce(struct sock *sk, | ||
4384 | struct sk_buff *to, | ||
4385 | struct sk_buff *from, | ||
4386 | bool *fragstolen) | ||
4387 | { | ||
4388 | int delta; | ||
4389 | |||
4390 | *fragstolen = false; | ||
4391 | |||
4392 | if (tcp_hdr(from)->fin) | ||
4393 | return false; | ||
4394 | |||
4395 | /* It's possible this segment overlaps with a prior segment in the queue */ | ||
4396 | if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq) | ||
4397 | return false; | ||
4398 | |||
4399 | if (!skb_try_coalesce(to, from, fragstolen, &delta)) | ||
4400 | return false; | ||
4401 | |||
4402 | atomic_add(delta, &sk->sk_rmem_alloc); | ||
4403 | sk_mem_charge(sk, delta); | ||
4404 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE); | ||
4405 | TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; | ||
4406 | TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; | ||
4407 | return true; | ||
4408 | } | ||
4409 | |||
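The removed tcp_try_coalesce() above merges a segment into the previous buffer when it starts exactly where that buffer ends, saving per-skb overhead in the receive and out-of-order queues. A toy illustration of the contiguity check and merge, using plain byte buffers rather than sk_buffs:

/* Toy coalescing of contiguous segments; illustrative only. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct seg {
    uint32_t seq, end_seq;
    char data[256];
    size_t len;
};

/* Returns 1 when `from` was merged into `to` (caller frees `from`),
 * 0 when the segments are not contiguous or `to` is out of room. */
static int try_coalesce(struct seg *to, const struct seg *from)
{
    if (from->seq != to->end_seq)               /* not contiguous */
        return 0;
    if (to->len + from->len > sizeof(to->data)) /* would not fit  */
        return 0;
    memcpy(to->data + to->len, from->data, from->len);
    to->len += from->len;
    to->end_seq = from->end_seq;
    return 1;
}

int main(void)
{
    struct seg a = { .seq = 100, .end_seq = 105, .data = "hello",  .len = 5 };
    struct seg b = { .seq = 105, .end_seq = 111, .data = " world", .len = 6 };

    if (try_coalesce(&a, &b))
        printf("[%u,%u) \"%.*s\"\n", a.seq, a.end_seq, (int)a.len, a.data);
    return 0;
}

In the kernel the merge also has to transfer truesize accounting and the fragstolen bookkeeping, which the toy version skips.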
4410 | static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) | ||
4411 | { | ||
4412 | struct tcp_sock *tp = tcp_sk(sk); | ||
4413 | struct sk_buff *skb1; | ||
4414 | u32 seq, end_seq; | ||
4415 | |||
4416 | TCP_ECN_check_ce(tp, skb); | ||
4417 | |||
4418 | if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { | ||
4419 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP); | ||
4420 | __kfree_skb(skb); | ||
4421 | return; | ||
4422 | } | ||
4423 | |||
4424 | /* Disable header prediction. */ | ||
4425 | tp->pred_flags = 0; | ||
4426 | inet_csk_schedule_ack(sk); | ||
4427 | |||
4428 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); | ||
4429 | SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", | ||
4430 | tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); | ||
4431 | |||
4432 | skb1 = skb_peek_tail(&tp->out_of_order_queue); | ||
4433 | if (!skb1) { | ||
4434 | /* Initial out of order segment, build 1 SACK. */ | ||
4435 | if (tcp_is_sack(tp)) { | ||
4436 | tp->rx_opt.num_sacks = 1; | ||
4437 | tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; | ||
4438 | tp->selective_acks[0].end_seq = | ||
4439 | TCP_SKB_CB(skb)->end_seq; | ||
4440 | } | ||
4441 | __skb_queue_head(&tp->out_of_order_queue, skb); | ||
4442 | goto end; | ||
4443 | } | ||
4444 | |||
4445 | seq = TCP_SKB_CB(skb)->seq; | ||
4446 | end_seq = TCP_SKB_CB(skb)->end_seq; | ||
4447 | |||
4448 | if (seq == TCP_SKB_CB(skb1)->end_seq) { | ||
4449 | bool fragstolen; | ||
4450 | |||
4451 | if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { | ||
4452 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); | ||
4453 | } else { | ||
4454 | kfree_skb_partial(skb, fragstolen); | ||
4455 | skb = NULL; | ||
4456 | } | ||
4457 | |||
4458 | if (!tp->rx_opt.num_sacks || | ||
4459 | tp->selective_acks[0].end_seq != seq) | ||
4460 | goto add_sack; | ||
4461 | |||
4462 | /* Common case: data arrive in order after hole. */ | ||
4463 | tp->selective_acks[0].end_seq = end_seq; | ||
4464 | goto end; | ||
4465 | } | ||
4466 | |||
4467 | /* Find place to insert this segment. */ | ||
4468 | while (1) { | ||
4469 | if (!after(TCP_SKB_CB(skb1)->seq, seq)) | ||
4470 | break; | ||
4471 | if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) { | ||
4472 | skb1 = NULL; | ||
4473 | break; | ||
4474 | } | ||
4475 | skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1); | ||
4476 | } | ||
4477 | |||
4478 | /* Does skb overlap the previous one? */ | ||
4479 | if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { | ||
4480 | if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { | ||
4481 | /* All the bits are present. Drop. */ | ||
4482 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE); | ||
4483 | __kfree_skb(skb); | ||
4484 | skb = NULL; | ||
4485 | tcp_dsack_set(sk, seq, end_seq); | ||
4486 | goto add_sack; | ||
4487 | } | ||
4488 | if (after(seq, TCP_SKB_CB(skb1)->seq)) { | ||
4489 | /* Partial overlap. */ | ||
4490 | tcp_dsack_set(sk, seq, | ||
4491 | TCP_SKB_CB(skb1)->end_seq); | ||
4492 | } else { | ||
4493 | if (skb_queue_is_first(&tp->out_of_order_queue, | ||
4494 | skb1)) | ||
4495 | skb1 = NULL; | ||
4496 | else | ||
4497 | skb1 = skb_queue_prev( | ||
4498 | &tp->out_of_order_queue, | ||
4499 | skb1); | ||
4500 | } | ||
4501 | } | ||
4502 | if (!skb1) | ||
4503 | __skb_queue_head(&tp->out_of_order_queue, skb); | ||
4504 | else | ||
4505 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); | ||
4506 | |||
4507 | /* And clean segments covered by new one as whole. */ | ||
4508 | while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) { | ||
4509 | skb1 = skb_queue_next(&tp->out_of_order_queue, skb); | ||
4510 | |||
4511 | if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) | ||
4512 | break; | ||
4513 | if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { | ||
4514 | tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, | ||
4515 | end_seq); | ||
4516 | break; | ||
4517 | } | ||
4518 | __skb_unlink(skb1, &tp->out_of_order_queue); | ||
4519 | tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, | ||
4520 | TCP_SKB_CB(skb1)->end_seq); | ||
4521 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE); | ||
4522 | __kfree_skb(skb1); | ||
4523 | } | ||
4524 | |||
4525 | add_sack: | ||
4526 | if (tcp_is_sack(tp)) | ||
4527 | tcp_sack_new_ofo_skb(sk, seq, end_seq); | ||
4528 | end: | ||
4529 | if (skb) | ||
4530 | skb_set_owner_r(skb, sk); | ||
4531 | } | ||
4532 | |||
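The out-of-order queue code above (the removed tcp_data_queue_ofo() on the left, the equivalent inline block kept on the right) inserts each segment in sequence order, scanning backwards from the tail because data arriving after a single hole usually belongs at the end. Below is a compact sketch of just that ordered insert, with a toy list type and with overlap trimming and DSACK generation left out.

/* Ordered insert into a toy out-of-order list; illustrative only. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct oseg {
    uint32_t seq, end_seq;
    struct oseg *prev, *next;
};

struct oqueue { struct oseg *head, *tail; };

static int seq_after(uint32_t a, uint32_t b) { return (int32_t)(b - a) < 0; }

static void ofo_insert(struct oqueue *q, struct oseg *n)
{
    struct oseg *pos = q->tail;

    /* Walk backwards past every queued segment that starts after us. */
    while (pos && seq_after(pos->seq, n->seq))
        pos = pos->prev;

    n->prev = pos;
    n->next = pos ? pos->next : q->head;
    if (n->next)
        n->next->prev = n;
    else
        q->tail = n;
    if (pos)
        pos->next = n;
    else
        q->head = n;
}

int main(void)
{
    struct oqueue q = { 0 };
    uint32_t ranges[][2] = { {300, 400}, {100, 200}, {500, 600}, {200, 300} };

    for (size_t i = 0; i < 4; i++) {
        struct oseg *s = calloc(1, sizeof(*s));
        s->seq = ranges[i][0];
        s->end_seq = ranges[i][1];
        ofo_insert(&q, s);
    }
    for (struct oseg *s = q.head; s; s = s->next)
        printf("[%u,%u) ", s->seq, s->end_seq);
    printf("\n");   /* [100,200) [200,300) [300,400) [500,600) */
    return 0;
}

Scanning from the tail keeps the common case cheap; pathological interleavings still degrade to a linear walk, which is part of why much later kernels eventually moved this queue to a tree structure.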
4533 | static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen, | ||
4534 | bool *fragstolen) | ||
4535 | { | ||
4536 | int eaten; | ||
4537 | struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue); | ||
4538 | |||
4539 | __skb_pull(skb, hdrlen); | ||
4540 | eaten = (tail && | ||
4541 | tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0; | ||
4542 | tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | ||
4543 | if (!eaten) { | ||
4544 | __skb_queue_tail(&sk->sk_receive_queue, skb); | ||
4545 | skb_set_owner_r(skb, sk); | ||
4546 | } | ||
4547 | return eaten; | ||
4548 | } | ||
4549 | |||
4550 | int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) | ||
4551 | { | ||
4552 | struct sk_buff *skb = NULL; | ||
4553 | struct tcphdr *th; | ||
4554 | bool fragstolen; | ||
4555 | |||
4556 | if (size == 0) | ||
4557 | return 0; | ||
4558 | |||
4559 | skb = alloc_skb(size + sizeof(*th), sk->sk_allocation); | ||
4560 | if (!skb) | ||
4561 | goto err; | ||
4562 | |||
4563 | if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th))) | ||
4564 | goto err_free; | ||
4565 | |||
4566 | th = (struct tcphdr *)skb_put(skb, sizeof(*th)); | ||
4567 | skb_reset_transport_header(skb); | ||
4568 | memset(th, 0, sizeof(*th)); | ||
4569 | |||
4570 | if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size)) | ||
4571 | goto err_free; | ||
4572 | |||
4573 | TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; | ||
4574 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size; | ||
4575 | TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1; | ||
4576 | |||
4577 | if (tcp_queue_rcv(sk, skb, sizeof(*th), &fragstolen)) { | ||
4578 | WARN_ON_ONCE(fragstolen); /* should not happen */ | ||
4579 | __kfree_skb(skb); | ||
4580 | } | ||
4581 | return size; | ||
4582 | |||
4583 | err_free: | ||
4584 | kfree_skb(skb); | ||
4585 | err: | ||
4586 | return -ENOMEM; | ||
4587 | } | ||
4588 | |||
4589 | static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | 4376 | static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) |
4590 | { | 4377 | { |
4591 | const struct tcphdr *th = tcp_hdr(skb); | 4378 | struct tcphdr *th = tcp_hdr(skb); |
4592 | struct tcp_sock *tp = tcp_sk(sk); | 4379 | struct tcp_sock *tp = tcp_sk(sk); |
4593 | int eaten = -1; | 4380 | int eaten = -1; |
4594 | bool fragstolen = false; | ||
4595 | 4381 | ||
4596 | if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) | 4382 | if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) |
4597 | goto drop; | 4383 | goto drop; |
@@ -4633,16 +4419,17 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | |||
4633 | if (eaten <= 0) { | 4419 | if (eaten <= 0) { |
4634 | queue_and_out: | 4420 | queue_and_out: |
4635 | if (eaten < 0 && | 4421 | if (eaten < 0 && |
4636 | tcp_try_rmem_schedule(sk, skb, skb->truesize)) | 4422 | tcp_try_rmem_schedule(sk, skb->truesize)) |
4637 | goto drop; | 4423 | goto drop; |
4638 | 4424 | ||
4639 | eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); | 4425 | skb_set_owner_r(skb, sk); |
4426 | __skb_queue_tail(&sk->sk_receive_queue, skb); | ||
4640 | } | 4427 | } |
4641 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 4428 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; |
4642 | if (skb->len) | 4429 | if (skb->len) |
4643 | tcp_event_data_recv(sk, skb); | 4430 | tcp_event_data_recv(sk, skb); |
4644 | if (th->fin) | 4431 | if (th->fin) |
4645 | tcp_fin(sk); | 4432 | tcp_fin(skb, sk, th); |
4646 | 4433 | ||
4647 | if (!skb_queue_empty(&tp->out_of_order_queue)) { | 4434 | if (!skb_queue_empty(&tp->out_of_order_queue)) { |
4648 | tcp_ofo_queue(sk); | 4435 | tcp_ofo_queue(sk); |
@@ -4660,8 +4447,8 @@ queue_and_out: | |||
4660 | tcp_fast_path_check(sk); | 4447 | tcp_fast_path_check(sk); |
4661 | 4448 | ||
4662 | if (eaten > 0) | 4449 | if (eaten > 0) |
4663 | kfree_skb_partial(skb, fragstolen); | 4450 | __kfree_skb(skb); |
4664 | if (!sock_flag(sk, SOCK_DEAD)) | 4451 | else if (!sock_flag(sk, SOCK_DEAD)) |
4665 | sk->sk_data_ready(sk, 0); | 4452 | sk->sk_data_ready(sk, 0); |
4666 | return; | 4453 | return; |
4667 | } | 4454 | } |
@@ -4701,7 +4488,105 @@ drop: | |||
4701 | goto queue_and_out; | 4488 | goto queue_and_out; |
4702 | } | 4489 | } |
4703 | 4490 | ||
4704 | tcp_data_queue_ofo(sk, skb); | 4491 | TCP_ECN_check_ce(tp, skb); |
4492 | |||
4493 | if (tcp_try_rmem_schedule(sk, skb->truesize)) | ||
4494 | goto drop; | ||
4495 | |||
4496 | /* Disable header prediction. */ | ||
4497 | tp->pred_flags = 0; | ||
4498 | inet_csk_schedule_ack(sk); | ||
4499 | |||
4500 | SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", | ||
4501 | tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); | ||
4502 | |||
4503 | skb_set_owner_r(skb, sk); | ||
4504 | |||
4505 | if (!skb_peek(&tp->out_of_order_queue)) { | ||
4506 | /* Initial out of order segment, build 1 SACK. */ | ||
4507 | if (tcp_is_sack(tp)) { | ||
4508 | tp->rx_opt.num_sacks = 1; | ||
4509 | tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; | ||
4510 | tp->selective_acks[0].end_seq = | ||
4511 | TCP_SKB_CB(skb)->end_seq; | ||
4512 | } | ||
4513 | __skb_queue_head(&tp->out_of_order_queue, skb); | ||
4514 | } else { | ||
4515 | struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue); | ||
4516 | u32 seq = TCP_SKB_CB(skb)->seq; | ||
4517 | u32 end_seq = TCP_SKB_CB(skb)->end_seq; | ||
4518 | |||
4519 | if (seq == TCP_SKB_CB(skb1)->end_seq) { | ||
4520 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); | ||
4521 | |||
4522 | if (!tp->rx_opt.num_sacks || | ||
4523 | tp->selective_acks[0].end_seq != seq) | ||
4524 | goto add_sack; | ||
4525 | |||
4526 | /* Common case: data arrive in order after hole. */ | ||
4527 | tp->selective_acks[0].end_seq = end_seq; | ||
4528 | return; | ||
4529 | } | ||
4530 | |||
4531 | /* Find place to insert this segment. */ | ||
4532 | while (1) { | ||
4533 | if (!after(TCP_SKB_CB(skb1)->seq, seq)) | ||
4534 | break; | ||
4535 | if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) { | ||
4536 | skb1 = NULL; | ||
4537 | break; | ||
4538 | } | ||
4539 | skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1); | ||
4540 | } | ||
4541 | |||
4542 | /* Does skb overlap the previous one? */ | ||
4543 | if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { | ||
4544 | if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { | ||
4545 | /* All the bits are present. Drop. */ | ||
4546 | __kfree_skb(skb); | ||
4547 | tcp_dsack_set(sk, seq, end_seq); | ||
4548 | goto add_sack; | ||
4549 | } | ||
4550 | if (after(seq, TCP_SKB_CB(skb1)->seq)) { | ||
4551 | /* Partial overlap. */ | ||
4552 | tcp_dsack_set(sk, seq, | ||
4553 | TCP_SKB_CB(skb1)->end_seq); | ||
4554 | } else { | ||
4555 | if (skb_queue_is_first(&tp->out_of_order_queue, | ||
4556 | skb1)) | ||
4557 | skb1 = NULL; | ||
4558 | else | ||
4559 | skb1 = skb_queue_prev( | ||
4560 | &tp->out_of_order_queue, | ||
4561 | skb1); | ||
4562 | } | ||
4563 | } | ||
4564 | if (!skb1) | ||
4565 | __skb_queue_head(&tp->out_of_order_queue, skb); | ||
4566 | else | ||
4567 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); | ||
4568 | |||
4569 | /* And clean segments covered by new one as whole. */ | ||
4570 | while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) { | ||
4571 | skb1 = skb_queue_next(&tp->out_of_order_queue, skb); | ||
4572 | |||
4573 | if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) | ||
4574 | break; | ||
4575 | if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { | ||
4576 | tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, | ||
4577 | end_seq); | ||
4578 | break; | ||
4579 | } | ||
4580 | __skb_unlink(skb1, &tp->out_of_order_queue); | ||
4581 | tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, | ||
4582 | TCP_SKB_CB(skb1)->end_seq); | ||
4583 | __kfree_skb(skb1); | ||
4584 | } | ||
4585 | |||
4586 | add_sack: | ||
4587 | if (tcp_is_sack(tp)) | ||
4588 | tcp_sack_new_ofo_skb(sk, seq, end_seq); | ||
4589 | } | ||
4705 | } | 4590 | } |
4706 | 4591 | ||
4707 | static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, | 4592 | static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, |
@@ -4880,10 +4765,10 @@ static void tcp_collapse_ofo_queue(struct sock *sk) | |||
4880 | * Purge the out-of-order queue. | 4765 | * Purge the out-of-order queue. |
4881 | * Return true if queue was pruned. | 4766 | * Return true if queue was pruned. |
4882 | */ | 4767 | */ |
4883 | static bool tcp_prune_ofo_queue(struct sock *sk) | 4768 | static int tcp_prune_ofo_queue(struct sock *sk) |
4884 | { | 4769 | { |
4885 | struct tcp_sock *tp = tcp_sk(sk); | 4770 | struct tcp_sock *tp = tcp_sk(sk); |
4886 | bool res = false; | 4771 | int res = 0; |
4887 | 4772 | ||
4888 | if (!skb_queue_empty(&tp->out_of_order_queue)) { | 4773 | if (!skb_queue_empty(&tp->out_of_order_queue)) { |
4889 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED); | 4774 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED); |
@@ -4897,7 +4782,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk) | |||
4897 | if (tp->rx_opt.sack_ok) | 4782 | if (tp->rx_opt.sack_ok) |
4898 | tcp_sack_reset(&tp->rx_opt); | 4783 | tcp_sack_reset(&tp->rx_opt); |
4899 | sk_mem_reclaim(sk); | 4784 | sk_mem_reclaim(sk); |
4900 | res = true; | 4785 | res = 1; |
4901 | } | 4786 | } |
4902 | return res; | 4787 | return res; |
4903 | } | 4788 | } |
@@ -4919,7 +4804,7 @@ static int tcp_prune_queue(struct sock *sk) | |||
4919 | 4804 | ||
4920 | if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) | 4805 | if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) |
4921 | tcp_clamp_window(sk); | 4806 | tcp_clamp_window(sk); |
4922 | else if (sk_under_memory_pressure(sk)) | 4807 | else if (tcp_memory_pressure) |
4923 | tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); | 4808 | tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); |
4924 | 4809 | ||
4925 | tcp_collapse_ofo_queue(sk); | 4810 | tcp_collapse_ofo_queue(sk); |
@@ -4974,29 +4859,29 @@ void tcp_cwnd_application_limited(struct sock *sk) | |||
4974 | tp->snd_cwnd_stamp = tcp_time_stamp; | 4859 | tp->snd_cwnd_stamp = tcp_time_stamp; |
4975 | } | 4860 | } |
4976 | 4861 | ||
4977 | static bool tcp_should_expand_sndbuf(const struct sock *sk) | 4862 | static int tcp_should_expand_sndbuf(struct sock *sk) |
4978 | { | 4863 | { |
4979 | const struct tcp_sock *tp = tcp_sk(sk); | 4864 | struct tcp_sock *tp = tcp_sk(sk); |
4980 | 4865 | ||
4981 | /* If the user specified a specific send buffer setting, do | 4866 | /* If the user specified a specific send buffer setting, do |
4982 | * not modify it. | 4867 | * not modify it. |
4983 | */ | 4868 | */ |
4984 | if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) | 4869 | if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) |
4985 | return false; | 4870 | return 0; |
4986 | 4871 | ||
4987 | /* If we are under global TCP memory pressure, do not expand. */ | 4872 | /* If we are under global TCP memory pressure, do not expand. */ |
4988 | if (sk_under_memory_pressure(sk)) | 4873 | if (tcp_memory_pressure) |
4989 | return false; | 4874 | return 0; |
4990 | 4875 | ||
4991 | /* If we are under soft global TCP memory pressure, do not expand. */ | 4876 | /* If we are under soft global TCP memory pressure, do not expand. */ |
4992 | if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0)) | 4877 | if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0]) |
4993 | return false; | 4878 | return 0; |
4994 | 4879 | ||
4995 | /* If we filled the congestion window, do not expand. */ | 4880 | /* If we filled the congestion window, do not expand. */ |
4996 | if (tp->packets_out >= tp->snd_cwnd) | 4881 | if (tp->packets_out >= tp->snd_cwnd) |
4997 | return false; | 4882 | return 0; |
4998 | 4883 | ||
4999 | return true; | 4884 | return 1; |
5000 | } | 4885 | } |
5001 | 4886 | ||
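tcp_should_expand_sndbuf() above gates automatic send-buffer growth on four conditions: the user has not pinned the buffer, there is no global TCP memory pressure (hard or soft), and the congestion window still has room. Condensed into a standalone predicate with illustrative field names:

/* Sketch of the auto-grow policy; field names are invented. */
#include <stdbool.h>
#include <stdio.h>

struct snd_state {
    bool sndbuf_locked;      /* user set SO_SNDBUF explicitly   */
    bool memory_pressure;    /* global TCP memory pressure flag */
    long memory_allocated;   /* pages charged to TCP            */
    long mem_low_limit;      /* "soft" pressure threshold       */
    unsigned int packets_out;
    unsigned int snd_cwnd;
};

static bool should_expand_sndbuf(const struct snd_state *s)
{
    if (s->sndbuf_locked)
        return false;
    if (s->memory_pressure)
        return false;
    if (s->memory_allocated >= s->mem_low_limit)
        return false;
    if (s->packets_out >= s->snd_cwnd)
        return false;              /* cwnd already filled */
    return true;
}

int main(void)
{
    struct snd_state s = { .snd_cwnd = 10, .packets_out = 4,
                           .mem_low_limit = 1000, .memory_allocated = 100 };
    printf("%d\n", should_expand_sndbuf(&s));  /* 1 */
    return 0;
}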
5002 | /* When incoming ACK allowed to free some skb from write_queue, | 4887 | /* When incoming ACK allowed to free some skb from write_queue, |
@@ -5010,10 +4895,8 @@ static void tcp_new_space(struct sock *sk) | |||
5010 | struct tcp_sock *tp = tcp_sk(sk); | 4895 | struct tcp_sock *tp = tcp_sk(sk); |
5011 | 4896 | ||
5012 | if (tcp_should_expand_sndbuf(sk)) { | 4897 | if (tcp_should_expand_sndbuf(sk)) { |
5013 | int sndmem = SKB_TRUESIZE(max_t(u32, | 4898 | int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + |
5014 | tp->rx_opt.mss_clamp, | 4899 | MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); |
5015 | tp->mss_cache) + | ||
5016 | MAX_TCP_HEADER); | ||
5017 | int demanded = max_t(unsigned int, tp->snd_cwnd, | 4900 | int demanded = max_t(unsigned int, tp->snd_cwnd, |
5018 | tp->reordering + 1); | 4901 | tp->reordering + 1); |
5019 | sndmem *= 2 * demanded; | 4902 | sndmem *= 2 * demanded; |
@@ -5085,7 +4968,7 @@ static inline void tcp_ack_snd_check(struct sock *sk) | |||
5085 | * either form (or just set the sysctl tcp_stdurg). | 4968 | * either form (or just set the sysctl tcp_stdurg). |
5086 | */ | 4969 | */ |
5087 | 4970 | ||
5088 | static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) | 4971 | static void tcp_check_urg(struct sock *sk, struct tcphdr *th) |
5089 | { | 4972 | { |
5090 | struct tcp_sock *tp = tcp_sk(sk); | 4973 | struct tcp_sock *tp = tcp_sk(sk); |
5091 | u32 ptr = ntohs(th->urg_ptr); | 4974 | u32 ptr = ntohs(th->urg_ptr); |
@@ -5151,7 +5034,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) | |||
5151 | } | 5034 | } |
5152 | 5035 | ||
5153 | /* This is the 'fast' part of urgent handling. */ | 5036 | /* This is the 'fast' part of urgent handling. */ |
5154 | static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th) | 5037 | static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th) |
5155 | { | 5038 | { |
5156 | struct tcp_sock *tp = tcp_sk(sk); | 5039 | struct tcp_sock *tp = tcp_sk(sk); |
5157 | 5040 | ||
@@ -5214,7 +5097,7 @@ static __sum16 __tcp_checksum_complete_user(struct sock *sk, | |||
5214 | return result; | 5097 | return result; |
5215 | } | 5098 | } |
5216 | 5099 | ||
5217 | static inline bool tcp_checksum_complete_user(struct sock *sk, | 5100 | static inline int tcp_checksum_complete_user(struct sock *sk, |
5218 | struct sk_buff *skb) | 5101 | struct sk_buff *skb) |
5219 | { | 5102 | { |
5220 | return !skb_csum_unnecessary(skb) && | 5103 | return !skb_csum_unnecessary(skb) && |
@@ -5222,19 +5105,19 @@ static inline bool tcp_checksum_complete_user(struct sock *sk, | |||
5222 | } | 5105 | } |
5223 | 5106 | ||
5224 | #ifdef CONFIG_NET_DMA | 5107 | #ifdef CONFIG_NET_DMA |
5225 | static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, | 5108 | static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, |
5226 | int hlen) | 5109 | int hlen) |
5227 | { | 5110 | { |
5228 | struct tcp_sock *tp = tcp_sk(sk); | 5111 | struct tcp_sock *tp = tcp_sk(sk); |
5229 | int chunk = skb->len - hlen; | 5112 | int chunk = skb->len - hlen; |
5230 | int dma_cookie; | 5113 | int dma_cookie; |
5231 | bool copied_early = false; | 5114 | int copied_early = 0; |
5232 | 5115 | ||
5233 | if (tp->ucopy.wakeup) | 5116 | if (tp->ucopy.wakeup) |
5234 | return false; | 5117 | return 0; |
5235 | 5118 | ||
5236 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) | 5119 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) |
5237 | tp->ucopy.dma_chan = net_dma_find_channel(); | 5120 | tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY); |
5238 | 5121 | ||
5239 | if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) { | 5122 | if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) { |
5240 | 5123 | ||
@@ -5247,7 +5130,7 @@ static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, | |||
5247 | goto out; | 5130 | goto out; |
5248 | 5131 | ||
5249 | tp->ucopy.dma_cookie = dma_cookie; | 5132 | tp->ucopy.dma_cookie = dma_cookie; |
5250 | copied_early = true; | 5133 | copied_early = 1; |
5251 | 5134 | ||
5252 | tp->ucopy.len -= chunk; | 5135 | tp->ucopy.len -= chunk; |
5253 | tp->copied_seq += chunk; | 5136 | tp->copied_seq += chunk; |
@@ -5271,10 +5154,10 @@ out: | |||
5271 | /* Does PAWS and seqno based validation of an incoming segment, flags will | 5154 | /* Does PAWS and seqno based validation of an incoming segment, flags will |
5272 | * play significant role here. | 5155 | * play significant role here. |
5273 | */ | 5156 | */ |
5274 | static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, | 5157 | static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, |
5275 | const struct tcphdr *th, int syn_inerr) | 5158 | struct tcphdr *th, int syn_inerr) |
5276 | { | 5159 | { |
5277 | const u8 *hash_location; | 5160 | u8 *hash_location; |
5278 | struct tcp_sock *tp = tcp_sk(sk); | 5161 | struct tcp_sock *tp = tcp_sk(sk); |
5279 | 5162 | ||
5280 | /* RFC1323: H1. Apply PAWS check first. */ | 5163 | /* RFC1323: H1. Apply PAWS check first. */ |
@@ -5297,48 +5180,38 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, | |||
5297 | * an acknowledgment should be sent in reply (unless the RST | 5180 | * an acknowledgment should be sent in reply (unless the RST |
5298 | * bit is set, if so drop the segment and return)". | 5181 | * bit is set, if so drop the segment and return)". |
5299 | */ | 5182 | */ |
5300 | if (!th->rst) { | 5183 | if (!th->rst) |
5301 | if (th->syn) | ||
5302 | goto syn_challenge; | ||
5303 | tcp_send_dupack(sk, skb); | 5184 | tcp_send_dupack(sk, skb); |
5304 | } | ||
5305 | goto discard; | 5185 | goto discard; |
5306 | } | 5186 | } |
5307 | 5187 | ||
5308 | /* Step 2: check RST bit */ | 5188 | /* Step 2: check RST bit */ |
5309 | if (th->rst) { | 5189 | if (th->rst) { |
5310 | /* RFC 5961 3.2 : | 5190 | tcp_reset(sk); |
5311 | * If sequence number exactly matches RCV.NXT, then | ||
5312 | * RESET the connection | ||
5313 | * else | ||
5314 | * Send a challenge ACK | ||
5315 | */ | ||
5316 | if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) | ||
5317 | tcp_reset(sk); | ||
5318 | else | ||
5319 | tcp_send_challenge_ack(sk); | ||
5320 | goto discard; | 5191 | goto discard; |
5321 | } | 5192 | } |
5322 | 5193 | ||
5194 | /* ts_recent update must be made after we are sure that the packet | ||
5195 | * is in window. | ||
5196 | */ | ||
5197 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
5198 | |||
5323 | /* step 3: check security and precedence [ignored] */ | 5199 | /* step 3: check security and precedence [ignored] */ |
5324 | 5200 | ||
5325 | /* step 4: Check for a SYN | 5201 | /* step 4: Check for a SYN in window. */ |
5326 | * RFC 5691 4.2 : Send a challenge ack | 5202 | if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { |
5327 | */ | ||
5328 | if (th->syn) { | ||
5329 | syn_challenge: | ||
5330 | if (syn_inerr) | 5203 | if (syn_inerr) |
5331 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); | 5204 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); |
5332 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); | 5205 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); |
5333 | tcp_send_challenge_ack(sk); | 5206 | tcp_reset(sk); |
5334 | goto discard; | 5207 | return -1; |
5335 | } | 5208 | } |
5336 | 5209 | ||
5337 | return true; | 5210 | return 1; |
5338 | 5211 | ||
5339 | discard: | 5212 | discard: |
5340 | __kfree_skb(skb); | 5213 | __kfree_skb(skb); |
5341 | return false; | 5214 | return 0; |
5342 | } | 5215 | } |
5343 | 5216 | ||
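tcp_validate_incoming() is where the two versions in this hunk differ most: the newer code (left column) follows RFC 5961 and answers suspicious in-window RSTs and SYNs with a rate-limited challenge ACK, while the older code (right column) resets the connection outright. Below is a decision-table sketch of the newer behaviour, with the PAWS step and the actual ACK transmission omitted; the enum and helpers are illustrative.

/* Decision sketch of RFC 5961-style segment validation. */
#include <stdint.h>
#include <stdio.h>

enum verdict { ACCEPT, SEND_DUPACK, DO_RESET, CHALLENGE_ACK, DISCARD };

static int seq_before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
static int seq_after(uint32_t a, uint32_t b)  { return seq_before(b, a); }

static int in_window(uint32_t seq, uint32_t end_seq,
                     uint32_t rcv_wup, uint32_t rcv_nxt, uint32_t rcv_wnd)
{
    return !seq_before(end_seq, rcv_wup) &&
           !seq_after(seq, rcv_nxt + rcv_wnd);
}

static enum verdict validate_incoming(int rst, int syn,
                                      uint32_t seq, uint32_t end_seq,
                                      uint32_t rcv_wup, uint32_t rcv_nxt,
                                      uint32_t rcv_wnd)
{
    if (!in_window(seq, end_seq, rcv_wup, rcv_nxt, rcv_wnd)) {
        if (rst)                                  /* step 1: out of window */
            return DISCARD;
        return syn ? CHALLENGE_ACK : SEND_DUPACK;
    }
    if (rst)                                      /* step 2: RST bit */
        return seq == rcv_nxt ? DO_RESET : CHALLENGE_ACK;
    if (syn)                                      /* step 4: SYN in window */
        return CHALLENGE_ACK;
    return ACCEPT;
}

int main(void)
{
    /* window [1000, 1000+65535); an RST at seq 2000 is suspicious */
    printf("%d\n", validate_incoming(1, 0, 2000, 2000, 1000, 1000, 65535)
                   == CHALLENGE_ACK);   /* 1 */
    printf("%d\n", validate_incoming(1, 0, 1000, 1000, 1000, 1000, 65535)
                   == DO_RESET);        /* 1 */
    return 0;
}

Requiring the RST to land exactly on rcv_nxt makes blind reset injection much harder, since an attacker must guess the precise sequence number rather than any value inside the receive window.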
5344 | /* | 5217 | /* |
@@ -5365,12 +5238,11 @@ discard: | |||
5365 | * tcp_data_queue when everything is OK. | 5238 | * tcp_data_queue when everything is OK. |
5366 | */ | 5239 | */ |
5367 | int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | 5240 | int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, |
5368 | const struct tcphdr *th, unsigned int len) | 5241 | struct tcphdr *th, unsigned len) |
5369 | { | 5242 | { |
5370 | struct tcp_sock *tp = tcp_sk(sk); | 5243 | struct tcp_sock *tp = tcp_sk(sk); |
5244 | int res; | ||
5371 | 5245 | ||
5372 | if (unlikely(sk->sk_rx_dst == NULL)) | ||
5373 | inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); | ||
5374 | /* | 5246 | /* |
5375 | * Header prediction. | 5247 | * Header prediction. |
5376 | * The code loosely follows the one in the famous | 5248 | * The code loosely follows the one in the famous |
@@ -5450,14 +5322,11 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
5450 | } else { | 5322 | } else { |
5451 | int eaten = 0; | 5323 | int eaten = 0; |
5452 | int copied_early = 0; | 5324 | int copied_early = 0; |
5453 | bool fragstolen = false; | ||
5454 | 5325 | ||
5455 | if (tp->copied_seq == tp->rcv_nxt && | 5326 | if (tp->copied_seq == tp->rcv_nxt && |
5456 | len - tcp_header_len <= tp->ucopy.len) { | 5327 | len - tcp_header_len <= tp->ucopy.len) { |
5457 | #ifdef CONFIG_NET_DMA | 5328 | #ifdef CONFIG_NET_DMA |
5458 | if (tp->ucopy.task == current && | 5329 | if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { |
5459 | sock_owned_by_user(sk) && | ||
5460 | tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { | ||
5461 | copied_early = 1; | 5330 | copied_early = 1; |
5462 | eaten = 1; | 5331 | eaten = 1; |
5463 | } | 5332 | } |
@@ -5510,8 +5379,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
5510 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); | 5379 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); |
5511 | 5380 | ||
5512 | /* Bulk data transfer: receiver */ | 5381 | /* Bulk data transfer: receiver */ |
5513 | eaten = tcp_queue_rcv(sk, skb, tcp_header_len, | 5382 | __skb_pull(skb, tcp_header_len); |
5514 | &fragstolen); | 5383 | __skb_queue_tail(&sk->sk_receive_queue, skb); |
5384 | skb_set_owner_r(skb, sk); | ||
5385 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | ||
5515 | } | 5386 | } |
5516 | 5387 | ||
5517 | tcp_event_data_recv(sk, skb); | 5388 | tcp_event_data_recv(sk, skb); |
@@ -5533,8 +5404,9 @@ no_ack: | |||
5533 | else | 5404 | else |
5534 | #endif | 5405 | #endif |
5535 | if (eaten) | 5406 | if (eaten) |
5536 | kfree_skb_partial(skb, fragstolen); | 5407 | __kfree_skb(skb); |
5537 | sk->sk_data_ready(sk, 0); | 5408 | else |
5409 | sk->sk_data_ready(sk, 0); | ||
5538 | return 0; | 5410 | return 0; |
5539 | } | 5411 | } |
5540 | } | 5412 | } |
@@ -5543,25 +5415,18 @@ slow_path: | |||
5543 | if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb)) | 5415 | if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb)) |
5544 | goto csum_error; | 5416 | goto csum_error; |
5545 | 5417 | ||
5546 | if (!th->ack && !th->rst) | ||
5547 | goto discard; | ||
5548 | |||
5549 | /* | 5418 | /* |
5550 | * Standard slow path. | 5419 | * Standard slow path. |
5551 | */ | 5420 | */ |
5552 | 5421 | ||
5553 | if (!tcp_validate_incoming(sk, skb, th, 1)) | 5422 | res = tcp_validate_incoming(sk, skb, th, 1); |
5554 | return 0; | 5423 | if (res <= 0) |
5424 | return -res; | ||
5555 | 5425 | ||
5556 | step5: | 5426 | step5: |
5557 | if (tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) | 5427 | if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) |
5558 | goto discard; | 5428 | goto discard; |
5559 | 5429 | ||
5560 | /* ts_recent update must be made after we are sure that the packet | ||
5561 | * is in window. | ||
5562 | */ | ||
5563 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
5564 | |||
5565 | tcp_rcv_rtt_measure_ts(sk, skb); | 5430 | tcp_rcv_rtt_measure_ts(sk, skb); |
5566 | 5431 | ||
5567 | /* Process urgent data. */ | 5432 | /* Process urgent data. */ |
@@ -5583,101 +5448,16 @@ discard: | |||
5583 | } | 5448 | } |
5584 | EXPORT_SYMBOL(tcp_rcv_established); | 5449 | EXPORT_SYMBOL(tcp_rcv_established); |
5585 | 5450 | ||
5586 | void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) | ||
5587 | { | ||
5588 | struct tcp_sock *tp = tcp_sk(sk); | ||
5589 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
5590 | |||
5591 | tcp_set_state(sk, TCP_ESTABLISHED); | ||
5592 | |||
5593 | if (skb != NULL) { | ||
5594 | icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); | ||
5595 | security_inet_conn_established(sk, skb); | ||
5596 | } | ||
5597 | |||
5598 | /* Make sure socket is routed, for correct metrics. */ | ||
5599 | icsk->icsk_af_ops->rebuild_header(sk); | ||
5600 | |||
5601 | tcp_init_metrics(sk); | ||
5602 | |||
5603 | tcp_init_congestion_control(sk); | ||
5604 | |||
5605 | /* Prevent spurious tcp_cwnd_restart() on first data | ||
5606 | * packet. | ||
5607 | */ | ||
5608 | tp->lsndtime = tcp_time_stamp; | ||
5609 | |||
5610 | tcp_init_buffer_space(sk); | ||
5611 | |||
5612 | if (sock_flag(sk, SOCK_KEEPOPEN)) | ||
5613 | inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); | ||
5614 | |||
5615 | if (!tp->rx_opt.snd_wscale) | ||
5616 | __tcp_fast_path_on(tp, tp->snd_wnd); | ||
5617 | else | ||
5618 | tp->pred_flags = 0; | ||
5619 | |||
5620 | if (!sock_flag(sk, SOCK_DEAD)) { | ||
5621 | sk->sk_state_change(sk); | ||
5622 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); | ||
5623 | } | ||
5624 | } | ||
5625 | |||
5626 | static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, | ||
5627 | struct tcp_fastopen_cookie *cookie) | ||
5628 | { | ||
5629 | struct tcp_sock *tp = tcp_sk(sk); | ||
5630 | struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL; | ||
5631 | u16 mss = tp->rx_opt.mss_clamp; | ||
5632 | bool syn_drop; | ||
5633 | |||
5634 | if (mss == tp->rx_opt.user_mss) { | ||
5635 | struct tcp_options_received opt; | ||
5636 | const u8 *hash_location; | ||
5637 | |||
5638 | /* Get original SYNACK MSS value if user MSS sets mss_clamp */ | ||
5639 | tcp_clear_options(&opt); | ||
5640 | opt.user_mss = opt.mss_clamp = 0; | ||
5641 | tcp_parse_options(synack, &opt, &hash_location, 0, NULL); | ||
5642 | mss = opt.mss_clamp; | ||
5643 | } | ||
5644 | |||
5645 | if (!tp->syn_fastopen) /* Ignore an unsolicited cookie */ | ||
5646 | cookie->len = -1; | ||
5647 | |||
5648 | /* The SYN-ACK neither has cookie nor acknowledges the data. Presumably | ||
5649 | * the remote receives only the retransmitted (regular) SYNs: either | ||
5650 | * the original SYN-data or the corresponding SYN-ACK is lost. | ||
5651 | */ | ||
5652 | syn_drop = (cookie->len <= 0 && data && | ||
5653 | inet_csk(sk)->icsk_retransmits); | ||
5654 | |||
5655 | tcp_fastopen_cache_set(sk, mss, cookie, syn_drop); | ||
5656 | |||
5657 | if (data) { /* Retransmit unacked data in SYN */ | ||
5658 | tcp_for_write_queue_from(data, sk) { | ||
5659 | if (data == tcp_send_head(sk) || | ||
5660 | __tcp_retransmit_skb(sk, data)) | ||
5661 | break; | ||
5662 | } | ||
5663 | tcp_rearm_rto(sk); | ||
5664 | return true; | ||
5665 | } | ||
5666 | tp->syn_data_acked = tp->syn_data; | ||
5667 | return false; | ||
5668 | } | ||
5669 | |||
5670 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | 5451 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, |
5671 | const struct tcphdr *th, unsigned int len) | 5452 | struct tcphdr *th, unsigned len) |
5672 | { | 5453 | { |
5673 | const u8 *hash_location; | 5454 | u8 *hash_location; |
5674 | struct inet_connection_sock *icsk = inet_csk(sk); | 5455 | struct inet_connection_sock *icsk = inet_csk(sk); |
5675 | struct tcp_sock *tp = tcp_sk(sk); | 5456 | struct tcp_sock *tp = tcp_sk(sk); |
5676 | struct tcp_cookie_values *cvp = tp->cookie_values; | 5457 | struct tcp_cookie_values *cvp = tp->cookie_values; |
5677 | struct tcp_fastopen_cookie foc = { .len = -1 }; | ||
5678 | int saved_clamp = tp->rx_opt.mss_clamp; | 5458 | int saved_clamp = tp->rx_opt.mss_clamp; |
5679 | 5459 | ||
5680 | tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc); | 5460 | tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0); |
5681 | 5461 | ||
5682 | if (th->ack) { | 5462 | if (th->ack) { |
5683 | /* rfc793: | 5463 | /* rfc793: |
@@ -5687,9 +5467,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5687 | * If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send | 5467 | * If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send |
5688 | * a reset (unless the RST bit is set, if so drop | 5468 | * a reset (unless the RST bit is set, if so drop |
5689 | * the segment and return)" | 5469 | * the segment and return)" |
5470 | * | ||
5471 | * We do not send data with SYN, so that RFC-correct | ||
5472 | * test reduces to: | ||
5690 | */ | 5473 | */ |
5691 | if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) || | 5474 | if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt) |
5692 | after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) | ||
5693 | goto reset_and_undo; | 5475 | goto reset_and_undo; |
5694 | 5476 | ||
5695 | if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && | 5477 | if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && |
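The hunk above changes the ACK acceptability test run in SYN-SENT. The newer code (left column) applies the general RFC 793 rule SND.UNA < SEG.ACK <= SND.NXT, presumably so that SYNs carrying data are also covered, while the older code (right column) could insist on SEG.ACK == SND.NXT because it never sent data on the SYN. A small sketch of the general test with a wrap-safe comparison helper (names invented):

/* SYN-SENT ACK acceptability sketch; illustrative only. */
#include <stdint.h>
#include <stdio.h>

static int seq_after(uint32_t a, uint32_t b) { return (int32_t)(b - a) < 0; }

static int synack_ack_acceptable(uint32_t ack_seq, uint32_t snd_una,
                                 uint32_t snd_nxt)
{
    return seq_after(ack_seq, snd_una) && !seq_after(ack_seq, snd_nxt);
}

int main(void)
{
    uint32_t iss = 5000;            /* initial send sequence number      */
    uint32_t snd_una = iss;         /* nothing ACKed yet in SYN-SENT     */
    uint32_t snd_nxt = iss + 1;     /* the SYN consumes one sequence no. */

    printf("%d\n", synack_ack_acceptable(iss + 1, snd_una, snd_nxt)); /* 1 */
    printf("%d\n", synack_ack_acceptable(iss,     snd_una, snd_nxt)); /* 0 */
    return 0;
}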
@@ -5731,7 +5513,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5731 | 5513 | ||
5732 | TCP_ECN_rcv_synack(tp, th); | 5514 | TCP_ECN_rcv_synack(tp, th); |
5733 | 5515 | ||
5734 | tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); | 5516 | tp->snd_wl1 = TCP_SKB_CB(skb)->seq; |
5735 | tcp_ack(sk, skb, FLAG_SLOWPATH); | 5517 | tcp_ack(sk, skb, FLAG_SLOWPATH); |
5736 | 5518 | ||
5737 | /* Ok.. it's good. Set up sequence numbers and | 5519 | /* Ok.. it's good. Set up sequence numbers and |
@@ -5744,6 +5526,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5744 | * never scaled. | 5526 | * never scaled. |
5745 | */ | 5527 | */ |
5746 | tp->snd_wnd = ntohs(th->window); | 5528 | tp->snd_wnd = ntohs(th->window); |
5529 | tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); | ||
5747 | 5530 | ||
5748 | if (!tp->rx_opt.wscale_ok) { | 5531 | if (!tp->rx_opt.wscale_ok) { |
5749 | tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; | 5532 | tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; |
@@ -5797,12 +5580,36 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5797 | } | 5580 | } |
5798 | 5581 | ||
5799 | smp_mb(); | 5582 | smp_mb(); |
5583 | tcp_set_state(sk, TCP_ESTABLISHED); | ||
5800 | 5584 | ||
5801 | tcp_finish_connect(sk, skb); | 5585 | security_inet_conn_established(sk, skb); |
5802 | 5586 | ||
5803 | if ((tp->syn_fastopen || tp->syn_data) && | 5587 | /* Make sure socket is routed, for correct metrics. */ |
5804 | tcp_rcv_fastopen_synack(sk, skb, &foc)) | 5588 | icsk->icsk_af_ops->rebuild_header(sk); |
5805 | return -1; | 5589 | |
5590 | tcp_init_metrics(sk); | ||
5591 | |||
5592 | tcp_init_congestion_control(sk); | ||
5593 | |||
5594 | /* Prevent spurious tcp_cwnd_restart() on first data | ||
5595 | * packet. | ||
5596 | */ | ||
5597 | tp->lsndtime = tcp_time_stamp; | ||
5598 | |||
5599 | tcp_init_buffer_space(sk); | ||
5600 | |||
5601 | if (sock_flag(sk, SOCK_KEEPOPEN)) | ||
5602 | inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); | ||
5603 | |||
5604 | if (!tp->rx_opt.snd_wscale) | ||
5605 | __tcp_fast_path_on(tp, tp->snd_wnd); | ||
5606 | else | ||
5607 | tp->pred_flags = 0; | ||
5608 | |||
5609 | if (!sock_flag(sk, SOCK_DEAD)) { | ||
5610 | sk->sk_state_change(sk); | ||
5611 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); | ||
5612 | } | ||
5806 | 5613 | ||
5807 | if (sk->sk_write_pending || | 5614 | if (sk->sk_write_pending || |
5808 | icsk->icsk_accept_queue.rskq_defer_accept || | 5615 | icsk->icsk_accept_queue.rskq_defer_accept || |
@@ -5816,6 +5623,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5816 | */ | 5623 | */ |
5817 | inet_csk_schedule_ack(sk); | 5624 | inet_csk_schedule_ack(sk); |
5818 | icsk->icsk_ack.lrcvtime = tcp_time_stamp; | 5625 | icsk->icsk_ack.lrcvtime = tcp_time_stamp; |
5626 | icsk->icsk_ack.ato = TCP_ATO_MIN; | ||
5627 | tcp_incr_quickack(sk); | ||
5819 | tcp_enter_quickack_mode(sk); | 5628 | tcp_enter_quickack_mode(sk); |
5820 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | 5629 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, |
5821 | TCP_DELACK_MAX, TCP_RTO_MAX); | 5630 | TCP_DELACK_MAX, TCP_RTO_MAX); |
@@ -5881,9 +5690,7 @@ discard: | |||
5881 | tcp_send_synack(sk); | 5690 | tcp_send_synack(sk); |
5882 | #if 0 | 5691 | #if 0 |
5883 | /* Note, we could accept data and URG from this segment. | 5692 | /* Note, we could accept data and URG from this segment. |
5884 | * There are no obstacles to make this (except that we must | 5693 | * There are no obstacles to make this. |
5885 | * either change tcp_recvmsg() to prevent it from returning data | ||
5886 | * before 3WHS completes per RFC793, or employ TCP Fast Open). | ||
5887 | * | 5694 | * |
5888 | * However, if we ignore data in ACKless segments sometimes, | 5695 | * However, if we ignore data in ACKless segments sometimes, |
5889 | * we have no reasons to accept it sometimes. | 5696 | * we have no reasons to accept it sometimes. |
@@ -5919,12 +5726,12 @@ reset_and_undo: | |||
5919 | */ | 5726 | */ |
5920 | 5727 | ||
5921 | int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | 5728 | int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, |
5922 | const struct tcphdr *th, unsigned int len) | 5729 | struct tcphdr *th, unsigned len) |
5923 | { | 5730 | { |
5924 | struct tcp_sock *tp = tcp_sk(sk); | 5731 | struct tcp_sock *tp = tcp_sk(sk); |
5925 | struct inet_connection_sock *icsk = inet_csk(sk); | 5732 | struct inet_connection_sock *icsk = inet_csk(sk); |
5926 | struct request_sock *req; | ||
5927 | int queued = 0; | 5733 | int queued = 0; |
5734 | int res; | ||
5928 | 5735 | ||
5929 | tp->rx_opt.saw_tstamp = 0; | 5736 | tp->rx_opt.saw_tstamp = 0; |
5930 | 5737 | ||
@@ -5940,8 +5747,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5940 | goto discard; | 5747 | goto discard; |
5941 | 5748 | ||
5942 | if (th->syn) { | 5749 | if (th->syn) { |
5943 | if (th->fin) | ||
5944 | goto discard; | ||
5945 | if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) | 5750 | if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) |
5946 | return 1; | 5751 | return 1; |
5947 | 5752 | ||
@@ -5979,47 +5784,18 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5979 | return 0; | 5784 | return 0; |
5980 | } | 5785 | } |
5981 | 5786 | ||
5982 | req = tp->fastopen_rsk; | 5787 | res = tcp_validate_incoming(sk, skb, th, 0); |
5983 | if (req != NULL) { | 5788 | if (res <= 0) |
5984 | WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && | 5789 | return -res; |
5985 | sk->sk_state != TCP_FIN_WAIT1); | ||
5986 | |||
5987 | if (tcp_check_req(sk, skb, req, NULL, true) == NULL) | ||
5988 | goto discard; | ||
5989 | } | ||
5990 | |||
5991 | if (!th->ack && !th->rst) | ||
5992 | goto discard; | ||
5993 | |||
5994 | if (!tcp_validate_incoming(sk, skb, th, 0)) | ||
5995 | return 0; | ||
5996 | 5790 | ||
5997 | /* step 5: check the ACK field */ | 5791 | /* step 5: check the ACK field */ |
5998 | if (true) { | 5792 | if (th->ack) { |
5999 | int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; | 5793 | int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; |
6000 | 5794 | ||
6001 | switch (sk->sk_state) { | 5795 | switch (sk->sk_state) { |
6002 | case TCP_SYN_RECV: | 5796 | case TCP_SYN_RECV: |
6003 | if (acceptable) { | 5797 | if (acceptable) { |
6004 | /* Once we leave TCP_SYN_RECV, we no longer | 5798 | tp->copied_seq = tp->rcv_nxt; |
6005 | * need req so release it. | ||
6006 | */ | ||
6007 | if (req) { | ||
6008 | tcp_synack_rtt_meas(sk, req); | ||
6009 | tp->total_retrans = req->num_retrans; | ||
6010 | |||
6011 | reqsk_fastopen_remove(sk, req, false); | ||
6012 | } else { | ||
6013 | /* Make sure socket is routed, for | ||
6014 | * correct metrics. | ||
6015 | */ | ||
6016 | icsk->icsk_af_ops->rebuild_header(sk); | ||
6017 | tcp_init_congestion_control(sk); | ||
6018 | |||
6019 | tcp_mtup_init(sk); | ||
6020 | tcp_init_buffer_space(sk); | ||
6021 | tp->copied_seq = tp->rcv_nxt; | ||
6022 | } | ||
6023 | smp_mb(); | 5799 | smp_mb(); |
6024 | tcp_set_state(sk, TCP_ESTABLISHED); | 5800 | tcp_set_state(sk, TCP_ESTABLISHED); |
6025 | sk->sk_state_change(sk); | 5801 | sk->sk_state_change(sk); |
@@ -6041,27 +5817,23 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
6041 | if (tp->rx_opt.tstamp_ok) | 5817 | if (tp->rx_opt.tstamp_ok) |
6042 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; | 5818 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; |
6043 | 5819 | ||
6044 | if (req) { | 5820 | /* Make sure socket is routed, for |
6045 | /* Re-arm the timer because data may | 5821 | * correct metrics. |
6046 | * have been sent out. This is similar | 5822 | */ |
6047 | * to the regular data transmission case | 5823 | icsk->icsk_af_ops->rebuild_header(sk); |
6048 | * when new data has just been ack'ed. | 5824 | |
6049 | * | 5825 | tcp_init_metrics(sk); |
6050 | * (TFO) - we could try to be more | 5826 | |
6051 | * aggressive and retransmitting any data | ||
6052 | * sooner based on when they were sent | ||
6053 | * out. | ||
6054 | */ | ||
6055 | tcp_rearm_rto(sk); | ||
6056 | } else | ||
6057 | tcp_init_metrics(sk); | ||
6058 | 5828 | ||
6059 | /* Prevent spurious tcp_cwnd_restart() on | 5829 | /* Prevent spurious tcp_cwnd_restart() on |
6060 | * first data packet. | 5830 | * first data packet. |
6061 | */ | 5831 | */ |
6062 | tp->lsndtime = tcp_time_stamp; | 5832 | tp->lsndtime = tcp_time_stamp; |
6063 | 5833 | ||
5834 | tcp_mtup_init(sk); | ||
6064 | tcp_initialize_rcv_mss(sk); | 5835 | tcp_initialize_rcv_mss(sk); |
5836 | tcp_init_buffer_space(sk); | ||
6065 | tcp_fast_path_on(tp); | 5837 | tcp_fast_path_on(tp); |
6066 | } else { | 5838 | } else { |
6067 | return 1; | 5839 | return 1; |
@@ -6069,33 +5841,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
6069 | break; | 5841 | break; |
6070 | 5842 | ||
6071 | case TCP_FIN_WAIT1: | 5843 | case TCP_FIN_WAIT1: |
6072 | /* If we enter the TCP_FIN_WAIT1 state and we are a | ||
6073 | * Fast Open socket and this is the first acceptable | ||
6074 | * ACK we have received, this would have acknowledged | ||
6075 | * our SYNACK so stop the SYNACK timer. | ||
6076 | */ | ||
6077 | if (req != NULL) { | ||
6078 | /* Return RST if ack_seq is invalid. | ||
6079 | * Note that RFC793 only says to generate a | ||
6080 | * DUPACK for it but for TCP Fast Open it seems | ||
6081 | * better to treat this case like TCP_SYN_RECV | ||
6082 | * above. | ||
6083 | */ | ||
6084 | if (!acceptable) | ||
6085 | return 1; | ||
6086 | /* We no longer need the request sock. */ | ||
6087 | reqsk_fastopen_remove(sk, req, false); | ||
6088 | tcp_rearm_rto(sk); | ||
6089 | } | ||
6090 | if (tp->snd_una == tp->write_seq) { | 5844 | if (tp->snd_una == tp->write_seq) { |
6091 | struct dst_entry *dst; | ||
6092 | |||
6093 | tcp_set_state(sk, TCP_FIN_WAIT2); | 5845 | tcp_set_state(sk, TCP_FIN_WAIT2); |
6094 | sk->sk_shutdown |= SEND_SHUTDOWN; | 5846 | sk->sk_shutdown |= SEND_SHUTDOWN; |
6095 | 5847 | dst_confirm(__sk_dst_get(sk)); | |
6096 | dst = __sk_dst_get(sk); | ||
6097 | if (dst) | ||
6098 | dst_confirm(dst); | ||
6099 | 5848 | ||
6100 | if (!sock_flag(sk, SOCK_DEAD)) | 5849 | if (!sock_flag(sk, SOCK_DEAD)) |
6101 | /* Wake up lingering close() */ | 5850 | /* Wake up lingering close() */ |
@@ -6145,12 +5894,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
6145 | } | 5894 | } |
6146 | break; | 5895 | break; |
6147 | } | 5896 | } |
6148 | } | 5897 | } else |
6149 | 5898 | goto discard; | |
6150 | /* ts_recent update must be made after we are sure that the packet | ||
6151 | * is in window. | ||
6152 | */ | ||
6153 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
6154 | 5899 | ||
6155 | /* step 6: check the URG bit */ | 5900 | /* step 6: check the URG bit */ |
6156 | tcp_urg(sk, skb, th); | 5901 | tcp_urg(sk, skb, th); |