path: root/net/ipv4/tcp_input.c
author		James Morris <james.l.morris@oracle.com>	2014-11-19 05:32:12 -0500
committer	James Morris <james.l.morris@oracle.com>	2014-11-19 05:32:12 -0500
commit		b10778a00d40b3d9fdaaf5891e802794781ff71c (patch)
tree		6ba4cbac86eecedc3f30650e7f764ecf00c83898 /net/ipv4/tcp_input.c
parent		594081ee7145cc30a3977cb4e218f81213b63dc5 (diff)
parent		bfe01a5ba2490f299e1d2d5508cbbbadd897bbe9 (diff)
Merge commit 'v3.17' into next
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c | 211
1 file changed, 187 insertions(+), 24 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 40639c288dc2..a906e0200ff2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -74,6 +74,7 @@
 #include <linux/ipsec.h>
 #include <asm/unaligned.h>
 #include <net/netdma.h>
+#include <linux/errqueue.h>
 
 int sysctl_tcp_timestamps __read_mostly = 1;
 int sysctl_tcp_window_scaling __read_mostly = 1;
@@ -1904,16 +1905,17 @@ void tcp_clear_retrans(struct tcp_sock *tp)
 	tp->sacked_out = 0;
 }
 
-/* Enter Loss state. If "how" is not zero, forget all SACK information
+/* Enter Loss state. If we detect SACK reneging, forget all SACK information
  * and reset tags completely, otherwise preserve SACKs. If receiver
  * dropped its ofo queue, we will know this due to reneging detection.
  */
-void tcp_enter_loss(struct sock *sk, int how)
+void tcp_enter_loss(struct sock *sk)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	bool new_recovery = false;
+	bool is_reneg;			/* is receiver reneging on SACKs? */
 
 	/* Reduce ssthresh if it has not yet been made inside this window. */
 	if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
@@ -1934,7 +1936,11 @@ void tcp_enter_loss(struct sock *sk, int how)
 		tcp_reset_reno_sack(tp);
 
 	tp->undo_marker = tp->snd_una;
-	if (how) {
+
+	skb = tcp_write_queue_head(sk);
+	is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
+	if (is_reneg) {
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
 		tp->sacked_out = 0;
 		tp->fackets_out = 0;
 	}
@@ -1948,7 +1954,7 @@ void tcp_enter_loss(struct sock *sk, int how)
 			tp->undo_marker = 0;
 
 		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
-		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
+		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 			tp->lost_out += tcp_skb_pcount(skb);
@@ -1981,19 +1987,21 @@ void tcp_enter_loss(struct sock *sk, int how)
  * remembered SACKs do not reflect real state of receiver i.e.
  * receiver _host_ is heavily congested (or buggy).
  *
- * Do processing similar to RTO timeout.
+ * To avoid big spurious retransmission bursts due to transient SACK
+ * scoreboard oddities that look like reneging, we give the receiver a
+ * little time (max(RTT/2, 10ms)) to send us some more ACKs that will
+ * restore sanity to the SACK scoreboard. If the apparent reneging
+ * persists until this RTO then we'll clear the SACK scoreboard.
  */
 static bool tcp_check_sack_reneging(struct sock *sk, int flag)
 {
 	if (flag & FLAG_SACK_RENEGING) {
-		struct inet_connection_sock *icsk = inet_csk(sk);
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
+		struct tcp_sock *tp = tcp_sk(sk);
+		unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
+					  msecs_to_jiffies(10));
 
-		tcp_enter_loss(sk, 1);
-		icsk->icsk_retransmits++;
-		tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
-					  icsk->icsk_rto, TCP_RTO_MAX);
+					  delay, TCP_RTO_MAX);
 		return true;
 	}
 	return false;
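The delay in this hunk works because tp->srtt_us stores eight times the smoothed RTT, in microseconds; shifting right by 4 therefore yields SRTT/2, which is then floored at 10 ms. A minimal sketch of the same arithmetic, using a hypothetical helper name purely for illustration:

/* Hypothetical helper mirroring the delay above: srtt_us holds 8 * SRTT
 * in microseconds, so (srtt_us >> 4) is SRTT/2; the result is floored
 * at 10 ms before arming the ICSK_TIME_RETRANS timer.
 */
static unsigned long sack_reneg_delay(u32 srtt_us)
{
	unsigned long half_rtt = usecs_to_jiffies(srtt_us >> 4);

	return max(half_rtt, msecs_to_jiffies(10));
}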
@@ -2475,7 +2483,7 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
  * losses and/or application stalls), do not perform any further cwnd
  * reductions, but instead slow start up to ssthresh.
  */
-static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
+static void tcp_init_cwnd_reduction(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2485,8 +2493,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
 	tp->prior_cwnd = tp->snd_cwnd;
 	tp->prr_delivered = 0;
 	tp->prr_out = 0;
-	if (set_ssthresh)
-		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+	tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
 	TCP_ECN_queue_cwr(tp);
 }
 
@@ -2528,14 +2535,14 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
 }
 
 /* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
-void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
+void tcp_enter_cwr(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->prior_ssthresh = 0;
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		tp->undo_marker = 0;
-		tcp_init_cwnd_reduction(sk, set_ssthresh);
+		tcp_init_cwnd_reduction(sk);
 		tcp_set_ca_state(sk, TCP_CA_CWR);
 	}
 }
@@ -2564,7 +2571,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
 		tp->retrans_stamp = 0;
 
 	if (flag & FLAG_ECE)
-		tcp_enter_cwr(sk, 1);
+		tcp_enter_cwr(sk);
 
 	if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
 		tcp_try_keep_open(sk);
@@ -2670,7 +2677,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		if (!ece_ack)
 			tp->prior_ssthresh = tcp_current_ssthresh(sk);
-		tcp_init_cwnd_reduction(sk, true);
+		tcp_init_cwnd_reduction(sk);
 	}
 	tcp_set_ca_state(sk, TCP_CA_Recovery);
 }
@@ -2680,7 +2687,6 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
  */
 static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
 {
-	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	bool recovered = !before(tp->snd_una, tp->high_seq);
 
@@ -2706,12 +2712,9 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
 
 	if (recovered) {
 		/* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
-		icsk->icsk_retransmits = 0;
 		tcp_try_undo_recovery(sk);
 		return;
 	}
-	if (flag & FLAG_DATA_ACKED)
-		icsk->icsk_retransmits = 0;
 	if (tcp_is_reno(tp)) {
 		/* A Reno DUPACK means new data in F-RTO step 2.b above are
 		 * delivered. Lower inflight to clock out (re)tranmissions.
@@ -3043,10 +3046,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	first_ackt.v64 = 0;
 
 	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
+		struct skb_shared_info *shinfo = skb_shinfo(skb);
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 		u8 sacked = scb->sacked;
 		u32 acked_pcount;
 
+		if (unlikely(shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
+		    between(shinfo->tskey, prior_snd_una, tp->snd_una - 1))
+			__skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+
 		/* Determine how many packets and what bytes were acked, tso and else */
 		if (after(scb->end_seq, tp->snd_una)) {
 			if (tcp_skb_pcount(skb) == 1 ||
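This hunk emits an SCM_TSTAMP_ACK timestamp for any skb whose shared info carries SKBTX_ACK_TSTAMP once its bytes are cumulatively acked. Userspace opts in via SO_TIMESTAMPING; a minimal sketch, assuming v3.17-era linux/net_tstamp.h flags, might look like:

/* Sketch only: request software TX ACK timestamps on a connected TCP
 * socket. The kernel path added above then calls __skb_tstamp_tx() with
 * SCM_TSTAMP_ACK when the matching bytes are acked; the timestamp is
 * read back from the socket's error queue.
 */
#include <linux/net_tstamp.h>
#include <sys/socket.h>

static int enable_ack_timestamps(int fd)
{
	int val = SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE;

	return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
}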
@@ -3346,7 +3354,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
 		tp->tlp_high_seq = 0;
 		/* Don't reduce cwnd if DSACK arrives for TLP retrans. */
 		if (!(flag & FLAG_DSACKING_ACK)) {
-			tcp_init_cwnd_reduction(sk, true);
+			tcp_init_cwnd_reduction(sk);
 			tcp_set_ca_state(sk, TCP_CA_CWR);
 			tcp_end_cwnd_reduction(sk);
 			tcp_try_keep_open(sk);
@@ -3393,8 +3401,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
 		tcp_rearm_rto(sk);
 
-	if (after(ack, prior_snd_una))
+	if (after(ack, prior_snd_una)) {
 		flag |= FLAG_SND_UNA_ADVANCED;
+		icsk->icsk_retransmits = 0;
+	}
 
 	prior_fackets = tp->fackets_out;
 
@@ -5877,3 +5887,156 @@ discard:
 	return 0;
 }
 EXPORT_SYMBOL(tcp_rcv_state_process);
+
+static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
+{
+	struct inet_request_sock *ireq = inet_rsk(req);
+
+	if (family == AF_INET)
+		LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
+			       &ireq->ir_rmt_addr, port);
+#if IS_ENABLED(CONFIG_IPV6)
+	else if (family == AF_INET6)
+		LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI6/%u\n"),
+			       &ireq->ir_v6_rmt_addr, port);
+#endif
+}
+
+int tcp_conn_request(struct request_sock_ops *rsk_ops,
+		     const struct tcp_request_sock_ops *af_ops,
+		     struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_options_received tmp_opt;
+	struct request_sock *req;
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct dst_entry *dst = NULL;
+	__u32 isn = TCP_SKB_CB(skb)->when;
+	bool want_cookie = false, fastopen;
+	struct flowi fl;
+	struct tcp_fastopen_cookie foc = { .len = -1 };
+	int err;
+
+
+	/* TW buckets are converted to open requests without
+	 * limitations, they conserve resources and peer is
+	 * evidently real one.
+	 */
+	if ((sysctl_tcp_syncookies == 2 ||
+	     inet_csk_reqsk_queue_is_full(sk)) && !isn) {
+		want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
+		if (!want_cookie)
+			goto drop;
+	}
+
+
+	/* Accept backlog is full. If we have already queued enough
+	 * of warm entries in syn queue, drop request. It is better than
+	 * clogging syn queue with openreqs with exponentially increasing
+	 * timeout.
+	 */
+	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+		goto drop;
+	}
+
+	req = inet_reqsk_alloc(rsk_ops);
+	if (!req)
+		goto drop;
+
+	tcp_rsk(req)->af_specific = af_ops;
+
+	tcp_clear_options(&tmp_opt);
+	tmp_opt.mss_clamp = af_ops->mss_clamp;
+	tmp_opt.user_mss = tp->rx_opt.user_mss;
+	tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
+
+	if (want_cookie && !tmp_opt.saw_tstamp)
+		tcp_clear_options(&tmp_opt);
+
+	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
+	tcp_openreq_init(req, &tmp_opt, skb, sk);
+
+	af_ops->init_req(req, sk, skb);
+
+	if (security_inet_conn_request(sk, skb, req))
+		goto drop_and_free;
+
+	if (!want_cookie || tmp_opt.tstamp_ok)
+		TCP_ECN_create_request(req, skb, sock_net(sk));
+
+	if (want_cookie) {
+		isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
+		req->cookie_ts = tmp_opt.tstamp_ok;
+	} else if (!isn) {
+		/* VJ's idea. We save last timestamp seen
+		 * from the destination in peer table, when entering
+		 * state TIME-WAIT, and check against it before
+		 * accepting new connection request.
+		 *
+		 * If "isn" is not zero, this request hit alive
+		 * timewait bucket, so that all the necessary checks
+		 * are made in the function processing timewait state.
+		 */
+		if (tcp_death_row.sysctl_tw_recycle) {
+			bool strict;
+
+			dst = af_ops->route_req(sk, &fl, req, &strict);
+
+			if (dst && strict &&
+			    !tcp_peer_is_proven(req, dst, true,
+						tmp_opt.saw_tstamp)) {
+				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
+				goto drop_and_release;
+			}
+		}
+		/* Kill the following clause, if you dislike this way. */
+		else if (!sysctl_tcp_syncookies &&
+			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+			  (sysctl_max_syn_backlog >> 2)) &&
+			 !tcp_peer_is_proven(req, dst, false,
+					     tmp_opt.saw_tstamp)) {
+			/* Without syncookies last quarter of
+			 * backlog is filled with destinations,
+			 * proven to be alive.
+			 * It means that we continue to communicate
+			 * to destinations, already remembered
+			 * to the moment of synflood.
+			 */
+			pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
+				    rsk_ops->family);
+			goto drop_and_release;
+		}
+
+		isn = af_ops->init_seq(skb);
+	}
+	if (!dst) {
+		dst = af_ops->route_req(sk, &fl, req, NULL);
+		if (!dst)
+			goto drop_and_free;
+	}
+
+	tcp_rsk(req)->snt_isn = isn;
+	tcp_openreq_init_rwin(req, sk, dst);
+	fastopen = !want_cookie &&
+		   tcp_try_fastopen(sk, skb, req, &foc, dst);
+	err = af_ops->send_synack(sk, dst, &fl, req,
+				  skb_get_queue_mapping(skb), &foc);
+	if (!fastopen) {
+		if (err || want_cookie)
+			goto drop_and_free;
+
+		tcp_rsk(req)->listener = NULL;
+		af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+	}
+
+	return 0;
+
+drop_and_release:
+	dst_release(dst);
+drop_and_free:
+	reqsk_free(req);
+drop:
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+	return 0;
+}
+EXPORT_SYMBOL(tcp_conn_request);
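The new tcp_conn_request() factors the formerly duplicated IPv4/IPv6 SYN handling behind the af_ops callbacks used above (init_req, route_req, init_seq, send_synack, queue_hash_add). For orientation only, the IPv4 listener side is expected to delegate to it roughly as in the sketch below; this caller is not part of this diff and paraphrases the tcp_ipv4.c wiring of that era rather than quoting it.

/* Sketch of the IPv4 caller (not in this diff): filter out
 * broadcast/multicast SYNs, then hand off to the protocol-independent
 * tcp_conn_request() with the IPv4 request_sock callback tables.
 */
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&tcp_request_sock_ops,
				&tcp_request_sock_ipv4_ops, sk, skb);

drop:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return 0;
}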