author     Eric Dumazet <edumazet@google.com>                2013-08-27 08:46:32 -0400
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>   2013-11-04 07:30:59 -0500
commit     5e25ba5003ee5de0ba2be56bfd54d16d4b1b028d
tree       7a953aea4b06398a6f3c66835d6e76b8048acaa4
parent     14e9c7db465387ede7f019c42f28c90f99fc2793
tcp: TSO packets automatic sizing
[ Upstream commits 6d36824e730f247b602c90e8715a792003e3c5a7, 02cf4ebd82ff0ac7254b88e466820a290ed8289a, and parts of 7eec4174ff29cd42f2acfae8112f51c228545d40 ]

After hearing many people over past years complaining against TSO being bursty or even buggy, we are proud to present automatic sizing of TSO packets.

One part of the problem is that tcp_tso_should_defer() uses an heuristic relying on upcoming ACKS instead of a timer, but more generally, having big TSO packets makes little sense for low rates, as it tends to create micro bursts on the network, and general consensus is to reduce the buffering amount.

This patch introduces a per socket sk_pacing_rate, that approximates the current sending rate, and allows us to size the TSO packets so that we try to send one packet every ms.

This field could be set by other transports.

Patch has no impact for high speed flows, where having large TSO packets makes sense to reach line rate.

For other flows, this helps better packet scheduling and ACK clocking.

This patch increases performance of TCP flows in lossy environments.

A new sysctl (tcp_min_tso_segs) is added, to specify the minimal size of a TSO packet (default being 2).

A follow-up patch will provide a new packet scheduler (FQ), using sk_pacing_rate as an input to perform optional per flow pacing.

This explains why we chose to set sk_pacing_rate to twice the current rate, allowing 'slow start' ramp up.

sk_pacing_rate = 2 * cwnd * mss / srtt

v2: Neal Cardwell reported a suspect deferring of last two segments on initial write of 10 MSS, I had to change tcp_tso_should_defer() to take into account tp->xmit_size_goal_segs

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Van Jacobson <vanj@google.com>
Cc: Tom Herbert <therbert@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
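As a rough illustration of the sizing math above, here is a minimal user-space sketch (not part of the patch; the mss/cwnd/srtt values below are made-up example numbers, and the kernel itself does this arithmetic in jiffies inside tcp_update_pacing_rate()):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Made-up flow state for illustration only. */
	uint64_t mss = 1448;		/* bytes per segment */
	uint64_t cwnd = 20;		/* congestion window, in segments */
	uint64_t srtt_us = 50000;	/* smoothed RTT: 50 ms, in microseconds */

	/* sk_pacing_rate = 2 * cwnd * mss / srtt, in bytes per second */
	uint64_t pacing_rate = 2 * cwnd * mss * 1000000ULL / srtt_us;

	/* TSO sizing goal: about one packet per ms at half the pacing rate,
	 * mirroring gso_size = sk_pacing_rate / (2 * MSEC_PER_SEC) in the patch.
	 */
	uint64_t gso_size = pacing_rate / (2 * 1000);

	printf("pacing rate : %llu bytes/sec\n", (unsigned long long)pacing_rate);
	printf("TSO goal    : %llu bytes (~%llu segments)\n",
	       (unsigned long long)gso_size, (unsigned long long)(gso_size / mss));
	return 0;
}

With these example numbers the goal comes out below two segments, so the tcp_min_tso_segs floor (2 * mss by default) would apply; a line-rate 10 Gbit/s flow, by contrast, keeps building full-sized 64KB TSO packets.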
-rw-r--r--  Documentation/networking/ip-sysctl.txt  |  9
-rw-r--r--  include/net/sock.h                       |  2
-rw-r--r--  include/net/tcp.h                        |  1
-rw-r--r--  net/core/sock.c                          |  1
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c               | 10
-rw-r--r--  net/ipv4/tcp.c                           | 28
-rw-r--r--  net/ipv4/tcp_input.c                     | 34
-rw-r--r--  net/ipv4/tcp_output.c                    |  2
8 files changed, 80 insertions, 7 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 3458d6343e01..3994f0bbeeb6 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -478,6 +478,15 @@ tcp_syn_retries - INTEGER
 tcp_timestamps - BOOLEAN
 	Enable timestamps as defined in RFC1323.
 
+tcp_min_tso_segs - INTEGER
+	Minimal number of segments per TSO frame.
+	Since linux-3.12, TCP does an automatic sizing of TSO frames,
+	depending on flow rate, instead of filling 64Kbytes packets.
+	For specific usages, it's possible to force TCP to build big
+	TSO frames. Note that TCP stack might split too big TSO packets
+	if available window is too small.
+	Default: 2
+
 tcp_tso_win_divisor - INTEGER
 	This allows control over what percentage of the congestion window
 	can be consumed by a single TSO frame.
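Once the patch is applied, the new knob appears under /proc/sys/net/ipv4/. A minimal user-space sketch to read it (assuming the usual procfs mount point; not part of the patch):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_min_tso_segs", "r");
	int segs;

	if (!f) {
		perror("tcp_min_tso_segs");	/* kernel lacks this patch? */
		return 1;
	}
	if (fscanf(f, "%d", &segs) == 1)
		printf("minimal segments per TSO frame: %d\n", segs);
	fclose(f);
	return 0;
}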
diff --git a/include/net/sock.h b/include/net/sock.h
index 66772cf8c3c5..cec4c723db9a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -230,6 +230,7 @@ struct cg_proto;
  *	@sk_wmem_queued: persistent queue size
  *	@sk_forward_alloc: space allocated forward
  *	@sk_allocation: allocation mode
+ *	@sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler)
  *	@sk_sndbuf: size of send buffer in bytes
  *	@sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
  *		   %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
@@ -355,6 +356,7 @@ struct sock {
 	kmemcheck_bitfield_end(flags);
 	int			sk_wmem_queued;
 	gfp_t			sk_allocation;
+	u32			sk_pacing_rate; /* bytes per second */
 	netdev_features_t	sk_route_caps;
 	netdev_features_t	sk_route_nocaps;
 	int			sk_gso_type;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5bba80fbd1d9..3fc77e90624a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -287,6 +287,7 @@ extern int sysctl_tcp_thin_dupack;
 extern int sysctl_tcp_early_retrans;
 extern int sysctl_tcp_limit_output_bytes;
 extern int sysctl_tcp_challenge_ack_limit;
+extern int sysctl_tcp_min_tso_segs;
 
 extern atomic_long_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
diff --git a/net/core/sock.c b/net/core/sock.c
index d6d024cfaaaf..6565431b0e6d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2271,6 +2271,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	sk->sk_stamp = ktime_set(-1L, 0);
 
+	sk->sk_pacing_rate = ~0U;
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
 	 * (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 3f25e75ae692..90b26beb84d4 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -29,6 +29,7 @@
 static int zero;
 static int one = 1;
 static int four = 4;
+static int gso_max_segs = GSO_MAX_SEGS;
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -753,6 +754,15 @@ static struct ctl_table ipv4_table[] = {
 		.extra2		= &four,
 	},
 	{
+		.procname	= "tcp_min_tso_segs",
+		.data		= &sysctl_tcp_min_tso_segs,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &gso_max_segs,
+	},
+	{
 		.procname	= "udp_mem",
 		.data		= &sysctl_udp_mem,
 		.maxlen		= sizeof(sysctl_udp_mem),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2b1b57f213b2..c888abf5a728 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,6 +282,8 @@
 
 int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 
+int sysctl_tcp_min_tso_segs __read_mostly = 2;
+
 struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
@@ -786,12 +788,28 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
 	xmit_size_goal = mss_now;
 
 	if (large_allowed && sk_can_gso(sk)) {
-		xmit_size_goal = ((sk->sk_gso_max_size - 1) -
-				  inet_csk(sk)->icsk_af_ops->net_header_len -
-				  inet_csk(sk)->icsk_ext_hdr_len -
-				  tp->tcp_header_len);
+		u32 gso_size, hlen;
+
+		/* Maybe we should/could use sk->sk_prot->max_header here ? */
+		hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
+		       inet_csk(sk)->icsk_ext_hdr_len +
+		       tp->tcp_header_len;
+
+		/* Goal is to send at least one packet per ms,
+		 * not one big TSO packet every 100 ms.
+		 * This preserves ACK clocking and is consistent
+		 * with tcp_tso_should_defer() heuristic.
+		 */
+		gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC);
+		gso_size = max_t(u32, gso_size,
+				 sysctl_tcp_min_tso_segs * mss_now);
+
+		xmit_size_goal = min_t(u32, gso_size,
+				       sk->sk_gso_max_size - 1 - hlen);
 
-		/* TSQ : try to have two TSO segments in flight */
+		/* TSQ : try to have at least two segments in flight
+		 * (one in NIC TX ring, another in Qdisc)
+		 */
 		xmit_size_goal = min_t(u32, xmit_size_goal,
 				       sysctl_tcp_limit_output_bytes >> 1);
 
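A worked example of the new clamping chain in tcp_xmit_size_goal() (illustrative numbers only; the real function additionally bounds the goal to the window and rounds it down to a multiple of mss_now further on):

#include <stdio.h>

int main(void)
{
	/* Made-up inputs: 40 MB/s pacing rate, 1448-byte MSS, default knobs. */
	unsigned int pacing_rate = 40 * 1000 * 1000;	/* bytes/sec */
	unsigned int mss = 1448, min_tso_segs = 2;
	unsigned int gso_max_size = 65536, hlen = 72;
	unsigned int limit_output_bytes = 131072;
	unsigned int gso_size, goal;

	gso_size = pacing_rate / (2 * 1000);		/* one packet per ms -> 20000 */
	if (gso_size < min_tso_segs * mss)		/* sysctl floor */
		gso_size = min_tso_segs * mss;

	goal = gso_size;
	if (goal > gso_max_size - 1 - hlen)		/* device GSO size limit */
		goal = gso_max_size - 1 - hlen;
	if (goal > limit_output_bytes >> 1)		/* TSQ: two chunks in flight */
		goal = limit_output_bytes >> 1;

	printf("xmit_size_goal: %u bytes (~%u segments)\n", goal, goal / mss);
	return 0;
}

With these inputs the goal is 20000 bytes (about 13 segments), i.e. roughly one millisecond of data at the assumed rate rather than a full 64KB burst.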
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4b75aad14b04..70883b87bc5d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -699,6 +699,34 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 	}
 }
 
+/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
+ * Note: TCP stack does not yet implement pacing.
+ * FQ packet scheduler can be used to implement cheap but effective
+ * TCP pacing, to smooth the burst on large writes when packets
+ * in flight is significantly lower than cwnd (or rwin)
+ */
+static void tcp_update_pacing_rate(struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	u64 rate;
+
+	/* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
+	rate = (u64)tp->mss_cache * 2 * (HZ << 3);
+
+	rate *= max(tp->snd_cwnd, tp->packets_out);
+
+	/* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3),
+	 * be conservative and assume srtt = 1 (125 us instead of 1.25 ms)
+	 * We probably need usec resolution in the future.
+	 * Note: This also takes care of possible srtt=0 case,
+	 * when tcp_rtt_estimator() was not yet called.
+	 */
+	if (tp->srtt > 8 + 2)
+		do_div(rate, tp->srtt);
+
+	sk->sk_pacing_rate = min_t(u64, rate, ~0U);
+}
+
 /* Calculate rto without backoff. This is the second half of Van Jacobson's
  * routine referred to above.
  */
@@ -3330,7 +3358,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	bool is_dupack = false;
-	u32 prior_in_flight;
+	u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt;
 	u32 prior_fackets;
 	int prior_packets = tp->packets_out;
 	int prior_sacked = tp->sacked_out;
@@ -3438,6 +3466,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS)
 		tcp_schedule_loss_probe(sk);
+	if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd)
+		tcp_update_pacing_rate(sk);
 	return 1;
 
 no_queue:
@@ -5736,6 +5766,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		} else
 			tcp_init_metrics(sk);
 
+		tcp_update_pacing_rate(sk);
+
 		/* Prevent spurious tcp_cwnd_restart() on
 		 * first data packet.
 		 */
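For reference, the fixed-point arithmetic in tcp_update_pacing_rate() above can be checked with a small user-space sketch (illustrative values only; HZ, mss, cwnd and srtt are assumptions for the example, with srtt stored as jiffies << 3 just like tp->srtt):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t hz = 1000;	/* assume HZ=1000 for the example */
	uint64_t mss = 1448, cwnd = 20;
	uint64_t srtt = 50 << 3;	/* 50 ms expressed as jiffies << 3 */
	uint64_t rate;

	rate = mss * 2 * (hz << 3);	/* 200% of mss, scaled by HZ << 3 */
	rate *= cwnd;			/* stands in for max(snd_cwnd, packets_out) */
	if (srtt > 8 + 2)		/* same small-srtt guard as the patch */
		rate /= srtt;		/* jiffies << 3 cancels the HZ << 3 scale */

	printf("sk_pacing_rate: %llu bytes/sec\n", (unsigned long long)rate);
	return 0;
}

With these inputs the result is 1,158,400 bytes/sec, exactly 2 * cwnd * mss / srtt for a 50 ms RTT, matching the formula quoted in the changelog.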
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0145ce7e6098..400b811f5c06 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1623,7 +1623,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 
 	/* If a full-sized TSO skb can be sent, do it. */
 	if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
-			   sk->sk_gso_max_segs * tp->mss_cache))
+			   tp->xmit_size_goal_segs * tp->mss_cache))
 		goto send_now;
 
 	/* Middle in queue won't get any more data, full sendable already? */