author     Eric Dumazet <edumazet@google.com>        2014-12-07 15:22:18 -0500
committer  David S. Miller <davem@davemloft.net>     2014-12-09 16:39:22 -0500
commit     605ad7f184b60cfaacbc038aa6c55ee68dee3c89 (patch)
tree       e4c88937452f13283365fdcd4d1b5a900c6084a7 /net/ipv4/tcp.c
parent     5e84e189ce1323978afebfba89d3c18cd3f3643c (diff)
tcp: refine TSO autosizing
Commit 95bd09eb2750 ("tcp: TSO packets automatic sizing") tried to
control TSO size, but did this at the wrong place (sendmsg() time).

At sendmsg() time, we might have a pessimistic view of flow rate,
and we end up building very small skbs (with 2 MSS per skb).

This is bad because:

 - It sends small TSO packets even in Slow Start, where the rate quickly
   increases.
 - It tends to make the socket write queue very big, increasing tcp_ack()
   processing time, but also increasing memory needs, not necessarily
   accounted for, as fast clones overhead is currently ignored.
 - Lower GRO efficiency and more ACK packets.

Servers with a lot of short-lived connections suffer from this.

Let's instead fill skbs as much as possible (64KB of payload), but split
them at xmit time, when we have a precise idea of the flow rate.
skb split is actually quite efficient.

The patch looks bigger than necessary, because the TCP Small Queues (TSQ)
decision now has to take place after the eventual split.

As Neal suggested, introduce a new tcp_tso_autosize() helper, so that
tcp_tso_should_defer() can be synchronized on the same goal.

Rename tp->xmit_size_goal_segs to tp->gso_segs, as this variable now
contains the number of MSS that we can put in a GSO packet, and is no
longer related to the autosizing goal.

Tested:

40 ms rtt link

nstat >/dev/null
netperf -H remote -l -2000000 -- -s 1000000
nstat | egrep "IpInReceives|IpOutRequests|TcpOutSegs|IpExtOutOctets"

Before patch:

Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/s

 87380 2000000 2000000    0.36         44.22

IpInReceives                    600        0.0
IpOutRequests                   599        0.0
TcpOutSegs                      1397       0.0
IpExtOutOctets                  2033249    0.0

After patch:

Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380 2000000 2000000    0.36         44.27

IpInReceives                    221        0.0
IpOutRequests                   232        0.0
TcpOutSegs                      1397       0.0
IpExtOutOctets                  2013953    0.0

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
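[Editor's note: the tcp_tso_autosize() helper mentioned above lands in
net/ipv4/tcp_output.c, which this diffstat-limited view does not show. The
sketch below is a rough, non-authoritative reconstruction of the pacing-based
sizing described in the message; the function name below, the >> 10 shift and
the exact bounds are assumptions, not the kernel source.]

/* Sketch only: size skbs so roughly one packet is sent per ms at the
 * current pacing rate, floored at sysctl_tcp_min_tso_segs segments and
 * capped by the device GSO limits. Reconstructed from the commit
 * description; details are assumptions.
 */
static u32 tcp_tso_autosize_sketch(const struct sock *sk, unsigned int mss_now)
{
        u32 bytes, segs;

        /* sk_pacing_rate is in bytes/sec; >> 10 approximates bytes per ms */
        bytes = min(sk->sk_pacing_rate >> 10,
                    sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);

        /* At least one packet per ms, never below the sysctl floor */
        segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);

        return min_t(u32, segs, sk->sk_gso_max_segs);
}

The key point is that the per-millisecond sizing now happens at transmit time
in tcp_output.c, while tcp_xmit_size_goal() below simply targets the full
64KB GSO size.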
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--   net/ipv4/tcp.c   60
1 file changed, 21 insertions(+), 39 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dc13a3657e8e..427aee33ffc0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -835,47 +835,29 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
 				       int large_allowed)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	u32 xmit_size_goal, old_size_goal;
+	u32 new_size_goal, size_goal, hlen;
 
-	xmit_size_goal = mss_now;
-
-	if (large_allowed && sk_can_gso(sk)) {
-		u32 gso_size, hlen;
-
-		/* Maybe we should/could use sk->sk_prot->max_header here ? */
-		hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
-		       inet_csk(sk)->icsk_ext_hdr_len +
-		       tp->tcp_header_len;
-
-		/* Goal is to send at least one packet per ms,
-		 * not one big TSO packet every 100 ms.
-		 * This preserves ACK clocking and is consistent
-		 * with tcp_tso_should_defer() heuristic.
-		 */
-		gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC);
-		gso_size = max_t(u32, gso_size,
-				 sysctl_tcp_min_tso_segs * mss_now);
-
-		xmit_size_goal = min_t(u32, gso_size,
-				       sk->sk_gso_max_size - 1 - hlen);
-
-		xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
-
-		/* We try hard to avoid divides here */
-		old_size_goal = tp->xmit_size_goal_segs * mss_now;
-
-		if (likely(old_size_goal <= xmit_size_goal &&
-			   old_size_goal + mss_now > xmit_size_goal)) {
-			xmit_size_goal = old_size_goal;
-		} else {
-			tp->xmit_size_goal_segs =
-				min_t(u16, xmit_size_goal / mss_now,
-				      sk->sk_gso_max_segs);
-			xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
-		}
-	}
+	if (!large_allowed || !sk_can_gso(sk))
+		return mss_now;
+
+	/* Maybe we should/could use sk->sk_prot->max_header here ? */
+	hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
+	       inet_csk(sk)->icsk_ext_hdr_len +
+	       tp->tcp_header_len;
+
+	new_size_goal = sk->sk_gso_max_size - 1 - hlen;
+	new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
+
+	/* We try hard to avoid divides here */
+	size_goal = tp->gso_segs * mss_now;
+	if (unlikely(new_size_goal < size_goal ||
+		     new_size_goal >= size_goal + mss_now)) {
+		tp->gso_segs = min_t(u16, new_size_goal / mss_now,
+				     sk->sk_gso_max_segs);
+		size_goal = tp->gso_segs * mss_now;
+	}
 
-	return max(xmit_size_goal, mss_now);
+	return max(size_goal, mss_now);
 }
 
 static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
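[Editor's note on the "We try hard to avoid divides here" logic above:
tp->gso_segs acts as a cached quotient, and the divide is redone only when
new_size_goal drifts out of the [size_goal, size_goal + mss_now) window,
i.e. by at least one full MSS. Below is a minimal standalone sketch of that
hysteresis with made-up numbers (1448-byte MSS); it is illustrative userspace
code, not kernel code.]

#include <stdio.h>

/* Cached segment count, standing in for tp->gso_segs in the patch above. */
static unsigned int cached_segs;

/* Recompute the cached quotient only when the goal moves by >= one MSS. */
static unsigned int size_goal(unsigned int new_size_goal, unsigned int mss_now,
                              unsigned int gso_max_segs)
{
        unsigned int goal = cached_segs * mss_now;

        if (new_size_goal < goal || new_size_goal >= goal + mss_now) {
                cached_segs = new_size_goal / mss_now;   /* the divide being avoided */
                if (cached_segs > gso_max_segs)
                        cached_segs = gso_max_segs;
                goal = cached_segs * mss_now;
        }
        return goal > mss_now ? goal : mss_now;
}

int main(void)
{
        /* Hypothetical values: ~64KB goal, 1448-byte MSS, large segment cap. */
        printf("%u\n", size_goal(65483, 1448, 65535)); /* 45 * 1448 = 65160 (divide) */
        printf("%u\n", size_goal(65400, 1448, 65535)); /* still 65160, no divide */
        return 0;
}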