author     Eric Dumazet <edumazet@google.com>        2014-12-07 15:22:18 -0500
committer  David S. Miller <davem@davemloft.net>     2014-12-09 16:39:22 -0500
commit     605ad7f184b60cfaacbc038aa6c55ee68dee3c89 (patch)
tree       e4c88937452f13283365fdcd4d1b5a900c6084a7 /net/ipv4/tcp.c
parent     5e84e189ce1323978afebfba89d3c18cd3f3643c (diff)
tcp: refine TSO autosizing
Commit 95bd09eb2750 ("tcp: TSO packets automatic sizing") tried to
control TSO size, but did this at the wrong place (sendmsg() time).

At sendmsg() time, we might have a pessimistic view of flow rate,
and we end up building very small skbs (with 2 MSS per skb).

This is bad because:

 - It sends small TSO packets even in Slow Start, where the rate quickly
   increases.
 - It tends to make the socket write queue very big, increasing tcp_ack()
   processing time, but also increasing memory needs, not necessarily
   accounted for, as fast clones overhead is currently ignored.
 - Lower GRO efficiency and more ACK packets.

Servers with a lot of short-lived connections suffer from this.

Let's instead fill skbs as much as possible (64KB of payload), but split
them at xmit time, when we have a precise idea of the flow rate.
skb split is actually quite efficient.

The patch looks bigger than necessary, because the TCP Small Queues (TSQ)
decision now has to take place after the eventual split.

As Neal suggested, introduce a new tcp_tso_autosize() helper, so that
tcp_tso_should_defer() can be synchronized on the same goal.

Rename tp->xmit_size_goal_segs to tp->gso_segs, as this variable now
contains the number of MSS that we can put in a GSO packet, and is no
longer related to the autosizing goal.

Tested:

40 ms rtt link

nstat >/dev/null
netperf -H remote -l -2000000 -- -s 1000000
nstat | egrep "IpInReceives|IpOutRequests|TcpOutSegs|IpExtOutOctets"

Before patch:

Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/s

 87380 2000000 2000000    0.36         44.22

IpInReceives                    600        0.0
IpOutRequests                   599        0.0
TcpOutSegs                      1397       0.0
IpExtOutOctets                  2033249    0.0

After patch:

Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380 2000000 2000000    0.36         44.27

IpInReceives                    221        0.0
IpOutRequests                   232        0.0
TcpOutSegs                      1397       0.0
IpExtOutOctets                  2013953    0.0

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
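[Editor's note: the tcp_tso_autosize() helper mentioned above lands in
net/ipv4/tcp_output.c, which this diffstat-limited view does not show. The
sketch below is a rough, non-authoritative reconstruction of the pacing-based
sizing described in the message; the function name below, the >> 10 shift and
the exact bounds are assumptions, not the kernel source.]

/* Sketch only: size skbs so roughly one packet is sent per ms at the
 * current pacing rate, floored at sysctl_tcp_min_tso_segs segments and
 * capped by the device GSO limits. Reconstructed from the commit
 * description; details are assumptions.
 */
static u32 tcp_tso_autosize_sketch(const struct sock *sk, unsigned int mss_now)
{
        u32 bytes, segs;

        /* sk_pacing_rate is in bytes/sec; >> 10 approximates bytes per ms */
        bytes = min(sk->sk_pacing_rate >> 10,
                    sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);

        /* At least one packet per ms, never below the sysctl floor */
        segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);

        return min_t(u32, segs, sk->sk_gso_max_segs);
}

The key point is that the per-millisecond sizing now happens at transmit time
in tcp_output.c, while tcp_xmit_size_goal() below simply targets the full
64KB GSO size.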
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--   net/ipv4/tcp.c   60
1 file changed, 21 insertions(+), 39 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dc13a3657e8e..427aee33ffc0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -835,47 +835,29 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
 				       int large_allowed)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	u32 xmit_size_goal, old_size_goal;
+	u32 new_size_goal, size_goal, hlen;
 
-	xmit_size_goal = mss_now;
-
-	if (large_allowed && sk_can_gso(sk)) {
-		u32 gso_size, hlen;
-
-		/* Maybe we should/could use sk->sk_prot->max_header here ? */
-		hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
-		       inet_csk(sk)->icsk_ext_hdr_len +
-		       tp->tcp_header_len;
-
-		/* Goal is to send at least one packet per ms,
-		 * not one big TSO packet every 100 ms.
-		 * This preserves ACK clocking and is consistent
-		 * with tcp_tso_should_defer() heuristic.
-		 */
-		gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC);
-		gso_size = max_t(u32, gso_size,
-				 sysctl_tcp_min_tso_segs * mss_now);
-
-		xmit_size_goal = min_t(u32, gso_size,
-				       sk->sk_gso_max_size - 1 - hlen);
-
-		xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
-
-		/* We try hard to avoid divides here */
-		old_size_goal = tp->xmit_size_goal_segs * mss_now;
-
-		if (likely(old_size_goal <= xmit_size_goal &&
-			   old_size_goal + mss_now > xmit_size_goal)) {
-			xmit_size_goal = old_size_goal;
-		} else {
-			tp->xmit_size_goal_segs =
-				min_t(u16, xmit_size_goal / mss_now,
-				      sk->sk_gso_max_segs);
-			xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
-		}
-	}
+	if (!large_allowed || !sk_can_gso(sk))
+		return mss_now;
+
+	/* Maybe we should/could use sk->sk_prot->max_header here ? */
+	hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
+	       inet_csk(sk)->icsk_ext_hdr_len +
+	       tp->tcp_header_len;
+
+	new_size_goal = sk->sk_gso_max_size - 1 - hlen;
+	new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
+
+	/* We try hard to avoid divides here */
+	size_goal = tp->gso_segs * mss_now;
+	if (unlikely(new_size_goal < size_goal ||
+		     new_size_goal >= size_goal + mss_now)) {
+		tp->gso_segs = min_t(u16, new_size_goal / mss_now,
+				     sk->sk_gso_max_segs);
+		size_goal = tp->gso_segs * mss_now;
+	}
 
-	return max(xmit_size_goal, mss_now);
+	return max(size_goal, mss_now);
 }
 
 static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
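[Editor's note on the "We try hard to avoid divides here" logic above:
tp->gso_segs acts as a cached quotient, and the divide is redone only when
new_size_goal drifts out of the [size_goal, size_goal + mss_now) window,
i.e. by at least one full MSS. Below is a minimal standalone sketch of that
hysteresis with made-up numbers (1448-byte MSS); it is illustrative userspace
code, not kernel code.]

#include <stdio.h>

/* Cached segment count, standing in for tp->gso_segs in the patch above. */
static unsigned int cached_segs;

/* Recompute the cached quotient only when the goal moves by >= one MSS. */
static unsigned int size_goal(unsigned int new_size_goal, unsigned int mss_now,
                              unsigned int gso_max_segs)
{
        unsigned int goal = cached_segs * mss_now;

        if (new_size_goal < goal || new_size_goal >= goal + mss_now) {
                cached_segs = new_size_goal / mss_now;   /* the divide being avoided */
                if (cached_segs > gso_max_segs)
                        cached_segs = gso_max_segs;
                goal = cached_segs * mss_now;
        }
        return goal > mss_now ? goal : mss_now;
}

int main(void)
{
        /* Hypothetical values: ~64KB goal, 1448-byte MSS, large segment cap. */
        printf("%u\n", size_goal(65483, 1448, 65535)); /* 45 * 1448 = 65160 (divide) */
        printf("%u\n", size_goal(65400, 1448, 65535)); /* still 65160, no divide */
        return 0;
}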