aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
author Eric Dumazet <edumazet@google.com> 2015-02-26 17:10:19 -0500
committer David S. Miller <davem@davemloft.net> 2015-02-28 15:10:39 -0500
commit 50c8339e9299aaf09e48c8f94de39baec638c874 (patch)
tree bf177525f7a5bf2cc0fbf436a6354221ed5d4db3 /net/ipv4/tcp_output.c
parent 5f852eb536ad651b8734559dcf4353514cb0bea3 (diff)
tcp: tso: restore IW10 after TSO autosizing
With sysctl_tcp_min_tso_segs being 4, it is very possible that tcp_tso_should_defer() decides not to send the last 2 MSS of the initial window of 10 packets. This also applies if autosizing decides to send X MSS per GSO packet, and cwnd is not a multiple of X.

This patch implements a heuristic based on the age of the first skb in the write queue: if it was sent very recently (less than half srtt ago), we can predict that no ACK packet will come in less than half an rtt, so deferring might cause an underutilization of our window.

This is visible on the initial send (IW10) on web servers, but more generally on some RPC workloads, as the last part of the message might need an extra RTT to get delivered.

Tested:

Ran the following packetdrill test:

// A simple server-side test that sends exactly an initial window (IW10)
// worth of packets.

`sysctl -e -q net.ipv4.tcp_min_tso_segs=4`

0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0

+.1 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 6>
+.1 < . 1:1(0) ack 1 win 257
+0 accept(3, ..., ...) = 4

+0 write(4, ..., 14600) = 14600
+0 > . 1:5841(5840) ack 1 win 457
+0 > . 5841:11681(5840) ack 1 win 457
// Following packet should be sent right now.
+0 > P. 11681:14601(2920) ack 1 win 457

+.1 < . 1:1(0) ack 14601 win 257

+0 close(4) = 0
+0 > F. 14601:14601(0) ack 1
+.1 < F. 1:1(0) ack 14602 win 257
+0 > . 14602:14602(0) ack 2

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- net/ipv4/tcp_output.c 13
1 files changed, 11 insertions, 2 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cb95c7a9d1e7..5f4fb4d5bbd6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1752,9 +1752,11 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1752static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, 1752static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1753 bool *is_cwnd_limited, u32 max_segs) 1753 bool *is_cwnd_limited, u32 max_segs)
1754{ 1754{
1755 struct tcp_sock *tp = tcp_sk(sk);
1756 const struct inet_connection_sock *icsk = inet_csk(sk); 1755 const struct inet_connection_sock *icsk = inet_csk(sk);
1757 u32 send_win, cong_win, limit, in_flight; 1756 u32 age, send_win, cong_win, limit, in_flight;
1757 struct tcp_sock *tp = tcp_sk(sk);
1758 struct skb_mstamp now;
1759 struct sk_buff *head;
1758 int win_divisor; 1760 int win_divisor;
1759 1761
1760 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) 1762 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -1808,6 +1810,13 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1808 goto send_now; 1810 goto send_now;
1809 } 1811 }
1810 1812
1813 head = tcp_write_queue_head(sk);
1814 skb_mstamp_get(&now);
1815 age = skb_mstamp_us_delta(&now, &head->skb_mstamp);
1816 /* If next ACK is likely to come too late (half srtt), do not defer */
1817 if (age < (tp->srtt_us >> 4))
1818 goto send_now;
1819
1811 /* Ok, it looks like it is advisable to defer. */ 1820 /* Ok, it looks like it is advisable to defer. */
1812 1821
1813 if (cong_win < send_win && cong_win < skb->len) 1822 if (cong_win < send_win && cong_win < skb->len)