aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2015-08-21 15:30:00 -0400
committer: David S. Miller <davem@davemloft.net> 2015-08-25 14:22:50 -0400
commit: 6f021c62d64f38092bc2a0c5fe7b81d5e5b21a00 (patch)
tree: d5029a2d5f2e4cd9df5f7b11d43cd72da0f520c0 /net/ipv4/tcp_output.c
parent: 56fff0a01fa056502a28d67cb5a2714d64780415 (diff)
tcp: fix slow start after idle vs TSO/GSO
Slow start after idle might reduce cwnd, but we perform this
after the first packet was cooked and sent.

With TSO/GSO, it means that we might send a full TSO packet
even if cwnd should have been reduced to IW10.

Moving the SSAI check in skb_entail() makes sense, because
we slightly reduce the number of times this check is done,
especially for large send() and TCP Small queue callbacks from
softirq context.

As Neal pointed out, we also need to perform the check
if/when the receive window opens.

Tested:

The following packetdrill test demonstrates the problem:

// Test of slow start after idle

`sysctl -q net.ipv4.tcp_slow_start_after_idle=1`

0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0    setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0    bind(3, ..., ...) = 0
+0    listen(3, 1) = 0

+0    < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0    > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 6>
+.100 < . 1:1(0) ack 1 win 511
+0    accept(3, ..., ...) = 4
+0    setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0

+0    write(4, ..., 26000) = 26000
+0    > . 1:5001(5000) ack 1
+0    > . 5001:10001(5000) ack 1
+0    %{ assert tcpi_snd_cwnd == 10 }%

+.100 < . 1:1(0) ack 10001 win 511
+0    %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
+0    > . 10001:20001(10000) ack 1
+0    > P. 20001:26001(6000) ack 1

+.100 < . 1:1(0) ack 26001 win 511
+0    %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%

+4    write(4, ..., 20000) = 20000
// If slow start after idle works properly, we should send 5 MSS here (cwnd/2)
+0    > . 26001:31001(5000) ack 1
+0    %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+0    > . 31001:36001(5000) ack 1

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- net/ipv4/tcp_output.c 12
1 files changed, 4 insertions, 8 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 444ab5beecbd..1188e4fcf23b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -137,12 +137,12 @@ static __u16 tcp_advertise_mss(struct sock *sk)
137} 137}
138 138
139/* RFC2861. Reset CWND after idle period longer RTO to "restart window". 139/* RFC2861. Reset CWND after idle period longer RTO to "restart window".
140 * This is the first part of cwnd validation mechanism. */ 140 * This is the first part of cwnd validation mechanism.
141static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst) 141 */
142void tcp_cwnd_restart(struct sock *sk, s32 delta)
142{ 143{
143 struct tcp_sock *tp = tcp_sk(sk); 144 struct tcp_sock *tp = tcp_sk(sk);
144 s32 delta = tcp_time_stamp - tp->lsndtime; 145 u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
145 u32 restart_cwnd = tcp_init_cwnd(tp, dst);
146 u32 cwnd = tp->snd_cwnd; 146 u32 cwnd = tp->snd_cwnd;
147 147
148 tcp_ca_event(sk, CA_EVENT_CWND_RESTART); 148 tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
@@ -164,10 +164,6 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
164 struct inet_connection_sock *icsk = inet_csk(sk); 164 struct inet_connection_sock *icsk = inet_csk(sk);
165 const u32 now = tcp_time_stamp; 165 const u32 now = tcp_time_stamp;
166 166
167 if (sysctl_tcp_slow_start_after_idle &&
168 (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
169 tcp_cwnd_restart(sk, __sk_dst_get(sk));
170
171 tp->lsndtime = now; 167 tp->lsndtime = now;
172 168
173 /* If it is a reply for ato after last received 169 /* If it is a reply for ato after last received