diff options
author | Eric Dumazet <edumazet@google.com> | 2013-10-01 13:23:44 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-10-02 16:45:17 -0400 |
commit | 6ae705323b716ea7a8cc26bee79176398a9b2e89 (patch) | |
tree | e3b90b1519e5b90a6d3ec8b21493c804da185b76 /net | |
parent | bbe34cf8a1a2cc174e6516fc230b91b531da7ddf (diff) |
tcp: sndbuf autotuning improvements
tcp_fixup_sndbuf() underestimates the initial send buffer requirements.
This went unnoticed because big GSO packets escaped the limitation,
but with smaller TSO packets (or with TSO/GSO/SG off), the application hits
sk_sndbuf before having a chance to queue enough packets in the socket write
queue.
- The initial cwnd can be bigger than 10 for specific routes
- SKB_TRUESIZE() is a bit under real needs in some cases,
because of power-of-two rounding in kmalloc()
- Fast Recovery (RFC 5681 3.2): Cubic needs 70% extra, i.e. a 1.7 factor
- Extra cushion (the application might react slowly to POLLOUT)
tcp_v4_conn_req_fastopen() needs to call tcp_init_metrics() before
calling tcp_init_buffer_space().
We then realize that tcp_new_space() should call tcp_fixup_sndbuf()
instead of duplicating this logic.
Rename tcp_fixup_sndbuf() to tcp_sndbuf_expand() to be more
descriptive.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/tcp_input.c | 38 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 2 |
2 files changed, 26 insertions, 14 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 66aa816ad30b..cd65674ece92 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -267,11 +267,31 @@ static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr | |||
267 | * 1. Tuning sk->sk_sndbuf, when connection enters established state. | 267 | * 1. Tuning sk->sk_sndbuf, when connection enters established state. |
268 | */ | 268 | */ |
269 | 269 | ||
270 | static void tcp_fixup_sndbuf(struct sock *sk) | 270 | static void tcp_sndbuf_expand(struct sock *sk) |
271 | { | 271 | { |
272 | int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER); | 272 | const struct tcp_sock *tp = tcp_sk(sk); |
273 | int sndmem, per_mss; | ||
274 | u32 nr_segs; | ||
275 | |||
276 | /* Worst case is non GSO/TSO : each frame consumes one skb | ||
277 | * and skb->head is kmalloced using power of two area of memory | ||
278 | */ | ||
279 | per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + | ||
280 | MAX_TCP_HEADER + | ||
281 | SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); | ||
282 | |||
283 | per_mss = roundup_pow_of_two(per_mss) + | ||
284 | SKB_DATA_ALIGN(sizeof(struct sk_buff)); | ||
285 | |||
286 | nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd); | ||
287 | nr_segs = max_t(u32, nr_segs, tp->reordering + 1); | ||
288 | |||
289 | /* Fast Recovery (RFC 5681 3.2) : | ||
290 | * Cubic needs 1.7 factor, rounded to 2 to include | ||
291 | * extra cushion (application might react slowly to POLLOUT) | ||
292 | */ | ||
293 | sndmem = 2 * nr_segs * per_mss; | ||
273 | 294 | ||
274 | sndmem *= TCP_INIT_CWND; | ||
275 | if (sk->sk_sndbuf < sndmem) | 295 | if (sk->sk_sndbuf < sndmem) |
276 | sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); | 296 | sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); |
277 | } | 297 | } |
@@ -376,7 +396,7 @@ void tcp_init_buffer_space(struct sock *sk) | |||
376 | if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) | 396 | if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) |
377 | tcp_fixup_rcvbuf(sk); | 397 | tcp_fixup_rcvbuf(sk); |
378 | if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) | 398 | if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) |
379 | tcp_fixup_sndbuf(sk); | 399 | tcp_sndbuf_expand(sk); |
380 | 400 | ||
381 | tp->rcvq_space.space = tp->rcv_wnd; | 401 | tp->rcvq_space.space = tp->rcv_wnd; |
382 | tp->rcvq_space.time = tcp_time_stamp; | 402 | tp->rcvq_space.time = tcp_time_stamp; |
@@ -4723,15 +4743,7 @@ static void tcp_new_space(struct sock *sk) | |||
4723 | struct tcp_sock *tp = tcp_sk(sk); | 4743 | struct tcp_sock *tp = tcp_sk(sk); |
4724 | 4744 | ||
4725 | if (tcp_should_expand_sndbuf(sk)) { | 4745 | if (tcp_should_expand_sndbuf(sk)) { |
4726 | int sndmem = SKB_TRUESIZE(max_t(u32, | 4746 | tcp_sndbuf_expand(sk); |
4727 | tp->rx_opt.mss_clamp, | ||
4728 | tp->mss_cache) + | ||
4729 | MAX_TCP_HEADER); | ||
4730 | int demanded = max_t(unsigned int, tp->snd_cwnd, | ||
4731 | tp->reordering + 1); | ||
4732 | sndmem *= 2 * demanded; | ||
4733 | if (sndmem > sk->sk_sndbuf) | ||
4734 | sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); | ||
4735 | tp->snd_cwnd_stamp = tcp_time_stamp; | 4747 | tp->snd_cwnd_stamp = tcp_time_stamp; |
4736 | } | 4748 | } |
4737 | 4749 | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b14266bb91eb..5d6b1a609da8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -1410,8 +1410,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, | |||
1410 | inet_csk(child)->icsk_af_ops->rebuild_header(child); | 1410 | inet_csk(child)->icsk_af_ops->rebuild_header(child); |
1411 | tcp_init_congestion_control(child); | 1411 | tcp_init_congestion_control(child); |
1412 | tcp_mtup_init(child); | 1412 | tcp_mtup_init(child); |
1413 | tcp_init_buffer_space(child); | ||
1414 | tcp_init_metrics(child); | 1413 | tcp_init_metrics(child); |
1414 | tcp_init_buffer_space(child); | ||
1415 | 1415 | ||
1416 | /* Queue the data carried in the SYN packet. We need to first | 1416 | /* Queue the data carried in the SYN packet. We need to first |
1417 | * bump skb's refcnt because the caller will attempt to free it. | 1417 | * bump skb's refcnt because the caller will attempt to free it. |