about | summary | refs | log | tree | commit | diff | stats
path: root/net
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2013-10-01 13:23:44 -0400
committer: David S. Miller <davem@davemloft.net> 2013-10-02 16:45:17 -0400
commit: 6ae705323b716ea7a8cc26bee79176398a9b2e89 (patch)
tree: e3b90b1519e5b90a6d3ec8b21493c804da185b76 /net
parent: bbe34cf8a1a2cc174e6516fc230b91b531da7ddf (diff)
tcp: sndbuf autotuning improvements
tcp_fixup_sndbuf() is underestimating initial send buffer requirements.

It was not noticed because big GSO packets were escaping the limitation, but with smaller TSO packets (or TSO/GSO/SG off), application hits sk_sndbuf before having a chance to fill enough packets in socket write queue.

- initial cwnd can be bigger than 10 for specific routes
- SKB_TRUESIZE() is a bit under real needs in some cases, because of power-of-two rounding in kmalloc()
- Fast Recovery (RFC 5681 3.2) : Cubic needs 70% factor
- Extra cushion (application might react slowly to POLLOUT)

tcp_v4_conn_req_fastopen() needs to call tcp_init_metrics() before calling tcp_init_buffer_space()

Then we realize tcp_new_space() should call tcp_fixup_sndbuf() instead of duplicating this stuff.

Rename tcp_fixup_sndbuf() to tcp_sndbuf_expand() to be more descriptive.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/ipv4/tcp_input.c | 38
-rw-r--r-- net/ipv4/tcp_ipv4.c | 2
2 files changed, 26 insertions, 14 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 66aa816ad30b..cd65674ece92 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -267,11 +267,31 @@ static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr
267 * 1. Tuning sk->sk_sndbuf, when connection enters established state. 267 * 1. Tuning sk->sk_sndbuf, when connection enters established state.
268 */ 268 */
269 269
270static void tcp_fixup_sndbuf(struct sock *sk) 270static void tcp_sndbuf_expand(struct sock *sk)
271{ 271{
272 int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER); 272 const struct tcp_sock *tp = tcp_sk(sk);
273 int sndmem, per_mss;
274 u32 nr_segs;
275
276 /* Worst case is non GSO/TSO : each frame consumes one skb
277 * and skb->head is kmalloced using power of two area of memory
278 */
279 per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
280 MAX_TCP_HEADER +
281 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
282
283 per_mss = roundup_pow_of_two(per_mss) +
284 SKB_DATA_ALIGN(sizeof(struct sk_buff));
285
286 nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
287 nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
288
289 /* Fast Recovery (RFC 5681 3.2) :
290 * Cubic needs 1.7 factor, rounded to 2 to include
291 * extra cushion (application might react slowly to POLLOUT)
292 */
293 sndmem = 2 * nr_segs * per_mss;
273 294
274 sndmem *= TCP_INIT_CWND;
275 if (sk->sk_sndbuf < sndmem) 295 if (sk->sk_sndbuf < sndmem)
276 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); 296 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
277} 297}
@@ -376,7 +396,7 @@ void tcp_init_buffer_space(struct sock *sk)
376 if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) 396 if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
377 tcp_fixup_rcvbuf(sk); 397 tcp_fixup_rcvbuf(sk);
378 if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) 398 if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
379 tcp_fixup_sndbuf(sk); 399 tcp_sndbuf_expand(sk);
380 400
381 tp->rcvq_space.space = tp->rcv_wnd; 401 tp->rcvq_space.space = tp->rcv_wnd;
382 tp->rcvq_space.time = tcp_time_stamp; 402 tp->rcvq_space.time = tcp_time_stamp;
@@ -4723,15 +4743,7 @@ static void tcp_new_space(struct sock *sk)
4723 struct tcp_sock *tp = tcp_sk(sk); 4743 struct tcp_sock *tp = tcp_sk(sk);
4724 4744
4725 if (tcp_should_expand_sndbuf(sk)) { 4745 if (tcp_should_expand_sndbuf(sk)) {
4726 int sndmem = SKB_TRUESIZE(max_t(u32, 4746 tcp_sndbuf_expand(sk);
4727 tp->rx_opt.mss_clamp,
4728 tp->mss_cache) +
4729 MAX_TCP_HEADER);
4730 int demanded = max_t(unsigned int, tp->snd_cwnd,
4731 tp->reordering + 1);
4732 sndmem *= 2 * demanded;
4733 if (sndmem > sk->sk_sndbuf)
4734 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
4735 tp->snd_cwnd_stamp = tcp_time_stamp; 4747 tp->snd_cwnd_stamp = tcp_time_stamp;
4736 } 4748 }
4737 4749
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b14266bb91eb..5d6b1a609da8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1410,8 +1410,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
1410 inet_csk(child)->icsk_af_ops->rebuild_header(child); 1410 inet_csk(child)->icsk_af_ops->rebuild_header(child);
1411 tcp_init_congestion_control(child); 1411 tcp_init_congestion_control(child);
1412 tcp_mtup_init(child); 1412 tcp_mtup_init(child);
1413 tcp_init_buffer_space(child);
1414 tcp_init_metrics(child); 1413 tcp_init_metrics(child);
1414 tcp_init_buffer_space(child);
1415 1415
1416 /* Queue the data carried in the SYN packet. We need to first 1416 /* Queue the data carried in the SYN packet. We need to first
1417 * bump skb's refcnt because the caller will attempt to free it. 1417 * bump skb's refcnt because the caller will attempt to free it.