aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2017-12-10 20:55:04 -0500
committerDavid S. Miller <davem@davemloft.net>2017-12-12 10:53:04 -0500
commitc3916ad9320eed8eacd7c0b2cf7f881efceda892 (patch)
treea4fdf9cc629e3e5d300c58b80626059e2ac99437 /net/ipv4/tcp_input.c
parent607065bad9931e72207b0cac365d7d4abc06bd99 (diff)
tcp: smoother receiver autotuning
Back in linux-3.13 (commit b0983d3c9b13 ("tcp: fix dynamic right sizing")) I addressed the pressing issues we had with receiver autotuning.

But DRS suffers from extra latencies caused by rcv_rtt_est.rtt_us drifts. One common problem happens during slow start, since the apparent RTT measured by the receiver can be inflated by ~50%, at the end of one packet train.

Also, a single drop can delay read() calls by one RTT, meaning tcp_rcv_space_adjust() can be called one RTT too late.

By replacing the tri-modal heuristic with a continuous function, we can offset the effects of not growing 'at the optimal time'. The curve of the function matches prior behavior if the space increased by 25% and 50% exactly.

Cost of added multiply/divide is small, considering a TCP flow typically would run this part of the code few times in its life.

I tested this patch with 100 ms RTT / 1% loss link, 100 runs of (netperf -l 5), and got an average throughput of 4600 Mbit instead of 1700 Mbit.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Wei Wang <weiwan@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c | 19 +++++--------------
1 file changed, 5 insertions(+), 14 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2900e58738cd..fefb46c16de7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -601,26 +601,17 @@ void tcp_rcv_space_adjust(struct sock *sk)
 	if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
 		int rcvmem, rcvbuf;
-		u64 rcvwin;
+		u64 rcvwin, grow;
 
 		/* minimal window to cope with packet losses, assuming
 		 * steady state. Add some cushion because of small variations.
 		 */
 		rcvwin = ((u64)copied << 1) + 16 * tp->advmss;
 
-		/* If rate increased by 25%,
-		 *	assume slow start, rcvwin = 3 * copied
-		 * If rate increased by 50%,
-		 *	assume sender can use 2x growth, rcvwin = 4 * copied
-		 */
-		if (copied >=
-		    tp->rcvq_space.space + (tp->rcvq_space.space >> 2)) {
-			if (copied >=
-			    tp->rcvq_space.space + (tp->rcvq_space.space >> 1))
-				rcvwin <<= 1;
-			else
-				rcvwin += (rcvwin >> 1);
-		}
+		/* Accommodate for sender rate increase (eg. slow start) */
+		grow = rcvwin * (copied - tp->rcvq_space.space);
+		do_div(grow, tp->rcvq_space.space);
+		rcvwin += (grow << 1);
 
 		rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
 		while (tcp_win_from_space(sk, rcvmem) < tp->advmss)