aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2012-04-22 19:38:54 -0400
committerDavid S. Miller <davem@davemloft.net>2012-04-23 22:28:28 -0400
commitda882c1f2ecadb0ed582628ec1585e36b137c0f0 (patch)
treec89b136ec4ae978adf1078fdce199423a59ba8c0
parentf545a38f74584cc7424cb74f792a00c6d2589485 (diff)
tcp: sk_add_backlog() is too aggressive for TCP
While investigating TCP performance problems on 10Gb+ links, we found a tcp sender was dropping a lot of incoming ACKs because of the sk_rcvbuf limit in sk_add_backlog(), especially if the receiver doesn't use GRO/LRO and sends one ACK every two MSS segments. A sender usually tweaks sk_sndbuf, but sk_rcvbuf stays at its default value (87380), allowing a too-small backlog. A TCP ACK, even being small, can consume nearly the same truesize space as outgoing packets. Using sk_rcvbuf + sk_sndbuf as a limit makes sense and is fast to compute. Performance results on netperf, single flow, receiver with disabled GRO/LRO : 7500 Mbits instead of 6050 Mbits, no more TCPBacklogDrop increments at sender. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Neal Cardwell <ncardwell@google.com> Cc: Tom Herbert <therbert@google.com> Cc: Maciej Żenczykowski <maze@google.com> Cc: Yuchung Cheng <ycheng@google.com> Cc: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> Cc: Rick Jones <rick.jones2@hp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/ipv4/tcp_ipv4.c3
-rw-r--r--net/ipv6/tcp_ipv6.c3
2 files changed, 4 insertions, 2 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 917607e9bd5b..cf97e9821d76 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1752,7 +1752,8 @@ process:
1752 if (!tcp_prequeue(sk, skb)) 1752 if (!tcp_prequeue(sk, skb))
1753 ret = tcp_v4_do_rcv(sk, skb); 1753 ret = tcp_v4_do_rcv(sk, skb);
1754 } 1754 }
1755 } else if (unlikely(sk_add_backlog(sk, skb, sk->sk_rcvbuf))) { 1755 } else if (unlikely(sk_add_backlog(sk, skb,
1756 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1756 bh_unlock_sock(sk); 1757 bh_unlock_sock(sk);
1757 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); 1758 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1758 goto discard_and_relse; 1759 goto discard_and_relse;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b04e6d8a8371..5fb19d345cfd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1654,7 +1654,8 @@ process:
1654 if (!tcp_prequeue(sk, skb)) 1654 if (!tcp_prequeue(sk, skb))
1655 ret = tcp_v6_do_rcv(sk, skb); 1655 ret = tcp_v6_do_rcv(sk, skb);
1656 } 1656 }
1657 } else if (unlikely(sk_add_backlog(sk, skb, sk->sk_rcvbuf))) { 1657 } else if (unlikely(sk_add_backlog(sk, skb,
1658 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1658 bh_unlock_sock(sk); 1659 bh_unlock_sock(sk);
1659 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); 1660 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1660 goto discard_and_relse; 1661 goto discard_and_relse;