diff options
author | Eric Dumazet <edumazet@google.com> | 2012-04-22 19:38:54 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-04-23 22:28:28 -0400 |
commit | da882c1f2ecadb0ed582628ec1585e36b137c0f0 (patch) | |
tree | c89b136ec4ae978adf1078fdce199423a59ba8c0 | |
parent | f545a38f74584cc7424cb74f792a00c6d2589485 (diff) |
tcp: sk_add_backlog() is too aggressive for TCP
While investigating TCP performance problems on 10Gb+ links, we found a
tcp sender was dropping a lot of incoming ACKs because of the sk_rcvbuf limit
in sk_add_backlog(), especially if the receiver doesn't use GRO/LRO and sends
one ACK every two MSS segments.
A sender usually tweaks sk_sndbuf, but sk_rcvbuf stays at its default
value (87380), allowing a too small backlog.
A TCP ACK, even being small, can consume nearly the same truesize space as
outgoing packets. Using sk_rcvbuf + sk_sndbuf as a limit makes sense and
is fast to compute.
Performance results on netperf, single flow, receiver with disabled
GRO/LRO : 7500 Mbits instead of 6050 Mbits, no more TCPBacklogDrop
increments at sender.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Maciej Żenczykowski <maze@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Cc: Rick Jones <rick.jones2@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 3 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 3 |
2 files changed, 4 insertions, 2 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 917607e9bd5b..cf97e9821d76 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -1752,7 +1752,8 @@ process: | |||
1752 | if (!tcp_prequeue(sk, skb)) | 1752 | if (!tcp_prequeue(sk, skb)) |
1753 | ret = tcp_v4_do_rcv(sk, skb); | 1753 | ret = tcp_v4_do_rcv(sk, skb); |
1754 | } | 1754 | } |
1755 | } else if (unlikely(sk_add_backlog(sk, skb, sk->sk_rcvbuf))) { | 1755 | } else if (unlikely(sk_add_backlog(sk, skb, |
1756 | sk->sk_rcvbuf + sk->sk_sndbuf))) { | ||
1756 | bh_unlock_sock(sk); | 1757 | bh_unlock_sock(sk); |
1757 | NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); | 1758 | NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); |
1758 | goto discard_and_relse; | 1759 | goto discard_and_relse; |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b04e6d8a8371..5fb19d345cfd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -1654,7 +1654,8 @@ process: | |||
1654 | if (!tcp_prequeue(sk, skb)) | 1654 | if (!tcp_prequeue(sk, skb)) |
1655 | ret = tcp_v6_do_rcv(sk, skb); | 1655 | ret = tcp_v6_do_rcv(sk, skb); |
1656 | } | 1656 | } |
1657 | } else if (unlikely(sk_add_backlog(sk, skb, sk->sk_rcvbuf))) { | 1657 | } else if (unlikely(sk_add_backlog(sk, skb, |
1658 | sk->sk_rcvbuf + sk->sk_sndbuf))) { | ||
1658 | bh_unlock_sock(sk); | 1659 | bh_unlock_sock(sk); |
1659 | NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); | 1660 | NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); |
1660 | goto discard_and_relse; | 1661 | goto discard_and_relse; |