aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Eric Dumazet <edumazet@google.com> 2013-11-13 09:32:54 -0500
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2013-12-08 10:29:24 -0500
commit 6ef30bdab8dab92939edbc964237b844efbe5946 (patch)
tree dcc670d4fc7165637d2bc6f81eb5f13b21b24237
parent a6c8afd6ef8037c550d646ddb195ad5f0b5bebcf (diff)
tcp: tsq: restore minimal amount of queueing
[ Upstream commit 98e09386c0ef4dfd48af7ba60ff908f0d525cdee ] After commit c9eeec26e32e ("tcp: TSQ can use a dynamic limit"), several users reported throughput regressions, notably on mvneta and wifi adapters. 802.11 AMPDU requires a fair amount of queueing to be effective. This patch partially reverts the change done in tcp_write_xmit() so that the minimal amount is sysctl_tcp_limit_output_bytes. It also removes the use of this sysctl while building skbs stored in the write queue, as TSO autosizing does the right thing anyway. Users with well-behaved NICs and a correct qdisc (like sch_fq) can then lower the default sysctl_tcp_limit_output_bytes value from 128KB to 8KB. This new usage of sysctl_tcp_limit_output_bytes permits each driver author to check how their driver performs when/if the value is set to a minimum of 4KB. Normally, line rate for a single TCP flow should be possible, but some drivers rely on timers to perform TX completion, and overly long TX completion delays prevent reaching full throughput. Fixes: c9eeec26e32e ("tcp: TSQ can use a dynamic limit") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Sujith Manoharan <sujith@msujith.org> Reported-by: Arnaud Ebalard <arno@natisbad.org> Tested-by: Sujith Manoharan <sujith@msujith.org> Cc: Felix Fietkau <nbd@openwrt.org> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- Documentation/networking/ip-sysctl.txt 3
-rw-r--r-- net/ipv4/tcp.c 6
-rw-r--r-- net/ipv4/tcp_output.c 6
3 files changed, 5 insertions, 10 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 3994f0bbeeb6..a59ee432a98f 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -571,9 +571,6 @@ tcp_limit_output_bytes - INTEGER
571 typical pfifo_fast qdiscs. 571 typical pfifo_fast qdiscs.
572 tcp_limit_output_bytes limits the number of bytes on qdisc 572 tcp_limit_output_bytes limits the number of bytes on qdisc
573 or device to reduce artificial RTT/cwnd and reduce bufferbloat. 573 or device to reduce artificial RTT/cwnd and reduce bufferbloat.
574 Note: For GSO/TSO enabled flows, we try to have at least two
575 packets in flight. Reducing tcp_limit_output_bytes might also
576 reduce the size of individual GSO packet (64KB being the max)
577 Default: 131072 574 Default: 131072
578 575
579tcp_challenge_ack_limit - INTEGER 576tcp_challenge_ack_limit - INTEGER
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c888abf5a728..ae15c18df880 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -807,12 +807,6 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
807 xmit_size_goal = min_t(u32, gso_size, 807 xmit_size_goal = min_t(u32, gso_size,
808 sk->sk_gso_max_size - 1 - hlen); 808 sk->sk_gso_max_size - 1 - hlen);
809 809
810 /* TSQ : try to have at least two segments in flight
811 * (one in NIC TX ring, another in Qdisc)
812 */
813 xmit_size_goal = min_t(u32, xmit_size_goal,
814 sysctl_tcp_limit_output_bytes >> 1);
815
816 xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); 810 xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
817 811
818 /* We try hard to avoid divides here */ 812 /* We try hard to avoid divides here */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cd16eb06bebf..e21be13f74a6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1866,8 +1866,12 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1866 * - better RTT estimation and ACK scheduling 1866 * - better RTT estimation and ACK scheduling
1867 * - faster recovery 1867 * - faster recovery
1868 * - high rates 1868 * - high rates
1869 * Alas, some drivers / subsystems require a fair amount
1870 * of queued bytes to ensure line rate.
1871 * One example is wifi aggregation (802.11 AMPDU)
1869 */ 1872 */
1870 limit = max(skb->truesize, sk->sk_pacing_rate >> 10); 1873 limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes,
1874 sk->sk_pacing_rate >> 10);
1871 1875
1872 if (atomic_read(&sk->sk_wmem_alloc) > limit) { 1876 if (atomic_read(&sk->sk_wmem_alloc) > limit) {
1873 set_bit(TSQ_THROTTLED, &tp->tsq_flags); 1877 set_bit(TSQ_THROTTLED, &tp->tsq_flags);