aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author Eric Dumazet <edumazet@google.com> 2013-11-13 09:32:54 -0500
committer David S. Miller <davem@davemloft.net> 2013-11-14 16:25:14 -0500
commit 98e09386c0ef4dfd48af7ba60ff908f0d525cdee (patch)
tree cc572cb1114420baf8d957814bd778cf57e20d03 /net
parent 6afae6457b3025f0b48228292a3ea92d7b30b1d4 (diff)
tcp: tsq: restore minimal amount of queueing
After commit c9eeec26e32e ("tcp: TSQ can use a dynamic limit"), several users reported throughput regressions, notably on mvneta and wifi adapters. 802.11 AMPDU requires a fair amount of queueing to be effective. This patch partially reverts the change done in tcp_write_xmit() so that the minimal amount is sysctl_tcp_limit_output_bytes. It also removes the use of this sysctl while building skbs stored in the write queue, as TSO autosizing does the right thing anyway. Users with well-behaved NICs and a correct qdisc (like sch_fq) can then lower the default sysctl_tcp_limit_output_bytes value from 128KB to 8KB. This new usage of sysctl_tcp_limit_output_bytes lets each driver author check how their driver performs when/if the value is set to a minimum of 4KB. Normally, line rate for a single TCP flow should be possible, but some drivers rely on timers to perform TX completion, and overly long TX completion delays prevent reaching full throughput. Fixes: c9eeec26e32e ("tcp: TSQ can use a dynamic limit") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Sujith Manoharan <sujith@msujith.org> Reported-by: Arnaud Ebalard <arno@natisbad.org> Tested-by: Sujith Manoharan <sujith@msujith.org> Cc: Felix Fietkau <nbd@openwrt.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/ipv4/tcp.c 6
-rw-r--r-- net/ipv4/tcp_output.c 6
2 files changed, 5 insertions, 7 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8e8529d3c8c9..3dc0c6cf02a8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -808,12 +808,6 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
808 xmit_size_goal = min_t(u32, gso_size, 808 xmit_size_goal = min_t(u32, gso_size,
809 sk->sk_gso_max_size - 1 - hlen); 809 sk->sk_gso_max_size - 1 - hlen);
810 810
811 /* TSQ : try to have at least two segments in flight
812 * (one in NIC TX ring, another in Qdisc)
813 */
814 xmit_size_goal = min_t(u32, xmit_size_goal,
815 sysctl_tcp_limit_output_bytes >> 1);
816
817 xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); 811 xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
818 812
819 /* We try hard to avoid divides here */ 813 /* We try hard to avoid divides here */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 672854664ff5..c5231d9b06d7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1875,8 +1875,12 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1875 * - better RTT estimation and ACK scheduling 1875 * - better RTT estimation and ACK scheduling
1876 * - faster recovery 1876 * - faster recovery
1877 * - high rates 1877 * - high rates
1878 * Alas, some drivers / subsystems require a fair amount
1879 * of queued bytes to ensure line rate.
1880 * One example is wifi aggregation (802.11 AMPDU)
1878 */ 1881 */
1879 limit = max(skb->truesize, sk->sk_pacing_rate >> 10); 1882 limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes,
1883 sk->sk_pacing_rate >> 10);
1880 1884
1881 if (atomic_read(&sk->sk_wmem_alloc) > limit) { 1885 if (atomic_read(&sk->sk_wmem_alloc) > limit) {
1882 set_bit(TSQ_THROTTLED, &tp->tsq_flags); 1886 set_bit(TSQ_THROTTLED, &tp->tsq_flags);