aboutsummaryrefslogtreecommitdiffstats
path: root/include/net
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2013-08-27 08:46:32 -0400
committer: David S. Miller <davem@davemloft.net> 2013-08-29 15:50:06 -0400
commit: 95bd09eb27507691520d39ee1044d6ad831c1168 (patch)
tree: e05045cc6418ce08aa87d5f8c17366a7fa672f3c /include/net
parent: b800c3b966bcf004bd8592293a49ed5cb7ea67a9 (diff)
tcp: TSO packets automatic sizing
After hearing many people over the past years complaining about TSO being bursty or even buggy, we are proud to present automatic sizing of TSO packets.

One part of the problem is that tcp_tso_should_defer() uses a heuristic relying on upcoming ACKs instead of a timer, but more generally, having big TSO packets makes little sense for low rates, as it tends to create micro bursts on the network, and the general consensus is to reduce the buffering amount.

This patch introduces a per-socket sk_pacing_rate that approximates the current sending rate, and allows us to size the TSO packets so that we try to send one packet every ms.

This field could be set by other transports.

This patch has no impact on high speed flows, where having large TSO packets makes sense to reach line rate.

For other flows, this helps achieve better packet scheduling and ACK clocking.

This patch increases performance of TCP flows in lossy environments.

A new sysctl (tcp_min_tso_segs) is added to specify the minimal size of a TSO packet (default being 2).

A follow-up patch will provide a new packet scheduler (FQ), using sk_pacing_rate as an input to perform optional per-flow pacing.

This explains why we chose to set sk_pacing_rate to twice the current rate, allowing 'slow start' ramp up.

sk_pacing_rate = 2 * cwnd * mss / srtt

v2: Neal Cardwell reported a suspect deferring of the last two segments on an initial write of 10 MSS; I had to change tcp_tso_should_defer() to take into account tp->xmit_size_goal_segs.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Van Jacobson <vanj@google.com>
Cc: Tom Herbert <therbert@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net')
-rw-r--r-- include/net/sock.h | 2
-rw-r--r-- include/net/tcp.h | 1
2 files changed, 3 insertions, 0 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index e4bbcbfd07ea..6ba2e7b0e2b1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -232,6 +232,7 @@ struct cg_proto;
232 * @sk_napi_id: id of the last napi context to receive data for sk 232 * @sk_napi_id: id of the last napi context to receive data for sk
233 * @sk_ll_usec: usecs to busypoll when there is no data 233 * @sk_ll_usec: usecs to busypoll when there is no data
234 * @sk_allocation: allocation mode 234 * @sk_allocation: allocation mode
235 * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler)
235 * @sk_sndbuf: size of send buffer in bytes 236 * @sk_sndbuf: size of send buffer in bytes
236 * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, 237 * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
237 * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings 238 * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
@@ -361,6 +362,7 @@ struct sock {
361 kmemcheck_bitfield_end(flags); 362 kmemcheck_bitfield_end(flags);
362 int sk_wmem_queued; 363 int sk_wmem_queued;
363 gfp_t sk_allocation; 364 gfp_t sk_allocation;
365 u32 sk_pacing_rate; /* bytes per second */
364 netdev_features_t sk_route_caps; 366 netdev_features_t sk_route_caps;
365 netdev_features_t sk_route_nocaps; 367 netdev_features_t sk_route_nocaps;
366 int sk_gso_type; 368 int sk_gso_type;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index dd5e16f66f84..6a6a88db462d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -281,6 +281,7 @@ extern int sysctl_tcp_early_retrans;
281extern int sysctl_tcp_limit_output_bytes; 281extern int sysctl_tcp_limit_output_bytes;
282extern int sysctl_tcp_challenge_ack_limit; 282extern int sysctl_tcp_challenge_ack_limit;
283extern unsigned int sysctl_tcp_notsent_lowat; 283extern unsigned int sysctl_tcp_notsent_lowat;
284extern int sysctl_tcp_min_tso_segs;
284 285
285extern atomic_long_t tcp_memory_allocated; 286extern atomic_long_t tcp_memory_allocated;
286extern struct percpu_counter tcp_sockets_allocated; 287extern struct percpu_counter tcp_sockets_allocated;