aboutsummaryrefslogtreecommitdiffstats
path: root/include/net/sock.h
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2013-08-27 08:46:32 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2013-11-04 07:30:59 -0500
commit5e25ba5003ee5de0ba2be56bfd54d16d4b1b028d (patch)
tree7a953aea4b06398a6f3c66835d6e76b8048acaa4 /include/net/sock.h
parent14e9c7db465387ede7f019c42f28c90f99fc2793 (diff)
tcp: TSO packets automatic sizing
[ Upstream commits 6d36824e730f247b602c90e8715a792003e3c5a7, 02cf4ebd82ff0ac7254b88e466820a290ed8289a, and parts of 7eec4174ff29cd42f2acfae8112f51c228545d40 ] After hearing many people over past years complaining against TSO being bursty or even buggy, we are proud to present automatic sizing of TSO packets. One part of the problem is that tcp_tso_should_defer() uses an heuristic relying on upcoming ACKS instead of a timer, but more generally, having big TSO packets makes little sense for low rates, as it tends to create micro bursts on the network, and general consensus is to reduce the buffering amount. This patch introduces a per socket sk_pacing_rate, that approximates the current sending rate, and allows us to size the TSO packets so that we try to send one packet every ms. This field could be set by other transports. Patch has no impact for high speed flows, where having large TSO packets makes sense to reach line rate. For other flows, this helps better packet scheduling and ACK clocking. This patch increases performance of TCP flows in lossy environments. A new sysctl (tcp_min_tso_segs) is added, to specify the minimal size of a TSO packet (default being 2). A follow-up patch will provide a new packet scheduler (FQ), using sk_pacing_rate as an input to perform optional per flow pacing. This explains why we chose to set sk_pacing_rate to twice the current rate, allowing 'slow start' ramp up. sk_pacing_rate = 2 * cwnd * mss / srtt v2: Neal Cardwell reported a suspect deferring of last two segments on initial write of 10 MSS, I had to change tcp_tso_should_defer() to take into account tp->xmit_size_goal_segs Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Neal Cardwell <ncardwell@google.com> Cc: Yuchung Cheng <ycheng@google.com> Cc: Van Jacobson <vanj@google.com> Cc: Tom Herbert <therbert@google.com> Acked-by: Yuchung Cheng <ycheng@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'include/net/sock.h')
-rw-r--r--include/net/sock.h2
1 files changed, 2 insertions, 0 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 66772cf8c3c5..cec4c723db9a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -230,6 +230,7 @@ struct cg_proto;
230 * @sk_wmem_queued: persistent queue size 230 * @sk_wmem_queued: persistent queue size
231 * @sk_forward_alloc: space allocated forward 231 * @sk_forward_alloc: space allocated forward
232 * @sk_allocation: allocation mode 232 * @sk_allocation: allocation mode
233 * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler)
233 * @sk_sndbuf: size of send buffer in bytes 234 * @sk_sndbuf: size of send buffer in bytes
234 * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, 235 * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
235 * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings 236 * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
@@ -355,6 +356,7 @@ struct sock {
355 kmemcheck_bitfield_end(flags); 356 kmemcheck_bitfield_end(flags);
356 int sk_wmem_queued; 357 int sk_wmem_queued;
357 gfp_t sk_allocation; 358 gfp_t sk_allocation;
359 u32 sk_pacing_rate; /* bytes per second */
358 netdev_features_t sk_route_caps; 360 netdev_features_t sk_route_caps;
359 netdev_features_t sk_route_nocaps; 361 netdev_features_t sk_route_nocaps;
360 int sk_gso_type; 362 int sk_gso_type;