summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJosh Hunt <johunt@akamai.com>2019-08-07 19:52:29 -0400
committerDavid S. Miller <davem@davemloft.net>2019-08-09 16:03:30 -0400
commitc04b79b6cfd714144f6a2cf359603d82ee631e62 (patch)
tree3ef5597a3400666cfb0362c0ab1abbd666b87da3
parent3a5e523479c49b082b8ac291a1a9fbd035c06df5 (diff)
tcp: add new tcp_mtu_probe_floor sysctl
The current implementation of TCP MTU probing can considerably underestimate the MTU on lossy connections, allowing the MSS to get down to 48. We have found that in almost all of these cases on our networks these paths can handle much larger MTUs, meaning the connections are being artificially limited. Even though TCP MTU probing can raise the MSS back up, we have seen this not to be the case, causing connections to be "stuck" with an MSS of 48 when heavy loss is present. Prior to pushing out this change we could not keep TCP MTU probing enabled because of the above reasons. Now, with a reasonable floor set, we've had it enabled for the past 6 months. The new sysctl will still default to TCP_MIN_SND_MSS (48), but gives administrators the ability to control the floor of MSS probing. Signed-off-by: Josh Hunt <johunt@akamai.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/ip-sysctl.txt6
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--net/ipv4/sysctl_net_ipv4.c9
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv4/tcp_timer.c2
5 files changed, 18 insertions, 1 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index df33674799b5..49e95f438ed7 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -256,6 +256,12 @@ tcp_base_mss - INTEGER
256 Path MTU discovery (MTU probing). If MTU probing is enabled, 256 Path MTU discovery (MTU probing). If MTU probing is enabled,
257 this is the initial MSS used by the connection. 257 this is the initial MSS used by the connection.
258 258
259tcp_mtu_probe_floor - INTEGER
260 If MTU probing is enabled this caps the minimum MSS used for search_low
261 for the connection.
262
263 Default : 48
264
259tcp_min_snd_mss - INTEGER 265tcp_min_snd_mss - INTEGER
260 TCP SYN and SYNACK messages usually advertise an ADVMSS option, 266 TCP SYN and SYNACK messages usually advertise an ADVMSS option,
261 as described in RFC 1122 and RFC 6691. 267 as described in RFC 1122 and RFC 6691.
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index bc24a8ec1ce5..c0c0791b1912 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -116,6 +116,7 @@ struct netns_ipv4 {
116 int sysctl_tcp_l3mdev_accept; 116 int sysctl_tcp_l3mdev_accept;
117#endif 117#endif
118 int sysctl_tcp_mtu_probing; 118 int sysctl_tcp_mtu_probing;
119 int sysctl_tcp_mtu_probe_floor;
119 int sysctl_tcp_base_mss; 120 int sysctl_tcp_base_mss;
120 int sysctl_tcp_min_snd_mss; 121 int sysctl_tcp_min_snd_mss;
121 int sysctl_tcp_probe_threshold; 122 int sysctl_tcp_probe_threshold;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0b980e841927..59ded25acd04 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -820,6 +820,15 @@ static struct ctl_table ipv4_net_table[] = {
820 .extra2 = &tcp_min_snd_mss_max, 820 .extra2 = &tcp_min_snd_mss_max,
821 }, 821 },
822 { 822 {
823 .procname = "tcp_mtu_probe_floor",
824 .data = &init_net.ipv4.sysctl_tcp_mtu_probe_floor,
825 .maxlen = sizeof(int),
826 .mode = 0644,
827 .proc_handler = proc_dointvec_minmax,
828 .extra1 = &tcp_min_snd_mss_min,
829 .extra2 = &tcp_min_snd_mss_max,
830 },
831 {
823 .procname = "tcp_probe_threshold", 832 .procname = "tcp_probe_threshold",
824 .data = &init_net.ipv4.sysctl_tcp_probe_threshold, 833 .data = &init_net.ipv4.sysctl_tcp_probe_threshold,
825 .maxlen = sizeof(int), 834 .maxlen = sizeof(int),
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d57641cb3477..e0a372676329 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2637,6 +2637,7 @@ static int __net_init tcp_sk_init(struct net *net)
2637 net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS; 2637 net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
2638 net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; 2638 net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
2639 net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; 2639 net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
2640 net->ipv4.sysctl_tcp_mtu_probe_floor = TCP_MIN_SND_MSS;
2640 2641
2641 net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; 2642 net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
2642 net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; 2643 net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c801cd37cc2a..dbd9d2d0ee63 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -154,7 +154,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
154 } else { 154 } else {
155 mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; 155 mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
156 mss = min(net->ipv4.sysctl_tcp_base_mss, mss); 156 mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
157 mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len); 157 mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
158 mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss); 158 mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
159 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); 159 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
160 } 160 }