aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorNeal Cardwell <ncardwell@google.com>2015-05-29 13:47:07 -0400
committerDavid S. Miller <davem@davemloft.net>2015-06-01 00:49:14 -0400
commit9f950415e4e28e7cfae2e416b43e862e8101d996 (patch)
treebfcaf984c1d39e6f1d792a09a370cb849ad9f95b /net
parentbeb39db59d14990e401e235faf66a6b9b31240b0 (diff)
tcp: fix child sockets to use system default congestion control if not set
Linux 3.17 and earlier are explicitly engineered so that if the app doesn't specifically request a CC module on a listener before the SYN arrives, then the child gets the system default CC when the connection is established. See tcp_init_congestion_control() in 3.17 or earlier, which says "if no choice made yet assign the current value set as default". The change ("net: tcp: assign tcp cong_ops when tcp sk is created") altered these semantics, so that children got their parent listener's congestion control even if the system default had changed after the listener was created. This commit returns to those original semantics from 3.17 and earlier, since they are the original semantics from 2007 in 4d4d3d1e8 ("[TCP]: Congestion control initialization."), and some Linux congestion control workflows depend on that. In summary, if a listener socket specifically sets TCP_CONGESTION to "x", or the route locks the CC module to "x", then the child gets "x". Otherwise the child gets current system default from net.ipv4.tcp_congestion_control. That's the behavior in 3.17 and earlier, and this commit restores that. Fixes: 55d8694fa82c ("net: tcp: assign tcp cong_ops when tcp sk is created") Cc: Florian Westphal <fw@strlen.de> Cc: Daniel Borkmann <dborkman@redhat.com> Cc: Glenn Judd <glenn.judd@morganstanley.com> Cc: Stephen Hemminger <stephen@networkplumber.org> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Yuchung Cheng <ycheng@google.com> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/tcp_cong.c5
-rw-r--r--net/ipv4/tcp_minisocks.c5
2 files changed, 8 insertions, 2 deletions
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 7a5ae50c80c8..84be008c945c 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -187,6 +187,7 @@ static void tcp_reinit_congestion_control(struct sock *sk,
187 187
188 tcp_cleanup_congestion_control(sk); 188 tcp_cleanup_congestion_control(sk);
189 icsk->icsk_ca_ops = ca; 189 icsk->icsk_ca_ops = ca;
190 icsk->icsk_ca_setsockopt = 1;
190 191
191 if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) 192 if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
192 icsk->icsk_ca_ops->init(sk); 193 icsk->icsk_ca_ops->init(sk);
@@ -335,8 +336,10 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
335 rcu_read_lock(); 336 rcu_read_lock();
336 ca = __tcp_ca_find_autoload(name); 337 ca = __tcp_ca_find_autoload(name);
337 /* No change asking for existing value */ 338 /* No change asking for existing value */
338 if (ca == icsk->icsk_ca_ops) 339 if (ca == icsk->icsk_ca_ops) {
340 icsk->icsk_ca_setsockopt = 1;
339 goto out; 341 goto out;
342 }
340 if (!ca) 343 if (!ca)
341 err = -ENOENT; 344 err = -ENOENT;
342 else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || 345 else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b5732a54f2ad..17e7339ee5ca 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -420,7 +420,10 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
420 rcu_read_unlock(); 420 rcu_read_unlock();
421 } 421 }
422 422
423 if (!ca_got_dst && !try_module_get(icsk->icsk_ca_ops->owner)) 423 /* If no valid choice made yet, assign current system default ca. */
424 if (!ca_got_dst &&
425 (!icsk->icsk_ca_setsockopt ||
426 !try_module_get(icsk->icsk_ca_ops->owner)))
424 tcp_assign_congestion_control(sk); 427 tcp_assign_congestion_control(sk);
425 428
426 tcp_set_ca_state(sk, TCP_CA_Open); 429 tcp_set_ca_state(sk, TCP_CA_Open);