aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--include/net/tcp.h6
-rw-r--r--net/ipv4/fib_semantics.c4
-rw-r--r--net/ipv4/sysctl_net_ipv4.c19
-rw-r--r--net/ipv4/tcp_cong.c76
-rw-r--r--net/ipv4/tcp_ipv4.c9
-rw-r--r--net/ipv6/route.c3
7 files changed, 64 insertions, 54 deletions
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 5e12975fc658..44668c29701a 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -160,6 +160,7 @@ struct netns_ipv4 {
160 struct inet_timewait_death_row tcp_death_row; 160 struct inet_timewait_death_row tcp_death_row;
161 int sysctl_max_syn_backlog; 161 int sysctl_max_syn_backlog;
162 int sysctl_tcp_fastopen; 162 int sysctl_tcp_fastopen;
163 const struct tcp_congestion_ops __rcu *tcp_congestion_control;
163 struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; 164 struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
164 spinlock_t tcp_fastopen_ctx_lock; 165 spinlock_t tcp_fastopen_ctx_lock;
165 unsigned int sysctl_tcp_fastopen_blackhole_timeout; 166 unsigned int sysctl_tcp_fastopen_blackhole_timeout;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ed71511e67a6..35cc7d0d3d47 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1002,8 +1002,8 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
1002void tcp_assign_congestion_control(struct sock *sk); 1002void tcp_assign_congestion_control(struct sock *sk);
1003void tcp_init_congestion_control(struct sock *sk); 1003void tcp_init_congestion_control(struct sock *sk);
1004void tcp_cleanup_congestion_control(struct sock *sk); 1004void tcp_cleanup_congestion_control(struct sock *sk);
1005int tcp_set_default_congestion_control(const char *name); 1005int tcp_set_default_congestion_control(struct net *net, const char *name);
1006void tcp_get_default_congestion_control(char *name); 1006void tcp_get_default_congestion_control(struct net *net, char *name);
1007void tcp_get_available_congestion_control(char *buf, size_t len); 1007void tcp_get_available_congestion_control(char *buf, size_t len);
1008void tcp_get_allowed_congestion_control(char *buf, size_t len); 1008void tcp_get_allowed_congestion_control(char *buf, size_t len);
1009int tcp_set_allowed_congestion_control(char *allowed); 1009int tcp_set_allowed_congestion_control(char *allowed);
@@ -1017,7 +1017,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
1017extern struct tcp_congestion_ops tcp_reno; 1017extern struct tcp_congestion_ops tcp_reno;
1018 1018
1019struct tcp_congestion_ops *tcp_ca_find_key(u32 key); 1019struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
1020u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca); 1020u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca);
1021#ifdef CONFIG_INET 1021#ifdef CONFIG_INET
1022char *tcp_ca_get_name_by_key(u32 key, char *buffer); 1022char *tcp_ca_get_name_by_key(u32 key, char *buffer);
1023#else 1023#else
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 589caaa90613..f04d944f8abe 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -710,7 +710,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
710 bool ecn_ca = false; 710 bool ecn_ca = false;
711 711
712 nla_strlcpy(tmp, nla, sizeof(tmp)); 712 nla_strlcpy(tmp, nla, sizeof(tmp));
713 val = tcp_ca_get_key_by_name(tmp, &ecn_ca); 713 val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
714 } else { 714 } else {
715 val = nla_get_u32(nla); 715 val = nla_get_u32(nla);
716 } 716 }
@@ -1030,7 +1030,7 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
1030 char tmp[TCP_CA_NAME_MAX]; 1030 char tmp[TCP_CA_NAME_MAX];
1031 1031
1032 nla_strlcpy(tmp, nla, sizeof(tmp)); 1032 nla_strlcpy(tmp, nla, sizeof(tmp));
1033 val = tcp_ca_get_key_by_name(tmp, &ecn_ca); 1033 val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
1034 if (val == TCP_CA_UNSPEC) 1034 if (val == TCP_CA_UNSPEC)
1035 return -EINVAL; 1035 return -EINVAL;
1036 } else { 1036 } else {
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index ef0ff3357a44..93e172118a94 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -201,6 +201,8 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
201static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, 201static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
202 void __user *buffer, size_t *lenp, loff_t *ppos) 202 void __user *buffer, size_t *lenp, loff_t *ppos)
203{ 203{
204 struct net *net = container_of(ctl->data, struct net,
205 ipv4.tcp_congestion_control);
204 char val[TCP_CA_NAME_MAX]; 206 char val[TCP_CA_NAME_MAX];
205 struct ctl_table tbl = { 207 struct ctl_table tbl = {
206 .data = val, 208 .data = val,
@@ -208,11 +210,11 @@ static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
208 }; 210 };
209 int ret; 211 int ret;
210 212
211 tcp_get_default_congestion_control(val); 213 tcp_get_default_congestion_control(net, val);
212 214
213 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 215 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
214 if (write && ret == 0) 216 if (write && ret == 0)
215 ret = tcp_set_default_congestion_control(val); 217 ret = tcp_set_default_congestion_control(net, val);
216 return ret; 218 return ret;
217} 219}
218 220
@@ -447,12 +449,6 @@ static struct ctl_table ipv4_table[] = {
447 .mode = 0644, 449 .mode = 0644,
448 .proc_handler = proc_dointvec 450 .proc_handler = proc_dointvec
449 }, 451 },
450 {
451 .procname = "tcp_congestion_control",
452 .mode = 0644,
453 .maxlen = TCP_CA_NAME_MAX,
454 .proc_handler = proc_tcp_congestion_control,
455 },
456#ifdef CONFIG_NETLABEL 452#ifdef CONFIG_NETLABEL
457 { 453 {
458 .procname = "cipso_cache_enable", 454 .procname = "cipso_cache_enable",
@@ -764,6 +760,13 @@ static struct ctl_table ipv4_net_table[] = {
764 }, 760 },
765#endif 761#endif
766 { 762 {
763 .procname = "tcp_congestion_control",
764 .data = &init_net.ipv4.tcp_congestion_control,
765 .mode = 0644,
766 .maxlen = TCP_CA_NAME_MAX,
767 .proc_handler = proc_tcp_congestion_control,
768 },
769 {
767 .procname = "tcp_keepalive_time", 770 .procname = "tcp_keepalive_time",
768 .data = &init_net.ipv4.sysctl_tcp_keepalive_time, 771 .data = &init_net.ipv4.sysctl_tcp_keepalive_time,
769 .maxlen = sizeof(int), 772 .maxlen = sizeof(int),
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 2f26124fd160..bc6c02f16243 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -33,9 +33,11 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name)
33} 33}
34 34
35/* Must be called with rcu lock held */ 35/* Must be called with rcu lock held */
36static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name) 36static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net,
37 const char *name)
37{ 38{
38 const struct tcp_congestion_ops *ca = tcp_ca_find(name); 39 struct tcp_congestion_ops *ca = tcp_ca_find(name);
40
39#ifdef CONFIG_MODULES 41#ifdef CONFIG_MODULES
40 if (!ca && capable(CAP_NET_ADMIN)) { 42 if (!ca && capable(CAP_NET_ADMIN)) {
41 rcu_read_unlock(); 43 rcu_read_unlock();
@@ -115,7 +117,7 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
115} 117}
116EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); 118EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
117 119
118u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca) 120u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca)
119{ 121{
120 const struct tcp_congestion_ops *ca; 122 const struct tcp_congestion_ops *ca;
121 u32 key = TCP_CA_UNSPEC; 123 u32 key = TCP_CA_UNSPEC;
@@ -123,7 +125,7 @@ u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
123 might_sleep(); 125 might_sleep();
124 126
125 rcu_read_lock(); 127 rcu_read_lock();
126 ca = __tcp_ca_find_autoload(name); 128 ca = tcp_ca_find_autoload(net, name);
127 if (ca) { 129 if (ca) {
128 key = ca->key; 130 key = ca->key;
129 *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN; 131 *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN;
@@ -153,23 +155,18 @@ EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key);
153/* Assign choice of congestion control. */ 155/* Assign choice of congestion control. */
154void tcp_assign_congestion_control(struct sock *sk) 156void tcp_assign_congestion_control(struct sock *sk)
155{ 157{
158 struct net *net = sock_net(sk);
156 struct inet_connection_sock *icsk = inet_csk(sk); 159 struct inet_connection_sock *icsk = inet_csk(sk);
157 struct tcp_congestion_ops *ca; 160 const struct tcp_congestion_ops *ca;
158 161
159 rcu_read_lock(); 162 rcu_read_lock();
160 list_for_each_entry_rcu(ca, &tcp_cong_list, list) { 163 ca = rcu_dereference(net->ipv4.tcp_congestion_control);
161 if (likely(try_module_get(ca->owner))) { 164 if (unlikely(!try_module_get(ca->owner)))
162 icsk->icsk_ca_ops = ca; 165 ca = &tcp_reno;
163 goto out; 166 icsk->icsk_ca_ops = ca;
164 }
165 /* Fallback to next available. The last really
166 * guaranteed fallback is Reno from this list.
167 */
168 }
169out:
170 rcu_read_unlock(); 167 rcu_read_unlock();
171 memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
172 168
169 memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
173 if (ca->flags & TCP_CONG_NEEDS_ECN) 170 if (ca->flags & TCP_CONG_NEEDS_ECN)
174 INET_ECN_xmit(sk); 171 INET_ECN_xmit(sk);
175 else 172 else
@@ -214,29 +211,27 @@ void tcp_cleanup_congestion_control(struct sock *sk)
214} 211}
215 212
216/* Used by sysctl to change default congestion control */ 213/* Used by sysctl to change default congestion control */
217int tcp_set_default_congestion_control(const char *name) 214int tcp_set_default_congestion_control(struct net *net, const char *name)
218{ 215{
219 struct tcp_congestion_ops *ca; 216 struct tcp_congestion_ops *ca;
220 int ret = -ENOENT; 217 const struct tcp_congestion_ops *prev;
221 218 int ret;
222 spin_lock(&tcp_cong_list_lock);
223 ca = tcp_ca_find(name);
224#ifdef CONFIG_MODULES
225 if (!ca && capable(CAP_NET_ADMIN)) {
226 spin_unlock(&tcp_cong_list_lock);
227 219
228 request_module("tcp_%s", name); 220 rcu_read_lock();
229 spin_lock(&tcp_cong_list_lock); 221 ca = tcp_ca_find_autoload(net, name);
230 ca = tcp_ca_find(name); 222 if (!ca) {
231 } 223 ret = -ENOENT;
232#endif 224 } else if (!try_module_get(ca->owner)) {
225 ret = -EBUSY;
226 } else {
227 prev = xchg(&net->ipv4.tcp_congestion_control, ca);
228 if (prev)
229 module_put(prev->owner);
233 230
234 if (ca) { 231 ca->flags |= TCP_CONG_NON_RESTRICTED;
235 ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */
236 list_move(&ca->list, &tcp_cong_list);
237 ret = 0; 232 ret = 0;
238 } 233 }
239 spin_unlock(&tcp_cong_list_lock); 234 rcu_read_unlock();
240 235
241 return ret; 236 return ret;
242} 237}
@@ -244,7 +239,8 @@ int tcp_set_default_congestion_control(const char *name)
244/* Set default value from kernel configuration at bootup */ 239/* Set default value from kernel configuration at bootup */
245static int __init tcp_congestion_default(void) 240static int __init tcp_congestion_default(void)
246{ 241{
247 return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG); 242 return tcp_set_default_congestion_control(&init_net,
243 CONFIG_DEFAULT_TCP_CONG);
248} 244}
249late_initcall(tcp_congestion_default); 245late_initcall(tcp_congestion_default);
250 246
@@ -264,14 +260,12 @@ void tcp_get_available_congestion_control(char *buf, size_t maxlen)
264} 260}
265 261
266/* Get current default congestion control */ 262/* Get current default congestion control */
267void tcp_get_default_congestion_control(char *name) 263void tcp_get_default_congestion_control(struct net *net, char *name)
268{ 264{
269 struct tcp_congestion_ops *ca; 265 const struct tcp_congestion_ops *ca;
270 /* We will always have reno... */
271 BUG_ON(list_empty(&tcp_cong_list));
272 266
273 rcu_read_lock(); 267 rcu_read_lock();
274 ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list); 268 ca = rcu_dereference(net->ipv4.tcp_congestion_control);
275 strncpy(name, ca->name, TCP_CA_NAME_MAX); 269 strncpy(name, ca->name, TCP_CA_NAME_MAX);
276 rcu_read_unlock(); 270 rcu_read_unlock();
277} 271}
@@ -351,12 +345,14 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, boo
351 if (!load) 345 if (!load)
352 ca = tcp_ca_find(name); 346 ca = tcp_ca_find(name);
353 else 347 else
354 ca = __tcp_ca_find_autoload(name); 348 ca = tcp_ca_find_autoload(sock_net(sk), name);
349
355 /* No change asking for existing value */ 350 /* No change asking for existing value */
356 if (ca == icsk->icsk_ca_ops) { 351 if (ca == icsk->icsk_ca_ops) {
357 icsk->icsk_ca_setsockopt = 1; 352 icsk->icsk_ca_setsockopt = 1;
358 goto out; 353 goto out;
359 } 354 }
355
360 if (!ca) { 356 if (!ca) {
361 err = -ENOENT; 357 err = -ENOENT;
362 } else if (!load) { 358 } else if (!load) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1eac84b8044e..c6bc0c4d19c6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2430,6 +2430,8 @@ static void __net_exit tcp_sk_exit(struct net *net)
2430{ 2430{
2431 int cpu; 2431 int cpu;
2432 2432
2433 module_put(net->ipv4.tcp_congestion_control->owner);
2434
2433 for_each_possible_cpu(cpu) 2435 for_each_possible_cpu(cpu)
2434 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); 2436 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2435 free_percpu(net->ipv4.tcp_sk); 2437 free_percpu(net->ipv4.tcp_sk);
@@ -2522,6 +2524,13 @@ static int __net_init tcp_sk_init(struct net *net)
2522 net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; 2524 net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
2523 atomic_set(&net->ipv4.tfo_active_disable_times, 0); 2525 atomic_set(&net->ipv4.tfo_active_disable_times, 0);
2524 2526
2527 /* Reno is always built in */
2528 if (!net_eq(net, &init_net) &&
2529 try_module_get(init_net.ipv4.tcp_congestion_control->owner))
2530 net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
2531 else
2532 net->ipv4.tcp_congestion_control = &tcp_reno;
2533
2525 return 0; 2534 return 0;
2526fail: 2535fail:
2527 tcp_sk_exit(net); 2536 tcp_sk_exit(net);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 70d9659fc1e9..05eb7bc36156 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2378,6 +2378,7 @@ out:
2378static int ip6_convert_metrics(struct mx6_config *mxc, 2378static int ip6_convert_metrics(struct mx6_config *mxc,
2379 const struct fib6_config *cfg) 2379 const struct fib6_config *cfg)
2380{ 2380{
2381 struct net *net = cfg->fc_nlinfo.nl_net;
2381 bool ecn_ca = false; 2382 bool ecn_ca = false;
2382 struct nlattr *nla; 2383 struct nlattr *nla;
2383 int remaining; 2384 int remaining;
@@ -2403,7 +2404,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
2403 char tmp[TCP_CA_NAME_MAX]; 2404 char tmp[TCP_CA_NAME_MAX];
2404 2405
2405 nla_strlcpy(tmp, nla, sizeof(tmp)); 2406 nla_strlcpy(tmp, nla, sizeof(tmp));
2406 val = tcp_ca_get_key_by_name(tmp, &ecn_ca); 2407 val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
2407 if (val == TCP_CA_UNSPEC) 2408 if (val == TCP_CA_UNSPEC)
2408 goto err; 2409 goto err;
2409 } else { 2410 } else {