diff options
-rw-r--r-- | include/net/netns/ipv4.h | 1 | ||||
-rw-r--r-- | include/net/tcp.h | 6 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 4 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 19 | ||||
-rw-r--r-- | net/ipv4/tcp_cong.c | 76 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 9 | ||||
-rw-r--r-- | net/ipv6/route.c | 3 |
7 files changed, 64 insertions, 54 deletions
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 5e12975fc658..44668c29701a 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h | |||
@@ -160,6 +160,7 @@ struct netns_ipv4 { | |||
160 | struct inet_timewait_death_row tcp_death_row; | 160 | struct inet_timewait_death_row tcp_death_row; |
161 | int sysctl_max_syn_backlog; | 161 | int sysctl_max_syn_backlog; |
162 | int sysctl_tcp_fastopen; | 162 | int sysctl_tcp_fastopen; |
163 | const struct tcp_congestion_ops __rcu *tcp_congestion_control; | ||
163 | struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; | 164 | struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; |
164 | spinlock_t tcp_fastopen_ctx_lock; | 165 | spinlock_t tcp_fastopen_ctx_lock; |
165 | unsigned int sysctl_tcp_fastopen_blackhole_timeout; | 166 | unsigned int sysctl_tcp_fastopen_blackhole_timeout; |
diff --git a/include/net/tcp.h b/include/net/tcp.h index ed71511e67a6..35cc7d0d3d47 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -1002,8 +1002,8 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); | |||
1002 | void tcp_assign_congestion_control(struct sock *sk); | 1002 | void tcp_assign_congestion_control(struct sock *sk); |
1003 | void tcp_init_congestion_control(struct sock *sk); | 1003 | void tcp_init_congestion_control(struct sock *sk); |
1004 | void tcp_cleanup_congestion_control(struct sock *sk); | 1004 | void tcp_cleanup_congestion_control(struct sock *sk); |
1005 | int tcp_set_default_congestion_control(const char *name); | 1005 | int tcp_set_default_congestion_control(struct net *net, const char *name); |
1006 | void tcp_get_default_congestion_control(char *name); | 1006 | void tcp_get_default_congestion_control(struct net *net, char *name); |
1007 | void tcp_get_available_congestion_control(char *buf, size_t len); | 1007 | void tcp_get_available_congestion_control(char *buf, size_t len); |
1008 | void tcp_get_allowed_congestion_control(char *buf, size_t len); | 1008 | void tcp_get_allowed_congestion_control(char *buf, size_t len); |
1009 | int tcp_set_allowed_congestion_control(char *allowed); | 1009 | int tcp_set_allowed_congestion_control(char *allowed); |
@@ -1017,7 +1017,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); | |||
1017 | extern struct tcp_congestion_ops tcp_reno; | 1017 | extern struct tcp_congestion_ops tcp_reno; |
1018 | 1018 | ||
1019 | struct tcp_congestion_ops *tcp_ca_find_key(u32 key); | 1019 | struct tcp_congestion_ops *tcp_ca_find_key(u32 key); |
1020 | u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca); | 1020 | u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca); |
1021 | #ifdef CONFIG_INET | 1021 | #ifdef CONFIG_INET |
1022 | char *tcp_ca_get_name_by_key(u32 key, char *buffer); | 1022 | char *tcp_ca_get_name_by_key(u32 key, char *buffer); |
1023 | #else | 1023 | #else |
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 589caaa90613..f04d944f8abe 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -710,7 +710,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) | |||
710 | bool ecn_ca = false; | 710 | bool ecn_ca = false; |
711 | 711 | ||
712 | nla_strlcpy(tmp, nla, sizeof(tmp)); | 712 | nla_strlcpy(tmp, nla, sizeof(tmp)); |
713 | val = tcp_ca_get_key_by_name(tmp, &ecn_ca); | 713 | val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); |
714 | } else { | 714 | } else { |
715 | val = nla_get_u32(nla); | 715 | val = nla_get_u32(nla); |
716 | } | 716 | } |
@@ -1030,7 +1030,7 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) | |||
1030 | char tmp[TCP_CA_NAME_MAX]; | 1030 | char tmp[TCP_CA_NAME_MAX]; |
1031 | 1031 | ||
1032 | nla_strlcpy(tmp, nla, sizeof(tmp)); | 1032 | nla_strlcpy(tmp, nla, sizeof(tmp)); |
1033 | val = tcp_ca_get_key_by_name(tmp, &ecn_ca); | 1033 | val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); |
1034 | if (val == TCP_CA_UNSPEC) | 1034 | if (val == TCP_CA_UNSPEC) |
1035 | return -EINVAL; | 1035 | return -EINVAL; |
1036 | } else { | 1036 | } else { |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ef0ff3357a44..93e172118a94 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -201,6 +201,8 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write, | |||
201 | static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, | 201 | static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, |
202 | void __user *buffer, size_t *lenp, loff_t *ppos) | 202 | void __user *buffer, size_t *lenp, loff_t *ppos) |
203 | { | 203 | { |
204 | struct net *net = container_of(ctl->data, struct net, | ||
205 | ipv4.tcp_congestion_control); | ||
204 | char val[TCP_CA_NAME_MAX]; | 206 | char val[TCP_CA_NAME_MAX]; |
205 | struct ctl_table tbl = { | 207 | struct ctl_table tbl = { |
206 | .data = val, | 208 | .data = val, |
@@ -208,11 +210,11 @@ static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, | |||
208 | }; | 210 | }; |
209 | int ret; | 211 | int ret; |
210 | 212 | ||
211 | tcp_get_default_congestion_control(val); | 213 | tcp_get_default_congestion_control(net, val); |
212 | 214 | ||
213 | ret = proc_dostring(&tbl, write, buffer, lenp, ppos); | 215 | ret = proc_dostring(&tbl, write, buffer, lenp, ppos); |
214 | if (write && ret == 0) | 216 | if (write && ret == 0) |
215 | ret = tcp_set_default_congestion_control(val); | 217 | ret = tcp_set_default_congestion_control(net, val); |
216 | return ret; | 218 | return ret; |
217 | } | 219 | } |
218 | 220 | ||
@@ -447,12 +449,6 @@ static struct ctl_table ipv4_table[] = { | |||
447 | .mode = 0644, | 449 | .mode = 0644, |
448 | .proc_handler = proc_dointvec | 450 | .proc_handler = proc_dointvec |
449 | }, | 451 | }, |
450 | { | ||
451 | .procname = "tcp_congestion_control", | ||
452 | .mode = 0644, | ||
453 | .maxlen = TCP_CA_NAME_MAX, | ||
454 | .proc_handler = proc_tcp_congestion_control, | ||
455 | }, | ||
456 | #ifdef CONFIG_NETLABEL | 452 | #ifdef CONFIG_NETLABEL |
457 | { | 453 | { |
458 | .procname = "cipso_cache_enable", | 454 | .procname = "cipso_cache_enable", |
@@ -764,6 +760,13 @@ static struct ctl_table ipv4_net_table[] = { | |||
764 | }, | 760 | }, |
765 | #endif | 761 | #endif |
766 | { | 762 | { |
763 | .procname = "tcp_congestion_control", | ||
764 | .data = &init_net.ipv4.tcp_congestion_control, | ||
765 | .mode = 0644, | ||
766 | .maxlen = TCP_CA_NAME_MAX, | ||
767 | .proc_handler = proc_tcp_congestion_control, | ||
768 | }, | ||
769 | { | ||
767 | .procname = "tcp_keepalive_time", | 770 | .procname = "tcp_keepalive_time", |
768 | .data = &init_net.ipv4.sysctl_tcp_keepalive_time, | 771 | .data = &init_net.ipv4.sysctl_tcp_keepalive_time, |
769 | .maxlen = sizeof(int), | 772 | .maxlen = sizeof(int), |
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 2f26124fd160..bc6c02f16243 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
@@ -33,9 +33,11 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name) | |||
33 | } | 33 | } |
34 | 34 | ||
35 | /* Must be called with rcu lock held */ | 35 | /* Must be called with rcu lock held */ |
36 | static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name) | 36 | static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net, |
37 | const char *name) | ||
37 | { | 38 | { |
38 | const struct tcp_congestion_ops *ca = tcp_ca_find(name); | 39 | struct tcp_congestion_ops *ca = tcp_ca_find(name); |
40 | |||
39 | #ifdef CONFIG_MODULES | 41 | #ifdef CONFIG_MODULES |
40 | if (!ca && capable(CAP_NET_ADMIN)) { | 42 | if (!ca && capable(CAP_NET_ADMIN)) { |
41 | rcu_read_unlock(); | 43 | rcu_read_unlock(); |
@@ -115,7 +117,7 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) | |||
115 | } | 117 | } |
116 | EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); | 118 | EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); |
117 | 119 | ||
118 | u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca) | 120 | u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca) |
119 | { | 121 | { |
120 | const struct tcp_congestion_ops *ca; | 122 | const struct tcp_congestion_ops *ca; |
121 | u32 key = TCP_CA_UNSPEC; | 123 | u32 key = TCP_CA_UNSPEC; |
@@ -123,7 +125,7 @@ u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca) | |||
123 | might_sleep(); | 125 | might_sleep(); |
124 | 126 | ||
125 | rcu_read_lock(); | 127 | rcu_read_lock(); |
126 | ca = __tcp_ca_find_autoload(name); | 128 | ca = tcp_ca_find_autoload(net, name); |
127 | if (ca) { | 129 | if (ca) { |
128 | key = ca->key; | 130 | key = ca->key; |
129 | *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN; | 131 | *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN; |
@@ -153,23 +155,18 @@ EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key); | |||
153 | /* Assign choice of congestion control. */ | 155 | /* Assign choice of congestion control. */ |
154 | void tcp_assign_congestion_control(struct sock *sk) | 156 | void tcp_assign_congestion_control(struct sock *sk) |
155 | { | 157 | { |
158 | struct net *net = sock_net(sk); | ||
156 | struct inet_connection_sock *icsk = inet_csk(sk); | 159 | struct inet_connection_sock *icsk = inet_csk(sk); |
157 | struct tcp_congestion_ops *ca; | 160 | const struct tcp_congestion_ops *ca; |
158 | 161 | ||
159 | rcu_read_lock(); | 162 | rcu_read_lock(); |
160 | list_for_each_entry_rcu(ca, &tcp_cong_list, list) { | 163 | ca = rcu_dereference(net->ipv4.tcp_congestion_control); |
161 | if (likely(try_module_get(ca->owner))) { | 164 | if (unlikely(!try_module_get(ca->owner))) |
162 | icsk->icsk_ca_ops = ca; | 165 | ca = &tcp_reno; |
163 | goto out; | 166 | icsk->icsk_ca_ops = ca; |
164 | } | ||
165 | /* Fallback to next available. The last really | ||
166 | * guaranteed fallback is Reno from this list. | ||
167 | */ | ||
168 | } | ||
169 | out: | ||
170 | rcu_read_unlock(); | 167 | rcu_read_unlock(); |
171 | memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); | ||
172 | 168 | ||
169 | memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); | ||
173 | if (ca->flags & TCP_CONG_NEEDS_ECN) | 170 | if (ca->flags & TCP_CONG_NEEDS_ECN) |
174 | INET_ECN_xmit(sk); | 171 | INET_ECN_xmit(sk); |
175 | else | 172 | else |
@@ -214,29 +211,27 @@ void tcp_cleanup_congestion_control(struct sock *sk) | |||
214 | } | 211 | } |
215 | 212 | ||
216 | /* Used by sysctl to change default congestion control */ | 213 | /* Used by sysctl to change default congestion control */ |
217 | int tcp_set_default_congestion_control(const char *name) | 214 | int tcp_set_default_congestion_control(struct net *net, const char *name) |
218 | { | 215 | { |
219 | struct tcp_congestion_ops *ca; | 216 | struct tcp_congestion_ops *ca; |
220 | int ret = -ENOENT; | 217 | const struct tcp_congestion_ops *prev; |
221 | 218 | int ret; | |
222 | spin_lock(&tcp_cong_list_lock); | ||
223 | ca = tcp_ca_find(name); | ||
224 | #ifdef CONFIG_MODULES | ||
225 | if (!ca && capable(CAP_NET_ADMIN)) { | ||
226 | spin_unlock(&tcp_cong_list_lock); | ||
227 | 219 | ||
228 | request_module("tcp_%s", name); | 220 | rcu_read_lock(); |
229 | spin_lock(&tcp_cong_list_lock); | 221 | ca = tcp_ca_find_autoload(net, name); |
230 | ca = tcp_ca_find(name); | 222 | if (!ca) { |
231 | } | 223 | ret = -ENOENT; |
232 | #endif | 224 | } else if (!try_module_get(ca->owner)) { |
225 | ret = -EBUSY; | ||
226 | } else { | ||
227 | prev = xchg(&net->ipv4.tcp_congestion_control, ca); | ||
228 | if (prev) | ||
229 | module_put(prev->owner); | ||
233 | 230 | ||
234 | if (ca) { | 231 | ca->flags |= TCP_CONG_NON_RESTRICTED; |
235 | ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */ | ||
236 | list_move(&ca->list, &tcp_cong_list); | ||
237 | ret = 0; | 232 | ret = 0; |
238 | } | 233 | } |
239 | spin_unlock(&tcp_cong_list_lock); | 234 | rcu_read_unlock(); |
240 | 235 | ||
241 | return ret; | 236 | return ret; |
242 | } | 237 | } |
@@ -244,7 +239,8 @@ int tcp_set_default_congestion_control(const char *name) | |||
244 | /* Set default value from kernel configuration at bootup */ | 239 | /* Set default value from kernel configuration at bootup */ |
245 | static int __init tcp_congestion_default(void) | 240 | static int __init tcp_congestion_default(void) |
246 | { | 241 | { |
247 | return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG); | 242 | return tcp_set_default_congestion_control(&init_net, |
243 | CONFIG_DEFAULT_TCP_CONG); | ||
248 | } | 244 | } |
249 | late_initcall(tcp_congestion_default); | 245 | late_initcall(tcp_congestion_default); |
250 | 246 | ||
@@ -264,14 +260,12 @@ void tcp_get_available_congestion_control(char *buf, size_t maxlen) | |||
264 | } | 260 | } |
265 | 261 | ||
266 | /* Get current default congestion control */ | 262 | /* Get current default congestion control */ |
267 | void tcp_get_default_congestion_control(char *name) | 263 | void tcp_get_default_congestion_control(struct net *net, char *name) |
268 | { | 264 | { |
269 | struct tcp_congestion_ops *ca; | 265 | const struct tcp_congestion_ops *ca; |
270 | /* We will always have reno... */ | ||
271 | BUG_ON(list_empty(&tcp_cong_list)); | ||
272 | 266 | ||
273 | rcu_read_lock(); | 267 | rcu_read_lock(); |
274 | ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list); | 268 | ca = rcu_dereference(net->ipv4.tcp_congestion_control); |
275 | strncpy(name, ca->name, TCP_CA_NAME_MAX); | 269 | strncpy(name, ca->name, TCP_CA_NAME_MAX); |
276 | rcu_read_unlock(); | 270 | rcu_read_unlock(); |
277 | } | 271 | } |
@@ -351,12 +345,14 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, boo | |||
351 | if (!load) | 345 | if (!load) |
352 | ca = tcp_ca_find(name); | 346 | ca = tcp_ca_find(name); |
353 | else | 347 | else |
354 | ca = __tcp_ca_find_autoload(name); | 348 | ca = tcp_ca_find_autoload(sock_net(sk), name); |
349 | |||
355 | /* No change asking for existing value */ | 350 | /* No change asking for existing value */ |
356 | if (ca == icsk->icsk_ca_ops) { | 351 | if (ca == icsk->icsk_ca_ops) { |
357 | icsk->icsk_ca_setsockopt = 1; | 352 | icsk->icsk_ca_setsockopt = 1; |
358 | goto out; | 353 | goto out; |
359 | } | 354 | } |
355 | |||
360 | if (!ca) { | 356 | if (!ca) { |
361 | err = -ENOENT; | 357 | err = -ENOENT; |
362 | } else if (!load) { | 358 | } else if (!load) { |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1eac84b8044e..c6bc0c4d19c6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -2430,6 +2430,8 @@ static void __net_exit tcp_sk_exit(struct net *net) | |||
2430 | { | 2430 | { |
2431 | int cpu; | 2431 | int cpu; |
2432 | 2432 | ||
2433 | module_put(net->ipv4.tcp_congestion_control->owner); | ||
2434 | |||
2433 | for_each_possible_cpu(cpu) | 2435 | for_each_possible_cpu(cpu) |
2434 | inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); | 2436 | inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); |
2435 | free_percpu(net->ipv4.tcp_sk); | 2437 | free_percpu(net->ipv4.tcp_sk); |
@@ -2522,6 +2524,13 @@ static int __net_init tcp_sk_init(struct net *net) | |||
2522 | net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; | 2524 | net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; |
2523 | atomic_set(&net->ipv4.tfo_active_disable_times, 0); | 2525 | atomic_set(&net->ipv4.tfo_active_disable_times, 0); |
2524 | 2526 | ||
2527 | /* Reno is always built in */ | ||
2528 | if (!net_eq(net, &init_net) && | ||
2529 | try_module_get(init_net.ipv4.tcp_congestion_control->owner)) | ||
2530 | net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control; | ||
2531 | else | ||
2532 | net->ipv4.tcp_congestion_control = &tcp_reno; | ||
2533 | |||
2525 | return 0; | 2534 | return 0; |
2526 | fail: | 2535 | fail: |
2527 | tcp_sk_exit(net); | 2536 | tcp_sk_exit(net); |
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 70d9659fc1e9..05eb7bc36156 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -2378,6 +2378,7 @@ out: | |||
2378 | static int ip6_convert_metrics(struct mx6_config *mxc, | 2378 | static int ip6_convert_metrics(struct mx6_config *mxc, |
2379 | const struct fib6_config *cfg) | 2379 | const struct fib6_config *cfg) |
2380 | { | 2380 | { |
2381 | struct net *net = cfg->fc_nlinfo.nl_net; | ||
2381 | bool ecn_ca = false; | 2382 | bool ecn_ca = false; |
2382 | struct nlattr *nla; | 2383 | struct nlattr *nla; |
2383 | int remaining; | 2384 | int remaining; |
@@ -2403,7 +2404,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc, | |||
2403 | char tmp[TCP_CA_NAME_MAX]; | 2404 | char tmp[TCP_CA_NAME_MAX]; |
2404 | 2405 | ||
2405 | nla_strlcpy(tmp, nla, sizeof(tmp)); | 2406 | nla_strlcpy(tmp, nla, sizeof(tmp)); |
2406 | val = tcp_ca_get_key_by_name(tmp, &ecn_ca); | 2407 | val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca); |
2407 | if (val == TCP_CA_UNSPEC) | 2408 | if (val == TCP_CA_UNSPEC) |
2408 | goto err; | 2409 | goto err; |
2409 | } else { | 2410 | } else { |