diff options
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 6 | ||||
-rw-r--r-- | include/linux/sysctl.h | 1 | ||||
-rw-r--r-- | include/net/tcp.h | 3 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 52 | ||||
-rw-r--r-- | net/ipv4/tcp_cong.c | 63 |
5 files changed, 125 insertions, 0 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index db4280856588..bbcc8deda172 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -351,6 +351,12 @@ tcp_frto - BOOLEAN | |||
351 | where packet loss is typically due to random radio interference | 351 | where packet loss is typically due to random radio interference |
352 | rather than intermediate router congestion. | 352 | rather than intermediate router congestion. |
353 | 353 | ||
354 | tcp_allowed_congestion_control - STRING | ||
355 | Show/set the congestion control choices available to non-privileged | ||
356 | processes. The list is a subset of those listed in | ||
357 | tcp_available_congestion_control. | ||
358 | Default is "reno" plus whichever algorithm is currently selected as the default (tcp_congestion_control). | ||
359 | |||
354 | tcp_available_congestion_control - STRING | 360 | tcp_available_congestion_control - STRING |
355 | Shows the available congestion control choices that are registered. | 361 | Shows the available congestion control choices that are registered. |
356 | More congestion control algorithms may be available as modules, | 362 | More congestion control algorithms may be available as modules, |
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 28a48279654d..0725441621d0 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h | |||
@@ -427,6 +427,7 @@ enum | |||
427 | NET_CIPSOV4_RBM_OPTFMT=120, | 427 | NET_CIPSOV4_RBM_OPTFMT=120, |
428 | NET_CIPSOV4_RBM_STRICTVALID=121, | 428 | NET_CIPSOV4_RBM_STRICTVALID=121, |
429 | NET_TCP_AVAIL_CONG_CONTROL=122, | 429 | NET_TCP_AVAIL_CONG_CONTROL=122, |
430 | NET_TCP_ALLOWED_CONG_CONTROL=123, | ||
430 | }; | 431 | }; |
431 | 432 | ||
432 | enum { | 433 | enum { |
diff --git a/include/net/tcp.h b/include/net/tcp.h index 6af4baf5b769..e1a5d29d0a1f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -625,6 +625,7 @@ enum tcp_ca_event { | |||
625 | 625 | ||
626 | struct tcp_congestion_ops { | 626 | struct tcp_congestion_ops { |
627 | struct list_head list; | 627 | struct list_head list; |
628 | int non_restricted; | ||
628 | 629 | ||
629 | /* initialize private data (optional) */ | 630 | /* initialize private data (optional) */ |
630 | void (*init)(struct sock *sk); | 631 | void (*init)(struct sock *sk); |
@@ -663,6 +664,8 @@ extern void tcp_cleanup_congestion_control(struct sock *sk); | |||
663 | extern int tcp_set_default_congestion_control(const char *name); | 664 | extern int tcp_set_default_congestion_control(const char *name); |
664 | extern void tcp_get_default_congestion_control(char *name); | 665 | extern void tcp_get_default_congestion_control(char *name); |
665 | extern void tcp_get_available_congestion_control(char *buf, size_t len); | 666 | extern void tcp_get_available_congestion_control(char *buf, size_t len); |
667 | extern void tcp_get_allowed_congestion_control(char *buf, size_t len); | ||
668 | extern int tcp_set_allowed_congestion_control(char *allowed); | ||
666 | extern int tcp_set_congestion_control(struct sock *sk, const char *name); | 669 | extern int tcp_set_congestion_control(struct sock *sk, const char *name); |
667 | extern void tcp_slow_start(struct tcp_sock *tp); | 670 | extern void tcp_slow_start(struct tcp_sock *tp); |
668 | 671 | ||
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 2e770f45d829..dfcf47f10f88 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -146,6 +146,50 @@ static int proc_tcp_available_congestion_control(ctl_table *ctl, | |||
146 | return ret; | 146 | return ret; |
147 | } | 147 | } |
148 | 148 | ||
149 | static int proc_allowed_congestion_control(ctl_table *ctl, | ||
150 | int write, struct file * filp, | ||
151 | void __user *buffer, size_t *lenp, | ||
152 | loff_t *ppos) | ||
153 | { | ||
154 | ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; | ||
155 | int ret; | ||
156 | |||
157 | tbl.data = kmalloc(tbl.maxlen, GFP_USER); | ||
158 | if (!tbl.data) | ||
159 | return -ENOMEM; | ||
160 | |||
161 | tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen); | ||
162 | ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); | ||
163 | if (write && ret == 0) | ||
164 | ret = tcp_set_allowed_congestion_control(tbl.data); | ||
165 | kfree(tbl.data); | ||
166 | return ret; | ||
167 | } | ||
168 | |||
169 | static int strategy_allowed_congestion_control(ctl_table *table, int __user *name, | ||
170 | int nlen, void __user *oldval, | ||
171 | size_t __user *oldlenp, | ||
172 | void __user *newval, size_t newlen, | ||
173 | void **context) | ||
174 | { | ||
175 | ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; | ||
176 | int ret; | ||
177 | |||
178 | tbl.data = kmalloc(tbl.maxlen, GFP_USER); | ||
179 | if (!tbl.data) | ||
180 | return -ENOMEM; | ||
181 | |||
182 | tcp_get_available_congestion_control(tbl.data, tbl.maxlen); | ||
183 | ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen, | ||
184 | context); | ||
185 | if (ret == 0 && newval && newlen) | ||
186 | ret = tcp_set_allowed_congestion_control(tbl.data); | ||
187 | kfree(tbl.data); | ||
188 | |||
189 | return ret; | ||
190 | |||
191 | } | ||
192 | |||
149 | ctl_table ipv4_table[] = { | 193 | ctl_table ipv4_table[] = { |
150 | { | 194 | { |
151 | .ctl_name = NET_IPV4_TCP_TIMESTAMPS, | 195 | .ctl_name = NET_IPV4_TCP_TIMESTAMPS, |
@@ -755,6 +799,14 @@ ctl_table ipv4_table[] = { | |||
755 | .mode = 0444, | 799 | .mode = 0444, |
756 | .proc_handler = &proc_tcp_available_congestion_control, | 800 | .proc_handler = &proc_tcp_available_congestion_control, |
757 | }, | 801 | }, |
802 | { | ||
803 | .ctl_name = NET_TCP_ALLOWED_CONG_CONTROL, | ||
804 | .procname = "tcp_allowed_congestion_control", | ||
805 | .maxlen = TCP_CA_BUF_MAX, | ||
806 | .mode = 0644, | ||
807 | .proc_handler = &proc_allowed_congestion_control, | ||
808 | .strategy = &strategy_allowed_congestion_control, | ||
809 | }, | ||
758 | { .ctl_name = 0 } | 810 | { .ctl_name = 0 } |
759 | }; | 811 | }; |
760 | 812 | ||
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index d846d7b95e1f..343d6197c92e 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
@@ -123,6 +123,7 @@ int tcp_set_default_congestion_control(const char *name) | |||
123 | #endif | 123 | #endif |
124 | 124 | ||
125 | if (ca) { | 125 | if (ca) { |
126 | ca->non_restricted = 1; /* default is always allowed */ | ||
126 | list_move(&ca->list, &tcp_cong_list); | 127 | list_move(&ca->list, &tcp_cong_list); |
127 | ret = 0; | 128 | ret = 0; |
128 | } | 129 | } |
@@ -168,6 +169,64 @@ void tcp_get_default_congestion_control(char *name) | |||
168 | rcu_read_unlock(); | 169 | rcu_read_unlock(); |
169 | } | 170 | } |
170 | 171 | ||
172 | /* Built list of non-restricted congestion control values */ | ||
173 | void tcp_get_allowed_congestion_control(char *buf, size_t maxlen) | ||
174 | { | ||
175 | struct tcp_congestion_ops *ca; | ||
176 | size_t offs = 0; | ||
177 | |||
178 | *buf = '\0'; | ||
179 | rcu_read_lock(); | ||
180 | list_for_each_entry_rcu(ca, &tcp_cong_list, list) { | ||
181 | if (!ca->non_restricted) | ||
182 | continue; | ||
183 | offs += snprintf(buf + offs, maxlen - offs, | ||
184 | "%s%s", | ||
185 | offs == 0 ? "" : " ", ca->name); | ||
186 | |||
187 | } | ||
188 | rcu_read_unlock(); | ||
189 | } | ||
190 | |||
191 | /* Change list of non-restricted congestion control */ | ||
192 | int tcp_set_allowed_congestion_control(char *val) | ||
193 | { | ||
194 | struct tcp_congestion_ops *ca; | ||
195 | char *clone, *name; | ||
196 | int ret = 0; | ||
197 | |||
198 | clone = kstrdup(val, GFP_USER); | ||
199 | if (!clone) | ||
200 | return -ENOMEM; | ||
201 | |||
202 | spin_lock(&tcp_cong_list_lock); | ||
203 | /* pass 1 check for bad entries */ | ||
204 | while ((name = strsep(&clone, " ")) && *name) { | ||
205 | ca = tcp_ca_find(name); | ||
206 | if (!ca) { | ||
207 | ret = -ENOENT; | ||
208 | goto out; | ||
209 | } | ||
210 | } | ||
211 | |||
212 | /* pass 2 clear */ | ||
213 | list_for_each_entry_rcu(ca, &tcp_cong_list, list) | ||
214 | ca->non_restricted = 0; | ||
215 | |||
216 | /* pass 3 mark as allowed */ | ||
217 | while ((name = strsep(&val, " ")) && *name) { | ||
218 | ca = tcp_ca_find(name); | ||
219 | WARN_ON(!ca); | ||
220 | if (ca) | ||
221 | ca->non_restricted = 1; | ||
222 | } | ||
223 | out: | ||
224 | spin_unlock(&tcp_cong_list_lock); | ||
225 | |||
226 | return ret; | ||
227 | } | ||
228 | |||
229 | |||
171 | /* Change congestion control for socket */ | 230 | /* Change congestion control for socket */ |
172 | int tcp_set_congestion_control(struct sock *sk, const char *name) | 231 | int tcp_set_congestion_control(struct sock *sk, const char *name) |
173 | { | 232 | { |
@@ -183,6 +242,9 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) | |||
183 | if (!ca) | 242 | if (!ca) |
184 | err = -ENOENT; | 243 | err = -ENOENT; |
185 | 244 | ||
245 | else if (!(ca->non_restricted || capable(CAP_NET_ADMIN))) | ||
246 | err = -EPERM; | ||
247 | |||
186 | else if (!try_module_get(ca->owner)) | 248 | else if (!try_module_get(ca->owner)) |
187 | err = -EBUSY; | 249 | err = -EBUSY; |
188 | 250 | ||
@@ -284,6 +346,7 @@ EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); | |||
284 | 346 | ||
285 | struct tcp_congestion_ops tcp_reno = { | 347 | struct tcp_congestion_ops tcp_reno = { |
286 | .name = "reno", | 348 | .name = "reno", |
349 | .non_restricted = 1, | ||
287 | .owner = THIS_MODULE, | 350 | .owner = THIS_MODULE, |
288 | .ssthresh = tcp_reno_ssthresh, | 351 | .ssthresh = tcp_reno_ssthresh, |
289 | .cong_avoid = tcp_reno_cong_avoid, | 352 | .cong_avoid = tcp_reno_cong_avoid, |