aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephen Hemminger <shemminger@osdl.org>2006-11-09 19:35:15 -0500
committerDavid S. Miller <davem@sunset.davemloft.net>2006-12-03 00:21:49 -0500
commitce7bc3bf15cbf5dc5a5587ccb6b04c5b4dde4336 (patch)
tree20ccf7f98ac5d1aabbc706fa876e8f361219db97
parent3ff825b28d3345ef381eceae22bf9d92231f23dc (diff)
[TCP]: Restrict congestion control choices.
Allow normal users to choose only among a restricted set of congestion control algorithms. The default set is reno plus whatever has been configured as the default. The policy can be changed by the administrator at any time. For example, to allow any choice: cp /proc/sys/net/ipv4/tcp_available_congestion_control \ /proc/sys/net/ipv4/tcp_allowed_congestion_control Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/ip-sysctl.txt6
-rw-r--r--include/linux/sysctl.h1
-rw-r--r--include/net/tcp.h3
-rw-r--r--net/ipv4/sysctl_net_ipv4.c52
-rw-r--r--net/ipv4/tcp_cong.c63
5 files changed, 125 insertions, 0 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index db4280856588..bbcc8deda172 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -351,6 +351,12 @@ tcp_frto - BOOLEAN
351 where packet loss is typically due to random radio interference 351 where packet loss is typically due to random radio interference
352 rather than intermediate router congestion. 352 rather than intermediate router congestion.
353 353
354tcp_allowed_congestion_control - STRING
355 Show/set the congestion control choices available to non-privileged
356 processes. The list is a subset of those listed in
357 tcp_available_congestion_control.
358 Default is "reno" and the default setting (tcp_congestion_control).
359
354tcp_available_congestion_control - STRING 360tcp_available_congestion_control - STRING
355 Shows the available congestion control choices that are registered. 361 Shows the available congestion control choices that are registered.
356 More congestion control algorithms may be available as modules, 362 More congestion control algorithms may be available as modules,
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 28a48279654d..0725441621d0 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -427,6 +427,7 @@ enum
427 NET_CIPSOV4_RBM_OPTFMT=120, 427 NET_CIPSOV4_RBM_OPTFMT=120,
428 NET_CIPSOV4_RBM_STRICTVALID=121, 428 NET_CIPSOV4_RBM_STRICTVALID=121,
429 NET_TCP_AVAIL_CONG_CONTROL=122, 429 NET_TCP_AVAIL_CONG_CONTROL=122,
430 NET_TCP_ALLOWED_CONG_CONTROL=123,
430}; 431};
431 432
432enum { 433enum {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6af4baf5b769..e1a5d29d0a1f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -625,6 +625,7 @@ enum tcp_ca_event {
625 625
626struct tcp_congestion_ops { 626struct tcp_congestion_ops {
627 struct list_head list; 627 struct list_head list;
628 int non_restricted;
628 629
629 /* initialize private data (optional) */ 630 /* initialize private data (optional) */
630 void (*init)(struct sock *sk); 631 void (*init)(struct sock *sk);
@@ -663,6 +664,8 @@ extern void tcp_cleanup_congestion_control(struct sock *sk);
663extern int tcp_set_default_congestion_control(const char *name); 664extern int tcp_set_default_congestion_control(const char *name);
664extern void tcp_get_default_congestion_control(char *name); 665extern void tcp_get_default_congestion_control(char *name);
665extern void tcp_get_available_congestion_control(char *buf, size_t len); 666extern void tcp_get_available_congestion_control(char *buf, size_t len);
667extern void tcp_get_allowed_congestion_control(char *buf, size_t len);
668extern int tcp_set_allowed_congestion_control(char *allowed);
666extern int tcp_set_congestion_control(struct sock *sk, const char *name); 669extern int tcp_set_congestion_control(struct sock *sk, const char *name);
667extern void tcp_slow_start(struct tcp_sock *tp); 670extern void tcp_slow_start(struct tcp_sock *tp);
668 671
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 2e770f45d829..dfcf47f10f88 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -146,6 +146,50 @@ static int proc_tcp_available_congestion_control(ctl_table *ctl,
146 return ret; 146 return ret;
147} 147}
148 148
149static int proc_allowed_congestion_control(ctl_table *ctl,
150 int write, struct file * filp,
151 void __user *buffer, size_t *lenp,
152 loff_t *ppos)
153{
154 ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
155 int ret;
156
157 tbl.data = kmalloc(tbl.maxlen, GFP_USER);
158 if (!tbl.data)
159 return -ENOMEM;
160
161 tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
162 ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos);
163 if (write && ret == 0)
164 ret = tcp_set_allowed_congestion_control(tbl.data);
165 kfree(tbl.data);
166 return ret;
167}
168
169static int strategy_allowed_congestion_control(ctl_table *table, int __user *name,
170 int nlen, void __user *oldval,
171 size_t __user *oldlenp,
172 void __user *newval, size_t newlen,
173 void **context)
174{
175 ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
176 int ret;
177
178 tbl.data = kmalloc(tbl.maxlen, GFP_USER);
179 if (!tbl.data)
180 return -ENOMEM;
181
182 tcp_get_available_congestion_control(tbl.data, tbl.maxlen);
183 ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen,
184 context);
185 if (ret == 0 && newval && newlen)
186 ret = tcp_set_allowed_congestion_control(tbl.data);
187 kfree(tbl.data);
188
189 return ret;
190
191}
192
149ctl_table ipv4_table[] = { 193ctl_table ipv4_table[] = {
150 { 194 {
151 .ctl_name = NET_IPV4_TCP_TIMESTAMPS, 195 .ctl_name = NET_IPV4_TCP_TIMESTAMPS,
@@ -755,6 +799,14 @@ ctl_table ipv4_table[] = {
755 .mode = 0444, 799 .mode = 0444,
756 .proc_handler = &proc_tcp_available_congestion_control, 800 .proc_handler = &proc_tcp_available_congestion_control,
757 }, 801 },
802 {
803 .ctl_name = NET_TCP_ALLOWED_CONG_CONTROL,
804 .procname = "tcp_allowed_congestion_control",
805 .maxlen = TCP_CA_BUF_MAX,
806 .mode = 0644,
807 .proc_handler = &proc_allowed_congestion_control,
808 .strategy = &strategy_allowed_congestion_control,
809 },
758 { .ctl_name = 0 } 810 { .ctl_name = 0 }
759}; 811};
760 812
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index d846d7b95e1f..343d6197c92e 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -123,6 +123,7 @@ int tcp_set_default_congestion_control(const char *name)
123#endif 123#endif
124 124
125 if (ca) { 125 if (ca) {
126 ca->non_restricted = 1; /* default is always allowed */
126 list_move(&ca->list, &tcp_cong_list); 127 list_move(&ca->list, &tcp_cong_list);
127 ret = 0; 128 ret = 0;
128 } 129 }
@@ -168,6 +169,64 @@ void tcp_get_default_congestion_control(char *name)
168 rcu_read_unlock(); 169 rcu_read_unlock();
169} 170}
170 171
172/* Built list of non-restricted congestion control values */
173void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
174{
175 struct tcp_congestion_ops *ca;
176 size_t offs = 0;
177
178 *buf = '\0';
179 rcu_read_lock();
180 list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
181 if (!ca->non_restricted)
182 continue;
183 offs += snprintf(buf + offs, maxlen - offs,
184 "%s%s",
185 offs == 0 ? "" : " ", ca->name);
186
187 }
188 rcu_read_unlock();
189}
190
191/* Change list of non-restricted congestion control */
192int tcp_set_allowed_congestion_control(char *val)
193{
194 struct tcp_congestion_ops *ca;
195 char *clone, *name;
196 int ret = 0;
197
198 clone = kstrdup(val, GFP_USER);
199 if (!clone)
200 return -ENOMEM;
201
202 spin_lock(&tcp_cong_list_lock);
203 /* pass 1 check for bad entries */
204 while ((name = strsep(&clone, " ")) && *name) {
205 ca = tcp_ca_find(name);
206 if (!ca) {
207 ret = -ENOENT;
208 goto out;
209 }
210 }
211
212 /* pass 2 clear */
213 list_for_each_entry_rcu(ca, &tcp_cong_list, list)
214 ca->non_restricted = 0;
215
216 /* pass 3 mark as allowed */
217 while ((name = strsep(&val, " ")) && *name) {
218 ca = tcp_ca_find(name);
219 WARN_ON(!ca);
220 if (ca)
221 ca->non_restricted = 1;
222 }
223out:
224 spin_unlock(&tcp_cong_list_lock);
225
226 return ret;
227}
228
229
171/* Change congestion control for socket */ 230/* Change congestion control for socket */
172int tcp_set_congestion_control(struct sock *sk, const char *name) 231int tcp_set_congestion_control(struct sock *sk, const char *name)
173{ 232{
@@ -183,6 +242,9 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
183 if (!ca) 242 if (!ca)
184 err = -ENOENT; 243 err = -ENOENT;
185 244
245 else if (!(ca->non_restricted || capable(CAP_NET_ADMIN)))
246 err = -EPERM;
247
186 else if (!try_module_get(ca->owner)) 248 else if (!try_module_get(ca->owner))
187 err = -EBUSY; 249 err = -EBUSY;
188 250
@@ -284,6 +346,7 @@ EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
284 346
285struct tcp_congestion_ops tcp_reno = { 347struct tcp_congestion_ops tcp_reno = {
286 .name = "reno", 348 .name = "reno",
349 .non_restricted = 1,
287 .owner = THIS_MODULE, 350 .owner = THIS_MODULE,
288 .ssthresh = tcp_reno_ssthresh, 351 .ssthresh = tcp_reno_ssthresh,
289 .cong_avoid = tcp_reno_cong_avoid, 352 .cong_avoid = tcp_reno_cong_avoid,