aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephen Hemminger <shemminger@osdl.org>2005-06-23 23:37:36 -0400
committerDavid S. Miller <davem@davemloft.net>2005-06-23 23:37:36 -0400
commit5f8ef48d240963093451bcf83df89f1a1364f51d (patch)
treececb30c2f59778f7f509a84b3aa7ea097c3f2b27
parent51b0bdedb8e784d0d969a6b77151911130812400 (diff)
[TCP]: Allow choosing TCP congestion control via sockopt.
Allow using setsockopt to select the TCP congestion control algorithm on a per-socket basis.
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/tcp.h1
-rw-r--r--include/net/tcp.h3
-rw-r--r--net/ipv4/tcp.c31
-rw-r--r--net/ipv4/tcp_cong.c46
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv6/tcp_ipv6.c2
6 files changed, 79 insertions, 6 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 3ea75dd6640a..dfd93d03f5d2 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -127,6 +127,7 @@ enum {
127#define TCP_WINDOW_CLAMP 10 /* Bound advertised window */ 127#define TCP_WINDOW_CLAMP 10 /* Bound advertised window */
128#define TCP_INFO 11 /* Information about this connection. */ 128#define TCP_INFO 11 /* Information about this connection. */
129#define TCP_QUICKACK 12 /* Block/reenable quick acks */ 129#define TCP_QUICKACK 12 /* Block/reenable quick acks */
130#define TCP_CONGESTION 13 /* Congestion control algorithm */
130 131
131#define TCPI_OPT_TIMESTAMPS 1 132#define TCPI_OPT_TIMESTAMPS 1
132#define TCPI_OPT_SACK 2 133#define TCPI_OPT_SACK 2
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e427cf35915c..d04b21188ccb 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1162,8 +1162,9 @@ extern void tcp_init_congestion_control(struct tcp_sock *tp);
1162extern void tcp_cleanup_congestion_control(struct tcp_sock *tp); 1162extern void tcp_cleanup_congestion_control(struct tcp_sock *tp);
1163extern int tcp_set_default_congestion_control(const char *name); 1163extern int tcp_set_default_congestion_control(const char *name);
1164extern void tcp_get_default_congestion_control(char *name); 1164extern void tcp_get_default_congestion_control(char *name);
1165extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name);
1165 1166
1166extern struct tcp_congestion_ops tcp_reno; 1167extern struct tcp_congestion_ops tcp_init_congestion_ops;
1167extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); 1168extern u32 tcp_reno_ssthresh(struct tcp_sock *tp);
1168extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, 1169extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack,
1169 u32 rtt, u32 in_flight, int flag); 1170 u32 rtt, u32 in_flight, int flag);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f3dbc8dc1263..882436da9a3a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1927,6 +1927,25 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
1927 return tp->af_specific->setsockopt(sk, level, optname, 1927 return tp->af_specific->setsockopt(sk, level, optname,
1928 optval, optlen); 1928 optval, optlen);
1929 1929
1930 /* This is a string value all the others are int's */
1931 if (optname == TCP_CONGESTION) {
1932 char name[TCP_CA_NAME_MAX];
1933
1934 if (optlen < 1)
1935 return -EINVAL;
1936
1937 val = strncpy_from_user(name, optval,
1938 min(TCP_CA_NAME_MAX-1, optlen));
1939 if (val < 0)
1940 return -EFAULT;
1941 name[val] = 0;
1942
1943 lock_sock(sk);
1944 err = tcp_set_congestion_control(tp, name);
1945 release_sock(sk);
1946 return err;
1947 }
1948
1930 if (optlen < sizeof(int)) 1949 if (optlen < sizeof(int))
1931 return -EINVAL; 1950 return -EINVAL;
1932 1951
@@ -2211,6 +2230,16 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2211 case TCP_QUICKACK: 2230 case TCP_QUICKACK:
2212 val = !tp->ack.pingpong; 2231 val = !tp->ack.pingpong;
2213 break; 2232 break;
2233
2234 case TCP_CONGESTION:
2235 if (get_user(len, optlen))
2236 return -EFAULT;
2237 len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
2238 if (put_user(len, optlen))
2239 return -EFAULT;
2240 if (copy_to_user(optval, tp->ca_ops->name, len))
2241 return -EFAULT;
2242 return 0;
2214 default: 2243 default:
2215 return -ENOPROTOOPT; 2244 return -ENOPROTOOPT;
2216 }; 2245 };
@@ -2224,7 +2253,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2224 2253
2225 2254
2226extern void __skb_cb_too_small_for_tcp(int, int); 2255extern void __skb_cb_too_small_for_tcp(int, int);
2227extern void tcpdiag_init(void); 2256extern struct tcp_congestion_ops tcp_reno;
2228 2257
2229static __initdata unsigned long thash_entries; 2258static __initdata unsigned long thash_entries;
2230static int __init set_thash_entries(char *str) 2259static int __init set_thash_entries(char *str)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 665394a63ae4..4970d10a7785 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -21,7 +21,7 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name)
21{ 21{
22 struct tcp_congestion_ops *e; 22 struct tcp_congestion_ops *e;
23 23
24 list_for_each_entry(e, &tcp_cong_list, list) { 24 list_for_each_entry_rcu(e, &tcp_cong_list, list) {
25 if (strcmp(e->name, name) == 0) 25 if (strcmp(e->name, name) == 0)
26 return e; 26 return e;
27 } 27 }
@@ -77,6 +77,9 @@ void tcp_init_congestion_control(struct tcp_sock *tp)
77{ 77{
78 struct tcp_congestion_ops *ca; 78 struct tcp_congestion_ops *ca;
79 79
80 if (tp->ca_ops != &tcp_init_congestion_ops)
81 return;
82
80 rcu_read_lock(); 83 rcu_read_lock();
81 list_for_each_entry_rcu(ca, &tcp_cong_list, list) { 84 list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
82 if (try_module_get(ca->owner)) { 85 if (try_module_get(ca->owner)) {
@@ -139,6 +142,34 @@ void tcp_get_default_congestion_control(char *name)
139 rcu_read_unlock(); 142 rcu_read_unlock();
140} 143}
141 144
145/* Change congestion control for socket */
146int tcp_set_congestion_control(struct tcp_sock *tp, const char *name)
147{
148 struct tcp_congestion_ops *ca;
149 int err = 0;
150
151 rcu_read_lock();
152 ca = tcp_ca_find(name);
153 if (ca == tp->ca_ops)
154 goto out;
155
156 if (!ca)
157 err = -ENOENT;
158
159 else if (!try_module_get(ca->owner))
160 err = -EBUSY;
161
162 else {
163 tcp_cleanup_congestion_control(tp);
164 tp->ca_ops = ca;
165 if (tp->ca_ops->init)
166 tp->ca_ops->init(tp);
167 }
168 out:
169 rcu_read_unlock();
170 return err;
171}
172
142/* 173/*
143 * TCP Reno congestion control 174 * TCP Reno congestion control
144 * This is special case used for fallback as well. 175 * This is special case used for fallback as well.
@@ -192,4 +223,15 @@ struct tcp_congestion_ops tcp_reno = {
192 .min_cwnd = tcp_reno_min_cwnd, 223 .min_cwnd = tcp_reno_min_cwnd,
193}; 224};
194 225
195EXPORT_SYMBOL_GPL(tcp_reno); 226/* Initial congestion control used (until SYN)
227 * really reno under another name so we can tell difference
228 * during tcp_set_default_congestion_control
229 */
230struct tcp_congestion_ops tcp_init_congestion_ops = {
231 .name = "",
232 .owner = THIS_MODULE,
233 .ssthresh = tcp_reno_ssthresh,
234 .cong_avoid = tcp_reno_cong_avoid,
235 .min_cwnd = tcp_reno_min_cwnd,
236};
237EXPORT_SYMBOL_GPL(tcp_init_congestion_ops);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9122814c13ad..ebf112347a97 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2048,7 +2048,7 @@ static int tcp_v4_init_sock(struct sock *sk)
2048 tp->mss_cache_std = tp->mss_cache = 536; 2048 tp->mss_cache_std = tp->mss_cache = 536;
2049 2049
2050 tp->reordering = sysctl_tcp_reordering; 2050 tp->reordering = sysctl_tcp_reordering;
2051 tp->ca_ops = &tcp_reno; 2051 tp->ca_ops = &tcp_init_congestion_ops;
2052 2052
2053 sk->sk_state = TCP_CLOSE; 2053 sk->sk_state = TCP_CLOSE;
2054 2054
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index fce56039b0e9..9dac7fdf4726 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2025,7 +2025,7 @@ static int tcp_v6_init_sock(struct sock *sk)
2025 sk->sk_state = TCP_CLOSE; 2025 sk->sk_state = TCP_CLOSE;
2026 2026
2027 tp->af_specific = &ipv6_specific; 2027 tp->af_specific = &ipv6_specific;
2028 tp->ca_ops = &tcp_reno; 2028 tp->ca_ops = &tcp_init_congestion_ops;
2029 sk->sk_write_space = sk_stream_write_space; 2029 sk->sk_write_space = sk_stream_write_space;
2030 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); 2030 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2031 2031