diff options
author | Stephen Hemminger <shemminger@osdl.org> | 2005-06-23 23:37:36 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2005-06-23 23:37:36 -0400 |
commit | 5f8ef48d240963093451bcf83df89f1a1364f51d (patch) | |
tree | cecb30c2f59778f7f509a84b3aa7ea097c3f2b27 | |
parent | 51b0bdedb8e784d0d969a6b77151911130812400 (diff) |
[TCP]: Allow choosing TCP congestion control via sockopt.
Allow using setsockopt to choose the TCP congestion control algorithm on a
per-socket basis.
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/tcp.h | 1 | ||||
-rw-r--r-- | include/net/tcp.h | 3 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 31 | ||||
-rw-r--r-- | net/ipv4/tcp_cong.c | 46 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 2 |
6 files changed, 79 insertions, 6 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3ea75dd6640a..dfd93d03f5d2 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h | |||
@@ -127,6 +127,7 @@ enum { | |||
127 | #define TCP_WINDOW_CLAMP 10 /* Bound advertised window */ | 127 | #define TCP_WINDOW_CLAMP 10 /* Bound advertised window */ |
128 | #define TCP_INFO 11 /* Information about this connection. */ | 128 | #define TCP_INFO 11 /* Information about this connection. */ |
129 | #define TCP_QUICKACK 12 /* Block/reenable quick acks */ | 129 | #define TCP_QUICKACK 12 /* Block/reenable quick acks */ |
130 | #define TCP_CONGESTION 13 /* Congestion control algorithm */ | ||
130 | 131 | ||
131 | #define TCPI_OPT_TIMESTAMPS 1 | 132 | #define TCPI_OPT_TIMESTAMPS 1 |
132 | #define TCPI_OPT_SACK 2 | 133 | #define TCPI_OPT_SACK 2 |
diff --git a/include/net/tcp.h b/include/net/tcp.h index e427cf35915c..d04b21188ccb 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -1162,8 +1162,9 @@ extern void tcp_init_congestion_control(struct tcp_sock *tp); | |||
1162 | extern void tcp_cleanup_congestion_control(struct tcp_sock *tp); | 1162 | extern void tcp_cleanup_congestion_control(struct tcp_sock *tp); |
1163 | extern int tcp_set_default_congestion_control(const char *name); | 1163 | extern int tcp_set_default_congestion_control(const char *name); |
1164 | extern void tcp_get_default_congestion_control(char *name); | 1164 | extern void tcp_get_default_congestion_control(char *name); |
1165 | extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name); | ||
1165 | 1166 | ||
1166 | extern struct tcp_congestion_ops tcp_reno; | 1167 | extern struct tcp_congestion_ops tcp_init_congestion_ops; |
1167 | extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); | 1168 | extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); |
1168 | extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, | 1169 | extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, |
1169 | u32 rtt, u32 in_flight, int flag); | 1170 | u32 rtt, u32 in_flight, int flag); |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f3dbc8dc1263..882436da9a3a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -1927,6 +1927,25 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
1927 | return tp->af_specific->setsockopt(sk, level, optname, | 1927 | return tp->af_specific->setsockopt(sk, level, optname, |
1928 | optval, optlen); | 1928 | optval, optlen); |
1929 | 1929 | ||
1930 | /* This is a string value; all the others are ints. */ | ||
1931 | if (optname == TCP_CONGESTION) { | ||
1932 | char name[TCP_CA_NAME_MAX]; | ||
1933 | |||
1934 | if (optlen < 1) | ||
1935 | return -EINVAL; | ||
1936 | |||
1937 | val = strncpy_from_user(name, optval, | ||
1938 | min(TCP_CA_NAME_MAX-1, optlen)); | ||
1939 | if (val < 0) | ||
1940 | return -EFAULT; | ||
1941 | name[val] = 0; | ||
1942 | |||
1943 | lock_sock(sk); | ||
1944 | err = tcp_set_congestion_control(tp, name); | ||
1945 | release_sock(sk); | ||
1946 | return err; | ||
1947 | } | ||
1948 | |||
1930 | if (optlen < sizeof(int)) | 1949 | if (optlen < sizeof(int)) |
1931 | return -EINVAL; | 1950 | return -EINVAL; |
1932 | 1951 | ||
@@ -2211,6 +2230,16 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
2211 | case TCP_QUICKACK: | 2230 | case TCP_QUICKACK: |
2212 | val = !tp->ack.pingpong; | 2231 | val = !tp->ack.pingpong; |
2213 | break; | 2232 | break; |
2233 | |||
2234 | case TCP_CONGESTION: | ||
2235 | if (get_user(len, optlen)) | ||
2236 | return -EFAULT; | ||
2237 | len = min_t(unsigned int, len, TCP_CA_NAME_MAX); | ||
2238 | if (put_user(len, optlen)) | ||
2239 | return -EFAULT; | ||
2240 | if (copy_to_user(optval, tp->ca_ops->name, len)) | ||
2241 | return -EFAULT; | ||
2242 | return 0; | ||
2214 | default: | 2243 | default: |
2215 | return -ENOPROTOOPT; | 2244 | return -ENOPROTOOPT; |
2216 | }; | 2245 | }; |
@@ -2224,7 +2253,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
2224 | 2253 | ||
2225 | 2254 | ||
2226 | extern void __skb_cb_too_small_for_tcp(int, int); | 2255 | extern void __skb_cb_too_small_for_tcp(int, int); |
2227 | extern void tcpdiag_init(void); | 2256 | extern struct tcp_congestion_ops tcp_reno; |
2228 | 2257 | ||
2229 | static __initdata unsigned long thash_entries; | 2258 | static __initdata unsigned long thash_entries; |
2230 | static int __init set_thash_entries(char *str) | 2259 | static int __init set_thash_entries(char *str) |
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 665394a63ae4..4970d10a7785 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
@@ -21,7 +21,7 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name) | |||
21 | { | 21 | { |
22 | struct tcp_congestion_ops *e; | 22 | struct tcp_congestion_ops *e; |
23 | 23 | ||
24 | list_for_each_entry(e, &tcp_cong_list, list) { | 24 | list_for_each_entry_rcu(e, &tcp_cong_list, list) { |
25 | if (strcmp(e->name, name) == 0) | 25 | if (strcmp(e->name, name) == 0) |
26 | return e; | 26 | return e; |
27 | } | 27 | } |
@@ -77,6 +77,9 @@ void tcp_init_congestion_control(struct tcp_sock *tp) | |||
77 | { | 77 | { |
78 | struct tcp_congestion_ops *ca; | 78 | struct tcp_congestion_ops *ca; |
79 | 79 | ||
80 | if (tp->ca_ops != &tcp_init_congestion_ops) | ||
81 | return; | ||
82 | |||
80 | rcu_read_lock(); | 83 | rcu_read_lock(); |
81 | list_for_each_entry_rcu(ca, &tcp_cong_list, list) { | 84 | list_for_each_entry_rcu(ca, &tcp_cong_list, list) { |
82 | if (try_module_get(ca->owner)) { | 85 | if (try_module_get(ca->owner)) { |
@@ -139,6 +142,34 @@ void tcp_get_default_congestion_control(char *name) | |||
139 | rcu_read_unlock(); | 142 | rcu_read_unlock(); |
140 | } | 143 | } |
141 | 144 | ||
145 | /* Change congestion control for socket */ | ||
146 | int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) | ||
147 | { | ||
148 | struct tcp_congestion_ops *ca; | ||
149 | int err = 0; | ||
150 | |||
151 | rcu_read_lock(); | ||
152 | ca = tcp_ca_find(name); | ||
153 | if (ca == tp->ca_ops) | ||
154 | goto out; | ||
155 | |||
156 | if (!ca) | ||
157 | err = -ENOENT; | ||
158 | |||
159 | else if (!try_module_get(ca->owner)) | ||
160 | err = -EBUSY; | ||
161 | |||
162 | else { | ||
163 | tcp_cleanup_congestion_control(tp); | ||
164 | tp->ca_ops = ca; | ||
165 | if (tp->ca_ops->init) | ||
166 | tp->ca_ops->init(tp); | ||
167 | } | ||
168 | out: | ||
169 | rcu_read_unlock(); | ||
170 | return err; | ||
171 | } | ||
172 | |||
142 | /* | 173 | /* |
143 | * TCP Reno congestion control | 174 | * TCP Reno congestion control |
144 | * This is special case used for fallback as well. | 175 | * This is special case used for fallback as well. |
@@ -192,4 +223,15 @@ struct tcp_congestion_ops tcp_reno = { | |||
192 | .min_cwnd = tcp_reno_min_cwnd, | 223 | .min_cwnd = tcp_reno_min_cwnd, |
193 | }; | 224 | }; |
194 | 225 | ||
195 | EXPORT_SYMBOL_GPL(tcp_reno); | 226 | /* Initial congestion control used (until SYN) |
227 | * really reno under another name so we can tell the difference | ||
228 | * during tcp_init_congestion_control | ||
229 | */ | ||
230 | struct tcp_congestion_ops tcp_init_congestion_ops = { | ||
231 | .name = "", | ||
232 | .owner = THIS_MODULE, | ||
233 | .ssthresh = tcp_reno_ssthresh, | ||
234 | .cong_avoid = tcp_reno_cong_avoid, | ||
235 | .min_cwnd = tcp_reno_min_cwnd, | ||
236 | }; | ||
237 | EXPORT_SYMBOL_GPL(tcp_init_congestion_ops); | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9122814c13ad..ebf112347a97 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -2048,7 +2048,7 @@ static int tcp_v4_init_sock(struct sock *sk) | |||
2048 | tp->mss_cache_std = tp->mss_cache = 536; | 2048 | tp->mss_cache_std = tp->mss_cache = 536; |
2049 | 2049 | ||
2050 | tp->reordering = sysctl_tcp_reordering; | 2050 | tp->reordering = sysctl_tcp_reordering; |
2051 | tp->ca_ops = &tcp_reno; | 2051 | tp->ca_ops = &tcp_init_congestion_ops; |
2052 | 2052 | ||
2053 | sk->sk_state = TCP_CLOSE; | 2053 | sk->sk_state = TCP_CLOSE; |
2054 | 2054 | ||
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fce56039b0e9..9dac7fdf4726 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -2025,7 +2025,7 @@ static int tcp_v6_init_sock(struct sock *sk) | |||
2025 | sk->sk_state = TCP_CLOSE; | 2025 | sk->sk_state = TCP_CLOSE; |
2026 | 2026 | ||
2027 | tp->af_specific = &ipv6_specific; | 2027 | tp->af_specific = &ipv6_specific; |
2028 | tp->ca_ops = &tcp_reno; | 2028 | tp->ca_ops = &tcp_init_congestion_ops; |
2029 | sk->sk_write_space = sk_stream_write_space; | 2029 | sk->sk_write_space = sk_stream_write_space; |
2030 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); | 2030 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); |
2031 | 2031 | ||