diff options
| author | Stephen Hemminger <shemminger@osdl.org> | 2005-06-23 23:37:36 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2005-06-23 23:37:36 -0400 |
| commit | 5f8ef48d240963093451bcf83df89f1a1364f51d (patch) | |
| tree | cecb30c2f59778f7f509a84b3aa7ea097c3f2b27 /net/ipv4 | |
| parent | 51b0bdedb8e784d0d969a6b77151911130812400 (diff) | |
[TCP]: Allow choosing TCP congestion control via sockopt.
Allow using setsockopt to select the TCP congestion control algorithm on a
per-socket basis.
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
| -rw-r--r-- | net/ipv4/tcp.c | 31 | ||||
| -rw-r--r-- | net/ipv4/tcp_cong.c | 46 | ||||
| -rw-r--r-- | net/ipv4/tcp_ipv4.c | 2 |
3 files changed, 75 insertions, 4 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f3dbc8dc1263..882436da9a3a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
| @@ -1927,6 +1927,25 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
| 1927 | return tp->af_specific->setsockopt(sk, level, optname, | 1927 | return tp->af_specific->setsockopt(sk, level, optname, |
| 1928 | optval, optlen); | 1928 | optval, optlen); |
| 1929 | 1929 | ||
| 1930 | /* This is a string value all the others are int's */ | ||
| 1931 | if (optname == TCP_CONGESTION) { | ||
| 1932 | char name[TCP_CA_NAME_MAX]; | ||
| 1933 | |||
| 1934 | if (optlen < 1) | ||
| 1935 | return -EINVAL; | ||
| 1936 | |||
| 1937 | val = strncpy_from_user(name, optval, | ||
| 1938 | min(TCP_CA_NAME_MAX-1, optlen)); | ||
| 1939 | if (val < 0) | ||
| 1940 | return -EFAULT; | ||
| 1941 | name[val] = 0; | ||
| 1942 | |||
| 1943 | lock_sock(sk); | ||
| 1944 | err = tcp_set_congestion_control(tp, name); | ||
| 1945 | release_sock(sk); | ||
| 1946 | return err; | ||
| 1947 | } | ||
| 1948 | |||
| 1930 | if (optlen < sizeof(int)) | 1949 | if (optlen < sizeof(int)) |
| 1931 | return -EINVAL; | 1950 | return -EINVAL; |
| 1932 | 1951 | ||
| @@ -2211,6 +2230,16 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
| 2211 | case TCP_QUICKACK: | 2230 | case TCP_QUICKACK: |
| 2212 | val = !tp->ack.pingpong; | 2231 | val = !tp->ack.pingpong; |
| 2213 | break; | 2232 | break; |
| 2233 | |||
| 2234 | case TCP_CONGESTION: | ||
| 2235 | if (get_user(len, optlen)) | ||
| 2236 | return -EFAULT; | ||
| 2237 | len = min_t(unsigned int, len, TCP_CA_NAME_MAX); | ||
| 2238 | if (put_user(len, optlen)) | ||
| 2239 | return -EFAULT; | ||
| 2240 | if (copy_to_user(optval, tp->ca_ops->name, len)) | ||
| 2241 | return -EFAULT; | ||
| 2242 | return 0; | ||
| 2214 | default: | 2243 | default: |
| 2215 | return -ENOPROTOOPT; | 2244 | return -ENOPROTOOPT; |
| 2216 | }; | 2245 | }; |
| @@ -2224,7 +2253,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
| 2224 | 2253 | ||
| 2225 | 2254 | ||
| 2226 | extern void __skb_cb_too_small_for_tcp(int, int); | 2255 | extern void __skb_cb_too_small_for_tcp(int, int); |
| 2227 | extern void tcpdiag_init(void); | 2256 | extern struct tcp_congestion_ops tcp_reno; |
| 2228 | 2257 | ||
| 2229 | static __initdata unsigned long thash_entries; | 2258 | static __initdata unsigned long thash_entries; |
| 2230 | static int __init set_thash_entries(char *str) | 2259 | static int __init set_thash_entries(char *str) |
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 665394a63ae4..4970d10a7785 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
| @@ -21,7 +21,7 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name) | |||
| 21 | { | 21 | { |
| 22 | struct tcp_congestion_ops *e; | 22 | struct tcp_congestion_ops *e; |
| 23 | 23 | ||
| 24 | list_for_each_entry(e, &tcp_cong_list, list) { | 24 | list_for_each_entry_rcu(e, &tcp_cong_list, list) { |
| 25 | if (strcmp(e->name, name) == 0) | 25 | if (strcmp(e->name, name) == 0) |
| 26 | return e; | 26 | return e; |
| 27 | } | 27 | } |
| @@ -77,6 +77,9 @@ void tcp_init_congestion_control(struct tcp_sock *tp) | |||
| 77 | { | 77 | { |
| 78 | struct tcp_congestion_ops *ca; | 78 | struct tcp_congestion_ops *ca; |
| 79 | 79 | ||
| 80 | if (tp->ca_ops != &tcp_init_congestion_ops) | ||
| 81 | return; | ||
| 82 | |||
| 80 | rcu_read_lock(); | 83 | rcu_read_lock(); |
| 81 | list_for_each_entry_rcu(ca, &tcp_cong_list, list) { | 84 | list_for_each_entry_rcu(ca, &tcp_cong_list, list) { |
| 82 | if (try_module_get(ca->owner)) { | 85 | if (try_module_get(ca->owner)) { |
| @@ -139,6 +142,34 @@ void tcp_get_default_congestion_control(char *name) | |||
| 139 | rcu_read_unlock(); | 142 | rcu_read_unlock(); |
| 140 | } | 143 | } |
| 141 | 144 | ||
| 145 | /* Change congestion control for socket */ | ||
| 146 | int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) | ||
| 147 | { | ||
| 148 | struct tcp_congestion_ops *ca; | ||
| 149 | int err = 0; | ||
| 150 | |||
| 151 | rcu_read_lock(); | ||
| 152 | ca = tcp_ca_find(name); | ||
| 153 | if (ca == tp->ca_ops) | ||
| 154 | goto out; | ||
| 155 | |||
| 156 | if (!ca) | ||
| 157 | err = -ENOENT; | ||
| 158 | |||
| 159 | else if (!try_module_get(ca->owner)) | ||
| 160 | err = -EBUSY; | ||
| 161 | |||
| 162 | else { | ||
| 163 | tcp_cleanup_congestion_control(tp); | ||
| 164 | tp->ca_ops = ca; | ||
| 165 | if (tp->ca_ops->init) | ||
| 166 | tp->ca_ops->init(tp); | ||
| 167 | } | ||
| 168 | out: | ||
| 169 | rcu_read_unlock(); | ||
| 170 | return err; | ||
| 171 | } | ||
| 172 | |||
| 142 | /* | 173 | /* |
| 143 | * TCP Reno congestion control | 174 | * TCP Reno congestion control |
| 144 | * This is special case used for fallback as well. | 175 | * This is special case used for fallback as well. |
| @@ -192,4 +223,15 @@ struct tcp_congestion_ops tcp_reno = { | |||
| 192 | .min_cwnd = tcp_reno_min_cwnd, | 223 | .min_cwnd = tcp_reno_min_cwnd, |
| 193 | }; | 224 | }; |
| 194 | 225 | ||
| 195 | EXPORT_SYMBOL_GPL(tcp_reno); | 226 | /* Initial congestion control used (until SYN) |
| 227 | * really reno under another name so we can tell difference | ||
| 228 | * during tcp_set_default_congestion_control | ||
| 229 | */ | ||
| 230 | struct tcp_congestion_ops tcp_init_congestion_ops = { | ||
| 231 | .name = "", | ||
| 232 | .owner = THIS_MODULE, | ||
| 233 | .ssthresh = tcp_reno_ssthresh, | ||
| 234 | .cong_avoid = tcp_reno_cong_avoid, | ||
| 235 | .min_cwnd = tcp_reno_min_cwnd, | ||
| 236 | }; | ||
| 237 | EXPORT_SYMBOL_GPL(tcp_init_congestion_ops); | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9122814c13ad..ebf112347a97 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
| @@ -2048,7 +2048,7 @@ static int tcp_v4_init_sock(struct sock *sk) | |||
| 2048 | tp->mss_cache_std = tp->mss_cache = 536; | 2048 | tp->mss_cache_std = tp->mss_cache = 536; |
| 2049 | 2049 | ||
| 2050 | tp->reordering = sysctl_tcp_reordering; | 2050 | tp->reordering = sysctl_tcp_reordering; |
| 2051 | tp->ca_ops = &tcp_reno; | 2051 | tp->ca_ops = &tcp_init_congestion_ops; |
| 2052 | 2052 | ||
| 2053 | sk->sk_state = TCP_CLOSE; | 2053 | sk->sk_state = TCP_CLOSE; |
| 2054 | 2054 | ||
