diff options
-rw-r--r-- | include/net/inet_connection_sock.h | 3 | ||||
-rw-r--r-- | include/net/tcp.h | 9 | ||||
-rw-r--r-- | net/ipv4/tcp_cong.c | 97 |
3 files changed, 91 insertions, 18 deletions
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 848e85cb5c61..5976bdecf58b 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h | |||
@@ -98,7 +98,8 @@ struct inet_connection_sock { | |||
98 | const struct tcp_congestion_ops *icsk_ca_ops; | 98 | const struct tcp_congestion_ops *icsk_ca_ops; |
99 | const struct inet_connection_sock_af_ops *icsk_af_ops; | 99 | const struct inet_connection_sock_af_ops *icsk_af_ops; |
100 | unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); | 100 | unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); |
101 | __u8 icsk_ca_state; | 101 | __u8 icsk_ca_state:7, |
102 | icsk_ca_dst_locked:1; | ||
102 | __u8 icsk_retransmits; | 103 | __u8 icsk_retransmits; |
103 | __u8 icsk_pending; | 104 | __u8 icsk_pending; |
104 | __u8 icsk_backoff; | 105 | __u8 icsk_backoff; |
diff --git a/include/net/tcp.h b/include/net/tcp.h index f50f29faf76f..135b70c9a734 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -787,6 +787,8 @@ enum tcp_ca_ack_event_flags { | |||
787 | #define TCP_CA_MAX 128 | 787 | #define TCP_CA_MAX 128 |
788 | #define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX) | 788 | #define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX) |
789 | 789 | ||
790 | #define TCP_CA_UNSPEC 0 | ||
791 | |||
790 | /* Algorithm can be set on socket without CAP_NET_ADMIN privileges */ | 792 | /* Algorithm can be set on socket without CAP_NET_ADMIN privileges */ |
791 | #define TCP_CONG_NON_RESTRICTED 0x1 | 793 | #define TCP_CONG_NON_RESTRICTED 0x1 |
792 | /* Requires ECN/ECT set on all packets */ | 794 | /* Requires ECN/ECT set on all packets */ |
@@ -794,7 +796,8 @@ enum tcp_ca_ack_event_flags { | |||
794 | 796 | ||
795 | struct tcp_congestion_ops { | 797 | struct tcp_congestion_ops { |
796 | struct list_head list; | 798 | struct list_head list; |
797 | unsigned long flags; | 799 | u32 key; |
800 | u32 flags; | ||
798 | 801 | ||
799 | /* initialize private data (optional) */ | 802 | /* initialize private data (optional) */ |
800 | void (*init)(struct sock *sk); | 803 | void (*init)(struct sock *sk); |
@@ -841,6 +844,10 @@ u32 tcp_reno_ssthresh(struct sock *sk); | |||
841 | void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); | 844 | void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); |
842 | extern struct tcp_congestion_ops tcp_reno; | 845 | extern struct tcp_congestion_ops tcp_reno; |
843 | 846 | ||
847 | struct tcp_congestion_ops *tcp_ca_find_key(u32 key); | ||
848 | u32 tcp_ca_get_key_by_name(const char *name); | ||
849 | char *tcp_ca_get_name_by_key(u32 key, char *buffer); | ||
850 | |||
844 | static inline bool tcp_ca_needs_ecn(const struct sock *sk) | 851 | static inline bool tcp_ca_needs_ecn(const struct sock *sk) |
845 | { | 852 | { |
846 | const struct inet_connection_sock *icsk = inet_csk(sk); | 853 | const struct inet_connection_sock *icsk = inet_csk(sk); |
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 38f2f8aa4ceb..63c29dba68a8 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/types.h> | 13 | #include <linux/types.h> |
14 | #include <linux/list.h> | 14 | #include <linux/list.h> |
15 | #include <linux/gfp.h> | 15 | #include <linux/gfp.h> |
16 | #include <linux/jhash.h> | ||
16 | #include <net/tcp.h> | 17 | #include <net/tcp.h> |
17 | 18 | ||
18 | static DEFINE_SPINLOCK(tcp_cong_list_lock); | 19 | static DEFINE_SPINLOCK(tcp_cong_list_lock); |
@@ -31,6 +32,34 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name) | |||
31 | return NULL; | 32 | return NULL; |
32 | } | 33 | } |
33 | 34 | ||
35 | /* Must be called with rcu lock held */ | ||
36 | static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name) | ||
37 | { | ||
38 | const struct tcp_congestion_ops *ca = tcp_ca_find(name); | ||
39 | #ifdef CONFIG_MODULES | ||
40 | if (!ca && capable(CAP_NET_ADMIN)) { | ||
41 | rcu_read_unlock(); | ||
42 | request_module("tcp_%s", name); | ||
43 | rcu_read_lock(); | ||
44 | ca = tcp_ca_find(name); | ||
45 | } | ||
46 | #endif | ||
47 | return ca; | ||
48 | } | ||
49 | |||
50 | /* Simple linear search, not much in here. */ | ||
51 | struct tcp_congestion_ops *tcp_ca_find_key(u32 key) | ||
52 | { | ||
53 | struct tcp_congestion_ops *e; | ||
54 | |||
55 | list_for_each_entry_rcu(e, &tcp_cong_list, list) { | ||
56 | if (e->key == key) | ||
57 | return e; | ||
58 | } | ||
59 | |||
60 | return NULL; | ||
61 | } | ||
62 | |||
34 | /* | 63 | /* |
35 | * Attach new congestion control algorithm to the list | 64 | * Attach new congestion control algorithm to the list |
36 | * of available options. | 65 | * of available options. |
@@ -45,9 +74,12 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) | |||
45 | return -EINVAL; | 74 | return -EINVAL; |
46 | } | 75 | } |
47 | 76 | ||
77 | ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); | ||
78 | |||
48 | spin_lock(&tcp_cong_list_lock); | 79 | spin_lock(&tcp_cong_list_lock); |
49 | if (tcp_ca_find(ca->name)) { | 80 | if (ca->key == TCP_CA_UNSPEC || tcp_ca_find_key(ca->key)) { |
50 | pr_notice("%s already registered\n", ca->name); | 81 | pr_notice("%s already registered or non-unique key\n", |
82 | ca->name); | ||
51 | ret = -EEXIST; | 83 | ret = -EEXIST; |
52 | } else { | 84 | } else { |
53 | list_add_tail_rcu(&ca->list, &tcp_cong_list); | 85 | list_add_tail_rcu(&ca->list, &tcp_cong_list); |
@@ -70,9 +102,50 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) | |||
70 | spin_lock(&tcp_cong_list_lock); | 102 | spin_lock(&tcp_cong_list_lock); |
71 | list_del_rcu(&ca->list); | 103 | list_del_rcu(&ca->list); |
72 | spin_unlock(&tcp_cong_list_lock); | 104 | spin_unlock(&tcp_cong_list_lock); |
105 | |||
106 | /* Wait for outstanding readers to complete before the | ||
107 | * module gets removed entirely. | ||
108 | * | ||
109 | * A try_module_get() should fail by now as our module is | ||
110 | * in "going" state since no refs are held anymore and | ||
111 | * module_exit() handler being called. | ||
112 | */ | ||
113 | synchronize_rcu(); | ||
73 | } | 114 | } |
74 | EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); | 115 | EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); |
75 | 116 | ||
117 | u32 tcp_ca_get_key_by_name(const char *name) | ||
118 | { | ||
119 | const struct tcp_congestion_ops *ca; | ||
120 | u32 key; | ||
121 | |||
122 | might_sleep(); | ||
123 | |||
124 | rcu_read_lock(); | ||
125 | ca = __tcp_ca_find_autoload(name); | ||
126 | key = ca ? ca->key : TCP_CA_UNSPEC; | ||
127 | rcu_read_unlock(); | ||
128 | |||
129 | return key; | ||
130 | } | ||
131 | EXPORT_SYMBOL_GPL(tcp_ca_get_key_by_name); | ||
132 | |||
133 | char *tcp_ca_get_name_by_key(u32 key, char *buffer) | ||
134 | { | ||
135 | const struct tcp_congestion_ops *ca; | ||
136 | char *ret = NULL; | ||
137 | |||
138 | rcu_read_lock(); | ||
139 | ca = tcp_ca_find_key(key); | ||
140 | if (ca) | ||
141 | ret = strncpy(buffer, ca->name, | ||
142 | TCP_CA_NAME_MAX); | ||
143 | rcu_read_unlock(); | ||
144 | |||
145 | return ret; | ||
146 | } | ||
147 | EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key); | ||
148 | |||
76 | /* Assign choice of congestion control. */ | 149 | /* Assign choice of congestion control. */ |
77 | void tcp_assign_congestion_control(struct sock *sk) | 150 | void tcp_assign_congestion_control(struct sock *sk) |
78 | { | 151 | { |
@@ -253,25 +326,17 @@ out: | |||
253 | int tcp_set_congestion_control(struct sock *sk, const char *name) | 326 | int tcp_set_congestion_control(struct sock *sk, const char *name) |
254 | { | 327 | { |
255 | struct inet_connection_sock *icsk = inet_csk(sk); | 328 | struct inet_connection_sock *icsk = inet_csk(sk); |
256 | struct tcp_congestion_ops *ca; | 329 | const struct tcp_congestion_ops *ca; |
257 | int err = 0; | 330 | int err = 0; |
258 | 331 | ||
259 | rcu_read_lock(); | 332 | if (icsk->icsk_ca_dst_locked) |
260 | ca = tcp_ca_find(name); | 333 | return -EPERM; |
261 | 334 | ||
262 | /* no change asking for existing value */ | 335 | rcu_read_lock(); |
336 | ca = __tcp_ca_find_autoload(name); | ||
337 | /* No change asking for existing value */ | ||
263 | if (ca == icsk->icsk_ca_ops) | 338 | if (ca == icsk->icsk_ca_ops) |
264 | goto out; | 339 | goto out; |
265 | |||
266 | #ifdef CONFIG_MODULES | ||
267 | /* not found attempt to autoload module */ | ||
268 | if (!ca && capable(CAP_NET_ADMIN)) { | ||
269 | rcu_read_unlock(); | ||
270 | request_module("tcp_%s", name); | ||
271 | rcu_read_lock(); | ||
272 | ca = tcp_ca_find(name); | ||
273 | } | ||
274 | #endif | ||
275 | if (!ca) | 340 | if (!ca) |
276 | err = -ENOENT; | 341 | err = -ENOENT; |
277 | else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || | 342 | else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || |