about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- include/net/inet_connection_sock.h | 3
-rw-r--r-- include/net/tcp.h | 9
-rw-r--r-- net/ipv4/tcp_cong.c | 97
3 files changed, 91 insertions, 18 deletions
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 848e85cb5c61..5976bdecf58b 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -98,7 +98,8 @@ struct inet_connection_sock {
98 const struct tcp_congestion_ops *icsk_ca_ops; 98 const struct tcp_congestion_ops *icsk_ca_ops;
99 const struct inet_connection_sock_af_ops *icsk_af_ops; 99 const struct inet_connection_sock_af_ops *icsk_af_ops;
100 unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); 100 unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
101 __u8 icsk_ca_state; 101 __u8 icsk_ca_state:7,
102 icsk_ca_dst_locked:1;
102 __u8 icsk_retransmits; 103 __u8 icsk_retransmits;
103 __u8 icsk_pending; 104 __u8 icsk_pending;
104 __u8 icsk_backoff; 105 __u8 icsk_backoff;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f50f29faf76f..135b70c9a734 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -787,6 +787,8 @@ enum tcp_ca_ack_event_flags {
787#define TCP_CA_MAX 128 787#define TCP_CA_MAX 128
788#define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX) 788#define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX)
789 789
790#define TCP_CA_UNSPEC 0
791
790/* Algorithm can be set on socket without CAP_NET_ADMIN privileges */ 792/* Algorithm can be set on socket without CAP_NET_ADMIN privileges */
791#define TCP_CONG_NON_RESTRICTED 0x1 793#define TCP_CONG_NON_RESTRICTED 0x1
792/* Requires ECN/ECT set on all packets */ 794/* Requires ECN/ECT set on all packets */
@@ -794,7 +796,8 @@ enum tcp_ca_ack_event_flags {
794 796
795struct tcp_congestion_ops { 797struct tcp_congestion_ops {
796 struct list_head list; 798 struct list_head list;
797 unsigned long flags; 799 u32 key;
800 u32 flags;
798 801
799 /* initialize private data (optional) */ 802 /* initialize private data (optional) */
800 void (*init)(struct sock *sk); 803 void (*init)(struct sock *sk);
@@ -841,6 +844,10 @@ u32 tcp_reno_ssthresh(struct sock *sk);
841void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); 844void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
842extern struct tcp_congestion_ops tcp_reno; 845extern struct tcp_congestion_ops tcp_reno;
843 846
847struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
848u32 tcp_ca_get_key_by_name(const char *name);
849char *tcp_ca_get_name_by_key(u32 key, char *buffer);
850
844static inline bool tcp_ca_needs_ecn(const struct sock *sk) 851static inline bool tcp_ca_needs_ecn(const struct sock *sk)
845{ 852{
846 const struct inet_connection_sock *icsk = inet_csk(sk); 853 const struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 38f2f8aa4ceb..63c29dba68a8 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -13,6 +13,7 @@
13#include <linux/types.h> 13#include <linux/types.h>
14#include <linux/list.h> 14#include <linux/list.h>
15#include <linux/gfp.h> 15#include <linux/gfp.h>
16#include <linux/jhash.h>
16#include <net/tcp.h> 17#include <net/tcp.h>
17 18
18static DEFINE_SPINLOCK(tcp_cong_list_lock); 19static DEFINE_SPINLOCK(tcp_cong_list_lock);
@@ -31,6 +32,34 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name)
31 return NULL; 32 return NULL;
32} 33}
33 34
/*
 * Look up a congestion control algorithm by name and, when module support
 * is configured, try to load "tcp_<name>" if the first lookup misses.
 *
 * Must be called with the RCU read lock held. Note that the lock is
 * released around request_module() and taken again before the second
 * lookup, so the caller's read-side critical section is not continuous
 * across this call.
 *
 * The module load is only attempted when the caller has CAP_NET_ADMIN.
 *
 * Returns the matching entry, or NULL if none is registered under @name.
 */
static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name)
{
	const struct tcp_congestion_ops *found;

	found = tcp_ca_find(name);
#ifdef CONFIG_MODULES
	/* Already registered: nothing to load. */
	if (found)
		return found;

	/* Unprivileged callers do not get to trigger a module load. */
	if (!capable(CAP_NET_ADMIN))
		return NULL;

	rcu_read_unlock();
	request_module("tcp_%s", name);
	rcu_read_lock();

	/* Retry now that the module had a chance to register itself. */
	found = tcp_ca_find(name);
#endif
	return found;
}
49
50/* Simple linear search, not much in here. */
51struct tcp_congestion_ops *tcp_ca_find_key(u32 key)
52{
53 struct tcp_congestion_ops *e;
54
55 list_for_each_entry_rcu(e, &tcp_cong_list, list) {
56 if (e->key == key)
57 return e;
58 }
59
60 return NULL;
61}
62
34/* 63/*
35 * Attach new congestion control algorithm to the list 64 * Attach new congestion control algorithm to the list
36 * of available options. 65 * of available options.
@@ -45,9 +74,12 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
45 return -EINVAL; 74 return -EINVAL;
46 } 75 }
47 76
77 ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
78
48 spin_lock(&tcp_cong_list_lock); 79 spin_lock(&tcp_cong_list_lock);
49 if (tcp_ca_find(ca->name)) { 80 if (ca->key == TCP_CA_UNSPEC || tcp_ca_find_key(ca->key)) {
50 pr_notice("%s already registered\n", ca->name); 81 pr_notice("%s already registered or non-unique key\n",
82 ca->name);
51 ret = -EEXIST; 83 ret = -EEXIST;
52 } else { 84 } else {
53 list_add_tail_rcu(&ca->list, &tcp_cong_list); 85 list_add_tail_rcu(&ca->list, &tcp_cong_list);
@@ -70,9 +102,50 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
70 spin_lock(&tcp_cong_list_lock); 102 spin_lock(&tcp_cong_list_lock);
71 list_del_rcu(&ca->list); 103 list_del_rcu(&ca->list);
72 spin_unlock(&tcp_cong_list_lock); 104 spin_unlock(&tcp_cong_list_lock);
105
106 /* Wait for outstanding readers to complete before the
107 * module gets removed entirely.
108 *
109 * A try_module_get() should fail by now as our module is
110 * in "going" state since no refs are held anymore and
111 * module_exit() handler being called.
112 */
113 synchronize_rcu();
73} 114}
74EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); 115EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
75 116
117u32 tcp_ca_get_key_by_name(const char *name)
118{
119 const struct tcp_congestion_ops *ca;
120 u32 key;
121
122 might_sleep();
123
124 rcu_read_lock();
125 ca = __tcp_ca_find_autoload(name);
126 key = ca ? ca->key : TCP_CA_UNSPEC;
127 rcu_read_unlock();
128
129 return key;
130}
131EXPORT_SYMBOL_GPL(tcp_ca_get_key_by_name);
132
133char *tcp_ca_get_name_by_key(u32 key, char *buffer)
134{
135 const struct tcp_congestion_ops *ca;
136 char *ret = NULL;
137
138 rcu_read_lock();
139 ca = tcp_ca_find_key(key);
140 if (ca)
141 ret = strncpy(buffer, ca->name,
142 TCP_CA_NAME_MAX);
143 rcu_read_unlock();
144
145 return ret;
146}
147EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key);
148
76/* Assign choice of congestion control. */ 149/* Assign choice of congestion control. */
77void tcp_assign_congestion_control(struct sock *sk) 150void tcp_assign_congestion_control(struct sock *sk)
78{ 151{
@@ -253,25 +326,17 @@ out:
253int tcp_set_congestion_control(struct sock *sk, const char *name) 326int tcp_set_congestion_control(struct sock *sk, const char *name)
254{ 327{
255 struct inet_connection_sock *icsk = inet_csk(sk); 328 struct inet_connection_sock *icsk = inet_csk(sk);
256 struct tcp_congestion_ops *ca; 329 const struct tcp_congestion_ops *ca;
257 int err = 0; 330 int err = 0;
258 331
259 rcu_read_lock(); 332 if (icsk->icsk_ca_dst_locked)
260 ca = tcp_ca_find(name); 333 return -EPERM;
261 334
262 /* no change asking for existing value */ 335 rcu_read_lock();
336 ca = __tcp_ca_find_autoload(name);
337 /* No change asking for existing value */
263 if (ca == icsk->icsk_ca_ops) 338 if (ca == icsk->icsk_ca_ops)
264 goto out; 339 goto out;
265
266#ifdef CONFIG_MODULES
267 /* not found attempt to autoload module */
268 if (!ca && capable(CAP_NET_ADMIN)) {
269 rcu_read_unlock();
270 request_module("tcp_%s", name);
271 rcu_read_lock();
272 ca = tcp_ca_find(name);
273 }
274#endif
275 if (!ca) 340 if (!ca)
276 err = -ENOENT; 341 err = -ENOENT;
277 else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || 342 else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||