diff options
Diffstat (limited to 'net/core/sock_reuseport.c')
-rw-r--r-- | net/core/sock_reuseport.c | 173 |
1 files changed, 173 insertions, 0 deletions
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c new file mode 100644 index 000000000000..963c8d5f3027 --- /dev/null +++ b/net/core/sock_reuseport.c | |||
@@ -0,0 +1,173 @@ | |||
1 | /* | ||
2 | * To speed up listener socket lookup, create an array to store all sockets | ||
3 | * listening on the same port. This allows a decision to be made after finding | ||
4 | * the first socket. | ||
5 | */ | ||
6 | |||
7 | #include <net/sock_reuseport.h> | ||
8 | #include <linux/rcupdate.h> | ||
9 | |||
10 | #define INIT_SOCKS 128 | ||
11 | |||
12 | static DEFINE_SPINLOCK(reuseport_lock); | ||
13 | |||
14 | static struct sock_reuseport *__reuseport_alloc(u16 max_socks) | ||
15 | { | ||
16 | size_t size = sizeof(struct sock_reuseport) + | ||
17 | sizeof(struct sock *) * max_socks; | ||
18 | struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC); | ||
19 | |||
20 | if (!reuse) | ||
21 | return NULL; | ||
22 | |||
23 | reuse->max_socks = max_socks; | ||
24 | |||
25 | return reuse; | ||
26 | } | ||
27 | |||
28 | int reuseport_alloc(struct sock *sk) | ||
29 | { | ||
30 | struct sock_reuseport *reuse; | ||
31 | |||
32 | /* bh lock used since this function call may precede hlist lock in | ||
33 | * soft irq of receive path or setsockopt from process context | ||
34 | */ | ||
35 | spin_lock_bh(&reuseport_lock); | ||
36 | WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, | ||
37 | lockdep_is_held(&reuseport_lock)), | ||
38 | "multiple allocations for the same socket"); | ||
39 | reuse = __reuseport_alloc(INIT_SOCKS); | ||
40 | if (!reuse) { | ||
41 | spin_unlock_bh(&reuseport_lock); | ||
42 | return -ENOMEM; | ||
43 | } | ||
44 | |||
45 | reuse->socks[0] = sk; | ||
46 | reuse->num_socks = 1; | ||
47 | rcu_assign_pointer(sk->sk_reuseport_cb, reuse); | ||
48 | |||
49 | spin_unlock_bh(&reuseport_lock); | ||
50 | |||
51 | return 0; | ||
52 | } | ||
53 | EXPORT_SYMBOL(reuseport_alloc); | ||
54 | |||
55 | static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) | ||
56 | { | ||
57 | struct sock_reuseport *more_reuse; | ||
58 | u32 more_socks_size, i; | ||
59 | |||
60 | more_socks_size = reuse->max_socks * 2U; | ||
61 | if (more_socks_size > U16_MAX) | ||
62 | return NULL; | ||
63 | |||
64 | more_reuse = __reuseport_alloc(more_socks_size); | ||
65 | if (!more_reuse) | ||
66 | return NULL; | ||
67 | |||
68 | more_reuse->max_socks = more_socks_size; | ||
69 | more_reuse->num_socks = reuse->num_socks; | ||
70 | |||
71 | memcpy(more_reuse->socks, reuse->socks, | ||
72 | reuse->num_socks * sizeof(struct sock *)); | ||
73 | |||
74 | for (i = 0; i < reuse->num_socks; ++i) | ||
75 | rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb, | ||
76 | more_reuse); | ||
77 | |||
78 | kfree_rcu(reuse, rcu); | ||
79 | return more_reuse; | ||
80 | } | ||
81 | |||
82 | /** | ||
83 | * reuseport_add_sock - Add a socket to the reuseport group of another. | ||
84 | * @sk: New socket to add to the group. | ||
85 | * @sk2: Socket belonging to the existing reuseport group. | ||
86 | * May return ENOMEM and not add socket to group under memory pressure. | ||
87 | */ | ||
88 | int reuseport_add_sock(struct sock *sk, const struct sock *sk2) | ||
89 | { | ||
90 | struct sock_reuseport *reuse; | ||
91 | |||
92 | spin_lock_bh(&reuseport_lock); | ||
93 | reuse = rcu_dereference_protected(sk2->sk_reuseport_cb, | ||
94 | lockdep_is_held(&reuseport_lock)), | ||
95 | WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, | ||
96 | lockdep_is_held(&reuseport_lock)), | ||
97 | "socket already in reuseport group"); | ||
98 | |||
99 | if (reuse->num_socks == reuse->max_socks) { | ||
100 | reuse = reuseport_grow(reuse); | ||
101 | if (!reuse) { | ||
102 | spin_unlock_bh(&reuseport_lock); | ||
103 | return -ENOMEM; | ||
104 | } | ||
105 | } | ||
106 | |||
107 | reuse->socks[reuse->num_socks] = sk; | ||
108 | /* paired with smp_rmb() in reuseport_select_sock() */ | ||
109 | smp_wmb(); | ||
110 | reuse->num_socks++; | ||
111 | rcu_assign_pointer(sk->sk_reuseport_cb, reuse); | ||
112 | |||
113 | spin_unlock_bh(&reuseport_lock); | ||
114 | |||
115 | return 0; | ||
116 | } | ||
117 | EXPORT_SYMBOL(reuseport_add_sock); | ||
118 | |||
119 | void reuseport_detach_sock(struct sock *sk) | ||
120 | { | ||
121 | struct sock_reuseport *reuse; | ||
122 | int i; | ||
123 | |||
124 | spin_lock_bh(&reuseport_lock); | ||
125 | reuse = rcu_dereference_protected(sk->sk_reuseport_cb, | ||
126 | lockdep_is_held(&reuseport_lock)); | ||
127 | rcu_assign_pointer(sk->sk_reuseport_cb, NULL); | ||
128 | |||
129 | for (i = 0; i < reuse->num_socks; i++) { | ||
130 | if (reuse->socks[i] == sk) { | ||
131 | reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; | ||
132 | reuse->num_socks--; | ||
133 | if (reuse->num_socks == 0) | ||
134 | kfree_rcu(reuse, rcu); | ||
135 | break; | ||
136 | } | ||
137 | } | ||
138 | spin_unlock_bh(&reuseport_lock); | ||
139 | } | ||
140 | EXPORT_SYMBOL(reuseport_detach_sock); | ||
141 | |||
142 | /** | ||
143 | * reuseport_select_sock - Select a socket from an SO_REUSEPORT group. | ||
144 | * @sk: First socket in the group. | ||
145 | * @hash: Use this hash to select. | ||
146 | * Returns a socket that should receive the packet (or NULL on error). | ||
147 | */ | ||
148 | struct sock *reuseport_select_sock(struct sock *sk, u32 hash) | ||
149 | { | ||
150 | struct sock_reuseport *reuse; | ||
151 | struct sock *sk2 = NULL; | ||
152 | u16 socks; | ||
153 | |||
154 | rcu_read_lock(); | ||
155 | reuse = rcu_dereference(sk->sk_reuseport_cb); | ||
156 | |||
157 | /* if memory allocation failed or add call is not yet complete */ | ||
158 | if (!reuse) | ||
159 | goto out; | ||
160 | |||
161 | socks = READ_ONCE(reuse->num_socks); | ||
162 | if (likely(socks)) { | ||
163 | /* paired with smp_wmb() in reuseport_add_sock() */ | ||
164 | smp_rmb(); | ||
165 | |||
166 | sk2 = reuse->socks[reciprocal_scale(hash, socks)]; | ||
167 | } | ||
168 | |||
169 | out: | ||
170 | rcu_read_unlock(); | ||
171 | return sk2; | ||
172 | } | ||
173 | EXPORT_SYMBOL(reuseport_select_sock); | ||