Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/inet_hashtables.c | 178
-rw-r--r--  net/ipv4/tcp_ipv4.c        | 173
2 files changed, 179 insertions(+), 172 deletions(-)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e8d29fe736d2..33228115cda4 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -15,12 +15,14 @@
 
 #include <linux/config.h>
 #include <linux/module.h>
+#include <linux/random.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/wait.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_hashtables.h>
+#include <net/ip.h>
 
 /*
  * Allocate and initialize a new local port bind bucket.
@@ -163,3 +165,179 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad
 }
 
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
+
+/* called with local bh disabled */
+static int __inet_check_established(struct inet_timewait_death_row *death_row,
+				    struct sock *sk, __u16 lport,
+				    struct inet_timewait_sock **twp)
+{
+	struct inet_hashinfo *hinfo = death_row->hashinfo;
+	struct inet_sock *inet = inet_sk(sk);
+	u32 daddr = inet->rcv_saddr;
+	u32 saddr = inet->daddr;
+	int dif = sk->sk_bound_dev_if;
+	INET_ADDR_COOKIE(acookie, saddr, daddr)
+	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
+	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+	struct sock *sk2;
+	const struct hlist_node *node;
+	struct inet_timewait_sock *tw;
+
+	prefetch(head->chain.first);
+	write_lock(&head->lock);
+
+	/* Check TIME-WAIT sockets first. */
+	sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
+		tw = inet_twsk(sk2);
+
+		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
+			if (twsk_unique(sk, sk2, twp))
+				goto unique;
+			else
+				goto not_unique;
+		}
+	}
+	tw = NULL;
+
+	/* And established part... */
+	sk_for_each(sk2, node, &head->chain) {
+		if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
+			goto not_unique;
+	}
+
+unique:
+	/* Must record num and sport now. Otherwise we will see
+	 * in hash table socket with a funny identity. */
+	inet->num = lport;
+	inet->sport = htons(lport);
+	sk->sk_hash = hash;
+	BUG_TRAP(sk_unhashed(sk));
+	__sk_add_node(sk, &head->chain);
+	sock_prot_inc_use(sk->sk_prot);
+	write_unlock(&head->lock);
+
+	if (twp) {
+		*twp = tw;
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+	} else if (tw) {
+		/* Silly. Should hash-dance instead... */
+		inet_twsk_deschedule(tw, death_row);
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+
+		inet_twsk_put(tw);
+	}
+
+	return 0;
+
+not_unique:
+	write_unlock(&head->lock);
+	return -EADDRNOTAVAIL;
+}
+
+static inline u32 inet_sk_port_offset(const struct sock *sk)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr,
+					  inet->dport);
+}
+
+/*
+ * Bind a port for a connect operation and hash it.
+ */
+int inet_hash_connect(struct inet_timewait_death_row *death_row,
+		      struct sock *sk)
+{
+	struct inet_hashinfo *hinfo = death_row->hashinfo;
+	const unsigned short snum = inet_sk(sk)->num;
+	struct inet_bind_hashbucket *head;
+	struct inet_bind_bucket *tb;
+	int ret;
+
+	if (!snum) {
+		int low = sysctl_local_port_range[0];
+		int high = sysctl_local_port_range[1];
+		int range = high - low;
+		int i;
+		int port;
+		static u32 hint;
+		u32 offset = hint + inet_sk_port_offset(sk);
+		struct hlist_node *node;
+		struct inet_timewait_sock *tw = NULL;
+
+		local_bh_disable();
+		for (i = 1; i <= range; i++) {
+			port = low + (i + offset) % range;
+			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+			spin_lock(&head->lock);
+
+			/* Does not bother with rcv_saddr checks,
+			 * because the established check is already
+			 * unique enough.
+			 */
+			inet_bind_bucket_for_each(tb, node, &head->chain) {
+				if (tb->port == port) {
+					BUG_TRAP(!hlist_empty(&tb->owners));
+					if (tb->fastreuse >= 0)
+						goto next_port;
+					if (!__inet_check_established(death_row,
+								      sk, port,
+								      &tw))
+						goto ok;
+					goto next_port;
+				}
+			}
+
+			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port);
+			if (!tb) {
+				spin_unlock(&head->lock);
+				break;
+			}
+			tb->fastreuse = -1;
+			goto ok;
+
+		next_port:
+			spin_unlock(&head->lock);
+		}
+		local_bh_enable();
+
+		return -EADDRNOTAVAIL;
+
+ok:
+		hint += i;
+
+		/* Head lock still held and bh's disabled */
+		inet_bind_hash(sk, tb, port);
+		if (sk_unhashed(sk)) {
+			inet_sk(sk)->sport = htons(port);
+			__inet_hash(hinfo, sk, 0);
+		}
+		spin_unlock(&head->lock);
+
+		if (tw) {
+			inet_twsk_deschedule(tw, death_row);;
+			inet_twsk_put(tw);
+		}
+
+		ret = 0;
+		goto out;
+	}
+
+	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+	tb = inet_csk(sk)->icsk_bind_hash;
+	spin_lock_bh(&head->lock);
+	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
+		__inet_hash(hinfo, sk, 0);
+		spin_unlock_bh(&head->lock);
+		return 0;
+	} else {
+		spin_unlock(&head->lock);
+		/* No definite answer... Walk to established hash table */
+		ret = __inet_check_established(death_row, sk, snum, NULL);
+out:
+		local_bh_enable();
+		return ret;
+	}
+}
+
+EXPORT_SYMBOL_GPL(inet_hash_connect);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 6728772a943a..c2fe61becd61 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -152,177 +152,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 
 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 
-/* called with local bh disabled */
-static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
-				      struct inet_timewait_sock **twp)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	u32 daddr = inet->rcv_saddr;
-	u32 saddr = inet->daddr;
-	int dif = sk->sk_bound_dev_if;
-	INET_ADDR_COOKIE(acookie, saddr, daddr)
-	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
-	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
-	struct sock *sk2;
-	const struct hlist_node *node;
-	struct inet_timewait_sock *tw;
-
-	prefetch(head->chain.first);
-	write_lock(&head->lock);
-
-	/* Check TIME-WAIT sockets first. */
-	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
-		tw = inet_twsk(sk2);
-
-		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
-			if (twsk_unique(sk, sk2, twp))
-				goto unique;
-			else
-				goto not_unique;
-		}
-	}
-	tw = NULL;
-
-	/* And established part... */
-	sk_for_each(sk2, node, &head->chain) {
-		if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
-			goto not_unique;
-	}
-
-unique:
-	/* Must record num and sport now. Otherwise we will see
-	 * in hash table socket with a funny identity. */
-	inet->num = lport;
-	inet->sport = htons(lport);
-	sk->sk_hash = hash;
-	BUG_TRAP(sk_unhashed(sk));
-	__sk_add_node(sk, &head->chain);
-	sock_prot_inc_use(sk->sk_prot);
-	write_unlock(&head->lock);
-
-	if (twp) {
-		*twp = tw;
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-	} else if (tw) {
-		/* Silly. Should hash-dance instead... */
-		inet_twsk_deschedule(tw, &tcp_death_row);
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-
-		inet_twsk_put(tw);
-	}
-
-	return 0;
-
-not_unique:
-	write_unlock(&head->lock);
-	return -EADDRNOTAVAIL;
-}
-
-static inline u32 connect_port_offset(const struct sock *sk)
-{
-	const struct inet_sock *inet = inet_sk(sk);
-
-	return secure_tcp_port_ephemeral(inet->rcv_saddr, inet->daddr,
-					 inet->dport);
-}
-
-/*
- * Bind a port for a connect operation and hash it.
- */
-static inline int tcp_v4_hash_connect(struct sock *sk)
-{
-	const unsigned short snum = inet_sk(sk)->num;
-	struct inet_bind_hashbucket *head;
-	struct inet_bind_bucket *tb;
-	int ret;
-
-	if (!snum) {
-		int low = sysctl_local_port_range[0];
-		int high = sysctl_local_port_range[1];
-		int range = high - low;
-		int i;
-		int port;
-		static u32 hint;
-		u32 offset = hint + connect_port_offset(sk);
-		struct hlist_node *node;
-		struct inet_timewait_sock *tw = NULL;
-
-		local_bh_disable();
-		for (i = 1; i <= range; i++) {
-			port = low + (i + offset) % range;
-			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
-			spin_lock(&head->lock);
-
-			/* Does not bother with rcv_saddr checks,
-			 * because the established check is already
-			 * unique enough.
-			 */
-			inet_bind_bucket_for_each(tb, node, &head->chain) {
-				if (tb->port == port) {
-					BUG_TRAP(!hlist_empty(&tb->owners));
-					if (tb->fastreuse >= 0)
-						goto next_port;
-					if (!__tcp_v4_check_established(sk,
-									port,
-									&tw))
-						goto ok;
-					goto next_port;
-				}
-			}
-
-			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
-			if (!tb) {
-				spin_unlock(&head->lock);
-				break;
-			}
-			tb->fastreuse = -1;
-			goto ok;
-
-		next_port:
-			spin_unlock(&head->lock);
-		}
-		local_bh_enable();
-
-		return -EADDRNOTAVAIL;
-
-ok:
-		hint += i;
-
-		/* Head lock still held and bh's disabled */
-		inet_bind_hash(sk, tb, port);
-		if (sk_unhashed(sk)) {
-			inet_sk(sk)->sport = htons(port);
-			__inet_hash(&tcp_hashinfo, sk, 0);
-		}
-		spin_unlock(&head->lock);
-
-		if (tw) {
-			inet_twsk_deschedule(tw, &tcp_death_row);;
-			inet_twsk_put(tw);
-		}
-
-		ret = 0;
-		goto out;
-	}
-
-	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
-	tb = inet_csk(sk)->icsk_bind_hash;
-	spin_lock_bh(&head->lock);
-	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-		__inet_hash(&tcp_hashinfo, sk, 0);
-		spin_unlock_bh(&head->lock);
-		return 0;
-	} else {
-		spin_unlock(&head->lock);
-		/* No definite answer... Walk to established hash table */
-		ret = __tcp_v4_check_established(sk, snum, NULL);
-out:
-		local_bh_enable();
-		return ret;
-	}
-}
-
 /* This will initiate an outgoing connection. */
 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -403,7 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	 * complete initialization after this.
 	 */
 	tcp_set_state(sk, TCP_SYN_SENT);
-	err = tcp_v4_hash_connect(sk);
+	err = inet_hash_connect(&tcp_death_row, sk);
 	if (err)
 		goto failure;
 