diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2009-12-03 22:46:54 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-12-08 23:17:51 -0500 |
commit | 9327f7053e3993c125944fdb137a0618319ef2a0 (patch) | |
tree | 3cee7de049a2468bef930b1832c42bd1b2e69e9a | |
parent | 74757d49016a8b06ca028196886641d7aeb78de5 (diff) |
tcp: Fix a connect() race with timewait sockets
The first patch changes __inet_hash_nolisten() and __inet6_hash()
to take a timewait socket parameter, so that the timewait socket can be
unhashed from ehash at the same time the new socket is inserted into the hash.
This makes sure the timewait socket won't be found by a concurrent
writer in __inet_check_established().
Reported-by: kapil dakhane <kdakhane@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/inet6_hashtables.h | 2 | ||||
-rw-r--r-- | include/net/inet_hashtables.h | 8 | ||||
-rw-r--r-- | net/dccp/ipv4.c | 2 | ||||
-rw-r--r-- | net/dccp/ipv6.c | 4 | ||||
-rw-r--r-- | net/ipv4/inet_hashtables.c | 22 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv6/inet6_hashtables.c | 8 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 4 |
8 files changed, 35 insertions, 17 deletions
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 92838d3a1ab7..e46674d5daea 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h | |||
@@ -53,7 +53,7 @@ static inline int inet6_sk_ehashfn(const struct sock *sk) | |||
53 | return inet6_ehashfn(net, laddr, lport, faddr, fport); | 53 | return inet6_ehashfn(net, laddr, lport, faddr, fport); |
54 | } | 54 | } |
55 | 55 | ||
56 | extern void __inet6_hash(struct sock *sk); | 56 | extern int __inet6_hash(struct sock *sk, struct inet_timewait_sock *twp); |
57 | 57 | ||
58 | /* | 58 | /* |
59 | * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so | 59 | * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so |
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 41cbddd25b70..74358d1b3f43 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h | |||
@@ -251,7 +251,7 @@ extern void inet_put_port(struct sock *sk); | |||
251 | 251 | ||
252 | void inet_hashinfo_init(struct inet_hashinfo *h); | 252 | void inet_hashinfo_init(struct inet_hashinfo *h); |
253 | 253 | ||
254 | extern void __inet_hash_nolisten(struct sock *sk); | 254 | extern int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw); |
255 | extern void inet_hash(struct sock *sk); | 255 | extern void inet_hash(struct sock *sk); |
256 | extern void inet_unhash(struct sock *sk); | 256 | extern void inet_unhash(struct sock *sk); |
257 | 257 | ||
@@ -391,10 +391,12 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, | |||
391 | } | 391 | } |
392 | 392 | ||
393 | extern int __inet_hash_connect(struct inet_timewait_death_row *death_row, | 393 | extern int __inet_hash_connect(struct inet_timewait_death_row *death_row, |
394 | struct sock *sk, u32 port_offset, | 394 | struct sock *sk, |
395 | u32 port_offset, | ||
395 | int (*check_established)(struct inet_timewait_death_row *, | 396 | int (*check_established)(struct inet_timewait_death_row *, |
396 | struct sock *, __u16, struct inet_timewait_sock **), | 397 | struct sock *, __u16, struct inet_timewait_sock **), |
397 | void (*hash)(struct sock *sk)); | 398 | int (*hash)(struct sock *sk, struct inet_timewait_sock *twp)); |
399 | |||
398 | extern int inet_hash_connect(struct inet_timewait_death_row *death_row, | 400 | extern int inet_hash_connect(struct inet_timewait_death_row *death_row, |
399 | struct sock *sk); | 401 | struct sock *sk); |
400 | #endif /* _INET_HASHTABLES_H */ | 402 | #endif /* _INET_HASHTABLES_H */ |
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index efbcfdc12796..dad7bc4878e0 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c | |||
@@ -408,7 +408,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
408 | 408 | ||
409 | dccp_sync_mss(newsk, dst_mtu(dst)); | 409 | dccp_sync_mss(newsk, dst_mtu(dst)); |
410 | 410 | ||
411 | __inet_hash_nolisten(newsk); | 411 | __inet_hash_nolisten(newsk, NULL); |
412 | __inet_inherit_port(sk, newsk); | 412 | __inet_inherit_port(sk, newsk); |
413 | 413 | ||
414 | return newsk; | 414 | return newsk; |
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6574215a1f51..baf05cf43c28 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c | |||
@@ -46,7 +46,7 @@ static void dccp_v6_hash(struct sock *sk) | |||
46 | return; | 46 | return; |
47 | } | 47 | } |
48 | local_bh_disable(); | 48 | local_bh_disable(); |
49 | __inet6_hash(sk); | 49 | __inet6_hash(sk, NULL); |
50 | local_bh_enable(); | 50 | local_bh_enable(); |
51 | } | 51 | } |
52 | } | 52 | } |
@@ -644,7 +644,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, | |||
644 | newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; | 644 | newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; |
645 | newinet->inet_rcv_saddr = LOOPBACK4_IPV6; | 645 | newinet->inet_rcv_saddr = LOOPBACK4_IPV6; |
646 | 646 | ||
647 | __inet6_hash(newsk); | 647 | __inet6_hash(newsk, NULL); |
648 | __inet_inherit_port(sk, newsk); | 648 | __inet_inherit_port(sk, newsk); |
649 | 649 | ||
650 | return newsk; | 650 | return newsk; |
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 21e5e32d8c60..c4201b7ece38 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -351,12 +351,13 @@ static inline u32 inet_sk_port_offset(const struct sock *sk) | |||
351 | inet->inet_dport); | 351 | inet->inet_dport); |
352 | } | 352 | } |
353 | 353 | ||
354 | void __inet_hash_nolisten(struct sock *sk) | 354 | int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw) |
355 | { | 355 | { |
356 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; | 356 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
357 | struct hlist_nulls_head *list; | 357 | struct hlist_nulls_head *list; |
358 | spinlock_t *lock; | 358 | spinlock_t *lock; |
359 | struct inet_ehash_bucket *head; | 359 | struct inet_ehash_bucket *head; |
360 | int twrefcnt = 0; | ||
360 | 361 | ||
361 | WARN_ON(!sk_unhashed(sk)); | 362 | WARN_ON(!sk_unhashed(sk)); |
362 | 363 | ||
@@ -367,8 +368,13 @@ void __inet_hash_nolisten(struct sock *sk) | |||
367 | 368 | ||
368 | spin_lock(lock); | 369 | spin_lock(lock); |
369 | __sk_nulls_add_node_rcu(sk, list); | 370 | __sk_nulls_add_node_rcu(sk, list); |
371 | if (tw) { | ||
372 | WARN_ON(sk->sk_hash != tw->tw_hash); | ||
373 | twrefcnt = inet_twsk_unhash(tw); | ||
374 | } | ||
370 | spin_unlock(lock); | 375 | spin_unlock(lock); |
371 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 376 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
377 | return twrefcnt; | ||
372 | } | 378 | } |
373 | EXPORT_SYMBOL_GPL(__inet_hash_nolisten); | 379 | EXPORT_SYMBOL_GPL(__inet_hash_nolisten); |
374 | 380 | ||
@@ -378,7 +384,7 @@ static void __inet_hash(struct sock *sk) | |||
378 | struct inet_listen_hashbucket *ilb; | 384 | struct inet_listen_hashbucket *ilb; |
379 | 385 | ||
380 | if (sk->sk_state != TCP_LISTEN) { | 386 | if (sk->sk_state != TCP_LISTEN) { |
381 | __inet_hash_nolisten(sk); | 387 | __inet_hash_nolisten(sk, NULL); |
382 | return; | 388 | return; |
383 | } | 389 | } |
384 | 390 | ||
@@ -427,7 +433,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, | |||
427 | struct sock *sk, u32 port_offset, | 433 | struct sock *sk, u32 port_offset, |
428 | int (*check_established)(struct inet_timewait_death_row *, | 434 | int (*check_established)(struct inet_timewait_death_row *, |
429 | struct sock *, __u16, struct inet_timewait_sock **), | 435 | struct sock *, __u16, struct inet_timewait_sock **), |
430 | void (*hash)(struct sock *sk)) | 436 | int (*hash)(struct sock *sk, struct inet_timewait_sock *twp)) |
431 | { | 437 | { |
432 | struct inet_hashinfo *hinfo = death_row->hashinfo; | 438 | struct inet_hashinfo *hinfo = death_row->hashinfo; |
433 | const unsigned short snum = inet_sk(sk)->inet_num; | 439 | const unsigned short snum = inet_sk(sk)->inet_num; |
@@ -435,6 +441,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, | |||
435 | struct inet_bind_bucket *tb; | 441 | struct inet_bind_bucket *tb; |
436 | int ret; | 442 | int ret; |
437 | struct net *net = sock_net(sk); | 443 | struct net *net = sock_net(sk); |
444 | int twrefcnt = 1; | ||
438 | 445 | ||
439 | if (!snum) { | 446 | if (!snum) { |
440 | int i, remaining, low, high, port; | 447 | int i, remaining, low, high, port; |
@@ -493,13 +500,16 @@ ok: | |||
493 | inet_bind_hash(sk, tb, port); | 500 | inet_bind_hash(sk, tb, port); |
494 | if (sk_unhashed(sk)) { | 501 | if (sk_unhashed(sk)) { |
495 | inet_sk(sk)->inet_sport = htons(port); | 502 | inet_sk(sk)->inet_sport = htons(port); |
496 | hash(sk); | 503 | twrefcnt += hash(sk, tw); |
497 | } | 504 | } |
498 | spin_unlock(&head->lock); | 505 | spin_unlock(&head->lock); |
499 | 506 | ||
500 | if (tw) { | 507 | if (tw) { |
501 | inet_twsk_deschedule(tw, death_row); | 508 | inet_twsk_deschedule(tw, death_row); |
502 | inet_twsk_put(tw); | 509 | while (twrefcnt) { |
510 | twrefcnt--; | ||
511 | inet_twsk_put(tw); | ||
512 | } | ||
503 | } | 513 | } |
504 | 514 | ||
505 | ret = 0; | 515 | ret = 0; |
@@ -510,7 +520,7 @@ ok: | |||
510 | tb = inet_csk(sk)->icsk_bind_hash; | 520 | tb = inet_csk(sk)->icsk_bind_hash; |
511 | spin_lock_bh(&head->lock); | 521 | spin_lock_bh(&head->lock); |
512 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { | 522 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { |
513 | hash(sk); | 523 | hash(sk, NULL); |
514 | spin_unlock_bh(&head->lock); | 524 | spin_unlock_bh(&head->lock); |
515 | return 0; | 525 | return 0; |
516 | } else { | 526 | } else { |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 29002ab26e0d..15e96030ce47 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -1464,7 +1464,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1464 | } | 1464 | } |
1465 | #endif | 1465 | #endif |
1466 | 1466 | ||
1467 | __inet_hash_nolisten(newsk); | 1467 | __inet_hash_nolisten(newsk, NULL); |
1468 | __inet_inherit_port(sk, newsk); | 1468 | __inet_inherit_port(sk, newsk); |
1469 | 1469 | ||
1470 | return newsk; | 1470 | return newsk; |
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index c813e294ec0c..633a6c266136 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c | |||
@@ -22,9 +22,10 @@ | |||
22 | #include <net/inet6_hashtables.h> | 22 | #include <net/inet6_hashtables.h> |
23 | #include <net/ip.h> | 23 | #include <net/ip.h> |
24 | 24 | ||
25 | void __inet6_hash(struct sock *sk) | 25 | int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw) |
26 | { | 26 | { |
27 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; | 27 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
28 | int twrefcnt = 0; | ||
28 | 29 | ||
29 | WARN_ON(!sk_unhashed(sk)); | 30 | WARN_ON(!sk_unhashed(sk)); |
30 | 31 | ||
@@ -45,10 +46,15 @@ void __inet6_hash(struct sock *sk) | |||
45 | lock = inet_ehash_lockp(hashinfo, hash); | 46 | lock = inet_ehash_lockp(hashinfo, hash); |
46 | spin_lock(lock); | 47 | spin_lock(lock); |
47 | __sk_nulls_add_node_rcu(sk, list); | 48 | __sk_nulls_add_node_rcu(sk, list); |
49 | if (tw) { | ||
50 | WARN_ON(sk->sk_hash != tw->tw_hash); | ||
51 | twrefcnt = inet_twsk_unhash(tw); | ||
52 | } | ||
48 | spin_unlock(lock); | 53 | spin_unlock(lock); |
49 | } | 54 | } |
50 | 55 | ||
51 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 56 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
57 | return twrefcnt; | ||
52 | } | 58 | } |
53 | EXPORT_SYMBOL(__inet6_hash); | 59 | EXPORT_SYMBOL(__inet6_hash); |
54 | 60 | ||
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index aadd7cef73b3..ee9cf62458d4 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -96,7 +96,7 @@ static void tcp_v6_hash(struct sock *sk) | |||
96 | return; | 96 | return; |
97 | } | 97 | } |
98 | local_bh_disable(); | 98 | local_bh_disable(); |
99 | __inet6_hash(sk); | 99 | __inet6_hash(sk, NULL); |
100 | local_bh_enable(); | 100 | local_bh_enable(); |
101 | } | 101 | } |
102 | } | 102 | } |
@@ -1496,7 +1496,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1496 | } | 1496 | } |
1497 | #endif | 1497 | #endif |
1498 | 1498 | ||
1499 | __inet6_hash(newsk); | 1499 | __inet6_hash(newsk, NULL); |
1500 | __inet_inherit_port(sk, newsk); | 1500 | __inet_inherit_port(sk, newsk); |
1501 | 1501 | ||
1502 | return newsk; | 1502 | return newsk; |