aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2009-12-02 17:31:19 -0500
committerDavid S. Miller <davem@davemloft.net>2009-12-03 19:17:43 -0500
commit13475a30b66cdb9250a34052c19ac98847373030 (patch)
tree5f28f671092c2948726fdde92e20c3371cfceb77 /net/ipv4
parentff33a6e2ab97f4cde484cdf1a41778af6d6b7cff (diff)
tcp: connect() race with timewait reuse
It is currently possible for several threads issuing a connect() to find the same timewait socket and try to reuse it, leading to list corruption. The condition for the bug is that these threads bound their sockets to the same address/port as the to-be-found timewait socket, and connected to the same target. (SO_REUSEADDR is needed.) To fix this problem, we can unhash the timewait socket while holding the ehash lock, to make sure lookups/changes are serialized. Only the first thread finds the timewait socket; the other ones find the established socket and return an EADDRNOTAVAIL error. This second version takes into account Evgeniy's review and makes sure inet_twsk_put() is called outside of locked sections. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/inet_hashtables.c10
-rw-r--r--net/ipv4/inet_timewait_sock.c38
2 files changed, 35 insertions, 13 deletions
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 94ef51aa5bc9..21e5e32d8c60 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -286,6 +286,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
286 struct sock *sk2; 286 struct sock *sk2;
287 const struct hlist_nulls_node *node; 287 const struct hlist_nulls_node *node;
288 struct inet_timewait_sock *tw; 288 struct inet_timewait_sock *tw;
289 int twrefcnt = 0;
289 290
290 spin_lock(lock); 291 spin_lock(lock);
291 292
@@ -318,20 +319,23 @@ unique:
318 sk->sk_hash = hash; 319 sk->sk_hash = hash;
319 WARN_ON(!sk_unhashed(sk)); 320 WARN_ON(!sk_unhashed(sk));
320 __sk_nulls_add_node_rcu(sk, &head->chain); 321 __sk_nulls_add_node_rcu(sk, &head->chain);
322 if (tw) {
323 twrefcnt = inet_twsk_unhash(tw);
324 NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
325 }
321 spin_unlock(lock); 326 spin_unlock(lock);
327 if (twrefcnt)
328 inet_twsk_put(tw);
322 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 329 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
323 330
324 if (twp) { 331 if (twp) {
325 *twp = tw; 332 *twp = tw;
326 NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
327 } else if (tw) { 333 } else if (tw) {
328 /* Silly. Should hash-dance instead... */ 334 /* Silly. Should hash-dance instead... */
329 inet_twsk_deschedule(tw, death_row); 335 inet_twsk_deschedule(tw, death_row);
330 NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
331 336
332 inet_twsk_put(tw); 337 inet_twsk_put(tw);
333 } 338 }
334
335 return 0; 339 return 0;
336 340
337not_unique: 341not_unique:
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 31f931ef3daf..11a107a5af4f 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -14,22 +14,33 @@
14#include <net/inet_timewait_sock.h> 14#include <net/inet_timewait_sock.h>
15#include <net/ip.h> 15#include <net/ip.h>
16 16
17
18/*
19 * unhash a timewait socket from established hash
20 * lock must be hold by caller
21 */
22int inet_twsk_unhash(struct inet_timewait_sock *tw)
23{
24 if (hlist_nulls_unhashed(&tw->tw_node))
25 return 0;
26
27 hlist_nulls_del_rcu(&tw->tw_node);
28 sk_nulls_node_init(&tw->tw_node);
29 return 1;
30}
31
17/* Must be called with locally disabled BHs. */ 32/* Must be called with locally disabled BHs. */
18static void __inet_twsk_kill(struct inet_timewait_sock *tw, 33static void __inet_twsk_kill(struct inet_timewait_sock *tw,
19 struct inet_hashinfo *hashinfo) 34 struct inet_hashinfo *hashinfo)
20{ 35{
21 struct inet_bind_hashbucket *bhead; 36 struct inet_bind_hashbucket *bhead;
22 struct inet_bind_bucket *tb; 37 struct inet_bind_bucket *tb;
38 int refcnt;
23 /* Unlink from established hashes. */ 39 /* Unlink from established hashes. */
24 spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); 40 spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
25 41
26 spin_lock(lock); 42 spin_lock(lock);
27 if (hlist_nulls_unhashed(&tw->tw_node)) { 43 refcnt = inet_twsk_unhash(tw);
28 spin_unlock(lock);
29 return;
30 }
31 hlist_nulls_del_rcu(&tw->tw_node);
32 sk_nulls_node_init(&tw->tw_node);
33 spin_unlock(lock); 44 spin_unlock(lock);
34 45
35 /* Disassociate with bind bucket. */ 46 /* Disassociate with bind bucket. */
@@ -37,9 +48,12 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
37 hashinfo->bhash_size)]; 48 hashinfo->bhash_size)];
38 spin_lock(&bhead->lock); 49 spin_lock(&bhead->lock);
39 tb = tw->tw_tb; 50 tb = tw->tw_tb;
40 __hlist_del(&tw->tw_bind_node); 51 if (tb) {
41 tw->tw_tb = NULL; 52 __hlist_del(&tw->tw_bind_node);
42 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); 53 tw->tw_tb = NULL;
54 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
55 refcnt++;
56 }
43 spin_unlock(&bhead->lock); 57 spin_unlock(&bhead->lock);
44#ifdef SOCK_REFCNT_DEBUG 58#ifdef SOCK_REFCNT_DEBUG
45 if (atomic_read(&tw->tw_refcnt) != 1) { 59 if (atomic_read(&tw->tw_refcnt) != 1) {
@@ -47,7 +61,10 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
47 tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); 61 tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt));
48 } 62 }
49#endif 63#endif
50 inet_twsk_put(tw); 64 while (refcnt) {
65 inet_twsk_put(tw);
66 refcnt--;
67 }
51} 68}
52 69
53static noinline void inet_twsk_free(struct inet_timewait_sock *tw) 70static noinline void inet_twsk_free(struct inet_timewait_sock *tw)
@@ -92,6 +109,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
92 tw->tw_tb = icsk->icsk_bind_hash; 109 tw->tw_tb = icsk->icsk_bind_hash;
93 WARN_ON(!icsk->icsk_bind_hash); 110 WARN_ON(!icsk->icsk_bind_hash);
94 inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); 111 inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
112 atomic_inc(&tw->tw_refcnt);
95 spin_unlock(&bhead->lock); 113 spin_unlock(&bhead->lock);
96 114
97 spin_lock(lock); 115 spin_lock(lock);