aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2009-12-02 17:31:19 -0500
committerDavid S. Miller <davem@davemloft.net>2009-12-03 19:17:43 -0500
commit13475a30b66cdb9250a34052c19ac98847373030 (patch)
tree5f28f671092c2948726fdde92e20c3371cfceb77 /net
parentff33a6e2ab97f4cde484cdf1a41778af6d6b7cff (diff)
tcp: connect() race with timewait reuse
It's currently possible that several threads issuing a connect() find the same timewait socket and try to reuse it, leading to list corruptions. The condition for the bug is that these threads bound their socket on the same address/port as the to-be-found timewait socket, and connected to the same target. (SO_REUSEADDR needed) To fix this problem, we could unhash the timewait socket while holding the ehash lock, to make sure lookups/changes will be serialized. Only the first thread finds the timewait socket; other ones find the established socket and return an EADDRNOTAVAIL error. This second version takes into account Evgeniy's review and makes sure inet_twsk_put() is called outside of locked sections. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/inet_hashtables.c10
-rw-r--r--net/ipv4/inet_timewait_sock.c38
-rw-r--r--net/ipv6/inet6_hashtables.c15
3 files changed, 45 insertions, 18 deletions
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 94ef51aa5bc9..21e5e32d8c60 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -286,6 +286,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
286 struct sock *sk2; 286 struct sock *sk2;
287 const struct hlist_nulls_node *node; 287 const struct hlist_nulls_node *node;
288 struct inet_timewait_sock *tw; 288 struct inet_timewait_sock *tw;
289 int twrefcnt = 0;
289 290
290 spin_lock(lock); 291 spin_lock(lock);
291 292
@@ -318,20 +319,23 @@ unique:
318 sk->sk_hash = hash; 319 sk->sk_hash = hash;
319 WARN_ON(!sk_unhashed(sk)); 320 WARN_ON(!sk_unhashed(sk));
320 __sk_nulls_add_node_rcu(sk, &head->chain); 321 __sk_nulls_add_node_rcu(sk, &head->chain);
322 if (tw) {
323 twrefcnt = inet_twsk_unhash(tw);
324 NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
325 }
321 spin_unlock(lock); 326 spin_unlock(lock);
327 if (twrefcnt)
328 inet_twsk_put(tw);
322 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 329 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
323 330
324 if (twp) { 331 if (twp) {
325 *twp = tw; 332 *twp = tw;
326 NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
327 } else if (tw) { 333 } else if (tw) {
328 /* Silly. Should hash-dance instead... */ 334 /* Silly. Should hash-dance instead... */
329 inet_twsk_deschedule(tw, death_row); 335 inet_twsk_deschedule(tw, death_row);
330 NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
331 336
332 inet_twsk_put(tw); 337 inet_twsk_put(tw);
333 } 338 }
334
335 return 0; 339 return 0;
336 340
337not_unique: 341not_unique:
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 31f931ef3daf..11a107a5af4f 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -14,22 +14,33 @@
14#include <net/inet_timewait_sock.h> 14#include <net/inet_timewait_sock.h>
15#include <net/ip.h> 15#include <net/ip.h>
16 16
17
18/*
19 * unhash a timewait socket from established hash
20 * lock must be hold by caller
21 */
22int inet_twsk_unhash(struct inet_timewait_sock *tw)
23{
24 if (hlist_nulls_unhashed(&tw->tw_node))
25 return 0;
26
27 hlist_nulls_del_rcu(&tw->tw_node);
28 sk_nulls_node_init(&tw->tw_node);
29 return 1;
30}
31
17/* Must be called with locally disabled BHs. */ 32/* Must be called with locally disabled BHs. */
18static void __inet_twsk_kill(struct inet_timewait_sock *tw, 33static void __inet_twsk_kill(struct inet_timewait_sock *tw,
19 struct inet_hashinfo *hashinfo) 34 struct inet_hashinfo *hashinfo)
20{ 35{
21 struct inet_bind_hashbucket *bhead; 36 struct inet_bind_hashbucket *bhead;
22 struct inet_bind_bucket *tb; 37 struct inet_bind_bucket *tb;
38 int refcnt;
23 /* Unlink from established hashes. */ 39 /* Unlink from established hashes. */
24 spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); 40 spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
25 41
26 spin_lock(lock); 42 spin_lock(lock);
27 if (hlist_nulls_unhashed(&tw->tw_node)) { 43 refcnt = inet_twsk_unhash(tw);
28 spin_unlock(lock);
29 return;
30 }
31 hlist_nulls_del_rcu(&tw->tw_node);
32 sk_nulls_node_init(&tw->tw_node);
33 spin_unlock(lock); 44 spin_unlock(lock);
34 45
35 /* Disassociate with bind bucket. */ 46 /* Disassociate with bind bucket. */
@@ -37,9 +48,12 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
37 hashinfo->bhash_size)]; 48 hashinfo->bhash_size)];
38 spin_lock(&bhead->lock); 49 spin_lock(&bhead->lock);
39 tb = tw->tw_tb; 50 tb = tw->tw_tb;
40 __hlist_del(&tw->tw_bind_node); 51 if (tb) {
41 tw->tw_tb = NULL; 52 __hlist_del(&tw->tw_bind_node);
42 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); 53 tw->tw_tb = NULL;
54 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
55 refcnt++;
56 }
43 spin_unlock(&bhead->lock); 57 spin_unlock(&bhead->lock);
44#ifdef SOCK_REFCNT_DEBUG 58#ifdef SOCK_REFCNT_DEBUG
45 if (atomic_read(&tw->tw_refcnt) != 1) { 59 if (atomic_read(&tw->tw_refcnt) != 1) {
@@ -47,7 +61,10 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
47 tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); 61 tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt));
48 } 62 }
49#endif 63#endif
50 inet_twsk_put(tw); 64 while (refcnt) {
65 inet_twsk_put(tw);
66 refcnt--;
67 }
51} 68}
52 69
53static noinline void inet_twsk_free(struct inet_timewait_sock *tw) 70static noinline void inet_twsk_free(struct inet_timewait_sock *tw)
@@ -92,6 +109,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
92 tw->tw_tb = icsk->icsk_bind_hash; 109 tw->tw_tb = icsk->icsk_bind_hash;
93 WARN_ON(!icsk->icsk_bind_hash); 110 WARN_ON(!icsk->icsk_bind_hash);
94 inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); 111 inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
112 atomic_inc(&tw->tw_refcnt);
95 spin_unlock(&bhead->lock); 113 spin_unlock(&bhead->lock);
96 114
97 spin_lock(lock); 115 spin_lock(lock);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 00c6a3e6cddf..c813e294ec0c 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -223,6 +223,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
223 struct sock *sk2; 223 struct sock *sk2;
224 const struct hlist_nulls_node *node; 224 const struct hlist_nulls_node *node;
225 struct inet_timewait_sock *tw; 225 struct inet_timewait_sock *tw;
226 int twrefcnt = 0;
226 227
227 spin_lock(lock); 228 spin_lock(lock);
228 229
@@ -250,19 +251,23 @@ unique:
250 * in hash table socket with a funny identity. */ 251 * in hash table socket with a funny identity. */
251 inet->inet_num = lport; 252 inet->inet_num = lport;
252 inet->inet_sport = htons(lport); 253 inet->inet_sport = htons(lport);
254 sk->sk_hash = hash;
253 WARN_ON(!sk_unhashed(sk)); 255 WARN_ON(!sk_unhashed(sk));
254 __sk_nulls_add_node_rcu(sk, &head->chain); 256 __sk_nulls_add_node_rcu(sk, &head->chain);
255 sk->sk_hash = hash; 257 if (tw) {
258 twrefcnt = inet_twsk_unhash(tw);
259 NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
260 }
256 spin_unlock(lock); 261 spin_unlock(lock);
262 if (twrefcnt)
263 inet_twsk_put(tw);
257 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 264 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
258 265
259 if (twp != NULL) { 266 if (twp) {
260 *twp = tw; 267 *twp = tw;
261 NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); 268 } else if (tw) {
262 } else if (tw != NULL) {
263 /* Silly. Should hash-dance instead... */ 269 /* Silly. Should hash-dance instead... */
264 inet_twsk_deschedule(tw, death_row); 270 inet_twsk_deschedule(tw, death_row);
265 NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
266 271
267 inet_twsk_put(tw); 272 inet_twsk_put(tw);
268 } 273 }