about summary refs log tree commit diff stats
path: root/net/ipv4/inet_timewait_sock.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/inet_timewait_sock.c')
-rw-r--r-- net/ipv4/inet_timewait_sock.c 112
1 files changed, 76 insertions, 36 deletions
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 13f0781f35cd..0fdf45e4c90c 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -14,22 +14,33 @@
14#include <net/inet_timewait_sock.h> 14#include <net/inet_timewait_sock.h>
15#include <net/ip.h> 15#include <net/ip.h>
16 16
17
18/*
19 * unhash a timewait socket from established hash
20 * lock must be held by caller
21 */
22int inet_twsk_unhash(struct inet_timewait_sock *tw)
23{
24 if (hlist_nulls_unhashed(&tw->tw_node))
25 return 0;
26
27 hlist_nulls_del_rcu(&tw->tw_node);
28 sk_nulls_node_init(&tw->tw_node);
29 return 1;
30}
31
17/* Must be called with locally disabled BHs. */ 32/* Must be called with locally disabled BHs. */
18static void __inet_twsk_kill(struct inet_timewait_sock *tw, 33static void __inet_twsk_kill(struct inet_timewait_sock *tw,
19 struct inet_hashinfo *hashinfo) 34 struct inet_hashinfo *hashinfo)
20{ 35{
21 struct inet_bind_hashbucket *bhead; 36 struct inet_bind_hashbucket *bhead;
22 struct inet_bind_bucket *tb; 37 struct inet_bind_bucket *tb;
38 int refcnt;
23 /* Unlink from established hashes. */ 39 /* Unlink from established hashes. */
24 spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); 40 spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
25 41
26 spin_lock(lock); 42 spin_lock(lock);
27 if (hlist_nulls_unhashed(&tw->tw_node)) { 43 refcnt = inet_twsk_unhash(tw);
28 spin_unlock(lock);
29 return;
30 }
31 hlist_nulls_del_rcu(&tw->tw_node);
32 sk_nulls_node_init(&tw->tw_node);
33 spin_unlock(lock); 44 spin_unlock(lock);
34 45
35 /* Disassociate with bind bucket. */ 46 /* Disassociate with bind bucket. */
@@ -37,9 +48,12 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
37 hashinfo->bhash_size)]; 48 hashinfo->bhash_size)];
38 spin_lock(&bhead->lock); 49 spin_lock(&bhead->lock);
39 tb = tw->tw_tb; 50 tb = tw->tw_tb;
40 __hlist_del(&tw->tw_bind_node); 51 if (tb) {
41 tw->tw_tb = NULL; 52 __hlist_del(&tw->tw_bind_node);
42 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); 53 tw->tw_tb = NULL;
54 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
55 refcnt++;
56 }
43 spin_unlock(&bhead->lock); 57 spin_unlock(&bhead->lock);
44#ifdef SOCK_REFCNT_DEBUG 58#ifdef SOCK_REFCNT_DEBUG
45 if (atomic_read(&tw->tw_refcnt) != 1) { 59 if (atomic_read(&tw->tw_refcnt) != 1) {
@@ -47,7 +61,10 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
47 tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); 61 tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt));
48 } 62 }
49#endif 63#endif
50 inet_twsk_put(tw); 64 while (refcnt) {
65 inet_twsk_put(tw);
66 refcnt--;
67 }
51} 68}
52 69
53static noinline void inet_twsk_free(struct inet_timewait_sock *tw) 70static noinline void inet_twsk_free(struct inet_timewait_sock *tw)
@@ -86,7 +103,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
86 Note, that any socket with inet->num != 0 MUST be bound in 103 Note, that any socket with inet->num != 0 MUST be bound in
87 binding cache, even if it is closed. 104 binding cache, even if it is closed.
88 */ 105 */
89 bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num, 106 bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
90 hashinfo->bhash_size)]; 107 hashinfo->bhash_size)];
91 spin_lock(&bhead->lock); 108 spin_lock(&bhead->lock);
92 tw->tw_tb = icsk->icsk_bind_hash; 109 tw->tw_tb = icsk->icsk_bind_hash;
@@ -101,13 +118,22 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
101 * Should be done before removing sk from established chain 118 * Should be done before removing sk from established chain
102 * because readers are lockless and search established first. 119 * because readers are lockless and search established first.
103 */ 120 */
104 atomic_inc(&tw->tw_refcnt);
105 inet_twsk_add_node_rcu(tw, &ehead->twchain); 121 inet_twsk_add_node_rcu(tw, &ehead->twchain);
106 122
107 /* Step 3: Remove SK from established hash. */ 123 /* Step 3: Remove SK from established hash. */
108 if (__sk_nulls_del_node_init_rcu(sk)) 124 if (__sk_nulls_del_node_init_rcu(sk))
109 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 125 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
110 126
127 /*
128 * Notes :
129 * - We initially set tw_refcnt to 0 in inet_twsk_alloc()
130 * - We add one reference for the bhash link
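131 * - We add one reference for the ehash link (the third unit in the atomic_add() below stands in for the initial reference that inet_twsk_alloc() used to set)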
132 * - We want this refcnt update done before allowing other
133 * threads to find this tw in ehash chain.
134 */
135 atomic_add(1 + 1 + 1, &tw->tw_refcnt);
136
111 spin_unlock(lock); 137 spin_unlock(lock);
112} 138}
113 139
@@ -124,14 +150,14 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
124 kmemcheck_annotate_bitfield(tw, flags); 150 kmemcheck_annotate_bitfield(tw, flags);
125 151
126 /* Give us an identity. */ 152 /* Give us an identity. */
127 tw->tw_daddr = inet->daddr; 153 tw->tw_daddr = inet->inet_daddr;
128 tw->tw_rcv_saddr = inet->rcv_saddr; 154 tw->tw_rcv_saddr = inet->inet_rcv_saddr;
129 tw->tw_bound_dev_if = sk->sk_bound_dev_if; 155 tw->tw_bound_dev_if = sk->sk_bound_dev_if;
130 tw->tw_num = inet->num; 156 tw->tw_num = inet->inet_num;
131 tw->tw_state = TCP_TIME_WAIT; 157 tw->tw_state = TCP_TIME_WAIT;
132 tw->tw_substate = state; 158 tw->tw_substate = state;
133 tw->tw_sport = inet->sport; 159 tw->tw_sport = inet->inet_sport;
134 tw->tw_dport = inet->dport; 160 tw->tw_dport = inet->inet_dport;
135 tw->tw_family = sk->sk_family; 161 tw->tw_family = sk->sk_family;
136 tw->tw_reuse = sk->sk_reuse; 162 tw->tw_reuse = sk->sk_reuse;
137 tw->tw_hash = sk->sk_hash; 163 tw->tw_hash = sk->sk_hash;
@@ -139,7 +165,12 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
139 tw->tw_transparent = inet->transparent; 165 tw->tw_transparent = inet->transparent;
140 tw->tw_prot = sk->sk_prot_creator; 166 tw->tw_prot = sk->sk_prot_creator;
141 twsk_net_set(tw, hold_net(sock_net(sk))); 167 twsk_net_set(tw, hold_net(sock_net(sk)));
142 atomic_set(&tw->tw_refcnt, 1); 168 /*
169 * Because we use RCU lookups, we should not set tw_refcnt
170 * to a non-zero value before everything is set up for this
171 * timewait socket.
172 */
173 atomic_set(&tw->tw_refcnt, 0);
143 inet_twsk_dead_node_init(tw); 174 inet_twsk_dead_node_init(tw);
144 __module_get(tw->tw_prot->owner); 175 __module_get(tw->tw_prot->owner);
145 } 176 }
@@ -421,37 +452,46 @@ out:
421 452
422EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); 453EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick);
423 454
424void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, 455void inet_twsk_purge(struct inet_hashinfo *hashinfo,
425 struct inet_timewait_death_row *twdr, int family) 456 struct inet_timewait_death_row *twdr, int family)
426{ 457{
427 struct inet_timewait_sock *tw; 458 struct inet_timewait_sock *tw;
428 struct sock *sk; 459 struct sock *sk;
429 struct hlist_nulls_node *node; 460 struct hlist_nulls_node *node;
430 int h; 461 unsigned int slot;
431 462
432 local_bh_disable(); 463 for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
433 for (h = 0; h < (hashinfo->ehash_size); h++) { 464 struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
434 struct inet_ehash_bucket *head = 465restart_rcu:
435 inet_ehash_bucket(hashinfo, h); 466 rcu_read_lock();
436 spinlock_t *lock = inet_ehash_lockp(hashinfo, h);
437restart: 467restart:
438 spin_lock(lock); 468 sk_nulls_for_each_rcu(sk, node, &head->twchain) {
439 sk_nulls_for_each(sk, node, &head->twchain) {
440
441 tw = inet_twsk(sk); 469 tw = inet_twsk(sk);
442 if (!net_eq(twsk_net(tw), net) || 470 if ((tw->tw_family != family) ||
443 tw->tw_family != family) 471 atomic_read(&twsk_net(tw)->count))
472 continue;
473
474 if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt)))
444 continue; 475 continue;
445 476
446 atomic_inc(&tw->tw_refcnt); 477 if (unlikely((tw->tw_family != family) ||
447 spin_unlock(lock); 478 atomic_read(&twsk_net(tw)->count))) {
479 inet_twsk_put(tw);
480 goto restart;
481 }
482
483 rcu_read_unlock();
448 inet_twsk_deschedule(tw, twdr); 484 inet_twsk_deschedule(tw, twdr);
449 inet_twsk_put(tw); 485 inet_twsk_put(tw);
450 486 goto restart_rcu;
451 goto restart;
452 } 487 }
453 spin_unlock(lock); 488 /* If the nulls value we got at the end of this lookup is
489 * not the expected one, we must restart lookup.
490 * We probably met an item that was moved to another chain.
491 */
492 if (get_nulls_value(node) != slot)
493 goto restart;
494 rcu_read_unlock();
454 } 495 }
455 local_bh_enable();
456} 496}
457EXPORT_SYMBOL_GPL(inet_twsk_purge); 497EXPORT_SYMBOL_GPL(inet_twsk_purge);