diff options
Diffstat (limited to 'net/ipv4/inet_timewait_sock.c')
| -rw-r--r-- | net/ipv4/inet_timewait_sock.c | 112 |
1 files changed, 76 insertions, 36 deletions
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 13f0781f35cd..0fdf45e4c90c 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c | |||
| @@ -14,22 +14,33 @@ | |||
| 14 | #include <net/inet_timewait_sock.h> | 14 | #include <net/inet_timewait_sock.h> |
| 15 | #include <net/ip.h> | 15 | #include <net/ip.h> |
| 16 | 16 | ||
| 17 | |||
| 18 | /* | ||
| 19 | * unhash a timewait socket from established hash | ||
| 20 | * lock must be held by caller | ||
| 21 | */ | ||
| 22 | int inet_twsk_unhash(struct inet_timewait_sock *tw) | ||
| 23 | { | ||
| 24 | if (hlist_nulls_unhashed(&tw->tw_node)) | ||
| 25 | return 0; | ||
| 26 | |||
| 27 | hlist_nulls_del_rcu(&tw->tw_node); | ||
| 28 | sk_nulls_node_init(&tw->tw_node); | ||
| 29 | return 1; | ||
| 30 | } | ||
| 31 | |||
| 17 | /* Must be called with locally disabled BHs. */ | 32 | /* Must be called with locally disabled BHs. */ |
| 18 | static void __inet_twsk_kill(struct inet_timewait_sock *tw, | 33 | static void __inet_twsk_kill(struct inet_timewait_sock *tw, |
| 19 | struct inet_hashinfo *hashinfo) | 34 | struct inet_hashinfo *hashinfo) |
| 20 | { | 35 | { |
| 21 | struct inet_bind_hashbucket *bhead; | 36 | struct inet_bind_hashbucket *bhead; |
| 22 | struct inet_bind_bucket *tb; | 37 | struct inet_bind_bucket *tb; |
| 38 | int refcnt; | ||
| 23 | /* Unlink from established hashes. */ | 39 | /* Unlink from established hashes. */ |
| 24 | spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); | 40 | spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); |
| 25 | 41 | ||
| 26 | spin_lock(lock); | 42 | spin_lock(lock); |
| 27 | if (hlist_nulls_unhashed(&tw->tw_node)) { | 43 | refcnt = inet_twsk_unhash(tw); |
| 28 | spin_unlock(lock); | ||
| 29 | return; | ||
| 30 | } | ||
| 31 | hlist_nulls_del_rcu(&tw->tw_node); | ||
| 32 | sk_nulls_node_init(&tw->tw_node); | ||
| 33 | spin_unlock(lock); | 44 | spin_unlock(lock); |
| 34 | 45 | ||
| 35 | /* Disassociate with bind bucket. */ | 46 | /* Disassociate with bind bucket. */ |
| @@ -37,9 +48,12 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, | |||
| 37 | hashinfo->bhash_size)]; | 48 | hashinfo->bhash_size)]; |
| 38 | spin_lock(&bhead->lock); | 49 | spin_lock(&bhead->lock); |
| 39 | tb = tw->tw_tb; | 50 | tb = tw->tw_tb; |
| 40 | __hlist_del(&tw->tw_bind_node); | 51 | if (tb) { |
| 41 | tw->tw_tb = NULL; | 52 | __hlist_del(&tw->tw_bind_node); |
| 42 | inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); | 53 | tw->tw_tb = NULL; |
| 54 | inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); | ||
| 55 | refcnt++; | ||
| 56 | } | ||
| 43 | spin_unlock(&bhead->lock); | 57 | spin_unlock(&bhead->lock); |
| 44 | #ifdef SOCK_REFCNT_DEBUG | 58 | #ifdef SOCK_REFCNT_DEBUG |
| 45 | if (atomic_read(&tw->tw_refcnt) != 1) { | 59 | if (atomic_read(&tw->tw_refcnt) != 1) { |
| @@ -47,7 +61,10 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, | |||
| 47 | tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); | 61 | tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); |
| 48 | } | 62 | } |
| 49 | #endif | 63 | #endif |
| 50 | inet_twsk_put(tw); | 64 | while (refcnt) { |
| 65 | inet_twsk_put(tw); | ||
| 66 | refcnt--; | ||
| 67 | } | ||
| 51 | } | 68 | } |
| 52 | 69 | ||
| 53 | static noinline void inet_twsk_free(struct inet_timewait_sock *tw) | 70 | static noinline void inet_twsk_free(struct inet_timewait_sock *tw) |
| @@ -86,7 +103,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, | |||
| 86 | Note, that any socket with inet->num != 0 MUST be bound in | 103 | Note, that any socket with inet->num != 0 MUST be bound in |
| 87 | binding cache, even if it is closed. | 104 | binding cache, even if it is closed. |
| 88 | */ | 105 | */ |
| 89 | bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num, | 106 | bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, |
| 90 | hashinfo->bhash_size)]; | 107 | hashinfo->bhash_size)]; |
| 91 | spin_lock(&bhead->lock); | 108 | spin_lock(&bhead->lock); |
| 92 | tw->tw_tb = icsk->icsk_bind_hash; | 109 | tw->tw_tb = icsk->icsk_bind_hash; |
| @@ -101,13 +118,22 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, | |||
| 101 | * Should be done before removing sk from established chain | 118 | * Should be done before removing sk from established chain |
| 102 | * because readers are lockless and search established first. | 119 | * because readers are lockless and search established first. |
| 103 | */ | 120 | */ |
| 104 | atomic_inc(&tw->tw_refcnt); | ||
| 105 | inet_twsk_add_node_rcu(tw, &ehead->twchain); | 121 | inet_twsk_add_node_rcu(tw, &ehead->twchain); |
| 106 | 122 | ||
| 107 | /* Step 3: Remove SK from established hash. */ | 123 | /* Step 3: Remove SK from established hash. */ |
| 108 | if (__sk_nulls_del_node_init_rcu(sk)) | 124 | if (__sk_nulls_del_node_init_rcu(sk)) |
| 109 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | 125 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
| 110 | 126 | ||
| 127 | /* | ||
| 128 | * Notes : | ||
| 129 | * - We initially set tw_refcnt to 0 in inet_twsk_alloc() | ||
| 130 | * - We add one reference for the bhash link | ||
| 131 | * - We add one reference for the ehash link | ||
| 132 | * - We want this refcnt update done before allowing other | ||
| 133 | * threads to find this tw in ehash chain. | ||
| 134 | */ | ||
| 135 | atomic_add(1 + 1 + 1, &tw->tw_refcnt); | ||
| 136 | |||
| 111 | spin_unlock(lock); | 137 | spin_unlock(lock); |
| 112 | } | 138 | } |
| 113 | 139 | ||
| @@ -124,14 +150,14 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat | |||
| 124 | kmemcheck_annotate_bitfield(tw, flags); | 150 | kmemcheck_annotate_bitfield(tw, flags); |
| 125 | 151 | ||
| 126 | /* Give us an identity. */ | 152 | /* Give us an identity. */ |
| 127 | tw->tw_daddr = inet->daddr; | 153 | tw->tw_daddr = inet->inet_daddr; |
| 128 | tw->tw_rcv_saddr = inet->rcv_saddr; | 154 | tw->tw_rcv_saddr = inet->inet_rcv_saddr; |
| 129 | tw->tw_bound_dev_if = sk->sk_bound_dev_if; | 155 | tw->tw_bound_dev_if = sk->sk_bound_dev_if; |
| 130 | tw->tw_num = inet->num; | 156 | tw->tw_num = inet->inet_num; |
| 131 | tw->tw_state = TCP_TIME_WAIT; | 157 | tw->tw_state = TCP_TIME_WAIT; |
| 132 | tw->tw_substate = state; | 158 | tw->tw_substate = state; |
| 133 | tw->tw_sport = inet->sport; | 159 | tw->tw_sport = inet->inet_sport; |
| 134 | tw->tw_dport = inet->dport; | 160 | tw->tw_dport = inet->inet_dport; |
| 135 | tw->tw_family = sk->sk_family; | 161 | tw->tw_family = sk->sk_family; |
| 136 | tw->tw_reuse = sk->sk_reuse; | 162 | tw->tw_reuse = sk->sk_reuse; |
| 137 | tw->tw_hash = sk->sk_hash; | 163 | tw->tw_hash = sk->sk_hash; |
| @@ -139,7 +165,12 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat | |||
| 139 | tw->tw_transparent = inet->transparent; | 165 | tw->tw_transparent = inet->transparent; |
| 140 | tw->tw_prot = sk->sk_prot_creator; | 166 | tw->tw_prot = sk->sk_prot_creator; |
| 141 | twsk_net_set(tw, hold_net(sock_net(sk))); | 167 | twsk_net_set(tw, hold_net(sock_net(sk))); |
| 142 | atomic_set(&tw->tw_refcnt, 1); | 168 | /* |
| 169 | * Because we use RCU lookups, we should not set tw_refcnt | ||
| 170 | * to a non-zero value before everything is set up for this | ||
| 171 | * timewait socket. | ||
| 172 | */ | ||
| 173 | atomic_set(&tw->tw_refcnt, 0); | ||
| 143 | inet_twsk_dead_node_init(tw); | 174 | inet_twsk_dead_node_init(tw); |
| 144 | __module_get(tw->tw_prot->owner); | 175 | __module_get(tw->tw_prot->owner); |
| 145 | } | 176 | } |
| @@ -421,37 +452,46 @@ out: | |||
| 421 | 452 | ||
| 422 | EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); | 453 | EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); |
| 423 | 454 | ||
| 424 | void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, | 455 | void inet_twsk_purge(struct inet_hashinfo *hashinfo, |
| 425 | struct inet_timewait_death_row *twdr, int family) | 456 | struct inet_timewait_death_row *twdr, int family) |
| 426 | { | 457 | { |
| 427 | struct inet_timewait_sock *tw; | 458 | struct inet_timewait_sock *tw; |
| 428 | struct sock *sk; | 459 | struct sock *sk; |
| 429 | struct hlist_nulls_node *node; | 460 | struct hlist_nulls_node *node; |
| 430 | int h; | 461 | unsigned int slot; |
| 431 | 462 | ||
| 432 | local_bh_disable(); | 463 | for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { |
| 433 | for (h = 0; h < (hashinfo->ehash_size); h++) { | 464 | struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; |
| 434 | struct inet_ehash_bucket *head = | 465 | restart_rcu: |
| 435 | inet_ehash_bucket(hashinfo, h); | 466 | rcu_read_lock(); |
| 436 | spinlock_t *lock = inet_ehash_lockp(hashinfo, h); | ||
| 437 | restart: | 467 | restart: |
| 438 | spin_lock(lock); | 468 | sk_nulls_for_each_rcu(sk, node, &head->twchain) { |
| 439 | sk_nulls_for_each(sk, node, &head->twchain) { | ||
| 440 | |||
| 441 | tw = inet_twsk(sk); | 469 | tw = inet_twsk(sk); |
| 442 | if (!net_eq(twsk_net(tw), net) || | 470 | if ((tw->tw_family != family) || |
| 443 | tw->tw_family != family) | 471 | atomic_read(&twsk_net(tw)->count)) |
| 472 | continue; | ||
| 473 | |||
| 474 | if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt))) | ||
| 444 | continue; | 475 | continue; |
| 445 | 476 | ||
| 446 | atomic_inc(&tw->tw_refcnt); | 477 | if (unlikely((tw->tw_family != family) || |
| 447 | spin_unlock(lock); | 478 | atomic_read(&twsk_net(tw)->count))) { |
| 479 | inet_twsk_put(tw); | ||
| 480 | goto restart; | ||
| 481 | } | ||
| 482 | |||
| 483 | rcu_read_unlock(); | ||
| 448 | inet_twsk_deschedule(tw, twdr); | 484 | inet_twsk_deschedule(tw, twdr); |
| 449 | inet_twsk_put(tw); | 485 | inet_twsk_put(tw); |
| 450 | 486 | goto restart_rcu; | |
| 451 | goto restart; | ||
| 452 | } | 487 | } |
| 453 | spin_unlock(lock); | 488 | /* If the nulls value we got at the end of this lookup is |
| 489 | * not the expected one, we must restart lookup. | ||
| 490 | * We probably met an item that was moved to another chain. | ||
| 491 | */ | ||
| 492 | if (get_nulls_value(node) != slot) | ||
| 493 | goto restart; | ||
| 494 | rcu_read_unlock(); | ||
| 454 | } | 495 | } |
| 455 | local_bh_enable(); | ||
| 456 | } | 496 | } |
| 457 | EXPORT_SYMBOL_GPL(inet_twsk_purge); | 497 | EXPORT_SYMBOL_GPL(inet_twsk_purge); |
