summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- include/net/inet_hashtables.h 9
-rw-r--r-- include/net/inet_timewait_sock.h 13
-rw-r--r-- include/net/sock.h 8
-rw-r--r-- include/net/tcp.h 1
-rw-r--r-- net/dccp/proto.c 4
-rw-r--r-- net/ipv4/inet_diag.c 48
-rw-r--r-- net/ipv4/inet_hashtables.c 83
-rw-r--r-- net/ipv4/inet_timewait_sock.c 55
-rw-r--r-- net/ipv4/tcp.c 5
-rw-r--r-- net/ipv4/tcp_ipv4.c 83
-rw-r--r-- net/ipv6/inet6_hashtables.c 75
-rw-r--r-- net/ipv6/tcp_ipv6.c 9
12 files changed, 132 insertions, 261 deletions
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 10d6838378c3..1bdb47715def 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -37,12 +37,11 @@
37#include <asm/byteorder.h> 37#include <asm/byteorder.h>
38 38
39/* This is for all connections with a full identity, no wildcards. 39/* This is for all connections with a full identity, no wildcards.
40 * One chain is dedicated to TIME_WAIT sockets. 40 * The 'e' prefix stands for Establish, but we really put all sockets
41 * I'll experiment with dynamic table growth later. 41 * but LISTEN ones.
42 */ 42 */
43struct inet_ehash_bucket { 43struct inet_ehash_bucket {
44 struct hlist_nulls_head chain; 44 struct hlist_nulls_head chain;
45 struct hlist_nulls_head twchain;
46}; 45};
47 46
48/* There are a few simple rules, which allow for local port reuse by 47/* There are a few simple rules, which allow for local port reuse by
@@ -123,7 +122,6 @@ struct inet_hashinfo {
123 * 122 *
124 * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE 123 * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
125 * 124 *
126 * TIME_WAIT sockets use a separate chain (twchain).
127 */ 125 */
128 struct inet_ehash_bucket *ehash; 126 struct inet_ehash_bucket *ehash;
129 spinlock_t *ehash_locks; 127 spinlock_t *ehash_locks;
@@ -318,9 +316,6 @@ static inline struct sock *inet_lookup_listener(struct net *net,
318 net_eq(sock_net(__sk), (__net))) 316 net_eq(sock_net(__sk), (__net)))
319#endif /* 64-bit arch */ 317#endif /* 64-bit arch */
320 318
321#define INET_TW_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)\
322 INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)
323
324/* 319/*
325 * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need 320 * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
326 * not check it for lookups anymore, thanks Alexey. -DaveM 321 * not check it for lookups anymore, thanks Alexey. -DaveM
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index f528d1b0ac95..de9e3ab7d43d 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -141,18 +141,6 @@ struct inet_timewait_sock {
141}; 141};
142#define tw_tclass tw_tos 142#define tw_tclass tw_tos
143 143
144static inline void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
145 struct hlist_nulls_head *list)
146{
147 hlist_nulls_add_head_rcu(&tw->tw_node, list);
148}
149
150static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
151 struct hlist_head *list)
152{
153 hlist_add_head(&tw->tw_bind_node, list);
154}
155
156static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw) 144static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw)
157{ 145{
158 return !hlist_unhashed(&tw->tw_death_node); 146 return !hlist_unhashed(&tw->tw_death_node);
@@ -192,6 +180,7 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
192 return (struct inet_timewait_sock *)sk; 180 return (struct inet_timewait_sock *)sk;
193} 181}
194 182
183void inet_twsk_free(struct inet_timewait_sock *tw);
195void inet_twsk_put(struct inet_timewait_sock *tw); 184void inet_twsk_put(struct inet_timewait_sock *tw);
196 185
197int inet_twsk_unhash(struct inet_timewait_sock *tw); 186int inet_twsk_unhash(struct inet_timewait_sock *tw);
diff --git a/include/net/sock.h b/include/net/sock.h
index 7cf8d2331afb..3f3e48c4704d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -156,7 +156,7 @@ typedef __u64 __bitwise __addrpair;
156 */ 156 */
157struct sock_common { 157struct sock_common {
158 /* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned 158 /* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned
159 * address on 64bit arches : cf INET_MATCH() and INET_TW_MATCH() 159 * address on 64bit arches : cf INET_MATCH()
160 */ 160 */
161 union { 161 union {
162 __addrpair skc_addrpair; 162 __addrpair skc_addrpair;
@@ -301,6 +301,8 @@ struct sock {
301#define sk_dontcopy_end __sk_common.skc_dontcopy_end 301#define sk_dontcopy_end __sk_common.skc_dontcopy_end
302#define sk_hash __sk_common.skc_hash 302#define sk_hash __sk_common.skc_hash
303#define sk_portpair __sk_common.skc_portpair 303#define sk_portpair __sk_common.skc_portpair
304#define sk_num __sk_common.skc_num
305#define sk_dport __sk_common.skc_dport
304#define sk_addrpair __sk_common.skc_addrpair 306#define sk_addrpair __sk_common.skc_addrpair
305#define sk_daddr __sk_common.skc_daddr 307#define sk_daddr __sk_common.skc_daddr
306#define sk_rcv_saddr __sk_common.skc_rcv_saddr 308#define sk_rcv_saddr __sk_common.skc_rcv_saddr
@@ -1653,6 +1655,10 @@ static inline void sock_put(struct sock *sk)
1653 if (atomic_dec_and_test(&sk->sk_refcnt)) 1655 if (atomic_dec_and_test(&sk->sk_refcnt))
1654 sk_free(sk); 1656 sk_free(sk);
1655} 1657}
1658/* Generic version of sock_put(), dealing with all sockets
1659 * (TCP_TIMEWAIT, ESTABLISHED...)
1660 */
1661void sock_gen_put(struct sock *sk);
1656 1662
1657int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested); 1663int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested);
1658 1664
diff --git a/include/net/tcp.h b/include/net/tcp.h
index de870ee5582d..39bbfa1602b2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1519,7 +1519,6 @@ enum tcp_seq_states {
1519 TCP_SEQ_STATE_LISTENING, 1519 TCP_SEQ_STATE_LISTENING,
1520 TCP_SEQ_STATE_OPENREQ, 1520 TCP_SEQ_STATE_OPENREQ,
1521 TCP_SEQ_STATE_ESTABLISHED, 1521 TCP_SEQ_STATE_ESTABLISHED,
1522 TCP_SEQ_STATE_TIME_WAIT,
1523}; 1522};
1524 1523
1525int tcp_seq_open(struct inode *inode, struct file *file); 1524int tcp_seq_open(struct inode *inode, struct file *file);
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ba64750f0387..eb892b4f4814 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1158,10 +1158,8 @@ static int __init dccp_init(void)
1158 goto out_free_bind_bucket_cachep; 1158 goto out_free_bind_bucket_cachep;
1159 } 1159 }
1160 1160
1161 for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) { 1161 for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
1162 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i); 1162 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
1163 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
1164 }
1165 1163
1166 if (inet_ehash_locks_alloc(&dccp_hashinfo)) 1164 if (inet_ehash_locks_alloc(&dccp_hashinfo))
1167 goto out_free_dccp_ehash; 1165 goto out_free_dccp_ehash;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 22000279efc8..8e1e40653357 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -635,12 +635,14 @@ static int inet_csk_diag_dump(struct sock *sk,
635 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); 635 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
636} 636}
637 637
638static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, 638static int inet_twsk_diag_dump(struct sock *sk,
639 struct sk_buff *skb, 639 struct sk_buff *skb,
640 struct netlink_callback *cb, 640 struct netlink_callback *cb,
641 struct inet_diag_req_v2 *r, 641 struct inet_diag_req_v2 *r,
642 const struct nlattr *bc) 642 const struct nlattr *bc)
643{ 643{
644 struct inet_timewait_sock *tw = inet_twsk(sk);
645
644 if (bc != NULL) { 646 if (bc != NULL) {
645 struct inet_diag_entry entry; 647 struct inet_diag_entry entry;
646 648
@@ -911,8 +913,7 @@ skip_listen_ht:
911 913
912 num = 0; 914 num = 0;
913 915
914 if (hlist_nulls_empty(&head->chain) && 916 if (hlist_nulls_empty(&head->chain))
915 hlist_nulls_empty(&head->twchain))
916 continue; 917 continue;
917 918
918 if (i > s_i) 919 if (i > s_i)
@@ -920,7 +921,7 @@ skip_listen_ht:
920 921
921 spin_lock_bh(lock); 922 spin_lock_bh(lock);
922 sk_nulls_for_each(sk, node, &head->chain) { 923 sk_nulls_for_each(sk, node, &head->chain) {
923 struct inet_sock *inet = inet_sk(sk); 924 int res;
924 925
925 if (!net_eq(sock_net(sk), net)) 926 if (!net_eq(sock_net(sk), net))
926 continue; 927 continue;
@@ -929,15 +930,19 @@ skip_listen_ht:
929 if (!(r->idiag_states & (1 << sk->sk_state))) 930 if (!(r->idiag_states & (1 << sk->sk_state)))
930 goto next_normal; 931 goto next_normal;
931 if (r->sdiag_family != AF_UNSPEC && 932 if (r->sdiag_family != AF_UNSPEC &&
932 sk->sk_family != r->sdiag_family) 933 sk->sk_family != r->sdiag_family)
933 goto next_normal; 934 goto next_normal;
934 if (r->id.idiag_sport != inet->inet_sport && 935 if (r->id.idiag_sport != htons(sk->sk_num) &&
935 r->id.idiag_sport) 936 r->id.idiag_sport)
936 goto next_normal; 937 goto next_normal;
937 if (r->id.idiag_dport != inet->inet_dport && 938 if (r->id.idiag_dport != sk->sk_dport &&
938 r->id.idiag_dport) 939 r->id.idiag_dport)
939 goto next_normal; 940 goto next_normal;
940 if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { 941 if (sk->sk_state == TCP_TIME_WAIT)
942 res = inet_twsk_diag_dump(sk, skb, cb, r, bc);
943 else
944 res = inet_csk_diag_dump(sk, skb, cb, r, bc);
945 if (res < 0) {
941 spin_unlock_bh(lock); 946 spin_unlock_bh(lock);
942 goto done; 947 goto done;
943 } 948 }
@@ -945,33 +950,6 @@ next_normal:
945 ++num; 950 ++num;
946 } 951 }
947 952
948 if (r->idiag_states & TCPF_TIME_WAIT) {
949 struct inet_timewait_sock *tw;
950
951 inet_twsk_for_each(tw, node,
952 &head->twchain) {
953 if (!net_eq(twsk_net(tw), net))
954 continue;
955
956 if (num < s_num)
957 goto next_dying;
958 if (r->sdiag_family != AF_UNSPEC &&
959 tw->tw_family != r->sdiag_family)
960 goto next_dying;
961 if (r->id.idiag_sport != tw->tw_sport &&
962 r->id.idiag_sport)
963 goto next_dying;
964 if (r->id.idiag_dport != tw->tw_dport &&
965 r->id.idiag_dport)
966 goto next_dying;
967 if (inet_twsk_diag_dump(tw, skb, cb, r, bc) < 0) {
968 spin_unlock_bh(lock);
969 goto done;
970 }
971next_dying:
972 ++num;
973 }
974 }
975 spin_unlock_bh(lock); 953 spin_unlock_bh(lock);
976 } 954 }
977 955
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index ae199596b9b0..a4b66bbe4f21 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -230,6 +230,19 @@ begin:
230} 230}
231EXPORT_SYMBOL_GPL(__inet_lookup_listener); 231EXPORT_SYMBOL_GPL(__inet_lookup_listener);
232 232
233/* All sockets share common refcount, but have different destructors */
234void sock_gen_put(struct sock *sk)
235{
236 if (!atomic_dec_and_test(&sk->sk_refcnt))
237 return;
238
239 if (sk->sk_state == TCP_TIME_WAIT)
240 inet_twsk_free(inet_twsk(sk));
241 else
242 sk_free(sk);
243}
244EXPORT_SYMBOL_GPL(sock_gen_put);
245
233struct sock *__inet_lookup_established(struct net *net, 246struct sock *__inet_lookup_established(struct net *net,
234 struct inet_hashinfo *hashinfo, 247 struct inet_hashinfo *hashinfo,
235 const __be32 saddr, const __be16 sport, 248 const __be32 saddr, const __be16 sport,
@@ -255,13 +268,13 @@ begin:
255 if (likely(INET_MATCH(sk, net, acookie, 268 if (likely(INET_MATCH(sk, net, acookie,
256 saddr, daddr, ports, dif))) { 269 saddr, daddr, ports, dif))) {
257 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) 270 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
258 goto begintw; 271 goto out;
259 if (unlikely(!INET_MATCH(sk, net, acookie, 272 if (unlikely(!INET_MATCH(sk, net, acookie,
260 saddr, daddr, ports, dif))) { 273 saddr, daddr, ports, dif))) {
261 sock_put(sk); 274 sock_gen_put(sk);
262 goto begin; 275 goto begin;
263 } 276 }
264 goto out; 277 goto found;
265 } 278 }
266 } 279 }
267 /* 280 /*
@@ -271,37 +284,9 @@ begin:
271 */ 284 */
272 if (get_nulls_value(node) != slot) 285 if (get_nulls_value(node) != slot)
273 goto begin; 286 goto begin;
274
275begintw:
276 /* Must check for a TIME_WAIT'er before going to listener hash. */
277 sk_nulls_for_each_rcu(sk, node, &head->twchain) {
278 if (sk->sk_hash != hash)
279 continue;
280 if (likely(INET_TW_MATCH(sk, net, acookie,
281 saddr, daddr, ports,
282 dif))) {
283 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
284 sk = NULL;
285 goto out;
286 }
287 if (unlikely(!INET_TW_MATCH(sk, net, acookie,
288 saddr, daddr, ports,
289 dif))) {
290 inet_twsk_put(inet_twsk(sk));
291 goto begintw;
292 }
293 goto out;
294 }
295 }
296 /*
297 * if the nulls value we got at the end of this lookup is
298 * not the expected one, we must restart lookup.
299 * We probably met an item that was moved to another chain.
300 */
301 if (get_nulls_value(node) != slot)
302 goto begintw;
303 sk = NULL;
304out: 287out:
288 sk = NULL;
289found:
305 rcu_read_unlock(); 290 rcu_read_unlock();
306 return sk; 291 return sk;
307} 292}
@@ -326,39 +311,29 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
326 spinlock_t *lock = inet_ehash_lockp(hinfo, hash); 311 spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
327 struct sock *sk2; 312 struct sock *sk2;
328 const struct hlist_nulls_node *node; 313 const struct hlist_nulls_node *node;
329 struct inet_timewait_sock *tw; 314 struct inet_timewait_sock *tw = NULL;
330 int twrefcnt = 0; 315 int twrefcnt = 0;
331 316
332 spin_lock(lock); 317 spin_lock(lock);
333 318
334 /* Check TIME-WAIT sockets first. */
335 sk_nulls_for_each(sk2, node, &head->twchain) {
336 if (sk2->sk_hash != hash)
337 continue;
338
339 if (likely(INET_TW_MATCH(sk2, net, acookie,
340 saddr, daddr, ports, dif))) {
341 tw = inet_twsk(sk2);
342 if (twsk_unique(sk, sk2, twp))
343 goto unique;
344 else
345 goto not_unique;
346 }
347 }
348 tw = NULL;
349
350 /* And established part... */
351 sk_nulls_for_each(sk2, node, &head->chain) { 319 sk_nulls_for_each(sk2, node, &head->chain) {
352 if (sk2->sk_hash != hash) 320 if (sk2->sk_hash != hash)
353 continue; 321 continue;
322
354 if (likely(INET_MATCH(sk2, net, acookie, 323 if (likely(INET_MATCH(sk2, net, acookie,
355 saddr, daddr, ports, dif))) 324 saddr, daddr, ports, dif))) {
325 if (sk2->sk_state == TCP_TIME_WAIT) {
326 tw = inet_twsk(sk2);
327 if (twsk_unique(sk, sk2, twp))
328 break;
329 }
356 goto not_unique; 330 goto not_unique;
331 }
357 } 332 }
358 333
359unique:
360 /* Must record num and sport now. Otherwise we will see 334 /* Must record num and sport now. Otherwise we will see
361 * in hash table socket with a funny identity. */ 335 * in hash table socket with a funny identity.
336 */
362 inet->inet_num = lport; 337 inet->inet_num = lport;
363 inet->inet_sport = htons(lport); 338 inet->inet_sport = htons(lport);
364 sk->sk_hash = hash; 339 sk->sk_hash = hash;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 9bcd8f7234ec..6d592f8555fb 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -87,19 +87,11 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
87 refcnt += inet_twsk_bind_unhash(tw, hashinfo); 87 refcnt += inet_twsk_bind_unhash(tw, hashinfo);
88 spin_unlock(&bhead->lock); 88 spin_unlock(&bhead->lock);
89 89
90#ifdef SOCK_REFCNT_DEBUG 90 BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt));
91 if (atomic_read(&tw->tw_refcnt) != 1) { 91 atomic_sub(refcnt, &tw->tw_refcnt);
92 pr_debug("%s timewait_sock %p refcnt=%d\n",
93 tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt));
94 }
95#endif
96 while (refcnt) {
97 inet_twsk_put(tw);
98 refcnt--;
99 }
100} 92}
101 93
102static noinline void inet_twsk_free(struct inet_timewait_sock *tw) 94void inet_twsk_free(struct inet_timewait_sock *tw)
103{ 95{
104 struct module *owner = tw->tw_prot->owner; 96 struct module *owner = tw->tw_prot->owner;
105 twsk_destructor((struct sock *)tw); 97 twsk_destructor((struct sock *)tw);
@@ -118,6 +110,18 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
118} 110}
119EXPORT_SYMBOL_GPL(inet_twsk_put); 111EXPORT_SYMBOL_GPL(inet_twsk_put);
120 112
113static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
114 struct hlist_nulls_head *list)
115{
116 hlist_nulls_add_head_rcu(&tw->tw_node, list);
117}
118
119static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
120 struct hlist_head *list)
121{
122 hlist_add_head(&tw->tw_bind_node, list);
123}
124
121/* 125/*
122 * Enter the time wait state. This is called with locally disabled BH. 126 * Enter the time wait state. This is called with locally disabled BH.
123 * Essentially we whip up a timewait bucket, copy the relevant info into it 127 * Essentially we whip up a timewait bucket, copy the relevant info into it
@@ -146,26 +150,21 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
146 spin_lock(lock); 150 spin_lock(lock);
147 151
148 /* 152 /*
149 * Step 2: Hash TW into TIMEWAIT chain. 153 * Step 2: Hash TW into tcp ehash chain.
150 * Should be done before removing sk from established chain 154 * Notes :
151 * because readers are lockless and search established first. 155 * - tw_refcnt is set to 3 because :
156 * - We have one reference from bhash chain.
157 * - We have one reference from ehash chain.
158 * We can use atomic_set() because prior spin_lock()/spin_unlock()
159 * committed into memory all tw fields.
152 */ 160 */
153 inet_twsk_add_node_rcu(tw, &ehead->twchain); 161 atomic_set(&tw->tw_refcnt, 1 + 1 + 1);
162 inet_twsk_add_node_rcu(tw, &ehead->chain);
154 163
155 /* Step 3: Remove SK from established hash. */ 164 /* Step 3: Remove SK from hash chain */
156 if (__sk_nulls_del_node_init_rcu(sk)) 165 if (__sk_nulls_del_node_init_rcu(sk))
157 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 166 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
158 167
159 /*
160 * Notes :
161 * - We initially set tw_refcnt to 0 in inet_twsk_alloc()
162 * - We add one reference for the bhash link
163 * - We add one reference for the ehash link
164 * - We want this refcnt update done before allowing other
165 * threads to find this tw in ehash chain.
166 */
167 atomic_add(1 + 1 + 1, &tw->tw_refcnt);
168
169 spin_unlock(lock); 168 spin_unlock(lock);
170} 169}
171EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); 170EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
@@ -490,7 +489,9 @@ void inet_twsk_purge(struct inet_hashinfo *hashinfo,
490restart_rcu: 489restart_rcu:
491 rcu_read_lock(); 490 rcu_read_lock();
492restart: 491restart:
493 sk_nulls_for_each_rcu(sk, node, &head->twchain) { 492 sk_nulls_for_each_rcu(sk, node, &head->chain) {
493 if (sk->sk_state != TCP_TIME_WAIT)
494 continue;
494 tw = inet_twsk(sk); 495 tw = inet_twsk(sk);
495 if ((tw->tw_family != family) || 496 if ((tw->tw_family != family) ||
496 atomic_read(&twsk_net(tw)->count)) 497 atomic_read(&twsk_net(tw)->count))
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6e5617b9f9db..be4b161802e8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3137,10 +3137,9 @@ void __init tcp_init(void)
3137 &tcp_hashinfo.ehash_mask, 3137 &tcp_hashinfo.ehash_mask,
3138 0, 3138 0,
3139 thash_entries ? 0 : 512 * 1024); 3139 thash_entries ? 0 : 512 * 1024);
3140 for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) { 3140 for (i = 0; i <= tcp_hashinfo.ehash_mask; i++)
3141 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); 3141 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i);
3142 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); 3142
3143 }
3144 if (inet_ehash_locks_alloc(&tcp_hashinfo)) 3143 if (inet_ehash_locks_alloc(&tcp_hashinfo))
3145 panic("TCP: failed to alloc ehash_locks"); 3144 panic("TCP: failed to alloc ehash_locks");
3146 tcp_hashinfo.bhash = 3145 tcp_hashinfo.bhash =
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5d6b1a609da8..e4695dde1af6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2194,18 +2194,6 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
2194#ifdef CONFIG_PROC_FS 2194#ifdef CONFIG_PROC_FS
2195/* Proc filesystem TCP sock list dumping. */ 2195/* Proc filesystem TCP sock list dumping. */
2196 2196
2197static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
2198{
2199 return hlist_nulls_empty(head) ? NULL :
2200 list_entry(head->first, struct inet_timewait_sock, tw_node);
2201}
2202
2203static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
2204{
2205 return !is_a_nulls(tw->tw_node.next) ?
2206 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
2207}
2208
2209/* 2197/*
2210 * Get next listener socket follow cur. If cur is NULL, get first socket 2198 * Get next listener socket follow cur. If cur is NULL, get first socket
2211 * starting from bucket given in st->bucket; when st->bucket is zero the 2199 * starting from bucket given in st->bucket; when st->bucket is zero the
@@ -2309,10 +2297,9 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2309 return rc; 2297 return rc;
2310} 2298}
2311 2299
2312static inline bool empty_bucket(struct tcp_iter_state *st) 2300static inline bool empty_bucket(const struct tcp_iter_state *st)
2313{ 2301{
2314 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && 2302 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
2315 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2316} 2303}
2317 2304
2318/* 2305/*
@@ -2329,7 +2316,6 @@ static void *established_get_first(struct seq_file *seq)
2329 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { 2316 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2330 struct sock *sk; 2317 struct sock *sk;
2331 struct hlist_nulls_node *node; 2318 struct hlist_nulls_node *node;
2332 struct inet_timewait_sock *tw;
2333 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); 2319 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2334 2320
2335 /* Lockless fast path for the common case of empty buckets */ 2321 /* Lockless fast path for the common case of empty buckets */
@@ -2345,18 +2331,7 @@ static void *established_get_first(struct seq_file *seq)
2345 rc = sk; 2331 rc = sk;
2346 goto out; 2332 goto out;
2347 } 2333 }
2348 st->state = TCP_SEQ_STATE_TIME_WAIT;
2349 inet_twsk_for_each(tw, node,
2350 &tcp_hashinfo.ehash[st->bucket].twchain) {
2351 if (tw->tw_family != st->family ||
2352 !net_eq(twsk_net(tw), net)) {
2353 continue;
2354 }
2355 rc = tw;
2356 goto out;
2357 }
2358 spin_unlock_bh(lock); 2334 spin_unlock_bh(lock);
2359 st->state = TCP_SEQ_STATE_ESTABLISHED;
2360 } 2335 }
2361out: 2336out:
2362 return rc; 2337 return rc;
@@ -2365,7 +2340,6 @@ out:
2365static void *established_get_next(struct seq_file *seq, void *cur) 2340static void *established_get_next(struct seq_file *seq, void *cur)
2366{ 2341{
2367 struct sock *sk = cur; 2342 struct sock *sk = cur;
2368 struct inet_timewait_sock *tw;
2369 struct hlist_nulls_node *node; 2343 struct hlist_nulls_node *node;
2370 struct tcp_iter_state *st = seq->private; 2344 struct tcp_iter_state *st = seq->private;
2371 struct net *net = seq_file_net(seq); 2345 struct net *net = seq_file_net(seq);
@@ -2373,45 +2347,16 @@ static void *established_get_next(struct seq_file *seq, void *cur)
2373 ++st->num; 2347 ++st->num;
2374 ++st->offset; 2348 ++st->offset;
2375 2349
2376 if (st->state == TCP_SEQ_STATE_TIME_WAIT) { 2350 sk = sk_nulls_next(sk);
2377 tw = cur;
2378 tw = tw_next(tw);
2379get_tw:
2380 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2381 tw = tw_next(tw);
2382 }
2383 if (tw) {
2384 cur = tw;
2385 goto out;
2386 }
2387 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2388 st->state = TCP_SEQ_STATE_ESTABLISHED;
2389
2390 /* Look for next non empty bucket */
2391 st->offset = 0;
2392 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2393 empty_bucket(st))
2394 ;
2395 if (st->bucket > tcp_hashinfo.ehash_mask)
2396 return NULL;
2397
2398 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2399 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2400 } else
2401 sk = sk_nulls_next(sk);
2402 2351
2403 sk_nulls_for_each_from(sk, node) { 2352 sk_nulls_for_each_from(sk, node) {
2404 if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) 2353 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2405 goto found; 2354 return sk;
2406 } 2355 }
2407 2356
2408 st->state = TCP_SEQ_STATE_TIME_WAIT; 2357 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2409 tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain); 2358 ++st->bucket;
2410 goto get_tw; 2359 return established_get_first(seq);
2411found:
2412 cur = sk;
2413out:
2414 return cur;
2415} 2360}
2416 2361
2417static void *established_get_idx(struct seq_file *seq, loff_t pos) 2362static void *established_get_idx(struct seq_file *seq, loff_t pos)
@@ -2464,10 +2409,9 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
2464 if (rc) 2409 if (rc)
2465 break; 2410 break;
2466 st->bucket = 0; 2411 st->bucket = 0;
2412 st->state = TCP_SEQ_STATE_ESTABLISHED;
2467 /* Fallthrough */ 2413 /* Fallthrough */
2468 case TCP_SEQ_STATE_ESTABLISHED: 2414 case TCP_SEQ_STATE_ESTABLISHED:
2469 case TCP_SEQ_STATE_TIME_WAIT:
2470 st->state = TCP_SEQ_STATE_ESTABLISHED;
2471 if (st->bucket > tcp_hashinfo.ehash_mask) 2415 if (st->bucket > tcp_hashinfo.ehash_mask)
2472 break; 2416 break;
2473 rc = established_get_first(seq); 2417 rc = established_get_first(seq);
@@ -2524,7 +2468,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2524 } 2468 }
2525 break; 2469 break;
2526 case TCP_SEQ_STATE_ESTABLISHED: 2470 case TCP_SEQ_STATE_ESTABLISHED:
2527 case TCP_SEQ_STATE_TIME_WAIT:
2528 rc = established_get_next(seq, v); 2471 rc = established_get_next(seq, v);
2529 break; 2472 break;
2530 } 2473 }
@@ -2548,7 +2491,6 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
2548 if (v != SEQ_START_TOKEN) 2491 if (v != SEQ_START_TOKEN)
2549 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); 2492 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2550 break; 2493 break;
2551 case TCP_SEQ_STATE_TIME_WAIT:
2552 case TCP_SEQ_STATE_ESTABLISHED: 2494 case TCP_SEQ_STATE_ESTABLISHED:
2553 if (v) 2495 if (v)
2554 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 2496 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
@@ -2707,6 +2649,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2707static int tcp4_seq_show(struct seq_file *seq, void *v) 2649static int tcp4_seq_show(struct seq_file *seq, void *v)
2708{ 2650{
2709 struct tcp_iter_state *st; 2651 struct tcp_iter_state *st;
2652 struct sock *sk = v;
2710 int len; 2653 int len;
2711 2654
2712 if (v == SEQ_START_TOKEN) { 2655 if (v == SEQ_START_TOKEN) {
@@ -2721,14 +2664,14 @@ static int tcp4_seq_show(struct seq_file *seq, void *v)
2721 switch (st->state) { 2664 switch (st->state) {
2722 case TCP_SEQ_STATE_LISTENING: 2665 case TCP_SEQ_STATE_LISTENING:
2723 case TCP_SEQ_STATE_ESTABLISHED: 2666 case TCP_SEQ_STATE_ESTABLISHED:
2724 get_tcp4_sock(v, seq, st->num, &len); 2667 if (sk->sk_state == TCP_TIME_WAIT)
2668 get_timewait4_sock(v, seq, st->num, &len);
2669 else
2670 get_tcp4_sock(v, seq, st->num, &len);
2725 break; 2671 break;
2726 case TCP_SEQ_STATE_OPENREQ: 2672 case TCP_SEQ_STATE_OPENREQ:
2727 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len); 2673 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2728 break; 2674 break;
2729 case TCP_SEQ_STATE_TIME_WAIT:
2730 get_timewait4_sock(v, seq, st->num, &len);
2731 break;
2732 } 2675 }
2733 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, ""); 2676 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2734out: 2677out:
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 066640e0ba8e..46440777e1c5 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -89,43 +89,36 @@ begin:
89 sk_nulls_for_each_rcu(sk, node, &head->chain) { 89 sk_nulls_for_each_rcu(sk, node, &head->chain) {
90 if (sk->sk_hash != hash) 90 if (sk->sk_hash != hash)
91 continue; 91 continue;
92 if (likely(INET6_MATCH(sk, net, saddr, daddr, ports, dif))) { 92 if (sk->sk_state == TCP_TIME_WAIT) {
93 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) 93 if (!INET6_TW_MATCH(sk, net, saddr, daddr, ports, dif))
94 goto begintw; 94 continue;
95 } else {
96 if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
97 continue;
98 }
99 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
100 goto out;
101
102 if (sk->sk_state == TCP_TIME_WAIT) {
103 if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr,
104 ports, dif))) {
105 sock_gen_put(sk);
106 goto begin;
107 }
108 } else {
95 if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, 109 if (unlikely(!INET6_MATCH(sk, net, saddr, daddr,
96 ports, dif))) { 110 ports, dif))) {
97 sock_put(sk); 111 sock_put(sk);
98 goto begin; 112 goto begin;
99 } 113 }
100 goto out; 114 goto found;
101 } 115 }
102 } 116 }
103 if (get_nulls_value(node) != slot) 117 if (get_nulls_value(node) != slot)
104 goto begin; 118 goto begin;
105
106begintw:
107 /* Must check for a TIME_WAIT'er before going to listener hash. */
108 sk_nulls_for_each_rcu(sk, node, &head->twchain) {
109 if (sk->sk_hash != hash)
110 continue;
111 if (likely(INET6_TW_MATCH(sk, net, saddr, daddr,
112 ports, dif))) {
113 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
114 sk = NULL;
115 goto out;
116 }
117 if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr,
118 ports, dif))) {
119 inet_twsk_put(inet_twsk(sk));
120 goto begintw;
121 }
122 goto out;
123 }
124 }
125 if (get_nulls_value(node) != slot)
126 goto begintw;
127 sk = NULL;
128out: 119out:
120 sk = NULL;
121found:
129 rcu_read_unlock(); 122 rcu_read_unlock();
130 return sk; 123 return sk;
131} 124}
@@ -248,31 +241,25 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
248 spinlock_t *lock = inet_ehash_lockp(hinfo, hash); 241 spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
249 struct sock *sk2; 242 struct sock *sk2;
250 const struct hlist_nulls_node *node; 243 const struct hlist_nulls_node *node;
251 struct inet_timewait_sock *tw; 244 struct inet_timewait_sock *tw = NULL;
252 int twrefcnt = 0; 245 int twrefcnt = 0;
253 246
254 spin_lock(lock); 247 spin_lock(lock);
255 248
256 /* Check TIME-WAIT sockets first. */ 249 sk_nulls_for_each(sk2, node, &head->chain) {
257 sk_nulls_for_each(sk2, node, &head->twchain) {
258 if (sk2->sk_hash != hash) 250 if (sk2->sk_hash != hash)
259 continue; 251 continue;
260 252
261 if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr, 253 if (sk2->sk_state == TCP_TIME_WAIT) {
262 ports, dif))) { 254 if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr,
263 tw = inet_twsk(sk2); 255 ports, dif))) {
264 if (twsk_unique(sk, sk2, twp)) 256 tw = inet_twsk(sk2);
265 goto unique; 257 if (twsk_unique(sk, sk2, twp))
266 else 258 goto unique;
267 goto not_unique; 259 else
260 goto not_unique;
261 }
268 } 262 }
269 }
270 tw = NULL;
271
272 /* And established part... */
273 sk_nulls_for_each(sk2, node, &head->chain) {
274 if (sk2->sk_hash != hash)
275 continue;
276 if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) 263 if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif)))
277 goto not_unique; 264 goto not_unique;
278 } 265 }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index dde8bad04481..528e61afaf5e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1834,6 +1834,7 @@ static void get_timewait6_sock(struct seq_file *seq,
1834static int tcp6_seq_show(struct seq_file *seq, void *v) 1834static int tcp6_seq_show(struct seq_file *seq, void *v)
1835{ 1835{
1836 struct tcp_iter_state *st; 1836 struct tcp_iter_state *st;
1837 struct sock *sk = v;
1837 1838
1838 if (v == SEQ_START_TOKEN) { 1839 if (v == SEQ_START_TOKEN) {
1839 seq_puts(seq, 1840 seq_puts(seq,
@@ -1849,14 +1850,14 @@ static int tcp6_seq_show(struct seq_file *seq, void *v)
1849 switch (st->state) { 1850 switch (st->state) {
1850 case TCP_SEQ_STATE_LISTENING: 1851 case TCP_SEQ_STATE_LISTENING:
1851 case TCP_SEQ_STATE_ESTABLISHED: 1852 case TCP_SEQ_STATE_ESTABLISHED:
1852 get_tcp6_sock(seq, v, st->num); 1853 if (sk->sk_state == TCP_TIME_WAIT)
1854 get_timewait6_sock(seq, v, st->num);
1855 else
1856 get_tcp6_sock(seq, v, st->num);
1853 break; 1857 break;
1854 case TCP_SEQ_STATE_OPENREQ: 1858 case TCP_SEQ_STATE_OPENREQ:
1855 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid); 1859 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
1856 break; 1860 break;
1857 case TCP_SEQ_STATE_TIME_WAIT:
1858 get_timewait6_sock(seq, v, st->num);
1859 break;
1860 } 1861 }
1861out: 1862out:
1862 return 0; 1863 return 0;