aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/net/inet_hashtables.h48
-rw-r--r--include/net/tcp.h21
-rw-r--r--net/ipv4/inet_hashtables.c32
-rw-r--r--net/ipv4/tcp_diag.c8
-rw-r--r--net/ipv4/tcp_ipv4.c70
-rw-r--r--net/ipv6/tcp_ipv6.c2
6 files changed, 94 insertions, 87 deletions
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index da07411b36d2..f5d65121f7b7 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -19,10 +19,14 @@
19#include <linux/list.h> 19#include <linux/list.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/spinlock.h> 21#include <linux/spinlock.h>
22#include <linux/tcp.h> /* only for TCP_LISTEN, damn :-( */
22#include <linux/types.h> 23#include <linux/types.h>
24#include <linux/wait.h>
23 25
24#include <net/sock.h> 26#include <net/sock.h>
25 27
28#include <asm/atomic.h>
29
26/* This is for all connections with a full identity, no wildcards. 30/* This is for all connections with a full identity, no wildcards.
27 * New scheme, half the table is for TIME_WAIT, the other half is 31 * New scheme, half the table is for TIME_WAIT, the other half is
28 * for the rest. I'll experiment with dynamic table growth later. 32 * for the rest. I'll experiment with dynamic table growth later.
@@ -192,4 +196,48 @@ static inline void inet_inherit_port(struct inet_hashinfo *table,
192 196
193extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk); 197extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk);
194 198
199extern void inet_listen_wlock(struct inet_hashinfo *hashinfo);
200
201/*
202 * - We may sleep inside this lock: lhash_lock itself is released before returning, only lhash_users stays raised.
203 * - If sleeping is not required (or called from BH),
204 * use plain read_(un)lock(&hashinfo->lhash_lock).
205 */
206static inline void inet_listen_lock(struct inet_hashinfo *hashinfo)
207{
208 /* Taking read_lock makes us wait for any writer currently holding lhash_lock; the count is raised while it is held. */
209 read_lock(&hashinfo->lhash_lock);
210 atomic_inc(&hashinfo->lhash_users);
211 read_unlock(&hashinfo->lhash_lock);
212}
213
/*
 * Drop the lhash_users count raised by inet_listen_lock(). When the count
 * reaches zero, wake up tasks sleeping on lhash_wait (inet_listen_wlock()
 * sleeps there while lhash_users is non-zero).
 */
214static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo)
215{
216 if (atomic_dec_and_test(&hashinfo->lhash_users))
217 wake_up(&hashinfo->lhash_wait);
218}
219
/*
 * Add sk to one of the hash chains of hashinfo.
 *
 * - listen_possible != 0 and sk->sk_state == TCP_LISTEN: sk is added to the
 *   listening_hash chain selected by inet_sk_listen_hashfn(), with
 *   lhash_lock taken through inet_listen_wlock().
 * - otherwise: sk->sk_hashent is set from inet_sk_ehashfn() and sk is added
 *   to that ehash bucket's chain under the bucket's own write lock.
 *
 * NOTE(review): plain write_lock()/write_unlock() are used here;
 * tcp_v4_hash() wraps its call in local_bh_disable()/local_bh_enable() --
 * presumably every caller must already have BHs disabled; confirm.
 */
220static inline void __inet_hash(struct inet_hashinfo *hashinfo,
221 struct sock *sk, const int listen_possible)
222{
223 struct hlist_head *list;
224 rwlock_t *lock;
225
 /* Trap if sk_unhashed(sk) does not hold on entry. */
226 BUG_TRAP(sk_unhashed(sk));
227 if (listen_possible && sk->sk_state == TCP_LISTEN) {
228 list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
229 lock = &hashinfo->lhash_lock;
 /* Returns with lhash_lock write-held; released by write_unlock(lock) below. */
230 inet_listen_wlock(hashinfo);
231 } else {
232 sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size);
233 list = &hashinfo->ehash[sk->sk_hashent].chain;
234 lock = &hashinfo->ehash[sk->sk_hashent].lock;
235 write_lock(lock);
236 }
237 __sk_add_node(sk, list);
238 sock_prot_inc_use(sk->sk_prot);
239 write_unlock(lock);
 /* Listening case only: presumably lets another writer sleeping in inet_listen_wlock() proceed -- confirm. */
240 if (listen_possible && sk->sk_state == TCP_LISTEN)
241 wake_up(&hashinfo->lhash_wait);
242}
195#endif /* _INET_HASHTABLES_H */ 243#endif /* _INET_HASHTABLES_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 99e47695d4b6..bc110cc7022b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1447,27 +1447,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req,
1447 1447
1448extern void tcp_enter_memory_pressure(void); 1448extern void tcp_enter_memory_pressure(void);
1449 1449
1450extern void tcp_listen_wlock(void);
1451
1452/* - We may sleep inside this lock.
1453 * - If sleeping is not required (or called from BH),
1454 * use plain read_(un)lock(&inet_hashinfo.lhash_lock).
1455 */
1456
1457static inline void tcp_listen_lock(void)
1458{
1459 /* read_lock synchronizes to candidates to writers */
1460 read_lock(&tcp_hashinfo.lhash_lock);
1461 atomic_inc(&tcp_hashinfo.lhash_users);
1462 read_unlock(&tcp_hashinfo.lhash_lock);
1463}
1464
1465static inline void tcp_listen_unlock(void)
1466{
1467 if (atomic_dec_and_test(&tcp_hashinfo.lhash_users))
1468 wake_up(&tcp_hashinfo.lhash_wait);
1469}
1470
1471static inline int keepalive_intvl_when(const struct tcp_sock *tp) 1450static inline int keepalive_intvl_when(const struct tcp_sock *tp)
1472{ 1451{
1473 return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; 1452 return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 33d6cbe32cdc..06cbc6f689c5 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -15,7 +15,9 @@
15 15
16#include <linux/config.h> 16#include <linux/config.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/sched.h>
18#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/wait.h>
19 21
20#include <net/inet_hashtables.h> 22#include <net/inet_hashtables.h>
21 23
@@ -89,3 +91,33 @@ void inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
89} 91}
90 92
91EXPORT_SYMBOL(inet_put_port); 93EXPORT_SYMBOL(inet_put_port);
94
95/*
96 * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
97 * Look, when several writers sleep and a reader wakes them up, all but one
98 * immediately hit the write lock and grab all the cpus. Exclusive sleep solves
99 * this, _but_ remember, it adds useless work on UP machines (a wake up on each
100 * exclusive lock release). It should be ifdefed really.
 *
 * Returns with hashinfo->lhash_lock write-held, after having observed
 * lhash_users == 0 while holding it. The caller releases the lock.
 *
 * NOTE(review): the first acquisition is a plain write_lock(), while the
 * sleep loop uses write_unlock_bh()/write_lock_bh(). The callers visible in
 * this patch (tcp_v4_hash(), tcp_unhash()) call local_bh_disable() before
 * getting here -- presumably that is required of every caller; confirm.
101 */
102void inet_listen_wlock(struct inet_hashinfo *hashinfo)
103{
104 write_lock(&hashinfo->lhash_lock);
105
 /* Only set up a wait entry if lhash_users is currently non-zero. */
106 if (atomic_read(&hashinfo->lhash_users)) {
107 DEFINE_WAIT(wait);
108
109 for (;;) {
 /* Queue on lhash_wait (exclusive) before re-checking the count. */
110 prepare_to_wait_exclusive(&hashinfo->lhash_wait,
111 &wait, TASK_UNINTERRUPTIBLE);
112 if (!atomic_read(&hashinfo->lhash_users))
113 break;
 /* Drop the lock across schedule(); it is retaken before the count is checked again. */
114 write_unlock_bh(&hashinfo->lhash_lock);
115 schedule();
116 write_lock_bh(&hashinfo->lhash_lock);
117 }
118
119 finish_wait(&hashinfo->lhash_wait, &wait);
120 }
121}
122
123EXPORT_SYMBOL(inet_listen_wlock);
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 0ae738b455f0..1a89a03c449b 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -589,7 +589,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
589 if (cb->args[0] == 0) { 589 if (cb->args[0] == 0) {
590 if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV))) 590 if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV)))
591 goto skip_listen_ht; 591 goto skip_listen_ht;
592 tcp_listen_lock(); 592 inet_listen_lock(&tcp_hashinfo);
593 for (i = s_i; i < INET_LHTABLE_SIZE; i++) { 593 for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
594 struct sock *sk; 594 struct sock *sk;
595 struct hlist_node *node; 595 struct hlist_node *node;
@@ -613,7 +613,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
613 goto syn_recv; 613 goto syn_recv;
614 614
615 if (tcpdiag_dump_sock(skb, sk, cb) < 0) { 615 if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
616 tcp_listen_unlock(); 616 inet_listen_unlock(&tcp_hashinfo);
617 goto done; 617 goto done;
618 } 618 }
619 619
@@ -622,7 +622,7 @@ syn_recv:
622 goto next_listen; 622 goto next_listen;
623 623
624 if (tcpdiag_dump_reqs(skb, sk, cb) < 0) { 624 if (tcpdiag_dump_reqs(skb, sk, cb) < 0) {
625 tcp_listen_unlock(); 625 inet_listen_unlock(&tcp_hashinfo);
626 goto done; 626 goto done;
627 } 627 }
628 628
@@ -636,7 +636,7 @@ next_listen:
636 cb->args[3] = 0; 636 cb->args[3] = 0;
637 cb->args[4] = 0; 637 cb->args[4] = 0;
638 } 638 }
639 tcp_listen_unlock(); 639 inet_listen_unlock(&tcp_hashinfo);
640skip_listen_ht: 640skip_listen_ht:
641 cb->args[0] = 1; 641 cb->args[0] = 1;
642 s_i = num = s_num = 0; 642 s_i = num = s_num = 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f5373f9f00ac..5f9ad95304ca 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -228,62 +228,11 @@ fail:
228 return ret; 228 return ret;
229} 229}
230 230
231/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
232 * Look, when several writers sleep and reader wakes them up, all but one
233 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
234 * this, _but_ remember, it adds useless work on UP machines (wake up each
235 * exclusive lock release). It should be ifdefed really.
236 */
237
238void tcp_listen_wlock(void)
239{
240 write_lock(&tcp_hashinfo.lhash_lock);
241
242 if (atomic_read(&tcp_hashinfo.lhash_users)) {
243 DEFINE_WAIT(wait);
244
245 for (;;) {
246 prepare_to_wait_exclusive(&tcp_hashinfo.lhash_wait,
247 &wait, TASK_UNINTERRUPTIBLE);
248 if (!atomic_read(&tcp_hashinfo.lhash_users))
249 break;
250 write_unlock_bh(&tcp_hashinfo.lhash_lock);
251 schedule();
252 write_lock_bh(&tcp_hashinfo.lhash_lock);
253 }
254
255 finish_wait(&tcp_hashinfo.lhash_wait, &wait);
256 }
257}
258
259static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
260{
261 struct hlist_head *list;
262 rwlock_t *lock;
263
264 BUG_TRAP(sk_unhashed(sk));
265 if (listen_possible && sk->sk_state == TCP_LISTEN) {
266 list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
267 lock = &tcp_hashinfo.lhash_lock;
268 tcp_listen_wlock();
269 } else {
270 sk->sk_hashent = inet_sk_ehashfn(sk, tcp_hashinfo.ehash_size);
271 list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
272 lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
273 write_lock(lock);
274 }
275 __sk_add_node(sk, list);
276 sock_prot_inc_use(sk->sk_prot);
277 write_unlock(lock);
278 if (listen_possible && sk->sk_state == TCP_LISTEN)
279 wake_up(&tcp_hashinfo.lhash_wait);
280}
281
282static void tcp_v4_hash(struct sock *sk) 231static void tcp_v4_hash(struct sock *sk)
283{ 232{
284 if (sk->sk_state != TCP_CLOSE) { 233 if (sk->sk_state != TCP_CLOSE) {
285 local_bh_disable(); 234 local_bh_disable();
286 __tcp_v4_hash(sk, 1); 235 __inet_hash(&tcp_hashinfo, sk, 1);
287 local_bh_enable(); 236 local_bh_enable();
288 } 237 }
289} 238}
@@ -297,7 +246,7 @@ void tcp_unhash(struct sock *sk)
297 246
298 if (sk->sk_state == TCP_LISTEN) { 247 if (sk->sk_state == TCP_LISTEN) {
299 local_bh_disable(); 248 local_bh_disable();
300 tcp_listen_wlock(); 249 inet_listen_wlock(&tcp_hashinfo);
301 lock = &tcp_hashinfo.lhash_lock; 250 lock = &tcp_hashinfo.lhash_lock;
302 } else { 251 } else {
303 struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[sk->sk_hashent]; 252 struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[sk->sk_hashent];
@@ -624,7 +573,7 @@ ok:
624 inet_bind_hash(sk, tb, port); 573 inet_bind_hash(sk, tb, port);
625 if (sk_unhashed(sk)) { 574 if (sk_unhashed(sk)) {
626 inet_sk(sk)->sport = htons(port); 575 inet_sk(sk)->sport = htons(port);
627 __tcp_v4_hash(sk, 0); 576 __inet_hash(&tcp_hashinfo, sk, 0);
628 } 577 }
629 spin_unlock(&head->lock); 578 spin_unlock(&head->lock);
630 579
@@ -641,7 +590,7 @@ ok:
641 tb = inet_sk(sk)->bind_hash; 590 tb = inet_sk(sk)->bind_hash;
642 spin_lock_bh(&head->lock); 591 spin_lock_bh(&head->lock);
643 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { 592 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
644 __tcp_v4_hash(sk, 0); 593 __inet_hash(&tcp_hashinfo, sk, 0);
645 spin_unlock_bh(&head->lock); 594 spin_unlock_bh(&head->lock);
646 return 0; 595 return 0;
647 } else { 596 } else {
@@ -1479,7 +1428,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1479 newtp->advmss = dst_metric(dst, RTAX_ADVMSS); 1428 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1480 tcp_initialize_rcv_mss(newsk); 1429 tcp_initialize_rcv_mss(newsk);
1481 1430
1482 __tcp_v4_hash(newsk, 0); 1431 __inet_hash(&tcp_hashinfo, newsk, 0);
1483 __inet_inherit_port(&tcp_hashinfo, sk, newsk); 1432 __inet_inherit_port(&tcp_hashinfo, sk, newsk);
1484 1433
1485 return newsk; 1434 return newsk;
@@ -2102,12 +2051,12 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2102 void *rc; 2051 void *rc;
2103 struct tcp_iter_state* st = seq->private; 2052 struct tcp_iter_state* st = seq->private;
2104 2053
2105 tcp_listen_lock(); 2054 inet_listen_lock(&tcp_hashinfo);
2106 st->state = TCP_SEQ_STATE_LISTENING; 2055 st->state = TCP_SEQ_STATE_LISTENING;
2107 rc = listening_get_idx(seq, &pos); 2056 rc = listening_get_idx(seq, &pos);
2108 2057
2109 if (!rc) { 2058 if (!rc) {
2110 tcp_listen_unlock(); 2059 inet_listen_unlock(&tcp_hashinfo);
2111 local_bh_disable(); 2060 local_bh_disable();
2112 st->state = TCP_SEQ_STATE_ESTABLISHED; 2061 st->state = TCP_SEQ_STATE_ESTABLISHED;
2113 rc = established_get_idx(seq, pos); 2062 rc = established_get_idx(seq, pos);
@@ -2140,7 +2089,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2140 case TCP_SEQ_STATE_LISTENING: 2089 case TCP_SEQ_STATE_LISTENING:
2141 rc = listening_get_next(seq, v); 2090 rc = listening_get_next(seq, v);
2142 if (!rc) { 2091 if (!rc) {
2143 tcp_listen_unlock(); 2092 inet_listen_unlock(&tcp_hashinfo);
2144 local_bh_disable(); 2093 local_bh_disable();
2145 st->state = TCP_SEQ_STATE_ESTABLISHED; 2094 st->state = TCP_SEQ_STATE_ESTABLISHED;
2146 rc = established_get_first(seq); 2095 rc = established_get_first(seq);
@@ -2168,7 +2117,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
2168 } 2117 }
2169 case TCP_SEQ_STATE_LISTENING: 2118 case TCP_SEQ_STATE_LISTENING:
2170 if (v != SEQ_START_TOKEN) 2119 if (v != SEQ_START_TOKEN)
2171 tcp_listen_unlock(); 2120 inet_listen_unlock(&tcp_hashinfo);
2172 break; 2121 break;
2173 case TCP_SEQ_STATE_TIME_WAIT: 2122 case TCP_SEQ_STATE_TIME_WAIT:
2174 case TCP_SEQ_STATE_ESTABLISHED: 2123 case TCP_SEQ_STATE_ESTABLISHED:
@@ -2431,7 +2380,6 @@ void __init tcp_v4_init(struct net_proto_family *ops)
2431EXPORT_SYMBOL(ipv4_specific); 2380EXPORT_SYMBOL(ipv4_specific);
2432EXPORT_SYMBOL(inet_bind_bucket_create); 2381EXPORT_SYMBOL(inet_bind_bucket_create);
2433EXPORT_SYMBOL(tcp_hashinfo); 2382EXPORT_SYMBOL(tcp_hashinfo);
2434EXPORT_SYMBOL(tcp_listen_wlock);
2435EXPORT_SYMBOL(tcp_prot); 2383EXPORT_SYMBOL(tcp_prot);
2436EXPORT_SYMBOL(tcp_unhash); 2384EXPORT_SYMBOL(tcp_unhash);
2437EXPORT_SYMBOL(tcp_v4_conn_request); 2385EXPORT_SYMBOL(tcp_v4_conn_request);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 362ef5a64062..93a66b9a76e1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -229,7 +229,7 @@ static __inline__ void __tcp_v6_hash(struct sock *sk)
229 if (sk->sk_state == TCP_LISTEN) { 229 if (sk->sk_state == TCP_LISTEN) {
230 list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; 230 list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
231 lock = &tcp_hashinfo.lhash_lock; 231 lock = &tcp_hashinfo.lhash_lock;
232 tcp_listen_wlock(); 232 inet_listen_wlock(&tcp_hashinfo);
233 } else { 233 } else {
234 sk->sk_hashent = tcp_v6_sk_hashfn(sk); 234 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
235 list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; 235 list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;