diff options
-rw-r--r-- | include/net/inet_hashtables.h | 48 | ||||
-rw-r--r-- | include/net/tcp.h | 21 | ||||
-rw-r--r-- | net/ipv4/inet_hashtables.c | 32 | ||||
-rw-r--r-- | net/ipv4/tcp_diag.c | 8 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 70 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 2 |
6 files changed, 94 insertions, 87 deletions
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index da07411b36d2..f5d65121f7b7 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h | |||
@@ -19,10 +19,14 @@ | |||
19 | #include <linux/list.h> | 19 | #include <linux/list.h> |
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | #include <linux/spinlock.h> | 21 | #include <linux/spinlock.h> |
22 | #include <linux/tcp.h> /* only for TCP_LISTEN, damn :-( */ | ||
22 | #include <linux/types.h> | 23 | #include <linux/types.h> |
24 | #include <linux/wait.h> | ||
23 | 25 | ||
24 | #include <net/sock.h> | 26 | #include <net/sock.h> |
25 | 27 | ||
28 | #include <asm/atomic.h> | ||
29 | |||
26 | /* This is for all connections with a full identity, no wildcards. | 30 | /* This is for all connections with a full identity, no wildcards. |
27 | * New scheme, half the table is for TIME_WAIT, the other half is | 31 | * New scheme, half the table is for TIME_WAIT, the other half is |
28 | * for the rest. I'll experiment with dynamic table growth later. | 32 | * for the rest. I'll experiment with dynamic table growth later. |
@@ -192,4 +196,48 @@ static inline void inet_inherit_port(struct inet_hashinfo *table, | |||
192 | 196 | ||
193 | extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk); | 197 | extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk); |
194 | 198 | ||
199 | extern void inet_listen_wlock(struct inet_hashinfo *hashinfo); | ||
200 | |||
201 | /* | ||
202 | * - We may sleep inside this lock. | ||
203 | * - If sleeping is not required (or called from BH), | ||
204 | * use plain read_(un)lock(&hashinfo->lhash_lock). | ||
205 | */ | ||
206 | static inline void inet_listen_lock(struct inet_hashinfo *hashinfo) | ||
207 | { | ||
208 | /* read_lock serializes us against writers holding lhash_lock */ | ||
209 | read_lock(&hashinfo->lhash_lock); | ||
210 | atomic_inc(&hashinfo->lhash_users); | ||
211 | read_unlock(&hashinfo->lhash_lock); | ||
212 | } | ||
213 | |||
214 | static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo) | ||
215 | { | ||
216 | if (atomic_dec_and_test(&hashinfo->lhash_users)) | ||
217 | wake_up(&hashinfo->lhash_wait); | ||
218 | } | ||
219 | |||
220 | static inline void __inet_hash(struct inet_hashinfo *hashinfo, | ||
221 | struct sock *sk, const int listen_possible) | ||
222 | { | ||
223 | struct hlist_head *list; | ||
224 | rwlock_t *lock; | ||
225 | |||
226 | BUG_TRAP(sk_unhashed(sk)); | ||
227 | if (listen_possible && sk->sk_state == TCP_LISTEN) { | ||
228 | list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; | ||
229 | lock = &hashinfo->lhash_lock; | ||
230 | inet_listen_wlock(hashinfo); | ||
231 | } else { | ||
232 | sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size); | ||
233 | list = &hashinfo->ehash[sk->sk_hashent].chain; | ||
234 | lock = &hashinfo->ehash[sk->sk_hashent].lock; | ||
235 | write_lock(lock); | ||
236 | } | ||
237 | __sk_add_node(sk, list); | ||
238 | sock_prot_inc_use(sk->sk_prot); | ||
239 | write_unlock(lock); | ||
240 | if (listen_possible && sk->sk_state == TCP_LISTEN) | ||
241 | wake_up(&hashinfo->lhash_wait); | ||
242 | } | ||
195 | #endif /* _INET_HASHTABLES_H */ | 243 | #endif /* _INET_HASHTABLES_H */ |
diff --git a/include/net/tcp.h b/include/net/tcp.h index 99e47695d4b6..bc110cc7022b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -1447,27 +1447,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req, | |||
1447 | 1447 | ||
1448 | extern void tcp_enter_memory_pressure(void); | 1448 | extern void tcp_enter_memory_pressure(void); |
1449 | 1449 | ||
1450 | extern void tcp_listen_wlock(void); | ||
1451 | |||
1452 | /* - We may sleep inside this lock. | ||
1453 | * - If sleeping is not required (or called from BH), | ||
1454 | * use plain read_(un)lock(&inet_hashinfo.lhash_lock). | ||
1455 | */ | ||
1456 | |||
1457 | static inline void tcp_listen_lock(void) | ||
1458 | { | ||
1459 | /* read_lock synchronizes to candidates to writers */ | ||
1460 | read_lock(&tcp_hashinfo.lhash_lock); | ||
1461 | atomic_inc(&tcp_hashinfo.lhash_users); | ||
1462 | read_unlock(&tcp_hashinfo.lhash_lock); | ||
1463 | } | ||
1464 | |||
1465 | static inline void tcp_listen_unlock(void) | ||
1466 | { | ||
1467 | if (atomic_dec_and_test(&tcp_hashinfo.lhash_users)) | ||
1468 | wake_up(&tcp_hashinfo.lhash_wait); | ||
1469 | } | ||
1470 | |||
1471 | static inline int keepalive_intvl_when(const struct tcp_sock *tp) | 1450 | static inline int keepalive_intvl_when(const struct tcp_sock *tp) |
1472 | { | 1451 | { |
1473 | return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; | 1452 | return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; |
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 33d6cbe32cdc..06cbc6f689c5 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -15,7 +15,9 @@ | |||
15 | 15 | ||
16 | #include <linux/config.h> | 16 | #include <linux/config.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/sched.h> | ||
18 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include <linux/wait.h> | ||
19 | 21 | ||
20 | #include <net/inet_hashtables.h> | 22 | #include <net/inet_hashtables.h> |
21 | 23 | ||
@@ -89,3 +91,33 @@ void inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) | |||
89 | } | 91 | } |
90 | 92 | ||
91 | EXPORT_SYMBOL(inet_put_port); | 93 | EXPORT_SYMBOL(inet_put_port); |
94 | |||
95 | /* | ||
96 | * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. | ||
97 | * Look, when several writers sleep and reader wakes them up, all but one | ||
98 | * immediately hit write lock and grab all the cpus. Exclusive sleep solves | ||
99 | * this, _but_ remember, it adds useless work on UP machines (wake up each | ||
100 | * exclusive lock release). It should be ifdefed really. | ||
101 | */ | ||
102 | void inet_listen_wlock(struct inet_hashinfo *hashinfo) | ||
103 | { | ||
104 | write_lock(&hashinfo->lhash_lock); | ||
105 | |||
106 | if (atomic_read(&hashinfo->lhash_users)) { | ||
107 | DEFINE_WAIT(wait); | ||
108 | |||
109 | for (;;) { | ||
110 | prepare_to_wait_exclusive(&hashinfo->lhash_wait, | ||
111 | &wait, TASK_UNINTERRUPTIBLE); | ||
112 | if (!atomic_read(&hashinfo->lhash_users)) | ||
113 | break; | ||
114 | write_unlock_bh(&hashinfo->lhash_lock); | ||
115 | schedule(); | ||
116 | write_lock_bh(&hashinfo->lhash_lock); | ||
117 | } | ||
118 | |||
119 | finish_wait(&hashinfo->lhash_wait, &wait); | ||
120 | } | ||
121 | } | ||
122 | |||
123 | EXPORT_SYMBOL(inet_listen_wlock); | ||
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 0ae738b455f0..1a89a03c449b 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c | |||
@@ -589,7 +589,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
589 | if (cb->args[0] == 0) { | 589 | if (cb->args[0] == 0) { |
590 | if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV))) | 590 | if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV))) |
591 | goto skip_listen_ht; | 591 | goto skip_listen_ht; |
592 | tcp_listen_lock(); | 592 | inet_listen_lock(&tcp_hashinfo); |
593 | for (i = s_i; i < INET_LHTABLE_SIZE; i++) { | 593 | for (i = s_i; i < INET_LHTABLE_SIZE; i++) { |
594 | struct sock *sk; | 594 | struct sock *sk; |
595 | struct hlist_node *node; | 595 | struct hlist_node *node; |
@@ -613,7 +613,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
613 | goto syn_recv; | 613 | goto syn_recv; |
614 | 614 | ||
615 | if (tcpdiag_dump_sock(skb, sk, cb) < 0) { | 615 | if (tcpdiag_dump_sock(skb, sk, cb) < 0) { |
616 | tcp_listen_unlock(); | 616 | inet_listen_unlock(&tcp_hashinfo); |
617 | goto done; | 617 | goto done; |
618 | } | 618 | } |
619 | 619 | ||
@@ -622,7 +622,7 @@ syn_recv: | |||
622 | goto next_listen; | 622 | goto next_listen; |
623 | 623 | ||
624 | if (tcpdiag_dump_reqs(skb, sk, cb) < 0) { | 624 | if (tcpdiag_dump_reqs(skb, sk, cb) < 0) { |
625 | tcp_listen_unlock(); | 625 | inet_listen_unlock(&tcp_hashinfo); |
626 | goto done; | 626 | goto done; |
627 | } | 627 | } |
628 | 628 | ||
@@ -636,7 +636,7 @@ next_listen: | |||
636 | cb->args[3] = 0; | 636 | cb->args[3] = 0; |
637 | cb->args[4] = 0; | 637 | cb->args[4] = 0; |
638 | } | 638 | } |
639 | tcp_listen_unlock(); | 639 | inet_listen_unlock(&tcp_hashinfo); |
640 | skip_listen_ht: | 640 | skip_listen_ht: |
641 | cb->args[0] = 1; | 641 | cb->args[0] = 1; |
642 | s_i = num = s_num = 0; | 642 | s_i = num = s_num = 0; |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f5373f9f00ac..5f9ad95304ca 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -228,62 +228,11 @@ fail: | |||
228 | return ret; | 228 | return ret; |
229 | } | 229 | } |
230 | 230 | ||
231 | /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. | ||
232 | * Look, when several writers sleep and reader wakes them up, all but one | ||
233 | * immediately hit write lock and grab all the cpus. Exclusive sleep solves | ||
234 | * this, _but_ remember, it adds useless work on UP machines (wake up each | ||
235 | * exclusive lock release). It should be ifdefed really. | ||
236 | */ | ||
237 | |||
238 | void tcp_listen_wlock(void) | ||
239 | { | ||
240 | write_lock(&tcp_hashinfo.lhash_lock); | ||
241 | |||
242 | if (atomic_read(&tcp_hashinfo.lhash_users)) { | ||
243 | DEFINE_WAIT(wait); | ||
244 | |||
245 | for (;;) { | ||
246 | prepare_to_wait_exclusive(&tcp_hashinfo.lhash_wait, | ||
247 | &wait, TASK_UNINTERRUPTIBLE); | ||
248 | if (!atomic_read(&tcp_hashinfo.lhash_users)) | ||
249 | break; | ||
250 | write_unlock_bh(&tcp_hashinfo.lhash_lock); | ||
251 | schedule(); | ||
252 | write_lock_bh(&tcp_hashinfo.lhash_lock); | ||
253 | } | ||
254 | |||
255 | finish_wait(&tcp_hashinfo.lhash_wait, &wait); | ||
256 | } | ||
257 | } | ||
258 | |||
259 | static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible) | ||
260 | { | ||
261 | struct hlist_head *list; | ||
262 | rwlock_t *lock; | ||
263 | |||
264 | BUG_TRAP(sk_unhashed(sk)); | ||
265 | if (listen_possible && sk->sk_state == TCP_LISTEN) { | ||
266 | list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; | ||
267 | lock = &tcp_hashinfo.lhash_lock; | ||
268 | tcp_listen_wlock(); | ||
269 | } else { | ||
270 | sk->sk_hashent = inet_sk_ehashfn(sk, tcp_hashinfo.ehash_size); | ||
271 | list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; | ||
272 | lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock; | ||
273 | write_lock(lock); | ||
274 | } | ||
275 | __sk_add_node(sk, list); | ||
276 | sock_prot_inc_use(sk->sk_prot); | ||
277 | write_unlock(lock); | ||
278 | if (listen_possible && sk->sk_state == TCP_LISTEN) | ||
279 | wake_up(&tcp_hashinfo.lhash_wait); | ||
280 | } | ||
281 | |||
282 | static void tcp_v4_hash(struct sock *sk) | 231 | static void tcp_v4_hash(struct sock *sk) |
283 | { | 232 | { |
284 | if (sk->sk_state != TCP_CLOSE) { | 233 | if (sk->sk_state != TCP_CLOSE) { |
285 | local_bh_disable(); | 234 | local_bh_disable(); |
286 | __tcp_v4_hash(sk, 1); | 235 | __inet_hash(&tcp_hashinfo, sk, 1); |
287 | local_bh_enable(); | 236 | local_bh_enable(); |
288 | } | 237 | } |
289 | } | 238 | } |
@@ -297,7 +246,7 @@ void tcp_unhash(struct sock *sk) | |||
297 | 246 | ||
298 | if (sk->sk_state == TCP_LISTEN) { | 247 | if (sk->sk_state == TCP_LISTEN) { |
299 | local_bh_disable(); | 248 | local_bh_disable(); |
300 | tcp_listen_wlock(); | 249 | inet_listen_wlock(&tcp_hashinfo); |
301 | lock = &tcp_hashinfo.lhash_lock; | 250 | lock = &tcp_hashinfo.lhash_lock; |
302 | } else { | 251 | } else { |
303 | struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[sk->sk_hashent]; | 252 | struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[sk->sk_hashent]; |
@@ -624,7 +573,7 @@ ok: | |||
624 | inet_bind_hash(sk, tb, port); | 573 | inet_bind_hash(sk, tb, port); |
625 | if (sk_unhashed(sk)) { | 574 | if (sk_unhashed(sk)) { |
626 | inet_sk(sk)->sport = htons(port); | 575 | inet_sk(sk)->sport = htons(port); |
627 | __tcp_v4_hash(sk, 0); | 576 | __inet_hash(&tcp_hashinfo, sk, 0); |
628 | } | 577 | } |
629 | spin_unlock(&head->lock); | 578 | spin_unlock(&head->lock); |
630 | 579 | ||
@@ -641,7 +590,7 @@ ok: | |||
641 | tb = inet_sk(sk)->bind_hash; | 590 | tb = inet_sk(sk)->bind_hash; |
642 | spin_lock_bh(&head->lock); | 591 | spin_lock_bh(&head->lock); |
643 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { | 592 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { |
644 | __tcp_v4_hash(sk, 0); | 593 | __inet_hash(&tcp_hashinfo, sk, 0); |
645 | spin_unlock_bh(&head->lock); | 594 | spin_unlock_bh(&head->lock); |
646 | return 0; | 595 | return 0; |
647 | } else { | 596 | } else { |
@@ -1479,7 +1428,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1479 | newtp->advmss = dst_metric(dst, RTAX_ADVMSS); | 1428 | newtp->advmss = dst_metric(dst, RTAX_ADVMSS); |
1480 | tcp_initialize_rcv_mss(newsk); | 1429 | tcp_initialize_rcv_mss(newsk); |
1481 | 1430 | ||
1482 | __tcp_v4_hash(newsk, 0); | 1431 | __inet_hash(&tcp_hashinfo, newsk, 0); |
1483 | __inet_inherit_port(&tcp_hashinfo, sk, newsk); | 1432 | __inet_inherit_port(&tcp_hashinfo, sk, newsk); |
1484 | 1433 | ||
1485 | return newsk; | 1434 | return newsk; |
@@ -2102,12 +2051,12 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) | |||
2102 | void *rc; | 2051 | void *rc; |
2103 | struct tcp_iter_state* st = seq->private; | 2052 | struct tcp_iter_state* st = seq->private; |
2104 | 2053 | ||
2105 | tcp_listen_lock(); | 2054 | inet_listen_lock(&tcp_hashinfo); |
2106 | st->state = TCP_SEQ_STATE_LISTENING; | 2055 | st->state = TCP_SEQ_STATE_LISTENING; |
2107 | rc = listening_get_idx(seq, &pos); | 2056 | rc = listening_get_idx(seq, &pos); |
2108 | 2057 | ||
2109 | if (!rc) { | 2058 | if (!rc) { |
2110 | tcp_listen_unlock(); | 2059 | inet_listen_unlock(&tcp_hashinfo); |
2111 | local_bh_disable(); | 2060 | local_bh_disable(); |
2112 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 2061 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2113 | rc = established_get_idx(seq, pos); | 2062 | rc = established_get_idx(seq, pos); |
@@ -2140,7 +2089,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2140 | case TCP_SEQ_STATE_LISTENING: | 2089 | case TCP_SEQ_STATE_LISTENING: |
2141 | rc = listening_get_next(seq, v); | 2090 | rc = listening_get_next(seq, v); |
2142 | if (!rc) { | 2091 | if (!rc) { |
2143 | tcp_listen_unlock(); | 2092 | inet_listen_unlock(&tcp_hashinfo); |
2144 | local_bh_disable(); | 2093 | local_bh_disable(); |
2145 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 2094 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2146 | rc = established_get_first(seq); | 2095 | rc = established_get_first(seq); |
@@ -2168,7 +2117,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) | |||
2168 | } | 2117 | } |
2169 | case TCP_SEQ_STATE_LISTENING: | 2118 | case TCP_SEQ_STATE_LISTENING: |
2170 | if (v != SEQ_START_TOKEN) | 2119 | if (v != SEQ_START_TOKEN) |
2171 | tcp_listen_unlock(); | 2120 | inet_listen_unlock(&tcp_hashinfo); |
2172 | break; | 2121 | break; |
2173 | case TCP_SEQ_STATE_TIME_WAIT: | 2122 | case TCP_SEQ_STATE_TIME_WAIT: |
2174 | case TCP_SEQ_STATE_ESTABLISHED: | 2123 | case TCP_SEQ_STATE_ESTABLISHED: |
@@ -2431,7 +2380,6 @@ void __init tcp_v4_init(struct net_proto_family *ops) | |||
2431 | EXPORT_SYMBOL(ipv4_specific); | 2380 | EXPORT_SYMBOL(ipv4_specific); |
2432 | EXPORT_SYMBOL(inet_bind_bucket_create); | 2381 | EXPORT_SYMBOL(inet_bind_bucket_create); |
2433 | EXPORT_SYMBOL(tcp_hashinfo); | 2382 | EXPORT_SYMBOL(tcp_hashinfo); |
2434 | EXPORT_SYMBOL(tcp_listen_wlock); | ||
2435 | EXPORT_SYMBOL(tcp_prot); | 2383 | EXPORT_SYMBOL(tcp_prot); |
2436 | EXPORT_SYMBOL(tcp_unhash); | 2384 | EXPORT_SYMBOL(tcp_unhash); |
2437 | EXPORT_SYMBOL(tcp_v4_conn_request); | 2385 | EXPORT_SYMBOL(tcp_v4_conn_request); |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 362ef5a64062..93a66b9a76e1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -229,7 +229,7 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) | |||
229 | if (sk->sk_state == TCP_LISTEN) { | 229 | if (sk->sk_state == TCP_LISTEN) { |
230 | list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; | 230 | list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; |
231 | lock = &tcp_hashinfo.lhash_lock; | 231 | lock = &tcp_hashinfo.lhash_lock; |
232 | tcp_listen_wlock(); | 232 | inet_listen_wlock(&tcp_hashinfo); |
233 | } else { | 233 | } else { |
234 | sk->sk_hashent = tcp_v6_sk_hashfn(sk); | 234 | sk->sk_hashent = tcp_v6_sk_hashfn(sk); |
235 | list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; | 235 | list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; |