diff options
author | Eric Dumazet <dada1@cosmosbay.com> | 2008-11-20 03:40:07 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-11-20 03:40:07 -0500 |
commit | 5caea4ea7088e80ac5410d04660346094608b909 (patch) | |
tree | fad95133683c002d24ff5de7fb756dad806b41ed /net/ipv4/inet_hashtables.c | |
parent | d8b83c57a7e497cba9b5cb156e63176323035785 (diff) |
net: listening_hash get a spinlock per bucket
This patch prepares the RCU migration of the listening_hash table for the
TCP/DCCP protocols.
Because the listening_hash table is small (32 slots per protocol), we add
a spinlock for each slot, instead of a single rwlock for the whole table.
This should reduce the hold time of readers and improve concurrency among writers.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/inet_hashtables.c')
-rw-r--r-- | net/ipv4/inet_hashtables.c | 86 |
1 files changed, 31 insertions, 55 deletions
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fd269cfef0ec..377d004e5723 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -111,35 +111,6 @@ void __inet_inherit_port(struct sock *sk, struct sock *child) | |||
111 | EXPORT_SYMBOL_GPL(__inet_inherit_port); | 111 | EXPORT_SYMBOL_GPL(__inet_inherit_port); |
112 | 112 | ||
113 | /* | 113 | /* |
114 | * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. | ||
115 | * Look, when several writers sleep and reader wakes them up, all but one | ||
116 | * immediately hit write lock and grab all the cpus. Exclusive sleep solves | ||
117 | * this, _but_ remember, it adds useless work on UP machines (wake up each | ||
118 | * exclusive lock release). It should be ifdefed really. | ||
119 | */ | ||
120 | void inet_listen_wlock(struct inet_hashinfo *hashinfo) | ||
121 | __acquires(hashinfo->lhash_lock) | ||
122 | { | ||
123 | write_lock(&hashinfo->lhash_lock); | ||
124 | |||
125 | if (atomic_read(&hashinfo->lhash_users)) { | ||
126 | DEFINE_WAIT(wait); | ||
127 | |||
128 | for (;;) { | ||
129 | prepare_to_wait_exclusive(&hashinfo->lhash_wait, | ||
130 | &wait, TASK_UNINTERRUPTIBLE); | ||
131 | if (!atomic_read(&hashinfo->lhash_users)) | ||
132 | break; | ||
133 | write_unlock_bh(&hashinfo->lhash_lock); | ||
134 | schedule(); | ||
135 | write_lock_bh(&hashinfo->lhash_lock); | ||
136 | } | ||
137 | |||
138 | finish_wait(&hashinfo->lhash_wait, &wait); | ||
139 | } | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * Don't inline this cruft. Here are some nice properties to exploit here. The | 114 | * Don't inline this cruft. Here are some nice properties to exploit here. The |
144 | * BSD API does not allow a listening sock to specify the remote port nor the | 115 | * BSD API does not allow a listening sock to specify the remote port nor the |
145 | * remote address for the connection. So always assume those are both | 116 | * remote address for the connection. So always assume those are both |
@@ -191,25 +162,25 @@ struct sock *__inet_lookup_listener(struct net *net, | |||
191 | const int dif) | 162 | const int dif) |
192 | { | 163 | { |
193 | struct sock *sk = NULL; | 164 | struct sock *sk = NULL; |
194 | const struct hlist_head *head; | 165 | struct inet_listen_hashbucket *ilb; |
195 | 166 | ||
196 | read_lock(&hashinfo->lhash_lock); | 167 | ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; |
197 | head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; | 168 | spin_lock(&ilb->lock); |
198 | if (!hlist_empty(head)) { | 169 | if (!hlist_empty(&ilb->head)) { |
199 | const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); | 170 | const struct inet_sock *inet = inet_sk((sk = __sk_head(&ilb->head))); |
200 | 171 | ||
201 | if (inet->num == hnum && !sk->sk_node.next && | 172 | if (inet->num == hnum && !sk->sk_node.next && |
202 | (!inet->rcv_saddr || inet->rcv_saddr == daddr) && | 173 | (!inet->rcv_saddr || inet->rcv_saddr == daddr) && |
203 | (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && | 174 | (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && |
204 | !sk->sk_bound_dev_if && net_eq(sock_net(sk), net)) | 175 | !sk->sk_bound_dev_if && net_eq(sock_net(sk), net)) |
205 | goto sherry_cache; | 176 | goto sherry_cache; |
206 | sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif); | 177 | sk = inet_lookup_listener_slow(net, &ilb->head, daddr, hnum, dif); |
207 | } | 178 | } |
208 | if (sk) { | 179 | if (sk) { |
209 | sherry_cache: | 180 | sherry_cache: |
210 | sock_hold(sk); | 181 | sock_hold(sk); |
211 | } | 182 | } |
212 | read_unlock(&hashinfo->lhash_lock); | 183 | spin_unlock(&ilb->lock); |
213 | return sk; | 184 | return sk; |
214 | } | 185 | } |
215 | EXPORT_SYMBOL_GPL(__inet_lookup_listener); | 186 | EXPORT_SYMBOL_GPL(__inet_lookup_listener); |
@@ -389,8 +360,7 @@ EXPORT_SYMBOL_GPL(__inet_hash_nolisten); | |||
389 | static void __inet_hash(struct sock *sk) | 360 | static void __inet_hash(struct sock *sk) |
390 | { | 361 | { |
391 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; | 362 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
392 | struct hlist_head *list; | 363 | struct inet_listen_hashbucket *ilb; |
393 | rwlock_t *lock; | ||
394 | 364 | ||
395 | if (sk->sk_state != TCP_LISTEN) { | 365 | if (sk->sk_state != TCP_LISTEN) { |
396 | __inet_hash_nolisten(sk); | 366 | __inet_hash_nolisten(sk); |
@@ -398,14 +368,12 @@ static void __inet_hash(struct sock *sk) | |||
398 | } | 368 | } |
399 | 369 | ||
400 | WARN_ON(!sk_unhashed(sk)); | 370 | WARN_ON(!sk_unhashed(sk)); |
401 | list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; | 371 | ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; |
402 | lock = &hashinfo->lhash_lock; | ||
403 | 372 | ||
404 | inet_listen_wlock(hashinfo); | 373 | spin_lock(&ilb->lock); |
405 | __sk_add_node(sk, list); | 374 | __sk_add_node(sk, &ilb->head); |
406 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 375 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
407 | write_unlock(lock); | 376 | spin_unlock(&ilb->lock); |
408 | wake_up(&hashinfo->lhash_wait); | ||
409 | } | 377 | } |
410 | 378 | ||
411 | void inet_hash(struct sock *sk) | 379 | void inet_hash(struct sock *sk) |
@@ -420,29 +388,27 @@ EXPORT_SYMBOL_GPL(inet_hash); | |||
420 | 388 | ||
421 | void inet_unhash(struct sock *sk) | 389 | void inet_unhash(struct sock *sk) |
422 | { | 390 | { |
423 | rwlock_t *lock; | ||
424 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; | 391 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
425 | 392 | ||
426 | if (sk_unhashed(sk)) | 393 | if (sk_unhashed(sk)) |
427 | goto out; | 394 | return; |
428 | 395 | ||
429 | if (sk->sk_state == TCP_LISTEN) { | 396 | if (sk->sk_state == TCP_LISTEN) { |
430 | local_bh_disable(); | 397 | struct inet_listen_hashbucket *ilb; |
431 | inet_listen_wlock(hashinfo); | 398 | |
432 | lock = &hashinfo->lhash_lock; | 399 | ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; |
400 | spin_lock_bh(&ilb->lock); | ||
433 | if (__sk_del_node_init(sk)) | 401 | if (__sk_del_node_init(sk)) |
434 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | 402 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
403 | spin_unlock_bh(&ilb->lock); | ||
435 | } else { | 404 | } else { |
436 | lock = inet_ehash_lockp(hashinfo, sk->sk_hash); | 405 | rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); |
406 | |||
437 | write_lock_bh(lock); | 407 | write_lock_bh(lock); |
438 | if (__sk_nulls_del_node_init_rcu(sk)) | 408 | if (__sk_nulls_del_node_init_rcu(sk)) |
439 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | 409 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
410 | write_unlock_bh(lock); | ||
440 | } | 411 | } |
441 | |||
442 | write_unlock_bh(lock); | ||
443 | out: | ||
444 | if (sk->sk_state == TCP_LISTEN) | ||
445 | wake_up(&hashinfo->lhash_wait); | ||
446 | } | 412 | } |
447 | EXPORT_SYMBOL_GPL(inet_unhash); | 413 | EXPORT_SYMBOL_GPL(inet_unhash); |
448 | 414 | ||
@@ -556,3 +522,13 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, | |||
556 | } | 522 | } |
557 | 523 | ||
558 | EXPORT_SYMBOL_GPL(inet_hash_connect); | 524 | EXPORT_SYMBOL_GPL(inet_hash_connect); |
525 | |||
526 | void inet_hashinfo_init(struct inet_hashinfo *h) | ||
527 | { | ||
528 | int i; | ||
529 | |||
530 | for (i = 0; i < INET_LHTABLE_SIZE; i++) | ||
531 | spin_lock_init(&h->listening_hash[i].lock); | ||
532 | } | ||
533 | |||
534 | EXPORT_SYMBOL_GPL(inet_hashinfo_init); | ||