diff options
author | Eric Dumazet <dada1@cosmosbay.com> | 2008-11-16 22:40:17 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-11-16 22:40:17 -0500 |
commit | 3ab5aee7fe840b5b1b35a8d1ac11c3de5281e611 (patch) | |
tree | 468296b7be813643248d4ca67497d6ddb6934fc6 /net/ipv4/inet_hashtables.c | |
parent | 88ab1932eac721c6e7336708558fa5ed02c85c80 (diff) |
net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls
RCU was added to UDP lookups, using a fast infrastructure:
- the sockets kmem_cache uses SLAB_DESTROY_BY_RCU and doesn't pay the
price of call_rcu() at freeing time.
- hlist_nulls permits the use of few memory barriers.
This patch uses the same infrastructure for TCP/DCCP established
and timewait sockets.
Thanks to SLAB_DESTROY_BY_RCU, there is no slowdown for applications
using short-lived TCP connections. A follow-up patch, converting
rwlocks to spinlocks, will even speed up this case.
__inet_lookup_established() is pretty fast now that we don't have to
dirty a contended cache line (read_lock/read_unlock).
Only the established and timewait hash tables are converted to RCU
(the bind table and listen table still use traditional locking).
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/inet_hashtables.c')
-rw-r--r-- | net/ipv4/inet_hashtables.c | 78 |
1 files changed, 55 insertions, 23 deletions
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index be41ebbec4eb..fd269cfef0ec 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -223,35 +223,65 @@ struct sock * __inet_lookup_established(struct net *net, | |||
223 | INET_ADDR_COOKIE(acookie, saddr, daddr) | 223 | INET_ADDR_COOKIE(acookie, saddr, daddr) |
224 | const __portpair ports = INET_COMBINED_PORTS(sport, hnum); | 224 | const __portpair ports = INET_COMBINED_PORTS(sport, hnum); |
225 | struct sock *sk; | 225 | struct sock *sk; |
226 | const struct hlist_node *node; | 226 | const struct hlist_nulls_node *node; |
227 | /* Optimize here for direct hit, only listening connections can | 227 | /* Optimize here for direct hit, only listening connections can |
228 | * have wildcards anyways. | 228 | * have wildcards anyways. |
229 | */ | 229 | */ |
230 | unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); | 230 | unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); |
231 | struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); | 231 | unsigned int slot = hash & (hashinfo->ehash_size - 1); |
232 | rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); | 232 | struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; |
233 | 233 | ||
234 | prefetch(head->chain.first); | 234 | rcu_read_lock(); |
235 | read_lock(lock); | 235 | begin: |
236 | sk_for_each(sk, node, &head->chain) { | 236 | sk_nulls_for_each_rcu(sk, node, &head->chain) { |
237 | if (INET_MATCH(sk, net, hash, acookie, | 237 | if (INET_MATCH(sk, net, hash, acookie, |
238 | saddr, daddr, ports, dif)) | 238 | saddr, daddr, ports, dif)) { |
239 | goto hit; /* You sunk my battleship! */ | 239 | if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) |
240 | goto begintw; | ||
241 | if (unlikely(!INET_MATCH(sk, net, hash, acookie, | ||
242 | saddr, daddr, ports, dif))) { | ||
243 | sock_put(sk); | ||
244 | goto begin; | ||
245 | } | ||
246 | goto out; | ||
247 | } | ||
240 | } | 248 | } |
249 | /* | ||
250 | * if the nulls value we got at the end of this lookup is | ||
251 | * not the expected one, we must restart lookup. | ||
252 | * We probably met an item that was moved to another chain. | ||
253 | */ | ||
254 | if (get_nulls_value(node) != slot) | ||
255 | goto begin; | ||
241 | 256 | ||
257 | begintw: | ||
242 | /* Must check for a TIME_WAIT'er before going to listener hash. */ | 258 | /* Must check for a TIME_WAIT'er before going to listener hash. */ |
243 | sk_for_each(sk, node, &head->twchain) { | 259 | sk_nulls_for_each_rcu(sk, node, &head->twchain) { |
244 | if (INET_TW_MATCH(sk, net, hash, acookie, | 260 | if (INET_TW_MATCH(sk, net, hash, acookie, |
245 | saddr, daddr, ports, dif)) | 261 | saddr, daddr, ports, dif)) { |
246 | goto hit; | 262 | if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { |
263 | sk = NULL; | ||
264 | goto out; | ||
265 | } | ||
266 | if (unlikely(!INET_TW_MATCH(sk, net, hash, acookie, | ||
267 | saddr, daddr, ports, dif))) { | ||
268 | sock_put(sk); | ||
269 | goto begintw; | ||
270 | } | ||
271 | goto out; | ||
272 | } | ||
247 | } | 273 | } |
274 | /* | ||
275 | * if the nulls value we got at the end of this lookup is | ||
276 | * not the expected one, we must restart lookup. | ||
277 | * We probably met an item that was moved to another chain. | ||
278 | */ | ||
279 | if (get_nulls_value(node) != slot) | ||
280 | goto begintw; | ||
248 | sk = NULL; | 281 | sk = NULL; |
249 | out: | 282 | out: |
250 | read_unlock(lock); | 283 | rcu_read_unlock(); |
251 | return sk; | 284 | return sk; |
252 | hit: | ||
253 | sock_hold(sk); | ||
254 | goto out; | ||
255 | } | 285 | } |
256 | EXPORT_SYMBOL_GPL(__inet_lookup_established); | 286 | EXPORT_SYMBOL_GPL(__inet_lookup_established); |
257 | 287 | ||
@@ -272,14 +302,14 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, | |||
272 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); | 302 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); |
273 | rwlock_t *lock = inet_ehash_lockp(hinfo, hash); | 303 | rwlock_t *lock = inet_ehash_lockp(hinfo, hash); |
274 | struct sock *sk2; | 304 | struct sock *sk2; |
275 | const struct hlist_node *node; | 305 | const struct hlist_nulls_node *node; |
276 | struct inet_timewait_sock *tw; | 306 | struct inet_timewait_sock *tw; |
277 | 307 | ||
278 | prefetch(head->chain.first); | 308 | prefetch(head->chain.first); |
279 | write_lock(lock); | 309 | write_lock(lock); |
280 | 310 | ||
281 | /* Check TIME-WAIT sockets first. */ | 311 | /* Check TIME-WAIT sockets first. */ |
282 | sk_for_each(sk2, node, &head->twchain) { | 312 | sk_nulls_for_each(sk2, node, &head->twchain) { |
283 | tw = inet_twsk(sk2); | 313 | tw = inet_twsk(sk2); |
284 | 314 | ||
285 | if (INET_TW_MATCH(sk2, net, hash, acookie, | 315 | if (INET_TW_MATCH(sk2, net, hash, acookie, |
@@ -293,7 +323,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, | |||
293 | tw = NULL; | 323 | tw = NULL; |
294 | 324 | ||
295 | /* And established part... */ | 325 | /* And established part... */ |
296 | sk_for_each(sk2, node, &head->chain) { | 326 | sk_nulls_for_each(sk2, node, &head->chain) { |
297 | if (INET_MATCH(sk2, net, hash, acookie, | 327 | if (INET_MATCH(sk2, net, hash, acookie, |
298 | saddr, daddr, ports, dif)) | 328 | saddr, daddr, ports, dif)) |
299 | goto not_unique; | 329 | goto not_unique; |
@@ -306,7 +336,7 @@ unique: | |||
306 | inet->sport = htons(lport); | 336 | inet->sport = htons(lport); |
307 | sk->sk_hash = hash; | 337 | sk->sk_hash = hash; |
308 | WARN_ON(!sk_unhashed(sk)); | 338 | WARN_ON(!sk_unhashed(sk)); |
309 | __sk_add_node(sk, &head->chain); | 339 | __sk_nulls_add_node_rcu(sk, &head->chain); |
310 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 340 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
311 | write_unlock(lock); | 341 | write_unlock(lock); |
312 | 342 | ||
@@ -338,7 +368,7 @@ static inline u32 inet_sk_port_offset(const struct sock *sk) | |||
338 | void __inet_hash_nolisten(struct sock *sk) | 368 | void __inet_hash_nolisten(struct sock *sk) |
339 | { | 369 | { |
340 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; | 370 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
341 | struct hlist_head *list; | 371 | struct hlist_nulls_head *list; |
342 | rwlock_t *lock; | 372 | rwlock_t *lock; |
343 | struct inet_ehash_bucket *head; | 373 | struct inet_ehash_bucket *head; |
344 | 374 | ||
@@ -350,7 +380,7 @@ void __inet_hash_nolisten(struct sock *sk) | |||
350 | lock = inet_ehash_lockp(hashinfo, sk->sk_hash); | 380 | lock = inet_ehash_lockp(hashinfo, sk->sk_hash); |
351 | 381 | ||
352 | write_lock(lock); | 382 | write_lock(lock); |
353 | __sk_add_node(sk, list); | 383 | __sk_nulls_add_node_rcu(sk, list); |
354 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 384 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
355 | write_unlock(lock); | 385 | write_unlock(lock); |
356 | } | 386 | } |
@@ -400,13 +430,15 @@ void inet_unhash(struct sock *sk) | |||
400 | local_bh_disable(); | 430 | local_bh_disable(); |
401 | inet_listen_wlock(hashinfo); | 431 | inet_listen_wlock(hashinfo); |
402 | lock = &hashinfo->lhash_lock; | 432 | lock = &hashinfo->lhash_lock; |
433 | if (__sk_del_node_init(sk)) | ||
434 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | ||
403 | } else { | 435 | } else { |
404 | lock = inet_ehash_lockp(hashinfo, sk->sk_hash); | 436 | lock = inet_ehash_lockp(hashinfo, sk->sk_hash); |
405 | write_lock_bh(lock); | 437 | write_lock_bh(lock); |
438 | if (__sk_nulls_del_node_init_rcu(sk)) | ||
439 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | ||
406 | } | 440 | } |
407 | 441 | ||
408 | if (__sk_del_node_init(sk)) | ||
409 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | ||
410 | write_unlock_bh(lock); | 442 | write_unlock_bh(lock); |
411 | out: | 443 | out: |
412 | if (sk->sk_state == TCP_LISTEN) | 444 | if (sk->sk_state == TCP_LISTEN) |