aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/inet_hashtables.c
diff options
context:
space:
mode:
author: Eric Dumazet <dada1@cosmosbay.com> 2008-11-16 22:40:17 -0500
committer: David S. Miller <davem@davemloft.net> 2008-11-16 22:40:17 -0500
commit: 3ab5aee7fe840b5b1b35a8d1ac11c3de5281e611 (patch)
tree: 468296b7be813643248d4ca67497d6ddb6934fc6 /net/ipv4/inet_hashtables.c
parent: 88ab1932eac721c6e7336708558fa5ed02c85c80 (diff)
net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls
RCU was added to UDP lookups, using a fast infrastructure: - the sockets kmem_cache uses SLAB_DESTROY_BY_RCU and doesn't pay the price of call_rcu() at freeing time. - hlist_nulls permits using few memory barriers. This patch uses the same infrastructure for TCP/DCCP established and timewait sockets. Thanks to SLAB_DESTROY_BY_RCU, there is no slowdown for applications using short-lived TCP connections. A follow-up patch, converting rwlocks to spinlocks, will speed up this case even more. __inet_lookup_established() is pretty fast now that we don't have to dirty a contended cache line (read_lock/read_unlock). Only the established and timewait hash tables are converted to RCU (the bind table and listen table still use traditional locking). Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/inet_hashtables.c')
-rw-r--r--net/ipv4/inet_hashtables.c78
1 files changed, 55 insertions, 23 deletions
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index be41ebbec4eb..fd269cfef0ec 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -223,35 +223,65 @@ struct sock * __inet_lookup_established(struct net *net,
223 INET_ADDR_COOKIE(acookie, saddr, daddr) 223 INET_ADDR_COOKIE(acookie, saddr, daddr)
224 const __portpair ports = INET_COMBINED_PORTS(sport, hnum); 224 const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
225 struct sock *sk; 225 struct sock *sk;
226 const struct hlist_node *node; 226 const struct hlist_nulls_node *node;
227 /* Optimize here for direct hit, only listening connections can 227 /* Optimize here for direct hit, only listening connections can
228 * have wildcards anyways. 228 * have wildcards anyways.
229 */ 229 */
230 unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); 230 unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
231 struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); 231 unsigned int slot = hash & (hashinfo->ehash_size - 1);
232 rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); 232 struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
233 233
234 prefetch(head->chain.first); 234 rcu_read_lock();
235 read_lock(lock); 235begin:
236 sk_for_each(sk, node, &head->chain) { 236 sk_nulls_for_each_rcu(sk, node, &head->chain) {
237 if (INET_MATCH(sk, net, hash, acookie, 237 if (INET_MATCH(sk, net, hash, acookie,
238 saddr, daddr, ports, dif)) 238 saddr, daddr, ports, dif)) {
239 goto hit; /* You sunk my battleship! */ 239 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
240 goto begintw;
241 if (unlikely(!INET_MATCH(sk, net, hash, acookie,
242 saddr, daddr, ports, dif))) {
243 sock_put(sk);
244 goto begin;
245 }
246 goto out;
247 }
240 } 248 }
249 /*
250 * if the nulls value we got at the end of this lookup is
251 * not the expected one, we must restart lookup.
252 * We probably met an item that was moved to another chain.
253 */
254 if (get_nulls_value(node) != slot)
255 goto begin;
241 256
257begintw:
242 /* Must check for a TIME_WAIT'er before going to listener hash. */ 258 /* Must check for a TIME_WAIT'er before going to listener hash. */
243 sk_for_each(sk, node, &head->twchain) { 259 sk_nulls_for_each_rcu(sk, node, &head->twchain) {
244 if (INET_TW_MATCH(sk, net, hash, acookie, 260 if (INET_TW_MATCH(sk, net, hash, acookie,
245 saddr, daddr, ports, dif)) 261 saddr, daddr, ports, dif)) {
246 goto hit; 262 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
263 sk = NULL;
264 goto out;
265 }
266 if (unlikely(!INET_TW_MATCH(sk, net, hash, acookie,
267 saddr, daddr, ports, dif))) {
268 sock_put(sk);
269 goto begintw;
270 }
271 goto out;
272 }
247 } 273 }
274 /*
275 * if the nulls value we got at the end of this lookup is
276 * not the expected one, we must restart lookup.
277 * We probably met an item that was moved to another chain.
278 */
279 if (get_nulls_value(node) != slot)
280 goto begintw;
248 sk = NULL; 281 sk = NULL;
249out: 282out:
250 read_unlock(lock); 283 rcu_read_unlock();
251 return sk; 284 return sk;
252hit:
253 sock_hold(sk);
254 goto out;
255} 285}
256EXPORT_SYMBOL_GPL(__inet_lookup_established); 286EXPORT_SYMBOL_GPL(__inet_lookup_established);
257 287
@@ -272,14 +302,14 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
272 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); 302 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
273 rwlock_t *lock = inet_ehash_lockp(hinfo, hash); 303 rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
274 struct sock *sk2; 304 struct sock *sk2;
275 const struct hlist_node *node; 305 const struct hlist_nulls_node *node;
276 struct inet_timewait_sock *tw; 306 struct inet_timewait_sock *tw;
277 307
278 prefetch(head->chain.first); 308 prefetch(head->chain.first);
279 write_lock(lock); 309 write_lock(lock);
280 310
281 /* Check TIME-WAIT sockets first. */ 311 /* Check TIME-WAIT sockets first. */
282 sk_for_each(sk2, node, &head->twchain) { 312 sk_nulls_for_each(sk2, node, &head->twchain) {
283 tw = inet_twsk(sk2); 313 tw = inet_twsk(sk2);
284 314
285 if (INET_TW_MATCH(sk2, net, hash, acookie, 315 if (INET_TW_MATCH(sk2, net, hash, acookie,
@@ -293,7 +323,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
293 tw = NULL; 323 tw = NULL;
294 324
295 /* And established part... */ 325 /* And established part... */
296 sk_for_each(sk2, node, &head->chain) { 326 sk_nulls_for_each(sk2, node, &head->chain) {
297 if (INET_MATCH(sk2, net, hash, acookie, 327 if (INET_MATCH(sk2, net, hash, acookie,
298 saddr, daddr, ports, dif)) 328 saddr, daddr, ports, dif))
299 goto not_unique; 329 goto not_unique;
@@ -306,7 +336,7 @@ unique:
306 inet->sport = htons(lport); 336 inet->sport = htons(lport);
307 sk->sk_hash = hash; 337 sk->sk_hash = hash;
308 WARN_ON(!sk_unhashed(sk)); 338 WARN_ON(!sk_unhashed(sk));
309 __sk_add_node(sk, &head->chain); 339 __sk_nulls_add_node_rcu(sk, &head->chain);
310 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 340 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
311 write_unlock(lock); 341 write_unlock(lock);
312 342
@@ -338,7 +368,7 @@ static inline u32 inet_sk_port_offset(const struct sock *sk)
338void __inet_hash_nolisten(struct sock *sk) 368void __inet_hash_nolisten(struct sock *sk)
339{ 369{
340 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 370 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
341 struct hlist_head *list; 371 struct hlist_nulls_head *list;
342 rwlock_t *lock; 372 rwlock_t *lock;
343 struct inet_ehash_bucket *head; 373 struct inet_ehash_bucket *head;
344 374
@@ -350,7 +380,7 @@ void __inet_hash_nolisten(struct sock *sk)
350 lock = inet_ehash_lockp(hashinfo, sk->sk_hash); 380 lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
351 381
352 write_lock(lock); 382 write_lock(lock);
353 __sk_add_node(sk, list); 383 __sk_nulls_add_node_rcu(sk, list);
354 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 384 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
355 write_unlock(lock); 385 write_unlock(lock);
356} 386}
@@ -400,13 +430,15 @@ void inet_unhash(struct sock *sk)
400 local_bh_disable(); 430 local_bh_disable();
401 inet_listen_wlock(hashinfo); 431 inet_listen_wlock(hashinfo);
402 lock = &hashinfo->lhash_lock; 432 lock = &hashinfo->lhash_lock;
433 if (__sk_del_node_init(sk))
434 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
403 } else { 435 } else {
404 lock = inet_ehash_lockp(hashinfo, sk->sk_hash); 436 lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
405 write_lock_bh(lock); 437 write_lock_bh(lock);
438 if (__sk_nulls_del_node_init_rcu(sk))
439 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
406 } 440 }
407 441
408 if (__sk_del_node_init(sk))
409 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
410 write_unlock_bh(lock); 442 write_unlock_bh(lock);
411out: 443out:
412 if (sk->sk_state == TCP_LISTEN) 444 if (sk->sk_state == TCP_LISTEN)