aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Eric Dumazet <dada1@cosmosbay.com> 2008-11-20 23:39:09 -0500
committer David S. Miller <davem@davemloft.net> 2008-11-20 23:39:09 -0500
commit 9db66bdcc83749affe61c61eb8ff3cf08f42afec (patch)
tree 81bb20e4f569d3b44731498428277db9d77fa7a9
parent b8c26a33c8b6f0a150e9cb38ed80b890be55395c (diff)
net: convert TCP/DCCP ehash rwlocks to spinlocks
Now that TCP & DCCP use RCU lookups, we can convert ehash rwlocks to spinlocks.

/proc/net/tcp and other seq_file 'readers' can safely be converted to 'writers'.

This should speed up writers, since spin_lock()/spin_unlock() only use one atomic operation instead of two for write_lock()/write_unlock().

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/inet_hashtables.h 14
-rw-r--r-- net/ipv4/inet_hashtables.c 21
-rw-r--r-- net/ipv4/inet_timewait_sock.c 22
-rw-r--r-- net/ipv4/tcp_ipv4.c 12
-rw-r--r-- net/ipv6/inet6_hashtables.c 15
5 files changed, 41 insertions, 43 deletions
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 62d2dd0d7860..28b3ee3e8d6d 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -116,7 +116,7 @@ struct inet_hashinfo {
116 * TIME_WAIT sockets use a separate chain (twchain). 116 * TIME_WAIT sockets use a separate chain (twchain).
117 */ 117 */
118 struct inet_ehash_bucket *ehash; 118 struct inet_ehash_bucket *ehash;
119 rwlock_t *ehash_locks; 119 spinlock_t *ehash_locks;
120 unsigned int ehash_size; 120 unsigned int ehash_size;
121 unsigned int ehash_locks_mask; 121 unsigned int ehash_locks_mask;
122 122
@@ -152,7 +152,7 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket(
152 return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)]; 152 return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
153} 153}
154 154
155static inline rwlock_t *inet_ehash_lockp( 155static inline spinlock_t *inet_ehash_lockp(
156 struct inet_hashinfo *hashinfo, 156 struct inet_hashinfo *hashinfo,
157 unsigned int hash) 157 unsigned int hash)
158{ 158{
@@ -177,16 +177,16 @@ static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
177 size = 4096; 177 size = 4096;
178 if (sizeof(rwlock_t) != 0) { 178 if (sizeof(rwlock_t) != 0) {
179#ifdef CONFIG_NUMA 179#ifdef CONFIG_NUMA
180 if (size * sizeof(rwlock_t) > PAGE_SIZE) 180 if (size * sizeof(spinlock_t) > PAGE_SIZE)
181 hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t)); 181 hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t));
182 else 182 else
183#endif 183#endif
184 hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t), 184 hashinfo->ehash_locks = kmalloc(size * sizeof(spinlock_t),
185 GFP_KERNEL); 185 GFP_KERNEL);
186 if (!hashinfo->ehash_locks) 186 if (!hashinfo->ehash_locks)
187 return ENOMEM; 187 return ENOMEM;
188 for (i = 0; i < size; i++) 188 for (i = 0; i < size; i++)
189 rwlock_init(&hashinfo->ehash_locks[i]); 189 spin_lock_init(&hashinfo->ehash_locks[i]);
190 } 190 }
191 hashinfo->ehash_locks_mask = size - 1; 191 hashinfo->ehash_locks_mask = size - 1;
192 return 0; 192 return 0;
@@ -197,7 +197,7 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
197 if (hashinfo->ehash_locks) { 197 if (hashinfo->ehash_locks) {
198#ifdef CONFIG_NUMA 198#ifdef CONFIG_NUMA
199 unsigned int size = (hashinfo->ehash_locks_mask + 1) * 199 unsigned int size = (hashinfo->ehash_locks_mask + 1) *
200 sizeof(rwlock_t); 200 sizeof(spinlock_t);
201 if (size > PAGE_SIZE) 201 if (size > PAGE_SIZE)
202 vfree(hashinfo->ehash_locks); 202 vfree(hashinfo->ehash_locks);
203 else 203 else
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 377d004e5723..4c273a9981a6 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -271,13 +271,12 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
271 struct net *net = sock_net(sk); 271 struct net *net = sock_net(sk);
272 unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); 272 unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport);
273 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); 273 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
274 rwlock_t *lock = inet_ehash_lockp(hinfo, hash); 274 spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
275 struct sock *sk2; 275 struct sock *sk2;
276 const struct hlist_nulls_node *node; 276 const struct hlist_nulls_node *node;
277 struct inet_timewait_sock *tw; 277 struct inet_timewait_sock *tw;
278 278
279 prefetch(head->chain.first); 279 spin_lock(lock);
280 write_lock(lock);
281 280
282 /* Check TIME-WAIT sockets first. */ 281 /* Check TIME-WAIT sockets first. */
283 sk_nulls_for_each(sk2, node, &head->twchain) { 282 sk_nulls_for_each(sk2, node, &head->twchain) {
@@ -308,8 +307,8 @@ unique:
308 sk->sk_hash = hash; 307 sk->sk_hash = hash;
309 WARN_ON(!sk_unhashed(sk)); 308 WARN_ON(!sk_unhashed(sk));
310 __sk_nulls_add_node_rcu(sk, &head->chain); 309 __sk_nulls_add_node_rcu(sk, &head->chain);
310 spin_unlock(lock);
311 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 311 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
312 write_unlock(lock);
313 312
314 if (twp) { 313 if (twp) {
315 *twp = tw; 314 *twp = tw;
@@ -325,7 +324,7 @@ unique:
325 return 0; 324 return 0;
326 325
327not_unique: 326not_unique:
328 write_unlock(lock); 327 spin_unlock(lock);
329 return -EADDRNOTAVAIL; 328 return -EADDRNOTAVAIL;
330} 329}
331 330
@@ -340,7 +339,7 @@ void __inet_hash_nolisten(struct sock *sk)
340{ 339{
341 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 340 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
342 struct hlist_nulls_head *list; 341 struct hlist_nulls_head *list;
343 rwlock_t *lock; 342 spinlock_t *lock;
344 struct inet_ehash_bucket *head; 343 struct inet_ehash_bucket *head;
345 344
346 WARN_ON(!sk_unhashed(sk)); 345 WARN_ON(!sk_unhashed(sk));
@@ -350,10 +349,10 @@ void __inet_hash_nolisten(struct sock *sk)
350 list = &head->chain; 349 list = &head->chain;
351 lock = inet_ehash_lockp(hashinfo, sk->sk_hash); 350 lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
352 351
353 write_lock(lock); 352 spin_lock(lock);
354 __sk_nulls_add_node_rcu(sk, list); 353 __sk_nulls_add_node_rcu(sk, list);
354 spin_unlock(lock);
355 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 355 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
356 write_unlock(lock);
357} 356}
358EXPORT_SYMBOL_GPL(__inet_hash_nolisten); 357EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
359 358
@@ -402,12 +401,12 @@ void inet_unhash(struct sock *sk)
402 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 401 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
403 spin_unlock_bh(&ilb->lock); 402 spin_unlock_bh(&ilb->lock);
404 } else { 403 } else {
405 rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); 404 spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
406 405
407 write_lock_bh(lock); 406 spin_lock_bh(lock);
408 if (__sk_nulls_del_node_init_rcu(sk)) 407 if (__sk_nulls_del_node_init_rcu(sk))
409 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 408 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
410 write_unlock_bh(lock); 409 spin_unlock_bh(lock);
411 } 410 }
412} 411}
413EXPORT_SYMBOL_GPL(inet_unhash); 412EXPORT_SYMBOL_GPL(inet_unhash);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 60689951ecdb..8554d0ea1719 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
20 struct inet_bind_hashbucket *bhead; 20 struct inet_bind_hashbucket *bhead;
21 struct inet_bind_bucket *tb; 21 struct inet_bind_bucket *tb;
22 /* Unlink from established hashes. */ 22 /* Unlink from established hashes. */
23 rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); 23 spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
24 24
25 write_lock(lock); 25 spin_lock(lock);
26 if (hlist_nulls_unhashed(&tw->tw_node)) { 26 if (hlist_nulls_unhashed(&tw->tw_node)) {
27 write_unlock(lock); 27 spin_unlock(lock);
28 return; 28 return;
29 } 29 }
30 hlist_nulls_del_rcu(&tw->tw_node); 30 hlist_nulls_del_rcu(&tw->tw_node);
31 sk_nulls_node_init(&tw->tw_node); 31 sk_nulls_node_init(&tw->tw_node);
32 write_unlock(lock); 32 spin_unlock(lock);
33 33
34 /* Disassociate with bind bucket. */ 34 /* Disassociate with bind bucket. */
35 bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, 35 bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
@@ -76,7 +76,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
76 const struct inet_sock *inet = inet_sk(sk); 76 const struct inet_sock *inet = inet_sk(sk);
77 const struct inet_connection_sock *icsk = inet_csk(sk); 77 const struct inet_connection_sock *icsk = inet_csk(sk);
78 struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); 78 struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
79 rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); 79 spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
80 struct inet_bind_hashbucket *bhead; 80 struct inet_bind_hashbucket *bhead;
81 /* Step 1: Put TW into bind hash. Original socket stays there too. 81 /* Step 1: Put TW into bind hash. Original socket stays there too.
82 Note, that any socket with inet->num != 0 MUST be bound in 82 Note, that any socket with inet->num != 0 MUST be bound in
@@ -90,7 +90,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
90 inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); 90 inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
91 spin_unlock(&bhead->lock); 91 spin_unlock(&bhead->lock);
92 92
93 write_lock(lock); 93 spin_lock(lock);
94 94
95 /* 95 /*
96 * Step 2: Hash TW into TIMEWAIT chain. 96 * Step 2: Hash TW into TIMEWAIT chain.
@@ -104,7 +104,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
104 if (__sk_nulls_del_node_init_rcu(sk)) 104 if (__sk_nulls_del_node_init_rcu(sk))
105 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 105 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
106 106
107 write_unlock(lock); 107 spin_unlock(lock);
108} 108}
109 109
110EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); 110EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
@@ -427,9 +427,9 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
427 for (h = 0; h < (hashinfo->ehash_size); h++) { 427 for (h = 0; h < (hashinfo->ehash_size); h++) {
428 struct inet_ehash_bucket *head = 428 struct inet_ehash_bucket *head =
429 inet_ehash_bucket(hashinfo, h); 429 inet_ehash_bucket(hashinfo, h);
430 rwlock_t *lock = inet_ehash_lockp(hashinfo, h); 430 spinlock_t *lock = inet_ehash_lockp(hashinfo, h);
431restart: 431restart:
432 write_lock(lock); 432 spin_lock(lock);
433 sk_nulls_for_each(sk, node, &head->twchain) { 433 sk_nulls_for_each(sk, node, &head->twchain) {
434 434
435 tw = inet_twsk(sk); 435 tw = inet_twsk(sk);
@@ -438,13 +438,13 @@ restart:
438 continue; 438 continue;
439 439
440 atomic_inc(&tw->tw_refcnt); 440 atomic_inc(&tw->tw_refcnt);
441 write_unlock(lock); 441 spin_unlock(lock);
442 inet_twsk_deschedule(tw, twdr); 442 inet_twsk_deschedule(tw, twdr);
443 inet_twsk_put(tw); 443 inet_twsk_put(tw);
444 444
445 goto restart; 445 goto restart;
446 } 446 }
447 write_unlock(lock); 447 spin_unlock(lock);
448 } 448 }
449 local_bh_enable(); 449 local_bh_enable();
450} 450}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 330b08a12274..a81caa1be0cf 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1970,13 +1970,13 @@ static void *established_get_first(struct seq_file *seq)
1970 struct sock *sk; 1970 struct sock *sk;
1971 struct hlist_nulls_node *node; 1971 struct hlist_nulls_node *node;
1972 struct inet_timewait_sock *tw; 1972 struct inet_timewait_sock *tw;
1973 rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); 1973 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1974 1974
1975 /* Lockless fast path for the common case of empty buckets */ 1975 /* Lockless fast path for the common case of empty buckets */
1976 if (empty_bucket(st)) 1976 if (empty_bucket(st))
1977 continue; 1977 continue;
1978 1978
1979 read_lock_bh(lock); 1979 spin_lock_bh(lock);
1980 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { 1980 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1981 if (sk->sk_family != st->family || 1981 if (sk->sk_family != st->family ||
1982 !net_eq(sock_net(sk), net)) { 1982 !net_eq(sock_net(sk), net)) {
@@ -1995,7 +1995,7 @@ static void *established_get_first(struct seq_file *seq)
1995 rc = tw; 1995 rc = tw;
1996 goto out; 1996 goto out;
1997 } 1997 }
1998 read_unlock_bh(lock); 1998 spin_unlock_bh(lock);
1999 st->state = TCP_SEQ_STATE_ESTABLISHED; 1999 st->state = TCP_SEQ_STATE_ESTABLISHED;
2000 } 2000 }
2001out: 2001out:
@@ -2023,7 +2023,7 @@ get_tw:
2023 cur = tw; 2023 cur = tw;
2024 goto out; 2024 goto out;
2025 } 2025 }
2026 read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 2026 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2027 st->state = TCP_SEQ_STATE_ESTABLISHED; 2027 st->state = TCP_SEQ_STATE_ESTABLISHED;
2028 2028
2029 /* Look for next non empty bucket */ 2029 /* Look for next non empty bucket */
@@ -2033,7 +2033,7 @@ get_tw:
2033 if (st->bucket >= tcp_hashinfo.ehash_size) 2033 if (st->bucket >= tcp_hashinfo.ehash_size)
2034 return NULL; 2034 return NULL;
2035 2035
2036 read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 2036 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2037 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain); 2037 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2038 } else 2038 } else
2039 sk = sk_nulls_next(sk); 2039 sk = sk_nulls_next(sk);
@@ -2134,7 +2134,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
2134 case TCP_SEQ_STATE_TIME_WAIT: 2134 case TCP_SEQ_STATE_TIME_WAIT:
2135 case TCP_SEQ_STATE_ESTABLISHED: 2135 case TCP_SEQ_STATE_ESTABLISHED:
2136 if (v) 2136 if (v)
2137 read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 2137 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2138 break; 2138 break;
2139 } 2139 }
2140} 2140}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 21544b9be259..e0fd68187f83 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -38,14 +38,14 @@ void __inet6_hash(struct sock *sk)
38 } else { 38 } else {
39 unsigned int hash; 39 unsigned int hash;
40 struct hlist_nulls_head *list; 40 struct hlist_nulls_head *list;
41 rwlock_t *lock; 41 spinlock_t *lock;
42 42
43 sk->sk_hash = hash = inet6_sk_ehashfn(sk); 43 sk->sk_hash = hash = inet6_sk_ehashfn(sk);
44 list = &inet_ehash_bucket(hashinfo, hash)->chain; 44 list = &inet_ehash_bucket(hashinfo, hash)->chain;
45 lock = inet_ehash_lockp(hashinfo, hash); 45 lock = inet_ehash_lockp(hashinfo, hash);
46 write_lock(lock); 46 spin_lock(lock);
47 __sk_nulls_add_node_rcu(sk, list); 47 __sk_nulls_add_node_rcu(sk, list);
48 write_unlock(lock); 48 spin_unlock(lock);
49 } 49 }
50 50
51 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 51 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -195,13 +195,12 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
195 const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, 195 const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
196 inet->dport); 196 inet->dport);
197 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); 197 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
198 rwlock_t *lock = inet_ehash_lockp(hinfo, hash); 198 spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
199 struct sock *sk2; 199 struct sock *sk2;
200 const struct hlist_nulls_node *node; 200 const struct hlist_nulls_node *node;
201 struct inet_timewait_sock *tw; 201 struct inet_timewait_sock *tw;
202 202
203 prefetch(head->chain.first); 203 spin_lock(lock);
204 write_lock(lock);
205 204
206 /* Check TIME-WAIT sockets first. */ 205 /* Check TIME-WAIT sockets first. */
207 sk_nulls_for_each(sk2, node, &head->twchain) { 206 sk_nulls_for_each(sk2, node, &head->twchain) {
@@ -230,8 +229,8 @@ unique:
230 WARN_ON(!sk_unhashed(sk)); 229 WARN_ON(!sk_unhashed(sk));
231 __sk_nulls_add_node_rcu(sk, &head->chain); 230 __sk_nulls_add_node_rcu(sk, &head->chain);
232 sk->sk_hash = hash; 231 sk->sk_hash = hash;
232 spin_unlock(lock);
233 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 233 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
234 write_unlock(lock);
235 234
236 if (twp != NULL) { 235 if (twp != NULL) {
237 *twp = tw; 236 *twp = tw;
@@ -246,7 +245,7 @@ unique:
246 return 0; 245 return 0;
247 246
248not_unique: 247not_unique:
249 write_unlock(lock); 248 spin_unlock(lock);
250 return -EADDRNOTAVAIL; 249 return -EADDRNOTAVAIL;
251} 250}
252 251