author		Eric Dumazet <dada1@cosmosbay.com>	2007-11-07 05:40:20 -0500
committer	David S. Miller <davem@sunset.davemloft.net>	2007-11-07 07:15:11 -0500
commit		230140cffa7feae90ad50bf259db1fa07674f3a7 (patch)
tree		815472add31606423a508a17806b7884f0ab3e2e /include/net
parent		efac52762b1e3fe3035d29e82d8ee1aebc45e4a7 (diff)
[INET]: Remove per bucket rwlock in tcp/dccp ehash table.
As done two years ago on the IP route cache table (commit
22c047ccbc68fa8f3fa57f0e8f906479a062c426), we can avoid using one lock per
hash bucket for the huge TCP/DCCP hash tables.

On a typical x86_64 platform, this saves about 2MB or 4MB of RAM, for
little performance difference. (We hit a different cache line for the
rwlock, but then the bucket cache line has a better sharing factor among
cpus, since we dirty it less often.) For netstat or ss commands that want
a full scan of the hash table, we perform fewer memory accesses.

Using a 'small' table of hashed rwlocks should be more than enough to
provide correct SMP concurrency between different buckets, without using
too much memory. Sizing of this table depends on num_possible_cpus() and
various CONFIG settings.

This patch provides some locking abstraction that may ease future work
using a different model for the TCP/DCCP table.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
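[Editor's note] For readers unfamiliar with the hashed-locks pattern the
changelog describes, here is a minimal userspace sketch of the same idea,
using pthread rwlocks rather than kernel rwlock_t. The hashed_locks_* names
are invented for illustration and do not appear in the patch. The point is
that many buckets share one lock, selected by masking the bucket hash, so
lock memory scales with the number of locks rather than the number of
buckets.

#include <pthread.h>
#include <stdlib.h>

/* Userspace analog of the patch's scheme: a small power-of-two array of
 * rwlocks shared by a much larger hash table.
 */
struct hashed_locks {
	pthread_rwlock_t *locks;
	unsigned int mask;	/* size - 1, with size a power of two */
};

static int hashed_locks_init(struct hashed_locks *hl, unsigned int size)
{
	unsigned int i;

	hl->locks = malloc(size * sizeof(*hl->locks));
	if (!hl->locks)
		return -1;
	for (i = 0; i < size; i++)
		pthread_rwlock_init(&hl->locks[i], NULL);
	hl->mask = size - 1;
	return 0;
}

/* Same mapping as inet_ehash_lockp() below: hash & mask picks the lock. */
static pthread_rwlock_t *hashed_lockp(struct hashed_locks *hl,
				      unsigned int hash)
{
	return &hl->locks[hash & hl->mask];
}

To see where the quoted 2MB or 4MB comes from: with, say, 2^19 ehash
buckets and a 4- or 8-byte rwlock_t embedded in each, the per-bucket locks
alone cost roughly 2MB or 4MB, while the replacement table tops out at
4096 locks, i.e. tens of kilobytes.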
Diffstat (limited to 'include/net')
-rw-r--r--	include/net/inet_hashtables.h	71
1 file changed, 65 insertions, 6 deletions
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 4427dcd1e53a..8461cda37490 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -37,7 +37,6 @@
  * I'll experiment with dynamic table growth later.
  */
 struct inet_ehash_bucket {
-	rwlock_t	  lock;
 	struct hlist_head chain;
 	struct hlist_head twchain;
 };
@@ -100,6 +99,9 @@ struct inet_hashinfo {
 	 * TIME_WAIT sockets use a separate chain (twchain).
 	 */
 	struct inet_ehash_bucket	*ehash;
+	rwlock_t			*ehash_locks;
+	unsigned int			ehash_size;
+	unsigned int			ehash_locks_mask;
 
 	/* Ok, let's try this, I give up, we do need a local binding
 	 * TCP hash as well as the others for fast bind/connect.
@@ -107,7 +109,7 @@ struct inet_hashinfo {
 	struct inet_bind_hashbucket	*bhash;
 
 	unsigned int			bhash_size;
-	unsigned int			ehash_size;
+	/* Note : 4 bytes padding on 64 bit arches */
 
 	/* All sockets in TCP_LISTEN state will be in here. This is the only
 	 * table where wildcard'd TCP sockets can exist. Hash function here
@@ -134,6 +136,62 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket(
 	return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
 }
 
+static inline rwlock_t *inet_ehash_lockp(
+	struct inet_hashinfo *hashinfo,
+	unsigned int hash)
+{
+	return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask];
+}
+
+static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
+{
+	unsigned int i, size = 256;
+#if defined(CONFIG_PROVE_LOCKING)
+	unsigned int nr_pcpus = 2;
+#else
+	unsigned int nr_pcpus = num_possible_cpus();
+#endif
+	if (nr_pcpus >= 4)
+		size = 512;
+	if (nr_pcpus >= 8)
+		size = 1024;
+	if (nr_pcpus >= 16)
+		size = 2048;
+	if (nr_pcpus >= 32)
+		size = 4096;
+	if (sizeof(rwlock_t) != 0) {
+#ifdef CONFIG_NUMA
+		if (size * sizeof(rwlock_t) > PAGE_SIZE)
+			hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t));
+		else
+#endif
+		hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t),
+						GFP_KERNEL);
+		if (!hashinfo->ehash_locks)
+			return ENOMEM;
+		for (i = 0; i < size; i++)
+			rwlock_init(&hashinfo->ehash_locks[i]);
+	}
+	hashinfo->ehash_locks_mask = size - 1;
+	return 0;
+}
+
+static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
+{
+	if (hashinfo->ehash_locks) {
+#ifdef CONFIG_NUMA
+		unsigned int size = (hashinfo->ehash_locks_mask + 1) *
+					sizeof(rwlock_t);
+		if (size > PAGE_SIZE)
+			vfree(hashinfo->ehash_locks);
+		else
+#else
+		kfree(hashinfo->ehash_locks);
+#endif
+		hashinfo->ehash_locks = NULL;
+	}
+}
+
 extern struct inet_bind_bucket *
 		inet_bind_bucket_create(struct kmem_cache *cachep,
 					struct inet_bind_hashbucket *head,
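[Editor's note] As a usage sketch (not part of the patch): callers are
expected to pair inet_ehash_locks_alloc() with inet_ehash_locks_free()
around the ehash table's own lifetime. The function names below are
hypothetical. Two quirks of the helpers as written are worth noting: the
alloc path returns positive ENOMEM rather than -ENOMEM, and under
CONFIG_NUMA the free path never kfree()s a table smaller than PAGE_SIZE.

/* Hypothetical caller, for illustration only. */
static int example_hashinfo_setup(struct inet_hashinfo *hashinfo)
{
	if (inet_ehash_locks_alloc(hashinfo))
		return -ENOMEM;	/* helper itself returns positive ENOMEM */
	/* ... allocate hashinfo->ehash, set hashinfo->ehash_size ... */
	return 0;
}

static void example_hashinfo_teardown(struct inet_hashinfo *hashinfo)
{
	/* ... free hashinfo->ehash ... */
	inet_ehash_locks_free(hashinfo);
}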
@@ -222,7 +280,7 @@ static inline void __inet_hash(struct inet_hashinfo *hashinfo,
 		sk->sk_hash = inet_sk_ehashfn(sk);
 		head = inet_ehash_bucket(hashinfo, sk->sk_hash);
 		list = &head->chain;
-		lock = &head->lock;
+		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 		write_lock(lock);
 	}
 	__sk_add_node(sk, list);
@@ -253,7 +311,7 @@ static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk)
 		inet_listen_wlock(hashinfo);
 		lock = &hashinfo->lhash_lock;
 	} else {
-		lock = &inet_ehash_bucket(hashinfo, sk->sk_hash)->lock;
+		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 		write_lock_bh(lock);
 	}
 
@@ -354,9 +412,10 @@ static inline struct sock *
 	 */
 	unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
+	rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
 
 	prefetch(head->chain.first);
-	read_lock(&head->lock);
+	read_lock(lock);
 	sk_for_each(sk, node, &head->chain) {
 		if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
 			goto hit; /* You sunk my battleship! */
@@ -369,7 +428,7 @@ static inline struct sock *
 	}
 	sk = NULL;
 out:
-	read_unlock(&head->lock);
+	read_unlock(lock);
 	return sk;
 hit:
 	sock_hold(sk);
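[Editor's note] The changelog's remark about netstat and ss doing full
scans follows from the read side: a walker now touches a small lock array
instead of a lock embedded in every bucket's cache line. A minimal sketch
of such a walk under the new helpers; the function name and loop body are
illustrative, not from this patch. Using the bucket index as the hash is
valid because inet_ehash_lockp() masks its argument, assuming the table
has at least as many buckets as there are locks.

/* Illustrative full-table scan, netstat/ss style. */
static void example_ehash_walk(struct inet_hashinfo *hashinfo)
{
	unsigned int i;

	for (i = 0; i < hashinfo->ehash_size; i++) {
		struct inet_ehash_bucket *head = &hashinfo->ehash[i];
		rwlock_t *lock = inet_ehash_lockp(hashinfo, i);
		struct sock *sk;
		struct hlist_node *node;

		read_lock_bh(lock);
		sk_for_each(sk, node, &head->chain) {
			/* ... report sk ... */
		}
		read_unlock_bh(lock);
	}
}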