author		Eric Dumazet <dada1@cosmosbay.com>	2007-11-07 05:40:20 -0500
committer	David S. Miller <davem@sunset.davemloft.net>	2007-11-07 07:15:11 -0500
commit		230140cffa7feae90ad50bf259db1fa07674f3a7 (patch)
tree		815472add31606423a508a17806b7884f0ab3e2e /include/net
parent		efac52762b1e3fe3035d29e82d8ee1aebc45e4a7 (diff)
[INET]: Remove per bucket rwlock in tcp/dccp ehash table.
As done two years ago on the IP route cache table (commit
22c047ccbc68fa8f3fa57f0e8f906479a062c426), we can avoid using one
lock per hash bucket for the huge TCP/DCCP hash tables.

On a typical x86_64 platform, this saves about 2MB or 4MB of RAM,
for little performance difference. (We hit a different cache line
for the rwlock, but then the bucket cache line has a better sharing
factor among cpus, since we dirty it less often.) For netstat or ss
commands that want a full scan of the hash table, we also perform
fewer memory accesses.

Using a 'small' table of hashed rwlocks should be more than enough
to provide correct SMP concurrency between different buckets,
without using too much memory. The sizing of this table depends on
num_possible_cpus() and various CONFIG settings; see
inet_ehash_locks_alloc() in the diff below.

This patch provides some locking abstraction that may ease future
work using a different model for the TCP/DCCP table.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
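
The technique the message describes is classic lock striping: instead of
embedding one rwlock in every ehash bucket, keep a small power-of-two array
of rwlocks and map a bucket's hash onto it with a mask. A minimal userspace
sketch of that pattern, with pthreads standing in for the kernel's rwlock_t;
all names and sizes here are illustrative, not taken from the patch:

#include <pthread.h>

#define EHASH_SIZE (1U << 19)	/* bucket count: must be a power of two */
#define NLOCKS	   256		/* striped locks: far fewer than buckets */

struct node { struct node *next; };

struct bucket {			/* no rwlock inside the bucket anymore */
	struct node *chain;
};

static struct bucket table[EHASH_SIZE];
static pthread_rwlock_t locks[NLOCKS];	/* the 'small' hashed lock table */
static const unsigned int locks_mask = NLOCKS - 1;

/* Same shape as the patch's inet_ehash_lockp(): derive the lock index
 * from the same hash as the bucket index, just with a smaller mask. */
static pthread_rwlock_t *bucket_lockp(unsigned int hash)
{
	return &locks[hash & locks_mask];
}

static struct bucket *bucketp(unsigned int hash)
{
	return &table[hash & (EHASH_SIZE - 1)];
}

static void locks_init(void)
{
	unsigned int i;

	for (i = 0; i < NLOCKS; i++)
		pthread_rwlock_init(&locks[i], NULL);
}

/* Two lookups contend only when their hashes collide modulo NLOCKS,
 * so readers of different buckets almost always run in parallel. */
static struct node *lookup_first(unsigned int hash)
{
	pthread_rwlock_t *lock = bucket_lockp(hash);
	struct node *n;

	pthread_rwlock_rdlock(lock);
	n = bucketp(hash)->chain;	/* a real lookup would walk the chain */
	pthread_rwlock_unlock(lock);
	return n;
}

The memory claim then follows directly: assuming roughly 2^19 buckets and a
4- or 8-byte rwlock_t, the embedded locks cost about 2MB or 4MB, while 256
to 4096 striped locks cost a few kilobytes at most.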
Diffstat (limited to 'include/net')
-rw-r--r--	include/net/inet_hashtables.h	71
1 file changed, 65 insertions(+), 6 deletions(-)
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 4427dcd1e53a..8461cda37490 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -37,7 +37,6 @@
  * I'll experiment with dynamic table growth later.
  */
 struct inet_ehash_bucket {
-	rwlock_t	  lock;
 	struct hlist_head chain;
 	struct hlist_head twchain;
 };
@@ -100,6 +99,9 @@ struct inet_hashinfo {
 	 * TIME_WAIT sockets use a separate chain (twchain).
 	 */
 	struct inet_ehash_bucket	*ehash;
+	rwlock_t			*ehash_locks;
+	unsigned int			ehash_size;
+	unsigned int			ehash_locks_mask;
 
 	/* Ok, let's try this, I give up, we do need a local binding
 	 * TCP hash as well as the others for fast bind/connect.
@@ -107,7 +109,7 @@ struct inet_hashinfo {
 	struct inet_bind_hashbucket	*bhash;
 
 	unsigned int			bhash_size;
-	unsigned int			ehash_size;
+	/* Note : 4 bytes padding on 64 bit arches */
 
 	/* All sockets in TCP_LISTEN state will be in here.  This is the only
 	 * table where wildcard'd TCP sockets can exist.  Hash function here
@@ -134,6 +136,62 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket(
 	return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
 }
 
+static inline rwlock_t *inet_ehash_lockp(
+	struct inet_hashinfo *hashinfo,
+	unsigned int hash)
+{
+	return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask];
+}
+
+static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
+{
+	unsigned int i, size = 256;
+#if defined(CONFIG_PROVE_LOCKING)
+	unsigned int nr_pcpus = 2;
+#else
+	unsigned int nr_pcpus = num_possible_cpus();
+#endif
+	if (nr_pcpus >= 4)
+		size = 512;
+	if (nr_pcpus >= 8)
+		size = 1024;
+	if (nr_pcpus >= 16)
+		size = 2048;
+	if (nr_pcpus >= 32)
+		size = 4096;
+	if (sizeof(rwlock_t) != 0) {
+#ifdef CONFIG_NUMA
+		if (size * sizeof(rwlock_t) > PAGE_SIZE)
+			hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t));
+		else
+#endif
+		hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t),
+						GFP_KERNEL);
+		if (!hashinfo->ehash_locks)
+			return ENOMEM;
+		for (i = 0; i < size; i++)
+			rwlock_init(&hashinfo->ehash_locks[i]);
+	}
+	hashinfo->ehash_locks_mask = size - 1;
+	return 0;
+}
+
+static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
+{
+	if (hashinfo->ehash_locks) {
+#ifdef CONFIG_NUMA
+		unsigned int size = (hashinfo->ehash_locks_mask + 1) *
+					sizeof(rwlock_t);
+		if (size > PAGE_SIZE)
+			vfree(hashinfo->ehash_locks);
+		else
+#else
+		kfree(hashinfo->ehash_locks);
+#endif
+		hashinfo->ehash_locks = NULL;
+	}
+}
+
 extern struct inet_bind_bucket *
 		    inet_bind_bucket_create(struct kmem_cache *cachep,
 					    struct inet_bind_hashbucket *head,
@@ -222,7 +280,7 @@ static inline void __inet_hash(struct inet_hashinfo *hashinfo,
 		sk->sk_hash = inet_sk_ehashfn(sk);
 		head = inet_ehash_bucket(hashinfo, sk->sk_hash);
 		list = &head->chain;
-		lock = &head->lock;
+		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 		write_lock(lock);
 	}
 	__sk_add_node(sk, list);
@@ -253,7 +311,7 @@ static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk)
 		inet_listen_wlock(hashinfo);
 		lock = &hashinfo->lhash_lock;
 	} else {
-		lock = &inet_ehash_bucket(hashinfo, sk->sk_hash)->lock;
+		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 		write_lock_bh(lock);
 	}
 
@@ -354,9 +412,10 @@ static inline struct sock *
 	 */
 	unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
+	rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
 
 	prefetch(head->chain.first);
-	read_lock(&head->lock);
+	read_lock(lock);
 	sk_for_each(sk, node, &head->chain) {
 		if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
 			goto hit; /* You sunk my battleship! */
@@ -369,7 +428,7 @@ static inline struct sock *
 	}
 	sk = NULL;
 out:
-	read_unlock(&head->lock);
+	read_unlock(lock);
 	return sk;
 hit:
 	sock_hold(sk);
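
The helpers above only define the locking abstraction; their call sites live
outside include/net and are therefore absent from this diffstat. A hedged
sketch of how a caller would pair them; the function names, labels, and
surrounding allocation are illustrative, not taken from the patch:

/* Illustrative only: allocate the striped lock table after the ehash
 * table itself, and release both on the error path.  Note that
 * inet_ehash_locks_alloc() reports failure as a positive ENOMEM
 * rather than -ENOMEM, so callers should simply test for nonzero. */
static int example_hashinfo_init(struct inet_hashinfo *hashinfo)
{
	/* ... allocate hashinfo->ehash and set hashinfo->ehash_size ... */

	if (inet_ehash_locks_alloc(hashinfo))
		goto out_free_ehash;
	return 0;

out_free_ehash:
	/* ... free hashinfo->ehash ... */
	return -ENOMEM;
}

/* Teardown mirrors setup. */
static void example_hashinfo_exit(struct inet_hashinfo *hashinfo)
{
	inet_ehash_locks_free(hashinfo);
	/* ... free hashinfo->ehash ... */
}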