diff options
author | Eric Dumazet <dada1@cosmosbay.com> | 2007-11-07 05:40:20 -0500 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-11-07 07:15:11 -0500 |
commit | 230140cffa7feae90ad50bf259db1fa07674f3a7 (patch) | |
tree | 815472add31606423a508a17806b7884f0ab3e2e /net/ipv6 | |
parent | efac52762b1e3fe3035d29e82d8ee1aebc45e4a7 (diff) |
[INET]: Remove per bucket rwlock in tcp/dccp ehash table.
As done two years ago on IP route cache table (commit
22c047ccbc68fa8f3fa57f0e8f906479a062c426), we can avoid using one
lock per hash bucket for the huge TCP/DCCP hash tables.
On a typical x86_64 platform, this saves about 2MB or 4MB of ram, for
little performance difference. (we hit a different cache line for the
rwlock, but then the bucket cache line has a better sharing factor
among cpus, since we dirty it less often). For netstat or ss commands
that want a full scan of hash table, we perform fewer memory accesses.
Using a 'small' table of hashed rwlocks should be more than enough to
provide correct SMP concurrency between different buckets, without
using too much memory. Sizing of this table depends on
num_possible_cpus() and various CONFIG settings.
This patch provides a locking abstraction that may ease future
work using a different model for the TCP/DCCP table.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/inet6_hashtables.c | 19 |
1 files changed, 10 insertions, 9 deletions
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index d6f1026f1943..adc73adadfae 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c | |||
@@ -37,9 +37,8 @@ void __inet6_hash(struct inet_hashinfo *hashinfo, | |||
37 | } else { | 37 | } else { |
38 | unsigned int hash; | 38 | unsigned int hash; |
39 | sk->sk_hash = hash = inet6_sk_ehashfn(sk); | 39 | sk->sk_hash = hash = inet6_sk_ehashfn(sk); |
40 | hash &= (hashinfo->ehash_size - 1); | 40 | list = &inet_ehash_bucket(hashinfo, hash)->chain; |
41 | list = &hashinfo->ehash[hash].chain; | 41 | lock = inet_ehash_lockp(hashinfo, hash); |
42 | lock = &hashinfo->ehash[hash].lock; | ||
43 | write_lock(lock); | 42 | write_lock(lock); |
44 | } | 43 | } |
45 | 44 | ||
@@ -70,9 +69,10 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, | |||
70 | */ | 69 | */ |
71 | unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport); | 70 | unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport); |
72 | struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); | 71 | struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); |
72 | rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); | ||
73 | 73 | ||
74 | prefetch(head->chain.first); | 74 | prefetch(head->chain.first); |
75 | read_lock(&head->lock); | 75 | read_lock(lock); |
76 | sk_for_each(sk, node, &head->chain) { | 76 | sk_for_each(sk, node, &head->chain) { |
77 | /* For IPV6 do the cheaper port and family tests first. */ | 77 | /* For IPV6 do the cheaper port and family tests first. */ |
78 | if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif)) | 78 | if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif)) |
@@ -92,12 +92,12 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, | |||
92 | goto hit; | 92 | goto hit; |
93 | } | 93 | } |
94 | } | 94 | } |
95 | read_unlock(&head->lock); | 95 | read_unlock(lock); |
96 | return NULL; | 96 | return NULL; |
97 | 97 | ||
98 | hit: | 98 | hit: |
99 | sock_hold(sk); | 99 | sock_hold(sk); |
100 | read_unlock(&head->lock); | 100 | read_unlock(lock); |
101 | return sk; | 101 | return sk; |
102 | } | 102 | } |
103 | EXPORT_SYMBOL(__inet6_lookup_established); | 103 | EXPORT_SYMBOL(__inet6_lookup_established); |
@@ -175,12 +175,13 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, | |||
175 | const unsigned int hash = inet6_ehashfn(daddr, lport, saddr, | 175 | const unsigned int hash = inet6_ehashfn(daddr, lport, saddr, |
176 | inet->dport); | 176 | inet->dport); |
177 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); | 177 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); |
178 | rwlock_t *lock = inet_ehash_lockp(hinfo, hash); | ||
178 | struct sock *sk2; | 179 | struct sock *sk2; |
179 | const struct hlist_node *node; | 180 | const struct hlist_node *node; |
180 | struct inet_timewait_sock *tw; | 181 | struct inet_timewait_sock *tw; |
181 | 182 | ||
182 | prefetch(head->chain.first); | 183 | prefetch(head->chain.first); |
183 | write_lock(&head->lock); | 184 | write_lock(lock); |
184 | 185 | ||
185 | /* Check TIME-WAIT sockets first. */ | 186 | /* Check TIME-WAIT sockets first. */ |
186 | sk_for_each(sk2, node, &head->twchain) { | 187 | sk_for_each(sk2, node, &head->twchain) { |
@@ -216,7 +217,7 @@ unique: | |||
216 | __sk_add_node(sk, &head->chain); | 217 | __sk_add_node(sk, &head->chain); |
217 | sk->sk_hash = hash; | 218 | sk->sk_hash = hash; |
218 | sock_prot_inc_use(sk->sk_prot); | 219 | sock_prot_inc_use(sk->sk_prot); |
219 | write_unlock(&head->lock); | 220 | write_unlock(lock); |
220 | 221 | ||
221 | if (twp != NULL) { | 222 | if (twp != NULL) { |
222 | *twp = tw; | 223 | *twp = tw; |
@@ -231,7 +232,7 @@ unique: | |||
231 | return 0; | 232 | return 0; |
232 | 233 | ||
233 | not_unique: | 234 | not_unique: |
234 | write_unlock(&head->lock); | 235 | write_unlock(lock); |
235 | return -EADDRNOTAVAIL; | 236 | return -EADDRNOTAVAIL; |
236 | } | 237 | } |
237 | 238 | ||