diff options
author | Eric Dumazet <dada1@cosmosbay.com> | 2007-11-07 05:40:20 -0500 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-11-07 07:15:11 -0500 |
commit | 230140cffa7feae90ad50bf259db1fa07674f3a7 (patch) | |
tree | 815472add31606423a508a17806b7884f0ab3e2e | |
parent | efac52762b1e3fe3035d29e82d8ee1aebc45e4a7 (diff) |
[INET]: Remove per bucket rwlock in tcp/dccp ehash table.
As done two years ago on the IP route cache table (commit
22c047ccbc68fa8f3fa57f0e8f906479a062c426), we can avoid using one
lock per hash bucket for the huge TCP/DCCP hash tables.
On a typical x86_64 platform, this saves about 2MB or 4MB of RAM, for
little performance difference. (We hit a different cache line for the
rwlock, but then the bucket cache line has a better sharing factor
among cpus, since we dirty it less often.) For netstat or ss commands
that want a full scan of the hash table, we perform fewer memory accesses.
Using a 'small' table of hashed rwlocks should be more than enough to
provide correct SMP concurrency between different buckets, without
using too much memory. Sizing of this table depends on
num_possible_cpus() and various CONFIG settings.
This patch provides some locking abstraction that may ease future
work using a different model for the TCP/DCCP table.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/inet_hashtables.h | 71 | ||||
-rw-r--r-- | net/dccp/proto.c | 9 | ||||
-rw-r--r-- | net/ipv4/inet_diag.c | 9 | ||||
-rw-r--r-- | net/ipv4/inet_hashtables.c | 7 | ||||
-rw-r--r-- | net/ipv4/inet_timewait_sock.c | 13 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 11 | ||||
-rw-r--r-- | net/ipv6/inet6_hashtables.c | 19 |
8 files changed, 106 insertions, 37 deletions
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 4427dcd1e53a..8461cda37490 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h | |||
@@ -37,7 +37,6 @@ | |||
37 | * I'll experiment with dynamic table growth later. | 37 | * I'll experiment with dynamic table growth later. |
38 | */ | 38 | */ |
39 | struct inet_ehash_bucket { | 39 | struct inet_ehash_bucket { |
40 | rwlock_t lock; | ||
41 | struct hlist_head chain; | 40 | struct hlist_head chain; |
42 | struct hlist_head twchain; | 41 | struct hlist_head twchain; |
43 | }; | 42 | }; |
@@ -100,6 +99,9 @@ struct inet_hashinfo { | |||
100 | * TIME_WAIT sockets use a separate chain (twchain). | 99 | * TIME_WAIT sockets use a separate chain (twchain). |
101 | */ | 100 | */ |
102 | struct inet_ehash_bucket *ehash; | 101 | struct inet_ehash_bucket *ehash; |
102 | rwlock_t *ehash_locks; | ||
103 | unsigned int ehash_size; | ||
104 | unsigned int ehash_locks_mask; | ||
103 | 105 | ||
104 | /* Ok, let's try this, I give up, we do need a local binding | 106 | /* Ok, let's try this, I give up, we do need a local binding |
105 | * TCP hash as well as the others for fast bind/connect. | 107 | * TCP hash as well as the others for fast bind/connect. |
@@ -107,7 +109,7 @@ struct inet_hashinfo { | |||
107 | struct inet_bind_hashbucket *bhash; | 109 | struct inet_bind_hashbucket *bhash; |
108 | 110 | ||
109 | unsigned int bhash_size; | 111 | unsigned int bhash_size; |
110 | unsigned int ehash_size; | 112 | /* Note : 4 bytes padding on 64 bit arches */ |
111 | 113 | ||
112 | /* All sockets in TCP_LISTEN state will be in here. This is the only | 114 | /* All sockets in TCP_LISTEN state will be in here. This is the only |
113 | * table where wildcard'd TCP sockets can exist. Hash function here | 115 | * table where wildcard'd TCP sockets can exist. Hash function here |
@@ -134,6 +136,62 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket( | |||
134 | return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)]; | 136 | return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)]; |
135 | } | 137 | } |
136 | 138 | ||
139 | static inline rwlock_t *inet_ehash_lockp( | ||
140 | struct inet_hashinfo *hashinfo, | ||
141 | unsigned int hash) | ||
142 | { | ||
143 | return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask]; | ||
144 | } | ||
145 | |||
146 | static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) | ||
147 | { | ||
148 | unsigned int i, size = 256; | ||
149 | #if defined(CONFIG_PROVE_LOCKING) | ||
150 | unsigned int nr_pcpus = 2; | ||
151 | #else | ||
152 | unsigned int nr_pcpus = num_possible_cpus(); | ||
153 | #endif | ||
154 | if (nr_pcpus >= 4) | ||
155 | size = 512; | ||
156 | if (nr_pcpus >= 8) | ||
157 | size = 1024; | ||
158 | if (nr_pcpus >= 16) | ||
159 | size = 2048; | ||
160 | if (nr_pcpus >= 32) | ||
161 | size = 4096; | ||
162 | if (sizeof(rwlock_t) != 0) { | ||
163 | #ifdef CONFIG_NUMA | ||
164 | if (size * sizeof(rwlock_t) > PAGE_SIZE) | ||
165 | hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t)); | ||
166 | else | ||
167 | #endif | ||
168 | hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t), | ||
169 | GFP_KERNEL); | ||
170 | if (!hashinfo->ehash_locks) | ||
171 | return ENOMEM; | ||
172 | for (i = 0; i < size; i++) | ||
173 | rwlock_init(&hashinfo->ehash_locks[i]); | ||
174 | } | ||
175 | hashinfo->ehash_locks_mask = size - 1; | ||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) | ||
180 | { | ||
181 | if (hashinfo->ehash_locks) { | ||
182 | #ifdef CONFIG_NUMA | ||
183 | unsigned int size = (hashinfo->ehash_locks_mask + 1) * | ||
184 | sizeof(rwlock_t); | ||
185 | if (size > PAGE_SIZE) | ||
186 | vfree(hashinfo->ehash_locks); | ||
187 | else | ||
188 | #else | ||
189 | kfree(hashinfo->ehash_locks); | ||
190 | #endif | ||
191 | hashinfo->ehash_locks = NULL; | ||
192 | } | ||
193 | } | ||
194 | |||
137 | extern struct inet_bind_bucket * | 195 | extern struct inet_bind_bucket * |
138 | inet_bind_bucket_create(struct kmem_cache *cachep, | 196 | inet_bind_bucket_create(struct kmem_cache *cachep, |
139 | struct inet_bind_hashbucket *head, | 197 | struct inet_bind_hashbucket *head, |
@@ -222,7 +280,7 @@ static inline void __inet_hash(struct inet_hashinfo *hashinfo, | |||
222 | sk->sk_hash = inet_sk_ehashfn(sk); | 280 | sk->sk_hash = inet_sk_ehashfn(sk); |
223 | head = inet_ehash_bucket(hashinfo, sk->sk_hash); | 281 | head = inet_ehash_bucket(hashinfo, sk->sk_hash); |
224 | list = &head->chain; | 282 | list = &head->chain; |
225 | lock = &head->lock; | 283 | lock = inet_ehash_lockp(hashinfo, sk->sk_hash); |
226 | write_lock(lock); | 284 | write_lock(lock); |
227 | } | 285 | } |
228 | __sk_add_node(sk, list); | 286 | __sk_add_node(sk, list); |
@@ -253,7 +311,7 @@ static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk) | |||
253 | inet_listen_wlock(hashinfo); | 311 | inet_listen_wlock(hashinfo); |
254 | lock = &hashinfo->lhash_lock; | 312 | lock = &hashinfo->lhash_lock; |
255 | } else { | 313 | } else { |
256 | lock = &inet_ehash_bucket(hashinfo, sk->sk_hash)->lock; | 314 | lock = inet_ehash_lockp(hashinfo, sk->sk_hash); |
257 | write_lock_bh(lock); | 315 | write_lock_bh(lock); |
258 | } | 316 | } |
259 | 317 | ||
@@ -354,9 +412,10 @@ static inline struct sock * | |||
354 | */ | 412 | */ |
355 | unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport); | 413 | unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport); |
356 | struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); | 414 | struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); |
415 | rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); | ||
357 | 416 | ||
358 | prefetch(head->chain.first); | 417 | prefetch(head->chain.first); |
359 | read_lock(&head->lock); | 418 | read_lock(lock); |
360 | sk_for_each(sk, node, &head->chain) { | 419 | sk_for_each(sk, node, &head->chain) { |
361 | if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) | 420 | if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) |
362 | goto hit; /* You sunk my battleship! */ | 421 | goto hit; /* You sunk my battleship! */ |
@@ -369,7 +428,7 @@ static inline struct sock * | |||
369 | } | 428 | } |
370 | sk = NULL; | 429 | sk = NULL; |
371 | out: | 430 | out: |
372 | read_unlock(&head->lock); | 431 | read_unlock(lock); |
373 | return sk; | 432 | return sk; |
374 | hit: | 433 | hit: |
375 | sock_hold(sk); | 434 | sock_hold(sk); |
diff --git a/net/dccp/proto.c b/net/dccp/proto.c index d84973928033..7a3bea9c28c1 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c | |||
@@ -1072,11 +1072,13 @@ static int __init dccp_init(void) | |||
1072 | } | 1072 | } |
1073 | 1073 | ||
1074 | for (i = 0; i < dccp_hashinfo.ehash_size; i++) { | 1074 | for (i = 0; i < dccp_hashinfo.ehash_size; i++) { |
1075 | rwlock_init(&dccp_hashinfo.ehash[i].lock); | ||
1076 | INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); | 1075 | INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); |
1077 | INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain); | 1076 | INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain); |
1078 | } | 1077 | } |
1079 | 1078 | ||
1079 | if (inet_ehash_locks_alloc(&dccp_hashinfo)) | ||
1080 | goto out_free_dccp_ehash; | ||
1081 | |||
1080 | bhash_order = ehash_order; | 1082 | bhash_order = ehash_order; |
1081 | 1083 | ||
1082 | do { | 1084 | do { |
@@ -1091,7 +1093,7 @@ static int __init dccp_init(void) | |||
1091 | 1093 | ||
1092 | if (!dccp_hashinfo.bhash) { | 1094 | if (!dccp_hashinfo.bhash) { |
1093 | DCCP_CRIT("Failed to allocate DCCP bind hash table"); | 1095 | DCCP_CRIT("Failed to allocate DCCP bind hash table"); |
1094 | goto out_free_dccp_ehash; | 1096 | goto out_free_dccp_locks; |
1095 | } | 1097 | } |
1096 | 1098 | ||
1097 | for (i = 0; i < dccp_hashinfo.bhash_size; i++) { | 1099 | for (i = 0; i < dccp_hashinfo.bhash_size; i++) { |
@@ -1121,6 +1123,8 @@ out_free_dccp_mib: | |||
1121 | out_free_dccp_bhash: | 1123 | out_free_dccp_bhash: |
1122 | free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); | 1124 | free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); |
1123 | dccp_hashinfo.bhash = NULL; | 1125 | dccp_hashinfo.bhash = NULL; |
1126 | out_free_dccp_locks: | ||
1127 | inet_ehash_locks_free(&dccp_hashinfo); | ||
1124 | out_free_dccp_ehash: | 1128 | out_free_dccp_ehash: |
1125 | free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); | 1129 | free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); |
1126 | dccp_hashinfo.ehash = NULL; | 1130 | dccp_hashinfo.ehash = NULL; |
@@ -1139,6 +1143,7 @@ static void __exit dccp_fini(void) | |||
1139 | free_pages((unsigned long)dccp_hashinfo.ehash, | 1143 | free_pages((unsigned long)dccp_hashinfo.ehash, |
1140 | get_order(dccp_hashinfo.ehash_size * | 1144 | get_order(dccp_hashinfo.ehash_size * |
1141 | sizeof(struct inet_ehash_bucket))); | 1145 | sizeof(struct inet_ehash_bucket))); |
1146 | inet_ehash_locks_free(&dccp_hashinfo); | ||
1142 | kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); | 1147 | kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); |
1143 | dccp_ackvec_exit(); | 1148 | dccp_ackvec_exit(); |
1144 | dccp_sysctl_exit(); | 1149 | dccp_sysctl_exit(); |
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index dc429b6b0ba6..b0170732b5e9 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
@@ -747,13 +747,14 @@ skip_listen_ht: | |||
747 | 747 | ||
748 | for (i = s_i; i < hashinfo->ehash_size; i++) { | 748 | for (i = s_i; i < hashinfo->ehash_size; i++) { |
749 | struct inet_ehash_bucket *head = &hashinfo->ehash[i]; | 749 | struct inet_ehash_bucket *head = &hashinfo->ehash[i]; |
750 | rwlock_t *lock = inet_ehash_lockp(hashinfo, i); | ||
750 | struct sock *sk; | 751 | struct sock *sk; |
751 | struct hlist_node *node; | 752 | struct hlist_node *node; |
752 | 753 | ||
753 | if (i > s_i) | 754 | if (i > s_i) |
754 | s_num = 0; | 755 | s_num = 0; |
755 | 756 | ||
756 | read_lock_bh(&head->lock); | 757 | read_lock_bh(lock); |
757 | num = 0; | 758 | num = 0; |
758 | sk_for_each(sk, node, &head->chain) { | 759 | sk_for_each(sk, node, &head->chain) { |
759 | struct inet_sock *inet = inet_sk(sk); | 760 | struct inet_sock *inet = inet_sk(sk); |
@@ -769,7 +770,7 @@ skip_listen_ht: | |||
769 | r->id.idiag_dport) | 770 | r->id.idiag_dport) |
770 | goto next_normal; | 771 | goto next_normal; |
771 | if (inet_csk_diag_dump(sk, skb, cb) < 0) { | 772 | if (inet_csk_diag_dump(sk, skb, cb) < 0) { |
772 | read_unlock_bh(&head->lock); | 773 | read_unlock_bh(lock); |
773 | goto done; | 774 | goto done; |
774 | } | 775 | } |
775 | next_normal: | 776 | next_normal: |
@@ -791,14 +792,14 @@ next_normal: | |||
791 | r->id.idiag_dport) | 792 | r->id.idiag_dport) |
792 | goto next_dying; | 793 | goto next_dying; |
793 | if (inet_twsk_diag_dump(tw, skb, cb) < 0) { | 794 | if (inet_twsk_diag_dump(tw, skb, cb) < 0) { |
794 | read_unlock_bh(&head->lock); | 795 | read_unlock_bh(lock); |
795 | goto done; | 796 | goto done; |
796 | } | 797 | } |
797 | next_dying: | 798 | next_dying: |
798 | ++num; | 799 | ++num; |
799 | } | 800 | } |
800 | } | 801 | } |
801 | read_unlock_bh(&head->lock); | 802 | read_unlock_bh(lock); |
802 | } | 803 | } |
803 | 804 | ||
804 | done: | 805 | done: |
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 16eecc7046a3..67704da04fc4 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -204,12 +204,13 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, | |||
204 | const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); | 204 | const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); |
205 | unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); | 205 | unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); |
206 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); | 206 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); |
207 | rwlock_t *lock = inet_ehash_lockp(hinfo, hash); | ||
207 | struct sock *sk2; | 208 | struct sock *sk2; |
208 | const struct hlist_node *node; | 209 | const struct hlist_node *node; |
209 | struct inet_timewait_sock *tw; | 210 | struct inet_timewait_sock *tw; |
210 | 211 | ||
211 | prefetch(head->chain.first); | 212 | prefetch(head->chain.first); |
212 | write_lock(&head->lock); | 213 | write_lock(lock); |
213 | 214 | ||
214 | /* Check TIME-WAIT sockets first. */ | 215 | /* Check TIME-WAIT sockets first. */ |
215 | sk_for_each(sk2, node, &head->twchain) { | 216 | sk_for_each(sk2, node, &head->twchain) { |
@@ -239,7 +240,7 @@ unique: | |||
239 | BUG_TRAP(sk_unhashed(sk)); | 240 | BUG_TRAP(sk_unhashed(sk)); |
240 | __sk_add_node(sk, &head->chain); | 241 | __sk_add_node(sk, &head->chain); |
241 | sock_prot_inc_use(sk->sk_prot); | 242 | sock_prot_inc_use(sk->sk_prot); |
242 | write_unlock(&head->lock); | 243 | write_unlock(lock); |
243 | 244 | ||
244 | if (twp) { | 245 | if (twp) { |
245 | *twp = tw; | 246 | *twp = tw; |
@@ -255,7 +256,7 @@ unique: | |||
255 | return 0; | 256 | return 0; |
256 | 257 | ||
257 | not_unique: | 258 | not_unique: |
258 | write_unlock(&head->lock); | 259 | write_unlock(lock); |
259 | return -EADDRNOTAVAIL; | 260 | return -EADDRNOTAVAIL; |
260 | } | 261 | } |
261 | 262 | ||
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 4e189e28f306..a60b99e0ebdc 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c | |||
@@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, | |||
20 | struct inet_bind_hashbucket *bhead; | 20 | struct inet_bind_hashbucket *bhead; |
21 | struct inet_bind_bucket *tb; | 21 | struct inet_bind_bucket *tb; |
22 | /* Unlink from established hashes. */ | 22 | /* Unlink from established hashes. */ |
23 | struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, tw->tw_hash); | 23 | rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); |
24 | 24 | ||
25 | write_lock(&ehead->lock); | 25 | write_lock(lock); |
26 | if (hlist_unhashed(&tw->tw_node)) { | 26 | if (hlist_unhashed(&tw->tw_node)) { |
27 | write_unlock(&ehead->lock); | 27 | write_unlock(lock); |
28 | return; | 28 | return; |
29 | } | 29 | } |
30 | __hlist_del(&tw->tw_node); | 30 | __hlist_del(&tw->tw_node); |
31 | sk_node_init(&tw->tw_node); | 31 | sk_node_init(&tw->tw_node); |
32 | write_unlock(&ehead->lock); | 32 | write_unlock(lock); |
33 | 33 | ||
34 | /* Disassociate with bind bucket. */ | 34 | /* Disassociate with bind bucket. */ |
35 | bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)]; | 35 | bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)]; |
@@ -59,6 +59,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, | |||
59 | const struct inet_sock *inet = inet_sk(sk); | 59 | const struct inet_sock *inet = inet_sk(sk); |
60 | const struct inet_connection_sock *icsk = inet_csk(sk); | 60 | const struct inet_connection_sock *icsk = inet_csk(sk); |
61 | struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); | 61 | struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); |
62 | rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); | ||
62 | struct inet_bind_hashbucket *bhead; | 63 | struct inet_bind_hashbucket *bhead; |
63 | /* Step 1: Put TW into bind hash. Original socket stays there too. | 64 | /* Step 1: Put TW into bind hash. Original socket stays there too. |
64 | Note, that any socket with inet->num != 0 MUST be bound in | 65 | Note, that any socket with inet->num != 0 MUST be bound in |
@@ -71,7 +72,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, | |||
71 | inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); | 72 | inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); |
72 | spin_unlock(&bhead->lock); | 73 | spin_unlock(&bhead->lock); |
73 | 74 | ||
74 | write_lock(&ehead->lock); | 75 | write_lock(lock); |
75 | 76 | ||
76 | /* Step 2: Remove SK from established hash. */ | 77 | /* Step 2: Remove SK from established hash. */ |
77 | if (__sk_del_node_init(sk)) | 78 | if (__sk_del_node_init(sk)) |
@@ -81,7 +82,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, | |||
81 | inet_twsk_add_node(tw, &ehead->twchain); | 82 | inet_twsk_add_node(tw, &ehead->twchain); |
82 | atomic_inc(&tw->tw_refcnt); | 83 | atomic_inc(&tw->tw_refcnt); |
83 | 84 | ||
84 | write_unlock(&ehead->lock); | 85 | write_unlock(lock); |
85 | } | 86 | } |
86 | 87 | ||
87 | EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); | 88 | EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c64072bb504b..8e65182f7af1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -2456,11 +2456,11 @@ void __init tcp_init(void) | |||
2456 | thash_entries ? 0 : 512 * 1024); | 2456 | thash_entries ? 0 : 512 * 1024); |
2457 | tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; | 2457 | tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; |
2458 | for (i = 0; i < tcp_hashinfo.ehash_size; i++) { | 2458 | for (i = 0; i < tcp_hashinfo.ehash_size; i++) { |
2459 | rwlock_init(&tcp_hashinfo.ehash[i].lock); | ||
2460 | INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); | 2459 | INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); |
2461 | INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain); | 2460 | INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain); |
2462 | } | 2461 | } |
2463 | 2462 | if (inet_ehash_locks_alloc(&tcp_hashinfo)) | |
2463 | panic("TCP: failed to alloc ehash_locks"); | ||
2464 | tcp_hashinfo.bhash = | 2464 | tcp_hashinfo.bhash = |
2465 | alloc_large_system_hash("TCP bind", | 2465 | alloc_large_system_hash("TCP bind", |
2466 | sizeof(struct inet_bind_hashbucket), | 2466 | sizeof(struct inet_bind_hashbucket), |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e9127cdced20..e566f3c67677 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -2049,8 +2049,9 @@ static void *established_get_first(struct seq_file *seq) | |||
2049 | struct sock *sk; | 2049 | struct sock *sk; |
2050 | struct hlist_node *node; | 2050 | struct hlist_node *node; |
2051 | struct inet_timewait_sock *tw; | 2051 | struct inet_timewait_sock *tw; |
2052 | rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); | ||
2052 | 2053 | ||
2053 | read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); | 2054 | read_lock_bh(lock); |
2054 | sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { | 2055 | sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { |
2055 | if (sk->sk_family != st->family) { | 2056 | if (sk->sk_family != st->family) { |
2056 | continue; | 2057 | continue; |
@@ -2067,7 +2068,7 @@ static void *established_get_first(struct seq_file *seq) | |||
2067 | rc = tw; | 2068 | rc = tw; |
2068 | goto out; | 2069 | goto out; |
2069 | } | 2070 | } |
2070 | read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); | 2071 | read_unlock_bh(lock); |
2071 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 2072 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2072 | } | 2073 | } |
2073 | out: | 2074 | out: |
@@ -2094,11 +2095,11 @@ get_tw: | |||
2094 | cur = tw; | 2095 | cur = tw; |
2095 | goto out; | 2096 | goto out; |
2096 | } | 2097 | } |
2097 | read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); | 2098 | read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); |
2098 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 2099 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2099 | 2100 | ||
2100 | if (++st->bucket < tcp_hashinfo.ehash_size) { | 2101 | if (++st->bucket < tcp_hashinfo.ehash_size) { |
2101 | read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); | 2102 | read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); |
2102 | sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); | 2103 | sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); |
2103 | } else { | 2104 | } else { |
2104 | cur = NULL; | 2105 | cur = NULL; |
@@ -2206,7 +2207,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) | |||
2206 | case TCP_SEQ_STATE_TIME_WAIT: | 2207 | case TCP_SEQ_STATE_TIME_WAIT: |
2207 | case TCP_SEQ_STATE_ESTABLISHED: | 2208 | case TCP_SEQ_STATE_ESTABLISHED: |
2208 | if (v) | 2209 | if (v) |
2209 | read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); | 2210 | read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); |
2210 | break; | 2211 | break; |
2211 | } | 2212 | } |
2212 | } | 2213 | } |
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index d6f1026f1943..adc73adadfae 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c | |||
@@ -37,9 +37,8 @@ void __inet6_hash(struct inet_hashinfo *hashinfo, | |||
37 | } else { | 37 | } else { |
38 | unsigned int hash; | 38 | unsigned int hash; |
39 | sk->sk_hash = hash = inet6_sk_ehashfn(sk); | 39 | sk->sk_hash = hash = inet6_sk_ehashfn(sk); |
40 | hash &= (hashinfo->ehash_size - 1); | 40 | list = &inet_ehash_bucket(hashinfo, hash)->chain; |
41 | list = &hashinfo->ehash[hash].chain; | 41 | lock = inet_ehash_lockp(hashinfo, hash); |
42 | lock = &hashinfo->ehash[hash].lock; | ||
43 | write_lock(lock); | 42 | write_lock(lock); |
44 | } | 43 | } |
45 | 44 | ||
@@ -70,9 +69,10 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, | |||
70 | */ | 69 | */ |
71 | unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport); | 70 | unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport); |
72 | struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); | 71 | struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); |
72 | rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); | ||
73 | 73 | ||
74 | prefetch(head->chain.first); | 74 | prefetch(head->chain.first); |
75 | read_lock(&head->lock); | 75 | read_lock(lock); |
76 | sk_for_each(sk, node, &head->chain) { | 76 | sk_for_each(sk, node, &head->chain) { |
77 | /* For IPV6 do the cheaper port and family tests first. */ | 77 | /* For IPV6 do the cheaper port and family tests first. */ |
78 | if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif)) | 78 | if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif)) |
@@ -92,12 +92,12 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, | |||
92 | goto hit; | 92 | goto hit; |
93 | } | 93 | } |
94 | } | 94 | } |
95 | read_unlock(&head->lock); | 95 | read_unlock(lock); |
96 | return NULL; | 96 | return NULL; |
97 | 97 | ||
98 | hit: | 98 | hit: |
99 | sock_hold(sk); | 99 | sock_hold(sk); |
100 | read_unlock(&head->lock); | 100 | read_unlock(lock); |
101 | return sk; | 101 | return sk; |
102 | } | 102 | } |
103 | EXPORT_SYMBOL(__inet6_lookup_established); | 103 | EXPORT_SYMBOL(__inet6_lookup_established); |
@@ -175,12 +175,13 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, | |||
175 | const unsigned int hash = inet6_ehashfn(daddr, lport, saddr, | 175 | const unsigned int hash = inet6_ehashfn(daddr, lport, saddr, |
176 | inet->dport); | 176 | inet->dport); |
177 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); | 177 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); |
178 | rwlock_t *lock = inet_ehash_lockp(hinfo, hash); | ||
178 | struct sock *sk2; | 179 | struct sock *sk2; |
179 | const struct hlist_node *node; | 180 | const struct hlist_node *node; |
180 | struct inet_timewait_sock *tw; | 181 | struct inet_timewait_sock *tw; |
181 | 182 | ||
182 | prefetch(head->chain.first); | 183 | prefetch(head->chain.first); |
183 | write_lock(&head->lock); | 184 | write_lock(lock); |
184 | 185 | ||
185 | /* Check TIME-WAIT sockets first. */ | 186 | /* Check TIME-WAIT sockets first. */ |
186 | sk_for_each(sk2, node, &head->twchain) { | 187 | sk_for_each(sk2, node, &head->twchain) { |
@@ -216,7 +217,7 @@ unique: | |||
216 | __sk_add_node(sk, &head->chain); | 217 | __sk_add_node(sk, &head->chain); |
217 | sk->sk_hash = hash; | 218 | sk->sk_hash = hash; |
218 | sock_prot_inc_use(sk->sk_prot); | 219 | sock_prot_inc_use(sk->sk_prot); |
219 | write_unlock(&head->lock); | 220 | write_unlock(lock); |
220 | 221 | ||
221 | if (twp != NULL) { | 222 | if (twp != NULL) { |
222 | *twp = tw; | 223 | *twp = tw; |
@@ -231,7 +232,7 @@ unique: | |||
231 | return 0; | 232 | return 0; |
232 | 233 | ||
233 | not_unique: | 234 | not_unique: |
234 | write_unlock(&head->lock); | 235 | write_unlock(lock); |
235 | return -EADDRNOTAVAIL; | 236 | return -EADDRNOTAVAIL; |
236 | } | 237 | } |
237 | 238 | ||