author    Linus Torvalds <torvalds@linux-foundation.org>	2015-04-15 12:00:47 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>	2015-04-15 12:00:47 -0400
commit    6c373ca89399c5a3f7ef210ad8f63dc3437da345 (patch)
tree      74d1ec65087df1da1021b43ac51acc1ee8601809 /lib
parent    bb0fd7ab0986105765d11baa82e619c618a235aa (diff)
parent    9f9151412dd7aae0e3f51a89ae4a1f8755fdb4d0 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:

 1) Add BQL support to via-rhine, from Tino Reichardt.

 2) Integrate SWITCHDEV layer support into the DSA layer, so DSA drivers
    can support hw switch offloading. From Florian Fainelli.

 3) Allow 'ip address' commands to initiate multicast group join/leave,
    from Madhu Challa.

 4) Many ipv4 FIB lookup optimizations from Alexander Duyck.

 5) Support EBPF in cls_bpf classifier and act_bpf action, from Daniel
    Borkmann.

 6) Remove the ugly compat support in ARP for ugly layers like ax25,
    rose, etc.  And use this to clean up the neigh layer, then use it to
    implement MPLS support.  All from Eric Biederman.

 7) Support L3 forwarding offloading in switches, from Scott Feldman.

 8) Collapse the LOCAL and MAIN ipv4 FIB tables when possible, to speed
    up route lookups even further.  From Alexander Duyck.

 9) Many improvements and bug fixes to the rhashtable implementation,
    from Herbert Xu and Thomas Graf.  In particular, in the case where
    an rhashtable user bulk adds a large number of items into an empty
    table, we expand the table much more sanely.

10) Don't make the tcp_metrics hash table per-namespace, from Eric
    Biederman.

11) Extend EBPF to access SKB fields, from Alexei Starovoitov.

12) Split out new connection request sockets so that they can be
    established in the main hash table.  Much less false sharing since
    hash lookups go direct to the request sockets instead of having to
    go first to the listener then to the request socks hashed
    underneath.  From Eric Dumazet.

13) Add async I/O support for crypto AF_ALG sockets, from Tadeusz Struk.

14) Support stable privacy address generation for RFC7217 in IPV6.  From
    Hannes Frederic Sowa.

15) Hash network namespace into IP frag IDs, also from Hannes Frederic
    Sowa.

16) Convert PTP get/set methods to use 64-bit time, from Richard
    Cochran.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1816 commits)
  fm10k: Bump driver version to 0.15.2
  fm10k: corrected VF multicast update
  fm10k: mbx_update_max_size does not drop all oversized messages
  fm10k: reset head instead of calling update_max_size
  fm10k: renamed mbx_tx_dropped to mbx_tx_oversized
  fm10k: update xcast mode before synchronizing multicast addresses
  fm10k: start service timer on probe
  fm10k: fix function header comment
  fm10k: comment next_vf_mbx flow
  fm10k: don't handle mailbox events in iov_event path and always process mailbox
  fm10k: use separate workqueue for fm10k driver
  fm10k: Set PF queues to unlimited bandwidth during virtualization
  fm10k: expose tx_timeout_count as an ethtool stat
  fm10k: only increment tx_timeout_count in Tx hang path
  fm10k: remove extraneous "Reset interface" message
  fm10k: separate PF only stats so that VF does not display them
  fm10k: use hw->mac.max_queues for stats
  fm10k: only show actual queues, not the maximum in hardware
  fm10k: allow creation of VLAN on default vid
  fm10k: fix unused warnings
  ...
Diffstat (limited to 'lib')
-rw-r--r--  lib/rhashtable.c      | 1022
-rw-r--r--  lib/sha1.c            |    1
-rw-r--r--  lib/test_rhashtable.c |   58
3 files changed, 380 insertions, 701 deletions
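The lib/ portion of this pull is dominated by Herbert Xu's rhashtable rework below. Fixed-key users now pass a compile-time const struct rhashtable_params to inline *_fast() helpers instead of calling rhashtable_lookup()/rhashtable_insert() directly, and insertion can now fail and must be checked, as the test_rhashtable.c hunks at the bottom show. A minimal sketch of the new fixed-key usage follows; struct my_obj, the key value and the error handling are illustrative, while the rhashtable_* calls and parameter fields are the ones visible in the diff.

#include <linux/jhash.h>
#include <linux/rhashtable.h>
#include <linux/slab.h>

struct my_obj {
	int key;			/* hypothetical payload */
	struct rhash_head node;		/* linkage owned by the hash table */
};

/* Compile-time parameters, mirroring test_rht_params further down. */
static const struct rhashtable_params my_params = {
	.head_offset = offsetof(struct my_obj, node),
	.key_offset  = offsetof(struct my_obj, key),
	.key_len     = sizeof(int),
	.hashfn      = jhash,
	.automatic_shrinking = true,
};

static int my_rht_example(void)
{
	struct rhashtable ht;
	struct my_obj *obj, *found;
	int key = 42, err;

	err = rhashtable_init(&ht, &my_params);
	if (err)
		return err;

	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	if (!obj) {
		err = -ENOMEM;
		goto out_destroy;
	}
	obj->key = key;

	/* Insertion can fail now, so the result must be checked. */
	err = rhashtable_insert_fast(&ht, &obj->node, my_params);
	if (err) {
		kfree(obj);
		goto out_destroy;
	}

	rcu_read_lock();
	found = rhashtable_lookup_fast(&ht, &key, my_params);
	rcu_read_unlock();

	if (found)
		rhashtable_remove_fast(&ht, &found->node, my_params);

	/* Sole writer in this sketch; with concurrent readers the free
	 * would have to be deferred past a grace period (kfree_rcu).
	 */
	kfree(obj);

out_destroy:
	rhashtable_destroy(&ht);
	return err;
}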
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index b5344ef4c684..4898442b837f 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -1,13 +1,13 @@
1/* 1/*
2 * Resizable, Scalable, Concurrent Hash Table 2 * Resizable, Scalable, Concurrent Hash Table
3 * 3 *
4 * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
4 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch> 5 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
5 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> 6 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
6 * 7 *
7 * Based on the following paper:
8 * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf
9 *
10 * Code partially derived from nft_hash 8 * Code partially derived from nft_hash
9 * Rewritten with rehash code from br_multicast plus single list
10 * pointer as suggested by Josh Triplett
11 * 11 *
12 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2 as 13 * it under the terms of the GNU General Public License version 2 as
@@ -27,121 +27,18 @@
27#include <linux/err.h> 27#include <linux/err.h>
28 28
29#define HASH_DEFAULT_SIZE 64UL 29#define HASH_DEFAULT_SIZE 64UL
30#define HASH_MIN_SIZE 4UL 30#define HASH_MIN_SIZE 4U
31#define BUCKET_LOCKS_PER_CPU 128UL 31#define BUCKET_LOCKS_PER_CPU 128UL
32 32
33/* Base bits plus 1 bit for nulls marker */ 33static u32 head_hashfn(struct rhashtable *ht,
34#define HASH_RESERVED_SPACE (RHT_BASE_BITS + 1)
35
36enum {
37 RHT_LOCK_NORMAL,
38 RHT_LOCK_NESTED,
39};
40
41/* The bucket lock is selected based on the hash and protects mutations
42 * on a group of hash buckets.
43 *
44 * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
45 * a single lock always covers both buckets which may both contains
46 * entries which link to the same bucket of the old table during resizing.
47 * This allows to simplify the locking as locking the bucket in both
48 * tables during resize always guarantee protection.
49 *
50 * IMPORTANT: When holding the bucket lock of both the old and new table
51 * during expansions and shrinking, the old bucket lock must always be
52 * acquired first.
53 */
54static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash)
55{
56 return &tbl->locks[hash & tbl->locks_mask];
57}
58
59static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
60{
61 return (void *) he - ht->p.head_offset;
62}
63
64static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash)
65{
66 return hash & (tbl->size - 1);
67}
68
69static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr)
70{
71 u32 hash;
72
73 if (unlikely(!ht->p.key_len))
74 hash = ht->p.obj_hashfn(ptr, ht->p.hash_rnd);
75 else
76 hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len,
77 ht->p.hash_rnd);
78
79 return hash >> HASH_RESERVED_SPACE;
80}
81
82static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len)
83{
84 return ht->p.hashfn(key, len, ht->p.hash_rnd) >> HASH_RESERVED_SPACE;
85}
86
87static u32 head_hashfn(const struct rhashtable *ht,
88 const struct bucket_table *tbl, 34 const struct bucket_table *tbl,
89 const struct rhash_head *he) 35 const struct rhash_head *he)
90{ 36{
91 return rht_bucket_index(tbl, obj_raw_hashfn(ht, rht_obj(ht, he))); 37 return rht_head_hashfn(ht, tbl, he, ht->p);
92} 38}
93 39
94#ifdef CONFIG_PROVE_LOCKING 40#ifdef CONFIG_PROVE_LOCKING
95static void debug_dump_buckets(const struct rhashtable *ht,
96 const struct bucket_table *tbl)
97{
98 struct rhash_head *he;
99 unsigned int i, hash;
100
101 for (i = 0; i < tbl->size; i++) {
102 pr_warn(" [Bucket %d] ", i);
103 rht_for_each_rcu(he, tbl, i) {
104 hash = head_hashfn(ht, tbl, he);
105 pr_cont("[hash = %#x, lock = %p] ",
106 hash, bucket_lock(tbl, hash));
107 }
108 pr_cont("\n");
109 }
110
111}
112
113static void debug_dump_table(struct rhashtable *ht,
114 const struct bucket_table *tbl,
115 unsigned int hash)
116{
117 struct bucket_table *old_tbl, *future_tbl;
118
119 pr_emerg("BUG: lock for hash %#x in table %p not held\n",
120 hash, tbl);
121
122 rcu_read_lock();
123 future_tbl = rht_dereference_rcu(ht->future_tbl, ht);
124 old_tbl = rht_dereference_rcu(ht->tbl, ht);
125 if (future_tbl != old_tbl) {
126 pr_warn("Future table %p (size: %zd)\n",
127 future_tbl, future_tbl->size);
128 debug_dump_buckets(ht, future_tbl);
129 }
130
131 pr_warn("Table %p (size: %zd)\n", old_tbl, old_tbl->size);
132 debug_dump_buckets(ht, old_tbl);
133
134 rcu_read_unlock();
135}
136
137#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) 41#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
138#define ASSERT_BUCKET_LOCK(HT, TBL, HASH) \
139 do { \
140 if (unlikely(!lockdep_rht_bucket_is_held(TBL, HASH))) { \
141 debug_dump_table(HT, TBL, HASH); \
142 BUG(); \
143 } \
144 } while (0)
145 42
146int lockdep_rht_mutex_is_held(struct rhashtable *ht) 43int lockdep_rht_mutex_is_held(struct rhashtable *ht)
147{ 44{
@@ -151,30 +48,18 @@ EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
151 48
152int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) 49int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
153{ 50{
154 spinlock_t *lock = bucket_lock(tbl, hash); 51 spinlock_t *lock = rht_bucket_lock(tbl, hash);
155 52
156 return (debug_locks) ? lockdep_is_held(lock) : 1; 53 return (debug_locks) ? lockdep_is_held(lock) : 1;
157} 54}
158EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); 55EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
159#else 56#else
160#define ASSERT_RHT_MUTEX(HT) 57#define ASSERT_RHT_MUTEX(HT)
161#define ASSERT_BUCKET_LOCK(HT, TBL, HASH)
162#endif 58#endif
163 59
164 60
165static struct rhash_head __rcu **bucket_tail(struct bucket_table *tbl, u32 n) 61static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
166{ 62 gfp_t gfp)
167 struct rhash_head __rcu **pprev;
168
169 for (pprev = &tbl->buckets[n];
170 !rht_is_a_nulls(rht_dereference_bucket(*pprev, tbl, n));
171 pprev = &rht_dereference_bucket(*pprev, tbl, n)->next)
172 ;
173
174 return pprev;
175}
176
177static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
178{ 63{
179 unsigned int i, size; 64 unsigned int i, size;
180#if defined(CONFIG_PROVE_LOCKING) 65#if defined(CONFIG_PROVE_LOCKING)
@@ -191,12 +76,13 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
191 76
192 if (sizeof(spinlock_t) != 0) { 77 if (sizeof(spinlock_t) != 0) {
193#ifdef CONFIG_NUMA 78#ifdef CONFIG_NUMA
194 if (size * sizeof(spinlock_t) > PAGE_SIZE) 79 if (size * sizeof(spinlock_t) > PAGE_SIZE &&
80 gfp == GFP_KERNEL)
195 tbl->locks = vmalloc(size * sizeof(spinlock_t)); 81 tbl->locks = vmalloc(size * sizeof(spinlock_t));
196 else 82 else
197#endif 83#endif
198 tbl->locks = kmalloc_array(size, sizeof(spinlock_t), 84 tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
199 GFP_KERNEL); 85 gfp);
200 if (!tbl->locks) 86 if (!tbl->locks)
201 return -ENOMEM; 87 return -ENOMEM;
202 for (i = 0; i < size; i++) 88 for (i = 0; i < size; i++)
@@ -215,153 +101,181 @@ static void bucket_table_free(const struct bucket_table *tbl)
215 kvfree(tbl); 101 kvfree(tbl);
216} 102}
217 103
104static void bucket_table_free_rcu(struct rcu_head *head)
105{
106 bucket_table_free(container_of(head, struct bucket_table, rcu));
107}
108
218static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, 109static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
219 size_t nbuckets) 110 size_t nbuckets,
111 gfp_t gfp)
220{ 112{
221 struct bucket_table *tbl = NULL; 113 struct bucket_table *tbl = NULL;
222 size_t size; 114 size_t size;
223 int i; 115 int i;
224 116
225 size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); 117 size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
226 if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) 118 if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
227 tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); 119 gfp != GFP_KERNEL)
228 if (tbl == NULL) 120 tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
121 if (tbl == NULL && gfp == GFP_KERNEL)
229 tbl = vzalloc(size); 122 tbl = vzalloc(size);
230 if (tbl == NULL) 123 if (tbl == NULL)
231 return NULL; 124 return NULL;
232 125
233 tbl->size = nbuckets; 126 tbl->size = nbuckets;
234 127
235 if (alloc_bucket_locks(ht, tbl) < 0) { 128 if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
236 bucket_table_free(tbl); 129 bucket_table_free(tbl);
237 return NULL; 130 return NULL;
238 } 131 }
239 132
133 INIT_LIST_HEAD(&tbl->walkers);
134
135 get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
136
240 for (i = 0; i < nbuckets; i++) 137 for (i = 0; i < nbuckets; i++)
241 INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i); 138 INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
242 139
243 return tbl; 140 return tbl;
244} 141}
245 142
246/** 143static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
247 * rht_grow_above_75 - returns true if nelems > 0.75 * table-size 144 struct bucket_table *tbl)
248 * @ht: hash table
249 * @new_size: new table size
250 */
251static bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
252{ 145{
253 /* Expand table when exceeding 75% load */ 146 struct bucket_table *new_tbl;
254 return atomic_read(&ht->nelems) > (new_size / 4 * 3) &&
255 (!ht->p.max_shift || atomic_read(&ht->shift) < ht->p.max_shift);
256}
257 147
258/** 148 do {
259 * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size 149 new_tbl = tbl;
260 * @ht: hash table 150 tbl = rht_dereference_rcu(tbl->future_tbl, ht);
261 * @new_size: new table size 151 } while (tbl);
262 */
263static bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
264{
265 /* Shrink table beneath 30% load */
266 return atomic_read(&ht->nelems) < (new_size * 3 / 10) &&
267 (atomic_read(&ht->shift) > ht->p.min_shift);
268}
269 152
270static void lock_buckets(struct bucket_table *new_tbl, 153 return new_tbl;
271 struct bucket_table *old_tbl, unsigned int hash)
272 __acquires(old_bucket_lock)
273{
274 spin_lock_bh(bucket_lock(old_tbl, hash));
275 if (new_tbl != old_tbl)
276 spin_lock_bh_nested(bucket_lock(new_tbl, hash),
277 RHT_LOCK_NESTED);
278} 154}
279 155
280static void unlock_buckets(struct bucket_table *new_tbl, 156static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
281 struct bucket_table *old_tbl, unsigned int hash)
282 __releases(old_bucket_lock)
283{ 157{
284 if (new_tbl != old_tbl) 158 struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
285 spin_unlock_bh(bucket_lock(new_tbl, hash)); 159 struct bucket_table *new_tbl = rhashtable_last_table(ht,
286 spin_unlock_bh(bucket_lock(old_tbl, hash)); 160 rht_dereference_rcu(old_tbl->future_tbl, ht));
161 struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
162 int err = -ENOENT;
163 struct rhash_head *head, *next, *entry;
164 spinlock_t *new_bucket_lock;
165 unsigned int new_hash;
166
167 rht_for_each(entry, old_tbl, old_hash) {
168 err = 0;
169 next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
170
171 if (rht_is_a_nulls(next))
172 break;
173
174 pprev = &entry->next;
175 }
176
177 if (err)
178 goto out;
179
180 new_hash = head_hashfn(ht, new_tbl, entry);
181
182 new_bucket_lock = rht_bucket_lock(new_tbl, new_hash);
183
184 spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
185 head = rht_dereference_bucket(new_tbl->buckets[new_hash],
186 new_tbl, new_hash);
187
188 if (rht_is_a_nulls(head))
189 INIT_RHT_NULLS_HEAD(entry->next, ht, new_hash);
190 else
191 RCU_INIT_POINTER(entry->next, head);
192
193 rcu_assign_pointer(new_tbl->buckets[new_hash], entry);
194 spin_unlock(new_bucket_lock);
195
196 rcu_assign_pointer(*pprev, next);
197
198out:
199 return err;
287} 200}
288 201
289/** 202static void rhashtable_rehash_chain(struct rhashtable *ht,
290 * Unlink entries on bucket which hash to different bucket. 203 unsigned int old_hash)
291 *
292 * Returns true if no more work needs to be performed on the bucket.
293 */
294static bool hashtable_chain_unzip(struct rhashtable *ht,
295 const struct bucket_table *new_tbl,
296 struct bucket_table *old_tbl,
297 size_t old_hash)
298{ 204{
299 struct rhash_head *he, *p, *next; 205 struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
300 unsigned int new_hash, new_hash2; 206 spinlock_t *old_bucket_lock;
301
302 ASSERT_BUCKET_LOCK(ht, old_tbl, old_hash);
303 207
304 /* Old bucket empty, no work needed. */ 208 old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
305 p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
306 old_hash);
307 if (rht_is_a_nulls(p))
308 return false;
309 209
310 new_hash = head_hashfn(ht, new_tbl, p); 210 spin_lock_bh(old_bucket_lock);
311 ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash); 211 while (!rhashtable_rehash_one(ht, old_hash))
212 ;
213 old_tbl->rehash++;
214 spin_unlock_bh(old_bucket_lock);
215}
312 216
313 /* Advance the old bucket pointer one or more times until it 217static int rhashtable_rehash_attach(struct rhashtable *ht,
314 * reaches a node that doesn't hash to the same bucket as the 218 struct bucket_table *old_tbl,
315 * previous node p. Call the previous node p; 219 struct bucket_table *new_tbl)
316 */ 220{
317 rht_for_each_continue(he, p->next, old_tbl, old_hash) { 221 /* Protect future_tbl using the first bucket lock. */
318 new_hash2 = head_hashfn(ht, new_tbl, he); 222 spin_lock_bh(old_tbl->locks);
319 ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash2);
320 223
321 if (new_hash != new_hash2) 224 /* Did somebody beat us to it? */
322 break; 225 if (rcu_access_pointer(old_tbl->future_tbl)) {
323 p = he; 226 spin_unlock_bh(old_tbl->locks);
227 return -EEXIST;
324 } 228 }
325 rcu_assign_pointer(old_tbl->buckets[old_hash], p->next);
326 229
327 /* Find the subsequent node which does hash to the same 230 /* Make insertions go into the new, empty table right away. Deletions
328 * bucket as node P, or NULL if no such node exists. 231 * and lookups will be attempted in both tables until we synchronize.
329 */ 232 */
330 INIT_RHT_NULLS_HEAD(next, ht, old_hash); 233 rcu_assign_pointer(old_tbl->future_tbl, new_tbl);
331 if (!rht_is_a_nulls(he)) {
332 rht_for_each_continue(he, he->next, old_tbl, old_hash) {
333 if (head_hashfn(ht, new_tbl, he) == new_hash) {
334 next = he;
335 break;
336 }
337 }
338 }
339 234
340 /* Set p's next pointer to that subsequent node pointer, 235 /* Ensure the new table is visible to readers. */
341 * bypassing the nodes which do not hash to p's bucket 236 smp_wmb();
342 */
343 rcu_assign_pointer(p->next, next);
344 237
345 p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl, 238 spin_unlock_bh(old_tbl->locks);
346 old_hash);
347 239
348 return !rht_is_a_nulls(p); 240 return 0;
349} 241}
350 242
351static void link_old_to_new(struct rhashtable *ht, struct bucket_table *new_tbl, 243static int rhashtable_rehash_table(struct rhashtable *ht)
352 unsigned int new_hash, struct rhash_head *entry)
353{ 244{
354 ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash); 245 struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
246 struct bucket_table *new_tbl;
247 struct rhashtable_walker *walker;
248 unsigned int old_hash;
249
250 new_tbl = rht_dereference(old_tbl->future_tbl, ht);
251 if (!new_tbl)
252 return 0;
253
254 for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
255 rhashtable_rehash_chain(ht, old_hash);
256
257 /* Publish the new table pointer. */
258 rcu_assign_pointer(ht->tbl, new_tbl);
259
260 spin_lock(&ht->lock);
261 list_for_each_entry(walker, &old_tbl->walkers, list)
262 walker->tbl = NULL;
263 spin_unlock(&ht->lock);
355 264
356 rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry); 265 /* Wait for readers. All new readers will see the new
266 * table, and thus no references to the old table will
267 * remain.
268 */
269 call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
270
271 return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
357} 272}
358 273
359/** 274/**
360 * rhashtable_expand - Expand hash table while allowing concurrent lookups 275 * rhashtable_expand - Expand hash table while allowing concurrent lookups
361 * @ht: the hash table to expand 276 * @ht: the hash table to expand
362 * 277 *
363 * A secondary bucket array is allocated and the hash entries are migrated 278 * A secondary bucket array is allocated and the hash entries are migrated.
364 * while keeping them on both lists until the end of the RCU grace period.
365 * 279 *
366 * This function may only be called in a context where it is safe to call 280 * This function may only be called in a context where it is safe to call
367 * synchronize_rcu(), e.g. not within a rcu_read_lock() section. 281 * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
@@ -372,89 +286,32 @@ static void link_old_to_new(struct rhashtable *ht, struct bucket_table *new_tbl,
372 * It is valid to have concurrent insertions and deletions protected by per 286 * It is valid to have concurrent insertions and deletions protected by per
373 * bucket locks or concurrent RCU protected lookups and traversals. 287 * bucket locks or concurrent RCU protected lookups and traversals.
374 */ 288 */
375int rhashtable_expand(struct rhashtable *ht) 289static int rhashtable_expand(struct rhashtable *ht)
376{ 290{
377 struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); 291 struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
378 struct rhash_head *he; 292 int err;
379 unsigned int new_hash, old_hash;
380 bool complete = false;
381 293
382 ASSERT_RHT_MUTEX(ht); 294 ASSERT_RHT_MUTEX(ht);
383 295
384 new_tbl = bucket_table_alloc(ht, old_tbl->size * 2); 296 old_tbl = rhashtable_last_table(ht, old_tbl);
297
298 new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
385 if (new_tbl == NULL) 299 if (new_tbl == NULL)
386 return -ENOMEM; 300 return -ENOMEM;
387 301
388 atomic_inc(&ht->shift); 302 err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
389 303 if (err)
390 /* Make insertions go into the new, empty table right away. Deletions 304 bucket_table_free(new_tbl);
391 * and lookups will be attempted in both tables until we synchronize.
392 * The synchronize_rcu() guarantees for the new table to be picked up
393 * so no new additions go into the old table while we relink.
394 */
395 rcu_assign_pointer(ht->future_tbl, new_tbl);
396 synchronize_rcu();
397
398 /* For each new bucket, search the corresponding old bucket for the
399 * first entry that hashes to the new bucket, and link the end of
400 * newly formed bucket chain (containing entries added to future
401 * table) to that entry. Since all the entries which will end up in
402 * the new bucket appear in the same old bucket, this constructs an
403 * entirely valid new hash table, but with multiple buckets
404 * "zipped" together into a single imprecise chain.
405 */
406 for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
407 old_hash = rht_bucket_index(old_tbl, new_hash);
408 lock_buckets(new_tbl, old_tbl, new_hash);
409 rht_for_each(he, old_tbl, old_hash) {
410 if (head_hashfn(ht, new_tbl, he) == new_hash) {
411 link_old_to_new(ht, new_tbl, new_hash, he);
412 break;
413 }
414 }
415 unlock_buckets(new_tbl, old_tbl, new_hash);
416 cond_resched();
417 }
418
419 /* Unzip interleaved hash chains */
420 while (!complete && !ht->being_destroyed) {
421 /* Wait for readers. All new readers will see the new
422 * table, and thus no references to the old table will
423 * remain.
424 */
425 synchronize_rcu();
426
427 /* For each bucket in the old table (each of which
428 * contains items from multiple buckets of the new
429 * table): ...
430 */
431 complete = true;
432 for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
433 lock_buckets(new_tbl, old_tbl, old_hash);
434
435 if (hashtable_chain_unzip(ht, new_tbl, old_tbl,
436 old_hash))
437 complete = false;
438
439 unlock_buckets(new_tbl, old_tbl, old_hash);
440 cond_resched();
441 }
442 }
443 305
444 rcu_assign_pointer(ht->tbl, new_tbl); 306 return err;
445 synchronize_rcu();
446
447 bucket_table_free(old_tbl);
448 return 0;
449} 307}
450EXPORT_SYMBOL_GPL(rhashtable_expand);
451 308
452/** 309/**
453 * rhashtable_shrink - Shrink hash table while allowing concurrent lookups 310 * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
454 * @ht: the hash table to shrink 311 * @ht: the hash table to shrink
455 * 312 *
456 * This function may only be called in a context where it is safe to call 313 * This function shrinks the hash table to fit, i.e., the smallest
457 * synchronize_rcu(), e.g. not within a rcu_read_lock() section. 314 * size would not cause it to expand right away automatically.
458 * 315 *
459 * The caller must ensure that no concurrent resizing occurs by holding 316 * The caller must ensure that no concurrent resizing occurs by holding
460 * ht->mutex. 317 * ht->mutex.
@@ -465,395 +322,146 @@ EXPORT_SYMBOL_GPL(rhashtable_expand);
465 * It is valid to have concurrent insertions and deletions protected by per 322 * It is valid to have concurrent insertions and deletions protected by per
466 * bucket locks or concurrent RCU protected lookups and traversals. 323 * bucket locks or concurrent RCU protected lookups and traversals.
467 */ 324 */
468int rhashtable_shrink(struct rhashtable *ht) 325static int rhashtable_shrink(struct rhashtable *ht)
469{ 326{
470 struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht); 327 struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
471 unsigned int new_hash; 328 unsigned int size;
329 int err;
472 330
473 ASSERT_RHT_MUTEX(ht); 331 ASSERT_RHT_MUTEX(ht);
474 332
475 new_tbl = bucket_table_alloc(ht, tbl->size / 2); 333 size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
476 if (new_tbl == NULL) 334 if (size < ht->p.min_size)
477 return -ENOMEM; 335 size = ht->p.min_size;
478
479 rcu_assign_pointer(ht->future_tbl, new_tbl);
480 synchronize_rcu();
481
482 /* Link the first entry in the old bucket to the end of the
483 * bucket in the new table. As entries are concurrently being
484 * added to the new table, lock down the new bucket. As we
485 * always divide the size in half when shrinking, each bucket
486 * in the new table maps to exactly two buckets in the old
487 * table.
488 */
489 for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
490 lock_buckets(new_tbl, tbl, new_hash);
491
492 rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
493 tbl->buckets[new_hash]);
494 ASSERT_BUCKET_LOCK(ht, tbl, new_hash + new_tbl->size);
495 rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
496 tbl->buckets[new_hash + new_tbl->size]);
497 336
498 unlock_buckets(new_tbl, tbl, new_hash); 337 if (old_tbl->size <= size)
499 cond_resched(); 338 return 0;
500 }
501 339
502 /* Publish the new, valid hash table */ 340 if (rht_dereference(old_tbl->future_tbl, ht))
503 rcu_assign_pointer(ht->tbl, new_tbl); 341 return -EEXIST;
504 atomic_dec(&ht->shift);
505 342
506 /* Wait for readers. No new readers will have references to the 343 new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
507 * old hash table. 344 if (new_tbl == NULL)
508 */ 345 return -ENOMEM;
509 synchronize_rcu();
510 346
511 bucket_table_free(tbl); 347 err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
348 if (err)
349 bucket_table_free(new_tbl);
512 350
513 return 0; 351 return err;
514} 352}
515EXPORT_SYMBOL_GPL(rhashtable_shrink);
516 353
517static void rht_deferred_worker(struct work_struct *work) 354static void rht_deferred_worker(struct work_struct *work)
518{ 355{
519 struct rhashtable *ht; 356 struct rhashtable *ht;
520 struct bucket_table *tbl; 357 struct bucket_table *tbl;
521 struct rhashtable_walker *walker; 358 int err = 0;
522 359
523 ht = container_of(work, struct rhashtable, run_work); 360 ht = container_of(work, struct rhashtable, run_work);
524 mutex_lock(&ht->mutex); 361 mutex_lock(&ht->mutex);
525 if (ht->being_destroyed)
526 goto unlock;
527 362
528 tbl = rht_dereference(ht->tbl, ht); 363 tbl = rht_dereference(ht->tbl, ht);
364 tbl = rhashtable_last_table(ht, tbl);
529 365
530 list_for_each_entry(walker, &ht->walkers, list) 366 if (rht_grow_above_75(ht, tbl))
531 walker->resize = true;
532
533 if (rht_grow_above_75(ht, tbl->size))
534 rhashtable_expand(ht); 367 rhashtable_expand(ht);
535 else if (rht_shrink_below_30(ht, tbl->size)) 368 else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
536 rhashtable_shrink(ht); 369 rhashtable_shrink(ht);
537unlock:
538 mutex_unlock(&ht->mutex);
539}
540 370
541static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, 371 err = rhashtable_rehash_table(ht);
542 struct bucket_table *tbl,
543 const struct bucket_table *old_tbl, u32 hash)
544{
545 bool no_resize_running = tbl == old_tbl;
546 struct rhash_head *head;
547 372
548 hash = rht_bucket_index(tbl, hash); 373 mutex_unlock(&ht->mutex);
549 head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
550
551 ASSERT_BUCKET_LOCK(ht, tbl, hash);
552
553 if (rht_is_a_nulls(head))
554 INIT_RHT_NULLS_HEAD(obj->next, ht, hash);
555 else
556 RCU_INIT_POINTER(obj->next, head);
557
558 rcu_assign_pointer(tbl->buckets[hash], obj);
559 374
560 atomic_inc(&ht->nelems); 375 if (err)
561 if (no_resize_running && rht_grow_above_75(ht, tbl->size))
562 schedule_work(&ht->run_work); 376 schedule_work(&ht->run_work);
563} 377}
564 378
565/** 379static bool rhashtable_check_elasticity(struct rhashtable *ht,
566 * rhashtable_insert - insert object into hash table 380 struct bucket_table *tbl,
567 * @ht: hash table 381 unsigned int hash)
568 * @obj: pointer to hash head inside object
569 *
570 * Will take a per bucket spinlock to protect against mutual mutations
571 * on the same bucket. Multiple insertions may occur in parallel unless
572 * they map to the same bucket lock.
573 *
574 * It is safe to call this function from atomic context.
575 *
576 * Will trigger an automatic deferred table resizing if the size grows
577 * beyond the watermark indicated by grow_decision() which can be passed
578 * to rhashtable_init().
579 */
580void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
581{ 382{
582 struct bucket_table *tbl, *old_tbl; 383 unsigned int elasticity = ht->elasticity;
583 unsigned hash; 384 struct rhash_head *head;
584
585 rcu_read_lock();
586
587 tbl = rht_dereference_rcu(ht->future_tbl, ht);
588 old_tbl = rht_dereference_rcu(ht->tbl, ht);
589 hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
590 385
591 lock_buckets(tbl, old_tbl, hash); 386 rht_for_each(head, tbl, hash)
592 __rhashtable_insert(ht, obj, tbl, old_tbl, hash); 387 if (!--elasticity)
593 unlock_buckets(tbl, old_tbl, hash); 388 return true;
594 389
595 rcu_read_unlock(); 390 return false;
596} 391}
597EXPORT_SYMBOL_GPL(rhashtable_insert);
598 392
599/** 393int rhashtable_insert_rehash(struct rhashtable *ht)
600 * rhashtable_remove - remove object from hash table
601 * @ht: hash table
602 * @obj: pointer to hash head inside object
603 *
604 * Since the hash chain is single linked, the removal operation needs to
605 * walk the bucket chain upon removal. The removal operation is thus
606 * considerable slow if the hash table is not correctly sized.
607 *
608 * Will automatically shrink the table via rhashtable_expand() if the
609 * shrink_decision function specified at rhashtable_init() returns true.
610 *
611 * The caller must ensure that no concurrent table mutations occur. It is
612 * however valid to have concurrent lookups if they are RCU protected.
613 */
614bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
615{ 394{
616 struct bucket_table *tbl, *new_tbl, *old_tbl; 395 struct bucket_table *old_tbl;
617 struct rhash_head __rcu **pprev; 396 struct bucket_table *new_tbl;
618 struct rhash_head *he, *he2; 397 struct bucket_table *tbl;
619 unsigned int hash, new_hash; 398 unsigned int size;
620 bool ret = false; 399 int err;
621 400
622 rcu_read_lock();
623 old_tbl = rht_dereference_rcu(ht->tbl, ht); 401 old_tbl = rht_dereference_rcu(ht->tbl, ht);
624 tbl = new_tbl = rht_dereference_rcu(ht->future_tbl, ht); 402 tbl = rhashtable_last_table(ht, old_tbl);
625 new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
626
627 lock_buckets(new_tbl, old_tbl, new_hash);
628restart:
629 hash = rht_bucket_index(tbl, new_hash);
630 pprev = &tbl->buckets[hash];
631 rht_for_each(he, tbl, hash) {
632 if (he != obj) {
633 pprev = &he->next;
634 continue;
635 }
636
637 ASSERT_BUCKET_LOCK(ht, tbl, hash);
638
639 if (old_tbl->size > new_tbl->size && tbl == old_tbl &&
640 !rht_is_a_nulls(obj->next) &&
641 head_hashfn(ht, tbl, obj->next) != hash) {
642 rcu_assign_pointer(*pprev, (struct rhash_head *) rht_marker(ht, hash));
643 } else if (unlikely(old_tbl->size < new_tbl->size && tbl == new_tbl)) {
644 rht_for_each_continue(he2, obj->next, tbl, hash) {
645 if (head_hashfn(ht, tbl, he2) == hash) {
646 rcu_assign_pointer(*pprev, he2);
647 goto found;
648 }
649 }
650
651 rcu_assign_pointer(*pprev, (struct rhash_head *) rht_marker(ht, hash));
652 } else {
653 rcu_assign_pointer(*pprev, obj->next);
654 }
655
656found:
657 ret = true;
658 break;
659 }
660
661 /* The entry may be linked in either 'tbl', 'future_tbl', or both.
662 * 'future_tbl' only exists for a short period of time during
663 * resizing. Thus traversing both is fine and the added cost is
664 * very rare.
665 */
666 if (tbl != old_tbl) {
667 tbl = old_tbl;
668 goto restart;
669 }
670
671 unlock_buckets(new_tbl, old_tbl, new_hash);
672
673 if (ret) {
674 bool no_resize_running = new_tbl == old_tbl;
675
676 atomic_dec(&ht->nelems);
677 if (no_resize_running && rht_shrink_below_30(ht, new_tbl->size))
678 schedule_work(&ht->run_work);
679 }
680
681 rcu_read_unlock();
682 403
683 return ret; 404 size = tbl->size;
684}
685EXPORT_SYMBOL_GPL(rhashtable_remove);
686
687struct rhashtable_compare_arg {
688 struct rhashtable *ht;
689 const void *key;
690};
691
692static bool rhashtable_compare(void *ptr, void *arg)
693{
694 struct rhashtable_compare_arg *x = arg;
695 struct rhashtable *ht = x->ht;
696
697 return !memcmp(ptr + ht->p.key_offset, x->key, ht->p.key_len);
698}
699
700/**
701 * rhashtable_lookup - lookup key in hash table
702 * @ht: hash table
703 * @key: pointer to key
704 *
705 * Computes the hash value for the key and traverses the bucket chain looking
706 * for a entry with an identical key. The first matching entry is returned.
707 *
708 * This lookup function may only be used for fixed key hash table (key_len
709 * parameter set). It will BUG() if used inappropriately.
710 *
711 * Lookups may occur in parallel with hashtable mutations and resizing.
712 */
713void *rhashtable_lookup(struct rhashtable *ht, const void *key)
714{
715 struct rhashtable_compare_arg arg = {
716 .ht = ht,
717 .key = key,
718 };
719
720 BUG_ON(!ht->p.key_len);
721
722 return rhashtable_lookup_compare(ht, key, &rhashtable_compare, &arg);
723}
724EXPORT_SYMBOL_GPL(rhashtable_lookup);
725
726/**
727 * rhashtable_lookup_compare - search hash table with compare function
728 * @ht: hash table
729 * @key: the pointer to the key
730 * @compare: compare function, must return true on match
731 * @arg: argument passed on to compare function
732 *
733 * Traverses the bucket chain behind the provided hash value and calls the
734 * specified compare function for each entry.
735 *
736 * Lookups may occur in parallel with hashtable mutations and resizing.
737 *
738 * Returns the first entry on which the compare function returned true.
739 */
740void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
741 bool (*compare)(void *, void *), void *arg)
742{
743 const struct bucket_table *tbl, *old_tbl;
744 struct rhash_head *he;
745 u32 hash;
746 405
747 rcu_read_lock(); 406 if (rht_grow_above_75(ht, tbl))
407 size *= 2;
408 /* More than two rehashes (not resizes) detected. */
409 else if (WARN_ON(old_tbl != tbl && old_tbl->size == size))
410 return -EBUSY;
748 411
749 old_tbl = rht_dereference_rcu(ht->tbl, ht); 412 new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
750 tbl = rht_dereference_rcu(ht->future_tbl, ht); 413 if (new_tbl == NULL)
751 hash = key_hashfn(ht, key, ht->p.key_len); 414 return -ENOMEM;
752restart:
753 rht_for_each_rcu(he, tbl, rht_bucket_index(tbl, hash)) {
754 if (!compare(rht_obj(ht, he), arg))
755 continue;
756 rcu_read_unlock();
757 return rht_obj(ht, he);
758 }
759 415
760 if (unlikely(tbl != old_tbl)) { 416 err = rhashtable_rehash_attach(ht, tbl, new_tbl);
761 tbl = old_tbl; 417 if (err) {
762 goto restart; 418 bucket_table_free(new_tbl);
763 } 419 if (err == -EEXIST)
764 rcu_read_unlock(); 420 err = 0;
421 } else
422 schedule_work(&ht->run_work);
765 423
766 return NULL; 424 return err;
767} 425}
768EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); 426EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
769 427
770/** 428int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
771 * rhashtable_lookup_insert - lookup and insert object into hash table 429 struct rhash_head *obj,
772 * @ht: hash table 430 struct bucket_table *tbl)
773 * @obj: pointer to hash head inside object
774 *
775 * Locks down the bucket chain in both the old and new table if a resize
776 * is in progress to ensure that writers can't remove from the old table
777 * and can't insert to the new table during the atomic operation of search
778 * and insertion. Searches for duplicates in both the old and new table if
779 * a resize is in progress.
780 *
781 * This lookup function may only be used for fixed key hash table (key_len
782 * parameter set). It will BUG() if used inappropriately.
783 *
784 * It is safe to call this function from atomic context.
785 *
786 * Will trigger an automatic deferred table resizing if the size grows
787 * beyond the watermark indicated by grow_decision() which can be passed
788 * to rhashtable_init().
789 */
790bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj)
791{ 431{
792 struct rhashtable_compare_arg arg = { 432 struct rhash_head *head;
793 .ht = ht, 433 unsigned int hash;
794 .key = rht_obj(ht, obj) + ht->p.key_offset, 434 int err;
795 };
796 435
797 BUG_ON(!ht->p.key_len); 436 tbl = rhashtable_last_table(ht, tbl);
437 hash = head_hashfn(ht, tbl, obj);
438 spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
798 439
799 return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare, 440 err = -EEXIST;
800 &arg); 441 if (key && rhashtable_lookup_fast(ht, key, ht->p))
801} 442 goto exit;
802EXPORT_SYMBOL_GPL(rhashtable_lookup_insert);
803 443
804/** 444 err = -EAGAIN;
805 * rhashtable_lookup_compare_insert - search and insert object to hash table 445 if (rhashtable_check_elasticity(ht, tbl, hash) ||
806 * with compare function 446 rht_grow_above_100(ht, tbl))
807 * @ht: hash table 447 goto exit;
808 * @obj: pointer to hash head inside object
809 * @compare: compare function, must return true on match
810 * @arg: argument passed on to compare function
811 *
812 * Locks down the bucket chain in both the old and new table if a resize
813 * is in progress to ensure that writers can't remove from the old table
814 * and can't insert to the new table during the atomic operation of search
815 * and insertion. Searches for duplicates in both the old and new table if
816 * a resize is in progress.
817 *
818 * Lookups may occur in parallel with hashtable mutations and resizing.
819 *
820 * Will trigger an automatic deferred table resizing if the size grows
821 * beyond the watermark indicated by grow_decision() which can be passed
822 * to rhashtable_init().
823 */
824bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
825 struct rhash_head *obj,
826 bool (*compare)(void *, void *),
827 void *arg)
828{
829 struct bucket_table *new_tbl, *old_tbl;
830 u32 new_hash;
831 bool success = true;
832 448
833 BUG_ON(!ht->p.key_len); 449 err = 0;
834 450
835 rcu_read_lock(); 451 head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
836 old_tbl = rht_dereference_rcu(ht->tbl, ht);
837 new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
838 new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
839 452
840 lock_buckets(new_tbl, old_tbl, new_hash); 453 RCU_INIT_POINTER(obj->next, head);
841 454
842 if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset, 455 rcu_assign_pointer(tbl->buckets[hash], obj);
843 compare, arg)) {
844 success = false;
845 goto exit;
846 }
847 456
848 __rhashtable_insert(ht, obj, new_tbl, old_tbl, new_hash); 457 atomic_inc(&ht->nelems);
849 458
850exit: 459exit:
851 unlock_buckets(new_tbl, old_tbl, new_hash); 460 spin_unlock(rht_bucket_lock(tbl, hash));
852 rcu_read_unlock();
853 461
854 return success; 462 return err;
855} 463}
856EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert); 464EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
857 465
858/** 466/**
859 * rhashtable_walk_init - Initialise an iterator 467 * rhashtable_walk_init - Initialise an iterator
@@ -887,11 +495,9 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
887 if (!iter->walker) 495 if (!iter->walker)
888 return -ENOMEM; 496 return -ENOMEM;
889 497
890 INIT_LIST_HEAD(&iter->walker->list);
891 iter->walker->resize = false;
892
893 mutex_lock(&ht->mutex); 498 mutex_lock(&ht->mutex);
894 list_add(&iter->walker->list, &ht->walkers); 499 iter->walker->tbl = rht_dereference(ht->tbl, ht);
500 list_add(&iter->walker->list, &iter->walker->tbl->walkers);
895 mutex_unlock(&ht->mutex); 501 mutex_unlock(&ht->mutex);
896 502
897 return 0; 503 return 0;
@@ -907,7 +513,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
907void rhashtable_walk_exit(struct rhashtable_iter *iter) 513void rhashtable_walk_exit(struct rhashtable_iter *iter)
908{ 514{
909 mutex_lock(&iter->ht->mutex); 515 mutex_lock(&iter->ht->mutex);
910 list_del(&iter->walker->list); 516 if (iter->walker->tbl)
517 list_del(&iter->walker->list);
911 mutex_unlock(&iter->ht->mutex); 518 mutex_unlock(&iter->ht->mutex);
912 kfree(iter->walker); 519 kfree(iter->walker);
913} 520}
@@ -928,13 +535,21 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
928 * by calling rhashtable_walk_next. 535 * by calling rhashtable_walk_next.
929 */ 536 */
930int rhashtable_walk_start(struct rhashtable_iter *iter) 537int rhashtable_walk_start(struct rhashtable_iter *iter)
538 __acquires(RCU)
931{ 539{
540 struct rhashtable *ht = iter->ht;
541
542 mutex_lock(&ht->mutex);
543
544 if (iter->walker->tbl)
545 list_del(&iter->walker->list);
546
932 rcu_read_lock(); 547 rcu_read_lock();
933 548
934 if (iter->walker->resize) { 549 mutex_unlock(&ht->mutex);
935 iter->slot = 0; 550
936 iter->skip = 0; 551 if (!iter->walker->tbl) {
937 iter->walker->resize = false; 552 iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
938 return -EAGAIN; 553 return -EAGAIN;
939 } 554 }
940 555
@@ -956,13 +571,11 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
956 */ 571 */
957void *rhashtable_walk_next(struct rhashtable_iter *iter) 572void *rhashtable_walk_next(struct rhashtable_iter *iter)
958{ 573{
959 const struct bucket_table *tbl; 574 struct bucket_table *tbl = iter->walker->tbl;
960 struct rhashtable *ht = iter->ht; 575 struct rhashtable *ht = iter->ht;
961 struct rhash_head *p = iter->p; 576 struct rhash_head *p = iter->p;
962 void *obj = NULL; 577 void *obj = NULL;
963 578
964 tbl = rht_dereference_rcu(ht->tbl, ht);
965
966 if (p) { 579 if (p) {
967 p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); 580 p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
968 goto next; 581 goto next;
@@ -988,17 +601,20 @@ next:
988 iter->skip = 0; 601 iter->skip = 0;
989 } 602 }
990 603
991 iter->p = NULL; 604 /* Ensure we see any new tables. */
605 smp_rmb();
992 606
993out: 607 iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
994 if (iter->walker->resize) { 608 if (iter->walker->tbl) {
995 iter->p = NULL;
996 iter->slot = 0; 609 iter->slot = 0;
997 iter->skip = 0; 610 iter->skip = 0;
998 iter->walker->resize = false;
999 return ERR_PTR(-EAGAIN); 611 return ERR_PTR(-EAGAIN);
1000 } 612 }
1001 613
614 iter->p = NULL;
615
616out:
617
1002 return obj; 618 return obj;
1003} 619}
1004EXPORT_SYMBOL_GPL(rhashtable_walk_next); 620EXPORT_SYMBOL_GPL(rhashtable_walk_next);
@@ -1010,16 +626,39 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_next);
1010 * Finish a hash table walk. 626 * Finish a hash table walk.
1011 */ 627 */
1012void rhashtable_walk_stop(struct rhashtable_iter *iter) 628void rhashtable_walk_stop(struct rhashtable_iter *iter)
629 __releases(RCU)
1013{ 630{
1014 rcu_read_unlock(); 631 struct rhashtable *ht;
632 struct bucket_table *tbl = iter->walker->tbl;
633
634 if (!tbl)
635 goto out;
636
637 ht = iter->ht;
638
639 spin_lock(&ht->lock);
640 if (tbl->rehash < tbl->size)
641 list_add(&iter->walker->list, &tbl->walkers);
642 else
643 iter->walker->tbl = NULL;
644 spin_unlock(&ht->lock);
645
1015 iter->p = NULL; 646 iter->p = NULL;
647
648out:
649 rcu_read_unlock();
1016} 650}
1017EXPORT_SYMBOL_GPL(rhashtable_walk_stop); 651EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
1018 652
1019static size_t rounded_hashtable_size(struct rhashtable_params *params) 653static size_t rounded_hashtable_size(const struct rhashtable_params *params)
1020{ 654{
1021 return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), 655 return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
1022 1UL << params->min_shift); 656 (unsigned long)params->min_size);
657}
658
659static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
660{
661 return jhash2(key, length, seed);
1023} 662}
1024 663
1025/** 664/**
@@ -1052,7 +691,7 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params)
1052 * struct rhash_head node; 691 * struct rhash_head node;
1053 * }; 692 * };
1054 * 693 *
1055 * u32 my_hash_fn(const void *data, u32 seed) 694 * u32 my_hash_fn(const void *data, u32 len, u32 seed)
1056 * { 695 * {
1057 * struct test_obj *obj = data; 696 * struct test_obj *obj = data;
1058 * 697 *
@@ -1065,47 +704,74 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params)
1065 * .obj_hashfn = my_hash_fn, 704 * .obj_hashfn = my_hash_fn,
1066 * }; 705 * };
1067 */ 706 */
1068int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) 707int rhashtable_init(struct rhashtable *ht,
708 const struct rhashtable_params *params)
1069{ 709{
1070 struct bucket_table *tbl; 710 struct bucket_table *tbl;
1071 size_t size; 711 size_t size;
1072 712
1073 size = HASH_DEFAULT_SIZE; 713 size = HASH_DEFAULT_SIZE;
1074 714
1075 if ((params->key_len && !params->hashfn) || 715 if ((!params->key_len && !params->obj_hashfn) ||
1076 (!params->key_len && !params->obj_hashfn)) 716 (params->obj_hashfn && !params->obj_cmpfn))
1077 return -EINVAL; 717 return -EINVAL;
1078 718
1079 if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT)) 719 if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
1080 return -EINVAL; 720 return -EINVAL;
1081 721
1082 params->min_shift = max_t(size_t, params->min_shift,
1083 ilog2(HASH_MIN_SIZE));
1084
1085 if (params->nelem_hint) 722 if (params->nelem_hint)
1086 size = rounded_hashtable_size(params); 723 size = rounded_hashtable_size(params);
1087 724
1088 memset(ht, 0, sizeof(*ht)); 725 memset(ht, 0, sizeof(*ht));
1089 mutex_init(&ht->mutex); 726 mutex_init(&ht->mutex);
727 spin_lock_init(&ht->lock);
1090 memcpy(&ht->p, params, sizeof(*params)); 728 memcpy(&ht->p, params, sizeof(*params));
1091 INIT_LIST_HEAD(&ht->walkers); 729
730 if (params->min_size)
731 ht->p.min_size = roundup_pow_of_two(params->min_size);
732
733 if (params->max_size)
734 ht->p.max_size = rounddown_pow_of_two(params->max_size);
735
736 ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);
737
738 /* The maximum (not average) chain length grows with the
739 * size of the hash table, at a rate of (log N)/(log log N).
740 * The value of 16 is selected so that even if the hash
741 * table grew to 2^32 you would not expect the maximum
742 * chain length to exceed it unless we are under attack
743 * (or extremely unlucky).
744 *
745 * As this limit is only to detect attacks, we don't need
746 * to set it to a lower value as you'd need the chain
747 * length to vastly exceed 16 to have any real effect
748 * on the system.
749 */
750 if (!params->insecure_elasticity)
751 ht->elasticity = 16;
1092 752
1093 if (params->locks_mul) 753 if (params->locks_mul)
1094 ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); 754 ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
1095 else 755 else
1096 ht->p.locks_mul = BUCKET_LOCKS_PER_CPU; 756 ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
1097 757
1098 tbl = bucket_table_alloc(ht, size); 758 ht->key_len = ht->p.key_len;
759 if (!params->hashfn) {
760 ht->p.hashfn = jhash;
761
762 if (!(ht->key_len & (sizeof(u32) - 1))) {
763 ht->key_len /= sizeof(u32);
764 ht->p.hashfn = rhashtable_jhash2;
765 }
766 }
767
768 tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
1099 if (tbl == NULL) 769 if (tbl == NULL)
1100 return -ENOMEM; 770 return -ENOMEM;
1101 771
1102 atomic_set(&ht->nelems, 0); 772 atomic_set(&ht->nelems, 0);
1103 atomic_set(&ht->shift, ilog2(tbl->size));
1104 RCU_INIT_POINTER(ht->tbl, tbl);
1105 RCU_INIT_POINTER(ht->future_tbl, tbl);
1106 773
1107 if (!ht->p.hash_rnd) 774 RCU_INIT_POINTER(ht->tbl, tbl);
1108 get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd));
1109 775
1110 INIT_WORK(&ht->run_work, rht_deferred_worker); 776 INIT_WORK(&ht->run_work, rht_deferred_worker);
1111 777
@@ -1114,21 +780,53 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
1114EXPORT_SYMBOL_GPL(rhashtable_init); 780EXPORT_SYMBOL_GPL(rhashtable_init);
1115 781
1116/** 782/**
1117 * rhashtable_destroy - destroy hash table 783 * rhashtable_free_and_destroy - free elements and destroy hash table
1118 * @ht: the hash table to destroy 784 * @ht: the hash table to destroy
785 * @free_fn: callback to release resources of element
786 * @arg: pointer passed to free_fn
787 *
788 * Stops an eventual async resize. If defined, invokes free_fn for each
789 * element to releasal resources. Please note that RCU protected
790 * readers may still be accessing the elements. Releasing of resources
791 * must occur in a compatible manner. Then frees the bucket array.
1119 * 792 *
1120 * Frees the bucket array. This function is not rcu safe, therefore the caller 793 * This function will eventually sleep to wait for an async resize
1121 * has to make sure that no resizing may happen by unpublishing the hashtable 794 * to complete. The caller is responsible that no further write operations
1122 * and waiting for the quiescent cycle before releasing the bucket array. 795 * occurs in parallel.
1123 */ 796 */
1124void rhashtable_destroy(struct rhashtable *ht) 797void rhashtable_free_and_destroy(struct rhashtable *ht,
798 void (*free_fn)(void *ptr, void *arg),
799 void *arg)
1125{ 800{
1126 ht->being_destroyed = true; 801 const struct bucket_table *tbl;
802 unsigned int i;
1127 803
1128 cancel_work_sync(&ht->run_work); 804 cancel_work_sync(&ht->run_work);
1129 805
1130 mutex_lock(&ht->mutex); 806 mutex_lock(&ht->mutex);
1131 bucket_table_free(rht_dereference(ht->tbl, ht)); 807 tbl = rht_dereference(ht->tbl, ht);
808 if (free_fn) {
809 for (i = 0; i < tbl->size; i++) {
810 struct rhash_head *pos, *next;
811
812 for (pos = rht_dereference(tbl->buckets[i], ht),
813 next = !rht_is_a_nulls(pos) ?
814 rht_dereference(pos->next, ht) : NULL;
815 !rht_is_a_nulls(pos);
816 pos = next,
817 next = !rht_is_a_nulls(pos) ?
818 rht_dereference(pos->next, ht) : NULL)
819 free_fn(rht_obj(ht, pos), arg);
820 }
821 }
822
823 bucket_table_free(tbl);
1132 mutex_unlock(&ht->mutex); 824 mutex_unlock(&ht->mutex);
1133} 825}
826EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
827
828void rhashtable_destroy(struct rhashtable *ht)
829{
830 return rhashtable_free_and_destroy(ht, NULL, NULL);
831}
1134EXPORT_SYMBOL_GPL(rhashtable_destroy); 832EXPORT_SYMBOL_GPL(rhashtable_destroy);
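The walker rework above drops the per-iterator resize flag: walkers now hang off the table they were started on, and a concurrent rehash is reported as -EAGAIN from rhashtable_walk_start() or as ERR_PTR(-EAGAIN) from rhashtable_walk_next(). A sketch of how a caller is expected to drive the iterator under those semantics, reusing the illustrative struct my_obj from the earlier sketch:

#include <linux/err.h>

static void my_rht_walk(struct rhashtable *ht)
{
	struct rhashtable_iter iter;
	struct my_obj *obj;
	int err;

	err = rhashtable_walk_init(ht, &iter);	/* allocates the walker, may sleep */
	if (err)
		return;

	err = rhashtable_walk_start(&iter);
	if (err && err != -EAGAIN)	/* -EAGAIN here only means the table was resized */
		goto out_exit;

	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -EAGAIN)
				continue;	/* rehash hit mid-walk; entries may repeat */
			break;
		}
		pr_info("visited key %d\n", obj->key);	/* illustrative use */
	}

	rhashtable_walk_stop(&iter);
out_exit:
	rhashtable_walk_exit(&iter);
}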
diff --git a/lib/sha1.c b/lib/sha1.c
index 1df191e04a24..5a56dfd7b99d 100644
--- a/lib/sha1.c
+++ b/lib/sha1.c
@@ -198,3 +198,4 @@ void sha_init(__u32 *buf)
198 buf[3] = 0x10325476; 198 buf[3] = 0x10325476;
199 buf[4] = 0xc3d2e1f0; 199 buf[4] = 0xc3d2e1f0;
200} 200}
201EXPORT_SYMBOL(sha_init);
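The one-line sha1.c change only adds EXPORT_SYMBOL(sha_init) so that modules can reach the library SHA-1 core, likely in support of the RFC7217 stable-address work in this pull. A hedged sketch of pairing it with sha_transform(), assuming the <linux/cryptohash.h> declarations of this era (SHA_DIGEST_WORDS, SHA_WORKSPACE_WORDS) and omitting the message padding a real caller must perform:

#include <linux/cryptohash.h>
#include <linux/string.h>

/* Digest a single, already-padded 64-byte block. */
static void my_sha1_one_block(const char *block64, __u32 digest[SHA_DIGEST_WORDS])
{
	__u32 ws[SHA_WORKSPACE_WORDS];		/* scratch space required by sha_transform() */

	sha_init(digest);			/* load the standard SHA-1 initial values */
	sha_transform(digest, block64, ws);	/* mix in one 512-bit block */
	memset(ws, 0, sizeof(ws));		/* scrub round state from the stack */
}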
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 67c7593d1dd6..b2957540d3c7 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -38,6 +38,15 @@ struct test_obj {
38 struct rhash_head node; 38 struct rhash_head node;
39}; 39};
40 40
41static const struct rhashtable_params test_rht_params = {
42 .nelem_hint = TEST_HT_SIZE,
43 .head_offset = offsetof(struct test_obj, node),
44 .key_offset = offsetof(struct test_obj, value),
45 .key_len = sizeof(int),
46 .hashfn = jhash,
47 .nulls_base = (3U << RHT_BASE_SHIFT),
48};
49
41static int __init test_rht_lookup(struct rhashtable *ht) 50static int __init test_rht_lookup(struct rhashtable *ht)
42{ 51{
43 unsigned int i; 52 unsigned int i;
@@ -47,7 +56,7 @@ static int __init test_rht_lookup(struct rhashtable *ht)
47 bool expected = !(i % 2); 56 bool expected = !(i % 2);
48 u32 key = i; 57 u32 key = i;
49 58
50 obj = rhashtable_lookup(ht, &key); 59 obj = rhashtable_lookup_fast(ht, &key, test_rht_params);
51 60
52 if (expected && !obj) { 61 if (expected && !obj) {
53 pr_warn("Test failed: Could not find key %u\n", key); 62 pr_warn("Test failed: Could not find key %u\n", key);
@@ -80,7 +89,7 @@ static void test_bucket_stats(struct rhashtable *ht, bool quiet)
80 rcu_cnt = cnt = 0; 89 rcu_cnt = cnt = 0;
81 90
82 if (!quiet) 91 if (!quiet)
83 pr_info(" [%#4x/%zu]", i, tbl->size); 92 pr_info(" [%#4x/%u]", i, tbl->size);
84 93
85 rht_for_each_entry_rcu(obj, pos, tbl, i, node) { 94 rht_for_each_entry_rcu(obj, pos, tbl, i, node) {
86 cnt++; 95 cnt++;
@@ -133,7 +142,11 @@ static int __init test_rhashtable(struct rhashtable *ht)
133 obj->ptr = TEST_PTR; 142 obj->ptr = TEST_PTR;
134 obj->value = i * 2; 143 obj->value = i * 2;
135 144
136 rhashtable_insert(ht, &obj->node); 145 err = rhashtable_insert_fast(ht, &obj->node, test_rht_params);
146 if (err) {
147 kfree(obj);
148 goto error;
149 }
137 } 150 }
138 151
139 rcu_read_lock(); 152 rcu_read_lock();
@@ -141,30 +154,6 @@ static int __init test_rhashtable(struct rhashtable *ht)
141 test_rht_lookup(ht); 154 test_rht_lookup(ht);
142 rcu_read_unlock(); 155 rcu_read_unlock();
143 156
144 for (i = 0; i < TEST_NEXPANDS; i++) {
145 pr_info(" Table expansion iteration %u...\n", i);
146 mutex_lock(&ht->mutex);
147 rhashtable_expand(ht);
148 mutex_unlock(&ht->mutex);
149
150 rcu_read_lock();
151 pr_info(" Verifying lookups...\n");
152 test_rht_lookup(ht);
153 rcu_read_unlock();
154 }
155
156 for (i = 0; i < TEST_NEXPANDS; i++) {
157 pr_info(" Table shrinkage iteration %u...\n", i);
158 mutex_lock(&ht->mutex);
159 rhashtable_shrink(ht);
160 mutex_unlock(&ht->mutex);
161
162 rcu_read_lock();
163 pr_info(" Verifying lookups...\n");
164 test_rht_lookup(ht);
165 rcu_read_unlock();
166 }
167
168 rcu_read_lock(); 157 rcu_read_lock();
169 test_bucket_stats(ht, true); 158 test_bucket_stats(ht, true);
170 rcu_read_unlock(); 159 rcu_read_unlock();
@@ -173,10 +162,10 @@ static int __init test_rhashtable(struct rhashtable *ht)
173 for (i = 0; i < TEST_ENTRIES; i++) { 162 for (i = 0; i < TEST_ENTRIES; i++) {
174 u32 key = i * 2; 163 u32 key = i * 2;
175 164
176 obj = rhashtable_lookup(ht, &key); 165 obj = rhashtable_lookup_fast(ht, &key, test_rht_params);
177 BUG_ON(!obj); 166 BUG_ON(!obj);
178 167
179 rhashtable_remove(ht, &obj->node); 168 rhashtable_remove_fast(ht, &obj->node, test_rht_params);
180 kfree(obj); 169 kfree(obj);
181 } 170 }
182 171
@@ -195,20 +184,11 @@ static struct rhashtable ht;
195 184
196static int __init test_rht_init(void) 185static int __init test_rht_init(void)
197{ 186{
198 struct rhashtable_params params = {
199 .nelem_hint = TEST_HT_SIZE,
200 .head_offset = offsetof(struct test_obj, node),
201 .key_offset = offsetof(struct test_obj, value),
202 .key_len = sizeof(int),
203 .hashfn = jhash,
204 .max_shift = 1, /* we expand/shrink manually here */
205 .nulls_base = (3U << RHT_BASE_SHIFT),
206 };
207 int err; 187 int err;
208 188
209 pr_info("Running resizable hashtable tests...\n"); 189 pr_info("Running resizable hashtable tests...\n");
210 190
211 err = rhashtable_init(&ht, &params); 191 err = rhashtable_init(&ht, &test_rht_params);
212 if (err < 0) { 192 if (err < 0) {
213 pr_warn("Test failed: Unable to initialize hashtable: %d\n", 193 pr_warn("Test failed: Unable to initialize hashtable: %d\n",
214 err); 194 err);
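For bulk teardown, the series also introduces rhashtable_free_and_destroy(), visible at the end of the rhashtable.c diff, which cancels any pending deferred resize and hands each remaining element to a caller-supplied callback before freeing the bucket array. A small sketch, again using the illustrative struct my_obj and assuming all readers and writers have already been quiesced:

static void my_free_cb(void *ptr, void *arg)
{
	kfree(ptr);	/* ptr is the containing object (rht_obj); arg is unused here */
}

static void my_rht_teardown(struct rhashtable *ht)
{
	/* Walks every bucket and invokes the callback for each element,
	 * then frees the bucket array. With readers quiesced, a plain
	 * kfree() in the callback is sufficient.
	 */
	rhashtable_free_and_destroy(ht, my_free_cb, NULL);
}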