Diffstat (limited to 'lib/rhashtable.c')
-rw-r--r--   lib/rhashtable.c   458
1 file changed, 344 insertions, 114 deletions
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index e6b85c4a5828..312e3437c7bc 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -26,19 +26,42 @@
26 26
27#define HASH_DEFAULT_SIZE 64UL 27#define HASH_DEFAULT_SIZE 64UL
28#define HASH_MIN_SIZE 4UL 28#define HASH_MIN_SIZE 4UL
29#define BUCKET_LOCKS_PER_CPU 128UL
30
31enum {
32 RHT_LOCK_NORMAL,
33 RHT_LOCK_NESTED,
34 RHT_LOCK_NESTED2,
35};
36
37/* The bucket lock is selected based on the hash and protects mutations
38 * on a group of hash buckets.
39 *
40 * IMPORTANT: When holding the bucket lock of both the old and new table
41 * during expansions and shrinking, the old bucket lock must always be
42 * acquired first.
43 */
44static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash)
45{
46 return &tbl->locks[hash & tbl->locks_mask];
47}
29 48
30#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) 49#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
50#define ASSERT_BUCKET_LOCK(TBL, HASH) \
51 BUG_ON(!lockdep_rht_bucket_is_held(TBL, HASH))
31 52
32#ifdef CONFIG_PROVE_LOCKING 53#ifdef CONFIG_PROVE_LOCKING
33int lockdep_rht_mutex_is_held(const struct rhashtable *ht) 54int lockdep_rht_mutex_is_held(struct rhashtable *ht)
34{ 55{
35 return ht->p.mutex_is_held(ht->p.parent); 56 return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
36} 57}
37EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); 58EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
38 59
39int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) 60int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
40{ 61{
41 return 1; 62 spinlock_t *lock = bucket_lock(tbl, hash);
63
64 return (debug_locks) ? lockdep_is_held(lock) : 1;
42} 65}
43EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); 66EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
44#endif 67#endif
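
The hunk above introduces the core of the new locking scheme: a hash-indexed spinlock array shared by groups of buckets, plus lockdep helpers that now check the real locks. A minimal userspace sketch of the same selection rule, using pthread spinlocks and hypothetical names (my_bucket_table, my_bucket_lock) rather than the kernel types:

#include <pthread.h>
#include <stdint.h>

/* Illustrative userspace stand-in for the kernel's bucket_table: a
 * power-of-two array of spinlocks shared by all buckets. */
struct my_bucket_table {
	pthread_spinlock_t *locks;
	uint32_t locks_mask;		/* number of locks minus one */
};

/* Same selection rule as bucket_lock(): buckets whose hashes agree in
 * the low bits share one lock, so the lock array can stay far smaller
 * than the bucket array. */
static pthread_spinlock_t *my_bucket_lock(const struct my_bucket_table *tbl,
					  uint32_t hash)
{
	return &tbl->locks[hash & tbl->locks_mask];
}
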
@@ -66,7 +89,7 @@ static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr)
66 return hash; 89 return hash;
67} 90}
68 91
69static u32 key_hashfn(const struct rhashtable *ht, const void *key, u32 len) 92static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len)
70{ 93{
71 struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); 94 struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
72 u32 hash; 95 u32 hash;
@@ -95,7 +118,49 @@ static struct rhash_head __rcu **bucket_tail(struct bucket_table *tbl, u32 n)
95 return pprev; 118 return pprev;
96} 119}
97 120
98static struct bucket_table *bucket_table_alloc(size_t nbuckets) 121static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
122{
123 unsigned int i, size;
124#if defined(CONFIG_PROVE_LOCKING)
125 unsigned int nr_pcpus = 2;
126#else
127 unsigned int nr_pcpus = num_possible_cpus();
128#endif
129
130 nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
131 size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
132
133 /* Never allocate more than one lock per bucket */
134 size = min_t(unsigned int, size, tbl->size);
135
136 if (sizeof(spinlock_t) != 0) {
137#ifdef CONFIG_NUMA
138 if (size * sizeof(spinlock_t) > PAGE_SIZE)
139 tbl->locks = vmalloc(size * sizeof(spinlock_t));
140 else
141#endif
142 tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
143 GFP_KERNEL);
144 if (!tbl->locks)
145 return -ENOMEM;
146 for (i = 0; i < size; i++)
147 spin_lock_init(&tbl->locks[i]);
148 }
149 tbl->locks_mask = size - 1;
150
151 return 0;
152}
153
154static void bucket_table_free(const struct bucket_table *tbl)
155{
156 if (tbl)
157 kvfree(tbl->locks);
158
159 kvfree(tbl);
160}
161
162static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
163 size_t nbuckets)
99{ 164{
100 struct bucket_table *tbl; 165 struct bucket_table *tbl;
101 size_t size; 166 size_t size;
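
alloc_bucket_locks() above sizes the lock array from the CPU count and the locks_mul parameter, rounds it up to a power of two so the mask in bucket_lock() works, and caps it at one lock per bucket. A plain C sketch of that sizing rule; roundup_pow2() and nr_bucket_locks() are illustrative stand-ins, not kernel helpers:

#include <stddef.h>

/* Round v up to the next power of two (for v >= 1); a plain stand-in
 * for the kernel's roundup_pow_of_two(). */
static size_t roundup_pow2(size_t v)
{
	size_t r = 1;

	while (r < v)
		r <<= 1;
	return r;
}

/* Sizing rule sketched from alloc_bucket_locks(): scale with the number
 * of possible CPUs (capped at 32, or 2 under lock proving), multiply by
 * locks_mul, round up to a power of two, and never allocate more than
 * one lock per bucket. */
static size_t nr_bucket_locks(size_t nr_cpus, size_t locks_mul,
			      size_t nbuckets)
{
	size_t size;

	if (nr_cpus > 32)
		nr_cpus = 32;
	size = roundup_pow2(nr_cpus * locks_mul);
	return size < nbuckets ? size : nbuckets;
}
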
@@ -110,12 +175,12 @@ static struct bucket_table *bucket_table_alloc(size_t nbuckets)
110 175
111 tbl->size = nbuckets; 176 tbl->size = nbuckets;
112 177
113 return tbl; 178 if (alloc_bucket_locks(ht, tbl) < 0) {
114} 179 bucket_table_free(tbl);
180 return NULL;
181 }
115 182
116static void bucket_table_free(const struct bucket_table *tbl) 183 return tbl;
117{
118 kvfree(tbl);
119} 184}
120 185
121/** 186/**
@@ -126,7 +191,7 @@ static void bucket_table_free(const struct bucket_table *tbl)
126bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) 191bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
127{ 192{
128 /* Expand table when exceeding 75% load */ 193 /* Expand table when exceeding 75% load */
129 return ht->nelems > (new_size / 4 * 3); 194 return atomic_read(&ht->nelems) > (new_size / 4 * 3);
130} 195}
131EXPORT_SYMBOL_GPL(rht_grow_above_75); 196EXPORT_SYMBOL_GPL(rht_grow_above_75);
132 197
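
rht_grow_above_75() now reads the element count atomically, since insertions no longer serialize on a single mutex; the watermark arithmetic itself is unchanged. Written out in plain C (the shrink counterpart appears at the top of the next hunk):

#include <stdbool.h>
#include <stddef.h>

/* The resize watermarks: grow once the element count exceeds 75% of the
 * proposed size, shrink once it falls below 30%. */
static bool grow_above_75(size_t nelems, size_t new_size)
{
	return nelems > new_size / 4 * 3;
}

static bool shrink_below_30(size_t nelems, size_t new_size)
{
	return nelems < new_size * 3 / 10;
}
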
@@ -138,41 +203,59 @@ EXPORT_SYMBOL_GPL(rht_grow_above_75);
138bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) 203bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
139{ 204{
140 /* Shrink table beneath 30% load */ 205 /* Shrink table beneath 30% load */
141 return ht->nelems < (new_size * 3 / 10); 206 return atomic_read(&ht->nelems) < (new_size * 3 / 10);
142} 207}
143EXPORT_SYMBOL_GPL(rht_shrink_below_30); 208EXPORT_SYMBOL_GPL(rht_shrink_below_30);
144 209
145static void hashtable_chain_unzip(const struct rhashtable *ht, 210static void hashtable_chain_unzip(const struct rhashtable *ht,
146 const struct bucket_table *new_tbl, 211 const struct bucket_table *new_tbl,
147 struct bucket_table *old_tbl, size_t n) 212 struct bucket_table *old_tbl,
213 size_t old_hash)
148{ 214{
149 struct rhash_head *he, *p, *next; 215 struct rhash_head *he, *p, *next;
150 unsigned int h; 216 spinlock_t *new_bucket_lock, *new_bucket_lock2 = NULL;
217 unsigned int new_hash, new_hash2;
218
219 ASSERT_BUCKET_LOCK(old_tbl, old_hash);
151 220
152 /* Old bucket empty, no work needed. */ 221 /* Old bucket empty, no work needed. */
153 p = rht_dereference(old_tbl->buckets[n], ht); 222 p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
223 old_hash);
154 if (!p) 224 if (!p)
155 return; 225 return;
156 226
227 new_hash = new_hash2 = head_hashfn(ht, new_tbl, p);
228 new_bucket_lock = bucket_lock(new_tbl, new_hash);
229
157 /* Advance the old bucket pointer one or more times until it 230 /* Advance the old bucket pointer one or more times until it
158 * reaches a node that doesn't hash to the same bucket as the 231 * reaches a node that doesn't hash to the same bucket as the
159 * previous node p. Call the previous node p; 232 * previous node p. Call the previous node p;
160 */ 233 */
161 h = head_hashfn(ht, new_tbl, p); 234 rht_for_each_continue(he, p->next, old_tbl, old_hash) {
162 rht_for_each_continue(he, p->next, old_tbl, n) { 235 new_hash2 = head_hashfn(ht, new_tbl, he);
163 if (head_hashfn(ht, new_tbl, he) != h) 236 if (new_hash != new_hash2)
164 break; 237 break;
165 p = he; 238 p = he;
166 } 239 }
167 RCU_INIT_POINTER(old_tbl->buckets[n], p->next); 240 rcu_assign_pointer(old_tbl->buckets[old_hash], p->next);
241
242 spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
243
244 /* If we have encountered an entry that maps to a different bucket in
245 * the new table, lock down that bucket as well as we might cut off
246 * the end of the chain.
247 */
248 new_bucket_lock2 = bucket_lock(new_tbl, new_hash);
249 if (new_bucket_lock != new_bucket_lock2)
250 spin_lock_bh_nested(new_bucket_lock2, RHT_LOCK_NESTED2);
168 251
169 /* Find the subsequent node which does hash to the same 252 /* Find the subsequent node which does hash to the same
170 * bucket as node P, or NULL if no such node exists. 253 * bucket as node P, or NULL if no such node exists.
171 */ 254 */
172 next = NULL; 255 next = NULL;
173 if (he) { 256 if (he) {
174 rht_for_each_continue(he, he->next, old_tbl, n) { 257 rht_for_each_continue(he, he->next, old_tbl, old_hash) {
175 if (head_hashfn(ht, new_tbl, he) == h) { 258 if (head_hashfn(ht, new_tbl, he) == new_hash) {
176 next = he; 259 next = he;
177 break; 260 break;
178 } 261 }
@@ -182,7 +265,23 @@ static void hashtable_chain_unzip(const struct rhashtable *ht,
182 /* Set p's next pointer to that subsequent node pointer, 265 /* Set p's next pointer to that subsequent node pointer,
183 * bypassing the nodes which do not hash to p's bucket 266 * bypassing the nodes which do not hash to p's bucket
184 */ 267 */
185 RCU_INIT_POINTER(p->next, next); 268 rcu_assign_pointer(p->next, next);
269
270 if (new_bucket_lock != new_bucket_lock2)
271 spin_unlock_bh(new_bucket_lock2);
272 spin_unlock_bh(new_bucket_lock);
273}
274
275static void link_old_to_new(struct bucket_table *new_tbl,
276 unsigned int new_hash, struct rhash_head *entry)
277{
278 spinlock_t *new_bucket_lock;
279
280 new_bucket_lock = bucket_lock(new_tbl, new_hash);
281
282 spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
283 rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry);
284 spin_unlock_bh(new_bucket_lock);
186} 285}
187 286
188/** 287/**
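
The unzip step above is the heart of lock-friendly expansion: an old bucket temporarily chains together entries destined for several new buckets, and each pass peels off the leading run that belongs to one new bucket and re-links the remainder. A single-threaded sketch on a plain singly linked list, without the RCU publication and nested bucket locking the kernel code needs; struct node and unzip_one() are illustrative only:

#include <stddef.h>

struct node {
	struct node *next;
	unsigned int new_hash;	/* bucket this entry maps to in the new table */
};

/* One unzip pass: peel off the leading run that shares the head's new
 * bucket, then re-point its tail at the next entry further down the
 * chain that maps to that same new bucket. */
static void unzip_one(struct node **old_bucket)
{
	struct node *p = *old_bucket, *he, *next = NULL;
	unsigned int h;

	if (!p)
		return;		/* old bucket empty, nothing to do */

	h = p->new_hash;
	for (he = p->next; he && he->new_hash == h; he = he->next)
		p = he;		/* p: last entry of the leading run */

	/* The old bucket now starts at the first entry mapping elsewhere. */
	*old_bucket = p->next;

	/* Find the next entry that maps to h again, or NULL if none. */
	for (; he; he = he->next) {
		if (he->new_hash == h) {
			next = he;
			break;
		}
	}
	p->next = next;
}
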
@@ -195,43 +294,59 @@ static void hashtable_chain_unzip(const struct rhashtable *ht,
195 * This function may only be called in a context where it is safe to call 294 * This function may only be called in a context where it is safe to call
196 * synchronize_rcu(), e.g. not within a rcu_read_lock() section. 295 * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
197 * 296 *
198 * The caller must ensure that no concurrent table mutations take place. 297 * The caller must ensure that no concurrent resizing occurs by holding
199 * It is however valid to have concurrent lookups if they are RCU protected. 298 * ht->mutex.
299 *
300 * It is valid to have concurrent insertions and deletions protected by per
301 * bucket locks or concurrent RCU protected lookups and traversals.
200 */ 302 */
201int rhashtable_expand(struct rhashtable *ht) 303int rhashtable_expand(struct rhashtable *ht)
202{ 304{
203 struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); 305 struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
204 struct rhash_head *he; 306 struct rhash_head *he;
205 unsigned int i, h; 307 spinlock_t *old_bucket_lock;
206 bool complete; 308 unsigned int new_hash, old_hash;
309 bool complete = false;
207 310
208 ASSERT_RHT_MUTEX(ht); 311 ASSERT_RHT_MUTEX(ht);
209 312
210 if (ht->p.max_shift && ht->shift >= ht->p.max_shift) 313 if (ht->p.max_shift && ht->shift >= ht->p.max_shift)
211 return 0; 314 return 0;
212 315
213 new_tbl = bucket_table_alloc(old_tbl->size * 2); 316 new_tbl = bucket_table_alloc(ht, old_tbl->size * 2);
214 if (new_tbl == NULL) 317 if (new_tbl == NULL)
215 return -ENOMEM; 318 return -ENOMEM;
216 319
217 ht->shift++; 320 ht->shift++;
218 321
219 /* For each new bucket, search the corresponding old bucket 322 /* Make insertions go into the new, empty table right away. Deletions
220 * for the first entry that hashes to the new bucket, and 323 * and lookups will be attempted in both tables until we synchronize.
221 * link the new bucket to that entry. Since all the entries 324 * The synchronize_rcu() guarantees for the new table to be picked up
222 * which will end up in the new bucket appear in the same 325 * so no new additions go into the old table while we relink.
223 * old bucket, this constructs an entirely valid new hash 326 */
224 * table, but with multiple buckets "zipped" together into a 327 rcu_assign_pointer(ht->future_tbl, new_tbl);
225 * single imprecise chain. 328 synchronize_rcu();
329
330 /* For each new bucket, search the corresponding old bucket for the
331 * first entry that hashes to the new bucket, and link the end of
332 * newly formed bucket chain (containing entries added to future
333 * table) to that entry. Since all the entries which will end up in
334 * the new bucket appear in the same old bucket, this constructs an
335 * entirely valid new hash table, but with multiple buckets
336 * "zipped" together into a single imprecise chain.
226 */ 337 */
227 for (i = 0; i < new_tbl->size; i++) { 338 for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
228 h = rht_bucket_index(old_tbl, i); 339 old_hash = rht_bucket_index(old_tbl, new_hash);
229 rht_for_each(he, old_tbl, h) { 340 old_bucket_lock = bucket_lock(old_tbl, old_hash);
230 if (head_hashfn(ht, new_tbl, he) == i) { 341
231 RCU_INIT_POINTER(new_tbl->buckets[i], he); 342 spin_lock_bh(old_bucket_lock);
343 rht_for_each(he, old_tbl, old_hash) {
344 if (head_hashfn(ht, new_tbl, he) == new_hash) {
345 link_old_to_new(new_tbl, new_hash, he);
232 break; 346 break;
233 } 347 }
234 } 348 }
349 spin_unlock_bh(old_bucket_lock);
235 } 350 }
236 351
237 /* Publish the new table pointer. Lookups may now traverse 352 /* Publish the new table pointer. Lookups may now traverse
@@ -241,7 +356,7 @@ int rhashtable_expand(struct rhashtable *ht)
241 rcu_assign_pointer(ht->tbl, new_tbl); 356 rcu_assign_pointer(ht->tbl, new_tbl);
242 357
243 /* Unzip interleaved hash chains */ 358 /* Unzip interleaved hash chains */
244 do { 359 while (!complete && !ht->being_destroyed) {
245 /* Wait for readers. All new readers will see the new 360 /* Wait for readers. All new readers will see the new
246 * table, and thus no references to the old table will 361 * table, and thus no references to the old table will
247 * remain. 362 * remain.
@@ -253,12 +368,17 @@ int rhashtable_expand(struct rhashtable *ht)
253 * table): ... 368 * table): ...
254 */ 369 */
255 complete = true; 370 complete = true;
256 for (i = 0; i < old_tbl->size; i++) { 371 for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
257 hashtable_chain_unzip(ht, new_tbl, old_tbl, i); 372 old_bucket_lock = bucket_lock(old_tbl, old_hash);
258 if (old_tbl->buckets[i] != NULL) 373 spin_lock_bh(old_bucket_lock);
374
375 hashtable_chain_unzip(ht, new_tbl, old_tbl, old_hash);
376 if (old_tbl->buckets[old_hash] != NULL)
259 complete = false; 377 complete = false;
378
379 spin_unlock_bh(old_bucket_lock);
260 } 380 }
261 } while (!complete); 381 }
262 382
263 bucket_table_free(old_tbl); 383 bucket_table_free(old_tbl);
264 return 0; 384 return 0;
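
Expansion then repeats unzip passes, waiting out readers between passes, until every old bucket has been emptied; ht->being_destroyed lets rhashtable_destroy() break out early. The control flow, reduced to a userspace skeleton with callbacks standing in for hashtable_chain_unzip() and synchronize_rcu() (the being_destroyed early exit is omitted):

#include <stdbool.h>
#include <stddef.h>

/* Illustrative stand-in for the old bucket array. */
struct old_table {
	void **buckets;
	size_t size;
};

static void expand_unzip_loop(struct old_table *old,
			      void (*unzip_bucket)(struct old_table *, size_t),
			      void (*wait_for_readers)(void))
{
	bool complete = false;

	while (!complete) {
		wait_for_readers();	/* drain readers on the old table */
		complete = true;
		for (size_t i = 0; i < old->size; i++) {
			unzip_bucket(old, i);
			if (old->buckets[i])
				complete = false;
		}
	}
}
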
@@ -272,38 +392,65 @@ EXPORT_SYMBOL_GPL(rhashtable_expand);
272 * This function may only be called in a context where it is safe to call 392 * This function may only be called in a context where it is safe to call
273 * synchronize_rcu(), e.g. not within a rcu_read_lock() section. 393 * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
274 * 394 *
395 * The caller must ensure that no concurrent resizing occurs by holding
396 * ht->mutex.
397 *
275 * The caller must ensure that no concurrent table mutations take place. 398 * The caller must ensure that no concurrent table mutations take place.
276 * It is however valid to have concurrent lookups if they are RCU protected. 399 * It is however valid to have concurrent lookups if they are RCU protected.
400 *
401 * It is valid to have concurrent insertions and deletions protected by per
402 * bucket locks or concurrent RCU protected lookups and traversals.
277 */ 403 */
278int rhashtable_shrink(struct rhashtable *ht) 404int rhashtable_shrink(struct rhashtable *ht)
279{ 405{
280 struct bucket_table *ntbl, *tbl = rht_dereference(ht->tbl, ht); 406 struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht);
281 unsigned int i; 407 spinlock_t *new_bucket_lock, *old_bucket_lock1, *old_bucket_lock2;
408 unsigned int new_hash;
282 409
283 ASSERT_RHT_MUTEX(ht); 410 ASSERT_RHT_MUTEX(ht);
284 411
285 if (ht->shift <= ht->p.min_shift) 412 if (ht->shift <= ht->p.min_shift)
286 return 0; 413 return 0;
287 414
288 ntbl = bucket_table_alloc(tbl->size / 2); 415 new_tbl = bucket_table_alloc(ht, tbl->size / 2);
289 if (ntbl == NULL) 416 if (new_tbl == NULL)
290 return -ENOMEM; 417 return -ENOMEM;
291 418
292 ht->shift--; 419 rcu_assign_pointer(ht->future_tbl, new_tbl);
420 synchronize_rcu();
293 421
294 /* Link each bucket in the new table to the first bucket 422 /* Link the first entry in the old bucket to the end of the
295 * in the old table that contains entries which will hash 423 * bucket in the new table. As entries are concurrently being
296 * to the new bucket. 424 * added to the new table, lock down the new bucket. As we
425 * always divide the size in half when shrinking, each bucket
426 * in the new table maps to exactly two buckets in the old
427 * table.
428 *
429 * As removals can occur concurrently on the old table, we need
430 * to lock down both matching buckets in the old table.
297 */ 431 */
298 for (i = 0; i < ntbl->size; i++) { 432 for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
299 ntbl->buckets[i] = tbl->buckets[i]; 433 old_bucket_lock1 = bucket_lock(tbl, new_hash);
300 RCU_INIT_POINTER(*bucket_tail(ntbl, i), 434 old_bucket_lock2 = bucket_lock(tbl, new_hash + new_tbl->size);
301 tbl->buckets[i + ntbl->size]); 435 new_bucket_lock = bucket_lock(new_tbl, new_hash);
302 436
437 spin_lock_bh(old_bucket_lock1);
438 spin_lock_bh_nested(old_bucket_lock2, RHT_LOCK_NESTED);
439 spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED2);
440
441 rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
442 tbl->buckets[new_hash]);
443 rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
444 tbl->buckets[new_hash + new_tbl->size]);
445
446 spin_unlock_bh(new_bucket_lock);
447 spin_unlock_bh(old_bucket_lock2);
448 spin_unlock_bh(old_bucket_lock1);
303 } 449 }
304 450
305 /* Publish the new, valid hash table */ 451 /* Publish the new, valid hash table */
306 rcu_assign_pointer(ht->tbl, ntbl); 452 rcu_assign_pointer(ht->tbl, new_tbl);
453 ht->shift--;
307 454
308 /* Wait for readers. No new readers will have references to the 455 /* Wait for readers. No new readers will have references to the
309 * old hash table. 456 * old hash table.
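
Shrinking halves the table, so new bucket i simply receives the chains of old buckets i and i + new_size, appended behind whatever concurrent insertions already landed in the new bucket; that is why both old bucket locks plus the new bucket lock are held, in that order. A single-threaded sketch of the merge itself (chain_tail() mirrors bucket_tail()):

#include <stddef.h>

struct entry {
	struct entry *next;
};

/* Walk to the tail slot of a chain. */
static struct entry **chain_tail(struct entry **head)
{
	while (*head)
		head = &(*head)->next;
	return head;
}

/* New bucket i collects exactly old buckets i and i + new_size. */
static void merge_bucket_pair(struct entry **new_bucket,
			      struct entry *old_a, struct entry *old_b)
{
	*chain_tail(new_bucket) = old_a;
	*chain_tail(new_bucket) = old_b;
}
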
@@ -316,31 +463,63 @@ int rhashtable_shrink(struct rhashtable *ht)
316} 463}
317EXPORT_SYMBOL_GPL(rhashtable_shrink); 464EXPORT_SYMBOL_GPL(rhashtable_shrink);
318 465
466static void rht_deferred_worker(struct work_struct *work)
467{
468 struct rhashtable *ht;
469 struct bucket_table *tbl;
470
471 ht = container_of(work, struct rhashtable, run_work.work);
472 mutex_lock(&ht->mutex);
473 tbl = rht_dereference(ht->tbl, ht);
474
475 if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size))
476 rhashtable_expand(ht);
477 else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size))
478 rhashtable_shrink(ht);
479
480 mutex_unlock(&ht->mutex);
481}
482
319/** 483/**
320 * rhashtable_insert - insert object into hash hash table 484 * rhashtable_insert - insert object into hash hash table
321 * @ht: hash table 485 * @ht: hash table
322 * @obj: pointer to hash head inside object 486 * @obj: pointer to hash head inside object
323 * 487 *
324 * Will automatically grow the table via rhashtable_expand() if the the 488 * Will take a per bucket spinlock to protect against mutual mutations
325 * grow_decision function specified at rhashtable_init() returns true. 489 * on the same bucket. Multiple insertions may occur in parallel unless
490 * they map to the same bucket lock.
326 * 491 *
327 * The caller must ensure that no concurrent table mutations occur. It is 492 * It is safe to call this function from atomic context.
328 * however valid to have concurrent lookups if they are RCU protected. 493 *
494 * Will trigger an automatic deferred table resizing if the size grows
495 * beyond the watermark indicated by grow_decision() which can be passed
496 * to rhashtable_init().
329 */ 497 */
330void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) 498void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
331{ 499{
332 struct bucket_table *tbl = rht_dereference(ht->tbl, ht); 500 struct bucket_table *tbl;
333 u32 hash; 501 spinlock_t *lock;
502 unsigned hash;
334 503
335 ASSERT_RHT_MUTEX(ht); 504 rcu_read_lock();
336 505
506 tbl = rht_dereference_rcu(ht->future_tbl, ht);
337 hash = head_hashfn(ht, tbl, obj); 507 hash = head_hashfn(ht, tbl, obj);
508 lock = bucket_lock(tbl, hash);
509
510 spin_lock_bh(lock);
338 RCU_INIT_POINTER(obj->next, tbl->buckets[hash]); 511 RCU_INIT_POINTER(obj->next, tbl->buckets[hash]);
339 rcu_assign_pointer(tbl->buckets[hash], obj); 512 rcu_assign_pointer(tbl->buckets[hash], obj);
340 ht->nelems++; 513 spin_unlock_bh(lock);
341 514
342 if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) 515 atomic_inc(&ht->nelems);
343 rhashtable_expand(ht); 516
517 /* Only grow the table if no resizing is currently in progress. */
518 if (ht->tbl != ht->future_tbl &&
519 ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size))
520 schedule_delayed_work(&ht->run_work, 0);
521
522 rcu_read_unlock();
344} 523}
345EXPORT_SYMBOL_GPL(rhashtable_insert); 524EXPORT_SYMBOL_GPL(rhashtable_insert);
346 525
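
With per-bucket locks, rhashtable_insert() only needs RCU plus the lock of the target bucket in the future table; the element count becomes an atomic, and crossing the grow watermark merely schedules the deferred worker, and only when no resize is already running (tbl still equals future_tbl). A userspace sketch of that bookkeeping; struct toy_ht, after_insert() and the callbacks are illustrative, not kernel API:

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct toy_ht {
	void *tbl;		/* currently published table */
	void *future_tbl;	/* table new insertions go into */
	atomic_size_t nelems;
	size_t tbl_size;
};

/* After linking the object under the bucket lock, bump the atomic count
 * and kick the deferred worker only if the grow watermark is crossed and
 * no resize is in flight.  schedule_resize() stands in for
 * schedule_delayed_work(&ht->run_work, 0). */
static void after_insert(struct toy_ht *ht,
			 bool (*grow_decision)(size_t nelems, size_t size),
			 void (*schedule_resize)(struct toy_ht *))
{
	size_t n = atomic_fetch_add(&ht->nelems, 1) + 1;

	if (ht->tbl == ht->future_tbl && grow_decision &&
	    grow_decision(n, ht->tbl_size))
		schedule_resize(ht);
}
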
@@ -361,32 +540,56 @@ EXPORT_SYMBOL_GPL(rhashtable_insert);
361 */ 540 */
362bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) 541bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
363{ 542{
364 struct bucket_table *tbl = rht_dereference(ht->tbl, ht); 543 struct bucket_table *tbl;
365 struct rhash_head __rcu **pprev; 544 struct rhash_head __rcu **pprev;
366 struct rhash_head *he; 545 struct rhash_head *he;
367 u32 h; 546 spinlock_t *lock;
547 unsigned int hash;
368 548
369 ASSERT_RHT_MUTEX(ht); 549 rcu_read_lock();
550 tbl = rht_dereference_rcu(ht->tbl, ht);
551 hash = head_hashfn(ht, tbl, obj);
370 552
371 h = head_hashfn(ht, tbl, obj); 553 lock = bucket_lock(tbl, hash);
554 spin_lock_bh(lock);
372 555
373 pprev = &tbl->buckets[h]; 556restart:
374 rht_for_each(he, tbl, h) { 557 pprev = &tbl->buckets[hash];
558 rht_for_each(he, tbl, hash) {
375 if (he != obj) { 559 if (he != obj) {
376 pprev = &he->next; 560 pprev = &he->next;
377 continue; 561 continue;
378 } 562 }
379 563
380 RCU_INIT_POINTER(*pprev, he->next); 564 rcu_assign_pointer(*pprev, obj->next);
381 ht->nelems--; 565 atomic_dec(&ht->nelems);
382 566
383 if (ht->p.shrink_decision && 567 spin_unlock_bh(lock);
568
569 if (ht->tbl != ht->future_tbl &&
570 ht->p.shrink_decision &&
384 ht->p.shrink_decision(ht, tbl->size)) 571 ht->p.shrink_decision(ht, tbl->size))
385 rhashtable_shrink(ht); 572 schedule_delayed_work(&ht->run_work, 0);
573
574 rcu_read_unlock();
386 575
387 return true; 576 return true;
388 } 577 }
389 578
579 if (tbl != rht_dereference_rcu(ht->tbl, ht)) {
580 spin_unlock_bh(lock);
581
582 tbl = rht_dereference_rcu(ht->tbl, ht);
583 hash = head_hashfn(ht, tbl, obj);
584
585 lock = bucket_lock(tbl, hash);
586 spin_lock_bh(lock);
587 goto restart;
588 }
589
590 spin_unlock_bh(lock);
591 rcu_read_unlock();
592
390 return false; 593 return false;
391} 594}
392EXPORT_SYMBOL_GPL(rhashtable_remove); 595EXPORT_SYMBOL_GPL(rhashtable_remove);
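
rhashtable_remove() may race with a resize that publishes a new table while it searches the old one, so on a miss it re-checks whether ht->tbl changed and retries against the new table before reporting failure. The retry shape, abstracted into a userspace sketch; current_table() and remove_from() are illustrative callbacks standing in for the RCU dereference, hashing and locked bucket walk:

#include <stdbool.h>

struct toy_tbl;		/* opaque table snapshot for this sketch */

static bool remove_with_retry(struct toy_tbl *(*current_table)(void),
			      bool (*remove_from)(struct toy_tbl *, void *),
			      void *obj)
{
	struct toy_tbl *tbl = current_table();

	for (;;) {
		if (remove_from(tbl, obj))
			return true;
		if (tbl == current_table())
			return false;	/* table unchanged: truly absent */
		tbl = current_table();	/* resized: retry on the new table */
	}
}
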
@@ -402,25 +605,35 @@ EXPORT_SYMBOL_GPL(rhashtable_remove);
402 * This lookup function may only be used for fixed key hash table (key_len 605 * This lookup function may only be used for fixed key hash table (key_len
403 * paramter set). It will BUG() if used inappropriately. 606 * paramter set). It will BUG() if used inappropriately.
404 * 607 *
405 * Lookups may occur in parallel with hash mutations as long as the lookup is 608 * Lookups may occur in parallel with hashtable mutations and resizing.
406 * guarded by rcu_read_lock(). The caller must take care of this.
407 */ 609 */
408void *rhashtable_lookup(const struct rhashtable *ht, const void *key) 610void *rhashtable_lookup(struct rhashtable *ht, const void *key)
409{ 611{
410 const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); 612 const struct bucket_table *tbl, *old_tbl;
411 struct rhash_head *he; 613 struct rhash_head *he;
412 u32 h; 614 u32 hash;
413 615
414 BUG_ON(!ht->p.key_len); 616 BUG_ON(!ht->p.key_len);
415 617
416 h = key_hashfn(ht, key, ht->p.key_len); 618 rcu_read_lock();
417 rht_for_each_rcu(he, tbl, h) { 619 old_tbl = rht_dereference_rcu(ht->tbl, ht);
620 tbl = rht_dereference_rcu(ht->future_tbl, ht);
621 hash = key_hashfn(ht, key, ht->p.key_len);
622restart:
623 rht_for_each_rcu(he, tbl, rht_bucket_index(tbl, hash)) {
418 if (memcmp(rht_obj(ht, he) + ht->p.key_offset, key, 624 if (memcmp(rht_obj(ht, he) + ht->p.key_offset, key,
419 ht->p.key_len)) 625 ht->p.key_len))
420 continue; 626 continue;
627 rcu_read_unlock();
421 return rht_obj(ht, he); 628 return rht_obj(ht, he);
422 } 629 }
423 630
631 if (unlikely(tbl != old_tbl)) {
632 tbl = old_tbl;
633 goto restart;
634 }
635
636 rcu_read_unlock();
424 return NULL; 637 return NULL;
425} 638}
426EXPORT_SYMBOL_GPL(rhashtable_lookup); 639EXPORT_SYMBOL_GPL(rhashtable_lookup);
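
Lookups now run entirely under RCU and must consult both tables during a resize: fresh insertions live in the future table, while older entries may still sit in the current one, so a miss in the first chain is retried in the second before returning NULL. A self-contained sketch of that fallback; struct toy_entry is illustrative and the plain loops stand in for rht_for_each_rcu():

#include <stddef.h>
#include <string.h>

struct toy_entry {
	struct toy_entry *next;
	unsigned char key[8];	/* key_len is assumed <= 8 in this sketch */
};

static void *lookup_two_tables(struct toy_entry *future_bucket,
			       struct toy_entry *old_bucket,
			       const void *key, size_t key_len)
{
	struct toy_entry *he;

	for (he = future_bucket; he; he = he->next)
		if (!memcmp(he->key, key, key_len))
			return he;

	if (old_bucket == future_bucket)
		return NULL;	/* no resize in progress: single table */

	for (he = old_bucket; he; he = he->next)
		if (!memcmp(he->key, key, key_len))
			return he;

	return NULL;
}
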
@@ -435,25 +648,36 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup);
435 * Traverses the bucket chain behind the provided hash value and calls the 648 * Traverses the bucket chain behind the provided hash value and calls the
436 * specified compare function for each entry. 649 * specified compare function for each entry.
437 * 650 *
438 * Lookups may occur in parallel with hash mutations as long as the lookup is 651 * Lookups may occur in parallel with hashtable mutations and resizing.
439 * guarded by rcu_read_lock(). The caller must take care of this.
440 * 652 *
441 * Returns the first entry on which the compare function returned true. 653 * Returns the first entry on which the compare function returned true.
442 */ 654 */
443void *rhashtable_lookup_compare(const struct rhashtable *ht, const void *key, 655void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
444 bool (*compare)(void *, void *), void *arg) 656 bool (*compare)(void *, void *), void *arg)
445{ 657{
446 const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); 658 const struct bucket_table *tbl, *old_tbl;
447 struct rhash_head *he; 659 struct rhash_head *he;
448 u32 hash; 660 u32 hash;
449 661
662 rcu_read_lock();
663
664 old_tbl = rht_dereference_rcu(ht->tbl, ht);
665 tbl = rht_dereference_rcu(ht->future_tbl, ht);
450 hash = key_hashfn(ht, key, ht->p.key_len); 666 hash = key_hashfn(ht, key, ht->p.key_len);
451 rht_for_each_rcu(he, tbl, hash) { 667restart:
668 rht_for_each_rcu(he, tbl, rht_bucket_index(tbl, hash)) {
452 if (!compare(rht_obj(ht, he), arg)) 669 if (!compare(rht_obj(ht, he), arg))
453 continue; 670 continue;
671 rcu_read_unlock();
454 return rht_obj(ht, he); 672 return rht_obj(ht, he);
455 } 673 }
456 674
675 if (unlikely(tbl != old_tbl)) {
676 tbl = old_tbl;
677 goto restart;
678 }
679 rcu_read_unlock();
680
457 return NULL; 681 return NULL;
458} 682}
459EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); 683EXPORT_SYMBOL_GPL(rhashtable_lookup_compare);
@@ -485,9 +709,6 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params)
485 * .key_offset = offsetof(struct test_obj, key), 709 * .key_offset = offsetof(struct test_obj, key),
486 * .key_len = sizeof(int), 710 * .key_len = sizeof(int),
487 * .hashfn = jhash, 711 * .hashfn = jhash,
488 * #ifdef CONFIG_PROVE_LOCKING
489 * .mutex_is_held = &my_mutex_is_held,
490 * #endif
491 * }; 712 * };
492 * 713 *
493 * Configuration Example 2: Variable length keys 714 * Configuration Example 2: Variable length keys
@@ -507,9 +728,6 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params)
507 * .head_offset = offsetof(struct test_obj, node), 728 * .head_offset = offsetof(struct test_obj, node),
508 * .hashfn = jhash, 729 * .hashfn = jhash,
509 * .obj_hashfn = my_hash_fn, 730 * .obj_hashfn = my_hash_fn,
510 * #ifdef CONFIG_PROVE_LOCKING
511 * .mutex_is_held = &my_mutex_is_held,
512 * #endif
513 * }; 731 * };
514 */ 732 */
515int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) 733int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
@@ -529,18 +747,29 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
529 if (params->nelem_hint) 747 if (params->nelem_hint)
530 size = rounded_hashtable_size(params); 748 size = rounded_hashtable_size(params);
531 749
532 tbl = bucket_table_alloc(size); 750 memset(ht, 0, sizeof(*ht));
751 mutex_init(&ht->mutex);
752 memcpy(&ht->p, params, sizeof(*params));
753
754 if (params->locks_mul)
755 ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
756 else
757 ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
758
759 tbl = bucket_table_alloc(ht, size);
533 if (tbl == NULL) 760 if (tbl == NULL)
534 return -ENOMEM; 761 return -ENOMEM;
535 762
536 memset(ht, 0, sizeof(*ht));
537 ht->shift = ilog2(tbl->size); 763 ht->shift = ilog2(tbl->size);
538 memcpy(&ht->p, params, sizeof(*params));
539 RCU_INIT_POINTER(ht->tbl, tbl); 764 RCU_INIT_POINTER(ht->tbl, tbl);
765 RCU_INIT_POINTER(ht->future_tbl, tbl);
540 766
541 if (!ht->p.hash_rnd) 767 if (!ht->p.hash_rnd)
542 get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); 768 get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd));
543 769
770 if (ht->p.grow_decision || ht->p.shrink_decision)
771 INIT_DEFERRABLE_WORK(&ht->run_work, rht_deferred_worker);
772
544 return 0; 773 return 0;
545} 774}
546EXPORT_SYMBOL_GPL(rhashtable_init); 775EXPORT_SYMBOL_GPL(rhashtable_init);
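
Hedged configuration sketch (not taken from the patch itself): rhashtable_init() now copies the params, initializes ht->mutex, and only arms the deferred resize worker when a grow or shrink decision is supplied, while .locks_mul tunes the bucket-locks-per-CPU multiplier and defaults to BUCKET_LOCKS_PER_CPU. All fields below appear elsewhere in this file; only the .locks_mul value and the struct test_obj layout are illustrative.

struct rhashtable_params params = {
	.nelem_hint	 = 1024,
	.head_offset	 = offsetof(struct test_obj, node),
	.key_offset	 = offsetof(struct test_obj, value),
	.key_len	 = sizeof(int),
	.hashfn		 = jhash,
	.grow_decision	 = rht_grow_above_75,	/* arms the deferred worker */
	.shrink_decision = rht_shrink_below_30,
	.locks_mul	 = 64,	/* optional; rounded up to a power of two */
};
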
@@ -553,9 +782,16 @@ EXPORT_SYMBOL_GPL(rhashtable_init);
553 * has to make sure that no resizing may happen by unpublishing the hashtable 782 * has to make sure that no resizing may happen by unpublishing the hashtable
554 * and waiting for the quiescent cycle before releasing the bucket array. 783 * and waiting for the quiescent cycle before releasing the bucket array.
555 */ 784 */
556void rhashtable_destroy(const struct rhashtable *ht) 785void rhashtable_destroy(struct rhashtable *ht)
557{ 786{
558 bucket_table_free(ht->tbl); 787 ht->being_destroyed = true;
788
789 mutex_lock(&ht->mutex);
790
791 cancel_delayed_work(&ht->run_work);
792 bucket_table_free(rht_dereference(ht->tbl, ht));
793
794 mutex_unlock(&ht->mutex);
559} 795}
560EXPORT_SYMBOL_GPL(rhashtable_destroy); 796EXPORT_SYMBOL_GPL(rhashtable_destroy);
561 797
@@ -570,13 +806,6 @@ EXPORT_SYMBOL_GPL(rhashtable_destroy);
570#define TEST_PTR ((void *) 0xdeadbeef) 806#define TEST_PTR ((void *) 0xdeadbeef)
571#define TEST_NEXPANDS 4 807#define TEST_NEXPANDS 4
572 808
573#ifdef CONFIG_PROVE_LOCKING
574static int test_mutex_is_held(void *parent)
575{
576 return 1;
577}
578#endif
579
580struct test_obj { 809struct test_obj {
581 void *ptr; 810 void *ptr;
582 int value; 811 int value;
@@ -646,10 +875,10 @@ static void test_bucket_stats(struct rhashtable *ht, bool quiet)
646 i, tbl->buckets[i], cnt); 875 i, tbl->buckets[i], cnt);
647 } 876 }
648 877
649 pr_info(" Traversal complete: counted=%u, nelems=%zu, entries=%d\n", 878 pr_info(" Traversal complete: counted=%u, nelems=%u, entries=%d\n",
650 total, ht->nelems, TEST_ENTRIES); 879 total, atomic_read(&ht->nelems), TEST_ENTRIES);
651 880
652 if (total != ht->nelems || total != TEST_ENTRIES) 881 if (total != atomic_read(&ht->nelems) || total != TEST_ENTRIES)
653 pr_warn("Test failed: Total count mismatch ^^^"); 882 pr_warn("Test failed: Total count mismatch ^^^");
654} 883}
655 884
@@ -688,7 +917,9 @@ static int __init test_rhashtable(struct rhashtable *ht)
688 917
689 for (i = 0; i < TEST_NEXPANDS; i++) { 918 for (i = 0; i < TEST_NEXPANDS; i++) {
690 pr_info(" Table expansion iteration %u...\n", i); 919 pr_info(" Table expansion iteration %u...\n", i);
920 mutex_lock(&ht->mutex);
691 rhashtable_expand(ht); 921 rhashtable_expand(ht);
922 mutex_unlock(&ht->mutex);
692 923
693 rcu_read_lock(); 924 rcu_read_lock();
694 pr_info(" Verifying lookups...\n"); 925 pr_info(" Verifying lookups...\n");
@@ -698,7 +929,9 @@ static int __init test_rhashtable(struct rhashtable *ht)
698 929
699 for (i = 0; i < TEST_NEXPANDS; i++) { 930 for (i = 0; i < TEST_NEXPANDS; i++) {
700 pr_info(" Table shrinkage iteration %u...\n", i); 931 pr_info(" Table shrinkage iteration %u...\n", i);
932 mutex_lock(&ht->mutex);
701 rhashtable_shrink(ht); 933 rhashtable_shrink(ht);
934 mutex_unlock(&ht->mutex);
702 935
703 rcu_read_lock(); 936 rcu_read_lock();
704 pr_info(" Verifying lookups...\n"); 937 pr_info(" Verifying lookups...\n");
@@ -741,9 +974,6 @@ static int __init test_rht_init(void)
741 .key_offset = offsetof(struct test_obj, value), 974 .key_offset = offsetof(struct test_obj, value),
742 .key_len = sizeof(int), 975 .key_len = sizeof(int),
743 .hashfn = jhash, 976 .hashfn = jhash,
744#ifdef CONFIG_PROVE_LOCKING
745 .mutex_is_held = &test_mutex_is_held,
746#endif
747 .grow_decision = rht_grow_above_75, 977 .grow_decision = rht_grow_above_75,
748 .shrink_decision = rht_shrink_below_30, 978 .shrink_decision = rht_shrink_below_30,
749 }; 979 };