author		David S. Miller <davem@davemloft.net>	2015-02-06 18:18:39 -0500
committer	David S. Miller <davem@davemloft.net>	2015-02-06 18:18:39 -0500
commit		41e8b206f8df6daf90e132a73fc2c60f51a36c6b (patch)
tree		709001bcb9373ca4d622f70d4c48095aea4d1a61 /lib
parent		2ca292d968ef20cb04f31192d1f626bd8d782960 (diff)
parent		cf52d52f9ccb9966ac019d9f79824195583e3e6c (diff)
Merge branch 'rhashtable-next'
Thomas Graf says:

====================
rhashtable fixes

This series fixes all remaining known issues with rhashtable that have
been reported. In particular the race condition reported by Ying Xue.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'lib')
-rw-r--r--	lib/rhashtable.c	305
1 file changed, 167 insertions(+), 138 deletions(-)
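
Note on the locking scheme (editorial illustration, not part of the commit): the series caps the bucket-lock array at tbl->size/2 ("never allocate more than 0.5 locks per bucket") so that the two new-table buckets fed by one old-table bucket during an expansion always map to the same lock. That is what allows the new lock_buckets()/unlock_buckets() helpers to protect both tables with a single nested acquisition. A minimal standalone C sketch of the index arithmetic, assuming a power-of-two expansion from 8 to 16 buckets and the locks[hash & locks_mask] selection used by bucket_lock():

	/* Standalone illustration (not from the patch): why capping the lock
	 * array at (at most) half the table size makes the two destination
	 * buckets of an expansion share one lock.  Lock selection mirrors
	 * bucket_lock(): locks[hash & locks_mask].
	 */
	#include <stdio.h>

	int main(void)
	{
		unsigned int old_size = 8, new_size = 16;   /* power-of-two expansion */
		unsigned int nr_locks = new_size / 2;       /* "0.5 locks per bucket" */
		unsigned int locks_mask = nr_locks - 1;
		unsigned int h;

		for (h = 0; h < old_size; h++) {
			unsigned int b1 = h;                /* first destination bucket  */
			unsigned int b2 = h + old_size;     /* second destination bucket */

			/* b1 & locks_mask == b2 & locks_mask for every h */
			printf("old bucket %u -> new buckets %2u and %2u -> lock %u\n",
			       h, b1, b2, b1 & locks_mask);
		}
		return 0;
	}

For any old bucket h, the new buckets h and h + old_size differ only in a bit that locks_mask already masks off, so both destination buckets always hash to the same spinlock.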
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 057919164e23..5919d63f58e4 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -1,7 +1,7 @@
 /*
  * Resizable, Scalable, Concurrent Hash Table
  *
- * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch>
+ * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
  *
  * Based on the following paper:
@@ -34,12 +34,17 @@
 enum {
 	RHT_LOCK_NORMAL,
 	RHT_LOCK_NESTED,
-	RHT_LOCK_NESTED2,
 };
 
 /* The bucket lock is selected based on the hash and protects mutations
  * on a group of hash buckets.
  *
+ * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
+ * a single lock always covers both buckets which may both contains
+ * entries which link to the same bucket of the old table during resizing.
+ * This allows to simplify the locking as locking the bucket in both
+ * tables during resize always guarantee protection.
+ *
  * IMPORTANT: When holding the bucket lock of both the old and new table
  * during expansions and shrinking, the old bucket lock must always be
  * acquired first.
@@ -49,26 +54,6 @@ static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash)
 	return &tbl->locks[hash & tbl->locks_mask];
 }
 
-#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
-#define ASSERT_BUCKET_LOCK(TBL, HASH) \
-	BUG_ON(!lockdep_rht_bucket_is_held(TBL, HASH))
-
-#ifdef CONFIG_PROVE_LOCKING
-int lockdep_rht_mutex_is_held(struct rhashtable *ht)
-{
-	return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
-}
-EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
-
-int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
-{
-	spinlock_t *lock = bucket_lock(tbl, hash);
-
-	return (debug_locks) ? lockdep_is_held(lock) : 1;
-}
-EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
-#endif
-
 static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
 {
 	return (void *) he - ht->p.head_offset;
@@ -94,13 +79,7 @@ static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr)
 
 static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len)
 {
-	struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
-	u32 hash;
-
-	hash = ht->p.hashfn(key, len, ht->p.hash_rnd);
-	hash >>= HASH_RESERVED_SPACE;
-
-	return rht_bucket_index(tbl, hash);
+	return ht->p.hashfn(key, len, ht->p.hash_rnd) >> HASH_RESERVED_SPACE;
 }
 
 static u32 head_hashfn(const struct rhashtable *ht,
@@ -110,6 +89,77 @@ static u32 head_hashfn(const struct rhashtable *ht,
 	return rht_bucket_index(tbl, obj_raw_hashfn(ht, rht_obj(ht, he)));
 }
 
+#ifdef CONFIG_PROVE_LOCKING
+static void debug_dump_buckets(const struct rhashtable *ht,
+			       const struct bucket_table *tbl)
+{
+	struct rhash_head *he;
+	unsigned int i, hash;
+
+	for (i = 0; i < tbl->size; i++) {
+		pr_warn(" [Bucket %d] ", i);
+		rht_for_each_rcu(he, tbl, i) {
+			hash = head_hashfn(ht, tbl, he);
+			pr_cont("[hash = %#x, lock = %p] ",
+				hash, bucket_lock(tbl, hash));
+		}
+		pr_cont("\n");
+	}
+
+}
+
+static void debug_dump_table(struct rhashtable *ht,
+			     const struct bucket_table *tbl,
+			     unsigned int hash)
+{
+	struct bucket_table *old_tbl, *future_tbl;
+
+	pr_emerg("BUG: lock for hash %#x in table %p not held\n",
+		 hash, tbl);
+
+	rcu_read_lock();
+	future_tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	old_tbl = rht_dereference_rcu(ht->tbl, ht);
+	if (future_tbl != old_tbl) {
+		pr_warn("Future table %p (size: %zd)\n",
+			future_tbl, future_tbl->size);
+		debug_dump_buckets(ht, future_tbl);
+	}
+
+	pr_warn("Table %p (size: %zd)\n", old_tbl, old_tbl->size);
+	debug_dump_buckets(ht, old_tbl);
+
+	rcu_read_unlock();
+}
+
+#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
+#define ASSERT_BUCKET_LOCK(HT, TBL, HASH)				\
+	do {								\
+		if (unlikely(!lockdep_rht_bucket_is_held(TBL, HASH))) {	\
+			debug_dump_table(HT, TBL, HASH);		\
+			BUG();						\
+		}							\
+	} while (0)
+
+int lockdep_rht_mutex_is_held(struct rhashtable *ht)
+{
+	return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
+}
+EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
+
+int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
+{
+	spinlock_t *lock = bucket_lock(tbl, hash);
+
+	return (debug_locks) ? lockdep_is_held(lock) : 1;
+}
+EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
+#else
+#define ASSERT_RHT_MUTEX(HT)
+#define ASSERT_BUCKET_LOCK(HT, TBL, HASH)
+#endif
+
+
 static struct rhash_head __rcu **bucket_tail(struct bucket_table *tbl, u32 n)
 {
 	struct rhash_head __rcu **pprev;
@@ -134,8 +184,8 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
 	nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
 	size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
 
-	/* Never allocate more than one lock per bucket */
-	size = min_t(unsigned int, size, tbl->size);
+	/* Never allocate more than 0.5 locks per bucket */
+	size = min_t(unsigned int, size, tbl->size >> 1);
 
 	if (sizeof(spinlock_t) != 0) {
 #ifdef CONFIG_NUMA
@@ -217,25 +267,48 @@ bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
 }
 EXPORT_SYMBOL_GPL(rht_shrink_below_30);
 
-static void hashtable_chain_unzip(const struct rhashtable *ht,
+static void lock_buckets(struct bucket_table *new_tbl,
+			 struct bucket_table *old_tbl, unsigned int hash)
+	__acquires(old_bucket_lock)
+{
+	spin_lock_bh(bucket_lock(old_tbl, hash));
+	if (new_tbl != old_tbl)
+		spin_lock_bh_nested(bucket_lock(new_tbl, hash),
+				    RHT_LOCK_NESTED);
+}
+
+static void unlock_buckets(struct bucket_table *new_tbl,
+			   struct bucket_table *old_tbl, unsigned int hash)
+	__releases(old_bucket_lock)
+{
+	if (new_tbl != old_tbl)
+		spin_unlock_bh(bucket_lock(new_tbl, hash));
+	spin_unlock_bh(bucket_lock(old_tbl, hash));
+}
+
+/**
+ * Unlink entries on bucket which hash to different bucket.
+ *
+ * Returns true if no more work needs to be performed on the bucket.
+ */
+static bool hashtable_chain_unzip(struct rhashtable *ht,
 				  const struct bucket_table *new_tbl,
 				  struct bucket_table *old_tbl,
 				  size_t old_hash)
 {
 	struct rhash_head *he, *p, *next;
-	spinlock_t *new_bucket_lock, *new_bucket_lock2 = NULL;
 	unsigned int new_hash, new_hash2;
 
-	ASSERT_BUCKET_LOCK(old_tbl, old_hash);
+	ASSERT_BUCKET_LOCK(ht, old_tbl, old_hash);
 
 	/* Old bucket empty, no work needed. */
 	p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
 				   old_hash);
 	if (rht_is_a_nulls(p))
-		return;
+		return false;
 
-	new_hash = new_hash2 = head_hashfn(ht, new_tbl, p);
-	new_bucket_lock = bucket_lock(new_tbl, new_hash);
+	new_hash = head_hashfn(ht, new_tbl, p);
+	ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash);
 
 	/* Advance the old bucket pointer one or more times until it
 	 * reaches a node that doesn't hash to the same bucket as the
@@ -243,22 +316,14 @@ static void hashtable_chain_unzip(const struct rhashtable *ht,
 	 */
 	rht_for_each_continue(he, p->next, old_tbl, old_hash) {
 		new_hash2 = head_hashfn(ht, new_tbl, he);
+		ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash2);
+
 		if (new_hash != new_hash2)
 			break;
 		p = he;
 	}
 	rcu_assign_pointer(old_tbl->buckets[old_hash], p->next);
 
-	spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
-
-	/* If we have encountered an entry that maps to a different bucket in
-	 * the new table, lock down that bucket as well as we might cut off
-	 * the end of the chain.
-	 */
-	new_bucket_lock2 = bucket_lock(new_tbl, new_hash);
-	if (new_bucket_lock != new_bucket_lock2)
-		spin_lock_bh_nested(new_bucket_lock2, RHT_LOCK_NESTED2);
-
 	/* Find the subsequent node which does hash to the same
 	 * bucket as node P, or NULL if no such node exists.
 	 */
@@ -277,21 +342,18 @@ static void hashtable_chain_unzip(const struct rhashtable *ht,
 	 */
 	rcu_assign_pointer(p->next, next);
 
-	if (new_bucket_lock != new_bucket_lock2)
-		spin_unlock_bh(new_bucket_lock2);
-	spin_unlock_bh(new_bucket_lock);
+	p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
+				   old_hash);
+
+	return !rht_is_a_nulls(p);
 }
 
-static void link_old_to_new(struct bucket_table *new_tbl,
+static void link_old_to_new(struct rhashtable *ht, struct bucket_table *new_tbl,
 			    unsigned int new_hash, struct rhash_head *entry)
 {
-	spinlock_t *new_bucket_lock;
+	ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash);
 
-	new_bucket_lock = bucket_lock(new_tbl, new_hash);
-
-	spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
 	rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry);
-	spin_unlock_bh(new_bucket_lock);
 }
 
 /**
@@ -314,7 +376,6 @@ int rhashtable_expand(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
 	struct rhash_head *he;
-	spinlock_t *old_bucket_lock;
 	unsigned int new_hash, old_hash;
 	bool complete = false;
 
@@ -344,24 +405,16 @@ int rhashtable_expand(struct rhashtable *ht)
 	 */
 	for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
 		old_hash = rht_bucket_index(old_tbl, new_hash);
-		old_bucket_lock = bucket_lock(old_tbl, old_hash);
-
-		spin_lock_bh(old_bucket_lock);
+		lock_buckets(new_tbl, old_tbl, new_hash);
 		rht_for_each(he, old_tbl, old_hash) {
 			if (head_hashfn(ht, new_tbl, he) == new_hash) {
-				link_old_to_new(new_tbl, new_hash, he);
+				link_old_to_new(ht, new_tbl, new_hash, he);
 				break;
 			}
 		}
-		spin_unlock_bh(old_bucket_lock);
+		unlock_buckets(new_tbl, old_tbl, new_hash);
 	}
 
-	/* Publish the new table pointer. Lookups may now traverse
-	 * the new table, but they will not benefit from any
-	 * additional efficiency until later steps unzip the buckets.
-	 */
-	rcu_assign_pointer(ht->tbl, new_tbl);
-
 	/* Unzip interleaved hash chains */
 	while (!complete && !ht->being_destroyed) {
 		/* Wait for readers. All new readers will see the new
@@ -376,21 +429,19 @@ int rhashtable_expand(struct rhashtable *ht)
 		 */
 		complete = true;
 		for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
-			struct rhash_head *head;
-
-			old_bucket_lock = bucket_lock(old_tbl, old_hash);
-			spin_lock_bh(old_bucket_lock);
+			lock_buckets(new_tbl, old_tbl, old_hash);
 
-			hashtable_chain_unzip(ht, new_tbl, old_tbl, old_hash);
-			head = rht_dereference_bucket(old_tbl->buckets[old_hash],
-						      old_tbl, old_hash);
-			if (!rht_is_a_nulls(head))
+			if (hashtable_chain_unzip(ht, new_tbl, old_tbl,
+						  old_hash))
 				complete = false;
 
-			spin_unlock_bh(old_bucket_lock);
+			unlock_buckets(new_tbl, old_tbl, old_hash);
 		}
 	}
 
+	rcu_assign_pointer(ht->tbl, new_tbl);
+	synchronize_rcu();
+
 	bucket_table_free(old_tbl);
 	return 0;
 }
@@ -415,7 +466,6 @@ EXPORT_SYMBOL_GPL(rhashtable_expand);
 int rhashtable_shrink(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht);
-	spinlock_t *new_bucket_lock, *old_bucket_lock1, *old_bucket_lock2;
 	unsigned int new_hash;
 
 	ASSERT_RHT_MUTEX(ht);
@@ -433,36 +483,17 @@ int rhashtable_shrink(struct rhashtable *ht)
 	 * always divide the size in half when shrinking, each bucket
 	 * in the new table maps to exactly two buckets in the old
 	 * table.
-	 *
-	 * As removals can occur concurrently on the old table, we need
-	 * to lock down both matching buckets in the old table.
 	 */
 	for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
-		old_bucket_lock1 = bucket_lock(tbl, new_hash);
-		old_bucket_lock2 = bucket_lock(tbl, new_hash + new_tbl->size);
-		new_bucket_lock = bucket_lock(new_tbl, new_hash);
-
-		spin_lock_bh(old_bucket_lock1);
-
-		/* Depending on the lock per buckets mapping, the bucket in
-		 * the lower and upper region may map to the same lock.
-		 */
-		if (old_bucket_lock1 != old_bucket_lock2) {
-			spin_lock_bh_nested(old_bucket_lock2, RHT_LOCK_NESTED);
-			spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED2);
-		} else {
-			spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
-		}
+		lock_buckets(new_tbl, tbl, new_hash);
 
 		rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
 				   tbl->buckets[new_hash]);
+		ASSERT_BUCKET_LOCK(ht, tbl, new_hash + new_tbl->size);
 		rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
 				   tbl->buckets[new_hash + new_tbl->size]);
 
-		spin_unlock_bh(new_bucket_lock);
-		if (old_bucket_lock1 != old_bucket_lock2)
-			spin_unlock_bh(old_bucket_lock2);
-		spin_unlock_bh(old_bucket_lock1);
+		unlock_buckets(new_tbl, tbl, new_hash);
 	}
 
 	/* Publish the new, valid hash table */
@@ -524,6 +555,8 @@ static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 	struct rhash_head *head = rht_dereference_bucket(tbl->buckets[hash],
 							 tbl, hash);
 
+	ASSERT_BUCKET_LOCK(ht, tbl, hash);
+
 	if (rht_is_a_nulls(head))
 		INIT_RHT_NULLS_HEAD(obj->next, ht, hash);
 	else
@@ -553,19 +586,18 @@ static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
  */
 void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
 {
-	struct bucket_table *tbl;
-	spinlock_t *lock;
+	struct bucket_table *tbl, *old_tbl;
 	unsigned hash;
 
 	rcu_read_lock();
 
 	tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	old_tbl = rht_dereference_rcu(ht->tbl, ht);
 	hash = head_hashfn(ht, tbl, obj);
-	lock = bucket_lock(tbl, hash);
 
-	spin_lock_bh(lock);
+	lock_buckets(tbl, old_tbl, hash);
 	__rhashtable_insert(ht, obj, tbl, hash);
-	spin_unlock_bh(lock);
+	unlock_buckets(tbl, old_tbl, hash);
 
 	rcu_read_unlock();
 }
@@ -588,21 +620,20 @@ EXPORT_SYMBOL_GPL(rhashtable_insert);
  */
 bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
 {
-	struct bucket_table *tbl;
+	struct bucket_table *tbl, *new_tbl, *old_tbl;
 	struct rhash_head __rcu **pprev;
-	struct rhash_head *he;
-	spinlock_t *lock;
-	unsigned int hash;
+	struct rhash_head *he, *he2;
+	unsigned int hash, new_hash;
 	bool ret = false;
 
 	rcu_read_lock();
-	tbl = rht_dereference_rcu(ht->tbl, ht);
-	hash = head_hashfn(ht, tbl, obj);
-
-	lock = bucket_lock(tbl, hash);
-	spin_lock_bh(lock);
+	tbl = old_tbl = rht_dereference_rcu(ht->tbl, ht);
+	new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
 
+	lock_buckets(new_tbl, old_tbl, new_hash);
 restart:
+	hash = rht_bucket_index(tbl, new_hash);
 	pprev = &tbl->buckets[hash];
 	rht_for_each(he, tbl, hash) {
 		if (he != obj) {
@@ -610,8 +641,22 @@ restart:
 			continue;
 		}
 
-		rcu_assign_pointer(*pprev, obj->next);
+		ASSERT_BUCKET_LOCK(ht, tbl, hash);
+
+		if (unlikely(new_tbl != tbl)) {
+			rht_for_each_continue(he2, he->next, tbl, hash) {
+				if (head_hashfn(ht, tbl, he2) == hash) {
+					rcu_assign_pointer(*pprev, he2);
+					goto found;
+				}
+			}
+
+			INIT_RHT_NULLS_HEAD(*pprev, ht, hash);
+		} else {
+			rcu_assign_pointer(*pprev, obj->next);
+		}
 
+found:
 		ret = true;
 		break;
 	}
@@ -621,18 +666,12 @@ restart:
 	 * resizing. Thus traversing both is fine and the added cost is
 	 * very rare.
 	 */
-	if (tbl != rht_dereference_rcu(ht->future_tbl, ht)) {
-		spin_unlock_bh(lock);
-
-		tbl = rht_dereference_rcu(ht->future_tbl, ht);
-		hash = head_hashfn(ht, tbl, obj);
-
-		lock = bucket_lock(tbl, hash);
-		spin_lock_bh(lock);
+	if (tbl != new_tbl) {
+		tbl = new_tbl;
 		goto restart;
 	}
 
-	spin_unlock_bh(lock);
+	unlock_buckets(new_tbl, old_tbl, new_hash);
 
 	if (ret) {
 		atomic_dec(&ht->nelems);
@@ -788,24 +827,17 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
 				      void *arg)
 {
 	struct bucket_table *new_tbl, *old_tbl;
-	spinlock_t *new_bucket_lock, *old_bucket_lock;
-	u32 new_hash, old_hash;
+	u32 new_hash;
 	bool success = true;
 
 	BUG_ON(!ht->p.key_len);
 
 	rcu_read_lock();
-
 	old_tbl = rht_dereference_rcu(ht->tbl, ht);
-	old_hash = head_hashfn(ht, old_tbl, obj);
-	old_bucket_lock = bucket_lock(old_tbl, old_hash);
-	spin_lock_bh(old_bucket_lock);
-
 	new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
 	new_hash = head_hashfn(ht, new_tbl, obj);
-	new_bucket_lock = bucket_lock(new_tbl, new_hash);
-	if (unlikely(old_tbl != new_tbl))
-		spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
+
+	lock_buckets(new_tbl, old_tbl, new_hash);
 
 	if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset,
 				      compare, arg)) {
@@ -816,10 +848,7 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
 	__rhashtable_insert(ht, obj, new_tbl, new_hash);
 
 exit:
-	if (unlikely(old_tbl != new_tbl))
-		spin_unlock_bh(new_bucket_lock);
-	spin_unlock_bh(old_bucket_lock);
-
+	unlock_buckets(new_tbl, old_tbl, new_hash);
 	rcu_read_unlock();
 
 	return success;