author		David S. Miller <davem@davemloft.net>	2015-02-06 18:18:39 -0500
committer	David S. Miller <davem@davemloft.net>	2015-02-06 18:18:39 -0500
commit		41e8b206f8df6daf90e132a73fc2c60f51a36c6b (patch)
tree		709001bcb9373ca4d622f70d4c48095aea4d1a61 /lib
parent		2ca292d968ef20cb04f31192d1f626bd8d782960 (diff)
parent		cf52d52f9ccb9966ac019d9f79824195583e3e6c (diff)
Merge branch 'rhashtable-next'
Thomas Graf says:
====================
rhashtable fixes
This series fixes all remaining known issues with rhashtable that
have been reported. In particular the race condition reported by
Ying Xue.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
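
[Editor's note] The invariant the series leans on is spelled out in the new comment added to lib/rhashtable.c below: at most tbl->size/2 bucket locks are allocated, and the lock is chosen by hash & tbl->locks_mask, so the two old-table buckets that fold into a single bucket when the table halves always share one lock. A minimal standalone sketch of that arithmetic (userspace C, illustrative names only, not part of the patch):

/* Illustration only: with a power-of-two lock array of at most size/2
 * entries, buckets h and h + size/2 always select the same lock, so one
 * lock covers both source buckets of a shrink.
 */
#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int size = 64;          /* table size, power of two */
	unsigned int nlocks = size / 2;  /* upper bound imposed by the patch */
	unsigned int locks_mask = nlocks - 1;
	unsigned int h;

	for (h = 0; h < size / 2; h++)
		assert((h & locks_mask) == ((h + size / 2) & locks_mask));

	printf("one lock covers both source buckets for all %u hashes\n",
	       size / 2);
	return 0;
}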
Diffstat (limited to 'lib')
-rw-r--r--	lib/rhashtable.c	305
1 file changed, 167 insertions, 138 deletions
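
[Editor's note] The resize and mutation paths in the diff below now take the old- and new-table bucket locks through a single lock_buckets()/unlock_buckets() pair, always acquiring the old-table lock first and touching the new-table lock only while a resize is in flight. A userspace analogue of that pairing (pthread mutexes instead of kernel spinlocks; the struct and names here are placeholders, not the kernel API):

#include <pthread.h>
#include <stdio.h>

struct toy_table {
	unsigned int size;        /* power of two */
	unsigned int locks_mask;  /* nlocks - 1, with nlocks <= size / 2 */
	pthread_mutex_t *locks;
};

static pthread_mutex_t *bucket_lock(struct toy_table *tbl, unsigned int hash)
{
	return &tbl->locks[hash & tbl->locks_mask];
}

/* Old-table lock first, new-table lock only if a resize is in flight. */
static void lock_buckets(struct toy_table *new_tbl, struct toy_table *old_tbl,
			 unsigned int hash)
{
	pthread_mutex_lock(bucket_lock(old_tbl, hash));
	if (new_tbl != old_tbl)
		pthread_mutex_lock(bucket_lock(new_tbl, hash));
}

/* Release in reverse order. */
static void unlock_buckets(struct toy_table *new_tbl, struct toy_table *old_tbl,
			   unsigned int hash)
{
	if (new_tbl != old_tbl)
		pthread_mutex_unlock(bucket_lock(new_tbl, hash));
	pthread_mutex_unlock(bucket_lock(old_tbl, hash));
}

int main(void)
{
	pthread_mutex_t locks[2] = { PTHREAD_MUTEX_INITIALIZER,
				     PTHREAD_MUTEX_INITIALIZER };
	struct toy_table tbl = { .size = 4, .locks_mask = 1, .locks = locks };

	/* No resize in flight: old and new table are the same object. */
	lock_buckets(&tbl, &tbl, 3);
	/* ... mutate bucket 3 ... */
	unlock_buckets(&tbl, &tbl, 3);

	printf("locked and unlocked bucket 3 under a single lock\n");
	return 0;
}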
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 057919164e23..5919d63f58e4 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -1,7 +1,7 @@
 /*
  * Resizable, Scalable, Concurrent Hash Table
  *
- * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch>
+ * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
  *
  * Based on the following paper:
@@ -34,12 +34,17 @@
 enum {
 	RHT_LOCK_NORMAL,
 	RHT_LOCK_NESTED,
-	RHT_LOCK_NESTED2,
 };
 
 /* The bucket lock is selected based on the hash and protects mutations
  * on a group of hash buckets.
  *
+ * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
+ * a single lock always covers both buckets which may both contains
+ * entries which link to the same bucket of the old table during resizing.
+ * This allows to simplify the locking as locking the bucket in both
+ * tables during resize always guarantee protection.
+ *
  * IMPORTANT: When holding the bucket lock of both the old and new table
  * during expansions and shrinking, the old bucket lock must always be
  * acquired first.
@@ -49,26 +54,6 @@ static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash)
 	return &tbl->locks[hash & tbl->locks_mask];
 }
 
-#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
-#define ASSERT_BUCKET_LOCK(TBL, HASH) \
-	BUG_ON(!lockdep_rht_bucket_is_held(TBL, HASH))
-
-#ifdef CONFIG_PROVE_LOCKING
-int lockdep_rht_mutex_is_held(struct rhashtable *ht)
-{
-	return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
-}
-EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
-
-int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
-{
-	spinlock_t *lock = bucket_lock(tbl, hash);
-
-	return (debug_locks) ? lockdep_is_held(lock) : 1;
-}
-EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
-#endif
-
 static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
 {
 	return (void *) he - ht->p.head_offset;
@@ -94,13 +79,7 @@ static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr)
 
 static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len)
 {
-	struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
-	u32 hash;
-
-	hash = ht->p.hashfn(key, len, ht->p.hash_rnd);
-	hash >>= HASH_RESERVED_SPACE;
-
-	return rht_bucket_index(tbl, hash);
+	return ht->p.hashfn(key, len, ht->p.hash_rnd) >> HASH_RESERVED_SPACE;
 }
 
 static u32 head_hashfn(const struct rhashtable *ht,
@@ -110,6 +89,77 @@ static u32 head_hashfn(const struct rhashtable *ht,
 	return rht_bucket_index(tbl, obj_raw_hashfn(ht, rht_obj(ht, he)));
 }
 
+#ifdef CONFIG_PROVE_LOCKING
+static void debug_dump_buckets(const struct rhashtable *ht,
+			       const struct bucket_table *tbl)
+{
+	struct rhash_head *he;
+	unsigned int i, hash;
+
+	for (i = 0; i < tbl->size; i++) {
+		pr_warn(" [Bucket %d] ", i);
+		rht_for_each_rcu(he, tbl, i) {
+			hash = head_hashfn(ht, tbl, he);
+			pr_cont("[hash = %#x, lock = %p] ",
+				hash, bucket_lock(tbl, hash));
+		}
+		pr_cont("\n");
+	}
+
+}
+
+static void debug_dump_table(struct rhashtable *ht,
+			     const struct bucket_table *tbl,
+			     unsigned int hash)
+{
+	struct bucket_table *old_tbl, *future_tbl;
+
+	pr_emerg("BUG: lock for hash %#x in table %p not held\n",
+		 hash, tbl);
+
+	rcu_read_lock();
+	future_tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	old_tbl = rht_dereference_rcu(ht->tbl, ht);
+	if (future_tbl != old_tbl) {
+		pr_warn("Future table %p (size: %zd)\n",
+			future_tbl, future_tbl->size);
+		debug_dump_buckets(ht, future_tbl);
+	}
+
+	pr_warn("Table %p (size: %zd)\n", old_tbl, old_tbl->size);
+	debug_dump_buckets(ht, old_tbl);
+
+	rcu_read_unlock();
+}
+
+#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
+#define ASSERT_BUCKET_LOCK(HT, TBL, HASH)				\
+	do {								\
+		if (unlikely(!lockdep_rht_bucket_is_held(TBL, HASH))) {	\
+			debug_dump_table(HT, TBL, HASH);		\
+			BUG();						\
+		}							\
+	} while (0)
+
+int lockdep_rht_mutex_is_held(struct rhashtable *ht)
+{
+	return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
+}
+EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
+
+int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
+{
+	spinlock_t *lock = bucket_lock(tbl, hash);
+
+	return (debug_locks) ? lockdep_is_held(lock) : 1;
+}
+EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
+#else
+#define ASSERT_RHT_MUTEX(HT)
+#define ASSERT_BUCKET_LOCK(HT, TBL, HASH)
+#endif
+
+
 static struct rhash_head __rcu **bucket_tail(struct bucket_table *tbl, u32 n)
 {
 	struct rhash_head __rcu **pprev;
@@ -134,8 +184,8 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
 	nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
 	size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
 
-	/* Never allocate more than one lock per bucket */
-	size = min_t(unsigned int, size, tbl->size);
+	/* Never allocate more than 0.5 locks per bucket */
+	size = min_t(unsigned int, size, tbl->size >> 1);
 
 	if (sizeof(spinlock_t) != 0) {
 #ifdef CONFIG_NUMA
@@ -217,25 +267,48 @@ bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
 }
 EXPORT_SYMBOL_GPL(rht_shrink_below_30);
 
-static void hashtable_chain_unzip(const struct rhashtable *ht,
+static void lock_buckets(struct bucket_table *new_tbl,
+			 struct bucket_table *old_tbl, unsigned int hash)
+	__acquires(old_bucket_lock)
+{
+	spin_lock_bh(bucket_lock(old_tbl, hash));
+	if (new_tbl != old_tbl)
+		spin_lock_bh_nested(bucket_lock(new_tbl, hash),
+				    RHT_LOCK_NESTED);
+}
+
+static void unlock_buckets(struct bucket_table *new_tbl,
+			   struct bucket_table *old_tbl, unsigned int hash)
+	__releases(old_bucket_lock)
+{
+	if (new_tbl != old_tbl)
+		spin_unlock_bh(bucket_lock(new_tbl, hash));
+	spin_unlock_bh(bucket_lock(old_tbl, hash));
+}
+
+/**
+ * Unlink entries on bucket which hash to different bucket.
+ *
+ * Returns true if no more work needs to be performed on the bucket.
+ */
+static bool hashtable_chain_unzip(struct rhashtable *ht,
 				  const struct bucket_table *new_tbl,
 				  struct bucket_table *old_tbl,
 				  size_t old_hash)
 {
 	struct rhash_head *he, *p, *next;
-	spinlock_t *new_bucket_lock, *new_bucket_lock2 = NULL;
 	unsigned int new_hash, new_hash2;
 
-	ASSERT_BUCKET_LOCK(old_tbl, old_hash);
+	ASSERT_BUCKET_LOCK(ht, old_tbl, old_hash);
 
 	/* Old bucket empty, no work needed. */
 	p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
 				   old_hash);
 	if (rht_is_a_nulls(p))
-		return;
+		return false;
 
-	new_hash = new_hash2 = head_hashfn(ht, new_tbl, p);
-	new_bucket_lock = bucket_lock(new_tbl, new_hash);
+	new_hash = head_hashfn(ht, new_tbl, p);
+	ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash);
 
 	/* Advance the old bucket pointer one or more times until it
 	 * reaches a node that doesn't hash to the same bucket as the
@@ -243,22 +316,14 @@ static void hashtable_chain_unzip(const struct rhashtable *ht,
 	 */
 	rht_for_each_continue(he, p->next, old_tbl, old_hash) {
 		new_hash2 = head_hashfn(ht, new_tbl, he);
+		ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash2);
+
 		if (new_hash != new_hash2)
 			break;
 		p = he;
 	}
 	rcu_assign_pointer(old_tbl->buckets[old_hash], p->next);
 
-	spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
-
-	/* If we have encountered an entry that maps to a different bucket in
-	 * the new table, lock down that bucket as well as we might cut off
-	 * the end of the chain.
-	 */
-	new_bucket_lock2 = bucket_lock(new_tbl, new_hash);
-	if (new_bucket_lock != new_bucket_lock2)
-		spin_lock_bh_nested(new_bucket_lock2, RHT_LOCK_NESTED2);
-
 	/* Find the subsequent node which does hash to the same
 	 * bucket as node P, or NULL if no such node exists.
 	 */
@@ -277,21 +342,18 @@ static void hashtable_chain_unzip(const struct rhashtable *ht,
 	 */
 	rcu_assign_pointer(p->next, next);
 
-	if (new_bucket_lock != new_bucket_lock2)
-		spin_unlock_bh(new_bucket_lock2);
-	spin_unlock_bh(new_bucket_lock);
+	p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
+				   old_hash);
+
+	return !rht_is_a_nulls(p);
 }
 
-static void link_old_to_new(struct bucket_table *new_tbl,
+static void link_old_to_new(struct rhashtable *ht, struct bucket_table *new_tbl,
 			    unsigned int new_hash, struct rhash_head *entry)
 {
-	spinlock_t *new_bucket_lock;
+	ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash);
 
-	new_bucket_lock = bucket_lock(new_tbl, new_hash);
-
-	spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
 	rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry);
-	spin_unlock_bh(new_bucket_lock);
 }
 
 /**
@@ -314,7 +376,6 @@ int rhashtable_expand(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
 	struct rhash_head *he;
-	spinlock_t *old_bucket_lock;
 	unsigned int new_hash, old_hash;
 	bool complete = false;
 
@@ -344,24 +405,16 @@ int rhashtable_expand(struct rhashtable *ht)
 	 */
 	for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
 		old_hash = rht_bucket_index(old_tbl, new_hash);
-		old_bucket_lock = bucket_lock(old_tbl, old_hash);
-
-		spin_lock_bh(old_bucket_lock);
+		lock_buckets(new_tbl, old_tbl, new_hash);
 		rht_for_each(he, old_tbl, old_hash) {
 			if (head_hashfn(ht, new_tbl, he) == new_hash) {
-				link_old_to_new(new_tbl, new_hash, he);
+				link_old_to_new(ht, new_tbl, new_hash, he);
 				break;
 			}
 		}
-		spin_unlock_bh(old_bucket_lock);
+		unlock_buckets(new_tbl, old_tbl, new_hash);
 	}
 
-	/* Publish the new table pointer. Lookups may now traverse
-	 * the new table, but they will not benefit from any
-	 * additional efficiency until later steps unzip the buckets.
-	 */
-	rcu_assign_pointer(ht->tbl, new_tbl);
-
 	/* Unzip interleaved hash chains */
 	while (!complete && !ht->being_destroyed) {
 		/* Wait for readers. All new readers will see the new
@@ -376,21 +429,19 @@ int rhashtable_expand(struct rhashtable *ht)
 		 */
 		complete = true;
 		for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
-			struct rhash_head *head;
-
-			old_bucket_lock = bucket_lock(old_tbl, old_hash);
-			spin_lock_bh(old_bucket_lock);
+			lock_buckets(new_tbl, old_tbl, old_hash);
 
-			hashtable_chain_unzip(ht, new_tbl, old_tbl, old_hash);
-			head = rht_dereference_bucket(old_tbl->buckets[old_hash],
-						      old_tbl, old_hash);
-			if (!rht_is_a_nulls(head))
+			if (hashtable_chain_unzip(ht, new_tbl, old_tbl,
+						  old_hash))
 				complete = false;
 
-			spin_unlock_bh(old_bucket_lock);
+			unlock_buckets(new_tbl, old_tbl, old_hash);
 		}
 	}
 
+	rcu_assign_pointer(ht->tbl, new_tbl);
+	synchronize_rcu();
+
 	bucket_table_free(old_tbl);
 	return 0;
 }
@@ -415,7 +466,6 @@ EXPORT_SYMBOL_GPL(rhashtable_expand);
 int rhashtable_shrink(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht);
-	spinlock_t *new_bucket_lock, *old_bucket_lock1, *old_bucket_lock2;
 	unsigned int new_hash;
 
 	ASSERT_RHT_MUTEX(ht);
@@ -433,36 +483,17 @@ int rhashtable_shrink(struct rhashtable *ht)
 	 * always divide the size in half when shrinking, each bucket
 	 * in the new table maps to exactly two buckets in the old
 	 * table.
-	 *
-	 * As removals can occur concurrently on the old table, we need
-	 * to lock down both matching buckets in the old table.
 	 */
 	for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
-		old_bucket_lock1 = bucket_lock(tbl, new_hash);
-		old_bucket_lock2 = bucket_lock(tbl, new_hash + new_tbl->size);
-		new_bucket_lock = bucket_lock(new_tbl, new_hash);
-
-		spin_lock_bh(old_bucket_lock1);
-
-		/* Depending on the lock per buckets mapping, the bucket in
-		 * the lower and upper region may map to the same lock.
-		 */
-		if (old_bucket_lock1 != old_bucket_lock2) {
-			spin_lock_bh_nested(old_bucket_lock2, RHT_LOCK_NESTED);
-			spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED2);
-		} else {
-			spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
-		}
+		lock_buckets(new_tbl, tbl, new_hash);
 
 		rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
 				   tbl->buckets[new_hash]);
+		ASSERT_BUCKET_LOCK(ht, tbl, new_hash + new_tbl->size);
 		rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
 				   tbl->buckets[new_hash + new_tbl->size]);
 
-		spin_unlock_bh(new_bucket_lock);
-		if (old_bucket_lock1 != old_bucket_lock2)
-			spin_unlock_bh(old_bucket_lock2);
-		spin_unlock_bh(old_bucket_lock1);
+		unlock_buckets(new_tbl, tbl, new_hash);
 	}
 
 	/* Publish the new, valid hash table */
@@ -524,6 +555,8 @@ static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 	struct rhash_head *head = rht_dereference_bucket(tbl->buckets[hash],
 							 tbl, hash);
 
+	ASSERT_BUCKET_LOCK(ht, tbl, hash);
+
 	if (rht_is_a_nulls(head))
 		INIT_RHT_NULLS_HEAD(obj->next, ht, hash);
 	else
@@ -553,19 +586,18 @@ static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
  */
 void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
 {
-	struct bucket_table *tbl;
-	spinlock_t *lock;
+	struct bucket_table *tbl, *old_tbl;
 	unsigned hash;
 
 	rcu_read_lock();
 
 	tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	old_tbl = rht_dereference_rcu(ht->tbl, ht);
 	hash = head_hashfn(ht, tbl, obj);
-	lock = bucket_lock(tbl, hash);
 
-	spin_lock_bh(lock);
+	lock_buckets(tbl, old_tbl, hash);
 	__rhashtable_insert(ht, obj, tbl, hash);
-	spin_unlock_bh(lock);
+	unlock_buckets(tbl, old_tbl, hash);
 
 	rcu_read_unlock();
 }
@@ -588,21 +620,20 @@ EXPORT_SYMBOL_GPL(rhashtable_insert);
  */
 bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
 {
-	struct bucket_table *tbl;
+	struct bucket_table *tbl, *new_tbl, *old_tbl;
 	struct rhash_head __rcu **pprev;
-	struct rhash_head *he;
-	spinlock_t *lock;
-	unsigned int hash;
+	struct rhash_head *he, *he2;
+	unsigned int hash, new_hash;
 	bool ret = false;
 
 	rcu_read_lock();
-	tbl = rht_dereference_rcu(ht->tbl, ht);
-	hash = head_hashfn(ht, tbl, obj);
-
-	lock = bucket_lock(tbl, hash);
-	spin_lock_bh(lock);
+	tbl = old_tbl = rht_dereference_rcu(ht->tbl, ht);
+	new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
 
+	lock_buckets(new_tbl, old_tbl, new_hash);
 restart:
+	hash = rht_bucket_index(tbl, new_hash);
 	pprev = &tbl->buckets[hash];
 	rht_for_each(he, tbl, hash) {
 		if (he != obj) {
@@ -610,8 +641,22 @@ restart:
 			continue;
 		}
 
-		rcu_assign_pointer(*pprev, obj->next);
+		ASSERT_BUCKET_LOCK(ht, tbl, hash);
+
+		if (unlikely(new_tbl != tbl)) {
+			rht_for_each_continue(he2, he->next, tbl, hash) {
+				if (head_hashfn(ht, tbl, he2) == hash) {
+					rcu_assign_pointer(*pprev, he2);
+					goto found;
+				}
+			}
+
+			INIT_RHT_NULLS_HEAD(*pprev, ht, hash);
+		} else {
+			rcu_assign_pointer(*pprev, obj->next);
+		}
 
+found:
 		ret = true;
 		break;
 	}
@@ -621,18 +666,12 @@ restart:
 	 * resizing. Thus traversing both is fine and the added cost is
 	 * very rare.
 	 */
-	if (tbl != rht_dereference_rcu(ht->future_tbl, ht)) {
-		spin_unlock_bh(lock);
-
-		tbl = rht_dereference_rcu(ht->future_tbl, ht);
-		hash = head_hashfn(ht, tbl, obj);
-
-		lock = bucket_lock(tbl, hash);
-		spin_lock_bh(lock);
+	if (tbl != new_tbl) {
+		tbl = new_tbl;
 		goto restart;
 	}
 
-	spin_unlock_bh(lock);
+	unlock_buckets(new_tbl, old_tbl, new_hash);
 
 	if (ret) {
 		atomic_dec(&ht->nelems);
@@ -788,24 +827,17 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
 				      void *arg)
 {
 	struct bucket_table *new_tbl, *old_tbl;
-	spinlock_t *new_bucket_lock, *old_bucket_lock;
-	u32 new_hash, old_hash;
+	u32 new_hash;
 	bool success = true;
 
 	BUG_ON(!ht->p.key_len);
 
 	rcu_read_lock();
-
 	old_tbl = rht_dereference_rcu(ht->tbl, ht);
-	old_hash = head_hashfn(ht, old_tbl, obj);
-	old_bucket_lock = bucket_lock(old_tbl, old_hash);
-	spin_lock_bh(old_bucket_lock);
-
 	new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
 	new_hash = head_hashfn(ht, new_tbl, obj);
-	new_bucket_lock = bucket_lock(new_tbl, new_hash);
-	if (unlikely(old_tbl != new_tbl))
-		spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
+
+	lock_buckets(new_tbl, old_tbl, new_hash);
 
 	if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset,
 				      compare, arg)) {
@@ -816,10 +848,7 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
 	__rhashtable_insert(ht, obj, new_tbl, new_hash);
 
 exit:
-	if (unlikely(old_tbl != new_tbl))
-		spin_unlock_bh(new_bucket_lock);
-	spin_unlock_bh(old_bucket_lock);
-
+	unlock_buckets(new_tbl, old_tbl, new_hash);
 	rcu_read_unlock();
 
 	return success;