diff options
Diffstat (limited to 'lib/rhashtable.c')
-rw-r--r-- | lib/rhashtable.c | 163 |
1 files changed, 50 insertions, 113 deletions
diff --git a/lib/rhashtable.c b/lib/rhashtable.c index e0a9d59f80d6..d1d23fb58525 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c | |||
@@ -1,13 +1,13 @@ | |||
1 | /* | 1 | /* |
2 | * Resizable, Scalable, Concurrent Hash Table | 2 | * Resizable, Scalable, Concurrent Hash Table |
3 | * | 3 | * |
4 | * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au> | ||
4 | * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch> | 5 | * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch> |
5 | * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> | 6 | * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> |
6 | * | 7 | * |
7 | * Based on the following paper: | ||
8 | * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf | ||
9 | * | ||
10 | * Code partially derived from nft_hash | 8 | * Code partially derived from nft_hash |
9 | * Rewritten with rehash code from br_multicast plus single list | ||
10 | * pointer as suggested by Josh Triplett | ||
11 | * | 11 | * |
12 | * This program is free software; you can redistribute it and/or modify | 12 | * This program is free software; you can redistribute it and/or modify |
13 | * it under the terms of the GNU General Public License version 2 as | 13 | * it under the terms of the GNU General Public License version 2 as |
@@ -30,53 +30,11 @@ | |||
30 | #define HASH_MIN_SIZE 4U | 30 | #define HASH_MIN_SIZE 4U |
31 | #define BUCKET_LOCKS_PER_CPU 128UL | 31 | #define BUCKET_LOCKS_PER_CPU 128UL |
32 | 32 | ||
33 | /* Base bits plus 1 bit for nulls marker */ | ||
34 | #define HASH_RESERVED_SPACE (RHT_BASE_BITS + 1) | ||
35 | |||
36 | /* The bucket lock is selected based on the hash and protects mutations | ||
37 | * on a group of hash buckets. | ||
38 | * | ||
39 | * A maximum of tbl->size/2 bucket locks is allocated. This ensures that | ||
40 | * a single lock always covers both buckets which may both contains | ||
41 | * entries which link to the same bucket of the old table during resizing. | ||
42 | * This allows to simplify the locking as locking the bucket in both | ||
43 | * tables during resize always guarantee protection. | ||
44 | * | ||
45 | * IMPORTANT: When holding the bucket lock of both the old and new table | ||
46 | * during expansions and shrinking, the old bucket lock must always be | ||
47 | * acquired first. | ||
48 | */ | ||
49 | static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash) | ||
50 | { | ||
51 | return &tbl->locks[hash & tbl->locks_mask]; | ||
52 | } | ||
53 | |||
54 | static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) | ||
55 | { | ||
56 | return (void *) he - ht->p.head_offset; | ||
57 | } | ||
58 | |||
59 | static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash) | ||
60 | { | ||
61 | return (hash >> HASH_RESERVED_SPACE) & (tbl->size - 1); | ||
62 | } | ||
63 | |||
64 | static u32 key_hashfn(struct rhashtable *ht, const struct bucket_table *tbl, | ||
65 | const void *key) | ||
66 | { | ||
67 | return rht_bucket_index(tbl, ht->p.hashfn(key, ht->p.key_len, | ||
68 | tbl->hash_rnd)); | ||
69 | } | ||
70 | |||
71 | static u32 head_hashfn(struct rhashtable *ht, | 33 | static u32 head_hashfn(struct rhashtable *ht, |
72 | const struct bucket_table *tbl, | 34 | const struct bucket_table *tbl, |
73 | const struct rhash_head *he) | 35 | const struct rhash_head *he) |
74 | { | 36 | { |
75 | const char *ptr = rht_obj(ht, he); | 37 | return rht_head_hashfn(ht, tbl, he, ht->p); |
76 | |||
77 | return likely(ht->p.key_len) ? | ||
78 | key_hashfn(ht, tbl, ptr + ht->p.key_offset) : | ||
79 | rht_bucket_index(tbl, ht->p.obj_hashfn(ptr, tbl->hash_rnd)); | ||
80 | } | 38 | } |
81 | 39 | ||
82 | #ifdef CONFIG_PROVE_LOCKING | 40 | #ifdef CONFIG_PROVE_LOCKING |
@@ -90,7 +48,7 @@ EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); | |||
90 | 48 | ||
91 | int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) | 49 | int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) |
92 | { | 50 | { |
93 | spinlock_t *lock = bucket_lock(tbl, hash); | 51 | spinlock_t *lock = rht_bucket_lock(tbl, hash); |
94 | 52 | ||
95 | return (debug_locks) ? lockdep_is_held(lock) : 1; | 53 | return (debug_locks) ? lockdep_is_held(lock) : 1; |
96 | } | 54 | } |
@@ -178,32 +136,6 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, | |||
178 | return tbl; | 136 | return tbl; |
179 | } | 137 | } |
180 | 138 | ||
181 | /** | ||
182 | * rht_grow_above_75 - returns true if nelems > 0.75 * table-size | ||
183 | * @ht: hash table | ||
184 | * @tbl: current table | ||
185 | */ | ||
186 | static bool rht_grow_above_75(const struct rhashtable *ht, | ||
187 | const struct bucket_table *tbl) | ||
188 | { | ||
189 | /* Expand table when exceeding 75% load */ | ||
190 | return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) && | ||
191 | (!ht->p.max_size || tbl->size < ht->p.max_size); | ||
192 | } | ||
193 | |||
194 | /** | ||
195 | * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size | ||
196 | * @ht: hash table | ||
197 | * @tbl: current table | ||
198 | */ | ||
199 | static bool rht_shrink_below_30(const struct rhashtable *ht, | ||
200 | const struct bucket_table *tbl) | ||
201 | { | ||
202 | /* Shrink table beneath 30% load */ | ||
203 | return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) && | ||
204 | tbl->size > ht->p.min_size; | ||
205 | } | ||
206 | |||
207 | static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash) | 139 | static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash) |
208 | { | 140 | { |
209 | struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); | 141 | struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); |
@@ -230,7 +162,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash) | |||
230 | 162 | ||
231 | new_hash = head_hashfn(ht, new_tbl, entry); | 163 | new_hash = head_hashfn(ht, new_tbl, entry); |
232 | 164 | ||
233 | new_bucket_lock = bucket_lock(new_tbl, new_hash); | 165 | new_bucket_lock = rht_bucket_lock(new_tbl, new_hash); |
234 | 166 | ||
235 | spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING); | 167 | spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING); |
236 | head = rht_dereference_bucket(new_tbl->buckets[new_hash], | 168 | head = rht_dereference_bucket(new_tbl->buckets[new_hash], |
@@ -255,7 +187,7 @@ static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash) | |||
255 | struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); | 187 | struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); |
256 | spinlock_t *old_bucket_lock; | 188 | spinlock_t *old_bucket_lock; |
257 | 189 | ||
258 | old_bucket_lock = bucket_lock(old_tbl, old_hash); | 190 | old_bucket_lock = rht_bucket_lock(old_tbl, old_hash); |
259 | 191 | ||
260 | spin_lock_bh(old_bucket_lock); | 192 | spin_lock_bh(old_bucket_lock); |
261 | while (!rhashtable_rehash_one(ht, old_hash)) | 193 | while (!rhashtable_rehash_one(ht, old_hash)) |
@@ -376,6 +308,37 @@ unlock: | |||
376 | mutex_unlock(&ht->mutex); | 308 | mutex_unlock(&ht->mutex); |
377 | } | 309 | } |
378 | 310 | ||
311 | int rhashtable_insert_slow(struct rhashtable *ht, const void *key, | ||
312 | struct rhash_head *obj, | ||
313 | struct bucket_table *tbl) | ||
314 | { | ||
315 | struct rhash_head *head; | ||
316 | unsigned hash; | ||
317 | int err = -EEXIST; | ||
318 | |||
319 | hash = head_hashfn(ht, tbl, obj); | ||
320 | spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); | ||
321 | |||
322 | if (key && rhashtable_lookup_fast(ht, key, ht->p)) | ||
323 | goto exit; | ||
324 | |||
325 | err = 0; | ||
326 | |||
327 | head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); | ||
328 | |||
329 | RCU_INIT_POINTER(obj->next, head); | ||
330 | |||
331 | rcu_assign_pointer(tbl->buckets[hash], obj); | ||
332 | |||
333 | atomic_inc(&ht->nelems); | ||
334 | |||
335 | exit: | ||
336 | spin_unlock(rht_bucket_lock(tbl, hash)); | ||
337 | |||
338 | return err; | ||
339 | } | ||
340 | EXPORT_SYMBOL_GPL(rhashtable_insert_slow); | ||
341 | |||
379 | static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, | 342 | static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, |
380 | bool (*compare)(void *, void *), void *arg) | 343 | bool (*compare)(void *, void *), void *arg) |
381 | { | 344 | { |
@@ -390,7 +353,7 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, | |||
390 | 353 | ||
391 | old_tbl = rht_dereference_rcu(ht->tbl, ht); | 354 | old_tbl = rht_dereference_rcu(ht->tbl, ht); |
392 | hash = head_hashfn(ht, old_tbl, obj); | 355 | hash = head_hashfn(ht, old_tbl, obj); |
393 | old_lock = bucket_lock(old_tbl, hash); | 356 | old_lock = rht_bucket_lock(old_tbl, hash); |
394 | 357 | ||
395 | spin_lock_bh(old_lock); | 358 | spin_lock_bh(old_lock); |
396 | 359 | ||
@@ -403,7 +366,8 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, | |||
403 | tbl = rht_dereference_rcu(old_tbl->future_tbl, ht) ?: old_tbl; | 366 | tbl = rht_dereference_rcu(old_tbl->future_tbl, ht) ?: old_tbl; |
404 | if (tbl != old_tbl) { | 367 | if (tbl != old_tbl) { |
405 | hash = head_hashfn(ht, tbl, obj); | 368 | hash = head_hashfn(ht, tbl, obj); |
406 | spin_lock_nested(bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); | 369 | spin_lock_nested(rht_bucket_lock(tbl, hash), |
370 | SINGLE_DEPTH_NESTING); | ||
407 | } | 371 | } |
408 | 372 | ||
409 | if (compare && | 373 | if (compare && |
@@ -430,7 +394,7 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, | |||
430 | 394 | ||
431 | exit: | 395 | exit: |
432 | if (tbl != old_tbl) | 396 | if (tbl != old_tbl) |
433 | spin_unlock(bucket_lock(tbl, hash)); | 397 | spin_unlock(rht_bucket_lock(tbl, hash)); |
434 | 398 | ||
435 | spin_unlock_bh(old_lock); | 399 | spin_unlock_bh(old_lock); |
436 | 400 | ||
@@ -471,7 +435,7 @@ static bool __rhashtable_remove(struct rhashtable *ht, | |||
471 | bool ret = false; | 435 | bool ret = false; |
472 | 436 | ||
473 | hash = head_hashfn(ht, tbl, obj); | 437 | hash = head_hashfn(ht, tbl, obj); |
474 | lock = bucket_lock(tbl, hash); | 438 | lock = rht_bucket_lock(tbl, hash); |
475 | 439 | ||
476 | spin_lock_bh(lock); | 440 | spin_lock_bh(lock); |
477 | 441 | ||
@@ -537,19 +501,6 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) | |||
537 | } | 501 | } |
538 | EXPORT_SYMBOL_GPL(rhashtable_remove); | 502 | EXPORT_SYMBOL_GPL(rhashtable_remove); |
539 | 503 | ||
540 | struct rhashtable_compare_arg { | ||
541 | struct rhashtable *ht; | ||
542 | const void *key; | ||
543 | }; | ||
544 | |||
545 | static bool rhashtable_compare(void *ptr, void *arg) | ||
546 | { | ||
547 | struct rhashtable_compare_arg *x = arg; | ||
548 | struct rhashtable *ht = x->ht; | ||
549 | |||
550 | return !memcmp(ptr + ht->p.key_offset, x->key, ht->p.key_len); | ||
551 | } | ||
552 | |||
553 | /** | 504 | /** |
554 | * rhashtable_lookup - lookup key in hash table | 505 | * rhashtable_lookup - lookup key in hash table |
555 | * @ht: hash table | 506 | * @ht: hash table |
@@ -565,14 +516,7 @@ static bool rhashtable_compare(void *ptr, void *arg) | |||
565 | */ | 516 | */ |
566 | void *rhashtable_lookup(struct rhashtable *ht, const void *key) | 517 | void *rhashtable_lookup(struct rhashtable *ht, const void *key) |
567 | { | 518 | { |
568 | struct rhashtable_compare_arg arg = { | 519 | return rhashtable_lookup_fast(ht, key, ht->p); |
569 | .ht = ht, | ||
570 | .key = key, | ||
571 | }; | ||
572 | |||
573 | BUG_ON(!ht->p.key_len); | ||
574 | |||
575 | return rhashtable_lookup_compare(ht, key, &rhashtable_compare, &arg); | ||
576 | } | 520 | } |
577 | EXPORT_SYMBOL_GPL(rhashtable_lookup); | 521 | EXPORT_SYMBOL_GPL(rhashtable_lookup); |
578 | 522 | ||
@@ -591,7 +535,8 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup); | |||
591 | * Returns the first entry on which the compare function returned true. | 535 | * Returns the first entry on which the compare function returned true. |
592 | */ | 536 | */ |
593 | void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, | 537 | void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, |
594 | bool (*compare)(void *, void *), void *arg) | 538 | bool (*compare)(void *, void *), |
539 | void *arg) | ||
595 | { | 540 | { |
596 | const struct bucket_table *tbl; | 541 | const struct bucket_table *tbl; |
597 | struct rhash_head *he; | 542 | struct rhash_head *he; |
@@ -601,7 +546,7 @@ void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, | |||
601 | 546 | ||
602 | tbl = rht_dereference_rcu(ht->tbl, ht); | 547 | tbl = rht_dereference_rcu(ht->tbl, ht); |
603 | restart: | 548 | restart: |
604 | hash = key_hashfn(ht, tbl, key); | 549 | hash = rht_key_hashfn(ht, tbl, key, ht->p); |
605 | rht_for_each_rcu(he, tbl, hash) { | 550 | rht_for_each_rcu(he, tbl, hash) { |
606 | if (!compare(rht_obj(ht, he), arg)) | 551 | if (!compare(rht_obj(ht, he), arg)) |
607 | continue; | 552 | continue; |
@@ -643,15 +588,7 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); | |||
643 | */ | 588 | */ |
644 | bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj) | 589 | bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj) |
645 | { | 590 | { |
646 | struct rhashtable_compare_arg arg = { | 591 | return rhashtable_lookup_insert_fast(ht, obj, ht->p); |
647 | .ht = ht, | ||
648 | .key = rht_obj(ht, obj) + ht->p.key_offset, | ||
649 | }; | ||
650 | |||
651 | BUG_ON(!ht->p.key_len); | ||
652 | |||
653 | return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare, | ||
654 | &arg); | ||
655 | } | 592 | } |
656 | EXPORT_SYMBOL_GPL(rhashtable_lookup_insert); | 593 | EXPORT_SYMBOL_GPL(rhashtable_lookup_insert); |
657 | 594 | ||
@@ -927,8 +864,8 @@ int rhashtable_init(struct rhashtable *ht, | |||
927 | 864 | ||
928 | size = HASH_DEFAULT_SIZE; | 865 | size = HASH_DEFAULT_SIZE; |
929 | 866 | ||
930 | if ((params->key_len && !params->hashfn) || | 867 | if ((!(params->key_len && params->hashfn) && !params->obj_hashfn) || |
931 | (!params->key_len && !params->obj_hashfn)) | 868 | (params->obj_hashfn && !params->obj_cmpfn)) |
932 | return -EINVAL; | 869 | return -EINVAL; |
933 | 870 | ||
934 | if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT)) | 871 | if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT)) |