author	David S. Miller <davem@davemloft.net>	2015-03-24 17:53:06 -0400
committer	David S. Miller <davem@davemloft.net>	2015-03-24 17:53:06 -0400
commit	9ead3527f5967440f2ff57fd2fa25dd0e460fc5a (patch)
tree	8464850fd94dba0dfa34c75c46dfb3fcf4c3c381
parent	27cd5452476978283decb19e429e81fc6c71e74b (diff)
parent	6b6f302ceda7a052dab545d6c69abf5f0d4a6cab (diff)
Merge branch 'rhashtable-next'
Thomas Graf says:

====================
rhashtable updates on top of Herbert's work

Patch 1 is a bugfix for an RCU splat I encountered while testing.
Patch 2 & 3 are pure cleanups. Patch 4 disables automatic shrinking
by default, as discussed in a previous thread. Patch 5 removes some
rhashtable-internal knowledge from nft_hash and fixes another RCU
splat.

I've pushed various rhashtable tests (Netlink, nft) together with a
Makefile to a git tree [0] for easier stress testing.

[0] https://github.com/tgraf/rhashtable
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--	include/linux/rhashtable.h	30
-rw-r--r--	lib/rhashtable.c	69
-rw-r--r--	net/netfilter/nft_hash.c	26
-rw-r--r--	net/netlink/af_netlink.c	1
-rw-r--r--	net/tipc/socket.c	1
5 files changed, 78 insertions(+), 49 deletions(-)
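With patch 4 applied, shrinking becomes opt-in: callers that still want
tables to shrink set the new automatic_shrinking flag in their
rhashtable_params, exactly as the nft_hash, netlink and tipc hunks below
do. A minimal sketch, assuming a hypothetical test_obj that embeds its
rhash_head:

struct test_obj {
	int			key;	/* hypothetical integer key */
	struct rhash_head	node;	/* linkage used by the rhashtable */
};

static const struct rhashtable_params test_params = {
	.head_offset	= offsetof(struct test_obj, node),
	.key_offset	= offsetof(struct test_obj, key),
	.key_len	= sizeof(int),
	.automatic_shrinking = true,	/* opt in; shrinking is now off by default */
};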
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index d7be9cb0e91f..99f2e49a8a07 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -2,7 +2,7 @@
  * Resizable, Scalable, Concurrent Hash Table
  *
  * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
- * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch>
+ * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
  *
  * Code partially derived from nft_hash
@@ -104,6 +104,7 @@ struct rhashtable;
  * @min_size: Minimum size while shrinking
  * @nulls_base: Base value to generate nulls marker
  * @insecure_elasticity: Set to true to disable chain length checks
+ * @automatic_shrinking: Enable automatic shrinking of tables
  * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
  * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
  * @obj_hashfn: Function to hash object
@@ -118,6 +119,7 @@ struct rhashtable_params {
 	unsigned int		min_size;
 	u32			nulls_base;
 	bool			insecure_elasticity;
+	bool			automatic_shrinking;
 	size_t			locks_mul;
 	rht_hashfn_t		hashfn;
 	rht_obj_hashfn_t	obj_hashfn;
@@ -134,12 +136,10 @@ struct rhashtable_params {
  * @run_work: Deferred worker to expand/shrink asynchronously
  * @mutex: Mutex to protect current/future table swapping
  * @lock: Spin lock to protect walker list
- * @being_destroyed: True if table is set up for destruction
  */
 struct rhashtable {
 	struct bucket_table __rcu	*tbl;
 	atomic_t			nelems;
-	bool				being_destroyed;
 	unsigned int			key_len;
 	unsigned int			elasticity;
 	struct rhashtable_params	p;
@@ -208,13 +208,13 @@ static inline unsigned int rht_key_hashfn(
 	struct rhashtable *ht, const struct bucket_table *tbl,
 	const void *key, const struct rhashtable_params params)
 {
-	unsigned hash;
+	unsigned int hash;
 
 	/* params must be equal to ht->p if it isn't constant. */
 	if (!__builtin_constant_p(params.key_len))
 		hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd);
 	else if (params.key_len) {
-		unsigned key_len = params.key_len;
+		unsigned int key_len = params.key_len;
 
 		if (params.hashfn)
 			hash = params.hashfn(key, key_len, tbl->hash_rnd);
@@ -224,7 +224,7 @@ static inline unsigned int rht_key_hashfn(
 			hash = jhash2(key, key_len / sizeof(u32),
 				      tbl->hash_rnd);
 	} else {
-		unsigned key_len = ht->p.key_len;
+		unsigned int key_len = ht->p.key_len;
 
 		if (params.hashfn)
 			hash = params.hashfn(key, key_len, tbl->hash_rnd);
@@ -332,6 +332,9 @@ int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU);
 void *rhashtable_walk_next(struct rhashtable_iter *iter);
 void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
 
+void rhashtable_free_and_destroy(struct rhashtable *ht,
+				 void (*free_fn)(void *ptr, void *arg),
+				 void *arg);
 void rhashtable_destroy(struct rhashtable *ht);
 
 #define rht_dereference(p, ht) \
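The hunk above declares the new teardown entry point; rhashtable_destroy()
becomes its free_fn == NULL special case (see lib/rhashtable.c below). A
minimal usage sketch, assuming a hypothetical my_free() callback and
kmalloc'ed objects:

static void my_free(void *ptr, void *arg)
{
	kfree(ptr);	/* ptr is the object embedding the rhash_head */
}

	/* at teardown, once no further inserts/removals can race: */
	rhashtable_free_and_destroy(&ht, my_free, NULL);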
@@ -512,7 +515,7 @@ static inline void *rhashtable_lookup_fast(
 	};
 	const struct bucket_table *tbl;
 	struct rhash_head *he;
-	unsigned hash;
+	unsigned int hash;
 
 	rcu_read_lock();
 
@@ -539,6 +542,7 @@ restart:
 		return NULL;
 }
 
+/* Internal function, please use rhashtable_insert_fast() instead */
 static inline int __rhashtable_insert_fast(
 	struct rhashtable *ht, const void *key, struct rhash_head *obj,
 	const struct rhashtable_params params)
@@ -550,8 +554,8 @@ static inline int __rhashtable_insert_fast(
 	struct bucket_table *tbl, *new_tbl;
 	struct rhash_head *head;
 	spinlock_t *lock;
-	unsigned elasticity;
-	unsigned hash;
+	unsigned int elasticity;
+	unsigned int hash;
 	int err;
 
 restart:
@@ -585,8 +589,8 @@ restart:
 	if (unlikely(rht_grow_above_100(ht, tbl))) {
 slow_path:
 		spin_unlock_bh(lock);
-		rcu_read_unlock();
 		err = rhashtable_insert_rehash(ht);
+		rcu_read_unlock();
 		if (err)
 			return err;
 
@@ -711,6 +715,7 @@ static inline int rhashtable_lookup_insert_key(
 		return __rhashtable_insert_fast(ht, key, obj, params);
 }
 
+/* Internal function, please use rhashtable_remove_fast() instead */
 static inline int __rhashtable_remove_fast(
 	struct rhashtable *ht, struct bucket_table *tbl,
 	struct rhash_head *obj, const struct rhashtable_params params)
@@ -718,7 +723,7 @@ static inline int __rhashtable_remove_fast(
 	struct rhash_head __rcu **pprev;
 	struct rhash_head *he;
 	spinlock_t * lock;
-	unsigned hash;
+	unsigned int hash;
 	int err = -ENOENT;
 
 	hash = rht_head_hashfn(ht, tbl, obj, params);
@@ -782,7 +787,8 @@ static inline int rhashtable_remove_fast(
 		goto out;
 
 	atomic_dec(&ht->nelems);
-	if (rht_shrink_below_30(ht, tbl))
+	if (unlikely(ht->p.automatic_shrinking &&
+		     rht_shrink_below_30(ht, tbl)))
 		schedule_work(&ht->run_work);
 
 out:
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 8514f7c5f029..4b7b7e672b93 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -153,7 +153,7 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
 	return new_tbl;
 }
 
-static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
+static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	struct bucket_table *new_tbl = rhashtable_last_table(ht,
@@ -162,7 +162,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
 	int err = -ENOENT;
 	struct rhash_head *head, *next, *entry;
 	spinlock_t *new_bucket_lock;
-	unsigned new_hash;
+	unsigned int new_hash;
 
 	rht_for_each(entry, old_tbl, old_hash) {
 		err = 0;
@@ -199,7 +199,8 @@ out:
 	return err;
 }
 
-static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash)
+static void rhashtable_rehash_chain(struct rhashtable *ht,
+				    unsigned int old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	spinlock_t *old_bucket_lock;
@@ -244,7 +245,7 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	struct bucket_table *new_tbl;
 	struct rhashtable_walker *walker;
-	unsigned old_hash;
+	unsigned int old_hash;
 
 	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
 	if (!new_tbl)
@@ -324,11 +325,12 @@ static int rhashtable_expand(struct rhashtable *ht)
 static int rhashtable_shrink(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
-	unsigned size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
+	unsigned int size;
 	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
+	size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
 	if (size < ht->p.min_size)
 		size = ht->p.min_size;
 
@@ -357,20 +359,17 @@ static void rht_deferred_worker(struct work_struct *work)
 
 	ht = container_of(work, struct rhashtable, run_work);
 	mutex_lock(&ht->mutex);
-	if (ht->being_destroyed)
-		goto unlock;
 
 	tbl = rht_dereference(ht->tbl, ht);
 	tbl = rhashtable_last_table(ht, tbl);
 
 	if (rht_grow_above_75(ht, tbl))
 		rhashtable_expand(ht);
-	else if (rht_shrink_below_30(ht, tbl))
+	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
 		rhashtable_shrink(ht);
 
 	err = rhashtable_rehash_table(ht);
 
-unlock:
 	mutex_unlock(&ht->mutex);
 
 	if (err)
@@ -379,9 +378,9 @@ unlock:
 
 static bool rhashtable_check_elasticity(struct rhashtable *ht,
 					struct bucket_table *tbl,
-					unsigned hash)
+					unsigned int hash)
 {
-	unsigned elasticity = ht->elasticity;
+	unsigned int elasticity = ht->elasticity;
 	struct rhash_head *head;
 
 	rht_for_each(head, tbl, hash)
@@ -431,7 +430,7 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
 			   struct bucket_table *tbl)
 {
 	struct rhash_head *head;
-	unsigned hash;
+	unsigned int hash;
 	int err;
 
 	tbl = rhashtable_last_table(ht, tbl);
@@ -781,21 +780,53 @@ int rhashtable_init(struct rhashtable *ht,
 EXPORT_SYMBOL_GPL(rhashtable_init);
 
 /**
- * rhashtable_destroy - destroy hash table
+ * rhashtable_free_and_destroy - free elements and destroy hash table
  * @ht: the hash table to destroy
+ * @free_fn: callback to release resources of element
+ * @arg: pointer passed to free_fn
+ *
+ * Stops an eventual async resize. If defined, invokes free_fn for each
+ * element to release resources. Please note that RCU protected
+ * readers may still be accessing the elements. Releasing of resources
+ * must occur in a compatible manner. Then frees the bucket array.
  *
- * Frees the bucket array. This function is not rcu safe, therefore the caller
- * has to make sure that no resizing may happen by unpublishing the hashtable
- * and waiting for the quiescent cycle before releasing the bucket array.
+ * This function will eventually sleep to wait for an async resize
+ * to complete. The caller is responsible that no further write operations
+ * occur in parallel.
  */
-void rhashtable_destroy(struct rhashtable *ht)
+void rhashtable_free_and_destroy(struct rhashtable *ht,
+				 void (*free_fn)(void *ptr, void *arg),
+				 void *arg)
 {
-	ht->being_destroyed = true;
+	const struct bucket_table *tbl;
+	unsigned int i;
 
 	cancel_work_sync(&ht->run_work);
 
 	mutex_lock(&ht->mutex);
-	bucket_table_free(rht_dereference(ht->tbl, ht));
+	tbl = rht_dereference(ht->tbl, ht);
+	if (free_fn) {
+		for (i = 0; i < tbl->size; i++) {
+			struct rhash_head *pos, *next;
+
+			for (pos = rht_dereference(tbl->buckets[i], ht),
+			     next = !rht_is_a_nulls(pos) ?
+					rht_dereference(pos->next, ht) : NULL;
+			     !rht_is_a_nulls(pos);
+			     pos = next,
+			     next = !rht_is_a_nulls(pos) ?
+					rht_dereference(pos->next, ht) : NULL)
+				free_fn(rht_obj(ht, pos), arg);
+		}
+	}
+
+	bucket_table_free(tbl);
 	mutex_unlock(&ht->mutex);
 }
+EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
+
+void rhashtable_destroy(struct rhashtable *ht)
+{
+	return rhashtable_free_and_destroy(ht, NULL, NULL);
+}
 EXPORT_SYMBOL_GPL(rhashtable_destroy);
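The nested for loop added in rhashtable_free_and_destroy() is dense: it
is a "safe" walk that loads each entry's next pointer before free_fn()
can free the entry, and it uses the nulls marker as the end-of-chain
sentinel. The same traversal, unrolled for readability (a sketch, not
part of the patch):

	for (i = 0; i < tbl->size; i++) {
		struct rhash_head *pos = rht_dereference(tbl->buckets[i], ht);

		while (!rht_is_a_nulls(pos)) {
			struct rhash_head *next = rht_dereference(pos->next, ht);

			free_fn(rht_obj(ht, pos), arg);	/* may free the entry behind pos */
			pos = next;
		}
	}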
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index ad3966976cf5..f9ce2195fd63 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -172,6 +172,7 @@ static const struct rhashtable_params nft_hash_params = {
 	.head_offset = offsetof(struct nft_hash_elem, node),
 	.key_offset = offsetof(struct nft_hash_elem, key),
 	.hashfn = jhash,
+	.automatic_shrinking = true,
 };
 
 static int nft_hash_init(const struct nft_set *set,
@@ -187,26 +188,15 @@ static int nft_hash_init(const struct nft_set *set,
 	return rhashtable_init(priv, &params);
 }
 
-static void nft_hash_destroy(const struct nft_set *set)
+static void nft_free_element(void *ptr, void *arg)
 {
-	struct rhashtable *priv = nft_set_priv(set);
-	const struct bucket_table *tbl;
-	struct nft_hash_elem *he;
-	struct rhash_head *pos, *next;
-	unsigned int i;
-
-	/* Stop an eventual async resizing */
-	priv->being_destroyed = true;
-	mutex_lock(&priv->mutex);
-
-	tbl = rht_dereference(priv->tbl, priv);
-	for (i = 0; i < tbl->size; i++) {
-		rht_for_each_entry_safe(he, pos, next, tbl, i, node)
-			nft_hash_elem_destroy(set, he);
-	}
-	mutex_unlock(&priv->mutex);
+	nft_hash_elem_destroy((const struct nft_set *)arg, ptr);
+}
 
-	rhashtable_destroy(priv);
+static void nft_hash_destroy(const struct nft_set *set)
+{
+	rhashtable_free_and_destroy(nft_set_priv(set), nft_free_element,
+				    (void *)set);
 }
 
 static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e2f7f28148e0..4caa809dbbe0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -3142,6 +3142,7 @@ static const struct rhashtable_params netlink_rhashtable_params = {
 	.obj_hashfn = netlink_hash,
 	.obj_cmpfn = netlink_compare,
 	.max_size = 65536,
+	.automatic_shrinking = true,
 };
 
 static int __init netlink_proto_init(void)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 094710519477..ee90d74d7516 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2297,6 +2297,7 @@ static const struct rhashtable_params tsk_rht_params = {
 	.key_len = sizeof(u32), /* portid */
 	.max_size = 1048576,
 	.min_size = 256,
+	.automatic_shrinking = true,
 };
 
 int tipc_sk_rht_init(struct net *net)