aboutsummaryrefslogtreecommitdiffstats
path: root/lib/rhashtable.c
diff options
context:
space:
mode:
authorHerbert Xu <herbert@gondor.apana.org.au>2015-03-13 22:57:20 -0400
committerDavid S. Miller <davem@davemloft.net>2015-03-15 01:35:34 -0400
commiteddee5ba34eb6c9890ef106f19ead2b370e5342f (patch)
tree2cf7fcc92a1e2c2f541eda13aef5efd38501a0fb /lib/rhashtable.c
parent96026d057a1fb7da1e314a24e3a1c528321ed45e (diff)
rhashtable: Fix walker behaviour during rehash
Previously whenever the walker encountered a resize it simply snaps back to the beginning and starts again. However, this only works if the rehash started and completed while the walker was idle. If the walker attempts to restart while the rehash is still ongoing, we may miss objects that we shouldn't have. This patch fixes this by making the walker walk the old table followed by the new table just like all other readers. If a rehash is detected we will still signal our caller of the fact so they can prepare for duplicates but we will simply continue the walk onto the new table after the old one is finished either by us or by the rehasher. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'lib/rhashtable.c')
-rw-r--r--lib/rhashtable.c69
1 files changed, 46 insertions, 23 deletions
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index fc0d451279f0..f7c76079f8f1 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -170,6 +170,8 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
170 return NULL; 170 return NULL;
171 } 171 }
172 172
173 INIT_LIST_HEAD(&tbl->walkers);
174
173 for (i = 0; i < nbuckets; i++) 175 for (i = 0; i < nbuckets; i++)
174 INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i); 176 INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
175 177
@@ -264,6 +266,7 @@ static void rhashtable_rehash(struct rhashtable *ht,
264 struct bucket_table *new_tbl) 266 struct bucket_table *new_tbl)
265{ 267{
266 struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); 268 struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
269 struct rhashtable_walker *walker;
267 unsigned old_hash; 270 unsigned old_hash;
268 271
269 get_random_bytes(&new_tbl->hash_rnd, sizeof(new_tbl->hash_rnd)); 272 get_random_bytes(&new_tbl->hash_rnd, sizeof(new_tbl->hash_rnd));
@@ -284,6 +287,9 @@ static void rhashtable_rehash(struct rhashtable *ht,
284 /* Publish the new table pointer. */ 287 /* Publish the new table pointer. */
285 rcu_assign_pointer(ht->tbl, new_tbl); 288 rcu_assign_pointer(ht->tbl, new_tbl);
286 289
290 list_for_each_entry(walker, &old_tbl->walkers, list)
291 walker->tbl = NULL;
292
287 /* Wait for readers. All new readers will see the new 293 /* Wait for readers. All new readers will see the new
288 * table, and thus no references to the old table will 294 * table, and thus no references to the old table will
289 * remain. 295 * remain.
@@ -358,7 +364,6 @@ static void rht_deferred_worker(struct work_struct *work)
358{ 364{
359 struct rhashtable *ht; 365 struct rhashtable *ht;
360 struct bucket_table *tbl; 366 struct bucket_table *tbl;
361 struct rhashtable_walker *walker;
362 367
363 ht = container_of(work, struct rhashtable, run_work); 368 ht = container_of(work, struct rhashtable, run_work);
364 mutex_lock(&ht->mutex); 369 mutex_lock(&ht->mutex);
@@ -367,9 +372,6 @@ static void rht_deferred_worker(struct work_struct *work)
367 372
368 tbl = rht_dereference(ht->tbl, ht); 373 tbl = rht_dereference(ht->tbl, ht);
369 374
370 list_for_each_entry(walker, &ht->walkers, list)
371 walker->resize = true;
372
373 if (rht_grow_above_75(ht, tbl)) 375 if (rht_grow_above_75(ht, tbl))
374 rhashtable_expand(ht); 376 rhashtable_expand(ht);
375 else if (rht_shrink_below_30(ht, tbl)) 377 else if (rht_shrink_below_30(ht, tbl))
@@ -725,11 +727,9 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
725 if (!iter->walker) 727 if (!iter->walker)
726 return -ENOMEM; 728 return -ENOMEM;
727 729
728 INIT_LIST_HEAD(&iter->walker->list);
729 iter->walker->resize = false;
730
731 mutex_lock(&ht->mutex); 730 mutex_lock(&ht->mutex);
732 list_add(&iter->walker->list, &ht->walkers); 731 iter->walker->tbl = rht_dereference(ht->tbl, ht);
732 list_add(&iter->walker->list, &iter->walker->tbl->walkers);
733 mutex_unlock(&ht->mutex); 733 mutex_unlock(&ht->mutex);
734 734
735 return 0; 735 return 0;
@@ -745,7 +745,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
745void rhashtable_walk_exit(struct rhashtable_iter *iter) 745void rhashtable_walk_exit(struct rhashtable_iter *iter)
746{ 746{
747 mutex_lock(&iter->ht->mutex); 747 mutex_lock(&iter->ht->mutex);
748 list_del(&iter->walker->list); 748 if (iter->walker->tbl)
749 list_del(&iter->walker->list);
749 mutex_unlock(&iter->ht->mutex); 750 mutex_unlock(&iter->ht->mutex);
750 kfree(iter->walker); 751 kfree(iter->walker);
751} 752}
@@ -767,12 +768,19 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
767 */ 768 */
768int rhashtable_walk_start(struct rhashtable_iter *iter) 769int rhashtable_walk_start(struct rhashtable_iter *iter)
769{ 770{
771 struct rhashtable *ht = iter->ht;
772
773 mutex_lock(&ht->mutex);
774
775 if (iter->walker->tbl)
776 list_del(&iter->walker->list);
777
770 rcu_read_lock(); 778 rcu_read_lock();
771 779
772 if (iter->walker->resize) { 780 mutex_unlock(&ht->mutex);
773 iter->slot = 0; 781
774 iter->skip = 0; 782 if (!iter->walker->tbl) {
775 iter->walker->resize = false; 783 iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
776 return -EAGAIN; 784 return -EAGAIN;
777 } 785 }
778 786
@@ -794,13 +802,11 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
794 */ 802 */
795void *rhashtable_walk_next(struct rhashtable_iter *iter) 803void *rhashtable_walk_next(struct rhashtable_iter *iter)
796{ 804{
797 const struct bucket_table *tbl; 805 struct bucket_table *tbl = iter->walker->tbl;
798 struct rhashtable *ht = iter->ht; 806 struct rhashtable *ht = iter->ht;
799 struct rhash_head *p = iter->p; 807 struct rhash_head *p = iter->p;
800 void *obj = NULL; 808 void *obj = NULL;
801 809
802 tbl = rht_dereference_rcu(ht->tbl, ht);
803
804 if (p) { 810 if (p) {
805 p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); 811 p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
806 goto next; 812 goto next;
@@ -826,17 +832,18 @@ next:
826 iter->skip = 0; 832 iter->skip = 0;
827 } 833 }
828 834
829 iter->p = NULL; 835 iter->walker->tbl = rht_dereference_rcu(ht->future_tbl, ht);
830 836 if (iter->walker->tbl != tbl) {
831out:
832 if (iter->walker->resize) {
833 iter->p = NULL;
834 iter->slot = 0; 837 iter->slot = 0;
835 iter->skip = 0; 838 iter->skip = 0;
836 iter->walker->resize = false;
837 return ERR_PTR(-EAGAIN); 839 return ERR_PTR(-EAGAIN);
838 } 840 }
839 841
842 iter->walker->tbl = NULL;
843 iter->p = NULL;
844
845out:
846
840 return obj; 847 return obj;
841} 848}
842EXPORT_SYMBOL_GPL(rhashtable_walk_next); 849EXPORT_SYMBOL_GPL(rhashtable_walk_next);
@@ -849,7 +856,24 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_next);
849 */ 856 */
850void rhashtable_walk_stop(struct rhashtable_iter *iter) 857void rhashtable_walk_stop(struct rhashtable_iter *iter)
851{ 858{
859 struct rhashtable *ht;
860 struct bucket_table *tbl = iter->walker->tbl;
861
852 rcu_read_unlock(); 862 rcu_read_unlock();
863
864 if (!tbl)
865 return;
866
867 ht = iter->ht;
868
869 mutex_lock(&ht->mutex);
870 if (rht_dereference(ht->tbl, ht) == tbl ||
871 rht_dereference(ht->future_tbl, ht) == tbl)
872 list_add(&iter->walker->list, &tbl->walkers);
873 else
874 iter->walker->tbl = NULL;
875 mutex_unlock(&ht->mutex);
876
853 iter->p = NULL; 877 iter->p = NULL;
854} 878}
855EXPORT_SYMBOL_GPL(rhashtable_walk_stop); 879EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
@@ -927,7 +951,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
927 memset(ht, 0, sizeof(*ht)); 951 memset(ht, 0, sizeof(*ht));
928 mutex_init(&ht->mutex); 952 mutex_init(&ht->mutex);
929 memcpy(&ht->p, params, sizeof(*params)); 953 memcpy(&ht->p, params, sizeof(*params));
930 INIT_LIST_HEAD(&ht->walkers);
931 954
932 if (params->locks_mul) 955 if (params->locks_mul)
933 ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); 956 ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);