Diffstat (limited to 'lib/rhashtable.c')
 -rw-r--r--  lib/rhashtable.c | 270
 1 file changed, 220 insertions(+), 50 deletions(-)
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 32d0ad058380..172454e6b979 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -32,6 +32,11 @@
 #define HASH_MIN_SIZE		4U
 #define BUCKET_LOCKS_PER_CPU	32UL
 
+union nested_table {
+	union nested_table __rcu *table;
+	struct rhash_head __rcu *bucket;
+};
+
 static u32 head_hashfn(struct rhashtable *ht,
 		       const struct bucket_table *tbl,
 		       const struct rhash_head *he)
@@ -76,6 +81,9 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
 	/* Never allocate more than 0.5 locks per bucket */
 	size = min_t(unsigned int, size, tbl->size >> 1);
 
+	if (tbl->nest)
+		size = min(size, 1U << tbl->nest);
+
 	if (sizeof(spinlock_t) != 0) {
 		tbl->locks = NULL;
 #ifdef CONFIG_NUMA
@@ -99,8 +107,45 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
 	return 0;
 }
 
+static void nested_table_free(union nested_table *ntbl, unsigned int size)
+{
+	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+	const unsigned int len = 1 << shift;
+	unsigned int i;
+
+	ntbl = rcu_dereference_raw(ntbl->table);
+	if (!ntbl)
+		return;
+
+	if (size > len) {
+		size >>= shift;
+		for (i = 0; i < len; i++)
+			nested_table_free(ntbl + i, size);
+	}
+
+	kfree(ntbl);
+}
+
+static void nested_bucket_table_free(const struct bucket_table *tbl)
+{
+	unsigned int size = tbl->size >> tbl->nest;
+	unsigned int len = 1 << tbl->nest;
+	union nested_table *ntbl;
+	unsigned int i;
+
+	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+
+	for (i = 0; i < len; i++)
+		nested_table_free(ntbl + i, size);
+
+	kfree(ntbl);
+}
+
 static void bucket_table_free(const struct bucket_table *tbl)
 {
+	if (tbl->nest)
+		nested_bucket_table_free(tbl);
+
 	if (tbl)
 		kvfree(tbl->locks);
 
@@ -112,6 +157,59 @@ static void bucket_table_free_rcu(struct rcu_head *head)
 	bucket_table_free(container_of(head, struct bucket_table, rcu));
 }
 
+static union nested_table *nested_table_alloc(struct rhashtable *ht,
+					      union nested_table __rcu **prev,
+					      unsigned int shifted,
+					      unsigned int nhash)
+{
+	union nested_table *ntbl;
+	int i;
+
+	ntbl = rcu_dereference(*prev);
+	if (ntbl)
+		return ntbl;
+
+	ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);
+
+	if (ntbl && shifted) {
+		for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++)
+			INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht,
+					    (i << shifted) | nhash);
+	}
+
+	rcu_assign_pointer(*prev, ntbl);
+
+	return ntbl;
+}
+
+static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
+						      size_t nbuckets,
+						      gfp_t gfp)
+{
+	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+	struct bucket_table *tbl;
+	size_t size;
+
+	if (nbuckets < (1 << (shift + 1)))
+		return NULL;
+
+	size = sizeof(*tbl) + sizeof(tbl->buckets[0]);
+
+	tbl = kzalloc(size, gfp);
+	if (!tbl)
+		return NULL;
+
+	if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
+				0, 0)) {
+		kfree(tbl);
+		return NULL;
+	}
+
+	tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;
+
+	return tbl;
+}
+
 static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 					       size_t nbuckets,
 					       gfp_t gfp)
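For illustration only (not part of the patch): the nesting depth above follows from shift = PAGE_SHIFT - ilog2(sizeof(void *)), the number of pointer slots that fit in one page, and tbl->nest is chosen so the remaining hash bits split evenly into page-sized levels. Below is a minimal user-space sketch of that arithmetic, assuming 4 KiB pages and 8-byte pointers (shift = 9, 512 slots per page); ilog2_u32() stands in for the kernel's ilog2().

/* Standalone sketch, not kernel code: sizing arithmetic of
 * nested_bucket_table_alloc() under the assumptions above. */
#include <stdio.h>

static unsigned int ilog2_u32(unsigned int n)
{
	unsigned int r = 0;

	while (n >>= 1)
		r++;
	return r;
}

int main(void)
{
	const unsigned int shift = 12 - 3;	/* assumed PAGE_SHIFT - ilog2(sizeof(void *)) */
	const unsigned int sizes[] = { 1U << 10, 1U << 16, 1U << 20 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		unsigned int nbuckets = sizes[i];
		unsigned int nest = (ilog2_u32(nbuckets) - 1) % shift + 1;	/* as in the patch */
		unsigned int rest = ilog2_u32(nbuckets) - nest;

		printf("nbuckets=%-8u nest=%u (top level %u slots), %u page level(s) of %u bits\n",
		       nbuckets, nest, 1U << nest, rest / shift, shift);
	}
	return 0;
}

With these assumptions, nested tables are only produced for nbuckets >= 1 << (shift + 1) = 1024, which is exactly the guard at the top of nested_bucket_table_alloc().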
@@ -126,10 +224,17 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 	tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
 	if (tbl == NULL && gfp == GFP_KERNEL)
 		tbl = vzalloc(size);
+
+	size = nbuckets;
+
+	if (tbl == NULL && gfp != GFP_KERNEL) {
+		tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
+		nbuckets = 0;
+	}
 	if (tbl == NULL)
 		return NULL;
 
-	tbl->size = nbuckets;
+	tbl->size = size;
 
 	if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
 		bucket_table_free(tbl);
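For illustration only (not part of the patch): after this hunk the allocation order is kzalloc() for a flat table first, vzalloc() only for GFP_KERNEL callers, and the page-sized nested layout only as the atomic fallback; nbuckets is then zeroed so the unchanged tail of bucket_table_alloc() skips initializing a flat bucket array that does not exist, while the requested bucket count survives in size and becomes tbl->size. A compilable sketch of that selection, with illustrative names only:

/* Standalone sketch, not kernel code: fallback order of
 * bucket_table_alloc() after this hunk; names are illustrative. */
#include <stdio.h>

enum alloc_path { ALLOC_KZALLOC, ALLOC_VZALLOC, ALLOC_NESTED };

static enum alloc_path pick_bucket_alloc(int kzalloc_failed, int gfp_is_kernel)
{
	if (!kzalloc_failed)
		return ALLOC_KZALLOC;	/* flat, physically contiguous table */
	if (gfp_is_kernel)
		return ALLOC_VZALLOC;	/* may sleep: vzalloc() a flat table */
	return ALLOC_NESTED;		/* atomic: page-sized nested tables */
}

int main(void)
{
	printf("kzalloc ok            -> %d\n", pick_bucket_alloc(0, 1));
	printf("kzalloc fails, KERNEL -> %d\n", pick_bucket_alloc(1, 1));
	printf("kzalloc fails, atomic -> %d\n", pick_bucket_alloc(1, 0));
	return 0;
}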
@@ -164,12 +269,17 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	struct bucket_table *new_tbl = rhashtable_last_table(ht,
 		rht_dereference_rcu(old_tbl->future_tbl, ht));
-	struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
-	int err = -ENOENT;
+	struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash);
+	int err = -EAGAIN;
 	struct rhash_head *head, *next, *entry;
 	spinlock_t *new_bucket_lock;
 	unsigned int new_hash;
 
+	if (new_tbl->nest)
+		goto out;
+
+	err = -ENOENT;
+
 	rht_for_each(entry, old_tbl, old_hash) {
 		err = 0;
 		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
@@ -202,19 +312,26 @@ out:
 	return err;
 }
 
-static void rhashtable_rehash_chain(struct rhashtable *ht,
+static int rhashtable_rehash_chain(struct rhashtable *ht,
 				    unsigned int old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	spinlock_t *old_bucket_lock;
+	int err;
 
 	old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
 
 	spin_lock_bh(old_bucket_lock);
-	while (!rhashtable_rehash_one(ht, old_hash))
+	while (!(err = rhashtable_rehash_one(ht, old_hash)))
 		;
-	old_tbl->rehash++;
+
+	if (err == -ENOENT) {
+		old_tbl->rehash++;
+		err = 0;
+	}
 	spin_unlock_bh(old_bucket_lock);
+
+	return err;
 }
 
 static int rhashtable_rehash_attach(struct rhashtable *ht,
@@ -246,13 +363,17 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
 	struct bucket_table *new_tbl;
 	struct rhashtable_walker *walker;
 	unsigned int old_hash;
+	int err;
 
 	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
 	if (!new_tbl)
 		return 0;
 
-	for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
-		rhashtable_rehash_chain(ht, old_hash);
+	for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
+		err = rhashtable_rehash_chain(ht, old_hash);
+		if (err)
+			return err;
+	}
 
 	/* Publish the new table pointer. */
 	rcu_assign_pointer(ht->tbl, new_tbl);
@@ -271,31 +392,16 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
 	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
 }
 
-/**
- * rhashtable_expand - Expand hash table while allowing concurrent lookups
- * @ht:		the hash table to expand
- *
- * A secondary bucket array is allocated and the hash entries are migrated.
- *
- * This function may only be called in a context where it is safe to call
- * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
- *
- * The caller must ensure that no concurrent resizing occurs by holding
- * ht->mutex.
- *
- * It is valid to have concurrent insertions and deletions protected by per
- * bucket locks or concurrent RCU protected lookups and traversals.
- */
-static int rhashtable_expand(struct rhashtable *ht)
+static int rhashtable_rehash_alloc(struct rhashtable *ht,
+				   struct bucket_table *old_tbl,
+				   unsigned int size)
 {
-	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *new_tbl;
 	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
-	old_tbl = rhashtable_last_table(ht, old_tbl);
-
-	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
+	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
@@ -324,12 +430,9 @@ static int rhashtable_expand(struct rhashtable *ht)
  */
 static int rhashtable_shrink(struct rhashtable *ht)
 {
-	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	unsigned int nelems = atomic_read(&ht->nelems);
 	unsigned int size = 0;
-	int err;
-
-	ASSERT_RHT_MUTEX(ht);
 
 	if (nelems)
 		size = roundup_pow_of_two(nelems * 3 / 2);
@@ -342,15 +445,7 @@ static int rhashtable_shrink(struct rhashtable *ht)
 	if (rht_dereference(old_tbl->future_tbl, ht))
 		return -EEXIST;
 
-	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
-	if (new_tbl == NULL)
-		return -ENOMEM;
-
-	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
-	if (err)
-		bucket_table_free(new_tbl);
-
-	return err;
+	return rhashtable_rehash_alloc(ht, old_tbl, size);
 }
 
 static void rht_deferred_worker(struct work_struct *work)
@@ -366,11 +461,14 @@ static void rht_deferred_worker(struct work_struct *work)
 	tbl = rhashtable_last_table(ht, tbl);
 
 	if (rht_grow_above_75(ht, tbl))
-		rhashtable_expand(ht);
+		err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
 	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
-		rhashtable_shrink(ht);
+		err = rhashtable_shrink(ht);
+	else if (tbl->nest)
+		err = rhashtable_rehash_alloc(ht, tbl, tbl->size);
 
-	err = rhashtable_rehash_table(ht);
+	if (!err)
+		err = rhashtable_rehash_table(ht);
 
 	mutex_unlock(&ht->mutex);
 
@@ -439,8 +537,8 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
 	int elasticity;
 
 	elasticity = ht->elasticity;
-	pprev = &tbl->buckets[hash];
-	rht_for_each(head, tbl, hash) {
+	pprev = rht_bucket_var(tbl, hash);
+	rht_for_each_continue(head, *pprev, tbl, hash) {
 		struct rhlist_head *list;
 		struct rhlist_head *plist;
 
@@ -477,6 +575,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
 						  struct rhash_head *obj,
 						  void *data)
 {
+	struct rhash_head __rcu **pprev;
 	struct bucket_table *new_tbl;
 	struct rhash_head *head;
 
@@ -499,7 +598,11 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
 	if (unlikely(rht_grow_above_100(ht, tbl)))
 		return ERR_PTR(-EAGAIN);
 
-	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+	pprev = rht_bucket_insert(ht, tbl, hash);
+	if (!pprev)
+		return ERR_PTR(-ENOMEM);
+
+	head = rht_dereference_bucket(*pprev, tbl, hash);
 
 	RCU_INIT_POINTER(obj->next, head);
 	if (ht->rhlist) {
@@ -509,7 +612,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
 		RCU_INIT_POINTER(list->next, NULL);
 	}
 
-	rcu_assign_pointer(tbl->buckets[hash], obj);
+	rcu_assign_pointer(*pprev, obj);
 
 	atomic_inc(&ht->nelems);
 	if (rht_grow_above_75(ht, tbl))
@@ -975,7 +1078,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 				 void (*free_fn)(void *ptr, void *arg),
 				 void *arg)
 {
-	const struct bucket_table *tbl;
+	struct bucket_table *tbl;
 	unsigned int i;
 
 	cancel_work_sync(&ht->run_work);
@@ -986,7 +1089,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 	for (i = 0; i < tbl->size; i++) {
 		struct rhash_head *pos, *next;
 
-		for (pos = rht_dereference(tbl->buckets[i], ht),
+		for (pos = rht_dereference(*rht_bucket(tbl, i), ht),
 		     next = !rht_is_a_nulls(pos) ?
 			       rht_dereference(pos->next, ht) : NULL;
 		     !rht_is_a_nulls(pos);
@@ -1007,3 +1110,70 @@ void rhashtable_destroy(struct rhashtable *ht)
 	return rhashtable_free_and_destroy(ht, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(rhashtable_destroy);
+
+struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
+					    unsigned int hash)
+{
+	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+	static struct rhash_head __rcu *rhnull =
+		(struct rhash_head __rcu *)NULLS_MARKER(0);
+	unsigned int index = hash & ((1 << tbl->nest) - 1);
+	unsigned int size = tbl->size >> tbl->nest;
+	unsigned int subhash = hash;
+	union nested_table *ntbl;
+
+	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+	ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash);
+	subhash >>= tbl->nest;
+
+	while (ntbl && size > (1 << shift)) {
+		index = subhash & ((1 << shift) - 1);
+		ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash);
+		size >>= shift;
+		subhash >>= shift;
+	}
+
+	if (!ntbl)
+		return &rhnull;
+
+	return &ntbl[subhash].bucket;
+
+}
+EXPORT_SYMBOL_GPL(rht_bucket_nested);
+
+struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
+						   struct bucket_table *tbl,
+						   unsigned int hash)
+{
+	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+	unsigned int index = hash & ((1 << tbl->nest) - 1);
+	unsigned int size = tbl->size >> tbl->nest;
+	union nested_table *ntbl;
+	unsigned int shifted;
+	unsigned int nhash;
+
+	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+	hash >>= tbl->nest;
+	nhash = index;
+	shifted = tbl->nest;
+	ntbl = nested_table_alloc(ht, &ntbl[index].table,
+				  size <= (1 << shift) ? shifted : 0, nhash);
+
+	while (ntbl && size > (1 << shift)) {
+		index = hash & ((1 << shift) - 1);
+		size >>= shift;
+		hash >>= shift;
+		nhash |= index << shifted;
+		shifted += shift;
+		ntbl = nested_table_alloc(ht, &ntbl[index].table,
+					  size <= (1 << shift) ? shifted : 0,
+					  nhash);
+	}
+
+	if (!ntbl)
+		return NULL;
+
+	return &ntbl[hash].bucket;
+
+}
+EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);
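For illustration only (not part of the patch): rht_bucket_nested() consumes the low tbl->nest bits of the hash to pick a top-level slot, then one further group of shift bits per page level, and indexes the leaf page with whatever remains. A minimal user-space sketch of that decomposition, again assuming 4 KiB pages, 8-byte pointers (shift = 9) and a 1M-bucket table (nest = 2):

/* Standalone sketch, not kernel code: how rht_bucket_nested() above maps a
 * hash to a leaf slot under the assumptions stated in the lead-in. */
#include <stdio.h>

int main(void)
{
	const unsigned int shift = 12 - 3;	/* assumed PAGE_SHIFT - ilog2(sizeof(void *)) */
	const unsigned int nest = 2;		/* as computed for a 1M-bucket table */
	const unsigned int table_size = 1U << 20;
	unsigned int hash = 0xdeadbeef & (table_size - 1);
	unsigned int size = table_size >> nest;
	unsigned int index = hash & ((1U << nest) - 1);	/* top-level slot */
	unsigned int subhash = hash >> nest;
	unsigned int level = 0;

	printf("hash 0x%05x -> top-level index %u\n", hash, index);

	/* same loop structure as rht_bucket_nested() */
	while (size > (1U << shift)) {
		index = subhash & ((1U << shift) - 1);
		size >>= shift;
		subhash >>= shift;
		printf("  level %u index %u\n", ++level, index);
	}

	printf("  leaf slot %u\n", subhash);
	return 0;
}

rht_bucket_nested_insert() walks the same way but allocates missing levels via nested_table_alloc() and returns NULL on failure, which rhashtable_insert_one() above turns into -ENOMEM through rht_bucket_insert().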