-rw-r--r--  fs/gfs2/glock.c              |  28
-rw-r--r--  include/linux/rhashtable.h   |  78
-rw-r--r--  lib/rhashtable.c             | 270
-rw-r--r--  net/tipc/net.c               |   4
-rw-r--r--  net/tipc/socket.c            |  30
5 files changed, 316 insertions(+), 94 deletions(-)
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 94f50cac91c6..70e94170af85 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1420,26 +1420,32 @@ static struct shrinker glock_shrinker = {
  * @sdp: the filesystem
  * @bucket: the bucket
  *
+ * Note that the function can be called multiple times on the same
+ * object. So the user must ensure that the function can cope with
+ * that.
  */
 
 static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
 {
 	struct gfs2_glock *gl;
-	struct rhash_head *pos;
-	const struct bucket_table *tbl;
-	int i;
+	struct rhashtable_iter iter;
 
-	rcu_read_lock();
-	tbl = rht_dereference_rcu(gl_hash_table.tbl, &gl_hash_table);
-	for (i = 0; i < tbl->size; i++) {
-		rht_for_each_entry_rcu(gl, pos, tbl, i, gl_node) {
+	rhashtable_walk_enter(&gl_hash_table, &iter);
+
+	do {
+		gl = ERR_PTR(rhashtable_walk_start(&iter));
+		if (gl)
+			continue;
+
+		while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
 			if ((gl->gl_name.ln_sbd == sdp) &&
 			    lockref_get_not_dead(&gl->gl_lockref))
 				examiner(gl);
-		}
-	}
-	rcu_read_unlock();
-	cond_resched();
+
+		rhashtable_walk_stop(&iter);
+	} while (cond_resched(), gl == ERR_PTR(-EAGAIN));
+
+	rhashtable_walk_exit(&iter);
 }
 
 /**
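The hunk above moves glock_hash_walk() from an open-coded scan of the bucket array to the rhashtable walk iterator (the tipc_sk_reinit() change later in this patch does the same). For reference, a minimal stand-alone sketch of that enter/start/next/stop/exit pattern is shown below; it is illustrative only and not part of the patch - the object type and the visit() callback are placeholders:

#include <linux/err.h>
#include <linux/rhashtable.h>
#include <linux/sched.h>

struct my_obj {
	struct rhash_head node;		/* placeholder hashed member */
};

static void my_table_walk(struct rhashtable *ht,
			  void (*visit)(struct my_obj *obj))
{
	struct rhashtable_iter iter;
	struct my_obj *obj;

	rhashtable_walk_enter(ht, &iter);

	do {
		obj = ERR_PTR(rhashtable_walk_start(&iter));
		if (obj)
			continue;	/* start() returned -EAGAIN: redo this pass */

		/* A resize during the walk may deliver an object twice, so
		 * visit() must tolerate repeats (see the new comment above). */
		while ((obj = rhashtable_walk_next(&iter)) && !IS_ERR(obj))
			visit(obj);

		rhashtable_walk_stop(&iter);
	} while (cond_resched(), obj == ERR_PTR(-EAGAIN));

	rhashtable_walk_exit(&iter);
}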
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 5c132d3188be..f2e12a845910 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -61,6 +61,7 @@ struct rhlist_head {
 /**
  * struct bucket_table - Table of hash buckets
  * @size: Number of hash buckets
+ * @nest: Number of bits of first-level nested table.
  * @rehash: Current bucket being rehashed
  * @hash_rnd: Random seed to fold into hash
  * @locks_mask: Mask to apply before accessing locks[]
@@ -68,10 +69,12 @@ struct rhlist_head {
  * @walkers: List of active walkers
  * @rcu: RCU structure for freeing the table
  * @future_tbl: Table under construction during rehashing
+ * @ntbl: Nested table used when out of memory.
  * @buckets: size * hash buckets
  */
 struct bucket_table {
 	unsigned int		size;
+	unsigned int		nest;
 	unsigned int		rehash;
 	u32			hash_rnd;
 	unsigned int		locks_mask;
@@ -81,7 +84,7 @@ struct bucket_table {
 
 	struct bucket_table __rcu *future_tbl;
 
-	struct rhash_head __rcu	*buckets[] ____cacheline_aligned_in_smp;
+	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };
 
 /**
@@ -374,6 +377,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 				 void *arg);
 void rhashtable_destroy(struct rhashtable *ht);
 
+struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
+					    unsigned int hash);
+struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
+						   struct bucket_table *tbl,
+						   unsigned int hash);
+
 #define rht_dereference(p, ht) \
 	rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))
 
@@ -389,6 +398,27 @@ void rhashtable_destroy(struct rhashtable *ht);
 #define rht_entry(tpos, pos, member) \
 	({ tpos = container_of(pos, typeof(*tpos), member); 1; })
 
+static inline struct rhash_head __rcu *const *rht_bucket(
+	const struct bucket_table *tbl, unsigned int hash)
+{
+	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
+				     &tbl->buckets[hash];
+}
+
+static inline struct rhash_head __rcu **rht_bucket_var(
+	struct bucket_table *tbl, unsigned int hash)
+{
+	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
+				     &tbl->buckets[hash];
+}
+
+static inline struct rhash_head __rcu **rht_bucket_insert(
+	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
+{
+	return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
+				     &tbl->buckets[hash];
+}
+
 /**
  * rht_for_each_continue - continue iterating over hash chain
  * @pos: the &struct rhash_head to use as a loop cursor.
@@ -408,7 +438,7 @@ void rhashtable_destroy(struct rhashtable *ht);
  * @hash: the hash value / bucket index
  */
 #define rht_for_each(pos, tbl, hash) \
-	rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash)
+	rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
 
 /**
 * rht_for_each_entry_continue - continue iterating over hash chain
@@ -433,7 +463,7 @@ void rhashtable_destroy(struct rhashtable *ht);
  * @member: name of the &struct rhash_head within the hashable struct.
  */
 #define rht_for_each_entry(tpos, pos, tbl, hash, member)		\
-	rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash],	\
+	rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash),	\
 				    tbl, hash, member)
 
 /**
@@ -448,13 +478,13 @@ void rhashtable_destroy(struct rhashtable *ht);
  * This hash chain list-traversal primitive allows for the looped code to
  * remove the loop cursor from the list.
  */
-#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)	    \
-	for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \
-	     next = !rht_is_a_nulls(pos) ?				    \
-		       rht_dereference_bucket(pos->next, tbl, hash) : NULL; \
-	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	    \
-	     pos = next,						    \
-	     next = !rht_is_a_nulls(pos) ?				    \
-		       rht_dereference_bucket(pos->next, tbl, hash) : NULL)
+#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)	      \
+	for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \
+	     next = !rht_is_a_nulls(pos) ?				      \
+		       rht_dereference_bucket(pos->next, tbl, hash) : NULL;   \
+	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	      \
+	     pos = next,						      \
+	     next = !rht_is_a_nulls(pos) ?				      \
+		       rht_dereference_bucket(pos->next, tbl, hash) : NULL)
 
 /**
@@ -485,7 +515,7 @@ void rhashtable_destroy(struct rhashtable *ht);
  * traversal is guarded by rcu_read_lock().
  */
 #define rht_for_each_rcu(pos, tbl, hash)				\
-	rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash)
+	rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
 
 /**
 * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain
@@ -518,8 +548,8 @@ void rhashtable_destroy(struct rhashtable *ht);
  * the _rcu mutation primitives such as rhashtable_insert() as long as the
  * traversal is guarded by rcu_read_lock().
  */
-#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		\
-	rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\
+#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		   \
+	rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \
 					tbl, hash, member)
 
 /**
@@ -565,7 +595,7 @@ static inline struct rhash_head *__rhashtable_lookup(
 		.ht = ht,
 		.key = key,
 	};
-	const struct bucket_table *tbl;
+	struct bucket_table *tbl;
 	struct rhash_head *he;
 	unsigned int hash;
 
@@ -697,8 +727,12 @@ slow_path:
 	}
 
 	elasticity = ht->elasticity;
-	pprev = &tbl->buckets[hash];
-	rht_for_each(head, tbl, hash) {
+	pprev = rht_bucket_insert(ht, tbl, hash);
+	data = ERR_PTR(-ENOMEM);
+	if (!pprev)
+		goto out;
+
+	rht_for_each_continue(head, *pprev, tbl, hash) {
 		struct rhlist_head *plist;
 		struct rhlist_head *list;
 
@@ -736,7 +770,7 @@ slow_path:
 	if (unlikely(rht_grow_above_100(ht, tbl)))
 		goto slow_path;
 
-	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+	head = rht_dereference_bucket(*pprev, tbl, hash);
 
 	RCU_INIT_POINTER(obj->next, head);
 	if (rhlist) {
@@ -746,7 +780,7 @@ slow_path:
 		RCU_INIT_POINTER(list->next, NULL);
 	}
 
-	rcu_assign_pointer(tbl->buckets[hash], obj);
+	rcu_assign_pointer(*pprev, obj);
 
 	atomic_inc(&ht->nelems);
 	if (rht_grow_above_75(ht, tbl))
@@ -955,8 +989,8 @@ static inline int __rhashtable_remove_fast_one(
 
 	spin_lock_bh(lock);
 
-	pprev = &tbl->buckets[hash];
-	rht_for_each(he, tbl, hash) {
+	pprev = rht_bucket_var(tbl, hash);
+	rht_for_each_continue(he, *pprev, tbl, hash) {
 		struct rhlist_head *list;
 
 		list = container_of(he, struct rhlist_head, rhead);
@@ -1107,8 +1141,8 @@ static inline int __rhashtable_replace_fast(
 
 	spin_lock_bh(lock);
 
-	pprev = &tbl->buckets[hash];
-	rht_for_each(he, tbl, hash) {
+	pprev = rht_bucket_var(tbl, hash);
+	rht_for_each_continue(he, *pprev, tbl, hash) {
 		if (he != obj_old) {
 			pprev = &he->next;
 			continue;
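The rht_bucket*() helpers added above choose between the flat bucket array and the new nested layout. In the nested case the bucket index is split: the first tbl->nest bits select an entry in the top-level page, and each further level consumes PAGE_SHIFT - ilog2(sizeof(void *)) bits. A small user-space sketch of that arithmetic follows, assuming 4 KiB pages and 8-byte pointers (512 slots, i.e. 9 bits, per level); the program is illustrative and not part of the patch:

#include <stdio.h>

#define PAGE_SHIFT	12				/* assumption: 4 KiB pages */
#define PTR_SHIFT	3				/* assumption: 8-byte pointers */
#define LEVEL_SHIFT	(PAGE_SHIFT - PTR_SHIFT)	/* 9 bits, 512 slots per level */

/* Mirrors the index split in rht_bucket_nested(): first tbl->nest bits,
 * then LEVEL_SHIFT bits per nested level, remainder selects the leaf bucket. */
static void split_hash(unsigned int hash, unsigned int size, unsigned int nest)
{
	unsigned int subhash = hash;

	printf("first-level index: %u\n", subhash & ((1U << nest) - 1));
	subhash >>= nest;
	size >>= nest;

	while (size > (1U << LEVEL_SHIFT)) {
		printf("intermediate index: %u\n",
		       subhash & ((1U << LEVEL_SHIFT) - 1));
		subhash >>= LEVEL_SHIFT;
		size >>= LEVEL_SHIFT;
	}

	printf("leaf bucket index: %u\n", subhash);
}

int main(void)
{
	/* A 2^20-bucket table: tbl->nest = (ilog2(nbuckets) - 1) % 9 + 1 = 2 */
	split_hash(0x000fbeef, 1U << 20, 2);
	return 0;
}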
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 32d0ad058380..172454e6b979 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -32,6 +32,11 @@
 #define HASH_MIN_SIZE		4U
 #define BUCKET_LOCKS_PER_CPU	32UL
 
+union nested_table {
+	union nested_table __rcu *table;
+	struct rhash_head __rcu *bucket;
+};
+
 static u32 head_hashfn(struct rhashtable *ht,
 		       const struct bucket_table *tbl,
 		       const struct rhash_head *he)
@@ -76,6 +81,9 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
 	/* Never allocate more than 0.5 locks per bucket */
 	size = min_t(unsigned int, size, tbl->size >> 1);
 
+	if (tbl->nest)
+		size = min(size, 1U << tbl->nest);
+
 	if (sizeof(spinlock_t) != 0) {
 		tbl->locks = NULL;
 #ifdef CONFIG_NUMA
@@ -99,8 +107,45 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
 	return 0;
 }
 
+static void nested_table_free(union nested_table *ntbl, unsigned int size)
+{
+	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+	const unsigned int len = 1 << shift;
+	unsigned int i;
+
+	ntbl = rcu_dereference_raw(ntbl->table);
+	if (!ntbl)
+		return;
+
+	if (size > len) {
+		size >>= shift;
+		for (i = 0; i < len; i++)
+			nested_table_free(ntbl + i, size);
+	}
+
+	kfree(ntbl);
+}
+
+static void nested_bucket_table_free(const struct bucket_table *tbl)
+{
+	unsigned int size = tbl->size >> tbl->nest;
+	unsigned int len = 1 << tbl->nest;
+	union nested_table *ntbl;
+	unsigned int i;
+
+	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+
+	for (i = 0; i < len; i++)
+		nested_table_free(ntbl + i, size);
+
+	kfree(ntbl);
+}
+
 static void bucket_table_free(const struct bucket_table *tbl)
 {
+	if (tbl->nest)
+		nested_bucket_table_free(tbl);
+
 	if (tbl)
 		kvfree(tbl->locks);
 
@@ -112,6 +157,59 @@ static void bucket_table_free_rcu(struct rcu_head *head)
 	bucket_table_free(container_of(head, struct bucket_table, rcu));
 }
 
+static union nested_table *nested_table_alloc(struct rhashtable *ht,
+					      union nested_table __rcu **prev,
+					      unsigned int shifted,
+					      unsigned int nhash)
+{
+	union nested_table *ntbl;
+	int i;
+
+	ntbl = rcu_dereference(*prev);
+	if (ntbl)
+		return ntbl;
+
+	ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);
+
+	if (ntbl && shifted) {
+		for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++)
+			INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht,
+					    (i << shifted) | nhash);
+	}
+
+	rcu_assign_pointer(*prev, ntbl);
+
+	return ntbl;
+}
+
+static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
+						      size_t nbuckets,
+						      gfp_t gfp)
+{
+	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+	struct bucket_table *tbl;
+	size_t size;
+
+	if (nbuckets < (1 << (shift + 1)))
+		return NULL;
+
+	size = sizeof(*tbl) + sizeof(tbl->buckets[0]);
+
+	tbl = kzalloc(size, gfp);
+	if (!tbl)
+		return NULL;
+
+	if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
+				0, 0)) {
+		kfree(tbl);
+		return NULL;
+	}
+
+	tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;
+
+	return tbl;
+}
+
 static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 					       size_t nbuckets,
 					       gfp_t gfp)
@@ -126,10 +224,17 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 	tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
 	if (tbl == NULL && gfp == GFP_KERNEL)
 		tbl = vzalloc(size);
+
+	size = nbuckets;
+
+	if (tbl == NULL && gfp != GFP_KERNEL) {
+		tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
+		nbuckets = 0;
+	}
 	if (tbl == NULL)
 		return NULL;
 
-	tbl->size = nbuckets;
+	tbl->size = size;
 
 	if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
 		bucket_table_free(tbl);
@@ -164,12 +269,17 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	struct bucket_table *new_tbl = rhashtable_last_table(ht,
 		rht_dereference_rcu(old_tbl->future_tbl, ht));
-	struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
-	int err = -ENOENT;
+	struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash);
+	int err = -EAGAIN;
 	struct rhash_head *head, *next, *entry;
 	spinlock_t *new_bucket_lock;
 	unsigned int new_hash;
 
+	if (new_tbl->nest)
+		goto out;
+
+	err = -ENOENT;
+
 	rht_for_each(entry, old_tbl, old_hash) {
 		err = 0;
 		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
@@ -202,19 +312,26 @@ out:
 	return err;
 }
 
-static void rhashtable_rehash_chain(struct rhashtable *ht,
-				    unsigned int old_hash)
+static int rhashtable_rehash_chain(struct rhashtable *ht,
+				   unsigned int old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	spinlock_t *old_bucket_lock;
+	int err;
 
 	old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
 
 	spin_lock_bh(old_bucket_lock);
-	while (!rhashtable_rehash_one(ht, old_hash))
+	while (!(err = rhashtable_rehash_one(ht, old_hash)))
 		;
-	old_tbl->rehash++;
+
+	if (err == -ENOENT) {
+		old_tbl->rehash++;
+		err = 0;
+	}
 	spin_unlock_bh(old_bucket_lock);
+
+	return err;
 }
 
 static int rhashtable_rehash_attach(struct rhashtable *ht,
@@ -246,13 +363,17 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
 	struct bucket_table *new_tbl;
 	struct rhashtable_walker *walker;
 	unsigned int old_hash;
+	int err;
 
 	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
 	if (!new_tbl)
 		return 0;
 
-	for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
-		rhashtable_rehash_chain(ht, old_hash);
+	for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
+		err = rhashtable_rehash_chain(ht, old_hash);
+		if (err)
+			return err;
+	}
 
 	/* Publish the new table pointer. */
 	rcu_assign_pointer(ht->tbl, new_tbl);
@@ -271,31 +392,16 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
 	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
 }
 
-/**
- * rhashtable_expand - Expand hash table while allowing concurrent lookups
- * @ht:		the hash table to expand
- *
- * A secondary bucket array is allocated and the hash entries are migrated.
- *
- * This function may only be called in a context where it is safe to call
- * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
- *
- * The caller must ensure that no concurrent resizing occurs by holding
- * ht->mutex.
- *
- * It is valid to have concurrent insertions and deletions protected by per
- * bucket locks or concurrent RCU protected lookups and traversals.
- */
-static int rhashtable_expand(struct rhashtable *ht)
+static int rhashtable_rehash_alloc(struct rhashtable *ht,
+				   struct bucket_table *old_tbl,
+				   unsigned int size)
 {
-	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *new_tbl;
 	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
-	old_tbl = rhashtable_last_table(ht, old_tbl);
-
-	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
+	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
@@ -324,12 +430,9 @@ static int rhashtable_expand(struct rhashtable *ht)
  */
 static int rhashtable_shrink(struct rhashtable *ht)
 {
-	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	unsigned int nelems = atomic_read(&ht->nelems);
 	unsigned int size = 0;
-	int err;
-
-	ASSERT_RHT_MUTEX(ht);
 
 	if (nelems)
 		size = roundup_pow_of_two(nelems * 3 / 2);
@@ -342,15 +445,7 @@ static int rhashtable_shrink(struct rhashtable *ht)
 	if (rht_dereference(old_tbl->future_tbl, ht))
 		return -EEXIST;
 
-	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
-	if (new_tbl == NULL)
-		return -ENOMEM;
-
-	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
-	if (err)
-		bucket_table_free(new_tbl);
-
-	return err;
+	return rhashtable_rehash_alloc(ht, old_tbl, size);
 }
 
 static void rht_deferred_worker(struct work_struct *work)
@@ -366,11 +461,14 @@ static void rht_deferred_worker(struct work_struct *work)
 	tbl = rhashtable_last_table(ht, tbl);
 
 	if (rht_grow_above_75(ht, tbl))
-		rhashtable_expand(ht);
+		err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
 	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
-		rhashtable_shrink(ht);
+		err = rhashtable_shrink(ht);
+	else if (tbl->nest)
+		err = rhashtable_rehash_alloc(ht, tbl, tbl->size);
 
-	err = rhashtable_rehash_table(ht);
+	if (!err)
+		err = rhashtable_rehash_table(ht);
 
 	mutex_unlock(&ht->mutex);
 
@@ -439,8 +537,8 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
 	int elasticity;
 
 	elasticity = ht->elasticity;
-	pprev = &tbl->buckets[hash];
-	rht_for_each(head, tbl, hash) {
+	pprev = rht_bucket_var(tbl, hash);
+	rht_for_each_continue(head, *pprev, tbl, hash) {
 		struct rhlist_head *list;
 		struct rhlist_head *plist;
 
@@ -477,6 +575,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
 					   struct rhash_head *obj,
 					   void *data)
 {
+	struct rhash_head __rcu **pprev;
 	struct bucket_table *new_tbl;
 	struct rhash_head *head;
 
@@ -499,7 +598,11 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
 	if (unlikely(rht_grow_above_100(ht, tbl)))
 		return ERR_PTR(-EAGAIN);
 
-	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+	pprev = rht_bucket_insert(ht, tbl, hash);
+	if (!pprev)
+		return ERR_PTR(-ENOMEM);
+
+	head = rht_dereference_bucket(*pprev, tbl, hash);
 
 	RCU_INIT_POINTER(obj->next, head);
 	if (ht->rhlist) {
@@ -509,7 +612,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
 		RCU_INIT_POINTER(list->next, NULL);
 	}
 
-	rcu_assign_pointer(tbl->buckets[hash], obj);
+	rcu_assign_pointer(*pprev, obj);
 
 	atomic_inc(&ht->nelems);
 	if (rht_grow_above_75(ht, tbl))
@@ -975,7 +1078,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 			    void (*free_fn)(void *ptr, void *arg),
 			    void *arg)
 {
-	const struct bucket_table *tbl;
+	struct bucket_table *tbl;
 	unsigned int i;
 
 	cancel_work_sync(&ht->run_work);
@@ -986,7 +1089,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 		for (i = 0; i < tbl->size; i++) {
 			struct rhash_head *pos, *next;
 
-			for (pos = rht_dereference(tbl->buckets[i], ht),
+			for (pos = rht_dereference(*rht_bucket(tbl, i), ht),
 			     next = !rht_is_a_nulls(pos) ?
 					rht_dereference(pos->next, ht) : NULL;
 			     !rht_is_a_nulls(pos);
@@ -1007,3 +1110,70 @@ void rhashtable_destroy(struct rhashtable *ht)
 	return rhashtable_free_and_destroy(ht, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(rhashtable_destroy);
+
+struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
+					    unsigned int hash)
+{
+	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+	static struct rhash_head __rcu *rhnull =
+		(struct rhash_head __rcu *)NULLS_MARKER(0);
+	unsigned int index = hash & ((1 << tbl->nest) - 1);
+	unsigned int size = tbl->size >> tbl->nest;
+	unsigned int subhash = hash;
+	union nested_table *ntbl;
+
+	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+	ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash);
+	subhash >>= tbl->nest;
+
+	while (ntbl && size > (1 << shift)) {
+		index = subhash & ((1 << shift) - 1);
+		ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash);
+		size >>= shift;
+		subhash >>= shift;
+	}
+
+	if (!ntbl)
+		return &rhnull;
+
+	return &ntbl[subhash].bucket;
+
+}
+EXPORT_SYMBOL_GPL(rht_bucket_nested);
+
+struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
+						   struct bucket_table *tbl,
+						   unsigned int hash)
+{
+	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+	unsigned int index = hash & ((1 << tbl->nest) - 1);
+	unsigned int size = tbl->size >> tbl->nest;
+	union nested_table *ntbl;
+	unsigned int shifted;
+	unsigned int nhash;
+
+	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+	hash >>= tbl->nest;
+	nhash = index;
+	shifted = tbl->nest;
+	ntbl = nested_table_alloc(ht, &ntbl[index].table,
+				  size <= (1 << shift) ? shifted : 0, nhash);
+
+	while (ntbl && size > (1 << shift)) {
+		index = hash & ((1 << shift) - 1);
+		size >>= shift;
+		hash >>= shift;
+		nhash |= index << shifted;
+		shifted += shift;
+		ntbl = nested_table_alloc(ht, &ntbl[index].table,
+					  size <= (1 << shift) ? shifted : 0,
+					  nhash);
+	}
+
+	if (!ntbl)
+		return NULL;
+
+	return &ntbl[hash].bucket;
+
+}
+EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);
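One consequence of the insert-path changes above: rht_bucket_insert() may need to allocate a nested page with GFP_ATOMIC, and when that fails the insert paths return -ENOMEM, so callers of the insert API should be prepared for that error. A caller-side sketch follows; my_obj, my_params and the logging choice are illustrative placeholders, not part of the patch:

#include <linux/printk.h>
#include <linux/rhashtable.h>
#include <linux/types.h>

struct my_obj {
	u32 key;
	struct rhash_head node;
};

static const struct rhashtable_params my_params = {
	.key_len	= sizeof(u32),
	.key_offset	= offsetof(struct my_obj, key),
	.head_offset	= offsetof(struct my_obj, node),
	.automatic_shrinking = true,
};

/* Returns 0 on success; on -ENOMEM the object was never linked into ht. */
static int my_insert(struct rhashtable *ht, struct my_obj *obj)
{
	int err;

	err = rhashtable_insert_fast(ht, &obj->node, my_params);
	if (err == -ENOMEM)
		pr_warn_ratelimited("my_insert: nested bucket allocation failed\n");

	return err;
}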
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 28bf4feeb81c..ab8a2d5d1e32 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -110,6 +110,10 @@ int tipc_net_start(struct net *net, u32 addr)
 	char addr_string[16];
 
 	tn->own_addr = addr;
+
+	/* Ensure that the new address is visible before we reinit. */
+	smp_mb();
+
 	tipc_named_reinit(net);
 	tipc_sk_reinit(net);
 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 103d1fd058c0..6b09a778cc71 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -430,8 +430,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
 	INIT_LIST_HEAD(&tsk->cong_links);
 	msg = &tsk->phdr;
 	tn = net_generic(sock_net(sk), tipc_net_id);
-	tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
-		      NAMED_H_SIZE, 0);
 
 	/* Finish initializing socket data structures */
 	sock->ops = ops;
@@ -441,6 +439,13 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
 		pr_warn("Socket create failed; port number exhausted\n");
 		return -EINVAL;
 	}
+
+	/* Ensure tsk is visible before we read own_addr. */
+	smp_mb();
+
+	tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
+		      NAMED_H_SIZE, 0);
+
 	msg_set_origport(msg, tsk->portid);
 	setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
 	sk->sk_shutdown = 0;
@@ -2234,24 +2239,27 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
 void tipc_sk_reinit(struct net *net)
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	const struct bucket_table *tbl;
-	struct rhash_head *pos;
+	struct rhashtable_iter iter;
 	struct tipc_sock *tsk;
 	struct tipc_msg *msg;
-	int i;
 
-	rcu_read_lock();
-	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
-	for (i = 0; i < tbl->size; i++) {
-		rht_for_each_entry_rcu(tsk, pos, tbl, i, node) {
+	rhashtable_walk_enter(&tn->sk_rht, &iter);
+
+	do {
+		tsk = ERR_PTR(rhashtable_walk_start(&iter));
+		if (tsk)
+			continue;
+
+		while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
 			spin_lock_bh(&tsk->sk.sk_lock.slock);
 			msg = &tsk->phdr;
 			msg_set_prevnode(msg, tn->own_addr);
 			msg_set_orignode(msg, tn->own_addr);
 			spin_unlock_bh(&tsk->sk.sk_lock.slock);
 		}
-	}
-	rcu_read_unlock();
+
+		rhashtable_walk_stop(&iter);
+	} while (tsk == ERR_PTR(-EAGAIN));
 }
 
 static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid)