Diffstat (limited to 'lib/rhashtable.c')
-rw-r--r--   lib/rhashtable.c | 170
1 file changed, 69 insertions, 101 deletions
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 71fd0dd45ce3..cea4244e032b 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -1,7 +1,7 @@
 /*
  * Resizable, Scalable, Concurrent Hash Table
  *
- * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch>
+ * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
  *
  * Based on the following paper:
@@ -34,12 +34,17 @@
 enum {
 	RHT_LOCK_NORMAL,
 	RHT_LOCK_NESTED,
-	RHT_LOCK_NESTED2,
 };
 
 /* The bucket lock is selected based on the hash and protects mutations
  * on a group of hash buckets.
  *
+ * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
+ * a single lock always covers both buckets which may both contains
+ * entries which link to the same bucket of the old table during resizing.
+ * This allows to simplify the locking as locking the bucket in both
+ * tables during resize always guarantee protection.
+ *
  * IMPORTANT: When holding the bucket lock of both the old and new table
  * during expansions and shrinking, the old bucket lock must always be
  * acquired first.
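The guarantee described in the new comment is plain power-of-two arithmetic: if a table of size S gets at most S/2 bucket locks and a lock is chosen by masking the hash with the lock count minus one (which is how bucket_lock() elsewhere in this file appears to do it via tbl->locks_mask; that mapping is an assumption about code not shown in this diff), then buckets h and h + S/2 can never select different locks. A standalone sketch in plain userspace C, not kernel code, that checks this:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int size = 8;			/* table size, power of two */
	unsigned int nlocks = size / 2;		/* the cap introduced in the next hunk */
	unsigned int mask = nlocks - 1;		/* assumed mapping: lock = hash & mask */

	for (unsigned int h = 0; h < size / 2; h++) {
		unsigned int sibling = h + size / 2;

		/* The two buckets that may both hold entries linking to the
		 * same bucket of the other table during a resize always share
		 * one lock, because size / 2 is a multiple of nlocks.
		 */
		assert((h & mask) == (sibling & mask));
		printf("buckets %u and %u -> lock %u\n", h, sibling, h & mask);
	}
	return 0;
}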
@@ -128,8 +133,8 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
 	nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
 	size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
 
-	/* Never allocate more than one lock per bucket */
-	size = min_t(unsigned int, size, tbl->size);
+	/* Never allocate more than 0.5 locks per bucket */
+	size = min_t(unsigned int, size, tbl->size >> 1);
 
 	if (sizeof(spinlock_t) != 0) {
 #ifdef CONFIG_NUMA
@@ -211,13 +216,36 @@ bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
 }
 EXPORT_SYMBOL_GPL(rht_shrink_below_30);
 
-static void hashtable_chain_unzip(const struct rhashtable *ht,
+static void lock_buckets(struct bucket_table *new_tbl,
+			 struct bucket_table *old_tbl, unsigned int hash)
+	__acquires(old_bucket_lock)
+{
+	spin_lock_bh(bucket_lock(old_tbl, hash));
+	if (new_tbl != old_tbl)
+		spin_lock_bh_nested(bucket_lock(new_tbl, hash),
+				    RHT_LOCK_NESTED);
+}
+
+static void unlock_buckets(struct bucket_table *new_tbl,
+			   struct bucket_table *old_tbl, unsigned int hash)
+	__releases(old_bucket_lock)
+{
+	if (new_tbl != old_tbl)
+		spin_unlock_bh(bucket_lock(new_tbl, hash));
+	spin_unlock_bh(bucket_lock(old_tbl, hash));
+}
+
+/**
+ * Unlink entries on bucket which hash to different bucket.
+ *
+ * Returns true if no more work needs to be performed on the bucket.
+ */
+static bool hashtable_chain_unzip(const struct rhashtable *ht,
 				  const struct bucket_table *new_tbl,
 				  struct bucket_table *old_tbl,
 				  size_t old_hash)
 {
 	struct rhash_head *he, *p, *next;
-	spinlock_t *new_bucket_lock, *new_bucket_lock2 = NULL;
 	unsigned int new_hash, new_hash2;
 
 	ASSERT_BUCKET_LOCK(old_tbl, old_hash);
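lock_buckets() and unlock_buckets() lean on the bucket_lock() helper defined earlier in lib/rhashtable.c, which this patch does not touch and which the hunks above therefore do not show. For orientation, it selects the lock by masking the hash with the per-table locks_mask, roughly:

/* Not part of this diff; shown for reference and possibly paraphrased. */
static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash)
{
	return &tbl->locks[hash & tbl->locks_mask];
}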
@@ -226,10 +254,10 @@ static void hashtable_chain_unzip(const struct rhashtable *ht,
 	p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
 				   old_hash);
 	if (rht_is_a_nulls(p))
-		return;
+		return false;
 
-	new_hash = new_hash2 = head_hashfn(ht, new_tbl, p);
-	new_bucket_lock = bucket_lock(new_tbl, new_hash);
+	new_hash = head_hashfn(ht, new_tbl, p);
+	ASSERT_BUCKET_LOCK(new_tbl, new_hash);
 
 	/* Advance the old bucket pointer one or more times until it
 	 * reaches a node that doesn't hash to the same bucket as the
@@ -237,22 +265,14 @@ static void hashtable_chain_unzip(const struct rhashtable *ht,
 	 */
 	rht_for_each_continue(he, p->next, old_tbl, old_hash) {
 		new_hash2 = head_hashfn(ht, new_tbl, he);
+		ASSERT_BUCKET_LOCK(new_tbl, new_hash2);
+
 		if (new_hash != new_hash2)
 			break;
 		p = he;
 	}
 	rcu_assign_pointer(old_tbl->buckets[old_hash], p->next);
 
-	spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
-
-	/* If we have encountered an entry that maps to a different bucket in
-	 * the new table, lock down that bucket as well as we might cut off
-	 * the end of the chain.
-	 */
-	new_bucket_lock2 = bucket_lock(new_tbl, new_hash);
-	if (new_bucket_lock != new_bucket_lock2)
-		spin_lock_bh_nested(new_bucket_lock2, RHT_LOCK_NESTED2);
-
 	/* Find the subsequent node which does hash to the same
 	 * bucket as node P, or NULL if no such node exists.
 	 */
@@ -271,21 +291,16 @@ static void hashtable_chain_unzip(const struct rhashtable *ht,
 	 */
 	rcu_assign_pointer(p->next, next);
 
-	if (new_bucket_lock != new_bucket_lock2)
-		spin_unlock_bh(new_bucket_lock2);
-	spin_unlock_bh(new_bucket_lock);
+	p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
+				   old_hash);
+
+	return !rht_is_a_nulls(p);
 }
 
 static void link_old_to_new(struct bucket_table *new_tbl,
 			    unsigned int new_hash, struct rhash_head *entry)
 {
-	spinlock_t *new_bucket_lock;
-
-	new_bucket_lock = bucket_lock(new_tbl, new_hash);
-
-	spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
 	rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry);
-	spin_unlock_bh(new_bucket_lock);
 }
 
 /**
@@ -308,7 +323,6 @@ int rhashtable_expand(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
 	struct rhash_head *he;
-	spinlock_t *old_bucket_lock;
 	unsigned int new_hash, old_hash;
 	bool complete = false;
 
@@ -338,16 +352,14 @@ int rhashtable_expand(struct rhashtable *ht)
 	 */
 	for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
 		old_hash = rht_bucket_index(old_tbl, new_hash);
-		old_bucket_lock = bucket_lock(old_tbl, old_hash);
-
-		spin_lock_bh(old_bucket_lock);
+		lock_buckets(new_tbl, old_tbl, new_hash);
 		rht_for_each(he, old_tbl, old_hash) {
 			if (head_hashfn(ht, new_tbl, he) == new_hash) {
 				link_old_to_new(new_tbl, new_hash, he);
 				break;
 			}
 		}
-		spin_unlock_bh(old_bucket_lock);
+		unlock_buckets(new_tbl, old_tbl, new_hash);
 	}
 
 	/* Publish the new table pointer. Lookups may now traverse
@@ -370,18 +382,13 @@ int rhashtable_expand(struct rhashtable *ht)
 		 */
 		complete = true;
 		for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
-			struct rhash_head *head;
+			lock_buckets(new_tbl, old_tbl, old_hash);
 
-			old_bucket_lock = bucket_lock(old_tbl, old_hash);
-			spin_lock_bh(old_bucket_lock);
-
-			hashtable_chain_unzip(ht, new_tbl, old_tbl, old_hash);
-			head = rht_dereference_bucket(old_tbl->buckets[old_hash],
-						      old_tbl, old_hash);
-			if (!rht_is_a_nulls(head))
+			if (hashtable_chain_unzip(ht, new_tbl, old_tbl,
+						  old_hash))
 				complete = false;
 
-			spin_unlock_bh(old_bucket_lock);
+			unlock_buckets(new_tbl, old_tbl, old_hash);
 		}
 	}
 
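The complete flag set in this hunk is consumed by a retry loop in rhashtable_expand() whose surrounding lines fall outside the context shown. In rough, simplified form (a sketch based on lib/rhashtable.c of this period, not the exact code), the unzip pass now repeats until hashtable_chain_unzip() reports every old bucket as drained:

	complete = false;
	while (!complete && !ht->being_destroyed) {
		/* Wait for readers; all new readers see the new table. */
		synchronize_rcu();

		complete = true;
		for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
			lock_buckets(new_tbl, old_tbl, old_hash);

			if (hashtable_chain_unzip(ht, new_tbl, old_tbl,
						  old_hash))
				complete = false;

			unlock_buckets(new_tbl, old_tbl, old_hash);
		}
	}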
@@ -409,7 +416,6 @@ EXPORT_SYMBOL_GPL(rhashtable_expand);
 int rhashtable_shrink(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht);
-	spinlock_t *new_bucket_lock, *old_bucket_lock1, *old_bucket_lock2;
 	unsigned int new_hash;
 
 	ASSERT_RHT_MUTEX(ht);
@@ -427,36 +433,16 @@ int rhashtable_shrink(struct rhashtable *ht)
 	 * always divide the size in half when shrinking, each bucket
 	 * in the new table maps to exactly two buckets in the old
 	 * table.
-	 *
-	 * As removals can occur concurrently on the old table, we need
-	 * to lock down both matching buckets in the old table.
 	 */
 	for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
-		old_bucket_lock1 = bucket_lock(tbl, new_hash);
-		old_bucket_lock2 = bucket_lock(tbl, new_hash + new_tbl->size);
-		new_bucket_lock = bucket_lock(new_tbl, new_hash);
-
-		spin_lock_bh(old_bucket_lock1);
-
-		/* Depending on the lock per buckets mapping, the bucket in
-		 * the lower and upper region may map to the same lock.
-		 */
-		if (old_bucket_lock1 != old_bucket_lock2) {
-			spin_lock_bh_nested(old_bucket_lock2, RHT_LOCK_NESTED);
-			spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED2);
-		} else {
-			spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
-		}
+		lock_buckets(new_tbl, tbl, new_hash);
 
 		rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
 				   tbl->buckets[new_hash]);
 		rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
 				   tbl->buckets[new_hash + new_tbl->size]);
 
-		spin_unlock_bh(new_bucket_lock);
-		if (old_bucket_lock1 != old_bucket_lock2)
-			spin_unlock_bh(old_bucket_lock2);
-		spin_unlock_bh(old_bucket_lock1);
+		unlock_buckets(new_tbl, tbl, new_hash);
 	}
 
 	/* Publish the new, valid hash table */
@@ -547,19 +533,18 @@ static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
  */
 void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
 {
-	struct bucket_table *tbl;
-	spinlock_t *lock;
+	struct bucket_table *tbl, *old_tbl;
 	unsigned hash;
 
 	rcu_read_lock();
 
 	tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	old_tbl = rht_dereference_rcu(ht->tbl, ht);
 	hash = head_hashfn(ht, tbl, obj);
-	lock = bucket_lock(tbl, hash);
 
-	spin_lock_bh(lock);
+	lock_buckets(tbl, old_tbl, hash);
 	__rhashtable_insert(ht, obj, tbl, hash);
-	spin_unlock_bh(lock);
+	unlock_buckets(tbl, old_tbl, hash);
 
 	rcu_read_unlock();
 }
@@ -582,21 +567,20 @@ EXPORT_SYMBOL_GPL(rhashtable_insert);
  */
 bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
 {
-	struct bucket_table *tbl;
+	struct bucket_table *tbl, *new_tbl, *old_tbl;
 	struct rhash_head __rcu **pprev;
 	struct rhash_head *he;
-	spinlock_t *lock;
-	unsigned int hash;
+	unsigned int hash, new_hash;
 	bool ret = false;
 
 	rcu_read_lock();
-	tbl = rht_dereference_rcu(ht->tbl, ht);
-	hash = head_hashfn(ht, tbl, obj);
-
-	lock = bucket_lock(tbl, hash);
-	spin_lock_bh(lock);
+	tbl = old_tbl = rht_dereference_rcu(ht->tbl, ht);
+	new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	new_hash = head_hashfn(ht, new_tbl, obj);
 
+	lock_buckets(new_tbl, old_tbl, new_hash);
 restart:
+	hash = rht_bucket_index(tbl, new_hash);
 	pprev = &tbl->buckets[hash];
 	rht_for_each(he, tbl, hash) {
 		if (he != obj) {
@@ -615,18 +599,12 @@ restart:
 	 * resizing. Thus traversing both is fine and the added cost is
 	 * very rare.
 	 */
-	if (tbl != rht_dereference_rcu(ht->future_tbl, ht)) {
-		spin_unlock_bh(lock);
-
-		tbl = rht_dereference_rcu(ht->future_tbl, ht);
-		hash = head_hashfn(ht, tbl, obj);
-
-		lock = bucket_lock(tbl, hash);
-		spin_lock_bh(lock);
+	if (tbl != new_tbl) {
+		tbl = new_tbl;
 		goto restart;
 	}
 
-	spin_unlock_bh(lock);
+	unlock_buckets(new_tbl, old_tbl, new_hash);
 
 	if (ret) {
 		atomic_dec(&ht->nelems);
@@ -782,24 +760,17 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
 				      void *arg)
 {
 	struct bucket_table *new_tbl, *old_tbl;
-	spinlock_t *new_bucket_lock, *old_bucket_lock;
-	u32 new_hash, old_hash;
+	u32 new_hash;
 	bool success = true;
 
 	BUG_ON(!ht->p.key_len);
 
 	rcu_read_lock();
-
 	old_tbl = rht_dereference_rcu(ht->tbl, ht);
-	old_hash = head_hashfn(ht, old_tbl, obj);
-	old_bucket_lock = bucket_lock(old_tbl, old_hash);
-	spin_lock_bh(old_bucket_lock);
-
 	new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
 	new_hash = head_hashfn(ht, new_tbl, obj);
-	new_bucket_lock = bucket_lock(new_tbl, new_hash);
-	if (unlikely(old_tbl != new_tbl))
-		spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
+
+	lock_buckets(new_tbl, old_tbl, new_hash);
 
 	if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset,
 				      compare, arg)) {
@@ -810,10 +781,7 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
 	__rhashtable_insert(ht, obj, new_tbl, new_hash);
 
 exit:
-	if (unlikely(old_tbl != new_tbl))
-		spin_unlock_bh(new_bucket_lock);
-	spin_unlock_bh(old_bucket_lock);
-
+	unlock_buckets(new_tbl, old_tbl, new_hash);
 	rcu_read_unlock();
 
 	return success;