author	Thomas Graf <tgraf@suug.ch>	2015-02-04 20:03:32 -0500
committer	David S. Miller <davem@davemloft.net>	2015-02-06 18:18:34 -0500
commit	a5ec68e3b8f2c95ea1a5d23dd543abbe0c8d0624 (patch)
tree	d9229fd3b7fd4b3321dded59e7bb4b8925e37cc9
parent	c88455ce50ae4224d84960ce2baa53e61580df27 (diff)
rhashtable: Use a single bucket lock for sibling buckets
rhashtable currently allows up to one bucket lock per bucket. This requires
multiple levels of complicated nested locking because, when resizing, a
single bucket of the smaller table maps to two buckets in the larger table.
So far rhashtable has explicitly locked both buckets in the larger table.

By excluding the highest bit of the hash from the bucket lock map, and thus
only allowing locks to buckets in a ratio of 1:2, the locking can be
simplified considerably without losing the benefits of multiple locks.
Larger tables which benefit from multiple locks will not have a single lock
per bucket anyway.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--	lib/rhashtable.c	170
1 file changed, 69 insertions(+), 101 deletions(-)
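Why the size/2 cap is sufficient: during an expansion, bucket old_hash of the
old table unzips into buckets old_hash and old_hash + old_size of the doubled
table, and those two indices differ only in their highest bit. Since the lock
index is the bucket index masked by (nlocks - 1), and nlocks never exceeds
half the table size, that highest bit never reaches the lock map, so both
sibling buckets always select the same lock. The standalone user-space sketch
below (not part of the patch; sizes and names are illustrative) checks exactly
this invariant:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative only: rhashtable picks a bucket lock by masking the bucket
 * index with (number of locks - 1), the lock count being a power of two.
 * After this patch the lock count is additionally capped at size / 2.
 */
static uint32_t lock_index(uint32_t bucket, uint32_t nlocks)
{
	return bucket & (nlocks - 1);
}

int main(void)
{
	const uint32_t old_size = 64;		/* table size before expand */
	const uint32_t new_size = 2 * old_size;	/* doubled on expand */
	const uint32_t nlocks = new_size / 2;	/* the new size >> 1 cap */

	for (uint32_t old_hash = 0; old_hash < old_size; old_hash++) {
		/* Bucket old_hash of the old table unzips into buckets
		 * old_hash and old_hash + old_size of the new table; they
		 * differ only in the highest bit, which the cap masks out,
		 * so one lock always covers the sibling pair.
		 */
		assert(lock_index(old_hash, nlocks) ==
		       lock_index(old_hash + old_size, nlocks));
	}
	printf("every sibling bucket pair shares one lock\n");
	return 0;
}

The asserts hold for any power-of-two old_size, which is why a single
lock_buckets()/unlock_buckets() pair in the patch below can replace the
nested two- and three-lock dances.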
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 71fd0dd45ce3..cea4244e032b 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -1,7 +1,7 @@
 /*
  * Resizable, Scalable, Concurrent Hash Table
  *
- * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch>
+ * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
  *
  * Based on the following paper:
@@ -34,12 +34,17 @@
 enum {
 	RHT_LOCK_NORMAL,
 	RHT_LOCK_NESTED,
-	RHT_LOCK_NESTED2,
 };
 
 /* The bucket lock is selected based on the hash and protects mutations
  * on a group of hash buckets.
  *
+ * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
+ * a single lock always covers both buckets which may both contains
+ * entries which link to the same bucket of the old table during resizing.
+ * This allows to simplify the locking as locking the bucket in both
+ * tables during resize always guarantee protection.
+ *
  * IMPORTANT: When holding the bucket lock of both the old and new table
  * during expansions and shrinking, the old bucket lock must always be
  * acquired first.
@@ -128,8 +133,8 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
 	nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
 	size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
 
-	/* Never allocate more than one lock per bucket */
-	size = min_t(unsigned int, size, tbl->size);
+	/* Never allocate more than 0.5 locks per bucket */
+	size = min_t(unsigned int, size, tbl->size >> 1);
 
 	if (sizeof(spinlock_t) != 0) {
 #ifdef CONFIG_NUMA
@@ -211,13 +216,36 @@ bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
 }
 EXPORT_SYMBOL_GPL(rht_shrink_below_30);
 
-static void hashtable_chain_unzip(const struct rhashtable *ht,
+static void lock_buckets(struct bucket_table *new_tbl,
+			 struct bucket_table *old_tbl, unsigned int hash)
+	__acquires(old_bucket_lock)
+{
+	spin_lock_bh(bucket_lock(old_tbl, hash));
+	if (new_tbl != old_tbl)
+		spin_lock_bh_nested(bucket_lock(new_tbl, hash),
+				    RHT_LOCK_NESTED);
+}
+
+static void unlock_buckets(struct bucket_table *new_tbl,
+			   struct bucket_table *old_tbl, unsigned int hash)
+	__releases(old_bucket_lock)
+{
+	if (new_tbl != old_tbl)
+		spin_unlock_bh(bucket_lock(new_tbl, hash));
+	spin_unlock_bh(bucket_lock(old_tbl, hash));
+}
+
+/**
+ * Unlink entries on bucket which hash to different bucket.
+ *
+ * Returns true if no more work needs to be performed on the bucket.
+ */
+static bool hashtable_chain_unzip(const struct rhashtable *ht,
 				  const struct bucket_table *new_tbl,
 				  struct bucket_table *old_tbl,
 				  size_t old_hash)
 {
 	struct rhash_head *he, *p, *next;
-	spinlock_t *new_bucket_lock, *new_bucket_lock2 = NULL;
 	unsigned int new_hash, new_hash2;
 
 	ASSERT_BUCKET_LOCK(old_tbl, old_hash);
@@ -226,10 +254,10 @@ static void hashtable_chain_unzip(const struct rhashtable *ht,
 	p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
 				   old_hash);
 	if (rht_is_a_nulls(p))
-		return;
+		return false;
 
-	new_hash = new_hash2 = head_hashfn(ht, new_tbl, p);
-	new_bucket_lock = bucket_lock(new_tbl, new_hash);
+	new_hash = head_hashfn(ht, new_tbl, p);
+	ASSERT_BUCKET_LOCK(new_tbl, new_hash);
 
 	/* Advance the old bucket pointer one or more times until it
 	 * reaches a node that doesn't hash to the same bucket as the
@@ -237,22 +265,14 @@ static void hashtable_chain_unzip(const struct rhashtable *ht,
 	 */
 	rht_for_each_continue(he, p->next, old_tbl, old_hash) {
 		new_hash2 = head_hashfn(ht, new_tbl, he);
+		ASSERT_BUCKET_LOCK(new_tbl, new_hash2);
+
 		if (new_hash != new_hash2)
 			break;
 		p = he;
 	}
 	rcu_assign_pointer(old_tbl->buckets[old_hash], p->next);
 
-	spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
-
-	/* If we have encountered an entry that maps to a different bucket in
-	 * the new table, lock down that bucket as well as we might cut off
-	 * the end of the chain.
-	 */
-	new_bucket_lock2 = bucket_lock(new_tbl, new_hash);
-	if (new_bucket_lock != new_bucket_lock2)
-		spin_lock_bh_nested(new_bucket_lock2, RHT_LOCK_NESTED2);
-
 	/* Find the subsequent node which does hash to the same
 	 * bucket as node P, or NULL if no such node exists.
 	 */
@@ -271,21 +291,16 @@ static void hashtable_chain_unzip(const struct rhashtable *ht,
 	 */
 	rcu_assign_pointer(p->next, next);
 
-	if (new_bucket_lock != new_bucket_lock2)
-		spin_unlock_bh(new_bucket_lock2);
-	spin_unlock_bh(new_bucket_lock);
+	p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
+				   old_hash);
+
+	return !rht_is_a_nulls(p);
 }
 
 static void link_old_to_new(struct bucket_table *new_tbl,
 			    unsigned int new_hash, struct rhash_head *entry)
 {
-	spinlock_t *new_bucket_lock;
-
-	new_bucket_lock = bucket_lock(new_tbl, new_hash);
-
-	spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
 	rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry);
-	spin_unlock_bh(new_bucket_lock);
 }
 
 /**
@@ -308,7 +323,6 @@ int rhashtable_expand(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
 	struct rhash_head *he;
-	spinlock_t *old_bucket_lock;
 	unsigned int new_hash, old_hash;
 	bool complete = false;
 
@@ -338,16 +352,14 @@ int rhashtable_expand(struct rhashtable *ht)
 	 */
 	for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
 		old_hash = rht_bucket_index(old_tbl, new_hash);
-		old_bucket_lock = bucket_lock(old_tbl, old_hash);
-
-		spin_lock_bh(old_bucket_lock);
+		lock_buckets(new_tbl, old_tbl, new_hash);
 		rht_for_each(he, old_tbl, old_hash) {
 			if (head_hashfn(ht, new_tbl, he) == new_hash) {
 				link_old_to_new(new_tbl, new_hash, he);
 				break;
 			}
 		}
-		spin_unlock_bh(old_bucket_lock);
+		unlock_buckets(new_tbl, old_tbl, new_hash);
 	}
 
 	/* Publish the new table pointer. Lookups may now traverse
@@ -370,18 +382,13 @@ int rhashtable_expand(struct rhashtable *ht)
 		 */
 		complete = true;
 		for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
-			struct rhash_head *head;
+			lock_buckets(new_tbl, old_tbl, old_hash);
 
-			old_bucket_lock = bucket_lock(old_tbl, old_hash);
-			spin_lock_bh(old_bucket_lock);
-
-			hashtable_chain_unzip(ht, new_tbl, old_tbl, old_hash);
-			head = rht_dereference_bucket(old_tbl->buckets[old_hash],
-						      old_tbl, old_hash);
-			if (!rht_is_a_nulls(head))
+			if (hashtable_chain_unzip(ht, new_tbl, old_tbl,
+						  old_hash))
 				complete = false;
 
-			spin_unlock_bh(old_bucket_lock);
+			unlock_buckets(new_tbl, old_tbl, old_hash);
 		}
 	}
 
@@ -409,7 +416,6 @@ EXPORT_SYMBOL_GPL(rhashtable_expand);
 int rhashtable_shrink(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht);
-	spinlock_t *new_bucket_lock, *old_bucket_lock1, *old_bucket_lock2;
 	unsigned int new_hash;
 
 	ASSERT_RHT_MUTEX(ht);
@@ -427,36 +433,16 @@ int rhashtable_shrink(struct rhashtable *ht)
 	 * always divide the size in half when shrinking, each bucket
 	 * in the new table maps to exactly two buckets in the old
 	 * table.
-	 *
-	 * As removals can occur concurrently on the old table, we need
-	 * to lock down both matching buckets in the old table.
 	 */
 	for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
-		old_bucket_lock1 = bucket_lock(tbl, new_hash);
-		old_bucket_lock2 = bucket_lock(tbl, new_hash + new_tbl->size);
-		new_bucket_lock = bucket_lock(new_tbl, new_hash);
-
-		spin_lock_bh(old_bucket_lock1);
-
-		/* Depending on the lock per buckets mapping, the bucket in
-		 * the lower and upper region may map to the same lock.
-		 */
-		if (old_bucket_lock1 != old_bucket_lock2) {
-			spin_lock_bh_nested(old_bucket_lock2, RHT_LOCK_NESTED);
-			spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED2);
-		} else {
-			spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
-		}
+		lock_buckets(new_tbl, tbl, new_hash);
 
 		rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
 				   tbl->buckets[new_hash]);
 		rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
 				   tbl->buckets[new_hash + new_tbl->size]);
 
-		spin_unlock_bh(new_bucket_lock);
-		if (old_bucket_lock1 != old_bucket_lock2)
-			spin_unlock_bh(old_bucket_lock2);
-		spin_unlock_bh(old_bucket_lock1);
+		unlock_buckets(new_tbl, tbl, new_hash);
 	}
 
 	/* Publish the new, valid hash table */
@@ -547,19 +533,18 @@ static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
  */
 void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
 {
-	struct bucket_table *tbl;
-	spinlock_t *lock;
+	struct bucket_table *tbl, *old_tbl;
 	unsigned hash;
 
 	rcu_read_lock();
 
 	tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	old_tbl = rht_dereference_rcu(ht->tbl, ht);
 	hash = head_hashfn(ht, tbl, obj);
-	lock = bucket_lock(tbl, hash);
 
-	spin_lock_bh(lock);
+	lock_buckets(tbl, old_tbl, hash);
 	__rhashtable_insert(ht, obj, tbl, hash);
-	spin_unlock_bh(lock);
+	unlock_buckets(tbl, old_tbl, hash);
 
 	rcu_read_unlock();
 }
@@ -582,21 +567,20 @@ EXPORT_SYMBOL_GPL(rhashtable_insert);
  */
 bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
 {
-	struct bucket_table *tbl;
+	struct bucket_table *tbl, *new_tbl, *old_tbl;
 	struct rhash_head __rcu **pprev;
 	struct rhash_head *he;
-	spinlock_t *lock;
-	unsigned int hash;
+	unsigned int hash, new_hash;
 	bool ret = false;
 
 	rcu_read_lock();
-	tbl = rht_dereference_rcu(ht->tbl, ht);
-	hash = head_hashfn(ht, tbl, obj);
-
-	lock = bucket_lock(tbl, hash);
-	spin_lock_bh(lock);
+	tbl = old_tbl = rht_dereference_rcu(ht->tbl, ht);
+	new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	new_hash = head_hashfn(ht, new_tbl, obj);
 
+	lock_buckets(new_tbl, old_tbl, new_hash);
 restart:
+	hash = rht_bucket_index(tbl, new_hash);
 	pprev = &tbl->buckets[hash];
 	rht_for_each(he, tbl, hash) {
 		if (he != obj) {
@@ -615,18 +599,12 @@ restart:
 	 * resizing. Thus traversing both is fine and the added cost is
 	 * very rare.
 	 */
-	if (tbl != rht_dereference_rcu(ht->future_tbl, ht)) {
-		spin_unlock_bh(lock);
-
-		tbl = rht_dereference_rcu(ht->future_tbl, ht);
-		hash = head_hashfn(ht, tbl, obj);
-
-		lock = bucket_lock(tbl, hash);
-		spin_lock_bh(lock);
+	if (tbl != new_tbl) {
+		tbl = new_tbl;
 		goto restart;
 	}
 
-	spin_unlock_bh(lock);
+	unlock_buckets(new_tbl, old_tbl, new_hash);
 
 	if (ret) {
 		atomic_dec(&ht->nelems);
@@ -782,24 +760,17 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
 				      void *arg)
 {
 	struct bucket_table *new_tbl, *old_tbl;
-	spinlock_t *new_bucket_lock, *old_bucket_lock;
-	u32 new_hash, old_hash;
+	u32 new_hash;
 	bool success = true;
 
 	BUG_ON(!ht->p.key_len);
 
 	rcu_read_lock();
-
 	old_tbl = rht_dereference_rcu(ht->tbl, ht);
-	old_hash = head_hashfn(ht, old_tbl, obj);
-	old_bucket_lock = bucket_lock(old_tbl, old_hash);
-	spin_lock_bh(old_bucket_lock);
-
 	new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
 	new_hash = head_hashfn(ht, new_tbl, obj);
-	new_bucket_lock = bucket_lock(new_tbl, new_hash);
-	if (unlikely(old_tbl != new_tbl))
-		spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
+
+	lock_buckets(new_tbl, old_tbl, new_hash);
 
 	if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset,
 				      compare, arg)) {
@@ -810,10 +781,7 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
 	__rhashtable_insert(ht, obj, new_tbl, new_hash);
 
 exit:
-	if (unlikely(old_tbl != new_tbl))
-		spin_unlock_bh(new_bucket_lock);
-	spin_unlock_bh(old_bucket_lock);
-
+	unlock_buckets(new_tbl, old_tbl, new_hash);
 	rcu_read_unlock();
 
 	return success;
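For reference, a user-space model of the resulting locking discipline, with
pthread spinlocks standing in for the kernel's spin_lock_bh() and
spin_lock_bh_nested(); the helper names are taken from the patch, but the
bucket_table layout shown here is a deliberately simplified assumption:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-in for the kernel's struct bucket_table; only the
 * fields needed to model the lock mapping are kept.
 */
struct bucket_table {
	unsigned int size;		/* number of hash buckets */
	unsigned int locks_mask;	/* nlocks - 1, with nlocks <= size / 2 */
	pthread_spinlock_t *locks;
};

static pthread_spinlock_t *bucket_lock(struct bucket_table *tbl,
				       unsigned int hash)
{
	return &tbl->locks[hash & tbl->locks_mask];
}

/* Old-table lock first; the new-table lock is only taken while a resize
 * is in flight (new_tbl != old_tbl), mirroring the patch.
 */
static void lock_buckets(struct bucket_table *new_tbl,
			 struct bucket_table *old_tbl, unsigned int hash)
{
	pthread_spin_lock(bucket_lock(old_tbl, hash));
	if (new_tbl != old_tbl)
		pthread_spin_lock(bucket_lock(new_tbl, hash));
}

static void unlock_buckets(struct bucket_table *new_tbl,
			   struct bucket_table *old_tbl, unsigned int hash)
{
	if (new_tbl != old_tbl)
		pthread_spin_unlock(bucket_lock(new_tbl, hash));
	pthread_spin_unlock(bucket_lock(old_tbl, hash));
}

static struct bucket_table *alloc_table(unsigned int size)
{
	/* Error handling omitted for brevity. */
	struct bucket_table *tbl = malloc(sizeof(*tbl));
	unsigned int nlocks = size / 2;		/* the cap from this patch */
	unsigned int i;

	tbl->size = size;
	tbl->locks_mask = nlocks - 1;
	tbl->locks = calloc(nlocks, sizeof(*tbl->locks));
	for (i = 0; i < nlocks; i++)
		pthread_spin_init(&tbl->locks[i], PTHREAD_PROCESS_PRIVATE);
	return tbl;
}

int main(void)
{
	struct bucket_table *old_tbl = alloc_table(64);
	struct bucket_table *new_tbl = alloc_table(128);
	unsigned int hash = 200;	/* any hash computed for the new table */

	lock_buckets(new_tbl, old_tbl, hash);
	/* ...mutate the chains hanging off both tables' buckets... */
	unlock_buckets(new_tbl, old_tbl, hash);

	printf("locked and unlocked old+new buckets for hash %u\n", hash);
	return 0;
}

Every mutation path touched by the patch follows this shape: compute the hash
against the future table, call lock_buckets() (old-table lock first, new-table
lock only during a resize), mutate, then unlock_buckets() in reverse order.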