author     Linus Torvalds <torvalds@linux-foundation.org>  2015-02-10 23:01:30 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-02-10 23:01:30 -0500
commit     c5ce28df0e7c01a1de23c36ebdefcd803f2b6cbb (patch)
tree       9830baf38832769e1cf621708889111bbe3c93df /lib
parent     29afc4e9a408f2304e09c6dd0dbcfbd2356d0faa (diff)
parent     9399f0c51489ae8c16d6559b82a452fdc1895e91 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:

 1) More iov_iter conversion work from Al Viro.

    [ The "crypto: switch af_alg_make_sg() to iov_iter" commit was wrong, and
      this pull actually adds an extra commit on top of the branch I'm pulling
      to fix that up, so that the pre-merge state is ok. - Linus ]

 2) Various optimizations to the ipv4 forwarding information base trie lookup
    implementation. From Alexander Duyck.

 3) Remove sock_iocb altogether, from Christoph Hellwig.

 4) Allow congestion control algorithm selection via routing metrics. From
    Daniel Borkmann.

 5) Make ipv4 uncached route list per-cpu, from Eric Dumazet.

 6) Handle rfs hash collisions more gracefully, also from Eric Dumazet.

 7) Add xmit_more support to r8169, e1000, and e1000e drivers. From Florian
    Westphal.

 8) Transparent Ethernet Bridging support for GRO, from Jesse Gross.

 9) Add BPF packet actions to packet scheduler, from Jiri Pirko.

10) Add support for unique flow IDs to openvswitch, from Joe Stringer.

11) New NetCP ethernet driver, from Muralidharan Karicheri and Wingman Kwok.

12) More sanely handle out-of-window dupacks, which can result in serious ACK
    storms. From Neal Cardwell.

13) Various rhashtable bug fixes and enhancements, from Herbert Xu, Patrick
    McHardy, and Thomas Graf.

14) Support xmit_more in be2net, from Sathya Perla.

15) Group Policy extensions for vxlan, from Thomas Graf.

16) Remote Checksum Offload support for vxlan, from Tom Herbert.

17) Like ipv4, support lockless transmit over ipv6 UDP sockets. From Vlad
    Yasevich.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1494+1 commits)
  crypto: fix af_alg_make_sg() conversion to iov_iter
  ipv4: Namespecify TCP PMTU mechanism
  i40e: Fix for stats init function call in Rx setup
  tcp: don't include Fast Open option in SYN-ACK on pure SYN-data
  openvswitch: Only set TUNNEL_VXLAN_OPT if VXLAN-GBP metadata is set
  ipv6: Make __ipv6_select_ident static
  ipv6: Fix fragment id assignment on LE arches.
  bridge: Fix inability to add non-vlan fdb entry
  net: Mellanox: Delete unnecessary checks before the function call "vunmap"
  cxgb4: Add support in cxgb4 to get expansion rom version via ethtool
  ethtool: rename reserved1 memeber in ethtool_drvinfo for expansion ROM version
  net: dsa: Remove redundant phy_attach()
  IB/mlx4: Reset flow support for IB kernel ULPs
  IB/mlx4: Always use the correct port for mirrored multicast attachments
  net/bonding: Fix potential bad memory access during bonding events
  tipc: remove tipc_snprintf
  tipc: nl compat add noop and remove legacy nl framework
  tipc: convert legacy nl stats show to nl compat
  tipc: convert legacy nl net id get to nl compat
  tipc: convert legacy nl net id set to nl compat
  ...
Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig.debug      |    2
-rw-r--r--  lib/Makefile           |    3
-rw-r--r--  lib/iovec.c            |   87
-rw-r--r--  lib/rhashtable.c       | 1170
-rw-r--r--  lib/test_rhashtable.c  |  227
5 files changed, 981 insertions(+), 508 deletions(-)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 61689a86d7f8..e5ea3ab856bf 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1587,7 +1587,7 @@ config TEST_KSTRTOX
 	  tristate "Test kstrto*() family of functions at runtime"
 
 config TEST_RHASHTABLE
-	bool "Perform selftest on resizable hash table"
+	tristate "Perform selftest on resizable hash table"
 	default n
 	help
 	  Enable this option to test the rhashtable functions at boot.
diff --git a/lib/Makefile b/lib/Makefile
index 3c3b30b9e020..7db78934ec07 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -24,7 +24,7 @@ obj-y += lockref.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
+	 gcd.o lcm.o list_sort.o uuid.o flex_array.o clz_ctz.o \
 	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
 	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
 obj-y += string_helpers.o
@@ -35,6 +35,7 @@ obj-$(CONFIG_TEST_LKM) += test_module.o
 obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
 obj-$(CONFIG_TEST_BPF) += test_bpf.o
 obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
+obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/iovec.c b/lib/iovec.c
deleted file mode 100644
index 2d99cb4a5006..000000000000
--- a/lib/iovec.c
+++ /dev/null
@@ -1,87 +0,0 @@
-#include <linux/uaccess.h>
-#include <linux/export.h>
-#include <linux/uio.h>
-
-/*
- * Copy iovec to kernel. Returns -EFAULT on error.
- *
- * Note: this modifies the original iovec.
- */
-
-int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
-{
-	while (len > 0) {
-		if (iov->iov_len) {
-			int copy = min_t(unsigned int, len, iov->iov_len);
-			if (copy_from_user(kdata, iov->iov_base, copy))
-				return -EFAULT;
-			len -= copy;
-			kdata += copy;
-			iov->iov_base += copy;
-			iov->iov_len -= copy;
-		}
-		iov++;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(memcpy_fromiovec);
-
-/*
- * Copy kernel to iovec. Returns -EFAULT on error.
- */
-
-int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
-		      int offset, int len)
-{
-	int copy;
-	for (; len > 0; ++iov) {
-		/* Skip over the finished iovecs */
-		if (unlikely(offset >= iov->iov_len)) {
-			offset -= iov->iov_len;
-			continue;
-		}
-		copy = min_t(unsigned int, iov->iov_len - offset, len);
-		if (copy_to_user(iov->iov_base + offset, kdata, copy))
-			return -EFAULT;
-		offset = 0;
-		kdata += copy;
-		len -= copy;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(memcpy_toiovecend);
-
-/*
- * Copy iovec to kernel. Returns -EFAULT on error.
- */
-
-int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
-			int offset, int len)
-{
-	/* No data? Done! */
-	if (len == 0)
-		return 0;
-
-	/* Skip over the finished iovecs */
-	while (offset >= iov->iov_len) {
-		offset -= iov->iov_len;
-		iov++;
-	}
-
-	while (len > 0) {
-		u8 __user *base = iov->iov_base + offset;
-		int copy = min_t(unsigned int, len, iov->iov_len - offset);
-
-		offset = 0;
-		if (copy_from_user(kdata, base, copy))
-			return -EFAULT;
-		len -= copy;
-		kdata += copy;
-		iov++;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(memcpy_fromiovecend);
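
lib/iovec.c can go away because the remaining callers of memcpy_fromiovec(),
memcpy_toiovecend() and memcpy_fromiovecend() were converted to the iov_iter
interface during this merge window (item 1 in the pull message). As a minimal
sketch, assuming a caller that previously did memcpy_fromiovec(kdata, iov, len)
on a user-space iovec, the iov_iter-based equivalent looks roughly like this;
example_copy_from_iovec() is a hypothetical name, not something added by this
commit:

#include <linux/errno.h>
#include <linux/uaccess.h>
#include <linux/uio.h>

/* Hypothetical helper: the iov_iter replacement for a
 * memcpy_fromiovec(kdata, iov, len) call. The iterator tracks its own
 * position, so the caller's iovec array is no longer modified.
 */
static int example_copy_from_iovec(void *kdata, const struct iovec *iov,
				   unsigned long nr_segs, size_t len)
{
	struct iov_iter from;

	/* WRITE: data flows from the user iovec into kernel memory */
	iov_iter_init(&from, WRITE, iov, nr_segs, len);

	if (copy_from_iter(kdata, len, &from) != len)
		return -EFAULT;

	return 0;
}

copy_to_iter() covers the memcpy_toiovecend() direction, and
iov_iter_advance() replaces the manual offset skipping done in the removed
helpers.
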
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 6c3c723e902b..9cc4c4a90d00 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Resizable, Scalable, Concurrent Hash Table 2 * Resizable, Scalable, Concurrent Hash Table
3 * 3 *
4 * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch> 4 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
5 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> 5 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
6 * 6 *
7 * Based on the following paper: 7 * Based on the following paper:
@@ -23,94 +23,203 @@
23#include <linux/jhash.h> 23#include <linux/jhash.h>
24#include <linux/random.h> 24#include <linux/random.h>
25#include <linux/rhashtable.h> 25#include <linux/rhashtable.h>
26#include <linux/err.h>
26 27
27#define HASH_DEFAULT_SIZE 64UL 28#define HASH_DEFAULT_SIZE 64UL
28#define HASH_MIN_SIZE 4UL 29#define HASH_MIN_SIZE 4UL
30#define BUCKET_LOCKS_PER_CPU 128UL
29 31
30#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) 32/* Base bits plus 1 bit for nulls marker */
33#define HASH_RESERVED_SPACE (RHT_BASE_BITS + 1)
31 34
32#ifdef CONFIG_PROVE_LOCKING 35enum {
33int lockdep_rht_mutex_is_held(const struct rhashtable *ht) 36 RHT_LOCK_NORMAL,
37 RHT_LOCK_NESTED,
38};
39
40/* The bucket lock is selected based on the hash and protects mutations
41 * on a group of hash buckets.
42 *
43 * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
44 * a single lock always covers both buckets which may both contains
45 * entries which link to the same bucket of the old table during resizing.
46 * This allows to simplify the locking as locking the bucket in both
47 * tables during resize always guarantee protection.
48 *
49 * IMPORTANT: When holding the bucket lock of both the old and new table
50 * during expansions and shrinking, the old bucket lock must always be
51 * acquired first.
52 */
53static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash)
34{ 54{
35 return ht->p.mutex_is_held(ht->p.parent); 55 return &tbl->locks[hash & tbl->locks_mask];
36} 56}
37EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
38#endif
39 57
40static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) 58static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
41{ 59{
42 return (void *) he - ht->p.head_offset; 60 return (void *) he - ht->p.head_offset;
43} 61}
44 62
45static u32 __hashfn(const struct rhashtable *ht, const void *key, 63static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash)
46 u32 len, u32 hsize) 64{
65 return hash & (tbl->size - 1);
66}
67
68static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr)
47{ 69{
48 u32 h; 70 u32 hash;
49 71
50 h = ht->p.hashfn(key, len, ht->p.hash_rnd); 72 if (unlikely(!ht->p.key_len))
73 hash = ht->p.obj_hashfn(ptr, ht->p.hash_rnd);
74 else
75 hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len,
76 ht->p.hash_rnd);
51 77
52 return h & (hsize - 1); 78 return hash >> HASH_RESERVED_SPACE;
53} 79}
54 80
55/** 81static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len)
56 * rhashtable_hashfn - compute hash for key of given length
57 * @ht: hash table to compute for
58 * @key: pointer to key
59 * @len: length of key
60 *
61 * Computes the hash value using the hash function provided in the 'hashfn'
62 * of struct rhashtable_params. The returned value is guaranteed to be
63 * smaller than the number of buckets in the hash table.
64 */
65u32 rhashtable_hashfn(const struct rhashtable *ht, const void *key, u32 len)
66{ 82{
67 struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); 83 return ht->p.hashfn(key, len, ht->p.hash_rnd) >> HASH_RESERVED_SPACE;
84}
68 85
69 return __hashfn(ht, key, len, tbl->size); 86static u32 head_hashfn(const struct rhashtable *ht,
87 const struct bucket_table *tbl,
88 const struct rhash_head *he)
89{
90 return rht_bucket_index(tbl, obj_raw_hashfn(ht, rht_obj(ht, he)));
70} 91}
71EXPORT_SYMBOL_GPL(rhashtable_hashfn);
72 92
73static u32 obj_hashfn(const struct rhashtable *ht, const void *ptr, u32 hsize) 93#ifdef CONFIG_PROVE_LOCKING
94static void debug_dump_buckets(const struct rhashtable *ht,
95 const struct bucket_table *tbl)
74{ 96{
75 if (unlikely(!ht->p.key_len)) { 97 struct rhash_head *he;
76 u32 h; 98 unsigned int i, hash;
77 99
78 h = ht->p.obj_hashfn(ptr, ht->p.hash_rnd); 100 for (i = 0; i < tbl->size; i++) {
101 pr_warn(" [Bucket %d] ", i);
102 rht_for_each_rcu(he, tbl, i) {
103 hash = head_hashfn(ht, tbl, he);
104 pr_cont("[hash = %#x, lock = %p] ",
105 hash, bucket_lock(tbl, hash));
106 }
107 pr_cont("\n");
108 }
109
110}
111
112static void debug_dump_table(struct rhashtable *ht,
113 const struct bucket_table *tbl,
114 unsigned int hash)
115{
116 struct bucket_table *old_tbl, *future_tbl;
117
118 pr_emerg("BUG: lock for hash %#x in table %p not held\n",
119 hash, tbl);
79 120
80 return h & (hsize - 1); 121 rcu_read_lock();
122 future_tbl = rht_dereference_rcu(ht->future_tbl, ht);
123 old_tbl = rht_dereference_rcu(ht->tbl, ht);
124 if (future_tbl != old_tbl) {
125 pr_warn("Future table %p (size: %zd)\n",
126 future_tbl, future_tbl->size);
127 debug_dump_buckets(ht, future_tbl);
81 } 128 }
82 129
83 return __hashfn(ht, ptr + ht->p.key_offset, ht->p.key_len, hsize); 130 pr_warn("Table %p (size: %zd)\n", old_tbl, old_tbl->size);
131 debug_dump_buckets(ht, old_tbl);
132
133 rcu_read_unlock();
84} 134}
85 135
86/** 136#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
87 * rhashtable_obj_hashfn - compute hash for hashed object 137#define ASSERT_BUCKET_LOCK(HT, TBL, HASH) \
88 * @ht: hash table to compute for 138 do { \
89 * @ptr: pointer to hashed object 139 if (unlikely(!lockdep_rht_bucket_is_held(TBL, HASH))) { \
90 * 140 debug_dump_table(HT, TBL, HASH); \
91 * Computes the hash value using the hash function `hashfn` respectively 141 BUG(); \
92 * 'obj_hashfn' depending on whether the hash table is set up to work with 142 } \
93 * a fixed length key. The returned value is guaranteed to be smaller than 143 } while (0)
94 * the number of buckets in the hash table. 144
95 */ 145int lockdep_rht_mutex_is_held(struct rhashtable *ht)
96u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr)
97{ 146{
98 struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); 147 return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
148}
149EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
99 150
100 return obj_hashfn(ht, ptr, tbl->size); 151int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
152{
153 spinlock_t *lock = bucket_lock(tbl, hash);
154
155 return (debug_locks) ? lockdep_is_held(lock) : 1;
101} 156}
102EXPORT_SYMBOL_GPL(rhashtable_obj_hashfn); 157EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
158#else
159#define ASSERT_RHT_MUTEX(HT)
160#define ASSERT_BUCKET_LOCK(HT, TBL, HASH)
161#endif
103 162
104static u32 head_hashfn(const struct rhashtable *ht, 163
105 const struct rhash_head *he, u32 hsize) 164static struct rhash_head __rcu **bucket_tail(struct bucket_table *tbl, u32 n)
106{ 165{
107 return obj_hashfn(ht, rht_obj(ht, he), hsize); 166 struct rhash_head __rcu **pprev;
167
168 for (pprev = &tbl->buckets[n];
169 !rht_is_a_nulls(rht_dereference_bucket(*pprev, tbl, n));
170 pprev = &rht_dereference_bucket(*pprev, tbl, n)->next)
171 ;
172
173 return pprev;
108} 174}
109 175
110static struct bucket_table *bucket_table_alloc(size_t nbuckets) 176static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
177{
178 unsigned int i, size;
179#if defined(CONFIG_PROVE_LOCKING)
180 unsigned int nr_pcpus = 2;
181#else
182 unsigned int nr_pcpus = num_possible_cpus();
183#endif
184
185 nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
186 size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
187
188 /* Never allocate more than 0.5 locks per bucket */
189 size = min_t(unsigned int, size, tbl->size >> 1);
190
191 if (sizeof(spinlock_t) != 0) {
192#ifdef CONFIG_NUMA
193 if (size * sizeof(spinlock_t) > PAGE_SIZE)
194 tbl->locks = vmalloc(size * sizeof(spinlock_t));
195 else
196#endif
197 tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
198 GFP_KERNEL);
199 if (!tbl->locks)
200 return -ENOMEM;
201 for (i = 0; i < size; i++)
202 spin_lock_init(&tbl->locks[i]);
203 }
204 tbl->locks_mask = size - 1;
205
206 return 0;
207}
208
209static void bucket_table_free(const struct bucket_table *tbl)
210{
211 if (tbl)
212 kvfree(tbl->locks);
213
214 kvfree(tbl);
215}
216
217static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
218 size_t nbuckets)
111{ 219{
112 struct bucket_table *tbl; 220 struct bucket_table *tbl;
113 size_t size; 221 size_t size;
222 int i;
114 223
115 size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); 224 size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
116 tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); 225 tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
@@ -122,12 +231,15 @@ static struct bucket_table *bucket_table_alloc(size_t nbuckets)
122 231
123 tbl->size = nbuckets; 232 tbl->size = nbuckets;
124 233
125 return tbl; 234 if (alloc_bucket_locks(ht, tbl) < 0) {
126} 235 bucket_table_free(tbl);
236 return NULL;
237 }
127 238
128static void bucket_table_free(const struct bucket_table *tbl) 239 for (i = 0; i < nbuckets; i++)
129{ 240 INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
130 kvfree(tbl); 241
242 return tbl;
131} 243}
132 244
133/** 245/**
@@ -138,7 +250,8 @@ static void bucket_table_free(const struct bucket_table *tbl)
138bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) 250bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
139{ 251{
140 /* Expand table when exceeding 75% load */ 252 /* Expand table when exceeding 75% load */
141 return ht->nelems > (new_size / 4 * 3); 253 return atomic_read(&ht->nelems) > (new_size / 4 * 3) &&
254 (ht->p.max_shift && atomic_read(&ht->shift) < ht->p.max_shift);
142} 255}
143EXPORT_SYMBOL_GPL(rht_grow_above_75); 256EXPORT_SYMBOL_GPL(rht_grow_above_75);
144 257
@@ -150,41 +263,75 @@ EXPORT_SYMBOL_GPL(rht_grow_above_75);
150bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) 263bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
151{ 264{
152 /* Shrink table beneath 30% load */ 265 /* Shrink table beneath 30% load */
153 return ht->nelems < (new_size * 3 / 10); 266 return atomic_read(&ht->nelems) < (new_size * 3 / 10) &&
267 (atomic_read(&ht->shift) > ht->p.min_shift);
154} 268}
155EXPORT_SYMBOL_GPL(rht_shrink_below_30); 269EXPORT_SYMBOL_GPL(rht_shrink_below_30);
156 270
157static void hashtable_chain_unzip(const struct rhashtable *ht, 271static void lock_buckets(struct bucket_table *new_tbl,
272 struct bucket_table *old_tbl, unsigned int hash)
273 __acquires(old_bucket_lock)
274{
275 spin_lock_bh(bucket_lock(old_tbl, hash));
276 if (new_tbl != old_tbl)
277 spin_lock_bh_nested(bucket_lock(new_tbl, hash),
278 RHT_LOCK_NESTED);
279}
280
281static void unlock_buckets(struct bucket_table *new_tbl,
282 struct bucket_table *old_tbl, unsigned int hash)
283 __releases(old_bucket_lock)
284{
285 if (new_tbl != old_tbl)
286 spin_unlock_bh(bucket_lock(new_tbl, hash));
287 spin_unlock_bh(bucket_lock(old_tbl, hash));
288}
289
290/**
291 * Unlink entries on bucket which hash to different bucket.
292 *
293 * Returns true if no more work needs to be performed on the bucket.
294 */
295static bool hashtable_chain_unzip(struct rhashtable *ht,
158 const struct bucket_table *new_tbl, 296 const struct bucket_table *new_tbl,
159 struct bucket_table *old_tbl, size_t n) 297 struct bucket_table *old_tbl,
298 size_t old_hash)
160{ 299{
161 struct rhash_head *he, *p, *next; 300 struct rhash_head *he, *p, *next;
162 unsigned int h; 301 unsigned int new_hash, new_hash2;
302
303 ASSERT_BUCKET_LOCK(ht, old_tbl, old_hash);
163 304
164 /* Old bucket empty, no work needed. */ 305 /* Old bucket empty, no work needed. */
165 p = rht_dereference(old_tbl->buckets[n], ht); 306 p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
166 if (!p) 307 old_hash);
167 return; 308 if (rht_is_a_nulls(p))
309 return false;
310
311 new_hash = head_hashfn(ht, new_tbl, p);
312 ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash);
168 313
169 /* Advance the old bucket pointer one or more times until it 314 /* Advance the old bucket pointer one or more times until it
170 * reaches a node that doesn't hash to the same bucket as the 315 * reaches a node that doesn't hash to the same bucket as the
171 * previous node p. Call the previous node p; 316 * previous node p. Call the previous node p;
172 */ 317 */
173 h = head_hashfn(ht, p, new_tbl->size); 318 rht_for_each_continue(he, p->next, old_tbl, old_hash) {
174 rht_for_each(he, p->next, ht) { 319 new_hash2 = head_hashfn(ht, new_tbl, he);
175 if (head_hashfn(ht, he, new_tbl->size) != h) 320 ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash2);
321
322 if (new_hash != new_hash2)
176 break; 323 break;
177 p = he; 324 p = he;
178 } 325 }
179 RCU_INIT_POINTER(old_tbl->buckets[n], p->next); 326 rcu_assign_pointer(old_tbl->buckets[old_hash], p->next);
180 327
181 /* Find the subsequent node which does hash to the same 328 /* Find the subsequent node which does hash to the same
182 * bucket as node P, or NULL if no such node exists. 329 * bucket as node P, or NULL if no such node exists.
183 */ 330 */
184 next = NULL; 331 INIT_RHT_NULLS_HEAD(next, ht, old_hash);
185 if (he) { 332 if (!rht_is_a_nulls(he)) {
186 rht_for_each(he, he->next, ht) { 333 rht_for_each_continue(he, he->next, old_tbl, old_hash) {
187 if (head_hashfn(ht, he, new_tbl->size) == h) { 334 if (head_hashfn(ht, new_tbl, he) == new_hash) {
188 next = he; 335 next = he;
189 break; 336 break;
190 } 337 }
@@ -194,7 +341,20 @@ static void hashtable_chain_unzip(const struct rhashtable *ht,
194 /* Set p's next pointer to that subsequent node pointer, 341 /* Set p's next pointer to that subsequent node pointer,
195 * bypassing the nodes which do not hash to p's bucket 342 * bypassing the nodes which do not hash to p's bucket
196 */ 343 */
197 RCU_INIT_POINTER(p->next, next); 344 rcu_assign_pointer(p->next, next);
345
346 p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
347 old_hash);
348
349 return !rht_is_a_nulls(p);
350}
351
352static void link_old_to_new(struct rhashtable *ht, struct bucket_table *new_tbl,
353 unsigned int new_hash, struct rhash_head *entry)
354{
355 ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash);
356
357 rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry);
198} 358}
199 359
200/** 360/**
@@ -207,53 +367,57 @@ static void hashtable_chain_unzip(const struct rhashtable *ht,
207 * This function may only be called in a context where it is safe to call 367 * This function may only be called in a context where it is safe to call
208 * synchronize_rcu(), e.g. not within a rcu_read_lock() section. 368 * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
209 * 369 *
210 * The caller must ensure that no concurrent table mutations take place. 370 * The caller must ensure that no concurrent resizing occurs by holding
211 * It is however valid to have concurrent lookups if they are RCU protected. 371 * ht->mutex.
372 *
373 * It is valid to have concurrent insertions and deletions protected by per
374 * bucket locks or concurrent RCU protected lookups and traversals.
212 */ 375 */
213int rhashtable_expand(struct rhashtable *ht) 376int rhashtable_expand(struct rhashtable *ht)
214{ 377{
215 struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); 378 struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
216 struct rhash_head *he; 379 struct rhash_head *he;
217 unsigned int i, h; 380 unsigned int new_hash, old_hash;
218 bool complete; 381 bool complete = false;
219 382
220 ASSERT_RHT_MUTEX(ht); 383 ASSERT_RHT_MUTEX(ht);
221 384
222 if (ht->p.max_shift && ht->shift >= ht->p.max_shift) 385 new_tbl = bucket_table_alloc(ht, old_tbl->size * 2);
223 return 0;
224
225 new_tbl = bucket_table_alloc(old_tbl->size * 2);
226 if (new_tbl == NULL) 386 if (new_tbl == NULL)
227 return -ENOMEM; 387 return -ENOMEM;
228 388
229 ht->shift++; 389 atomic_inc(&ht->shift);
390
391 /* Make insertions go into the new, empty table right away. Deletions
392 * and lookups will be attempted in both tables until we synchronize.
393 * The synchronize_rcu() guarantees for the new table to be picked up
394 * so no new additions go into the old table while we relink.
395 */
396 rcu_assign_pointer(ht->future_tbl, new_tbl);
397 synchronize_rcu();
230 398
231 /* For each new bucket, search the corresponding old bucket 399 /* For each new bucket, search the corresponding old bucket for the
232 * for the first entry that hashes to the new bucket, and 400 * first entry that hashes to the new bucket, and link the end of
233 * link the new bucket to that entry. Since all the entries 401 * newly formed bucket chain (containing entries added to future
234 * which will end up in the new bucket appear in the same 402 * table) to that entry. Since all the entries which will end up in
235 * old bucket, this constructs an entirely valid new hash 403 * the new bucket appear in the same old bucket, this constructs an
236 * table, but with multiple buckets "zipped" together into a 404 * entirely valid new hash table, but with multiple buckets
237 * single imprecise chain. 405 * "zipped" together into a single imprecise chain.
238 */ 406 */
239 for (i = 0; i < new_tbl->size; i++) { 407 for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
240 h = i & (old_tbl->size - 1); 408 old_hash = rht_bucket_index(old_tbl, new_hash);
241 rht_for_each(he, old_tbl->buckets[h], ht) { 409 lock_buckets(new_tbl, old_tbl, new_hash);
242 if (head_hashfn(ht, he, new_tbl->size) == i) { 410 rht_for_each(he, old_tbl, old_hash) {
243 RCU_INIT_POINTER(new_tbl->buckets[i], he); 411 if (head_hashfn(ht, new_tbl, he) == new_hash) {
412 link_old_to_new(ht, new_tbl, new_hash, he);
244 break; 413 break;
245 } 414 }
246 } 415 }
416 unlock_buckets(new_tbl, old_tbl, new_hash);
247 } 417 }
248 418
249 /* Publish the new table pointer. Lookups may now traverse
250 * the new table, but they will not benefit from any
251 * additional efficiency until later steps unzip the buckets.
252 */
253 rcu_assign_pointer(ht->tbl, new_tbl);
254
255 /* Unzip interleaved hash chains */ 419 /* Unzip interleaved hash chains */
256 do { 420 while (!complete && !ht->being_destroyed) {
257 /* Wait for readers. All new readers will see the new 421 /* Wait for readers. All new readers will see the new
258 * table, and thus no references to the old table will 422 * table, and thus no references to the old table will
259 * remain. 423 * remain.
@@ -265,12 +429,19 @@ int rhashtable_expand(struct rhashtable *ht)
265 * table): ... 429 * table): ...
266 */ 430 */
267 complete = true; 431 complete = true;
268 for (i = 0; i < old_tbl->size; i++) { 432 for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
269 hashtable_chain_unzip(ht, new_tbl, old_tbl, i); 433 lock_buckets(new_tbl, old_tbl, old_hash);
270 if (old_tbl->buckets[i] != NULL) 434
435 if (hashtable_chain_unzip(ht, new_tbl, old_tbl,
436 old_hash))
271 complete = false; 437 complete = false;
438
439 unlock_buckets(new_tbl, old_tbl, old_hash);
272 } 440 }
273 } while (!complete); 441 }
442
443 rcu_assign_pointer(ht->tbl, new_tbl);
444 synchronize_rcu();
274 445
275 bucket_table_free(old_tbl); 446 bucket_table_free(old_tbl);
276 return 0; 447 return 0;
@@ -284,45 +455,51 @@ EXPORT_SYMBOL_GPL(rhashtable_expand);
284 * This function may only be called in a context where it is safe to call 455 * This function may only be called in a context where it is safe to call
285 * synchronize_rcu(), e.g. not within a rcu_read_lock() section. 456 * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
286 * 457 *
458 * The caller must ensure that no concurrent resizing occurs by holding
459 * ht->mutex.
460 *
287 * The caller must ensure that no concurrent table mutations take place. 461 * The caller must ensure that no concurrent table mutations take place.
288 * It is however valid to have concurrent lookups if they are RCU protected. 462 * It is however valid to have concurrent lookups if they are RCU protected.
463 *
464 * It is valid to have concurrent insertions and deletions protected by per
465 * bucket locks or concurrent RCU protected lookups and traversals.
289 */ 466 */
290int rhashtable_shrink(struct rhashtable *ht) 467int rhashtable_shrink(struct rhashtable *ht)
291{ 468{
292 struct bucket_table *ntbl, *tbl = rht_dereference(ht->tbl, ht); 469 struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht);
293 struct rhash_head __rcu **pprev; 470 unsigned int new_hash;
294 unsigned int i;
295 471
296 ASSERT_RHT_MUTEX(ht); 472 ASSERT_RHT_MUTEX(ht);
297 473
298 if (ht->shift <= ht->p.min_shift) 474 new_tbl = bucket_table_alloc(ht, tbl->size / 2);
299 return 0; 475 if (new_tbl == NULL)
300
301 ntbl = bucket_table_alloc(tbl->size / 2);
302 if (ntbl == NULL)
303 return -ENOMEM; 476 return -ENOMEM;
304 477
305 ht->shift--; 478 rcu_assign_pointer(ht->future_tbl, new_tbl);
479 synchronize_rcu();
306 480
307 /* Link each bucket in the new table to the first bucket 481 /* Link the first entry in the old bucket to the end of the
308 * in the old table that contains entries which will hash 482 * bucket in the new table. As entries are concurrently being
309 * to the new bucket. 483 * added to the new table, lock down the new bucket. As we
484 * always divide the size in half when shrinking, each bucket
485 * in the new table maps to exactly two buckets in the old
486 * table.
310 */ 487 */
311 for (i = 0; i < ntbl->size; i++) { 488 for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
312 ntbl->buckets[i] = tbl->buckets[i]; 489 lock_buckets(new_tbl, tbl, new_hash);
313 490
314 /* Link each bucket in the new table to the first bucket 491 rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
315 * in the old table that contains entries which will hash 492 tbl->buckets[new_hash]);
316 * to the new bucket. 493 ASSERT_BUCKET_LOCK(ht, tbl, new_hash + new_tbl->size);
317 */ 494 rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
318 for (pprev = &ntbl->buckets[i]; *pprev != NULL; 495 tbl->buckets[new_hash + new_tbl->size]);
319 pprev = &rht_dereference(*pprev, ht)->next) 496
320 ; 497 unlock_buckets(new_tbl, tbl, new_hash);
321 RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]);
322 } 498 }
323 499
324 /* Publish the new, valid hash table */ 500 /* Publish the new, valid hash table */
325 rcu_assign_pointer(ht->tbl, ntbl); 501 rcu_assign_pointer(ht->tbl, new_tbl);
502 atomic_dec(&ht->shift);
326 503
327 /* Wait for readers. No new readers will have references to the 504 /* Wait for readers. No new readers will have references to the
328 * old hash table. 505 * old hash table.
@@ -335,59 +512,99 @@ int rhashtable_shrink(struct rhashtable *ht)
335} 512}
336EXPORT_SYMBOL_GPL(rhashtable_shrink); 513EXPORT_SYMBOL_GPL(rhashtable_shrink);
337 514
338/** 515static void rht_deferred_worker(struct work_struct *work)
339 * rhashtable_insert - insert object into hash hash table
340 * @ht: hash table
341 * @obj: pointer to hash head inside object
342 *
343 * Will automatically grow the table via rhashtable_expand() if the the
344 * grow_decision function specified at rhashtable_init() returns true.
345 *
346 * The caller must ensure that no concurrent table mutations occur. It is
347 * however valid to have concurrent lookups if they are RCU protected.
348 */
349void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
350{ 516{
351 struct bucket_table *tbl = rht_dereference(ht->tbl, ht); 517 struct rhashtable *ht;
352 u32 hash; 518 struct bucket_table *tbl;
519 struct rhashtable_walker *walker;
353 520
354 ASSERT_RHT_MUTEX(ht); 521 ht = container_of(work, struct rhashtable, run_work);
522 mutex_lock(&ht->mutex);
523 if (ht->being_destroyed)
524 goto unlock;
355 525
356 hash = head_hashfn(ht, obj, tbl->size); 526 tbl = rht_dereference(ht->tbl, ht);
357 RCU_INIT_POINTER(obj->next, tbl->buckets[hash]); 527
358 rcu_assign_pointer(tbl->buckets[hash], obj); 528 list_for_each_entry(walker, &ht->walkers, list)
359 ht->nelems++; 529 walker->resize = true;
360 530
361 if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) 531 if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size))
362 rhashtable_expand(ht); 532 rhashtable_expand(ht);
533 else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size))
534 rhashtable_shrink(ht);
535
536unlock:
537 mutex_unlock(&ht->mutex);
538}
539
540static void rhashtable_wakeup_worker(struct rhashtable *ht)
541{
542 struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
543 struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
544 size_t size = tbl->size;
545
546 /* Only adjust the table if no resizing is currently in progress. */
547 if (tbl == new_tbl &&
548 ((ht->p.grow_decision && ht->p.grow_decision(ht, size)) ||
549 (ht->p.shrink_decision && ht->p.shrink_decision(ht, size))))
550 schedule_work(&ht->run_work);
551}
552
553static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
554 struct bucket_table *tbl, u32 hash)
555{
556 struct rhash_head *head;
557
558 hash = rht_bucket_index(tbl, hash);
559 head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
560
561 ASSERT_BUCKET_LOCK(ht, tbl, hash);
562
563 if (rht_is_a_nulls(head))
564 INIT_RHT_NULLS_HEAD(obj->next, ht, hash);
565 else
566 RCU_INIT_POINTER(obj->next, head);
567
568 rcu_assign_pointer(tbl->buckets[hash], obj);
569
570 atomic_inc(&ht->nelems);
571
572 rhashtable_wakeup_worker(ht);
363} 573}
364EXPORT_SYMBOL_GPL(rhashtable_insert);
365 574
366/** 575/**
367 * rhashtable_remove_pprev - remove object from hash table given previous element 576 * rhashtable_insert - insert object into hash table
368 * @ht: hash table 577 * @ht: hash table
369 * @obj: pointer to hash head inside object 578 * @obj: pointer to hash head inside object
370 * @pprev: pointer to previous element
371 * 579 *
372 * Identical to rhashtable_remove() but caller is alreayd aware of the element 580 * Will take a per bucket spinlock to protect against mutual mutations
373 * in front of the element to be deleted. This is in particular useful for 581 * on the same bucket. Multiple insertions may occur in parallel unless
374 * deletion when combined with walking or lookup. 582 * they map to the same bucket lock.
583 *
584 * It is safe to call this function from atomic context.
585 *
586 * Will trigger an automatic deferred table resizing if the size grows
587 * beyond the watermark indicated by grow_decision() which can be passed
588 * to rhashtable_init().
375 */ 589 */
376void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, 590void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
377 struct rhash_head __rcu **pprev)
378{ 591{
379 struct bucket_table *tbl = rht_dereference(ht->tbl, ht); 592 struct bucket_table *tbl, *old_tbl;
593 unsigned hash;
380 594
381 ASSERT_RHT_MUTEX(ht); 595 rcu_read_lock();
382 596
383 RCU_INIT_POINTER(*pprev, obj->next); 597 tbl = rht_dereference_rcu(ht->future_tbl, ht);
384 ht->nelems--; 598 old_tbl = rht_dereference_rcu(ht->tbl, ht);
599 hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
385 600
386 if (ht->p.shrink_decision && 601 lock_buckets(tbl, old_tbl, hash);
387 ht->p.shrink_decision(ht, tbl->size)) 602 __rhashtable_insert(ht, obj, tbl, hash);
388 rhashtable_shrink(ht); 603 unlock_buckets(tbl, old_tbl, hash);
604
605 rcu_read_unlock();
389} 606}
390EXPORT_SYMBOL_GPL(rhashtable_remove_pprev); 607EXPORT_SYMBOL_GPL(rhashtable_insert);
391 608
392/** 609/**
393 * rhashtable_remove - remove object from hash table 610 * rhashtable_remove - remove object from hash table
@@ -398,7 +615,7 @@ EXPORT_SYMBOL_GPL(rhashtable_remove_pprev);
398 * walk the bucket chain upon removal. The removal operation is thus 615 * walk the bucket chain upon removal. The removal operation is thus
399 * considerable slow if the hash table is not correctly sized. 616 * considerable slow if the hash table is not correctly sized.
400 * 617 *
401 * Will automatically shrink the table via rhashtable_expand() if the the 618 * Will automatically shrink the table via rhashtable_expand() if the
402 * shrink_decision function specified at rhashtable_init() returns true. 619 * shrink_decision function specified at rhashtable_init() returns true.
403 * 620 *
404 * The caller must ensure that no concurrent table mutations occur. It is 621 * The caller must ensure that no concurrent table mutations occur. It is
@@ -406,30 +623,87 @@ EXPORT_SYMBOL_GPL(rhashtable_remove_pprev);
406 */ 623 */
407bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) 624bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
408{ 625{
409 struct bucket_table *tbl = rht_dereference(ht->tbl, ht); 626 struct bucket_table *tbl, *new_tbl, *old_tbl;
410 struct rhash_head __rcu **pprev; 627 struct rhash_head __rcu **pprev;
411 struct rhash_head *he; 628 struct rhash_head *he, *he2;
412 u32 h; 629 unsigned int hash, new_hash;
630 bool ret = false;
413 631
414 ASSERT_RHT_MUTEX(ht); 632 rcu_read_lock();
415 633 old_tbl = rht_dereference_rcu(ht->tbl, ht);
416 h = head_hashfn(ht, obj, tbl->size); 634 tbl = new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
417 635 new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
418 pprev = &tbl->buckets[h]; 636
419 rht_for_each(he, tbl->buckets[h], ht) { 637 lock_buckets(new_tbl, old_tbl, new_hash);
638restart:
639 hash = rht_bucket_index(tbl, new_hash);
640 pprev = &tbl->buckets[hash];
641 rht_for_each(he, tbl, hash) {
420 if (he != obj) { 642 if (he != obj) {
421 pprev = &he->next; 643 pprev = &he->next;
422 continue; 644 continue;
423 } 645 }
424 646
425 rhashtable_remove_pprev(ht, he, pprev); 647 ASSERT_BUCKET_LOCK(ht, tbl, hash);
426 return true; 648
649 if (old_tbl->size > new_tbl->size && tbl == old_tbl &&
650 !rht_is_a_nulls(obj->next) &&
651 head_hashfn(ht, tbl, obj->next) != hash) {
652 rcu_assign_pointer(*pprev, (struct rhash_head *) rht_marker(ht, hash));
653 } else if (unlikely(old_tbl->size < new_tbl->size && tbl == new_tbl)) {
654 rht_for_each_continue(he2, obj->next, tbl, hash) {
655 if (head_hashfn(ht, tbl, he2) == hash) {
656 rcu_assign_pointer(*pprev, he2);
657 goto found;
658 }
659 }
660
661 rcu_assign_pointer(*pprev, (struct rhash_head *) rht_marker(ht, hash));
662 } else {
663 rcu_assign_pointer(*pprev, obj->next);
664 }
665
666found:
667 ret = true;
668 break;
669 }
670
671 /* The entry may be linked in either 'tbl', 'future_tbl', or both.
672 * 'future_tbl' only exists for a short period of time during
673 * resizing. Thus traversing both is fine and the added cost is
674 * very rare.
675 */
676 if (tbl != old_tbl) {
677 tbl = old_tbl;
678 goto restart;
679 }
680
681 unlock_buckets(new_tbl, old_tbl, new_hash);
682
683 if (ret) {
684 atomic_dec(&ht->nelems);
685 rhashtable_wakeup_worker(ht);
427 } 686 }
428 687
429 return false; 688 rcu_read_unlock();
689
690 return ret;
430} 691}
431EXPORT_SYMBOL_GPL(rhashtable_remove); 692EXPORT_SYMBOL_GPL(rhashtable_remove);
432 693
694struct rhashtable_compare_arg {
695 struct rhashtable *ht;
696 const void *key;
697};
698
699static bool rhashtable_compare(void *ptr, void *arg)
700{
701 struct rhashtable_compare_arg *x = arg;
702 struct rhashtable *ht = x->ht;
703
704 return !memcmp(ptr + ht->p.key_offset, x->key, ht->p.key_len);
705}
706
433/** 707/**
434 * rhashtable_lookup - lookup key in hash table 708 * rhashtable_lookup - lookup key in hash table
435 * @ht: hash table 709 * @ht: hash table
@@ -439,65 +713,313 @@ EXPORT_SYMBOL_GPL(rhashtable_remove);
439 * for a entry with an identical key. The first matching entry is returned. 713 * for a entry with an identical key. The first matching entry is returned.
440 * 714 *
441 * This lookup function may only be used for fixed key hash table (key_len 715 * This lookup function may only be used for fixed key hash table (key_len
442 * paramter set). It will BUG() if used inappropriately. 716 * parameter set). It will BUG() if used inappropriately.
443 * 717 *
444 * Lookups may occur in parallel with hash mutations as long as the lookup is 718 * Lookups may occur in parallel with hashtable mutations and resizing.
445 * guarded by rcu_read_lock(). The caller must take care of this.
446 */ 719 */
447void *rhashtable_lookup(const struct rhashtable *ht, const void *key) 720void *rhashtable_lookup(struct rhashtable *ht, const void *key)
448{ 721{
449 const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); 722 struct rhashtable_compare_arg arg = {
450 struct rhash_head *he; 723 .ht = ht,
451 u32 h; 724 .key = key,
725 };
452 726
453 BUG_ON(!ht->p.key_len); 727 BUG_ON(!ht->p.key_len);
454 728
455 h = __hashfn(ht, key, ht->p.key_len, tbl->size); 729 return rhashtable_lookup_compare(ht, key, &rhashtable_compare, &arg);
456 rht_for_each_rcu(he, tbl->buckets[h], ht) {
457 if (memcmp(rht_obj(ht, he) + ht->p.key_offset, key,
458 ht->p.key_len))
459 continue;
460 return (void *) he - ht->p.head_offset;
461 }
462
463 return NULL;
464} 730}
465EXPORT_SYMBOL_GPL(rhashtable_lookup); 731EXPORT_SYMBOL_GPL(rhashtable_lookup);
466 732
467/** 733/**
468 * rhashtable_lookup_compare - search hash table with compare function 734 * rhashtable_lookup_compare - search hash table with compare function
469 * @ht: hash table 735 * @ht: hash table
470 * @hash: hash value of desired entry 736 * @key: the pointer to the key
471 * @compare: compare function, must return true on match 737 * @compare: compare function, must return true on match
472 * @arg: argument passed on to compare function 738 * @arg: argument passed on to compare function
473 * 739 *
474 * Traverses the bucket chain behind the provided hash value and calls the 740 * Traverses the bucket chain behind the provided hash value and calls the
475 * specified compare function for each entry. 741 * specified compare function for each entry.
476 * 742 *
477 * Lookups may occur in parallel with hash mutations as long as the lookup is 743 * Lookups may occur in parallel with hashtable mutations and resizing.
478 * guarded by rcu_read_lock(). The caller must take care of this.
479 * 744 *
480 * Returns the first entry on which the compare function returned true. 745 * Returns the first entry on which the compare function returned true.
481 */ 746 */
482void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, 747void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
483 bool (*compare)(void *, void *), void *arg) 748 bool (*compare)(void *, void *), void *arg)
484{ 749{
485 const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); 750 const struct bucket_table *tbl, *old_tbl;
486 struct rhash_head *he; 751 struct rhash_head *he;
752 u32 hash;
487 753
488 if (unlikely(hash >= tbl->size)) 754 rcu_read_lock();
489 return NULL;
490 755
491 rht_for_each_rcu(he, tbl->buckets[hash], ht) { 756 old_tbl = rht_dereference_rcu(ht->tbl, ht);
757 tbl = rht_dereference_rcu(ht->future_tbl, ht);
758 hash = key_hashfn(ht, key, ht->p.key_len);
759restart:
760 rht_for_each_rcu(he, tbl, rht_bucket_index(tbl, hash)) {
492 if (!compare(rht_obj(ht, he), arg)) 761 if (!compare(rht_obj(ht, he), arg))
493 continue; 762 continue;
494 return (void *) he - ht->p.head_offset; 763 rcu_read_unlock();
764 return rht_obj(ht, he);
765 }
766
767 if (unlikely(tbl != old_tbl)) {
768 tbl = old_tbl;
769 goto restart;
495 } 770 }
771 rcu_read_unlock();
496 772
497 return NULL; 773 return NULL;
498} 774}
499EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); 775EXPORT_SYMBOL_GPL(rhashtable_lookup_compare);
500 776
777/**
778 * rhashtable_lookup_insert - lookup and insert object into hash table
779 * @ht: hash table
780 * @obj: pointer to hash head inside object
781 *
782 * Locks down the bucket chain in both the old and new table if a resize
783 * is in progress to ensure that writers can't remove from the old table
784 * and can't insert to the new table during the atomic operation of search
785 * and insertion. Searches for duplicates in both the old and new table if
786 * a resize is in progress.
787 *
788 * This lookup function may only be used for fixed key hash table (key_len
789 * parameter set). It will BUG() if used inappropriately.
790 *
791 * It is safe to call this function from atomic context.
792 *
793 * Will trigger an automatic deferred table resizing if the size grows
794 * beyond the watermark indicated by grow_decision() which can be passed
795 * to rhashtable_init().
796 */
797bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj)
798{
799 struct rhashtable_compare_arg arg = {
800 .ht = ht,
801 .key = rht_obj(ht, obj) + ht->p.key_offset,
802 };
803
804 BUG_ON(!ht->p.key_len);
805
806 return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare,
807 &arg);
808}
809EXPORT_SYMBOL_GPL(rhashtable_lookup_insert);
810
811/**
812 * rhashtable_lookup_compare_insert - search and insert object to hash table
813 * with compare function
814 * @ht: hash table
815 * @obj: pointer to hash head inside object
816 * @compare: compare function, must return true on match
817 * @arg: argument passed on to compare function
818 *
819 * Locks down the bucket chain in both the old and new table if a resize
820 * is in progress to ensure that writers can't remove from the old table
821 * and can't insert to the new table during the atomic operation of search
822 * and insertion. Searches for duplicates in both the old and new table if
823 * a resize is in progress.
824 *
825 * Lookups may occur in parallel with hashtable mutations and resizing.
826 *
827 * Will trigger an automatic deferred table resizing if the size grows
828 * beyond the watermark indicated by grow_decision() which can be passed
829 * to rhashtable_init().
830 */
831bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
832 struct rhash_head *obj,
833 bool (*compare)(void *, void *),
834 void *arg)
835{
836 struct bucket_table *new_tbl, *old_tbl;
837 u32 new_hash;
838 bool success = true;
839
840 BUG_ON(!ht->p.key_len);
841
842 rcu_read_lock();
843 old_tbl = rht_dereference_rcu(ht->tbl, ht);
844 new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
845 new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
846
847 lock_buckets(new_tbl, old_tbl, new_hash);
848
849 if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset,
850 compare, arg)) {
851 success = false;
852 goto exit;
853 }
854
855 __rhashtable_insert(ht, obj, new_tbl, new_hash);
856
857exit:
858 unlock_buckets(new_tbl, old_tbl, new_hash);
859 rcu_read_unlock();
860
861 return success;
862}
863EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert);
864
865/**
866 * rhashtable_walk_init - Initialise an iterator
867 * @ht: Table to walk over
868 * @iter: Hash table Iterator
869 *
870 * This function prepares a hash table walk.
871 *
872 * Note that if you restart a walk after rhashtable_walk_stop you
873 * may see the same object twice. Also, you may miss objects if
874 * there are removals in between rhashtable_walk_stop and the next
875 * call to rhashtable_walk_start.
876 *
877 * For a completely stable walk you should construct your own data
878 * structure outside the hash table.
879 *
880 * This function may sleep so you must not call it from interrupt
881 * context or with spin locks held.
882 *
883 * You must call rhashtable_walk_exit if this function returns
884 * successfully.
885 */
886int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
887{
888 iter->ht = ht;
889 iter->p = NULL;
890 iter->slot = 0;
891 iter->skip = 0;
892
893 iter->walker = kmalloc(sizeof(*iter->walker), GFP_KERNEL);
894 if (!iter->walker)
895 return -ENOMEM;
896
897 mutex_lock(&ht->mutex);
898 list_add(&iter->walker->list, &ht->walkers);
899 mutex_unlock(&ht->mutex);
900
901 return 0;
902}
903EXPORT_SYMBOL_GPL(rhashtable_walk_init);
904
905/**
906 * rhashtable_walk_exit - Free an iterator
907 * @iter: Hash table Iterator
908 *
909 * This function frees resources allocated by rhashtable_walk_init.
910 */
911void rhashtable_walk_exit(struct rhashtable_iter *iter)
912{
913 mutex_lock(&iter->ht->mutex);
914 list_del(&iter->walker->list);
915 mutex_unlock(&iter->ht->mutex);
916 kfree(iter->walker);
917}
918EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
919
920/**
921 * rhashtable_walk_start - Start a hash table walk
922 * @iter: Hash table iterator
923 *
924 * Start a hash table walk. Note that we take the RCU lock in all
925 * cases including when we return an error. So you must always call
926 * rhashtable_walk_stop to clean up.
927 *
928 * Returns zero if successful.
929 *
930 * Returns -EAGAIN if resize event occured. Note that the iterator
931 * will rewind back to the beginning and you may use it immediately
932 * by calling rhashtable_walk_next.
933 */
934int rhashtable_walk_start(struct rhashtable_iter *iter)
935{
936 rcu_read_lock();
937
938 if (iter->walker->resize) {
939 iter->slot = 0;
940 iter->skip = 0;
941 iter->walker->resize = false;
942 return -EAGAIN;
943 }
944
945 return 0;
946}
947EXPORT_SYMBOL_GPL(rhashtable_walk_start);
948
949/**
950 * rhashtable_walk_next - Return the next object and advance the iterator
951 * @iter: Hash table iterator
952 *
953 * Note that you must call rhashtable_walk_stop when you are finished
954 * with the walk.
955 *
956 * Returns the next object or NULL when the end of the table is reached.
957 *
958 * Returns -EAGAIN if resize event occured. Note that the iterator
959 * will rewind back to the beginning and you may continue to use it.
960 */
961void *rhashtable_walk_next(struct rhashtable_iter *iter)
962{
963 const struct bucket_table *tbl;
964 struct rhashtable *ht = iter->ht;
965 struct rhash_head *p = iter->p;
966 void *obj = NULL;
967
968 tbl = rht_dereference_rcu(ht->tbl, ht);
969
970 if (p) {
971 p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
972 goto next;
973 }
974
975 for (; iter->slot < tbl->size; iter->slot++) {
976 int skip = iter->skip;
977
978 rht_for_each_rcu(p, tbl, iter->slot) {
979 if (!skip)
980 break;
981 skip--;
982 }
983
984next:
985 if (!rht_is_a_nulls(p)) {
986 iter->skip++;
987 iter->p = p;
988 obj = rht_obj(ht, p);
989 goto out;
990 }
991
992 iter->skip = 0;
993 }
994
995 iter->p = NULL;
996
997out:
998 if (iter->walker->resize) {
999 iter->p = NULL;
1000 iter->slot = 0;
1001 iter->skip = 0;
1002 iter->walker->resize = false;
1003 return ERR_PTR(-EAGAIN);
1004 }
1005
1006 return obj;
1007}
1008EXPORT_SYMBOL_GPL(rhashtable_walk_next);
1009
1010/**
1011 * rhashtable_walk_stop - Finish a hash table walk
1012 * @iter: Hash table iterator
1013 *
1014 * Finish a hash table walk.
1015 */
1016void rhashtable_walk_stop(struct rhashtable_iter *iter)
1017{
1018 rcu_read_unlock();
1019 iter->p = NULL;
1020}
1021EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
1022
501static size_t rounded_hashtable_size(struct rhashtable_params *params) 1023static size_t rounded_hashtable_size(struct rhashtable_params *params)
502{ 1024{
503 return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), 1025 return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
@@ -525,9 +1047,7 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params)
525 * .key_offset = offsetof(struct test_obj, key), 1047 * .key_offset = offsetof(struct test_obj, key),
526 * .key_len = sizeof(int), 1048 * .key_len = sizeof(int),
527 * .hashfn = jhash, 1049 * .hashfn = jhash,
528 * #ifdef CONFIG_PROVE_LOCKING 1050 * .nulls_base = (1U << RHT_BASE_SHIFT),
529 * .mutex_is_held = &my_mutex_is_held,
530 * #endif
531 * }; 1051 * };
532 * 1052 *
533 * Configuration Example 2: Variable length keys 1053 * Configuration Example 2: Variable length keys
@@ -547,9 +1067,6 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params)
547 * .head_offset = offsetof(struct test_obj, node), 1067 * .head_offset = offsetof(struct test_obj, node),
548 * .hashfn = jhash, 1068 * .hashfn = jhash,
549 * .obj_hashfn = my_hash_fn, 1069 * .obj_hashfn = my_hash_fn,
550 * #ifdef CONFIG_PROVE_LOCKING
551 * .mutex_is_held = &my_mutex_is_held,
552 * #endif
553 * }; 1070 * };
554 */ 1071 */
555int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) 1072int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
@@ -563,24 +1080,40 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
563 (!params->key_len && !params->obj_hashfn)) 1080 (!params->key_len && !params->obj_hashfn))
564 return -EINVAL; 1081 return -EINVAL;
565 1082
1083 if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
1084 return -EINVAL;
1085
566 params->min_shift = max_t(size_t, params->min_shift, 1086 params->min_shift = max_t(size_t, params->min_shift,
567 ilog2(HASH_MIN_SIZE)); 1087 ilog2(HASH_MIN_SIZE));
568 1088
569 if (params->nelem_hint) 1089 if (params->nelem_hint)
570 size = rounded_hashtable_size(params); 1090 size = rounded_hashtable_size(params);
571 1091
572 tbl = bucket_table_alloc(size); 1092 memset(ht, 0, sizeof(*ht));
1093 mutex_init(&ht->mutex);
1094 memcpy(&ht->p, params, sizeof(*params));
1095 INIT_LIST_HEAD(&ht->walkers);
1096
1097 if (params->locks_mul)
1098 ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
1099 else
1100 ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
1101
1102 tbl = bucket_table_alloc(ht, size);
573 if (tbl == NULL) 1103 if (tbl == NULL)
574 return -ENOMEM; 1104 return -ENOMEM;
575 1105
576 memset(ht, 0, sizeof(*ht)); 1106 atomic_set(&ht->nelems, 0);
577 ht->shift = ilog2(tbl->size); 1107 atomic_set(&ht->shift, ilog2(tbl->size));
578 memcpy(&ht->p, params, sizeof(*params));
579 RCU_INIT_POINTER(ht->tbl, tbl); 1108 RCU_INIT_POINTER(ht->tbl, tbl);
1109 RCU_INIT_POINTER(ht->future_tbl, tbl);
580 1110
581 if (!ht->p.hash_rnd) 1111 if (!ht->p.hash_rnd)
582 get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); 1112 get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd));
583 1113
1114 if (ht->p.grow_decision || ht->p.shrink_decision)
1115 INIT_WORK(&ht->run_work, rht_deferred_worker);
1116
584 return 0; 1117 return 0;
585} 1118}
586EXPORT_SYMBOL_GPL(rhashtable_init); 1119EXPORT_SYMBOL_GPL(rhashtable_init);
@@ -593,216 +1126,15 @@ EXPORT_SYMBOL_GPL(rhashtable_init);
  * has to make sure that no resizing may happen by unpublishing the hashtable
  * and waiting for the quiescent cycle before releasing the bucket array.
  */
-void rhashtable_destroy(const struct rhashtable *ht)
+void rhashtable_destroy(struct rhashtable *ht)
 {
-	bucket_table_free(ht->tbl);
-}
-EXPORT_SYMBOL_GPL(rhashtable_destroy);
-
-/**************************************************************************
- * Self Test
- **************************************************************************/
-
-#ifdef CONFIG_TEST_RHASHTABLE
+	ht->being_destroyed = true;
 
-#define TEST_HT_SIZE	8
-#define TEST_ENTRIES	2048
-#define TEST_PTR	((void *) 0xdeadbeef)
-#define TEST_NEXPANDS	4
+	if (ht->p.grow_decision || ht->p.shrink_decision)
+		cancel_work_sync(&ht->run_work);
 
-#ifdef CONFIG_PROVE_LOCKING
-static int test_mutex_is_held(void *parent)
-{
-	return 1;
+	mutex_lock(&ht->mutex);
+	bucket_table_free(rht_dereference(ht->tbl, ht));
+	mutex_unlock(&ht->mutex);
 }
-#endif
-
-struct test_obj {
-	void *ptr;
-	int value;
-	struct rhash_head node;
-};
-
-static int __init test_rht_lookup(struct rhashtable *ht)
-{
-	unsigned int i;
-
-	for (i = 0; i < TEST_ENTRIES * 2; i++) {
-		struct test_obj *obj;
-		bool expected = !(i % 2);
-		u32 key = i;
-
-		obj = rhashtable_lookup(ht, &key);
-
-		if (expected && !obj) {
-			pr_warn("Test failed: Could not find key %u\n", key);
-			return -ENOENT;
-		} else if (!expected && obj) {
-			pr_warn("Test failed: Unexpected entry found for key %u\n",
-				key);
-			return -EEXIST;
-		} else if (expected && obj) {
-			if (obj->ptr != TEST_PTR || obj->value != i) {
-				pr_warn("Test failed: Lookup value mismatch %p!=%p, %u!=%u\n",
-					obj->ptr, TEST_PTR, obj->value, i);
-				return -EINVAL;
-			}
-		}
-	}
-
-	return 0;
-}
-
-static void test_bucket_stats(struct rhashtable *ht, bool quiet)
-{
-	unsigned int cnt, rcu_cnt, i, total = 0;
-	struct test_obj *obj;
-	struct bucket_table *tbl;
-
-	tbl = rht_dereference_rcu(ht->tbl, ht);
-	for (i = 0; i < tbl->size; i++) {
-		rcu_cnt = cnt = 0;
-
-		if (!quiet)
-			pr_info(" [%#4x/%zu]", i, tbl->size);
-
-		rht_for_each_entry_rcu(obj, tbl->buckets[i], node) {
-			cnt++;
-			total++;
-			if (!quiet)
-				pr_cont(" [%p],", obj);
-		}
-
-		rht_for_each_entry_rcu(obj, tbl->buckets[i], node)
-			rcu_cnt++;
-
-		if (rcu_cnt != cnt)
-			pr_warn("Test failed: Chain count mismach %d != %d",
-				cnt, rcu_cnt);
-
-		if (!quiet)
-			pr_cont("\n [%#x] first element: %p, chain length: %u\n",
-				i, tbl->buckets[i], cnt);
-	}
-
-	pr_info(" Traversal complete: counted=%u, nelems=%zu, entries=%d\n",
-		total, ht->nelems, TEST_ENTRIES);
-
-	if (total != ht->nelems || total != TEST_ENTRIES)
-		pr_warn("Test failed: Total count mismatch ^^^");
-}
-
-static int __init test_rhashtable(struct rhashtable *ht)
-{
-	struct bucket_table *tbl;
-	struct test_obj *obj, *next;
-	int err;
-	unsigned int i;
-
-	/*
-	 * Insertion Test:
-	 * Insert TEST_ENTRIES into table with all keys even numbers
-	 */
-	pr_info(" Adding %d keys\n", TEST_ENTRIES);
-	for (i = 0; i < TEST_ENTRIES; i++) {
-		struct test_obj *obj;
-
-		obj = kzalloc(sizeof(*obj), GFP_KERNEL);
-		if (!obj) {
-			err = -ENOMEM;
-			goto error;
-		}
-
-		obj->ptr = TEST_PTR;
-		obj->value = i * 2;
-
-		rhashtable_insert(ht, &obj->node);
-	}
-
-	rcu_read_lock();
-	test_bucket_stats(ht, true);
-	test_rht_lookup(ht);
-	rcu_read_unlock();
-
-	for (i = 0; i < TEST_NEXPANDS; i++) {
-		pr_info(" Table expansion iteration %u...\n", i);
-		rhashtable_expand(ht);
-
-		rcu_read_lock();
-		pr_info(" Verifying lookups...\n");
-		test_rht_lookup(ht);
-		rcu_read_unlock();
-	}
-
-	for (i = 0; i < TEST_NEXPANDS; i++) {
-		pr_info(" Table shrinkage iteration %u...\n", i);
-		rhashtable_shrink(ht);
-
-		rcu_read_lock();
-		pr_info(" Verifying lookups...\n");
-		test_rht_lookup(ht);
-		rcu_read_unlock();
-	}
-
-	rcu_read_lock();
-	test_bucket_stats(ht, true);
-	rcu_read_unlock();
-
-	pr_info(" Deleting %d keys\n", TEST_ENTRIES);
-	for (i = 0; i < TEST_ENTRIES; i++) {
-		u32 key = i * 2;
-
-		obj = rhashtable_lookup(ht, &key);
-		BUG_ON(!obj);
-
-		rhashtable_remove(ht, &obj->node);
-		kfree(obj);
-	}
-
-	return 0;
-
-error:
-	tbl = rht_dereference_rcu(ht->tbl, ht);
-	for (i = 0; i < tbl->size; i++)
-		rht_for_each_entry_safe(obj, next, tbl->buckets[i], ht, node)
-			kfree(obj);
-
-	return err;
-}
-
-static int __init test_rht_init(void)
-{
-	struct rhashtable ht;
-	struct rhashtable_params params = {
-		.nelem_hint = TEST_HT_SIZE,
-		.head_offset = offsetof(struct test_obj, node),
-		.key_offset = offsetof(struct test_obj, value),
-		.key_len = sizeof(int),
-		.hashfn = jhash,
-#ifdef CONFIG_PROVE_LOCKING
-		.mutex_is_held = &test_mutex_is_held,
-#endif
-		.grow_decision = rht_grow_above_75,
-		.shrink_decision = rht_shrink_below_30,
-	};
-	int err;
-
-	pr_info("Running resizable hashtable tests...\n");
-
-	err = rhashtable_init(&ht, &params);
-	if (err < 0) {
-		pr_warn("Test failed: Unable to initialize hashtable: %d\n",
-			err);
-		return err;
-	}
-
-	err = test_rhashtable(&ht);
-
-	rhashtable_destroy(&ht);
-
-	return err;
-}
-
-subsys_initcall(test_rht_init);
-
-#endif /* CONFIG_TEST_RHASHTABLE */
+EXPORT_SYMBOL_GPL(rhashtable_destroy);
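The hunk above changes rhashtable_init() to arm a deferred resize worker whenever grow/shrink decision callbacks are supplied, and rhashtable_destroy() to cancel that worker and free the bucket table under ht->mutex. As a rough illustration only (not part of this commit), a caller-side setup/teardown pairing with automatic resizing enabled might look like the sketch below; struct my_entry, my_ht and the function names are invented for the example, while the rhashtable_params fields and helpers (jhash, RHT_BASE_SHIFT, rht_grow_above_75, rht_shrink_below_30) are the ones visible in this diff.

/*
 * Illustrative sketch, not from this commit: pairing rhashtable_init()
 * and rhashtable_destroy() with automatic resizing enabled. Names marked
 * "my_*" are invented; the rhashtable API usage mirrors the diff above.
 */
#include <linux/jhash.h>
#include <linux/rhashtable.h>

struct my_entry {
	u32 key;
	struct rhash_head node;
};

static struct rhashtable my_ht;

static int my_table_setup(void)
{
	struct rhashtable_params params = {
		.nelem_hint = 8,
		.head_offset = offsetof(struct my_entry, node),
		.key_offset = offsetof(struct my_entry, key),
		.key_len = sizeof(u32),
		.hashfn = jhash,
		.nulls_base = (3U << RHT_BASE_SHIFT),
		/* With these set, rhashtable_init() arms the deferred
		 * resize worker and rhashtable_destroy() cancels it. */
		.grow_decision = rht_grow_above_75,
		.shrink_decision = rht_shrink_below_30,
	};

	return rhashtable_init(&my_ht, &params);
}

static void my_table_teardown(void)
{
	/* Cancels pending resize work and frees the bucket table
	 * under ht->mutex, per the new rhashtable_destroy() above. */
	rhashtable_destroy(&my_ht);
}

This mirrors how test_rht_init() in the new self-test module below sets up and tears down its table, minus the self-test bookkeeping.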
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
new file mode 100644
index 000000000000..1dfeba73fc74
--- /dev/null
+++ b/lib/test_rhashtable.c
@@ -0,0 +1,227 @@
+/*
+ * Resizable, Scalable, Concurrent Hash Table
+ *
+ * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch>
+ * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ *
+ * Based on the following paper:
+ * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf
+ *
+ * Code partially derived from nft_hash
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/**************************************************************************
+ * Self Test
+ **************************************************************************/
+
+#include <linux/init.h>
+#include <linux/jhash.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/rcupdate.h>
+#include <linux/rhashtable.h>
+#include <linux/slab.h>
+
+
+#define TEST_HT_SIZE	8
+#define TEST_ENTRIES	2048
+#define TEST_PTR	((void *) 0xdeadbeef)
+#define TEST_NEXPANDS	4
+
+struct test_obj {
+	void *ptr;
+	int value;
+	struct rhash_head node;
+};
+
+static int __init test_rht_lookup(struct rhashtable *ht)
+{
+	unsigned int i;
+
+	for (i = 0; i < TEST_ENTRIES * 2; i++) {
+		struct test_obj *obj;
+		bool expected = !(i % 2);
+		u32 key = i;
+
+		obj = rhashtable_lookup(ht, &key);
+
+		if (expected && !obj) {
+			pr_warn("Test failed: Could not find key %u\n", key);
+			return -ENOENT;
+		} else if (!expected && obj) {
+			pr_warn("Test failed: Unexpected entry found for key %u\n",
+				key);
+			return -EEXIST;
+		} else if (expected && obj) {
+			if (obj->ptr != TEST_PTR || obj->value != i) {
+				pr_warn("Test failed: Lookup value mismatch %p!=%p, %u!=%u\n",
+					obj->ptr, TEST_PTR, obj->value, i);
+				return -EINVAL;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static void test_bucket_stats(struct rhashtable *ht, bool quiet)
+{
+	unsigned int cnt, rcu_cnt, i, total = 0;
+	struct rhash_head *pos;
+	struct test_obj *obj;
+	struct bucket_table *tbl;
+
+	tbl = rht_dereference_rcu(ht->tbl, ht);
+	for (i = 0; i < tbl->size; i++) {
+		rcu_cnt = cnt = 0;
+
+		if (!quiet)
+			pr_info(" [%#4x/%zu]", i, tbl->size);
+
+		rht_for_each_entry_rcu(obj, pos, tbl, i, node) {
+			cnt++;
+			total++;
+			if (!quiet)
+				pr_cont(" [%p],", obj);
+		}
+
+		rht_for_each_entry_rcu(obj, pos, tbl, i, node)
+			rcu_cnt++;
+
+		if (rcu_cnt != cnt)
+			pr_warn("Test failed: Chain count mismach %d != %d",
+				cnt, rcu_cnt);
+
+		if (!quiet)
+			pr_cont("\n [%#x] first element: %p, chain length: %u\n",
+				i, tbl->buckets[i], cnt);
+	}
+
+	pr_info(" Traversal complete: counted=%u, nelems=%u, entries=%d\n",
+		total, atomic_read(&ht->nelems), TEST_ENTRIES);
+
+	if (total != atomic_read(&ht->nelems) || total != TEST_ENTRIES)
+		pr_warn("Test failed: Total count mismatch ^^^");
+}
+
+static int __init test_rhashtable(struct rhashtable *ht)
+{
+	struct bucket_table *tbl;
+	struct test_obj *obj;
+	struct rhash_head *pos, *next;
+	int err;
+	unsigned int i;
+
+	/*
+	 * Insertion Test:
+	 * Insert TEST_ENTRIES into table with all keys even numbers
+	 */
+	pr_info(" Adding %d keys\n", TEST_ENTRIES);
+	for (i = 0; i < TEST_ENTRIES; i++) {
+		struct test_obj *obj;
+
+		obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+		if (!obj) {
+			err = -ENOMEM;
+			goto error;
+		}
+
+		obj->ptr = TEST_PTR;
+		obj->value = i * 2;
+
+		rhashtable_insert(ht, &obj->node);
+	}
+
+	rcu_read_lock();
+	test_bucket_stats(ht, true);
+	test_rht_lookup(ht);
+	rcu_read_unlock();
+
+	for (i = 0; i < TEST_NEXPANDS; i++) {
+		pr_info(" Table expansion iteration %u...\n", i);
+		mutex_lock(&ht->mutex);
+		rhashtable_expand(ht);
+		mutex_unlock(&ht->mutex);
+
+		rcu_read_lock();
+		pr_info(" Verifying lookups...\n");
+		test_rht_lookup(ht);
+		rcu_read_unlock();
+	}
+
+	for (i = 0; i < TEST_NEXPANDS; i++) {
+		pr_info(" Table shrinkage iteration %u...\n", i);
+		mutex_lock(&ht->mutex);
+		rhashtable_shrink(ht);
+		mutex_unlock(&ht->mutex);
+
+		rcu_read_lock();
+		pr_info(" Verifying lookups...\n");
+		test_rht_lookup(ht);
+		rcu_read_unlock();
+	}
+
+	rcu_read_lock();
+	test_bucket_stats(ht, true);
+	rcu_read_unlock();
+
+	pr_info(" Deleting %d keys\n", TEST_ENTRIES);
+	for (i = 0; i < TEST_ENTRIES; i++) {
+		u32 key = i * 2;
+
+		obj = rhashtable_lookup(ht, &key);
+		BUG_ON(!obj);
+
+		rhashtable_remove(ht, &obj->node);
+		kfree(obj);
+	}
+
+	return 0;
+
+error:
+	tbl = rht_dereference_rcu(ht->tbl, ht);
+	for (i = 0; i < tbl->size; i++)
+		rht_for_each_entry_safe(obj, pos, next, tbl, i, node)
+			kfree(obj);
+
+	return err;
+}
+
+static int __init test_rht_init(void)
+{
+	struct rhashtable ht;
+	struct rhashtable_params params = {
+		.nelem_hint = TEST_HT_SIZE,
+		.head_offset = offsetof(struct test_obj, node),
+		.key_offset = offsetof(struct test_obj, value),
+		.key_len = sizeof(int),
+		.hashfn = jhash,
+		.nulls_base = (3U << RHT_BASE_SHIFT),
+		.grow_decision = rht_grow_above_75,
+		.shrink_decision = rht_shrink_below_30,
+	};
+	int err;
+
+	pr_info("Running resizable hashtable tests...\n");
+
+	err = rhashtable_init(&ht, &params);
+	if (err < 0) {
+		pr_warn("Test failed: Unable to initialize hashtable: %d\n",
+			err);
+		return err;
+	}
+
+	err = test_rhashtable(&ht);
+
+	rhashtable_destroy(&ht);
+
+	return err;
+}
+
+module_init(test_rht_init);
+
+MODULE_LICENSE("GPL v2");
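For reference, the per-object cycle the self test exercises (insert, RCU-protected lookup, remove) condenses to something like the sketch below. It is illustrative only and reuses the hypothetical my_ht and struct my_entry names from the earlier sketch, with lookups done under rcu_read_lock() exactly as test_rht_lookup() does; it additionally assumes <linux/slab.h> and <linux/rcupdate.h> are included as in the test module above.

/* Condensed, illustrative version of the test's insert/lookup/remove cycle. */
static int my_table_use(void)
{
	struct my_entry *e, *found;
	u32 key = 42;

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		return -ENOMEM;
	e->key = key;

	rhashtable_insert(&my_ht, &e->node);

	/* Lookups are RCU protected, as in test_rht_lookup(). */
	rcu_read_lock();
	found = rhashtable_lookup(&my_ht, &key);
	if (found != e)
		pr_warn("lookup of key %u did not return the inserted entry\n", key);
	rcu_read_unlock();

	rhashtable_remove(&my_ht, &e->node);
	kfree(e);

	return 0;
}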