aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Thomas Graf <tgraf@suug.ch> 2014-08-02 05:47:46 -0400
committer David S. Miller <davem@davemloft.net> 2014-08-02 22:49:38 -0400
commit cfe4a9dda034e2b5b6ba0b6313b65dfb89ee451c (patch)
tree 09f0da1fdd48a931ecded6a31d54bf3a07e8febb
parent e341694e3eb57fcda9f1adc7bfea42fe080d8d7a (diff)
nftables: Convert nft_hash to use generic rhashtable
The sizing of the hash table and the practice of requiring a lookup to retrieve the pprev to be stored in the element cookie before the deletion of an entry are left intact.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Patrick McHardy <kaber@trash.net>
Reviewed-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- net/netfilter/nft_hash.c 291
1 files changed, 55 insertions, 236 deletions
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 4080ed6a072b..28fb8f38e6ba 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -15,209 +15,40 @@
15#include <linux/log2.h> 15#include <linux/log2.h>
16#include <linux/jhash.h> 16#include <linux/jhash.h>
17#include <linux/netlink.h> 17#include <linux/netlink.h>
18#include <linux/vmalloc.h> 18#include <linux/rhashtable.h>
19#include <linux/netfilter.h> 19#include <linux/netfilter.h>
20#include <linux/netfilter/nf_tables.h> 20#include <linux/netfilter/nf_tables.h>
21#include <net/netfilter/nf_tables.h> 21#include <net/netfilter/nf_tables.h>
22 22
23#define NFT_HASH_MIN_SIZE 4UL 23/* We target a hash table size of 4, element hint is 75% of final size */
24 24#define NFT_HASH_ELEMENT_HINT 3
25struct nft_hash {
26 struct nft_hash_table __rcu *tbl;
27};
28
29struct nft_hash_table {
30 unsigned int size;
31 struct nft_hash_elem __rcu *buckets[];
32};
33 25
34struct nft_hash_elem { 26struct nft_hash_elem {
35 struct nft_hash_elem __rcu *next; 27 struct rhash_head node;
36 struct nft_data key; 28 struct nft_data key;
37 struct nft_data data[]; 29 struct nft_data data[];
38}; 30};
39 31
40#define nft_hash_for_each_entry(i, head) \
41 for (i = nft_dereference(head); i != NULL; i = nft_dereference(i->next))
42#define nft_hash_for_each_entry_rcu(i, head) \
43 for (i = rcu_dereference(head); i != NULL; i = rcu_dereference(i->next))
44
45static u32 nft_hash_rnd __read_mostly;
46static bool nft_hash_rnd_initted __read_mostly;
47
48static unsigned int nft_hash_data(const struct nft_data *data,
49 unsigned int hsize, unsigned int len)
50{
51 unsigned int h;
52
53 h = jhash(data->data, len, nft_hash_rnd);
54 return h & (hsize - 1);
55}
56
57static bool nft_hash_lookup(const struct nft_set *set, 32static bool nft_hash_lookup(const struct nft_set *set,
58 const struct nft_data *key, 33 const struct nft_data *key,
59 struct nft_data *data) 34 struct nft_data *data)
60{ 35{
61 const struct nft_hash *priv = nft_set_priv(set); 36 const struct rhashtable *priv = nft_set_priv(set);
62 const struct nft_hash_table *tbl = rcu_dereference(priv->tbl);
63 const struct nft_hash_elem *he; 37 const struct nft_hash_elem *he;
64 unsigned int h;
65
66 h = nft_hash_data(key, tbl->size, set->klen);
67 nft_hash_for_each_entry_rcu(he, tbl->buckets[h]) {
68 if (nft_data_cmp(&he->key, key, set->klen))
69 continue;
70 if (set->flags & NFT_SET_MAP)
71 nft_data_copy(data, he->data);
72 return true;
73 }
74 return false;
75}
76
77static void nft_hash_tbl_free(const struct nft_hash_table *tbl)
78{
79 kvfree(tbl);
80}
81
82static unsigned int nft_hash_tbl_size(unsigned int nelem)
83{
84 return max(roundup_pow_of_two(nelem * 4 / 3), NFT_HASH_MIN_SIZE);
85}
86
87static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets)
88{
89 struct nft_hash_table *tbl;
90 size_t size;
91
92 size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
93 tbl = kzalloc(size, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN);
94 if (tbl == NULL)
95 tbl = vzalloc(size);
96 if (tbl == NULL)
97 return NULL;
98 tbl->size = nbuckets;
99
100 return tbl;
101}
102
103static void nft_hash_chain_unzip(const struct nft_set *set,
104 const struct nft_hash_table *ntbl,
105 struct nft_hash_table *tbl, unsigned int n)
106{
107 struct nft_hash_elem *he, *last, *next;
108 unsigned int h;
109
110 he = nft_dereference(tbl->buckets[n]);
111 if (he == NULL)
112 return;
113 h = nft_hash_data(&he->key, ntbl->size, set->klen);
114
115 /* Find last element of first chain hashing to bucket h */
116 last = he;
117 nft_hash_for_each_entry(he, he->next) {
118 if (nft_hash_data(&he->key, ntbl->size, set->klen) != h)
119 break;
120 last = he;
121 }
122
123 /* Unlink first chain from the old table */
124 RCU_INIT_POINTER(tbl->buckets[n], last->next);
125 38
126 /* If end of chain reached, done */ 39 he = rhashtable_lookup(priv, key);
127 if (he == NULL) 40 if (he && set->flags & NFT_SET_MAP)
128 return; 41 nft_data_copy(data, he->data);
129 42
130 /* Find first element of second chain hashing to bucket h */ 43 return !!he;
131 next = NULL;
132 nft_hash_for_each_entry(he, he->next) {
133 if (nft_hash_data(&he->key, ntbl->size, set->klen) != h)
134 continue;
135 next = he;
136 break;
137 }
138
139 /* Link the two chains */
140 RCU_INIT_POINTER(last->next, next);
141}
142
143static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv)
144{
145 struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl;
146 struct nft_hash_elem *he;
147 unsigned int i, h;
148 bool complete;
149
150 ntbl = nft_hash_tbl_alloc(tbl->size * 2);
151 if (ntbl == NULL)
152 return -ENOMEM;
153
154 /* Link new table's buckets to first element in the old table
155 * hashing to the new bucket.
156 */
157 for (i = 0; i < ntbl->size; i++) {
158 h = i < tbl->size ? i : i - tbl->size;
159 nft_hash_for_each_entry(he, tbl->buckets[h]) {
160 if (nft_hash_data(&he->key, ntbl->size, set->klen) != i)
161 continue;
162 RCU_INIT_POINTER(ntbl->buckets[i], he);
163 break;
164 }
165 }
166
167 /* Publish new table */
168 rcu_assign_pointer(priv->tbl, ntbl);
169
170 /* Unzip interleaved hash chains */
171 do {
172 /* Wait for readers to use new table/unzipped chains */
173 synchronize_rcu();
174
175 complete = true;
176 for (i = 0; i < tbl->size; i++) {
177 nft_hash_chain_unzip(set, ntbl, tbl, i);
178 if (tbl->buckets[i] != NULL)
179 complete = false;
180 }
181 } while (!complete);
182
183 nft_hash_tbl_free(tbl);
184 return 0;
185}
186
187static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv)
188{
189 struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl;
190 struct nft_hash_elem __rcu **pprev;
191 unsigned int i;
192
193 ntbl = nft_hash_tbl_alloc(tbl->size / 2);
194 if (ntbl == NULL)
195 return -ENOMEM;
196
197 for (i = 0; i < ntbl->size; i++) {
198 ntbl->buckets[i] = tbl->buckets[i];
199
200 for (pprev = &ntbl->buckets[i]; *pprev != NULL;
201 pprev = &nft_dereference(*pprev)->next)
202 ;
203 RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]);
204 }
205
206 /* Publish new table */
207 rcu_assign_pointer(priv->tbl, ntbl);
208 synchronize_rcu();
209
210 nft_hash_tbl_free(tbl);
211 return 0;
212} 44}
213 45
214static int nft_hash_insert(const struct nft_set *set, 46static int nft_hash_insert(const struct nft_set *set,
215 const struct nft_set_elem *elem) 47 const struct nft_set_elem *elem)
216{ 48{
217 struct nft_hash *priv = nft_set_priv(set); 49 struct rhashtable *priv = nft_set_priv(set);
218 struct nft_hash_table *tbl = nft_dereference(priv->tbl);
219 struct nft_hash_elem *he; 50 struct nft_hash_elem *he;
220 unsigned int size, h; 51 unsigned int size;
221 52
222 if (elem->flags != 0) 53 if (elem->flags != 0)
223 return -EINVAL; 54 return -EINVAL;
@@ -234,13 +65,7 @@ static int nft_hash_insert(const struct nft_set *set,
234 if (set->flags & NFT_SET_MAP) 65 if (set->flags & NFT_SET_MAP)
235 nft_data_copy(he->data, &elem->data); 66 nft_data_copy(he->data, &elem->data);
236 67
237 h = nft_hash_data(&he->key, tbl->size, set->klen); 68 rhashtable_insert(priv, &he->node, GFP_KERNEL);
238 RCU_INIT_POINTER(he->next, tbl->buckets[h]);
239 rcu_assign_pointer(tbl->buckets[h], he);
240
241 /* Expand table when exceeding 75% load */
242 if (set->nelems + 1 > tbl->size / 4 * 3)
243 nft_hash_tbl_expand(set, priv);
244 69
245 return 0; 70 return 0;
246} 71}
@@ -257,36 +82,31 @@ static void nft_hash_elem_destroy(const struct nft_set *set,
257static void nft_hash_remove(const struct nft_set *set, 82static void nft_hash_remove(const struct nft_set *set,
258 const struct nft_set_elem *elem) 83 const struct nft_set_elem *elem)
259{ 84{
260 struct nft_hash *priv = nft_set_priv(set); 85 struct rhashtable *priv = nft_set_priv(set);
261 struct nft_hash_table *tbl = nft_dereference(priv->tbl); 86 struct rhash_head *he, __rcu **pprev;
262 struct nft_hash_elem *he, __rcu **pprev;
263 87
264 pprev = elem->cookie; 88 pprev = elem->cookie;
265 he = nft_dereference((*pprev)); 89 he = rht_dereference((*pprev), priv);
90
91 rhashtable_remove_pprev(priv, he, pprev, GFP_KERNEL);
266 92
267 RCU_INIT_POINTER(*pprev, he->next);
268 synchronize_rcu(); 93 synchronize_rcu();
269 kfree(he); 94 kfree(he);
270
271 /* Shrink table beneath 30% load */
272 if (set->nelems - 1 < tbl->size * 3 / 10 &&
273 tbl->size > NFT_HASH_MIN_SIZE)
274 nft_hash_tbl_shrink(set, priv);
275} 95}
276 96
277static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) 97static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
278{ 98{
279 const struct nft_hash *priv = nft_set_priv(set); 99 const struct rhashtable *priv = nft_set_priv(set);
280 const struct nft_hash_table *tbl = nft_dereference(priv->tbl); 100 const struct bucket_table *tbl = rht_dereference_rcu(priv->tbl, priv);
281 struct nft_hash_elem __rcu * const *pprev; 101 struct rhash_head __rcu * const *pprev;
282 struct nft_hash_elem *he; 102 struct nft_hash_elem *he;
283 unsigned int h; 103 u32 h;
284 104
285 h = nft_hash_data(&elem->key, tbl->size, set->klen); 105 h = rhashtable_hashfn(priv, &elem->key, set->klen);
286 pprev = &tbl->buckets[h]; 106 pprev = &tbl->buckets[h];
287 nft_hash_for_each_entry(he, tbl->buckets[h]) { 107 rht_for_each_entry_rcu(he, tbl->buckets[h], node) {
288 if (nft_data_cmp(&he->key, &elem->key, set->klen)) { 108 if (nft_data_cmp(&he->key, &elem->key, set->klen)) {
289 pprev = &he->next; 109 pprev = &he->node.next;
290 continue; 110 continue;
291 } 111 }
292 112
@@ -302,14 +122,15 @@ static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
302static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, 122static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
303 struct nft_set_iter *iter) 123 struct nft_set_iter *iter)
304{ 124{
305 const struct nft_hash *priv = nft_set_priv(set); 125 const struct rhashtable *priv = nft_set_priv(set);
306 const struct nft_hash_table *tbl = nft_dereference(priv->tbl); 126 const struct bucket_table *tbl;
307 const struct nft_hash_elem *he; 127 const struct nft_hash_elem *he;
308 struct nft_set_elem elem; 128 struct nft_set_elem elem;
309 unsigned int i; 129 unsigned int i;
310 130
131 tbl = rht_dereference_rcu(priv->tbl, priv);
311 for (i = 0; i < tbl->size; i++) { 132 for (i = 0; i < tbl->size; i++) {
312 nft_hash_for_each_entry(he, tbl->buckets[i]) { 133 rht_for_each_entry_rcu(he, tbl->buckets[i], node) {
313 if (iter->count < iter->skip) 134 if (iter->count < iter->skip)
314 goto cont; 135 goto cont;
315 136
@@ -329,48 +150,46 @@ cont:
329 150
330static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) 151static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
331{ 152{
332 return sizeof(struct nft_hash); 153 return sizeof(struct rhashtable);
154}
155
156static int lockdep_nfnl_lock_is_held(void)
157{
158 return lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES);
333} 159}
334 160
335static int nft_hash_init(const struct nft_set *set, 161static int nft_hash_init(const struct nft_set *set,
336 const struct nft_set_desc *desc, 162 const struct nft_set_desc *desc,
337 const struct nlattr * const tb[]) 163 const struct nlattr * const tb[])
338{ 164{
339 struct nft_hash *priv = nft_set_priv(set); 165 struct rhashtable *priv = nft_set_priv(set);
340 struct nft_hash_table *tbl; 166 struct rhashtable_params params = {
341 unsigned int size; 167 .nelem_hint = desc->size ? : NFT_HASH_ELEMENT_HINT,
168 .head_offset = offsetof(struct nft_hash_elem, node),
169 .key_offset = offsetof(struct nft_hash_elem, key),
170 .key_len = set->klen,
171 .hashfn = jhash,
172 .grow_decision = rht_grow_above_75,
173 .shrink_decision = rht_shrink_below_30,
174 .mutex_is_held = lockdep_nfnl_lock_is_held,
175 };
342 176
343 if (unlikely(!nft_hash_rnd_initted)) { 177 return rhashtable_init(priv, &params);
344 get_random_bytes(&nft_hash_rnd, 4);
345 nft_hash_rnd_initted = true;
346 }
347
348 size = NFT_HASH_MIN_SIZE;
349 if (desc->size)
350 size = nft_hash_tbl_size(desc->size);
351
352 tbl = nft_hash_tbl_alloc(size);
353 if (tbl == NULL)
354 return -ENOMEM;
355 RCU_INIT_POINTER(priv->tbl, tbl);
356 return 0;
357} 178}
358 179
359static void nft_hash_destroy(const struct nft_set *set) 180static void nft_hash_destroy(const struct nft_set *set)
360{ 181{
361 const struct nft_hash *priv = nft_set_priv(set); 182 const struct rhashtable *priv = nft_set_priv(set);
362 const struct nft_hash_table *tbl = nft_dereference(priv->tbl); 183 const struct bucket_table *tbl;
363 struct nft_hash_elem *he, *next; 184 struct nft_hash_elem *he, *next;
364 unsigned int i; 185 unsigned int i;
365 186
366 for (i = 0; i < tbl->size; i++) { 187 tbl = rht_dereference(priv->tbl, priv);
367 for (he = nft_dereference(tbl->buckets[i]); he != NULL; 188 for (i = 0; i < tbl->size; i++)
368 he = next) { 189 rht_for_each_entry_safe(he, next, tbl->buckets[i], priv, node)
369 next = nft_dereference(he->next);
370 nft_hash_elem_destroy(set, he); 190 nft_hash_elem_destroy(set, he);
371 } 191
372 } 192 rhashtable_destroy(priv);
373 kfree(tbl);
374} 193}
375 194
376static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, 195static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
@@ -383,8 +202,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
383 esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]); 202 esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]);
384 203
385 if (desc->size) { 204 if (desc->size) {
386 est->size = sizeof(struct nft_hash) + 205 est->size = sizeof(struct rhashtable) +
387 nft_hash_tbl_size(desc->size) * 206 roundup_pow_of_two(desc->size * 4 / 3) *
388 sizeof(struct nft_hash_elem *) + 207 sizeof(struct nft_hash_elem *) +
389 desc->size * esize; 208 desc->size * esize;
390 } else { 209 } else {