path: root/include/linux
author	Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>	2011-02-01 09:38:36 -0500
committer	Patrick McHardy <kaber@trash.net>	2011-02-01 09:38:36 -0500
commit	6c027889696a7a694b0e2f6e3cabadefec7553b6 (patch)
tree	bfdb7bbdb8153ac15c45fe86928d4b02ce3fe766 /include/linux
parent	543261907dc3c4e90845acfcd602ebdbfdfcb4f0 (diff)
netfilter: ipset: hash:ip set type support
The module implements the hash:ip type support in four flavours: for IPv4 or IPv6, both without and with timeout support. All the hash types are based on the "array hash" (ahash) structure and functions, a good compromise between minimal memory footprint and speed. The hashing uses arrays to resolve clashes.

The hash table is resized (doubled) when searching becomes too long. Resizing can be triggered by userspace add commands only, and those are serialized by the nfnl mutex. During resizing the set is read-locked, so the only possible concurrent operations are the kernel side readers; those are protected by RCU locking.

Because of the four flavours and the other hash types, the functions are implemented in generic form in the ip_set_ahash.h header file and the real functions are generated before compilation by macro expansion. Thus indirect calls through low-level function pointers and void pointer arguments can be avoided: the low-level functions are inlined and the function arguments are pointers to type-specific structures.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
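To make the macro-expansion scheme concrete, here is a minimal sketch of how a set type module instantiates the generic header; the element layout and the single helper shown are illustrative stand-ins, not the full set of helpers the real hash:ip module defines:

/* Sketch only: a type file defines its element structure and inline
 * helpers, then sets TYPE/PF and includes the generic header. */
struct hash_ip4_elem {
	__be32 ip;
};

static inline bool
hash_ip4_data_equal(const struct hash_ip4_elem *e1,
		    const struct hash_ip4_elem *e2)
{
	return e1->ip == e2->ip;
}

#define TYPE		hash_ip
#define PF		4
#define HOST_MASK	32
#include <linux/netfilter/ipset/ip_set_ahash.h>

After the include, TOKEN(TYPE, PF, _add) pastes to hash_ip4_add, and the generic function bodies are compiled against the inlined, type-specific helpers.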
Diffstat (limited to 'include/linux')
-rw-r--r--	include/linux/netfilter/ipset/ip_set_ahash.h	| 1074
-rw-r--r--	include/linux/netfilter/ipset/ip_set_hash.h	| 26
2 files changed, 1100 insertions(+), 0 deletions(-)
diff --git a/include/linux/netfilter/ipset/ip_set_ahash.h b/include/linux/netfilter/ipset/ip_set_ahash.h
new file mode 100644
index 00000000000..ec9d9bea1e3
--- /dev/null
+++ b/include/linux/netfilter/ipset/ip_set_ahash.h
@@ -0,0 +1,1074 @@
#ifndef _IP_SET_AHASH_H
#define _IP_SET_AHASH_H

#include <linux/rcupdate.h>
#include <linux/jhash.h>
#include <linux/netfilter/ipset/ip_set_timeout.h>

/* Hashing which uses arrays to resolve clashes. The hash table is resized
 * (doubled) when searching becomes too long.
 * Internally jhash is used with the assumption that the size of the
 * stored data is a multiple of sizeof(u32). If storage supports timeout,
 * the timeout field must be the last one in the data structure - that field
 * is ignored when computing the hash key.
 *
 * Readers and resizing
 *
 * Resizing can be triggered by userspace command only, and those
 * are serialized by the nfnl mutex. During resizing the set is
 * read-locked, so the only possible concurrent operations are
 * the kernel side readers. Those must be protected by proper RCU locking.
 */

/* Number of elements to store in an initial array block */
#define AHASH_INIT_SIZE		4
/* Max number of elements to store in an array block */
#define AHASH_MAX_SIZE		(3*4)

/* A hash bucket */
struct hbucket {
	void *value;		/* the array of the values */
	u8 size;		/* size of the array */
	u8 pos;			/* position of the first free entry */
};

/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
	u8 htable_bits;		/* size of hash table == 2^htable_bits */
	struct hbucket bucket[0]; /* hashtable buckets */
};

#define hbucket(h, i)		&((h)->bucket[i])

/* Book-keeping of the prefixes added to the set */
struct ip_set_hash_nets {
	u8 cidr;		/* the different cidr values in the set */
	u32 nets;		/* number of elements per cidr */
};

/* The generic ip_set hash structure */
struct ip_set_hash {
	struct htable *table;	/* the hash table */
	u32 maxelem;		/* max elements in the hash */
	u32 elements;		/* current element (vs timeout) */
	u32 initval;		/* random jhash init value */
	u32 timeout;		/* timeout value, if enabled */
	struct timer_list gc;	/* garbage collection when timeout enabled */
#ifdef IP_SET_HASH_WITH_NETMASK
	u8 netmask;		/* netmask value for subnets to store */
#endif
#ifdef IP_SET_HASH_WITH_NETS
	struct ip_set_hash_nets nets[0]; /* book-keeping of prefixes */
#endif
};

/* Compute htable_bits from the user input parameter hashsize */
static u8
htable_bits(u32 hashsize)
{
	/* Assume that hashsize == 2^htable_bits */
	u8 bits = fls(hashsize - 1);

	if (jhash_size(bits) != hashsize)
		/* Round up to the first 2^n value */
		bits = fls(hashsize);

	return bits;
}
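
/* Worked example (illustrative): for hashsize 1000, fls(999) == 10 but
 * jhash_size(10) == 1024 != 1000, so bits becomes fls(1000) == 10 and the
 * table is rounded up to 1024 buckets; for hashsize 1024,
 * jhash_size(fls(1023)) == 1024 already matches, so 10 is returned as-is.
 */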

#ifdef IP_SET_HASH_WITH_NETS

#define SET_HOST_MASK(family)	(family == AF_INET ? 32 : 128)

/* Network cidr size book-keeping when the hash stores different
 * sized networks */
static void
add_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
{
	u8 i;

	++h->nets[cidr-1].nets;

	pr_debug("add_cidr added %u: %u\n", cidr, h->nets[cidr-1].nets);

	if (h->nets[cidr-1].nets > 1)
		return;

	/* New cidr size */
	for (i = 0; i < host_mask && h->nets[i].cidr; i++) {
		/* Add in increasing prefix order, so larger cidr first */
		if (h->nets[i].cidr < cidr)
			swap(h->nets[i].cidr, cidr);
	}
	if (i < host_mask)
		h->nets[i].cidr = cidr;
}
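
/* Example (illustrative): if a set holding /24 entries gets its first /16
 * entry, the loop above keeps nets[] sorted by decreasing cidr value, i.e.
 * nets[0].cidr == 24, nets[1].cidr == 16, so lookups by cidr try the more
 * specific networks first.
 */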

static void
del_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
{
	u8 i;

	--h->nets[cidr-1].nets;

	pr_debug("del_cidr deleted %u: %u\n", cidr, h->nets[cidr-1].nets);

	if (h->nets[cidr-1].nets != 0)
		return;

	/* All entries with this cidr size deleted, so cleanup h->cidr[] */
	for (i = 0; i < host_mask - 1 && h->nets[i].cidr; i++) {
		if (h->nets[i].cidr == cidr)
			h->nets[i].cidr = cidr = h->nets[i+1].cidr;
	}
	h->nets[i - 1].cidr = 0;
}
#endif

/* Destroy the hashtable part of the set */
static void
ahash_destroy(struct htable *t)
{
	struct hbucket *n;
	u32 i;

	for (i = 0; i < jhash_size(t->htable_bits); i++) {
		n = hbucket(t, i);
		if (n->size)
			/* FIXME: use slab cache */
			kfree(n->value);
	}

	ip_set_free(t);
}

/* Calculate the actual memory size of the set data */
static size_t
ahash_memsize(const struct ip_set_hash *h, size_t dsize, u8 host_mask)
{
	u32 i;
	struct htable *t = h->table;
	size_t memsize = sizeof(*h)
			 + sizeof(*t)
#ifdef IP_SET_HASH_WITH_NETS
			 + sizeof(struct ip_set_hash_nets) * host_mask
#endif
			 + jhash_size(t->htable_bits) * sizeof(struct hbucket);

	for (i = 0; i < jhash_size(t->htable_bits); i++)
		memsize += t->bucket[i].size * dsize;

	return memsize;
}

/* Flush a hash type of set: destroy all elements */
static void
ip_set_hash_flush(struct ip_set *set)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	struct hbucket *n;
	u32 i;

	for (i = 0; i < jhash_size(t->htable_bits); i++) {
		n = hbucket(t, i);
		if (n->size) {
			n->size = n->pos = 0;
			/* FIXME: use slab cache */
			kfree(n->value);
		}
	}
#ifdef IP_SET_HASH_WITH_NETS
	memset(h->nets, 0, sizeof(struct ip_set_hash_nets)
			   * SET_HOST_MASK(set->family));
#endif
	h->elements = 0;
}

/* Destroy a hash type of set */
static void
ip_set_hash_destroy(struct ip_set *set)
{
	struct ip_set_hash *h = set->data;

	if (with_timeout(h->timeout))
		del_timer_sync(&h->gc);

	ahash_destroy(h->table);
	kfree(h);

	set->data = NULL;
}

#define HKEY(data, initval, htable_bits)				\
(jhash2((u32 *)(data), sizeof(struct type_pf_elem)/sizeof(u32), initval) \
 & jhash_mask(htable_bits))
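
/* Illustrative note: with htable_bits == 10, HKEY() hashes the
 * sizeof(struct type_pf_elem)/sizeof(u32) words of the element with
 * jhash2() and masks the result to a bucket index in [0, 1023]. A timeout
 * field, when present, lies beyond struct type_pf_elem and thus never
 * influences the key.
 */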

#endif /* _IP_SET_AHASH_H */

#define CONCAT(a, b, c)		a##b##c
#define TOKEN(a, b, c)		CONCAT(a, b, c)
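
/* Expansion example (illustrative): with TYPE == hash_ip and PF == 4,
 * TOKEN(TYPE, PF, _data_equal) pastes to hash_ip4_data_equal, so each
 * type_pf_* name below resolves to a type- and family-specific symbol.
 */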

/* Type/family dependent function prototypes */

#define type_pf_data_equal	TOKEN(TYPE, PF, _data_equal)
#define type_pf_data_isnull	TOKEN(TYPE, PF, _data_isnull)
#define type_pf_data_copy	TOKEN(TYPE, PF, _data_copy)
#define type_pf_data_zero_out	TOKEN(TYPE, PF, _data_zero_out)
#define type_pf_data_netmask	TOKEN(TYPE, PF, _data_netmask)
#define type_pf_data_list	TOKEN(TYPE, PF, _data_list)
#define type_pf_data_tlist	TOKEN(TYPE, PF, _data_tlist)

#define type_pf_elem		TOKEN(TYPE, PF, _elem)
#define type_pf_telem		TOKEN(TYPE, PF, _telem)
#define type_pf_data_timeout	TOKEN(TYPE, PF, _data_timeout)
#define type_pf_data_expired	TOKEN(TYPE, PF, _data_expired)
#define type_pf_data_timeout_set TOKEN(TYPE, PF, _data_timeout_set)

#define type_pf_elem_add	TOKEN(TYPE, PF, _elem_add)
#define type_pf_add		TOKEN(TYPE, PF, _add)
#define type_pf_del		TOKEN(TYPE, PF, _del)
#define type_pf_test_cidrs	TOKEN(TYPE, PF, _test_cidrs)
#define type_pf_test		TOKEN(TYPE, PF, _test)

#define type_pf_elem_tadd	TOKEN(TYPE, PF, _elem_tadd)
#define type_pf_del_telem	TOKEN(TYPE, PF, _ahash_del_telem)
#define type_pf_expire		TOKEN(TYPE, PF, _expire)
#define type_pf_tadd		TOKEN(TYPE, PF, _tadd)
#define type_pf_tdel		TOKEN(TYPE, PF, _tdel)
#define type_pf_ttest_cidrs	TOKEN(TYPE, PF, _ahash_ttest_cidrs)
#define type_pf_ttest		TOKEN(TYPE, PF, _ahash_ttest)

#define type_pf_resize		TOKEN(TYPE, PF, _resize)
#define type_pf_tresize		TOKEN(TYPE, PF, _tresize)
#define type_pf_flush		ip_set_hash_flush
#define type_pf_destroy		ip_set_hash_destroy
#define type_pf_head		TOKEN(TYPE, PF, _head)
#define type_pf_list		TOKEN(TYPE, PF, _list)
#define type_pf_tlist		TOKEN(TYPE, PF, _tlist)
#define type_pf_same_set	TOKEN(TYPE, PF, _same_set)
#define type_pf_kadt		TOKEN(TYPE, PF, _kadt)
#define type_pf_uadt		TOKEN(TYPE, PF, _uadt)
#define type_pf_gc		TOKEN(TYPE, PF, _gc)
#define type_pf_gc_init		TOKEN(TYPE, PF, _gc_init)
#define type_pf_variant		TOKEN(TYPE, PF, _variant)
#define type_pf_tvariant	TOKEN(TYPE, PF, _tvariant)

/* Flavour without timeout */

/* Get the ith element from the array block n */
#define ahash_data(n, i)	\
	((struct type_pf_elem *)((n)->value) + (i))

/* Add an element to the hash table when resizing the set:
 * we spare the maintenance of the internal counters. */
static int
type_pf_elem_add(struct hbucket *n, const struct type_pf_elem *value)
{
	if (n->pos >= n->size) {
		void *tmp;

		if (n->size >= AHASH_MAX_SIZE)
			/* Trigger rehashing */
			return -EAGAIN;

		tmp = kzalloc((n->size + AHASH_INIT_SIZE)
			      * sizeof(struct type_pf_elem),
			      GFP_ATOMIC);
		if (!tmp)
			return -ENOMEM;
		if (n->size) {
			memcpy(tmp, n->value,
			       sizeof(struct type_pf_elem) * n->size);
			kfree(n->value);
		}
		n->value = tmp;
		n->size += AHASH_INIT_SIZE;
	}
	type_pf_data_copy(ahash_data(n, n->pos++), value);
	return 0;
}
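
/* Growth arithmetic (from the constants above): a bucket's value[] array
 * grows in AHASH_INIT_SIZE steps, 4 -> 8 -> 12 slots; once it would have
 * to exceed AHASH_MAX_SIZE (12), -EAGAIN tells the caller to rehash into
 * a larger table instead.
 */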

/* Resize a hash: create a new hash table with doubling the hashsize
 * and inserting the elements to it. Repeat until we succeed or
 * fail due to memory pressures. */
static int
type_pf_resize(struct ip_set *set, bool retried)
{
	struct ip_set_hash *h = set->data;
	struct htable *t, *orig = h->table;
	u8 htable_bits = orig->htable_bits;
	const struct type_pf_elem *data;
	struct hbucket *n, *m;
	u32 i, j;
	int ret;

retry:
	ret = 0;
	htable_bits++;
	pr_debug("attempt to resize set %s from %u to %u, t %p\n",
		 set->name, orig->htable_bits, htable_bits, orig);
	if (!htable_bits)
		/* In case we have plenty of memory :-) */
		return -IPSET_ERR_HASH_FULL;
	t = ip_set_alloc(sizeof(*t)
			 + jhash_size(htable_bits) * sizeof(struct hbucket));
	if (!t)
		return -ENOMEM;
	t->htable_bits = htable_bits;

	read_lock_bh(&set->lock);
	for (i = 0; i < jhash_size(orig->htable_bits); i++) {
		n = hbucket(orig, i);
		for (j = 0; j < n->pos; j++) {
			data = ahash_data(n, j);
			m = hbucket(t, HKEY(data, h->initval, htable_bits));
			ret = type_pf_elem_add(m, data);
			if (ret < 0) {
				read_unlock_bh(&set->lock);
				ahash_destroy(t);
				if (ret == -EAGAIN)
					goto retry;
				return ret;
			}
		}
	}

	rcu_assign_pointer(h->table, t);
	read_unlock_bh(&set->lock);

	/* Give time to other readers of the set */
	synchronize_rcu_bh();

	pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
		 orig->htable_bits, orig, t->htable_bits, t);
	ahash_destroy(orig);

	return 0;
}
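
/* Locking recap (mirrors the header comment): the rehash runs with the set
 * read-locked, which serializes it against writers while kernel-side
 * readers proceed under RCU; rcu_assign_pointer() publishes the new table
 * and synchronize_rcu_bh() lets every reader still using 'orig' finish
 * before it is destroyed.
 */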

/* Add an element to a hash and update the internal counters when succeeded,
 * otherwise report the proper error code. */
static int
type_pf_add(struct ip_set *set, void *value, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	int i, ret = 0;
	u32 key;

	if (h->elements >= h->maxelem)
		return -IPSET_ERR_HASH_FULL;

	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	key = HKEY(value, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++)
		if (type_pf_data_equal(ahash_data(n, i), d)) {
			ret = -IPSET_ERR_EXIST;
			goto out;
		}

	ret = type_pf_elem_add(n, value);
	if (ret != 0)
		goto out;

#ifdef IP_SET_HASH_WITH_NETS
	add_cidr(h, d->cidr, HOST_MASK);
#endif
	h->elements++;
out:
	rcu_read_unlock_bh();
	return ret;
}

/* Delete an element from the hash: swap it with the last element
 * and free up space if possible.
 */
static int
type_pf_del(struct ip_set *set, void *value, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	int i;
	struct type_pf_elem *data;
	u32 key;

	key = HKEY(value, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_data(n, i);
		if (!type_pf_data_equal(data, d))
			continue;
		if (i != n->pos - 1)
			/* Not last one */
			type_pf_data_copy(data, ahash_data(n, n->pos - 1));

		n->pos--;
		h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
		del_cidr(h, d->cidr, HOST_MASK);
#endif
		if (n->pos + AHASH_INIT_SIZE < n->size) {
			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
					    * sizeof(struct type_pf_elem),
					    GFP_ATOMIC);
			if (!tmp)
				return 0;
			n->size -= AHASH_INIT_SIZE;
			memcpy(tmp, n->value,
			       n->size * sizeof(struct type_pf_elem));
			kfree(n->value);
			n->value = tmp;
		}
		return 0;
	}

	return -IPSET_ERR_EXIST;
}

#ifdef IP_SET_HASH_WITH_NETS

/* Special test function which takes into account the different network
 * sizes added to the set */
static int
type_pf_test_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	struct hbucket *n;
	const struct type_pf_elem *data;
	int i, j = 0;
	u32 key;
	u8 host_mask = SET_HOST_MASK(set->family);

	pr_debug("test by nets\n");
	for (; j < host_mask && h->nets[j].cidr; j++) {
		type_pf_data_netmask(d, h->nets[j].cidr);
		key = HKEY(d, h->initval, t->htable_bits);
		n = hbucket(t, key);
		for (i = 0; i < n->pos; i++) {
			data = ahash_data(n, i);
			if (type_pf_data_equal(data, d))
				return 1;
		}
	}
	return 0;
}
#endif

/* Test whether the element is added to the set */
static int
type_pf_test(struct ip_set *set, void *value, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	struct type_pf_elem *d = value;
	struct hbucket *n;
	const struct type_pf_elem *data;
	int i;
	u32 key;

#ifdef IP_SET_HASH_WITH_NETS
	/* If we test an IP address and not a network address,
	 * try all possible network sizes */
	if (d->cidr == SET_HOST_MASK(set->family))
		return type_pf_test_cidrs(set, d, timeout);
#endif

	key = HKEY(d, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_data(n, i);
		if (type_pf_data_equal(data, d))
			return 1;
	}
	return 0;
}

/* Reply a HEADER request: fill out the header part of the set */
static int
type_pf_head(struct ip_set *set, struct sk_buff *skb)
{
	const struct ip_set_hash *h = set->data;
	struct nlattr *nested;
	size_t memsize;

	read_lock_bh(&set->lock);
	memsize = ahash_memsize(h, with_timeout(h->timeout)
				   ? sizeof(struct type_pf_telem)
				   : sizeof(struct type_pf_elem),
				set->family == AF_INET ? 32 : 128);
	read_unlock_bh(&set->lock);

	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
	if (!nested)
		goto nla_put_failure;
	NLA_PUT_NET32(skb, IPSET_ATTR_HASHSIZE,
		      htonl(jhash_size(h->table->htable_bits)));
	NLA_PUT_NET32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem));
#ifdef IP_SET_HASH_WITH_NETMASK
	if (h->netmask != HOST_MASK)
		NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, h->netmask);
#endif
	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
		      htonl(atomic_read(&set->ref) - 1));
	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize));
	if (with_timeout(h->timeout))
		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout));
	ipset_nest_end(skb, nested);

	return 0;
nla_put_failure:
	return -EMSGSIZE;
}

/* Reply a LIST/SAVE request: dump the elements of the specified set */
static int
type_pf_list(const struct ip_set *set,
	     struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct ip_set_hash *h = set->data;
	const struct htable *t = h->table;
	struct nlattr *atd, *nested;
	const struct hbucket *n;
	const struct type_pf_elem *data;
	u32 first = cb->args[2];
	/* We assume that one hash bucket fills into one page */
	void *incomplete;
	int i;

	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
	if (!atd)
		return -EMSGSIZE;
	pr_debug("list hash set %s\n", set->name);
	for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
		incomplete = skb_tail_pointer(skb);
		n = hbucket(t, cb->args[2]);
		pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
		for (i = 0; i < n->pos; i++) {
			data = ahash_data(n, i);
			pr_debug("list hash %lu hbucket %p i %u, data %p\n",
				 cb->args[2], n, i, data);
			nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
			if (!nested) {
				if (cb->args[2] == first) {
					nla_nest_cancel(skb, atd);
					return -EMSGSIZE;
				} else
					goto nla_put_failure;
			}
			if (type_pf_data_list(skb, data))
				goto nla_put_failure;
			ipset_nest_end(skb, nested);
		}
	}
	ipset_nest_end(skb, atd);
	/* Set listing finished */
	cb->args[2] = 0;

	return 0;

nla_put_failure:
	nlmsg_trim(skb, incomplete);
	ipset_nest_end(skb, atd);
	if (unlikely(first == cb->args[2])) {
		pr_warning("Can't list set %s: one bucket does not fit into "
			   "a message. Please report it!\n", set->name);
		cb->args[2] = 0;
		return -EMSGSIZE;
	}
	return 0;
}

static int
type_pf_kadt(struct ip_set *set, const struct sk_buff *skb,
	     enum ipset_adt adt, u8 pf, u8 dim, u8 flags);
static int
type_pf_uadt(struct ip_set *set, struct nlattr *tb[],
	     enum ipset_adt adt, u32 *lineno, u32 flags);

static const struct ip_set_type_variant type_pf_variant = {
	.kadt	= type_pf_kadt,
	.uadt	= type_pf_uadt,
	.adt	= {
		[IPSET_ADD] = type_pf_add,
		[IPSET_DEL] = type_pf_del,
		[IPSET_TEST] = type_pf_test,
	},
	.destroy = type_pf_destroy,
	.flush	= type_pf_flush,
	.head	= type_pf_head,
	.list	= type_pf_list,
	.resize	= type_pf_resize,
	.same_set = type_pf_same_set,
};

/* Flavour with timeout support */

#define ahash_tdata(n, i) \
	(struct type_pf_elem *)((struct type_pf_telem *)((n)->value) + (i))
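
/* A type_pf_telem is a type_pf_elem with the timeout field appended (the
 * timeout must come last so that HKEY() ignores it); ahash_tdata() thus
 * indexes the array with the larger stride and casts back to the plain
 * element type.
 */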

static inline u32
type_pf_data_timeout(const struct type_pf_elem *data)
{
	const struct type_pf_telem *tdata =
		(const struct type_pf_telem *) data;

	return tdata->timeout;
}

static inline bool
type_pf_data_expired(const struct type_pf_elem *data)
{
	const struct type_pf_telem *tdata =
		(const struct type_pf_telem *) data;

	return ip_set_timeout_expired(tdata->timeout);
}

static inline void
type_pf_data_timeout_set(struct type_pf_elem *data, u32 timeout)
{
	struct type_pf_telem *tdata = (struct type_pf_telem *) data;

	tdata->timeout = ip_set_timeout_set(timeout);
}

static int
type_pf_elem_tadd(struct hbucket *n, const struct type_pf_elem *value,
		  u32 timeout)
{
	struct type_pf_elem *data;

	if (n->pos >= n->size) {
		void *tmp;

		if (n->size >= AHASH_MAX_SIZE)
			/* Trigger rehashing */
			return -EAGAIN;

		tmp = kzalloc((n->size + AHASH_INIT_SIZE)
			      * sizeof(struct type_pf_telem),
			      GFP_ATOMIC);
		if (!tmp)
			return -ENOMEM;
		if (n->size) {
			memcpy(tmp, n->value,
			       sizeof(struct type_pf_telem) * n->size);
			kfree(n->value);
		}
		n->value = tmp;
		n->size += AHASH_INIT_SIZE;
	}
	data = ahash_tdata(n, n->pos++);
	type_pf_data_copy(data, value);
	type_pf_data_timeout_set(data, timeout);
	return 0;
}

/* Delete expired elements from the hashtable */
static void
type_pf_expire(struct ip_set_hash *h)
{
	struct htable *t = h->table;
	struct hbucket *n;
	struct type_pf_elem *data;
	u32 i;
	int j;

	for (i = 0; i < jhash_size(t->htable_bits); i++) {
		n = hbucket(t, i);
		for (j = 0; j < n->pos; j++) {
			data = ahash_tdata(n, j);
			if (type_pf_data_expired(data)) {
				pr_debug("expired %u/%u\n", i, j);
#ifdef IP_SET_HASH_WITH_NETS
				del_cidr(h, data->cidr, HOST_MASK);
#endif
				if (j != n->pos - 1)
					/* Not last one */
					type_pf_data_copy(data,
						ahash_tdata(n, n->pos - 1));
				n->pos--;
				h->elements--;
			}
		}
		if (n->pos + AHASH_INIT_SIZE < n->size) {
			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
					    * sizeof(struct type_pf_telem),
					    GFP_ATOMIC);
			if (!tmp)
				/* Still try to delete expired elements */
				continue;
			n->size -= AHASH_INIT_SIZE;
			memcpy(tmp, n->value,
			       n->size * sizeof(struct type_pf_telem));
			kfree(n->value);
			n->value = tmp;
		}
	}
}

static int
type_pf_tresize(struct ip_set *set, bool retried)
{
	struct ip_set_hash *h = set->data;
	struct htable *t, *orig = h->table;
	u8 htable_bits = orig->htable_bits;
	const struct type_pf_elem *data;
	struct hbucket *n, *m;
	u32 i, j;
	int ret;

	/* Try to cleanup once */
	if (!retried) {
		i = h->elements;
		write_lock_bh(&set->lock);
		type_pf_expire(set->data);
		write_unlock_bh(&set->lock);
		if (h->elements < i)
			return 0;
	}

retry:
	ret = 0;
	htable_bits++;
	if (!htable_bits)
		/* In case we have plenty of memory :-) */
		return -IPSET_ERR_HASH_FULL;
	t = ip_set_alloc(sizeof(*t)
			 + jhash_size(htable_bits) * sizeof(struct hbucket));
	if (!t)
		return -ENOMEM;
	t->htable_bits = htable_bits;

	read_lock_bh(&set->lock);
	for (i = 0; i < jhash_size(orig->htable_bits); i++) {
		n = hbucket(orig, i);
		for (j = 0; j < n->pos; j++) {
			data = ahash_tdata(n, j);
			m = hbucket(t, HKEY(data, h->initval, htable_bits));
			ret = type_pf_elem_tadd(m, data,
						type_pf_data_timeout(data));
			if (ret < 0) {
				read_unlock_bh(&set->lock);
				ahash_destroy(t);
				if (ret == -EAGAIN)
					goto retry;
				return ret;
			}
		}
	}

	rcu_assign_pointer(h->table, t);
	read_unlock_bh(&set->lock);

	/* Give time to other readers of the set */
	synchronize_rcu_bh();

	ahash_destroy(orig);

	return 0;
}

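/* Note on the add path below: j remembers the first expired slot seen in
 * the bucket (AHASH_MAX_SIZE + 1 means "none"); when the new element is
 * not already present, such a slot is reused in place instead of growing
 * the array.
 */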
static int
type_pf_tadd(struct ip_set *set, void *value, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	struct type_pf_elem *data;
	int ret = 0, i, j = AHASH_MAX_SIZE + 1;
	u32 key;

	if (h->elements >= h->maxelem)
		/* FIXME: when set is full, we slow down here */
		type_pf_expire(h);
	if (h->elements >= h->maxelem)
		return -IPSET_ERR_HASH_FULL;

	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	key = HKEY(d, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_tdata(n, i);
		if (type_pf_data_equal(data, d)) {
			if (type_pf_data_expired(data))
				j = i;
			else {
				ret = -IPSET_ERR_EXIST;
				goto out;
			}
		} else if (j == AHASH_MAX_SIZE + 1 &&
			   type_pf_data_expired(data))
			j = i;
	}
	if (j != AHASH_MAX_SIZE + 1) {
		data = ahash_tdata(n, j);
#ifdef IP_SET_HASH_WITH_NETS
		del_cidr(h, data->cidr, HOST_MASK);
		add_cidr(h, d->cidr, HOST_MASK);
#endif
		type_pf_data_copy(data, d);
		type_pf_data_timeout_set(data, timeout);
		goto out;
	}
	ret = type_pf_elem_tadd(n, d, timeout);
	if (ret != 0)
		goto out;

#ifdef IP_SET_HASH_WITH_NETS
	add_cidr(h, d->cidr, HOST_MASK);
#endif
	h->elements++;
out:
	rcu_read_unlock_bh();
	return ret;
}

static int
type_pf_tdel(struct ip_set *set, void *value, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	int i, ret = 0;
	struct type_pf_elem *data;
	u32 key;

	key = HKEY(value, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_tdata(n, i);
		if (!type_pf_data_equal(data, d))
			continue;
		if (type_pf_data_expired(data))
			ret = -IPSET_ERR_EXIST;
		if (i != n->pos - 1)
			/* Not last one */
			type_pf_data_copy(data, ahash_tdata(n, n->pos - 1));

		n->pos--;
		h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
		del_cidr(h, d->cidr, HOST_MASK);
#endif
		if (n->pos + AHASH_INIT_SIZE < n->size) {
			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
					    * sizeof(struct type_pf_telem),
					    GFP_ATOMIC);
			if (!tmp)
				return 0;
			n->size -= AHASH_INIT_SIZE;
			memcpy(tmp, n->value,
			       n->size * sizeof(struct type_pf_telem));
			kfree(n->value);
			n->value = tmp;
		}
		return 0;
	}

	return -IPSET_ERR_EXIST;
}

#ifdef IP_SET_HASH_WITH_NETS
static int
type_pf_ttest_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	struct type_pf_elem *data;
	struct hbucket *n;
	int i, j = 0;
	u32 key;
	u8 host_mask = SET_HOST_MASK(set->family);

	for (; j < host_mask && h->nets[j].cidr; j++) {
		type_pf_data_netmask(d, h->nets[j].cidr);
		key = HKEY(d, h->initval, t->htable_bits);
		n = hbucket(t, key);
		for (i = 0; i < n->pos; i++) {
			data = ahash_tdata(n, i);
			if (type_pf_data_equal(data, d))
				return !type_pf_data_expired(data);
		}
	}
	return 0;
}
#endif

static int
type_pf_ttest(struct ip_set *set, void *value, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	struct type_pf_elem *data, *d = value;
	struct hbucket *n;
	int i;
	u32 key;

#ifdef IP_SET_HASH_WITH_NETS
	if (d->cidr == SET_HOST_MASK(set->family))
		return type_pf_ttest_cidrs(set, d, timeout);
#endif
	key = HKEY(d, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_tdata(n, i);
		if (type_pf_data_equal(data, d))
			return !type_pf_data_expired(data);
	}
	return 0;
}

static int
type_pf_tlist(const struct ip_set *set,
	      struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct ip_set_hash *h = set->data;
	const struct htable *t = h->table;
	struct nlattr *atd, *nested;
	const struct hbucket *n;
	const struct type_pf_elem *data;
	u32 first = cb->args[2];
	/* We assume that one hash bucket fills into one page */
	void *incomplete;
	int i;

	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
	if (!atd)
		return -EMSGSIZE;
	for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
		incomplete = skb_tail_pointer(skb);
		n = hbucket(t, cb->args[2]);
		for (i = 0; i < n->pos; i++) {
			data = ahash_tdata(n, i);
			pr_debug("list %p %u\n", n, i);
			if (type_pf_data_expired(data))
				continue;
			pr_debug("do list %p %u\n", n, i);
			nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
			if (!nested) {
				if (cb->args[2] == first) {
					nla_nest_cancel(skb, atd);
					return -EMSGSIZE;
				} else
					goto nla_put_failure;
			}
			if (type_pf_data_tlist(skb, data))
				goto nla_put_failure;
			ipset_nest_end(skb, nested);
		}
	}
	ipset_nest_end(skb, atd);
	/* Set listing finished */
	cb->args[2] = 0;

	return 0;

nla_put_failure:
	nlmsg_trim(skb, incomplete);
	ipset_nest_end(skb, atd);
	if (unlikely(first == cb->args[2])) {
		pr_warning("Can't list set %s: one bucket does not fit into "
			   "a message. Please report it!\n", set->name);
		cb->args[2] = 0;
		return -EMSGSIZE;
	}
	return 0;
}

static const struct ip_set_type_variant type_pf_tvariant = {
	.kadt	= type_pf_kadt,
	.uadt	= type_pf_uadt,
	.adt	= {
		[IPSET_ADD] = type_pf_tadd,
		[IPSET_DEL] = type_pf_tdel,
		[IPSET_TEST] = type_pf_ttest,
	},
	.destroy = type_pf_destroy,
	.flush	= type_pf_flush,
	.head	= type_pf_head,
	.list	= type_pf_tlist,
	.resize	= type_pf_tresize,
	.same_set = type_pf_same_set,
};

static void
type_pf_gc(unsigned long ul_set)
{
	struct ip_set *set = (struct ip_set *) ul_set;
	struct ip_set_hash *h = set->data;

	pr_debug("called\n");
	write_lock_bh(&set->lock);
	type_pf_expire(h);
	write_unlock_bh(&set->lock);

	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
	add_timer(&h->gc);
}

static void
type_pf_gc_init(struct ip_set *set)
{
	struct ip_set_hash *h = set->data;

	init_timer(&h->gc);
	h->gc.data = (unsigned long) set;
	h->gc.function = type_pf_gc;
	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
	add_timer(&h->gc);
	pr_debug("gc initialized, run in every %u\n",
		 IPSET_GC_PERIOD(h->timeout));
}

#undef type_pf_data_equal
#undef type_pf_data_isnull
#undef type_pf_data_copy
#undef type_pf_data_zero_out
#undef type_pf_data_list
#undef type_pf_data_tlist

#undef type_pf_elem
#undef type_pf_telem
#undef type_pf_data_timeout
#undef type_pf_data_expired
#undef type_pf_data_netmask
#undef type_pf_data_timeout_set

#undef type_pf_elem_add
#undef type_pf_add
#undef type_pf_del
#undef type_pf_test_cidrs
#undef type_pf_test

#undef type_pf_elem_tadd
#undef type_pf_expire
#undef type_pf_tadd
#undef type_pf_tdel
#undef type_pf_ttest_cidrs
#undef type_pf_ttest

#undef type_pf_resize
#undef type_pf_tresize
#undef type_pf_flush
#undef type_pf_destroy
#undef type_pf_head
#undef type_pf_list
#undef type_pf_tlist
#undef type_pf_same_set
#undef type_pf_kadt
#undef type_pf_uadt
#undef type_pf_gc
#undef type_pf_gc_init
#undef type_pf_variant
#undef type_pf_tvariant
diff --git a/include/linux/netfilter/ipset/ip_set_hash.h b/include/linux/netfilter/ipset/ip_set_hash.h
new file mode 100644
index 00000000000..b86f15c0452
--- /dev/null
+++ b/include/linux/netfilter/ipset/ip_set_hash.h
@@ -0,0 +1,26 @@
#ifndef __IP_SET_HASH_H
#define __IP_SET_HASH_H

/* Hash type specific error codes */
enum {
	/* Hash is full */
	IPSET_ERR_HASH_FULL = IPSET_ERR_TYPE_SPECIFIC,
	/* Null-valued element */
	IPSET_ERR_HASH_ELEM,
	/* Invalid protocol */
	IPSET_ERR_INVALID_PROTO,
	/* Protocol missing but must be specified */
	IPSET_ERR_MISSING_PROTO,
};

#ifdef __KERNEL__

#define IPSET_DEFAULT_HASHSIZE		1024
#define IPSET_MIMINAL_HASHSIZE		64
#define IPSET_DEFAULT_MAXELEM		65536
#define IPSET_DEFAULT_PROBES		4
#define IPSET_DEFAULT_RESIZE		100

#endif /* __KERNEL__ */

#endif /* __IP_SET_HASH_H */