aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/netfilter
diff options
context:
space:
mode:
authorJozsef Kadlecsik <kadlec@blackhole.kfki.hu>2011-02-01 09:28:35 -0500
committerPatrick McHardy <kaber@trash.net>2011-02-01 09:28:35 -0500
commita7b4f989a629493bb4ec4a354def784d440b32c4 (patch)
tree47f5f6a19cd8fb4cbd44a064edbf03a5b102ea1b /include/linux/netfilter
parentf703651ef870bd6b94ddc98ae07488b7d3fd9335 (diff)
netfilter: ipset: IP set core support
The patch adds the IP set core support to the kernel. The IP set core implements a netlink (nfnetlink) based protocol by which one can create, destroy, flush, rename, swap, list, save, restore sets, and add, delete, test elements from userspace. For simplicity (and backward compatibilty and for not to force ip(6)tables to be linked with a netlink library) reasons a small getsockopt-based protocol is also kept in order to communicate with the ip(6)tables match and target. The netlink protocol passes all u16, etc values in network order with NLA_F_NET_BYTEORDER flag. The protocol enforces the proper use of the NLA_F_NESTED and NLA_F_NET_BYTEORDER flags. For other kernel subsystems (netfilter match and target) the API contains the functions to add, delete and test elements in sets and the required calls to get/put refereces to the sets before those operations can be performed. The set types (which are implemented in independent modules) are stored in a simple RCU protected list. A set type may have variants: for example without timeout or with timeout support, for IPv4 or for IPv6. The sets (i.e. the pointers to the sets) are stored in an array. The sets are identified by their index in the array, which makes possible easy and fast swapping of sets. The array is protected indirectly by the nfnl mutex from nfnetlink. The content of the sets are protected by the rwlock of the set. There are functional differences between the add/del/test functions for the kernel and userspace: - kernel add/del/test: works on the current packet (i.e. one element) - kernel test: may trigger an "add" operation in order to fill out unspecified parts of the element from the packet (like MAC address) - userspace add/del: works on the netlink message and thus possibly on multiple elements from the IPSET_ATTR_ADT container attribute. - userspace add: may trigger resizing of a set Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'include/linux/netfilter')
-rw-r--r--include/linux/netfilter/ipset/ip_set.h452
-rw-r--r--include/linux/netfilter/ipset/ip_set_getport.h11
-rw-r--r--include/linux/netfilter/ipset/pfxlen.h35
3 files changed, 498 insertions, 0 deletions
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
new file mode 100644
index 00000000000..ec333d83f3b
--- /dev/null
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -0,0 +1,452 @@
1#ifndef _IP_SET_H
2#define _IP_SET_H
3
4/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
5 * Patrick Schaaf <bof@bof.de>
6 * Martin Josefsson <gandalf@wlug.westbo.se>
7 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14/* The protocol version */
15#define IPSET_PROTOCOL 6
16
17/* The max length of strings including NUL: set and type identifiers */
18#define IPSET_MAXNAMELEN 32
19
20/* Message types and commands */
21enum ipset_cmd {
22 IPSET_CMD_NONE,
23 IPSET_CMD_PROTOCOL, /* 1: Return protocol version */
24 IPSET_CMD_CREATE, /* 2: Create a new (empty) set */
25 IPSET_CMD_DESTROY, /* 3: Destroy a (empty) set */
26 IPSET_CMD_FLUSH, /* 4: Remove all elements from a set */
27 IPSET_CMD_RENAME, /* 5: Rename a set */
28 IPSET_CMD_SWAP, /* 6: Swap two sets */
29 IPSET_CMD_LIST, /* 7: List sets */
30 IPSET_CMD_SAVE, /* 8: Save sets */
31 IPSET_CMD_ADD, /* 9: Add an element to a set */
32 IPSET_CMD_DEL, /* 10: Delete an element from a set */
33 IPSET_CMD_TEST, /* 11: Test an element in a set */
34 IPSET_CMD_HEADER, /* 12: Get set header data only */
35 IPSET_CMD_TYPE, /* 13: Get set type */
36 IPSET_MSG_MAX, /* Netlink message commands */
37
38 /* Commands in userspace: */
39 IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 14: Enter restore mode */
40 IPSET_CMD_HELP, /* 15: Get help */
41 IPSET_CMD_VERSION, /* 16: Get program version */
42 IPSET_CMD_QUIT, /* 17: Quit from interactive mode */
43
44 IPSET_CMD_MAX,
45
46 IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 18: Commit buffered commands */
47};
48
49/* Attributes at command level */
50enum {
51 IPSET_ATTR_UNSPEC,
52 IPSET_ATTR_PROTOCOL, /* 1: Protocol version */
53 IPSET_ATTR_SETNAME, /* 2: Name of the set */
54 IPSET_ATTR_TYPENAME, /* 3: Typename */
55 IPSET_ATTR_SETNAME2 = IPSET_ATTR_TYPENAME, /* Setname at rename/swap */
56 IPSET_ATTR_REVISION, /* 4: Settype revision */
57 IPSET_ATTR_FAMILY, /* 5: Settype family */
58 IPSET_ATTR_FLAGS, /* 6: Flags at command level */
59 IPSET_ATTR_DATA, /* 7: Nested attributes */
60 IPSET_ATTR_ADT, /* 8: Multiple data containers */
61 IPSET_ATTR_LINENO, /* 9: Restore lineno */
62 IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */
63 IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN, /* type rev min */
64 __IPSET_ATTR_CMD_MAX,
65};
66#define IPSET_ATTR_CMD_MAX (__IPSET_ATTR_CMD_MAX - 1)
67
68/* CADT specific attributes */
69enum {
70 IPSET_ATTR_IP = IPSET_ATTR_UNSPEC + 1,
71 IPSET_ATTR_IP_FROM = IPSET_ATTR_IP,
72 IPSET_ATTR_IP_TO, /* 2 */
73 IPSET_ATTR_CIDR, /* 3 */
74 IPSET_ATTR_PORT, /* 4 */
75 IPSET_ATTR_PORT_FROM = IPSET_ATTR_PORT,
76 IPSET_ATTR_PORT_TO, /* 5 */
77 IPSET_ATTR_TIMEOUT, /* 6 */
78 IPSET_ATTR_PROTO, /* 7 */
79 IPSET_ATTR_CADT_FLAGS, /* 8 */
80 IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, /* 9 */
81 /* Reserve empty slots */
82 IPSET_ATTR_CADT_MAX = 16,
83 /* Create-only specific attributes */
84 IPSET_ATTR_GC,
85 IPSET_ATTR_HASHSIZE,
86 IPSET_ATTR_MAXELEM,
87 IPSET_ATTR_NETMASK,
88 IPSET_ATTR_PROBES,
89 IPSET_ATTR_RESIZE,
90 IPSET_ATTR_SIZE,
91 /* Kernel-only */
92 IPSET_ATTR_ELEMENTS,
93 IPSET_ATTR_REFERENCES,
94 IPSET_ATTR_MEMSIZE,
95
96 __IPSET_ATTR_CREATE_MAX,
97};
98#define IPSET_ATTR_CREATE_MAX (__IPSET_ATTR_CREATE_MAX - 1)
99
100/* ADT specific attributes */
101enum {
102 IPSET_ATTR_ETHER = IPSET_ATTR_CADT_MAX + 1,
103 IPSET_ATTR_NAME,
104 IPSET_ATTR_NAMEREF,
105 IPSET_ATTR_IP2,
106 IPSET_ATTR_CIDR2,
107 __IPSET_ATTR_ADT_MAX,
108};
109#define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1)
110
111/* IP specific attributes */
112enum {
113 IPSET_ATTR_IPADDR_IPV4 = IPSET_ATTR_UNSPEC + 1,
114 IPSET_ATTR_IPADDR_IPV6,
115 __IPSET_ATTR_IPADDR_MAX,
116};
117#define IPSET_ATTR_IPADDR_MAX (__IPSET_ATTR_IPADDR_MAX - 1)
118
119/* Error codes */
120enum ipset_errno {
121 IPSET_ERR_PRIVATE = 4096,
122 IPSET_ERR_PROTOCOL,
123 IPSET_ERR_FIND_TYPE,
124 IPSET_ERR_MAX_SETS,
125 IPSET_ERR_BUSY,
126 IPSET_ERR_EXIST_SETNAME2,
127 IPSET_ERR_TYPE_MISMATCH,
128 IPSET_ERR_EXIST,
129 IPSET_ERR_INVALID_CIDR,
130 IPSET_ERR_INVALID_NETMASK,
131 IPSET_ERR_INVALID_FAMILY,
132 IPSET_ERR_TIMEOUT,
133 IPSET_ERR_REFERENCED,
134 IPSET_ERR_IPADDR_IPV4,
135 IPSET_ERR_IPADDR_IPV6,
136
137 /* Type specific error codes */
138 IPSET_ERR_TYPE_SPECIFIC = 4352,
139};
140
141/* Flags at command level */
142enum ipset_cmd_flags {
143 IPSET_FLAG_BIT_EXIST = 0,
144 IPSET_FLAG_EXIST = (1 << IPSET_FLAG_BIT_EXIST),
145};
146
147/* Flags at CADT attribute level */
148enum ipset_cadt_flags {
149 IPSET_FLAG_BIT_BEFORE = 0,
150 IPSET_FLAG_BEFORE = (1 << IPSET_FLAG_BIT_BEFORE),
151};
152
153/* Commands with settype-specific attributes */
154enum ipset_adt {
155 IPSET_ADD,
156 IPSET_DEL,
157 IPSET_TEST,
158 IPSET_ADT_MAX,
159 IPSET_CREATE = IPSET_ADT_MAX,
160 IPSET_CADT_MAX,
161};
162
163#ifdef __KERNEL__
164#include <linux/ip.h>
165#include <linux/ipv6.h>
166#include <linux/netlink.h>
167#include <linux/netfilter.h>
168#include <linux/vmalloc.h>
169#include <net/netlink.h>
170
171/* Sets are identified by an index in kernel space. Tweak with ip_set_id_t
172 * and IPSET_INVALID_ID if you want to increase the max number of sets.
173 */
174typedef u16 ip_set_id_t;
175
176#define IPSET_INVALID_ID 65535
177
178enum ip_set_dim {
179 IPSET_DIM_ZERO = 0,
180 IPSET_DIM_ONE,
181 IPSET_DIM_TWO,
182 IPSET_DIM_THREE,
183 /* Max dimension in elements.
184 * If changed, new revision of iptables match/target is required.
185 */
186 IPSET_DIM_MAX = 6,
187};
188
189/* Option flags for kernel operations */
190enum ip_set_kopt {
191 IPSET_INV_MATCH = (1 << IPSET_DIM_ZERO),
192 IPSET_DIM_ONE_SRC = (1 << IPSET_DIM_ONE),
193 IPSET_DIM_TWO_SRC = (1 << IPSET_DIM_TWO),
194 IPSET_DIM_THREE_SRC = (1 << IPSET_DIM_THREE),
195};
196
197/* Set features */
198enum ip_set_feature {
199 IPSET_TYPE_IP_FLAG = 0,
200 IPSET_TYPE_IP = (1 << IPSET_TYPE_IP_FLAG),
201 IPSET_TYPE_PORT_FLAG = 1,
202 IPSET_TYPE_PORT = (1 << IPSET_TYPE_PORT_FLAG),
203 IPSET_TYPE_MAC_FLAG = 2,
204 IPSET_TYPE_MAC = (1 << IPSET_TYPE_MAC_FLAG),
205 IPSET_TYPE_IP2_FLAG = 3,
206 IPSET_TYPE_IP2 = (1 << IPSET_TYPE_IP2_FLAG),
207 IPSET_TYPE_NAME_FLAG = 4,
208 IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG),
209 /* Strictly speaking not a feature, but a flag for dumping:
210 * this settype must be dumped last */
211 IPSET_DUMP_LAST_FLAG = 7,
212 IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG),
213};
214
215struct ip_set;
216
217typedef int (*ipset_adtfn)(struct ip_set *set, void *value, u32 timeout);
218
219/* Set type, variant-specific part */
220struct ip_set_type_variant {
221 /* Kernelspace: test/add/del entries
222 * returns negative error code,
223 * zero for no match/success to add/delete
224 * positive for matching element */
225 int (*kadt)(struct ip_set *set, const struct sk_buff * skb,
226 enum ipset_adt adt, u8 pf, u8 dim, u8 flags);
227
228 /* Userspace: test/add/del entries
229 * returns negative error code,
230 * zero for no match/success to add/delete
231 * positive for matching element */
232 int (*uadt)(struct ip_set *set, struct nlattr *tb[],
233 enum ipset_adt adt, u32 *lineno, u32 flags);
234
235 /* Low level add/del/test functions */
236 ipset_adtfn adt[IPSET_ADT_MAX];
237
238 /* When adding entries and set is full, try to resize the set */
239 int (*resize)(struct ip_set *set, bool retried);
240 /* Destroy the set */
241 void (*destroy)(struct ip_set *set);
242 /* Flush the elements */
243 void (*flush)(struct ip_set *set);
244 /* Expire entries before listing */
245 void (*expire)(struct ip_set *set);
246 /* List set header data */
247 int (*head)(struct ip_set *set, struct sk_buff *skb);
248 /* List elements */
249 int (*list)(const struct ip_set *set, struct sk_buff *skb,
250 struct netlink_callback *cb);
251
252 /* Return true if "b" set is the same as "a"
253 * according to the create set parameters */
254 bool (*same_set)(const struct ip_set *a, const struct ip_set *b);
255};
256
257/* The core set type structure */
258struct ip_set_type {
259 struct list_head list;
260
261 /* Typename */
262 char name[IPSET_MAXNAMELEN];
263 /* Protocol version */
264 u8 protocol;
265 /* Set features to control swapping */
266 u8 features;
267 /* Set type dimension */
268 u8 dimension;
269 /* Supported family: may be AF_UNSPEC for both AF_INET/AF_INET6 */
270 u8 family;
271 /* Type revision */
272 u8 revision;
273
274 /* Create set */
275 int (*create)(struct ip_set *set, struct nlattr *tb[], u32 flags);
276
277 /* Attribute policies */
278 const struct nla_policy create_policy[IPSET_ATTR_CREATE_MAX + 1];
279 const struct nla_policy adt_policy[IPSET_ATTR_ADT_MAX + 1];
280
281 /* Set this to THIS_MODULE if you are a module, otherwise NULL */
282 struct module *me;
283};
284
285/* register and unregister set type */
286extern int ip_set_type_register(struct ip_set_type *set_type);
287extern void ip_set_type_unregister(struct ip_set_type *set_type);
288
289/* A generic IP set */
290struct ip_set {
291 /* The name of the set */
292 char name[IPSET_MAXNAMELEN];
293 /* Lock protecting the set data */
294 rwlock_t lock;
295 /* References to the set */
296 atomic_t ref;
297 /* The core set type */
298 struct ip_set_type *type;
299 /* The type variant doing the real job */
300 const struct ip_set_type_variant *variant;
301 /* The actual INET family of the set */
302 u8 family;
303 /* The type specific data */
304 void *data;
305};
306
307/* register and unregister set references */
308extern ip_set_id_t ip_set_get_byname(const char *name, struct ip_set **set);
309extern void ip_set_put_byindex(ip_set_id_t index);
310extern const char * ip_set_name_byindex(ip_set_id_t index);
311extern ip_set_id_t ip_set_nfnl_get(const char *name);
312extern ip_set_id_t ip_set_nfnl_get_byindex(ip_set_id_t index);
313extern void ip_set_nfnl_put(ip_set_id_t index);
314
315/* API for iptables set match, and SET target */
316extern int ip_set_add(ip_set_id_t id, const struct sk_buff *skb,
317 u8 family, u8 dim, u8 flags);
318extern int ip_set_del(ip_set_id_t id, const struct sk_buff *skb,
319 u8 family, u8 dim, u8 flags);
320extern int ip_set_test(ip_set_id_t id, const struct sk_buff *skb,
321 u8 family, u8 dim, u8 flags);
322
323/* Utility functions */
324extern void * ip_set_alloc(size_t size);
325extern void ip_set_free(void *members);
326extern int ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr);
327extern int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr);
328
329static inline int
330ip_set_get_hostipaddr4(struct nlattr *nla, u32 *ipaddr)
331{
332 __be32 ip;
333 int ret = ip_set_get_ipaddr4(nla, &ip);
334
335 if (ret)
336 return ret;
337 *ipaddr = ntohl(ip);
338 return 0;
339}
340
341/* Ignore IPSET_ERR_EXIST errors if asked to do so? */
342static inline bool
343ip_set_eexist(int ret, u32 flags)
344{
345 return ret == -IPSET_ERR_EXIST && (flags & IPSET_FLAG_EXIST);
346}
347
348/* Check the NLA_F_NET_BYTEORDER flag */
349static inline bool
350ip_set_attr_netorder(struct nlattr *tb[], int type)
351{
352 return tb[type] && (tb[type]->nla_type & NLA_F_NET_BYTEORDER);
353}
354
355static inline bool
356ip_set_optattr_netorder(struct nlattr *tb[], int type)
357{
358 return !tb[type] || (tb[type]->nla_type & NLA_F_NET_BYTEORDER);
359}
360
361/* Useful converters */
362static inline u32
363ip_set_get_h32(const struct nlattr *attr)
364{
365 return ntohl(nla_get_be32(attr));
366}
367
368static inline u16
369ip_set_get_h16(const struct nlattr *attr)
370{
371 return ntohs(nla_get_be16(attr));
372}
373
374#define ipset_nest_start(skb, attr) nla_nest_start(skb, attr | NLA_F_NESTED)
375#define ipset_nest_end(skb, start) nla_nest_end(skb, start)
376
377#define NLA_PUT_IPADDR4(skb, type, ipaddr) \
378do { \
379 struct nlattr *__nested = ipset_nest_start(skb, type); \
380 \
381 if (!__nested) \
382 goto nla_put_failure; \
383 NLA_PUT_NET32(skb, IPSET_ATTR_IPADDR_IPV4, ipaddr); \
384 ipset_nest_end(skb, __nested); \
385} while (0)
386
387#define NLA_PUT_IPADDR6(skb, type, ipaddrptr) \
388do { \
389 struct nlattr *__nested = ipset_nest_start(skb, type); \
390 \
391 if (!__nested) \
392 goto nla_put_failure; \
393 NLA_PUT(skb, IPSET_ATTR_IPADDR_IPV6, \
394 sizeof(struct in6_addr), ipaddrptr); \
395 ipset_nest_end(skb, __nested); \
396} while (0)
397
398/* Get address from skbuff */
399static inline __be32
400ip4addr(const struct sk_buff *skb, bool src)
401{
402 return src ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr;
403}
404
405static inline void
406ip4addrptr(const struct sk_buff *skb, bool src, __be32 *addr)
407{
408 *addr = src ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr;
409}
410
411static inline void
412ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr)
413{
414 memcpy(addr, src ? &ipv6_hdr(skb)->saddr : &ipv6_hdr(skb)->daddr,
415 sizeof(*addr));
416}
417
418/* Calculate the bytes required to store the inclusive range of a-b */
419static inline int
420bitmap_bytes(u32 a, u32 b)
421{
422 return 4 * ((((b - a + 8) / 8) + 3) / 4);
423}
424
425/* Interface to iptables/ip6tables */
426
427#define SO_IP_SET 83
428
429union ip_set_name_index {
430 char name[IPSET_MAXNAMELEN];
431 ip_set_id_t index;
432};
433
434#define IP_SET_OP_GET_BYNAME 0x00000006 /* Get set index by name */
435struct ip_set_req_get_set {
436 unsigned op;
437 unsigned version;
438 union ip_set_name_index set;
439};
440
441#define IP_SET_OP_GET_BYINDEX 0x00000007 /* Get set name by index */
442/* Uses ip_set_req_get_set */
443
444#define IP_SET_OP_VERSION 0x00000100 /* Ask kernel version */
445struct ip_set_req_version {
446 unsigned op;
447 unsigned version;
448};
449
450#endif /* __KERNEL__ */
451
452#endif /*_IP_SET_H */
diff --git a/include/linux/netfilter/ipset/ip_set_getport.h b/include/linux/netfilter/ipset/ip_set_getport.h
new file mode 100644
index 00000000000..694c433298b
--- /dev/null
+++ b/include/linux/netfilter/ipset/ip_set_getport.h
@@ -0,0 +1,11 @@
1#ifndef _IP_SET_GETPORT_H
2#define _IP_SET_GETPORT_H
3
4extern bool ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
5 __be16 *port, u8 *proto);
6extern bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
7 __be16 *port, u8 *proto);
8extern bool ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src,
9 __be16 *port);
10
11#endif /*_IP_SET_GETPORT_H*/
diff --git a/include/linux/netfilter/ipset/pfxlen.h b/include/linux/netfilter/ipset/pfxlen.h
new file mode 100644
index 00000000000..0e1fb50da56
--- /dev/null
+++ b/include/linux/netfilter/ipset/pfxlen.h
@@ -0,0 +1,35 @@
1#ifndef _PFXLEN_H
2#define _PFXLEN_H
3
4#include <asm/byteorder.h>
5#include <linux/netfilter.h>
6
7/* Prefixlen maps, by Jan Engelhardt */
8extern const union nf_inet_addr ip_set_netmask_map[];
9extern const union nf_inet_addr ip_set_hostmask_map[];
10
11static inline __be32
12ip_set_netmask(u8 pfxlen)
13{
14 return ip_set_netmask_map[pfxlen].ip;
15}
16
17static inline const __be32 *
18ip_set_netmask6(u8 pfxlen)
19{
20 return &ip_set_netmask_map[pfxlen].ip6[0];
21}
22
23static inline u32
24ip_set_hostmask(u8 pfxlen)
25{
26 return (__force u32) ip_set_hostmask_map[pfxlen].ip;
27}
28
29static inline const __be32 *
30ip_set_hostmask6(u8 pfxlen)
31{
32 return &ip_set_hostmask_map[pfxlen].ip6[0];
33}
34
35#endif /*_PFXLEN_H */