aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
author Patrick McHardy <kaber@trash.net> 2013-10-11 06:06:22 -0400
committer Pablo Neira Ayuso <pablo@netfilter.org> 2013-10-14 11:16:07 -0400
commit 20a69341f2d00cd042e81c82289fba8a13c05a25 (patch)
tree 95340d318608a95c53d023962808edbcbc8e2291 /include
parent 96518518cc417bb0a8c80b9fb736202e28acdf96 (diff)
netfilter: nf_tables: add netlink set API
This patch adds the new netlink API for maintaining nf_tables sets independently of the ruleset. The API supports the following operations: - creation of sets - deletion of sets - querying of specific sets - dumping of all sets - addition of set elements - removal of set elements - dumping of all set elements Sets are identified by name; each table defines an individual namespace. The name of a set may be allocated automatically; this is mostly useful in combination with the NFT_SET_ANONYMOUS flag, which destroys a set automatically once the last reference has been released. Sets can be marked constant, meaning they're not allowed to change while linked to a rule. This allows lockless operation for set types that would otherwise require locking. Additionally, if the implementation supports it, sets can (as before) be used as maps, associating a data value with each key (or range), by specifying the NFT_SET_MAP flag and can be used for interval queries by specifying the NFT_SET_INTERVAL flag. Set elements are added and removed incrementally. All element operations support batching, reducing netlink message and set lookup overhead. The old "set" and "hash" expressions are replaced by a generic "lookup" expression, which binds to the specified set. Userspace is not aware of the actual set implementation used by the kernel anymore; all configuration options are generic. Currently the implementation selection logic is largely missing and the kernel will simply use the first registered implementation supporting the requested operation. Eventually, the plan is to have userspace supply a description of the data characteristics and select the implementation based on expected performance and memory use. This patch includes the new 'lookup' expression to look up matching elements in the set. This patch includes kernel-doc descriptions for this set API and it also includes the following fixes. 
From Patrick McHardy: * netfilter: nf_tables: fix set element data type in dumps * netfilter: nf_tables: fix indentation of struct nft_set_elem comments * netfilter: nf_tables: fix oops in nft_validate_data_load() * netfilter: nf_tables: fix oops while listing sets of built-in tables * netfilter: nf_tables: destroy anonymous sets immediately if binding fails * netfilter: nf_tables: propagate context to set iter callback * netfilter: nf_tables: add loop detection From Pablo Neira Ayuso: * netfilter: nf_tables: allow to dump all existing sets * netfilter: nf_tables: fix wrong type for flags variable in newelem Signed-off-by: Patrick McHardy <kaber@trash.net> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Diffstat (limited to 'include')
-rw-r--r--include/net/netfilter/nf_tables.h149
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h191
2 files changed, 281 insertions, 59 deletions
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index d26dfa345f49..677dd79380ed 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -6,6 +6,8 @@
6#include <linux/netfilter/nf_tables.h> 6#include <linux/netfilter/nf_tables.h>
7#include <net/netlink.h> 7#include <net/netlink.h>
8 8
9#define NFT_JUMP_STACK_SIZE 16
10
9struct nft_pktinfo { 11struct nft_pktinfo {
10 struct sk_buff *skb; 12 struct sk_buff *skb;
11 const struct net_device *in; 13 const struct net_device *in;
@@ -48,23 +50,22 @@ static inline void nft_data_debug(const struct nft_data *data)
48} 50}
49 51
50/** 52/**
51 * struct nft_ctx - nf_tables rule context 53 * struct nft_ctx - nf_tables rule/set context
52 * 54 *
55 * @skb: netlink skb
56 * @nlh: netlink message header
53 * @afi: address family info 57 * @afi: address family info
54 * @table: the table the chain is contained in 58 * @table: the table the chain is contained in
55 * @chain: the chain the rule is contained in 59 * @chain: the chain the rule is contained in
56 */ 60 */
57struct nft_ctx { 61struct nft_ctx {
62 const struct sk_buff *skb;
63 const struct nlmsghdr *nlh;
58 const struct nft_af_info *afi; 64 const struct nft_af_info *afi;
59 const struct nft_table *table; 65 const struct nft_table *table;
60 const struct nft_chain *chain; 66 const struct nft_chain *chain;
61}; 67};
62 68
63enum nft_data_types {
64 NFT_DATA_VALUE,
65 NFT_DATA_VERDICT,
66};
67
68struct nft_data_desc { 69struct nft_data_desc {
69 enum nft_data_types type; 70 enum nft_data_types type;
70 unsigned int len; 71 unsigned int len;
@@ -83,6 +84,11 @@ static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg)
83 return reg == NFT_REG_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE; 84 return reg == NFT_REG_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE;
84} 85}
85 86
87static inline enum nft_registers nft_type_to_reg(enum nft_data_types type)
88{
89 return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1;
90}
91
86extern int nft_validate_input_register(enum nft_registers reg); 92extern int nft_validate_input_register(enum nft_registers reg);
87extern int nft_validate_output_register(enum nft_registers reg); 93extern int nft_validate_output_register(enum nft_registers reg);
88extern int nft_validate_data_load(const struct nft_ctx *ctx, 94extern int nft_validate_data_load(const struct nft_ctx *ctx,
@@ -91,6 +97,132 @@ extern int nft_validate_data_load(const struct nft_ctx *ctx,
91 enum nft_data_types type); 97 enum nft_data_types type);
92 98
93/** 99/**
100 * struct nft_set_elem - generic representation of set elements
101 *
102 * @cookie: implementation specific element cookie
103 * @key: element key
104 * @data: element data (maps only)
105 * @flags: element flags (end of interval)
106 *
107 * The cookie can be used to store a handle to the element for subsequent
108 * removal.
109 */
110struct nft_set_elem {
111 void *cookie;
112 struct nft_data key;
113 struct nft_data data;
114 u32 flags;
115};
116
117struct nft_set;
118struct nft_set_iter {
119 unsigned int count;
120 unsigned int skip;
121 int err;
122 int (*fn)(const struct nft_ctx *ctx,
123 const struct nft_set *set,
124 const struct nft_set_iter *iter,
125 const struct nft_set_elem *elem);
126};
127
128/**
129 * struct nft_set_ops - nf_tables set operations
130 *
131 * @lookup: look up an element within the set
132 * @insert: insert new element into set
133 * @remove: remove element from set
134 * @walk: iterate over all set elements
135 * @privsize: function to return size of set private data
136 * @init: initialize private data of new set instance
137 * @destroy: destroy private data of set instance
138 * @list: nf_tables_set_ops list node
139 * @owner: module reference
140 * @features: features supported by the implementation
141 */
142struct nft_set_ops {
143 bool (*lookup)(const struct nft_set *set,
144 const struct nft_data *key,
145 struct nft_data *data);
146 int (*get)(const struct nft_set *set,
147 struct nft_set_elem *elem);
148 int (*insert)(const struct nft_set *set,
149 const struct nft_set_elem *elem);
150 void (*remove)(const struct nft_set *set,
151 const struct nft_set_elem *elem);
152 void (*walk)(const struct nft_ctx *ctx,
153 const struct nft_set *set,
154 struct nft_set_iter *iter);
155
156 unsigned int (*privsize)(const struct nlattr * const nla[]);
157 int (*init)(const struct nft_set *set,
158 const struct nlattr * const nla[]);
159 void (*destroy)(const struct nft_set *set);
160
161 struct list_head list;
162 struct module *owner;
163 u32 features;
164};
165
166extern int nft_register_set(struct nft_set_ops *ops);
167extern void nft_unregister_set(struct nft_set_ops *ops);
168
169/**
170 * struct nft_set - nf_tables set instance
171 *
172 * @list: table set list node
173 * @bindings: list of set bindings
174 * @name: name of the set
175 * @ktype: key type (numeric type defined by userspace, not used in the kernel)
176 * @dtype: data type (verdict or numeric type defined by userspace)
177 * @ops: set ops
178 * @flags: set flags
179 * @klen: key length
180 * @dlen: data length
181 * @data: private set data
182 */
183struct nft_set {
184 struct list_head list;
185 struct list_head bindings;
186 char name[IFNAMSIZ];
187 u32 ktype;
188 u32 dtype;
189 /* runtime data below here */
190 const struct nft_set_ops *ops ____cacheline_aligned;
191 u16 flags;
192 u8 klen;
193 u8 dlen;
194 unsigned char data[]
195 __attribute__((aligned(__alignof__(u64))));
196};
197
198static inline void *nft_set_priv(const struct nft_set *set)
199{
200 return (void *)set->data;
201}
202
203extern struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
204 const struct nlattr *nla);
205
206/**
207 * struct nft_set_binding - nf_tables set binding
208 *
209 * @list: set bindings list node
210 * @chain: chain containing the rule bound to the set
211 *
212 * A set binding contains all information necessary for validation
213 * of new elements added to a bound set.
214 */
215struct nft_set_binding {
216 struct list_head list;
217 const struct nft_chain *chain;
218};
219
220extern int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
221 struct nft_set_binding *binding);
222extern void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
223 struct nft_set_binding *binding);
224
225/**
94 * struct nft_expr_ops - nf_tables expression operations 226 * struct nft_expr_ops - nf_tables expression operations
95 * 227 *
96 * @eval: Expression evaluation function 228 * @eval: Expression evaluation function
@@ -115,7 +247,7 @@ struct nft_expr_ops {
115 void (*destroy)(const struct nft_expr *expr); 247 void (*destroy)(const struct nft_expr *expr);
116 int (*dump)(struct sk_buff *skb, 248 int (*dump)(struct sk_buff *skb,
117 const struct nft_expr *expr); 249 const struct nft_expr *expr);
118 250 const struct nft_data * (*get_verdict)(const struct nft_expr *expr);
119 struct list_head list; 251 struct list_head list;
120 const char *name; 252 const char *name;
121 struct module *owner; 253 struct module *owner;
@@ -298,4 +430,7 @@ extern void nft_unregister_expr(struct nft_expr_ops *);
298#define MODULE_ALIAS_NFT_EXPR(name) \ 430#define MODULE_ALIAS_NFT_EXPR(name) \
299 MODULE_ALIAS("nft-expr-" name) 431 MODULE_ALIAS("nft-expr-" name)
300 432
433#define MODULE_ALIAS_NFT_SET() \
434 MODULE_ALIAS("nft-set")
435
301#endif /* _NET_NF_TABLES_H */ 436#endif /* _NET_NF_TABLES_H */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index ec6d84a8ed1e..9e924014efe3 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -44,6 +44,12 @@ enum nft_verdicts {
44 * @NFT_MSG_NEWRULE: create a new rule (enum nft_rule_attributes) 44 * @NFT_MSG_NEWRULE: create a new rule (enum nft_rule_attributes)
45 * @NFT_MSG_GETRULE: get a rule (enum nft_rule_attributes) 45 * @NFT_MSG_GETRULE: get a rule (enum nft_rule_attributes)
46 * @NFT_MSG_DELRULE: delete a rule (enum nft_rule_attributes) 46 * @NFT_MSG_DELRULE: delete a rule (enum nft_rule_attributes)
47 * @NFT_MSG_NEWSET: create a new set (enum nft_set_attributes)
48 * @NFT_MSG_GETSET: get a set (enum nft_set_attributes)
49 * @NFT_MSG_DELSET: delete a set (enum nft_set_attributes)
50 * @NFT_MSG_NEWSETELEM: create a new set element (enum nft_set_elem_attributes)
51 * @NFT_MSG_GETSETELEM: get a set element (enum nft_set_elem_attributes)
52 * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes)
47 */ 53 */
48enum nf_tables_msg_types { 54enum nf_tables_msg_types {
49 NFT_MSG_NEWTABLE, 55 NFT_MSG_NEWTABLE,
@@ -55,9 +61,20 @@ enum nf_tables_msg_types {
55 NFT_MSG_NEWRULE, 61 NFT_MSG_NEWRULE,
56 NFT_MSG_GETRULE, 62 NFT_MSG_GETRULE,
57 NFT_MSG_DELRULE, 63 NFT_MSG_DELRULE,
64 NFT_MSG_NEWSET,
65 NFT_MSG_GETSET,
66 NFT_MSG_DELSET,
67 NFT_MSG_NEWSETELEM,
68 NFT_MSG_GETSETELEM,
69 NFT_MSG_DELSETELEM,
58 NFT_MSG_MAX, 70 NFT_MSG_MAX,
59}; 71};
60 72
73/**
74 * enum nft_list_attributes - nf_tables generic list netlink attributes
75 *
76 * @NFTA_LIST_ELEM: list element (NLA_NESTED)
77 */
61enum nft_list_attributes { 78enum nft_list_attributes {
62 NFTA_LIST_UNPEC, 79 NFTA_LIST_UNPEC,
63 NFTA_LIST_ELEM, 80 NFTA_LIST_ELEM,
@@ -127,6 +144,113 @@ enum nft_rule_attributes {
127}; 144};
128#define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) 145#define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1)
129 146
147/**
148 * enum nft_set_flags - nf_tables set flags
149 *
150 * @NFT_SET_ANONYMOUS: name allocation, automatic cleanup on unlink
151 * @NFT_SET_CONSTANT: set contents may not change while bound
152 * @NFT_SET_INTERVAL: set contains intervals
153 * @NFT_SET_MAP: set is used as a dictionary
154 */
155enum nft_set_flags {
156 NFT_SET_ANONYMOUS = 0x1,
157 NFT_SET_CONSTANT = 0x2,
158 NFT_SET_INTERVAL = 0x4,
159 NFT_SET_MAP = 0x8,
160};
161
162/**
163 * enum nft_set_attributes - nf_tables set netlink attributes
164 *
165 * @NFTA_SET_TABLE: table name (NLA_STRING)
166 * @NFTA_SET_NAME: set name (NLA_STRING)
167 * @NFTA_SET_FLAGS: bitmask of enum nft_set_flags (NLA_U32)
168 * @NFTA_SET_KEY_TYPE: key data type, informational purpose only (NLA_U32)
169 * @NFTA_SET_KEY_LEN: key data length (NLA_U32)
170 * @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32)
171 * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32)
172 */
173enum nft_set_attributes {
174 NFTA_SET_UNSPEC,
175 NFTA_SET_TABLE,
176 NFTA_SET_NAME,
177 NFTA_SET_FLAGS,
178 NFTA_SET_KEY_TYPE,
179 NFTA_SET_KEY_LEN,
180 NFTA_SET_DATA_TYPE,
181 NFTA_SET_DATA_LEN,
182 __NFTA_SET_MAX
183};
184#define NFTA_SET_MAX (__NFTA_SET_MAX - 1)
185
186/**
187 * enum nft_set_elem_flags - nf_tables set element flags
188 *
189 * @NFT_SET_ELEM_INTERVAL_END: element ends the previous interval
190 */
191enum nft_set_elem_flags {
192 NFT_SET_ELEM_INTERVAL_END = 0x1,
193};
194
195/**
196 * enum nft_set_elem_attributes - nf_tables set element netlink attributes
197 *
198 * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data_attributes)
199 * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes)
200 * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32)
201 */
202enum nft_set_elem_attributes {
203 NFTA_SET_ELEM_UNSPEC,
204 NFTA_SET_ELEM_KEY,
205 NFTA_SET_ELEM_DATA,
206 NFTA_SET_ELEM_FLAGS,
207 __NFTA_SET_ELEM_MAX
208};
209#define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1)
210
211/**
212 * enum nft_set_elem_list_attributes - nf_tables set element list netlink attributes
213 *
214 * @NFTA_SET_ELEM_LIST_TABLE: table of the set to be changed (NLA_STRING)
215 * @NFTA_SET_ELEM_LIST_SET: name of the set to be changed (NLA_STRING)
216 * @NFTA_SET_ELEM_LIST_ELEMENTS: list of set elements (NLA_NESTED: nft_set_elem_attributes)
217 */
218enum nft_set_elem_list_attributes {
219 NFTA_SET_ELEM_LIST_UNSPEC,
220 NFTA_SET_ELEM_LIST_TABLE,
221 NFTA_SET_ELEM_LIST_SET,
222 NFTA_SET_ELEM_LIST_ELEMENTS,
223 __NFTA_SET_ELEM_LIST_MAX
224};
225#define NFTA_SET_ELEM_LIST_MAX (__NFTA_SET_ELEM_LIST_MAX - 1)
226
227/**
228 * enum nft_data_types - nf_tables data types
229 *
230 * @NFT_DATA_VALUE: generic data
231 * @NFT_DATA_VERDICT: netfilter verdict
232 *
233 * The type of data is usually determined by the kernel directly and is not
234 * explicitly specified by userspace. The only exception is sets, where
235 * userspace specifies the key and mapping data types.
236 *
237 * The values 0xffffff00-0xffffffff are reserved for internally used types.
238 * The remaining range can be freely used by userspace to encode types, all
239 * values are equivalent to NFT_DATA_VALUE.
240 */
241enum nft_data_types {
242 NFT_DATA_VALUE,
243 NFT_DATA_VERDICT = 0xffffff00U,
244};
245
246#define NFT_DATA_RESERVED_MASK 0xffffff00U
247
248/**
249 * enum nft_data_attributes - nf_tables data netlink attributes
250 *
251 * @NFTA_DATA_VALUE: generic data (NLA_BINARY)
252 * @NFTA_DATA_VERDICT: nf_tables verdict (NLA_NESTED: nft_verdict_attributes)
253 */
130enum nft_data_attributes { 254enum nft_data_attributes {
131 NFTA_DATA_UNSPEC, 255 NFTA_DATA_UNSPEC,
132 NFTA_DATA_VALUE, 256 NFTA_DATA_VALUE,
@@ -275,58 +399,21 @@ enum nft_cmp_attributes {
275}; 399};
276#define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) 400#define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1)
277 401
278enum nft_set_elem_flags { 402/**
279 NFT_SE_INTERVAL_END = 0x1, 403 * enum nft_lookup_attributes - nf_tables set lookup expression netlink attributes
280}; 404 *
281 405 * @NFTA_LOOKUP_SET: name of the set to look in (NLA_STRING)
282enum nft_set_elem_attributes { 406 * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers)
283 NFTA_SE_UNSPEC, 407 * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers)
284 NFTA_SE_KEY, 408 */
285 NFTA_SE_DATA, 409enum nft_lookup_attributes {
286 NFTA_SE_FLAGS, 410 NFTA_LOOKUP_UNSPEC,
287 __NFTA_SE_MAX 411 NFTA_LOOKUP_SET,
288}; 412 NFTA_LOOKUP_SREG,
289#define NFTA_SE_MAX (__NFTA_SE_MAX - 1) 413 NFTA_LOOKUP_DREG,
290 414 __NFTA_LOOKUP_MAX
291enum nft_set_flags { 415};
292 NFT_SET_INTERVAL = 0x1, 416#define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1)
293 NFT_SET_MAP = 0x2,
294};
295
296enum nft_set_attributes {
297 NFTA_SET_UNSPEC,
298 NFTA_SET_FLAGS,
299 NFTA_SET_SREG,
300 NFTA_SET_DREG,
301 NFTA_SET_KLEN,
302 NFTA_SET_DLEN,
303 NFTA_SET_ELEMENTS,
304 __NFTA_SET_MAX
305};
306#define NFTA_SET_MAX (__NFTA_SET_MAX - 1)
307
308enum nft_hash_flags {
309 NFT_HASH_MAP = 0x1,
310};
311
312enum nft_hash_elem_attributes {
313 NFTA_HE_UNSPEC,
314 NFTA_HE_KEY,
315 NFTA_HE_DATA,
316 __NFTA_HE_MAX
317};
318#define NFTA_HE_MAX (__NFTA_HE_MAX - 1)
319
320enum nft_hash_attributes {
321 NFTA_HASH_UNSPEC,
322 NFTA_HASH_FLAGS,
323 NFTA_HASH_SREG,
324 NFTA_HASH_DREG,
325 NFTA_HASH_KLEN,
326 NFTA_HASH_ELEMENTS,
327 __NFTA_HASH_MAX
328};
329#define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1)
330 417
331/** 418/**
332 * enum nft_payload_bases - nf_tables payload expression offset bases 419 * enum nft_payload_bases - nf_tables payload expression offset bases