aboutsummaryrefslogtreecommitdiffstats
path: root/include/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-04-09 14:46:04 -0400
committerDavid S. Miller <davem@davemloft.net>2015-04-09 14:46:04 -0400
commitca69d7102fde3e22b09536867ba14ace84ea80e1 (patch)
tree6cfba19b3885c0ce11df838cdda53cb556e2c46f /include/net
parent3ab1a30fbded99936956442d8cf8f379064e4a26 (diff)
parentaadd51aa71f8d013c818a312bb2a0c5714830dbc (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter updates for net-next The following patchset contains Netfilter updates for your net-next tree. They are: * nf_tables set timeout infrastructure from Patrick McHardy. 1) Add support for set timeouts. 2) Add support for set element timeouts using the new set extension infrastructure. 3) Add garbage collection helper functions to get rid of stale elements. Elements are accumulated in a batch that is asynchronously released via RCU when the batch is full. 4) Add garbage collection synchronization helpers. This introduces a new element busy bit to address concurrent access from the netlink API and the garbage collector. 5) Add timeout support for the nft_hash set implementation. The garbage collector periodically checks for stale elements from the workqueue. * iptables/nftables cgroup fixes: 6) Ignore non full-socket objects from the input path, otherwise cgroup match may crash, from Daniel Borkmann. 7) Fix cgroup in nf_tables. 8) Save some cycles from xt_socket by skipping packet header parsing when skb->sk is already set because of early demux. Also from Daniel. * br_netfilter updates from Florian Westphal. 9) Save frag_max_size and restore it from the forward path too. 10) Use a per-cpu area to restore the original source MAC address when traffic is DNAT'ed. 11) Add helper functions to access physical devices. 12) Use these new physdev helper functions from xt_physdev. 13) Add another nf_bridge_info_get() helper function to fetch the br_netfilter state information. 14) Annotate original layer 2 protocol number in nf_bridge info, instead of using kludgy flags. 15) Also annotate the pkttype mangling when the packet travels back and forth from the IP to the bridge layer, instead of using a flag. * More nf_tables set enhancements from Patrick: 16) Fix possible usage of a set variant that doesn't support timeouts. 
17) Avoid spurious "set is full" errors from Netlink API when there are pending stale elements scheduled to be released. 18) Restrict loop checks to set maps. 19) Add support for dynamic set updates from the packet path. 20) Add support to store optional user data (eg. comments) per set element. BTW, I have also pulled net-next into nf-next to anticipate the conflict resolution between your okfn() signature changes and Florian's br_netfilter updates. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net')
-rw-r--r--include/net/netfilter/nf_tables.h155
-rw-r--r--include/net/netfilter/nf_tables_core.h3
2 files changed, 157 insertions, 1 deletions
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 804981980393..d6a2f0ed5130 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -195,6 +195,7 @@ struct nft_set_estimate {
195}; 195};
196 196
197struct nft_set_ext; 197struct nft_set_ext;
198struct nft_expr;
198 199
199/** 200/**
200 * struct nft_set_ops - nf_tables set operations 201 * struct nft_set_ops - nf_tables set operations
@@ -217,6 +218,15 @@ struct nft_set_ops {
217 bool (*lookup)(const struct nft_set *set, 218 bool (*lookup)(const struct nft_set *set,
218 const struct nft_data *key, 219 const struct nft_data *key,
219 const struct nft_set_ext **ext); 220 const struct nft_set_ext **ext);
221 bool (*update)(struct nft_set *set,
222 const struct nft_data *key,
223 void *(*new)(struct nft_set *,
224 const struct nft_expr *,
225 struct nft_data []),
226 const struct nft_expr *expr,
227 struct nft_data data[],
228 const struct nft_set_ext **ext);
229
220 int (*insert)(const struct nft_set *set, 230 int (*insert)(const struct nft_set *set,
221 const struct nft_set_elem *elem); 231 const struct nft_set_elem *elem);
222 void (*activate)(const struct nft_set *set, 232 void (*activate)(const struct nft_set *set,
@@ -257,6 +267,9 @@ void nft_unregister_set(struct nft_set_ops *ops);
257 * @dtype: data type (verdict or numeric type defined by userspace) 267 * @dtype: data type (verdict or numeric type defined by userspace)
258 * @size: maximum set size 268 * @size: maximum set size
259 * @nelems: number of elements 269 * @nelems: number of elements
270 * @ndeact: number of deactivated elements queued for removal
271 * @timeout: default timeout value in msecs
272 * @gc_int: garbage collection interval in msecs
260 * @policy: set parameterization (see enum nft_set_policies) 273 * @policy: set parameterization (see enum nft_set_policies)
261 * @ops: set ops 274 * @ops: set ops
262 * @pnet: network namespace 275 * @pnet: network namespace
@@ -272,7 +285,10 @@ struct nft_set {
272 u32 ktype; 285 u32 ktype;
273 u32 dtype; 286 u32 dtype;
274 u32 size; 287 u32 size;
275 u32 nelems; 288 atomic_t nelems;
289 u32 ndeact;
290 u64 timeout;
291 u32 gc_int;
276 u16 policy; 292 u16 policy;
277 /* runtime data below here */ 293 /* runtime data below here */
278 const struct nft_set_ops *ops ____cacheline_aligned; 294 const struct nft_set_ops *ops ____cacheline_aligned;
@@ -289,16 +305,27 @@ static inline void *nft_set_priv(const struct nft_set *set)
289 return (void *)set->data; 305 return (void *)set->data;
290} 306}
291 307
308static inline struct nft_set *nft_set_container_of(const void *priv)
309{
310 return (void *)priv - offsetof(struct nft_set, data);
311}
312
292struct nft_set *nf_tables_set_lookup(const struct nft_table *table, 313struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
293 const struct nlattr *nla); 314 const struct nlattr *nla);
294struct nft_set *nf_tables_set_lookup_byid(const struct net *net, 315struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
295 const struct nlattr *nla); 316 const struct nlattr *nla);
296 317
318static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
319{
320 return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ;
321}
322
297/** 323/**
298 * struct nft_set_binding - nf_tables set binding 324 * struct nft_set_binding - nf_tables set binding
299 * 325 *
300 * @list: set bindings list node 326 * @list: set bindings list node
301 * @chain: chain containing the rule bound to the set 327 * @chain: chain containing the rule bound to the set
328 * @flags: set action flags
302 * 329 *
303 * A set binding contains all information necessary for validation 330 * A set binding contains all information necessary for validation
304 * of new elements added to a bound set. 331 * of new elements added to a bound set.
@@ -306,6 +333,7 @@ struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
306struct nft_set_binding { 333struct nft_set_binding {
307 struct list_head list; 334 struct list_head list;
308 const struct nft_chain *chain; 335 const struct nft_chain *chain;
336 u32 flags;
309}; 337};
310 338
311int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, 339int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
@@ -319,12 +347,18 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
319 * @NFT_SET_EXT_KEY: element key 347 * @NFT_SET_EXT_KEY: element key
320 * @NFT_SET_EXT_DATA: mapping data 348 * @NFT_SET_EXT_DATA: mapping data
321 * @NFT_SET_EXT_FLAGS: element flags 349 * @NFT_SET_EXT_FLAGS: element flags
350 * @NFT_SET_EXT_TIMEOUT: element timeout
351 * @NFT_SET_EXT_EXPIRATION: element expiration time
352 * @NFT_SET_EXT_USERDATA: user data associated with the element
322 * @NFT_SET_EXT_NUM: number of extension types 353 * @NFT_SET_EXT_NUM: number of extension types
323 */ 354 */
324enum nft_set_extensions { 355enum nft_set_extensions {
325 NFT_SET_EXT_KEY, 356 NFT_SET_EXT_KEY,
326 NFT_SET_EXT_DATA, 357 NFT_SET_EXT_DATA,
327 NFT_SET_EXT_FLAGS, 358 NFT_SET_EXT_FLAGS,
359 NFT_SET_EXT_TIMEOUT,
360 NFT_SET_EXT_EXPIRATION,
361 NFT_SET_EXT_USERDATA,
328 NFT_SET_EXT_NUM 362 NFT_SET_EXT_NUM
329}; 363};
330 364
@@ -421,15 +455,97 @@ static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext)
421 return nft_set_ext(ext, NFT_SET_EXT_FLAGS); 455 return nft_set_ext(ext, NFT_SET_EXT_FLAGS);
422} 456}
423 457
458static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext)
459{
460 return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT);
461}
462
463static inline unsigned long *nft_set_ext_expiration(const struct nft_set_ext *ext)
464{
465 return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION);
466}
467
468static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext *ext)
469{
470 return nft_set_ext(ext, NFT_SET_EXT_USERDATA);
471}
472
473static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
474{
475 return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) &&
476 time_is_before_eq_jiffies(*nft_set_ext_expiration(ext));
477}
478
424static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, 479static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
425 void *elem) 480 void *elem)
426{ 481{
427 return elem + set->ops->elemsize; 482 return elem + set->ops->elemsize;
428} 483}
429 484
485void *nft_set_elem_init(const struct nft_set *set,
486 const struct nft_set_ext_tmpl *tmpl,
487 const struct nft_data *key,
488 const struct nft_data *data,
489 u64 timeout, gfp_t gfp);
430void nft_set_elem_destroy(const struct nft_set *set, void *elem); 490void nft_set_elem_destroy(const struct nft_set *set, void *elem);
431 491
432/** 492/**
493 * struct nft_set_gc_batch_head - nf_tables set garbage collection batch
494 *
495 * @rcu: rcu head
496 * @set: set the elements belong to
497 * @cnt: count of elements
498 */
499struct nft_set_gc_batch_head {
500 struct rcu_head rcu;
501 const struct nft_set *set;
502 unsigned int cnt;
503};
504
505#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \
506 sizeof(struct nft_set_gc_batch_head)) / \
507 sizeof(void *))
508
509/**
510 * struct nft_set_gc_batch - nf_tables set garbage collection batch
511 *
512 * @head: GC batch head
513 * @elems: garbage collection elements
514 */
515struct nft_set_gc_batch {
516 struct nft_set_gc_batch_head head;
517 void *elems[NFT_SET_GC_BATCH_SIZE];
518};
519
520struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
521 gfp_t gfp);
522void nft_set_gc_batch_release(struct rcu_head *rcu);
523
524static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb)
525{
526 if (gcb != NULL)
527 call_rcu(&gcb->head.rcu, nft_set_gc_batch_release);
528}
529
530static inline struct nft_set_gc_batch *
531nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb,
532 gfp_t gfp)
533{
534 if (gcb != NULL) {
535 if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems))
536 return gcb;
537 nft_set_gc_batch_complete(gcb);
538 }
539 return nft_set_gc_batch_alloc(set, gfp);
540}
541
542static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
543 void *elem)
544{
545 gcb->elems[gcb->head.cnt++] = elem;
546}
547
548/**
433 * struct nft_expr_type - nf_tables expression type 549 * struct nft_expr_type - nf_tables expression type
434 * 550 *
435 * @select_ops: function to select nft_expr_ops 551 * @select_ops: function to select nft_expr_ops
@@ -750,6 +866,8 @@ static inline u8 nft_genmask_cur(const struct net *net)
750 return 1 << ACCESS_ONCE(net->nft.gencursor); 866 return 1 << ACCESS_ONCE(net->nft.gencursor);
751} 867}
752 868
869#define NFT_GENMASK_ANY ((1 << 0) | (1 << 1))
870
753/* 871/*
754 * Set element transaction helpers 872 * Set element transaction helpers
755 */ 873 */
@@ -766,6 +884,41 @@ static inline void nft_set_elem_change_active(const struct nft_set *set,
766 ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet)); 884 ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet));
767} 885}
768 886
887/*
888 * We use a free bit in the genmask field to indicate the element
889 * is busy, meaning it is currently being processed either by
890 * the netlink API or GC.
891 *
892 * Even though the genmask is only a single byte wide, this works
893 * because the extension structure is fully constant once initialized,
894 * so there are no non-atomic write accesses unless it is already
895 * marked busy.
896 */
897#define NFT_SET_ELEM_BUSY_MASK (1 << 2)
898
899#if defined(__LITTLE_ENDIAN_BITFIELD)
900#define NFT_SET_ELEM_BUSY_BIT 2
901#elif defined(__BIG_ENDIAN_BITFIELD)
902#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
903#else
904#error
905#endif
906
907static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext)
908{
909 unsigned long *word = (unsigned long *)ext;
910
911 BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
912 return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word);
913}
914
915static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
916{
917 unsigned long *word = (unsigned long *)ext;
918
919 clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
920}
921
769/** 922/**
770 * struct nft_trans - nf_tables object update in transaction 923 * struct nft_trans - nf_tables object update in transaction
771 * 924 *
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index a75fc8e27cd6..c6f400cfaac8 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -31,6 +31,9 @@ void nft_cmp_module_exit(void);
31int nft_lookup_module_init(void); 31int nft_lookup_module_init(void);
32void nft_lookup_module_exit(void); 32void nft_lookup_module_exit(void);
33 33
34int nft_dynset_module_init(void);
35void nft_dynset_module_exit(void);
36
34int nft_bitwise_module_init(void); 37int nft_bitwise_module_init(void);
35void nft_bitwise_module_exit(void); 38void nft_bitwise_module_exit(void);
36 39