diff options
author | David S. Miller <davem@davemloft.net> | 2014-03-17 15:06:24 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-03-17 15:06:24 -0400 |
commit | e86e180b824e00733bd0e499d412a595078f9b51 (patch) | |
tree | ebda350b99785b4d0dd0188dd28fa17ec8135474 | |
parent | e7ef085d0a9dc1cc72e7d8108ed3b4e1a5e8d938 (diff) | |
parent | 7d08487777c8b30dea34790734d708470faaf1e5 (diff) |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:
====================
Netfilter/IPVS updates for net-next
The following patchset contains Netfilter/IPVS updates for net-next,
most relevantly they are:
* cleanup to remove a double semicolon, from Stephen Hemminger.
* calm down sparse warning in xt_ipcomp, from Fan Du.
* nf_ct_labels support for nf_tables, from Florian Westphal.
* new macros to simplify rcu dereferences in the scope of nfnetlink
and nf_tables, from Patrick McHardy.
* Accept queue and drop (including reason for drop) to verdict
parsing in nf_tables, also from Patrick.
* Remove unused random seed initialization in nfnetlink_log, from
Florian Westphal.
* Allow to attach user-specific information to nf_tables rules, useful
to attach user comments to rule, from me.
* Return errors in ipset according to the manpage documentation, from
Jozsef Kadlecsik.
* Fix coccinelle warnings related to incorrect bool type usage for ipset,
from Fengguang Wu.
* Add hash:ip,mark set type to ipset, from Vytas Dauksa.
* Fix the message printed by ipset for each netns that is created,
from Ilia Mirkin.
* Add forceadd option to ipset, which evicts a random entry from the set
if it becomes full, from Josh Hunt.
* Minor IPVS cleanups and fixes from Andi Kleen and Tingwei Liu.
* Improve conntrack scalability by removing a central spinlock, original
work from Eric Dumazet. Jesper Dangaard Brouer took them over to address
remaining issues. Several patches to prepare this change come in first
place.
* Rework nft_hash to resolve bugs (leaking chain, missing rcu synchronization
on element removal, etc.), from Patrick McHardy.
* Restore context in the rule deletion path, as we now release rule objects
synchronously, from Patrick McHardy. This gets back event notification for
anonymous sets.
* Fix NAT family validation in nft_nat, also from Patrick.
* Improve scalability of xt_connlimit by using an array of spinlocks and
by introducing a rb-tree of hashtables for faster lookup of accounted
objects per network. This patch was preceded by several patches and
refactorings to accommodate this change, including the use of kmem_cache,
from Florian Westphal.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
46 files changed, 1527 insertions, 475 deletions
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 0c7d01eae56c..96afc29184be 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h | |||
@@ -39,11 +39,13 @@ enum ip_set_feature { | |||
39 | IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG), | 39 | IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG), |
40 | IPSET_TYPE_IFACE_FLAG = 5, | 40 | IPSET_TYPE_IFACE_FLAG = 5, |
41 | IPSET_TYPE_IFACE = (1 << IPSET_TYPE_IFACE_FLAG), | 41 | IPSET_TYPE_IFACE = (1 << IPSET_TYPE_IFACE_FLAG), |
42 | IPSET_TYPE_NOMATCH_FLAG = 6, | 42 | IPSET_TYPE_MARK_FLAG = 6, |
43 | IPSET_TYPE_MARK = (1 << IPSET_TYPE_MARK_FLAG), | ||
44 | IPSET_TYPE_NOMATCH_FLAG = 7, | ||
43 | IPSET_TYPE_NOMATCH = (1 << IPSET_TYPE_NOMATCH_FLAG), | 45 | IPSET_TYPE_NOMATCH = (1 << IPSET_TYPE_NOMATCH_FLAG), |
44 | /* Strictly speaking not a feature, but a flag for dumping: | 46 | /* Strictly speaking not a feature, but a flag for dumping: |
45 | * this settype must be dumped last */ | 47 | * this settype must be dumped last */ |
46 | IPSET_DUMP_LAST_FLAG = 7, | 48 | IPSET_DUMP_LAST_FLAG = 8, |
47 | IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG), | 49 | IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG), |
48 | }; | 50 | }; |
49 | 51 | ||
@@ -63,6 +65,7 @@ enum ip_set_extension { | |||
63 | #define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT) | 65 | #define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT) |
64 | #define SET_WITH_COUNTER(s) ((s)->extensions & IPSET_EXT_COUNTER) | 66 | #define SET_WITH_COUNTER(s) ((s)->extensions & IPSET_EXT_COUNTER) |
65 | #define SET_WITH_COMMENT(s) ((s)->extensions & IPSET_EXT_COMMENT) | 67 | #define SET_WITH_COMMENT(s) ((s)->extensions & IPSET_EXT_COMMENT) |
68 | #define SET_WITH_FORCEADD(s) ((s)->flags & IPSET_CREATE_FLAG_FORCEADD) | ||
66 | 69 | ||
67 | /* Extension id, in size order */ | 70 | /* Extension id, in size order */ |
68 | enum ip_set_ext_id { | 71 | enum ip_set_ext_id { |
@@ -171,8 +174,6 @@ struct ip_set_type { | |||
171 | char name[IPSET_MAXNAMELEN]; | 174 | char name[IPSET_MAXNAMELEN]; |
172 | /* Protocol version */ | 175 | /* Protocol version */ |
173 | u8 protocol; | 176 | u8 protocol; |
174 | /* Set features to control swapping */ | ||
175 | u8 features; | ||
176 | /* Set type dimension */ | 177 | /* Set type dimension */ |
177 | u8 dimension; | 178 | u8 dimension; |
178 | /* | 179 | /* |
@@ -182,6 +183,8 @@ struct ip_set_type { | |||
182 | u8 family; | 183 | u8 family; |
183 | /* Type revisions */ | 184 | /* Type revisions */ |
184 | u8 revision_min, revision_max; | 185 | u8 revision_min, revision_max; |
186 | /* Set features to control swapping */ | ||
187 | u16 features; | ||
185 | 188 | ||
186 | /* Create set */ | 189 | /* Create set */ |
187 | int (*create)(struct net *net, struct ip_set *set, | 190 | int (*create)(struct net *net, struct ip_set *set, |
@@ -217,6 +220,8 @@ struct ip_set { | |||
217 | u8 revision; | 220 | u8 revision; |
218 | /* Extensions */ | 221 | /* Extensions */ |
219 | u8 extensions; | 222 | u8 extensions; |
223 | /* Create flags */ | ||
224 | u8 flags; | ||
220 | /* Default timeout value, if enabled */ | 225 | /* Default timeout value, if enabled */ |
221 | u32 timeout; | 226 | u32 timeout; |
222 | /* Element data size */ | 227 | /* Element data size */ |
@@ -251,6 +256,8 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set) | |||
251 | cadt_flags |= IPSET_FLAG_WITH_COUNTERS; | 256 | cadt_flags |= IPSET_FLAG_WITH_COUNTERS; |
252 | if (SET_WITH_COMMENT(set)) | 257 | if (SET_WITH_COMMENT(set)) |
253 | cadt_flags |= IPSET_FLAG_WITH_COMMENT; | 258 | cadt_flags |= IPSET_FLAG_WITH_COMMENT; |
259 | if (SET_WITH_FORCEADD(set)) | ||
260 | cadt_flags |= IPSET_FLAG_WITH_FORCEADD; | ||
254 | 261 | ||
255 | if (!cadt_flags) | 262 | if (!cadt_flags) |
256 | return 0; | 263 | return 0; |
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 28c74367e900..e955d4730625 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h | |||
@@ -44,6 +44,27 @@ int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, | |||
44 | 44 | ||
45 | void nfnl_lock(__u8 subsys_id); | 45 | void nfnl_lock(__u8 subsys_id); |
46 | void nfnl_unlock(__u8 subsys_id); | 46 | void nfnl_unlock(__u8 subsys_id); |
47 | #ifdef CONFIG_PROVE_LOCKING | ||
48 | int lockdep_nfnl_is_held(__u8 subsys_id); | ||
49 | #else | ||
50 | static inline int lockdep_nfnl_is_held(__u8 subsys_id) | ||
51 | { | ||
52 | return 1; | ||
53 | } | ||
54 | #endif /* CONFIG_PROVE_LOCKING */ | ||
55 | |||
56 | /* | ||
57 | * nfnl_dereference - fetch RCU pointer when updates are prevented by subsys mutex | ||
58 | * | ||
59 | * @p: The pointer to read, prior to dereferencing | ||
60 | * @ss: The nfnetlink subsystem ID | ||
61 | * | ||
62 | * Return the value of the specified RCU-protected pointer, but omit | ||
63 | * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because | ||
64 | * caller holds the NFNL subsystem mutex. | ||
65 | */ | ||
66 | #define nfnl_dereference(p, ss) \ | ||
67 | rcu_dereference_protected(p, lockdep_nfnl_is_held(ss)) | ||
47 | 68 | ||
48 | #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \ | 69 | #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \ |
49 | MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys)) | 70 | MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys)) |
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index b2ac6246b7e0..37252f71a380 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h | |||
@@ -73,10 +73,17 @@ struct nf_conn_help { | |||
73 | 73 | ||
74 | struct nf_conn { | 74 | struct nf_conn { |
75 | /* Usage count in here is 1 for hash table/destruct timer, 1 per skb, | 75 | /* Usage count in here is 1 for hash table/destruct timer, 1 per skb, |
76 | plus 1 for any connection(s) we are `master' for */ | 76 | * plus 1 for any connection(s) we are `master' for |
77 | * | ||
78 | * Hint, SKB address this struct and refcnt via skb->nfct and | ||
79 | * helpers nf_conntrack_get() and nf_conntrack_put(). | ||
80 | * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt, | ||
81 | * beware nf_ct_get() is different and don't inc refcnt. | ||
82 | */ | ||
77 | struct nf_conntrack ct_general; | 83 | struct nf_conntrack ct_general; |
78 | 84 | ||
79 | spinlock_t lock; | 85 | spinlock_t lock; |
86 | u16 cpu; | ||
80 | 87 | ||
81 | /* XXX should I move this to the tail ? - Y.K */ | 88 | /* XXX should I move this to the tail ? - Y.K */ |
82 | /* These are my tuples; original and reply */ | 89 | /* These are my tuples; original and reply */ |
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 15308b8eb5b5..cc0c18827602 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h | |||
@@ -77,6 +77,13 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, | |||
77 | const struct nf_conntrack_l3proto *l3proto, | 77 | const struct nf_conntrack_l3proto *l3proto, |
78 | const struct nf_conntrack_l4proto *proto); | 78 | const struct nf_conntrack_l4proto *proto); |
79 | 79 | ||
80 | extern spinlock_t nf_conntrack_lock ; | 80 | #ifdef CONFIG_LOCKDEP |
81 | # define CONNTRACK_LOCKS 8 | ||
82 | #else | ||
83 | # define CONNTRACK_LOCKS 1024 | ||
84 | #endif | ||
85 | extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; | ||
86 | |||
87 | extern spinlock_t nf_conntrack_expect_lock; | ||
81 | 88 | ||
82 | #endif /* _NF_CONNTRACK_CORE_H */ | 89 | #endif /* _NF_CONNTRACK_CORE_H */ |
diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index c985695283b3..dec6336bf850 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h | |||
@@ -7,6 +7,8 @@ | |||
7 | 7 | ||
8 | #include <uapi/linux/netfilter/xt_connlabel.h> | 8 | #include <uapi/linux/netfilter/xt_connlabel.h> |
9 | 9 | ||
10 | #define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) | ||
11 | |||
10 | struct nf_conn_labels { | 12 | struct nf_conn_labels { |
11 | u8 words; | 13 | u8 words; |
12 | unsigned long bits[]; | 14 | unsigned long bits[]; |
@@ -29,7 +31,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) | |||
29 | u8 words; | 31 | u8 words; |
30 | 32 | ||
31 | words = ACCESS_ONCE(net->ct.label_words); | 33 | words = ACCESS_ONCE(net->ct.label_words); |
32 | if (words == 0 || WARN_ON_ONCE(words > 8)) | 34 | if (words == 0) |
33 | return NULL; | 35 | return NULL; |
34 | 36 | ||
35 | cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, | 37 | cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, |
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e7e14ffe0f6a..e6bc14d8fa9a 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h | |||
@@ -3,6 +3,7 @@ | |||
3 | 3 | ||
4 | #include <linux/list.h> | 4 | #include <linux/list.h> |
5 | #include <linux/netfilter.h> | 5 | #include <linux/netfilter.h> |
6 | #include <linux/netfilter/nfnetlink.h> | ||
6 | #include <linux/netfilter/x_tables.h> | 7 | #include <linux/netfilter/x_tables.h> |
7 | #include <linux/netfilter/nf_tables.h> | 8 | #include <linux/netfilter/nf_tables.h> |
8 | #include <net/netlink.h> | 9 | #include <net/netlink.h> |
@@ -288,7 +289,8 @@ struct nft_expr_ops { | |||
288 | int (*init)(const struct nft_ctx *ctx, | 289 | int (*init)(const struct nft_ctx *ctx, |
289 | const struct nft_expr *expr, | 290 | const struct nft_expr *expr, |
290 | const struct nlattr * const tb[]); | 291 | const struct nlattr * const tb[]); |
291 | void (*destroy)(const struct nft_expr *expr); | 292 | void (*destroy)(const struct nft_ctx *ctx, |
293 | const struct nft_expr *expr); | ||
292 | int (*dump)(struct sk_buff *skb, | 294 | int (*dump)(struct sk_buff *skb, |
293 | const struct nft_expr *expr); | 295 | const struct nft_expr *expr); |
294 | int (*validate)(const struct nft_ctx *ctx, | 296 | int (*validate)(const struct nft_ctx *ctx, |
@@ -325,13 +327,15 @@ static inline void *nft_expr_priv(const struct nft_expr *expr) | |||
325 | * @handle: rule handle | 327 | * @handle: rule handle |
326 | * @genmask: generation mask | 328 | * @genmask: generation mask |
327 | * @dlen: length of expression data | 329 | * @dlen: length of expression data |
330 | * @ulen: length of user data (used for comments) | ||
328 | * @data: expression data | 331 | * @data: expression data |
329 | */ | 332 | */ |
330 | struct nft_rule { | 333 | struct nft_rule { |
331 | struct list_head list; | 334 | struct list_head list; |
332 | u64 handle:46, | 335 | u64 handle:42, |
333 | genmask:2, | 336 | genmask:2, |
334 | dlen:16; | 337 | dlen:12, |
338 | ulen:8; | ||
335 | unsigned char data[] | 339 | unsigned char data[] |
336 | __attribute__((aligned(__alignof__(struct nft_expr)))); | 340 | __attribute__((aligned(__alignof__(struct nft_expr)))); |
337 | }; | 341 | }; |
@@ -340,19 +344,13 @@ struct nft_rule { | |||
340 | * struct nft_rule_trans - nf_tables rule update in transaction | 344 | * struct nft_rule_trans - nf_tables rule update in transaction |
341 | * | 345 | * |
342 | * @list: used internally | 346 | * @list: used internally |
347 | * @ctx: rule context | ||
343 | * @rule: rule that needs to be updated | 348 | * @rule: rule that needs to be updated |
344 | * @chain: chain that this rule belongs to | ||
345 | * @table: table for which this chain applies | ||
346 | * @nlh: netlink header of the message that contain this update | ||
347 | * @family: family expressesed as AF_* | ||
348 | */ | 349 | */ |
349 | struct nft_rule_trans { | 350 | struct nft_rule_trans { |
350 | struct list_head list; | 351 | struct list_head list; |
352 | struct nft_ctx ctx; | ||
351 | struct nft_rule *rule; | 353 | struct nft_rule *rule; |
352 | const struct nft_chain *chain; | ||
353 | const struct nft_table *table; | ||
354 | const struct nlmsghdr *nlh; | ||
355 | u8 family; | ||
356 | }; | 354 | }; |
357 | 355 | ||
358 | static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) | 356 | static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) |
@@ -370,6 +368,11 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule) | |||
370 | return (struct nft_expr *)&rule->data[rule->dlen]; | 368 | return (struct nft_expr *)&rule->data[rule->dlen]; |
371 | } | 369 | } |
372 | 370 | ||
371 | static inline void *nft_userdata(const struct nft_rule *rule) | ||
372 | { | ||
373 | return (void *)&rule->data[rule->dlen]; | ||
374 | } | ||
375 | |||
373 | /* | 376 | /* |
374 | * The last pointer isn't really necessary, but the compiler isn't able to | 377 | * The last pointer isn't really necessary, but the compiler isn't able to |
375 | * determine that the result of nft_expr_last() is always the same since it | 378 | * determine that the result of nft_expr_last() is always the same since it |
@@ -521,6 +524,9 @@ void nft_unregister_chain_type(const struct nf_chain_type *); | |||
521 | int nft_register_expr(struct nft_expr_type *); | 524 | int nft_register_expr(struct nft_expr_type *); |
522 | void nft_unregister_expr(struct nft_expr_type *); | 525 | void nft_unregister_expr(struct nft_expr_type *); |
523 | 526 | ||
527 | #define nft_dereference(p) \ | ||
528 | nfnl_dereference(p, NFNL_SUBSYS_NFTABLES) | ||
529 | |||
524 | #define MODULE_ALIAS_NFT_FAMILY(family) \ | 530 | #define MODULE_ALIAS_NFT_FAMILY(family) \ |
525 | MODULE_ALIAS("nft-afinfo-" __stringify(family)) | 531 | MODULE_ALIAS("nft-afinfo-" __stringify(family)) |
526 | 532 | ||
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index fbcc7fa536dc..773cce308bc6 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/list_nulls.h> | 5 | #include <linux/list_nulls.h> |
6 | #include <linux/atomic.h> | 6 | #include <linux/atomic.h> |
7 | #include <linux/netfilter/nf_conntrack_tcp.h> | 7 | #include <linux/netfilter/nf_conntrack_tcp.h> |
8 | #include <linux/seqlock.h> | ||
8 | 9 | ||
9 | struct ctl_table_header; | 10 | struct ctl_table_header; |
10 | struct nf_conntrack_ecache; | 11 | struct nf_conntrack_ecache; |
@@ -62,6 +63,13 @@ struct nf_ip_net { | |||
62 | #endif | 63 | #endif |
63 | }; | 64 | }; |
64 | 65 | ||
66 | struct ct_pcpu { | ||
67 | spinlock_t lock; | ||
68 | struct hlist_nulls_head unconfirmed; | ||
69 | struct hlist_nulls_head dying; | ||
70 | struct hlist_nulls_head tmpl; | ||
71 | }; | ||
72 | |||
65 | struct netns_ct { | 73 | struct netns_ct { |
66 | atomic_t count; | 74 | atomic_t count; |
67 | unsigned int expect_count; | 75 | unsigned int expect_count; |
@@ -83,12 +91,11 @@ struct netns_ct { | |||
83 | int sysctl_checksum; | 91 | int sysctl_checksum; |
84 | 92 | ||
85 | unsigned int htable_size; | 93 | unsigned int htable_size; |
94 | seqcount_t generation; | ||
86 | struct kmem_cache *nf_conntrack_cachep; | 95 | struct kmem_cache *nf_conntrack_cachep; |
87 | struct hlist_nulls_head *hash; | 96 | struct hlist_nulls_head *hash; |
88 | struct hlist_head *expect_hash; | 97 | struct hlist_head *expect_hash; |
89 | struct hlist_nulls_head unconfirmed; | 98 | struct ct_pcpu __percpu *pcpu_lists; |
90 | struct hlist_nulls_head dying; | ||
91 | struct hlist_nulls_head tmpl; | ||
92 | struct ip_conntrack_stat __percpu *stat; | 99 | struct ip_conntrack_stat __percpu *stat; |
93 | struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; | 100 | struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; |
94 | struct nf_exp_event_notifier __rcu *nf_expect_event_cb; | 101 | struct nf_exp_event_notifier __rcu *nf_expect_event_cb; |
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 25d3b2f79c02..78c2f2e79920 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h | |||
@@ -82,6 +82,8 @@ enum { | |||
82 | IPSET_ATTR_PROTO, /* 7 */ | 82 | IPSET_ATTR_PROTO, /* 7 */ |
83 | IPSET_ATTR_CADT_FLAGS, /* 8 */ | 83 | IPSET_ATTR_CADT_FLAGS, /* 8 */ |
84 | IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, /* 9 */ | 84 | IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, /* 9 */ |
85 | IPSET_ATTR_MARK, /* 10 */ | ||
86 | IPSET_ATTR_MARKMASK, /* 11 */ | ||
85 | /* Reserve empty slots */ | 87 | /* Reserve empty slots */ |
86 | IPSET_ATTR_CADT_MAX = 16, | 88 | IPSET_ATTR_CADT_MAX = 16, |
87 | /* Create-only specific attributes */ | 89 | /* Create-only specific attributes */ |
@@ -144,6 +146,7 @@ enum ipset_errno { | |||
144 | IPSET_ERR_IPADDR_IPV6, | 146 | IPSET_ERR_IPADDR_IPV6, |
145 | IPSET_ERR_COUNTER, | 147 | IPSET_ERR_COUNTER, |
146 | IPSET_ERR_COMMENT, | 148 | IPSET_ERR_COMMENT, |
149 | IPSET_ERR_INVALID_MARKMASK, | ||
147 | 150 | ||
148 | /* Type specific error codes */ | 151 | /* Type specific error codes */ |
149 | IPSET_ERR_TYPE_SPECIFIC = 4352, | 152 | IPSET_ERR_TYPE_SPECIFIC = 4352, |
@@ -182,9 +185,18 @@ enum ipset_cadt_flags { | |||
182 | IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS), | 185 | IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS), |
183 | IPSET_FLAG_BIT_WITH_COMMENT = 4, | 186 | IPSET_FLAG_BIT_WITH_COMMENT = 4, |
184 | IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT), | 187 | IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT), |
188 | IPSET_FLAG_BIT_WITH_FORCEADD = 5, | ||
189 | IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD), | ||
185 | IPSET_FLAG_CADT_MAX = 15, | 190 | IPSET_FLAG_CADT_MAX = 15, |
186 | }; | 191 | }; |
187 | 192 | ||
193 | /* The flag bits which correspond to the non-extension create flags */ | ||
194 | enum ipset_create_flags { | ||
195 | IPSET_CREATE_FLAG_BIT_FORCEADD = 0, | ||
196 | IPSET_CREATE_FLAG_FORCEADD = (1 << IPSET_CREATE_FLAG_BIT_FORCEADD), | ||
197 | IPSET_CREATE_FLAG_BIT_MAX = 7, | ||
198 | }; | ||
199 | |||
188 | /* Commands with settype-specific attributes */ | 200 | /* Commands with settype-specific attributes */ |
189 | enum ipset_adt { | 201 | enum ipset_adt { |
190 | IPSET_ADD, | 202 | IPSET_ADD, |
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 83c985a6170b..c88ccbfda5f1 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h | |||
@@ -1,7 +1,8 @@ | |||
1 | #ifndef _LINUX_NF_TABLES_H | 1 | #ifndef _LINUX_NF_TABLES_H |
2 | #define _LINUX_NF_TABLES_H | 2 | #define _LINUX_NF_TABLES_H |
3 | 3 | ||
4 | #define NFT_CHAIN_MAXNAMELEN 32 | 4 | #define NFT_CHAIN_MAXNAMELEN 32 |
5 | #define NFT_USERDATA_MAXLEN 256 | ||
5 | 6 | ||
6 | enum nft_registers { | 7 | enum nft_registers { |
7 | NFT_REG_VERDICT, | 8 | NFT_REG_VERDICT, |
@@ -156,6 +157,7 @@ enum nft_chain_attributes { | |||
156 | * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes) | 157 | * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes) |
157 | * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes) | 158 | * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes) |
158 | * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64) | 159 | * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64) |
160 | * @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN) | ||
159 | */ | 161 | */ |
160 | enum nft_rule_attributes { | 162 | enum nft_rule_attributes { |
161 | NFTA_RULE_UNSPEC, | 163 | NFTA_RULE_UNSPEC, |
@@ -165,6 +167,7 @@ enum nft_rule_attributes { | |||
165 | NFTA_RULE_EXPRESSIONS, | 167 | NFTA_RULE_EXPRESSIONS, |
166 | NFTA_RULE_COMPAT, | 168 | NFTA_RULE_COMPAT, |
167 | NFTA_RULE_POSITION, | 169 | NFTA_RULE_POSITION, |
170 | NFTA_RULE_USERDATA, | ||
168 | __NFTA_RULE_MAX | 171 | __NFTA_RULE_MAX |
169 | }; | 172 | }; |
170 | #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) | 173 | #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) |
@@ -601,6 +604,7 @@ enum nft_ct_keys { | |||
601 | NFT_CT_PROTOCOL, | 604 | NFT_CT_PROTOCOL, |
602 | NFT_CT_PROTO_SRC, | 605 | NFT_CT_PROTO_SRC, |
603 | NFT_CT_PROTO_DST, | 606 | NFT_CT_PROTO_DST, |
607 | NFT_CT_LABELS, | ||
604 | }; | 608 | }; |
605 | 609 | ||
606 | /** | 610 | /** |
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index c3e0adea9c27..7ebd6e37875c 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
@@ -61,7 +61,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) | |||
61 | skb_dst_set(skb, NULL); | 61 | skb_dst_set(skb, NULL); |
62 | dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); | 62 | dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); |
63 | if (IS_ERR(dst)) | 63 | if (IS_ERR(dst)) |
64 | return PTR_ERR(dst);; | 64 | return PTR_ERR(dst); |
65 | skb_dst_set(skb, dst); | 65 | skb_dst_set(skb, dst); |
66 | } | 66 | } |
67 | #endif | 67 | #endif |
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 44cd4f58adf0..2f7f5c32c6f9 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig | |||
@@ -61,6 +61,15 @@ config IP_SET_HASH_IP | |||
61 | 61 | ||
62 | To compile it as a module, choose M here. If unsure, say N. | 62 | To compile it as a module, choose M here. If unsure, say N. |
63 | 63 | ||
64 | config IP_SET_HASH_IPMARK | ||
65 | tristate "hash:ip,mark set support" | ||
66 | depends on IP_SET | ||
67 | help | ||
68 | This option adds the hash:ip,mark set type support, by which one | ||
69 | can store IPv4/IPv6 address and mark pairs. | ||
70 | |||
71 | To compile it as a module, choose M here. If unsure, say N. | ||
72 | |||
64 | config IP_SET_HASH_IPPORT | 73 | config IP_SET_HASH_IPPORT |
65 | tristate "hash:ip,port set support" | 74 | tristate "hash:ip,port set support" |
66 | depends on IP_SET | 75 | depends on IP_SET |
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 44b2d38476fa..231f10196cb9 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile | |||
@@ -14,6 +14,7 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o | |||
14 | 14 | ||
15 | # hash types | 15 | # hash types |
16 | obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o | 16 | obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o |
17 | obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o | ||
17 | obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o | 18 | obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o |
18 | obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o | 19 | obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o |
19 | obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o | 20 | obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o |
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index de770ec39e51..117208321f16 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c | |||
@@ -54,10 +54,10 @@ MODULE_DESCRIPTION("core IP set support"); | |||
54 | MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); | 54 | MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); |
55 | 55 | ||
56 | /* When the nfnl mutex is held: */ | 56 | /* When the nfnl mutex is held: */ |
57 | #define nfnl_dereference(p) \ | 57 | #define ip_set_dereference(p) \ |
58 | rcu_dereference_protected(p, 1) | 58 | rcu_dereference_protected(p, 1) |
59 | #define nfnl_set(inst, id) \ | 59 | #define ip_set(inst, id) \ |
60 | nfnl_dereference((inst)->ip_set_list)[id] | 60 | ip_set_dereference((inst)->ip_set_list)[id] |
61 | 61 | ||
62 | /* | 62 | /* |
63 | * The set types are implemented in modules and registered set types | 63 | * The set types are implemented in modules and registered set types |
@@ -368,6 +368,8 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len) | |||
368 | 368 | ||
369 | if (tb[IPSET_ATTR_CADT_FLAGS]) | 369 | if (tb[IPSET_ATTR_CADT_FLAGS]) |
370 | cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); | 370 | cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); |
371 | if (cadt_flags & IPSET_FLAG_WITH_FORCEADD) | ||
372 | set->flags |= IPSET_CREATE_FLAG_FORCEADD; | ||
371 | for (id = 0; id < IPSET_EXT_ID_MAX; id++) { | 373 | for (id = 0; id < IPSET_EXT_ID_MAX; id++) { |
372 | if (!add_extension(id, cadt_flags, tb)) | 374 | if (!add_extension(id, cadt_flags, tb)) |
373 | continue; | 375 | continue; |
@@ -510,7 +512,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, | |||
510 | 512 | ||
511 | if (opt->dim < set->type->dimension || | 513 | if (opt->dim < set->type->dimension || |
512 | !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) | 514 | !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) |
513 | return 0; | 515 | return -IPSET_ERR_TYPE_MISMATCH; |
514 | 516 | ||
515 | write_lock_bh(&set->lock); | 517 | write_lock_bh(&set->lock); |
516 | ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); | 518 | ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); |
@@ -533,7 +535,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, | |||
533 | 535 | ||
534 | if (opt->dim < set->type->dimension || | 536 | if (opt->dim < set->type->dimension || |
535 | !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) | 537 | !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) |
536 | return 0; | 538 | return -IPSET_ERR_TYPE_MISMATCH; |
537 | 539 | ||
538 | write_lock_bh(&set->lock); | 540 | write_lock_bh(&set->lock); |
539 | ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); | 541 | ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); |
@@ -640,7 +642,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) | |||
640 | return IPSET_INVALID_ID; | 642 | return IPSET_INVALID_ID; |
641 | 643 | ||
642 | nfnl_lock(NFNL_SUBSYS_IPSET); | 644 | nfnl_lock(NFNL_SUBSYS_IPSET); |
643 | set = nfnl_set(inst, index); | 645 | set = ip_set(inst, index); |
644 | if (set) | 646 | if (set) |
645 | __ip_set_get(set); | 647 | __ip_set_get(set); |
646 | else | 648 | else |
@@ -666,7 +668,7 @@ ip_set_nfnl_put(struct net *net, ip_set_id_t index) | |||
666 | 668 | ||
667 | nfnl_lock(NFNL_SUBSYS_IPSET); | 669 | nfnl_lock(NFNL_SUBSYS_IPSET); |
668 | if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */ | 670 | if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */ |
669 | set = nfnl_set(inst, index); | 671 | set = ip_set(inst, index); |
670 | if (set != NULL) | 672 | if (set != NULL) |
671 | __ip_set_put(set); | 673 | __ip_set_put(set); |
672 | } | 674 | } |
@@ -734,7 +736,7 @@ find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id) | |||
734 | 736 | ||
735 | *id = IPSET_INVALID_ID; | 737 | *id = IPSET_INVALID_ID; |
736 | for (i = 0; i < inst->ip_set_max; i++) { | 738 | for (i = 0; i < inst->ip_set_max; i++) { |
737 | set = nfnl_set(inst, i); | 739 | set = ip_set(inst, i); |
738 | if (set != NULL && STREQ(set->name, name)) { | 740 | if (set != NULL && STREQ(set->name, name)) { |
739 | *id = i; | 741 | *id = i; |
740 | break; | 742 | break; |
@@ -760,7 +762,7 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, | |||
760 | 762 | ||
761 | *index = IPSET_INVALID_ID; | 763 | *index = IPSET_INVALID_ID; |
762 | for (i = 0; i < inst->ip_set_max; i++) { | 764 | for (i = 0; i < inst->ip_set_max; i++) { |
763 | s = nfnl_set(inst, i); | 765 | s = ip_set(inst, i); |
764 | if (s == NULL) { | 766 | if (s == NULL) { |
765 | if (*index == IPSET_INVALID_ID) | 767 | if (*index == IPSET_INVALID_ID) |
766 | *index = i; | 768 | *index = i; |
@@ -883,7 +885,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, | |||
883 | if (!list) | 885 | if (!list) |
884 | goto cleanup; | 886 | goto cleanup; |
885 | /* nfnl mutex is held, both lists are valid */ | 887 | /* nfnl mutex is held, both lists are valid */ |
886 | tmp = nfnl_dereference(inst->ip_set_list); | 888 | tmp = ip_set_dereference(inst->ip_set_list); |
887 | memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max); | 889 | memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max); |
888 | rcu_assign_pointer(inst->ip_set_list, list); | 890 | rcu_assign_pointer(inst->ip_set_list, list); |
889 | /* Make sure all current packets have passed through */ | 891 | /* Make sure all current packets have passed through */ |
@@ -900,7 +902,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, | |||
900 | * Finally! Add our shiny new set to the list, and be done. | 902 | * Finally! Add our shiny new set to the list, and be done. |
901 | */ | 903 | */ |
902 | pr_debug("create: '%s' created with index %u!\n", set->name, index); | 904 | pr_debug("create: '%s' created with index %u!\n", set->name, index); |
903 | nfnl_set(inst, index) = set; | 905 | ip_set(inst, index) = set; |
904 | 906 | ||
905 | return ret; | 907 | return ret; |
906 | 908 | ||
@@ -925,10 +927,10 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { | |||
925 | static void | 927 | static void |
926 | ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index) | 928 | ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index) |
927 | { | 929 | { |
928 | struct ip_set *set = nfnl_set(inst, index); | 930 | struct ip_set *set = ip_set(inst, index); |
929 | 931 | ||
930 | pr_debug("set: %s\n", set->name); | 932 | pr_debug("set: %s\n", set->name); |
931 | nfnl_set(inst, index) = NULL; | 933 | ip_set(inst, index) = NULL; |
932 | 934 | ||
933 | /* Must call it without holding any lock */ | 935 | /* Must call it without holding any lock */ |
934 | set->variant->destroy(set); | 936 | set->variant->destroy(set); |
@@ -962,7 +964,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, | |||
962 | read_lock_bh(&ip_set_ref_lock); | 964 | read_lock_bh(&ip_set_ref_lock); |
963 | if (!attr[IPSET_ATTR_SETNAME]) { | 965 | if (!attr[IPSET_ATTR_SETNAME]) { |
964 | for (i = 0; i < inst->ip_set_max; i++) { | 966 | for (i = 0; i < inst->ip_set_max; i++) { |
965 | s = nfnl_set(inst, i); | 967 | s = ip_set(inst, i); |
966 | if (s != NULL && s->ref) { | 968 | if (s != NULL && s->ref) { |
967 | ret = -IPSET_ERR_BUSY; | 969 | ret = -IPSET_ERR_BUSY; |
968 | goto out; | 970 | goto out; |
@@ -970,7 +972,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, | |||
970 | } | 972 | } |
971 | read_unlock_bh(&ip_set_ref_lock); | 973 | read_unlock_bh(&ip_set_ref_lock); |
972 | for (i = 0; i < inst->ip_set_max; i++) { | 974 | for (i = 0; i < inst->ip_set_max; i++) { |
973 | s = nfnl_set(inst, i); | 975 | s = ip_set(inst, i); |
974 | if (s != NULL) | 976 | if (s != NULL) |
975 | ip_set_destroy_set(inst, i); | 977 | ip_set_destroy_set(inst, i); |
976 | } | 978 | } |
@@ -1020,7 +1022,7 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, | |||
1020 | 1022 | ||
1021 | if (!attr[IPSET_ATTR_SETNAME]) { | 1023 | if (!attr[IPSET_ATTR_SETNAME]) { |
1022 | for (i = 0; i < inst->ip_set_max; i++) { | 1024 | for (i = 0; i < inst->ip_set_max; i++) { |
1023 | s = nfnl_set(inst, i); | 1025 | s = ip_set(inst, i); |
1024 | if (s != NULL) | 1026 | if (s != NULL) |
1025 | ip_set_flush_set(s); | 1027 | ip_set_flush_set(s); |
1026 | } | 1028 | } |
@@ -1074,7 +1076,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, | |||
1074 | 1076 | ||
1075 | name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); | 1077 | name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); |
1076 | for (i = 0; i < inst->ip_set_max; i++) { | 1078 | for (i = 0; i < inst->ip_set_max; i++) { |
1077 | s = nfnl_set(inst, i); | 1079 | s = ip_set(inst, i); |
1078 | if (s != NULL && STREQ(s->name, name2)) { | 1080 | if (s != NULL && STREQ(s->name, name2)) { |
1079 | ret = -IPSET_ERR_EXIST_SETNAME2; | 1081 | ret = -IPSET_ERR_EXIST_SETNAME2; |
1080 | goto out; | 1082 | goto out; |
@@ -1134,8 +1136,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, | |||
1134 | 1136 | ||
1135 | write_lock_bh(&ip_set_ref_lock); | 1137 | write_lock_bh(&ip_set_ref_lock); |
1136 | swap(from->ref, to->ref); | 1138 | swap(from->ref, to->ref); |
1137 | nfnl_set(inst, from_id) = to; | 1139 | ip_set(inst, from_id) = to; |
1138 | nfnl_set(inst, to_id) = from; | 1140 | ip_set(inst, to_id) = from; |
1139 | write_unlock_bh(&ip_set_ref_lock); | 1141 | write_unlock_bh(&ip_set_ref_lock); |
1140 | 1142 | ||
1141 | return 0; | 1143 | return 0; |
@@ -1157,7 +1159,7 @@ ip_set_dump_done(struct netlink_callback *cb) | |||
1157 | struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET]; | 1159 | struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET]; |
1158 | if (cb->args[IPSET_CB_ARG0]) { | 1160 | if (cb->args[IPSET_CB_ARG0]) { |
1159 | pr_debug("release set %s\n", | 1161 | pr_debug("release set %s\n", |
1160 | nfnl_set(inst, cb->args[IPSET_CB_INDEX])->name); | 1162 | ip_set(inst, cb->args[IPSET_CB_INDEX])->name); |
1161 | __ip_set_put_byindex(inst, | 1163 | __ip_set_put_byindex(inst, |
1162 | (ip_set_id_t) cb->args[IPSET_CB_INDEX]); | 1164 | (ip_set_id_t) cb->args[IPSET_CB_INDEX]); |
1163 | } | 1165 | } |
@@ -1254,7 +1256,7 @@ dump_last: | |||
1254 | dump_type, dump_flags, cb->args[IPSET_CB_INDEX]); | 1256 | dump_type, dump_flags, cb->args[IPSET_CB_INDEX]); |
1255 | for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) { | 1257 | for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) { |
1256 | index = (ip_set_id_t) cb->args[IPSET_CB_INDEX]; | 1258 | index = (ip_set_id_t) cb->args[IPSET_CB_INDEX]; |
1257 | set = nfnl_set(inst, index); | 1259 | set = ip_set(inst, index); |
1258 | if (set == NULL) { | 1260 | if (set == NULL) { |
1259 | if (dump_type == DUMP_ONE) { | 1261 | if (dump_type == DUMP_ONE) { |
1260 | ret = -ENOENT; | 1262 | ret = -ENOENT; |
@@ -1332,7 +1334,7 @@ next_set: | |||
1332 | release_refcount: | 1334 | release_refcount: |
1333 | /* If there was an error or set is done, release set */ | 1335 | /* If there was an error or set is done, release set */ |
1334 | if (ret || !cb->args[IPSET_CB_ARG0]) { | 1336 | if (ret || !cb->args[IPSET_CB_ARG0]) { |
1335 | pr_debug("release set %s\n", nfnl_set(inst, index)->name); | 1337 | pr_debug("release set %s\n", ip_set(inst, index)->name); |
1336 | __ip_set_put_byindex(inst, index); | 1338 | __ip_set_put_byindex(inst, index); |
1337 | cb->args[IPSET_CB_ARG0] = 0; | 1339 | cb->args[IPSET_CB_ARG0] = 0; |
1338 | } | 1340 | } |
@@ -1887,7 +1889,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) | |||
1887 | find_set_and_id(inst, req_get->set.name, &id); | 1889 | find_set_and_id(inst, req_get->set.name, &id); |
1888 | req_get->set.index = id; | 1890 | req_get->set.index = id; |
1889 | if (id != IPSET_INVALID_ID) | 1891 | if (id != IPSET_INVALID_ID) |
1890 | req_get->family = nfnl_set(inst, id)->family; | 1892 | req_get->family = ip_set(inst, id)->family; |
1891 | nfnl_unlock(NFNL_SUBSYS_IPSET); | 1893 | nfnl_unlock(NFNL_SUBSYS_IPSET); |
1892 | goto copy; | 1894 | goto copy; |
1893 | } | 1895 | } |
@@ -1901,7 +1903,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) | |||
1901 | goto done; | 1903 | goto done; |
1902 | } | 1904 | } |
1903 | nfnl_lock(NFNL_SUBSYS_IPSET); | 1905 | nfnl_lock(NFNL_SUBSYS_IPSET); |
1904 | set = nfnl_set(inst, req_get->set.index); | 1906 | set = ip_set(inst, req_get->set.index); |
1905 | strncpy(req_get->set.name, set ? set->name : "", | 1907 | strncpy(req_get->set.name, set ? set->name : "", |
1906 | IPSET_MAXNAMELEN); | 1908 | IPSET_MAXNAMELEN); |
1907 | nfnl_unlock(NFNL_SUBSYS_IPSET); | 1909 | nfnl_unlock(NFNL_SUBSYS_IPSET); |
@@ -1945,7 +1947,6 @@ ip_set_net_init(struct net *net) | |||
1945 | return -ENOMEM; | 1947 | return -ENOMEM; |
1946 | inst->is_deleted = 0; | 1948 | inst->is_deleted = 0; |
1947 | rcu_assign_pointer(inst->ip_set_list, list); | 1949 | rcu_assign_pointer(inst->ip_set_list, list); |
1948 | pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL); | ||
1949 | return 0; | 1950 | return 0; |
1950 | } | 1951 | } |
1951 | 1952 | ||
@@ -1960,7 +1961,7 @@ ip_set_net_exit(struct net *net) | |||
1960 | inst->is_deleted = 1; /* flag for ip_set_nfnl_put */ | 1961 | inst->is_deleted = 1; /* flag for ip_set_nfnl_put */ |
1961 | 1962 | ||
1962 | for (i = 0; i < inst->ip_set_max; i++) { | 1963 | for (i = 0; i < inst->ip_set_max; i++) { |
1963 | set = nfnl_set(inst, i); | 1964 | set = ip_set(inst, i); |
1964 | if (set != NULL) | 1965 | if (set != NULL) |
1965 | ip_set_destroy_set(inst, i); | 1966 | ip_set_destroy_set(inst, i); |
1966 | } | 1967 | } |
@@ -1996,6 +1997,7 @@ ip_set_init(void) | |||
1996 | nfnetlink_subsys_unregister(&ip_set_netlink_subsys); | 1997 | nfnetlink_subsys_unregister(&ip_set_netlink_subsys); |
1997 | return ret; | 1998 | return ret; |
1998 | } | 1999 | } |
2000 | pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL); | ||
1999 | return 0; | 2001 | return 0; |
2000 | } | 2002 | } |
2001 | 2003 | ||
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index be6932ad3a86..61c7fb052802 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h | |||
@@ -263,6 +263,9 @@ struct htype { | |||
263 | u32 maxelem; /* max elements in the hash */ | 263 | u32 maxelem; /* max elements in the hash */ |
264 | u32 elements; /* current element (vs timeout) */ | 264 | u32 elements; /* current element (vs timeout) */ |
265 | u32 initval; /* random jhash init value */ | 265 | u32 initval; /* random jhash init value */ |
266 | #ifdef IP_SET_HASH_WITH_MARKMASK | ||
267 | u32 markmask; /* markmask value for mark mask to store */ | ||
268 | #endif | ||
266 | struct timer_list gc; /* garbage collection when timeout enabled */ | 269 | struct timer_list gc; /* garbage collection when timeout enabled */ |
267 | struct mtype_elem next; /* temporary storage for uadd */ | 270 | struct mtype_elem next; /* temporary storage for uadd */ |
268 | #ifdef IP_SET_HASH_WITH_MULTI | 271 | #ifdef IP_SET_HASH_WITH_MULTI |
@@ -454,6 +457,9 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) | |||
454 | #ifdef IP_SET_HASH_WITH_NETMASK | 457 | #ifdef IP_SET_HASH_WITH_NETMASK |
455 | x->netmask == y->netmask && | 458 | x->netmask == y->netmask && |
456 | #endif | 459 | #endif |
460 | #ifdef IP_SET_HASH_WITH_MARKMASK | ||
461 | x->markmask == y->markmask && | ||
462 | #endif | ||
457 | a->extensions == b->extensions; | 463 | a->extensions == b->extensions; |
458 | } | 464 | } |
459 | 465 | ||
@@ -627,6 +633,18 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, | |||
627 | bool flag_exist = flags & IPSET_FLAG_EXIST; | 633 | bool flag_exist = flags & IPSET_FLAG_EXIST; |
628 | u32 key, multi = 0; | 634 | u32 key, multi = 0; |
629 | 635 | ||
636 | if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set)) { | ||
637 | rcu_read_lock_bh(); | ||
638 | t = rcu_dereference_bh(h->table); | ||
639 | key = HKEY(value, h->initval, t->htable_bits); | ||
640 | n = hbucket(t,key); | ||
641 | if (n->pos) { | ||
642 | /* Choosing the first entry in the array to replace */ | ||
643 | j = 0; | ||
644 | goto reuse_slot; | ||
645 | } | ||
646 | rcu_read_unlock_bh(); | ||
647 | } | ||
630 | if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) | 648 | if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) |
631 | /* FIXME: when set is full, we slow down here */ | 649 | /* FIXME: when set is full, we slow down here */ |
632 | mtype_expire(set, h, NLEN(set->family), set->dsize); | 650 | mtype_expire(set, h, NLEN(set->family), set->dsize); |
@@ -908,6 +926,10 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) | |||
908 | nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask)) | 926 | nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask)) |
909 | goto nla_put_failure; | 927 | goto nla_put_failure; |
910 | #endif | 928 | #endif |
929 | #ifdef IP_SET_HASH_WITH_MARKMASK | ||
930 | if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask)) | ||
931 | goto nla_put_failure; | ||
932 | #endif | ||
911 | if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || | 933 | if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || |
912 | nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) | 934 | nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) |
913 | goto nla_put_failure; | 935 | goto nla_put_failure; |
@@ -1016,6 +1038,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, | |||
1016 | struct nlattr *tb[], u32 flags) | 1038 | struct nlattr *tb[], u32 flags) |
1017 | { | 1039 | { |
1018 | u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; | 1040 | u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; |
1041 | #ifdef IP_SET_HASH_WITH_MARKMASK | ||
1042 | u32 markmask; | ||
1043 | #endif | ||
1019 | u8 hbits; | 1044 | u8 hbits; |
1020 | #ifdef IP_SET_HASH_WITH_NETMASK | 1045 | #ifdef IP_SET_HASH_WITH_NETMASK |
1021 | u8 netmask; | 1046 | u8 netmask; |
@@ -1026,6 +1051,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, | |||
1026 | 1051 | ||
1027 | if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) | 1052 | if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) |
1028 | return -IPSET_ERR_INVALID_FAMILY; | 1053 | return -IPSET_ERR_INVALID_FAMILY; |
1054 | |||
1055 | #ifdef IP_SET_HASH_WITH_MARKMASK | ||
1056 | markmask = 0xffffffff; | ||
1057 | #endif | ||
1029 | #ifdef IP_SET_HASH_WITH_NETMASK | 1058 | #ifdef IP_SET_HASH_WITH_NETMASK |
1030 | netmask = set->family == NFPROTO_IPV4 ? 32 : 128; | 1059 | netmask = set->family == NFPROTO_IPV4 ? 32 : 128; |
1031 | pr_debug("Create set %s with family %s\n", | 1060 | pr_debug("Create set %s with family %s\n", |
@@ -1034,6 +1063,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, | |||
1034 | 1063 | ||
1035 | if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || | 1064 | if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || |
1036 | !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || | 1065 | !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || |
1066 | #ifdef IP_SET_HASH_WITH_MARKMASK | ||
1067 | !ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK) || | ||
1068 | #endif | ||
1037 | !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || | 1069 | !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || |
1038 | !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) | 1070 | !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) |
1039 | return -IPSET_ERR_PROTOCOL; | 1071 | return -IPSET_ERR_PROTOCOL; |
@@ -1057,6 +1089,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, | |||
1057 | return -IPSET_ERR_INVALID_NETMASK; | 1089 | return -IPSET_ERR_INVALID_NETMASK; |
1058 | } | 1090 | } |
1059 | #endif | 1091 | #endif |
1092 | #ifdef IP_SET_HASH_WITH_MARKMASK | ||
1093 | if (tb[IPSET_ATTR_MARKMASK]) { | ||
1094 | markmask = ntohl(nla_get_u32(tb[IPSET_ATTR_MARKMASK])); | ||
1095 | |||
1096 | if ((markmask > 4294967295u) || markmask == 0) | ||
1097 | return -IPSET_ERR_INVALID_MARKMASK; | ||
1098 | } | ||
1099 | #endif | ||
1060 | 1100 | ||
1061 | hsize = sizeof(*h); | 1101 | hsize = sizeof(*h); |
1062 | #ifdef IP_SET_HASH_WITH_NETS | 1102 | #ifdef IP_SET_HASH_WITH_NETS |
@@ -1071,6 +1111,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, | |||
1071 | #ifdef IP_SET_HASH_WITH_NETMASK | 1111 | #ifdef IP_SET_HASH_WITH_NETMASK |
1072 | h->netmask = netmask; | 1112 | h->netmask = netmask; |
1073 | #endif | 1113 | #endif |
1114 | #ifdef IP_SET_HASH_WITH_MARKMASK | ||
1115 | h->markmask = markmask; | ||
1116 | #endif | ||
1074 | get_random_bytes(&h->initval, sizeof(h->initval)); | 1117 | get_random_bytes(&h->initval, sizeof(h->initval)); |
1075 | set->timeout = IPSET_NO_TIMEOUT; | 1118 | set->timeout = IPSET_NO_TIMEOUT; |
1076 | 1119 | ||
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index e65fc2423d56..dd40607f878e 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c | |||
@@ -25,7 +25,8 @@ | |||
25 | 25 | ||
26 | #define IPSET_TYPE_REV_MIN 0 | 26 | #define IPSET_TYPE_REV_MIN 0 |
27 | /* 1 Counters support */ | 27 | /* 1 Counters support */ |
28 | #define IPSET_TYPE_REV_MAX 2 /* Comments support */ | 28 | /* 2 Comments support */ |
29 | #define IPSET_TYPE_REV_MAX 3 /* Forceadd support */ | ||
29 | 30 | ||
30 | MODULE_LICENSE("GPL"); | 31 | MODULE_LICENSE("GPL"); |
31 | MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); | 32 | MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); |
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c new file mode 100644 index 000000000000..4eff0a297254 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c | |||
@@ -0,0 +1,321 @@ | |||
1 | /* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> | ||
2 | * Copyright (C) 2013 Smoothwall Ltd. <vytas.dauksa@smoothwall.net> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | /* Kernel module implementing an IP set type: the hash:ip,mark type */ | ||
10 | |||
11 | #include <linux/jhash.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/ip.h> | ||
14 | #include <linux/skbuff.h> | ||
15 | #include <linux/errno.h> | ||
16 | #include <linux/random.h> | ||
17 | #include <net/ip.h> | ||
18 | #include <net/ipv6.h> | ||
19 | #include <net/netlink.h> | ||
20 | #include <net/tcp.h> | ||
21 | |||
22 | #include <linux/netfilter.h> | ||
23 | #include <linux/netfilter/ipset/pfxlen.h> | ||
24 | #include <linux/netfilter/ipset/ip_set.h> | ||
25 | #include <linux/netfilter/ipset/ip_set_hash.h> | ||
26 | |||
27 | #define IPSET_TYPE_REV_MIN 0 | ||
28 | #define IPSET_TYPE_REV_MAX 1 /* Forceadd support */ | ||
29 | |||
30 | MODULE_LICENSE("GPL"); | ||
31 | MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>"); | ||
32 | IP_SET_MODULE_DESC("hash:ip,mark", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); | ||
33 | MODULE_ALIAS("ip_set_hash:ip,mark"); | ||
34 | |||
35 | /* Type specific function prefix */ | ||
36 | #define HTYPE hash_ipmark | ||
37 | #define IP_SET_HASH_WITH_MARKMASK | ||
38 | |||
39 | /* IPv4 variant */ | ||
40 | |||
41 | /* Member elements */ | ||
42 | struct hash_ipmark4_elem { | ||
43 | __be32 ip; | ||
44 | __u32 mark; | ||
45 | }; | ||
46 | |||
47 | /* Common functions */ | ||
48 | |||
49 | static inline bool | ||
50 | hash_ipmark4_data_equal(const struct hash_ipmark4_elem *ip1, | ||
51 | const struct hash_ipmark4_elem *ip2, | ||
52 | u32 *multi) | ||
53 | { | ||
54 | return ip1->ip == ip2->ip && | ||
55 | ip1->mark == ip2->mark; | ||
56 | } | ||
57 | |||
58 | static bool | ||
59 | hash_ipmark4_data_list(struct sk_buff *skb, | ||
60 | const struct hash_ipmark4_elem *data) | ||
61 | { | ||
62 | if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || | ||
63 | nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark))) | ||
64 | goto nla_put_failure; | ||
65 | return 0; | ||
66 | |||
67 | nla_put_failure: | ||
68 | return 1; | ||
69 | } | ||
70 | |||
71 | static inline void | ||
72 | hash_ipmark4_data_next(struct hash_ipmark4_elem *next, | ||
73 | const struct hash_ipmark4_elem *d) | ||
74 | { | ||
75 | next->ip = d->ip; | ||
76 | } | ||
77 | |||
78 | #define MTYPE hash_ipmark4 | ||
79 | #define PF 4 | ||
80 | #define HOST_MASK 32 | ||
81 | #define HKEY_DATALEN sizeof(struct hash_ipmark4_elem) | ||
82 | #include "ip_set_hash_gen.h" | ||
83 | |||
84 | static int | ||
85 | hash_ipmark4_kadt(struct ip_set *set, const struct sk_buff *skb, | ||
86 | const struct xt_action_param *par, | ||
87 | enum ipset_adt adt, struct ip_set_adt_opt *opt) | ||
88 | { | ||
89 | const struct hash_ipmark *h = set->data; | ||
90 | ipset_adtfn adtfn = set->variant->adt[adt]; | ||
91 | struct hash_ipmark4_elem e = { }; | ||
92 | struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); | ||
93 | |||
94 | e.mark = skb->mark; | ||
95 | e.mark &= h->markmask; | ||
96 | |||
97 | ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); | ||
98 | return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); | ||
99 | } | ||
100 | |||
101 | static int | ||
102 | hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], | ||
103 | enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) | ||
104 | { | ||
105 | const struct hash_ipmark *h = set->data; | ||
106 | ipset_adtfn adtfn = set->variant->adt[adt]; | ||
107 | struct hash_ipmark4_elem e = { }; | ||
108 | struct ip_set_ext ext = IP_SET_INIT_UEXT(set); | ||
109 | u32 ip, ip_to = 0; | ||
110 | int ret; | ||
111 | |||
112 | if (unlikely(!tb[IPSET_ATTR_IP] || | ||
113 | !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) || | ||
114 | !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || | ||
115 | !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || | ||
116 | !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) | ||
117 | return -IPSET_ERR_PROTOCOL; | ||
118 | |||
119 | if (tb[IPSET_ATTR_LINENO]) | ||
120 | *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); | ||
121 | |||
122 | ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) || | ||
123 | ip_set_get_extensions(set, tb, &ext); | ||
124 | if (ret) | ||
125 | return ret; | ||
126 | |||
127 | e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK])); | ||
128 | e.mark &= h->markmask; | ||
129 | |||
130 | if (adt == IPSET_TEST || | ||
131 | !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) { | ||
132 | ret = adtfn(set, &e, &ext, &ext, flags); | ||
133 | return ip_set_eexist(ret, flags) ? 0 : ret; | ||
134 | } | ||
135 | |||
136 | ip_to = ip = ntohl(e.ip); | ||
137 | if (tb[IPSET_ATTR_IP_TO]) { | ||
138 | ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); | ||
139 | if (ret) | ||
140 | return ret; | ||
141 | if (ip > ip_to) | ||
142 | swap(ip, ip_to); | ||
143 | } else if (tb[IPSET_ATTR_CIDR]) { | ||
144 | u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); | ||
145 | |||
146 | if (!cidr || cidr > 32) | ||
147 | return -IPSET_ERR_INVALID_CIDR; | ||
148 | ip_set_mask_from_to(ip, ip_to, cidr); | ||
149 | } | ||
150 | |||
151 | if (retried) | ||
152 | ip = ntohl(h->next.ip); | ||
153 | for (; !before(ip_to, ip); ip++) { | ||
154 | e.ip = htonl(ip); | ||
155 | ret = adtfn(set, &e, &ext, &ext, flags); | ||
156 | |||
157 | if (ret && !ip_set_eexist(ret, flags)) | ||
158 | return ret; | ||
159 | else | ||
160 | ret = 0; | ||
161 | } | ||
162 | return ret; | ||
163 | } | ||
164 | |||
165 | /* IPv6 variant */ | ||
166 | |||
167 | struct hash_ipmark6_elem { | ||
168 | union nf_inet_addr ip; | ||
169 | __u32 mark; | ||
170 | }; | ||
171 | |||
172 | /* Common functions */ | ||
173 | |||
174 | static inline bool | ||
175 | hash_ipmark6_data_equal(const struct hash_ipmark6_elem *ip1, | ||
176 | const struct hash_ipmark6_elem *ip2, | ||
177 | u32 *multi) | ||
178 | { | ||
179 | return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && | ||
180 | ip1->mark == ip2->mark; | ||
181 | } | ||
182 | |||
183 | static bool | ||
184 | hash_ipmark6_data_list(struct sk_buff *skb, | ||
185 | const struct hash_ipmark6_elem *data) | ||
186 | { | ||
187 | if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || | ||
188 | nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark))) | ||
189 | goto nla_put_failure; | ||
190 | return 0; | ||
191 | |||
192 | nla_put_failure: | ||
193 | return 1; | ||
194 | } | ||
195 | |||
196 | static inline void | ||
197 | hash_ipmark6_data_next(struct hash_ipmark4_elem *next, | ||
198 | const struct hash_ipmark6_elem *d) | ||
199 | { | ||
200 | } | ||
201 | |||
202 | #undef MTYPE | ||
203 | #undef PF | ||
204 | #undef HOST_MASK | ||
205 | #undef HKEY_DATALEN | ||
206 | |||
207 | #define MTYPE hash_ipmark6 | ||
208 | #define PF 6 | ||
209 | #define HOST_MASK 128 | ||
210 | #define HKEY_DATALEN sizeof(struct hash_ipmark6_elem) | ||
211 | #define IP_SET_EMIT_CREATE | ||
212 | #include "ip_set_hash_gen.h" | ||
213 | |||
214 | |||
215 | static int | ||
216 | hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb, | ||
217 | const struct xt_action_param *par, | ||
218 | enum ipset_adt adt, struct ip_set_adt_opt *opt) | ||
219 | { | ||
220 | const struct hash_ipmark *h = set->data; | ||
221 | ipset_adtfn adtfn = set->variant->adt[adt]; | ||
222 | struct hash_ipmark6_elem e = { }; | ||
223 | struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); | ||
224 | |||
225 | e.mark = skb->mark; | ||
226 | e.mark &= h->markmask; | ||
227 | |||
228 | ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); | ||
229 | return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); | ||
230 | } | ||
231 | |||
232 | static int | ||
233 | hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[], | ||
234 | enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) | ||
235 | { | ||
236 | const struct hash_ipmark *h = set->data; | ||
237 | ipset_adtfn adtfn = set->variant->adt[adt]; | ||
238 | struct hash_ipmark6_elem e = { }; | ||
239 | struct ip_set_ext ext = IP_SET_INIT_UEXT(set); | ||
240 | int ret; | ||
241 | |||
242 | if (unlikely(!tb[IPSET_ATTR_IP] || | ||
243 | !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) || | ||
244 | !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || | ||
245 | !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || | ||
246 | !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || | ||
247 | tb[IPSET_ATTR_IP_TO] || | ||
248 | tb[IPSET_ATTR_CIDR])) | ||
249 | return -IPSET_ERR_PROTOCOL; | ||
250 | |||
251 | if (tb[IPSET_ATTR_LINENO]) | ||
252 | *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); | ||
253 | |||
254 | ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || | ||
255 | ip_set_get_extensions(set, tb, &ext); | ||
256 | if (ret) | ||
257 | return ret; | ||
258 | |||
259 | e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK])); | ||
260 | e.mark &= h->markmask; | ||
261 | |||
262 | if (adt == IPSET_TEST) { | ||
263 | ret = adtfn(set, &e, &ext, &ext, flags); | ||
264 | return ip_set_eexist(ret, flags) ? 0 : ret; | ||
265 | } | ||
266 | |||
267 | ret = adtfn(set, &e, &ext, &ext, flags); | ||
268 | if (ret && !ip_set_eexist(ret, flags)) | ||
269 | return ret; | ||
270 | else | ||
271 | ret = 0; | ||
272 | |||
273 | return ret; | ||
274 | } | ||
275 | |||
276 | static struct ip_set_type hash_ipmark_type __read_mostly = { | ||
277 | .name = "hash:ip,mark", | ||
278 | .protocol = IPSET_PROTOCOL, | ||
279 | .features = IPSET_TYPE_IP | IPSET_TYPE_MARK, | ||
280 | .dimension = IPSET_DIM_TWO, | ||
281 | .family = NFPROTO_UNSPEC, | ||
282 | .revision_min = IPSET_TYPE_REV_MIN, | ||
283 | .revision_max = IPSET_TYPE_REV_MAX, | ||
284 | .create = hash_ipmark_create, | ||
285 | .create_policy = { | ||
286 | [IPSET_ATTR_MARKMASK] = { .type = NLA_U32 }, | ||
287 | [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, | ||
288 | [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, | ||
289 | [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, | ||
290 | [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, | ||
291 | [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, | ||
292 | [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, | ||
293 | }, | ||
294 | .adt_policy = { | ||
295 | [IPSET_ATTR_IP] = { .type = NLA_NESTED }, | ||
296 | [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, | ||
297 | [IPSET_ATTR_MARK] = { .type = NLA_U32 }, | ||
298 | [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, | ||
299 | [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, | ||
300 | [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, | ||
301 | [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, | ||
302 | [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, | ||
303 | [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, | ||
304 | }, | ||
305 | .me = THIS_MODULE, | ||
306 | }; | ||
307 | |||
308 | static int __init | ||
309 | hash_ipmark_init(void) | ||
310 | { | ||
311 | return ip_set_type_register(&hash_ipmark_type); | ||
312 | } | ||
313 | |||
314 | static void __exit | ||
315 | hash_ipmark_fini(void) | ||
316 | { | ||
317 | ip_set_type_unregister(&hash_ipmark_type); | ||
318 | } | ||
319 | |||
320 | module_init(hash_ipmark_init); | ||
321 | module_exit(hash_ipmark_fini); | ||
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 525a595dd1fe..7597b82a8b03 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c | |||
@@ -27,7 +27,8 @@ | |||
27 | #define IPSET_TYPE_REV_MIN 0 | 27 | #define IPSET_TYPE_REV_MIN 0 |
28 | /* 1 SCTP and UDPLITE support added */ | 28 | /* 1 SCTP and UDPLITE support added */ |
29 | /* 2 Counters support added */ | 29 | /* 2 Counters support added */ |
30 | #define IPSET_TYPE_REV_MAX 3 /* Comments support added */ | 30 | /* 3 Comments support added */ |
31 | #define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */ | ||
31 | 32 | ||
32 | MODULE_LICENSE("GPL"); | 33 | MODULE_LICENSE("GPL"); |
33 | MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); | 34 | MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); |
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index f5636631466e..672655ffd573 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c | |||
@@ -27,7 +27,8 @@ | |||
27 | #define IPSET_TYPE_REV_MIN 0 | 27 | #define IPSET_TYPE_REV_MIN 0 |
28 | /* 1 SCTP and UDPLITE support added */ | 28 | /* 1 SCTP and UDPLITE support added */ |
29 | /* 2 Counters support added */ | 29 | /* 2 Counters support added */ |
30 | #define IPSET_TYPE_REV_MAX 3 /* Comments support added */ | 30 | /* 3 Comments support added */ |
31 | #define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */ | ||
31 | 32 | ||
32 | MODULE_LICENSE("GPL"); | 33 | MODULE_LICENSE("GPL"); |
33 | MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); | 34 | MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); |
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 5d87fe8a41ff..7308d84f9277 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c | |||
@@ -29,7 +29,8 @@ | |||
29 | /* 2 Range as input support for IPv4 added */ | 29 | /* 2 Range as input support for IPv4 added */ |
30 | /* 3 nomatch flag support added */ | 30 | /* 3 nomatch flag support added */ |
31 | /* 4 Counters support added */ | 31 | /* 4 Counters support added */ |
32 | #define IPSET_TYPE_REV_MAX 5 /* Comments support added */ | 32 | /* 5 Comments support added */ |
33 | #define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */ | ||
33 | 34 | ||
34 | MODULE_LICENSE("GPL"); | 35 | MODULE_LICENSE("GPL"); |
35 | MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); | 36 | MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); |
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 8295cf4f9fdc..4c7d495783a3 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c | |||
@@ -26,7 +26,8 @@ | |||
26 | /* 1 Range as input support for IPv4 added */ | 26 | /* 1 Range as input support for IPv4 added */ |
27 | /* 2 nomatch flag support added */ | 27 | /* 2 nomatch flag support added */ |
28 | /* 3 Counters support added */ | 28 | /* 3 Counters support added */ |
29 | #define IPSET_TYPE_REV_MAX 4 /* Comments support added */ | 29 | /* 4 Comments support added */ |
30 | #define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */ | ||
30 | 31 | ||
31 | MODULE_LICENSE("GPL"); | 32 | MODULE_LICENSE("GPL"); |
32 | MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); | 33 | MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); |
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index b827a0f1f351..db2606805b35 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c | |||
@@ -27,7 +27,8 @@ | |||
27 | /* 1 nomatch flag support added */ | 27 | /* 1 nomatch flag support added */ |
28 | /* 2 /0 support added */ | 28 | /* 2 /0 support added */ |
29 | /* 3 Counters support added */ | 29 | /* 3 Counters support added */ |
30 | #define IPSET_TYPE_REV_MAX 4 /* Comments support added */ | 30 | /* 4 Comments support added */ |
31 | #define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */ | ||
31 | 32 | ||
32 | MODULE_LICENSE("GPL"); | 33 | MODULE_LICENSE("GPL"); |
33 | MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); | 34 | MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); |
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 6226803fc490..3e99987e4bf2 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c | |||
@@ -24,7 +24,7 @@ | |||
24 | #include <linux/netfilter/ipset/ip_set_hash.h> | 24 | #include <linux/netfilter/ipset/ip_set_hash.h> |
25 | 25 | ||
26 | #define IPSET_TYPE_REV_MIN 0 | 26 | #define IPSET_TYPE_REV_MIN 0 |
27 | #define IPSET_TYPE_REV_MAX 0 | 27 | #define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */ |
28 | 28 | ||
29 | MODULE_LICENSE("GPL"); | 29 | MODULE_LICENSE("GPL"); |
30 | MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>"); | 30 | MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>"); |
@@ -112,10 +112,10 @@ hash_netnet4_data_list(struct sk_buff *skb, | |||
112 | (flags && | 112 | (flags && |
113 | nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) | 113 | nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) |
114 | goto nla_put_failure; | 114 | goto nla_put_failure; |
115 | return 0; | 115 | return false; |
116 | 116 | ||
117 | nla_put_failure: | 117 | nla_put_failure: |
118 | return 1; | 118 | return true; |
119 | } | 119 | } |
120 | 120 | ||
121 | static inline void | 121 | static inline void |
@@ -334,10 +334,10 @@ hash_netnet6_data_list(struct sk_buff *skb, | |||
334 | (flags && | 334 | (flags && |
335 | nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) | 335 | nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) |
336 | goto nla_put_failure; | 336 | goto nla_put_failure; |
337 | return 0; | 337 | return false; |
338 | 338 | ||
339 | nla_put_failure: | 339 | nla_put_failure: |
340 | return 1; | 340 | return true; |
341 | } | 341 | } |
342 | 342 | ||
343 | static inline void | 343 | static inline void |
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 7097fb0141bf..1c645fbd09c7 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c | |||
@@ -28,7 +28,8 @@ | |||
28 | /* 2 Range as input support for IPv4 added */ | 28 | /* 2 Range as input support for IPv4 added */ |
29 | /* 3 nomatch flag support added */ | 29 | /* 3 nomatch flag support added */ |
30 | /* 4 Counters support added */ | 30 | /* 4 Counters support added */ |
31 | #define IPSET_TYPE_REV_MAX 5 /* Comments support added */ | 31 | /* 5 Comments support added */ |
32 | #define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */ | ||
32 | 33 | ||
33 | MODULE_LICENSE("GPL"); | 34 | MODULE_LICENSE("GPL"); |
34 | MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); | 35 | MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); |
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 703d1192a6a2..c0d2ba73f8b2 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c | |||
@@ -25,7 +25,8 @@ | |||
25 | #include <linux/netfilter/ipset/ip_set_hash.h> | 25 | #include <linux/netfilter/ipset/ip_set_hash.h> |
26 | 26 | ||
27 | #define IPSET_TYPE_REV_MIN 0 | 27 | #define IPSET_TYPE_REV_MIN 0 |
28 | #define IPSET_TYPE_REV_MAX 0 /* Comments support added */ | 28 | /* 0 Comments support added */ |
29 | #define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */ | ||
29 | 30 | ||
30 | MODULE_LICENSE("GPL"); | 31 | MODULE_LICENSE("GPL"); |
31 | MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>"); | 32 | MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>"); |
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c index 4f29fa97044b..04d15fdc99ee 100644 --- a/net/netfilter/ipset/pfxlen.c +++ b/net/netfilter/ipset/pfxlen.c | |||
@@ -7,8 +7,8 @@ | |||
7 | 7 | ||
8 | #define E(a, b, c, d) \ | 8 | #define E(a, b, c, d) \ |
9 | {.ip6 = { \ | 9 | {.ip6 = { \ |
10 | __constant_htonl(a), __constant_htonl(b), \ | 10 | htonl(a), htonl(b), \ |
11 | __constant_htonl(c), __constant_htonl(d), \ | 11 | htonl(c), htonl(d), \ |
12 | } } | 12 | } } |
13 | 13 | ||
14 | /* | 14 | /* |
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d6d75841352a..c42e83d2751c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c | |||
@@ -3580,7 +3580,7 @@ out: | |||
3580 | } | 3580 | } |
3581 | 3581 | ||
3582 | 3582 | ||
3583 | static const struct genl_ops ip_vs_genl_ops[] __read_mostly = { | 3583 | static const struct genl_ops ip_vs_genl_ops[] = { |
3584 | { | 3584 | { |
3585 | .cmd = IPVS_CMD_NEW_SERVICE, | 3585 | .cmd = IPVS_CMD_NEW_SERVICE, |
3586 | .flags = GENL_ADMIN_PERM, | 3586 | .flags = GENL_ADMIN_PERM, |
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index ca056a331e60..547ff33c1efd 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c | |||
@@ -238,7 +238,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc) | |||
238 | 238 | ||
239 | spin_lock_bh(&svc->sched_lock); | 239 | spin_lock_bh(&svc->sched_lock); |
240 | tbl->dead = 1; | 240 | tbl->dead = 1; |
241 | for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { | 241 | for (i = 0; i < IP_VS_LBLC_TAB_SIZE; i++) { |
242 | hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { | 242 | hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { |
243 | ip_vs_lblc_del(en); | 243 | ip_vs_lblc_del(en); |
244 | atomic_dec(&tbl->entries); | 244 | atomic_dec(&tbl->entries); |
@@ -265,7 +265,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) | |||
265 | unsigned long now = jiffies; | 265 | unsigned long now = jiffies; |
266 | int i, j; | 266 | int i, j; |
267 | 267 | ||
268 | for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { | 268 | for (i = 0, j = tbl->rover; i < IP_VS_LBLC_TAB_SIZE; i++) { |
269 | j = (j + 1) & IP_VS_LBLC_TAB_MASK; | 269 | j = (j + 1) & IP_VS_LBLC_TAB_MASK; |
270 | 270 | ||
271 | spin_lock(&svc->sched_lock); | 271 | spin_lock(&svc->sched_lock); |
@@ -321,7 +321,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) | |||
321 | if (goal > tbl->max_size/2) | 321 | if (goal > tbl->max_size/2) |
322 | goal = tbl->max_size/2; | 322 | goal = tbl->max_size/2; |
323 | 323 | ||
324 | for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { | 324 | for (i = 0, j = tbl->rover; i < IP_VS_LBLC_TAB_SIZE; i++) { |
325 | j = (j + 1) & IP_VS_LBLC_TAB_MASK; | 325 | j = (j + 1) & IP_VS_LBLC_TAB_MASK; |
326 | 326 | ||
327 | spin_lock(&svc->sched_lock); | 327 | spin_lock(&svc->sched_lock); |
@@ -340,7 +340,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) | |||
340 | tbl->rover = j; | 340 | tbl->rover = j; |
341 | 341 | ||
342 | out: | 342 | out: |
343 | mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL); | 343 | mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL); |
344 | } | 344 | } |
345 | 345 | ||
346 | 346 | ||
@@ -363,7 +363,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) | |||
363 | /* | 363 | /* |
364 | * Initialize the hash buckets | 364 | * Initialize the hash buckets |
365 | */ | 365 | */ |
366 | for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { | 366 | for (i = 0; i < IP_VS_LBLC_TAB_SIZE; i++) { |
367 | INIT_HLIST_HEAD(&tbl->bucket[i]); | 367 | INIT_HLIST_HEAD(&tbl->bucket[i]); |
368 | } | 368 | } |
369 | tbl->max_size = IP_VS_LBLC_TAB_SIZE*16; | 369 | tbl->max_size = IP_VS_LBLC_TAB_SIZE*16; |
@@ -536,8 +536,7 @@ out: | |||
536 | /* | 536 | /* |
537 | * IPVS LBLC Scheduler structure | 537 | * IPVS LBLC Scheduler structure |
538 | */ | 538 | */ |
539 | static struct ip_vs_scheduler ip_vs_lblc_scheduler = | 539 | static struct ip_vs_scheduler ip_vs_lblc_scheduler = { |
540 | { | ||
541 | .name = "lblc", | 540 | .name = "lblc", |
542 | .refcnt = ATOMIC_INIT(0), | 541 | .refcnt = ATOMIC_INIT(0), |
543 | .module = THIS_MODULE, | 542 | .module = THIS_MODULE, |
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 356bef519fe5..5d1e7d126ebd 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c | |||
@@ -60,8 +60,59 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, | |||
60 | const struct nlattr *attr) __read_mostly; | 60 | const struct nlattr *attr) __read_mostly; |
61 | EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); | 61 | EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); |
62 | 62 | ||
63 | DEFINE_SPINLOCK(nf_conntrack_lock); | 63 | __cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; |
64 | EXPORT_SYMBOL_GPL(nf_conntrack_lock); | 64 | EXPORT_SYMBOL_GPL(nf_conntrack_locks); |
65 | |||
66 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); | ||
67 | EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); | ||
68 | |||
69 | static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2) | ||
70 | { | ||
71 | h1 %= CONNTRACK_LOCKS; | ||
72 | h2 %= CONNTRACK_LOCKS; | ||
73 | spin_unlock(&nf_conntrack_locks[h1]); | ||
74 | if (h1 != h2) | ||
75 | spin_unlock(&nf_conntrack_locks[h2]); | ||
76 | } | ||
77 | |||
78 | /* return true if we need to recompute hashes (in case hash table was resized) */ | ||
79 | static bool nf_conntrack_double_lock(struct net *net, unsigned int h1, | ||
80 | unsigned int h2, unsigned int sequence) | ||
81 | { | ||
82 | h1 %= CONNTRACK_LOCKS; | ||
83 | h2 %= CONNTRACK_LOCKS; | ||
84 | if (h1 <= h2) { | ||
85 | spin_lock(&nf_conntrack_locks[h1]); | ||
86 | if (h1 != h2) | ||
87 | spin_lock_nested(&nf_conntrack_locks[h2], | ||
88 | SINGLE_DEPTH_NESTING); | ||
89 | } else { | ||
90 | spin_lock(&nf_conntrack_locks[h2]); | ||
91 | spin_lock_nested(&nf_conntrack_locks[h1], | ||
92 | SINGLE_DEPTH_NESTING); | ||
93 | } | ||
94 | if (read_seqcount_retry(&net->ct.generation, sequence)) { | ||
95 | nf_conntrack_double_unlock(h1, h2); | ||
96 | return true; | ||
97 | } | ||
98 | return false; | ||
99 | } | ||
100 | |||
101 | static void nf_conntrack_all_lock(void) | ||
102 | { | ||
103 | int i; | ||
104 | |||
105 | for (i = 0; i < CONNTRACK_LOCKS; i++) | ||
106 | spin_lock_nested(&nf_conntrack_locks[i], i); | ||
107 | } | ||
108 | |||
109 | static void nf_conntrack_all_unlock(void) | ||
110 | { | ||
111 | int i; | ||
112 | |||
113 | for (i = 0; i < CONNTRACK_LOCKS; i++) | ||
114 | spin_unlock(&nf_conntrack_locks[i]); | ||
115 | } | ||
65 | 116 | ||
66 | unsigned int nf_conntrack_htable_size __read_mostly; | 117 | unsigned int nf_conntrack_htable_size __read_mostly; |
67 | EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); | 118 | EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); |
@@ -192,6 +243,50 @@ clean_from_lists(struct nf_conn *ct) | |||
192 | nf_ct_remove_expectations(ct); | 243 | nf_ct_remove_expectations(ct); |
193 | } | 244 | } |
194 | 245 | ||
246 | /* must be called with local_bh_disable */ | ||
247 | static void nf_ct_add_to_dying_list(struct nf_conn *ct) | ||
248 | { | ||
249 | struct ct_pcpu *pcpu; | ||
250 | |||
251 | /* add this conntrack to the (per cpu) dying list */ | ||
252 | ct->cpu = smp_processor_id(); | ||
253 | pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu); | ||
254 | |||
255 | spin_lock(&pcpu->lock); | ||
256 | hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, | ||
257 | &pcpu->dying); | ||
258 | spin_unlock(&pcpu->lock); | ||
259 | } | ||
260 | |||
261 | /* must be called with local_bh_disable */ | ||
262 | static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct) | ||
263 | { | ||
264 | struct ct_pcpu *pcpu; | ||
265 | |||
266 | /* add this conntrack to the (per cpu) unconfirmed list */ | ||
267 | ct->cpu = smp_processor_id(); | ||
268 | pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu); | ||
269 | |||
270 | spin_lock(&pcpu->lock); | ||
271 | hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, | ||
272 | &pcpu->unconfirmed); | ||
273 | spin_unlock(&pcpu->lock); | ||
274 | } | ||
275 | |||
276 | /* must be called with local_bh_disable */ | ||
277 | static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) | ||
278 | { | ||
279 | struct ct_pcpu *pcpu; | ||
280 | |||
281 | /* We overload first tuple to link into unconfirmed or dying list.*/ | ||
282 | pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu); | ||
283 | |||
284 | spin_lock(&pcpu->lock); | ||
285 | BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode)); | ||
286 | hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); | ||
287 | spin_unlock(&pcpu->lock); | ||
288 | } | ||
289 | |||
195 | static void | 290 | static void |
196 | destroy_conntrack(struct nf_conntrack *nfct) | 291 | destroy_conntrack(struct nf_conntrack *nfct) |
197 | { | 292 | { |
@@ -203,9 +298,6 @@ destroy_conntrack(struct nf_conntrack *nfct) | |||
203 | NF_CT_ASSERT(atomic_read(&nfct->use) == 0); | 298 | NF_CT_ASSERT(atomic_read(&nfct->use) == 0); |
204 | NF_CT_ASSERT(!timer_pending(&ct->timeout)); | 299 | NF_CT_ASSERT(!timer_pending(&ct->timeout)); |
205 | 300 | ||
206 | /* To make sure we don't get any weird locking issues here: | ||
207 | * destroy_conntrack() MUST NOT be called with a write lock | ||
208 | * to nf_conntrack_lock!!! -HW */ | ||
209 | rcu_read_lock(); | 301 | rcu_read_lock(); |
210 | l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); | 302 | l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); |
211 | if (l4proto && l4proto->destroy) | 303 | if (l4proto && l4proto->destroy) |
@@ -213,19 +305,18 @@ destroy_conntrack(struct nf_conntrack *nfct) | |||
213 | 305 | ||
214 | rcu_read_unlock(); | 306 | rcu_read_unlock(); |
215 | 307 | ||
216 | spin_lock_bh(&nf_conntrack_lock); | 308 | local_bh_disable(); |
217 | /* Expectations will have been removed in clean_from_lists, | 309 | /* Expectations will have been removed in clean_from_lists, |
218 | * except TFTP can create an expectation on the first packet, | 310 | * except TFTP can create an expectation on the first packet, |
219 | * before connection is in the list, so we need to clean here, | 311 | * before connection is in the list, so we need to clean here, |
220 | * too. */ | 312 | * too. |
313 | */ | ||
221 | nf_ct_remove_expectations(ct); | 314 | nf_ct_remove_expectations(ct); |
222 | 315 | ||
223 | /* We overload first tuple to link into unconfirmed or dying list.*/ | 316 | nf_ct_del_from_dying_or_unconfirmed_list(ct); |
224 | BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode)); | ||
225 | hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); | ||
226 | 317 | ||
227 | NF_CT_STAT_INC(net, delete); | 318 | NF_CT_STAT_INC(net, delete); |
228 | spin_unlock_bh(&nf_conntrack_lock); | 319 | local_bh_enable(); |
229 | 320 | ||
230 | if (ct->master) | 321 | if (ct->master) |
231 | nf_ct_put(ct->master); | 322 | nf_ct_put(ct->master); |
@@ -237,17 +328,28 @@ destroy_conntrack(struct nf_conntrack *nfct) | |||
237 | static void nf_ct_delete_from_lists(struct nf_conn *ct) | 328 | static void nf_ct_delete_from_lists(struct nf_conn *ct) |
238 | { | 329 | { |
239 | struct net *net = nf_ct_net(ct); | 330 | struct net *net = nf_ct_net(ct); |
331 | unsigned int hash, reply_hash; | ||
332 | u16 zone = nf_ct_zone(ct); | ||
333 | unsigned int sequence; | ||
240 | 334 | ||
241 | nf_ct_helper_destroy(ct); | 335 | nf_ct_helper_destroy(ct); |
242 | spin_lock_bh(&nf_conntrack_lock); | 336 | |
243 | /* Inside lock so preempt is disabled on module removal path. | 337 | local_bh_disable(); |
244 | * Otherwise we can get spurious warnings. */ | 338 | do { |
245 | NF_CT_STAT_INC(net, delete_list); | 339 | sequence = read_seqcount_begin(&net->ct.generation); |
340 | hash = hash_conntrack(net, zone, | ||
341 | &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | ||
342 | reply_hash = hash_conntrack(net, zone, | ||
343 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple); | ||
344 | } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); | ||
345 | |||
246 | clean_from_lists(ct); | 346 | clean_from_lists(ct); |
247 | /* add this conntrack to the dying list */ | 347 | nf_conntrack_double_unlock(hash, reply_hash); |
248 | hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, | 348 | |
249 | &net->ct.dying); | 349 | nf_ct_add_to_dying_list(ct); |
250 | spin_unlock_bh(&nf_conntrack_lock); | 350 | |
351 | NF_CT_STAT_INC(net, delete_list); | ||
352 | local_bh_enable(); | ||
251 | } | 353 | } |
252 | 354 | ||
253 | static void death_by_event(unsigned long ul_conntrack) | 355 | static void death_by_event(unsigned long ul_conntrack) |
@@ -331,8 +433,6 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, | |||
331 | * Warning : | 433 | * Warning : |
332 | * - Caller must take a reference on returned object | 434 | * - Caller must take a reference on returned object |
333 | * and recheck nf_ct_tuple_equal(tuple, &h->tuple) | 435 | * and recheck nf_ct_tuple_equal(tuple, &h->tuple) |
334 | * OR | ||
335 | * - Caller must lock nf_conntrack_lock before calling this function | ||
336 | */ | 436 | */ |
337 | static struct nf_conntrack_tuple_hash * | 437 | static struct nf_conntrack_tuple_hash * |
338 | ____nf_conntrack_find(struct net *net, u16 zone, | 438 | ____nf_conntrack_find(struct net *net, u16 zone, |
@@ -408,32 +508,36 @@ EXPORT_SYMBOL_GPL(nf_conntrack_find_get); | |||
408 | 508 | ||
409 | static void __nf_conntrack_hash_insert(struct nf_conn *ct, | 509 | static void __nf_conntrack_hash_insert(struct nf_conn *ct, |
410 | unsigned int hash, | 510 | unsigned int hash, |
411 | unsigned int repl_hash) | 511 | unsigned int reply_hash) |
412 | { | 512 | { |
413 | struct net *net = nf_ct_net(ct); | 513 | struct net *net = nf_ct_net(ct); |
414 | 514 | ||
415 | hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, | 515 | hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, |
416 | &net->ct.hash[hash]); | 516 | &net->ct.hash[hash]); |
417 | hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, | 517 | hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, |
418 | &net->ct.hash[repl_hash]); | 518 | &net->ct.hash[reply_hash]); |
419 | } | 519 | } |
420 | 520 | ||
421 | int | 521 | int |
422 | nf_conntrack_hash_check_insert(struct nf_conn *ct) | 522 | nf_conntrack_hash_check_insert(struct nf_conn *ct) |
423 | { | 523 | { |
424 | struct net *net = nf_ct_net(ct); | 524 | struct net *net = nf_ct_net(ct); |
425 | unsigned int hash, repl_hash; | 525 | unsigned int hash, reply_hash; |
426 | struct nf_conntrack_tuple_hash *h; | 526 | struct nf_conntrack_tuple_hash *h; |
427 | struct hlist_nulls_node *n; | 527 | struct hlist_nulls_node *n; |
428 | u16 zone; | 528 | u16 zone; |
529 | unsigned int sequence; | ||
429 | 530 | ||
430 | zone = nf_ct_zone(ct); | 531 | zone = nf_ct_zone(ct); |
431 | hash = hash_conntrack(net, zone, | ||
432 | &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | ||
433 | repl_hash = hash_conntrack(net, zone, | ||
434 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple); | ||
435 | 532 | ||
436 | spin_lock_bh(&nf_conntrack_lock); | 533 | local_bh_disable(); |
534 | do { | ||
535 | sequence = read_seqcount_begin(&net->ct.generation); | ||
536 | hash = hash_conntrack(net, zone, | ||
537 | &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | ||
538 | reply_hash = hash_conntrack(net, zone, | ||
539 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple); | ||
540 | } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); | ||
437 | 541 | ||
438 | /* See if there's one in the list already, including reverse */ | 542 | /* See if there's one in the list already, including reverse */ |
439 | hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) | 543 | hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) |
@@ -441,7 +545,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) | |||
441 | &h->tuple) && | 545 | &h->tuple) && |
442 | zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) | 546 | zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) |
443 | goto out; | 547 | goto out; |
444 | hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode) | 548 | hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) |
445 | if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, | 549 | if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, |
446 | &h->tuple) && | 550 | &h->tuple) && |
447 | zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) | 551 | zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) |
@@ -451,15 +555,16 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) | |||
451 | smp_wmb(); | 555 | smp_wmb(); |
452 | /* The caller holds a reference to this object */ | 556 | /* The caller holds a reference to this object */ |
453 | atomic_set(&ct->ct_general.use, 2); | 557 | atomic_set(&ct->ct_general.use, 2); |
454 | __nf_conntrack_hash_insert(ct, hash, repl_hash); | 558 | __nf_conntrack_hash_insert(ct, hash, reply_hash); |
559 | nf_conntrack_double_unlock(hash, reply_hash); | ||
455 | NF_CT_STAT_INC(net, insert); | 560 | NF_CT_STAT_INC(net, insert); |
456 | spin_unlock_bh(&nf_conntrack_lock); | 561 | local_bh_enable(); |
457 | |||
458 | return 0; | 562 | return 0; |
459 | 563 | ||
460 | out: | 564 | out: |
565 | nf_conntrack_double_unlock(hash, reply_hash); | ||
461 | NF_CT_STAT_INC(net, insert_failed); | 566 | NF_CT_STAT_INC(net, insert_failed); |
462 | spin_unlock_bh(&nf_conntrack_lock); | 567 | local_bh_enable(); |
463 | return -EEXIST; | 568 | return -EEXIST; |
464 | } | 569 | } |
465 | EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); | 570 | EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); |
@@ -467,15 +572,22 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); | |||
467 | /* deletion from this larval template list happens via nf_ct_put() */ | 572 | /* deletion from this larval template list happens via nf_ct_put() */ |
468 | void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl) | 573 | void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl) |
469 | { | 574 | { |
575 | struct ct_pcpu *pcpu; | ||
576 | |||
470 | __set_bit(IPS_TEMPLATE_BIT, &tmpl->status); | 577 | __set_bit(IPS_TEMPLATE_BIT, &tmpl->status); |
471 | __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); | 578 | __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); |
472 | nf_conntrack_get(&tmpl->ct_general); | 579 | nf_conntrack_get(&tmpl->ct_general); |
473 | 580 | ||
474 | spin_lock_bh(&nf_conntrack_lock); | 581 | /* add this conntrack to the (per cpu) tmpl list */ |
582 | local_bh_disable(); | ||
583 | tmpl->cpu = smp_processor_id(); | ||
584 | pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu); | ||
585 | |||
586 | spin_lock(&pcpu->lock); | ||
475 | /* Overload tuple linked list to put us in template list. */ | 587 | /* Overload tuple linked list to put us in template list. */ |
476 | hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, | 588 | hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, |
477 | &net->ct.tmpl); | 589 | &pcpu->tmpl); |
478 | spin_unlock_bh(&nf_conntrack_lock); | 590 | spin_unlock_bh(&pcpu->lock); |
479 | } | 591 | } |
480 | EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert); | 592 | EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert); |
481 | 593 | ||
@@ -483,7 +595,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert); | |||
483 | int | 595 | int |
484 | __nf_conntrack_confirm(struct sk_buff *skb) | 596 | __nf_conntrack_confirm(struct sk_buff *skb) |
485 | { | 597 | { |
486 | unsigned int hash, repl_hash; | 598 | unsigned int hash, reply_hash; |
487 | struct nf_conntrack_tuple_hash *h; | 599 | struct nf_conntrack_tuple_hash *h; |
488 | struct nf_conn *ct; | 600 | struct nf_conn *ct; |
489 | struct nf_conn_help *help; | 601 | struct nf_conn_help *help; |
@@ -492,6 +604,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) | |||
492 | enum ip_conntrack_info ctinfo; | 604 | enum ip_conntrack_info ctinfo; |
493 | struct net *net; | 605 | struct net *net; |
494 | u16 zone; | 606 | u16 zone; |
607 | unsigned int sequence; | ||
495 | 608 | ||
496 | ct = nf_ct_get(skb, &ctinfo); | 609 | ct = nf_ct_get(skb, &ctinfo); |
497 | net = nf_ct_net(ct); | 610 | net = nf_ct_net(ct); |
@@ -504,31 +617,37 @@ __nf_conntrack_confirm(struct sk_buff *skb) | |||
504 | return NF_ACCEPT; | 617 | return NF_ACCEPT; |
505 | 618 | ||
506 | zone = nf_ct_zone(ct); | 619 | zone = nf_ct_zone(ct); |
507 | /* reuse the hash saved before */ | 620 | local_bh_disable(); |
508 | hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev; | 621 | |
509 | hash = hash_bucket(hash, net); | 622 | do { |
510 | repl_hash = hash_conntrack(net, zone, | 623 | sequence = read_seqcount_begin(&net->ct.generation); |
511 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple); | 624 | /* reuse the hash saved before */ |
625 | hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev; | ||
626 | hash = hash_bucket(hash, net); | ||
627 | reply_hash = hash_conntrack(net, zone, | ||
628 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple); | ||
629 | |||
630 | } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); | ||
512 | 631 | ||
513 | /* We're not in hash table, and we refuse to set up related | 632 | /* We're not in hash table, and we refuse to set up related |
514 | connections for unconfirmed conns. But packet copies and | 633 | * connections for unconfirmed conns. But packet copies and |
515 | REJECT will give spurious warnings here. */ | 634 | * REJECT will give spurious warnings here. |
635 | */ | ||
516 | /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ | 636 | /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ |
517 | 637 | ||
518 | /* No external references means no one else could have | 638 | /* No external references means no one else could have |
519 | confirmed us. */ | 639 | * confirmed us. |
640 | */ | ||
520 | NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); | 641 | NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); |
521 | pr_debug("Confirming conntrack %p\n", ct); | 642 | pr_debug("Confirming conntrack %p\n", ct); |
522 | |||
523 | spin_lock_bh(&nf_conntrack_lock); | ||
524 | |||
525 | /* We have to check the DYING flag inside the lock to prevent | 643 | /* We have to check the DYING flag inside the lock to prevent |
526 | a race against nf_ct_get_next_corpse() possibly called from | 644 | a race against nf_ct_get_next_corpse() possibly called from |
527 | user context, else we insert an already 'dead' hash, blocking | 645 | user context, else we insert an already 'dead' hash, blocking |
528 | further use of that particular connection -JM */ | 646 | further use of that particular connection -JM */ |
529 | 647 | ||
530 | if (unlikely(nf_ct_is_dying(ct))) { | 648 | if (unlikely(nf_ct_is_dying(ct))) { |
531 | spin_unlock_bh(&nf_conntrack_lock); | 649 | nf_conntrack_double_unlock(hash, reply_hash); |
650 | local_bh_enable(); | ||
532 | return NF_ACCEPT; | 651 | return NF_ACCEPT; |
533 | } | 652 | } |
534 | 653 | ||
@@ -540,14 +659,13 @@ __nf_conntrack_confirm(struct sk_buff *skb) | |||
540 | &h->tuple) && | 659 | &h->tuple) && |
541 | zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) | 660 | zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) |
542 | goto out; | 661 | goto out; |
543 | hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode) | 662 | hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) |
544 | if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, | 663 | if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, |
545 | &h->tuple) && | 664 | &h->tuple) && |
546 | zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) | 665 | zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) |
547 | goto out; | 666 | goto out; |
548 | 667 | ||
549 | /* Remove from unconfirmed list */ | 668 | nf_ct_del_from_dying_or_unconfirmed_list(ct); |
550 | hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); | ||
551 | 669 | ||
552 | /* Timer relative to confirmation time, not original | 670 | /* Timer relative to confirmation time, not original |
553 | setting time, otherwise we'd get timer wrap in | 671 | setting time, otherwise we'd get timer wrap in |
@@ -570,9 +688,10 @@ __nf_conntrack_confirm(struct sk_buff *skb) | |||
570 | * guarantee that no other CPU can find the conntrack before the above | 688 | * guarantee that no other CPU can find the conntrack before the above |
571 | * stores are visible. | 689 | * stores are visible. |
572 | */ | 690 | */ |
573 | __nf_conntrack_hash_insert(ct, hash, repl_hash); | 691 | __nf_conntrack_hash_insert(ct, hash, reply_hash); |
692 | nf_conntrack_double_unlock(hash, reply_hash); | ||
574 | NF_CT_STAT_INC(net, insert); | 693 | NF_CT_STAT_INC(net, insert); |
575 | spin_unlock_bh(&nf_conntrack_lock); | 694 | local_bh_enable(); |
576 | 695 | ||
577 | help = nfct_help(ct); | 696 | help = nfct_help(ct); |
578 | if (help && help->helper) | 697 | if (help && help->helper) |
@@ -583,8 +702,9 @@ __nf_conntrack_confirm(struct sk_buff *skb) | |||
583 | return NF_ACCEPT; | 702 | return NF_ACCEPT; |
584 | 703 | ||
585 | out: | 704 | out: |
705 | nf_conntrack_double_unlock(hash, reply_hash); | ||
586 | NF_CT_STAT_INC(net, insert_failed); | 706 | NF_CT_STAT_INC(net, insert_failed); |
587 | spin_unlock_bh(&nf_conntrack_lock); | 707 | local_bh_enable(); |
588 | return NF_DROP; | 708 | return NF_DROP; |
589 | } | 709 | } |
590 | EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); | 710 | EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); |
@@ -627,39 +747,48 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); | |||
627 | 747 | ||
628 | /* There's a small race here where we may free a just-assured | 748 | /* There's a small race here where we may free a just-assured |
629 | connection. Too bad: we're in trouble anyway. */ | 749 | connection. Too bad: we're in trouble anyway. */ |
630 | static noinline int early_drop(struct net *net, unsigned int hash) | 750 | static noinline int early_drop(struct net *net, unsigned int _hash) |
631 | { | 751 | { |
632 | /* Use oldest entry, which is roughly LRU */ | 752 | /* Use oldest entry, which is roughly LRU */ |
633 | struct nf_conntrack_tuple_hash *h; | 753 | struct nf_conntrack_tuple_hash *h; |
634 | struct nf_conn *ct = NULL, *tmp; | 754 | struct nf_conn *ct = NULL, *tmp; |
635 | struct hlist_nulls_node *n; | 755 | struct hlist_nulls_node *n; |
636 | unsigned int i, cnt = 0; | 756 | unsigned int i = 0, cnt = 0; |
637 | int dropped = 0; | 757 | int dropped = 0; |
758 | unsigned int hash, sequence; | ||
759 | spinlock_t *lockp; | ||
638 | 760 | ||
639 | rcu_read_lock(); | 761 | local_bh_disable(); |
640 | for (i = 0; i < net->ct.htable_size; i++) { | 762 | restart: |
763 | sequence = read_seqcount_begin(&net->ct.generation); | ||
764 | hash = hash_bucket(_hash, net); | ||
765 | for (; i < net->ct.htable_size; i++) { | ||
766 | lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS]; | ||
767 | spin_lock(lockp); | ||
768 | if (read_seqcount_retry(&net->ct.generation, sequence)) { | ||
769 | spin_unlock(lockp); | ||
770 | goto restart; | ||
771 | } | ||
641 | hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], | 772 | hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], |
642 | hnnode) { | 773 | hnnode) { |
643 | tmp = nf_ct_tuplehash_to_ctrack(h); | 774 | tmp = nf_ct_tuplehash_to_ctrack(h); |
644 | if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) | 775 | if (!test_bit(IPS_ASSURED_BIT, &tmp->status) && |
776 | !nf_ct_is_dying(tmp) && | ||
777 | atomic_inc_not_zero(&tmp->ct_general.use)) { | ||
645 | ct = tmp; | 778 | ct = tmp; |
779 | break; | ||
780 | } | ||
646 | cnt++; | 781 | cnt++; |
647 | } | 782 | } |
648 | 783 | ||
649 | if (ct != NULL) { | 784 | hash = (hash + 1) % net->ct.htable_size; |
650 | if (likely(!nf_ct_is_dying(ct) && | 785 | spin_unlock(lockp); |
651 | atomic_inc_not_zero(&ct->ct_general.use))) | ||
652 | break; | ||
653 | else | ||
654 | ct = NULL; | ||
655 | } | ||
656 | 786 | ||
657 | if (cnt >= NF_CT_EVICTION_RANGE) | 787 | if (ct || cnt >= NF_CT_EVICTION_RANGE) |
658 | break; | 788 | break; |
659 | 789 | ||
660 | hash = (hash + 1) % net->ct.htable_size; | ||
661 | } | 790 | } |
662 | rcu_read_unlock(); | 791 | local_bh_enable(); |
663 | 792 | ||
664 | if (!ct) | 793 | if (!ct) |
665 | return dropped; | 794 | return dropped; |
@@ -708,7 +837,7 @@ __nf_conntrack_alloc(struct net *net, u16 zone, | |||
708 | 837 | ||
709 | if (nf_conntrack_max && | 838 | if (nf_conntrack_max && |
710 | unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { | 839 | unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { |
711 | if (!early_drop(net, hash_bucket(hash, net))) { | 840 | if (!early_drop(net, hash)) { |
712 | atomic_dec(&net->ct.count); | 841 | atomic_dec(&net->ct.count); |
713 | net_warn_ratelimited("nf_conntrack: table full, dropping packet\n"); | 842 | net_warn_ratelimited("nf_conntrack: table full, dropping packet\n"); |
714 | return ERR_PTR(-ENOMEM); | 843 | return ERR_PTR(-ENOMEM); |
@@ -805,7 +934,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, | |||
805 | struct nf_conn_help *help; | 934 | struct nf_conn_help *help; |
806 | struct nf_conntrack_tuple repl_tuple; | 935 | struct nf_conntrack_tuple repl_tuple; |
807 | struct nf_conntrack_ecache *ecache; | 936 | struct nf_conntrack_ecache *ecache; |
808 | struct nf_conntrack_expect *exp; | 937 | struct nf_conntrack_expect *exp = NULL; |
809 | u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; | 938 | u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; |
810 | struct nf_conn_timeout *timeout_ext; | 939 | struct nf_conn_timeout *timeout_ext; |
811 | unsigned int *timeouts; | 940 | unsigned int *timeouts; |
@@ -849,42 +978,44 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, | |||
849 | ecache ? ecache->expmask : 0, | 978 | ecache ? ecache->expmask : 0, |
850 | GFP_ATOMIC); | 979 | GFP_ATOMIC); |
851 | 980 | ||
852 | spin_lock_bh(&nf_conntrack_lock); | 981 | local_bh_disable(); |
853 | exp = nf_ct_find_expectation(net, zone, tuple); | 982 | if (net->ct.expect_count) { |
854 | if (exp) { | 983 | spin_lock(&nf_conntrack_expect_lock); |
855 | pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", | 984 | exp = nf_ct_find_expectation(net, zone, tuple); |
856 | ct, exp); | 985 | if (exp) { |
857 | /* Welcome, Mr. Bond. We've been expecting you... */ | 986 | pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", |
858 | __set_bit(IPS_EXPECTED_BIT, &ct->status); | 987 | ct, exp); |
859 | ct->master = exp->master; | 988 | /* Welcome, Mr. Bond. We've been expecting you... */ |
860 | if (exp->helper) { | 989 | __set_bit(IPS_EXPECTED_BIT, &ct->status); |
861 | help = nf_ct_helper_ext_add(ct, exp->helper, | 990 | /* exp->master safe, refcnt bumped in nf_ct_find_expectation */ |
862 | GFP_ATOMIC); | 991 | ct->master = exp->master; |
863 | if (help) | 992 | if (exp->helper) { |
864 | rcu_assign_pointer(help->helper, exp->helper); | 993 | help = nf_ct_helper_ext_add(ct, exp->helper, |
865 | } | 994 | GFP_ATOMIC); |
995 | if (help) | ||
996 | rcu_assign_pointer(help->helper, exp->helper); | ||
997 | } | ||
866 | 998 | ||
867 | #ifdef CONFIG_NF_CONNTRACK_MARK | 999 | #ifdef CONFIG_NF_CONNTRACK_MARK |
868 | ct->mark = exp->master->mark; | 1000 | ct->mark = exp->master->mark; |
869 | #endif | 1001 | #endif |
870 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | 1002 | #ifdef CONFIG_NF_CONNTRACK_SECMARK |
871 | ct->secmark = exp->master->secmark; | 1003 | ct->secmark = exp->master->secmark; |
872 | #endif | 1004 | #endif |
873 | nf_conntrack_get(&ct->master->ct_general); | 1005 | NF_CT_STAT_INC(net, expect_new); |
874 | NF_CT_STAT_INC(net, expect_new); | 1006 | } |
875 | } else { | 1007 | spin_unlock(&nf_conntrack_expect_lock); |
1008 | } | ||
1009 | if (!exp) { | ||
876 | __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); | 1010 | __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); |
877 | NF_CT_STAT_INC(net, new); | 1011 | NF_CT_STAT_INC(net, new); |
878 | } | 1012 | } |
879 | 1013 | ||
880 | /* Now it is inserted into the unconfirmed list, bump refcount */ | 1014 | /* Now it is inserted into the unconfirmed list, bump refcount */ |
881 | nf_conntrack_get(&ct->ct_general); | 1015 | nf_conntrack_get(&ct->ct_general); |
1016 | nf_ct_add_to_unconfirmed_list(ct); | ||
882 | 1017 | ||
883 | /* Overload tuple linked list to put us in unconfirmed list. */ | 1018 | local_bh_enable(); |
884 | hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, | ||
885 | &net->ct.unconfirmed); | ||
886 | |||
887 | spin_unlock_bh(&nf_conntrack_lock); | ||
888 | 1019 | ||
889 | if (exp) { | 1020 | if (exp) { |
890 | if (exp->expectfn) | 1021 | if (exp->expectfn) |
@@ -1254,27 +1385,42 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), | |||
1254 | struct nf_conntrack_tuple_hash *h; | 1385 | struct nf_conntrack_tuple_hash *h; |
1255 | struct nf_conn *ct; | 1386 | struct nf_conn *ct; |
1256 | struct hlist_nulls_node *n; | 1387 | struct hlist_nulls_node *n; |
1388 | int cpu; | ||
1389 | spinlock_t *lockp; | ||
1257 | 1390 | ||
1258 | spin_lock_bh(&nf_conntrack_lock); | ||
1259 | for (; *bucket < net->ct.htable_size; (*bucket)++) { | 1391 | for (; *bucket < net->ct.htable_size; (*bucket)++) { |
1260 | hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { | 1392 | lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS]; |
1261 | if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) | 1393 | local_bh_disable(); |
1262 | continue; | 1394 | spin_lock(lockp); |
1395 | if (*bucket < net->ct.htable_size) { | ||
1396 | hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { | ||
1397 | if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) | ||
1398 | continue; | ||
1399 | ct = nf_ct_tuplehash_to_ctrack(h); | ||
1400 | if (iter(ct, data)) | ||
1401 | goto found; | ||
1402 | } | ||
1403 | } | ||
1404 | spin_unlock(lockp); | ||
1405 | local_bh_enable(); | ||
1406 | } | ||
1407 | |||
1408 | for_each_possible_cpu(cpu) { | ||
1409 | struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); | ||
1410 | |||
1411 | spin_lock_bh(&pcpu->lock); | ||
1412 | hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) { | ||
1263 | ct = nf_ct_tuplehash_to_ctrack(h); | 1413 | ct = nf_ct_tuplehash_to_ctrack(h); |
1264 | if (iter(ct, data)) | 1414 | if (iter(ct, data)) |
1265 | goto found; | 1415 | set_bit(IPS_DYING_BIT, &ct->status); |
1266 | } | 1416 | } |
1417 | spin_unlock_bh(&pcpu->lock); | ||
1267 | } | 1418 | } |
1268 | hlist_nulls_for_each_entry(h, n, &net->ct.unconfirmed, hnnode) { | ||
1269 | ct = nf_ct_tuplehash_to_ctrack(h); | ||
1270 | if (iter(ct, data)) | ||
1271 | set_bit(IPS_DYING_BIT, &ct->status); | ||
1272 | } | ||
1273 | spin_unlock_bh(&nf_conntrack_lock); | ||
1274 | return NULL; | 1419 | return NULL; |
1275 | found: | 1420 | found: |
1276 | atomic_inc(&ct->ct_general.use); | 1421 | atomic_inc(&ct->ct_general.use); |
1277 | spin_unlock_bh(&nf_conntrack_lock); | 1422 | spin_unlock(lockp); |
1423 | local_bh_enable(); | ||
1278 | return ct; | 1424 | return ct; |
1279 | } | 1425 | } |
1280 | 1426 | ||
@@ -1323,14 +1469,19 @@ static void nf_ct_release_dying_list(struct net *net) | |||
1323 | struct nf_conntrack_tuple_hash *h; | 1469 | struct nf_conntrack_tuple_hash *h; |
1324 | struct nf_conn *ct; | 1470 | struct nf_conn *ct; |
1325 | struct hlist_nulls_node *n; | 1471 | struct hlist_nulls_node *n; |
1472 | int cpu; | ||
1326 | 1473 | ||
1327 | spin_lock_bh(&nf_conntrack_lock); | 1474 | for_each_possible_cpu(cpu) { |
1328 | hlist_nulls_for_each_entry(h, n, &net->ct.dying, hnnode) { | 1475 | struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); |
1329 | ct = nf_ct_tuplehash_to_ctrack(h); | 1476 | |
1330 | /* never fails to remove them, no listeners at this point */ | 1477 | spin_lock_bh(&pcpu->lock); |
1331 | nf_ct_kill(ct); | 1478 | hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { |
1479 | ct = nf_ct_tuplehash_to_ctrack(h); | ||
1480 | /* never fails to remove them, no listeners at this point */ | ||
1481 | nf_ct_kill(ct); | ||
1482 | } | ||
1483 | spin_unlock_bh(&pcpu->lock); | ||
1332 | } | 1484 | } |
1333 | spin_unlock_bh(&nf_conntrack_lock); | ||
1334 | } | 1485 | } |
1335 | 1486 | ||
1336 | static int untrack_refs(void) | 1487 | static int untrack_refs(void) |
@@ -1417,6 +1568,7 @@ i_see_dead_people: | |||
1417 | kmem_cache_destroy(net->ct.nf_conntrack_cachep); | 1568 | kmem_cache_destroy(net->ct.nf_conntrack_cachep); |
1418 | kfree(net->ct.slabname); | 1569 | kfree(net->ct.slabname); |
1419 | free_percpu(net->ct.stat); | 1570 | free_percpu(net->ct.stat); |
1571 | free_percpu(net->ct.pcpu_lists); | ||
1420 | } | 1572 | } |
1421 | } | 1573 | } |
1422 | 1574 | ||
@@ -1469,12 +1621,16 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) | |||
1469 | if (!hash) | 1621 | if (!hash) |
1470 | return -ENOMEM; | 1622 | return -ENOMEM; |
1471 | 1623 | ||
1624 | local_bh_disable(); | ||
1625 | nf_conntrack_all_lock(); | ||
1626 | write_seqcount_begin(&init_net.ct.generation); | ||
1627 | |||
1472 | /* Lookups in the old hash might happen in parallel, which means we | 1628 | /* Lookups in the old hash might happen in parallel, which means we |
1473 | * might get false negatives during connection lookup. New connections | 1629 | * might get false negatives during connection lookup. New connections |
1474 | * created because of a false negative won't make it into the hash | 1630 | * created because of a false negative won't make it into the hash |
1475 | * though since that required taking the lock. | 1631 | * though since that required taking the locks. |
1476 | */ | 1632 | */ |
1477 | spin_lock_bh(&nf_conntrack_lock); | 1633 | |
1478 | for (i = 0; i < init_net.ct.htable_size; i++) { | 1634 | for (i = 0; i < init_net.ct.htable_size; i++) { |
1479 | while (!hlist_nulls_empty(&init_net.ct.hash[i])) { | 1635 | while (!hlist_nulls_empty(&init_net.ct.hash[i])) { |
1480 | h = hlist_nulls_entry(init_net.ct.hash[i].first, | 1636 | h = hlist_nulls_entry(init_net.ct.hash[i].first, |
@@ -1491,7 +1647,10 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) | |||
1491 | 1647 | ||
1492 | init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; | 1648 | init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; |
1493 | init_net.ct.hash = hash; | 1649 | init_net.ct.hash = hash; |
1494 | spin_unlock_bh(&nf_conntrack_lock); | 1650 | |
1651 | write_seqcount_end(&init_net.ct.generation); | ||
1652 | nf_conntrack_all_unlock(); | ||
1653 | local_bh_enable(); | ||
1495 | 1654 | ||
1496 | nf_ct_free_hashtable(old_hash, old_size); | 1655 | nf_ct_free_hashtable(old_hash, old_size); |
1497 | return 0; | 1656 | return 0; |
@@ -1513,7 +1672,10 @@ EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); | |||
1513 | int nf_conntrack_init_start(void) | 1672 | int nf_conntrack_init_start(void) |
1514 | { | 1673 | { |
1515 | int max_factor = 8; | 1674 | int max_factor = 8; |
1516 | int ret, cpu; | 1675 | int i, ret, cpu; |
1676 | |||
1677 | for (i = 0; i < ARRAY_SIZE(nf_conntrack_locks); i++) | ||
1678 | spin_lock_init(&nf_conntrack_locks[i]); | ||
1517 | 1679 | ||
1518 | /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB | 1680 | /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB |
1519 | * machine has 512 buckets. >= 1GB machines have 16384 buckets. */ | 1681 | * machine has 512 buckets. >= 1GB machines have 16384 buckets. */ |
@@ -1629,37 +1791,43 @@ void nf_conntrack_init_end(void) | |||
1629 | 1791 | ||
1630 | int nf_conntrack_init_net(struct net *net) | 1792 | int nf_conntrack_init_net(struct net *net) |
1631 | { | 1793 | { |
1632 | int ret; | 1794 | int ret = -ENOMEM; |
1795 | int cpu; | ||
1633 | 1796 | ||
1634 | atomic_set(&net->ct.count, 0); | 1797 | atomic_set(&net->ct.count, 0); |
1635 | INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL); | 1798 | |
1636 | INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL); | 1799 | net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu); |
1637 | INIT_HLIST_NULLS_HEAD(&net->ct.tmpl, TEMPLATE_NULLS_VAL); | 1800 | if (!net->ct.pcpu_lists) |
1638 | net->ct.stat = alloc_percpu(struct ip_conntrack_stat); | ||
1639 | if (!net->ct.stat) { | ||
1640 | ret = -ENOMEM; | ||
1641 | goto err_stat; | 1801 | goto err_stat; |
1802 | |||
1803 | for_each_possible_cpu(cpu) { | ||
1804 | struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); | ||
1805 | |||
1806 | spin_lock_init(&pcpu->lock); | ||
1807 | INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL); | ||
1808 | INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL); | ||
1809 | INIT_HLIST_NULLS_HEAD(&pcpu->tmpl, TEMPLATE_NULLS_VAL); | ||
1642 | } | 1810 | } |
1643 | 1811 | ||
1812 | net->ct.stat = alloc_percpu(struct ip_conntrack_stat); | ||
1813 | if (!net->ct.stat) | ||
1814 | goto err_pcpu_lists; | ||
1815 | |||
1644 | net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); | 1816 | net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); |
1645 | if (!net->ct.slabname) { | 1817 | if (!net->ct.slabname) |
1646 | ret = -ENOMEM; | ||
1647 | goto err_slabname; | 1818 | goto err_slabname; |
1648 | } | ||
1649 | 1819 | ||
1650 | net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname, | 1820 | net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname, |
1651 | sizeof(struct nf_conn), 0, | 1821 | sizeof(struct nf_conn), 0, |
1652 | SLAB_DESTROY_BY_RCU, NULL); | 1822 | SLAB_DESTROY_BY_RCU, NULL); |
1653 | if (!net->ct.nf_conntrack_cachep) { | 1823 | if (!net->ct.nf_conntrack_cachep) { |
1654 | printk(KERN_ERR "Unable to create nf_conn slab cache\n"); | 1824 | printk(KERN_ERR "Unable to create nf_conn slab cache\n"); |
1655 | ret = -ENOMEM; | ||
1656 | goto err_cache; | 1825 | goto err_cache; |
1657 | } | 1826 | } |
1658 | 1827 | ||
1659 | net->ct.htable_size = nf_conntrack_htable_size; | 1828 | net->ct.htable_size = nf_conntrack_htable_size; |
1660 | net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1); | 1829 | net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1); |
1661 | if (!net->ct.hash) { | 1830 | if (!net->ct.hash) { |
1662 | ret = -ENOMEM; | ||
1663 | printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); | 1831 | printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); |
1664 | goto err_hash; | 1832 | goto err_hash; |
1665 | } | 1833 | } |
@@ -1701,6 +1869,8 @@ err_cache: | |||
1701 | kfree(net->ct.slabname); | 1869 | kfree(net->ct.slabname); |
1702 | err_slabname: | 1870 | err_slabname: |
1703 | free_percpu(net->ct.stat); | 1871 | free_percpu(net->ct.stat); |
1872 | err_pcpu_lists: | ||
1873 | free_percpu(net->ct.pcpu_lists); | ||
1704 | err_stat: | 1874 | err_stat: |
1705 | return ret; | 1875 | return ret; |
1706 | } | 1876 | } |
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 4fd1ca94fd4a..f87e8f68ad45 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c | |||
@@ -66,9 +66,9 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect) | |||
66 | { | 66 | { |
67 | struct nf_conntrack_expect *exp = (void *)ul_expect; | 67 | struct nf_conntrack_expect *exp = (void *)ul_expect; |
68 | 68 | ||
69 | spin_lock_bh(&nf_conntrack_lock); | 69 | spin_lock_bh(&nf_conntrack_expect_lock); |
70 | nf_ct_unlink_expect(exp); | 70 | nf_ct_unlink_expect(exp); |
71 | spin_unlock_bh(&nf_conntrack_lock); | 71 | spin_unlock_bh(&nf_conntrack_expect_lock); |
72 | nf_ct_expect_put(exp); | 72 | nf_ct_expect_put(exp); |
73 | } | 73 | } |
74 | 74 | ||
@@ -155,6 +155,18 @@ nf_ct_find_expectation(struct net *net, u16 zone, | |||
155 | if (!nf_ct_is_confirmed(exp->master)) | 155 | if (!nf_ct_is_confirmed(exp->master)) |
156 | return NULL; | 156 | return NULL; |
157 | 157 | ||
158 | /* Avoid race with other CPUs, that for exp->master ct, is | ||
159 | * about to invoke ->destroy(), or nf_ct_delete() via timeout | ||
160 | * or early_drop(). | ||
161 | * | ||
162 | * The atomic_inc_not_zero() check tells: If that fails, we | ||
163 | * know that the ct is being destroyed. If it succeeds, we | ||
164 | * can be sure the ct cannot disappear underneath. | ||
165 | */ | ||
166 | if (unlikely(nf_ct_is_dying(exp->master) || | ||
167 | !atomic_inc_not_zero(&exp->master->ct_general.use))) | ||
168 | return NULL; | ||
169 | |||
158 | if (exp->flags & NF_CT_EXPECT_PERMANENT) { | 170 | if (exp->flags & NF_CT_EXPECT_PERMANENT) { |
159 | atomic_inc(&exp->use); | 171 | atomic_inc(&exp->use); |
160 | return exp; | 172 | return exp; |
@@ -162,6 +174,8 @@ nf_ct_find_expectation(struct net *net, u16 zone, | |||
162 | nf_ct_unlink_expect(exp); | 174 | nf_ct_unlink_expect(exp); |
163 | return exp; | 175 | return exp; |
164 | } | 176 | } |
177 | /* Undo exp->master refcnt increase, if del_timer() failed */ | ||
178 | nf_ct_put(exp->master); | ||
165 | 179 | ||
166 | return NULL; | 180 | return NULL; |
167 | } | 181 | } |
@@ -177,12 +191,14 @@ void nf_ct_remove_expectations(struct nf_conn *ct) | |||
177 | if (!help) | 191 | if (!help) |
178 | return; | 192 | return; |
179 | 193 | ||
194 | spin_lock_bh(&nf_conntrack_expect_lock); | ||
180 | hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { | 195 | hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { |
181 | if (del_timer(&exp->timeout)) { | 196 | if (del_timer(&exp->timeout)) { |
182 | nf_ct_unlink_expect(exp); | 197 | nf_ct_unlink_expect(exp); |
183 | nf_ct_expect_put(exp); | 198 | nf_ct_expect_put(exp); |
184 | } | 199 | } |
185 | } | 200 | } |
201 | spin_unlock_bh(&nf_conntrack_expect_lock); | ||
186 | } | 202 | } |
187 | EXPORT_SYMBOL_GPL(nf_ct_remove_expectations); | 203 | EXPORT_SYMBOL_GPL(nf_ct_remove_expectations); |
188 | 204 | ||
@@ -217,12 +233,12 @@ static inline int expect_matches(const struct nf_conntrack_expect *a, | |||
217 | /* Generally a bad idea to call this: could have matched already. */ | 233 | /* Generally a bad idea to call this: could have matched already. */ |
218 | void nf_ct_unexpect_related(struct nf_conntrack_expect *exp) | 234 | void nf_ct_unexpect_related(struct nf_conntrack_expect *exp) |
219 | { | 235 | { |
220 | spin_lock_bh(&nf_conntrack_lock); | 236 | spin_lock_bh(&nf_conntrack_expect_lock); |
221 | if (del_timer(&exp->timeout)) { | 237 | if (del_timer(&exp->timeout)) { |
222 | nf_ct_unlink_expect(exp); | 238 | nf_ct_unlink_expect(exp); |
223 | nf_ct_expect_put(exp); | 239 | nf_ct_expect_put(exp); |
224 | } | 240 | } |
225 | spin_unlock_bh(&nf_conntrack_lock); | 241 | spin_unlock_bh(&nf_conntrack_expect_lock); |
226 | } | 242 | } |
227 | EXPORT_SYMBOL_GPL(nf_ct_unexpect_related); | 243 | EXPORT_SYMBOL_GPL(nf_ct_unexpect_related); |
228 | 244 | ||
@@ -335,7 +351,7 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp) | |||
335 | setup_timer(&exp->timeout, nf_ct_expectation_timed_out, | 351 | setup_timer(&exp->timeout, nf_ct_expectation_timed_out, |
336 | (unsigned long)exp); | 352 | (unsigned long)exp); |
337 | helper = rcu_dereference_protected(master_help->helper, | 353 | helper = rcu_dereference_protected(master_help->helper, |
338 | lockdep_is_held(&nf_conntrack_lock)); | 354 | lockdep_is_held(&nf_conntrack_expect_lock)); |
339 | if (helper) { | 355 | if (helper) { |
340 | exp->timeout.expires = jiffies + | 356 | exp->timeout.expires = jiffies + |
341 | helper->expect_policy[exp->class].timeout * HZ; | 357 | helper->expect_policy[exp->class].timeout * HZ; |
@@ -395,7 +411,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) | |||
395 | } | 411 | } |
396 | /* Will be over limit? */ | 412 | /* Will be over limit? */ |
397 | helper = rcu_dereference_protected(master_help->helper, | 413 | helper = rcu_dereference_protected(master_help->helper, |
398 | lockdep_is_held(&nf_conntrack_lock)); | 414 | lockdep_is_held(&nf_conntrack_expect_lock)); |
399 | if (helper) { | 415 | if (helper) { |
400 | p = &helper->expect_policy[expect->class]; | 416 | p = &helper->expect_policy[expect->class]; |
401 | if (p->max_expected && | 417 | if (p->max_expected && |
@@ -417,12 +433,12 @@ out: | |||
417 | return ret; | 433 | return ret; |
418 | } | 434 | } |
419 | 435 | ||
420 | int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, | 436 | int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, |
421 | u32 portid, int report) | 437 | u32 portid, int report) |
422 | { | 438 | { |
423 | int ret; | 439 | int ret; |
424 | 440 | ||
425 | spin_lock_bh(&nf_conntrack_lock); | 441 | spin_lock_bh(&nf_conntrack_expect_lock); |
426 | ret = __nf_ct_expect_check(expect); | 442 | ret = __nf_ct_expect_check(expect); |
427 | if (ret <= 0) | 443 | if (ret <= 0) |
428 | goto out; | 444 | goto out; |
@@ -430,11 +446,11 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, | |||
430 | ret = nf_ct_expect_insert(expect); | 446 | ret = nf_ct_expect_insert(expect); |
431 | if (ret < 0) | 447 | if (ret < 0) |
432 | goto out; | 448 | goto out; |
433 | spin_unlock_bh(&nf_conntrack_lock); | 449 | spin_unlock_bh(&nf_conntrack_expect_lock); |
434 | nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report); | 450 | nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report); |
435 | return ret; | 451 | return ret; |
436 | out: | 452 | out: |
437 | spin_unlock_bh(&nf_conntrack_lock); | 453 | spin_unlock_bh(&nf_conntrack_expect_lock); |
438 | return ret; | 454 | return ret; |
439 | } | 455 | } |
440 | EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); | 456 | EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); |
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 70866d192efc..3a3a60b126e0 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c | |||
@@ -1476,7 +1476,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, | |||
1476 | nf_ct_refresh(ct, skb, info->timeout * HZ); | 1476 | nf_ct_refresh(ct, skb, info->timeout * HZ); |
1477 | 1477 | ||
1478 | /* Set expect timeout */ | 1478 | /* Set expect timeout */ |
1479 | spin_lock_bh(&nf_conntrack_lock); | 1479 | spin_lock_bh(&nf_conntrack_expect_lock); |
1480 | exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3, | 1480 | exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3, |
1481 | info->sig_port[!dir]); | 1481 | info->sig_port[!dir]); |
1482 | if (exp) { | 1482 | if (exp) { |
@@ -1486,7 +1486,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, | |||
1486 | nf_ct_dump_tuple(&exp->tuple); | 1486 | nf_ct_dump_tuple(&exp->tuple); |
1487 | set_expect_timeout(exp, info->timeout); | 1487 | set_expect_timeout(exp, info->timeout); |
1488 | } | 1488 | } |
1489 | spin_unlock_bh(&nf_conntrack_lock); | 1489 | spin_unlock_bh(&nf_conntrack_expect_lock); |
1490 | } | 1490 | } |
1491 | 1491 | ||
1492 | return 0; | 1492 | return 0; |
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 974a2a4adefa..5b3eae7d4c9a 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c | |||
@@ -250,16 +250,14 @@ out: | |||
250 | } | 250 | } |
251 | EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); | 251 | EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); |
252 | 252 | ||
253 | /* appropiate ct lock protecting must be taken by caller */ | ||
253 | static inline int unhelp(struct nf_conntrack_tuple_hash *i, | 254 | static inline int unhelp(struct nf_conntrack_tuple_hash *i, |
254 | const struct nf_conntrack_helper *me) | 255 | const struct nf_conntrack_helper *me) |
255 | { | 256 | { |
256 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); | 257 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); |
257 | struct nf_conn_help *help = nfct_help(ct); | 258 | struct nf_conn_help *help = nfct_help(ct); |
258 | 259 | ||
259 | if (help && rcu_dereference_protected( | 260 | if (help && rcu_dereference_raw(help->helper) == me) { |
260 | help->helper, | ||
261 | lockdep_is_held(&nf_conntrack_lock) | ||
262 | ) == me) { | ||
263 | nf_conntrack_event(IPCT_HELPER, ct); | 261 | nf_conntrack_event(IPCT_HELPER, ct); |
264 | RCU_INIT_POINTER(help->helper, NULL); | 262 | RCU_INIT_POINTER(help->helper, NULL); |
265 | } | 263 | } |
@@ -284,17 +282,17 @@ static LIST_HEAD(nf_ct_helper_expectfn_list); | |||
284 | 282 | ||
285 | void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n) | 283 | void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n) |
286 | { | 284 | { |
287 | spin_lock_bh(&nf_conntrack_lock); | 285 | spin_lock_bh(&nf_conntrack_expect_lock); |
288 | list_add_rcu(&n->head, &nf_ct_helper_expectfn_list); | 286 | list_add_rcu(&n->head, &nf_ct_helper_expectfn_list); |
289 | spin_unlock_bh(&nf_conntrack_lock); | 287 | spin_unlock_bh(&nf_conntrack_expect_lock); |
290 | } | 288 | } |
291 | EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_register); | 289 | EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_register); |
292 | 290 | ||
293 | void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n) | 291 | void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n) |
294 | { | 292 | { |
295 | spin_lock_bh(&nf_conntrack_lock); | 293 | spin_lock_bh(&nf_conntrack_expect_lock); |
296 | list_del_rcu(&n->head); | 294 | list_del_rcu(&n->head); |
297 | spin_unlock_bh(&nf_conntrack_lock); | 295 | spin_unlock_bh(&nf_conntrack_expect_lock); |
298 | } | 296 | } |
299 | EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister); | 297 | EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister); |
300 | 298 | ||
@@ -396,15 +394,17 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, | |||
396 | const struct hlist_node *next; | 394 | const struct hlist_node *next; |
397 | const struct hlist_nulls_node *nn; | 395 | const struct hlist_nulls_node *nn; |
398 | unsigned int i; | 396 | unsigned int i; |
397 | int cpu; | ||
399 | 398 | ||
400 | /* Get rid of expectations */ | 399 | /* Get rid of expectations */ |
400 | spin_lock_bh(&nf_conntrack_expect_lock); | ||
401 | for (i = 0; i < nf_ct_expect_hsize; i++) { | 401 | for (i = 0; i < nf_ct_expect_hsize; i++) { |
402 | hlist_for_each_entry_safe(exp, next, | 402 | hlist_for_each_entry_safe(exp, next, |
403 | &net->ct.expect_hash[i], hnode) { | 403 | &net->ct.expect_hash[i], hnode) { |
404 | struct nf_conn_help *help = nfct_help(exp->master); | 404 | struct nf_conn_help *help = nfct_help(exp->master); |
405 | if ((rcu_dereference_protected( | 405 | if ((rcu_dereference_protected( |
406 | help->helper, | 406 | help->helper, |
407 | lockdep_is_held(&nf_conntrack_lock) | 407 | lockdep_is_held(&nf_conntrack_expect_lock) |
408 | ) == me || exp->helper == me) && | 408 | ) == me || exp->helper == me) && |
409 | del_timer(&exp->timeout)) { | 409 | del_timer(&exp->timeout)) { |
410 | nf_ct_unlink_expect(exp); | 410 | nf_ct_unlink_expect(exp); |
@@ -412,14 +412,27 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, | |||
412 | } | 412 | } |
413 | } | 413 | } |
414 | } | 414 | } |
415 | spin_unlock_bh(&nf_conntrack_expect_lock); | ||
415 | 416 | ||
416 | /* Get rid of expecteds, set helpers to NULL. */ | 417 | /* Get rid of expecteds, set helpers to NULL. */ |
417 | hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) | 418 | for_each_possible_cpu(cpu) { |
418 | unhelp(h, me); | 419 | struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); |
419 | for (i = 0; i < net->ct.htable_size; i++) { | 420 | |
420 | hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) | 421 | spin_lock_bh(&pcpu->lock); |
422 | hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode) | ||
421 | unhelp(h, me); | 423 | unhelp(h, me); |
424 | spin_unlock_bh(&pcpu->lock); | ||
425 | } | ||
426 | local_bh_disable(); | ||
427 | for (i = 0; i < net->ct.htable_size; i++) { | ||
428 | spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); | ||
429 | if (i < net->ct.htable_size) { | ||
430 | hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) | ||
431 | unhelp(h, me); | ||
432 | } | ||
433 | spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); | ||
422 | } | 434 | } |
435 | local_bh_enable(); | ||
423 | } | 436 | } |
424 | 437 | ||
425 | void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) | 438 | void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) |
@@ -437,10 +450,8 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) | |||
437 | synchronize_rcu(); | 450 | synchronize_rcu(); |
438 | 451 | ||
439 | rtnl_lock(); | 452 | rtnl_lock(); |
440 | spin_lock_bh(&nf_conntrack_lock); | ||
441 | for_each_net(net) | 453 | for_each_net(net) |
442 | __nf_conntrack_helper_unregister(me, net); | 454 | __nf_conntrack_helper_unregister(me, net); |
443 | spin_unlock_bh(&nf_conntrack_lock); | ||
444 | rtnl_unlock(); | 455 | rtnl_unlock(); |
445 | } | 456 | } |
446 | EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); | 457 | EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); |
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index b9f0e0374322..ccc46fa5edbc 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c | |||
@@ -764,14 +764,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) | |||
764 | struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); | 764 | struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); |
765 | u_int8_t l3proto = nfmsg->nfgen_family; | 765 | u_int8_t l3proto = nfmsg->nfgen_family; |
766 | int res; | 766 | int res; |
767 | spinlock_t *lockp; | ||
768 | |||
767 | #ifdef CONFIG_NF_CONNTRACK_MARK | 769 | #ifdef CONFIG_NF_CONNTRACK_MARK |
768 | const struct ctnetlink_dump_filter *filter = cb->data; | 770 | const struct ctnetlink_dump_filter *filter = cb->data; |
769 | #endif | 771 | #endif |
770 | 772 | ||
771 | spin_lock_bh(&nf_conntrack_lock); | ||
772 | last = (struct nf_conn *)cb->args[1]; | 773 | last = (struct nf_conn *)cb->args[1]; |
774 | |||
775 | local_bh_disable(); | ||
773 | for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) { | 776 | for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) { |
774 | restart: | 777 | restart: |
778 | lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS]; | ||
779 | spin_lock(lockp); | ||
780 | if (cb->args[0] >= net->ct.htable_size) { | ||
781 | spin_unlock(lockp); | ||
782 | goto out; | ||
783 | } | ||
775 | hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]], | 784 | hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]], |
776 | hnnode) { | 785 | hnnode) { |
777 | if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) | 786 | if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) |
@@ -803,16 +812,18 @@ restart: | |||
803 | if (res < 0) { | 812 | if (res < 0) { |
804 | nf_conntrack_get(&ct->ct_general); | 813 | nf_conntrack_get(&ct->ct_general); |
805 | cb->args[1] = (unsigned long)ct; | 814 | cb->args[1] = (unsigned long)ct; |
815 | spin_unlock(lockp); | ||
806 | goto out; | 816 | goto out; |
807 | } | 817 | } |
808 | } | 818 | } |
819 | spin_unlock(lockp); | ||
809 | if (cb->args[1]) { | 820 | if (cb->args[1]) { |
810 | cb->args[1] = 0; | 821 | cb->args[1] = 0; |
811 | goto restart; | 822 | goto restart; |
812 | } | 823 | } |
813 | } | 824 | } |
814 | out: | 825 | out: |
815 | spin_unlock_bh(&nf_conntrack_lock); | 826 | local_bh_enable(); |
816 | if (last) | 827 | if (last) |
817 | nf_ct_put(last); | 828 | nf_ct_put(last); |
818 | 829 | ||
@@ -966,7 +977,6 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, | |||
966 | return 0; | 977 | return 0; |
967 | } | 978 | } |
968 | 979 | ||
969 | #define __CTA_LABELS_MAX_LENGTH ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) | ||
970 | static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { | 980 | static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { |
971 | [CTA_TUPLE_ORIG] = { .type = NLA_NESTED }, | 981 | [CTA_TUPLE_ORIG] = { .type = NLA_NESTED }, |
972 | [CTA_TUPLE_REPLY] = { .type = NLA_NESTED }, | 982 | [CTA_TUPLE_REPLY] = { .type = NLA_NESTED }, |
@@ -984,9 +994,9 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { | |||
984 | [CTA_ZONE] = { .type = NLA_U16 }, | 994 | [CTA_ZONE] = { .type = NLA_U16 }, |
985 | [CTA_MARK_MASK] = { .type = NLA_U32 }, | 995 | [CTA_MARK_MASK] = { .type = NLA_U32 }, |
986 | [CTA_LABELS] = { .type = NLA_BINARY, | 996 | [CTA_LABELS] = { .type = NLA_BINARY, |
987 | .len = __CTA_LABELS_MAX_LENGTH }, | 997 | .len = NF_CT_LABELS_MAX_SIZE }, |
988 | [CTA_LABELS_MASK] = { .type = NLA_BINARY, | 998 | [CTA_LABELS_MASK] = { .type = NLA_BINARY, |
989 | .len = __CTA_LABELS_MAX_LENGTH }, | 999 | .len = NF_CT_LABELS_MAX_SIZE }, |
990 | }; | 1000 | }; |
991 | 1001 | ||
992 | static int | 1002 | static int |
@@ -1138,50 +1148,65 @@ static int ctnetlink_done_list(struct netlink_callback *cb) | |||
1138 | } | 1148 | } |
1139 | 1149 | ||
1140 | static int | 1150 | static int |
1141 | ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, | 1151 | ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying) |
1142 | struct hlist_nulls_head *list) | ||
1143 | { | 1152 | { |
1144 | struct nf_conn *ct, *last; | 1153 | struct nf_conn *ct, *last = NULL; |
1145 | struct nf_conntrack_tuple_hash *h; | 1154 | struct nf_conntrack_tuple_hash *h; |
1146 | struct hlist_nulls_node *n; | 1155 | struct hlist_nulls_node *n; |
1147 | struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); | 1156 | struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); |
1148 | u_int8_t l3proto = nfmsg->nfgen_family; | 1157 | u_int8_t l3proto = nfmsg->nfgen_family; |
1149 | int res; | 1158 | int res; |
1159 | int cpu; | ||
1160 | struct hlist_nulls_head *list; | ||
1161 | struct net *net = sock_net(skb->sk); | ||
1150 | 1162 | ||
1151 | if (cb->args[2]) | 1163 | if (cb->args[2]) |
1152 | return 0; | 1164 | return 0; |
1153 | 1165 | ||
1154 | spin_lock_bh(&nf_conntrack_lock); | 1166 | if (cb->args[0] == nr_cpu_ids) |
1155 | last = (struct nf_conn *)cb->args[1]; | 1167 | return 0; |
1156 | restart: | 1168 | |
1157 | hlist_nulls_for_each_entry(h, n, list, hnnode) { | 1169 | for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) { |
1158 | ct = nf_ct_tuplehash_to_ctrack(h); | 1170 | struct ct_pcpu *pcpu; |
1159 | if (l3proto && nf_ct_l3num(ct) != l3proto) | 1171 | |
1172 | if (!cpu_possible(cpu)) | ||
1160 | continue; | 1173 | continue; |
1161 | if (cb->args[1]) { | 1174 | |
1162 | if (ct != last) | 1175 | pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); |
1176 | spin_lock_bh(&pcpu->lock); | ||
1177 | last = (struct nf_conn *)cb->args[1]; | ||
1178 | list = dying ? &pcpu->dying : &pcpu->unconfirmed; | ||
1179 | restart: | ||
1180 | hlist_nulls_for_each_entry(h, n, list, hnnode) { | ||
1181 | ct = nf_ct_tuplehash_to_ctrack(h); | ||
1182 | if (l3proto && nf_ct_l3num(ct) != l3proto) | ||
1163 | continue; | 1183 | continue; |
1164 | cb->args[1] = 0; | 1184 | if (cb->args[1]) { |
1165 | } | 1185 | if (ct != last) |
1166 | rcu_read_lock(); | 1186 | continue; |
1167 | res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, | 1187 | cb->args[1] = 0; |
1168 | cb->nlh->nlmsg_seq, | 1188 | } |
1169 | NFNL_MSG_TYPE(cb->nlh->nlmsg_type), | 1189 | rcu_read_lock(); |
1170 | ct); | 1190 | res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, |
1171 | rcu_read_unlock(); | 1191 | cb->nlh->nlmsg_seq, |
1172 | if (res < 0) { | 1192 | NFNL_MSG_TYPE(cb->nlh->nlmsg_type), |
1173 | nf_conntrack_get(&ct->ct_general); | 1193 | ct); |
1174 | cb->args[1] = (unsigned long)ct; | 1194 | rcu_read_unlock(); |
1175 | goto out; | 1195 | if (res < 0) { |
1196 | nf_conntrack_get(&ct->ct_general); | ||
1197 | cb->args[1] = (unsigned long)ct; | ||
1198 | spin_unlock_bh(&pcpu->lock); | ||
1199 | goto out; | ||
1200 | } | ||
1176 | } | 1201 | } |
1202 | if (cb->args[1]) { | ||
1203 | cb->args[1] = 0; | ||
1204 | goto restart; | ||
1205 | } else | ||
1206 | cb->args[2] = 1; | ||
1207 | spin_unlock_bh(&pcpu->lock); | ||
1177 | } | 1208 | } |
1178 | if (cb->args[1]) { | ||
1179 | cb->args[1] = 0; | ||
1180 | goto restart; | ||
1181 | } else | ||
1182 | cb->args[2] = 1; | ||
1183 | out: | 1209 | out: |
1184 | spin_unlock_bh(&nf_conntrack_lock); | ||
1185 | if (last) | 1210 | if (last) |
1186 | nf_ct_put(last); | 1211 | nf_ct_put(last); |
1187 | 1212 | ||
@@ -1191,9 +1216,7 @@ out: | |||
1191 | static int | 1216 | static int |
1192 | ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) | 1217 | ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) |
1193 | { | 1218 | { |
1194 | struct net *net = sock_net(skb->sk); | 1219 | return ctnetlink_dump_list(skb, cb, true); |
1195 | |||
1196 | return ctnetlink_dump_list(skb, cb, &net->ct.dying); | ||
1197 | } | 1220 | } |
1198 | 1221 | ||
1199 | static int | 1222 | static int |
@@ -1215,9 +1238,7 @@ ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb, | |||
1215 | static int | 1238 | static int |
1216 | ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) | 1239 | ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) |
1217 | { | 1240 | { |
1218 | struct net *net = sock_net(skb->sk); | 1241 | return ctnetlink_dump_list(skb, cb, false); |
1219 | |||
1220 | return ctnetlink_dump_list(skb, cb, &net->ct.unconfirmed); | ||
1221 | } | 1242 | } |
1222 | 1243 | ||
1223 | static int | 1244 | static int |
@@ -1361,14 +1382,14 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) | |||
1361 | nf_ct_protonum(ct)); | 1382 | nf_ct_protonum(ct)); |
1362 | if (helper == NULL) { | 1383 | if (helper == NULL) { |
1363 | #ifdef CONFIG_MODULES | 1384 | #ifdef CONFIG_MODULES |
1364 | spin_unlock_bh(&nf_conntrack_lock); | 1385 | spin_unlock_bh(&nf_conntrack_expect_lock); |
1365 | 1386 | ||
1366 | if (request_module("nfct-helper-%s", helpname) < 0) { | 1387 | if (request_module("nfct-helper-%s", helpname) < 0) { |
1367 | spin_lock_bh(&nf_conntrack_lock); | 1388 | spin_lock_bh(&nf_conntrack_expect_lock); |
1368 | return -EOPNOTSUPP; | 1389 | return -EOPNOTSUPP; |
1369 | } | 1390 | } |
1370 | 1391 | ||
1371 | spin_lock_bh(&nf_conntrack_lock); | 1392 | spin_lock_bh(&nf_conntrack_expect_lock); |
1372 | helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), | 1393 | helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), |
1373 | nf_ct_protonum(ct)); | 1394 | nf_ct_protonum(ct)); |
1374 | if (helper) | 1395 | if (helper) |
@@ -1804,9 +1825,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, | |||
1804 | err = -EEXIST; | 1825 | err = -EEXIST; |
1805 | ct = nf_ct_tuplehash_to_ctrack(h); | 1826 | ct = nf_ct_tuplehash_to_ctrack(h); |
1806 | if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { | 1827 | if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { |
1807 | spin_lock_bh(&nf_conntrack_lock); | 1828 | spin_lock_bh(&nf_conntrack_expect_lock); |
1808 | err = ctnetlink_change_conntrack(ct, cda); | 1829 | err = ctnetlink_change_conntrack(ct, cda); |
1809 | spin_unlock_bh(&nf_conntrack_lock); | 1830 | spin_unlock_bh(&nf_conntrack_expect_lock); |
1810 | if (err == 0) { | 1831 | if (err == 0) { |
1811 | nf_conntrack_eventmask_report((1 << IPCT_REPLY) | | 1832 | nf_conntrack_eventmask_report((1 << IPCT_REPLY) | |
1812 | (1 << IPCT_ASSURED) | | 1833 | (1 << IPCT_ASSURED) | |
@@ -2135,9 +2156,9 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) | |||
2135 | if (ret < 0) | 2156 | if (ret < 0) |
2136 | return ret; | 2157 | return ret; |
2137 | 2158 | ||
2138 | spin_lock_bh(&nf_conntrack_lock); | 2159 | spin_lock_bh(&nf_conntrack_expect_lock); |
2139 | ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); | 2160 | ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); |
2140 | spin_unlock_bh(&nf_conntrack_lock); | 2161 | spin_unlock_bh(&nf_conntrack_expect_lock); |
2141 | 2162 | ||
2142 | return ret; | 2163 | return ret; |
2143 | } | 2164 | } |
@@ -2692,13 +2713,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, | |||
2692 | } | 2713 | } |
2693 | 2714 | ||
2694 | /* after list removal, usage count == 1 */ | 2715 | /* after list removal, usage count == 1 */ |
2695 | spin_lock_bh(&nf_conntrack_lock); | 2716 | spin_lock_bh(&nf_conntrack_expect_lock); |
2696 | if (del_timer(&exp->timeout)) { | 2717 | if (del_timer(&exp->timeout)) { |
2697 | nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, | 2718 | nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, |
2698 | nlmsg_report(nlh)); | 2719 | nlmsg_report(nlh)); |
2699 | nf_ct_expect_put(exp); | 2720 | nf_ct_expect_put(exp); |
2700 | } | 2721 | } |
2701 | spin_unlock_bh(&nf_conntrack_lock); | 2722 | spin_unlock_bh(&nf_conntrack_expect_lock); |
2702 | /* have to put what we 'get' above. | 2723 | /* have to put what we 'get' above. |
2703 | * after this line usage count == 0 */ | 2724 | * after this line usage count == 0 */ |
2704 | nf_ct_expect_put(exp); | 2725 | nf_ct_expect_put(exp); |
@@ -2707,7 +2728,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, | |||
2707 | struct nf_conn_help *m_help; | 2728 | struct nf_conn_help *m_help; |
2708 | 2729 | ||
2709 | /* delete all expectations for this helper */ | 2730 | /* delete all expectations for this helper */ |
2710 | spin_lock_bh(&nf_conntrack_lock); | 2731 | spin_lock_bh(&nf_conntrack_expect_lock); |
2711 | for (i = 0; i < nf_ct_expect_hsize; i++) { | 2732 | for (i = 0; i < nf_ct_expect_hsize; i++) { |
2712 | hlist_for_each_entry_safe(exp, next, | 2733 | hlist_for_each_entry_safe(exp, next, |
2713 | &net->ct.expect_hash[i], | 2734 | &net->ct.expect_hash[i], |
@@ -2722,10 +2743,10 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, | |||
2722 | } | 2743 | } |
2723 | } | 2744 | } |
2724 | } | 2745 | } |
2725 | spin_unlock_bh(&nf_conntrack_lock); | 2746 | spin_unlock_bh(&nf_conntrack_expect_lock); |
2726 | } else { | 2747 | } else { |
2727 | /* This basically means we have to flush everything*/ | 2748 | /* This basically means we have to flush everything*/ |
2728 | spin_lock_bh(&nf_conntrack_lock); | 2749 | spin_lock_bh(&nf_conntrack_expect_lock); |
2729 | for (i = 0; i < nf_ct_expect_hsize; i++) { | 2750 | for (i = 0; i < nf_ct_expect_hsize; i++) { |
2730 | hlist_for_each_entry_safe(exp, next, | 2751 | hlist_for_each_entry_safe(exp, next, |
2731 | &net->ct.expect_hash[i], | 2752 | &net->ct.expect_hash[i], |
@@ -2738,7 +2759,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, | |||
2738 | } | 2759 | } |
2739 | } | 2760 | } |
2740 | } | 2761 | } |
2741 | spin_unlock_bh(&nf_conntrack_lock); | 2762 | spin_unlock_bh(&nf_conntrack_expect_lock); |
2742 | } | 2763 | } |
2743 | 2764 | ||
2744 | return 0; | 2765 | return 0; |
@@ -2964,11 +2985,11 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, | |||
2964 | if (err < 0) | 2985 | if (err < 0) |
2965 | return err; | 2986 | return err; |
2966 | 2987 | ||
2967 | spin_lock_bh(&nf_conntrack_lock); | 2988 | spin_lock_bh(&nf_conntrack_expect_lock); |
2968 | exp = __nf_ct_expect_find(net, zone, &tuple); | 2989 | exp = __nf_ct_expect_find(net, zone, &tuple); |
2969 | 2990 | ||
2970 | if (!exp) { | 2991 | if (!exp) { |
2971 | spin_unlock_bh(&nf_conntrack_lock); | 2992 | spin_unlock_bh(&nf_conntrack_expect_lock); |
2972 | err = -ENOENT; | 2993 | err = -ENOENT; |
2973 | if (nlh->nlmsg_flags & NLM_F_CREATE) { | 2994 | if (nlh->nlmsg_flags & NLM_F_CREATE) { |
2974 | err = ctnetlink_create_expect(net, zone, cda, | 2995 | err = ctnetlink_create_expect(net, zone, cda, |
@@ -2982,7 +3003,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, | |||
2982 | err = -EEXIST; | 3003 | err = -EEXIST; |
2983 | if (!(nlh->nlmsg_flags & NLM_F_EXCL)) | 3004 | if (!(nlh->nlmsg_flags & NLM_F_EXCL)) |
2984 | err = ctnetlink_change_expect(exp, cda); | 3005 | err = ctnetlink_change_expect(exp, cda); |
2985 | spin_unlock_bh(&nf_conntrack_lock); | 3006 | spin_unlock_bh(&nf_conntrack_expect_lock); |
2986 | 3007 | ||
2987 | return err; | 3008 | return err; |
2988 | } | 3009 | } |
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 466410eaa482..4c3ba1c8d682 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c | |||
@@ -800,7 +800,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct, | |||
800 | struct hlist_node *next; | 800 | struct hlist_node *next; |
801 | int found = 0; | 801 | int found = 0; |
802 | 802 | ||
803 | spin_lock_bh(&nf_conntrack_lock); | 803 | spin_lock_bh(&nf_conntrack_expect_lock); |
804 | hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { | 804 | hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { |
805 | if (exp->class != SIP_EXPECT_SIGNALLING || | 805 | if (exp->class != SIP_EXPECT_SIGNALLING || |
806 | !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) || | 806 | !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) || |
@@ -815,7 +815,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct, | |||
815 | found = 1; | 815 | found = 1; |
816 | break; | 816 | break; |
817 | } | 817 | } |
818 | spin_unlock_bh(&nf_conntrack_lock); | 818 | spin_unlock_bh(&nf_conntrack_expect_lock); |
819 | return found; | 819 | return found; |
820 | } | 820 | } |
821 | 821 | ||
@@ -825,7 +825,7 @@ static void flush_expectations(struct nf_conn *ct, bool media) | |||
825 | struct nf_conntrack_expect *exp; | 825 | struct nf_conntrack_expect *exp; |
826 | struct hlist_node *next; | 826 | struct hlist_node *next; |
827 | 827 | ||
828 | spin_lock_bh(&nf_conntrack_lock); | 828 | spin_lock_bh(&nf_conntrack_expect_lock); |
829 | hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { | 829 | hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { |
830 | if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media) | 830 | if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media) |
831 | continue; | 831 | continue; |
@@ -836,7 +836,7 @@ static void flush_expectations(struct nf_conn *ct, bool media) | |||
836 | if (!media) | 836 | if (!media) |
837 | break; | 837 | break; |
838 | } | 838 | } |
839 | spin_unlock_bh(&nf_conntrack_lock); | 839 | spin_unlock_bh(&nf_conntrack_expect_lock); |
840 | } | 840 | } |
841 | 841 | ||
842 | static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, | 842 | static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, |
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index adce01e8bb57..33045a562297 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c | |||
@@ -794,9 +794,8 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr) | |||
794 | stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); | 794 | stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); |
795 | 795 | ||
796 | if (chain->stats) { | 796 | if (chain->stats) { |
797 | /* nfnl_lock is held, add some nfnl function for this, later */ | ||
798 | struct nft_stats __percpu *oldstats = | 797 | struct nft_stats __percpu *oldstats = |
799 | rcu_dereference_protected(chain->stats, 1); | 798 | nft_dereference(chain->stats); |
800 | 799 | ||
801 | rcu_assign_pointer(chain->stats, newstats); | 800 | rcu_assign_pointer(chain->stats, newstats); |
802 | synchronize_rcu(); | 801 | synchronize_rcu(); |
@@ -1254,10 +1253,11 @@ err1: | |||
1254 | return err; | 1253 | return err; |
1255 | } | 1254 | } |
1256 | 1255 | ||
1257 | static void nf_tables_expr_destroy(struct nft_expr *expr) | 1256 | static void nf_tables_expr_destroy(const struct nft_ctx *ctx, |
1257 | struct nft_expr *expr) | ||
1258 | { | 1258 | { |
1259 | if (expr->ops->destroy) | 1259 | if (expr->ops->destroy) |
1260 | expr->ops->destroy(expr); | 1260 | expr->ops->destroy(ctx, expr); |
1261 | module_put(expr->ops->type->owner); | 1261 | module_put(expr->ops->type->owner); |
1262 | } | 1262 | } |
1263 | 1263 | ||
@@ -1296,6 +1296,8 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { | |||
1296 | [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, | 1296 | [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, |
1297 | [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, | 1297 | [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, |
1298 | [NFTA_RULE_POSITION] = { .type = NLA_U64 }, | 1298 | [NFTA_RULE_POSITION] = { .type = NLA_U64 }, |
1299 | [NFTA_RULE_USERDATA] = { .type = NLA_BINARY, | ||
1300 | .len = NFT_USERDATA_MAXLEN }, | ||
1299 | }; | 1301 | }; |
1300 | 1302 | ||
1301 | static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, | 1303 | static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, |
@@ -1348,6 +1350,10 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, | |||
1348 | } | 1350 | } |
1349 | nla_nest_end(skb, list); | 1351 | nla_nest_end(skb, list); |
1350 | 1352 | ||
1353 | if (rule->ulen && | ||
1354 | nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule))) | ||
1355 | goto nla_put_failure; | ||
1356 | |||
1351 | return nlmsg_end(skb, nlh); | 1357 | return nlmsg_end(skb, nlh); |
1352 | 1358 | ||
1353 | nla_put_failure: | 1359 | nla_put_failure: |
@@ -1531,7 +1537,8 @@ err: | |||
1531 | return err; | 1537 | return err; |
1532 | } | 1538 | } |
1533 | 1539 | ||
1534 | static void nf_tables_rule_destroy(struct nft_rule *rule) | 1540 | static void nf_tables_rule_destroy(const struct nft_ctx *ctx, |
1541 | struct nft_rule *rule) | ||
1535 | { | 1542 | { |
1536 | struct nft_expr *expr; | 1543 | struct nft_expr *expr; |
1537 | 1544 | ||
@@ -1541,7 +1548,7 @@ static void nf_tables_rule_destroy(struct nft_rule *rule) | |||
1541 | */ | 1548 | */ |
1542 | expr = nft_expr_first(rule); | 1549 | expr = nft_expr_first(rule); |
1543 | while (expr->ops && expr != nft_expr_last(rule)) { | 1550 | while (expr->ops && expr != nft_expr_last(rule)) { |
1544 | nf_tables_expr_destroy(expr); | 1551 | nf_tables_expr_destroy(ctx, expr); |
1545 | expr = nft_expr_next(expr); | 1552 | expr = nft_expr_next(expr); |
1546 | } | 1553 | } |
1547 | kfree(rule); | 1554 | kfree(rule); |
@@ -1552,7 +1559,7 @@ static void nf_tables_rule_destroy(struct nft_rule *rule) | |||
1552 | static struct nft_expr_info *info; | 1559 | static struct nft_expr_info *info; |
1553 | 1560 | ||
1554 | static struct nft_rule_trans * | 1561 | static struct nft_rule_trans * |
1555 | nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx) | 1562 | nf_tables_trans_add(struct nft_ctx *ctx, struct nft_rule *rule) |
1556 | { | 1563 | { |
1557 | struct nft_rule_trans *rupd; | 1564 | struct nft_rule_trans *rupd; |
1558 | 1565 | ||
@@ -1560,11 +1567,8 @@ nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx) | |||
1560 | if (rupd == NULL) | 1567 | if (rupd == NULL) |
1561 | return NULL; | 1568 | return NULL; |
1562 | 1569 | ||
1563 | rupd->chain = ctx->chain; | 1570 | rupd->ctx = *ctx; |
1564 | rupd->table = ctx->table; | ||
1565 | rupd->rule = rule; | 1571 | rupd->rule = rule; |
1566 | rupd->family = ctx->afi->family; | ||
1567 | rupd->nlh = ctx->nlh; | ||
1568 | list_add_tail(&rupd->list, &ctx->net->nft.commit_list); | 1572 | list_add_tail(&rupd->list, &ctx->net->nft.commit_list); |
1569 | 1573 | ||
1570 | return rupd; | 1574 | return rupd; |
@@ -1584,7 +1588,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, | |||
1584 | struct nft_expr *expr; | 1588 | struct nft_expr *expr; |
1585 | struct nft_ctx ctx; | 1589 | struct nft_ctx ctx; |
1586 | struct nlattr *tmp; | 1590 | struct nlattr *tmp; |
1587 | unsigned int size, i, n; | 1591 | unsigned int size, i, n, ulen = 0; |
1588 | int err, rem; | 1592 | int err, rem; |
1589 | bool create; | 1593 | bool create; |
1590 | u64 handle, pos_handle; | 1594 | u64 handle, pos_handle; |
@@ -1650,8 +1654,11 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, | |||
1650 | } | 1654 | } |
1651 | } | 1655 | } |
1652 | 1656 | ||
1657 | if (nla[NFTA_RULE_USERDATA]) | ||
1658 | ulen = nla_len(nla[NFTA_RULE_USERDATA]); | ||
1659 | |||
1653 | err = -ENOMEM; | 1660 | err = -ENOMEM; |
1654 | rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL); | 1661 | rule = kzalloc(sizeof(*rule) + size + ulen, GFP_KERNEL); |
1655 | if (rule == NULL) | 1662 | if (rule == NULL) |
1656 | goto err1; | 1663 | goto err1; |
1657 | 1664 | ||
@@ -1659,6 +1666,10 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, | |||
1659 | 1666 | ||
1660 | rule->handle = handle; | 1667 | rule->handle = handle; |
1661 | rule->dlen = size; | 1668 | rule->dlen = size; |
1669 | rule->ulen = ulen; | ||
1670 | |||
1671 | if (ulen) | ||
1672 | nla_memcpy(nft_userdata(rule), nla[NFTA_RULE_USERDATA], ulen); | ||
1662 | 1673 | ||
1663 | expr = nft_expr_first(rule); | 1674 | expr = nft_expr_first(rule); |
1664 | for (i = 0; i < n; i++) { | 1675 | for (i = 0; i < n; i++) { |
@@ -1671,7 +1682,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, | |||
1671 | 1682 | ||
1672 | if (nlh->nlmsg_flags & NLM_F_REPLACE) { | 1683 | if (nlh->nlmsg_flags & NLM_F_REPLACE) { |
1673 | if (nft_rule_is_active_next(net, old_rule)) { | 1684 | if (nft_rule_is_active_next(net, old_rule)) { |
1674 | repl = nf_tables_trans_add(old_rule, &ctx); | 1685 | repl = nf_tables_trans_add(&ctx, old_rule); |
1675 | if (repl == NULL) { | 1686 | if (repl == NULL) { |
1676 | err = -ENOMEM; | 1687 | err = -ENOMEM; |
1677 | goto err2; | 1688 | goto err2; |
@@ -1694,7 +1705,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, | |||
1694 | list_add_rcu(&rule->list, &chain->rules); | 1705 | list_add_rcu(&rule->list, &chain->rules); |
1695 | } | 1706 | } |
1696 | 1707 | ||
1697 | if (nf_tables_trans_add(rule, &ctx) == NULL) { | 1708 | if (nf_tables_trans_add(&ctx, rule) == NULL) { |
1698 | err = -ENOMEM; | 1709 | err = -ENOMEM; |
1699 | goto err3; | 1710 | goto err3; |
1700 | } | 1711 | } |
@@ -1709,7 +1720,7 @@ err3: | |||
1709 | kfree(repl); | 1720 | kfree(repl); |
1710 | } | 1721 | } |
1711 | err2: | 1722 | err2: |
1712 | nf_tables_rule_destroy(rule); | 1723 | nf_tables_rule_destroy(&ctx, rule); |
1713 | err1: | 1724 | err1: |
1714 | for (i = 0; i < n; i++) { | 1725 | for (i = 0; i < n; i++) { |
1715 | if (info[i].ops != NULL) | 1726 | if (info[i].ops != NULL) |
@@ -1723,7 +1734,7 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) | |||
1723 | { | 1734 | { |
1724 | /* You cannot delete the same rule twice */ | 1735 | /* You cannot delete the same rule twice */ |
1725 | if (nft_rule_is_active_next(ctx->net, rule)) { | 1736 | if (nft_rule_is_active_next(ctx->net, rule)) { |
1726 | if (nf_tables_trans_add(rule, ctx) == NULL) | 1737 | if (nf_tables_trans_add(ctx, rule) == NULL) |
1727 | return -ENOMEM; | 1738 | return -ENOMEM; |
1728 | nft_rule_disactivate_next(ctx->net, rule); | 1739 | nft_rule_disactivate_next(ctx->net, rule); |
1729 | return 0; | 1740 | return 0; |
@@ -1819,10 +1830,10 @@ static int nf_tables_commit(struct sk_buff *skb) | |||
1819 | */ | 1830 | */ |
1820 | if (nft_rule_is_active(net, rupd->rule)) { | 1831 | if (nft_rule_is_active(net, rupd->rule)) { |
1821 | nft_rule_clear(net, rupd->rule); | 1832 | nft_rule_clear(net, rupd->rule); |
1822 | nf_tables_rule_notify(skb, rupd->nlh, rupd->table, | 1833 | nf_tables_rule_notify(skb, rupd->ctx.nlh, |
1823 | rupd->chain, rupd->rule, | 1834 | rupd->ctx.table, rupd->ctx.chain, |
1824 | NFT_MSG_NEWRULE, 0, | 1835 | rupd->rule, NFT_MSG_NEWRULE, 0, |
1825 | rupd->family); | 1836 | rupd->ctx.afi->family); |
1826 | list_del(&rupd->list); | 1837 | list_del(&rupd->list); |
1827 | kfree(rupd); | 1838 | kfree(rupd); |
1828 | continue; | 1839 | continue; |
@@ -1830,9 +1841,10 @@ static int nf_tables_commit(struct sk_buff *skb) | |||
1830 | 1841 | ||
1831 | /* This rule is in the past, get rid of it */ | 1842 | /* This rule is in the past, get rid of it */ |
1832 | list_del_rcu(&rupd->rule->list); | 1843 | list_del_rcu(&rupd->rule->list); |
1833 | nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain, | 1844 | nf_tables_rule_notify(skb, rupd->ctx.nlh, |
1845 | rupd->ctx.table, rupd->ctx.chain, | ||
1834 | rupd->rule, NFT_MSG_DELRULE, 0, | 1846 | rupd->rule, NFT_MSG_DELRULE, 0, |
1835 | rupd->family); | 1847 | rupd->ctx.afi->family); |
1836 | } | 1848 | } |
1837 | 1849 | ||
1838 | /* Make sure we don't see any packet traversing old rules */ | 1850 | /* Make sure we don't see any packet traversing old rules */ |
@@ -1840,7 +1852,7 @@ static int nf_tables_commit(struct sk_buff *skb) | |||
1840 | 1852 | ||
1841 | /* Now we can safely release unused old rules */ | 1853 | /* Now we can safely release unused old rules */ |
1842 | list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { | 1854 | list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { |
1843 | nf_tables_rule_destroy(rupd->rule); | 1855 | nf_tables_rule_destroy(&rupd->ctx, rupd->rule); |
1844 | list_del(&rupd->list); | 1856 | list_del(&rupd->list); |
1845 | kfree(rupd); | 1857 | kfree(rupd); |
1846 | } | 1858 | } |
@@ -1869,7 +1881,7 @@ static int nf_tables_abort(struct sk_buff *skb) | |||
1869 | synchronize_rcu(); | 1881 | synchronize_rcu(); |
1870 | 1882 | ||
1871 | list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { | 1883 | list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { |
1872 | nf_tables_rule_destroy(rupd->rule); | 1884 | nf_tables_rule_destroy(&rupd->ctx, rupd->rule); |
1873 | list_del(&rupd->list); | 1885 | list_del(&rupd->list); |
1874 | kfree(rupd); | 1886 | kfree(rupd); |
1875 | } | 1887 | } |
@@ -2430,8 +2442,7 @@ err1: | |||
2430 | static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) | 2442 | static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) |
2431 | { | 2443 | { |
2432 | list_del(&set->list); | 2444 | list_del(&set->list); |
2433 | if (!(set->flags & NFT_SET_ANONYMOUS)) | 2445 | nf_tables_set_notify(ctx, set, NFT_MSG_DELSET); |
2434 | nf_tables_set_notify(ctx, set, NFT_MSG_DELSET); | ||
2435 | 2446 | ||
2436 | set->ops->destroy(set); | 2447 | set->ops->destroy(set); |
2437 | module_put(set->ops->owner); | 2448 | module_put(set->ops->owner); |
@@ -3175,9 +3186,16 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, | |||
3175 | data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); | 3186 | data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); |
3176 | 3187 | ||
3177 | switch (data->verdict) { | 3188 | switch (data->verdict) { |
3178 | case NF_ACCEPT: | 3189 | default: |
3179 | case NF_DROP: | 3190 | switch (data->verdict & NF_VERDICT_MASK) { |
3180 | case NF_QUEUE: | 3191 | case NF_ACCEPT: |
3192 | case NF_DROP: | ||
3193 | case NF_QUEUE: | ||
3194 | break; | ||
3195 | default: | ||
3196 | return -EINVAL; | ||
3197 | } | ||
3198 | /* fall through */ | ||
3181 | case NFT_CONTINUE: | 3199 | case NFT_CONTINUE: |
3182 | case NFT_BREAK: | 3200 | case NFT_BREAK: |
3183 | case NFT_RETURN: | 3201 | case NFT_RETURN: |
@@ -3198,8 +3216,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, | |||
3198 | data->chain = chain; | 3216 | data->chain = chain; |
3199 | desc->len = sizeof(data); | 3217 | desc->len = sizeof(data); |
3200 | break; | 3218 | break; |
3201 | default: | ||
3202 | return -EINVAL; | ||
3203 | } | 3219 | } |
3204 | 3220 | ||
3205 | desc->type = NFT_DATA_VERDICT; | 3221 | desc->type = NFT_DATA_VERDICT; |
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 046aa13b4fea..e8138da4c14f 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c | |||
@@ -61,6 +61,14 @@ void nfnl_unlock(__u8 subsys_id) | |||
61 | } | 61 | } |
62 | EXPORT_SYMBOL_GPL(nfnl_unlock); | 62 | EXPORT_SYMBOL_GPL(nfnl_unlock); |
63 | 63 | ||
64 | #ifdef CONFIG_PROVE_LOCKING | ||
65 | int lockdep_nfnl_is_held(u8 subsys_id) | ||
66 | { | ||
67 | return lockdep_is_held(&table[subsys_id].mutex); | ||
68 | } | ||
69 | EXPORT_SYMBOL_GPL(lockdep_nfnl_is_held); | ||
70 | #endif | ||
71 | |||
64 | int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) | 72 | int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) |
65 | { | 73 | { |
66 | nfnl_lock(n->subsys_id); | 74 | nfnl_lock(n->subsys_id); |
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index a155d19a225e..d292c8d286eb 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c | |||
@@ -28,8 +28,6 @@ | |||
28 | #include <linux/proc_fs.h> | 28 | #include <linux/proc_fs.h> |
29 | #include <linux/security.h> | 29 | #include <linux/security.h> |
30 | #include <linux/list.h> | 30 | #include <linux/list.h> |
31 | #include <linux/jhash.h> | ||
32 | #include <linux/random.h> | ||
33 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
34 | #include <net/sock.h> | 32 | #include <net/sock.h> |
35 | #include <net/netfilter/nf_log.h> | 33 | #include <net/netfilter/nf_log.h> |
@@ -75,7 +73,6 @@ struct nfulnl_instance { | |||
75 | }; | 73 | }; |
76 | 74 | ||
77 | #define INSTANCE_BUCKETS 16 | 75 | #define INSTANCE_BUCKETS 16 |
78 | static unsigned int hash_init; | ||
79 | 76 | ||
80 | static int nfnl_log_net_id __read_mostly; | 77 | static int nfnl_log_net_id __read_mostly; |
81 | 78 | ||
@@ -1067,11 +1064,6 @@ static int __init nfnetlink_log_init(void) | |||
1067 | { | 1064 | { |
1068 | int status = -ENOMEM; | 1065 | int status = -ENOMEM; |
1069 | 1066 | ||
1070 | /* it's not really all that important to have a random value, so | ||
1071 | * we can do this from the init function, even if there hasn't | ||
1072 | * been that much entropy yet */ | ||
1073 | get_random_bytes(&hash_init, sizeof(hash_init)); | ||
1074 | |||
1075 | netlink_register_notifier(&nfulnl_rtnl_notifier); | 1067 | netlink_register_notifier(&nfulnl_rtnl_notifier); |
1076 | status = nfnetlink_subsys_register(&nfulnl_subsys); | 1068 | status = nfnetlink_subsys_register(&nfulnl_subsys); |
1077 | if (status < 0) { | 1069 | if (status < 0) { |
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 82cb8236f8a1..8a779be832fb 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c | |||
@@ -192,7 +192,7 @@ err: | |||
192 | } | 192 | } |
193 | 193 | ||
194 | static void | 194 | static void |
195 | nft_target_destroy(const struct nft_expr *expr) | 195 | nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) |
196 | { | 196 | { |
197 | struct xt_target *target = expr->ops->data; | 197 | struct xt_target *target = expr->ops->data; |
198 | 198 | ||
@@ -379,7 +379,7 @@ err: | |||
379 | } | 379 | } |
380 | 380 | ||
381 | static void | 381 | static void |
382 | nft_match_destroy(const struct nft_expr *expr) | 382 | nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) |
383 | { | 383 | { |
384 | struct xt_match *match = expr->ops->data; | 384 | struct xt_match *match = expr->ops->data; |
385 | 385 | ||
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 46e275403838..bd0d41e69341 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c | |||
@@ -19,15 +19,15 @@ | |||
19 | #include <net/netfilter/nf_conntrack_tuple.h> | 19 | #include <net/netfilter/nf_conntrack_tuple.h> |
20 | #include <net/netfilter/nf_conntrack_helper.h> | 20 | #include <net/netfilter/nf_conntrack_helper.h> |
21 | #include <net/netfilter/nf_conntrack_ecache.h> | 21 | #include <net/netfilter/nf_conntrack_ecache.h> |
22 | #include <net/netfilter/nf_conntrack_labels.h> | ||
22 | 23 | ||
23 | struct nft_ct { | 24 | struct nft_ct { |
24 | enum nft_ct_keys key:8; | 25 | enum nft_ct_keys key:8; |
25 | enum ip_conntrack_dir dir:8; | 26 | enum ip_conntrack_dir dir:8; |
26 | union{ | 27 | union { |
27 | enum nft_registers dreg:8; | 28 | enum nft_registers dreg:8; |
28 | enum nft_registers sreg:8; | 29 | enum nft_registers sreg:8; |
29 | }; | 30 | }; |
30 | uint8_t family; | ||
31 | }; | 31 | }; |
32 | 32 | ||
33 | static void nft_ct_get_eval(const struct nft_expr *expr, | 33 | static void nft_ct_get_eval(const struct nft_expr *expr, |
@@ -97,6 +97,26 @@ static void nft_ct_get_eval(const struct nft_expr *expr, | |||
97 | goto err; | 97 | goto err; |
98 | strncpy((char *)dest->data, helper->name, sizeof(dest->data)); | 98 | strncpy((char *)dest->data, helper->name, sizeof(dest->data)); |
99 | return; | 99 | return; |
100 | #ifdef CONFIG_NF_CONNTRACK_LABELS | ||
101 | case NFT_CT_LABELS: { | ||
102 | struct nf_conn_labels *labels = nf_ct_labels_find(ct); | ||
103 | unsigned int size; | ||
104 | |||
105 | if (!labels) { | ||
106 | memset(dest->data, 0, sizeof(dest->data)); | ||
107 | return; | ||
108 | } | ||
109 | |||
110 | BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > sizeof(dest->data)); | ||
111 | size = labels->words * sizeof(long); | ||
112 | |||
113 | memcpy(dest->data, labels->bits, size); | ||
114 | if (size < sizeof(dest->data)) | ||
115 | memset(((char *) dest->data) + size, 0, | ||
116 | sizeof(dest->data) - size); | ||
117 | return; | ||
118 | } | ||
119 | #endif | ||
100 | } | 120 | } |
101 | 121 | ||
102 | tuple = &ct->tuplehash[priv->dir].tuple; | 122 | tuple = &ct->tuplehash[priv->dir].tuple; |
@@ -221,6 +241,9 @@ static int nft_ct_init_validate_get(const struct nft_expr *expr, | |||
221 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | 241 | #ifdef CONFIG_NF_CONNTRACK_SECMARK |
222 | case NFT_CT_SECMARK: | 242 | case NFT_CT_SECMARK: |
223 | #endif | 243 | #endif |
244 | #ifdef CONFIG_NF_CONNTRACK_LABELS | ||
245 | case NFT_CT_LABELS: | ||
246 | #endif | ||
224 | case NFT_CT_EXPIRATION: | 247 | case NFT_CT_EXPIRATION: |
225 | case NFT_CT_HELPER: | 248 | case NFT_CT_HELPER: |
226 | if (tb[NFTA_CT_DIRECTION] != NULL) | 249 | if (tb[NFTA_CT_DIRECTION] != NULL) |
@@ -292,16 +315,13 @@ static int nft_ct_init(const struct nft_ctx *ctx, | |||
292 | if (err < 0) | 315 | if (err < 0) |
293 | return err; | 316 | return err; |
294 | 317 | ||
295 | priv->family = ctx->afi->family; | ||
296 | |||
297 | return 0; | 318 | return 0; |
298 | } | 319 | } |
299 | 320 | ||
300 | static void nft_ct_destroy(const struct nft_expr *expr) | 321 | static void nft_ct_destroy(const struct nft_ctx *ctx, |
322 | const struct nft_expr *expr) | ||
301 | { | 323 | { |
302 | struct nft_ct *priv = nft_expr_priv(expr); | 324 | nft_ct_l3proto_module_put(ctx->afi->family); |
303 | |||
304 | nft_ct_l3proto_module_put(priv->family); | ||
305 | } | 325 | } |
306 | 326 | ||
307 | static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) | 327 | static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) |
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 3d3f8fce10a5..6a1acde16c60 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> | 2 | * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License version 2 as | 5 | * it under the terms of the GNU General Public License version 2 as |
@@ -18,17 +18,29 @@ | |||
18 | #include <linux/netfilter/nf_tables.h> | 18 | #include <linux/netfilter/nf_tables.h> |
19 | #include <net/netfilter/nf_tables.h> | 19 | #include <net/netfilter/nf_tables.h> |
20 | 20 | ||
21 | #define NFT_HASH_MIN_SIZE 4 | ||
22 | |||
21 | struct nft_hash { | 23 | struct nft_hash { |
22 | struct hlist_head *hash; | 24 | struct nft_hash_table __rcu *tbl; |
23 | unsigned int hsize; | 25 | }; |
26 | |||
27 | struct nft_hash_table { | ||
28 | unsigned int size; | ||
29 | unsigned int elements; | ||
30 | struct nft_hash_elem __rcu *buckets[]; | ||
24 | }; | 31 | }; |
25 | 32 | ||
26 | struct nft_hash_elem { | 33 | struct nft_hash_elem { |
27 | struct hlist_node hnode; | 34 | struct nft_hash_elem __rcu *next; |
28 | struct nft_data key; | 35 | struct nft_data key; |
29 | struct nft_data data[]; | 36 | struct nft_data data[]; |
30 | }; | 37 | }; |
31 | 38 | ||
39 | #define nft_hash_for_each_entry(i, head) \ | ||
40 | for (i = nft_dereference(head); i != NULL; i = nft_dereference(i->next)) | ||
41 | #define nft_hash_for_each_entry_rcu(i, head) \ | ||
42 | for (i = rcu_dereference(head); i != NULL; i = rcu_dereference(i->next)) | ||
43 | |||
32 | static u32 nft_hash_rnd __read_mostly; | 44 | static u32 nft_hash_rnd __read_mostly; |
33 | static bool nft_hash_rnd_initted __read_mostly; | 45 | static bool nft_hash_rnd_initted __read_mostly; |
34 | 46 | ||
@@ -38,7 +50,7 @@ static unsigned int nft_hash_data(const struct nft_data *data, | |||
38 | unsigned int h; | 50 | unsigned int h; |
39 | 51 | ||
40 | h = jhash(data->data, len, nft_hash_rnd); | 52 | h = jhash(data->data, len, nft_hash_rnd); |
41 | return ((u64)h * hsize) >> 32; | 53 | return h & (hsize - 1); |
42 | } | 54 | } |
43 | 55 | ||
44 | static bool nft_hash_lookup(const struct nft_set *set, | 56 | static bool nft_hash_lookup(const struct nft_set *set, |
@@ -46,11 +58,12 @@ static bool nft_hash_lookup(const struct nft_set *set, | |||
46 | struct nft_data *data) | 58 | struct nft_data *data) |
47 | { | 59 | { |
48 | const struct nft_hash *priv = nft_set_priv(set); | 60 | const struct nft_hash *priv = nft_set_priv(set); |
61 | const struct nft_hash_table *tbl = rcu_dereference(priv->tbl); | ||
49 | const struct nft_hash_elem *he; | 62 | const struct nft_hash_elem *he; |
50 | unsigned int h; | 63 | unsigned int h; |
51 | 64 | ||
52 | h = nft_hash_data(key, priv->hsize, set->klen); | 65 | h = nft_hash_data(key, tbl->size, set->klen); |
53 | hlist_for_each_entry(he, &priv->hash[h], hnode) { | 66 | nft_hash_for_each_entry_rcu(he, tbl->buckets[h]) { |
54 | if (nft_data_cmp(&he->key, key, set->klen)) | 67 | if (nft_data_cmp(&he->key, key, set->klen)) |
55 | continue; | 68 | continue; |
56 | if (set->flags & NFT_SET_MAP) | 69 | if (set->flags & NFT_SET_MAP) |
@@ -60,19 +73,148 @@ static bool nft_hash_lookup(const struct nft_set *set, | |||
60 | return false; | 73 | return false; |
61 | } | 74 | } |
62 | 75 | ||
63 | static void nft_hash_elem_destroy(const struct nft_set *set, | 76 | static void nft_hash_tbl_free(const struct nft_hash_table *tbl) |
64 | struct nft_hash_elem *he) | ||
65 | { | 77 | { |
66 | nft_data_uninit(&he->key, NFT_DATA_VALUE); | 78 | if (is_vmalloc_addr(tbl)) |
67 | if (set->flags & NFT_SET_MAP) | 79 | vfree(tbl); |
68 | nft_data_uninit(he->data, set->dtype); | 80 | else |
69 | kfree(he); | 81 | kfree(tbl); |
82 | } | ||
83 | |||
84 | static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets) | ||
85 | { | ||
86 | struct nft_hash_table *tbl; | ||
87 | size_t size; | ||
88 | |||
89 | size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); | ||
90 | tbl = kzalloc(size, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN); | ||
91 | if (tbl == NULL) | ||
92 | tbl = vzalloc(size); | ||
93 | if (tbl == NULL) | ||
94 | return NULL; | ||
95 | tbl->size = nbuckets; | ||
96 | |||
97 | return tbl; | ||
98 | } | ||
99 | |||
100 | static void nft_hash_chain_unzip(const struct nft_set *set, | ||
101 | const struct nft_hash_table *ntbl, | ||
102 | struct nft_hash_table *tbl, unsigned int n) | ||
103 | { | ||
104 | struct nft_hash_elem *he, *last, *next; | ||
105 | unsigned int h; | ||
106 | |||
107 | he = nft_dereference(tbl->buckets[n]); | ||
108 | if (he == NULL) | ||
109 | return; | ||
110 | h = nft_hash_data(&he->key, ntbl->size, set->klen); | ||
111 | |||
112 | /* Find last element of first chain hashing to bucket h */ | ||
113 | last = he; | ||
114 | nft_hash_for_each_entry(he, he->next) { | ||
115 | if (nft_hash_data(&he->key, ntbl->size, set->klen) != h) | ||
116 | break; | ||
117 | last = he; | ||
118 | } | ||
119 | |||
120 | /* Unlink first chain from the old table */ | ||
121 | RCU_INIT_POINTER(tbl->buckets[n], last->next); | ||
122 | |||
123 | /* If end of chain reached, done */ | ||
124 | if (he == NULL) | ||
125 | return; | ||
126 | |||
127 | /* Find first element of second chain hashing to bucket h */ | ||
128 | next = NULL; | ||
129 | nft_hash_for_each_entry(he, he->next) { | ||
130 | if (nft_hash_data(&he->key, ntbl->size, set->klen) != h) | ||
131 | continue; | ||
132 | next = he; | ||
133 | break; | ||
134 | } | ||
135 | |||
136 | /* Link the two chains */ | ||
137 | RCU_INIT_POINTER(last->next, next); | ||
138 | } | ||
139 | |||
140 | static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv) | ||
141 | { | ||
142 | struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl; | ||
143 | struct nft_hash_elem *he; | ||
144 | unsigned int i, h; | ||
145 | bool complete; | ||
146 | |||
147 | ntbl = nft_hash_tbl_alloc(tbl->size * 2); | ||
148 | if (ntbl == NULL) | ||
149 | return -ENOMEM; | ||
150 | |||
151 | /* Link new table's buckets to first element in the old table | ||
152 | * hashing to the new bucket. | ||
153 | */ | ||
154 | for (i = 0; i < ntbl->size; i++) { | ||
155 | h = i < tbl->size ? i : i - tbl->size; | ||
156 | nft_hash_for_each_entry(he, tbl->buckets[h]) { | ||
157 | if (nft_hash_data(&he->key, ntbl->size, set->klen) != i) | ||
158 | continue; | ||
159 | RCU_INIT_POINTER(ntbl->buckets[i], he); | ||
160 | break; | ||
161 | } | ||
162 | } | ||
163 | ntbl->elements = tbl->elements; | ||
164 | |||
165 | /* Publish new table */ | ||
166 | rcu_assign_pointer(priv->tbl, ntbl); | ||
167 | |||
168 | /* Unzip interleaved hash chains */ | ||
169 | do { | ||
170 | /* Wait for readers to use new table/unzipped chains */ | ||
171 | synchronize_rcu(); | ||
172 | |||
173 | complete = true; | ||
174 | for (i = 0; i < tbl->size; i++) { | ||
175 | nft_hash_chain_unzip(set, ntbl, tbl, i); | ||
176 | if (tbl->buckets[i] != NULL) | ||
177 | complete = false; | ||
178 | } | ||
179 | } while (!complete); | ||
180 | |||
181 | nft_hash_tbl_free(tbl); | ||
182 | return 0; | ||
183 | } | ||
184 | |||
185 | static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv) | ||
186 | { | ||
187 | struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl; | ||
188 | struct nft_hash_elem __rcu **pprev; | ||
189 | unsigned int i; | ||
190 | |||
191 | ntbl = nft_hash_tbl_alloc(tbl->size / 2); | ||
192 | if (ntbl == NULL) | ||
193 | return -ENOMEM; | ||
194 | |||
195 | for (i = 0; i < ntbl->size; i++) { | ||
196 | ntbl->buckets[i] = tbl->buckets[i]; | ||
197 | |||
198 | for (pprev = &ntbl->buckets[i]; *pprev != NULL; | ||
199 | pprev = &nft_dereference(*pprev)->next) | ||
200 | ; | ||
201 | RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]); | ||
202 | } | ||
203 | ntbl->elements = tbl->elements; | ||
204 | |||
205 | /* Publish new table */ | ||
206 | rcu_assign_pointer(priv->tbl, ntbl); | ||
207 | synchronize_rcu(); | ||
208 | |||
209 | nft_hash_tbl_free(tbl); | ||
210 | return 0; | ||
70 | } | 211 | } |
71 | 212 | ||
72 | static int nft_hash_insert(const struct nft_set *set, | 213 | static int nft_hash_insert(const struct nft_set *set, |
73 | const struct nft_set_elem *elem) | 214 | const struct nft_set_elem *elem) |
74 | { | 215 | { |
75 | struct nft_hash *priv = nft_set_priv(set); | 216 | struct nft_hash *priv = nft_set_priv(set); |
217 | struct nft_hash_table *tbl = nft_dereference(priv->tbl); | ||
76 | struct nft_hash_elem *he; | 218 | struct nft_hash_elem *he; |
77 | unsigned int size, h; | 219 | unsigned int size, h; |
78 | 220 | ||
@@ -91,33 +233,66 @@ static int nft_hash_insert(const struct nft_set *set, | |||
91 | if (set->flags & NFT_SET_MAP) | 233 | if (set->flags & NFT_SET_MAP) |
92 | nft_data_copy(he->data, &elem->data); | 234 | nft_data_copy(he->data, &elem->data); |
93 | 235 | ||
94 | h = nft_hash_data(&he->key, priv->hsize, set->klen); | 236 | h = nft_hash_data(&he->key, tbl->size, set->klen); |
95 | hlist_add_head_rcu(&he->hnode, &priv->hash[h]); | 237 | RCU_INIT_POINTER(he->next, tbl->buckets[h]); |
238 | rcu_assign_pointer(tbl->buckets[h], he); | ||
239 | tbl->elements++; | ||
240 | |||
241 | /* Expand table when exceeding 75% load */ | ||
242 | if (tbl->elements > tbl->size / 4 * 3) | ||
243 | nft_hash_tbl_expand(set, priv); | ||
244 | |||
96 | return 0; | 245 | return 0; |
97 | } | 246 | } |
98 | 247 | ||
248 | static void nft_hash_elem_destroy(const struct nft_set *set, | ||
249 | struct nft_hash_elem *he) | ||
250 | { | ||
251 | nft_data_uninit(&he->key, NFT_DATA_VALUE); | ||
252 | if (set->flags & NFT_SET_MAP) | ||
253 | nft_data_uninit(he->data, set->dtype); | ||
254 | kfree(he); | ||
255 | } | ||
256 | |||
99 | static void nft_hash_remove(const struct nft_set *set, | 257 | static void nft_hash_remove(const struct nft_set *set, |
100 | const struct nft_set_elem *elem) | 258 | const struct nft_set_elem *elem) |
101 | { | 259 | { |
102 | struct nft_hash_elem *he = elem->cookie; | 260 | struct nft_hash *priv = nft_set_priv(set); |
261 | struct nft_hash_table *tbl = nft_dereference(priv->tbl); | ||
262 | struct nft_hash_elem *he, __rcu **pprev; | ||
103 | 263 | ||
104 | hlist_del_rcu(&he->hnode); | 264 | pprev = elem->cookie; |
265 | he = nft_dereference((*pprev)); | ||
266 | |||
267 | RCU_INIT_POINTER(*pprev, he->next); | ||
268 | synchronize_rcu(); | ||
105 | kfree(he); | 269 | kfree(he); |
270 | tbl->elements--; | ||
271 | |||
272 | /* Shrink table beneath 30% load */ | ||
273 | if (tbl->elements < tbl->size * 3 / 10 && | ||
274 | tbl->size > NFT_HASH_MIN_SIZE) | ||
275 | nft_hash_tbl_shrink(set, priv); | ||
106 | } | 276 | } |
107 | 277 | ||
108 | static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) | 278 | static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) |
109 | { | 279 | { |
110 | const struct nft_hash *priv = nft_set_priv(set); | 280 | const struct nft_hash *priv = nft_set_priv(set); |
281 | const struct nft_hash_table *tbl = nft_dereference(priv->tbl); | ||
282 | struct nft_hash_elem __rcu * const *pprev; | ||
111 | struct nft_hash_elem *he; | 283 | struct nft_hash_elem *he; |
112 | unsigned int h; | 284 | unsigned int h; |
113 | 285 | ||
114 | h = nft_hash_data(&elem->key, priv->hsize, set->klen); | 286 | h = nft_hash_data(&elem->key, tbl->size, set->klen); |
115 | hlist_for_each_entry(he, &priv->hash[h], hnode) { | 287 | pprev = &tbl->buckets[h]; |
116 | if (nft_data_cmp(&he->key, &elem->key, set->klen)) | 288 | nft_hash_for_each_entry(he, tbl->buckets[h]) { |
289 | if (nft_data_cmp(&he->key, &elem->key, set->klen)) { | ||
290 | pprev = &he->next; | ||
117 | continue; | 291 | continue; |
292 | } | ||
118 | 293 | ||
119 | elem->cookie = he; | 294 | elem->cookie = (void *)pprev; |
120 | elem->flags = 0; | 295 | elem->flags = 0; |
121 | if (set->flags & NFT_SET_MAP) | 296 | if (set->flags & NFT_SET_MAP) |
122 | nft_data_copy(&elem->data, he->data); | 297 | nft_data_copy(&elem->data, he->data); |
123 | return 0; | 298 | return 0; |
@@ -129,12 +304,13 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, | |||
129 | struct nft_set_iter *iter) | 304 | struct nft_set_iter *iter) |
130 | { | 305 | { |
131 | const struct nft_hash *priv = nft_set_priv(set); | 306 | const struct nft_hash *priv = nft_set_priv(set); |
307 | const struct nft_hash_table *tbl = nft_dereference(priv->tbl); | ||
132 | const struct nft_hash_elem *he; | 308 | const struct nft_hash_elem *he; |
133 | struct nft_set_elem elem; | 309 | struct nft_set_elem elem; |
134 | unsigned int i; | 310 | unsigned int i; |
135 | 311 | ||
136 | for (i = 0; i < priv->hsize; i++) { | 312 | for (i = 0; i < tbl->size; i++) { |
137 | hlist_for_each_entry(he, &priv->hash[i], hnode) { | 313 | nft_hash_for_each_entry(he, tbl->buckets[i]) { |
138 | if (iter->count < iter->skip) | 314 | if (iter->count < iter->skip) |
139 | goto cont; | 315 | goto cont; |
140 | 316 | ||
@@ -161,43 +337,35 @@ static int nft_hash_init(const struct nft_set *set, | |||
161 | const struct nlattr * const tb[]) | 337 | const struct nlattr * const tb[]) |
162 | { | 338 | { |
163 | struct nft_hash *priv = nft_set_priv(set); | 339 | struct nft_hash *priv = nft_set_priv(set); |
164 | unsigned int cnt, i; | 340 | struct nft_hash_table *tbl; |
165 | 341 | ||
166 | if (unlikely(!nft_hash_rnd_initted)) { | 342 | if (unlikely(!nft_hash_rnd_initted)) { |
167 | get_random_bytes(&nft_hash_rnd, 4); | 343 | get_random_bytes(&nft_hash_rnd, 4); |
168 | nft_hash_rnd_initted = true; | 344 | nft_hash_rnd_initted = true; |
169 | } | 345 | } |
170 | 346 | ||
171 | /* Aim for a load factor of 0.75 */ | 347 | tbl = nft_hash_tbl_alloc(NFT_HASH_MIN_SIZE); |
172 | // FIXME: temporarily broken until we have set descriptions | 348 | if (tbl == NULL) |
173 | cnt = 100; | ||
174 | cnt = cnt * 4 / 3; | ||
175 | |||
176 | priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL); | ||
177 | if (priv->hash == NULL) | ||
178 | return -ENOMEM; | 349 | return -ENOMEM; |
179 | priv->hsize = cnt; | 350 | RCU_INIT_POINTER(priv->tbl, tbl); |
180 | |||
181 | for (i = 0; i < cnt; i++) | ||
182 | INIT_HLIST_HEAD(&priv->hash[i]); | ||
183 | |||
184 | return 0; | 351 | return 0; |
185 | } | 352 | } |
186 | 353 | ||
187 | static void nft_hash_destroy(const struct nft_set *set) | 354 | static void nft_hash_destroy(const struct nft_set *set) |
188 | { | 355 | { |
189 | const struct nft_hash *priv = nft_set_priv(set); | 356 | const struct nft_hash *priv = nft_set_priv(set); |
190 | const struct hlist_node *next; | 357 | const struct nft_hash_table *tbl = nft_dereference(priv->tbl); |
191 | struct nft_hash_elem *elem; | 358 | struct nft_hash_elem *he, *next; |
192 | unsigned int i; | 359 | unsigned int i; |
193 | 360 | ||
194 | for (i = 0; i < priv->hsize; i++) { | 361 | for (i = 0; i < tbl->size; i++) { |
195 | hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) { | 362 | for (he = nft_dereference(tbl->buckets[i]); he != NULL; |
196 | hlist_del(&elem->hnode); | 363 | he = next) { |
197 | nft_hash_elem_destroy(set, elem); | 364 | next = nft_dereference(he->next); |
365 | nft_hash_elem_destroy(set, he); | ||
198 | } | 366 | } |
199 | } | 367 | } |
200 | kfree(priv->hash); | 368 | kfree(tbl); |
201 | } | 369 | } |
202 | 370 | ||
203 | static struct nft_set_ops nft_hash_ops __read_mostly = { | 371 | static struct nft_set_ops nft_hash_ops __read_mostly = { |
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index f169501f1ad4..810385eb7249 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c | |||
@@ -70,7 +70,8 @@ err1: | |||
70 | return err; | 70 | return err; |
71 | } | 71 | } |
72 | 72 | ||
73 | static void nft_immediate_destroy(const struct nft_expr *expr) | 73 | static void nft_immediate_destroy(const struct nft_ctx *ctx, |
74 | const struct nft_expr *expr) | ||
74 | { | 75 | { |
75 | const struct nft_immediate_expr *priv = nft_expr_priv(expr); | 76 | const struct nft_immediate_expr *priv = nft_expr_priv(expr); |
76 | return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg)); | 77 | return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg)); |
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 26c5154e05f3..10cfb156cdf4 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c | |||
@@ -74,7 +74,8 @@ static int nft_log_init(const struct nft_ctx *ctx, | |||
74 | return 0; | 74 | return 0; |
75 | } | 75 | } |
76 | 76 | ||
77 | static void nft_log_destroy(const struct nft_expr *expr) | 77 | static void nft_log_destroy(const struct nft_ctx *ctx, |
78 | const struct nft_expr *expr) | ||
78 | { | 79 | { |
79 | struct nft_log *priv = nft_expr_priv(expr); | 80 | struct nft_log *priv = nft_expr_priv(expr); |
80 | 81 | ||
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index bb4ef4cccb6e..7fd2bea8aa23 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c | |||
@@ -89,11 +89,12 @@ static int nft_lookup_init(const struct nft_ctx *ctx, | |||
89 | return 0; | 89 | return 0; |
90 | } | 90 | } |
91 | 91 | ||
92 | static void nft_lookup_destroy(const struct nft_expr *expr) | 92 | static void nft_lookup_destroy(const struct nft_ctx *ctx, |
93 | const struct nft_expr *expr) | ||
93 | { | 94 | { |
94 | struct nft_lookup *priv = nft_expr_priv(expr); | 95 | struct nft_lookup *priv = nft_expr_priv(expr); |
95 | 96 | ||
96 | nf_tables_unbind_set(NULL, priv->set, &priv->binding); | 97 | nf_tables_unbind_set(ctx, priv->set, &priv->binding); |
97 | } | 98 | } |
98 | 99 | ||
99 | static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) | 100 | static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) |
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index d3b1ffe26181..a0195d28bcfc 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c | |||
@@ -31,8 +31,8 @@ struct nft_nat { | |||
31 | enum nft_registers sreg_addr_max:8; | 31 | enum nft_registers sreg_addr_max:8; |
32 | enum nft_registers sreg_proto_min:8; | 32 | enum nft_registers sreg_proto_min:8; |
33 | enum nft_registers sreg_proto_max:8; | 33 | enum nft_registers sreg_proto_max:8; |
34 | int family; | 34 | enum nf_nat_manip_type type:8; |
35 | enum nf_nat_manip_type type; | 35 | u8 family; |
36 | }; | 36 | }; |
37 | 37 | ||
38 | static void nft_nat_eval(const struct nft_expr *expr, | 38 | static void nft_nat_eval(const struct nft_expr *expr, |
@@ -88,6 +88,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, | |||
88 | const struct nlattr * const tb[]) | 88 | const struct nlattr * const tb[]) |
89 | { | 89 | { |
90 | struct nft_nat *priv = nft_expr_priv(expr); | 90 | struct nft_nat *priv = nft_expr_priv(expr); |
91 | u32 family; | ||
91 | int err; | 92 | int err; |
92 | 93 | ||
93 | if (tb[NFTA_NAT_TYPE] == NULL) | 94 | if (tb[NFTA_NAT_TYPE] == NULL) |
@@ -107,9 +108,12 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, | |||
107 | if (tb[NFTA_NAT_FAMILY] == NULL) | 108 | if (tb[NFTA_NAT_FAMILY] == NULL) |
108 | return -EINVAL; | 109 | return -EINVAL; |
109 | 110 | ||
110 | priv->family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); | 111 | family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); |
111 | if (priv->family != AF_INET && priv->family != AF_INET6) | 112 | if (family != AF_INET && family != AF_INET6) |
112 | return -EINVAL; | 113 | return -EAFNOSUPPORT; |
114 | if (family != ctx->afi->family) | ||
115 | return -EOPNOTSUPP; | ||
116 | priv->family = family; | ||
113 | 117 | ||
114 | if (tb[NFTA_NAT_REG_ADDR_MIN]) { | 118 | if (tb[NFTA_NAT_REG_ADDR_MIN]) { |
115 | priv->sreg_addr_min = ntohl(nla_get_be32( | 119 | priv->sreg_addr_min = ntohl(nla_get_be32( |
@@ -202,13 +206,7 @@ static struct nft_expr_type nft_nat_type __read_mostly = { | |||
202 | 206 | ||
203 | static int __init nft_nat_module_init(void) | 207 | static int __init nft_nat_module_init(void) |
204 | { | 208 | { |
205 | int err; | 209 | return nft_register_expr(&nft_nat_type); |
206 | |||
207 | err = nft_register_expr(&nft_nat_type); | ||
208 | if (err < 0) | ||
209 | return err; | ||
210 | |||
211 | return 0; | ||
212 | } | 210 | } |
213 | 211 | ||
214 | static void __exit nft_nat_module_exit(void) | 212 | static void __exit nft_nat_module_exit(void) |
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index 3228d7f24eb4..4973cbddc446 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c | |||
@@ -146,11 +146,11 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
146 | 146 | ||
147 | if (par->family == NFPROTO_BRIDGE) { | 147 | if (par->family == NFPROTO_BRIDGE) { |
148 | switch (eth_hdr(skb)->h_proto) { | 148 | switch (eth_hdr(skb)->h_proto) { |
149 | case __constant_htons(ETH_P_IP): | 149 | case htons(ETH_P_IP): |
150 | audit_ip4(ab, skb); | 150 | audit_ip4(ab, skb); |
151 | break; | 151 | break; |
152 | 152 | ||
153 | case __constant_htons(ETH_P_IPV6): | 153 | case htons(ETH_P_IPV6): |
154 | audit_ip6(ab, skb); | 154 | audit_ip6(ab, skb); |
155 | break; | 155 | break; |
156 | } | 156 | } |
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index c40b2695633b..458464e7bd7a 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/jhash.h> | 19 | #include <linux/jhash.h> |
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | #include <linux/list.h> | 21 | #include <linux/list.h> |
22 | #include <linux/rbtree.h> | ||
22 | #include <linux/module.h> | 23 | #include <linux/module.h> |
23 | #include <linux/random.h> | 24 | #include <linux/random.h> |
24 | #include <linux/skbuff.h> | 25 | #include <linux/skbuff.h> |
@@ -31,6 +32,10 @@ | |||
31 | #include <net/netfilter/nf_conntrack_tuple.h> | 32 | #include <net/netfilter/nf_conntrack_tuple.h> |
32 | #include <net/netfilter/nf_conntrack_zones.h> | 33 | #include <net/netfilter/nf_conntrack_zones.h> |
33 | 34 | ||
35 | #define CONNLIMIT_SLOTS 32 | ||
36 | #define CONNLIMIT_LOCK_SLOTS 32 | ||
37 | #define CONNLIMIT_GC_MAX_NODES 8 | ||
38 | |||
34 | /* we will save the tuples of all connections we care about */ | 39 | /* we will save the tuples of all connections we care about */ |
35 | struct xt_connlimit_conn { | 40 | struct xt_connlimit_conn { |
36 | struct hlist_node node; | 41 | struct hlist_node node; |
@@ -38,16 +43,26 @@ struct xt_connlimit_conn { | |||
38 | union nf_inet_addr addr; | 43 | union nf_inet_addr addr; |
39 | }; | 44 | }; |
40 | 45 | ||
46 | struct xt_connlimit_rb { | ||
47 | struct rb_node node; | ||
48 | struct hlist_head hhead; /* connections/hosts in same subnet */ | ||
49 | union nf_inet_addr addr; /* search key */ | ||
50 | }; | ||
51 | |||
41 | struct xt_connlimit_data { | 52 | struct xt_connlimit_data { |
42 | struct hlist_head iphash[256]; | 53 | struct rb_root climit_root4[CONNLIMIT_SLOTS]; |
43 | spinlock_t lock; | 54 | struct rb_root climit_root6[CONNLIMIT_SLOTS]; |
55 | spinlock_t locks[CONNLIMIT_LOCK_SLOTS]; | ||
44 | }; | 56 | }; |
45 | 57 | ||
46 | static u_int32_t connlimit_rnd __read_mostly; | 58 | static u_int32_t connlimit_rnd __read_mostly; |
59 | static struct kmem_cache *connlimit_rb_cachep __read_mostly; | ||
60 | static struct kmem_cache *connlimit_conn_cachep __read_mostly; | ||
47 | 61 | ||
48 | static inline unsigned int connlimit_iphash(__be32 addr) | 62 | static inline unsigned int connlimit_iphash(__be32 addr) |
49 | { | 63 | { |
50 | return jhash_1word((__force __u32)addr, connlimit_rnd) & 0xFF; | 64 | return jhash_1word((__force __u32)addr, |
65 | connlimit_rnd) % CONNLIMIT_SLOTS; | ||
51 | } | 66 | } |
52 | 67 | ||
53 | static inline unsigned int | 68 | static inline unsigned int |
@@ -60,7 +75,8 @@ connlimit_iphash6(const union nf_inet_addr *addr, | |||
60 | for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) | 75 | for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) |
61 | res.ip6[i] = addr->ip6[i] & mask->ip6[i]; | 76 | res.ip6[i] = addr->ip6[i] & mask->ip6[i]; |
62 | 77 | ||
63 | return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6), connlimit_rnd) & 0xFF; | 78 | return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6), |
79 | connlimit_rnd) % CONNLIMIT_SLOTS; | ||
64 | } | 80 | } |
65 | 81 | ||
66 | static inline bool already_closed(const struct nf_conn *conn) | 82 | static inline bool already_closed(const struct nf_conn *conn) |
@@ -72,13 +88,14 @@ static inline bool already_closed(const struct nf_conn *conn) | |||
72 | return 0; | 88 | return 0; |
73 | } | 89 | } |
74 | 90 | ||
75 | static inline unsigned int | 91 | static int |
76 | same_source_net(const union nf_inet_addr *addr, | 92 | same_source_net(const union nf_inet_addr *addr, |
77 | const union nf_inet_addr *mask, | 93 | const union nf_inet_addr *mask, |
78 | const union nf_inet_addr *u3, u_int8_t family) | 94 | const union nf_inet_addr *u3, u_int8_t family) |
79 | { | 95 | { |
80 | if (family == NFPROTO_IPV4) { | 96 | if (family == NFPROTO_IPV4) { |
81 | return (addr->ip & mask->ip) == (u3->ip & mask->ip); | 97 | return ntohl(addr->ip & mask->ip) - |
98 | ntohl(u3->ip & mask->ip); | ||
82 | } else { | 99 | } else { |
83 | union nf_inet_addr lh, rh; | 100 | union nf_inet_addr lh, rh; |
84 | unsigned int i; | 101 | unsigned int i; |
@@ -88,89 +105,205 @@ same_source_net(const union nf_inet_addr *addr, | |||
88 | rh.ip6[i] = u3->ip6[i] & mask->ip6[i]; | 105 | rh.ip6[i] = u3->ip6[i] & mask->ip6[i]; |
89 | } | 106 | } |
90 | 107 | ||
91 | return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6)) == 0; | 108 | return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6)); |
92 | } | 109 | } |
93 | } | 110 | } |
94 | 111 | ||
95 | static int count_them(struct net *net, | 112 | static bool add_hlist(struct hlist_head *head, |
96 | struct xt_connlimit_data *data, | ||
97 | const struct nf_conntrack_tuple *tuple, | 113 | const struct nf_conntrack_tuple *tuple, |
98 | const union nf_inet_addr *addr, | 114 | const union nf_inet_addr *addr) |
99 | const union nf_inet_addr *mask, | 115 | { |
100 | u_int8_t family) | 116 | struct xt_connlimit_conn *conn; |
117 | |||
118 | conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC); | ||
119 | if (conn == NULL) | ||
120 | return false; | ||
121 | conn->tuple = *tuple; | ||
122 | conn->addr = *addr; | ||
123 | hlist_add_head(&conn->node, head); | ||
124 | return true; | ||
125 | } | ||
126 | |||
127 | static unsigned int check_hlist(struct net *net, | ||
128 | struct hlist_head *head, | ||
129 | const struct nf_conntrack_tuple *tuple, | ||
130 | bool *addit) | ||
101 | { | 131 | { |
102 | const struct nf_conntrack_tuple_hash *found; | 132 | const struct nf_conntrack_tuple_hash *found; |
103 | struct xt_connlimit_conn *conn; | 133 | struct xt_connlimit_conn *conn; |
104 | struct hlist_node *n; | 134 | struct hlist_node *n; |
105 | struct nf_conn *found_ct; | 135 | struct nf_conn *found_ct; |
106 | struct hlist_head *hash; | 136 | unsigned int length = 0; |
107 | bool addit = true; | ||
108 | int matches = 0; | ||
109 | |||
110 | if (family == NFPROTO_IPV6) | ||
111 | hash = &data->iphash[connlimit_iphash6(addr, mask)]; | ||
112 | else | ||
113 | hash = &data->iphash[connlimit_iphash(addr->ip & mask->ip)]; | ||
114 | 137 | ||
138 | *addit = true; | ||
115 | rcu_read_lock(); | 139 | rcu_read_lock(); |
116 | 140 | ||
117 | /* check the saved connections */ | 141 | /* check the saved connections */ |
118 | hlist_for_each_entry_safe(conn, n, hash, node) { | 142 | hlist_for_each_entry_safe(conn, n, head, node) { |
119 | found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, | 143 | found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, |
120 | &conn->tuple); | 144 | &conn->tuple); |
121 | found_ct = NULL; | 145 | if (found == NULL) { |
146 | hlist_del(&conn->node); | ||
147 | kmem_cache_free(connlimit_conn_cachep, conn); | ||
148 | continue; | ||
149 | } | ||
122 | 150 | ||
123 | if (found != NULL) | 151 | found_ct = nf_ct_tuplehash_to_ctrack(found); |
124 | found_ct = nf_ct_tuplehash_to_ctrack(found); | ||
125 | 152 | ||
126 | if (found_ct != NULL && | 153 | if (nf_ct_tuple_equal(&conn->tuple, tuple)) { |
127 | nf_ct_tuple_equal(&conn->tuple, tuple) && | ||
128 | !already_closed(found_ct)) | ||
129 | /* | 154 | /* |
130 | * Just to be sure we have it only once in the list. | 155 | * Just to be sure we have it only once in the list. |
131 | * We should not see tuples twice unless someone hooks | 156 | * We should not see tuples twice unless someone hooks |
132 | * this into a table without "-p tcp --syn". | 157 | * this into a table without "-p tcp --syn". |
133 | */ | 158 | */ |
134 | addit = false; | 159 | *addit = false; |
135 | 160 | } else if (already_closed(found_ct)) { | |
136 | if (found == NULL) { | ||
137 | /* this one is gone */ | ||
138 | hlist_del(&conn->node); | ||
139 | kfree(conn); | ||
140 | continue; | ||
141 | } | ||
142 | |||
143 | if (already_closed(found_ct)) { | ||
144 | /* | 161 | /* |
145 | * we do not care about connections which are | 162 | * we do not care about connections which are |
146 | * closed already -> ditch it | 163 | * closed already -> ditch it |
147 | */ | 164 | */ |
148 | nf_ct_put(found_ct); | 165 | nf_ct_put(found_ct); |
149 | hlist_del(&conn->node); | 166 | hlist_del(&conn->node); |
150 | kfree(conn); | 167 | kmem_cache_free(connlimit_conn_cachep, conn); |
151 | continue; | 168 | continue; |
152 | } | 169 | } |
153 | 170 | ||
154 | if (same_source_net(addr, mask, &conn->addr, family)) | ||
155 | /* same source network -> be counted! */ | ||
156 | ++matches; | ||
157 | nf_ct_put(found_ct); | 171 | nf_ct_put(found_ct); |
172 | length++; | ||
158 | } | 173 | } |
159 | 174 | ||
160 | rcu_read_unlock(); | 175 | rcu_read_unlock(); |
161 | 176 | ||
162 | if (addit) { | 177 | return length; |
163 | /* save the new connection in our list */ | 178 | } |
164 | conn = kmalloc(sizeof(*conn), GFP_ATOMIC); | 179 | |
165 | if (conn == NULL) | 180 | static void tree_nodes_free(struct rb_root *root, |
166 | return -ENOMEM; | 181 | struct xt_connlimit_rb *gc_nodes[], |
167 | conn->tuple = *tuple; | 182 | unsigned int gc_count) |
168 | conn->addr = *addr; | 183 | { |
169 | hlist_add_head(&conn->node, hash); | 184 | struct xt_connlimit_rb *rbconn; |
170 | ++matches; | 185 | |
186 | while (gc_count) { | ||
187 | rbconn = gc_nodes[--gc_count]; | ||
188 | rb_erase(&rbconn->node, root); | ||
189 | kmem_cache_free(connlimit_rb_cachep, rbconn); | ||
190 | } | ||
191 | } | ||
192 | |||
193 | static unsigned int | ||
194 | count_tree(struct net *net, struct rb_root *root, | ||
195 | const struct nf_conntrack_tuple *tuple, | ||
196 | const union nf_inet_addr *addr, const union nf_inet_addr *mask, | ||
197 | u8 family) | ||
198 | { | ||
199 | struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES]; | ||
200 | struct rb_node **rbnode, *parent; | ||
201 | struct xt_connlimit_rb *rbconn; | ||
202 | struct xt_connlimit_conn *conn; | ||
203 | unsigned int gc_count; | ||
204 | bool no_gc = false; | ||
205 | |||
206 | restart: | ||
207 | gc_count = 0; | ||
208 | parent = NULL; | ||
209 | rbnode = &(root->rb_node); | ||
210 | while (*rbnode) { | ||
211 | int diff; | ||
212 | bool addit; | ||
213 | |||
214 | rbconn = container_of(*rbnode, struct xt_connlimit_rb, node); | ||
215 | |||
216 | parent = *rbnode; | ||
217 | diff = same_source_net(addr, mask, &rbconn->addr, family); | ||
218 | if (diff < 0) { | ||
219 | rbnode = &((*rbnode)->rb_left); | ||
220 | } else if (diff > 0) { | ||
221 | rbnode = &((*rbnode)->rb_right); | ||
222 | } else { | ||
223 | /* same source network -> be counted! */ | ||
224 | unsigned int count; | ||
225 | count = check_hlist(net, &rbconn->hhead, tuple, &addit); | ||
226 | |||
227 | tree_nodes_free(root, gc_nodes, gc_count); | ||
228 | if (!addit) | ||
229 | return count; | ||
230 | |||
231 | if (!add_hlist(&rbconn->hhead, tuple, addr)) | ||
232 | return 0; /* hotdrop */ | ||
233 | |||
234 | return count + 1; | ||
235 | } | ||
236 | |||
237 | if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes)) | ||
238 | continue; | ||
239 | |||
240 | /* only used for GC on hhead, retval and 'addit' ignored */ | ||
241 | check_hlist(net, &rbconn->hhead, tuple, &addit); | ||
242 | if (hlist_empty(&rbconn->hhead)) | ||
243 | gc_nodes[gc_count++] = rbconn; | ||
244 | } | ||
245 | |||
246 | if (gc_count) { | ||
247 | no_gc = true; | ||
248 | tree_nodes_free(root, gc_nodes, gc_count); | ||
249 | /* tree_node_free before new allocation permits | ||
250 | * allocator to re-use newly free'd object. | ||
251 | * | ||
252 | * This is a rare event; in most cases we will find | ||
253 | * existing node to re-use. (or gc_count is 0). | ||
254 | */ | ||
255 | goto restart; | ||
256 | } | ||
257 | |||
258 | /* no match, need to insert new node */ | ||
259 | rbconn = kmem_cache_alloc(connlimit_rb_cachep, GFP_ATOMIC); | ||
260 | if (rbconn == NULL) | ||
261 | return 0; | ||
262 | |||
263 | conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC); | ||
264 | if (conn == NULL) { | ||
265 | kmem_cache_free(connlimit_rb_cachep, rbconn); | ||
266 | return 0; | ||
267 | } | ||
268 | |||
269 | conn->tuple = *tuple; | ||
270 | conn->addr = *addr; | ||
271 | rbconn->addr = *addr; | ||
272 | |||
273 | INIT_HLIST_HEAD(&rbconn->hhead); | ||
274 | hlist_add_head(&conn->node, &rbconn->hhead); | ||
275 | |||
276 | rb_link_node(&rbconn->node, parent, rbnode); | ||
277 | rb_insert_color(&rbconn->node, root); | ||
278 | return 1; | ||
279 | } | ||
280 | |||
281 | static int count_them(struct net *net, | ||
282 | struct xt_connlimit_data *data, | ||
283 | const struct nf_conntrack_tuple *tuple, | ||
284 | const union nf_inet_addr *addr, | ||
285 | const union nf_inet_addr *mask, | ||
286 | u_int8_t family) | ||
287 | { | ||
288 | struct rb_root *root; | ||
289 | int count; | ||
290 | u32 hash; | ||
291 | |||
292 | if (family == NFPROTO_IPV6) { | ||
293 | hash = connlimit_iphash6(addr, mask); | ||
294 | root = &data->climit_root6[hash]; | ||
295 | } else { | ||
296 | hash = connlimit_iphash(addr->ip & mask->ip); | ||
297 | root = &data->climit_root4[hash]; | ||
171 | } | 298 | } |
172 | 299 | ||
173 | return matches; | 300 | spin_lock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]); |
301 | |||
302 | count = count_tree(net, root, tuple, addr, mask, family); | ||
303 | |||
304 | spin_unlock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]); | ||
305 | |||
306 | return count; | ||
174 | } | 307 | } |
175 | 308 | ||
176 | static bool | 309 | static bool |
@@ -183,7 +316,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) | |||
183 | const struct nf_conntrack_tuple *tuple_ptr = &tuple; | 316 | const struct nf_conntrack_tuple *tuple_ptr = &tuple; |
184 | enum ip_conntrack_info ctinfo; | 317 | enum ip_conntrack_info ctinfo; |
185 | const struct nf_conn *ct; | 318 | const struct nf_conn *ct; |
186 | int connections; | 319 | unsigned int connections; |
187 | 320 | ||
188 | ct = nf_ct_get(skb, &ctinfo); | 321 | ct = nf_ct_get(skb, &ctinfo); |
189 | if (ct != NULL) | 322 | if (ct != NULL) |
@@ -202,12 +335,9 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) | |||
202 | iph->daddr : iph->saddr; | 335 | iph->daddr : iph->saddr; |
203 | } | 336 | } |
204 | 337 | ||
205 | spin_lock_bh(&info->data->lock); | ||
206 | connections = count_them(net, info->data, tuple_ptr, &addr, | 338 | connections = count_them(net, info->data, tuple_ptr, &addr, |
207 | &info->mask, par->family); | 339 | &info->mask, par->family); |
208 | spin_unlock_bh(&info->data->lock); | 340 | if (connections == 0) |
209 | |||
210 | if (connections < 0) | ||
211 | /* kmalloc failed, drop it entirely */ | 341 | /* kmalloc failed, drop it entirely */ |
212 | goto hotdrop; | 342 | goto hotdrop; |
213 | 343 | ||
@@ -247,29 +377,47 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) | |||
247 | return -ENOMEM; | 377 | return -ENOMEM; |
248 | } | 378 | } |
249 | 379 | ||
250 | spin_lock_init(&info->data->lock); | 380 | for (i = 0; i < ARRAY_SIZE(info->data->locks); ++i) |
251 | for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) | 381 | spin_lock_init(&info->data->locks[i]); |
252 | INIT_HLIST_HEAD(&info->data->iphash[i]); | 382 | |
383 | for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) | ||
384 | info->data->climit_root4[i] = RB_ROOT; | ||
385 | for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i) | ||
386 | info->data->climit_root6[i] = RB_ROOT; | ||
253 | 387 | ||
254 | return 0; | 388 | return 0; |
255 | } | 389 | } |
256 | 390 | ||
257 | static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) | 391 | static void destroy_tree(struct rb_root *r) |
258 | { | 392 | { |
259 | const struct xt_connlimit_info *info = par->matchinfo; | ||
260 | struct xt_connlimit_conn *conn; | 393 | struct xt_connlimit_conn *conn; |
394 | struct xt_connlimit_rb *rbconn; | ||
261 | struct hlist_node *n; | 395 | struct hlist_node *n; |
262 | struct hlist_head *hash = info->data->iphash; | 396 | struct rb_node *node; |
397 | |||
398 | while ((node = rb_first(r)) != NULL) { | ||
399 | rbconn = container_of(node, struct xt_connlimit_rb, node); | ||
400 | |||
401 | rb_erase(node, r); | ||
402 | |||
403 | hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node) | ||
404 | kmem_cache_free(connlimit_conn_cachep, conn); | ||
405 | |||
406 | kmem_cache_free(connlimit_rb_cachep, rbconn); | ||
407 | } | ||
408 | } | ||
409 | |||
410 | static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) | ||
411 | { | ||
412 | const struct xt_connlimit_info *info = par->matchinfo; | ||
263 | unsigned int i; | 413 | unsigned int i; |
264 | 414 | ||
265 | nf_ct_l3proto_module_put(par->family); | 415 | nf_ct_l3proto_module_put(par->family); |
266 | 416 | ||
267 | for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { | 417 | for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) |
268 | hlist_for_each_entry_safe(conn, n, &hash[i], node) { | 418 | destroy_tree(&info->data->climit_root4[i]); |
269 | hlist_del(&conn->node); | 419 | for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i) |
270 | kfree(conn); | 420 | destroy_tree(&info->data->climit_root6[i]); |
271 | } | ||
272 | } | ||
273 | 421 | ||
274 | kfree(info->data); | 422 | kfree(info->data); |
275 | } | 423 | } |
@@ -287,12 +435,37 @@ static struct xt_match connlimit_mt_reg __read_mostly = { | |||
287 | 435 | ||
288 | static int __init connlimit_mt_init(void) | 436 | static int __init connlimit_mt_init(void) |
289 | { | 437 | { |
290 | return xt_register_match(&connlimit_mt_reg); | 438 | int ret; |
439 | |||
440 | BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS); | ||
441 | BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0); | ||
442 | |||
443 | connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn", | ||
444 | sizeof(struct xt_connlimit_conn), | ||
445 | 0, 0, NULL); | ||
446 | if (!connlimit_conn_cachep) | ||
447 | return -ENOMEM; | ||
448 | |||
449 | connlimit_rb_cachep = kmem_cache_create("xt_connlimit_rb", | ||
450 | sizeof(struct xt_connlimit_rb), | ||
451 | 0, 0, NULL); | ||
452 | if (!connlimit_rb_cachep) { | ||
453 | kmem_cache_destroy(connlimit_conn_cachep); | ||
454 | return -ENOMEM; | ||
455 | } | ||
456 | ret = xt_register_match(&connlimit_mt_reg); | ||
457 | if (ret != 0) { | ||
458 | kmem_cache_destroy(connlimit_conn_cachep); | ||
459 | kmem_cache_destroy(connlimit_rb_cachep); | ||
460 | } | ||
461 | return ret; | ||
291 | } | 462 | } |
292 | 463 | ||
293 | static void __exit connlimit_mt_exit(void) | 464 | static void __exit connlimit_mt_exit(void) |
294 | { | 465 | { |
295 | xt_unregister_match(&connlimit_mt_reg); | 466 | xt_unregister_match(&connlimit_mt_reg); |
467 | kmem_cache_destroy(connlimit_conn_cachep); | ||
468 | kmem_cache_destroy(connlimit_rb_cachep); | ||
296 | } | 469 | } |
297 | 470 | ||
298 | module_init(connlimit_mt_init); | 471 | module_init(connlimit_mt_init); |
diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c index a4c7561698c5..89d53104c6b3 100644 --- a/net/netfilter/xt_ipcomp.c +++ b/net/netfilter/xt_ipcomp.c | |||
@@ -60,7 +60,7 @@ static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par) | |||
60 | } | 60 | } |
61 | 61 | ||
62 | return spi_match(compinfo->spis[0], compinfo->spis[1], | 62 | return spi_match(compinfo->spis[0], compinfo->spis[1], |
63 | ntohl(chdr->cpi << 16), | 63 | ntohs(chdr->cpi), |
64 | !!(compinfo->invflags & XT_IPCOMP_INV_SPI)); | 64 | !!(compinfo->invflags & XT_IPCOMP_INV_SPI)); |
65 | } | 65 | } |
66 | 66 | ||