aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-05-06 21:51:37 -0400
committerDavid S. Miller <davem@davemloft.net>2018-05-06 21:51:37 -0400
commit90278871d4b0da39c84fc9aa4929b0809dc7cf3c (patch)
treef1f5f261ec4d3e7e13c05b2f27e704395c8a8e98
parent8fb11a9a8d51df9a314a6d970436963c127ff1bd (diff)
parentb13468dc577498002cf4e62978359ff97ffcd187 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter/IPVS updates for net-next The following patchset contains Netfilter/IPVS updates for your net-next tree, more relevant updates in this batch are: 1) Add Maglev support to IPVS. Moreover, store the latest server weight in IPVS since this is needed by Maglev, patches from Inju Song. 2) Preparation works to add iptables flowtable support, patches from Felix Fietkau. 3) Hand over flows back to conntrack slow path in case a TCP RST/FIN packet is seen via new teardown state, also from Felix. 4) Add support for extended netlink error reporting for nf_tables. 5) Support for timeouts larger than 23 days in nf_tables, patch from Florian Westphal. 6) Always set an upper limit to dynamic sets, also from Florian. 7) Allow number generator to make map lookups, from Laura Garcia. 8) Use hash_32() instead of opencoded hashing in IPVS, from Vincent Bernat. 9) Extend ip6tables SRH match to support previous, next and last SID, from Ahmed Abdelsalam. 10) Move Passive OS fingerprint to nf_osf.c, from Fernando Fernandez. 11) Expose nf_conntrack_max through ctnetlink, from Florent Fourcot. 12) Several housekeeping patches for xt_NFLOG, x_tables and ebtables, from Taehee Yoo. 13) Unify meta bridge with core nft_meta, then make nft_meta built-in. Make rt and exthdr built-in too, again from Florian. 14) Missing initialization of tbl->entries in IPVS, from Cong Wang. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/netfilter/nf_osf.h27
-rw-r--r--include/linux/netfilter_bridge/ebtables.h4
-rw-r--r--include/net/ip6_route.h21
-rw-r--r--include/net/ip_vs.h1
-rw-r--r--include/net/ipv6.h2
-rw-r--r--include/net/netfilter/ipv4/nf_nat_masquerade.h2
-rw-r--r--include/net/netfilter/ipv6/nf_nat_masquerade.h2
-rw-r--r--include/net/netfilter/nf_flow_table.h24
-rw-r--r--include/net/netfilter/nf_nat.h2
-rw-r--r--include/net/netfilter/nf_nat_l3proto.h28
-rw-r--r--include/net/netfilter/nf_nat_l4proto.h8
-rw-r--r--include/net/netfilter/nf_nat_redirect.h2
-rw-r--r--include/net/netfilter/nf_tables.h53
-rw-r--r--include/net/netfilter/nf_tables_core.h3
-rw-r--r--include/net/netfilter/nfnetlink_log.h17
-rw-r--r--include/net/netfilter/nft_meta.h44
-rw-r--r--include/uapi/linux/netfilter/nf_nat.h12
-rw-r--r--include/uapi/linux/netfilter/nf_osf.h90
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h8
-rw-r--r--include/uapi/linux/netfilter/nfnetlink_conntrack.h1
-rw-r--r--include/uapi/linux/netfilter/xt_osf.h106
-rw-r--r--include/uapi/linux/netfilter_bridge/ebtables.h6
-rw-r--r--include/uapi/linux/netfilter_ipv6/ip6t_srh.h43
-rw-r--r--net/bridge/netfilter/Kconfig7
-rw-r--r--net/bridge/netfilter/Makefile1
-rw-r--r--net/bridge/netfilter/ebtables.c63
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c135
-rw-r--r--net/ipv4/netfilter/ip_tables.c2
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c2
-rw-r--r--net/ipv4/netfilter/iptable_nat.c3
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c255
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c18
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c2
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c3
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c2
-rw-r--r--net/ipv6/ip6_output.c22
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c2
-rw-r--r--net/ipv6/netfilter/ip6t_srh.c173
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c3
-rw-r--r--net/ipv6/netfilter/nf_flow_table_ipv6.c246
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c18
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c2
-rw-r--r--net/ipv6/netfilter/nft_chain_nat_ipv6.c3
-rw-r--r--net/ipv6/netfilter/nft_masq_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nft_redir_ipv6.c2
-rw-r--r--net/netfilter/Kconfig25
-rw-r--r--net/netfilter/Makefile8
-rw-r--r--net/netfilter/ipvs/Kconfig37
-rw-r--r--net/netfilter/ipvs/Makefile1
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_dh.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_mh.c540
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c3
-rw-r--r--net/netfilter/nf_conntrack_core.c1
-rw-r--r--net/netfilter/nf_conntrack_ftp.c3
-rw-r--r--net/netfilter/nf_conntrack_irc.c6
-rw-r--r--net/netfilter/nf_conntrack_netlink.c3
-rw-r--r--net/netfilter/nf_conntrack_sane.c3
-rw-r--r--net/netfilter/nf_conntrack_sip.c2
-rw-r--r--net/netfilter/nf_conntrack_tftp.c2
-rw-r--r--net/netfilter/nf_flow_table_core.c (renamed from net/netfilter/nf_flow_table.c)309
-rw-r--r--net/netfilter/nf_flow_table_inet.c3
-rw-r--r--net/netfilter/nf_flow_table_ip.c487
-rw-r--r--net/netfilter/nf_nat_core.c27
-rw-r--r--net/netfilter/nf_nat_helper.c2
-rw-r--r--net/netfilter/nf_nat_proto_common.c9
-rw-r--r--net/netfilter/nf_nat_proto_dccp.c2
-rw-r--r--net/netfilter/nf_nat_proto_sctp.c2
-rw-r--r--net/netfilter/nf_nat_proto_tcp.c2
-rw-r--r--net/netfilter/nf_nat_proto_udp.c4
-rw-r--r--net/netfilter/nf_nat_proto_unknown.c2
-rw-r--r--net/netfilter/nf_nat_redirect.c6
-rw-r--r--net/netfilter/nf_nat_sip.c2
-rw-r--r--net/netfilter/nf_osf.c218
-rw-r--r--net/netfilter/nf_tables_api.c624
-rw-r--r--net/netfilter/nf_tables_core.c3
-rw-r--r--net/netfilter/nfnetlink_log.c8
-rw-r--r--net/netfilter/nft_dynset.c7
-rw-r--r--net/netfilter/nft_exthdr.c23
-rw-r--r--net/netfilter/nft_flow_offload.c5
-rw-r--r--net/netfilter/nft_hash.c2
-rw-r--r--net/netfilter/nft_meta.c112
-rw-r--r--net/netfilter/nft_nat.c2
-rw-r--r--net/netfilter/nft_numgen.c85
-rw-r--r--net/netfilter/nft_objref.c4
-rw-r--r--net/netfilter/nft_rt.c22
-rw-r--r--net/netfilter/nft_set_bitmap.c34
-rw-r--r--net/netfilter/nft_set_hash.c153
-rw-r--r--net/netfilter/nft_set_rbtree.c36
-rw-r--r--net/netfilter/xt_NETMAP.c8
-rw-r--r--net/netfilter/xt_NFLOG.c15
-rw-r--r--net/netfilter/xt_REDIRECT.c2
-rw-r--r--net/netfilter/xt_nat.c72
-rw-r--r--net/netfilter/xt_osf.c202
-rw-r--r--net/openvswitch/conntrack.c4
104 files changed, 2753 insertions, 1887 deletions
diff --git a/include/linux/netfilter/nf_osf.h b/include/linux/netfilter/nf_osf.h
new file mode 100644
index 000000000000..a2b39602e87d
--- /dev/null
+++ b/include/linux/netfilter/nf_osf.h
@@ -0,0 +1,27 @@
1#include <uapi/linux/netfilter/nf_osf.h>
2
3/* Initial window size option state machine: multiple of mss, mtu or
4 * plain numeric value. Can also be made as plain numeric value which
5 * is not a multiple of specified value.
6 */
7enum nf_osf_window_size_options {
8 OSF_WSS_PLAIN = 0,
9 OSF_WSS_MSS,
10 OSF_WSS_MTU,
11 OSF_WSS_MODULO,
12 OSF_WSS_MAX,
13};
14
15enum osf_fmatch_states {
16 /* Packet does not match the fingerprint */
17 FMATCH_WRONG = 0,
18 /* Packet matches the fingerprint */
19 FMATCH_OK,
20 /* Options do not match the fingerprint, but header does */
21 FMATCH_OPT_WRONG,
22};
23
24bool nf_osf_match(const struct sk_buff *skb, u_int8_t family,
25 int hooknum, struct net_device *in, struct net_device *out,
26 const struct nf_osf_info *info, struct net *net,
27 const struct list_head *nf_osf_fingers);
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 0773b5a032f1..c6935be7c6ca 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -17,10 +17,6 @@
17#include <linux/if_ether.h> 17#include <linux/if_ether.h>
18#include <uapi/linux/netfilter_bridge/ebtables.h> 18#include <uapi/linux/netfilter_bridge/ebtables.h>
19 19
20/* return values for match() functions */
21#define EBT_MATCH 0
22#define EBT_NOMATCH 1
23
24struct ebt_match { 20struct ebt_match {
25 struct list_head list; 21 struct list_head list;
26 const char name[EBT_FUNCTION_MAXNAMELEN]; 22 const char name[EBT_FUNCTION_MAXNAMELEN];
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 8df4ff798b04..4cf1ef935ed9 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -279,6 +279,27 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
279 !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate); 279 !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate);
280} 280}
281 281
282static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
283{
284 struct inet6_dev *idev;
285 unsigned int mtu;
286
287 if (dst_metric_locked(dst, RTAX_MTU)) {
288 mtu = dst_metric_raw(dst, RTAX_MTU);
289 if (mtu)
290 return mtu;
291 }
292
293 mtu = IPV6_MIN_MTU;
294 rcu_read_lock();
295 idev = __in6_dev_get(dst->dev);
296 if (idev)
297 mtu = idev->cnf.mtu6;
298 rcu_read_unlock();
299
300 return mtu;
301}
302
282struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, 303struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
283 struct net_device *dev, struct sk_buff *skb, 304 struct net_device *dev, struct sk_buff *skb,
284 const void *daddr); 305 const void *daddr);
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index eb0bec043c96..0ac795b41ab8 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -668,6 +668,7 @@ struct ip_vs_dest {
668 volatile unsigned int flags; /* dest status flags */ 668 volatile unsigned int flags; /* dest status flags */
669 atomic_t conn_flags; /* flags to copy to conn */ 669 atomic_t conn_flags; /* flags to copy to conn */
670 atomic_t weight; /* server weight */ 670 atomic_t weight; /* server weight */
671 atomic_t last_weight; /* server latest weight */
671 672
672 refcount_t refcnt; /* reference counter */ 673 refcount_t refcnt; /* reference counter */
673 struct ip_vs_stats stats; /* statistics */ 674 struct ip_vs_stats stats; /* statistics */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 0a872a7c33c8..798558fd1681 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -960,8 +960,6 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
960 &inet6_sk(sk)->cork); 960 &inet6_sk(sk)->cork);
961} 961}
962 962
963unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst);
964
965int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, 963int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
966 struct flowi6 *fl6); 964 struct flowi6 *fl6);
967struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, 965struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
diff --git a/include/net/netfilter/ipv4/nf_nat_masquerade.h b/include/net/netfilter/ipv4/nf_nat_masquerade.h
index ebd869473603..cd24be4c4a99 100644
--- a/include/net/netfilter/ipv4/nf_nat_masquerade.h
+++ b/include/net/netfilter/ipv4/nf_nat_masquerade.h
@@ -6,7 +6,7 @@
6 6
7unsigned int 7unsigned int
8nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, 8nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
9 const struct nf_nat_range *range, 9 const struct nf_nat_range2 *range,
10 const struct net_device *out); 10 const struct net_device *out);
11 11
12void nf_nat_masquerade_ipv4_register_notifier(void); 12void nf_nat_masquerade_ipv4_register_notifier(void);
diff --git a/include/net/netfilter/ipv6/nf_nat_masquerade.h b/include/net/netfilter/ipv6/nf_nat_masquerade.h
index 1ed4f2631ed6..0c3b5ebf0bb8 100644
--- a/include/net/netfilter/ipv6/nf_nat_masquerade.h
+++ b/include/net/netfilter/ipv6/nf_nat_masquerade.h
@@ -3,7 +3,7 @@
3#define _NF_NAT_MASQUERADE_IPV6_H_ 3#define _NF_NAT_MASQUERADE_IPV6_H_
4 4
5unsigned int 5unsigned int
6nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, 6nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
7 const struct net_device *out); 7 const struct net_device *out);
8void nf_nat_masquerade_ipv6_register_notifier(void); 8void nf_nat_masquerade_ipv6_register_notifier(void);
9void nf_nat_masquerade_ipv6_unregister_notifier(void); 9void nf_nat_masquerade_ipv6_unregister_notifier(void);
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 833752dd0c58..ba9fa4592f2b 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -6,6 +6,7 @@
6#include <linux/netdevice.h> 6#include <linux/netdevice.h>
7#include <linux/rhashtable.h> 7#include <linux/rhashtable.h>
8#include <linux/rcupdate.h> 8#include <linux/rcupdate.h>
9#include <linux/netfilter/nf_conntrack_tuple_common.h>
9#include <net/dst.h> 10#include <net/dst.h>
10 11
11struct nf_flowtable; 12struct nf_flowtable;
@@ -13,25 +14,24 @@ struct nf_flowtable;
13struct nf_flowtable_type { 14struct nf_flowtable_type {
14 struct list_head list; 15 struct list_head list;
15 int family; 16 int family;
16 void (*gc)(struct work_struct *work); 17 int (*init)(struct nf_flowtable *ft);
17 void (*free)(struct nf_flowtable *ft); 18 void (*free)(struct nf_flowtable *ft);
18 const struct rhashtable_params *params;
19 nf_hookfn *hook; 19 nf_hookfn *hook;
20 struct module *owner; 20 struct module *owner;
21}; 21};
22 22
23struct nf_flowtable { 23struct nf_flowtable {
24 struct list_head list;
24 struct rhashtable rhashtable; 25 struct rhashtable rhashtable;
25 const struct nf_flowtable_type *type; 26 const struct nf_flowtable_type *type;
26 struct delayed_work gc_work; 27 struct delayed_work gc_work;
27}; 28};
28 29
29enum flow_offload_tuple_dir { 30enum flow_offload_tuple_dir {
30 FLOW_OFFLOAD_DIR_ORIGINAL, 31 FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
31 FLOW_OFFLOAD_DIR_REPLY, 32 FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
32 __FLOW_OFFLOAD_DIR_MAX = FLOW_OFFLOAD_DIR_REPLY, 33 FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
33}; 34};
34#define FLOW_OFFLOAD_DIR_MAX (__FLOW_OFFLOAD_DIR_MAX + 1)
35 35
36struct flow_offload_tuple { 36struct flow_offload_tuple {
37 union { 37 union {
@@ -55,6 +55,8 @@ struct flow_offload_tuple {
55 55
56 int oifidx; 56 int oifidx;
57 57
58 u16 mtu;
59
58 struct dst_entry *dst_cache; 60 struct dst_entry *dst_cache;
59}; 61};
60 62
@@ -66,6 +68,7 @@ struct flow_offload_tuple_rhash {
66#define FLOW_OFFLOAD_SNAT 0x1 68#define FLOW_OFFLOAD_SNAT 0x1
67#define FLOW_OFFLOAD_DNAT 0x2 69#define FLOW_OFFLOAD_DNAT 0x2
68#define FLOW_OFFLOAD_DYING 0x4 70#define FLOW_OFFLOAD_DYING 0x4
71#define FLOW_OFFLOAD_TEARDOWN 0x8
69 72
70struct flow_offload { 73struct flow_offload {
71 struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX]; 74 struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
@@ -98,11 +101,14 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
98 101
99void nf_flow_table_cleanup(struct net *net, struct net_device *dev); 102void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
100 103
104int nf_flow_table_init(struct nf_flowtable *flow_table);
101void nf_flow_table_free(struct nf_flowtable *flow_table); 105void nf_flow_table_free(struct nf_flowtable *flow_table);
102void nf_flow_offload_work_gc(struct work_struct *work);
103extern const struct rhashtable_params nf_flow_offload_rhash_params;
104 106
105void flow_offload_dead(struct flow_offload *flow); 107void flow_offload_teardown(struct flow_offload *flow);
108static inline void flow_offload_dead(struct flow_offload *flow)
109{
110 flow->flags |= FLOW_OFFLOAD_DYING;
111}
106 112
107int nf_flow_snat_port(const struct flow_offload *flow, 113int nf_flow_snat_port(const struct flow_offload *flow,
108 struct sk_buff *skb, unsigned int thoff, 114 struct sk_buff *skb, unsigned int thoff,
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 207a467e7ca6..da3d601cadee 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -39,7 +39,7 @@ struct nf_conn_nat {
39 39
40/* Set up the info structure to map into this range. */ 40/* Set up the info structure to map into this range. */
41unsigned int nf_nat_setup_info(struct nf_conn *ct, 41unsigned int nf_nat_setup_info(struct nf_conn *ct,
42 const struct nf_nat_range *range, 42 const struct nf_nat_range2 *range,
43 enum nf_nat_manip_type maniptype); 43 enum nf_nat_manip_type maniptype);
44 44
45extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct, 45extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct,
diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h
index ce7c2b4e64bb..8bad2560576f 100644
--- a/include/net/netfilter/nf_nat_l3proto.h
+++ b/include/net/netfilter/nf_nat_l3proto.h
@@ -7,7 +7,7 @@ struct nf_nat_l3proto {
7 u8 l3proto; 7 u8 l3proto;
8 8
9 bool (*in_range)(const struct nf_conntrack_tuple *t, 9 bool (*in_range)(const struct nf_conntrack_tuple *t,
10 const struct nf_nat_range *range); 10 const struct nf_nat_range2 *range);
11 11
12 u32 (*secure_port)(const struct nf_conntrack_tuple *t, __be16); 12 u32 (*secure_port)(const struct nf_conntrack_tuple *t, __be16);
13 13
@@ -33,7 +33,7 @@ struct nf_nat_l3proto {
33 struct flowi *fl); 33 struct flowi *fl);
34 34
35 int (*nlattr_to_range)(struct nlattr *tb[], 35 int (*nlattr_to_range)(struct nlattr *tb[],
36 struct nf_nat_range *range); 36 struct nf_nat_range2 *range);
37}; 37};
38 38
39int nf_nat_l3proto_register(const struct nf_nat_l3proto *); 39int nf_nat_l3proto_register(const struct nf_nat_l3proto *);
@@ -48,30 +48,26 @@ unsigned int nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
48 const struct nf_hook_state *state, 48 const struct nf_hook_state *state,
49 unsigned int (*do_chain)(void *priv, 49 unsigned int (*do_chain)(void *priv,
50 struct sk_buff *skb, 50 struct sk_buff *skb,
51 const struct nf_hook_state *state, 51 const struct nf_hook_state *state));
52 struct nf_conn *ct));
53 52
54unsigned int nf_nat_ipv4_out(void *priv, struct sk_buff *skb, 53unsigned int nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
55 const struct nf_hook_state *state, 54 const struct nf_hook_state *state,
56 unsigned int (*do_chain)(void *priv, 55 unsigned int (*do_chain)(void *priv,
57 struct sk_buff *skb, 56 struct sk_buff *skb,
58 const struct nf_hook_state *state, 57 const struct nf_hook_state *state));
59 struct nf_conn *ct));
60 58
61unsigned int nf_nat_ipv4_local_fn(void *priv, 59unsigned int nf_nat_ipv4_local_fn(void *priv,
62 struct sk_buff *skb, 60 struct sk_buff *skb,
63 const struct nf_hook_state *state, 61 const struct nf_hook_state *state,
64 unsigned int (*do_chain)(void *priv, 62 unsigned int (*do_chain)(void *priv,
65 struct sk_buff *skb, 63 struct sk_buff *skb,
66 const struct nf_hook_state *state, 64 const struct nf_hook_state *state));
67 struct nf_conn *ct));
68 65
69unsigned int nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, 66unsigned int nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
70 const struct nf_hook_state *state, 67 const struct nf_hook_state *state,
71 unsigned int (*do_chain)(void *priv, 68 unsigned int (*do_chain)(void *priv,
72 struct sk_buff *skb, 69 struct sk_buff *skb,
73 const struct nf_hook_state *state, 70 const struct nf_hook_state *state));
74 struct nf_conn *ct));
75 71
76int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct, 72int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
77 enum ip_conntrack_info ctinfo, 73 enum ip_conntrack_info ctinfo,
@@ -81,29 +77,25 @@ unsigned int nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
81 const struct nf_hook_state *state, 77 const struct nf_hook_state *state,
82 unsigned int (*do_chain)(void *priv, 78 unsigned int (*do_chain)(void *priv,
83 struct sk_buff *skb, 79 struct sk_buff *skb,
84 const struct nf_hook_state *state, 80 const struct nf_hook_state *state));
85 struct nf_conn *ct));
86 81
87unsigned int nf_nat_ipv6_out(void *priv, struct sk_buff *skb, 82unsigned int nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
88 const struct nf_hook_state *state, 83 const struct nf_hook_state *state,
89 unsigned int (*do_chain)(void *priv, 84 unsigned int (*do_chain)(void *priv,
90 struct sk_buff *skb, 85 struct sk_buff *skb,
91 const struct nf_hook_state *state, 86 const struct nf_hook_state *state));
92 struct nf_conn *ct));
93 87
94unsigned int nf_nat_ipv6_local_fn(void *priv, 88unsigned int nf_nat_ipv6_local_fn(void *priv,
95 struct sk_buff *skb, 89 struct sk_buff *skb,
96 const struct nf_hook_state *state, 90 const struct nf_hook_state *state,
97 unsigned int (*do_chain)(void *priv, 91 unsigned int (*do_chain)(void *priv,
98 struct sk_buff *skb, 92 struct sk_buff *skb,
99 const struct nf_hook_state *state, 93 const struct nf_hook_state *state));
100 struct nf_conn *ct));
101 94
102unsigned int nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, 95unsigned int nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
103 const struct nf_hook_state *state, 96 const struct nf_hook_state *state,
104 unsigned int (*do_chain)(void *priv, 97 unsigned int (*do_chain)(void *priv,
105 struct sk_buff *skb, 98 struct sk_buff *skb,
106 const struct nf_hook_state *state, 99 const struct nf_hook_state *state));
107 struct nf_conn *ct));
108 100
109#endif /* _NF_NAT_L3PROTO_H */ 101#endif /* _NF_NAT_L3PROTO_H */
diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h
index 67835ff8a2d9..b4d6b29bca62 100644
--- a/include/net/netfilter/nf_nat_l4proto.h
+++ b/include/net/netfilter/nf_nat_l4proto.h
@@ -34,12 +34,12 @@ struct nf_nat_l4proto {
34 */ 34 */
35 void (*unique_tuple)(const struct nf_nat_l3proto *l3proto, 35 void (*unique_tuple)(const struct nf_nat_l3proto *l3proto,
36 struct nf_conntrack_tuple *tuple, 36 struct nf_conntrack_tuple *tuple,
37 const struct nf_nat_range *range, 37 const struct nf_nat_range2 *range,
38 enum nf_nat_manip_type maniptype, 38 enum nf_nat_manip_type maniptype,
39 const struct nf_conn *ct); 39 const struct nf_conn *ct);
40 40
41 int (*nlattr_to_range)(struct nlattr *tb[], 41 int (*nlattr_to_range)(struct nlattr *tb[],
42 struct nf_nat_range *range); 42 struct nf_nat_range2 *range);
43}; 43};
44 44
45/* Protocol registration. */ 45/* Protocol registration. */
@@ -72,11 +72,11 @@ bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
72 72
73void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, 73void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
74 struct nf_conntrack_tuple *tuple, 74 struct nf_conntrack_tuple *tuple,
75 const struct nf_nat_range *range, 75 const struct nf_nat_range2 *range,
76 enum nf_nat_manip_type maniptype, 76 enum nf_nat_manip_type maniptype,
77 const struct nf_conn *ct, u16 *rover); 77 const struct nf_conn *ct, u16 *rover);
78 78
79int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[], 79int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
80 struct nf_nat_range *range); 80 struct nf_nat_range2 *range);
81 81
82#endif /*_NF_NAT_L4PROTO_H*/ 82#endif /*_NF_NAT_L4PROTO_H*/
diff --git a/include/net/netfilter/nf_nat_redirect.h b/include/net/netfilter/nf_nat_redirect.h
index 5ddabb08c472..c129aacc8ae8 100644
--- a/include/net/netfilter/nf_nat_redirect.h
+++ b/include/net/netfilter/nf_nat_redirect.h
@@ -7,7 +7,7 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
7 const struct nf_nat_ipv4_multi_range_compat *mr, 7 const struct nf_nat_ipv4_multi_range_compat *mr,
8 unsigned int hooknum); 8 unsigned int hooknum);
9unsigned int 9unsigned int
10nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, 10nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
11 unsigned int hooknum); 11 unsigned int hooknum);
12 12
13#endif /* _NF_NAT_REDIRECT_H_ */ 13#endif /* _NF_NAT_REDIRECT_H_ */
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index cd368d1b8cb8..435c9e3b9181 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -275,23 +275,6 @@ struct nft_set_estimate {
275 enum nft_set_class space; 275 enum nft_set_class space;
276}; 276};
277 277
278/**
279 * struct nft_set_type - nf_tables set type
280 *
281 * @select_ops: function to select nft_set_ops
282 * @ops: default ops, used when no select_ops functions is present
283 * @list: used internally
284 * @owner: module reference
285 */
286struct nft_set_type {
287 const struct nft_set_ops *(*select_ops)(const struct nft_ctx *,
288 const struct nft_set_desc *desc,
289 u32 flags);
290 const struct nft_set_ops *ops;
291 struct list_head list;
292 struct module *owner;
293};
294
295struct nft_set_ext; 278struct nft_set_ext;
296struct nft_expr; 279struct nft_expr;
297 280
@@ -310,7 +293,6 @@ struct nft_expr;
310 * @init: initialize private data of new set instance 293 * @init: initialize private data of new set instance
311 * @destroy: destroy private data of set instance 294 * @destroy: destroy private data of set instance
312 * @elemsize: element private size 295 * @elemsize: element private size
313 * @features: features supported by the implementation
314 */ 296 */
315struct nft_set_ops { 297struct nft_set_ops {
316 bool (*lookup)(const struct net *net, 298 bool (*lookup)(const struct net *net,
@@ -361,9 +343,23 @@ struct nft_set_ops {
361 void (*destroy)(const struct nft_set *set); 343 void (*destroy)(const struct nft_set *set);
362 344
363 unsigned int elemsize; 345 unsigned int elemsize;
346};
347
348/**
349 * struct nft_set_type - nf_tables set type
350 *
351 * @ops: set ops for this type
352 * @list: used internally
353 * @owner: module reference
354 * @features: features supported by the implementation
355 */
356struct nft_set_type {
357 const struct nft_set_ops ops;
358 struct list_head list;
359 struct module *owner;
364 u32 features; 360 u32 features;
365 const struct nft_set_type *type;
366}; 361};
362#define to_set_type(o) container_of(o, struct nft_set_type, ops)
367 363
368int nft_register_set(struct nft_set_type *type); 364int nft_register_set(struct nft_set_type *type);
369void nft_unregister_set(struct nft_set_type *type); 365void nft_unregister_set(struct nft_set_type *type);
@@ -589,7 +585,7 @@ static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext)
589 return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT); 585 return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT);
590} 586}
591 587
592static inline unsigned long *nft_set_ext_expiration(const struct nft_set_ext *ext) 588static inline u64 *nft_set_ext_expiration(const struct nft_set_ext *ext)
593{ 589{
594 return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION); 590 return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION);
595} 591}
@@ -607,7 +603,7 @@ static inline struct nft_expr *nft_set_ext_expr(const struct nft_set_ext *ext)
607static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) 603static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
608{ 604{
609 return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && 605 return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) &&
610 time_is_before_eq_jiffies(*nft_set_ext_expiration(ext)); 606 time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext));
611} 607}
612 608
613static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, 609static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
@@ -1015,9 +1011,9 @@ static inline void *nft_obj_data(const struct nft_object *obj)
1015 1011
1016#define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr)) 1012#define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr))
1017 1013
1018struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, 1014struct nft_object *nft_obj_lookup(const struct nft_table *table,
1019 const struct nlattr *nla, u32 objtype, 1015 const struct nlattr *nla, u32 objtype,
1020 u8 genmask); 1016 u8 genmask);
1021 1017
1022void nft_obj_notify(struct net *net, struct nft_table *table, 1018void nft_obj_notify(struct net *net, struct nft_table *table,
1023 struct nft_object *obj, u32 portid, u32 seq, 1019 struct nft_object *obj, u32 portid, u32 seq,
@@ -1106,12 +1102,9 @@ struct nft_flowtable {
1106 struct nf_flowtable data; 1102 struct nf_flowtable data;
1107}; 1103};
1108 1104
1109struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table, 1105struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
1110 const struct nlattr *nla, 1106 const struct nlattr *nla,
1111 u8 genmask); 1107 u8 genmask);
1112void nft_flow_table_iterate(struct net *net,
1113 void (*iter)(struct nf_flowtable *flowtable, void *data),
1114 void *data);
1115 1108
1116void nft_register_flowtable_type(struct nf_flowtable_type *type); 1109void nft_register_flowtable_type(struct nf_flowtable_type *type);
1117void nft_unregister_flowtable_type(struct nf_flowtable_type *type); 1110void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index ea5aab568be8..cd6915b6c054 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -10,6 +10,9 @@ extern struct nft_expr_type nft_byteorder_type;
10extern struct nft_expr_type nft_payload_type; 10extern struct nft_expr_type nft_payload_type;
11extern struct nft_expr_type nft_dynset_type; 11extern struct nft_expr_type nft_dynset_type;
12extern struct nft_expr_type nft_range_type; 12extern struct nft_expr_type nft_range_type;
13extern struct nft_expr_type nft_meta_type;
14extern struct nft_expr_type nft_rt_type;
15extern struct nft_expr_type nft_exthdr_type;
13 16
14int nf_tables_core_module_init(void); 17int nf_tables_core_module_init(void);
15void nf_tables_core_module_exit(void); 18void nf_tables_core_module_exit(void);
diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h
index 612cfb63ac68..ea32a7d3cf1b 100644
--- a/include/net/netfilter/nfnetlink_log.h
+++ b/include/net/netfilter/nfnetlink_log.h
@@ -1,18 +1 @@
1/* SPDX-License-Identifier: GPL-2.0 */ /* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _KER_NFNETLINK_LOG_H
3#define _KER_NFNETLINK_LOG_H
4
5void
6nfulnl_log_packet(struct net *net,
7 u_int8_t pf,
8 unsigned int hooknum,
9 const struct sk_buff *skb,
10 const struct net_device *in,
11 const struct net_device *out,
12 const struct nf_loginfo *li_user,
13 const char *prefix);
14
15#define NFULNL_COPY_DISABLED 0xff
16
17#endif /* _KER_NFNETLINK_LOG_H */
18
diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h
deleted file mode 100644
index 5c69e9b09388..000000000000
--- a/include/net/netfilter/nft_meta.h
+++ /dev/null
@@ -1,44 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _NFT_META_H_
3#define _NFT_META_H_
4
5struct nft_meta {
6 enum nft_meta_keys key:8;
7 union {
8 enum nft_registers dreg:8;
9 enum nft_registers sreg:8;
10 };
11};
12
13extern const struct nla_policy nft_meta_policy[];
14
15int nft_meta_get_init(const struct nft_ctx *ctx,
16 const struct nft_expr *expr,
17 const struct nlattr * const tb[]);
18
19int nft_meta_set_init(const struct nft_ctx *ctx,
20 const struct nft_expr *expr,
21 const struct nlattr * const tb[]);
22
23int nft_meta_get_dump(struct sk_buff *skb,
24 const struct nft_expr *expr);
25
26int nft_meta_set_dump(struct sk_buff *skb,
27 const struct nft_expr *expr);
28
29void nft_meta_get_eval(const struct nft_expr *expr,
30 struct nft_regs *regs,
31 const struct nft_pktinfo *pkt);
32
33void nft_meta_set_eval(const struct nft_expr *expr,
34 struct nft_regs *regs,
35 const struct nft_pktinfo *pkt);
36
37void nft_meta_set_destroy(const struct nft_ctx *ctx,
38 const struct nft_expr *expr);
39
40int nft_meta_set_validate(const struct nft_ctx *ctx,
41 const struct nft_expr *expr,
42 const struct nft_data **data);
43
44#endif
diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index a33000da7229..4a95c0db14d4 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -10,6 +10,7 @@
10#define NF_NAT_RANGE_PROTO_RANDOM (1 << 2) 10#define NF_NAT_RANGE_PROTO_RANDOM (1 << 2)
11#define NF_NAT_RANGE_PERSISTENT (1 << 3) 11#define NF_NAT_RANGE_PERSISTENT (1 << 3)
12#define NF_NAT_RANGE_PROTO_RANDOM_FULLY (1 << 4) 12#define NF_NAT_RANGE_PROTO_RANDOM_FULLY (1 << 4)
13#define NF_NAT_RANGE_PROTO_OFFSET (1 << 5)
13 14
14#define NF_NAT_RANGE_PROTO_RANDOM_ALL \ 15#define NF_NAT_RANGE_PROTO_RANDOM_ALL \
15 (NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY) 16 (NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
@@ -17,7 +18,7 @@
17#define NF_NAT_RANGE_MASK \ 18#define NF_NAT_RANGE_MASK \
18 (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED | \ 19 (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED | \
19 NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT | \ 20 NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT | \
20 NF_NAT_RANGE_PROTO_RANDOM_FULLY) 21 NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET)
21 22
22struct nf_nat_ipv4_range { 23struct nf_nat_ipv4_range {
23 unsigned int flags; 24 unsigned int flags;
@@ -40,4 +41,13 @@ struct nf_nat_range {
40 union nf_conntrack_man_proto max_proto; 41 union nf_conntrack_man_proto max_proto;
41}; 42};
42 43
44struct nf_nat_range2 {
45 unsigned int flags;
46 union nf_inet_addr min_addr;
47 union nf_inet_addr max_addr;
48 union nf_conntrack_man_proto min_proto;
49 union nf_conntrack_man_proto max_proto;
50 union nf_conntrack_man_proto base_proto;
51};
52
43#endif /* _NETFILTER_NF_NAT_H */ 53#endif /* _NETFILTER_NF_NAT_H */
diff --git a/include/uapi/linux/netfilter/nf_osf.h b/include/uapi/linux/netfilter/nf_osf.h
new file mode 100644
index 000000000000..45376eae31ef
--- /dev/null
+++ b/include/uapi/linux/netfilter/nf_osf.h
@@ -0,0 +1,90 @@
1#ifndef _NF_OSF_H
2#define _NF_OSF_H
3
4#define MAXGENRELEN 32
5
6#define NF_OSF_GENRE (1 << 0)
7#define NF_OSF_TTL (1 << 1)
8#define NF_OSF_LOG (1 << 2)
9#define NF_OSF_INVERT (1 << 3)
10
11#define NF_OSF_LOGLEVEL_ALL 0 /* log all matched fingerprints */
12#define NF_OSF_LOGLEVEL_FIRST 1 /* log only the first matced fingerprint */
13#define NF_OSF_LOGLEVEL_ALL_KNOWN 2 /* do not log unknown packets */
14
15#define NF_OSF_TTL_TRUE 0 /* True ip and fingerprint TTL comparison */
16
17/* Do not compare ip and fingerprint TTL at all */
18#define NF_OSF_TTL_NOCHECK 2
19
20/* Wildcard MSS (kind of).
21 * It is used to implement a state machine for the different wildcard values
22 * of the MSS and window sizes.
23 */
24struct nf_osf_wc {
25 __u32 wc;
26 __u32 val;
27};
28
29/* This struct represents IANA options
30 * http://www.iana.org/assignments/tcp-parameters
31 */
32struct nf_osf_opt {
33 __u16 kind, length;
34 struct nf_osf_wc wc;
35};
36
37struct nf_osf_info {
38 char genre[MAXGENRELEN];
39 __u32 len;
40 __u32 flags;
41 __u32 loglevel;
42 __u32 ttl;
43};
44
45struct nf_osf_user_finger {
46 struct nf_osf_wc wss;
47
48 __u8 ttl, df;
49 __u16 ss, mss;
50 __u16 opt_num;
51
52 char genre[MAXGENRELEN];
53 char version[MAXGENRELEN];
54 char subtype[MAXGENRELEN];
55
56 /* MAX_IPOPTLEN is maximum if all options are NOPs or EOLs */
57 struct nf_osf_opt opt[MAX_IPOPTLEN];
58};
59
60struct nf_osf_finger {
61 struct rcu_head rcu_head;
62 struct list_head finger_entry;
63 struct nf_osf_user_finger finger;
64};
65
66struct nf_osf_nlmsg {
67 struct nf_osf_user_finger f;
68 struct iphdr ip;
69 struct tcphdr tcp;
70};
71
72/* Defines for IANA option kinds */
73enum iana_options {
74 OSFOPT_EOL = 0, /* End of options */
75 OSFOPT_NOP, /* NOP */
76 OSFOPT_MSS, /* Maximum segment size */
77 OSFOPT_WSO, /* Window scale option */
78 OSFOPT_SACKP, /* SACK permitted */
79 OSFOPT_SACK, /* SACK */
80 OSFOPT_ECHO,
81 OSFOPT_ECHOREPLY,
82 OSFOPT_TS, /* Timestamp option */
83 OSFOPT_POCP, /* Partial Order Connection Permitted */
84 OSFOPT_POSP, /* Partial Order Service Profile */
85
86 /* Others are not used in the current OSF */
87 OSFOPT_EMPTY = 255,
88};
89
90#endif /* _NF_OSF_H */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 6a3d653d5b27..ce031cf72288 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -831,7 +831,9 @@ enum nft_rt_keys {
831 NFT_RT_NEXTHOP4, 831 NFT_RT_NEXTHOP4,
832 NFT_RT_NEXTHOP6, 832 NFT_RT_NEXTHOP6,
833 NFT_RT_TCPMSS, 833 NFT_RT_TCPMSS,
834 __NFT_RT_MAX
834}; 835};
836#define NFT_RT_MAX (__NFT_RT_MAX - 1)
835 837
836/** 838/**
837 * enum nft_hash_types - nf_tables hash expression types 839 * enum nft_hash_types - nf_tables hash expression types
@@ -949,7 +951,9 @@ enum nft_ct_keys {
949 NFT_CT_DST_IP, 951 NFT_CT_DST_IP,
950 NFT_CT_SRC_IP6, 952 NFT_CT_SRC_IP6,
951 NFT_CT_DST_IP6, 953 NFT_CT_DST_IP6,
954 __NFT_CT_MAX
952}; 955};
956#define NFT_CT_MAX (__NFT_CT_MAX - 1)
953 957
954/** 958/**
955 * enum nft_ct_attributes - nf_tables ct expression netlink attributes 959 * enum nft_ct_attributes - nf_tables ct expression netlink attributes
@@ -1450,6 +1454,8 @@ enum nft_trace_types {
1450 * @NFTA_NG_MODULUS: maximum counter value (NLA_U32) 1454 * @NFTA_NG_MODULUS: maximum counter value (NLA_U32)
1451 * @NFTA_NG_TYPE: operation type (NLA_U32) 1455 * @NFTA_NG_TYPE: operation type (NLA_U32)
1452 * @NFTA_NG_OFFSET: offset to be added to the counter (NLA_U32) 1456 * @NFTA_NG_OFFSET: offset to be added to the counter (NLA_U32)
1457 * @NFTA_NG_SET_NAME: name of the map to lookup (NLA_STRING)
1458 * @NFTA_NG_SET_ID: id of the map (NLA_U32)
1453 */ 1459 */
1454enum nft_ng_attributes { 1460enum nft_ng_attributes {
1455 NFTA_NG_UNSPEC, 1461 NFTA_NG_UNSPEC,
@@ -1457,6 +1463,8 @@ enum nft_ng_attributes {
1457 NFTA_NG_MODULUS, 1463 NFTA_NG_MODULUS,
1458 NFTA_NG_TYPE, 1464 NFTA_NG_TYPE,
1459 NFTA_NG_OFFSET, 1465 NFTA_NG_OFFSET,
1466 NFTA_NG_SET_NAME,
1467 NFTA_NG_SET_ID,
1460 __NFTA_NG_MAX 1468 __NFTA_NG_MAX
1461}; 1469};
1462#define NFTA_NG_MAX (__NFTA_NG_MAX - 1) 1470#define NFTA_NG_MAX (__NFTA_NG_MAX - 1)
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 77987111cab0..1d41810d17e2 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -262,6 +262,7 @@ enum ctattr_stats_cpu {
262enum ctattr_stats_global { 262enum ctattr_stats_global {
263 CTA_STATS_GLOBAL_UNSPEC, 263 CTA_STATS_GLOBAL_UNSPEC,
264 CTA_STATS_GLOBAL_ENTRIES, 264 CTA_STATS_GLOBAL_ENTRIES,
265 CTA_STATS_GLOBAL_MAX_ENTRIES,
265 __CTA_STATS_GLOBAL_MAX, 266 __CTA_STATS_GLOBAL_MAX,
266}; 267};
267#define CTA_STATS_GLOBAL_MAX (__CTA_STATS_GLOBAL_MAX - 1) 268#define CTA_STATS_GLOBAL_MAX (__CTA_STATS_GLOBAL_MAX - 1)
diff --git a/include/uapi/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h
index dad197e2ab99..72956eceeb09 100644
--- a/include/uapi/linux/netfilter/xt_osf.h
+++ b/include/uapi/linux/netfilter/xt_osf.h
@@ -23,101 +23,29 @@
23#include <linux/types.h> 23#include <linux/types.h>
24#include <linux/ip.h> 24#include <linux/ip.h>
25#include <linux/tcp.h> 25#include <linux/tcp.h>
26#include <linux/netfilter/nf_osf.h>
26 27
27#define MAXGENRELEN 32 28#define XT_OSF_GENRE NF_OSF_GENRE
29#define XT_OSF_INVERT NF_OSF_INVERT
28 30
29#define XT_OSF_GENRE (1<<0) 31#define XT_OSF_TTL NF_OSF_TTL
30#define XT_OSF_TTL (1<<1) 32#define XT_OSF_LOG NF_OSF_LOG
31#define XT_OSF_LOG (1<<2)
32#define XT_OSF_INVERT (1<<3)
33 33
34#define XT_OSF_LOGLEVEL_ALL 0 /* log all matched fingerprints */ 34#define XT_OSF_LOGLEVEL_ALL NF_OSF_LOGLEVEL_ALL
35#define XT_OSF_LOGLEVEL_FIRST 1 /* log only the first matced fingerprint */ 35#define XT_OSF_LOGLEVEL_FIRST NF_OSF_LOGLEVEL_FIRST
36#define XT_OSF_LOGLEVEL_ALL_KNOWN 2 /* do not log unknown packets */ 36#define XT_OSF_LOGLEVEL_ALL_KNOWN NF_OSF_LOGLEVEL_ALL_KNOWN
37 37
38#define XT_OSF_TTL_TRUE 0 /* True ip and fingerprint TTL comparison */ 38#define XT_OSF_TTL_TRUE NF_OSF_TTL_TRUE
39#define XT_OSF_TTL_LESS 1 /* Check if ip TTL is less than fingerprint one */ 39#define XT_OSF_TTL_NOCHECK NF_OSF_TTL_NOCHECK
40#define XT_OSF_TTL_NOCHECK 2 /* Do not compare ip and fingerprint TTL at all */
41 40
42struct xt_osf_info { 41#define XT_OSF_TTL_LESS 1 /* Check if ip TTL is less than fingerprint one */
43 char genre[MAXGENRELEN];
44 __u32 len;
45 __u32 flags;
46 __u32 loglevel;
47 __u32 ttl;
48};
49
50/*
51 * Wildcard MSS (kind of).
52 * It is used to implement a state machine for the different wildcard values
53 * of the MSS and window sizes.
54 */
55struct xt_osf_wc {
56 __u32 wc;
57 __u32 val;
58};
59
60/*
61 * This struct represents IANA options
62 * http://www.iana.org/assignments/tcp-parameters
63 */
64struct xt_osf_opt {
65 __u16 kind, length;
66 struct xt_osf_wc wc;
67};
68
69struct xt_osf_user_finger {
70 struct xt_osf_wc wss;
71
72 __u8 ttl, df;
73 __u16 ss, mss;
74 __u16 opt_num;
75
76 char genre[MAXGENRELEN];
77 char version[MAXGENRELEN];
78 char subtype[MAXGENRELEN];
79 42
80 /* MAX_IPOPTLEN is maximum if all options are NOPs or EOLs */ 43#define xt_osf_wc nf_osf_wc
81 struct xt_osf_opt opt[MAX_IPOPTLEN]; 44#define xt_osf_opt nf_osf_opt
82}; 45#define xt_osf_info nf_osf_info
83 46#define xt_osf_user_finger nf_osf_user_finger
84struct xt_osf_nlmsg { 47#define xt_osf_finger nf_osf_finger
85 struct xt_osf_user_finger f; 48#define xt_osf_nlmsg nf_osf_nlmsg
86 struct iphdr ip;
87 struct tcphdr tcp;
88};
89
90/* Defines for IANA option kinds */
91
92enum iana_options {
93 OSFOPT_EOL = 0, /* End of options */
94 OSFOPT_NOP, /* NOP */
95 OSFOPT_MSS, /* Maximum segment size */
96 OSFOPT_WSO, /* Window scale option */
97 OSFOPT_SACKP, /* SACK permitted */
98 OSFOPT_SACK, /* SACK */
99 OSFOPT_ECHO,
100 OSFOPT_ECHOREPLY,
101 OSFOPT_TS, /* Timestamp option */
102 OSFOPT_POCP, /* Partial Order Connection Permitted */
103 OSFOPT_POSP, /* Partial Order Service Profile */
104
105 /* Others are not used in the current OSF */
106 OSFOPT_EMPTY = 255,
107};
108
109/*
110 * Initial window size option state machine: multiple of mss, mtu or
111 * plain numeric value. Can also be made as plain numeric value which
112 * is not a multiple of specified value.
113 */
114enum xt_osf_window_size_options {
115 OSF_WSS_PLAIN = 0,
116 OSF_WSS_MSS,
117 OSF_WSS_MTU,
118 OSF_WSS_MODULO,
119 OSF_WSS_MAX,
120};
121 49
122/* 50/*
123 * Add/remove fingerprint from the kernel. 51 * Add/remove fingerprint from the kernel.
diff --git a/include/uapi/linux/netfilter_bridge/ebtables.h b/include/uapi/linux/netfilter_bridge/ebtables.h
index 0c7dc8315013..3b86c14ea49d 100644
--- a/include/uapi/linux/netfilter_bridge/ebtables.h
+++ b/include/uapi/linux/netfilter_bridge/ebtables.h
@@ -191,6 +191,12 @@ struct ebt_entry {
191 unsigned char elems[0] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); 191 unsigned char elems[0] __attribute__ ((aligned (__alignof__(struct ebt_replace))));
192}; 192};
193 193
194static __inline__ struct ebt_entry_target *
195ebt_get_target(struct ebt_entry *e)
196{
197 return (void *)e + e->target_offset;
198}
199
194/* {g,s}etsockopt numbers */ 200/* {g,s}etsockopt numbers */
195#define EBT_BASE_CTL 128 201#define EBT_BASE_CTL 128
196 202
diff --git a/include/uapi/linux/netfilter_ipv6/ip6t_srh.h b/include/uapi/linux/netfilter_ipv6/ip6t_srh.h
index f3cc0ef514a7..54ed83360dac 100644
--- a/include/uapi/linux/netfilter_ipv6/ip6t_srh.h
+++ b/include/uapi/linux/netfilter_ipv6/ip6t_srh.h
@@ -17,7 +17,10 @@
17#define IP6T_SRH_LAST_GT 0x0100 17#define IP6T_SRH_LAST_GT 0x0100
18#define IP6T_SRH_LAST_LT 0x0200 18#define IP6T_SRH_LAST_LT 0x0200
19#define IP6T_SRH_TAG 0x0400 19#define IP6T_SRH_TAG 0x0400
20#define IP6T_SRH_MASK 0x07FF 20#define IP6T_SRH_PSID 0x0800
21#define IP6T_SRH_NSID 0x1000
22#define IP6T_SRH_LSID 0x2000
23#define IP6T_SRH_MASK 0x3FFF
21 24
22/* Values for "mt_invflags" field in struct ip6t_srh */ 25/* Values for "mt_invflags" field in struct ip6t_srh */
23#define IP6T_SRH_INV_NEXTHDR 0x0001 26#define IP6T_SRH_INV_NEXTHDR 0x0001
@@ -31,7 +34,10 @@
31#define IP6T_SRH_INV_LAST_GT 0x0100 34#define IP6T_SRH_INV_LAST_GT 0x0100
32#define IP6T_SRH_INV_LAST_LT 0x0200 35#define IP6T_SRH_INV_LAST_LT 0x0200
33#define IP6T_SRH_INV_TAG 0x0400 36#define IP6T_SRH_INV_TAG 0x0400
34#define IP6T_SRH_INV_MASK 0x07FF 37#define IP6T_SRH_INV_PSID 0x0800
38#define IP6T_SRH_INV_NSID 0x1000
39#define IP6T_SRH_INV_LSID 0x2000
40#define IP6T_SRH_INV_MASK 0x3FFF
35 41
36/** 42/**
37 * struct ip6t_srh - SRH match options 43 * struct ip6t_srh - SRH match options
@@ -54,4 +60,37 @@ struct ip6t_srh {
54 __u16 mt_invflags; 60 __u16 mt_invflags;
55}; 61};
56 62
63/**
64 * struct ip6t_srh1 - SRH match options (revision 1)
65 * @ next_hdr: Next header field of SRH
66 * @ hdr_len: Extension header length field of SRH
67 * @ segs_left: Segments left field of SRH
68 * @ last_entry: Last entry field of SRH
69 * @ tag: Tag field of SRH
70 * @ psid_addr: Address of previous SID in SRH SID list
71 * @ nsid_addr: Address of NEXT SID in SRH SID list
72 * @ lsid_addr: Address of LAST SID in SRH SID list
73 * @ psid_msk: Mask of previous SID in SRH SID list
74 * @ nsid_msk: Mask of next SID in SRH SID list
75 * @ lsid_msk: MAsk of last SID in SRH SID list
76 * @ mt_flags: match options
77 * @ mt_invflags: Invert the sense of match options
78 */
79
80struct ip6t_srh1 {
81 __u8 next_hdr;
82 __u8 hdr_len;
83 __u8 segs_left;
84 __u8 last_entry;
85 __u16 tag;
86 struct in6_addr psid_addr;
87 struct in6_addr nsid_addr;
88 struct in6_addr lsid_addr;
89 struct in6_addr psid_msk;
90 struct in6_addr nsid_msk;
91 struct in6_addr lsid_msk;
92 __u16 mt_flags;
93 __u16 mt_invflags;
94};
95
57#endif /*_IP6T_SRH_H*/ 96#endif /*_IP6T_SRH_H*/
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index f212447794bd..9a0159aebe1a 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -8,13 +8,6 @@ menuconfig NF_TABLES_BRIDGE
8 bool "Ethernet Bridge nf_tables support" 8 bool "Ethernet Bridge nf_tables support"
9 9
10if NF_TABLES_BRIDGE 10if NF_TABLES_BRIDGE
11
12config NFT_BRIDGE_META
13 tristate "Netfilter nf_table bridge meta support"
14 depends on NFT_META
15 help
16 Add support for bridge dedicated meta key.
17
18config NFT_BRIDGE_REJECT 11config NFT_BRIDGE_REJECT
19 tristate "Netfilter nf_tables bridge reject support" 12 tristate "Netfilter nf_tables bridge reject support"
20 depends on NFT_REJECT && NFT_REJECT_IPV4 && NFT_REJECT_IPV6 13 depends on NFT_REJECT && NFT_REJECT_IPV4 && NFT_REJECT_IPV6
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 4bc758dd4a8c..9b868861f21a 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -3,7 +3,6 @@
3# Makefile for the netfilter modules for Link Layer filtering on a bridge. 3# Makefile for the netfilter modules for Link Layer filtering on a bridge.
4# 4#
5 5
6obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o
7obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o 6obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o
8 7
9# packet logging 8# packet logging
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 28a4c3490359..b286ed5596c3 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -101,7 +101,7 @@ ebt_do_match(struct ebt_entry_match *m, const struct sk_buff *skb,
101{ 101{
102 par->match = m->u.match; 102 par->match = m->u.match;
103 par->matchinfo = m->data; 103 par->matchinfo = m->data;
104 return m->u.match->match(skb, par) ? EBT_MATCH : EBT_NOMATCH; 104 return !m->u.match->match(skb, par);
105} 105}
106 106
107static inline int 107static inline int
@@ -177,6 +177,12 @@ struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry)
177 return (void *)entry + entry->next_offset; 177 return (void *)entry + entry->next_offset;
178} 178}
179 179
180static inline const struct ebt_entry_target *
181ebt_get_target_c(const struct ebt_entry *e)
182{
183 return ebt_get_target((struct ebt_entry *)e);
184}
185
180/* Do some firewalling */ 186/* Do some firewalling */
181unsigned int ebt_do_table(struct sk_buff *skb, 187unsigned int ebt_do_table(struct sk_buff *skb,
182 const struct nf_hook_state *state, 188 const struct nf_hook_state *state,
@@ -230,8 +236,7 @@ unsigned int ebt_do_table(struct sk_buff *skb,
230 */ 236 */
231 EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar); 237 EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar);
232 238
233 t = (struct ebt_entry_target *) 239 t = ebt_get_target_c(point);
234 (((char *)point) + point->target_offset);
235 /* standard target */ 240 /* standard target */
236 if (!t->u.target->target) 241 if (!t->u.target->target)
237 verdict = ((struct ebt_standard_target *)t)->verdict; 242 verdict = ((struct ebt_standard_target *)t)->verdict;
@@ -343,6 +348,16 @@ find_table_lock(struct net *net, const char *name, int *error,
343 "ebtable_", error, mutex); 348 "ebtable_", error, mutex);
344} 349}
345 350
351static inline void ebt_free_table_info(struct ebt_table_info *info)
352{
353 int i;
354
355 if (info->chainstack) {
356 for_each_possible_cpu(i)
357 vfree(info->chainstack[i]);
358 vfree(info->chainstack);
359 }
360}
346static inline int 361static inline int
347ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par, 362ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
348 unsigned int *cnt) 363 unsigned int *cnt)
@@ -627,7 +642,7 @@ ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt)
627 return 1; 642 return 1;
628 EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, NULL); 643 EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, NULL);
629 EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, NULL); 644 EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, NULL);
630 t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); 645 t = ebt_get_target(e);
631 646
632 par.net = net; 647 par.net = net;
633 par.target = t->u.target; 648 par.target = t->u.target;
@@ -706,7 +721,7 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
706 ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, &tgpar, &j); 721 ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, &tgpar, &j);
707 if (ret != 0) 722 if (ret != 0)
708 goto cleanup_watchers; 723 goto cleanup_watchers;
709 t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); 724 t = ebt_get_target(e);
710 gap = e->next_offset - e->target_offset; 725 gap = e->next_offset - e->target_offset;
711 726
712 target = xt_request_find_target(NFPROTO_BRIDGE, t->u.name, 0); 727 target = xt_request_find_target(NFPROTO_BRIDGE, t->u.name, 0);
@@ -779,8 +794,7 @@ static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack
779 if (pos == nentries) 794 if (pos == nentries)
780 continue; 795 continue;
781 } 796 }
782 t = (struct ebt_entry_target *) 797 t = ebt_get_target_c(e);
783 (((char *)e) + e->target_offset);
784 if (strcmp(t->u.name, EBT_STANDARD_TARGET)) 798 if (strcmp(t->u.name, EBT_STANDARD_TARGET))
785 goto letscontinue; 799 goto letscontinue;
786 if (e->target_offset + sizeof(struct ebt_standard_target) > 800 if (e->target_offset + sizeof(struct ebt_standard_target) >
@@ -975,7 +989,7 @@ static void get_counters(const struct ebt_counter *oldcounters,
975static int do_replace_finish(struct net *net, struct ebt_replace *repl, 989static int do_replace_finish(struct net *net, struct ebt_replace *repl,
976 struct ebt_table_info *newinfo) 990 struct ebt_table_info *newinfo)
977{ 991{
978 int ret, i; 992 int ret;
979 struct ebt_counter *counterstmp = NULL; 993 struct ebt_counter *counterstmp = NULL;
980 /* used to be able to unlock earlier */ 994 /* used to be able to unlock earlier */
981 struct ebt_table_info *table; 995 struct ebt_table_info *table;
@@ -1051,13 +1065,8 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
1051 ebt_cleanup_entry, net, NULL); 1065 ebt_cleanup_entry, net, NULL);
1052 1066
1053 vfree(table->entries); 1067 vfree(table->entries);
1054 if (table->chainstack) { 1068 ebt_free_table_info(table);
1055 for_each_possible_cpu(i)
1056 vfree(table->chainstack[i]);
1057 vfree(table->chainstack);
1058 }
1059 vfree(table); 1069 vfree(table);
1060
1061 vfree(counterstmp); 1070 vfree(counterstmp);
1062 1071
1063#ifdef CONFIG_AUDIT 1072#ifdef CONFIG_AUDIT
@@ -1078,11 +1087,7 @@ free_iterate:
1078free_counterstmp: 1087free_counterstmp:
1079 vfree(counterstmp); 1088 vfree(counterstmp);
1080 /* can be initialized in translate_table() */ 1089 /* can be initialized in translate_table() */
1081 if (newinfo->chainstack) { 1090 ebt_free_table_info(newinfo);
1082 for_each_possible_cpu(i)
1083 vfree(newinfo->chainstack[i]);
1084 vfree(newinfo->chainstack);
1085 }
1086 return ret; 1091 return ret;
1087} 1092}
1088 1093
@@ -1147,8 +1152,6 @@ free_newinfo:
1147 1152
1148static void __ebt_unregister_table(struct net *net, struct ebt_table *table) 1153static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
1149{ 1154{
1150 int i;
1151
1152 mutex_lock(&ebt_mutex); 1155 mutex_lock(&ebt_mutex);
1153 list_del(&table->list); 1156 list_del(&table->list);
1154 mutex_unlock(&ebt_mutex); 1157 mutex_unlock(&ebt_mutex);
@@ -1157,11 +1160,7 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
1157 if (table->private->nentries) 1160 if (table->private->nentries)
1158 module_put(table->me); 1161 module_put(table->me);
1159 vfree(table->private->entries); 1162 vfree(table->private->entries);
1160 if (table->private->chainstack) { 1163 ebt_free_table_info(table->private);
1161 for_each_possible_cpu(i)
1162 vfree(table->private->chainstack[i]);
1163 vfree(table->private->chainstack);
1164 }
1165 vfree(table->private); 1164 vfree(table->private);
1166 kfree(table); 1165 kfree(table);
1167} 1166}
@@ -1263,11 +1262,7 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
1263free_unlock: 1262free_unlock:
1264 mutex_unlock(&ebt_mutex); 1263 mutex_unlock(&ebt_mutex);
1265free_chainstack: 1264free_chainstack:
1266 if (newinfo->chainstack) { 1265 ebt_free_table_info(newinfo);
1267 for_each_possible_cpu(i)
1268 vfree(newinfo->chainstack[i]);
1269 vfree(newinfo->chainstack);
1270 }
1271 vfree(newinfo->entries); 1266 vfree(newinfo->entries);
1272free_newinfo: 1267free_newinfo:
1273 vfree(newinfo); 1268 vfree(newinfo);
@@ -1405,7 +1400,7 @@ static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base,
1405 return -EFAULT; 1400 return -EFAULT;
1406 1401
1407 hlp = ubase + (((char *)e + e->target_offset) - base); 1402 hlp = ubase + (((char *)e + e->target_offset) - base);
1408 t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); 1403 t = ebt_get_target_c(e);
1409 1404
1410 ret = EBT_MATCH_ITERATE(e, ebt_match_to_user, base, ubase); 1405 ret = EBT_MATCH_ITERATE(e, ebt_match_to_user, base, ubase);
1411 if (ret != 0) 1406 if (ret != 0)
@@ -1746,7 +1741,7 @@ static int compat_copy_entry_to_user(struct ebt_entry *e, void __user **dstptr,
1746 return ret; 1741 return ret;
1747 target_offset = e->target_offset - (origsize - *size); 1742 target_offset = e->target_offset - (origsize - *size);
1748 1743
1749 t = (struct ebt_entry_target *) ((char *) e + e->target_offset); 1744 t = ebt_get_target(e);
1750 1745
1751 ret = compat_target_to_user(t, dstptr, size); 1746 ret = compat_target_to_user(t, dstptr, size);
1752 if (ret) 1747 if (ret)
@@ -1794,7 +1789,7 @@ static int compat_calc_entry(const struct ebt_entry *e,
1794 EBT_MATCH_ITERATE(e, compat_calc_match, &off); 1789 EBT_MATCH_ITERATE(e, compat_calc_match, &off);
1795 EBT_WATCHER_ITERATE(e, compat_calc_watcher, &off); 1790 EBT_WATCHER_ITERATE(e, compat_calc_watcher, &off);
1796 1791
1797 t = (const struct ebt_entry_target *) ((char *) e + e->target_offset); 1792 t = ebt_get_target_c(e);
1798 1793
1799 off += xt_compat_target_offset(t->u.target); 1794 off += xt_compat_target_offset(t->u.target);
1800 off += ebt_compat_entry_padsize(); 1795 off += ebt_compat_entry_padsize();
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
deleted file mode 100644
index bb63c9aed55d..000000000000
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ /dev/null
@@ -1,135 +0,0 @@
1/*
2 * Copyright (c) 2014 Intel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 */
9
10#include <linux/kernel.h>
11#include <linux/init.h>
12#include <linux/module.h>
13#include <linux/netlink.h>
14#include <linux/netfilter.h>
15#include <linux/netfilter/nf_tables.h>
16#include <net/netfilter/nf_tables.h>
17#include <net/netfilter/nft_meta.h>
18
19#include "../br_private.h"
20
21static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
22 struct nft_regs *regs,
23 const struct nft_pktinfo *pkt)
24{
25 const struct nft_meta *priv = nft_expr_priv(expr);
26 const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
27 u32 *dest = &regs->data[priv->dreg];
28 const struct net_bridge_port *p;
29
30 switch (priv->key) {
31 case NFT_META_BRI_IIFNAME:
32 if (in == NULL || (p = br_port_get_rcu(in)) == NULL)
33 goto err;
34 break;
35 case NFT_META_BRI_OIFNAME:
36 if (out == NULL || (p = br_port_get_rcu(out)) == NULL)
37 goto err;
38 break;
39 default:
40 goto out;
41 }
42
43 strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
44 return;
45out:
46 return nft_meta_get_eval(expr, regs, pkt);
47err:
48 regs->verdict.code = NFT_BREAK;
49}
50
51static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
52 const struct nft_expr *expr,
53 const struct nlattr * const tb[])
54{
55 struct nft_meta *priv = nft_expr_priv(expr);
56 unsigned int len;
57
58 priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
59 switch (priv->key) {
60 case NFT_META_BRI_IIFNAME:
61 case NFT_META_BRI_OIFNAME:
62 len = IFNAMSIZ;
63 break;
64 default:
65 return nft_meta_get_init(ctx, expr, tb);
66 }
67
68 priv->dreg = nft_parse_register(tb[NFTA_META_DREG]);
69 return nft_validate_register_store(ctx, priv->dreg, NULL,
70 NFT_DATA_VALUE, len);
71}
72
73static struct nft_expr_type nft_meta_bridge_type;
74static const struct nft_expr_ops nft_meta_bridge_get_ops = {
75 .type = &nft_meta_bridge_type,
76 .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
77 .eval = nft_meta_bridge_get_eval,
78 .init = nft_meta_bridge_get_init,
79 .dump = nft_meta_get_dump,
80};
81
82static const struct nft_expr_ops nft_meta_bridge_set_ops = {
83 .type = &nft_meta_bridge_type,
84 .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
85 .eval = nft_meta_set_eval,
86 .init = nft_meta_set_init,
87 .destroy = nft_meta_set_destroy,
88 .dump = nft_meta_set_dump,
89 .validate = nft_meta_set_validate,
90};
91
92static const struct nft_expr_ops *
93nft_meta_bridge_select_ops(const struct nft_ctx *ctx,
94 const struct nlattr * const tb[])
95{
96 if (tb[NFTA_META_KEY] == NULL)
97 return ERR_PTR(-EINVAL);
98
99 if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
100 return ERR_PTR(-EINVAL);
101
102 if (tb[NFTA_META_DREG])
103 return &nft_meta_bridge_get_ops;
104
105 if (tb[NFTA_META_SREG])
106 return &nft_meta_bridge_set_ops;
107
108 return ERR_PTR(-EINVAL);
109}
110
111static struct nft_expr_type nft_meta_bridge_type __read_mostly = {
112 .family = NFPROTO_BRIDGE,
113 .name = "meta",
114 .select_ops = nft_meta_bridge_select_ops,
115 .policy = nft_meta_policy,
116 .maxattr = NFTA_META_MAX,
117 .owner = THIS_MODULE,
118};
119
120static int __init nft_meta_bridge_module_init(void)
121{
122 return nft_register_expr(&nft_meta_bridge_type);
123}
124
125static void __exit nft_meta_bridge_module_exit(void)
126{
127 nft_unregister_expr(&nft_meta_bridge_type);
128}
129
130module_init(nft_meta_bridge_module_init);
131module_exit(nft_meta_bridge_module_exit);
132
133MODULE_LICENSE("GPL");
134MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
135MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta");
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 44b308d93ec2..444f125f3974 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -300,7 +300,7 @@ ipt_do_table(struct sk_buff *skb,
300 counter = xt_get_this_cpu_counter(&e->counters); 300 counter = xt_get_this_cpu_counter(&e->counters);
301 ADD_COUNTER(*counter, skb->len, 1); 301 ADD_COUNTER(*counter, skb->len, 1);
302 302
303 t = ipt_get_target(e); 303 t = ipt_get_target_c(e);
304 WARN_ON(!t->u.kernel.target); 304 WARN_ON(!t->u.kernel.target);
305 305
306#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) 306#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index a03e4e7ef5f9..ce1512b02cb2 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -47,7 +47,7 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
47static unsigned int 47static unsigned int
48masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) 48masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
49{ 49{
50 struct nf_nat_range range; 50 struct nf_nat_range2 range;
51 const struct nf_nat_ipv4_multi_range_compat *mr; 51 const struct nf_nat_ipv4_multi_range_compat *mr;
52 52
53 mr = par->targinfo; 53 mr = par->targinfo;
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 0f7255cc65ee..529d89ec31e8 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -33,8 +33,7 @@ static const struct xt_table nf_nat_ipv4_table = {
33 33
34static unsigned int iptable_nat_do_chain(void *priv, 34static unsigned int iptable_nat_do_chain(void *priv,
35 struct sk_buff *skb, 35 struct sk_buff *skb,
36 const struct nf_hook_state *state, 36 const struct nf_hook_state *state)
37 struct nf_conn *ct)
38{ 37{
39 return ipt_do_table(skb, state, state->net->ipv4.nat_table); 38 return ipt_do_table(skb, state, state->net->ipv4.nat_table);
40} 39}
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
index 0cd46bffa469..e1e56d7123d2 100644
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -2,265 +2,12 @@
2#include <linux/init.h> 2#include <linux/init.h>
3#include <linux/module.h> 3#include <linux/module.h>
4#include <linux/netfilter.h> 4#include <linux/netfilter.h>
5#include <linux/rhashtable.h>
6#include <linux/ip.h>
7#include <linux/netdevice.h>
8#include <net/ip.h>
9#include <net/neighbour.h>
10#include <net/netfilter/nf_flow_table.h> 5#include <net/netfilter/nf_flow_table.h>
11#include <net/netfilter/nf_tables.h> 6#include <net/netfilter/nf_tables.h>
12/* For layer 4 checksum field offset. */
13#include <linux/tcp.h>
14#include <linux/udp.h>
15
16static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
17 __be32 addr, __be32 new_addr)
18{
19 struct tcphdr *tcph;
20
21 if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
22 skb_try_make_writable(skb, thoff + sizeof(*tcph)))
23 return -1;
24
25 tcph = (void *)(skb_network_header(skb) + thoff);
26 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
27
28 return 0;
29}
30
31static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
32 __be32 addr, __be32 new_addr)
33{
34 struct udphdr *udph;
35
36 if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
37 skb_try_make_writable(skb, thoff + sizeof(*udph)))
38 return -1;
39
40 udph = (void *)(skb_network_header(skb) + thoff);
41 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
42 inet_proto_csum_replace4(&udph->check, skb, addr,
43 new_addr, true);
44 if (!udph->check)
45 udph->check = CSUM_MANGLED_0;
46 }
47
48 return 0;
49}
50
51static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
52 unsigned int thoff, __be32 addr,
53 __be32 new_addr)
54{
55 switch (iph->protocol) {
56 case IPPROTO_TCP:
57 if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
58 return NF_DROP;
59 break;
60 case IPPROTO_UDP:
61 if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
62 return NF_DROP;
63 break;
64 }
65
66 return 0;
67}
68
69static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
70 struct iphdr *iph, unsigned int thoff,
71 enum flow_offload_tuple_dir dir)
72{
73 __be32 addr, new_addr;
74
75 switch (dir) {
76 case FLOW_OFFLOAD_DIR_ORIGINAL:
77 addr = iph->saddr;
78 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
79 iph->saddr = new_addr;
80 break;
81 case FLOW_OFFLOAD_DIR_REPLY:
82 addr = iph->daddr;
83 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
84 iph->daddr = new_addr;
85 break;
86 default:
87 return -1;
88 }
89 csum_replace4(&iph->check, addr, new_addr);
90
91 return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
92}
93
94static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
95 struct iphdr *iph, unsigned int thoff,
96 enum flow_offload_tuple_dir dir)
97{
98 __be32 addr, new_addr;
99
100 switch (dir) {
101 case FLOW_OFFLOAD_DIR_ORIGINAL:
102 addr = iph->daddr;
103 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
104 iph->daddr = new_addr;
105 break;
106 case FLOW_OFFLOAD_DIR_REPLY:
107 addr = iph->saddr;
108 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
109 iph->saddr = new_addr;
110 break;
111 default:
112 return -1;
113 }
114 csum_replace4(&iph->check, addr, new_addr);
115
116 return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
117}
118
119static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
120 enum flow_offload_tuple_dir dir)
121{
122 struct iphdr *iph = ip_hdr(skb);
123 unsigned int thoff = iph->ihl * 4;
124
125 if (flow->flags & FLOW_OFFLOAD_SNAT &&
126 (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
127 nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
128 return -1;
129 if (flow->flags & FLOW_OFFLOAD_DNAT &&
130 (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
131 nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
132 return -1;
133
134 return 0;
135}
136
137static bool ip_has_options(unsigned int thoff)
138{
139 return thoff != sizeof(struct iphdr);
140}
141
142static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
143 struct flow_offload_tuple *tuple)
144{
145 struct flow_ports *ports;
146 unsigned int thoff;
147 struct iphdr *iph;
148
149 if (!pskb_may_pull(skb, sizeof(*iph)))
150 return -1;
151
152 iph = ip_hdr(skb);
153 thoff = iph->ihl * 4;
154
155 if (ip_is_fragment(iph) ||
156 unlikely(ip_has_options(thoff)))
157 return -1;
158
159 if (iph->protocol != IPPROTO_TCP &&
160 iph->protocol != IPPROTO_UDP)
161 return -1;
162
163 thoff = iph->ihl * 4;
164 if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
165 return -1;
166
167 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
168
169 tuple->src_v4.s_addr = iph->saddr;
170 tuple->dst_v4.s_addr = iph->daddr;
171 tuple->src_port = ports->source;
172 tuple->dst_port = ports->dest;
173 tuple->l3proto = AF_INET;
174 tuple->l4proto = iph->protocol;
175 tuple->iifidx = dev->ifindex;
176
177 return 0;
178}
179
180/* Based on ip_exceeds_mtu(). */
181static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
182{
183 if (skb->len <= mtu)
184 return false;
185
186 if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
187 return false;
188
189 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
190 return false;
191
192 return true;
193}
194
195static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt)
196{
197 u32 mtu;
198
199 mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
200 if (__nf_flow_exceeds_mtu(skb, mtu))
201 return true;
202
203 return false;
204}
205
206unsigned int
207nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
208 const struct nf_hook_state *state)
209{
210 struct flow_offload_tuple_rhash *tuplehash;
211 struct nf_flowtable *flow_table = priv;
212 struct flow_offload_tuple tuple = {};
213 enum flow_offload_tuple_dir dir;
214 struct flow_offload *flow;
215 struct net_device *outdev;
216 const struct rtable *rt;
217 struct iphdr *iph;
218 __be32 nexthop;
219
220 if (skb->protocol != htons(ETH_P_IP))
221 return NF_ACCEPT;
222
223 if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
224 return NF_ACCEPT;
225
226 tuplehash = flow_offload_lookup(flow_table, &tuple);
227 if (tuplehash == NULL)
228 return NF_ACCEPT;
229
230 outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
231 if (!outdev)
232 return NF_ACCEPT;
233
234 dir = tuplehash->tuple.dir;
235 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
236
237 rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
238 if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
239 return NF_ACCEPT;
240
241 if (skb_try_make_writable(skb, sizeof(*iph)))
242 return NF_DROP;
243
244 if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
245 nf_flow_nat_ip(flow, skb, dir) < 0)
246 return NF_DROP;
247
248 flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
249 iph = ip_hdr(skb);
250 ip_decrease_ttl(iph);
251
252 skb->dev = outdev;
253 nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
254 neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
255
256 return NF_STOLEN;
257}
258EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
259 7
260static struct nf_flowtable_type flowtable_ipv4 = { 8static struct nf_flowtable_type flowtable_ipv4 = {
261 .family = NFPROTO_IPV4, 9 .family = NFPROTO_IPV4,
262 .params = &nf_flow_offload_rhash_params, 10 .init = nf_flow_table_init,
263 .gc = nf_flow_offload_work_gc,
264 .free = nf_flow_table_free, 11 .free = nf_flow_table_free,
265 .hook = nf_flow_offload_ip_hook, 12 .hook = nf_flow_offload_ip_hook,
266 .owner = THIS_MODULE, 13 .owner = THIS_MODULE,
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index ac8342dcb55e..4e6b53ab6c33 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -395,7 +395,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
395static void ip_nat_q931_expect(struct nf_conn *new, 395static void ip_nat_q931_expect(struct nf_conn *new,
396 struct nf_conntrack_expect *this) 396 struct nf_conntrack_expect *this)
397{ 397{
398 struct nf_nat_range range; 398 struct nf_nat_range2 range;
399 399
400 if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */ 400 if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */
401 nf_nat_follow_master(new, this); 401 nf_nat_follow_master(new, this);
@@ -497,7 +497,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
497static void ip_nat_callforwarding_expect(struct nf_conn *new, 497static void ip_nat_callforwarding_expect(struct nf_conn *new,
498 struct nf_conntrack_expect *this) 498 struct nf_conntrack_expect *this)
499{ 499{
500 struct nf_nat_range range; 500 struct nf_nat_range2 range;
501 501
502 /* This must be a fresh one. */ 502 /* This must be a fresh one. */
503 BUG_ON(new->status & IPS_NAT_DONE_MASK); 503 BUG_ON(new->status & IPS_NAT_DONE_MASK);
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index f7ff6a364d7b..325e02956bf5 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -63,7 +63,7 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
63#endif /* CONFIG_XFRM */ 63#endif /* CONFIG_XFRM */
64 64
65static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t, 65static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t,
66 const struct nf_nat_range *range) 66 const struct nf_nat_range2 *range)
67{ 67{
68 return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) && 68 return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
69 ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip); 69 ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
@@ -143,7 +143,7 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
143 143
144#if IS_ENABLED(CONFIG_NF_CT_NETLINK) 144#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
145static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], 145static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
146 struct nf_nat_range *range) 146 struct nf_nat_range2 *range)
147{ 147{
148 if (tb[CTA_NAT_V4_MINIP]) { 148 if (tb[CTA_NAT_V4_MINIP]) {
149 range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]); 149 range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
@@ -246,8 +246,7 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
246 const struct nf_hook_state *state, 246 const struct nf_hook_state *state,
247 unsigned int (*do_chain)(void *priv, 247 unsigned int (*do_chain)(void *priv,
248 struct sk_buff *skb, 248 struct sk_buff *skb,
249 const struct nf_hook_state *state, 249 const struct nf_hook_state *state))
250 struct nf_conn *ct))
251{ 250{
252 struct nf_conn *ct; 251 struct nf_conn *ct;
253 enum ip_conntrack_info ctinfo; 252 enum ip_conntrack_info ctinfo;
@@ -285,7 +284,7 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
285 if (!nf_nat_initialized(ct, maniptype)) { 284 if (!nf_nat_initialized(ct, maniptype)) {
286 unsigned int ret; 285 unsigned int ret;
287 286
288 ret = do_chain(priv, skb, state, ct); 287 ret = do_chain(priv, skb, state);
289 if (ret != NF_ACCEPT) 288 if (ret != NF_ACCEPT)
290 return ret; 289 return ret;
291 290
@@ -326,8 +325,7 @@ nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
326 const struct nf_hook_state *state, 325 const struct nf_hook_state *state,
327 unsigned int (*do_chain)(void *priv, 326 unsigned int (*do_chain)(void *priv,
328 struct sk_buff *skb, 327 struct sk_buff *skb,
329 const struct nf_hook_state *state, 328 const struct nf_hook_state *state))
330 struct nf_conn *ct))
331{ 329{
332 unsigned int ret; 330 unsigned int ret;
333 __be32 daddr = ip_hdr(skb)->daddr; 331 __be32 daddr = ip_hdr(skb)->daddr;
@@ -346,8 +344,7 @@ nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
346 const struct nf_hook_state *state, 344 const struct nf_hook_state *state,
347 unsigned int (*do_chain)(void *priv, 345 unsigned int (*do_chain)(void *priv,
348 struct sk_buff *skb, 346 struct sk_buff *skb,
349 const struct nf_hook_state *state, 347 const struct nf_hook_state *state))
350 struct nf_conn *ct))
351{ 348{
352#ifdef CONFIG_XFRM 349#ifdef CONFIG_XFRM
353 const struct nf_conn *ct; 350 const struct nf_conn *ct;
@@ -383,8 +380,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
383 const struct nf_hook_state *state, 380 const struct nf_hook_state *state,
384 unsigned int (*do_chain)(void *priv, 381 unsigned int (*do_chain)(void *priv,
385 struct sk_buff *skb, 382 struct sk_buff *skb,
386 const struct nf_hook_state *state, 383 const struct nf_hook_state *state))
387 struct nf_conn *ct))
388{ 384{
389 const struct nf_conn *ct; 385 const struct nf_conn *ct;
390 enum ip_conntrack_info ctinfo; 386 enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index 0c366aad89cb..f538c5001547 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -24,13 +24,13 @@
24 24
25unsigned int 25unsigned int
26nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, 26nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
27 const struct nf_nat_range *range, 27 const struct nf_nat_range2 *range,
28 const struct net_device *out) 28 const struct net_device *out)
29{ 29{
30 struct nf_conn *ct; 30 struct nf_conn *ct;
31 struct nf_conn_nat *nat; 31 struct nf_conn_nat *nat;
32 enum ip_conntrack_info ctinfo; 32 enum ip_conntrack_info ctinfo;
33 struct nf_nat_range newrange; 33 struct nf_nat_range2 newrange;
34 const struct rtable *rt; 34 const struct rtable *rt;
35 __be32 newsrc, nh; 35 __be32 newsrc, nh;
36 36
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 8a69363b4884..5d259a12e25f 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -48,7 +48,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
48 struct nf_conntrack_tuple t = {}; 48 struct nf_conntrack_tuple t = {};
49 const struct nf_ct_pptp_master *ct_pptp_info; 49 const struct nf_ct_pptp_master *ct_pptp_info;
50 const struct nf_nat_pptp *nat_pptp_info; 50 const struct nf_nat_pptp *nat_pptp_info;
51 struct nf_nat_range range; 51 struct nf_nat_range2 range;
52 struct nf_conn_nat *nat; 52 struct nf_conn_nat *nat;
53 53
54 nat = nf_ct_nat_ext_add(ct); 54 nat = nf_ct_nat_ext_add(ct);
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index edf05002d674..00fda6331ce5 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -41,7 +41,7 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
41static void 41static void
42gre_unique_tuple(const struct nf_nat_l3proto *l3proto, 42gre_unique_tuple(const struct nf_nat_l3proto *l3proto,
43 struct nf_conntrack_tuple *tuple, 43 struct nf_conntrack_tuple *tuple,
44 const struct nf_nat_range *range, 44 const struct nf_nat_range2 *range,
45 enum nf_nat_manip_type maniptype, 45 enum nf_nat_manip_type maniptype,
46 const struct nf_conn *ct) 46 const struct nf_conn *ct)
47{ 47{
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 7b98baa13ede..6d7cf1d79baf 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -30,7 +30,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
30static void 30static void
31icmp_unique_tuple(const struct nf_nat_l3proto *l3proto, 31icmp_unique_tuple(const struct nf_nat_l3proto *l3proto,
32 struct nf_conntrack_tuple *tuple, 32 struct nf_conntrack_tuple *tuple,
33 const struct nf_nat_range *range, 33 const struct nf_nat_range2 *range,
34 enum nf_nat_manip_type maniptype, 34 enum nf_nat_manip_type maniptype,
35 const struct nf_conn *ct) 35 const struct nf_conn *ct)
36{ 36{
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index b5464a3f253b..285baccfbdea 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -28,8 +28,7 @@
28 28
29static unsigned int nft_nat_do_chain(void *priv, 29static unsigned int nft_nat_do_chain(void *priv,
30 struct sk_buff *skb, 30 struct sk_buff *skb,
31 const struct nf_hook_state *state, 31 const struct nf_hook_state *state)
32 struct nf_conn *ct)
33{ 32{
34 struct nft_pktinfo pkt; 33 struct nft_pktinfo pkt;
35 34
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index f18677277119..f1193e1e928a 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -21,7 +21,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
21 const struct nft_pktinfo *pkt) 21 const struct nft_pktinfo *pkt)
22{ 22{
23 struct nft_masq *priv = nft_expr_priv(expr); 23 struct nft_masq *priv = nft_expr_priv(expr);
24 struct nf_nat_range range; 24 struct nf_nat_range2 range;
25 25
26 memset(&range, 0, sizeof(range)); 26 memset(&range, 0, sizeof(range));
27 range.flags = priv->flags; 27 range.flags = priv->flags;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index dfd8af41824e..7f4493080df6 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -383,28 +383,6 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
383 return dst_output(net, sk, skb); 383 return dst_output(net, sk, skb);
384} 384}
385 385
386unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
387{
388 unsigned int mtu;
389 struct inet6_dev *idev;
390
391 if (dst_metric_locked(dst, RTAX_MTU)) {
392 mtu = dst_metric_raw(dst, RTAX_MTU);
393 if (mtu)
394 return mtu;
395 }
396
397 mtu = IPV6_MIN_MTU;
398 rcu_read_lock();
399 idev = __in6_dev_get(dst->dev);
400 if (idev)
401 mtu = idev->cnf.mtu6;
402 rcu_read_unlock();
403
404 return mtu;
405}
406EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
407
408static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) 386static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
409{ 387{
410 if (skb->len <= mtu) 388 if (skb->len <= mtu)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 65c9e1a58305..7097bbf95843 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -528,7 +528,6 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
528 .family = NFPROTO_IPV6, 528 .family = NFPROTO_IPV6,
529 }; 529 };
530 530
531 t = ip6t_get_target(e);
532 return xt_check_target(&par, t->u.target_size - sizeof(*t), 531 return xt_check_target(&par, t->u.target_size - sizeof(*t),
533 e->ipv6.proto, 532 e->ipv6.proto,
534 e->ipv6.invflags & IP6T_INV_PROTO); 533 e->ipv6.invflags & IP6T_INV_PROTO);
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 92c0047e7e33..491f808e356a 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -29,7 +29,7 @@ masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
29 29
30static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) 30static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
31{ 31{
32 const struct nf_nat_range *range = par->targinfo; 32 const struct nf_nat_range2 *range = par->targinfo;
33 33
34 if (range->flags & NF_NAT_RANGE_MAP_IPS) 34 if (range->flags & NF_NAT_RANGE_MAP_IPS)
35 return -EINVAL; 35 return -EINVAL;
diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c
index 33719d5560c8..1059894a6f4c 100644
--- a/net/ipv6/netfilter/ip6t_srh.c
+++ b/net/ipv6/netfilter/ip6t_srh.c
@@ -117,6 +117,130 @@ static bool srh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
117 return true; 117 return true;
118} 118}
119 119
120static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par)
121{
122 int hdrlen, psidoff, nsidoff, lsidoff, srhoff = 0;
123 const struct ip6t_srh1 *srhinfo = par->matchinfo;
124 struct in6_addr *psid, *nsid, *lsid;
125 struct in6_addr _psid, _nsid, _lsid;
126 struct ipv6_sr_hdr *srh;
127 struct ipv6_sr_hdr _srh;
128
129 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
130 return false;
131 srh = skb_header_pointer(skb, srhoff, sizeof(_srh), &_srh);
132 if (!srh)
133 return false;
134
135 hdrlen = ipv6_optlen(srh);
136 if (skb->len - srhoff < hdrlen)
137 return false;
138
139 if (srh->type != IPV6_SRCRT_TYPE_4)
140 return false;
141
142 if (srh->segments_left > srh->first_segment)
143 return false;
144
145 /* Next Header matching */
146 if (srhinfo->mt_flags & IP6T_SRH_NEXTHDR)
147 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NEXTHDR,
148 !(srh->nexthdr == srhinfo->next_hdr)))
149 return false;
150
151 /* Header Extension Length matching */
152 if (srhinfo->mt_flags & IP6T_SRH_LEN_EQ)
153 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_EQ,
154 !(srh->hdrlen == srhinfo->hdr_len)))
155 return false;
156 if (srhinfo->mt_flags & IP6T_SRH_LEN_GT)
157 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_GT,
158 !(srh->hdrlen > srhinfo->hdr_len)))
159 return false;
160 if (srhinfo->mt_flags & IP6T_SRH_LEN_LT)
161 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_LT,
162 !(srh->hdrlen < srhinfo->hdr_len)))
163 return false;
164
165 /* Segments Left matching */
166 if (srhinfo->mt_flags & IP6T_SRH_SEGS_EQ)
167 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_EQ,
168 !(srh->segments_left == srhinfo->segs_left)))
169 return false;
170 if (srhinfo->mt_flags & IP6T_SRH_SEGS_GT)
171 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_GT,
172 !(srh->segments_left > srhinfo->segs_left)))
173 return false;
174 if (srhinfo->mt_flags & IP6T_SRH_SEGS_LT)
175 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_LT,
176 !(srh->segments_left < srhinfo->segs_left)))
177 return false;
178
179 /**
180 * Last Entry matching
181 * Last_Entry field was introduced in revision 6 of the SRH draft.
182 * It was called First_Segment in the previous revision
183 */
184 if (srhinfo->mt_flags & IP6T_SRH_LAST_EQ)
185 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_EQ,
186 !(srh->first_segment == srhinfo->last_entry)))
187 return false;
188 if (srhinfo->mt_flags & IP6T_SRH_LAST_GT)
189 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_GT,
190 !(srh->first_segment > srhinfo->last_entry)))
191 return false;
192 if (srhinfo->mt_flags & IP6T_SRH_LAST_LT)
193 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_LT,
194 !(srh->first_segment < srhinfo->last_entry)))
195 return false;
196
197 /**
198 * Tag matchig
199 * Tag field was introduced in revision 6 of the SRH draft
200 */
201 if (srhinfo->mt_flags & IP6T_SRH_TAG)
202 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_TAG,
203 !(srh->tag == srhinfo->tag)))
204 return false;
205
206 /* Previous SID matching */
207 if (srhinfo->mt_flags & IP6T_SRH_PSID) {
208 if (srh->segments_left == srh->first_segment)
209 return false;
210 psidoff = srhoff + sizeof(struct ipv6_sr_hdr) +
211 ((srh->segments_left + 1) * sizeof(struct in6_addr));
212 psid = skb_header_pointer(skb, psidoff, sizeof(_psid), &_psid);
213 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_PSID,
214 ipv6_masked_addr_cmp(psid, &srhinfo->psid_msk,
215 &srhinfo->psid_addr)))
216 return false;
217 }
218
219 /* Next SID matching */
220 if (srhinfo->mt_flags & IP6T_SRH_NSID) {
221 if (srh->segments_left == 0)
222 return false;
223 nsidoff = srhoff + sizeof(struct ipv6_sr_hdr) +
224 ((srh->segments_left - 1) * sizeof(struct in6_addr));
225 nsid = skb_header_pointer(skb, nsidoff, sizeof(_nsid), &_nsid);
226 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NSID,
227 ipv6_masked_addr_cmp(nsid, &srhinfo->nsid_msk,
228 &srhinfo->nsid_addr)))
229 return false;
230 }
231
232 /* Last SID matching */
233 if (srhinfo->mt_flags & IP6T_SRH_LSID) {
234 lsidoff = srhoff + sizeof(struct ipv6_sr_hdr);
235 lsid = skb_header_pointer(skb, lsidoff, sizeof(_lsid), &_lsid);
236 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LSID,
237 ipv6_masked_addr_cmp(lsid, &srhinfo->lsid_msk,
238 &srhinfo->lsid_addr)))
239 return false;
240 }
241 return true;
242}
243
120static int srh_mt6_check(const struct xt_mtchk_param *par) 244static int srh_mt6_check(const struct xt_mtchk_param *par)
121{ 245{
122 const struct ip6t_srh *srhinfo = par->matchinfo; 246 const struct ip6t_srh *srhinfo = par->matchinfo;
@@ -136,23 +260,54 @@ static int srh_mt6_check(const struct xt_mtchk_param *par)
136 return 0; 260 return 0;
137} 261}
138 262
139static struct xt_match srh_mt6_reg __read_mostly = { 263static int srh1_mt6_check(const struct xt_mtchk_param *par)
140 .name = "srh", 264{
141 .family = NFPROTO_IPV6, 265 const struct ip6t_srh1 *srhinfo = par->matchinfo;
142 .match = srh_mt6, 266
143 .matchsize = sizeof(struct ip6t_srh), 267 if (srhinfo->mt_flags & ~IP6T_SRH_MASK) {
144 .checkentry = srh_mt6_check, 268 pr_info_ratelimited("unknown srh match flags %X\n",
145 .me = THIS_MODULE, 269 srhinfo->mt_flags);
270 return -EINVAL;
271 }
272
273 if (srhinfo->mt_invflags & ~IP6T_SRH_INV_MASK) {
274 pr_info_ratelimited("unknown srh invflags %X\n",
275 srhinfo->mt_invflags);
276 return -EINVAL;
277 }
278
279 return 0;
280}
281
282static struct xt_match srh_mt6_reg[] __read_mostly = {
283 {
284 .name = "srh",
285 .revision = 0,
286 .family = NFPROTO_IPV6,
287 .match = srh_mt6,
288 .matchsize = sizeof(struct ip6t_srh),
289 .checkentry = srh_mt6_check,
290 .me = THIS_MODULE,
291 },
292 {
293 .name = "srh",
294 .revision = 1,
295 .family = NFPROTO_IPV6,
296 .match = srh1_mt6,
297 .matchsize = sizeof(struct ip6t_srh1),
298 .checkentry = srh1_mt6_check,
299 .me = THIS_MODULE,
300 }
146}; 301};
147 302
148static int __init srh_mt6_init(void) 303static int __init srh_mt6_init(void)
149{ 304{
150 return xt_register_match(&srh_mt6_reg); 305 return xt_register_matches(srh_mt6_reg, ARRAY_SIZE(srh_mt6_reg));
151} 306}
152 307
153static void __exit srh_mt6_exit(void) 308static void __exit srh_mt6_exit(void)
154{ 309{
155 xt_unregister_match(&srh_mt6_reg); 310 xt_unregister_matches(srh_mt6_reg, ARRAY_SIZE(srh_mt6_reg));
156} 311}
157 312
158module_init(srh_mt6_init); 313module_init(srh_mt6_init);
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 47306e45a80a..2bf554e18af8 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -35,8 +35,7 @@ static const struct xt_table nf_nat_ipv6_table = {
35 35
36static unsigned int ip6table_nat_do_chain(void *priv, 36static unsigned int ip6table_nat_do_chain(void *priv,
37 struct sk_buff *skb, 37 struct sk_buff *skb,
38 const struct nf_hook_state *state, 38 const struct nf_hook_state *state)
39 struct nf_conn *ct)
40{ 39{
41 return ip6t_do_table(skb, state, state->net->ipv6.ip6table_nat); 40 return ip6t_do_table(skb, state, state->net->ipv6.ip6table_nat);
42} 41}
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
index 207cb35569b1..c511d206bf9b 100644
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -3,256 +3,12 @@
3#include <linux/module.h> 3#include <linux/module.h>
4#include <linux/netfilter.h> 4#include <linux/netfilter.h>
5#include <linux/rhashtable.h> 5#include <linux/rhashtable.h>
6#include <linux/ipv6.h>
7#include <linux/netdevice.h>
8#include <net/ipv6.h>
9#include <net/ip6_route.h>
10#include <net/neighbour.h>
11#include <net/netfilter/nf_flow_table.h> 6#include <net/netfilter/nf_flow_table.h>
12#include <net/netfilter/nf_tables.h> 7#include <net/netfilter/nf_tables.h>
13/* For layer 4 checksum field offset. */
14#include <linux/tcp.h>
15#include <linux/udp.h>
16
17static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
18 struct in6_addr *addr,
19 struct in6_addr *new_addr)
20{
21 struct tcphdr *tcph;
22
23 if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
24 skb_try_make_writable(skb, thoff + sizeof(*tcph)))
25 return -1;
26
27 tcph = (void *)(skb_network_header(skb) + thoff);
28 inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
29 new_addr->s6_addr32, true);
30
31 return 0;
32}
33
34static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
35 struct in6_addr *addr,
36 struct in6_addr *new_addr)
37{
38 struct udphdr *udph;
39
40 if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
41 skb_try_make_writable(skb, thoff + sizeof(*udph)))
42 return -1;
43
44 udph = (void *)(skb_network_header(skb) + thoff);
45 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
46 inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
47 new_addr->s6_addr32, true);
48 if (!udph->check)
49 udph->check = CSUM_MANGLED_0;
50 }
51
52 return 0;
53}
54
55static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
56 unsigned int thoff, struct in6_addr *addr,
57 struct in6_addr *new_addr)
58{
59 switch (ip6h->nexthdr) {
60 case IPPROTO_TCP:
61 if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
62 return NF_DROP;
63 break;
64 case IPPROTO_UDP:
65 if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
66 return NF_DROP;
67 break;
68 }
69
70 return 0;
71}
72
73static int nf_flow_snat_ipv6(const struct flow_offload *flow,
74 struct sk_buff *skb, struct ipv6hdr *ip6h,
75 unsigned int thoff,
76 enum flow_offload_tuple_dir dir)
77{
78 struct in6_addr addr, new_addr;
79
80 switch (dir) {
81 case FLOW_OFFLOAD_DIR_ORIGINAL:
82 addr = ip6h->saddr;
83 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
84 ip6h->saddr = new_addr;
85 break;
86 case FLOW_OFFLOAD_DIR_REPLY:
87 addr = ip6h->daddr;
88 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
89 ip6h->daddr = new_addr;
90 break;
91 default:
92 return -1;
93 }
94
95 return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
96}
97
98static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
99 struct sk_buff *skb, struct ipv6hdr *ip6h,
100 unsigned int thoff,
101 enum flow_offload_tuple_dir dir)
102{
103 struct in6_addr addr, new_addr;
104
105 switch (dir) {
106 case FLOW_OFFLOAD_DIR_ORIGINAL:
107 addr = ip6h->daddr;
108 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
109 ip6h->daddr = new_addr;
110 break;
111 case FLOW_OFFLOAD_DIR_REPLY:
112 addr = ip6h->saddr;
113 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
114 ip6h->saddr = new_addr;
115 break;
116 default:
117 return -1;
118 }
119
120 return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
121}
122
123static int nf_flow_nat_ipv6(const struct flow_offload *flow,
124 struct sk_buff *skb,
125 enum flow_offload_tuple_dir dir)
126{
127 struct ipv6hdr *ip6h = ipv6_hdr(skb);
128 unsigned int thoff = sizeof(*ip6h);
129
130 if (flow->flags & FLOW_OFFLOAD_SNAT &&
131 (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
132 nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
133 return -1;
134 if (flow->flags & FLOW_OFFLOAD_DNAT &&
135 (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
136 nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
137 return -1;
138
139 return 0;
140}
141
142static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
143 struct flow_offload_tuple *tuple)
144{
145 struct flow_ports *ports;
146 struct ipv6hdr *ip6h;
147 unsigned int thoff;
148
149 if (!pskb_may_pull(skb, sizeof(*ip6h)))
150 return -1;
151
152 ip6h = ipv6_hdr(skb);
153
154 if (ip6h->nexthdr != IPPROTO_TCP &&
155 ip6h->nexthdr != IPPROTO_UDP)
156 return -1;
157
158 thoff = sizeof(*ip6h);
159 if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
160 return -1;
161
162 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
163
164 tuple->src_v6 = ip6h->saddr;
165 tuple->dst_v6 = ip6h->daddr;
166 tuple->src_port = ports->source;
167 tuple->dst_port = ports->dest;
168 tuple->l3proto = AF_INET6;
169 tuple->l4proto = ip6h->nexthdr;
170 tuple->iifidx = dev->ifindex;
171
172 return 0;
173}
174
175/* Based on ip_exceeds_mtu(). */
176static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
177{
178 if (skb->len <= mtu)
179 return false;
180
181 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
182 return false;
183
184 return true;
185}
186
187static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
188{
189 u32 mtu;
190
191 mtu = ip6_dst_mtu_forward(&rt->dst);
192 if (__nf_flow_exceeds_mtu(skb, mtu))
193 return true;
194
195 return false;
196}
197
198unsigned int
199nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
200 const struct nf_hook_state *state)
201{
202 struct flow_offload_tuple_rhash *tuplehash;
203 struct nf_flowtable *flow_table = priv;
204 struct flow_offload_tuple tuple = {};
205 enum flow_offload_tuple_dir dir;
206 struct flow_offload *flow;
207 struct net_device *outdev;
208 struct in6_addr *nexthop;
209 struct ipv6hdr *ip6h;
210 struct rt6_info *rt;
211
212 if (skb->protocol != htons(ETH_P_IPV6))
213 return NF_ACCEPT;
214
215 if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
216 return NF_ACCEPT;
217
218 tuplehash = flow_offload_lookup(flow_table, &tuple);
219 if (tuplehash == NULL)
220 return NF_ACCEPT;
221
222 outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
223 if (!outdev)
224 return NF_ACCEPT;
225
226 dir = tuplehash->tuple.dir;
227 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
228
229 rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
230 if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
231 return NF_ACCEPT;
232
233 if (skb_try_make_writable(skb, sizeof(*ip6h)))
234 return NF_DROP;
235
236 if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
237 nf_flow_nat_ipv6(flow, skb, dir) < 0)
238 return NF_DROP;
239
240 flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
241 ip6h = ipv6_hdr(skb);
242 ip6h->hop_limit--;
243
244 skb->dev = outdev;
245 nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
246 neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
247
248 return NF_STOLEN;
249}
250EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
251 8
252static struct nf_flowtable_type flowtable_ipv6 = { 9static struct nf_flowtable_type flowtable_ipv6 = {
253 .family = NFPROTO_IPV6, 10 .family = NFPROTO_IPV6,
254 .params = &nf_flow_offload_rhash_params, 11 .init = nf_flow_table_init,
255 .gc = nf_flow_offload_work_gc,
256 .free = nf_flow_table_free, 12 .free = nf_flow_table_free,
257 .hook = nf_flow_offload_ipv6_hook, 13 .hook = nf_flow_offload_ipv6_hook,
258 .owner = THIS_MODULE, 14 .owner = THIS_MODULE,
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 6b7f075f811f..f1582b6f9588 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -62,7 +62,7 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
62#endif 62#endif
63 63
64static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t, 64static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t,
65 const struct nf_nat_range *range) 65 const struct nf_nat_range2 *range)
66{ 66{
67 return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 && 67 return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
68 ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0; 68 ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
@@ -151,7 +151,7 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
151 151
152#if IS_ENABLED(CONFIG_NF_CT_NETLINK) 152#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
153static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[], 153static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
154 struct nf_nat_range *range) 154 struct nf_nat_range2 *range)
155{ 155{
156 if (tb[CTA_NAT_V6_MINIP]) { 156 if (tb[CTA_NAT_V6_MINIP]) {
157 nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP], 157 nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
@@ -257,8 +257,7 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
257 const struct nf_hook_state *state, 257 const struct nf_hook_state *state,
258 unsigned int (*do_chain)(void *priv, 258 unsigned int (*do_chain)(void *priv,
259 struct sk_buff *skb, 259 struct sk_buff *skb,
260 const struct nf_hook_state *state, 260 const struct nf_hook_state *state))
261 struct nf_conn *ct))
262{ 261{
263 struct nf_conn *ct; 262 struct nf_conn *ct;
264 enum ip_conntrack_info ctinfo; 263 enum ip_conntrack_info ctinfo;
@@ -303,7 +302,7 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
303 if (!nf_nat_initialized(ct, maniptype)) { 302 if (!nf_nat_initialized(ct, maniptype)) {
304 unsigned int ret; 303 unsigned int ret;
305 304
306 ret = do_chain(priv, skb, state, ct); 305 ret = do_chain(priv, skb, state);
307 if (ret != NF_ACCEPT) 306 if (ret != NF_ACCEPT)
308 return ret; 307 return ret;
309 308
@@ -343,8 +342,7 @@ nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
343 const struct nf_hook_state *state, 342 const struct nf_hook_state *state,
344 unsigned int (*do_chain)(void *priv, 343 unsigned int (*do_chain)(void *priv,
345 struct sk_buff *skb, 344 struct sk_buff *skb,
346 const struct nf_hook_state *state, 345 const struct nf_hook_state *state))
347 struct nf_conn *ct))
348{ 346{
349 unsigned int ret; 347 unsigned int ret;
350 struct in6_addr daddr = ipv6_hdr(skb)->daddr; 348 struct in6_addr daddr = ipv6_hdr(skb)->daddr;
@@ -363,8 +361,7 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
363 const struct nf_hook_state *state, 361 const struct nf_hook_state *state,
364 unsigned int (*do_chain)(void *priv, 362 unsigned int (*do_chain)(void *priv,
365 struct sk_buff *skb, 363 struct sk_buff *skb,
366 const struct nf_hook_state *state, 364 const struct nf_hook_state *state))
367 struct nf_conn *ct))
368{ 365{
369#ifdef CONFIG_XFRM 366#ifdef CONFIG_XFRM
370 const struct nf_conn *ct; 367 const struct nf_conn *ct;
@@ -400,8 +397,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
400 const struct nf_hook_state *state, 397 const struct nf_hook_state *state,
401 unsigned int (*do_chain)(void *priv, 398 unsigned int (*do_chain)(void *priv,
402 struct sk_buff *skb, 399 struct sk_buff *skb,
403 const struct nf_hook_state *state, 400 const struct nf_hook_state *state))
404 struct nf_conn *ct))
405{ 401{
406 const struct nf_conn *ct; 402 const struct nf_conn *ct;
407 enum ip_conntrack_info ctinfo; 403 enum ip_conntrack_info ctinfo;
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 98f61fcb9108..9dfc2b90c362 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -26,14 +26,14 @@
26static atomic_t v6_worker_count; 26static atomic_t v6_worker_count;
27 27
28unsigned int 28unsigned int
29nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, 29nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
30 const struct net_device *out) 30 const struct net_device *out)
31{ 31{
32 enum ip_conntrack_info ctinfo; 32 enum ip_conntrack_info ctinfo;
33 struct nf_conn_nat *nat; 33 struct nf_conn_nat *nat;
34 struct in6_addr src; 34 struct in6_addr src;
35 struct nf_conn *ct; 35 struct nf_conn *ct;
36 struct nf_nat_range newrange; 36 struct nf_nat_range2 newrange;
37 37
38 ct = nf_ct_get(skb, &ctinfo); 38 ct = nf_ct_get(skb, &ctinfo);
39 WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || 39 WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
index 57593b00c5b4..d9bf42ba44fa 100644
--- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -32,7 +32,7 @@ icmpv6_in_range(const struct nf_conntrack_tuple *tuple,
32static void 32static void
33icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto, 33icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto,
34 struct nf_conntrack_tuple *tuple, 34 struct nf_conntrack_tuple *tuple,
35 const struct nf_nat_range *range, 35 const struct nf_nat_range2 *range,
36 enum nf_nat_manip_type maniptype, 36 enum nf_nat_manip_type maniptype,
37 const struct nf_conn *ct) 37 const struct nf_conn *ct)
38{ 38{
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 3557b114446c..100a6bd1046a 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -26,8 +26,7 @@
26 26
27static unsigned int nft_nat_do_chain(void *priv, 27static unsigned int nft_nat_do_chain(void *priv,
28 struct sk_buff *skb, 28 struct sk_buff *skb,
29 const struct nf_hook_state *state, 29 const struct nf_hook_state *state)
30 struct nf_conn *ct)
31{ 30{
32 struct nft_pktinfo pkt; 31 struct nft_pktinfo pkt;
33 32
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index 4146536e9c15..dd0122f3cffe 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -22,7 +22,7 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr,
22 const struct nft_pktinfo *pkt) 22 const struct nft_pktinfo *pkt)
23{ 23{
24 struct nft_masq *priv = nft_expr_priv(expr); 24 struct nft_masq *priv = nft_expr_priv(expr);
25 struct nf_nat_range range; 25 struct nf_nat_range2 range;
26 26
27 memset(&range, 0, sizeof(range)); 27 memset(&range, 0, sizeof(range));
28 range.flags = priv->flags; 28 range.flags = priv->flags;
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c
index a27e424f690d..74269865acc8 100644
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ b/net/ipv6/netfilter/nft_redir_ipv6.c
@@ -22,7 +22,7 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr,
22 const struct nft_pktinfo *pkt) 22 const struct nft_pktinfo *pkt)
23{ 23{
24 struct nft_redir *priv = nft_expr_priv(expr); 24 struct nft_redir *priv = nft_expr_priv(expr);
25 struct nf_nat_range range; 25 struct nf_nat_range2 range;
26 26
27 memset(&range, 0, sizeof(range)); 27 memset(&range, 0, sizeof(range));
28 if (priv->sreg_proto_min) { 28 if (priv->sreg_proto_min) {
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 44d8a55e9721..e57c9d479503 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -444,6 +444,9 @@ config NETFILTER_SYNPROXY
444 444
445endif # NF_CONNTRACK 445endif # NF_CONNTRACK
446 446
447config NF_OSF
448 tristate 'Passive OS fingerprint infrastructure'
449
447config NF_TABLES 450config NF_TABLES
448 select NETFILTER_NETLINK 451 select NETFILTER_NETLINK
449 tristate "Netfilter nf_tables support" 452 tristate "Netfilter nf_tables support"
@@ -474,24 +477,6 @@ config NF_TABLES_NETDEV
474 help 477 help
475 This option enables support for the "netdev" table. 478 This option enables support for the "netdev" table.
476 479
477config NFT_EXTHDR
478 tristate "Netfilter nf_tables exthdr module"
479 help
480 This option adds the "exthdr" expression that you can use to match
481 IPv6 extension headers and tcp options.
482
483config NFT_META
484 tristate "Netfilter nf_tables meta module"
485 help
486 This option adds the "meta" expression that you can use to match and
487 to set packet metainformation such as the packet mark.
488
489config NFT_RT
490 tristate "Netfilter nf_tables routing module"
491 help
492 This option adds the "rt" expression that you can use to match
493 packet routing information such as the packet nexthop.
494
495config NFT_NUMGEN 480config NFT_NUMGEN
496 tristate "Netfilter nf_tables number generator module" 481 tristate "Netfilter nf_tables number generator module"
497 help 482 help
@@ -667,8 +652,7 @@ endif # NF_TABLES
667 652
668config NF_FLOW_TABLE_INET 653config NF_FLOW_TABLE_INET
669 tristate "Netfilter flow table mixed IPv4/IPv6 module" 654 tristate "Netfilter flow table mixed IPv4/IPv6 module"
670 depends on NF_FLOW_TABLE_IPV4 655 depends on NF_FLOW_TABLE
671 depends on NF_FLOW_TABLE_IPV6
672 help 656 help
673 This option adds the flow table mixed IPv4/IPv6 support. 657 This option adds the flow table mixed IPv4/IPv6 support.
674 658
@@ -1378,6 +1362,7 @@ config NETFILTER_XT_MATCH_NFACCT
1378config NETFILTER_XT_MATCH_OSF 1362config NETFILTER_XT_MATCH_OSF
1379 tristate '"osf" Passive OS fingerprint match' 1363 tristate '"osf" Passive OS fingerprint match'
1380 depends on NETFILTER_ADVANCED && NETFILTER_NETLINK 1364 depends on NETFILTER_ADVANCED && NETFILTER_NETLINK
1365 select NF_OSF
1381 help 1366 help
1382 This option selects the Passive OS Fingerprinting match module 1367 This option selects the Passive OS Fingerprinting match module
1383 that allows to passively match the remote operating system by 1368 that allows to passively match the remote operating system by
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index fd32bd2c9521..1aa710b5d384 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -76,13 +76,10 @@ obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o
76nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \ 76nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
77 nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \ 77 nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \
78 nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \ 78 nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
79 nft_dynset.o 79 nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o
80 80
81obj-$(CONFIG_NF_TABLES) += nf_tables.o 81obj-$(CONFIG_NF_TABLES) += nf_tables.o
82obj-$(CONFIG_NFT_COMPAT) += nft_compat.o 82obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
83obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o
84obj-$(CONFIG_NFT_META) += nft_meta.o
85obj-$(CONFIG_NFT_RT) += nft_rt.o
86obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o 83obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
87obj-$(CONFIG_NFT_CT) += nft_ct.o 84obj-$(CONFIG_NFT_CT) += nft_ct.o
88obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o 85obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o
@@ -104,6 +101,7 @@ obj-$(CONFIG_NFT_HASH) += nft_hash.o
104obj-$(CONFIG_NFT_FIB) += nft_fib.o 101obj-$(CONFIG_NFT_FIB) += nft_fib.o
105obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o 102obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o
106obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o 103obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o
104obj-$(CONFIG_NF_OSF) += nf_osf.o
107 105
108# nf_tables netdev 106# nf_tables netdev
109obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o 107obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
@@ -111,6 +109,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
111 109
112# flow table infrastructure 110# flow table infrastructure
113obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o 111obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
112nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
113
114obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o 114obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
115 115
116# generic X tables 116# generic X tables
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index b32fb0dbe237..05dc1b77e466 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -225,6 +225,25 @@ config IP_VS_SH
225 If you want to compile it in kernel, say Y. To compile it as a 225 If you want to compile it in kernel, say Y. To compile it as a
226 module, choose M here. If unsure, say N. 226 module, choose M here. If unsure, say N.
227 227
228config IP_VS_MH
229 tristate "maglev hashing scheduling"
230 ---help---
231 The maglev consistent hashing scheduling algorithm provides the
232	  Google's Maglev hashing algorithm as an IPVS scheduler. It assigns
233 network connections to the servers through looking up a statically
234 assigned special hash table called the lookup table. Maglev hashing
235 is to assign a preference list of all the lookup table positions
236 to each destination.
237
238	  Through this operation, the maglev hashing gives an almost equal
239 share of the lookup table to each of the destinations and provides
240 minimal disruption by using the lookup table. When the set of
241 destinations changes, a connection will likely be sent to the same
242 destination as it was before.
243
244 If you want to compile it in kernel, say Y. To compile it as a
245 module, choose M here. If unsure, say N.
246
228config IP_VS_SED 247config IP_VS_SED
229 tristate "shortest expected delay scheduling" 248 tristate "shortest expected delay scheduling"
230 ---help--- 249 ---help---
@@ -266,6 +285,24 @@ config IP_VS_SH_TAB_BITS
266 needs to be large enough to effectively fit all the destinations 285 needs to be large enough to effectively fit all the destinations
267 multiplied by their respective weights. 286 multiplied by their respective weights.
268 287
288comment 'IPVS MH scheduler'
289
290config IP_VS_MH_TAB_INDEX
291 int "IPVS maglev hashing table index of size (the prime numbers)"
292 range 8 17
293 default 12
294 ---help---
295 The maglev hashing scheduler maps source IPs to destinations
296 stored in a hash table. This table is assigned by a preference
297 list of the positions to each destination until all slots in
298 the table are filled. The index determines the prime for size of
299 the table as 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
300 65521 or 131071. When using weights to allow destinations to
301 receive more connections, the table is assigned an amount
302 proportional to the weights specified. The table needs to be large
303 enough to effectively fit all the destinations multiplied by their
304 respective weights.
305
269comment 'IPVS application helper' 306comment 'IPVS application helper'
270 307
271config IP_VS_FTP 308config IP_VS_FTP
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
index c552993fa4b9..bfce2677fda2 100644
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
33obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o 33obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
34obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o 34obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
35obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o 35obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
36obj-$(CONFIG_IP_VS_MH) += ip_vs_mh.o
36obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o 37obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
37obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o 38obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
38 39
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index f36098887ad0..d4f68d0f7df7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -821,6 +821,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
821 if (add && udest->af != svc->af) 821 if (add && udest->af != svc->af)
822 ipvs->mixed_address_family_dests++; 822 ipvs->mixed_address_family_dests++;
823 823
824 /* keep the last_weight with latest non-0 weight */
825 if (add || udest->weight != 0)
826 atomic_set(&dest->last_weight, udest->weight);
827
824 /* set the weight and the flags */ 828 /* set the weight and the flags */
825 atomic_set(&dest->weight, udest->weight); 829 atomic_set(&dest->weight, udest->weight);
826 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; 830 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 75f798f8e83b..07459e71d907 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -43,6 +43,7 @@
43#include <linux/module.h> 43#include <linux/module.h>
44#include <linux/kernel.h> 44#include <linux/kernel.h>
45#include <linux/skbuff.h> 45#include <linux/skbuff.h>
46#include <linux/hash.h>
46 47
47#include <net/ip_vs.h> 48#include <net/ip_vs.h>
48 49
@@ -81,7 +82,7 @@ static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *ad
81 addr_fold = addr->ip6[0]^addr->ip6[1]^ 82 addr_fold = addr->ip6[0]^addr->ip6[1]^
82 addr->ip6[2]^addr->ip6[3]; 83 addr->ip6[2]^addr->ip6[3];
83#endif 84#endif
84 return (ntohl(addr_fold)*2654435761UL) & IP_VS_DH_TAB_MASK; 85 return hash_32(ntohl(addr_fold), IP_VS_DH_TAB_BITS);
85} 86}
86 87
87 88
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 3057e453bf31..b9f375e6dc93 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -48,6 +48,7 @@
48#include <linux/kernel.h> 48#include <linux/kernel.h>
49#include <linux/skbuff.h> 49#include <linux/skbuff.h>
50#include <linux/jiffies.h> 50#include <linux/jiffies.h>
51#include <linux/hash.h>
51 52
52/* for sysctl */ 53/* for sysctl */
53#include <linux/fs.h> 54#include <linux/fs.h>
@@ -160,7 +161,7 @@ ip_vs_lblc_hashkey(int af, const union nf_inet_addr *addr)
160 addr_fold = addr->ip6[0]^addr->ip6[1]^ 161 addr_fold = addr->ip6[0]^addr->ip6[1]^
161 addr->ip6[2]^addr->ip6[3]; 162 addr->ip6[2]^addr->ip6[3];
162#endif 163#endif
163 return (ntohl(addr_fold)*2654435761UL) & IP_VS_LBLC_TAB_MASK; 164 return hash_32(ntohl(addr_fold), IP_VS_LBLC_TAB_BITS);
164} 165}
165 166
166 167
@@ -371,6 +372,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
371 tbl->counter = 1; 372 tbl->counter = 1;
372 tbl->dead = false; 373 tbl->dead = false;
373 tbl->svc = svc; 374 tbl->svc = svc;
375 atomic_set(&tbl->entries, 0);
374 376
375 /* 377 /*
376 * Hook periodic timer for garbage collection 378 * Hook periodic timer for garbage collection
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 92adc04557ed..542c4949937a 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -47,6 +47,7 @@
47#include <linux/jiffies.h> 47#include <linux/jiffies.h>
48#include <linux/list.h> 48#include <linux/list.h>
49#include <linux/slab.h> 49#include <linux/slab.h>
50#include <linux/hash.h>
50 51
51/* for sysctl */ 52/* for sysctl */
52#include <linux/fs.h> 53#include <linux/fs.h>
@@ -323,7 +324,7 @@ ip_vs_lblcr_hashkey(int af, const union nf_inet_addr *addr)
323 addr_fold = addr->ip6[0]^addr->ip6[1]^ 324 addr_fold = addr->ip6[0]^addr->ip6[1]^
324 addr->ip6[2]^addr->ip6[3]; 325 addr->ip6[2]^addr->ip6[3];
325#endif 326#endif
326 return (ntohl(addr_fold)*2654435761UL) & IP_VS_LBLCR_TAB_MASK; 327 return hash_32(ntohl(addr_fold), IP_VS_LBLCR_TAB_BITS);
327} 328}
328 329
329 330
@@ -534,6 +535,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
534 tbl->counter = 1; 535 tbl->counter = 1;
535 tbl->dead = false; 536 tbl->dead = false;
536 tbl->svc = svc; 537 tbl->svc = svc;
538 atomic_set(&tbl->entries, 0);
537 539
538 /* 540 /*
539 * Hook periodic timer for garbage collection 541 * Hook periodic timer for garbage collection
diff --git a/net/netfilter/ipvs/ip_vs_mh.c b/net/netfilter/ipvs/ip_vs_mh.c
new file mode 100644
index 000000000000..0f795b186eb3
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_mh.c
@@ -0,0 +1,540 @@
1// SPDX-License-Identifier: GPL-2.0
2/* IPVS: Maglev Hashing scheduling module
3 *
4 * Authors: Inju Song <inju.song@navercorp.com>
5 *
6 */
7
8/* The mh algorithm is to assign a preference list of all the lookup
9 * table positions to each destination and populate the table with
10 * the most-preferred position of destinations. Then it is to select
11 * destination with the hash key of source IP address through looking
12 * up the lookup table.
13 *
14 * The algorithm is detailed in:
15 * [3.4 Consistent Hashing]
16https://www.usenix.org/system/files/conference/nsdi16/nsdi16-paper-eisenbud.pdf
17 *
18 */
19
20#define KMSG_COMPONENT "IPVS"
21#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
22
23#include <linux/ip.h>
24#include <linux/slab.h>
25#include <linux/module.h>
26#include <linux/kernel.h>
27#include <linux/skbuff.h>
28
29#include <net/ip_vs.h>
30
31#include <linux/siphash.h>
32#include <linux/bitops.h>
33#include <linux/gcd.h>
34
35#define IP_VS_SVC_F_SCHED_MH_FALLBACK IP_VS_SVC_F_SCHED1 /* MH fallback */
36#define IP_VS_SVC_F_SCHED_MH_PORT IP_VS_SVC_F_SCHED2 /* MH use port */
37
38struct ip_vs_mh_lookup {
39 struct ip_vs_dest __rcu *dest; /* real server (cache) */
40};
41
42struct ip_vs_mh_dest_setup {
43 unsigned int offset; /* starting offset */
44 unsigned int skip; /* skip */
45 unsigned int perm; /* next_offset */
46 int turns; /* weight / gcd() and rshift */
47};
48
49/* Available prime numbers for MH table */
50static int primes[] = {251, 509, 1021, 2039, 4093,
51 8191, 16381, 32749, 65521, 131071};
52
53/* For IPVS MH entry hash table */
54#ifndef CONFIG_IP_VS_MH_TAB_INDEX
55#define CONFIG_IP_VS_MH_TAB_INDEX 12
56#endif
57#define IP_VS_MH_TAB_BITS (CONFIG_IP_VS_MH_TAB_INDEX / 2)
58#define IP_VS_MH_TAB_INDEX (CONFIG_IP_VS_MH_TAB_INDEX - 8)
59#define IP_VS_MH_TAB_SIZE primes[IP_VS_MH_TAB_INDEX]
60
61struct ip_vs_mh_state {
62 struct rcu_head rcu_head;
63 struct ip_vs_mh_lookup *lookup;
64 struct ip_vs_mh_dest_setup *dest_setup;
65 hsiphash_key_t hash1, hash2;
66 int gcd;
67 int rshift;
68};
69
70static inline void generate_hash_secret(hsiphash_key_t *hash1,
71 hsiphash_key_t *hash2)
72{
73 hash1->key[0] = 2654435761UL;
74 hash1->key[1] = 2654435761UL;
75
76 hash2->key[0] = 2654446892UL;
77 hash2->key[1] = 2654446892UL;
78}
79
80/* Helper function to determine if server is unavailable */
81static inline bool is_unavailable(struct ip_vs_dest *dest)
82{
83 return atomic_read(&dest->weight) <= 0 ||
84 dest->flags & IP_VS_DEST_F_OVERLOAD;
85}
86
87/* Returns hash value for IPVS MH entry */
88static inline unsigned int
89ip_vs_mh_hashkey(int af, const union nf_inet_addr *addr,
90 __be16 port, hsiphash_key_t *key, unsigned int offset)
91{
92 unsigned int v;
93 __be32 addr_fold = addr->ip;
94
95#ifdef CONFIG_IP_VS_IPV6
96 if (af == AF_INET6)
97 addr_fold = addr->ip6[0] ^ addr->ip6[1] ^
98 addr->ip6[2] ^ addr->ip6[3];
99#endif
100 v = (offset + ntohs(port) + ntohl(addr_fold));
101 return hsiphash(&v, sizeof(v), key);
102}
103
104/* Reset all the hash buckets of the specified table. */
105static void ip_vs_mh_reset(struct ip_vs_mh_state *s)
106{
107 int i;
108 struct ip_vs_mh_lookup *l;
109 struct ip_vs_dest *dest;
110
111 l = &s->lookup[0];
112 for (i = 0; i < IP_VS_MH_TAB_SIZE; i++) {
113 dest = rcu_dereference_protected(l->dest, 1);
114 if (dest) {
115 ip_vs_dest_put(dest);
116 RCU_INIT_POINTER(l->dest, NULL);
117 }
118 l++;
119 }
120}
121
122static int ip_vs_mh_permutate(struct ip_vs_mh_state *s,
123 struct ip_vs_service *svc)
124{
125 struct list_head *p;
126 struct ip_vs_mh_dest_setup *ds;
127 struct ip_vs_dest *dest;
128 int lw;
129
130	/* If gcd is smaller than 1, number of dests or
131 * all last_weight of dests are zero. So, skip
132 * permutation for the dests.
133 */
134 if (s->gcd < 1)
135 return 0;
136
137 /* Set dest_setup for the dests permutation */
138 p = &svc->destinations;
139 ds = &s->dest_setup[0];
140 while ((p = p->next) != &svc->destinations) {
141 dest = list_entry(p, struct ip_vs_dest, n_list);
142
143 ds->offset = ip_vs_mh_hashkey(svc->af, &dest->addr,
144 dest->port, &s->hash1, 0) %
145 IP_VS_MH_TAB_SIZE;
146 ds->skip = ip_vs_mh_hashkey(svc->af, &dest->addr,
147 dest->port, &s->hash2, 0) %
148 (IP_VS_MH_TAB_SIZE - 1) + 1;
149 ds->perm = ds->offset;
150
151 lw = atomic_read(&dest->last_weight);
152 ds->turns = ((lw / s->gcd) >> s->rshift) ? : (lw != 0);
153 ds++;
154 }
155
156 return 0;
157}
158
159static int ip_vs_mh_populate(struct ip_vs_mh_state *s,
160 struct ip_vs_service *svc)
161{
162 int n, c, dt_count;
163 unsigned long *table;
164 struct list_head *p;
165 struct ip_vs_mh_dest_setup *ds;
166 struct ip_vs_dest *dest, *new_dest;
167
168	/* If gcd is smaller than 1, number of dests or
169 * all last_weight of dests are zero. So, skip
170 * the population for the dests and reset lookup table.
171 */
172 if (s->gcd < 1) {
173 ip_vs_mh_reset(s);
174 return 0;
175 }
176
177 table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
178 sizeof(unsigned long), GFP_KERNEL);
179 if (!table)
180 return -ENOMEM;
181
182 p = &svc->destinations;
183 n = 0;
184 dt_count = 0;
185 while (n < IP_VS_MH_TAB_SIZE) {
186 if (p == &svc->destinations)
187 p = p->next;
188
189 ds = &s->dest_setup[0];
190 while (p != &svc->destinations) {
191 /* Ignore added server with zero weight */
192 if (ds->turns < 1) {
193 p = p->next;
194 ds++;
195 continue;
196 }
197
198 c = ds->perm;
199 while (test_bit(c, table)) {
200 /* Add skip, mod IP_VS_MH_TAB_SIZE */
201 ds->perm += ds->skip;
202 if (ds->perm >= IP_VS_MH_TAB_SIZE)
203 ds->perm -= IP_VS_MH_TAB_SIZE;
204 c = ds->perm;
205 }
206
207 __set_bit(c, table);
208
209 dest = rcu_dereference_protected(s->lookup[c].dest, 1);
210 new_dest = list_entry(p, struct ip_vs_dest, n_list);
211 if (dest != new_dest) {
212 if (dest)
213 ip_vs_dest_put(dest);
214 ip_vs_dest_hold(new_dest);
215 RCU_INIT_POINTER(s->lookup[c].dest, new_dest);
216 }
217
218 if (++n == IP_VS_MH_TAB_SIZE)
219 goto out;
220
221 if (++dt_count >= ds->turns) {
222 dt_count = 0;
223 p = p->next;
224 ds++;
225 }
226 }
227 }
228
229out:
230 kfree(table);
231 return 0;
232}
233
234/* Get ip_vs_dest associated with supplied parameters. */
235static inline struct ip_vs_dest *
236ip_vs_mh_get(struct ip_vs_service *svc, struct ip_vs_mh_state *s,
237 const union nf_inet_addr *addr, __be16 port)
238{
239 unsigned int hash = ip_vs_mh_hashkey(svc->af, addr, port, &s->hash1, 0)
240 % IP_VS_MH_TAB_SIZE;
241 struct ip_vs_dest *dest = rcu_dereference(s->lookup[hash].dest);
242
243 return (!dest || is_unavailable(dest)) ? NULL : dest;
244}
245
246/* As ip_vs_mh_get, but with fallback if selected server is unavailable */
247static inline struct ip_vs_dest *
248ip_vs_mh_get_fallback(struct ip_vs_service *svc, struct ip_vs_mh_state *s,
249 const union nf_inet_addr *addr, __be16 port)
250{
251 unsigned int offset, roffset;
252 unsigned int hash, ihash;
253 struct ip_vs_dest *dest;
254
255 /* First try the dest it's supposed to go to */
256 ihash = ip_vs_mh_hashkey(svc->af, addr, port,
257 &s->hash1, 0) % IP_VS_MH_TAB_SIZE;
258 dest = rcu_dereference(s->lookup[ihash].dest);
259 if (!dest)
260 return NULL;
261 if (!is_unavailable(dest))
262 return dest;
263
264 IP_VS_DBG_BUF(6, "MH: selected unavailable server %s:%u, reselecting",
265 IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
266
267 /* If the original dest is unavailable, loop around the table
268 * starting from ihash to find a new dest
269 */
270 for (offset = 0; offset < IP_VS_MH_TAB_SIZE; offset++) {
271 roffset = (offset + ihash) % IP_VS_MH_TAB_SIZE;
272 hash = ip_vs_mh_hashkey(svc->af, addr, port, &s->hash1,
273 roffset) % IP_VS_MH_TAB_SIZE;
274 dest = rcu_dereference(s->lookup[hash].dest);
275 if (!dest)
276 break;
277 if (!is_unavailable(dest))
278 return dest;
279 IP_VS_DBG_BUF(6,
280 "MH: selected unavailable server %s:%u (offset %u), reselecting",
281 IP_VS_DBG_ADDR(dest->af, &dest->addr),
282 ntohs(dest->port), roffset);
283 }
284
285 return NULL;
286}
287
288/* Assign all the hash buckets of the specified table with the service. */
289static int ip_vs_mh_reassign(struct ip_vs_mh_state *s,
290 struct ip_vs_service *svc)
291{
292 int ret;
293
294 if (svc->num_dests > IP_VS_MH_TAB_SIZE)
295 return -EINVAL;
296
297 if (svc->num_dests >= 1) {
298 s->dest_setup = kcalloc(svc->num_dests,
299 sizeof(struct ip_vs_mh_dest_setup),
300 GFP_KERNEL);
301 if (!s->dest_setup)
302 return -ENOMEM;
303 }
304
305 ip_vs_mh_permutate(s, svc);
306
307 ret = ip_vs_mh_populate(s, svc);
308 if (ret < 0)
309 goto out;
310
311 IP_VS_DBG_BUF(6, "MH: reassign lookup table of %s:%u\n",
312 IP_VS_DBG_ADDR(svc->af, &svc->addr),
313 ntohs(svc->port));
314
315out:
316 if (svc->num_dests >= 1) {
317 kfree(s->dest_setup);
318 s->dest_setup = NULL;
319 }
320 return ret;
321}
322
323static int ip_vs_mh_gcd_weight(struct ip_vs_service *svc)
324{
325 struct ip_vs_dest *dest;
326 int weight;
327 int g = 0;
328
329 list_for_each_entry(dest, &svc->destinations, n_list) {
330 weight = atomic_read(&dest->last_weight);
331 if (weight > 0) {
332 if (g > 0)
333 g = gcd(weight, g);
334 else
335 g = weight;
336 }
337 }
338 return g;
339}
340
341/* To avoid assigning huge weight for the MH table,
342 * calculate shift value with gcd.
343 */
344static int ip_vs_mh_shift_weight(struct ip_vs_service *svc, int gcd)
345{
346 struct ip_vs_dest *dest;
347 int new_weight, weight = 0;
348 int mw, shift;
349
350	/* If gcd is smaller than 1, number of dests or
351 * all last_weight of dests are zero. So, return
352 * shift value as zero.
353 */
354 if (gcd < 1)
355 return 0;
356
357 list_for_each_entry(dest, &svc->destinations, n_list) {
358 new_weight = atomic_read(&dest->last_weight);
359 if (new_weight > weight)
360 weight = new_weight;
361 }
362
363 /* Because gcd is greater than zero,
364 * the maximum weight and gcd are always greater than zero
365 */
366 mw = weight / gcd;
367
368 /* shift = occupied bits of weight/gcd - MH highest bits */
369 shift = fls(mw) - IP_VS_MH_TAB_BITS;
370 return (shift >= 0) ? shift : 0;
371}
372
373static void ip_vs_mh_state_free(struct rcu_head *head)
374{
375 struct ip_vs_mh_state *s;
376
377 s = container_of(head, struct ip_vs_mh_state, rcu_head);
378 kfree(s->lookup);
379 kfree(s);
380}
381
382static int ip_vs_mh_init_svc(struct ip_vs_service *svc)
383{
384 int ret;
385 struct ip_vs_mh_state *s;
386
387 /* Allocate the MH table for this service */
388 s = kzalloc(sizeof(*s), GFP_KERNEL);
389 if (!s)
390 return -ENOMEM;
391
392 s->lookup = kcalloc(IP_VS_MH_TAB_SIZE, sizeof(struct ip_vs_mh_lookup),
393 GFP_KERNEL);
394 if (!s->lookup) {
395 kfree(s);
396 return -ENOMEM;
397 }
398
399 generate_hash_secret(&s->hash1, &s->hash2);
400 s->gcd = ip_vs_mh_gcd_weight(svc);
401 s->rshift = ip_vs_mh_shift_weight(svc, s->gcd);
402
403 IP_VS_DBG(6,
404 "MH lookup table (memory=%zdbytes) allocated for current service\n",
405 sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_TAB_SIZE);
406
407 /* Assign the lookup table with current dests */
408 ret = ip_vs_mh_reassign(s, svc);
409 if (ret < 0) {
410 ip_vs_mh_reset(s);
411 ip_vs_mh_state_free(&s->rcu_head);
412 return ret;
413 }
414
415 /* No more failures, attach state */
416 svc->sched_data = s;
417 return 0;
418}
419
420static void ip_vs_mh_done_svc(struct ip_vs_service *svc)
421{
422 struct ip_vs_mh_state *s = svc->sched_data;
423
424 /* Got to clean up lookup entry here */
425 ip_vs_mh_reset(s);
426
427 call_rcu(&s->rcu_head, ip_vs_mh_state_free);
428 IP_VS_DBG(6, "MH lookup table (memory=%zdbytes) released\n",
429 sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_TAB_SIZE);
430}
431
432static int ip_vs_mh_dest_changed(struct ip_vs_service *svc,
433 struct ip_vs_dest *dest)
434{
435 struct ip_vs_mh_state *s = svc->sched_data;
436
437 s->gcd = ip_vs_mh_gcd_weight(svc);
438 s->rshift = ip_vs_mh_shift_weight(svc, s->gcd);
439
440 /* Assign the lookup table with the updated service */
441 return ip_vs_mh_reassign(s, svc);
442}
443
444/* Helper function to get port number */
445static inline __be16
446ip_vs_mh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
447{
448 __be16 _ports[2], *ports;
449
450 /* At this point we know that we have a valid packet of some kind.
451 * Because ICMP packets are only guaranteed to have the first 8
452 * bytes, let's just grab the ports. Fortunately they're in the
453 * same position for all three of the protocols we care about.
454 */
455 switch (iph->protocol) {
456 case IPPROTO_TCP:
457 case IPPROTO_UDP:
458 case IPPROTO_SCTP:
459 ports = skb_header_pointer(skb, iph->len, sizeof(_ports),
460 &_ports);
461 if (unlikely(!ports))
462 return 0;
463
464 if (likely(!ip_vs_iph_inverse(iph)))
465 return ports[0];
466 else
467 return ports[1];
468 default:
469 return 0;
470 }
471}
472
473/* Maglev Hashing scheduling */
474static struct ip_vs_dest *
475ip_vs_mh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
476 struct ip_vs_iphdr *iph)
477{
478 struct ip_vs_dest *dest;
479 struct ip_vs_mh_state *s;
480 __be16 port = 0;
481 const union nf_inet_addr *hash_addr;
482
483 hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr;
484
485 IP_VS_DBG(6, "%s : Scheduling...\n", __func__);
486
487 if (svc->flags & IP_VS_SVC_F_SCHED_MH_PORT)
488 port = ip_vs_mh_get_port(skb, iph);
489
490 s = (struct ip_vs_mh_state *)svc->sched_data;
491
492 if (svc->flags & IP_VS_SVC_F_SCHED_MH_FALLBACK)
493 dest = ip_vs_mh_get_fallback(svc, s, hash_addr, port);
494 else
495 dest = ip_vs_mh_get(svc, s, hash_addr, port);
496
497 if (!dest) {
498 ip_vs_scheduler_err(svc, "no destination available");
499 return NULL;
500 }
501
502 IP_VS_DBG_BUF(6, "MH: source IP address %s:%u --> server %s:%u\n",
503 IP_VS_DBG_ADDR(svc->af, hash_addr),
504 ntohs(port),
505 IP_VS_DBG_ADDR(dest->af, &dest->addr),
506 ntohs(dest->port));
507
508 return dest;
509}
510
511/* IPVS MH Scheduler structure */
512static struct ip_vs_scheduler ip_vs_mh_scheduler = {
513 .name = "mh",
514 .refcnt = ATOMIC_INIT(0),
515 .module = THIS_MODULE,
516 .n_list = LIST_HEAD_INIT(ip_vs_mh_scheduler.n_list),
517 .init_service = ip_vs_mh_init_svc,
518 .done_service = ip_vs_mh_done_svc,
519 .add_dest = ip_vs_mh_dest_changed,
520 .del_dest = ip_vs_mh_dest_changed,
521 .upd_dest = ip_vs_mh_dest_changed,
522 .schedule = ip_vs_mh_schedule,
523};
524
525static int __init ip_vs_mh_init(void)
526{
527 return register_ip_vs_scheduler(&ip_vs_mh_scheduler);
528}
529
530static void __exit ip_vs_mh_cleanup(void)
531{
532 unregister_ip_vs_scheduler(&ip_vs_mh_scheduler);
533 rcu_barrier();
534}
535
536module_init(ip_vs_mh_init);
537module_exit(ip_vs_mh_cleanup);
538MODULE_DESCRIPTION("Maglev hashing ipvs scheduler");
539MODULE_LICENSE("GPL v2");
540MODULE_AUTHOR("Inju Song <inju.song@navercorp.com>");
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index bcd9b7bde4ee..569631d2b2a1 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -436,7 +436,7 @@ static bool tcp_state_active(int state)
436 return tcp_state_active_table[state]; 436 return tcp_state_active_table[state];
437} 437}
438 438
439static struct tcp_states_t tcp_states [] = { 439static struct tcp_states_t tcp_states[] = {
440/* INPUT */ 440/* INPUT */
441/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ 441/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
442/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, 442/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
@@ -459,7 +459,7 @@ static struct tcp_states_t tcp_states [] = {
459/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, 459/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
460}; 460};
461 461
462static struct tcp_states_t tcp_states_dos [] = { 462static struct tcp_states_t tcp_states_dos[] = {
463/* INPUT */ 463/* INPUT */
464/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ 464/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
465/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }}, 465/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 16aaac6eedc9..1e01c782583a 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -96,7 +96,8 @@ ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr,
96 addr_fold = addr->ip6[0]^addr->ip6[1]^ 96 addr_fold = addr->ip6[0]^addr->ip6[1]^
97 addr->ip6[2]^addr->ip6[3]; 97 addr->ip6[2]^addr->ip6[3];
98#endif 98#endif
99 return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) & 99 return (offset + hash_32(ntohs(port) + ntohl(addr_fold),
100 IP_VS_SH_TAB_BITS)) &
100 IP_VS_SH_TAB_MASK; 101 IP_VS_SH_TAB_MASK;
101} 102}
102 103
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 41ff04ee2554..605441727008 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -186,6 +186,7 @@ unsigned int nf_conntrack_htable_size __read_mostly;
186EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); 186EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
187 187
188unsigned int nf_conntrack_max __read_mostly; 188unsigned int nf_conntrack_max __read_mostly;
189EXPORT_SYMBOL_GPL(nf_conntrack_max);
189seqcount_t nf_conntrack_generation __read_mostly; 190seqcount_t nf_conntrack_generation __read_mostly;
190static unsigned int nf_conntrack_hash_rnd __read_mostly; 191static unsigned int nf_conntrack_hash_rnd __read_mostly;
191 192
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index f0e9a7511e1a..a11c304fb771 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -566,8 +566,7 @@ static const struct nf_conntrack_expect_policy ftp_exp_policy = {
566 .timeout = 5 * 60, 566 .timeout = 5 * 60,
567}; 567};
568 568
569/* don't make this __exit, since it's called from __init ! */ 569static void __exit nf_conntrack_ftp_fini(void)
570static void nf_conntrack_ftp_fini(void)
571{ 570{
572 nf_conntrack_helpers_unregister(ftp, ports_c * 2); 571 nf_conntrack_helpers_unregister(ftp, ports_c * 2);
573 kfree(ftp_buffer); 572 kfree(ftp_buffer);
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 5523acce9d69..4099f4d79bae 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -232,8 +232,6 @@ static int help(struct sk_buff *skb, unsigned int protoff,
232static struct nf_conntrack_helper irc[MAX_PORTS] __read_mostly; 232static struct nf_conntrack_helper irc[MAX_PORTS] __read_mostly;
233static struct nf_conntrack_expect_policy irc_exp_policy; 233static struct nf_conntrack_expect_policy irc_exp_policy;
234 234
235static void nf_conntrack_irc_fini(void);
236
237static int __init nf_conntrack_irc_init(void) 235static int __init nf_conntrack_irc_init(void)
238{ 236{
239 int i, ret; 237 int i, ret;
@@ -276,9 +274,7 @@ static int __init nf_conntrack_irc_init(void)
276 return 0; 274 return 0;
277} 275}
278 276
279/* This function is intentionally _NOT_ defined as __exit, because 277static void __exit nf_conntrack_irc_fini(void)
280 * it is needed by the init function */
281static void nf_conntrack_irc_fini(void)
282{ 278{
283 nf_conntrack_helpers_unregister(irc, ports_c); 279 nf_conntrack_helpers_unregister(irc, ports_c);
284 kfree(irc_buffer); 280 kfree(irc_buffer);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4c1d0c5bc268..d807b8770be3 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2205,6 +2205,9 @@ ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
2205 if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks))) 2205 if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks)))
2206 goto nla_put_failure; 2206 goto nla_put_failure;
2207 2207
2208 if (nla_put_be32(skb, CTA_STATS_GLOBAL_MAX_ENTRIES, htonl(nf_conntrack_max)))
2209 goto nla_put_failure;
2210
2208 nlmsg_end(skb, nlh); 2211 nlmsg_end(skb, nlh);
2209 return skb->len; 2212 return skb->len;
2210 2213
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index ae457f39d5ce..5072ff96ab33 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -173,8 +173,7 @@ static const struct nf_conntrack_expect_policy sane_exp_policy = {
173 .timeout = 5 * 60, 173 .timeout = 5 * 60,
174}; 174};
175 175
176/* don't make this __exit, since it's called from __init ! */ 176static void __exit nf_conntrack_sane_fini(void)
177static void nf_conntrack_sane_fini(void)
178{ 177{
179 nf_conntrack_helpers_unregister(sane, ports_c * 2); 178 nf_conntrack_helpers_unregister(sane, ports_c * 2);
180 kfree(sane_buffer); 179 kfree(sane_buffer);
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 908e51e2dc2b..c8d2b6688a2a 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1617,7 +1617,7 @@ static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1
1617 }, 1617 },
1618}; 1618};
1619 1619
1620static void nf_conntrack_sip_fini(void) 1620static void __exit nf_conntrack_sip_fini(void)
1621{ 1621{
1622 nf_conntrack_helpers_unregister(sip, ports_c * 4); 1622 nf_conntrack_helpers_unregister(sip, ports_c * 4);
1623} 1623}
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index 0ec6779fd5d9..548b673b3625 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -104,7 +104,7 @@ static const struct nf_conntrack_expect_policy tftp_exp_policy = {
104 .timeout = 5 * 60, 104 .timeout = 5 * 60,
105}; 105};
106 106
107static void nf_conntrack_tftp_fini(void) 107static void __exit nf_conntrack_tftp_fini(void)
108{ 108{
109 nf_conntrack_helpers_unregister(tftp, ports_c * 2); 109 nf_conntrack_helpers_unregister(tftp, ports_c * 2);
110} 110}
diff --git a/net/netfilter/nf_flow_table.c b/net/netfilter/nf_flow_table_core.c
index ec410cae9307..eb0d1658ac05 100644
--- a/net/netfilter/nf_flow_table.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -4,6 +4,8 @@
4#include <linux/netfilter.h> 4#include <linux/netfilter.h>
5#include <linux/rhashtable.h> 5#include <linux/rhashtable.h>
6#include <linux/netdevice.h> 6#include <linux/netdevice.h>
7#include <net/ip.h>
8#include <net/ip6_route.h>
7#include <net/netfilter/nf_tables.h> 9#include <net/netfilter/nf_tables.h>
8#include <net/netfilter/nf_flow_table.h> 10#include <net/netfilter/nf_flow_table.h>
9#include <net/netfilter/nf_conntrack.h> 11#include <net/netfilter/nf_conntrack.h>
@@ -16,6 +18,43 @@ struct flow_offload_entry {
16 struct rcu_head rcu_head; 18 struct rcu_head rcu_head;
17}; 19};
18 20
21static DEFINE_MUTEX(flowtable_lock);
22static LIST_HEAD(flowtables);
23
24static void
25flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
26 struct nf_flow_route *route,
27 enum flow_offload_tuple_dir dir)
28{
29 struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
30 struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
31 struct dst_entry *dst = route->tuple[dir].dst;
32
33 ft->dir = dir;
34
35 switch (ctt->src.l3num) {
36 case NFPROTO_IPV4:
37 ft->src_v4 = ctt->src.u3.in;
38 ft->dst_v4 = ctt->dst.u3.in;
39 ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
40 break;
41 case NFPROTO_IPV6:
42 ft->src_v6 = ctt->src.u3.in6;
43 ft->dst_v6 = ctt->dst.u3.in6;
44 ft->mtu = ip6_dst_mtu_forward(dst);
45 break;
46 }
47
48 ft->l3proto = ctt->src.l3num;
49 ft->l4proto = ctt->dst.protonum;
50 ft->src_port = ctt->src.u.tcp.port;
51 ft->dst_port = ctt->dst.u.tcp.port;
52
53 ft->iifidx = route->tuple[dir].ifindex;
54 ft->oifidx = route->tuple[!dir].ifindex;
55 ft->dst_cache = dst;
56}
57
19struct flow_offload * 58struct flow_offload *
20flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route) 59flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
21{ 60{
@@ -40,69 +79,12 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
40 79
41 entry->ct = ct; 80 entry->ct = ct;
42 81
43 switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) { 82 flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
44 case NFPROTO_IPV4: 83 flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
45 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
46 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
47 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
48 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
49 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
50 ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
51 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
52 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
53 break;
54 case NFPROTO_IPV6:
55 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
56 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
57 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
58 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
59 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
60 ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
61 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
62 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
63 break;
64 }
65
66 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
67 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
68 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
69 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
70 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
71 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
72 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
73 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
74
75 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
76 route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
77 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
78 route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;
79
80 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
81 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
82 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
83 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
84 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
85 ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
86 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
87 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
88
89 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
90 FLOW_OFFLOAD_DIR_ORIGINAL;
91 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
92 FLOW_OFFLOAD_DIR_REPLY;
93
94 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
95 route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
96 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
97 route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
98 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
99 route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
100 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
101 route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
102 84
103 if (ct->status & IPS_SRC_NAT) 85 if (ct->status & IPS_SRC_NAT)
104 flow->flags |= FLOW_OFFLOAD_SNAT; 86 flow->flags |= FLOW_OFFLOAD_SNAT;
105 else if (ct->status & IPS_DST_NAT) 87 if (ct->status & IPS_DST_NAT)
106 flow->flags |= FLOW_OFFLOAD_DNAT; 88 flow->flags |= FLOW_OFFLOAD_DNAT;
107 89
108 return flow; 90 return flow;
@@ -118,6 +100,43 @@ err_ct_refcnt:
118} 100}
119EXPORT_SYMBOL_GPL(flow_offload_alloc); 101EXPORT_SYMBOL_GPL(flow_offload_alloc);
120 102
103static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
104{
105 tcp->state = TCP_CONNTRACK_ESTABLISHED;
106 tcp->seen[0].td_maxwin = 0;
107 tcp->seen[1].td_maxwin = 0;
108}
109
110static void flow_offload_fixup_ct_state(struct nf_conn *ct)
111{
112 const struct nf_conntrack_l4proto *l4proto;
113 struct net *net = nf_ct_net(ct);
114 unsigned int *timeouts;
115 unsigned int timeout;
116 int l4num;
117
118 l4num = nf_ct_protonum(ct);
119 if (l4num == IPPROTO_TCP)
120 flow_offload_fixup_tcp(&ct->proto.tcp);
121
122 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), l4num);
123 if (!l4proto)
124 return;
125
126 timeouts = l4proto->get_timeouts(net);
127 if (!timeouts)
128 return;
129
130 if (l4num == IPPROTO_TCP)
131 timeout = timeouts[TCP_CONNTRACK_ESTABLISHED];
132 else if (l4num == IPPROTO_UDP)
133 timeout = timeouts[UDP_CT_REPLIED];
134 else
135 return;
136
137 ct->timeout = nfct_time_stamp + timeout;
138}
139
121void flow_offload_free(struct flow_offload *flow) 140void flow_offload_free(struct flow_offload *flow)
122{ 141{
123 struct flow_offload_entry *e; 142 struct flow_offload_entry *e;
@@ -125,17 +144,46 @@ void flow_offload_free(struct flow_offload *flow)
125 dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache); 144 dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
126 dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache); 145 dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
127 e = container_of(flow, struct flow_offload_entry, flow); 146 e = container_of(flow, struct flow_offload_entry, flow);
128 nf_ct_delete(e->ct, 0, 0); 147 if (flow->flags & FLOW_OFFLOAD_DYING)
148 nf_ct_delete(e->ct, 0, 0);
129 nf_ct_put(e->ct); 149 nf_ct_put(e->ct);
130 kfree_rcu(e, rcu_head); 150 kfree_rcu(e, rcu_head);
131} 151}
132EXPORT_SYMBOL_GPL(flow_offload_free); 152EXPORT_SYMBOL_GPL(flow_offload_free);
133 153
134void flow_offload_dead(struct flow_offload *flow) 154static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
155{
156 const struct flow_offload_tuple *tuple = data;
157
158 return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
159}
160
161static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
162{
163 const struct flow_offload_tuple_rhash *tuplehash = data;
164
165 return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
166}
167
168static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
169 const void *ptr)
135{ 170{
136 flow->flags |= FLOW_OFFLOAD_DYING; 171 const struct flow_offload_tuple *tuple = arg->key;
172 const struct flow_offload_tuple_rhash *x = ptr;
173
174 if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
175 return 1;
176
177 return 0;
137} 178}
138EXPORT_SYMBOL_GPL(flow_offload_dead); 179
180static const struct rhashtable_params nf_flow_offload_rhash_params = {
181 .head_offset = offsetof(struct flow_offload_tuple_rhash, node),
182 .hashfn = flow_offload_hash,
183 .obj_hashfn = flow_offload_hash_obj,
184 .obj_cmpfn = flow_offload_hash_cmp,
185 .automatic_shrinking = true,
186};
139 187
140int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) 188int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
141{ 189{
@@ -143,10 +191,10 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
143 191
144 rhashtable_insert_fast(&flow_table->rhashtable, 192 rhashtable_insert_fast(&flow_table->rhashtable,
145 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, 193 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
146 *flow_table->type->params); 194 nf_flow_offload_rhash_params);
147 rhashtable_insert_fast(&flow_table->rhashtable, 195 rhashtable_insert_fast(&flow_table->rhashtable,
148 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, 196 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
149 *flow_table->type->params); 197 nf_flow_offload_rhash_params);
150 return 0; 198 return 0;
151} 199}
152EXPORT_SYMBOL_GPL(flow_offload_add); 200EXPORT_SYMBOL_GPL(flow_offload_add);
@@ -154,22 +202,51 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
154static void flow_offload_del(struct nf_flowtable *flow_table, 202static void flow_offload_del(struct nf_flowtable *flow_table,
155 struct flow_offload *flow) 203 struct flow_offload *flow)
156{ 204{
205 struct flow_offload_entry *e;
206
157 rhashtable_remove_fast(&flow_table->rhashtable, 207 rhashtable_remove_fast(&flow_table->rhashtable,
158 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, 208 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
159 *flow_table->type->params); 209 nf_flow_offload_rhash_params);
160 rhashtable_remove_fast(&flow_table->rhashtable, 210 rhashtable_remove_fast(&flow_table->rhashtable,
161 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, 211 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
162 *flow_table->type->params); 212 nf_flow_offload_rhash_params);
213
214 e = container_of(flow, struct flow_offload_entry, flow);
215 clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
163 216
164 flow_offload_free(flow); 217 flow_offload_free(flow);
165} 218}
166 219
220void flow_offload_teardown(struct flow_offload *flow)
221{
222 struct flow_offload_entry *e;
223
224 flow->flags |= FLOW_OFFLOAD_TEARDOWN;
225
226 e = container_of(flow, struct flow_offload_entry, flow);
227 flow_offload_fixup_ct_state(e->ct);
228}
229EXPORT_SYMBOL_GPL(flow_offload_teardown);
230
167struct flow_offload_tuple_rhash * 231struct flow_offload_tuple_rhash *
168flow_offload_lookup(struct nf_flowtable *flow_table, 232flow_offload_lookup(struct nf_flowtable *flow_table,
169 struct flow_offload_tuple *tuple) 233 struct flow_offload_tuple *tuple)
170{ 234{
171 return rhashtable_lookup_fast(&flow_table->rhashtable, tuple, 235 struct flow_offload_tuple_rhash *tuplehash;
172 *flow_table->type->params); 236 struct flow_offload *flow;
237 int dir;
238
239 tuplehash = rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
240 nf_flow_offload_rhash_params);
241 if (!tuplehash)
242 return NULL;
243
244 dir = tuplehash->tuple.dir;
245 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
246 if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
247 return NULL;
248
249 return tuplehash;
173} 250}
174EXPORT_SYMBOL_GPL(flow_offload_lookup); 251EXPORT_SYMBOL_GPL(flow_offload_lookup);
175 252
@@ -216,11 +293,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
216 return (__s32)(flow->timeout - (u32)jiffies) <= 0; 293 return (__s32)(flow->timeout - (u32)jiffies) <= 0;
217} 294}
218 295
219static inline bool nf_flow_is_dying(const struct flow_offload *flow)
220{
221 return flow->flags & FLOW_OFFLOAD_DYING;
222}
223
224static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table) 296static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
225{ 297{
226 struct flow_offload_tuple_rhash *tuplehash; 298 struct flow_offload_tuple_rhash *tuplehash;
@@ -248,7 +320,8 @@ static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
248 flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); 320 flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
249 321
250 if (nf_flow_has_expired(flow) || 322 if (nf_flow_has_expired(flow) ||
251 nf_flow_is_dying(flow)) 323 (flow->flags & (FLOW_OFFLOAD_DYING |
324 FLOW_OFFLOAD_TEARDOWN)))
252 flow_offload_del(flow_table, flow); 325 flow_offload_del(flow_table, flow);
253 } 326 }
254out: 327out:
@@ -258,7 +331,7 @@ out:
258 return 1; 331 return 1;
259} 332}
260 333
261void nf_flow_offload_work_gc(struct work_struct *work) 334static void nf_flow_offload_work_gc(struct work_struct *work)
262{ 335{
263 struct nf_flowtable *flow_table; 336 struct nf_flowtable *flow_table;
264 337
@@ -266,42 +339,6 @@ void nf_flow_offload_work_gc(struct work_struct *work)
266 nf_flow_offload_gc_step(flow_table); 339 nf_flow_offload_gc_step(flow_table);
267 queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); 340 queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
268} 341}
269EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
270
271static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
272{
273 const struct flow_offload_tuple *tuple = data;
274
275 return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
276}
277
278static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
279{
280 const struct flow_offload_tuple_rhash *tuplehash = data;
281
282 return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
283}
284
285static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
286 const void *ptr)
287{
288 const struct flow_offload_tuple *tuple = arg->key;
289 const struct flow_offload_tuple_rhash *x = ptr;
290
291 if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
292 return 1;
293
294 return 0;
295}
296
297const struct rhashtable_params nf_flow_offload_rhash_params = {
298 .head_offset = offsetof(struct flow_offload_tuple_rhash, node),
299 .hashfn = flow_offload_hash,
300 .obj_hashfn = flow_offload_hash_obj,
301 .obj_cmpfn = flow_offload_hash_cmp,
302 .automatic_shrinking = true,
303};
304EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
305 342
306static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, 343static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
307 __be16 port, __be16 new_port) 344 __be16 port, __be16 new_port)
@@ -419,33 +456,69 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
419} 456}
420EXPORT_SYMBOL_GPL(nf_flow_dnat_port); 457EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
421 458
459int nf_flow_table_init(struct nf_flowtable *flowtable)
460{
461 int err;
462
463 INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
464
465 err = rhashtable_init(&flowtable->rhashtable,
466 &nf_flow_offload_rhash_params);
467 if (err < 0)
468 return err;
469
470 queue_delayed_work(system_power_efficient_wq,
471 &flowtable->gc_work, HZ);
472
473 mutex_lock(&flowtable_lock);
474 list_add(&flowtable->list, &flowtables);
475 mutex_unlock(&flowtable_lock);
476
477 return 0;
478}
479EXPORT_SYMBOL_GPL(nf_flow_table_init);
480
422static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data) 481static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
423{ 482{
424 struct net_device *dev = data; 483 struct net_device *dev = data;
425 484
426 if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex) 485 if (!dev) {
486 flow_offload_teardown(flow);
427 return; 487 return;
488 }
428 489
429 flow_offload_dead(flow); 490 if (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
491 flow->tuplehash[1].tuple.iifidx == dev->ifindex)
492 flow_offload_dead(flow);
430} 493}
431 494
432static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable, 495static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
433 void *data) 496 struct net_device *dev)
434{ 497{
435 nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data); 498 nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
436 flush_delayed_work(&flowtable->gc_work); 499 flush_delayed_work(&flowtable->gc_work);
437} 500}
438 501
439void nf_flow_table_cleanup(struct net *net, struct net_device *dev) 502void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
440{ 503{
441 nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev); 504 struct nf_flowtable *flowtable;
505
506 mutex_lock(&flowtable_lock);
507 list_for_each_entry(flowtable, &flowtables, list)
508 nf_flow_table_iterate_cleanup(flowtable, dev);
509 mutex_unlock(&flowtable_lock);
442} 510}
443EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); 511EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
444 512
445void nf_flow_table_free(struct nf_flowtable *flow_table) 513void nf_flow_table_free(struct nf_flowtable *flow_table)
446{ 514{
515 mutex_lock(&flowtable_lock);
516 list_del(&flow_table->list);
517 mutex_unlock(&flowtable_lock);
518 cancel_delayed_work_sync(&flow_table->gc_work);
447 nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); 519 nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
448 WARN_ON(!nf_flow_offload_gc_step(flow_table)); 520 WARN_ON(!nf_flow_offload_gc_step(flow_table));
521 rhashtable_destroy(&flow_table->rhashtable);
449} 522}
450EXPORT_SYMBOL_GPL(nf_flow_table_free); 523EXPORT_SYMBOL_GPL(nf_flow_table_free);
451 524
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 375a1881d93d..99771aa7e7ea 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -22,8 +22,7 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
22 22
23static struct nf_flowtable_type flowtable_inet = { 23static struct nf_flowtable_type flowtable_inet = {
24 .family = NFPROTO_INET, 24 .family = NFPROTO_INET,
25 .params = &nf_flow_offload_rhash_params, 25 .init = nf_flow_table_init,
26 .gc = nf_flow_offload_work_gc,
27 .free = nf_flow_table_free, 26 .free = nf_flow_table_free,
28 .hook = nf_flow_offload_inet_hook, 27 .hook = nf_flow_offload_inet_hook,
29 .owner = THIS_MODULE, 28 .owner = THIS_MODULE,
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
new file mode 100644
index 000000000000..82451b7e0acb
--- /dev/null
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -0,0 +1,487 @@
1#include <linux/kernel.h>
2#include <linux/init.h>
3#include <linux/module.h>
4#include <linux/netfilter.h>
5#include <linux/rhashtable.h>
6#include <linux/ip.h>
7#include <linux/ipv6.h>
8#include <linux/netdevice.h>
9#include <net/ip.h>
10#include <net/ipv6.h>
11#include <net/ip6_route.h>
12#include <net/neighbour.h>
13#include <net/netfilter/nf_flow_table.h>
14/* For layer 4 checksum field offset. */
15#include <linux/tcp.h>
16#include <linux/udp.h>
17
18static int nf_flow_state_check(struct flow_offload *flow, int proto,
19 struct sk_buff *skb, unsigned int thoff)
20{
21 struct tcphdr *tcph;
22
23 if (proto != IPPROTO_TCP)
24 return 0;
25
26 if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
27 return -1;
28
29 tcph = (void *)(skb_network_header(skb) + thoff);
30 if (unlikely(tcph->fin || tcph->rst)) {
31 flow_offload_teardown(flow);
32 return -1;
33 }
34
35 return 0;
36}
37
38static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
39 __be32 addr, __be32 new_addr)
40{
41 struct tcphdr *tcph;
42
43 if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
44 skb_try_make_writable(skb, thoff + sizeof(*tcph)))
45 return -1;
46
47 tcph = (void *)(skb_network_header(skb) + thoff);
48 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
49
50 return 0;
51}
52
53static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
54 __be32 addr, __be32 new_addr)
55{
56 struct udphdr *udph;
57
58 if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
59 skb_try_make_writable(skb, thoff + sizeof(*udph)))
60 return -1;
61
62 udph = (void *)(skb_network_header(skb) + thoff);
63 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
64 inet_proto_csum_replace4(&udph->check, skb, addr,
65 new_addr, true);
66 if (!udph->check)
67 udph->check = CSUM_MANGLED_0;
68 }
69
70 return 0;
71}
72
73static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
74 unsigned int thoff, __be32 addr,
75 __be32 new_addr)
76{
77 switch (iph->protocol) {
78 case IPPROTO_TCP:
79 if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
80 return NF_DROP;
81 break;
82 case IPPROTO_UDP:
83 if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
84 return NF_DROP;
85 break;
86 }
87
88 return 0;
89}
90
91static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
92 struct iphdr *iph, unsigned int thoff,
93 enum flow_offload_tuple_dir dir)
94{
95 __be32 addr, new_addr;
96
97 switch (dir) {
98 case FLOW_OFFLOAD_DIR_ORIGINAL:
99 addr = iph->saddr;
100 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
101 iph->saddr = new_addr;
102 break;
103 case FLOW_OFFLOAD_DIR_REPLY:
104 addr = iph->daddr;
105 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
106 iph->daddr = new_addr;
107 break;
108 default:
109 return -1;
110 }
111 csum_replace4(&iph->check, addr, new_addr);
112
113 return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
114}
115
116static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
117 struct iphdr *iph, unsigned int thoff,
118 enum flow_offload_tuple_dir dir)
119{
120 __be32 addr, new_addr;
121
122 switch (dir) {
123 case FLOW_OFFLOAD_DIR_ORIGINAL:
124 addr = iph->daddr;
125 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
126 iph->daddr = new_addr;
127 break;
128 case FLOW_OFFLOAD_DIR_REPLY:
129 addr = iph->saddr;
130 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
131 iph->saddr = new_addr;
132 break;
133 default:
134 return -1;
135 }
136 csum_replace4(&iph->check, addr, new_addr);
137
138 return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
139}
140
141static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
142 unsigned int thoff, enum flow_offload_tuple_dir dir)
143{
144 struct iphdr *iph = ip_hdr(skb);
145
146 if (flow->flags & FLOW_OFFLOAD_SNAT &&
147 (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
148 nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
149 return -1;
150 if (flow->flags & FLOW_OFFLOAD_DNAT &&
151 (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
152 nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
153 return -1;
154
155 return 0;
156}
157
158static bool ip_has_options(unsigned int thoff)
159{
160 return thoff != sizeof(struct iphdr);
161}
162
163static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
164 struct flow_offload_tuple *tuple)
165{
166 struct flow_ports *ports;
167 unsigned int thoff;
168 struct iphdr *iph;
169
170 if (!pskb_may_pull(skb, sizeof(*iph)))
171 return -1;
172
173 iph = ip_hdr(skb);
174 thoff = iph->ihl * 4;
175
176 if (ip_is_fragment(iph) ||
177 unlikely(ip_has_options(thoff)))
178 return -1;
179
180 if (iph->protocol != IPPROTO_TCP &&
181 iph->protocol != IPPROTO_UDP)
182 return -1;
183
184 thoff = iph->ihl * 4;
185 if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
186 return -1;
187
188 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
189
190 tuple->src_v4.s_addr = iph->saddr;
191 tuple->dst_v4.s_addr = iph->daddr;
192 tuple->src_port = ports->source;
193 tuple->dst_port = ports->dest;
194 tuple->l3proto = AF_INET;
195 tuple->l4proto = iph->protocol;
196 tuple->iifidx = dev->ifindex;
197
198 return 0;
199}
200
201/* Based on ip_exceeds_mtu(). */
202static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
203{
204 if (skb->len <= mtu)
205 return false;
206
207 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
208 return false;
209
210 return true;
211}
212
/* IPv4 flowtable fast-path hook.
 *
 * Looks up the packet's 5-tuple in the flowtable. On a miss (or any
 * condition the fast path cannot handle) the packet is handed back to
 * the normal stack via NF_ACCEPT; on a hit it is NATed if needed,
 * its TTL decremented, and it is transmitted directly to the next-hop
 * neighbour (NF_STOLEN). NF_DROP is only returned when the packet could
 * not be made writable or NAT mangling failed.
 */
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
			const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple tuple = {};
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
	struct net_device *outdev;
	const struct rtable *rt;
	unsigned int thoff;
	struct iphdr *iph;
	__be32 nexthop;

	if (skb->protocol != htons(ETH_P_IP))
		return NF_ACCEPT;

	if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
		return NF_ACCEPT;

	tuplehash = flow_offload_lookup(flow_table, &tuple);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
	if (!outdev)
		return NF_ACCEPT;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;

	/* Oversized packets with DF set would need ICMP error generation,
	 * which the fast path does not do: punt to the classic path.
	 */
	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
	    (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
		return NF_ACCEPT;

	/* Header is modified below (NAT, TTL); make it writable first. */
	if (skb_try_make_writable(skb, sizeof(*iph)))
		return NF_DROP;

	thoff = ip_hdr(skb)->ihl * 4;
	/* TCP FIN/RST tears the flow down and hands the packet back to
	 * the conntrack slow path (see flow_offload teardown state).
	 */
	if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
		return NF_ACCEPT;

	if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
	    nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
		return NF_DROP;

	/* Refresh the flow timeout on every forwarded packet. */
	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
	/* Re-fetch the header pointer: NAT above may have reallocated it. */
	iph = ip_hdr(skb);
	ip_decrease_ttl(iph);

	/* Transmit straight to the cached route's next hop. */
	skb->dev = outdev;
	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);

	return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
272
273static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
274 struct in6_addr *addr,
275 struct in6_addr *new_addr)
276{
277 struct tcphdr *tcph;
278
279 if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
280 skb_try_make_writable(skb, thoff + sizeof(*tcph)))
281 return -1;
282
283 tcph = (void *)(skb_network_header(skb) + thoff);
284 inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
285 new_addr->s6_addr32, true);
286
287 return 0;
288}
289
290static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
291 struct in6_addr *addr,
292 struct in6_addr *new_addr)
293{
294 struct udphdr *udph;
295
296 if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
297 skb_try_make_writable(skb, thoff + sizeof(*udph)))
298 return -1;
299
300 udph = (void *)(skb_network_header(skb) + thoff);
301 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
302 inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
303 new_addr->s6_addr32, true);
304 if (!udph->check)
305 udph->check = CSUM_MANGLED_0;
306 }
307
308 return 0;
309}
310
311static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
312 unsigned int thoff, struct in6_addr *addr,
313 struct in6_addr *new_addr)
314{
315 switch (ip6h->nexthdr) {
316 case IPPROTO_TCP:
317 if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
318 return NF_DROP;
319 break;
320 case IPPROTO_UDP:
321 if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
322 return NF_DROP;
323 break;
324 }
325
326 return 0;
327}
328
329static int nf_flow_snat_ipv6(const struct flow_offload *flow,
330 struct sk_buff *skb, struct ipv6hdr *ip6h,
331 unsigned int thoff,
332 enum flow_offload_tuple_dir dir)
333{
334 struct in6_addr addr, new_addr;
335
336 switch (dir) {
337 case FLOW_OFFLOAD_DIR_ORIGINAL:
338 addr = ip6h->saddr;
339 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
340 ip6h->saddr = new_addr;
341 break;
342 case FLOW_OFFLOAD_DIR_REPLY:
343 addr = ip6h->daddr;
344 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
345 ip6h->daddr = new_addr;
346 break;
347 default:
348 return -1;
349 }
350
351 return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
352}
353
354static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
355 struct sk_buff *skb, struct ipv6hdr *ip6h,
356 unsigned int thoff,
357 enum flow_offload_tuple_dir dir)
358{
359 struct in6_addr addr, new_addr;
360
361 switch (dir) {
362 case FLOW_OFFLOAD_DIR_ORIGINAL:
363 addr = ip6h->daddr;
364 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
365 ip6h->daddr = new_addr;
366 break;
367 case FLOW_OFFLOAD_DIR_REPLY:
368 addr = ip6h->saddr;
369 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
370 ip6h->saddr = new_addr;
371 break;
372 default:
373 return -1;
374 }
375
376 return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
377}
378
379static int nf_flow_nat_ipv6(const struct flow_offload *flow,
380 struct sk_buff *skb,
381 enum flow_offload_tuple_dir dir)
382{
383 struct ipv6hdr *ip6h = ipv6_hdr(skb);
384 unsigned int thoff = sizeof(*ip6h);
385
386 if (flow->flags & FLOW_OFFLOAD_SNAT &&
387 (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
388 nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
389 return -1;
390 if (flow->flags & FLOW_OFFLOAD_DNAT &&
391 (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
392 nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
393 return -1;
394
395 return 0;
396}
397
398static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
399 struct flow_offload_tuple *tuple)
400{
401 struct flow_ports *ports;
402 struct ipv6hdr *ip6h;
403 unsigned int thoff;
404
405 if (!pskb_may_pull(skb, sizeof(*ip6h)))
406 return -1;
407
408 ip6h = ipv6_hdr(skb);
409
410 if (ip6h->nexthdr != IPPROTO_TCP &&
411 ip6h->nexthdr != IPPROTO_UDP)
412 return -1;
413
414 thoff = sizeof(*ip6h);
415 if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
416 return -1;
417
418 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
419
420 tuple->src_v6 = ip6h->saddr;
421 tuple->dst_v6 = ip6h->daddr;
422 tuple->src_port = ports->source;
423 tuple->dst_port = ports->dest;
424 tuple->l3proto = AF_INET6;
425 tuple->l4proto = ip6h->nexthdr;
426 tuple->iifidx = dev->ifindex;
427
428 return 0;
429}
430
/* IPv6 flowtable fast-path hook.
 *
 * Mirrors nf_flow_offload_ip_hook(): flowtable lookup, optional NAT,
 * hop-limit decrement, then direct transmission to the next-hop
 * neighbour (NF_STOLEN). NF_ACCEPT hands the packet back to the normal
 * stack; NF_DROP is returned only on writability or NAT failure.
 */
unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
			  const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple tuple = {};
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
	struct net_device *outdev;
	struct in6_addr *nexthop;
	struct ipv6hdr *ip6h;
	struct rt6_info *rt;

	if (skb->protocol != htons(ETH_P_IPV6))
		return NF_ACCEPT;

	if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
		return NF_ACCEPT;

	tuplehash = flow_offload_lookup(flow_table, &tuple);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
	if (!outdev)
		return NF_ACCEPT;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;

	/* Oversized packets fall back to the classic path (which can emit
	 * ICMPv6 Packet Too Big); no DF check — IPv6 never fragments
	 * in-flight.
	 */
	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
		return NF_ACCEPT;

	/* TCP FIN/RST tears the flow down and hands the packet back to
	 * the conntrack slow path.
	 * NOTE(review): this check precedes skb_try_make_writable() here
	 * but follows it in the IPv4 hook — confirm the ordering
	 * difference is intentional.
	 */
	if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
				sizeof(*ip6h)))
		return NF_ACCEPT;

	/* Header is modified below (NAT, hop limit); make it writable. */
	if (skb_try_make_writable(skb, sizeof(*ip6h)))
		return NF_DROP;

	if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
	    nf_flow_nat_ipv6(flow, skb, dir) < 0)
		return NF_DROP;

	/* Refresh the flow timeout on every forwarded packet. */
	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
	/* Re-fetch the header pointer: NAT above may have reallocated it. */
	ip6h = ipv6_hdr(skb);
	ip6h->hop_limit--;

	/* Transmit straight to the cached route's next hop. */
	skb->dev = outdev;
	nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
	neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);

	return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 617693ff9f4c..37b3c9913b08 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -157,7 +157,7 @@ EXPORT_SYMBOL(nf_nat_used_tuple);
157static int in_range(const struct nf_nat_l3proto *l3proto, 157static int in_range(const struct nf_nat_l3proto *l3proto,
158 const struct nf_nat_l4proto *l4proto, 158 const struct nf_nat_l4proto *l4proto,
159 const struct nf_conntrack_tuple *tuple, 159 const struct nf_conntrack_tuple *tuple,
160 const struct nf_nat_range *range) 160 const struct nf_nat_range2 *range)
161{ 161{
162 /* If we are supposed to map IPs, then we must be in the 162 /* If we are supposed to map IPs, then we must be in the
163 * range specified, otherwise let this drag us onto a new src IP. 163 * range specified, otherwise let this drag us onto a new src IP.
@@ -194,7 +194,7 @@ find_appropriate_src(struct net *net,
194 const struct nf_nat_l4proto *l4proto, 194 const struct nf_nat_l4proto *l4proto,
195 const struct nf_conntrack_tuple *tuple, 195 const struct nf_conntrack_tuple *tuple,
196 struct nf_conntrack_tuple *result, 196 struct nf_conntrack_tuple *result,
197 const struct nf_nat_range *range) 197 const struct nf_nat_range2 *range)
198{ 198{
199 unsigned int h = hash_by_src(net, tuple); 199 unsigned int h = hash_by_src(net, tuple);
200 const struct nf_conn *ct; 200 const struct nf_conn *ct;
@@ -224,7 +224,7 @@ find_appropriate_src(struct net *net,
224static void 224static void
225find_best_ips_proto(const struct nf_conntrack_zone *zone, 225find_best_ips_proto(const struct nf_conntrack_zone *zone,
226 struct nf_conntrack_tuple *tuple, 226 struct nf_conntrack_tuple *tuple,
227 const struct nf_nat_range *range, 227 const struct nf_nat_range2 *range,
228 const struct nf_conn *ct, 228 const struct nf_conn *ct,
229 enum nf_nat_manip_type maniptype) 229 enum nf_nat_manip_type maniptype)
230{ 230{
@@ -298,7 +298,7 @@ find_best_ips_proto(const struct nf_conntrack_zone *zone,
298static void 298static void
299get_unique_tuple(struct nf_conntrack_tuple *tuple, 299get_unique_tuple(struct nf_conntrack_tuple *tuple,
300 const struct nf_conntrack_tuple *orig_tuple, 300 const struct nf_conntrack_tuple *orig_tuple,
301 const struct nf_nat_range *range, 301 const struct nf_nat_range2 *range,
302 struct nf_conn *ct, 302 struct nf_conn *ct,
303 enum nf_nat_manip_type maniptype) 303 enum nf_nat_manip_type maniptype)
304{ 304{
@@ -349,9 +349,10 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
349 /* Only bother mapping if it's not already in range and unique */ 349 /* Only bother mapping if it's not already in range and unique */
350 if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { 350 if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
351 if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { 351 if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
352 if (l4proto->in_range(tuple, maniptype, 352 if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
353 &range->min_proto, 353 l4proto->in_range(tuple, maniptype,
354 &range->max_proto) && 354 &range->min_proto,
355 &range->max_proto) &&
355 (range->min_proto.all == range->max_proto.all || 356 (range->min_proto.all == range->max_proto.all ||
356 !nf_nat_used_tuple(tuple, ct))) 357 !nf_nat_used_tuple(tuple, ct)))
357 goto out; 358 goto out;
@@ -360,7 +361,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
360 } 361 }
361 } 362 }
362 363
363 /* Last change: get protocol to try to obtain unique tuple. */ 364 /* Last chance: get protocol to try to obtain unique tuple. */
364 l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct); 365 l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct);
365out: 366out:
366 rcu_read_unlock(); 367 rcu_read_unlock();
@@ -381,7 +382,7 @@ EXPORT_SYMBOL_GPL(nf_ct_nat_ext_add);
381 382
382unsigned int 383unsigned int
383nf_nat_setup_info(struct nf_conn *ct, 384nf_nat_setup_info(struct nf_conn *ct,
384 const struct nf_nat_range *range, 385 const struct nf_nat_range2 *range,
385 enum nf_nat_manip_type maniptype) 386 enum nf_nat_manip_type maniptype)
386{ 387{
387 struct net *net = nf_ct_net(ct); 388 struct net *net = nf_ct_net(ct);
@@ -459,7 +460,7 @@ __nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip)
459 (manip == NF_NAT_MANIP_SRC ? 460 (manip == NF_NAT_MANIP_SRC ?
460 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 : 461 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
461 ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3); 462 ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
462 struct nf_nat_range range = { 463 struct nf_nat_range2 range = {
463 .flags = NF_NAT_RANGE_MAP_IPS, 464 .flags = NF_NAT_RANGE_MAP_IPS,
464 .min_addr = ip, 465 .min_addr = ip,
465 .max_addr = ip, 466 .max_addr = ip,
@@ -702,7 +703,7 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
702 703
703static int nfnetlink_parse_nat_proto(struct nlattr *attr, 704static int nfnetlink_parse_nat_proto(struct nlattr *attr,
704 const struct nf_conn *ct, 705 const struct nf_conn *ct,
705 struct nf_nat_range *range) 706 struct nf_nat_range2 *range)
706{ 707{
707 struct nlattr *tb[CTA_PROTONAT_MAX+1]; 708 struct nlattr *tb[CTA_PROTONAT_MAX+1];
708 const struct nf_nat_l4proto *l4proto; 709 const struct nf_nat_l4proto *l4proto;
@@ -730,7 +731,7 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
730 731
731static int 732static int
732nfnetlink_parse_nat(const struct nlattr *nat, 733nfnetlink_parse_nat(const struct nlattr *nat,
733 const struct nf_conn *ct, struct nf_nat_range *range, 734 const struct nf_conn *ct, struct nf_nat_range2 *range,
734 const struct nf_nat_l3proto *l3proto) 735 const struct nf_nat_l3proto *l3proto)
735{ 736{
736 struct nlattr *tb[CTA_NAT_MAX+1]; 737 struct nlattr *tb[CTA_NAT_MAX+1];
@@ -758,7 +759,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
758 enum nf_nat_manip_type manip, 759 enum nf_nat_manip_type manip,
759 const struct nlattr *attr) 760 const struct nlattr *attr)
760{ 761{
761 struct nf_nat_range range; 762 struct nf_nat_range2 range;
762 const struct nf_nat_l3proto *l3proto; 763 const struct nf_nat_l3proto *l3proto;
763 int err; 764 int err;
764 765
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 607a373379b4..99606baedda4 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -191,7 +191,7 @@ EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
191void nf_nat_follow_master(struct nf_conn *ct, 191void nf_nat_follow_master(struct nf_conn *ct,
192 struct nf_conntrack_expect *exp) 192 struct nf_conntrack_expect *exp)
193{ 193{
194 struct nf_nat_range range; 194 struct nf_nat_range2 range;
195 195
196 /* This must be a fresh one. */ 196 /* This must be a fresh one. */
197 BUG_ON(ct->status & IPS_NAT_DONE_MASK); 197 BUG_ON(ct->status & IPS_NAT_DONE_MASK);
diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c
index 7d7466dbf663..5d849d835561 100644
--- a/net/netfilter/nf_nat_proto_common.c
+++ b/net/netfilter/nf_nat_proto_common.c
@@ -36,7 +36,7 @@ EXPORT_SYMBOL_GPL(nf_nat_l4proto_in_range);
36 36
37void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, 37void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
38 struct nf_conntrack_tuple *tuple, 38 struct nf_conntrack_tuple *tuple,
39 const struct nf_nat_range *range, 39 const struct nf_nat_range2 *range,
40 enum nf_nat_manip_type maniptype, 40 enum nf_nat_manip_type maniptype,
41 const struct nf_conn *ct, 41 const struct nf_conn *ct,
42 u16 *rover) 42 u16 *rover)
@@ -83,6 +83,8 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
83 : tuple->src.u.all); 83 : tuple->src.u.all);
84 } else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) { 84 } else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) {
85 off = prandom_u32(); 85 off = prandom_u32();
86 } else if (range->flags & NF_NAT_RANGE_PROTO_OFFSET) {
87 off = (ntohs(*portptr) - ntohs(range->base_proto.all));
86 } else { 88 } else {
87 off = *rover; 89 off = *rover;
88 } 90 }
@@ -91,7 +93,8 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
91 *portptr = htons(min + off % range_size); 93 *portptr = htons(min + off % range_size);
92 if (++i != range_size && nf_nat_used_tuple(tuple, ct)) 94 if (++i != range_size && nf_nat_used_tuple(tuple, ct))
93 continue; 95 continue;
94 if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) 96 if (!(range->flags & (NF_NAT_RANGE_PROTO_RANDOM_ALL|
97 NF_NAT_RANGE_PROTO_OFFSET)))
95 *rover = off; 98 *rover = off;
96 return; 99 return;
97 } 100 }
@@ -100,7 +103,7 @@ EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple);
100 103
101#if IS_ENABLED(CONFIG_NF_CT_NETLINK) 104#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
102int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[], 105int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
103 struct nf_nat_range *range) 106 struct nf_nat_range2 *range)
104{ 107{
105 if (tb[CTA_PROTONAT_PORT_MIN]) { 108 if (tb[CTA_PROTONAT_PORT_MIN]) {
106 range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); 109 range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c
index 269fcd5dc34c..67ea0d83aa5a 100644
--- a/net/netfilter/nf_nat_proto_dccp.c
+++ b/net/netfilter/nf_nat_proto_dccp.c
@@ -23,7 +23,7 @@ static u_int16_t dccp_port_rover;
23static void 23static void
24dccp_unique_tuple(const struct nf_nat_l3proto *l3proto, 24dccp_unique_tuple(const struct nf_nat_l3proto *l3proto,
25 struct nf_conntrack_tuple *tuple, 25 struct nf_conntrack_tuple *tuple,
26 const struct nf_nat_range *range, 26 const struct nf_nat_range2 *range,
27 enum nf_nat_manip_type maniptype, 27 enum nf_nat_manip_type maniptype,
28 const struct nf_conn *ct) 28 const struct nf_conn *ct)
29{ 29{
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index c57ee3240b1d..1c5d9b65fbba 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -17,7 +17,7 @@ static u_int16_t nf_sctp_port_rover;
17static void 17static void
18sctp_unique_tuple(const struct nf_nat_l3proto *l3proto, 18sctp_unique_tuple(const struct nf_nat_l3proto *l3proto,
19 struct nf_conntrack_tuple *tuple, 19 struct nf_conntrack_tuple *tuple,
20 const struct nf_nat_range *range, 20 const struct nf_nat_range2 *range,
21 enum nf_nat_manip_type maniptype, 21 enum nf_nat_manip_type maniptype,
22 const struct nf_conn *ct) 22 const struct nf_conn *ct)
23{ 23{
diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c
index 4f8820fc5148..f15fcd475f98 100644
--- a/net/netfilter/nf_nat_proto_tcp.c
+++ b/net/netfilter/nf_nat_proto_tcp.c
@@ -23,7 +23,7 @@ static u16 tcp_port_rover;
23static void 23static void
24tcp_unique_tuple(const struct nf_nat_l3proto *l3proto, 24tcp_unique_tuple(const struct nf_nat_l3proto *l3proto,
25 struct nf_conntrack_tuple *tuple, 25 struct nf_conntrack_tuple *tuple,
26 const struct nf_nat_range *range, 26 const struct nf_nat_range2 *range,
27 enum nf_nat_manip_type maniptype, 27 enum nf_nat_manip_type maniptype,
28 const struct nf_conn *ct) 28 const struct nf_conn *ct)
29{ 29{
diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c
index edd4a77dc09a..5790f70a83b2 100644
--- a/net/netfilter/nf_nat_proto_udp.c
+++ b/net/netfilter/nf_nat_proto_udp.c
@@ -22,7 +22,7 @@ static u16 udp_port_rover;
22static void 22static void
23udp_unique_tuple(const struct nf_nat_l3proto *l3proto, 23udp_unique_tuple(const struct nf_nat_l3proto *l3proto,
24 struct nf_conntrack_tuple *tuple, 24 struct nf_conntrack_tuple *tuple,
25 const struct nf_nat_range *range, 25 const struct nf_nat_range2 *range,
26 enum nf_nat_manip_type maniptype, 26 enum nf_nat_manip_type maniptype,
27 const struct nf_conn *ct) 27 const struct nf_conn *ct)
28{ 28{
@@ -100,7 +100,7 @@ static bool udplite_manip_pkt(struct sk_buff *skb,
100static void 100static void
101udplite_unique_tuple(const struct nf_nat_l3proto *l3proto, 101udplite_unique_tuple(const struct nf_nat_l3proto *l3proto,
102 struct nf_conntrack_tuple *tuple, 102 struct nf_conntrack_tuple *tuple,
103 const struct nf_nat_range *range, 103 const struct nf_nat_range2 *range,
104 enum nf_nat_manip_type maniptype, 104 enum nf_nat_manip_type maniptype,
105 const struct nf_conn *ct) 105 const struct nf_conn *ct)
106{ 106{
diff --git a/net/netfilter/nf_nat_proto_unknown.c b/net/netfilter/nf_nat_proto_unknown.c
index 6e494d584412..c5db3e251232 100644
--- a/net/netfilter/nf_nat_proto_unknown.c
+++ b/net/netfilter/nf_nat_proto_unknown.c
@@ -27,7 +27,7 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
27 27
28static void unknown_unique_tuple(const struct nf_nat_l3proto *l3proto, 28static void unknown_unique_tuple(const struct nf_nat_l3proto *l3proto,
29 struct nf_conntrack_tuple *tuple, 29 struct nf_conntrack_tuple *tuple,
30 const struct nf_nat_range *range, 30 const struct nf_nat_range2 *range,
31 enum nf_nat_manip_type maniptype, 31 enum nf_nat_manip_type maniptype,
32 const struct nf_conn *ct) 32 const struct nf_conn *ct)
33{ 33{
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index 25b06b959118..7c4bb0a773ca 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -36,7 +36,7 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
36 struct nf_conn *ct; 36 struct nf_conn *ct;
37 enum ip_conntrack_info ctinfo; 37 enum ip_conntrack_info ctinfo;
38 __be32 newdst; 38 __be32 newdst;
39 struct nf_nat_range newrange; 39 struct nf_nat_range2 newrange;
40 40
41 WARN_ON(hooknum != NF_INET_PRE_ROUTING && 41 WARN_ON(hooknum != NF_INET_PRE_ROUTING &&
42 hooknum != NF_INET_LOCAL_OUT); 42 hooknum != NF_INET_LOCAL_OUT);
@@ -82,10 +82,10 @@ EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4);
82static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; 82static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
83 83
84unsigned int 84unsigned int
85nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, 85nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
86 unsigned int hooknum) 86 unsigned int hooknum)
87{ 87{
88 struct nf_nat_range newrange; 88 struct nf_nat_range2 newrange;
89 struct in6_addr newdst; 89 struct in6_addr newdst;
90 enum ip_conntrack_info ctinfo; 90 enum ip_conntrack_info ctinfo;
91 struct nf_conn *ct; 91 struct nf_conn *ct;
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index 791fac4fd745..1f3086074981 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -316,7 +316,7 @@ static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff,
316static void nf_nat_sip_expected(struct nf_conn *ct, 316static void nf_nat_sip_expected(struct nf_conn *ct,
317 struct nf_conntrack_expect *exp) 317 struct nf_conntrack_expect *exp)
318{ 318{
319 struct nf_nat_range range; 319 struct nf_nat_range2 range;
320 320
321 /* This must be a fresh one. */ 321 /* This must be a fresh one. */
322 BUG_ON(ct->status & IPS_NAT_DONE_MASK); 322 BUG_ON(ct->status & IPS_NAT_DONE_MASK);
diff --git a/net/netfilter/nf_osf.c b/net/netfilter/nf_osf.c
new file mode 100644
index 000000000000..5ba5c7bef2f9
--- /dev/null
+++ b/net/netfilter/nf_osf.c
@@ -0,0 +1,218 @@
1#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
2#include <linux/module.h>
3#include <linux/kernel.h>
4
5#include <linux/capability.h>
6#include <linux/if.h>
7#include <linux/inetdevice.h>
8#include <linux/ip.h>
9#include <linux/list.h>
10#include <linux/rculist.h>
11#include <linux/skbuff.h>
12#include <linux/slab.h>
13#include <linux/tcp.h>
14
15#include <net/ip.h>
16#include <net/tcp.h>
17
18#include <linux/netfilter/nfnetlink.h>
19#include <linux/netfilter/x_tables.h>
20#include <net/netfilter/nf_log.h>
21#include <linux/netfilter/nf_osf.h>
22
23static inline int nf_osf_ttl(const struct sk_buff *skb,
24 const struct nf_osf_info *info,
25 unsigned char f_ttl)
26{
27 const struct iphdr *ip = ip_hdr(skb);
28
29 if (info->flags & NF_OSF_TTL) {
30 if (info->ttl == NF_OSF_TTL_TRUE)
31 return ip->ttl == f_ttl;
32 if (info->ttl == NF_OSF_TTL_NOCHECK)
33 return 1;
34 else if (ip->ttl <= f_ttl)
35 return 1;
36 else {
37 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
38 int ret = 0;
39
40 for_ifa(in_dev) {
41 if (inet_ifa_match(ip->saddr, ifa)) {
42 ret = (ip->ttl == f_ttl);
43 break;
44 }
45 }
46 endfor_ifa(in_dev);
47
48 return ret;
49 }
50 }
51
52 return ip->ttl == f_ttl;
53}
54
/* Match a TCP SYN packet against the passive-OS fingerprint list.
 *
 * Walks @nf_osf_fingers (indexed by the IP DF bit) and compares total
 * length, TTL, TCP options and window size against each fingerprint.
 * Matches may be logged via nf_log_packet() when NF_OSF_LOG is set.
 * Returns true if at least one fingerprint matched, false otherwise
 * (non-SYN and truncated packets never match).
 */
bool
nf_osf_match(const struct sk_buff *skb, u_int8_t family,
	     int hooknum, struct net_device *in, struct net_device *out,
	     const struct nf_osf_info *info, struct net *net,
	     const struct list_head *nf_osf_fingers)
{
	const unsigned char *optp = NULL, *_optp = NULL;
	unsigned int optsize = 0, check_WSS = 0;
	int fmatch = FMATCH_WRONG, fcount = 0;
	const struct iphdr *ip = ip_hdr(skb);
	const struct nf_osf_user_finger *f;
	unsigned char opts[MAX_IPOPTLEN];
	const struct nf_osf_finger *kf;
	u16 window, totlen, mss = 0;
	const struct tcphdr *tcp;
	struct tcphdr _tcph;
	bool df;

	tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
	if (!tcp)
		return false;

	/* Fingerprinting only looks at connection-initiating SYNs. */
	if (!tcp->syn)
		return false;

	totlen = ntohs(ip->tot_len);
	df = ntohs(ip->frag_off) & IP_DF;
	window = ntohs(tcp->window);

	/* Copy out any TCP options; _optp remembers the start so each
	 * fingerprint comparison can rewind.
	 */
	if (tcp->doff * 4 > sizeof(struct tcphdr)) {
		optsize = tcp->doff * 4 - sizeof(struct tcphdr);

		_optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) +
				sizeof(struct tcphdr), optsize, opts);
	}

	/* Fingerprints are stored in two lists, keyed by the DF bit. */
	list_for_each_entry_rcu(kf, &nf_osf_fingers[df], finger_entry) {
		int foptsize, optnum;

		f = &kf->finger;

		/* Without LOG mode, only the genre requested by the rule
		 * is considered.
		 */
		if (!(info->flags & NF_OSF_LOG) && strcmp(info->genre, f->genre))
			continue;

		optp = _optp;
		fmatch = FMATCH_WRONG;

		if (totlen != f->ss || !nf_osf_ttl(skb, info, f->ttl))
			continue;

		/*
		 * Should not happen if userspace parser was written correctly.
		 */
		if (f->wss.wc >= OSF_WSS_MAX)
			continue;

		/* Check options */

		foptsize = 0;
		for (optnum = 0; optnum < f->opt_num; ++optnum)
			foptsize += f->opt[optnum].length;

		/* Option block length must match the fingerprint exactly. */
		if (foptsize > MAX_IPOPTLEN ||
		    optsize > MAX_IPOPTLEN ||
		    optsize != foptsize)
			continue;

		check_WSS = f->wss.wc;

		/* Compare option kinds in order; capture the MSS value for
		 * the window-size checks below.
		 */
		for (optnum = 0; optnum < f->opt_num; ++optnum) {
			if (f->opt[optnum].kind == (*optp)) {
				__u32 len = f->opt[optnum].length;
				const __u8 *optend = optp + len;

				fmatch = FMATCH_OK;

				switch (*optp) {
				case OSFOPT_MSS:
					/* Assemble the 16-bit MSS from the
					 * option payload (byte-swap dance
					 * inherited from xt_osf).
					 */
					mss = optp[3];
					mss <<= 8;
					mss |= optp[2];

					mss = ntohs((__force __be16)mss);
					break;
				case OSFOPT_TS:
					break;
				}

				optp = optend;
			} else
				fmatch = FMATCH_OPT_WRONG;

			if (fmatch != FMATCH_OK)
				break;
		}

		/* Options matched (or there were none): now validate the
		 * advertised window against the fingerprint's WSS mode.
		 */
		if (fmatch != FMATCH_OPT_WRONG) {
			fmatch = FMATCH_WRONG;

			switch (check_WSS) {
			case OSF_WSS_PLAIN:
				/* wss.val == 0 acts as a wildcard. */
				if (f->wss.val == 0 || window == f->wss.val)
					fmatch = FMATCH_OK;
				break;
			case OSF_WSS_MSS:
				/*
				 * Some smart modems decrease mangle MSS to
				 * SMART_MSS_2, so we check standard, decreased
				 * and the one provided in the fingerprint MSS
				 * values.
				 */
#define SMART_MSS_1	1460
#define SMART_MSS_2	1448
				if (window == f->wss.val * mss ||
				    window == f->wss.val * SMART_MSS_1 ||
				    window == f->wss.val * SMART_MSS_2)
					fmatch = FMATCH_OK;
				break;
			case OSF_WSS_MTU:
				/* MTU = MSS + 40 (IP + TCP header bytes). */
				if (window == f->wss.val * (mss + 40) ||
				    window == f->wss.val * (SMART_MSS_1 + 40) ||
				    window == f->wss.val * (SMART_MSS_2 + 40))
					fmatch = FMATCH_OK;
				break;
			case OSF_WSS_MODULO:
				if ((window % f->wss.val) == 0)
					fmatch = FMATCH_OK;
				break;
			}
		}

		if (fmatch != FMATCH_OK)
			continue;

		fcount++;

		if (info->flags & NF_OSF_LOG)
			nf_log_packet(net, family, hooknum, skb,
				      in, out, NULL,
				      "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
				      f->genre, f->version, f->subtype,
				      &ip->saddr, ntohs(tcp->source),
				      &ip->daddr, ntohs(tcp->dest),
				      f->ttl - ip->ttl);

		/* LOGLEVEL_FIRST: stop after the first logged match. */
		if ((info->flags & NF_OSF_LOG) &&
		    info->loglevel == NF_OSF_LOGLEVEL_FIRST)
			break;
	}

	if (!fcount && (info->flags & NF_OSF_LOG))
		nf_log_packet(net, family, hooknum, skb, in, out, NULL,
			      "Remote OS is not known: %pI4:%u -> %pI4:%u\n",
			      &ip->saddr, ntohs(tcp->source),
			      &ip->daddr, ntohs(tcp->dest));

	if (fcount)
		fmatch = FMATCH_OK;

	return fmatch == FMATCH_OK;
}
EXPORT_SYMBOL_GPL(nf_osf_match);
217
218MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 04d4e3772584..18bd584fadda 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -386,13 +386,17 @@ static struct nft_table *nft_table_lookup(const struct net *net,
386{ 386{
387 struct nft_table *table; 387 struct nft_table *table;
388 388
389 if (nla == NULL)
390 return ERR_PTR(-EINVAL);
391
389 list_for_each_entry(table, &net->nft.tables, list) { 392 list_for_each_entry(table, &net->nft.tables, list) {
390 if (!nla_strcmp(nla, table->name) && 393 if (!nla_strcmp(nla, table->name) &&
391 table->family == family && 394 table->family == family &&
392 nft_active_genmask(table, genmask)) 395 nft_active_genmask(table, genmask))
393 return table; 396 return table;
394 } 397 }
395 return NULL; 398
399 return ERR_PTR(-ENOENT);
396} 400}
397 401
398static struct nft_table *nft_table_lookup_byhandle(const struct net *net, 402static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
@@ -406,37 +410,6 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
406 nft_active_genmask(table, genmask)) 410 nft_active_genmask(table, genmask))
407 return table; 411 return table;
408 } 412 }
409 return NULL;
410}
411
412static struct nft_table *nf_tables_table_lookup(const struct net *net,
413 const struct nlattr *nla,
414 u8 family, u8 genmask)
415{
416 struct nft_table *table;
417
418 if (nla == NULL)
419 return ERR_PTR(-EINVAL);
420
421 table = nft_table_lookup(net, nla, family, genmask);
422 if (table != NULL)
423 return table;
424
425 return ERR_PTR(-ENOENT);
426}
427
428static struct nft_table *nf_tables_table_lookup_byhandle(const struct net *net,
429 const struct nlattr *nla,
430 u8 genmask)
431{
432 struct nft_table *table;
433
434 if (nla == NULL)
435 return ERR_PTR(-EINVAL);
436
437 table = nft_table_lookup_byhandle(net, nla, genmask);
438 if (table != NULL)
439 return table;
440 413
441 return ERR_PTR(-ENOENT); 414 return ERR_PTR(-ENOENT);
442} 415}
@@ -608,10 +581,11 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
608 return netlink_dump_start(nlsk, skb, nlh, &c); 581 return netlink_dump_start(nlsk, skb, nlh, &c);
609 } 582 }
610 583
611 table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], family, 584 table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family, genmask);
612 genmask); 585 if (IS_ERR(table)) {
613 if (IS_ERR(table)) 586 NL_SET_BAD_ATTR(extack, nla[NFTA_TABLE_NAME]);
614 return PTR_ERR(table); 587 return PTR_ERR(table);
588 }
615 589
616 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 590 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
617 if (!skb2) 591 if (!skb2)
@@ -727,21 +701,23 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
727{ 701{
728 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 702 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
729 u8 genmask = nft_genmask_next(net); 703 u8 genmask = nft_genmask_next(net);
730 const struct nlattr *name;
731 struct nft_table *table;
732 int family = nfmsg->nfgen_family; 704 int family = nfmsg->nfgen_family;
705 const struct nlattr *attr;
706 struct nft_table *table;
733 u32 flags = 0; 707 u32 flags = 0;
734 struct nft_ctx ctx; 708 struct nft_ctx ctx;
735 int err; 709 int err;
736 710
737 name = nla[NFTA_TABLE_NAME]; 711 attr = nla[NFTA_TABLE_NAME];
738 table = nf_tables_table_lookup(net, name, family, genmask); 712 table = nft_table_lookup(net, attr, family, genmask);
739 if (IS_ERR(table)) { 713 if (IS_ERR(table)) {
740 if (PTR_ERR(table) != -ENOENT) 714 if (PTR_ERR(table) != -ENOENT)
741 return PTR_ERR(table); 715 return PTR_ERR(table);
742 } else { 716 } else {
743 if (nlh->nlmsg_flags & NLM_F_EXCL) 717 if (nlh->nlmsg_flags & NLM_F_EXCL) {
718 NL_SET_BAD_ATTR(extack, attr);
744 return -EEXIST; 719 return -EEXIST;
720 }
745 if (nlh->nlmsg_flags & NLM_F_REPLACE) 721 if (nlh->nlmsg_flags & NLM_F_REPLACE)
746 return -EOPNOTSUPP; 722 return -EOPNOTSUPP;
747 723
@@ -760,7 +736,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
760 if (table == NULL) 736 if (table == NULL)
761 goto err_kzalloc; 737 goto err_kzalloc;
762 738
763 table->name = nla_strdup(name, GFP_KERNEL); 739 table->name = nla_strdup(attr, GFP_KERNEL);
764 if (table->name == NULL) 740 if (table->name == NULL)
765 goto err_strdup; 741 goto err_strdup;
766 742
@@ -883,8 +859,9 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
883{ 859{
884 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 860 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
885 u8 genmask = nft_genmask_next(net); 861 u8 genmask = nft_genmask_next(net);
886 struct nft_table *table;
887 int family = nfmsg->nfgen_family; 862 int family = nfmsg->nfgen_family;
863 const struct nlattr *attr;
864 struct nft_table *table;
888 struct nft_ctx ctx; 865 struct nft_ctx ctx;
889 866
890 nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla); 867 nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla);
@@ -892,16 +869,18 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
892 (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE])) 869 (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE]))
893 return nft_flush(&ctx, family); 870 return nft_flush(&ctx, family);
894 871
895 if (nla[NFTA_TABLE_HANDLE]) 872 if (nla[NFTA_TABLE_HANDLE]) {
896 table = nf_tables_table_lookup_byhandle(net, 873 attr = nla[NFTA_TABLE_HANDLE];
897 nla[NFTA_TABLE_HANDLE], 874 table = nft_table_lookup_byhandle(net, attr, genmask);
898 genmask); 875 } else {
899 else 876 attr = nla[NFTA_TABLE_NAME];
900 table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], 877 table = nft_table_lookup(net, attr, family, genmask);
901 family, genmask); 878 }
902 879
903 if (IS_ERR(table)) 880 if (IS_ERR(table)) {
881 NL_SET_BAD_ATTR(extack, attr);
904 return PTR_ERR(table); 882 return PTR_ERR(table);
883 }
905 884
906 if (nlh->nlmsg_flags & NLM_F_NONREC && 885 if (nlh->nlmsg_flags & NLM_F_NONREC &&
907 table->use > 0) 886 table->use > 0)
@@ -949,8 +928,7 @@ EXPORT_SYMBOL_GPL(nft_unregister_chain_type);
949 */ 928 */
950 929
951static struct nft_chain * 930static struct nft_chain *
952nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle, 931nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
953 u8 genmask)
954{ 932{
955 struct nft_chain *chain; 933 struct nft_chain *chain;
956 934
@@ -963,9 +941,8 @@ nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle,
963 return ERR_PTR(-ENOENT); 941 return ERR_PTR(-ENOENT);
964} 942}
965 943
966static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table, 944static struct nft_chain *nft_chain_lookup(const struct nft_table *table,
967 const struct nlattr *nla, 945 const struct nlattr *nla, u8 genmask)
968 u8 genmask)
969{ 946{
970 struct nft_chain *chain; 947 struct nft_chain *chain;
971 948
@@ -1194,14 +1171,17 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
1194 return netlink_dump_start(nlsk, skb, nlh, &c); 1171 return netlink_dump_start(nlsk, skb, nlh, &c);
1195 } 1172 }
1196 1173
1197 table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, 1174 table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
1198 genmask); 1175 if (IS_ERR(table)) {
1199 if (IS_ERR(table)) 1176 NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
1200 return PTR_ERR(table); 1177 return PTR_ERR(table);
1178 }
1201 1179
1202 chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); 1180 chain = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
1203 if (IS_ERR(chain)) 1181 if (IS_ERR(chain)) {
1182 NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
1204 return PTR_ERR(chain); 1183 return PTR_ERR(chain);
1184 }
1205 1185
1206 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1186 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1207 if (!skb2) 1187 if (!skb2)
@@ -1513,8 +1493,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
1513 nla[NFTA_CHAIN_NAME]) { 1493 nla[NFTA_CHAIN_NAME]) {
1514 struct nft_chain *chain2; 1494 struct nft_chain *chain2;
1515 1495
1516 chain2 = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], 1496 chain2 = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
1517 genmask);
1518 if (!IS_ERR(chain2)) 1497 if (!IS_ERR(chain2))
1519 return -EEXIST; 1498 return -EEXIST;
1520 } 1499 }
@@ -1564,9 +1543,9 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
1564 struct netlink_ext_ack *extack) 1543 struct netlink_ext_ack *extack)
1565{ 1544{
1566 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 1545 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1567 const struct nlattr * uninitialized_var(name);
1568 u8 genmask = nft_genmask_next(net); 1546 u8 genmask = nft_genmask_next(net);
1569 int family = nfmsg->nfgen_family; 1547 int family = nfmsg->nfgen_family;
1548 const struct nlattr *attr;
1570 struct nft_table *table; 1549 struct nft_table *table;
1571 struct nft_chain *chain; 1550 struct nft_chain *chain;
1572 u8 policy = NF_ACCEPT; 1551 u8 policy = NF_ACCEPT;
@@ -1576,36 +1555,46 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
1576 1555
1577 create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; 1556 create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
1578 1557
1579 table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, 1558 table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
1580 genmask); 1559 if (IS_ERR(table)) {
1581 if (IS_ERR(table)) 1560 NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
1582 return PTR_ERR(table); 1561 return PTR_ERR(table);
1562 }
1583 1563
1584 chain = NULL; 1564 chain = NULL;
1585 name = nla[NFTA_CHAIN_NAME]; 1565 attr = nla[NFTA_CHAIN_NAME];
1586 1566
1587 if (nla[NFTA_CHAIN_HANDLE]) { 1567 if (nla[NFTA_CHAIN_HANDLE]) {
1588 handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE])); 1568 handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
1589 chain = nf_tables_chain_lookup_byhandle(table, handle, genmask); 1569 chain = nft_chain_lookup_byhandle(table, handle, genmask);
1590 if (IS_ERR(chain)) 1570 if (IS_ERR(chain)) {
1571 NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_HANDLE]);
1591 return PTR_ERR(chain); 1572 return PTR_ERR(chain);
1573 }
1574 attr = nla[NFTA_CHAIN_HANDLE];
1592 } else { 1575 } else {
1593 chain = nf_tables_chain_lookup(table, name, genmask); 1576 chain = nft_chain_lookup(table, attr, genmask);
1594 if (IS_ERR(chain)) { 1577 if (IS_ERR(chain)) {
1595 if (PTR_ERR(chain) != -ENOENT) 1578 if (PTR_ERR(chain) != -ENOENT) {
1579 NL_SET_BAD_ATTR(extack, attr);
1596 return PTR_ERR(chain); 1580 return PTR_ERR(chain);
1581 }
1597 chain = NULL; 1582 chain = NULL;
1598 } 1583 }
1599 } 1584 }
1600 1585
1601 if (nla[NFTA_CHAIN_POLICY]) { 1586 if (nla[NFTA_CHAIN_POLICY]) {
1602 if (chain != NULL && 1587 if (chain != NULL &&
1603 !nft_is_base_chain(chain)) 1588 !nft_is_base_chain(chain)) {
1589 NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]);
1604 return -EOPNOTSUPP; 1590 return -EOPNOTSUPP;
1591 }
1605 1592
1606 if (chain == NULL && 1593 if (chain == NULL &&
1607 nla[NFTA_CHAIN_HOOK] == NULL) 1594 nla[NFTA_CHAIN_HOOK] == NULL) {
1595 NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]);
1608 return -EOPNOTSUPP; 1596 return -EOPNOTSUPP;
1597 }
1609 1598
1610 policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY])); 1599 policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
1611 switch (policy) { 1600 switch (policy) {
@@ -1620,8 +1609,10 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
1620 nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); 1609 nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
1621 1610
1622 if (chain != NULL) { 1611 if (chain != NULL) {
1623 if (nlh->nlmsg_flags & NLM_F_EXCL) 1612 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1613 NL_SET_BAD_ATTR(extack, attr);
1624 return -EEXIST; 1614 return -EEXIST;
1615 }
1625 if (nlh->nlmsg_flags & NLM_F_REPLACE) 1616 if (nlh->nlmsg_flags & NLM_F_REPLACE)
1626 return -EOPNOTSUPP; 1617 return -EOPNOTSUPP;
1627 1618
@@ -1638,28 +1629,34 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
1638{ 1629{
1639 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 1630 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1640 u8 genmask = nft_genmask_next(net); 1631 u8 genmask = nft_genmask_next(net);
1632 int family = nfmsg->nfgen_family;
1633 const struct nlattr *attr;
1641 struct nft_table *table; 1634 struct nft_table *table;
1642 struct nft_chain *chain; 1635 struct nft_chain *chain;
1643 struct nft_rule *rule; 1636 struct nft_rule *rule;
1644 int family = nfmsg->nfgen_family;
1645 struct nft_ctx ctx; 1637 struct nft_ctx ctx;
1646 u64 handle; 1638 u64 handle;
1647 u32 use; 1639 u32 use;
1648 int err; 1640 int err;
1649 1641
1650 table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, 1642 table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
1651 genmask); 1643 if (IS_ERR(table)) {
1652 if (IS_ERR(table)) 1644 NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
1653 return PTR_ERR(table); 1645 return PTR_ERR(table);
1646 }
1654 1647
1655 if (nla[NFTA_CHAIN_HANDLE]) { 1648 if (nla[NFTA_CHAIN_HANDLE]) {
1656 handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE])); 1649 attr = nla[NFTA_CHAIN_HANDLE];
1657 chain = nf_tables_chain_lookup_byhandle(table, handle, genmask); 1650 handle = be64_to_cpu(nla_get_be64(attr));
1651 chain = nft_chain_lookup_byhandle(table, handle, genmask);
1658 } else { 1652 } else {
1659 chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); 1653 attr = nla[NFTA_CHAIN_NAME];
1654 chain = nft_chain_lookup(table, attr, genmask);
1660 } 1655 }
1661 if (IS_ERR(chain)) 1656 if (IS_ERR(chain)) {
1657 NL_SET_BAD_ATTR(extack, attr);
1662 return PTR_ERR(chain); 1658 return PTR_ERR(chain);
1659 }
1663 1660
1664 if (nlh->nlmsg_flags & NLM_F_NONREC && 1661 if (nlh->nlmsg_flags & NLM_F_NONREC &&
1665 chain->use > 0) 1662 chain->use > 0)
@@ -1681,8 +1678,10 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
1681 /* There are rules and elements that are still holding references to us, 1678 /* There are rules and elements that are still holding references to us,
1682 * we cannot do a recursive removal in this case. 1679 * we cannot do a recursive removal in this case.
1683 */ 1680 */
1684 if (use > 0) 1681 if (use > 0) {
1682 NL_SET_BAD_ATTR(extack, attr);
1685 return -EBUSY; 1683 return -EBUSY;
1684 }
1686 1685
1687 return nft_delchain(&ctx); 1686 return nft_delchain(&ctx);
1688} 1687}
@@ -1939,8 +1938,8 @@ void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr)
1939 * Rules 1938 * Rules
1940 */ 1939 */
1941 1940
1942static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain, 1941static struct nft_rule *__nft_rule_lookup(const struct nft_chain *chain,
1943 u64 handle) 1942 u64 handle)
1944{ 1943{
1945 struct nft_rule *rule; 1944 struct nft_rule *rule;
1946 1945
@@ -1953,13 +1952,13 @@ static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain,
1953 return ERR_PTR(-ENOENT); 1952 return ERR_PTR(-ENOENT);
1954} 1953}
1955 1954
1956static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain, 1955static struct nft_rule *nft_rule_lookup(const struct nft_chain *chain,
1957 const struct nlattr *nla) 1956 const struct nlattr *nla)
1958{ 1957{
1959 if (nla == NULL) 1958 if (nla == NULL)
1960 return ERR_PTR(-EINVAL); 1959 return ERR_PTR(-EINVAL);
1961 1960
1962 return __nf_tables_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla))); 1961 return __nft_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
1963} 1962}
1964 1963
1965static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { 1964static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
@@ -2191,18 +2190,23 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
2191 return netlink_dump_start(nlsk, skb, nlh, &c); 2190 return netlink_dump_start(nlsk, skb, nlh, &c);
2192 } 2191 }
2193 2192
2194 table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family, 2193 table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
2195 genmask); 2194 if (IS_ERR(table)) {
2196 if (IS_ERR(table)) 2195 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
2197 return PTR_ERR(table); 2196 return PTR_ERR(table);
2197 }
2198 2198
2199 chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); 2199 chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
2200 if (IS_ERR(chain)) 2200 if (IS_ERR(chain)) {
2201 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
2201 return PTR_ERR(chain); 2202 return PTR_ERR(chain);
2203 }
2202 2204
2203 rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); 2205 rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
2204 if (IS_ERR(rule)) 2206 if (IS_ERR(rule)) {
2207 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
2205 return PTR_ERR(rule); 2208 return PTR_ERR(rule);
2209 }
2206 2210
2207 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2211 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2208 if (!skb2) 2212 if (!skb2)
@@ -2265,23 +2269,30 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
2265 2269
2266 create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; 2270 create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
2267 2271
2268 table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family, 2272 table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
2269 genmask); 2273 if (IS_ERR(table)) {
2270 if (IS_ERR(table)) 2274 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
2271 return PTR_ERR(table); 2275 return PTR_ERR(table);
2276 }
2272 2277
2273 chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); 2278 chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
2274 if (IS_ERR(chain)) 2279 if (IS_ERR(chain)) {
2280 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
2275 return PTR_ERR(chain); 2281 return PTR_ERR(chain);
2282 }
2276 2283
2277 if (nla[NFTA_RULE_HANDLE]) { 2284 if (nla[NFTA_RULE_HANDLE]) {
2278 handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE])); 2285 handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
2279 rule = __nf_tables_rule_lookup(chain, handle); 2286 rule = __nft_rule_lookup(chain, handle);
2280 if (IS_ERR(rule)) 2287 if (IS_ERR(rule)) {
2288 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
2281 return PTR_ERR(rule); 2289 return PTR_ERR(rule);
2290 }
2282 2291
2283 if (nlh->nlmsg_flags & NLM_F_EXCL) 2292 if (nlh->nlmsg_flags & NLM_F_EXCL) {
2293 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
2284 return -EEXIST; 2294 return -EEXIST;
2295 }
2285 if (nlh->nlmsg_flags & NLM_F_REPLACE) 2296 if (nlh->nlmsg_flags & NLM_F_REPLACE)
2286 old_rule = rule; 2297 old_rule = rule;
2287 else 2298 else
@@ -2300,9 +2311,11 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
2300 return -EOPNOTSUPP; 2311 return -EOPNOTSUPP;
2301 2312
2302 pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION])); 2313 pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
2303 old_rule = __nf_tables_rule_lookup(chain, pos_handle); 2314 old_rule = __nft_rule_lookup(chain, pos_handle);
2304 if (IS_ERR(old_rule)) 2315 if (IS_ERR(old_rule)) {
2316 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
2305 return PTR_ERR(old_rule); 2317 return PTR_ERR(old_rule);
2318 }
2306 } 2319 }
2307 2320
2308 nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); 2321 nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
@@ -2440,32 +2453,37 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
2440 int family = nfmsg->nfgen_family, err = 0; 2453 int family = nfmsg->nfgen_family, err = 0;
2441 struct nft_ctx ctx; 2454 struct nft_ctx ctx;
2442 2455
2443 table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family, 2456 table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
2444 genmask); 2457 if (IS_ERR(table)) {
2445 if (IS_ERR(table)) 2458 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
2446 return PTR_ERR(table); 2459 return PTR_ERR(table);
2460 }
2447 2461
2448 if (nla[NFTA_RULE_CHAIN]) { 2462 if (nla[NFTA_RULE_CHAIN]) {
2449 chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], 2463 chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
2450 genmask); 2464 if (IS_ERR(chain)) {
2451 if (IS_ERR(chain)) 2465 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
2452 return PTR_ERR(chain); 2466 return PTR_ERR(chain);
2467 }
2453 } 2468 }
2454 2469
2455 nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); 2470 nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
2456 2471
2457 if (chain) { 2472 if (chain) {
2458 if (nla[NFTA_RULE_HANDLE]) { 2473 if (nla[NFTA_RULE_HANDLE]) {
2459 rule = nf_tables_rule_lookup(chain, 2474 rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
2460 nla[NFTA_RULE_HANDLE]); 2475 if (IS_ERR(rule)) {
2461 if (IS_ERR(rule)) 2476 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
2462 return PTR_ERR(rule); 2477 return PTR_ERR(rule);
2478 }
2463 2479
2464 err = nft_delrule(&ctx, rule); 2480 err = nft_delrule(&ctx, rule);
2465 } else if (nla[NFTA_RULE_ID]) { 2481 } else if (nla[NFTA_RULE_ID]) {
2466 rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_ID]); 2482 rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_ID]);
2467 if (IS_ERR(rule)) 2483 if (IS_ERR(rule)) {
2484 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_ID]);
2468 return PTR_ERR(rule); 2485 return PTR_ERR(rule);
2486 }
2469 2487
2470 err = nft_delrule(&ctx, rule); 2488 err = nft_delrule(&ctx, rule);
2471 } else { 2489 } else {
@@ -2510,14 +2528,12 @@ void nft_unregister_set(struct nft_set_type *type)
2510EXPORT_SYMBOL_GPL(nft_unregister_set); 2528EXPORT_SYMBOL_GPL(nft_unregister_set);
2511 2529
2512#define NFT_SET_FEATURES (NFT_SET_INTERVAL | NFT_SET_MAP | \ 2530#define NFT_SET_FEATURES (NFT_SET_INTERVAL | NFT_SET_MAP | \
2513 NFT_SET_TIMEOUT | NFT_SET_OBJECT) 2531 NFT_SET_TIMEOUT | NFT_SET_OBJECT | \
2532 NFT_SET_EVAL)
2514 2533
2515static bool nft_set_ops_candidate(const struct nft_set_ops *ops, u32 flags) 2534static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
2516{ 2535{
2517 if ((flags & NFT_SET_EVAL) && !ops->update) 2536 return (flags & type->features) == (flags & NFT_SET_FEATURES);
2518 return false;
2519
2520 return (flags & ops->features) == (flags & NFT_SET_FEATURES);
2521} 2537}
2522 2538
2523/* 2539/*
@@ -2554,14 +2570,9 @@ nft_select_set_ops(const struct nft_ctx *ctx,
2554 best.space = ~0; 2570 best.space = ~0;
2555 2571
2556 list_for_each_entry(type, &nf_tables_set_types, list) { 2572 list_for_each_entry(type, &nf_tables_set_types, list) {
2557 if (!type->select_ops) 2573 ops = &type->ops;
2558 ops = type->ops;
2559 else
2560 ops = type->select_ops(ctx, desc, flags);
2561 if (!ops)
2562 continue;
2563 2574
2564 if (!nft_set_ops_candidate(ops, flags)) 2575 if (!nft_set_ops_candidate(type, flags))
2565 continue; 2576 continue;
2566 if (!ops->estimate(desc, flags, &est)) 2577 if (!ops->estimate(desc, flags, &est))
2567 continue; 2578 continue;
@@ -2592,7 +2603,7 @@ nft_select_set_ops(const struct nft_ctx *ctx,
2592 if (!try_module_get(type->owner)) 2603 if (!try_module_get(type->owner))
2593 continue; 2604 continue;
2594 if (bops != NULL) 2605 if (bops != NULL)
2595 module_put(bops->type->owner); 2606 module_put(to_set_type(bops)->owner);
2596 2607
2597 bops = ops; 2608 bops = ops;
2598 best = est; 2609 best = est;
@@ -2633,6 +2644,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
2633 const struct sk_buff *skb, 2644 const struct sk_buff *skb,
2634 const struct nlmsghdr *nlh, 2645 const struct nlmsghdr *nlh,
2635 const struct nlattr * const nla[], 2646 const struct nlattr * const nla[],
2647 struct netlink_ext_ack *extack,
2636 u8 genmask) 2648 u8 genmask)
2637{ 2649{
2638 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 2650 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -2640,18 +2652,20 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
2640 struct nft_table *table = NULL; 2652 struct nft_table *table = NULL;
2641 2653
2642 if (nla[NFTA_SET_TABLE] != NULL) { 2654 if (nla[NFTA_SET_TABLE] != NULL) {
2643 table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], 2655 table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family,
2644 family, genmask); 2656 genmask);
2645 if (IS_ERR(table)) 2657 if (IS_ERR(table)) {
2658 NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
2646 return PTR_ERR(table); 2659 return PTR_ERR(table);
2660 }
2647 } 2661 }
2648 2662
2649 nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla); 2663 nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
2650 return 0; 2664 return 0;
2651} 2665}
2652 2666
2653static struct nft_set *nf_tables_set_lookup(const struct nft_table *table, 2667static struct nft_set *nft_set_lookup(const struct nft_table *table,
2654 const struct nlattr *nla, u8 genmask) 2668 const struct nlattr *nla, u8 genmask)
2655{ 2669{
2656 struct nft_set *set; 2670 struct nft_set *set;
2657 2671
@@ -2666,14 +2680,12 @@ static struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
2666 return ERR_PTR(-ENOENT); 2680 return ERR_PTR(-ENOENT);
2667} 2681}
2668 2682
2669static struct nft_set *nf_tables_set_lookup_byhandle(const struct nft_table *table, 2683static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table,
2670 const struct nlattr *nla, u8 genmask) 2684 const struct nlattr *nla,
2685 u8 genmask)
2671{ 2686{
2672 struct nft_set *set; 2687 struct nft_set *set;
2673 2688
2674 if (nla == NULL)
2675 return ERR_PTR(-EINVAL);
2676
2677 list_for_each_entry(set, &table->sets, list) { 2689 list_for_each_entry(set, &table->sets, list) {
2678 if (be64_to_cpu(nla_get_be64(nla)) == set->handle && 2690 if (be64_to_cpu(nla_get_be64(nla)) == set->handle &&
2679 nft_active_genmask(set, genmask)) 2691 nft_active_genmask(set, genmask))
@@ -2682,9 +2694,8 @@ static struct nft_set *nf_tables_set_lookup_byhandle(const struct nft_table *tab
2682 return ERR_PTR(-ENOENT); 2694 return ERR_PTR(-ENOENT);
2683} 2695}
2684 2696
2685static struct nft_set *nf_tables_set_lookup_byid(const struct net *net, 2697static struct nft_set *nft_set_lookup_byid(const struct net *net,
2686 const struct nlattr *nla, 2698 const struct nlattr *nla, u8 genmask)
2687 u8 genmask)
2688{ 2699{
2689 struct nft_trans *trans; 2700 struct nft_trans *trans;
2690 u32 id = ntohl(nla_get_be32(nla)); 2701 u32 id = ntohl(nla_get_be32(nla));
@@ -2708,12 +2719,12 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
2708{ 2719{
2709 struct nft_set *set; 2720 struct nft_set *set;
2710 2721
2711 set = nf_tables_set_lookup(table, nla_set_name, genmask); 2722 set = nft_set_lookup(table, nla_set_name, genmask);
2712 if (IS_ERR(set)) { 2723 if (IS_ERR(set)) {
2713 if (!nla_set_id) 2724 if (!nla_set_id)
2714 return set; 2725 return set;
2715 2726
2716 set = nf_tables_set_lookup_byid(net, nla_set_id, genmask); 2727 set = nft_set_lookup_byid(net, nla_set_id, genmask);
2717 } 2728 }
2718 return set; 2729 return set;
2719} 2730}
@@ -2773,6 +2784,27 @@ cont:
2773 return 0; 2784 return 0;
2774} 2785}
2775 2786
2787static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result)
2788{
2789 u64 ms = be64_to_cpu(nla_get_be64(nla));
2790 u64 max = (u64)(~((u64)0));
2791
2792 max = div_u64(max, NSEC_PER_MSEC);
2793 if (ms >= max)
2794 return -ERANGE;
2795
2796 ms *= NSEC_PER_MSEC;
2797 *result = nsecs_to_jiffies64(ms);
2798 return 0;
2799}
2800
2801static u64 nf_jiffies64_to_msecs(u64 input)
2802{
2803 u64 ms = jiffies64_to_nsecs(input);
2804
2805 return cpu_to_be64(div_u64(ms, NSEC_PER_MSEC));
2806}
2807
2776static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, 2808static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
2777 const struct nft_set *set, u16 event, u16 flags) 2809 const struct nft_set *set, u16 event, u16 flags)
2778{ 2810{
@@ -2820,7 +2852,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
2820 2852
2821 if (set->timeout && 2853 if (set->timeout &&
2822 nla_put_be64(skb, NFTA_SET_TIMEOUT, 2854 nla_put_be64(skb, NFTA_SET_TIMEOUT,
2823 cpu_to_be64(jiffies_to_msecs(set->timeout)), 2855 nf_jiffies64_to_msecs(set->timeout),
2824 NFTA_SET_PAD)) 2856 NFTA_SET_PAD))
2825 goto nla_put_failure; 2857 goto nla_put_failure;
2826 if (set->gc_int && 2858 if (set->gc_int &&
@@ -2958,7 +2990,8 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
2958 int err; 2990 int err;
2959 2991
2960 /* Verify existence before starting dump */ 2992 /* Verify existence before starting dump */
2961 err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask); 2993 err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, extack,
2994 genmask);
2962 if (err < 0) 2995 if (err < 0)
2963 return err; 2996 return err;
2964 2997
@@ -2985,7 +3018,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
2985 if (!nla[NFTA_SET_TABLE]) 3018 if (!nla[NFTA_SET_TABLE])
2986 return -EINVAL; 3019 return -EINVAL;
2987 3020
2988 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); 3021 set = nft_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
2989 if (IS_ERR(set)) 3022 if (IS_ERR(set))
2990 return PTR_ERR(set); 3023 return PTR_ERR(set);
2991 3024
@@ -3115,8 +3148,10 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
3115 if (nla[NFTA_SET_TIMEOUT] != NULL) { 3148 if (nla[NFTA_SET_TIMEOUT] != NULL) {
3116 if (!(flags & NFT_SET_TIMEOUT)) 3149 if (!(flags & NFT_SET_TIMEOUT))
3117 return -EINVAL; 3150 return -EINVAL;
3118 timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( 3151
3119 nla[NFTA_SET_TIMEOUT]))); 3152 err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout);
3153 if (err)
3154 return err;
3120 } 3155 }
3121 gc_int = 0; 3156 gc_int = 0;
3122 if (nla[NFTA_SET_GC_INTERVAL] != NULL) { 3157 if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
@@ -3137,22 +3172,28 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
3137 3172
3138 create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; 3173 create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
3139 3174
3140 table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], family, 3175 table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, genmask);
3141 genmask); 3176 if (IS_ERR(table)) {
3142 if (IS_ERR(table)) 3177 NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
3143 return PTR_ERR(table); 3178 return PTR_ERR(table);
3179 }
3144 3180
3145 nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); 3181 nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
3146 3182
3147 set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask); 3183 set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
3148 if (IS_ERR(set)) { 3184 if (IS_ERR(set)) {
3149 if (PTR_ERR(set) != -ENOENT) 3185 if (PTR_ERR(set) != -ENOENT) {
3186 NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
3150 return PTR_ERR(set); 3187 return PTR_ERR(set);
3188 }
3151 } else { 3189 } else {
3152 if (nlh->nlmsg_flags & NLM_F_EXCL) 3190 if (nlh->nlmsg_flags & NLM_F_EXCL) {
3191 NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
3153 return -EEXIST; 3192 return -EEXIST;
3193 }
3154 if (nlh->nlmsg_flags & NLM_F_REPLACE) 3194 if (nlh->nlmsg_flags & NLM_F_REPLACE)
3155 return -EOPNOTSUPP; 3195 return -EOPNOTSUPP;
3196
3156 return 0; 3197 return 0;
3157 } 3198 }
3158 3199
@@ -3229,14 +3270,14 @@ err3:
3229err2: 3270err2:
3230 kvfree(set); 3271 kvfree(set);
3231err1: 3272err1:
3232 module_put(ops->type->owner); 3273 module_put(to_set_type(ops)->owner);
3233 return err; 3274 return err;
3234} 3275}
3235 3276
3236static void nft_set_destroy(struct nft_set *set) 3277static void nft_set_destroy(struct nft_set *set)
3237{ 3278{
3238 set->ops->destroy(set); 3279 set->ops->destroy(set);
3239 module_put(set->ops->type->owner); 3280 module_put(to_set_type(set->ops)->owner);
3240 kfree(set->name); 3281 kfree(set->name);
3241 kvfree(set); 3282 kvfree(set);
3242} 3283}
@@ -3255,6 +3296,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
3255{ 3296{
3256 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 3297 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
3257 u8 genmask = nft_genmask_next(net); 3298 u8 genmask = nft_genmask_next(net);
3299 const struct nlattr *attr;
3258 struct nft_set *set; 3300 struct nft_set *set;
3259 struct nft_ctx ctx; 3301 struct nft_ctx ctx;
3260 int err; 3302 int err;
@@ -3264,20 +3306,28 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
3264 if (nla[NFTA_SET_TABLE] == NULL) 3306 if (nla[NFTA_SET_TABLE] == NULL)
3265 return -EINVAL; 3307 return -EINVAL;
3266 3308
3267 err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask); 3309 err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, extack,
3310 genmask);
3268 if (err < 0) 3311 if (err < 0)
3269 return err; 3312 return err;
3270 3313
3271 if (nla[NFTA_SET_HANDLE]) 3314 if (nla[NFTA_SET_HANDLE]) {
3272 set = nf_tables_set_lookup_byhandle(ctx.table, nla[NFTA_SET_HANDLE], genmask); 3315 attr = nla[NFTA_SET_HANDLE];
3273 else 3316 set = nft_set_lookup_byhandle(ctx.table, attr, genmask);
3274 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); 3317 } else {
3275 if (IS_ERR(set)) 3318 attr = nla[NFTA_SET_NAME];
3276 return PTR_ERR(set); 3319 set = nft_set_lookup(ctx.table, attr, genmask);
3320 }
3277 3321
3322 if (IS_ERR(set)) {
3323 NL_SET_BAD_ATTR(extack, attr);
3324 return PTR_ERR(set);
3325 }
3278 if (!list_empty(&set->bindings) || 3326 if (!list_empty(&set->bindings) ||
3279 (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) 3327 (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) {
3328 NL_SET_BAD_ATTR(extack, attr);
3280 return -EBUSY; 3329 return -EBUSY;
3330 }
3281 3331
3282 return nft_delset(&ctx, set); 3332 return nft_delset(&ctx, set);
3283} 3333}
@@ -3367,8 +3417,8 @@ const struct nft_set_ext_type nft_set_ext_types[] = {
3367 .align = __alignof__(u64), 3417 .align = __alignof__(u64),
3368 }, 3418 },
3369 [NFT_SET_EXT_EXPIRATION] = { 3419 [NFT_SET_EXT_EXPIRATION] = {
3370 .len = sizeof(unsigned long), 3420 .len = sizeof(u64),
3371 .align = __alignof__(unsigned long), 3421 .align = __alignof__(u64),
3372 }, 3422 },
3373 [NFT_SET_EXT_USERDATA] = { 3423 [NFT_SET_EXT_USERDATA] = {
3374 .len = sizeof(struct nft_userdata), 3424 .len = sizeof(struct nft_userdata),
@@ -3405,16 +3455,19 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
3405 const struct sk_buff *skb, 3455 const struct sk_buff *skb,
3406 const struct nlmsghdr *nlh, 3456 const struct nlmsghdr *nlh,
3407 const struct nlattr * const nla[], 3457 const struct nlattr * const nla[],
3458 struct netlink_ext_ack *extack,
3408 u8 genmask) 3459 u8 genmask)
3409{ 3460{
3410 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 3461 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
3411 int family = nfmsg->nfgen_family; 3462 int family = nfmsg->nfgen_family;
3412 struct nft_table *table; 3463 struct nft_table *table;
3413 3464
3414 table = nf_tables_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], 3465 table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
3415 family, genmask); 3466 genmask);
3416 if (IS_ERR(table)) 3467 if (IS_ERR(table)) {
3468 NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
3417 return PTR_ERR(table); 3469 return PTR_ERR(table);
3470 }
3418 3471
3419 nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla); 3472 nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
3420 return 0; 3473 return 0;
@@ -3458,22 +3511,21 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
3458 3511
3459 if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && 3512 if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
3460 nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, 3513 nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
3461 cpu_to_be64(jiffies_to_msecs( 3514 nf_jiffies64_to_msecs(*nft_set_ext_timeout(ext)),
3462 *nft_set_ext_timeout(ext))),
3463 NFTA_SET_ELEM_PAD)) 3515 NFTA_SET_ELEM_PAD))
3464 goto nla_put_failure; 3516 goto nla_put_failure;
3465 3517
3466 if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { 3518 if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
3467 unsigned long expires, now = jiffies; 3519 u64 expires, now = get_jiffies_64();
3468 3520
3469 expires = *nft_set_ext_expiration(ext); 3521 expires = *nft_set_ext_expiration(ext);
3470 if (time_before(now, expires)) 3522 if (time_before64(now, expires))
3471 expires -= now; 3523 expires -= now;
3472 else 3524 else
3473 expires = 0; 3525 expires = 0;
3474 3526
3475 if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION, 3527 if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
3476 cpu_to_be64(jiffies_to_msecs(expires)), 3528 nf_jiffies64_to_msecs(expires),
3477 NFTA_SET_ELEM_PAD)) 3529 NFTA_SET_ELEM_PAD))
3478 goto nla_put_failure; 3530 goto nla_put_failure;
3479 } 3531 }
@@ -3744,12 +3796,12 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
3744 struct nft_ctx ctx; 3796 struct nft_ctx ctx;
3745 int rem, err = 0; 3797 int rem, err = 0;
3746 3798
3747 err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); 3799 err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
3800 genmask);
3748 if (err < 0) 3801 if (err < 0)
3749 return err; 3802 return err;
3750 3803
3751 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], 3804 set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
3752 genmask);
3753 if (IS_ERR(set)) 3805 if (IS_ERR(set))
3754 return PTR_ERR(set); 3806 return PTR_ERR(set);
3755 3807
@@ -3848,7 +3900,7 @@ void *nft_set_elem_init(const struct nft_set *set,
3848 memcpy(nft_set_ext_data(ext), data, set->dlen); 3900 memcpy(nft_set_ext_data(ext), data, set->dlen);
3849 if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) 3901 if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION))
3850 *nft_set_ext_expiration(ext) = 3902 *nft_set_ext_expiration(ext) =
3851 jiffies + timeout; 3903 get_jiffies_64() + timeout;
3852 if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) 3904 if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
3853 *nft_set_ext_timeout(ext) = timeout; 3905 *nft_set_ext_timeout(ext) = timeout;
3854 3906
@@ -3935,8 +3987,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
3935 if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) { 3987 if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) {
3936 if (!(set->flags & NFT_SET_TIMEOUT)) 3988 if (!(set->flags & NFT_SET_TIMEOUT))
3937 return -EINVAL; 3989 return -EINVAL;
3938 timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( 3990 err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_TIMEOUT],
3939 nla[NFTA_SET_ELEM_TIMEOUT]))); 3991 &timeout);
3992 if (err)
3993 return err;
3940 } else if (set->flags & NFT_SET_TIMEOUT) { 3994 } else if (set->flags & NFT_SET_TIMEOUT) {
3941 timeout = set->timeout; 3995 timeout = set->timeout;
3942 } 3996 }
@@ -3961,8 +4015,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
3961 err = -EINVAL; 4015 err = -EINVAL;
3962 goto err2; 4016 goto err2;
3963 } 4017 }
3964 obj = nf_tables_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF], 4018 obj = nft_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF],
3965 set->objtype, genmask); 4019 set->objtype, genmask);
3966 if (IS_ERR(obj)) { 4020 if (IS_ERR(obj)) {
3967 err = PTR_ERR(obj); 4021 err = PTR_ERR(obj);
3968 goto err2; 4022 goto err2;
@@ -4099,7 +4153,8 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
4099 if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) 4153 if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
4100 return -EINVAL; 4154 return -EINVAL;
4101 4155
4102 err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); 4156 err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
4157 genmask);
4103 if (err < 0) 4158 if (err < 0)
4104 return err; 4159 return err;
4105 4160
@@ -4287,12 +4342,12 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
4287 struct nft_ctx ctx; 4342 struct nft_ctx ctx;
4288 int rem, err = 0; 4343 int rem, err = 0;
4289 4344
4290 err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); 4345 err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
4346 genmask);
4291 if (err < 0) 4347 if (err < 0)
4292 return err; 4348 return err;
4293 4349
4294 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], 4350 set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
4295 genmask);
4296 if (IS_ERR(set)) 4351 if (IS_ERR(set))
4297 return PTR_ERR(set); 4352 return PTR_ERR(set);
4298 if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) 4353 if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
@@ -4380,9 +4435,9 @@ void nft_unregister_obj(struct nft_object_type *obj_type)
4380} 4435}
4381EXPORT_SYMBOL_GPL(nft_unregister_obj); 4436EXPORT_SYMBOL_GPL(nft_unregister_obj);
4382 4437
4383struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, 4438struct nft_object *nft_obj_lookup(const struct nft_table *table,
4384 const struct nlattr *nla, 4439 const struct nlattr *nla, u32 objtype,
4385 u32 objtype, u8 genmask) 4440 u8 genmask)
4386{ 4441{
4387 struct nft_object *obj; 4442 struct nft_object *obj;
4388 4443
@@ -4394,11 +4449,11 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
4394 } 4449 }
4395 return ERR_PTR(-ENOENT); 4450 return ERR_PTR(-ENOENT);
4396} 4451}
4397EXPORT_SYMBOL_GPL(nf_tables_obj_lookup); 4452EXPORT_SYMBOL_GPL(nft_obj_lookup);
4398 4453
4399static struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table, 4454static struct nft_object *nft_obj_lookup_byhandle(const struct nft_table *table,
4400 const struct nlattr *nla, 4455 const struct nlattr *nla,
4401 u32 objtype, u8 genmask) 4456 u32 objtype, u8 genmask)
4402{ 4457{
4403 struct nft_object *obj; 4458 struct nft_object *obj;
4404 4459
@@ -4542,22 +4597,25 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
4542 !nla[NFTA_OBJ_DATA]) 4597 !nla[NFTA_OBJ_DATA])
4543 return -EINVAL; 4598 return -EINVAL;
4544 4599
4545 table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family, 4600 table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
4546 genmask); 4601 if (IS_ERR(table)) {
4547 if (IS_ERR(table)) 4602 NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
4548 return PTR_ERR(table); 4603 return PTR_ERR(table);
4604 }
4549 4605
4550 objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); 4606 objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
4551 obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); 4607 obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
4552 if (IS_ERR(obj)) { 4608 if (IS_ERR(obj)) {
4553 err = PTR_ERR(obj); 4609 err = PTR_ERR(obj);
4554 if (err != -ENOENT) 4610 if (err != -ENOENT) {
4611 NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
4555 return err; 4612 return err;
4556 4613 }
4557 } else { 4614 } else {
4558 if (nlh->nlmsg_flags & NLM_F_EXCL) 4615 if (nlh->nlmsg_flags & NLM_F_EXCL) {
4616 NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
4559 return -EEXIST; 4617 return -EEXIST;
4560 4618 }
4561 return 0; 4619 return 0;
4562 } 4620 }
4563 4621
@@ -4768,15 +4826,18 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
4768 !nla[NFTA_OBJ_TYPE]) 4826 !nla[NFTA_OBJ_TYPE])
4769 return -EINVAL; 4827 return -EINVAL;
4770 4828
4771 table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family, 4829 table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
4772 genmask); 4830 if (IS_ERR(table)) {
4773 if (IS_ERR(table)) 4831 NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
4774 return PTR_ERR(table); 4832 return PTR_ERR(table);
4833 }
4775 4834
4776 objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); 4835 objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
4777 obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); 4836 obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
4778 if (IS_ERR(obj)) 4837 if (IS_ERR(obj)) {
4838 NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
4779 return PTR_ERR(obj); 4839 return PTR_ERR(obj);
4840 }
4780 4841
4781 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 4842 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
4782 if (!skb2) 4843 if (!skb2)
@@ -4815,6 +4876,7 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
4815 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 4876 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
4816 u8 genmask = nft_genmask_next(net); 4877 u8 genmask = nft_genmask_next(net);
4817 int family = nfmsg->nfgen_family; 4878 int family = nfmsg->nfgen_family;
4879 const struct nlattr *attr;
4818 struct nft_table *table; 4880 struct nft_table *table;
4819 struct nft_object *obj; 4881 struct nft_object *obj;
4820 struct nft_ctx ctx; 4882 struct nft_ctx ctx;
@@ -4824,22 +4886,29 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
4824 (!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE])) 4886 (!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE]))
4825 return -EINVAL; 4887 return -EINVAL;
4826 4888
4827 table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family, 4889 table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
4828 genmask); 4890 if (IS_ERR(table)) {
4829 if (IS_ERR(table)) 4891 NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
4830 return PTR_ERR(table); 4892 return PTR_ERR(table);
4893 }
4831 4894
4832 objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); 4895 objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
4833 if (nla[NFTA_OBJ_HANDLE]) 4896 if (nla[NFTA_OBJ_HANDLE]) {
4834 obj = nf_tables_obj_lookup_byhandle(table, nla[NFTA_OBJ_HANDLE], 4897 attr = nla[NFTA_OBJ_HANDLE];
4835 objtype, genmask); 4898 obj = nft_obj_lookup_byhandle(table, attr, objtype, genmask);
4836 else 4899 } else {
4837 obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], 4900 attr = nla[NFTA_OBJ_NAME];
4838 objtype, genmask); 4901 obj = nft_obj_lookup(table, attr, objtype, genmask);
4839 if (IS_ERR(obj)) 4902 }
4903
4904 if (IS_ERR(obj)) {
4905 NL_SET_BAD_ATTR(extack, attr);
4840 return PTR_ERR(obj); 4906 return PTR_ERR(obj);
4841 if (obj->use > 0) 4907 }
4908 if (obj->use > 0) {
4909 NL_SET_BAD_ATTR(extack, attr);
4842 return -EBUSY; 4910 return -EBUSY;
4911 }
4843 4912
4844 nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); 4913 nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
4845 4914
@@ -4910,9 +4979,8 @@ static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
4910 [NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 }, 4979 [NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 },
4911}; 4980};
4912 4981
4913struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table, 4982struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
4914 const struct nlattr *nla, 4983 const struct nlattr *nla, u8 genmask)
4915 u8 genmask)
4916{ 4984{
4917 struct nft_flowtable *flowtable; 4985 struct nft_flowtable *flowtable;
4918 4986
@@ -4923,11 +4991,11 @@ struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
4923 } 4991 }
4924 return ERR_PTR(-ENOENT); 4992 return ERR_PTR(-ENOENT);
4925} 4993}
4926EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup); 4994EXPORT_SYMBOL_GPL(nft_flowtable_lookup);
4927 4995
4928static struct nft_flowtable * 4996static struct nft_flowtable *
4929nf_tables_flowtable_lookup_byhandle(const struct nft_table *table, 4997nft_flowtable_lookup_byhandle(const struct nft_table *table,
4930 const struct nlattr *nla, u8 genmask) 4998 const struct nlattr *nla, u8 genmask)
4931{ 4999{
4932 struct nft_flowtable *flowtable; 5000 struct nft_flowtable *flowtable;
4933 5001
@@ -5026,7 +5094,7 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
5026 flowtable->ops[i].pf = NFPROTO_NETDEV; 5094 flowtable->ops[i].pf = NFPROTO_NETDEV;
5027 flowtable->ops[i].hooknum = hooknum; 5095 flowtable->ops[i].hooknum = hooknum;
5028 flowtable->ops[i].priority = priority; 5096 flowtable->ops[i].priority = priority;
5029 flowtable->ops[i].priv = &flowtable->data.rhashtable; 5097 flowtable->ops[i].priv = &flowtable->data;
5030 flowtable->ops[i].hook = flowtable->data.type->hook; 5098 flowtable->ops[i].hook = flowtable->data.type->hook;
5031 flowtable->ops[i].dev = dev_array[i]; 5099 flowtable->ops[i].dev = dev_array[i];
5032 flowtable->dev_name[i] = kstrdup(dev_array[i]->name, 5100 flowtable->dev_name[i] = kstrdup(dev_array[i]->name,
@@ -5067,23 +5135,6 @@ static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family)
5067 return ERR_PTR(-ENOENT); 5135 return ERR_PTR(-ENOENT);
5068} 5136}
5069 5137
5070void nft_flow_table_iterate(struct net *net,
5071 void (*iter)(struct nf_flowtable *flowtable, void *data),
5072 void *data)
5073{
5074 struct nft_flowtable *flowtable;
5075 const struct nft_table *table;
5076
5077 nfnl_lock(NFNL_SUBSYS_NFTABLES);
5078 list_for_each_entry(table, &net->nft.tables, list) {
5079 list_for_each_entry(flowtable, &table->flowtables, list) {
5080 iter(&flowtable->data, data);
5081 }
5082 }
5083 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
5084}
5085EXPORT_SYMBOL_GPL(nft_flow_table_iterate);
5086
5087static void nft_unregister_flowtable_net_hooks(struct net *net, 5138static void nft_unregister_flowtable_net_hooks(struct net *net,
5088 struct nft_flowtable *flowtable) 5139 struct nft_flowtable *flowtable)
5089{ 5140{
@@ -5117,20 +5168,26 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
5117 !nla[NFTA_FLOWTABLE_HOOK]) 5168 !nla[NFTA_FLOWTABLE_HOOK])
5118 return -EINVAL; 5169 return -EINVAL;
5119 5170
5120 table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], 5171 table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
5121 family, genmask); 5172 genmask);
5122 if (IS_ERR(table)) 5173 if (IS_ERR(table)) {
5174 NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]);
5123 return PTR_ERR(table); 5175 return PTR_ERR(table);
5176 }
5124 5177
5125 flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], 5178 flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
5126 genmask); 5179 genmask);
5127 if (IS_ERR(flowtable)) { 5180 if (IS_ERR(flowtable)) {
5128 err = PTR_ERR(flowtable); 5181 err = PTR_ERR(flowtable);
5129 if (err != -ENOENT) 5182 if (err != -ENOENT) {
5183 NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
5130 return err; 5184 return err;
5185 }
5131 } else { 5186 } else {
5132 if (nlh->nlmsg_flags & NLM_F_EXCL) 5187 if (nlh->nlmsg_flags & NLM_F_EXCL) {
5188 NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
5133 return -EEXIST; 5189 return -EEXIST;
5190 }
5134 5191
5135 return 0; 5192 return 0;
5136 } 5193 }
@@ -5157,14 +5214,14 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
5157 } 5214 }
5158 5215
5159 flowtable->data.type = type; 5216 flowtable->data.type = type;
5160 err = rhashtable_init(&flowtable->data.rhashtable, type->params); 5217 err = type->init(&flowtable->data);
5161 if (err < 0) 5218 if (err < 0)
5162 goto err3; 5219 goto err3;
5163 5220
5164 err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK], 5221 err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
5165 flowtable); 5222 flowtable);
5166 if (err < 0) 5223 if (err < 0)
5167 goto err3; 5224 goto err4;
5168 5225
5169 for (i = 0; i < flowtable->ops_len; i++) { 5226 for (i = 0; i < flowtable->ops_len; i++) {
5170 if (!flowtable->ops[i].dev) 5227 if (!flowtable->ops[i].dev)
@@ -5178,37 +5235,35 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
5178 if (flowtable->ops[i].dev == ft->ops[k].dev && 5235 if (flowtable->ops[i].dev == ft->ops[k].dev &&
5179 flowtable->ops[i].pf == ft->ops[k].pf) { 5236 flowtable->ops[i].pf == ft->ops[k].pf) {
5180 err = -EBUSY; 5237 err = -EBUSY;
5181 goto err4; 5238 goto err5;
5182 } 5239 }
5183 } 5240 }
5184 } 5241 }
5185 5242
5186 err = nf_register_net_hook(net, &flowtable->ops[i]); 5243 err = nf_register_net_hook(net, &flowtable->ops[i]);
5187 if (err < 0) 5244 if (err < 0)
5188 goto err4; 5245 goto err5;
5189 } 5246 }
5190 5247
5191 err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); 5248 err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
5192 if (err < 0) 5249 if (err < 0)
5193 goto err5; 5250 goto err6;
5194
5195 INIT_DEFERRABLE_WORK(&flowtable->data.gc_work, type->gc);
5196 queue_delayed_work(system_power_efficient_wq,
5197 &flowtable->data.gc_work, HZ);
5198 5251
5199 list_add_tail_rcu(&flowtable->list, &table->flowtables); 5252 list_add_tail_rcu(&flowtable->list, &table->flowtables);
5200 table->use++; 5253 table->use++;
5201 5254
5202 return 0; 5255 return 0;
5203err5: 5256err6:
5204 i = flowtable->ops_len; 5257 i = flowtable->ops_len;
5205err4: 5258err5:
5206 for (k = i - 1; k >= 0; k--) { 5259 for (k = i - 1; k >= 0; k--) {
5207 kfree(flowtable->dev_name[k]); 5260 kfree(flowtable->dev_name[k]);
5208 nf_unregister_net_hook(net, &flowtable->ops[k]); 5261 nf_unregister_net_hook(net, &flowtable->ops[k]);
5209 } 5262 }
5210 5263
5211 kfree(flowtable->ops); 5264 kfree(flowtable->ops);
5265err4:
5266 flowtable->data.type->free(&flowtable->data);
5212err3: 5267err3:
5213 module_put(type->owner); 5268 module_put(type->owner);
5214err2: 5269err2:
@@ -5228,6 +5283,7 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
5228 u8 genmask = nft_genmask_next(net); 5283 u8 genmask = nft_genmask_next(net);
5229 int family = nfmsg->nfgen_family; 5284 int family = nfmsg->nfgen_family;
5230 struct nft_flowtable *flowtable; 5285 struct nft_flowtable *flowtable;
5286 const struct nlattr *attr;
5231 struct nft_table *table; 5287 struct nft_table *table;
5232 struct nft_ctx ctx; 5288 struct nft_ctx ctx;
5233 5289
@@ -5236,23 +5292,29 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
5236 !nla[NFTA_FLOWTABLE_HANDLE])) 5292 !nla[NFTA_FLOWTABLE_HANDLE]))
5237 return -EINVAL; 5293 return -EINVAL;
5238 5294
5239 table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], 5295 table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
5240 family, genmask); 5296 genmask);
5241 if (IS_ERR(table)) 5297 if (IS_ERR(table)) {
5298 NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]);
5242 return PTR_ERR(table); 5299 return PTR_ERR(table);
5300 }
5243 5301
5244 if (nla[NFTA_FLOWTABLE_HANDLE]) 5302 if (nla[NFTA_FLOWTABLE_HANDLE]) {
5245 flowtable = nf_tables_flowtable_lookup_byhandle(table, 5303 attr = nla[NFTA_FLOWTABLE_HANDLE];
5246 nla[NFTA_FLOWTABLE_HANDLE], 5304 flowtable = nft_flowtable_lookup_byhandle(table, attr, genmask);
5247 genmask); 5305 } else {
5248 else 5306 attr = nla[NFTA_FLOWTABLE_NAME];
5249 flowtable = nf_tables_flowtable_lookup(table, 5307 flowtable = nft_flowtable_lookup(table, attr, genmask);
5250 nla[NFTA_FLOWTABLE_NAME], 5308 }
5251 genmask); 5309
5252 if (IS_ERR(flowtable)) 5310 if (IS_ERR(flowtable)) {
5253 return PTR_ERR(flowtable); 5311 NL_SET_BAD_ATTR(extack, attr);
5254 if (flowtable->use > 0) 5312 return PTR_ERR(flowtable);
5313 }
5314 if (flowtable->use > 0) {
5315 NL_SET_BAD_ATTR(extack, attr);
5255 return -EBUSY; 5316 return -EBUSY;
5317 }
5256 5318
5257 nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); 5319 nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
5258 5320
@@ -5433,13 +5495,13 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
5433 if (!nla[NFTA_FLOWTABLE_NAME]) 5495 if (!nla[NFTA_FLOWTABLE_NAME])
5434 return -EINVAL; 5496 return -EINVAL;
5435 5497
5436 table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], 5498 table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
5437 family, genmask); 5499 genmask);
5438 if (IS_ERR(table)) 5500 if (IS_ERR(table))
5439 return PTR_ERR(table); 5501 return PTR_ERR(table);
5440 5502
5441 flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], 5503 flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
5442 genmask); 5504 genmask);
5443 if (IS_ERR(flowtable)) 5505 if (IS_ERR(flowtable))
5444 return PTR_ERR(flowtable); 5506 return PTR_ERR(flowtable);
5445 5507
@@ -5492,11 +5554,9 @@ err:
5492 5554
5493static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) 5555static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
5494{ 5556{
5495 cancel_delayed_work_sync(&flowtable->data.gc_work);
5496 kfree(flowtable->ops); 5557 kfree(flowtable->ops);
5497 kfree(flowtable->name); 5558 kfree(flowtable->name);
5498 flowtable->data.type->free(&flowtable->data); 5559 flowtable->data.type->free(&flowtable->data);
5499 rhashtable_destroy(&flowtable->data.rhashtable);
5500 module_put(flowtable->data.type->owner); 5560 module_put(flowtable->data.type->owner);
5501} 5561}
5502 5562
@@ -6410,8 +6470,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
6410 case NFT_GOTO: 6470 case NFT_GOTO:
6411 if (!tb[NFTA_VERDICT_CHAIN]) 6471 if (!tb[NFTA_VERDICT_CHAIN])
6412 return -EINVAL; 6472 return -EINVAL;
6413 chain = nf_tables_chain_lookup(ctx->table, 6473 chain = nft_chain_lookup(ctx->table, tb[NFTA_VERDICT_CHAIN],
6414 tb[NFTA_VERDICT_CHAIN], genmask); 6474 genmask);
6415 if (IS_ERR(chain)) 6475 if (IS_ERR(chain))
6416 return PTR_ERR(chain); 6476 return PTR_ERR(chain);
6417 if (nft_is_base_chain(chain)) 6477 if (nft_is_base_chain(chain))
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index dfd0bf3810d2..9cf47c4cb9d5 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -251,6 +251,9 @@ static struct nft_expr_type *nft_basic_types[] = {
251 &nft_payload_type, 251 &nft_payload_type,
252 &nft_dynset_type, 252 &nft_dynset_type,
253 &nft_range_type, 253 &nft_range_type,
254 &nft_meta_type,
255 &nft_rt_type,
256 &nft_exthdr_type,
254}; 257};
255 258
256int __init nf_tables_core_module_init(void) 259int __init nf_tables_core_module_init(void)
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 7b46aa4c478d..e5cc4d9b9ce7 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -37,7 +37,6 @@
37#include <net/sock.h> 37#include <net/sock.h>
38#include <net/netfilter/nf_log.h> 38#include <net/netfilter/nf_log.h>
39#include <net/netns/generic.h> 39#include <net/netns/generic.h>
40#include <net/netfilter/nfnetlink_log.h>
41 40
42#include <linux/atomic.h> 41#include <linux/atomic.h>
43#include <linux/refcount.h> 42#include <linux/refcount.h>
@@ -47,6 +46,7 @@
47#include "../bridge/br_private.h" 46#include "../bridge/br_private.h"
48#endif 47#endif
49 48
49#define NFULNL_COPY_DISABLED 0xff
50#define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE 50#define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE
51#define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ 51#define NFULNL_TIMEOUT_DEFAULT 100 /* every second */
52#define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ 52#define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */
@@ -618,7 +618,7 @@ static const struct nf_loginfo default_loginfo = {
618}; 618};
619 619
620/* log handler for internal netfilter logging api */ 620/* log handler for internal netfilter logging api */
621void 621static void
622nfulnl_log_packet(struct net *net, 622nfulnl_log_packet(struct net *net,
623 u_int8_t pf, 623 u_int8_t pf,
624 unsigned int hooknum, 624 unsigned int hooknum,
@@ -633,7 +633,7 @@ nfulnl_log_packet(struct net *net,
633 struct nfulnl_instance *inst; 633 struct nfulnl_instance *inst;
634 const struct nf_loginfo *li; 634 const struct nf_loginfo *li;
635 unsigned int qthreshold; 635 unsigned int qthreshold;
636 unsigned int plen; 636 unsigned int plen = 0;
637 struct nfnl_log_net *log = nfnl_log_pernet(net); 637 struct nfnl_log_net *log = nfnl_log_pernet(net);
638 const struct nfnl_ct_hook *nfnl_ct = NULL; 638 const struct nfnl_ct_hook *nfnl_ct = NULL;
639 struct nf_conn *ct = NULL; 639 struct nf_conn *ct = NULL;
@@ -648,7 +648,6 @@ nfulnl_log_packet(struct net *net,
648 if (!inst) 648 if (!inst)
649 return; 649 return;
650 650
651 plen = 0;
652 if (prefix) 651 if (prefix)
653 plen = strlen(prefix) + 1; 652 plen = strlen(prefix) + 1;
654 653
@@ -760,7 +759,6 @@ alloc_failure:
760 /* FIXME: statistics */ 759 /* FIXME: statistics */
761 goto unlock_and_release; 760 goto unlock_and_release;
762} 761}
763EXPORT_SYMBOL_GPL(nfulnl_log_packet);
764 762
765static int 763static int
766nfulnl_rcv_nl_event(struct notifier_block *this, 764nfulnl_rcv_nl_event(struct notifier_block *this,
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 04863fad05dd..b07a3fd9eeea 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -36,7 +36,7 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
36 u64 timeout; 36 u64 timeout;
37 void *elem; 37 void *elem;
38 38
39 if (set->size && !atomic_add_unless(&set->nelems, 1, set->size)) 39 if (!atomic_add_unless(&set->nelems, 1, set->size))
40 return NULL; 40 return NULL;
41 41
42 timeout = priv->timeout ? : set->timeout; 42 timeout = priv->timeout ? : set->timeout;
@@ -81,7 +81,7 @@ static void nft_dynset_eval(const struct nft_expr *expr,
81 if (priv->op == NFT_DYNSET_OP_UPDATE && 81 if (priv->op == NFT_DYNSET_OP_UPDATE &&
82 nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { 82 nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
83 timeout = priv->timeout ? : set->timeout; 83 timeout = priv->timeout ? : set->timeout;
84 *nft_set_ext_expiration(ext) = jiffies + timeout; 84 *nft_set_ext_expiration(ext) = get_jiffies_64() + timeout;
85 } 85 }
86 86
87 if (sexpr != NULL) 87 if (sexpr != NULL)
@@ -216,6 +216,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
216 if (err < 0) 216 if (err < 0)
217 goto err1; 217 goto err1;
218 218
219 if (set->size == 0)
220 set->size = 0xffff;
221
219 priv->set = set; 222 priv->set = set;
220 return 0; 223 return 0;
221 224
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 47ec1046ad11..a940c9fd9045 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -10,11 +10,10 @@
10 10
11#include <asm/unaligned.h> 11#include <asm/unaligned.h>
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/init.h>
14#include <linux/module.h>
15#include <linux/netlink.h> 13#include <linux/netlink.h>
16#include <linux/netfilter.h> 14#include <linux/netfilter.h>
17#include <linux/netfilter/nf_tables.h> 15#include <linux/netfilter/nf_tables.h>
16#include <net/netfilter/nf_tables_core.h>
18#include <net/netfilter/nf_tables.h> 17#include <net/netfilter/nf_tables.h>
19#include <net/tcp.h> 18#include <net/tcp.h>
20 19
@@ -353,7 +352,6 @@ static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr)
353 return nft_exthdr_dump_common(skb, priv); 352 return nft_exthdr_dump_common(skb, priv);
354} 353}
355 354
356static struct nft_expr_type nft_exthdr_type;
357static const struct nft_expr_ops nft_exthdr_ipv6_ops = { 355static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
358 .type = &nft_exthdr_type, 356 .type = &nft_exthdr_type,
359 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), 357 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
@@ -407,27 +405,10 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
407 return ERR_PTR(-EOPNOTSUPP); 405 return ERR_PTR(-EOPNOTSUPP);
408} 406}
409 407
410static struct nft_expr_type nft_exthdr_type __read_mostly = { 408struct nft_expr_type nft_exthdr_type __read_mostly = {
411 .name = "exthdr", 409 .name = "exthdr",
412 .select_ops = nft_exthdr_select_ops, 410 .select_ops = nft_exthdr_select_ops,
413 .policy = nft_exthdr_policy, 411 .policy = nft_exthdr_policy,
414 .maxattr = NFTA_EXTHDR_MAX, 412 .maxattr = NFTA_EXTHDR_MAX,
415 .owner = THIS_MODULE, 413 .owner = THIS_MODULE,
416}; 414};
417
418static int __init nft_exthdr_module_init(void)
419{
420 return nft_register_expr(&nft_exthdr_type);
421}
422
423static void __exit nft_exthdr_module_exit(void)
424{
425 nft_unregister_expr(&nft_exthdr_type);
426}
427
428module_init(nft_exthdr_module_init);
429module_exit(nft_exthdr_module_exit);
430
431MODULE_LICENSE("GPL");
432MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
433MODULE_ALIAS_NFT_EXPR("exthdr");
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index b65829b2be22..d6bab8c3cbb0 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -142,9 +142,8 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
142 if (!tb[NFTA_FLOW_TABLE_NAME]) 142 if (!tb[NFTA_FLOW_TABLE_NAME])
143 return -EINVAL; 143 return -EINVAL;
144 144
145 flowtable = nf_tables_flowtable_lookup(ctx->table, 145 flowtable = nft_flowtable_lookup(ctx->table, tb[NFTA_FLOW_TABLE_NAME],
146 tb[NFTA_FLOW_TABLE_NAME], 146 genmask);
147 genmask);
148 if (IS_ERR(flowtable)) 147 if (IS_ERR(flowtable))
149 return PTR_ERR(flowtable); 148 return PTR_ERR(flowtable);
150 149
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 24f2f7567ddb..e235c17f1b8b 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -97,7 +97,7 @@ static int nft_jhash_init(const struct nft_ctx *ctx,
97 priv->len = len; 97 priv->len = len;
98 98
99 priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); 99 priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
100 if (priv->modulus <= 1) 100 if (priv->modulus < 1)
101 return -ERANGE; 101 return -ERANGE;
102 102
103 if (priv->offset + priv->modulus - 1 < priv->offset) 103 if (priv->offset + priv->modulus - 1 < priv->offset)
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 8fb91940e2e7..5348bd058c88 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -1,5 +1,7 @@
1/* 1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> 2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 * Copyright (c) 2014 Intel Corporation
4 * Author: Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>
3 * 5 *
4 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
@@ -9,8 +11,6 @@
9 */ 11 */
10 12
11#include <linux/kernel.h> 13#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/netlink.h> 14#include <linux/netlink.h>
15#include <linux/netfilter.h> 15#include <linux/netfilter.h>
16#include <linux/netfilter/nf_tables.h> 16#include <linux/netfilter/nf_tables.h>
@@ -24,21 +24,35 @@
24#include <net/tcp_states.h> /* for TCP_TIME_WAIT */ 24#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
25#include <net/netfilter/nf_tables.h> 25#include <net/netfilter/nf_tables.h>
26#include <net/netfilter/nf_tables_core.h> 26#include <net/netfilter/nf_tables_core.h>
27#include <net/netfilter/nft_meta.h>
28 27
29#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */ 28#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */
30 29
30struct nft_meta {
31 enum nft_meta_keys key:8;
32 union {
33 enum nft_registers dreg:8;
34 enum nft_registers sreg:8;
35 };
36};
37
31static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state); 38static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
32 39
33void nft_meta_get_eval(const struct nft_expr *expr, 40#ifdef CONFIG_NF_TABLES_BRIDGE
34 struct nft_regs *regs, 41#include "../bridge/br_private.h"
35 const struct nft_pktinfo *pkt) 42#endif
43
44static void nft_meta_get_eval(const struct nft_expr *expr,
45 struct nft_regs *regs,
46 const struct nft_pktinfo *pkt)
36{ 47{
37 const struct nft_meta *priv = nft_expr_priv(expr); 48 const struct nft_meta *priv = nft_expr_priv(expr);
38 const struct sk_buff *skb = pkt->skb; 49 const struct sk_buff *skb = pkt->skb;
39 const struct net_device *in = nft_in(pkt), *out = nft_out(pkt); 50 const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
40 struct sock *sk; 51 struct sock *sk;
41 u32 *dest = &regs->data[priv->dreg]; 52 u32 *dest = &regs->data[priv->dreg];
53#ifdef CONFIG_NF_TABLES_BRIDGE
54 const struct net_bridge_port *p;
55#endif
42 56
43 switch (priv->key) { 57 switch (priv->key) {
44 case NFT_META_LEN: 58 case NFT_META_LEN:
@@ -215,6 +229,18 @@ void nft_meta_get_eval(const struct nft_expr *expr,
215 nft_reg_store8(dest, !!skb->sp); 229 nft_reg_store8(dest, !!skb->sp);
216 break; 230 break;
217#endif 231#endif
232#ifdef CONFIG_NF_TABLES_BRIDGE
233 case NFT_META_BRI_IIFNAME:
234 if (in == NULL || (p = br_port_get_rcu(in)) == NULL)
235 goto err;
236 strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
237 return;
238 case NFT_META_BRI_OIFNAME:
239 if (out == NULL || (p = br_port_get_rcu(out)) == NULL)
240 goto err;
241 strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
242 return;
243#endif
218 default: 244 default:
219 WARN_ON(1); 245 WARN_ON(1);
220 goto err; 246 goto err;
@@ -224,11 +250,10 @@ void nft_meta_get_eval(const struct nft_expr *expr,
224err: 250err:
225 regs->verdict.code = NFT_BREAK; 251 regs->verdict.code = NFT_BREAK;
226} 252}
227EXPORT_SYMBOL_GPL(nft_meta_get_eval);
228 253
229void nft_meta_set_eval(const struct nft_expr *expr, 254static void nft_meta_set_eval(const struct nft_expr *expr,
230 struct nft_regs *regs, 255 struct nft_regs *regs,
231 const struct nft_pktinfo *pkt) 256 const struct nft_pktinfo *pkt)
232{ 257{
233 const struct nft_meta *meta = nft_expr_priv(expr); 258 const struct nft_meta *meta = nft_expr_priv(expr);
234 struct sk_buff *skb = pkt->skb; 259 struct sk_buff *skb = pkt->skb;
@@ -258,18 +283,16 @@ void nft_meta_set_eval(const struct nft_expr *expr,
258 WARN_ON(1); 283 WARN_ON(1);
259 } 284 }
260} 285}
261EXPORT_SYMBOL_GPL(nft_meta_set_eval);
262 286
263const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { 287static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
264 [NFTA_META_DREG] = { .type = NLA_U32 }, 288 [NFTA_META_DREG] = { .type = NLA_U32 },
265 [NFTA_META_KEY] = { .type = NLA_U32 }, 289 [NFTA_META_KEY] = { .type = NLA_U32 },
266 [NFTA_META_SREG] = { .type = NLA_U32 }, 290 [NFTA_META_SREG] = { .type = NLA_U32 },
267}; 291};
268EXPORT_SYMBOL_GPL(nft_meta_policy);
269 292
270int nft_meta_get_init(const struct nft_ctx *ctx, 293static int nft_meta_get_init(const struct nft_ctx *ctx,
271 const struct nft_expr *expr, 294 const struct nft_expr *expr,
272 const struct nlattr * const tb[]) 295 const struct nlattr * const tb[])
273{ 296{
274 struct nft_meta *priv = nft_expr_priv(expr); 297 struct nft_meta *priv = nft_expr_priv(expr);
275 unsigned int len; 298 unsigned int len;
@@ -318,6 +341,14 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
318 len = sizeof(u8); 341 len = sizeof(u8);
319 break; 342 break;
320#endif 343#endif
344#ifdef CONFIG_NF_TABLES_BRIDGE
345 case NFT_META_BRI_IIFNAME:
346 case NFT_META_BRI_OIFNAME:
347 if (ctx->family != NFPROTO_BRIDGE)
348 return -EOPNOTSUPP;
349 len = IFNAMSIZ;
350 break;
351#endif
321 default: 352 default:
322 return -EOPNOTSUPP; 353 return -EOPNOTSUPP;
323 } 354 }
@@ -326,7 +357,6 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
326 return nft_validate_register_store(ctx, priv->dreg, NULL, 357 return nft_validate_register_store(ctx, priv->dreg, NULL,
327 NFT_DATA_VALUE, len); 358 NFT_DATA_VALUE, len);
328} 359}
329EXPORT_SYMBOL_GPL(nft_meta_get_init);
330 360
331static int nft_meta_get_validate(const struct nft_ctx *ctx, 361static int nft_meta_get_validate(const struct nft_ctx *ctx,
332 const struct nft_expr *expr, 362 const struct nft_expr *expr,
@@ -360,9 +390,9 @@ static int nft_meta_get_validate(const struct nft_ctx *ctx,
360#endif 390#endif
361} 391}
362 392
363int nft_meta_set_validate(const struct nft_ctx *ctx, 393static int nft_meta_set_validate(const struct nft_ctx *ctx,
364 const struct nft_expr *expr, 394 const struct nft_expr *expr,
365 const struct nft_data **data) 395 const struct nft_data **data)
366{ 396{
367 struct nft_meta *priv = nft_expr_priv(expr); 397 struct nft_meta *priv = nft_expr_priv(expr);
368 unsigned int hooks; 398 unsigned int hooks;
@@ -388,11 +418,10 @@ int nft_meta_set_validate(const struct nft_ctx *ctx,
388 418
389 return nft_chain_validate_hooks(ctx->chain, hooks); 419 return nft_chain_validate_hooks(ctx->chain, hooks);
390} 420}
391EXPORT_SYMBOL_GPL(nft_meta_set_validate);
392 421
393int nft_meta_set_init(const struct nft_ctx *ctx, 422static int nft_meta_set_init(const struct nft_ctx *ctx,
394 const struct nft_expr *expr, 423 const struct nft_expr *expr,
395 const struct nlattr * const tb[]) 424 const struct nlattr * const tb[])
396{ 425{
397 struct nft_meta *priv = nft_expr_priv(expr); 426 struct nft_meta *priv = nft_expr_priv(expr);
398 unsigned int len; 427 unsigned int len;
@@ -424,10 +453,9 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
424 453
425 return 0; 454 return 0;
426} 455}
427EXPORT_SYMBOL_GPL(nft_meta_set_init);
428 456
429int nft_meta_get_dump(struct sk_buff *skb, 457static int nft_meta_get_dump(struct sk_buff *skb,
430 const struct nft_expr *expr) 458 const struct nft_expr *expr)
431{ 459{
432 const struct nft_meta *priv = nft_expr_priv(expr); 460 const struct nft_meta *priv = nft_expr_priv(expr);
433 461
@@ -440,10 +468,8 @@ int nft_meta_get_dump(struct sk_buff *skb,
440nla_put_failure: 468nla_put_failure:
441 return -1; 469 return -1;
442} 470}
443EXPORT_SYMBOL_GPL(nft_meta_get_dump);
444 471
445int nft_meta_set_dump(struct sk_buff *skb, 472static int nft_meta_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
446 const struct nft_expr *expr)
447{ 473{
448 const struct nft_meta *priv = nft_expr_priv(expr); 474 const struct nft_meta *priv = nft_expr_priv(expr);
449 475
@@ -457,19 +483,16 @@ int nft_meta_set_dump(struct sk_buff *skb,
457nla_put_failure: 483nla_put_failure:
458 return -1; 484 return -1;
459} 485}
460EXPORT_SYMBOL_GPL(nft_meta_set_dump);
461 486
462void nft_meta_set_destroy(const struct nft_ctx *ctx, 487static void nft_meta_set_destroy(const struct nft_ctx *ctx,
463 const struct nft_expr *expr) 488 const struct nft_expr *expr)
464{ 489{
465 const struct nft_meta *priv = nft_expr_priv(expr); 490 const struct nft_meta *priv = nft_expr_priv(expr);
466 491
467 if (priv->key == NFT_META_NFTRACE) 492 if (priv->key == NFT_META_NFTRACE)
468 static_branch_dec(&nft_trace_enabled); 493 static_branch_dec(&nft_trace_enabled);
469} 494}
470EXPORT_SYMBOL_GPL(nft_meta_set_destroy);
471 495
472static struct nft_expr_type nft_meta_type;
473static const struct nft_expr_ops nft_meta_get_ops = { 496static const struct nft_expr_ops nft_meta_get_ops = {
474 .type = &nft_meta_type, 497 .type = &nft_meta_type,
475 .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), 498 .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
@@ -508,27 +531,10 @@ nft_meta_select_ops(const struct nft_ctx *ctx,
508 return ERR_PTR(-EINVAL); 531 return ERR_PTR(-EINVAL);
509} 532}
510 533
511static struct nft_expr_type nft_meta_type __read_mostly = { 534struct nft_expr_type nft_meta_type __read_mostly = {
512 .name = "meta", 535 .name = "meta",
513 .select_ops = nft_meta_select_ops, 536 .select_ops = nft_meta_select_ops,
514 .policy = nft_meta_policy, 537 .policy = nft_meta_policy,
515 .maxattr = NFTA_META_MAX, 538 .maxattr = NFTA_META_MAX,
516 .owner = THIS_MODULE, 539 .owner = THIS_MODULE,
517}; 540};
518
519static int __init nft_meta_module_init(void)
520{
521 return nft_register_expr(&nft_meta_type);
522}
523
524static void __exit nft_meta_module_exit(void)
525{
526 nft_unregister_expr(&nft_meta_type);
527}
528
529module_init(nft_meta_module_init);
530module_exit(nft_meta_module_exit);
531
532MODULE_LICENSE("GPL");
533MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
534MODULE_ALIAS_NFT_EXPR("meta");
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 1f36954c2ba9..c15807d10b91 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -43,7 +43,7 @@ static void nft_nat_eval(const struct nft_expr *expr,
43 const struct nft_nat *priv = nft_expr_priv(expr); 43 const struct nft_nat *priv = nft_expr_priv(expr);
44 enum ip_conntrack_info ctinfo; 44 enum ip_conntrack_info ctinfo;
45 struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo); 45 struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo);
46 struct nf_nat_range range; 46 struct nf_nat_range2 range;
47 47
48 memset(&range, 0, sizeof(range)); 48 memset(&range, 0, sizeof(range));
49 if (priv->sreg_addr_min) { 49 if (priv->sreg_addr_min) {
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 5a3a52c71545..8a64db8f2e69 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -24,13 +24,11 @@ struct nft_ng_inc {
24 u32 modulus; 24 u32 modulus;
25 atomic_t counter; 25 atomic_t counter;
26 u32 offset; 26 u32 offset;
27 struct nft_set *map;
27}; 28};
28 29
29static void nft_ng_inc_eval(const struct nft_expr *expr, 30static u32 nft_ng_inc_gen(struct nft_ng_inc *priv)
30 struct nft_regs *regs,
31 const struct nft_pktinfo *pkt)
32{ 31{
33 struct nft_ng_inc *priv = nft_expr_priv(expr);
34 u32 nval, oval; 32 u32 nval, oval;
35 33
36 do { 34 do {
@@ -38,7 +36,36 @@ static void nft_ng_inc_eval(const struct nft_expr *expr,
38 nval = (oval + 1 < priv->modulus) ? oval + 1 : 0; 36 nval = (oval + 1 < priv->modulus) ? oval + 1 : 0;
39 } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval); 37 } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval);
40 38
41 regs->data[priv->dreg] = nval + priv->offset; 39 return nval + priv->offset;
40}
41
42static void nft_ng_inc_eval(const struct nft_expr *expr,
43 struct nft_regs *regs,
44 const struct nft_pktinfo *pkt)
45{
46 struct nft_ng_inc *priv = nft_expr_priv(expr);
47
48 regs->data[priv->dreg] = nft_ng_inc_gen(priv);
49}
50
51static void nft_ng_inc_map_eval(const struct nft_expr *expr,
52 struct nft_regs *regs,
53 const struct nft_pktinfo *pkt)
54{
55 struct nft_ng_inc *priv = nft_expr_priv(expr);
56 const struct nft_set *map = priv->map;
57 const struct nft_set_ext *ext;
58 u32 result;
59 bool found;
60
61 result = nft_ng_inc_gen(priv);
62 found = map->ops->lookup(nft_net(pkt), map, &result, &ext);
63
64 if (!found)
65 return;
66
67 nft_data_copy(&regs->data[priv->dreg],
68 nft_set_ext_data(ext), map->dlen);
42} 69}
43 70
44static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = { 71static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
@@ -46,6 +73,9 @@ static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
46 [NFTA_NG_MODULUS] = { .type = NLA_U32 }, 73 [NFTA_NG_MODULUS] = { .type = NLA_U32 },
47 [NFTA_NG_TYPE] = { .type = NLA_U32 }, 74 [NFTA_NG_TYPE] = { .type = NLA_U32 },
48 [NFTA_NG_OFFSET] = { .type = NLA_U32 }, 75 [NFTA_NG_OFFSET] = { .type = NLA_U32 },
76 [NFTA_NG_SET_NAME] = { .type = NLA_STRING,
77 .len = NFT_SET_MAXNAMELEN - 1 },
78 [NFTA_NG_SET_ID] = { .type = NLA_U32 },
49}; 79};
50 80
51static int nft_ng_inc_init(const struct nft_ctx *ctx, 81static int nft_ng_inc_init(const struct nft_ctx *ctx,
@@ -71,6 +101,25 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
71 NFT_DATA_VALUE, sizeof(u32)); 101 NFT_DATA_VALUE, sizeof(u32));
72} 102}
73 103
104static int nft_ng_inc_map_init(const struct nft_ctx *ctx,
105 const struct nft_expr *expr,
106 const struct nlattr * const tb[])
107{
108 struct nft_ng_inc *priv = nft_expr_priv(expr);
109 u8 genmask = nft_genmask_next(ctx->net);
110
111 nft_ng_inc_init(ctx, expr, tb);
112
113 priv->map = nft_set_lookup_global(ctx->net, ctx->table,
114 tb[NFTA_NG_SET_NAME],
115 tb[NFTA_NG_SET_ID], genmask);
116
117 if (IS_ERR(priv->map))
118 return PTR_ERR(priv->map);
119
120 return 0;
121}
122
74static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, 123static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
75 u32 modulus, enum nft_ng_types type, u32 offset) 124 u32 modulus, enum nft_ng_types type, u32 offset)
76{ 125{
@@ -97,6 +146,22 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
97 priv->offset); 146 priv->offset);
98} 147}
99 148
149static int nft_ng_inc_map_dump(struct sk_buff *skb,
150 const struct nft_expr *expr)
151{
152 const struct nft_ng_inc *priv = nft_expr_priv(expr);
153
154 if (nft_ng_dump(skb, priv->dreg, priv->modulus,
155 NFT_NG_INCREMENTAL, priv->offset) ||
156 nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name))
157 goto nla_put_failure;
158
159 return 0;
160
161nla_put_failure:
162 return -1;
163}
164
100struct nft_ng_random { 165struct nft_ng_random {
101 enum nft_registers dreg:8; 166 enum nft_registers dreg:8;
102 u32 modulus; 167 u32 modulus;
@@ -156,6 +221,14 @@ static const struct nft_expr_ops nft_ng_inc_ops = {
156 .dump = nft_ng_inc_dump, 221 .dump = nft_ng_inc_dump,
157}; 222};
158 223
224static const struct nft_expr_ops nft_ng_inc_map_ops = {
225 .type = &nft_ng_type,
226 .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
227 .eval = nft_ng_inc_map_eval,
228 .init = nft_ng_inc_map_init,
229 .dump = nft_ng_inc_map_dump,
230};
231
159static const struct nft_expr_ops nft_ng_random_ops = { 232static const struct nft_expr_ops nft_ng_random_ops = {
160 .type = &nft_ng_type, 233 .type = &nft_ng_type,
161 .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)), 234 .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
@@ -178,6 +251,8 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
178 251
179 switch (type) { 252 switch (type) {
180 case NFT_NG_INCREMENTAL: 253 case NFT_NG_INCREMENTAL:
254 if (tb[NFTA_NG_SET_NAME])
255 return &nft_ng_inc_map_ops;
181 return &nft_ng_inc_ops; 256 return &nft_ng_inc_ops;
182 case NFT_NG_RANDOM: 257 case NFT_NG_RANDOM:
183 return &nft_ng_random_ops; 258 return &nft_ng_random_ops;
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 0b02407773ad..cdf348f751ec 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -38,8 +38,8 @@ static int nft_objref_init(const struct nft_ctx *ctx,
38 return -EINVAL; 38 return -EINVAL;
39 39
40 objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE])); 40 objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE]));
41 obj = nf_tables_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype, 41 obj = nft_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype,
42 genmask); 42 genmask);
43 if (IS_ERR(obj)) 43 if (IS_ERR(obj))
44 return -ENOENT; 44 return -ENOENT;
45 45
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 11a2071b6dd4..76dba9f6b6f6 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -7,8 +7,6 @@
7 */ 7 */
8 8
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/init.h>
11#include <linux/module.h>
12#include <linux/netlink.h> 10#include <linux/netlink.h>
13#include <linux/netfilter.h> 11#include <linux/netfilter.h>
14#include <linux/netfilter/nf_tables.h> 12#include <linux/netfilter/nf_tables.h>
@@ -179,7 +177,6 @@ static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *exp
179 return nft_chain_validate_hooks(ctx->chain, hooks); 177 return nft_chain_validate_hooks(ctx->chain, hooks);
180} 178}
181 179
182static struct nft_expr_type nft_rt_type;
183static const struct nft_expr_ops nft_rt_get_ops = { 180static const struct nft_expr_ops nft_rt_get_ops = {
184 .type = &nft_rt_type, 181 .type = &nft_rt_type,
185 .size = NFT_EXPR_SIZE(sizeof(struct nft_rt)), 182 .size = NFT_EXPR_SIZE(sizeof(struct nft_rt)),
@@ -189,27 +186,10 @@ static const struct nft_expr_ops nft_rt_get_ops = {
189 .validate = nft_rt_validate, 186 .validate = nft_rt_validate,
190}; 187};
191 188
192static struct nft_expr_type nft_rt_type __read_mostly = { 189struct nft_expr_type nft_rt_type __read_mostly = {
193 .name = "rt", 190 .name = "rt",
194 .ops = &nft_rt_get_ops, 191 .ops = &nft_rt_get_ops,
195 .policy = nft_rt_policy, 192 .policy = nft_rt_policy,
196 .maxattr = NFTA_RT_MAX, 193 .maxattr = NFTA_RT_MAX,
197 .owner = THIS_MODULE, 194 .owner = THIS_MODULE,
198}; 195};
199
200static int __init nft_rt_module_init(void)
201{
202 return nft_register_expr(&nft_rt_type);
203}
204
205static void __exit nft_rt_module_exit(void)
206{
207 nft_unregister_expr(&nft_rt_type);
208}
209
210module_init(nft_rt_module_init);
211module_exit(nft_rt_module_exit);
212
213MODULE_LICENSE("GPL");
214MODULE_AUTHOR("Anders K. Pedersen <akp@cohaesio.com>");
215MODULE_ALIAS_NFT_EXPR("rt");
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 45fb2752fb63..d6626e01c7ee 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -296,27 +296,23 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
296 return true; 296 return true;
297} 297}
298 298
299static struct nft_set_type nft_bitmap_type;
300static struct nft_set_ops nft_bitmap_ops __read_mostly = {
301 .type = &nft_bitmap_type,
302 .privsize = nft_bitmap_privsize,
303 .elemsize = offsetof(struct nft_bitmap_elem, ext),
304 .estimate = nft_bitmap_estimate,
305 .init = nft_bitmap_init,
306 .destroy = nft_bitmap_destroy,
307 .insert = nft_bitmap_insert,
308 .remove = nft_bitmap_remove,
309 .deactivate = nft_bitmap_deactivate,
310 .flush = nft_bitmap_flush,
311 .activate = nft_bitmap_activate,
312 .lookup = nft_bitmap_lookup,
313 .walk = nft_bitmap_walk,
314 .get = nft_bitmap_get,
315};
316
317static struct nft_set_type nft_bitmap_type __read_mostly = { 299static struct nft_set_type nft_bitmap_type __read_mostly = {
318 .ops = &nft_bitmap_ops,
319 .owner = THIS_MODULE, 300 .owner = THIS_MODULE,
301 .ops = {
302 .privsize = nft_bitmap_privsize,
303 .elemsize = offsetof(struct nft_bitmap_elem, ext),
304 .estimate = nft_bitmap_estimate,
305 .init = nft_bitmap_init,
306 .destroy = nft_bitmap_destroy,
307 .insert = nft_bitmap_insert,
308 .remove = nft_bitmap_remove,
309 .deactivate = nft_bitmap_deactivate,
310 .flush = nft_bitmap_flush,
311 .activate = nft_bitmap_activate,
312 .lookup = nft_bitmap_lookup,
313 .walk = nft_bitmap_walk,
314 .get = nft_bitmap_get,
315 },
320}; 316};
321 317
322static int __init nft_bitmap_module_init(void) 318static int __init nft_bitmap_module_init(void)
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index fc9c6d5d64cd..dbf1f4ad077c 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -605,6 +605,12 @@ static void nft_hash_destroy(const struct nft_set *set)
605static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, 605static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
606 struct nft_set_estimate *est) 606 struct nft_set_estimate *est)
607{ 607{
608 if (!desc->size)
609 return false;
610
611 if (desc->klen == 4)
612 return false;
613
608 est->size = sizeof(struct nft_hash) + 614 est->size = sizeof(struct nft_hash) +
609 nft_hash_buckets(desc->size) * sizeof(struct hlist_head) + 615 nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
610 desc->size * sizeof(struct nft_hash_elem); 616 desc->size * sizeof(struct nft_hash_elem);
@@ -614,91 +620,100 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
614 return true; 620 return true;
615} 621}
616 622
617static struct nft_set_type nft_hash_type; 623static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features,
618static struct nft_set_ops nft_rhash_ops __read_mostly = { 624 struct nft_set_estimate *est)
619 .type = &nft_hash_type, 625{
620 .privsize = nft_rhash_privsize, 626 if (!desc->size)
621 .elemsize = offsetof(struct nft_rhash_elem, ext), 627 return false;
622 .estimate = nft_rhash_estimate,
623 .init = nft_rhash_init,
624 .destroy = nft_rhash_destroy,
625 .insert = nft_rhash_insert,
626 .activate = nft_rhash_activate,
627 .deactivate = nft_rhash_deactivate,
628 .flush = nft_rhash_flush,
629 .remove = nft_rhash_remove,
630 .lookup = nft_rhash_lookup,
631 .update = nft_rhash_update,
632 .walk = nft_rhash_walk,
633 .get = nft_rhash_get,
634 .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
635};
636 628
637static struct nft_set_ops nft_hash_ops __read_mostly = { 629 if (desc->klen != 4)
638 .type = &nft_hash_type, 630 return false;
639 .privsize = nft_hash_privsize,
640 .elemsize = offsetof(struct nft_hash_elem, ext),
641 .estimate = nft_hash_estimate,
642 .init = nft_hash_init,
643 .destroy = nft_hash_destroy,
644 .insert = nft_hash_insert,
645 .activate = nft_hash_activate,
646 .deactivate = nft_hash_deactivate,
647 .flush = nft_hash_flush,
648 .remove = nft_hash_remove,
649 .lookup = nft_hash_lookup,
650 .walk = nft_hash_walk,
651 .get = nft_hash_get,
652 .features = NFT_SET_MAP | NFT_SET_OBJECT,
653};
654 631
655static struct nft_set_ops nft_hash_fast_ops __read_mostly = { 632 est->size = sizeof(struct nft_hash) +
656 .type = &nft_hash_type, 633 nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
657 .privsize = nft_hash_privsize, 634 desc->size * sizeof(struct nft_hash_elem);
658 .elemsize = offsetof(struct nft_hash_elem, ext), 635 est->lookup = NFT_SET_CLASS_O_1;
659 .estimate = nft_hash_estimate, 636 est->space = NFT_SET_CLASS_O_N;
660 .init = nft_hash_init,
661 .destroy = nft_hash_destroy,
662 .insert = nft_hash_insert,
663 .activate = nft_hash_activate,
664 .deactivate = nft_hash_deactivate,
665 .flush = nft_hash_flush,
666 .remove = nft_hash_remove,
667 .lookup = nft_hash_lookup_fast,
668 .walk = nft_hash_walk,
669 .get = nft_hash_get,
670 .features = NFT_SET_MAP | NFT_SET_OBJECT,
671};
672
673static const struct nft_set_ops *
674nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
675 u32 flags)
676{
677 if (desc->size && !(flags & (NFT_SET_EVAL | NFT_SET_TIMEOUT))) {
678 switch (desc->klen) {
679 case 4:
680 return &nft_hash_fast_ops;
681 default:
682 return &nft_hash_ops;
683 }
684 }
685 637
686 return &nft_rhash_ops; 638 return true;
687} 639}
688 640
641static struct nft_set_type nft_rhash_type __read_mostly = {
642 .owner = THIS_MODULE,
643 .features = NFT_SET_MAP | NFT_SET_OBJECT |
644 NFT_SET_TIMEOUT | NFT_SET_EVAL,
645 .ops = {
646 .privsize = nft_rhash_privsize,
647 .elemsize = offsetof(struct nft_rhash_elem, ext),
648 .estimate = nft_rhash_estimate,
649 .init = nft_rhash_init,
650 .destroy = nft_rhash_destroy,
651 .insert = nft_rhash_insert,
652 .activate = nft_rhash_activate,
653 .deactivate = nft_rhash_deactivate,
654 .flush = nft_rhash_flush,
655 .remove = nft_rhash_remove,
656 .lookup = nft_rhash_lookup,
657 .update = nft_rhash_update,
658 .walk = nft_rhash_walk,
659 .get = nft_rhash_get,
660 },
661};
662
689static struct nft_set_type nft_hash_type __read_mostly = { 663static struct nft_set_type nft_hash_type __read_mostly = {
690 .select_ops = nft_hash_select_ops,
691 .owner = THIS_MODULE, 664 .owner = THIS_MODULE,
665 .features = NFT_SET_MAP | NFT_SET_OBJECT,
666 .ops = {
667 .privsize = nft_hash_privsize,
668 .elemsize = offsetof(struct nft_hash_elem, ext),
669 .estimate = nft_hash_estimate,
670 .init = nft_hash_init,
671 .destroy = nft_hash_destroy,
672 .insert = nft_hash_insert,
673 .activate = nft_hash_activate,
674 .deactivate = nft_hash_deactivate,
675 .flush = nft_hash_flush,
676 .remove = nft_hash_remove,
677 .lookup = nft_hash_lookup,
678 .walk = nft_hash_walk,
679 .get = nft_hash_get,
680 },
681};
682
683static struct nft_set_type nft_hash_fast_type __read_mostly = {
684 .owner = THIS_MODULE,
685 .features = NFT_SET_MAP | NFT_SET_OBJECT,
686 .ops = {
687 .privsize = nft_hash_privsize,
688 .elemsize = offsetof(struct nft_hash_elem, ext),
689 .estimate = nft_hash_fast_estimate,
690 .init = nft_hash_init,
691 .destroy = nft_hash_destroy,
692 .insert = nft_hash_insert,
693 .activate = nft_hash_activate,
694 .deactivate = nft_hash_deactivate,
695 .flush = nft_hash_flush,
696 .remove = nft_hash_remove,
697 .lookup = nft_hash_lookup_fast,
698 .walk = nft_hash_walk,
699 .get = nft_hash_get,
700 },
692}; 701};
693 702
694static int __init nft_hash_module_init(void) 703static int __init nft_hash_module_init(void)
695{ 704{
696 return nft_register_set(&nft_hash_type); 705 if (nft_register_set(&nft_hash_fast_type) ||
706 nft_register_set(&nft_hash_type) ||
707 nft_register_set(&nft_rhash_type))
708 return 1;
709 return 0;
697} 710}
698 711
699static void __exit nft_hash_module_exit(void) 712static void __exit nft_hash_module_exit(void)
700{ 713{
714 nft_unregister_set(&nft_rhash_type);
701 nft_unregister_set(&nft_hash_type); 715 nft_unregister_set(&nft_hash_type);
716 nft_unregister_set(&nft_hash_fast_type);
702} 717}
703 718
704module_init(nft_hash_module_init); 719module_init(nft_hash_module_init);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index e6f08bc5f359..22c57d7612c4 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -393,28 +393,24 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
393 return true; 393 return true;
394} 394}
395 395
396static struct nft_set_type nft_rbtree_type;
397static struct nft_set_ops nft_rbtree_ops __read_mostly = {
398 .type = &nft_rbtree_type,
399 .privsize = nft_rbtree_privsize,
400 .elemsize = offsetof(struct nft_rbtree_elem, ext),
401 .estimate = nft_rbtree_estimate,
402 .init = nft_rbtree_init,
403 .destroy = nft_rbtree_destroy,
404 .insert = nft_rbtree_insert,
405 .remove = nft_rbtree_remove,
406 .deactivate = nft_rbtree_deactivate,
407 .flush = nft_rbtree_flush,
408 .activate = nft_rbtree_activate,
409 .lookup = nft_rbtree_lookup,
410 .walk = nft_rbtree_walk,
411 .get = nft_rbtree_get,
412 .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
413};
414
415static struct nft_set_type nft_rbtree_type __read_mostly = { 396static struct nft_set_type nft_rbtree_type __read_mostly = {
416 .ops = &nft_rbtree_ops,
417 .owner = THIS_MODULE, 397 .owner = THIS_MODULE,
398 .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
399 .ops = {
400 .privsize = nft_rbtree_privsize,
401 .elemsize = offsetof(struct nft_rbtree_elem, ext),
402 .estimate = nft_rbtree_estimate,
403 .init = nft_rbtree_init,
404 .destroy = nft_rbtree_destroy,
405 .insert = nft_rbtree_insert,
406 .remove = nft_rbtree_remove,
407 .deactivate = nft_rbtree_deactivate,
408 .flush = nft_rbtree_flush,
409 .activate = nft_rbtree_activate,
410 .lookup = nft_rbtree_lookup,
411 .walk = nft_rbtree_walk,
412 .get = nft_rbtree_get,
413 },
418}; 414};
419 415
420static int __init nft_rbtree_module_init(void) 416static int __init nft_rbtree_module_init(void)
diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c
index 58aa9dd3c5b7..1d437875e15a 100644
--- a/net/netfilter/xt_NETMAP.c
+++ b/net/netfilter/xt_NETMAP.c
@@ -21,8 +21,8 @@
21static unsigned int 21static unsigned int
22netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par) 22netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
23{ 23{
24 const struct nf_nat_range *range = par->targinfo; 24 const struct nf_nat_range2 *range = par->targinfo;
25 struct nf_nat_range newrange; 25 struct nf_nat_range2 newrange;
26 struct nf_conn *ct; 26 struct nf_conn *ct;
27 enum ip_conntrack_info ctinfo; 27 enum ip_conntrack_info ctinfo;
28 union nf_inet_addr new_addr, netmask; 28 union nf_inet_addr new_addr, netmask;
@@ -56,7 +56,7 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
56 56
57static int netmap_tg6_checkentry(const struct xt_tgchk_param *par) 57static int netmap_tg6_checkentry(const struct xt_tgchk_param *par)
58{ 58{
59 const struct nf_nat_range *range = par->targinfo; 59 const struct nf_nat_range2 *range = par->targinfo;
60 60
61 if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) 61 if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
62 return -EINVAL; 62 return -EINVAL;
@@ -75,7 +75,7 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
75 enum ip_conntrack_info ctinfo; 75 enum ip_conntrack_info ctinfo;
76 __be32 new_ip, netmask; 76 __be32 new_ip, netmask;
77 const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; 77 const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
78 struct nf_nat_range newrange; 78 struct nf_nat_range2 newrange;
79 79
80 WARN_ON(xt_hooknum(par) != NF_INET_PRE_ROUTING && 80 WARN_ON(xt_hooknum(par) != NF_INET_PRE_ROUTING &&
81 xt_hooknum(par) != NF_INET_POST_ROUTING && 81 xt_hooknum(par) != NF_INET_POST_ROUTING &&
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index c7f8958cea4a..1ed0cac585c4 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -13,7 +13,6 @@
13#include <linux/netfilter/x_tables.h> 13#include <linux/netfilter/x_tables.h>
14#include <linux/netfilter/xt_NFLOG.h> 14#include <linux/netfilter/xt_NFLOG.h>
15#include <net/netfilter/nf_log.h> 15#include <net/netfilter/nf_log.h>
16#include <net/netfilter/nfnetlink_log.h>
17 16
18MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 17MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
19MODULE_DESCRIPTION("Xtables: packet logging to netlink using NFLOG"); 18MODULE_DESCRIPTION("Xtables: packet logging to netlink using NFLOG");
@@ -37,8 +36,9 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
37 if (info->flags & XT_NFLOG_F_COPY_LEN) 36 if (info->flags & XT_NFLOG_F_COPY_LEN)
38 li.u.ulog.flags |= NF_LOG_F_COPY_LEN; 37 li.u.ulog.flags |= NF_LOG_F_COPY_LEN;
39 38
40 nfulnl_log_packet(net, xt_family(par), xt_hooknum(par), skb, 39 nf_log_packet(net, xt_family(par), xt_hooknum(par), skb, xt_in(par),
41 xt_in(par), xt_out(par), &li, info->prefix); 40 xt_out(par), &li, "%s", info->prefix);
41
42 return XT_CONTINUE; 42 return XT_CONTINUE;
43} 43}
44 44
@@ -50,7 +50,13 @@ static int nflog_tg_check(const struct xt_tgchk_param *par)
50 return -EINVAL; 50 return -EINVAL;
51 if (info->prefix[sizeof(info->prefix) - 1] != '\0') 51 if (info->prefix[sizeof(info->prefix) - 1] != '\0')
52 return -EINVAL; 52 return -EINVAL;
53 return 0; 53
54 return nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
55}
56
57static void nflog_tg_destroy(const struct xt_tgdtor_param *par)
58{
59 nf_logger_put(par->family, NF_LOG_TYPE_ULOG);
54} 60}
55 61
56static struct xt_target nflog_tg_reg __read_mostly = { 62static struct xt_target nflog_tg_reg __read_mostly = {
@@ -58,6 +64,7 @@ static struct xt_target nflog_tg_reg __read_mostly = {
58 .revision = 0, 64 .revision = 0,
59 .family = NFPROTO_UNSPEC, 65 .family = NFPROTO_UNSPEC,
60 .checkentry = nflog_tg_check, 66 .checkentry = nflog_tg_check,
67 .destroy = nflog_tg_destroy,
61 .target = nflog_tg, 68 .target = nflog_tg,
62 .targetsize = sizeof(struct xt_nflog_info), 69 .targetsize = sizeof(struct xt_nflog_info),
63 .me = THIS_MODULE, 70 .me = THIS_MODULE,
diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c
index 98a4c6d4f1cb..5ce9461e979c 100644
--- a/net/netfilter/xt_REDIRECT.c
+++ b/net/netfilter/xt_REDIRECT.c
@@ -36,7 +36,7 @@ redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par)
36 36
37static int redirect_tg6_checkentry(const struct xt_tgchk_param *par) 37static int redirect_tg6_checkentry(const struct xt_tgchk_param *par)
38{ 38{
39 const struct nf_nat_range *range = par->targinfo; 39 const struct nf_nat_range2 *range = par->targinfo;
40 40
41 if (range->flags & NF_NAT_RANGE_MAP_IPS) 41 if (range->flags & NF_NAT_RANGE_MAP_IPS)
42 return -EINVAL; 42 return -EINVAL;
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index bdb689cdc829..8af9707f8789 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -37,11 +37,12 @@ static void xt_nat_destroy(const struct xt_tgdtor_param *par)
37 nf_ct_netns_put(par->net, par->family); 37 nf_ct_netns_put(par->net, par->family);
38} 38}
39 39
40static void xt_nat_convert_range(struct nf_nat_range *dst, 40static void xt_nat_convert_range(struct nf_nat_range2 *dst,
41 const struct nf_nat_ipv4_range *src) 41 const struct nf_nat_ipv4_range *src)
42{ 42{
43 memset(&dst->min_addr, 0, sizeof(dst->min_addr)); 43 memset(&dst->min_addr, 0, sizeof(dst->min_addr));
44 memset(&dst->max_addr, 0, sizeof(dst->max_addr)); 44 memset(&dst->max_addr, 0, sizeof(dst->max_addr));
45 memset(&dst->base_proto, 0, sizeof(dst->base_proto));
45 46
46 dst->flags = src->flags; 47 dst->flags = src->flags;
47 dst->min_addr.ip = src->min_ip; 48 dst->min_addr.ip = src->min_ip;
@@ -54,7 +55,7 @@ static unsigned int
54xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par) 55xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
55{ 56{
56 const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; 57 const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
57 struct nf_nat_range range; 58 struct nf_nat_range2 range;
58 enum ip_conntrack_info ctinfo; 59 enum ip_conntrack_info ctinfo;
59 struct nf_conn *ct; 60 struct nf_conn *ct;
60 61
@@ -71,7 +72,7 @@ static unsigned int
71xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par) 72xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
72{ 73{
73 const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; 74 const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
74 struct nf_nat_range range; 75 struct nf_nat_range2 range;
75 enum ip_conntrack_info ctinfo; 76 enum ip_conntrack_info ctinfo;
76 struct nf_conn *ct; 77 struct nf_conn *ct;
77 78
@@ -86,7 +87,8 @@ xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
86static unsigned int 87static unsigned int
87xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par) 88xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
88{ 89{
89 const struct nf_nat_range *range = par->targinfo; 90 const struct nf_nat_range *range_v1 = par->targinfo;
91 struct nf_nat_range2 range;
90 enum ip_conntrack_info ctinfo; 92 enum ip_conntrack_info ctinfo;
91 struct nf_conn *ct; 93 struct nf_conn *ct;
92 94
@@ -95,13 +97,49 @@ xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
95 (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || 97 (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
96 ctinfo == IP_CT_RELATED_REPLY))); 98 ctinfo == IP_CT_RELATED_REPLY)));
97 99
98 return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC); 100 memcpy(&range, range_v1, sizeof(*range_v1));
101 memset(&range.base_proto, 0, sizeof(range.base_proto));
102
103 return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
99} 104}
100 105
101static unsigned int 106static unsigned int
102xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par) 107xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
103{ 108{
104 const struct nf_nat_range *range = par->targinfo; 109 const struct nf_nat_range *range_v1 = par->targinfo;
110 struct nf_nat_range2 range;
111 enum ip_conntrack_info ctinfo;
112 struct nf_conn *ct;
113
114 ct = nf_ct_get(skb, &ctinfo);
115 WARN_ON(!(ct != NULL &&
116 (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
117
118 memcpy(&range, range_v1, sizeof(*range_v1));
119 memset(&range.base_proto, 0, sizeof(range.base_proto));
120
121 return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
122}
123
124static unsigned int
125xt_snat_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
126{
127 const struct nf_nat_range2 *range = par->targinfo;
128 enum ip_conntrack_info ctinfo;
129 struct nf_conn *ct;
130
131 ct = nf_ct_get(skb, &ctinfo);
132 WARN_ON(!(ct != NULL &&
133 (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
134 ctinfo == IP_CT_RELATED_REPLY)));
135
136 return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC);
137}
138
139static unsigned int
140xt_dnat_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
141{
142 const struct nf_nat_range2 *range = par->targinfo;
105 enum ip_conntrack_info ctinfo; 143 enum ip_conntrack_info ctinfo;
106 struct nf_conn *ct; 144 struct nf_conn *ct;
107 145
@@ -163,6 +201,28 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
163 (1 << NF_INET_LOCAL_OUT), 201 (1 << NF_INET_LOCAL_OUT),
164 .me = THIS_MODULE, 202 .me = THIS_MODULE,
165 }, 203 },
204 {
205 .name = "SNAT",
206 .revision = 2,
207 .checkentry = xt_nat_checkentry,
208 .destroy = xt_nat_destroy,
209 .target = xt_snat_target_v2,
210 .targetsize = sizeof(struct nf_nat_range2),
211 .table = "nat",
212 .hooks = (1 << NF_INET_POST_ROUTING) |
213 (1 << NF_INET_LOCAL_IN),
214 .me = THIS_MODULE,
215 },
216 {
217 .name = "DNAT",
218 .revision = 2,
219 .target = xt_dnat_target_v2,
220 .targetsize = sizeof(struct nf_nat_range2),
221 .table = "nat",
222 .hooks = (1 << NF_INET_PRE_ROUTING) |
223 (1 << NF_INET_LOCAL_OUT),
224 .me = THIS_MODULE,
225 },
166}; 226};
167 227
168static int __init xt_nat_init(void) 228static int __init xt_nat_init(void)
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index a34f314a8c23..9cfef73b4107 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -37,21 +37,6 @@
37#include <net/netfilter/nf_log.h> 37#include <net/netfilter/nf_log.h>
38#include <linux/netfilter/xt_osf.h> 38#include <linux/netfilter/xt_osf.h>
39 39
40struct xt_osf_finger {
41 struct rcu_head rcu_head;
42 struct list_head finger_entry;
43 struct xt_osf_user_finger finger;
44};
45
46enum osf_fmatch_states {
47 /* Packet does not match the fingerprint */
48 FMATCH_WRONG = 0,
49 /* Packet matches the fingerprint */
50 FMATCH_OK,
51 /* Options do not match the fingerprint, but header does */
52 FMATCH_OPT_WRONG,
53};
54
55/* 40/*
56 * Indexed by dont-fragment bit. 41 * Indexed by dont-fragment bit.
57 * It is the only constant value in the fingerprint. 42 * It is the only constant value in the fingerprint.
@@ -164,200 +149,17 @@ static const struct nfnetlink_subsystem xt_osf_nfnetlink = {
164 .cb = xt_osf_nfnetlink_callbacks, 149 .cb = xt_osf_nfnetlink_callbacks,
165}; 150};
166 151
167static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info *info,
168 unsigned char f_ttl)
169{
170 const struct iphdr *ip = ip_hdr(skb);
171
172 if (info->flags & XT_OSF_TTL) {
173 if (info->ttl == XT_OSF_TTL_TRUE)
174 return ip->ttl == f_ttl;
175 if (info->ttl == XT_OSF_TTL_NOCHECK)
176 return 1;
177 else if (ip->ttl <= f_ttl)
178 return 1;
179 else {
180 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
181 int ret = 0;
182
183 for_ifa(in_dev) {
184 if (inet_ifa_match(ip->saddr, ifa)) {
185 ret = (ip->ttl == f_ttl);
186 break;
187 }
188 }
189 endfor_ifa(in_dev);
190
191 return ret;
192 }
193 }
194
195 return ip->ttl == f_ttl;
196}
197
198static bool 152static bool
199xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) 153xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
200{ 154{
201 const struct xt_osf_info *info = p->matchinfo; 155 const struct xt_osf_info *info = p->matchinfo;
202 const struct iphdr *ip = ip_hdr(skb);
203 const struct tcphdr *tcp;
204 struct tcphdr _tcph;
205 int fmatch = FMATCH_WRONG, fcount = 0;
206 unsigned int optsize = 0, check_WSS = 0;
207 u16 window, totlen, mss = 0;
208 bool df;
209 const unsigned char *optp = NULL, *_optp = NULL;
210 unsigned char opts[MAX_IPOPTLEN];
211 const struct xt_osf_finger *kf;
212 const struct xt_osf_user_finger *f;
213 struct net *net = xt_net(p); 156 struct net *net = xt_net(p);
214 157
215 if (!info) 158 if (!info)
216 return false; 159 return false;
217 160
218 tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph); 161 return nf_osf_match(skb, xt_family(p), xt_hooknum(p), xt_in(p),
219 if (!tcp) 162 xt_out(p), info, net, xt_osf_fingers);
220 return false;
221
222 if (!tcp->syn)
223 return false;
224
225 totlen = ntohs(ip->tot_len);
226 df = ntohs(ip->frag_off) & IP_DF;
227 window = ntohs(tcp->window);
228
229 if (tcp->doff * 4 > sizeof(struct tcphdr)) {
230 optsize = tcp->doff * 4 - sizeof(struct tcphdr);
231
232 _optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) +
233 sizeof(struct tcphdr), optsize, opts);
234 }
235
236 list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) {
237 int foptsize, optnum;
238
239 f = &kf->finger;
240
241 if (!(info->flags & XT_OSF_LOG) && strcmp(info->genre, f->genre))
242 continue;
243
244 optp = _optp;
245 fmatch = FMATCH_WRONG;
246
247 if (totlen != f->ss || !xt_osf_ttl(skb, info, f->ttl))
248 continue;
249
250 /*
251 * Should not happen if userspace parser was written correctly.
252 */
253 if (f->wss.wc >= OSF_WSS_MAX)
254 continue;
255
256 /* Check options */
257
258 foptsize = 0;
259 for (optnum = 0; optnum < f->opt_num; ++optnum)
260 foptsize += f->opt[optnum].length;
261
262 if (foptsize > MAX_IPOPTLEN ||
263 optsize > MAX_IPOPTLEN ||
264 optsize != foptsize)
265 continue;
266
267 check_WSS = f->wss.wc;
268
269 for (optnum = 0; optnum < f->opt_num; ++optnum) {
270 if (f->opt[optnum].kind == (*optp)) {
271 __u32 len = f->opt[optnum].length;
272 const __u8 *optend = optp + len;
273
274 fmatch = FMATCH_OK;
275
276 switch (*optp) {
277 case OSFOPT_MSS:
278 mss = optp[3];
279 mss <<= 8;
280 mss |= optp[2];
281
282 mss = ntohs((__force __be16)mss);
283 break;
284 case OSFOPT_TS:
285 break;
286 }
287
288 optp = optend;
289 } else
290 fmatch = FMATCH_OPT_WRONG;
291
292 if (fmatch != FMATCH_OK)
293 break;
294 }
295
296 if (fmatch != FMATCH_OPT_WRONG) {
297 fmatch = FMATCH_WRONG;
298
299 switch (check_WSS) {
300 case OSF_WSS_PLAIN:
301 if (f->wss.val == 0 || window == f->wss.val)
302 fmatch = FMATCH_OK;
303 break;
304 case OSF_WSS_MSS:
305 /*
306 * Some smart modems decrease mangle MSS to
307 * SMART_MSS_2, so we check standard, decreased
308 * and the one provided in the fingerprint MSS
309 * values.
310 */
311#define SMART_MSS_1 1460
312#define SMART_MSS_2 1448
313 if (window == f->wss.val * mss ||
314 window == f->wss.val * SMART_MSS_1 ||
315 window == f->wss.val * SMART_MSS_2)
316 fmatch = FMATCH_OK;
317 break;
318 case OSF_WSS_MTU:
319 if (window == f->wss.val * (mss + 40) ||
320 window == f->wss.val * (SMART_MSS_1 + 40) ||
321 window == f->wss.val * (SMART_MSS_2 + 40))
322 fmatch = FMATCH_OK;
323 break;
324 case OSF_WSS_MODULO:
325 if ((window % f->wss.val) == 0)
326 fmatch = FMATCH_OK;
327 break;
328 }
329 }
330
331 if (fmatch != FMATCH_OK)
332 continue;
333
334 fcount++;
335
336 if (info->flags & XT_OSF_LOG)
337 nf_log_packet(net, xt_family(p), xt_hooknum(p), skb,
338 xt_in(p), xt_out(p), NULL,
339 "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
340 f->genre, f->version, f->subtype,
341 &ip->saddr, ntohs(tcp->source),
342 &ip->daddr, ntohs(tcp->dest),
343 f->ttl - ip->ttl);
344
345 if ((info->flags & XT_OSF_LOG) &&
346 info->loglevel == XT_OSF_LOGLEVEL_FIRST)
347 break;
348 }
349
350 if (!fcount && (info->flags & XT_OSF_LOG))
351 nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p),
352 xt_out(p), NULL,
353 "Remote OS is not known: %pI4:%u -> %pI4:%u\n",
354 &ip->saddr, ntohs(tcp->source),
355 &ip->daddr, ntohs(tcp->dest));
356
357 if (fcount)
358 fmatch = FMATCH_OK;
359
360 return fmatch == FMATCH_OK;
361} 163}
362 164
363static struct xt_match xt_osf_match = { 165static struct xt_match xt_osf_match = {
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index c5904f629091..02fc343feb66 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -72,7 +72,7 @@ struct ovs_conntrack_info {
72 struct md_mark mark; 72 struct md_mark mark;
73 struct md_labels labels; 73 struct md_labels labels;
74#ifdef CONFIG_NF_NAT_NEEDED 74#ifdef CONFIG_NF_NAT_NEEDED
75 struct nf_nat_range range; /* Only present for SRC NAT and DST NAT. */ 75 struct nf_nat_range2 range; /* Only present for SRC NAT and DST NAT. */
76#endif 76#endif
77}; 77};
78 78
@@ -710,7 +710,7 @@ static bool skb_nfct_cached(struct net *net,
710 */ 710 */
711static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, 711static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
712 enum ip_conntrack_info ctinfo, 712 enum ip_conntrack_info ctinfo,
713 const struct nf_nat_range *range, 713 const struct nf_nat_range2 *range,
714 enum nf_nat_manip_type maniptype) 714 enum nf_nat_manip_type maniptype)
715{ 715{
716 int hooknum, nh_off, err = NF_ACCEPT; 716 int hooknum, nh_off, err = NF_ACCEPT;