aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-08-18 12:59:19 -0400
committerDavid S. Miller <davem@davemloft.net>2018-08-18 12:59:19 -0400
commit3fe49d699a9604eed851eb45e1e0adab0e25688e (patch)
treea9a307f2dc93f8aec686d9a0a9f22a0a54ae53bc
parentbfdd19ad80f203f42f05fd32a31c678c9c524ef9 (diff)
parentfeb9f55c33e5114127238a2c87c069b4f30d1f23 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
Pablo Neira Ayuso says: ==================== Netfilter/IPVS fixes for net The following patchset contains Netfilter/IPVS fixes for your net tree: 1) Infinite loop in IPVS when net namespace is released, from Tan Hu. 2) Do not show negative timeouts in ip_vs_conn by using the new jiffies_delta_to_msecs(), patches from Matteo Croce. 3) Set F_IFACE flag for linklocal addresses in ip6t_rpfilter, from Florian Westphal. 4) Fix overflow in set size allocation, from Taehee Yoo. 5) Use netlink_dump_start() from ctnetlink to fix memleak from the error path, again from Florian. 6) Register nfnetlink_subsys in last place, otherwise netns init path may lose race and see net->nft uninitialized data. This also reverts previous attempt to fix this by increase netns refcount, patches from Florian. 7) Remove conntrack entries on layer 4 protocol tracker module removal, from Florian. 8) Use GFP_KERNEL_ACCOUNT for xtables blob allocation, from Michal Hocko. 9) Get tproxy documentation in sync with existing codebase, from Mate Eckl. 10) Honor preset layer 3 protocol via ctx->family in the new nft_ct timeout infrastructure, from Harsha Sharma. 11) Let uapi nfnetlink_osf.h compile standalone with no errors, from Dmitry V. Levin. 12) Missing braces compilation warning in nft_tproxy, patch from Mate Eclk. 13) Disregard bogus check to bail out on non-anonymous sets from the dynamic set update extension. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/tproxy.txt34
-rw-r--r--include/linux/jiffies.h5
-rw-r--r--include/net/netfilter/nf_tables.h6
-rw-r--r--include/uapi/linux/netfilter/nfnetlink_osf.h2
-rw-r--r--include/uapi/linux/netfilter/xt_osf.h2
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c22
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c15
-rw-r--r--net/netfilter/nf_conntrack_netlink.c26
-rw-r--r--net/netfilter/nf_conntrack_proto.c15
-rw-r--r--net/netfilter/nf_tables_api.c38
-rw-r--r--net/netfilter/nfnetlink_acct.c29
-rw-r--r--net/netfilter/nft_chain_filter.c14
-rw-r--r--net/netfilter/nft_ct.c7
-rw-r--r--net/netfilter/nft_dynset.c2
-rw-r--r--net/netfilter/nft_set_bitmap.c6
-rw-r--r--net/netfilter/nft_set_hash.c8
-rw-r--r--net/netfilter/nft_set_rbtree.c4
-rw-r--r--net/netfilter/nft_tproxy.c4
-rw-r--r--net/netfilter/x_tables.c7
20 files changed, 163 insertions, 95 deletions
diff --git a/Documentation/networking/tproxy.txt b/Documentation/networking/tproxy.txt
index ec11429e1d42..b9a188823d9f 100644
--- a/Documentation/networking/tproxy.txt
+++ b/Documentation/networking/tproxy.txt
@@ -5,19 +5,28 @@ This feature adds Linux 2.2-like transparent proxy support to current kernels.
5To use it, enable the socket match and the TPROXY target in your kernel config. 5To use it, enable the socket match and the TPROXY target in your kernel config.
6You will need policy routing too, so be sure to enable that as well. 6You will need policy routing too, so be sure to enable that as well.
7 7
8From Linux 4.18 transparent proxy support is also available in nf_tables.
8 9
91. Making non-local sockets work 101. Making non-local sockets work
10================================ 11================================
11 12
12The idea is that you identify packets with destination address matching a local 13The idea is that you identify packets with destination address matching a local
13socket on your box, set the packet mark to a certain value, and then match on that 14socket on your box, set the packet mark to a certain value:
14value using policy routing to have those packets delivered locally:
15 15
16# iptables -t mangle -N DIVERT 16# iptables -t mangle -N DIVERT
17# iptables -t mangle -A PREROUTING -p tcp -m socket -j DIVERT 17# iptables -t mangle -A PREROUTING -p tcp -m socket -j DIVERT
18# iptables -t mangle -A DIVERT -j MARK --set-mark 1 18# iptables -t mangle -A DIVERT -j MARK --set-mark 1
19# iptables -t mangle -A DIVERT -j ACCEPT 19# iptables -t mangle -A DIVERT -j ACCEPT
20 20
21Alternatively you can do this in nft with the following commands:
22
23# nft add table filter
24# nft add chain filter divert "{ type filter hook prerouting priority -150; }"
25# nft add rule filter divert meta l4proto tcp socket transparent 1 meta mark set 1 accept
26
27And then match on that value using policy routing to have those packets
28delivered locally:
29
21# ip rule add fwmark 1 lookup 100 30# ip rule add fwmark 1 lookup 100
22# ip route add local 0.0.0.0/0 dev lo table 100 31# ip route add local 0.0.0.0/0 dev lo table 100
23 32
@@ -57,17 +66,28 @@ add rules like this to the iptables ruleset above:
57# iptables -t mangle -A PREROUTING -p tcp --dport 80 -j TPROXY \ 66# iptables -t mangle -A PREROUTING -p tcp --dport 80 -j TPROXY \
58 --tproxy-mark 0x1/0x1 --on-port 50080 67 --tproxy-mark 0x1/0x1 --on-port 50080
59 68
69Or the following rule to nft:
70
71# nft add rule filter divert tcp dport 80 tproxy to :50080 meta mark set 1 accept
72
60Note that for this to work you'll have to modify the proxy to enable (SOL_IP, 73Note that for this to work you'll have to modify the proxy to enable (SOL_IP,
61IP_TRANSPARENT) for the listening socket. 74IP_TRANSPARENT) for the listening socket.
62 75
76As an example implementation, tcprdr is available here:
77https://git.breakpoint.cc/cgit/fw/tcprdr.git/
78This tool is written by Florian Westphal and it was used for testing during the
79nf_tables implementation.
63 80
643. Iptables extensions 813. Iptables and nf_tables extensions
65====================== 82====================================
66 83
67To use tproxy you'll need to have the 'socket' and 'TPROXY' modules 84To use tproxy you'll need to have the following modules compiled for iptables:
68compiled for iptables. A patched version of iptables is available 85 - NETFILTER_XT_MATCH_SOCKET
69here: http://git.balabit.hu/?p=bazsi/iptables-tproxy.git 86 - NETFILTER_XT_TARGET_TPROXY
70 87
88Or the following modules for nf_tables:
89 - NFT_SOCKET
90 - NFT_TPROXY
71 91
724. Application support 924. Application support
73====================== 93======================
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index a27cf6652327..fa928242567d 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -447,6 +447,11 @@ static inline clock_t jiffies_delta_to_clock_t(long delta)
447 return jiffies_to_clock_t(max(0L, delta)); 447 return jiffies_to_clock_t(max(0L, delta));
448} 448}
449 449
450static inline unsigned int jiffies_delta_to_msecs(long delta)
451{
452 return jiffies_to_msecs(max(0L, delta));
453}
454
450extern unsigned long clock_t_to_jiffies(unsigned long x); 455extern unsigned long clock_t_to_jiffies(unsigned long x);
451extern u64 jiffies_64_to_clock_t(u64 x); 456extern u64 jiffies_64_to_clock_t(u64 x);
452extern u64 nsec_to_clock_t(u64 x); 457extern u64 nsec_to_clock_t(u64 x);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index dc417ef0a0c5..0f39ac487012 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -274,7 +274,7 @@ enum nft_set_class {
274 * @space: memory class 274 * @space: memory class
275 */ 275 */
276struct nft_set_estimate { 276struct nft_set_estimate {
277 unsigned int size; 277 u64 size;
278 enum nft_set_class lookup; 278 enum nft_set_class lookup;
279 enum nft_set_class space; 279 enum nft_set_class space;
280}; 280};
@@ -336,7 +336,7 @@ struct nft_set_ops {
336 const struct nft_set_elem *elem, 336 const struct nft_set_elem *elem,
337 unsigned int flags); 337 unsigned int flags);
338 338
339 unsigned int (*privsize)(const struct nlattr * const nla[], 339 u64 (*privsize)(const struct nlattr * const nla[],
340 const struct nft_set_desc *desc); 340 const struct nft_set_desc *desc);
341 bool (*estimate)(const struct nft_set_desc *desc, 341 bool (*estimate)(const struct nft_set_desc *desc,
342 u32 features, 342 u32 features,
@@ -1374,6 +1374,6 @@ struct nft_trans_flowtable {
1374 (((struct nft_trans_flowtable *)trans->data)->flowtable) 1374 (((struct nft_trans_flowtable *)trans->data)->flowtable)
1375 1375
1376int __init nft_chain_filter_init(void); 1376int __init nft_chain_filter_init(void);
1377void __exit nft_chain_filter_fini(void); 1377void nft_chain_filter_fini(void);
1378 1378
1379#endif /* _NET_NF_TABLES_H */ 1379#endif /* _NET_NF_TABLES_H */
diff --git a/include/uapi/linux/netfilter/nfnetlink_osf.h b/include/uapi/linux/netfilter/nfnetlink_osf.h
index 76a3527df5dd..272bc3195f2d 100644
--- a/include/uapi/linux/netfilter/nfnetlink_osf.h
+++ b/include/uapi/linux/netfilter/nfnetlink_osf.h
@@ -2,6 +2,8 @@
2#define _NF_OSF_H 2#define _NF_OSF_H
3 3
4#include <linux/types.h> 4#include <linux/types.h>
5#include <linux/ip.h>
6#include <linux/tcp.h>
5 7
6#define MAXGENRELEN 32 8#define MAXGENRELEN 32
7 9
diff --git a/include/uapi/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h
index 24102b5286ec..6e466236ca4b 100644
--- a/include/uapi/linux/netfilter/xt_osf.h
+++ b/include/uapi/linux/netfilter/xt_osf.h
@@ -21,8 +21,6 @@
21#define _XT_OSF_H 21#define _XT_OSF_H
22 22
23#include <linux/types.h> 23#include <linux/types.h>
24#include <linux/ip.h>
25#include <linux/tcp.h>
26#include <linux/netfilter/nfnetlink_osf.h> 24#include <linux/netfilter/nfnetlink_osf.h>
27 25
28#define XT_OSF_GENRE NF_OSF_GENRE 26#define XT_OSF_GENRE NF_OSF_GENRE
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 0fe61ede77c6..c3c6b09acdc4 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -26,6 +26,12 @@ static bool rpfilter_addr_unicast(const struct in6_addr *addr)
26 return addr_type & IPV6_ADDR_UNICAST; 26 return addr_type & IPV6_ADDR_UNICAST;
27} 27}
28 28
29static bool rpfilter_addr_linklocal(const struct in6_addr *addr)
30{
31 int addr_type = ipv6_addr_type(addr);
32 return addr_type & IPV6_ADDR_LINKLOCAL;
33}
34
29static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, 35static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
30 const struct net_device *dev, u8 flags) 36 const struct net_device *dev, u8 flags)
31{ 37{
@@ -48,7 +54,11 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
48 } 54 }
49 55
50 fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; 56 fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
51 if ((flags & XT_RPFILTER_LOOSE) == 0) 57
58 if (rpfilter_addr_linklocal(&iph->saddr)) {
59 lookup_flags |= RT6_LOOKUP_F_IFACE;
60 fl6.flowi6_oif = dev->ifindex;
61 } else if ((flags & XT_RPFILTER_LOOSE) == 0)
52 fl6.flowi6_oif = dev->ifindex; 62 fl6.flowi6_oif = dev->ifindex;
53 63
54 rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags); 64 rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 0edc62910ebf..5b2b17867cb1 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1117,24 +1117,28 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
1117#ifdef CONFIG_IP_VS_IPV6 1117#ifdef CONFIG_IP_VS_IPV6
1118 if (cp->af == AF_INET6) 1118 if (cp->af == AF_INET6)
1119 seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X " 1119 seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
1120 "%s %04X %-11s %7lu%s\n", 1120 "%s %04X %-11s %7u%s\n",
1121 ip_vs_proto_name(cp->protocol), 1121 ip_vs_proto_name(cp->protocol),
1122 &cp->caddr.in6, ntohs(cp->cport), 1122 &cp->caddr.in6, ntohs(cp->cport),
1123 &cp->vaddr.in6, ntohs(cp->vport), 1123 &cp->vaddr.in6, ntohs(cp->vport),
1124 dbuf, ntohs(cp->dport), 1124 dbuf, ntohs(cp->dport),
1125 ip_vs_state_name(cp), 1125 ip_vs_state_name(cp),
1126 (cp->timer.expires-jiffies)/HZ, pe_data); 1126 jiffies_delta_to_msecs(cp->timer.expires -
1127 jiffies) / 1000,
1128 pe_data);
1127 else 1129 else
1128#endif 1130#endif
1129 seq_printf(seq, 1131 seq_printf(seq,
1130 "%-3s %08X %04X %08X %04X" 1132 "%-3s %08X %04X %08X %04X"
1131 " %s %04X %-11s %7lu%s\n", 1133 " %s %04X %-11s %7u%s\n",
1132 ip_vs_proto_name(cp->protocol), 1134 ip_vs_proto_name(cp->protocol),
1133 ntohl(cp->caddr.ip), ntohs(cp->cport), 1135 ntohl(cp->caddr.ip), ntohs(cp->cport),
1134 ntohl(cp->vaddr.ip), ntohs(cp->vport), 1136 ntohl(cp->vaddr.ip), ntohs(cp->vport),
1135 dbuf, ntohs(cp->dport), 1137 dbuf, ntohs(cp->dport),
1136 ip_vs_state_name(cp), 1138 ip_vs_state_name(cp),
1137 (cp->timer.expires-jiffies)/HZ, pe_data); 1139 jiffies_delta_to_msecs(cp->timer.expires -
1140 jiffies) / 1000,
1141 pe_data);
1138 } 1142 }
1139 return 0; 1143 return 0;
1140} 1144}
@@ -1179,26 +1183,28 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
1179#ifdef CONFIG_IP_VS_IPV6 1183#ifdef CONFIG_IP_VS_IPV6
1180 if (cp->af == AF_INET6) 1184 if (cp->af == AF_INET6)
1181 seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X " 1185 seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
1182 "%s %04X %-11s %-6s %7lu\n", 1186 "%s %04X %-11s %-6s %7u\n",
1183 ip_vs_proto_name(cp->protocol), 1187 ip_vs_proto_name(cp->protocol),
1184 &cp->caddr.in6, ntohs(cp->cport), 1188 &cp->caddr.in6, ntohs(cp->cport),
1185 &cp->vaddr.in6, ntohs(cp->vport), 1189 &cp->vaddr.in6, ntohs(cp->vport),
1186 dbuf, ntohs(cp->dport), 1190 dbuf, ntohs(cp->dport),
1187 ip_vs_state_name(cp), 1191 ip_vs_state_name(cp),
1188 ip_vs_origin_name(cp->flags), 1192 ip_vs_origin_name(cp->flags),
1189 (cp->timer.expires-jiffies)/HZ); 1193 jiffies_delta_to_msecs(cp->timer.expires -
1194 jiffies) / 1000);
1190 else 1195 else
1191#endif 1196#endif
1192 seq_printf(seq, 1197 seq_printf(seq,
1193 "%-3s %08X %04X %08X %04X " 1198 "%-3s %08X %04X %08X %04X "
1194 "%s %04X %-11s %-6s %7lu\n", 1199 "%s %04X %-11s %-6s %7u\n",
1195 ip_vs_proto_name(cp->protocol), 1200 ip_vs_proto_name(cp->protocol),
1196 ntohl(cp->caddr.ip), ntohs(cp->cport), 1201 ntohl(cp->caddr.ip), ntohs(cp->cport),
1197 ntohl(cp->vaddr.ip), ntohs(cp->vport), 1202 ntohl(cp->vaddr.ip), ntohs(cp->vport),
1198 dbuf, ntohs(cp->dport), 1203 dbuf, ntohs(cp->dport),
1199 ip_vs_state_name(cp), 1204 ip_vs_state_name(cp),
1200 ip_vs_origin_name(cp->flags), 1205 ip_vs_origin_name(cp->flags),
1201 (cp->timer.expires-jiffies)/HZ); 1206 jiffies_delta_to_msecs(cp->timer.expires -
1207 jiffies) / 1000);
1202 } 1208 }
1203 return 0; 1209 return 0;
1204} 1210}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 0679dd101e72..7ca926a03b81 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1972,13 +1972,20 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
1972 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { 1972 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
1973 /* the destination server is not available */ 1973 /* the destination server is not available */
1974 1974
1975 if (sysctl_expire_nodest_conn(ipvs)) { 1975 __u32 flags = cp->flags;
1976
1977 /* when timer already started, silently drop the packet.*/
1978 if (timer_pending(&cp->timer))
1979 __ip_vs_conn_put(cp);
1980 else
1981 ip_vs_conn_put(cp);
1982
1983 if (sysctl_expire_nodest_conn(ipvs) &&
1984 !(flags & IP_VS_CONN_F_ONE_PACKET)) {
1976 /* try to expire the connection immediately */ 1985 /* try to expire the connection immediately */
1977 ip_vs_conn_expire_now(cp); 1986 ip_vs_conn_expire_now(cp);
1978 } 1987 }
1979 /* don't restart its timer, and silently 1988
1980 drop the packet. */
1981 __ip_vs_conn_put(cp);
1982 return NF_DROP; 1989 return NF_DROP;
1983 } 1990 }
1984 1991
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index f981bfa8db72..036207ecaf16 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -846,6 +846,21 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[])
846#endif 846#endif
847} 847}
848 848
849static int ctnetlink_start(struct netlink_callback *cb)
850{
851 const struct nlattr * const *cda = cb->data;
852 struct ctnetlink_filter *filter = NULL;
853
854 if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
855 filter = ctnetlink_alloc_filter(cda);
856 if (IS_ERR(filter))
857 return PTR_ERR(filter);
858 }
859
860 cb->data = filter;
861 return 0;
862}
863
849static int ctnetlink_filter_match(struct nf_conn *ct, void *data) 864static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
850{ 865{
851 struct ctnetlink_filter *filter = data; 866 struct ctnetlink_filter *filter = data;
@@ -1290,19 +1305,12 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
1290 1305
1291 if (nlh->nlmsg_flags & NLM_F_DUMP) { 1306 if (nlh->nlmsg_flags & NLM_F_DUMP) {
1292 struct netlink_dump_control c = { 1307 struct netlink_dump_control c = {
1308 .start = ctnetlink_start,
1293 .dump = ctnetlink_dump_table, 1309 .dump = ctnetlink_dump_table,
1294 .done = ctnetlink_done, 1310 .done = ctnetlink_done,
1311 .data = (void *)cda,
1295 }; 1312 };
1296 1313
1297 if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
1298 struct ctnetlink_filter *filter;
1299
1300 filter = ctnetlink_alloc_filter(cda);
1301 if (IS_ERR(filter))
1302 return PTR_ERR(filter);
1303
1304 c.data = filter;
1305 }
1306 return netlink_dump_start(ctnl, skb, nlh, &c); 1314 return netlink_dump_start(ctnl, skb, nlh, &c);
1307 } 1315 }
1308 1316
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 30070732ee50..9f14b0df6960 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -312,7 +312,9 @@ void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
312 __nf_ct_l4proto_unregister_one(l4proto); 312 __nf_ct_l4proto_unregister_one(l4proto);
313 mutex_unlock(&nf_ct_proto_mutex); 313 mutex_unlock(&nf_ct_proto_mutex);
314 314
315 synchronize_rcu(); 315 synchronize_net();
316 /* Remove all conntrack entries for this protocol */
317 nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
316} 318}
317EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one); 319EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
318 320
@@ -333,14 +335,17 @@ static void
333nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[], 335nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
334 unsigned int num_proto) 336 unsigned int num_proto)
335{ 337{
338 int i;
339
336 mutex_lock(&nf_ct_proto_mutex); 340 mutex_lock(&nf_ct_proto_mutex);
337 while (num_proto-- != 0) 341 for (i = 0; i < num_proto; i++)
338 __nf_ct_l4proto_unregister_one(l4proto[num_proto]); 342 __nf_ct_l4proto_unregister_one(l4proto[i]);
339 mutex_unlock(&nf_ct_proto_mutex); 343 mutex_unlock(&nf_ct_proto_mutex);
340 344
341 synchronize_net(); 345 synchronize_net();
342 /* Remove all contrack entries for this protocol */ 346
343 nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto); 347 for (i = 0; i < num_proto; i++)
348 nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto[i]);
344} 349}
345 350
346static int 351static int
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 67cdd5c4f4f5..1dca5683f59f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3354,7 +3354,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
3354 struct nft_set *set; 3354 struct nft_set *set;
3355 struct nft_ctx ctx; 3355 struct nft_ctx ctx;
3356 char *name; 3356 char *name;
3357 unsigned int size; 3357 u64 size;
3358 u64 timeout; 3358 u64 timeout;
3359 u32 ktype, dtype, flags, policy, gc_int, objtype; 3359 u32 ktype, dtype, flags, policy, gc_int, objtype;
3360 struct nft_set_desc desc; 3360 struct nft_set_desc desc;
@@ -5925,10 +5925,7 @@ static int nf_tables_flowtable_event(struct notifier_block *this,
5925 if (event != NETDEV_UNREGISTER) 5925 if (event != NETDEV_UNREGISTER)
5926 return 0; 5926 return 0;
5927 5927
5928 net = maybe_get_net(dev_net(dev)); 5928 net = dev_net(dev);
5929 if (!net)
5930 return 0;
5931
5932 mutex_lock(&net->nft.commit_mutex); 5929 mutex_lock(&net->nft.commit_mutex);
5933 list_for_each_entry(table, &net->nft.tables, list) { 5930 list_for_each_entry(table, &net->nft.tables, list) {
5934 list_for_each_entry(flowtable, &table->flowtables, list) { 5931 list_for_each_entry(flowtable, &table->flowtables, list) {
@@ -5936,7 +5933,7 @@ static int nf_tables_flowtable_event(struct notifier_block *this,
5936 } 5933 }
5937 } 5934 }
5938 mutex_unlock(&net->nft.commit_mutex); 5935 mutex_unlock(&net->nft.commit_mutex);
5939 put_net(net); 5936
5940 return NOTIFY_DONE; 5937 return NOTIFY_DONE;
5941} 5938}
5942 5939
@@ -7273,21 +7270,36 @@ static int __init nf_tables_module_init(void)
7273{ 7270{
7274 int err; 7271 int err;
7275 7272
7276 nft_chain_filter_init(); 7273 err = register_pernet_subsys(&nf_tables_net_ops);
7274 if (err < 0)
7275 return err;
7276
7277 err = nft_chain_filter_init();
7278 if (err < 0)
7279 goto err1;
7277 7280
7278 err = nf_tables_core_module_init(); 7281 err = nf_tables_core_module_init();
7279 if (err < 0) 7282 if (err < 0)
7280 return err; 7283 goto err2;
7281 7284
7282 err = nfnetlink_subsys_register(&nf_tables_subsys); 7285 err = register_netdevice_notifier(&nf_tables_flowtable_notifier);
7283 if (err < 0) 7286 if (err < 0)
7284 goto err; 7287 goto err3;
7285 7288
7286 register_netdevice_notifier(&nf_tables_flowtable_notifier); 7289 /* must be last */
7290 err = nfnetlink_subsys_register(&nf_tables_subsys);
7291 if (err < 0)
7292 goto err4;
7287 7293
7288 return register_pernet_subsys(&nf_tables_net_ops); 7294 return err;
7289err: 7295err4:
7296 unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
7297err3:
7290 nf_tables_core_module_exit(); 7298 nf_tables_core_module_exit();
7299err2:
7300 nft_chain_filter_fini();
7301err1:
7302 unregister_pernet_subsys(&nf_tables_net_ops);
7291 return err; 7303 return err;
7292} 7304}
7293 7305
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index a0e5adf0b3b6..8fa8bf7c48e6 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -238,29 +238,33 @@ static const struct nla_policy filter_policy[NFACCT_FILTER_MAX + 1] = {
238 [NFACCT_FILTER_VALUE] = { .type = NLA_U32 }, 238 [NFACCT_FILTER_VALUE] = { .type = NLA_U32 },
239}; 239};
240 240
241static struct nfacct_filter * 241static int nfnl_acct_start(struct netlink_callback *cb)
242nfacct_filter_alloc(const struct nlattr * const attr)
243{ 242{
244 struct nfacct_filter *filter; 243 const struct nlattr *const attr = cb->data;
245 struct nlattr *tb[NFACCT_FILTER_MAX + 1]; 244 struct nlattr *tb[NFACCT_FILTER_MAX + 1];
245 struct nfacct_filter *filter;
246 int err; 246 int err;
247 247
248 if (!attr)
249 return 0;
250
248 err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy, 251 err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy,
249 NULL); 252 NULL);
250 if (err < 0) 253 if (err < 0)
251 return ERR_PTR(err); 254 return err;
252 255
253 if (!tb[NFACCT_FILTER_MASK] || !tb[NFACCT_FILTER_VALUE]) 256 if (!tb[NFACCT_FILTER_MASK] || !tb[NFACCT_FILTER_VALUE])
254 return ERR_PTR(-EINVAL); 257 return -EINVAL;
255 258
256 filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL); 259 filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL);
257 if (!filter) 260 if (!filter)
258 return ERR_PTR(-ENOMEM); 261 return -ENOMEM;
259 262
260 filter->mask = ntohl(nla_get_be32(tb[NFACCT_FILTER_MASK])); 263 filter->mask = ntohl(nla_get_be32(tb[NFACCT_FILTER_MASK]));
261 filter->value = ntohl(nla_get_be32(tb[NFACCT_FILTER_VALUE])); 264 filter->value = ntohl(nla_get_be32(tb[NFACCT_FILTER_VALUE]));
265 cb->data = filter;
262 266
263 return filter; 267 return 0;
264} 268}
265 269
266static int nfnl_acct_get(struct net *net, struct sock *nfnl, 270static int nfnl_acct_get(struct net *net, struct sock *nfnl,
@@ -275,18 +279,11 @@ static int nfnl_acct_get(struct net *net, struct sock *nfnl,
275 if (nlh->nlmsg_flags & NLM_F_DUMP) { 279 if (nlh->nlmsg_flags & NLM_F_DUMP) {
276 struct netlink_dump_control c = { 280 struct netlink_dump_control c = {
277 .dump = nfnl_acct_dump, 281 .dump = nfnl_acct_dump,
282 .start = nfnl_acct_start,
278 .done = nfnl_acct_done, 283 .done = nfnl_acct_done,
284 .data = (void *)tb[NFACCT_FILTER],
279 }; 285 };
280 286
281 if (tb[NFACCT_FILTER]) {
282 struct nfacct_filter *filter;
283
284 filter = nfacct_filter_alloc(tb[NFACCT_FILTER]);
285 if (IS_ERR(filter))
286 return PTR_ERR(filter);
287
288 c.data = filter;
289 }
290 return netlink_dump_start(nfnl, skb, nlh, &c); 287 return netlink_dump_start(nfnl, skb, nlh, &c);
291 } 288 }
292 289
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index ea5b7c4944f6..3fd540b2c6ba 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -293,6 +293,13 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
293 if (strcmp(basechain->dev_name, dev->name) != 0) 293 if (strcmp(basechain->dev_name, dev->name) != 0)
294 return; 294 return;
295 295
296 /* UNREGISTER events are also happening on netns exit.
297 *
298 * Although nf_tables core releases all tables/chains, only
299 * this event handler provides guarantee that
300 * basechain.ops->dev is still accessible, so we cannot
301 * skip exiting net namespaces.
302 */
296 __nft_release_basechain(ctx); 303 __nft_release_basechain(ctx);
297 break; 304 break;
298 case NETDEV_CHANGENAME: 305 case NETDEV_CHANGENAME:
@@ -318,10 +325,6 @@ static int nf_tables_netdev_event(struct notifier_block *this,
318 event != NETDEV_CHANGENAME) 325 event != NETDEV_CHANGENAME)
319 return NOTIFY_DONE; 326 return NOTIFY_DONE;
320 327
321 ctx.net = maybe_get_net(ctx.net);
322 if (!ctx.net)
323 return NOTIFY_DONE;
324
325 mutex_lock(&ctx.net->nft.commit_mutex); 328 mutex_lock(&ctx.net->nft.commit_mutex);
326 list_for_each_entry(table, &ctx.net->nft.tables, list) { 329 list_for_each_entry(table, &ctx.net->nft.tables, list) {
327 if (table->family != NFPROTO_NETDEV) 330 if (table->family != NFPROTO_NETDEV)
@@ -338,7 +341,6 @@ static int nf_tables_netdev_event(struct notifier_block *this,
338 } 341 }
339 } 342 }
340 mutex_unlock(&ctx.net->nft.commit_mutex); 343 mutex_unlock(&ctx.net->nft.commit_mutex);
341 put_net(ctx.net);
342 344
343 return NOTIFY_DONE; 345 return NOTIFY_DONE;
344} 346}
@@ -392,7 +394,7 @@ int __init nft_chain_filter_init(void)
392 return 0; 394 return 0;
393} 395}
394 396
395void __exit nft_chain_filter_fini(void) 397void nft_chain_filter_fini(void)
396{ 398{
397 nft_chain_filter_bridge_fini(); 399 nft_chain_filter_bridge_fini();
398 nft_chain_filter_inet_fini(); 400 nft_chain_filter_inet_fini();
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 4855d4ce1c8f..26a8baebd072 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -832,12 +832,13 @@ static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx,
832 __u8 l4num; 832 __u8 l4num;
833 int ret; 833 int ret;
834 834
835 if (!tb[NFTA_CT_TIMEOUT_L3PROTO] || 835 if (!tb[NFTA_CT_TIMEOUT_L4PROTO] ||
836 !tb[NFTA_CT_TIMEOUT_L4PROTO] ||
837 !tb[NFTA_CT_TIMEOUT_DATA]) 836 !tb[NFTA_CT_TIMEOUT_DATA])
838 return -EINVAL; 837 return -EINVAL;
839 838
840 l3num = ntohs(nla_get_be16(tb[NFTA_CT_TIMEOUT_L3PROTO])); 839 if (tb[NFTA_CT_TIMEOUT_L3PROTO])
840 l3num = ntohs(nla_get_be16(tb[NFTA_CT_TIMEOUT_L3PROTO]));
841
841 l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]); 842 l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]);
842 priv->l4proto = l4num; 843 priv->l4proto = l4num;
843 844
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 81184c244d1a..6e91a37d57f2 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -187,8 +187,6 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
187 if (tb[NFTA_DYNSET_EXPR] != NULL) { 187 if (tb[NFTA_DYNSET_EXPR] != NULL) {
188 if (!(set->flags & NFT_SET_EVAL)) 188 if (!(set->flags & NFT_SET_EVAL))
189 return -EINVAL; 189 return -EINVAL;
190 if (!nft_set_is_anonymous(set))
191 return -EOPNOTSUPP;
192 190
193 priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]); 191 priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]);
194 if (IS_ERR(priv->expr)) 192 if (IS_ERR(priv->expr))
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 128bc16f52dd..f866bd41e5d2 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -248,13 +248,13 @@ static inline u32 nft_bitmap_size(u32 klen)
248 return ((2 << ((klen * BITS_PER_BYTE) - 1)) / BITS_PER_BYTE) << 1; 248 return ((2 << ((klen * BITS_PER_BYTE) - 1)) / BITS_PER_BYTE) << 1;
249} 249}
250 250
251static inline u32 nft_bitmap_total_size(u32 klen) 251static inline u64 nft_bitmap_total_size(u32 klen)
252{ 252{
253 return sizeof(struct nft_bitmap) + nft_bitmap_size(klen); 253 return sizeof(struct nft_bitmap) + nft_bitmap_size(klen);
254} 254}
255 255
256static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[], 256static u64 nft_bitmap_privsize(const struct nlattr * const nla[],
257 const struct nft_set_desc *desc) 257 const struct nft_set_desc *desc)
258{ 258{
259 u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); 259 u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
260 260
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 90c3e7e6cacb..015124e649cb 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -341,8 +341,8 @@ schedule:
341 nft_set_gc_interval(set)); 341 nft_set_gc_interval(set));
342} 342}
343 343
344static unsigned int nft_rhash_privsize(const struct nlattr * const nla[], 344static u64 nft_rhash_privsize(const struct nlattr * const nla[],
345 const struct nft_set_desc *desc) 345 const struct nft_set_desc *desc)
346{ 346{
347 return sizeof(struct nft_rhash); 347 return sizeof(struct nft_rhash);
348} 348}
@@ -585,8 +585,8 @@ cont:
585 } 585 }
586} 586}
587 587
588static unsigned int nft_hash_privsize(const struct nlattr * const nla[], 588static u64 nft_hash_privsize(const struct nlattr * const nla[],
589 const struct nft_set_desc *desc) 589 const struct nft_set_desc *desc)
590{ 590{
591 return sizeof(struct nft_hash) + 591 return sizeof(struct nft_hash) +
592 nft_hash_buckets(desc->size) * sizeof(struct hlist_head); 592 nft_hash_buckets(desc->size) * sizeof(struct hlist_head);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 9873d734b494..55e2d9215c0d 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -411,8 +411,8 @@ static void nft_rbtree_gc(struct work_struct *work)
411 nft_set_gc_interval(set)); 411 nft_set_gc_interval(set));
412} 412}
413 413
414static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[], 414static u64 nft_rbtree_privsize(const struct nlattr * const nla[],
415 const struct nft_set_desc *desc) 415 const struct nft_set_desc *desc)
416{ 416{
417 return sizeof(struct nft_rbtree); 417 return sizeof(struct nft_rbtree);
418} 418}
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index eff99dffc842..f92a82c73880 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -82,13 +82,15 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr,
82 const struct nft_tproxy *priv = nft_expr_priv(expr); 82 const struct nft_tproxy *priv = nft_expr_priv(expr);
83 struct sk_buff *skb = pkt->skb; 83 struct sk_buff *skb = pkt->skb;
84 const struct ipv6hdr *iph = ipv6_hdr(skb); 84 const struct ipv6hdr *iph = ipv6_hdr(skb);
85 struct in6_addr taddr = {0}; 85 struct in6_addr taddr;
86 int thoff = pkt->xt.thoff; 86 int thoff = pkt->xt.thoff;
87 struct udphdr _hdr, *hp; 87 struct udphdr _hdr, *hp;
88 __be16 tport = 0; 88 __be16 tport = 0;
89 struct sock *sk; 89 struct sock *sk;
90 int l4proto; 90 int l4proto;
91 91
92 memset(&taddr, 0, sizeof(taddr));
93
92 if (!pkt->tprot_set) { 94 if (!pkt->tprot_set) {
93 regs->verdict.code = NFT_BREAK; 95 regs->verdict.code = NFT_BREAK;
94 return; 96 return;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index d0d8397c9588..aecadd471e1d 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1178,12 +1178,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
1178 if (sz < sizeof(*info) || sz >= XT_MAX_TABLE_SIZE) 1178 if (sz < sizeof(*info) || sz >= XT_MAX_TABLE_SIZE)
1179 return NULL; 1179 return NULL;
1180 1180
1181 /* __GFP_NORETRY is not fully supported by kvmalloc but it should 1181 info = kvmalloc(sz, GFP_KERNEL_ACCOUNT);
1182 * work reasonably well if sz is too large and bail out rather
1183 * than shoot all processes down before realizing there is nothing
1184 * more to reclaim.
1185 */
1186 info = kvmalloc(sz, GFP_KERNEL | __GFP_NORETRY);
1187 if (!info) 1182 if (!info)
1188 return NULL; 1183 return NULL;
1189 1184