From ee586bbc28fb7128133457cf711880d13a3b7ce4 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 16 Feb 2015 18:54:04 +0100
Subject: netfilter: reject: don't send icmp error if csum is invalid

tcp resets are never emitted if the packet that triggers the
reject/reset has an invalid checksum.

For icmp error responses there was no such check.
It allows to distinguish icmp response generated via

iptables -I INPUT -p udp --dport 42 -j REJECT

and those emitted by network stack (won't respond if csum is invalid,
REJECT does).

Arguably its possible to avoid this by using conntrack and only
using REJECT with -m conntrack NEW/RELATED.

However, this doesn't work when connection tracking is not in use
or when using nf_conntrack_checksum=0.

Furthermore, sending errors in response to invalid csums doesn't make
much sense so just add similar test as in nf_send_reset.

Validate csum if needed and only send the response if it is ok.

Reference: http://bugzilla.redhat.com/show_bug.cgi?id=1169829
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/ipt_REJECT.c      | 17 +++++++++--------
 net/ipv4/netfilter/nf_reject_ipv4.c  | 23 +++++++++++++++++++++++
 net/ipv4/netfilter/nft_reject_ipv4.c |  3 ++-
 net/ipv6/netfilter/nf_reject_ipv6.c  | 35 +++++++++++++++++++++++++++++++++++
 net/netfilter/nft_reject_inet.c      |  6 ++++--
 5 files changed, 73 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 8f48f5517e33..87907d4bd259 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -34,31 +34,32 @@ static unsigned int
 reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ipt_reject_info *reject = par->targinfo;
+	int hook = par->hooknum;
 
 	switch (reject->with) {
 	case IPT_ICMP_NET_UNREACHABLE:
-		nf_send_unreach(skb, ICMP_NET_UNREACH);
+		nf_send_unreach(skb, ICMP_NET_UNREACH, hook);
 		break;
 	case IPT_ICMP_HOST_UNREACHABLE:
-		nf_send_unreach(skb, ICMP_HOST_UNREACH);
+		nf_send_unreach(skb, ICMP_HOST_UNREACH, hook);
 		break;
 	case IPT_ICMP_PROT_UNREACHABLE:
-		nf_send_unreach(skb, ICMP_PROT_UNREACH);
+		nf_send_unreach(skb, ICMP_PROT_UNREACH, hook);
 		break;
 	case IPT_ICMP_PORT_UNREACHABLE:
-		nf_send_unreach(skb, ICMP_PORT_UNREACH);
+		nf_send_unreach(skb, ICMP_PORT_UNREACH, hook);
 		break;
 	case IPT_ICMP_NET_PROHIBITED:
-		nf_send_unreach(skb, ICMP_NET_ANO);
+		nf_send_unreach(skb, ICMP_NET_ANO, hook);
 		break;
 	case IPT_ICMP_HOST_PROHIBITED:
-		nf_send_unreach(skb, ICMP_HOST_ANO);
+		nf_send_unreach(skb, ICMP_HOST_ANO, hook);
 		break;
 	case IPT_ICMP_ADMIN_PROHIBITED:
-		nf_send_unreach(skb, ICMP_PKT_FILTERED);
+		nf_send_unreach(skb, ICMP_PKT_FILTERED, hook);
 		break;
 	case IPT_TCP_RESET:
-		nf_send_reset(skb, par->hooknum);
+		nf_send_reset(skb, hook);
 	case IPT_ICMP_ECHOREPLY:
 		/* Doesn't happen. */
 		break;
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 536da7bc598a..b7405eb7f1ef 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -164,4 +164,27 @@ void nf_send_reset(struct sk_buff *oldskb, int hook)
 }
 EXPORT_SYMBOL_GPL(nf_send_reset);
 
+void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
+{
+	struct iphdr *iph = ip_hdr(skb_in);
+	u8 proto;
+
+	if (skb_in->csum_bad || iph->frag_off & htons(IP_OFFSET))
+		return;
+
+	if (skb_csum_unnecessary(skb_in)) {
+		icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
+		return;
+	}
+
+	if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP)
+		proto = iph->protocol;
+	else
+		proto = 0;
+
+	if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0)
+		icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
+}
+EXPORT_SYMBOL_GPL(nf_send_unreach);
+
 MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index d729542bd1b7..16a5d4d73d75 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -27,7 +27,8 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr,
 
 	switch (priv->type) {
 	case NFT_REJECT_ICMP_UNREACH:
-		nf_send_unreach(pkt->skb, priv->icmp_code);
+		nf_send_unreach(pkt->skb, priv->icmp_code,
+				pkt->ops->hooknum);
 		break;
 	case NFT_REJECT_TCP_RST:
 		nf_send_reset(pkt->skb, pkt->ops->hooknum);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index d05b36440e8b..68e0bb4db1bf 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -208,4 +208,39 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
 }
 EXPORT_SYMBOL_GPL(nf_send_reset6);
 
+static bool reject6_csum_ok(struct sk_buff *skb, int hook)
+{
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	int thoff;
+	__be16 fo;
+	u8 proto;
+
+	if (skb->csum_bad)
+		return false;
+
+	if (skb_csum_unnecessary(skb))
+		return true;
+
+	proto = ip6h->nexthdr;
+	thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo);
+
+	if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
+		return false;
+
+	return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
+}
+
+void nf_send_unreach6(struct net *net, struct sk_buff *skb_in,
+		      unsigned char code, unsigned int hooknum)
+{
+	if (!reject6_csum_ok(skb_in, hooknum))
+		return;
+
+	if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
+		skb_in->dev = net->loopback_dev;
+
+	icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
+}
+EXPORT_SYMBOL_GPL(nf_send_unreach6);
+
 MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 7b5f9d58680a..92877114aff4 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -28,14 +28,16 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
 	case NFPROTO_IPV4:
 		switch (priv->type) {
 		case NFT_REJECT_ICMP_UNREACH:
-			nf_send_unreach(pkt->skb, priv->icmp_code);
+			nf_send_unreach(pkt->skb, priv->icmp_code,
+					pkt->ops->hooknum);
 			break;
 		case NFT_REJECT_TCP_RST:
 			nf_send_reset(pkt->skb, pkt->ops->hooknum);
 			break;
 		case NFT_REJECT_ICMPX_UNREACH:
 			nf_send_unreach(pkt->skb,
-					nft_reject_icmp_code(priv->icmp_code));
+					nft_reject_icmp_code(priv->icmp_code),
+					pkt->ops->hooknum);
 			break;
 		}
 		break;
-- 
cgit v1.2.2


From 72500bc11e4da570bd66c0965e2dc74f52c8aba2 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 16 Feb 2015 18:57:53 +0100
Subject: netfilter: bridge: rework reject handling

bridge reject handling is not straightforward, there are many subtle
differences depending on configuration.

skb->dev is either the bridge port (PRE_ROUTING) or the bridge
itself (INPUT), so we need to use indev instead.

Also, checksum validation will only work reliably if we trim skb
according to the l3 header size.

While at it, add csum validation for ipv6 and skip existing tests
if skb was already checked e.g. by GRO.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/nft_reject_bridge.c | 84 +++++++++++++++++++++++++-------
 1 file changed, 66 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 3244aead0926..5c6c96585acd 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -21,6 +21,7 @@
 #include <net/ip.h>
 #include <net/ip6_checksum.h>
 #include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv6.h>
 #include "../br_private.h"
 
 static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb,
@@ -36,7 +37,12 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb,
 	skb_pull(nskb, ETH_HLEN);
 }
 
-static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook)
+/* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT)
+ * or the bridge port (NF_BRIDGE PREROUTING).
+ */
+static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb,
+					    const struct net_device *dev,
+					    int hook)
 {
 	struct sk_buff *nskb;
 	struct iphdr *niph;
@@ -65,11 +71,12 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook)
 
 	nft_reject_br_push_etherhdr(oldskb, nskb);
 
-	br_deliver(br_port_get_rcu(oldskb->dev), nskb);
+	br_deliver(br_port_get_rcu(dev), nskb);
 }
 
-static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook,
-					  u8 code)
+static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb,
+					  const struct net_device *dev,
+					  int hook, u8 code)
 {
 	struct sk_buff *nskb;
 	struct iphdr *niph;
@@ -77,8 +84,9 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook,
 	unsigned int len;
 	void *payload;
 	__wsum csum;
+	u8 proto;
 
-	if (!nft_bridge_iphdr_validate(oldskb))
+	if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb))
 		return;
 
 	/* IP header checks: fragment. */
@@ -91,7 +99,17 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook,
 	if (!pskb_may_pull(oldskb, len))
 		return;
 
-	if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), 0))
+	if (pskb_trim_rcsum(oldskb, htons(ip_hdr(oldskb)->tot_len)))
+		return;
+
+	if (ip_hdr(oldskb)->protocol == IPPROTO_TCP ||
+	    ip_hdr(oldskb)->protocol == IPPROTO_UDP)
+		proto = ip_hdr(oldskb)->protocol;
+	else
+		proto = 0;
+
+	if (!skb_csum_unnecessary(oldskb) &&
+	    nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto))
 		return;
 
 	nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmphdr) +
@@ -120,11 +138,13 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook,
 
 	nft_reject_br_push_etherhdr(oldskb, nskb);
 
-	br_deliver(br_port_get_rcu(oldskb->dev), nskb);
+	br_deliver(br_port_get_rcu(dev), nskb);
 }
 
 static void nft_reject_br_send_v6_tcp_reset(struct net *net,
-					    struct sk_buff *oldskb, int hook)
+					    struct sk_buff *oldskb,
+					    const struct net_device *dev,
+					    int hook)
 {
 	struct sk_buff *nskb;
 	const struct tcphdr *oth;
@@ -152,12 +172,37 @@ static void nft_reject_br_send_v6_tcp_reset(struct net *net,
 
 	nft_reject_br_push_etherhdr(oldskb, nskb);
 
-	br_deliver(br_port_get_rcu(oldskb->dev), nskb);
+	br_deliver(br_port_get_rcu(dev), nskb);
+}
+
+static bool reject6_br_csum_ok(struct sk_buff *skb, int hook)
+{
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	int thoff;
+	__be16 fo;
+	u8 proto = ip6h->nexthdr;
+
+	if (skb->csum_bad)
+		return false;
+
+	if (skb_csum_unnecessary(skb))
+		return true;
+
+	if (ip6h->payload_len &&
+	    pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h)))
+		return false;
+
+	thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo);
+	if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
+		return false;
+
+	return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
 }
 
 static void nft_reject_br_send_v6_unreach(struct net *net,
-					  struct sk_buff *oldskb, int hook,
-					  u8 code)
+					  struct sk_buff *oldskb,
+					  const struct net_device *dev,
+					  int hook, u8 code)
 {
 	struct sk_buff *nskb;
 	struct ipv6hdr *nip6h;
@@ -176,6 +221,9 @@ static void nft_reject_br_send_v6_unreach(struct net *net,
 	if (!pskb_may_pull(oldskb, len))
 		return;
 
+	if (!reject6_br_csum_ok(oldskb, hook))
+		return;
+
 	nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmp6hdr) +
 			 LL_MAX_HEADER + len, GFP_ATOMIC);
 	if (!nskb)
@@ -205,7 +253,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net,
 
 	nft_reject_br_push_etherhdr(oldskb, nskb);
 
-	br_deliver(br_port_get_rcu(oldskb->dev), nskb);
+	br_deliver(br_port_get_rcu(dev), nskb);
 }
 
 static void nft_reject_bridge_eval(const struct nft_expr *expr,
@@ -224,16 +272,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
 	case htons(ETH_P_IP):
 		switch (priv->type) {
 		case NFT_REJECT_ICMP_UNREACH:
-			nft_reject_br_send_v4_unreach(pkt->skb,
+			nft_reject_br_send_v4_unreach(pkt->skb, pkt->in,
 						      pkt->ops->hooknum,
 						      priv->icmp_code);
 			break;
 		case NFT_REJECT_TCP_RST:
-			nft_reject_br_send_v4_tcp_reset(pkt->skb,
+			nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in,
 							pkt->ops->hooknum);
 			break;
 		case NFT_REJECT_ICMPX_UNREACH:
-			nft_reject_br_send_v4_unreach(pkt->skb,
+			nft_reject_br_send_v4_unreach(pkt->skb, pkt->in,
 						      pkt->ops->hooknum,
 						      nft_reject_icmp_code(priv->icmp_code));
 			break;
@@ -242,16 +290,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
 	case htons(ETH_P_IPV6):
 		switch (priv->type) {
 		case NFT_REJECT_ICMP_UNREACH:
-			nft_reject_br_send_v6_unreach(net, pkt->skb,
+			nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in,
 						      pkt->ops->hooknum,
 						      priv->icmp_code);
 			break;
 		case NFT_REJECT_TCP_RST:
-			nft_reject_br_send_v6_tcp_reset(net, pkt->skb,
+			nft_reject_br_send_v6_tcp_reset(net, pkt->skb, pkt->in,
 							pkt->ops->hooknum);
 			break;
 		case NFT_REJECT_ICMPX_UNREACH:
-			nft_reject_br_send_v6_unreach(net, pkt->skb,
+			nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in,
 						      pkt->ops->hooknum,
 						      nft_reject_icmpv6_code(priv->icmp_code));
 			break;
-- 
cgit v1.2.2


From 43270b1bc5f1e33522dacf3d3b9175c29404c36c Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 2 Mar 2015 14:40:39 +0100
Subject: netfilter: ipt_CLUSTERIP: deprecate it in favour of xt_cluster

xt_cluster supersedes ipt_CLUSTERIP since it can be also used in
gateway configurations (not only from the backend side).

ipt_CLUSTER is also known to leak the netdev that it uses on
device removal, which requires a rather large fix to workaround
the problem: http://patchwork.ozlabs.org/patch/358629/

So let's deprecate this so we can probably kill code this in the
future.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index e90f83a3415b..f75e9df5e017 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -418,6 +418,13 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
 	if (ret < 0)
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
+
+	if (!par->net->xt.clusterip_deprecated_warning) {
+		pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
+			"use xt_cluster instead\n");
+		par->net->xt.clusterip_deprecated_warning = true;
+	}
+
 	return ret;
 }
 
-- 
cgit v1.2.2


From 01ef16c2dd2e9a77fbd94eb0314c4787ab8f7113 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 3 Mar 2015 20:10:04 +0000
Subject: netfilter: nf_tables: minor tracing cleanups

The tracing code is squeezed between multiple related parts of the
evaluation code, move it out. Also add an inline wrapper for the
reoccuring test for skb->nf_trace.

Small code savings in nft_do_chain():

  nft_trace_packet          | -137
  nft_do_chain              |   -8
 2 functions changed, 145 bytes removed, diff: -145

net/netfilter/nf_tables_core.c:
  __nft_trace_packet | +137
 1 function changed, 137 bytes added, diff: +137

net/netfilter/nf_tables_core.o:
 3 functions changed, 137 bytes added, 145 bytes removed, diff: -8

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_core.c | 98 +++++++++++++++++++++---------------------
 1 file changed, 49 insertions(+), 49 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 3b90eb2b2c55..074067d4fc1e 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -21,6 +21,48 @@
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_log.h>
 
+enum nft_trace {
+	NFT_TRACE_RULE,
+	NFT_TRACE_RETURN,
+	NFT_TRACE_POLICY,
+};
+
+static const char *const comments[] = {
+	[NFT_TRACE_RULE]	= "rule",
+	[NFT_TRACE_RETURN]	= "return",
+	[NFT_TRACE_POLICY]	= "policy",
+};
+
+static struct nf_loginfo trace_loginfo = {
+	.type = NF_LOG_TYPE_LOG,
+	.u = {
+		.log = {
+			.level = 4,
+			.logflags = NF_LOG_MASK,
+	        },
+	},
+};
+
+static void __nft_trace_packet(const struct nft_pktinfo *pkt,
+			       const struct nft_chain *chain,
+			       int rulenum, enum nft_trace type)
+{
+	struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
+
+	nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
+		      pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
+		      chain->table->name, chain->name, comments[type],
+		      rulenum);
+}
+
+static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
+				    const struct nft_chain *chain,
+				    int rulenum, enum nft_trace type)
+{
+	if (unlikely(pkt->skb->nf_trace))
+		__nft_trace_packet(pkt, chain, rulenum, type);
+}
+
 static void nft_cmp_fast_eval(const struct nft_expr *expr,
 			      struct nft_data data[NFT_REG_MAX + 1])
 {
@@ -66,40 +108,6 @@ struct nft_jumpstack {
 	int			rulenum;
 };
 
-enum nft_trace {
-	NFT_TRACE_RULE,
-	NFT_TRACE_RETURN,
-	NFT_TRACE_POLICY,
-};
-
-static const char *const comments[] = {
-	[NFT_TRACE_RULE]	= "rule",
-	[NFT_TRACE_RETURN]	= "return",
-	[NFT_TRACE_POLICY]	= "policy",
-};
-
-static struct nf_loginfo trace_loginfo = {
-	.type = NF_LOG_TYPE_LOG,
-	.u = {
-		.log = {
-			.level = 4,
-			.logflags = NF_LOG_MASK,
-	        },
-	},
-};
-
-static void nft_trace_packet(const struct nft_pktinfo *pkt,
-			     const struct nft_chain *chain,
-			     int rulenum, enum nft_trace type)
-{
-	struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
-
-	nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
-		      pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
-		      chain->table->name, chain->name, comments[type],
-		      rulenum);
-}
-
 unsigned int
 nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
 {
@@ -146,8 +154,7 @@ next_rule:
 			data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
 			continue;
 		case NFT_CONTINUE:
-			if (unlikely(pkt->skb->nf_trace))
-				nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+			nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
 			continue;
 		}
 		break;
@@ -157,16 +164,13 @@ next_rule:
 	case NF_ACCEPT:
 	case NF_DROP:
 	case NF_QUEUE:
-		if (unlikely(pkt->skb->nf_trace))
-			nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
-
+		nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
 		return data[NFT_REG_VERDICT].verdict;
 	}
 
 	switch (data[NFT_REG_VERDICT].verdict) {
 	case NFT_JUMP:
-		if (unlikely(pkt->skb->nf_trace))
-			nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+		nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
 
 		BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
 		jumpstack[stackptr].chain = chain;
@@ -176,18 +180,15 @@ next_rule:
 		chain = data[NFT_REG_VERDICT].chain;
 		goto do_chain;
 	case NFT_GOTO:
-		if (unlikely(pkt->skb->nf_trace))
-			nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+		nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
 
 		chain = data[NFT_REG_VERDICT].chain;
 		goto do_chain;
 	case NFT_RETURN:
-		if (unlikely(pkt->skb->nf_trace))
-			nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
+		nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
 		break;
 	case NFT_CONTINUE:
-		if (unlikely(pkt->skb->nf_trace && !(chain->flags & NFT_BASE_CHAIN)))
-			nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN);
+		nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN);
 		break;
 	default:
 		WARN_ON(1);
@@ -201,8 +202,7 @@ next_rule:
 		goto next_rule;
 	}
 
-	if (unlikely(pkt->skb->nf_trace))
-		nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
+	nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
 
 	rcu_read_lock_bh();
 	stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
-- 
cgit v1.2.2


From 354bf5a0d794a34dc98ed25e72f460b3b360c174 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 3 Mar 2015 20:10:05 +0000
Subject: netfilter: nf_tables: consolidate tracing invocations

* JUMP and GOTO are equivalent except for JUMP pushing the current
  context to the stack

* RETURN and implicit RETURN (CONTINUE) are equivalent except that
  the logged rule number differs

Result:

  nft_do_chain              | -112
 1 function changed, 112 bytes removed, diff: -112

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_core.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 074067d4fc1e..77165bf023f3 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -170,26 +170,23 @@ next_rule:
 
 	switch (data[NFT_REG_VERDICT].verdict) {
 	case NFT_JUMP:
-		nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
-
 		BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
 		jumpstack[stackptr].chain = chain;
 		jumpstack[stackptr].rule  = rule;
 		jumpstack[stackptr].rulenum = rulenum;
 		stackptr++;
-		chain = data[NFT_REG_VERDICT].chain;
-		goto do_chain;
+		/* fall through */
 	case NFT_GOTO:
 		nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
 
 		chain = data[NFT_REG_VERDICT].chain;
 		goto do_chain;
+	case NFT_CONTINUE:
+		rulenum++;
+		/* fall through */
 	case NFT_RETURN:
 		nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
 		break;
-	case NFT_CONTINUE:
-		nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN);
-		break;
 	default:
 		WARN_ON(1);
 	}
-- 
cgit v1.2.2


From f04e599e20d7ee9b9e5069c7d1ff59c21b9bf4c2 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 5 Mar 2015 14:56:15 +0100
Subject: netfilter: nf_tables: consolidate Kconfig options

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/Kconfig | 38 +++++++++++++++++++++-----------------
 net/ipv6/netfilter/Kconfig | 18 +++++++++++-------
 net/netfilter/Kconfig      | 20 +++++---------------
 3 files changed, 37 insertions(+), 39 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 59f883d9cadf..fb20f363151f 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -36,24 +36,16 @@ config NF_CONNTRACK_PROC_COMPAT
 
 	  If unsure, say Y.
 
-config NF_LOG_ARP
-	tristate "ARP packet logging"
-	default m if NETFILTER_ADVANCED=n
-	select NF_LOG_COMMON
-
-config NF_LOG_IPV4
-	tristate "IPv4 packet logging"
-	default m if NETFILTER_ADVANCED=n
-	select NF_LOG_COMMON
+if NF_TABLES
 
 config NF_TABLES_IPV4
-	depends on NF_TABLES
 	tristate "IPv4 nf_tables support"
 	help
 	  This option enables the IPv4 support for nf_tables.
 
+if NF_TABLES_IPV4
+
 config NFT_CHAIN_ROUTE_IPV4
-	depends on NF_TABLES_IPV4
 	tristate "IPv4 nf_tables route chain support"
 	help
 	  This option enables the "route" chain for IPv4 in nf_tables. This
@@ -61,22 +53,34 @@ config NFT_CHAIN_ROUTE_IPV4
 	  fields such as the source, destination, type of service and
 	  the packet mark.
 
-config NF_REJECT_IPV4
-	tristate "IPv4 packet rejection"
-	default m if NETFILTER_ADVANCED=n
-
 config NFT_REJECT_IPV4
-	depends on NF_TABLES_IPV4
 	select NF_REJECT_IPV4
 	default NFT_REJECT
 	tristate
 
+endif # NF_TABLES_IPV4
+
 config NF_TABLES_ARP
-	depends on NF_TABLES
 	tristate "ARP nf_tables support"
 	help
 	  This option enables the ARP support for nf_tables.
 
+endif # NF_TABLES
+
+config NF_LOG_ARP
+	tristate "ARP packet logging"
+	default m if NETFILTER_ADVANCED=n
+	select NF_LOG_COMMON
+
+config NF_LOG_IPV4
+	tristate "IPv4 packet logging"
+	default m if NETFILTER_ADVANCED=n
+	select NF_LOG_COMMON
+
+config NF_REJECT_IPV4
+	tristate "IPv4 packet rejection"
+	default m if NETFILTER_ADVANCED=n
+
 config NF_NAT_IPV4
 	tristate "IPv4 NAT"
 	depends on NF_CONNTRACK_IPV4
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index a069822936e6..ca6998345b42 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -25,14 +25,16 @@ config NF_CONNTRACK_IPV6
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+if NF_TABLES
+
 config NF_TABLES_IPV6
-	depends on NF_TABLES
 	tristate "IPv6 nf_tables support"
 	help
 	  This option enables the IPv6 support for nf_tables.
 
+if NF_TABLES_IPV6
+
 config NFT_CHAIN_ROUTE_IPV6
-	depends on NF_TABLES_IPV6
 	tristate "IPv6 nf_tables route chain support"
 	help
 	  This option enables the "route" chain for IPv6 in nf_tables. This
@@ -40,16 +42,18 @@ config NFT_CHAIN_ROUTE_IPV6
 	  fields such as the source, destination, flowlabel, hop-limit and
 	  the packet mark.
 
-config NF_REJECT_IPV6
-	tristate "IPv6 packet rejection"
-	default m if NETFILTER_ADVANCED=n
-
 config NFT_REJECT_IPV6
-	depends on NF_TABLES_IPV6
 	select NF_REJECT_IPV6
 	default NFT_REJECT
 	tristate
 
+endif # NF_TABLES_IPV6
+endif # NF_TABLES
+
+config NF_REJECT_IPV6
+	tristate "IPv6 packet rejection"
+	default m if NETFILTER_ADVANCED=n
+
 config NF_LOG_IPV6
 	tristate "IPv6 packet logging"
 	default m if NETFILTER_ADVANCED=n
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index c68c3b441381..971cd7526f4b 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -438,8 +438,10 @@ config NF_TABLES
 
 	  To compile it as a module, choose M here.
 
+if NF_TABLES
+
 config NF_TABLES_INET
-	depends on NF_TABLES && IPV6
+	depends on IPV6
 	select NF_TABLES_IPV4
 	select NF_TABLES_IPV6
 	tristate "Netfilter nf_tables mixed IPv4/IPv6 tables support"
@@ -447,21 +449,18 @@ config NF_TABLES_INET
 	  This option enables support for a mixed IPv4/IPv6 "inet" table.
 
 config NFT_EXTHDR
-	depends on NF_TABLES
 	tristate "Netfilter nf_tables IPv6 exthdr module"
 	help
 	  This option adds the "exthdr" expression that you can use to match
 	  IPv6 extension headers.
 
 config NFT_META
-	depends on NF_TABLES
 	tristate "Netfilter nf_tables meta module"
 	help
 	  This option adds the "meta" expression that you can use to match and
 	  to set packet metainformation such as the packet mark.
 
 config NFT_CT
-	depends on NF_TABLES
 	depends on NF_CONNTRACK
 	tristate "Netfilter nf_tables conntrack module"
 	help
@@ -469,42 +468,36 @@ config NFT_CT
 	  connection tracking information such as the flow state.
 
 config NFT_RBTREE
-	depends on NF_TABLES
 	tristate "Netfilter nf_tables rbtree set module"
 	help
 	  This option adds the "rbtree" set type (Red Black tree) that is used
 	  to build interval-based sets.
 
 config NFT_HASH
-	depends on NF_TABLES
 	tristate "Netfilter nf_tables hash set module"
 	help
 	  This option adds the "hash" set type that is used to build one-way
 	  mappings between matchings and actions.
 
 config NFT_COUNTER
-	depends on NF_TABLES
 	tristate "Netfilter nf_tables counter module"
 	help
 	  This option adds the "counter" expression that you can use to
 	  include packet and byte counters in a rule.
 
 config NFT_LOG
-	depends on NF_TABLES
 	tristate "Netfilter nf_tables log module"
 	help
 	  This option adds the "log" expression that you can use to log
 	  packets matching some criteria.
 
 config NFT_LIMIT
-	depends on NF_TABLES
 	tristate "Netfilter nf_tables limit module"
 	help
 	  This option adds the "limit" expression that you can use to
 	  ratelimit rule matchings.
 
 config NFT_MASQ
-	depends on NF_TABLES
 	depends on NF_CONNTRACK
 	depends on NF_NAT
 	tristate "Netfilter nf_tables masquerade support"
@@ -513,7 +506,6 @@ config NFT_MASQ
 	  to perform NAT in the masquerade flavour.
 
 config NFT_REDIR
-	depends on NF_TABLES
 	depends on NF_CONNTRACK
 	depends on NF_NAT
 	tristate "Netfilter nf_tables redirect support"
@@ -522,7 +514,6 @@ config NFT_REDIR
 	  to perform NAT in the redirect flavour.
 
 config NFT_NAT
-	depends on NF_TABLES
 	depends on NF_CONNTRACK
 	select NF_NAT
 	tristate "Netfilter nf_tables nat module"
@@ -531,7 +522,6 @@ config NFT_NAT
 	  typical Network Address Translation (NAT) packet transformations.
 
 config NFT_QUEUE
-	depends on NF_TABLES
 	depends on NETFILTER_XTABLES
 	depends on NETFILTER_NETLINK_QUEUE
 	tristate "Netfilter nf_tables queue module"
@@ -540,7 +530,6 @@ config NFT_QUEUE
 	  infrastructure (also known as NFQUEUE) from nftables.
 
 config NFT_REJECT
-	depends on NF_TABLES
 	default m if NETFILTER_ADVANCED=n
 	tristate "Netfilter nf_tables reject support"
 	help
@@ -554,7 +543,6 @@ config NFT_REJECT_INET
 	tristate
 
 config NFT_COMPAT
-	depends on NF_TABLES
 	depends on NETFILTER_XTABLES
 	tristate "Netfilter x_tables over nf_tables module"
 	help
@@ -562,6 +550,8 @@ config NFT_COMPAT
 	  x_tables match/target extensions over the nf_tables
 	  framework.
 
+endif # NF_TABLES
+
 config NETFILTER_XTABLES
 	tristate "Netfilter Xtables support (required for ip_tables)"
 	default m if NETFILTER_ADVANCED=n
-- 
cgit v1.2.2


From 1cae565e8b746f484f1ff1b71d2a1c89d7cf0668 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 5 Mar 2015 15:05:36 +0100
Subject: netfilter: nf_tables: limit maximum table name length to 32 bytes

Set the same as we use for chain names, it should be enough.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 199fd0f27b0e..284b20ce566b 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -401,7 +401,8 @@ nf_tables_chain_type_lookup(const struct nft_af_info *afi,
 }
 
 static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
-	[NFTA_TABLE_NAME]	= { .type = NLA_STRING },
+	[NFTA_TABLE_NAME]	= { .type = NLA_STRING,
+				    .len = NFT_TABLE_MAXNAMELEN - 1 },
 	[NFTA_TABLE_FLAGS]	= { .type = NLA_U32 },
 };
 
@@ -686,13 +687,13 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
 	if (!try_module_get(afi->owner))
 		return -EAFNOSUPPORT;
 
-	table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL);
+	table = kzalloc(sizeof(*table), GFP_KERNEL);
 	if (table == NULL) {
 		module_put(afi->owner);
 		return -ENOMEM;
 	}
 
-	nla_strlcpy(table->name, name, nla_len(name));
+	nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
 	INIT_LIST_HEAD(&table->chains);
 	INIT_LIST_HEAD(&table->sets);
 	table->flags = flags;
-- 
cgit v1.2.2


From 8bd63cf1a426e69bf4f611b08978f721e46c194f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 5 Mar 2015 00:52:33 +0100
Subject: bridge: move mac header copying into br_netfilter

The mac header only has to be copied back into the skb for
fragments generated by ip_fragment(), which only happens
for bridge forwarded packets with nf-call-iptables=1 && active nf_defrag.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/br_forward.c   |  4 +---
 net/bridge/br_netfilter.c | 29 ++++++++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index f96933a823e3..32541d4f72e8 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -37,9 +37,7 @@ static inline int should_deliver(const struct net_bridge_port *p,
 
 int br_dev_queue_push_xmit(struct sk_buff *skb)
 {
-	/* ip_fragment doesn't copy the MAC header */
-	if (nf_bridge_maybe_copy_header(skb) ||
-	    !is_skb_forwardable(skb->dev, skb)) {
+	if (!is_skb_forwardable(skb->dev, skb)) {
 		kfree_skb(skb);
 	} else {
 		skb_push(skb, ETH_HLEN);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 0ee453fad3de..e5479112c4a3 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -764,6 +764,33 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
 }
 
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+static bool nf_bridge_copy_header(struct sk_buff *skb)
+{
+	int err;
+	unsigned int header_size;
+
+	nf_bridge_update_protocol(skb);
+	header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
+	err = skb_cow_head(skb, header_size);
+	if (err)
+		return false;
+
+	skb_copy_to_linear_data_offset(skb, -header_size,
+				       skb->nf_bridge->data, header_size);
+	__skb_push(skb, nf_bridge_encap_header_len(skb));
+	return true;
+}
+
+static int br_nf_push_frag_xmit(struct sk_buff *skb)
+{
+	if (!nf_bridge_copy_header(skb)) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return br_dev_queue_push_xmit(skb);
+}
+
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
 	int ret;
@@ -780,7 +807,7 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 			/* Drop invalid packet */
 			return NF_DROP;
 		IPCB(skb)->frag_max_size = frag_max_size;
-		ret = ip_fragment(skb, br_dev_queue_push_xmit);
+		ret = ip_fragment(skb, br_nf_push_frag_xmit);
 	} else
 		ret = br_dev_queue_push_xmit(skb);
 
-- 
cgit v1.2.2


From 4a9d2f200862683d6680d5565f30c126625afe65 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 5 Mar 2015 00:52:34 +0100
Subject: netfilter: bridge: move nf_bridge_update_protocol to where its used

no need to keep it in a header file.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/br_netfilter.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index e5479112c4a3..5b3bceb3ee62 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -239,6 +239,14 @@ drop:
 	return -1;
 }
 
+static void nf_bridge_update_protocol(struct sk_buff *skb)
+{
+	if (skb->nf_bridge->mask & BRNF_8021Q)
+		skb->protocol = htons(ETH_P_8021Q);
+	else if (skb->nf_bridge->mask & BRNF_PPPoE)
+		skb->protocol = htons(ETH_P_PPP_SES);
+}
+
 /* PF_BRIDGE/PRE_ROUTING *********************************************/
 /* Undo the changes made for ip6tables PREROUTING and continue the
  * bridge PRE_ROUTING hook. */
-- 
cgit v1.2.2


From 7a8d831df5811f49957cc9b7976319973d088c34 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 5 Mar 2015 00:52:36 +0100
Subject: netfilter: bridge: refactor conditional in br_nf_dev_queue_xmit

simpilifies followup patch that re-works brnf ip_fragment handling.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/br_netfilter.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 5b3bceb3ee62..ef1fe281ca11 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -803,13 +803,16 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
 	int ret;
 	int frag_max_size;
+	unsigned int mtu_reserved;
 
+	if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP))
+		return br_dev_queue_push_xmit(skb);
+
+	mtu_reserved = nf_bridge_mtu_reduction(skb);
 	/* This is wrong! We should preserve the original fragment
 	 * boundaries by preserving frag_list rather than refragmenting.
 	 */
-	if (skb->protocol == htons(ETH_P_IP) &&
-	    skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
-	    !skb_is_gso(skb)) {
+	if (skb->len + mtu_reserved > skb->dev->mtu) {
 		frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
 		if (br_parse_ip_options(skb))
 			/* Drop invalid packet */
-- 
cgit v1.2.2


From e5de75bf88858f5b3ab11e2504b86ec059f03102 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 9 Mar 2015 12:30:12 +0100
Subject: netfilter: bridge: move DNAT helper to br_netfilter

Only one caller, there is no need to keep this in a header.
Move it to br_netfilter.c where this belongs to.

Based on patch from Florian Westphal.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/br_device.c    |  5 +----
 net/bridge/br_netfilter.c | 32 ++++++++++++++++++++++++++++++++
 net/bridge/br_private.h   |  5 +++++
 3 files changed, 38 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ffd379db5938..294cbcc49263 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -36,13 +36,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	u16 vid = 0;
 
 	rcu_read_lock();
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-	if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
-		br_nf_pre_routing_finish_bridge_slow(skb);
+	if (br_nf_prerouting_finish_bridge(skb)) {
 		rcu_read_unlock();
 		return NETDEV_TX_OK;
 	}
-#endif
 
 	u64_stats_update_begin(&brstats->syncp);
 	brstats->tx_packets++;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index ef1fe281ca11..a8361c7cdf81 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -892,6 +892,38 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
 	return NF_ACCEPT;
 }
 
+/* This is called when br_netfilter has called into iptables/netfilter,
+ * and DNAT has taken place on a bridge-forwarded packet.
+ *
+ * neigh->output has created a new MAC header, with local br0 MAC
+ * as saddr.
+ *
+ * This restores the original MAC saddr of the bridged packet
+ * before invoking bridge forward logic to transmit the packet.
+ */
+static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
+{
+	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+
+	skb_pull(skb, ETH_HLEN);
+	nf_bridge->mask &= ~BRNF_BRIDGED_DNAT;
+
+	skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN),
+				       skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
+	skb->dev = nf_bridge->physindev;
+	br_handle_frame_finish(skb);
+}
+
+int br_nf_prerouting_finish_bridge(struct sk_buff *skb)
+{
+	if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
+		br_nf_pre_routing_finish_bridge_slow(skb);
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(br_nf_prerouting_finish_bridge);
+
 void br_netfilter_enable(void)
 {
 }
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index de0919975a25..d63fc17fe4f4 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -764,10 +764,15 @@ static inline int br_vlan_enabled(struct net_bridge *br)
 
 /* br_netfilter.c */
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+int br_nf_prerouting_finish_bridge(struct sk_buff *skb);
 int br_nf_core_init(void);
 void br_nf_core_fini(void);
 void br_netfilter_rtable_init(struct net_bridge *);
 #else
+static inline int br_nf_prerouting_finish_bridge(struct sk_buff *skb)
+{
+        return 0;
+}
 static inline int br_nf_core_init(void) { return 0; }
 static inline void br_nf_core_fini(void) {}
 #define br_netfilter_rtable_init(x)
-- 
cgit v1.2.2