From ee586bbc28fb7128133457cf711880d13a3b7ce4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 16 Feb 2015 18:54:04 +0100 Subject: netfilter: reject: don't send icmp error if csum is invalid tcp resets are never emitted if the packet that triggers the reject/reset has an invalid checksum. For icmp error responses there was no such check. It allows to distinguish icmp response generated via iptables -I INPUT -p udp --dport 42 -j REJECT and those emitted by network stack (won't respond if csum is invalid, REJECT does). Arguably its possible to avoid this by using conntrack and only using REJECT with -m conntrack NEW/RELATED. However, this doesn't work when connection tracking is not in use or when using nf_conntrack_checksum=0. Furthermore, sending errors in response to invalid csums doesn't make much sense so just add similar test as in nf_send_reset. Validate csum if needed and only send the response if it is ok. Reference: http://bugzilla.redhat.com/show_bug.cgi?id=1169829 Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_REJECT.c | 17 +++++++++-------- net/ipv4/netfilter/nf_reject_ipv4.c | 23 +++++++++++++++++++++++ net/ipv4/netfilter/nft_reject_ipv4.c | 3 ++- net/ipv6/netfilter/nf_reject_ipv6.c | 35 +++++++++++++++++++++++++++++++++++ net/netfilter/nft_reject_inet.c | 6 ++++-- 5 files changed, 73 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 8f48f5517e33..87907d4bd259 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -34,31 +34,32 @@ static unsigned int reject_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipt_reject_info *reject = par->targinfo; + int hook = par->hooknum; switch (reject->with) { case IPT_ICMP_NET_UNREACHABLE: - nf_send_unreach(skb, ICMP_NET_UNREACH); + nf_send_unreach(skb, ICMP_NET_UNREACH, hook); break; case IPT_ICMP_HOST_UNREACHABLE: - nf_send_unreach(skb, ICMP_HOST_UNREACH); + nf_send_unreach(skb, ICMP_HOST_UNREACH, hook); break; case IPT_ICMP_PROT_UNREACHABLE: - nf_send_unreach(skb, ICMP_PROT_UNREACH); + nf_send_unreach(skb, ICMP_PROT_UNREACH, hook); break; case IPT_ICMP_PORT_UNREACHABLE: - nf_send_unreach(skb, ICMP_PORT_UNREACH); + nf_send_unreach(skb, ICMP_PORT_UNREACH, hook); break; case IPT_ICMP_NET_PROHIBITED: - nf_send_unreach(skb, ICMP_NET_ANO); + nf_send_unreach(skb, ICMP_NET_ANO, hook); break; case IPT_ICMP_HOST_PROHIBITED: - nf_send_unreach(skb, ICMP_HOST_ANO); + nf_send_unreach(skb, ICMP_HOST_ANO, hook); break; case IPT_ICMP_ADMIN_PROHIBITED: - nf_send_unreach(skb, ICMP_PKT_FILTERED); + nf_send_unreach(skb, ICMP_PKT_FILTERED, hook); break; case IPT_TCP_RESET: - nf_send_reset(skb, par->hooknum); + nf_send_reset(skb, hook); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 536da7bc598a..b7405eb7f1ef 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -164,4 +164,27 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) } EXPORT_SYMBOL_GPL(nf_send_reset); +void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) +{ + struct iphdr *iph = ip_hdr(skb_in); + u8 proto; + + if (skb_in->csum_bad || iph->frag_off & htons(IP_OFFSET)) + return; + + if (skb_csum_unnecessary(skb_in)) { + icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); + return; + } + + if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP) + proto = iph->protocol; + else + proto = 0; + + if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0) + icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); +} +EXPORT_SYMBOL_GPL(nf_send_unreach); + MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index d729542bd1b7..16a5d4d73d75 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -27,7 +27,8 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr, switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach(pkt->skb, priv->icmp_code); + nf_send_unreach(pkt->skb, priv->icmp_code, + pkt->ops->hooknum); break; case NFT_REJECT_TCP_RST: nf_send_reset(pkt->skb, pkt->ops->hooknum); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index d05b36440e8b..68e0bb4db1bf 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -208,4 +208,39 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) } EXPORT_SYMBOL_GPL(nf_send_reset6); +static bool reject6_csum_ok(struct sk_buff *skb, int hook) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int thoff; + __be16 fo; + u8 proto; + + if (skb->csum_bad) + return false; + + if (skb_csum_unnecessary(skb)) + return true; + + proto = ip6h->nexthdr; + thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); + + if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) + return false; + + return nf_ip6_checksum(skb, hook, thoff, proto) == 0; +} + +void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, + unsigned char code, unsigned int hooknum) +{ + if (!reject6_csum_ok(skb_in, hooknum)) + return; + + if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) + skb_in->dev = net->loopback_dev; + + icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); +} +EXPORT_SYMBOL_GPL(nf_send_unreach6); + MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 7b5f9d58680a..92877114aff4 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -28,14 +28,16 @@ static void nft_reject_inet_eval(const struct nft_expr *expr, case NFPROTO_IPV4: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach(pkt->skb, priv->icmp_code); + nf_send_unreach(pkt->skb, priv->icmp_code, + pkt->ops->hooknum); break; case NFT_REJECT_TCP_RST: nf_send_reset(pkt->skb, pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: nf_send_unreach(pkt->skb, - nft_reject_icmp_code(priv->icmp_code)); + nft_reject_icmp_code(priv->icmp_code), + pkt->ops->hooknum); break; } break; -- cgit v1.2.2 From 72500bc11e4da570bd66c0965e2dc74f52c8aba2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 16 Feb 2015 18:57:53 +0100 Subject: netfilter: bridge: rework reject handling bridge reject handling is not straightforward, there are many subtle differences depending on configuration. skb->dev is either the bridge port (PRE_ROUTING) or the bridge itself (INPUT), so we need to use indev instead. Also, checksum validation will only work reliably if we trim skb according to the l3 header size. While at it, add csum validation for ipv6 and skip existing tests if skb was already checked e.g. by GRO. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nft_reject_bridge.c | 84 +++++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 3244aead0926..5c6c96585acd 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "../br_private.h" static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, @@ -36,7 +37,12 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, skb_pull(nskb, ETH_HLEN); } -static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook) +/* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT) + * or the bridge port (NF_BRIDGE PREROUTING). + */ +static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, + const struct net_device *dev, + int hook) { struct sk_buff *nskb; struct iphdr *niph; @@ -65,11 +71,12 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook) nft_reject_br_push_etherhdr(oldskb, nskb); - br_deliver(br_port_get_rcu(oldskb->dev), nskb); + br_deliver(br_port_get_rcu(dev), nskb); } -static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, - u8 code) +static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code) { struct sk_buff *nskb; struct iphdr *niph; @@ -77,8 +84,9 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, unsigned int len; void *payload; __wsum csum; + u8 proto; - if (!nft_bridge_iphdr_validate(oldskb)) + if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb)) return; /* IP header checks: fragment. */ @@ -91,7 +99,17 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, if (!pskb_may_pull(oldskb, len)) return; - if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), 0)) + if (pskb_trim_rcsum(oldskb, htons(ip_hdr(oldskb)->tot_len))) + return; + + if (ip_hdr(oldskb)->protocol == IPPROTO_TCP || + ip_hdr(oldskb)->protocol == IPPROTO_UDP) + proto = ip_hdr(oldskb)->protocol; + else + proto = 0; + + if (!skb_csum_unnecessary(oldskb) && + nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto)) return; nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmphdr) + @@ -120,11 +138,13 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, nft_reject_br_push_etherhdr(oldskb, nskb); - br_deliver(br_port_get_rcu(oldskb->dev), nskb); + br_deliver(br_port_get_rcu(dev), nskb); } static void nft_reject_br_send_v6_tcp_reset(struct net *net, - struct sk_buff *oldskb, int hook) + struct sk_buff *oldskb, + const struct net_device *dev, + int hook) { struct sk_buff *nskb; const struct tcphdr *oth; @@ -152,12 +172,37 @@ static void nft_reject_br_send_v6_tcp_reset(struct net *net, nft_reject_br_push_etherhdr(oldskb, nskb); - br_deliver(br_port_get_rcu(oldskb->dev), nskb); + br_deliver(br_port_get_rcu(dev), nskb); +} + +static bool reject6_br_csum_ok(struct sk_buff *skb, int hook) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int thoff; + __be16 fo; + u8 proto = ip6h->nexthdr; + + if (skb->csum_bad) + return false; + + if (skb_csum_unnecessary(skb)) + return true; + + if (ip6h->payload_len && + pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h))) + return false; + + thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); + if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) + return false; + + return nf_ip6_checksum(skb, hook, thoff, proto) == 0; } static void nft_reject_br_send_v6_unreach(struct net *net, - struct sk_buff *oldskb, int hook, - u8 code) + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code) { struct sk_buff *nskb; struct ipv6hdr *nip6h; @@ -176,6 +221,9 @@ static void nft_reject_br_send_v6_unreach(struct net *net, if (!pskb_may_pull(oldskb, len)) return; + if (!reject6_br_csum_ok(oldskb, hook)) + return; + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmp6hdr) + LL_MAX_HEADER + len, GFP_ATOMIC); if (!nskb) @@ -205,7 +253,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net, nft_reject_br_push_etherhdr(oldskb, nskb); - br_deliver(br_port_get_rcu(oldskb->dev), nskb); + br_deliver(br_port_get_rcu(dev), nskb); } static void nft_reject_bridge_eval(const struct nft_expr *expr, @@ -224,16 +272,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IP): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v4_unreach(pkt->skb, + nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, pkt->ops->hooknum, priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v4_tcp_reset(pkt->skb, + nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in, pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v4_unreach(pkt->skb, + nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, pkt->ops->hooknum, nft_reject_icmp_code(priv->icmp_code)); break; @@ -242,16 +290,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IPV6): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v6_unreach(net, pkt->skb, + nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, pkt->ops->hooknum, priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v6_tcp_reset(net, pkt->skb, + nft_reject_br_send_v6_tcp_reset(net, pkt->skb, pkt->in, pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v6_unreach(net, pkt->skb, + nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, pkt->ops->hooknum, nft_reject_icmpv6_code(priv->icmp_code)); break; -- cgit v1.2.2 From 43270b1bc5f1e33522dacf3d3b9175c29404c36c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 2 Mar 2015 14:40:39 +0100 Subject: netfilter: ipt_CLUSTERIP: deprecate it in favour of xt_cluster xt_cluster supersedes ipt_CLUSTERIP since it can be also used in gateway configurations (not only from the backend side). ipt_CLUSTER is also known to leak the netdev that it uses on device removal, which requires a rather large fix to workaround the problem: http://patchwork.ozlabs.org/patch/358629/ So let's deprecate this so we can probably kill code this in the future. Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index e90f83a3415b..f75e9df5e017 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -418,6 +418,13 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); + + if (!par->net->xt.clusterip_deprecated_warning) { + pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, " + "use xt_cluster instead\n"); + par->net->xt.clusterip_deprecated_warning = true; + } + return ret; } -- cgit v1.2.2 From 01ef16c2dd2e9a77fbd94eb0314c4787ab8f7113 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 3 Mar 2015 20:10:04 +0000 Subject: netfilter: nf_tables: minor tracing cleanups The tracing code is squeezed between multiple related parts of the evaluation code, move it out. Also add an inline wrapper for the reoccuring test for skb->nf_trace. Small code savings in nft_do_chain(): nft_trace_packet | -137 nft_do_chain | -8 2 functions changed, 145 bytes removed, diff: -145 net/netfilter/nf_tables_core.c: __nft_trace_packet | +137 1 function changed, 137 bytes added, diff: +137 net/netfilter/nf_tables_core.o: 3 functions changed, 137 bytes added, 145 bytes removed, diff: -8 Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 98 +++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 3b90eb2b2c55..074067d4fc1e 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -21,6 +21,48 @@ #include #include +enum nft_trace { + NFT_TRACE_RULE, + NFT_TRACE_RETURN, + NFT_TRACE_POLICY, +}; + +static const char *const comments[] = { + [NFT_TRACE_RULE] = "rule", + [NFT_TRACE_RETURN] = "return", + [NFT_TRACE_POLICY] = "policy", +}; + +static struct nf_loginfo trace_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 4, + .logflags = NF_LOG_MASK, + }, + }, +}; + +static void __nft_trace_packet(const struct nft_pktinfo *pkt, + const struct nft_chain *chain, + int rulenum, enum nft_trace type) +{ + struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); + + nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, + pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", + chain->table->name, chain->name, comments[type], + rulenum); +} + +static inline void nft_trace_packet(const struct nft_pktinfo *pkt, + const struct nft_chain *chain, + int rulenum, enum nft_trace type) +{ + if (unlikely(pkt->skb->nf_trace)) + __nft_trace_packet(pkt, chain, rulenum, type); +} + static void nft_cmp_fast_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1]) { @@ -66,40 +108,6 @@ struct nft_jumpstack { int rulenum; }; -enum nft_trace { - NFT_TRACE_RULE, - NFT_TRACE_RETURN, - NFT_TRACE_POLICY, -}; - -static const char *const comments[] = { - [NFT_TRACE_RULE] = "rule", - [NFT_TRACE_RETURN] = "return", - [NFT_TRACE_POLICY] = "policy", -}; - -static struct nf_loginfo trace_loginfo = { - .type = NF_LOG_TYPE_LOG, - .u = { - .log = { - .level = 4, - .logflags = NF_LOG_MASK, - }, - }, -}; - -static void nft_trace_packet(const struct nft_pktinfo *pkt, - const struct nft_chain *chain, - int rulenum, enum nft_trace type) -{ - struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - - nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, - pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", - chain->table->name, chain->name, comments[type], - rulenum); -} - unsigned int nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { @@ -146,8 +154,7 @@ next_rule: data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; continue; case NFT_CONTINUE: - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); continue; } break; @@ -157,16 +164,13 @@ next_rule: case NF_ACCEPT: case NF_DROP: case NF_QUEUE: - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); - + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); return data[NFT_REG_VERDICT].verdict; } switch (data[NFT_REG_VERDICT].verdict) { case NFT_JUMP: - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); jumpstack[stackptr].chain = chain; @@ -176,18 +180,15 @@ next_rule: chain = data[NFT_REG_VERDICT].chain; goto do_chain; case NFT_GOTO: - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); chain = data[NFT_REG_VERDICT].chain; goto do_chain; case NFT_RETURN: - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); break; case NFT_CONTINUE: - if (unlikely(pkt->skb->nf_trace && !(chain->flags & NFT_BASE_CHAIN))) - nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN); + nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN); break; default: WARN_ON(1); @@ -201,8 +202,7 @@ next_rule: goto next_rule; } - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY); + nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY); rcu_read_lock_bh(); stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats)); -- cgit v1.2.2 From 354bf5a0d794a34dc98ed25e72f460b3b360c174 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 3 Mar 2015 20:10:05 +0000 Subject: netfilter: nf_tables: consolidate tracing invocations * JUMP and GOTO are equivalent except for JUMP pushing the current context to the stack * RETURN and implicit RETURN (CONTINUE) are equivalent except that the logged rule number differs Result: nft_do_chain | -112 1 function changed, 112 bytes removed, diff: -112 Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 074067d4fc1e..77165bf023f3 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -170,26 +170,23 @@ next_rule: switch (data[NFT_REG_VERDICT].verdict) { case NFT_JUMP: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); - BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); jumpstack[stackptr].chain = chain; jumpstack[stackptr].rule = rule; jumpstack[stackptr].rulenum = rulenum; stackptr++; - chain = data[NFT_REG_VERDICT].chain; - goto do_chain; + /* fall through */ case NFT_GOTO: nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); chain = data[NFT_REG_VERDICT].chain; goto do_chain; + case NFT_CONTINUE: + rulenum++; + /* fall through */ case NFT_RETURN: nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); break; - case NFT_CONTINUE: - nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN); - break; default: WARN_ON(1); } -- cgit v1.2.2 From f04e599e20d7ee9b9e5069c7d1ff59c21b9bf4c2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 5 Mar 2015 14:56:15 +0100 Subject: netfilter: nf_tables: consolidate Kconfig options Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 38 +++++++++++++++++++++----------------- net/ipv6/netfilter/Kconfig | 18 +++++++++++------- net/netfilter/Kconfig | 20 +++++--------------- 3 files changed, 37 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 59f883d9cadf..fb20f363151f 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -36,24 +36,16 @@ config NF_CONNTRACK_PROC_COMPAT If unsure, say Y. -config NF_LOG_ARP - tristate "ARP packet logging" - default m if NETFILTER_ADVANCED=n - select NF_LOG_COMMON - -config NF_LOG_IPV4 - tristate "IPv4 packet logging" - default m if NETFILTER_ADVANCED=n - select NF_LOG_COMMON +if NF_TABLES config NF_TABLES_IPV4 - depends on NF_TABLES tristate "IPv4 nf_tables support" help This option enables the IPv4 support for nf_tables. +if NF_TABLES_IPV4 + config NFT_CHAIN_ROUTE_IPV4 - depends on NF_TABLES_IPV4 tristate "IPv4 nf_tables route chain support" help This option enables the "route" chain for IPv4 in nf_tables. This @@ -61,22 +53,34 @@ config NFT_CHAIN_ROUTE_IPV4 fields such as the source, destination, type of service and the packet mark. -config NF_REJECT_IPV4 - tristate "IPv4 packet rejection" - default m if NETFILTER_ADVANCED=n - config NFT_REJECT_IPV4 - depends on NF_TABLES_IPV4 select NF_REJECT_IPV4 default NFT_REJECT tristate +endif # NF_TABLES_IPV4 + config NF_TABLES_ARP - depends on NF_TABLES tristate "ARP nf_tables support" help This option enables the ARP support for nf_tables. +endif # NF_TABLES + +config NF_LOG_ARP + tristate "ARP packet logging" + default m if NETFILTER_ADVANCED=n + select NF_LOG_COMMON + +config NF_LOG_IPV4 + tristate "IPv4 packet logging" + default m if NETFILTER_ADVANCED=n + select NF_LOG_COMMON + +config NF_REJECT_IPV4 + tristate "IPv4 packet rejection" + default m if NETFILTER_ADVANCED=n + config NF_NAT_IPV4 tristate "IPv4 NAT" depends on NF_CONNTRACK_IPV4 diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index a069822936e6..ca6998345b42 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,14 +25,16 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. +if NF_TABLES + config NF_TABLES_IPV6 - depends on NF_TABLES tristate "IPv6 nf_tables support" help This option enables the IPv6 support for nf_tables. +if NF_TABLES_IPV6 + config NFT_CHAIN_ROUTE_IPV6 - depends on NF_TABLES_IPV6 tristate "IPv6 nf_tables route chain support" help This option enables the "route" chain for IPv6 in nf_tables. This @@ -40,16 +42,18 @@ config NFT_CHAIN_ROUTE_IPV6 fields such as the source, destination, flowlabel, hop-limit and the packet mark. -config NF_REJECT_IPV6 - tristate "IPv6 packet rejection" - default m if NETFILTER_ADVANCED=n - config NFT_REJECT_IPV6 - depends on NF_TABLES_IPV6 select NF_REJECT_IPV6 default NFT_REJECT tristate +endif # NF_TABLES_IPV6 +endif # NF_TABLES + +config NF_REJECT_IPV6 + tristate "IPv6 packet rejection" + default m if NETFILTER_ADVANCED=n + config NF_LOG_IPV6 tristate "IPv6 packet logging" default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c68c3b441381..971cd7526f4b 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -438,8 +438,10 @@ config NF_TABLES To compile it as a module, choose M here. +if NF_TABLES + config NF_TABLES_INET - depends on NF_TABLES && IPV6 + depends on IPV6 select NF_TABLES_IPV4 select NF_TABLES_IPV6 tristate "Netfilter nf_tables mixed IPv4/IPv6 tables support" @@ -447,21 +449,18 @@ config NF_TABLES_INET This option enables support for a mixed IPv4/IPv6 "inet" table. config NFT_EXTHDR - depends on NF_TABLES tristate "Netfilter nf_tables IPv6 exthdr module" help This option adds the "exthdr" expression that you can use to match IPv6 extension headers. config NFT_META - depends on NF_TABLES tristate "Netfilter nf_tables meta module" help This option adds the "meta" expression that you can use to match and to set packet metainformation such as the packet mark. config NFT_CT - depends on NF_TABLES depends on NF_CONNTRACK tristate "Netfilter nf_tables conntrack module" help @@ -469,42 +468,36 @@ config NFT_CT connection tracking information such as the flow state. config NFT_RBTREE - depends on NF_TABLES tristate "Netfilter nf_tables rbtree set module" help This option adds the "rbtree" set type (Red Black tree) that is used to build interval-based sets. config NFT_HASH - depends on NF_TABLES tristate "Netfilter nf_tables hash set module" help This option adds the "hash" set type that is used to build one-way mappings between matchings and actions. config NFT_COUNTER - depends on NF_TABLES tristate "Netfilter nf_tables counter module" help This option adds the "counter" expression that you can use to include packet and byte counters in a rule. config NFT_LOG - depends on NF_TABLES tristate "Netfilter nf_tables log module" help This option adds the "log" expression that you can use to log packets matching some criteria. config NFT_LIMIT - depends on NF_TABLES tristate "Netfilter nf_tables limit module" help This option adds the "limit" expression that you can use to ratelimit rule matchings. config NFT_MASQ - depends on NF_TABLES depends on NF_CONNTRACK depends on NF_NAT tristate "Netfilter nf_tables masquerade support" @@ -513,7 +506,6 @@ config NFT_MASQ to perform NAT in the masquerade flavour. config NFT_REDIR - depends on NF_TABLES depends on NF_CONNTRACK depends on NF_NAT tristate "Netfilter nf_tables redirect support" @@ -522,7 +514,6 @@ config NFT_REDIR to perform NAT in the redirect flavour. config NFT_NAT - depends on NF_TABLES depends on NF_CONNTRACK select NF_NAT tristate "Netfilter nf_tables nat module" @@ -531,7 +522,6 @@ config NFT_NAT typical Network Address Translation (NAT) packet transformations. config NFT_QUEUE - depends on NF_TABLES depends on NETFILTER_XTABLES depends on NETFILTER_NETLINK_QUEUE tristate "Netfilter nf_tables queue module" @@ -540,7 +530,6 @@ config NFT_QUEUE infrastructure (also known as NFQUEUE) from nftables. config NFT_REJECT - depends on NF_TABLES default m if NETFILTER_ADVANCED=n tristate "Netfilter nf_tables reject support" help @@ -554,7 +543,6 @@ config NFT_REJECT_INET tristate config NFT_COMPAT - depends on NF_TABLES depends on NETFILTER_XTABLES tristate "Netfilter x_tables over nf_tables module" help @@ -562,6 +550,8 @@ config NFT_COMPAT x_tables match/target extensions over the nf_tables framework. +endif # NF_TABLES + config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n -- cgit v1.2.2 From 1cae565e8b746f484f1ff1b71d2a1c89d7cf0668 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 5 Mar 2015 15:05:36 +0100 Subject: netfilter: nf_tables: limit maximum table name length to 32 bytes Set the same as we use for chain names, it should be enough. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 199fd0f27b0e..284b20ce566b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -401,7 +401,8 @@ nf_tables_chain_type_lookup(const struct nft_af_info *afi, } static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { - [NFTA_TABLE_NAME] = { .type = NLA_STRING }, + [NFTA_TABLE_NAME] = { .type = NLA_STRING, + .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, }; @@ -686,13 +687,13 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, if (!try_module_get(afi->owner)) return -EAFNOSUPPORT; - table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL); + table = kzalloc(sizeof(*table), GFP_KERNEL); if (table == NULL) { module_put(afi->owner); return -ENOMEM; } - nla_strlcpy(table->name, name, nla_len(name)); + nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN); INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); table->flags = flags; -- cgit v1.2.2 From 8bd63cf1a426e69bf4f611b08978f721e46c194f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 5 Mar 2015 00:52:33 +0100 Subject: bridge: move mac header copying into br_netfilter The mac header only has to be copied back into the skb for fragments generated by ip_fragment(), which only happens for bridge forwarded packets with nf-call-iptables=1 && active nf_defrag. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_forward.c | 4 +--- net/bridge/br_netfilter.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index f96933a823e3..32541d4f72e8 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -37,9 +37,7 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct sk_buff *skb) { - /* ip_fragment doesn't copy the MAC header */ - if (nf_bridge_maybe_copy_header(skb) || - !is_skb_forwardable(skb->dev, skb)) { + if (!is_skb_forwardable(skb->dev, skb)) { kfree_skb(skb); } else { skb_push(skb, ETH_HLEN); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 0ee453fad3de..e5479112c4a3 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -764,6 +764,33 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, } #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +static bool nf_bridge_copy_header(struct sk_buff *skb) +{ + int err; + unsigned int header_size; + + nf_bridge_update_protocol(skb); + header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); + err = skb_cow_head(skb, header_size); + if (err) + return false; + + skb_copy_to_linear_data_offset(skb, -header_size, + skb->nf_bridge->data, header_size); + __skb_push(skb, nf_bridge_encap_header_len(skb)); + return true; +} + +static int br_nf_push_frag_xmit(struct sk_buff *skb) +{ + if (!nf_bridge_copy_header(skb)) { + kfree_skb(skb); + return 0; + } + + return br_dev_queue_push_xmit(skb); +} + static int br_nf_dev_queue_xmit(struct sk_buff *skb) { int ret; @@ -780,7 +807,7 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) /* Drop invalid packet */ return NF_DROP; IPCB(skb)->frag_max_size = frag_max_size; - ret = ip_fragment(skb, br_dev_queue_push_xmit); + ret = ip_fragment(skb, br_nf_push_frag_xmit); } else ret = br_dev_queue_push_xmit(skb); -- cgit v1.2.2 From 4a9d2f200862683d6680d5565f30c126625afe65 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 5 Mar 2015 00:52:34 +0100 Subject: netfilter: bridge: move nf_bridge_update_protocol to where its used no need to keep it in a header file. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index e5479112c4a3..5b3bceb3ee62 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -239,6 +239,14 @@ drop: return -1; } +static void nf_bridge_update_protocol(struct sk_buff *skb) +{ + if (skb->nf_bridge->mask & BRNF_8021Q) + skb->protocol = htons(ETH_P_8021Q); + else if (skb->nf_bridge->mask & BRNF_PPPoE) + skb->protocol = htons(ETH_P_PPP_SES); +} + /* PF_BRIDGE/PRE_ROUTING *********************************************/ /* Undo the changes made for ip6tables PREROUTING and continue the * bridge PRE_ROUTING hook. */ -- cgit v1.2.2 From 7a8d831df5811f49957cc9b7976319973d088c34 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 5 Mar 2015 00:52:36 +0100 Subject: netfilter: bridge: refactor conditional in br_nf_dev_queue_xmit simpilifies followup patch that re-works brnf ip_fragment handling. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 5b3bceb3ee62..ef1fe281ca11 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -803,13 +803,16 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) { int ret; int frag_max_size; + unsigned int mtu_reserved; + if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP)) + return br_dev_queue_push_xmit(skb); + + mtu_reserved = nf_bridge_mtu_reduction(skb); /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ - if (skb->protocol == htons(ETH_P_IP) && - skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && - !skb_is_gso(skb)) { + if (skb->len + mtu_reserved > skb->dev->mtu) { frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; if (br_parse_ip_options(skb)) /* Drop invalid packet */ -- cgit v1.2.2 From e5de75bf88858f5b3ab11e2504b86ec059f03102 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 9 Mar 2015 12:30:12 +0100 Subject: netfilter: bridge: move DNAT helper to br_netfilter Only one caller, there is no need to keep this in a header. Move it to br_netfilter.c where this belongs to. Based on patch from Florian Westphal. Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_device.c | 5 +---- net/bridge/br_netfilter.c | 32 ++++++++++++++++++++++++++++++++ net/bridge/br_private.h | 5 +++++ 3 files changed, 38 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ffd379db5938..294cbcc49263 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -36,13 +36,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) u16 vid = 0; rcu_read_lock(); -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { - br_nf_pre_routing_finish_bridge_slow(skb); + if (br_nf_prerouting_finish_bridge(skb)) { rcu_read_unlock(); return NETDEV_TX_OK; } -#endif u64_stats_update_begin(&brstats->syncp); brstats->tx_packets++; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index ef1fe281ca11..a8361c7cdf81 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -892,6 +892,38 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, return NF_ACCEPT; } +/* This is called when br_netfilter has called into iptables/netfilter, + * and DNAT has taken place on a bridge-forwarded packet. + * + * neigh->output has created a new MAC header, with local br0 MAC + * as saddr. + * + * This restores the original MAC saddr of the bridged packet + * before invoking bridge forward logic to transmit the packet. + */ +static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = skb->nf_bridge; + + skb_pull(skb, ETH_HLEN); + nf_bridge->mask &= ~BRNF_BRIDGED_DNAT; + + skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), + skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); + skb->dev = nf_bridge->physindev; + br_handle_frame_finish(skb); +} + +int br_nf_prerouting_finish_bridge(struct sk_buff *skb) +{ + if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { + br_nf_pre_routing_finish_bridge_slow(skb); + return 1; + } + return 0; +} +EXPORT_SYMBOL_GPL(br_nf_prerouting_finish_bridge); + void br_netfilter_enable(void) { } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index de0919975a25..d63fc17fe4f4 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -764,10 +764,15 @@ static inline int br_vlan_enabled(struct net_bridge *br) /* br_netfilter.c */ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) +int br_nf_prerouting_finish_bridge(struct sk_buff *skb); int br_nf_core_init(void); void br_nf_core_fini(void); void br_netfilter_rtable_init(struct net_bridge *); #else +static inline int br_nf_prerouting_finish_bridge(struct sk_buff *skb) +{ + return 0; +} static inline int br_nf_core_init(void) { return 0; } static inline void br_nf_core_fini(void) {} #define br_netfilter_rtable_init(x) -- cgit v1.2.2