aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2014-01-06 13:29:30 -0500
committerDavid S. Miller <davem@davemloft.net>2014-01-06 13:29:30 -0500
commit9aa28f2b71055d5ae17a2e1daee359d4174bb13e (patch)
treefbf4e0fd11eb924e0bece74a87f442bc54441b35 /include
parent6a8c4796df74045088a916581c736432d08c53c0 (diff)
parentc9c8e485978a308c8a359140da187d55120f8fee (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nftables
Pablo Neira Ayuso says: <pablo@netfilter.org> ==================== nftables updates for net-next The following patchset contains nftables updates for your net-next tree, they are: * Add set operation to the meta expression by means of the select_ops() infrastructure, this allows us to set the packet mark among other things. From Arturo Borrero Gonzalez. * Fix wrong format in sscanf in nf_tables_set_alloc_name(), from Daniel Borkmann. * Add new queue expression to nf_tables. These come with two previous patches to prepare this new feature, one to add mask in nf_tables_core to evaluate the queue verdict appropriately and another to refactor common code with xt_NFQUEUE, from Eric Leblond. * Do not hide nftables from Kconfig if nfnetlink is not enabled, also from Eric Leblond. * Add the reject expression to nf_tables, this adds the missing TCP RST support. It comes with an initial patch to refactor common code with xt_NFQUEUE, again from Eric Leblond. * Remove an unused variable assignment in nf_tables_dump_set(), from Michal Nazarewicz. * Remove the nft_meta_target code, now that Arturo added the set operation to the meta expression, from me. * Add help information for nf_tables to Kconfig, also from me. * Allow to dump all sets by specifying NFPROTO_UNSPEC, similar feature is available to other nf_tables objects, requested by Arturo, from me. * Expose the table usage counter, so we can know how many chains are using this table without dumping the list of chains, from Tomasz Bursztyka. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/net/netfilter/ipv4/nf_reject.h128
-rw-r--r--include/net/netfilter/ipv6/nf_reject.h171
-rw-r--r--include/net/netfilter/nf_queue.h62
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h24
4 files changed, 385 insertions, 0 deletions
diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h
new file mode 100644
index 000000000000..931fbf812171
--- /dev/null
+++ b/include/net/netfilter/ipv4/nf_reject.h
@@ -0,0 +1,128 @@
1#ifndef _IPV4_NF_REJECT_H
2#define _IPV4_NF_REJECT_H
3
4#include <net/ip.h>
5#include <net/tcp.h>
6#include <net/route.h>
7#include <net/dst.h>
8
9static inline void nf_send_unreach(struct sk_buff *skb_in, int code)
10{
11 icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
12}
13
14/* Send RST reply */
15static void nf_send_reset(struct sk_buff *oldskb, int hook)
16{
17 struct sk_buff *nskb;
18 const struct iphdr *oiph;
19 struct iphdr *niph;
20 const struct tcphdr *oth;
21 struct tcphdr _otcph, *tcph;
22
23 /* IP header checks: fragment. */
24 if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
25 return;
26
27 oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
28 sizeof(_otcph), &_otcph);
29 if (oth == NULL)
30 return;
31
32 /* No RST for RST. */
33 if (oth->rst)
34 return;
35
36 if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
37 return;
38
39 /* Check checksum */
40 if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
41 return;
42 oiph = ip_hdr(oldskb);
43
44 nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
45 LL_MAX_HEADER, GFP_ATOMIC);
46 if (!nskb)
47 return;
48
49 skb_reserve(nskb, LL_MAX_HEADER);
50
51 skb_reset_network_header(nskb);
52 niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
53 niph->version = 4;
54 niph->ihl = sizeof(struct iphdr) / 4;
55 niph->tos = 0;
56 niph->id = 0;
57 niph->frag_off = htons(IP_DF);
58 niph->protocol = IPPROTO_TCP;
59 niph->check = 0;
60 niph->saddr = oiph->daddr;
61 niph->daddr = oiph->saddr;
62
63 skb_reset_transport_header(nskb);
64 tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
65 memset(tcph, 0, sizeof(*tcph));
66 tcph->source = oth->dest;
67 tcph->dest = oth->source;
68 tcph->doff = sizeof(struct tcphdr) / 4;
69
70 if (oth->ack)
71 tcph->seq = oth->ack_seq;
72 else {
73 tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
74 oldskb->len - ip_hdrlen(oldskb) -
75 (oth->doff << 2));
76 tcph->ack = 1;
77 }
78
79 tcph->rst = 1;
80 tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
81 niph->daddr, 0);
82 nskb->ip_summed = CHECKSUM_PARTIAL;
83 nskb->csum_start = (unsigned char *)tcph - nskb->head;
84 nskb->csum_offset = offsetof(struct tcphdr, check);
85
86 /* ip_route_me_harder expects skb->dst to be set */
87 skb_dst_set_noref(nskb, skb_dst(oldskb));
88
89 nskb->protocol = htons(ETH_P_IP);
90 if (ip_route_me_harder(nskb, RTN_UNSPEC))
91 goto free_nskb;
92
93 niph->ttl = ip4_dst_hoplimit(skb_dst(nskb));
94
95 /* "Never happens" */
96 if (nskb->len > dst_mtu(skb_dst(nskb)))
97 goto free_nskb;
98
99 nf_ct_attach(nskb, oldskb);
100
101#ifdef CONFIG_BRIDGE_NETFILTER
102 /* If we use ip_local_out for bridged traffic, the MAC source on
103 * the RST will be ours, instead of the destination's. This confuses
104 * some routers/firewalls, and they drop the packet. So we need to
105 * build the eth header using the original destination's MAC as the
106 * source, and send the RST packet directly.
107 */
108 if (oldskb->nf_bridge) {
109 struct ethhdr *oeth = eth_hdr(oldskb);
110 nskb->dev = oldskb->nf_bridge->physindev;
111 niph->tot_len = htons(nskb->len);
112 ip_send_check(niph);
113 if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
114 oeth->h_source, oeth->h_dest, nskb->len) < 0)
115 goto free_nskb;
116 dev_queue_xmit(nskb);
117 } else
118#endif
119 ip_local_out(nskb);
120
121 return;
122
123 free_nskb:
124 kfree_skb(nskb);
125}
126
127
128#endif /* _IPV4_NF_REJECT_H */
diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h
new file mode 100644
index 000000000000..710d17ed70b4
--- /dev/null
+++ b/include/net/netfilter/ipv6/nf_reject.h
@@ -0,0 +1,171 @@
1#ifndef _IPV6_NF_REJECT_H
2#define _IPV6_NF_REJECT_H
3
4#include <net/ipv6.h>
5#include <net/ip6_route.h>
6#include <net/ip6_fib.h>
7#include <net/ip6_checksum.h>
8#include <linux/netfilter_ipv6.h>
9
10static inline void
11nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code,
12 unsigned int hooknum)
13{
14 if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
15 skb_in->dev = net->loopback_dev;
16
17 icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
18}
19
20/* Send RST reply */
21static void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
22{
23 struct sk_buff *nskb;
24 struct tcphdr otcph, *tcph;
25 unsigned int otcplen, hh_len;
26 int tcphoff, needs_ack;
27 const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
28 struct ipv6hdr *ip6h;
29#define DEFAULT_TOS_VALUE 0x0U
30 const __u8 tclass = DEFAULT_TOS_VALUE;
31 struct dst_entry *dst = NULL;
32 u8 proto;
33 __be16 frag_off;
34 struct flowi6 fl6;
35
36 if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
37 (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
38 pr_debug("addr is not unicast.\n");
39 return;
40 }
41
42 proto = oip6h->nexthdr;
43 tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off);
44
45 if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
46 pr_debug("Cannot get TCP header.\n");
47 return;
48 }
49
50 otcplen = oldskb->len - tcphoff;
51
52 /* IP header checks: fragment, too short. */
53 if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) {
54 pr_debug("proto(%d) != IPPROTO_TCP, "
55 "or too short. otcplen = %d\n",
56 proto, otcplen);
57 return;
58 }
59
60 if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr)))
61 BUG();
62
63 /* No RST for RST. */
64 if (otcph.rst) {
65 pr_debug("RST is set\n");
66 return;
67 }
68
69 /* Check checksum. */
70 if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) {
71 pr_debug("TCP checksum is invalid\n");
72 return;
73 }
74
75 memset(&fl6, 0, sizeof(fl6));
76 fl6.flowi6_proto = IPPROTO_TCP;
77 fl6.saddr = oip6h->daddr;
78 fl6.daddr = oip6h->saddr;
79 fl6.fl6_sport = otcph.dest;
80 fl6.fl6_dport = otcph.source;
81 security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
82 dst = ip6_route_output(net, NULL, &fl6);
83 if (dst == NULL || dst->error) {
84 dst_release(dst);
85 return;
86 }
87 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
88 if (IS_ERR(dst))
89 return;
90
91 hh_len = (dst->dev->hard_header_len + 15)&~15;
92 nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
93 + sizeof(struct tcphdr) + dst->trailer_len,
94 GFP_ATOMIC);
95
96 if (!nskb) {
97 net_dbg_ratelimited("cannot alloc skb\n");
98 dst_release(dst);
99 return;
100 }
101
102 skb_dst_set(nskb, dst);
103
104 skb_reserve(nskb, hh_len + dst->header_len);
105
106 skb_put(nskb, sizeof(struct ipv6hdr));
107 skb_reset_network_header(nskb);
108 ip6h = ipv6_hdr(nskb);
109 ip6_flow_hdr(ip6h, tclass, 0);
110 ip6h->hop_limit = ip6_dst_hoplimit(dst);
111 ip6h->nexthdr = IPPROTO_TCP;
112 ip6h->saddr = oip6h->daddr;
113 ip6h->daddr = oip6h->saddr;
114
115 skb_reset_transport_header(nskb);
116 tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
117 /* Truncate to length (no data) */
118 tcph->doff = sizeof(struct tcphdr)/4;
119 tcph->source = otcph.dest;
120 tcph->dest = otcph.source;
121
122 if (otcph.ack) {
123 needs_ack = 0;
124 tcph->seq = otcph.ack_seq;
125 tcph->ack_seq = 0;
126 } else {
127 needs_ack = 1;
128 tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
129 + otcplen - (otcph.doff<<2));
130 tcph->seq = 0;
131 }
132
133 /* Reset flags */
134 ((u_int8_t *)tcph)[13] = 0;
135 tcph->rst = 1;
136 tcph->ack = needs_ack;
137 tcph->window = 0;
138 tcph->urg_ptr = 0;
139 tcph->check = 0;
140
141 /* Adjust TCP checksum */
142 tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
143 &ipv6_hdr(nskb)->daddr,
144 sizeof(struct tcphdr), IPPROTO_TCP,
145 csum_partial(tcph,
146 sizeof(struct tcphdr), 0));
147
148 nf_ct_attach(nskb, oldskb);
149
150#ifdef CONFIG_BRIDGE_NETFILTER
151 /* If we use ip6_local_out for bridged traffic, the MAC source on
152 * the RST will be ours, instead of the destination's. This confuses
153 * some routers/firewalls, and they drop the packet. So we need to
154 * build the eth header using the original destination's MAC as the
155 * source, and send the RST packet directly.
156 */
157 if (oldskb->nf_bridge) {
158 struct ethhdr *oeth = eth_hdr(oldskb);
159 nskb->dev = oldskb->nf_bridge->physindev;
160 nskb->protocol = htons(ETH_P_IPV6);
161 ip6h->payload_len = htons(sizeof(struct tcphdr));
162 if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
163 oeth->h_source, oeth->h_dest, nskb->len) < 0)
164 return;
165 dev_queue_xmit(nskb);
166 } else
167#endif
168 ip6_local_out(nskb);
169}
170
171#endif /* _IPV6_NF_REJECT_H */
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index c1d5b3e34a21..84a53d780306 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -1,6 +1,10 @@
1#ifndef _NF_QUEUE_H 1#ifndef _NF_QUEUE_H
2#define _NF_QUEUE_H 2#define _NF_QUEUE_H
3 3
4#include <linux/ip.h>
5#include <linux/ipv6.h>
6#include <linux/jhash.h>
7
4/* Each queued (to userspace) skbuff has one of these. */ 8/* Each queued (to userspace) skbuff has one of these. */
5struct nf_queue_entry { 9struct nf_queue_entry {
6 struct list_head list; 10 struct list_head list;
@@ -33,4 +37,62 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
33bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); 37bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
34void nf_queue_entry_release_refs(struct nf_queue_entry *entry); 38void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
35 39
40static inline void init_hashrandom(u32 *jhash_initval)
41{
42 while (*jhash_initval == 0)
43 *jhash_initval = prandom_u32();
44}
45
46static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval)
47{
48 const struct iphdr *iph = ip_hdr(skb);
49
50 /* packets in either direction go into same queue */
51 if ((__force u32)iph->saddr < (__force u32)iph->daddr)
52 return jhash_3words((__force u32)iph->saddr,
53 (__force u32)iph->daddr, iph->protocol, jhash_initval);
54
55 return jhash_3words((__force u32)iph->daddr,
56 (__force u32)iph->saddr, iph->protocol, jhash_initval);
57}
58
59#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
60static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval)
61{
62 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
63 u32 a, b, c;
64
65 if ((__force u32)ip6h->saddr.s6_addr32[3] <
66 (__force u32)ip6h->daddr.s6_addr32[3]) {
67 a = (__force u32) ip6h->saddr.s6_addr32[3];
68 b = (__force u32) ip6h->daddr.s6_addr32[3];
69 } else {
70 b = (__force u32) ip6h->saddr.s6_addr32[3];
71 a = (__force u32) ip6h->daddr.s6_addr32[3];
72 }
73
74 if ((__force u32)ip6h->saddr.s6_addr32[1] <
75 (__force u32)ip6h->daddr.s6_addr32[1])
76 c = (__force u32) ip6h->saddr.s6_addr32[1];
77 else
78 c = (__force u32) ip6h->daddr.s6_addr32[1];
79
80 return jhash_3words(a, b, c, jhash_initval);
81}
82#endif
83
84static inline u32
85nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family,
86 u32 jhash_initval)
87{
88 if (family == NFPROTO_IPV4)
89 queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32;
90#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
91 else if (family == NFPROTO_IPV6)
92 queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32;
93#endif
94
95 return queue;
96}
97
36#endif /* _NF_QUEUE_H */ 98#endif /* _NF_QUEUE_H */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index fbfd229a8e99..aa86a15293e1 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -110,11 +110,13 @@ enum nft_table_flags {
110 * 110 *
111 * @NFTA_TABLE_NAME: name of the table (NLA_STRING) 111 * @NFTA_TABLE_NAME: name of the table (NLA_STRING)
112 * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32) 112 * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32)
113 * @NFTA_TABLE_USE: number of chains in this table (NLA_U32)
113 */ 114 */
114enum nft_table_attributes { 115enum nft_table_attributes {
115 NFTA_TABLE_UNSPEC, 116 NFTA_TABLE_UNSPEC,
116 NFTA_TABLE_NAME, 117 NFTA_TABLE_NAME,
117 NFTA_TABLE_FLAGS, 118 NFTA_TABLE_FLAGS,
119 NFTA_TABLE_USE,
118 __NFTA_TABLE_MAX 120 __NFTA_TABLE_MAX
119}; 121};
120#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) 122#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1)
@@ -553,11 +555,13 @@ enum nft_meta_keys {
553 * 555 *
554 * @NFTA_META_DREG: destination register (NLA_U32) 556 * @NFTA_META_DREG: destination register (NLA_U32)
555 * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys) 557 * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys)
558 * @NFTA_META_SREG: source register (NLA_U32)
556 */ 559 */
557enum nft_meta_attributes { 560enum nft_meta_attributes {
558 NFTA_META_UNSPEC, 561 NFTA_META_UNSPEC,
559 NFTA_META_DREG, 562 NFTA_META_DREG,
560 NFTA_META_KEY, 563 NFTA_META_KEY,
564 NFTA_META_SREG,
561 __NFTA_META_MAX 565 __NFTA_META_MAX
562}; 566};
563#define NFTA_META_MAX (__NFTA_META_MAX - 1) 567#define NFTA_META_MAX (__NFTA_META_MAX - 1)
@@ -658,6 +662,26 @@ enum nft_log_attributes {
658#define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1) 662#define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1)
659 663
660/** 664/**
665 * enum nft_queue_attributes - nf_tables queue expression netlink attributes
666 *
667 * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16)
668 * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16)
669 * @NFTA_QUEUE_FLAGS: various flags (NLA_U16)
670 */
671enum nft_queue_attributes {
672 NFTA_QUEUE_UNSPEC,
673 NFTA_QUEUE_NUM,
674 NFTA_QUEUE_TOTAL,
675 NFTA_QUEUE_FLAGS,
676 __NFTA_QUEUE_MAX
677};
678#define NFTA_QUEUE_MAX (__NFTA_QUEUE_MAX - 1)
679
680#define NFT_QUEUE_FLAG_BYPASS 0x01 /* for compatibility with v2 */
681#define NFT_QUEUE_FLAG_CPU_FANOUT 0x02 /* use current CPU (no hashing) */
682#define NFT_QUEUE_FLAG_MASK 0x03
683
684/**
661 * enum nft_reject_types - nf_tables reject expression reject types 685 * enum nft_reject_types - nf_tables reject expression reject types
662 * 686 *
663 * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable 687 * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable