summaryrefslogtreecommitdiffstats
path: root/net/bridge
diff options
context:
space:
mode:
authorPablo Neira Ayuso <pablo@netfilter.org>2019-05-29 07:25:37 -0400
committerDavid S. Miller <davem@davemloft.net>2019-05-30 17:18:18 -0400
commit3c171f496ef57774f8e5d509923372549734877f (patch)
treee143c7fb0085c211a93ef1c5c3bda109a93acb97 /net/bridge
parentd035f19f59c5bca2fda2faa43b5e9fe09dfb7884 (diff)
netfilter: bridge: add connection tracking system
This patch adds basic connection tracking support for the bridge, including initial IPv4 support. This patch registers two hooks to deal with the bridge forwarding path, one from the bridge prerouting hook to call nf_conntrack_in(); and another from the bridge postrouting hook to confirm the entry. The conntrack bridge prerouting hook defragments packets before passing them to nf_conntrack_in() to look up an existing entry; otherwise a new entry is allocated and attached to the skbuff. The conntrack bridge postrouting hook confirms new conntrack entries, i.e. if this is the first packet seen, then it adds the entry to the hashtable and (if needed) it refragments the skbuff into the original fragments, leaving the geometry as is if possible. Exceptions are linearized skbuffs, e.g. skbuffs that are passed up to nfqueue and conntrack helpers, as well as cloned skbuffs for local delivery (e.g. tcpdump), also in case of bridge port flooding (cloned skbuffs too). The packet defragmentation is done through the ip_defrag() call. This forces us to save the bridge control buffer, reset the IP control buffer area and then restore it after the call. This function also bumps the IP fragmentation statistics; it would probably be desirable to have independent statistics for the bridge defragmentation/refragmentation. The maximum fragment length is stored in the control buffer and it is used to refragment the skbuff from the postrouting path. The new fraglist splitter and fragment transformer APIs are used to implement the bridge refragmentation code. The br_ip_fragment() function drops the packet in case the maximum fragment size seen is larger than the output port MTU. This patchset follows the principle that conntrack should not drop packets, so users can do it through policy via invalid state matching. Like br_netfilter, there is no refragmentation for packets that are passed up for local delivery, i.e. prerouting -> input path. 
There have been calls to nf_reset() in several spots in the stack for a long time already, e.g. af_packet, which show that skbuff fraglist handling from the netif_rx path is already supported. The helpers are called from the postrouting hook, before confirmation, from where we may see packet floods to bridge ports. Then, although unlikely, this may result in exercising the helpers many times for each clone. It would be good to explore how to pass all the packets in a list to the conntrack hook to do this handling only once for this case. Thanks to Florian Westphal for handing over an initial patchset version to add support for conntrack bridge. Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/bridge')
-rw-r--r--net/bridge/br_device.c1
-rw-r--r--net/bridge/br_private.h1
-rw-r--r--net/bridge/netfilter/Kconfig14
-rw-r--r--net/bridge/netfilter/Makefile3
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c378
5 files changed, 397 insertions, 0 deletions
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 013323b6dbe4..693aefad7f8a 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -56,6 +56,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
56 56
57 br_switchdev_frame_unmark(skb); 57 br_switchdev_frame_unmark(skb);
58 BR_INPUT_SKB_CB(skb)->brdev = dev; 58 BR_INPUT_SKB_CB(skb)->brdev = dev;
59 BR_INPUT_SKB_CB(skb)->frag_max_size = 0;
59 60
60 skb_reset_mac_header(skb); 61 skb_reset_mac_header(skb);
61 eth = eth_hdr(skb); 62 eth = eth_hdr(skb);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 334a8c496b50..68561741e827 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -425,6 +425,7 @@ struct net_bridge {
425struct br_input_skb_cb { 425struct br_input_skb_cb {
426 struct net_device *brdev; 426 struct net_device *brdev;
427 427
428 u16 frag_max_size;
428#ifdef CONFIG_BRIDGE_IGMP_SNOOPING 429#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
429 u8 igmp; 430 u8 igmp;
430 u8 mrouters_only:1; 431 u8 mrouters_only:1;
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index c3ad90c43801..f4fb0b9b927d 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -19,6 +19,20 @@ config NF_LOG_BRIDGE
19 tristate "Bridge packet logging" 19 tristate "Bridge packet logging"
20 select NF_LOG_COMMON 20 select NF_LOG_COMMON
21 21
22config NF_CONNTRACK_BRIDGE
23 tristate "IPv4/IPV6 bridge connection tracking support"
24 depends on NF_CONNTRACK
25 default n
26 help
27 Connection tracking keeps a record of what packets have passed
28 through your machine, in order to figure out how they are related
29 into connections. This is used to enhance packet filtering via
30 stateful policies. Enable this if you want native tracking from
31 the bridge. This provides a replacement for the `br_netfilter'
32 infrastructure.
33
34 To compile it as a module, choose M here. If unsure, say N.
35
22endif # NF_TABLES_BRIDGE 36endif # NF_TABLES_BRIDGE
23 37
24menuconfig BRIDGE_NF_EBTABLES 38menuconfig BRIDGE_NF_EBTABLES
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 9b868861f21a..9d7767322a64 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -5,6 +5,9 @@
5 5
6obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o 6obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o
7 7
8# connection tracking
9obj-$(CONFIG_NF_CONNTRACK_BRIDGE) += nf_conntrack_bridge.o
10
8# packet logging 11# packet logging
9obj-$(CONFIG_NF_LOG_BRIDGE) += nf_log_bridge.o 12obj-$(CONFIG_NF_LOG_BRIDGE) += nf_log_bridge.o
10 13
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
new file mode 100644
index 000000000000..2571528ed582
--- /dev/null
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -0,0 +1,378 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#include <linux/types.h>
3#include <linux/ip.h>
4#include <linux/netfilter.h>
5#include <linux/netfilter_ipv6.h>
6#include <linux/netfilter_bridge.h>
7#include <linux/module.h>
8#include <linux/skbuff.h>
9#include <linux/icmp.h>
10#include <linux/sysctl.h>
11#include <net/route.h>
12#include <net/ip.h>
13
14#include <net/netfilter/nf_conntrack.h>
15#include <net/netfilter/nf_conntrack_core.h>
16#include <net/netfilter/nf_conntrack_helper.h>
17#include <net/netfilter/nf_conntrack_bridge.h>
18
19#include <linux/netfilter/nf_tables.h>
20#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
21#include <net/netfilter/nf_tables.h>
22
23#include "../br_private.h"
24
25/* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff
26 * has been linearized or cloned.
27 */
28static int nf_br_ip_fragment(struct net *net, struct sock *sk,
29 struct sk_buff *skb,
30 struct nf_ct_bridge_frag_data *data,
31 int (*output)(struct net *, struct sock *sk,
32 const struct nf_ct_bridge_frag_data *data,
33 struct sk_buff *))
34{
35 int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
36 unsigned int hlen, ll_rs, mtu;
37 struct ip_frag_state state;
38 struct iphdr *iph;
39 int err;
40
41 /* for offloaded checksums cleanup checksum before fragmentation */
42 if (skb->ip_summed == CHECKSUM_PARTIAL &&
43 (err = skb_checksum_help(skb)))
44 goto blackhole;
45
46 iph = ip_hdr(skb);
47
48 /*
49 * Setup starting values
50 */
51
52 hlen = iph->ihl * 4;
53 frag_max_size -= hlen;
54 ll_rs = LL_RESERVED_SPACE(skb->dev);
55 mtu = skb->dev->mtu;
56
57 if (skb_has_frag_list(skb)) {
58 unsigned int first_len = skb_pagelen(skb);
59 struct ip_fraglist_iter iter;
60 struct sk_buff *frag;
61
62 if (first_len - hlen > mtu ||
63 skb_headroom(skb) < ll_rs)
64 goto blackhole;
65
66 if (skb_cloned(skb))
67 goto slow_path;
68
69 skb_walk_frags(skb, frag) {
70 if (frag->len > mtu ||
71 skb_headroom(frag) < hlen + ll_rs)
72 goto blackhole;
73
74 if (skb_shared(frag))
75 goto slow_path;
76 }
77
78 ip_fraglist_init(skb, iph, hlen, &iter);
79
80 for (;;) {
81 if (iter.frag)
82 ip_fraglist_prepare(skb, &iter);
83
84 err = output(net, sk, data, skb);
85 if (err || !iter.frag)
86 break;
87
88 skb = ip_fraglist_next(&iter);
89 }
90 return err;
91 }
92slow_path:
93 /* This is a linearized skbuff, the original geometry is lost for us.
94 * This may also be a clone skbuff, we could preserve the geometry for
95 * the copies but probably not worth the effort.
96 */
97 ip_frag_init(skb, hlen, ll_rs, frag_max_size, &state);
98
99 while (state.left > 0) {
100 struct sk_buff *skb2;
101
102 skb2 = ip_frag_next(skb, &state);
103 if (IS_ERR(skb2)) {
104 err = PTR_ERR(skb2);
105 goto blackhole;
106 }
107
108 err = output(net, sk, data, skb2);
109 if (err)
110 goto blackhole;
111 }
112 consume_skb(skb);
113 return err;
114
115blackhole:
116 kfree_skb(skb);
117 return 0;
118}
119
120/* ip_defrag() expects IPCB() in place. */
121static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb,
122 size_t inet_skb_parm_size)
123{
124 memcpy(cb, skb->cb, sizeof(*cb));
125 memset(skb->cb, 0, inet_skb_parm_size);
126}
127
128static void br_skb_cb_restore(struct sk_buff *skb,
129 const struct br_input_skb_cb *cb,
130 u16 fragsz)
131{
132 memcpy(skb->cb, cb, sizeof(*cb));
133 BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz;
134}
135
136static unsigned int nf_ct_br_defrag4(struct sk_buff *skb,
137 const struct nf_hook_state *state)
138{
139 u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
140 enum ip_conntrack_info ctinfo;
141 struct br_input_skb_cb cb;
142 const struct nf_conn *ct;
143 int err;
144
145 if (!ip_is_fragment(ip_hdr(skb)))
146 return NF_ACCEPT;
147
148 ct = nf_ct_get(skb, &ctinfo);
149 if (ct)
150 zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
151
152 br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm));
153 local_bh_disable();
154 err = ip_defrag(state->net, skb,
155 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
156 local_bh_enable();
157 if (!err) {
158 br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size);
159 skb->ignore_df = 1;
160 return NF_ACCEPT;
161 }
162
163 return NF_STOLEN;
164}
165
166static int nf_ct_br_ip_check(const struct sk_buff *skb)
167{
168 const struct iphdr *iph;
169 int nhoff, len;
170
171 nhoff = skb_network_offset(skb);
172 iph = ip_hdr(skb);
173 if (iph->ihl < 5 ||
174 iph->version != 4)
175 return -1;
176
177 len = ntohs(iph->tot_len);
178 if (skb->len < nhoff + len ||
179 len < (iph->ihl * 4))
180 return -1;
181
182 return 0;
183}
184
185static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
186 const struct nf_hook_state *state)
187{
188 struct nf_hook_state bridge_state = *state;
189 enum ip_conntrack_info ctinfo;
190 struct nf_conn *ct;
191 u32 len;
192 int ret;
193
194 ct = nf_ct_get(skb, &ctinfo);
195 if ((ct && !nf_ct_is_template(ct)) ||
196 ctinfo == IP_CT_UNTRACKED)
197 return NF_ACCEPT;
198
199 switch (skb->protocol) {
200 case htons(ETH_P_IP):
201 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
202 return NF_ACCEPT;
203
204 len = ntohs(ip_hdr(skb)->tot_len);
205 if (pskb_trim_rcsum(skb, len))
206 return NF_ACCEPT;
207
208 if (nf_ct_br_ip_check(skb))
209 return NF_ACCEPT;
210
211 bridge_state.pf = NFPROTO_IPV4;
212 ret = nf_ct_br_defrag4(skb, &bridge_state);
213 break;
214 case htons(ETH_P_IPV6):
215 /* fall through */
216 default:
217 nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
218 return NF_ACCEPT;
219 }
220
221 if (ret != NF_ACCEPT)
222 return ret;
223
224 return nf_conntrack_in(skb, &bridge_state);
225}
226
227static void nf_ct_bridge_frag_save(struct sk_buff *skb,
228 struct nf_ct_bridge_frag_data *data)
229{
230 if (skb_vlan_tag_present(skb)) {
231 data->vlan_present = true;
232 data->vlan_tci = skb->vlan_tci;
233 data->vlan_proto = skb->vlan_proto;
234 } else {
235 data->vlan_present = false;
236 }
237 skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
238}
239
240static unsigned int
241nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
242 int (*output)(struct net *, struct sock *sk,
243 const struct nf_ct_bridge_frag_data *data,
244 struct sk_buff *))
245{
246 struct nf_ct_bridge_frag_data data;
247
248 if (!BR_INPUT_SKB_CB(skb)->frag_max_size)
249 return NF_ACCEPT;
250
251 nf_ct_bridge_frag_save(skb, &data);
252 switch (skb->protocol) {
253 case htons(ETH_P_IP):
254 nf_br_ip_fragment(state->net, state->sk, skb, &data, output);
255 break;
256 case htons(ETH_P_IPV6):
257 return NF_ACCEPT;
258 default:
259 WARN_ON_ONCE(1);
260 return NF_DROP;
261 }
262
263 return NF_STOLEN;
264}
265
266/* Actually only slow path refragmentation needs this. */
267static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
268 const struct nf_ct_bridge_frag_data *data)
269{
270 int err;
271
272 err = skb_cow_head(skb, ETH_HLEN);
273 if (err) {
274 kfree_skb(skb);
275 return -ENOMEM;
276 }
277 if (data->vlan_present)
278 __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
279
280 skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
281 skb_reset_mac_header(skb);
282
283 return 0;
284}
285
286static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
287 const struct nf_ct_bridge_frag_data *data,
288 struct sk_buff *skb)
289{
290 int err;
291
292 err = nf_ct_bridge_frag_restore(skb, data);
293 if (err < 0)
294 return err;
295
296 return br_dev_queue_push_xmit(net, sk, skb);
297}
298
299static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb)
300{
301 enum ip_conntrack_info ctinfo;
302 struct nf_conn *ct;
303 int protoff;
304
305 ct = nf_ct_get(skb, &ctinfo);
306 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
307 return nf_conntrack_confirm(skb);
308
309 switch (skb->protocol) {
310 case htons(ETH_P_IP):
311 protoff = skb_network_offset(skb) + ip_hdrlen(skb);
312 break;
313 case htons(ETH_P_IPV6): {
314 unsigned char pnum = ipv6_hdr(skb)->nexthdr;
315 __be16 frag_off;
316
317 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
318 &frag_off);
319 if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
320 return nf_conntrack_confirm(skb);
321 }
322 break;
323 default:
324 return NF_ACCEPT;
325 }
326 return nf_confirm(skb, protoff, ct, ctinfo);
327}
328
329static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb,
330 const struct nf_hook_state *state)
331{
332 int ret;
333
334 ret = nf_ct_bridge_confirm(skb);
335 if (ret != NF_ACCEPT)
336 return ret;
337
338 return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post);
339}
340
341static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
342 {
343 .hook = nf_ct_bridge_pre,
344 .pf = NFPROTO_BRIDGE,
345 .hooknum = NF_BR_PRE_ROUTING,
346 .priority = NF_IP_PRI_CONNTRACK,
347 },
348 {
349 .hook = nf_ct_bridge_post,
350 .pf = NFPROTO_BRIDGE,
351 .hooknum = NF_BR_POST_ROUTING,
352 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
353 },
354};
355
356static struct nf_ct_bridge_info bridge_info = {
357 .ops = nf_ct_bridge_hook_ops,
358 .ops_size = ARRAY_SIZE(nf_ct_bridge_hook_ops),
359 .me = THIS_MODULE,
360};
361
362static int __init nf_conntrack_l3proto_bridge_init(void)
363{
364 nf_ct_bridge_register(&bridge_info);
365
366 return 0;
367}
368
369static void __exit nf_conntrack_l3proto_bridge_fini(void)
370{
371 nf_ct_bridge_unregister(&bridge_info);
372}
373
374module_init(nf_conntrack_l3proto_bridge_init);
375module_exit(nf_conntrack_l3proto_bridge_fini);
376
377MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE));
378MODULE_LICENSE("GPL");