summary | refs | log | tree | commit | diff | stats
path: root/net/openvswitch/conntrack.c
diff options
context:
space:
mode:
author: Joe Stringer <joestringer@nicira.com> 2015-08-26 14:31:48 -0400
committer: David S. Miller <davem@davemloft.net> 2015-08-27 14:40:43 -0400
commit: 7f8a436eaa2c3ddd8e1ff2fbca267e6275085536 (patch)
tree: 44fa82400d8fc974e52788ff20689eab4f2fb7eb /net/openvswitch/conntrack.c
parent: e79e259588a414589a016edc428ee8dd308f81ad (diff)
openvswitch: Add conntrack action
Expose the kernel connection tracker via OVS. Userspace components can make use of the CT action to populate the connection state (ct_state) field for a flow. This state can be subsequently matched. Exposed connection states are OVS_CS_F_*: - NEW (0x01) - Beginning of a new connection. - ESTABLISHED (0x02) - Part of an existing connection. - RELATED (0x04) - Related to an established connection. - INVALID (0x20) - Could not track the connection for this packet. - REPLY_DIR (0x40) - This packet is in the reply direction for the flow. - TRACKED (0x80) - This packet has been sent through conntrack. When the CT action is executed by itself, it will send the packet through the connection tracker and populate the ct_state field with one or more of the connection state flags above. The CT action will always set the TRACKED bit. When the COMMIT flag is passed to the conntrack action, this specifies that information about the connection should be stored. This allows subsequent packets for the same (or related) connections to be correlated with this connection. Sending subsequent packets for the connection through conntrack allows the connection tracker to consider the packets as ESTABLISHED, RELATED, and/or REPLY_DIR. The CT action may optionally take a zone to track the flow within. This allows connections with the same 5-tuple to be kept logically separate from connections in other zones. If the zone is specified, then the "ct_zone" match field will be subsequently populated with the zone id. IP fragments are handled by transparently assembling them as part of the CT action. The maximum received unit (MRU) size is tracked so that refragmentation can occur during output. IP frag handling contributed by Andy Zhou. Based on original design by Justin Pettit. 
Signed-off-by: Joe Stringer <joestringer@nicira.com> Signed-off-by: Justin Pettit <jpettit@nicira.com> Signed-off-by: Andy Zhou <azhou@nicira.com> Acked-by: Thomas Graf <tgraf@suug.ch> Acked-by: Pravin B Shelar <pshelar@nicira.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/openvswitch/conntrack.c')
-rw-r--r-- net/openvswitch/conntrack.c 454
1 files changed, 454 insertions, 0 deletions
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
new file mode 100644
index 000000000000..1189fd50f1cf
--- /dev/null
+++ b/net/openvswitch/conntrack.c
@@ -0,0 +1,454 @@
1/*
2 * Copyright (c) 2015 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13
14#include <linux/module.h>
15#include <linux/openvswitch.h>
16#include <net/ip.h>
17#include <net/netfilter/nf_conntrack_core.h>
18#include <net/netfilter/nf_conntrack_zones.h>
19#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
20
21#include "datapath.h"
22#include "conntrack.h"
23#include "flow.h"
24#include "flow_netlink.h"
25
26struct ovs_ct_len_tbl {
27 size_t maxlen;
28 size_t minlen;
29};
30
31/* Conntrack action context for execution. */
32struct ovs_conntrack_info {
33 struct nf_conntrack_zone zone;
34 struct nf_conn *ct;
35 u32 flags;
36 u16 family;
37};
38
39static u16 key_to_nfproto(const struct sw_flow_key *key)
40{
41 switch (ntohs(key->eth.type)) {
42 case ETH_P_IP:
43 return NFPROTO_IPV4;
44 case ETH_P_IPV6:
45 return NFPROTO_IPV6;
46 default:
47 return NFPROTO_UNSPEC;
48 }
49}
50
51/* Map SKB connection state into the values used by flow definition. */
52static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
53{
54 u8 ct_state = OVS_CS_F_TRACKED;
55
56 switch (ctinfo) {
57 case IP_CT_ESTABLISHED_REPLY:
58 case IP_CT_RELATED_REPLY:
59 case IP_CT_NEW_REPLY:
60 ct_state |= OVS_CS_F_REPLY_DIR;
61 break;
62 default:
63 break;
64 }
65
66 switch (ctinfo) {
67 case IP_CT_ESTABLISHED:
68 case IP_CT_ESTABLISHED_REPLY:
69 ct_state |= OVS_CS_F_ESTABLISHED;
70 break;
71 case IP_CT_RELATED:
72 case IP_CT_RELATED_REPLY:
73 ct_state |= OVS_CS_F_RELATED;
74 break;
75 case IP_CT_NEW:
76 case IP_CT_NEW_REPLY:
77 ct_state |= OVS_CS_F_NEW;
78 break;
79 default:
80 break;
81 }
82
83 return ct_state;
84}
85
86static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
87 const struct nf_conntrack_zone *zone)
88{
89 key->ct.state = state;
90 key->ct.zone = zone->id;
91}
92
93/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
94 * previously sent the packet to conntrack via the ct action.
95 */
96static void ovs_ct_update_key(const struct sk_buff *skb,
97 struct sw_flow_key *key, bool post_ct)
98{
99 const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
100 enum ip_conntrack_info ctinfo;
101 struct nf_conn *ct;
102 u8 state = 0;
103
104 ct = nf_ct_get(skb, &ctinfo);
105 if (ct) {
106 state = ovs_ct_get_state(ctinfo);
107 if (ct->master)
108 state |= OVS_CS_F_RELATED;
109 zone = nf_ct_zone(ct);
110 } else if (post_ct) {
111 state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
112 }
113 __ovs_ct_update_key(key, state, zone);
114}
115
116void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
117{
118 ovs_ct_update_key(skb, key, false);
119}
120
121int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
122{
123 if (nla_put_u8(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state))
124 return -EMSGSIZE;
125
126 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
127 nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone))
128 return -EMSGSIZE;
129
130 return 0;
131}
132
133static int handle_fragments(struct net *net, struct sw_flow_key *key,
134 u16 zone, struct sk_buff *skb)
135{
136 struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
137
138 if (key->eth.type == htons(ETH_P_IP)) {
139 enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
140 int err;
141
142 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
143 err = ip_defrag(skb, user);
144 if (err)
145 return err;
146
147 ovs_cb.mru = IPCB(skb)->frag_max_size;
148 } else if (key->eth.type == htons(ETH_P_IPV6)) {
149#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
150 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
151 struct sk_buff *reasm;
152
153 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
154 reasm = nf_ct_frag6_gather(skb, user);
155 if (!reasm)
156 return -EINPROGRESS;
157
158 if (skb == reasm)
159 return -EINVAL;
160
161 key->ip.proto = ipv6_hdr(reasm)->nexthdr;
162 skb_morph(skb, reasm);
163 consume_skb(reasm);
164 ovs_cb.mru = IP6CB(skb)->frag_max_size;
165#else
166 return -EPFNOSUPPORT;
167#endif
168 } else {
169 return -EPFNOSUPPORT;
170 }
171
172 key->ip.frag = OVS_FRAG_TYPE_NONE;
173 skb_clear_hash(skb);
174 skb->ignore_df = 1;
175 *OVS_CB(skb) = ovs_cb;
176
177 return 0;
178}
179
180static struct nf_conntrack_expect *
181ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
182 u16 proto, const struct sk_buff *skb)
183{
184 struct nf_conntrack_tuple tuple;
185
186 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple))
187 return NULL;
188 return __nf_ct_expect_find(net, zone, &tuple);
189}
190
191/* Determine whether skb->nfct is equal to the result of conntrack lookup. */
192static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb,
193 const struct ovs_conntrack_info *info)
194{
195 enum ip_conntrack_info ctinfo;
196 struct nf_conn *ct;
197
198 ct = nf_ct_get(skb, &ctinfo);
199 if (!ct)
200 return false;
201 if (!net_eq(net, read_pnet(&ct->ct_net)))
202 return false;
203 if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct)))
204 return false;
205
206 return true;
207}
208
209static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,
210 const struct ovs_conntrack_info *info,
211 struct sk_buff *skb)
212{
213 /* If we are recirculating packets to match on conntrack fields and
214 * committing with a separate conntrack action, then we don't need to
215 * actually run the packet through conntrack twice unless it's for a
216 * different zone.
217 */
218 if (!skb_nfct_cached(net, skb, info)) {
219 struct nf_conn *tmpl = info->ct;
220
221 /* Associate skb with specified zone. */
222 if (tmpl) {
223 if (skb->nfct)
224 nf_conntrack_put(skb->nfct);
225 nf_conntrack_get(&tmpl->ct_general);
226 skb->nfct = &tmpl->ct_general;
227 skb->nfctinfo = IP_CT_NEW;
228 }
229
230 if (nf_conntrack_in(net, info->family, NF_INET_PRE_ROUTING,
231 skb) != NF_ACCEPT)
232 return -ENOENT;
233 }
234
235 return 0;
236}
237
238/* Lookup connection and read fields into key. */
239static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
240 const struct ovs_conntrack_info *info,
241 struct sk_buff *skb)
242{
243 struct nf_conntrack_expect *exp;
244
245 exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
246 if (exp) {
247 u8 state;
248
249 state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
250 __ovs_ct_update_key(key, state, &info->zone);
251 } else {
252 int err;
253
254 err = __ovs_ct_lookup(net, key, info, skb);
255 if (err)
256 return err;
257
258 ovs_ct_update_key(skb, key, true);
259 }
260
261 return 0;
262}
263
264/* Lookup connection and confirm if unconfirmed. */
265static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
266 const struct ovs_conntrack_info *info,
267 struct sk_buff *skb)
268{
269 u8 state;
270 int err;
271
272 state = key->ct.state;
273 if (key->ct.zone == info->zone.id &&
274 ((state & OVS_CS_F_TRACKED) && !(state & OVS_CS_F_NEW))) {
275 /* Previous lookup has shown that this connection is already
276 * tracked and committed. Skip committing.
277 */
278 return 0;
279 }
280
281 err = __ovs_ct_lookup(net, key, info, skb);
282 if (err)
283 return err;
284 if (nf_conntrack_confirm(skb) != NF_ACCEPT)
285 return -EINVAL;
286
287 ovs_ct_update_key(skb, key, true);
288
289 return 0;
290}
291
292int ovs_ct_execute(struct net *net, struct sk_buff *skb,
293 struct sw_flow_key *key,
294 const struct ovs_conntrack_info *info)
295{
296 int nh_ofs;
297 int err;
298
299 /* The conntrack module expects to be working at L3. */
300 nh_ofs = skb_network_offset(skb);
301 skb_pull(skb, nh_ofs);
302
303 if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
304 err = handle_fragments(net, key, info->zone.id, skb);
305 if (err)
306 return err;
307 }
308
309 if (info->flags & OVS_CT_F_COMMIT)
310 err = ovs_ct_commit(net, key, info, skb);
311 else
312 err = ovs_ct_lookup(net, key, info, skb);
313
314 skb_push(skb, nh_ofs);
315 return err;
316}
317
318static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
319 [OVS_CT_ATTR_FLAGS] = { .minlen = sizeof(u32),
320 .maxlen = sizeof(u32) },
321 [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16),
322 .maxlen = sizeof(u16) },
323};
324
325static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
326 bool log)
327{
328 struct nlattr *a;
329 int rem;
330
331 nla_for_each_nested(a, attr, rem) {
332 int type = nla_type(a);
333 int maxlen = ovs_ct_attr_lens[type].maxlen;
334 int minlen = ovs_ct_attr_lens[type].minlen;
335
336 if (type > OVS_CT_ATTR_MAX) {
337 OVS_NLERR(log,
338 "Unknown conntrack attr (type=%d, max=%d)",
339 type, OVS_CT_ATTR_MAX);
340 return -EINVAL;
341 }
342 if (nla_len(a) < minlen || nla_len(a) > maxlen) {
343 OVS_NLERR(log,
344 "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
345 type, nla_len(a), maxlen);
346 return -EINVAL;
347 }
348
349 switch (type) {
350 case OVS_CT_ATTR_FLAGS:
351 info->flags = nla_get_u32(a);
352 break;
353#ifdef CONFIG_NF_CONNTRACK_ZONES
354 case OVS_CT_ATTR_ZONE:
355 info->zone.id = nla_get_u16(a);
356 break;
357#endif
358 default:
359 OVS_NLERR(log, "Unknown conntrack attr (%d)",
360 type);
361 return -EINVAL;
362 }
363 }
364
365 if (rem > 0) {
366 OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem);
367 return -EINVAL;
368 }
369
370 return 0;
371}
372
373bool ovs_ct_verify(enum ovs_key_attr attr)
374{
375 if (attr == OVS_KEY_ATTR_CT_STATE)
376 return true;
377 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
378 attr == OVS_KEY_ATTR_CT_ZONE)
379 return true;
380
381 return false;
382}
383
384int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
385 const struct sw_flow_key *key,
386 struct sw_flow_actions **sfa, bool log)
387{
388 struct ovs_conntrack_info ct_info;
389 u16 family;
390 int err;
391
392 family = key_to_nfproto(key);
393 if (family == NFPROTO_UNSPEC) {
394 OVS_NLERR(log, "ct family unspecified");
395 return -EINVAL;
396 }
397
398 memset(&ct_info, 0, sizeof(ct_info));
399 ct_info.family = family;
400
401 nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID,
402 NF_CT_DEFAULT_ZONE_DIR, 0);
403
404 err = parse_ct(attr, &ct_info, log);
405 if (err)
406 return err;
407
408 /* Set up template for tracking connections in specific zones. */
409 ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL);
410 if (!ct_info.ct) {
411 OVS_NLERR(log, "Failed to allocate conntrack template");
412 return -ENOMEM;
413 }
414
415 err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info,
416 sizeof(ct_info), log);
417 if (err)
418 goto err_free_ct;
419
420 __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
421 nf_conntrack_get(&ct_info.ct->ct_general);
422 return 0;
423err_free_ct:
424 nf_conntrack_free(ct_info.ct);
425 return err;
426}
427
428int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
429 struct sk_buff *skb)
430{
431 struct nlattr *start;
432
433 start = nla_nest_start(skb, OVS_ACTION_ATTR_CT);
434 if (!start)
435 return -EMSGSIZE;
436
437 if (nla_put_u32(skb, OVS_CT_ATTR_FLAGS, ct_info->flags))
438 return -EMSGSIZE;
439 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
440 nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
441 return -EMSGSIZE;
442
443 nla_nest_end(skb, start);
444
445 return 0;
446}
447
448void ovs_ct_free_action(const struct nlattr *a)
449{
450 struct ovs_conntrack_info *ct_info = nla_data(a);
451
452 if (ct_info->ct)
453 nf_ct_put(ct_info->ct);
454}