aboutsummaryrefslogtreecommitdiffstats
path: root/net/openvswitch
diff options
context:
space:
mode:
authorJoe Stringer <joestringer@nicira.com>2015-08-26 14:31:48 -0400
committerDavid S. Miller <davem@davemloft.net>2015-08-27 14:40:43 -0400
commit7f8a436eaa2c3ddd8e1ff2fbca267e6275085536 (patch)
tree44fa82400d8fc974e52788ff20689eab4f2fb7eb /net/openvswitch
parente79e259588a414589a016edc428ee8dd308f81ad (diff)
openvswitch: Add conntrack action
Expose the kernel connection tracker via OVS. Userspace components can make use of the CT action to populate the connection state (ct_state) field for a flow. This state can be subsequently matched. Exposed connection states are OVS_CS_F_*: - NEW (0x01) - Beginning of a new connection. - ESTABLISHED (0x02) - Part of an existing connection. - RELATED (0x04) - Related to an established connection. - INVALID (0x20) - Could not track the connection for this packet. - REPLY_DIR (0x40) - This packet is in the reply direction for the flow. - TRACKED (0x80) - This packet has been sent through conntrack. When the CT action is executed by itself, it will send the packet through the connection tracker and populate the ct_state field with one or more of the connection state flags above. The CT action will always set the TRACKED bit. When the COMMIT flag is passed to the conntrack action, this specifies that information about the connection should be stored. This allows subsequent packets for the same (or related) connections to be correlated with this connection. Sending subsequent packets for the connection through conntrack allows the connection tracker to consider the packets as ESTABLISHED, RELATED, and/or REPLY_DIR. The CT action may optionally take a zone to track the flow within. This allows connections with the same 5-tuple to be kept logically separate from connections in other zones. If the zone is specified, then the "ct_zone" match field will be subsequently populated with the zone id. IP fragments are handled by transparently assembling them as part of the CT action. The maximum received unit (MRU) size is tracked so that refragmentation can occur during output. IP frag handling contributed by Andy Zhou. Based on original design by Justin Pettit. 
Signed-off-by: Joe Stringer <joestringer@nicira.com> Signed-off-by: Justin Pettit <jpettit@nicira.com> Signed-off-by: Andy Zhou <azhou@nicira.com> Acked-by: Thomas Graf <tgraf@suug.ch> Acked-by: Pravin B Shelar <pshelar@nicira.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/openvswitch')
-rw-r--r--net/openvswitch/Kconfig11
-rw-r--r--net/openvswitch/Makefile2
-rw-r--r--net/openvswitch/actions.c175
-rw-r--r--net/openvswitch/conntrack.c454
-rw-r--r--net/openvswitch/conntrack.h78
-rw-r--r--net/openvswitch/datapath.c66
-rw-r--r--net/openvswitch/datapath.h6
-rw-r--r--net/openvswitch/flow.c2
-rw-r--r--net/openvswitch/flow.h6
-rw-r--r--net/openvswitch/flow_netlink.c69
-rw-r--r--net/openvswitch/flow_netlink.h4
-rw-r--r--net/openvswitch/vport.c1
12 files changed, 837 insertions, 37 deletions
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 422dc0567de9..98f343d0d6dd 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -31,6 +31,17 @@ config OPENVSWITCH
31 31
32 If unsure, say N. 32 If unsure, say N.
33 33
34config OPENVSWITCH_CONNTRACK
35 bool "Open vSwitch conntrack action support"
36 depends on OPENVSWITCH
37 depends on NF_CONNTRACK
38 default OPENVSWITCH
39 ---help---
40 If you say Y here, then Open vSwitch module will be able to pass
41 packets through conntrack.
42
43 Say N to exclude this support and reduce the binary size.
44
34config OPENVSWITCH_GRE 45config OPENVSWITCH_GRE
35 tristate "Open vSwitch GRE tunneling support" 46 tristate "Open vSwitch GRE tunneling support"
36 depends on OPENVSWITCH 47 depends on OPENVSWITCH
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 6e1701de04d8..5b5913b06f54 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -15,6 +15,8 @@ openvswitch-y := \
15 vport-internal_dev.o \ 15 vport-internal_dev.o \
16 vport-netdev.o 16 vport-netdev.o
17 17
18openvswitch-$(CONFIG_OPENVSWITCH_CONNTRACK) += conntrack.o
19
18obj-$(CONFIG_OPENVSWITCH_VXLAN)+= vport-vxlan.o 20obj-$(CONFIG_OPENVSWITCH_VXLAN)+= vport-vxlan.o
19obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o 21obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o
20obj-$(CONFIG_OPENVSWITCH_GRE) += vport-gre.o 22obj-$(CONFIG_OPENVSWITCH_GRE) += vport-gre.o
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 520438b77dc8..72ca2c491b0a 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -22,6 +22,7 @@
22#include <linux/in.h> 22#include <linux/in.h>
23#include <linux/ip.h> 23#include <linux/ip.h>
24#include <linux/openvswitch.h> 24#include <linux/openvswitch.h>
25#include <linux/netfilter_ipv6.h>
25#include <linux/sctp.h> 26#include <linux/sctp.h>
26#include <linux/tcp.h> 27#include <linux/tcp.h>
27#include <linux/udp.h> 28#include <linux/udp.h>
@@ -29,6 +30,7 @@
29#include <linux/if_arp.h> 30#include <linux/if_arp.h>
30#include <linux/if_vlan.h> 31#include <linux/if_vlan.h>
31 32
33#include <net/dst.h>
32#include <net/ip.h> 34#include <net/ip.h>
33#include <net/ipv6.h> 35#include <net/ipv6.h>
34#include <net/checksum.h> 36#include <net/checksum.h>
@@ -38,6 +40,7 @@
38 40
39#include "datapath.h" 41#include "datapath.h"
40#include "flow.h" 42#include "flow.h"
43#include "conntrack.h"
41#include "vport.h" 44#include "vport.h"
42 45
43static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, 46static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
@@ -52,6 +55,20 @@ struct deferred_action {
52 struct sw_flow_key pkt_key; 55 struct sw_flow_key pkt_key;
53}; 56};
54 57
58#define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN)
59struct ovs_frag_data {
60 unsigned long dst;
61 struct vport *vport;
62 struct ovs_skb_cb cb;
63 __be16 inner_protocol;
64 __u16 vlan_tci;
65 __be16 vlan_proto;
66 unsigned int l2_len;
67 u8 l2_data[MAX_L2_LEN];
68};
69
70static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);
71
55#define DEFERRED_ACTION_FIFO_SIZE 10 72#define DEFERRED_ACTION_FIFO_SIZE 10
56struct action_fifo { 73struct action_fifo {
57 int head; 74 int head;
@@ -602,14 +619,145 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
602 return 0; 619 return 0;
603} 620}
604 621
605static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port) 622static int ovs_vport_output(struct sock *sock, struct sk_buff *skb)
623{
624 struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
625 struct vport *vport = data->vport;
626
627 if (skb_cow_head(skb, data->l2_len) < 0) {
628 kfree_skb(skb);
629 return -ENOMEM;
630 }
631
632 __skb_dst_copy(skb, data->dst);
633 *OVS_CB(skb) = data->cb;
634 skb->inner_protocol = data->inner_protocol;
635 skb->vlan_tci = data->vlan_tci;
636 skb->vlan_proto = data->vlan_proto;
637
638 /* Reconstruct the MAC header. */
639 skb_push(skb, data->l2_len);
640 memcpy(skb->data, &data->l2_data, data->l2_len);
641 ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len);
642 skb_reset_mac_header(skb);
643
644 ovs_vport_send(vport, skb);
645 return 0;
646}
647
648static unsigned int
649ovs_dst_get_mtu(const struct dst_entry *dst)
650{
651 return dst->dev->mtu;
652}
653
654static struct dst_ops ovs_dst_ops = {
655 .family = AF_UNSPEC,
656 .mtu = ovs_dst_get_mtu,
657};
658
659/* prepare_frag() is called once per (larger-than-MTU) frame; its inverse is
660 * ovs_vport_output(), which is called once per fragmented packet.
661 */
662static void prepare_frag(struct vport *vport, struct sk_buff *skb)
663{
664 unsigned int hlen = skb_network_offset(skb);
665 struct ovs_frag_data *data;
666
667 data = this_cpu_ptr(&ovs_frag_data_storage);
668 data->dst = skb->_skb_refdst;
669 data->vport = vport;
670 data->cb = *OVS_CB(skb);
671 data->inner_protocol = skb->inner_protocol;
672 data->vlan_tci = skb->vlan_tci;
673 data->vlan_proto = skb->vlan_proto;
674 data->l2_len = hlen;
675 memcpy(&data->l2_data, skb->data, hlen);
676
677 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
678 skb_pull(skb, hlen);
679}
680
681static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
682 __be16 ethertype)
683{
684 if (skb_network_offset(skb) > MAX_L2_LEN) {
685 OVS_NLERR(1, "L2 header too long to fragment");
686 return;
687 }
688
689 if (ethertype == htons(ETH_P_IP)) {
690 struct dst_entry ovs_dst;
691 unsigned long orig_dst;
692
693 prepare_frag(vport, skb);
694 dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
695 DST_OBSOLETE_NONE, DST_NOCOUNT);
696 ovs_dst.dev = vport->dev;
697
698 orig_dst = skb->_skb_refdst;
699 skb_dst_set_noref(skb, &ovs_dst);
700 IPCB(skb)->frag_max_size = mru;
701
702 ip_do_fragment(skb->sk, skb, ovs_vport_output);
703 refdst_drop(orig_dst);
704 } else if (ethertype == htons(ETH_P_IPV6)) {
705 const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
706 unsigned long orig_dst;
707 struct rt6_info ovs_rt;
708
709 if (!v6ops) {
710 kfree_skb(skb);
711 return;
712 }
713
714 prepare_frag(vport, skb);
715 memset(&ovs_rt, 0, sizeof(ovs_rt));
716 dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
717 DST_OBSOLETE_NONE, DST_NOCOUNT);
718 ovs_rt.dst.dev = vport->dev;
719
720 orig_dst = skb->_skb_refdst;
721 skb_dst_set_noref(skb, &ovs_rt.dst);
722 IP6CB(skb)->frag_max_size = mru;
723
724 v6ops->fragment(skb->sk, skb, ovs_vport_output);
725 refdst_drop(orig_dst);
726 } else {
727 WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
728 ovs_vport_name(vport), ntohs(ethertype), mru,
729 vport->dev->mtu);
730 kfree_skb(skb);
731 }
732}
733
734static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
735 struct sw_flow_key *key)
606{ 736{
607 struct vport *vport = ovs_vport_rcu(dp, out_port); 737 struct vport *vport = ovs_vport_rcu(dp, out_port);
608 738
609 if (likely(vport)) 739 if (likely(vport)) {
610 ovs_vport_send(vport, skb); 740 u16 mru = OVS_CB(skb)->mru;
611 else 741
742 if (likely(!mru || (skb->len <= mru + ETH_HLEN))) {
743 ovs_vport_send(vport, skb);
744 } else if (mru <= vport->dev->mtu) {
745 __be16 ethertype = key->eth.type;
746
747 if (!is_flow_key_valid(key)) {
748 if (eth_p_mpls(skb->protocol))
749 ethertype = skb->inner_protocol;
750 else
751 ethertype = vlan_get_protocol(skb);
752 }
753
754 ovs_fragment(vport, skb, mru, ethertype);
755 } else {
756 kfree_skb(skb);
757 }
758 } else {
612 kfree_skb(skb); 759 kfree_skb(skb);
760 }
613} 761}
614 762
615static int output_userspace(struct datapath *dp, struct sk_buff *skb, 763static int output_userspace(struct datapath *dp, struct sk_buff *skb,
@@ -623,6 +771,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
623 771
624 memset(&upcall, 0, sizeof(upcall)); 772 memset(&upcall, 0, sizeof(upcall));
625 upcall.cmd = OVS_PACKET_CMD_ACTION; 773 upcall.cmd = OVS_PACKET_CMD_ACTION;
774 upcall.mru = OVS_CB(skb)->mru;
626 775
627 for (a = nla_data(attr), rem = nla_len(attr); rem > 0; 776 for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
628 a = nla_next(a, &rem)) { 777 a = nla_next(a, &rem)) {
@@ -816,6 +965,11 @@ static int execute_masked_set_action(struct sk_buff *skb,
816 err = set_mpls(skb, flow_key, nla_data(a), get_mask(a, 965 err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
817 __be32 *)); 966 __be32 *));
818 break; 967 break;
968
969 case OVS_KEY_ATTR_CT_STATE:
970 case OVS_KEY_ATTR_CT_ZONE:
971 err = -EINVAL;
972 break;
819 } 973 }
820 974
821 return err; 975 return err;
@@ -885,7 +1039,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
885 struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC); 1039 struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);
886 1040
887 if (out_skb) 1041 if (out_skb)
888 do_output(dp, out_skb, prev_port); 1042 do_output(dp, out_skb, prev_port, key);
889 1043
890 prev_port = -1; 1044 prev_port = -1;
891 } 1045 }
@@ -942,6 +1096,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
942 case OVS_ACTION_ATTR_SAMPLE: 1096 case OVS_ACTION_ATTR_SAMPLE:
943 err = sample(dp, skb, key, a, attr, len); 1097 err = sample(dp, skb, key, a, attr, len);
944 break; 1098 break;
1099
1100 case OVS_ACTION_ATTR_CT:
1101 err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key,
1102 nla_data(a));
1103
1104 /* Hide stolen IP fragments from user space. */
1105 if (err == -EINPROGRESS)
1106 return 0;
1107 break;
945 } 1108 }
946 1109
947 if (unlikely(err)) { 1110 if (unlikely(err)) {
@@ -951,7 +1114,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
951 } 1114 }
952 1115
953 if (prev_port != -1) 1116 if (prev_port != -1)
954 do_output(dp, skb, prev_port); 1117 do_output(dp, skb, prev_port, key);
955 else 1118 else
956 consume_skb(skb); 1119 consume_skb(skb);
957 1120
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
new file mode 100644
index 000000000000..1189fd50f1cf
--- /dev/null
+++ b/net/openvswitch/conntrack.c
@@ -0,0 +1,454 @@
1/*
2 * Copyright (c) 2015 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13
14#include <linux/module.h>
15#include <linux/openvswitch.h>
16#include <net/ip.h>
17#include <net/netfilter/nf_conntrack_core.h>
18#include <net/netfilter/nf_conntrack_zones.h>
19#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
20
21#include "datapath.h"
22#include "conntrack.h"
23#include "flow.h"
24#include "flow_netlink.h"
25
26struct ovs_ct_len_tbl {
27 size_t maxlen;
28 size_t minlen;
29};
30
31/* Conntrack action context for execution. */
32struct ovs_conntrack_info {
33 struct nf_conntrack_zone zone;
34 struct nf_conn *ct;
35 u32 flags;
36 u16 family;
37};
38
39static u16 key_to_nfproto(const struct sw_flow_key *key)
40{
41 switch (ntohs(key->eth.type)) {
42 case ETH_P_IP:
43 return NFPROTO_IPV4;
44 case ETH_P_IPV6:
45 return NFPROTO_IPV6;
46 default:
47 return NFPROTO_UNSPEC;
48 }
49}
50
51/* Map SKB connection state into the values used by flow definition. */
52static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
53{
54 u8 ct_state = OVS_CS_F_TRACKED;
55
56 switch (ctinfo) {
57 case IP_CT_ESTABLISHED_REPLY:
58 case IP_CT_RELATED_REPLY:
59 case IP_CT_NEW_REPLY:
60 ct_state |= OVS_CS_F_REPLY_DIR;
61 break;
62 default:
63 break;
64 }
65
66 switch (ctinfo) {
67 case IP_CT_ESTABLISHED:
68 case IP_CT_ESTABLISHED_REPLY:
69 ct_state |= OVS_CS_F_ESTABLISHED;
70 break;
71 case IP_CT_RELATED:
72 case IP_CT_RELATED_REPLY:
73 ct_state |= OVS_CS_F_RELATED;
74 break;
75 case IP_CT_NEW:
76 case IP_CT_NEW_REPLY:
77 ct_state |= OVS_CS_F_NEW;
78 break;
79 default:
80 break;
81 }
82
83 return ct_state;
84}
85
86static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
87 const struct nf_conntrack_zone *zone)
88{
89 key->ct.state = state;
90 key->ct.zone = zone->id;
91}
92
93/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
94 * previously sent the packet to conntrack via the ct action.
95 */
96static void ovs_ct_update_key(const struct sk_buff *skb,
97 struct sw_flow_key *key, bool post_ct)
98{
99 const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
100 enum ip_conntrack_info ctinfo;
101 struct nf_conn *ct;
102 u8 state = 0;
103
104 ct = nf_ct_get(skb, &ctinfo);
105 if (ct) {
106 state = ovs_ct_get_state(ctinfo);
107 if (ct->master)
108 state |= OVS_CS_F_RELATED;
109 zone = nf_ct_zone(ct);
110 } else if (post_ct) {
111 state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
112 }
113 __ovs_ct_update_key(key, state, zone);
114}
115
116void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
117{
118 ovs_ct_update_key(skb, key, false);
119}
120
121int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
122{
123 if (nla_put_u8(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state))
124 return -EMSGSIZE;
125
126 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
127 nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone))
128 return -EMSGSIZE;
129
130 return 0;
131}
132
133static int handle_fragments(struct net *net, struct sw_flow_key *key,
134 u16 zone, struct sk_buff *skb)
135{
136 struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
137
138 if (key->eth.type == htons(ETH_P_IP)) {
139 enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
140 int err;
141
142 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
143 err = ip_defrag(skb, user);
144 if (err)
145 return err;
146
147 ovs_cb.mru = IPCB(skb)->frag_max_size;
148 } else if (key->eth.type == htons(ETH_P_IPV6)) {
149#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
150 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
151 struct sk_buff *reasm;
152
153 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
154 reasm = nf_ct_frag6_gather(skb, user);
155 if (!reasm)
156 return -EINPROGRESS;
157
158 if (skb == reasm)
159 return -EINVAL;
160
161 key->ip.proto = ipv6_hdr(reasm)->nexthdr;
162 skb_morph(skb, reasm);
163 consume_skb(reasm);
164 ovs_cb.mru = IP6CB(skb)->frag_max_size;
165#else
166 return -EPFNOSUPPORT;
167#endif
168 } else {
169 return -EPFNOSUPPORT;
170 }
171
172 key->ip.frag = OVS_FRAG_TYPE_NONE;
173 skb_clear_hash(skb);
174 skb->ignore_df = 1;
175 *OVS_CB(skb) = ovs_cb;
176
177 return 0;
178}
179
180static struct nf_conntrack_expect *
181ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
182 u16 proto, const struct sk_buff *skb)
183{
184 struct nf_conntrack_tuple tuple;
185
186 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple))
187 return NULL;
188 return __nf_ct_expect_find(net, zone, &tuple);
189}
190
191/* Determine whether skb->nfct is equal to the result of conntrack lookup. */
192static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb,
193 const struct ovs_conntrack_info *info)
194{
195 enum ip_conntrack_info ctinfo;
196 struct nf_conn *ct;
197
198 ct = nf_ct_get(skb, &ctinfo);
199 if (!ct)
200 return false;
201 if (!net_eq(net, read_pnet(&ct->ct_net)))
202 return false;
203 if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct)))
204 return false;
205
206 return true;
207}
208
209static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,
210 const struct ovs_conntrack_info *info,
211 struct sk_buff *skb)
212{
213 /* If we are recirculating packets to match on conntrack fields and
214 * committing with a separate conntrack action, then we don't need to
215 * actually run the packet through conntrack twice unless it's for a
216 * different zone.
217 */
218 if (!skb_nfct_cached(net, skb, info)) {
219 struct nf_conn *tmpl = info->ct;
220
221 /* Associate skb with specified zone. */
222 if (tmpl) {
223 if (skb->nfct)
224 nf_conntrack_put(skb->nfct);
225 nf_conntrack_get(&tmpl->ct_general);
226 skb->nfct = &tmpl->ct_general;
227 skb->nfctinfo = IP_CT_NEW;
228 }
229
230 if (nf_conntrack_in(net, info->family, NF_INET_PRE_ROUTING,
231 skb) != NF_ACCEPT)
232 return -ENOENT;
233 }
234
235 return 0;
236}
237
238/* Lookup connection and read fields into key. */
239static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
240 const struct ovs_conntrack_info *info,
241 struct sk_buff *skb)
242{
243 struct nf_conntrack_expect *exp;
244
245 exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
246 if (exp) {
247 u8 state;
248
249 state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
250 __ovs_ct_update_key(key, state, &info->zone);
251 } else {
252 int err;
253
254 err = __ovs_ct_lookup(net, key, info, skb);
255 if (err)
256 return err;
257
258 ovs_ct_update_key(skb, key, true);
259 }
260
261 return 0;
262}
263
264/* Lookup connection and confirm if unconfirmed. */
265static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
266 const struct ovs_conntrack_info *info,
267 struct sk_buff *skb)
268{
269 u8 state;
270 int err;
271
272 state = key->ct.state;
273 if (key->ct.zone == info->zone.id &&
274 ((state & OVS_CS_F_TRACKED) && !(state & OVS_CS_F_NEW))) {
275 /* Previous lookup has shown that this connection is already
276 * tracked and committed. Skip committing.
277 */
278 return 0;
279 }
280
281 err = __ovs_ct_lookup(net, key, info, skb);
282 if (err)
283 return err;
284 if (nf_conntrack_confirm(skb) != NF_ACCEPT)
285 return -EINVAL;
286
287 ovs_ct_update_key(skb, key, true);
288
289 return 0;
290}
291
292int ovs_ct_execute(struct net *net, struct sk_buff *skb,
293 struct sw_flow_key *key,
294 const struct ovs_conntrack_info *info)
295{
296 int nh_ofs;
297 int err;
298
299 /* The conntrack module expects to be working at L3. */
300 nh_ofs = skb_network_offset(skb);
301 skb_pull(skb, nh_ofs);
302
303 if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
304 err = handle_fragments(net, key, info->zone.id, skb);
305 if (err)
306 return err;
307 }
308
309 if (info->flags & OVS_CT_F_COMMIT)
310 err = ovs_ct_commit(net, key, info, skb);
311 else
312 err = ovs_ct_lookup(net, key, info, skb);
313
314 skb_push(skb, nh_ofs);
315 return err;
316}
317
318static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
319 [OVS_CT_ATTR_FLAGS] = { .minlen = sizeof(u32),
320 .maxlen = sizeof(u32) },
321 [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16),
322 .maxlen = sizeof(u16) },
323};
324
325static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
326 bool log)
327{
328 struct nlattr *a;
329 int rem;
330
331 nla_for_each_nested(a, attr, rem) {
332 int type = nla_type(a);
333 int maxlen = ovs_ct_attr_lens[type].maxlen;
334 int minlen = ovs_ct_attr_lens[type].minlen;
335
336 if (type > OVS_CT_ATTR_MAX) {
337 OVS_NLERR(log,
338 "Unknown conntrack attr (type=%d, max=%d)",
339 type, OVS_CT_ATTR_MAX);
340 return -EINVAL;
341 }
342 if (nla_len(a) < minlen || nla_len(a) > maxlen) {
343 OVS_NLERR(log,
344 "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
345 type, nla_len(a), maxlen);
346 return -EINVAL;
347 }
348
349 switch (type) {
350 case OVS_CT_ATTR_FLAGS:
351 info->flags = nla_get_u32(a);
352 break;
353#ifdef CONFIG_NF_CONNTRACK_ZONES
354 case OVS_CT_ATTR_ZONE:
355 info->zone.id = nla_get_u16(a);
356 break;
357#endif
358 default:
359 OVS_NLERR(log, "Unknown conntrack attr (%d)",
360 type);
361 return -EINVAL;
362 }
363 }
364
365 if (rem > 0) {
366 OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem);
367 return -EINVAL;
368 }
369
370 return 0;
371}
372
373bool ovs_ct_verify(enum ovs_key_attr attr)
374{
375 if (attr == OVS_KEY_ATTR_CT_STATE)
376 return true;
377 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
378 attr == OVS_KEY_ATTR_CT_ZONE)
379 return true;
380
381 return false;
382}
383
384int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
385 const struct sw_flow_key *key,
386 struct sw_flow_actions **sfa, bool log)
387{
388 struct ovs_conntrack_info ct_info;
389 u16 family;
390 int err;
391
392 family = key_to_nfproto(key);
393 if (family == NFPROTO_UNSPEC) {
394 OVS_NLERR(log, "ct family unspecified");
395 return -EINVAL;
396 }
397
398 memset(&ct_info, 0, sizeof(ct_info));
399 ct_info.family = family;
400
401 nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID,
402 NF_CT_DEFAULT_ZONE_DIR, 0);
403
404 err = parse_ct(attr, &ct_info, log);
405 if (err)
406 return err;
407
408 /* Set up template for tracking connections in specific zones. */
409 ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL);
410 if (!ct_info.ct) {
411 OVS_NLERR(log, "Failed to allocate conntrack template");
412 return -ENOMEM;
413 }
414
415 err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info,
416 sizeof(ct_info), log);
417 if (err)
418 goto err_free_ct;
419
420 __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
421 nf_conntrack_get(&ct_info.ct->ct_general);
422 return 0;
423err_free_ct:
424 nf_conntrack_free(ct_info.ct);
425 return err;
426}
427
428int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
429 struct sk_buff *skb)
430{
431 struct nlattr *start;
432
433 start = nla_nest_start(skb, OVS_ACTION_ATTR_CT);
434 if (!start)
435 return -EMSGSIZE;
436
437 if (nla_put_u32(skb, OVS_CT_ATTR_FLAGS, ct_info->flags))
438 return -EMSGSIZE;
439 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
440 nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
441 return -EMSGSIZE;
442
443 nla_nest_end(skb, start);
444
445 return 0;
446}
447
448void ovs_ct_free_action(const struct nlattr *a)
449{
450 struct ovs_conntrack_info *ct_info = nla_data(a);
451
452 if (ct_info->ct)
453 nf_ct_put(ct_info->ct);
454}
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
new file mode 100644
index 000000000000..e812ee64a718
--- /dev/null
+++ b/net/openvswitch/conntrack.h
@@ -0,0 +1,78 @@
1/*
2 * Copyright (c) 2015 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13
14#ifndef OVS_CONNTRACK_H
15#define OVS_CONNTRACK_H 1
16
17#include "flow.h"
18
19struct ovs_conntrack_info;
20enum ovs_key_attr;
21
22#if defined(CONFIG_OPENVSWITCH_CONNTRACK)
23bool ovs_ct_verify(enum ovs_key_attr attr);
24int ovs_ct_copy_action(struct net *, const struct nlattr *,
25 const struct sw_flow_key *, struct sw_flow_actions **,
26 bool log);
27int ovs_ct_action_to_attr(const struct ovs_conntrack_info *, struct sk_buff *);
28
29int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
30 const struct ovs_conntrack_info *);
31
32void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
33int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb);
34void ovs_ct_free_action(const struct nlattr *a);
35#else
36#include <linux/errno.h>
37
38static inline bool ovs_ct_verify(int attr)
39{
40 return false;
41}
42
43static inline int ovs_ct_copy_action(struct net *net, const struct nlattr *nla,
44 const struct sw_flow_key *key,
45 struct sw_flow_actions **acts, bool log)
46{
47 return -ENOTSUPP;
48}
49
50static inline int ovs_ct_action_to_attr(const struct ovs_conntrack_info *info,
51 struct sk_buff *skb)
52{
53 return -ENOTSUPP;
54}
55
56static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
57 struct sw_flow_key *key,
58 const struct ovs_conntrack_info *info)
59{
60 return -ENOTSUPP;
61}
62
63static inline void ovs_ct_fill_key(const struct sk_buff *skb,
64 struct sw_flow_key *key)
65{
66 key->ct.state = 0;
67 key->ct.zone = 0;
68}
69
70static inline int ovs_ct_put_key(const struct sw_flow_key *key,
71 struct sk_buff *skb)
72{
73 return 0;
74}
75
76static inline void ovs_ct_free_action(const struct nlattr *a) { }
77#endif
78#endif /* ovs_conntrack.h */
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d5b547375887..72e63726efa0 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -275,6 +275,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
275 memset(&upcall, 0, sizeof(upcall)); 275 memset(&upcall, 0, sizeof(upcall));
276 upcall.cmd = OVS_PACKET_CMD_MISS; 276 upcall.cmd = OVS_PACKET_CMD_MISS;
277 upcall.portid = ovs_vport_find_upcall_portid(p, skb); 277 upcall.portid = ovs_vport_find_upcall_portid(p, skb);
278 upcall.mru = OVS_CB(skb)->mru;
278 error = ovs_dp_upcall(dp, skb, key, &upcall); 279 error = ovs_dp_upcall(dp, skb, key, &upcall);
279 if (unlikely(error)) 280 if (unlikely(error))
280 kfree_skb(skb); 281 kfree_skb(skb);
@@ -400,9 +401,23 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
400 if (upcall_info->actions_len) 401 if (upcall_info->actions_len)
401 size += nla_total_size(upcall_info->actions_len); 402 size += nla_total_size(upcall_info->actions_len);
402 403
404 /* OVS_PACKET_ATTR_MRU */
405 if (upcall_info->mru)
406 size += nla_total_size(sizeof(upcall_info->mru));
407
403 return size; 408 return size;
404} 409}
405 410
411static void pad_packet(struct datapath *dp, struct sk_buff *skb)
412{
413 if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
414 size_t plen = NLA_ALIGN(skb->len) - skb->len;
415
416 if (plen > 0)
417 memset(skb_put(skb, plen), 0, plen);
418 }
419}
420
406static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, 421static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
407 const struct sw_flow_key *key, 422 const struct sw_flow_key *key,
408 const struct dp_upcall_info *upcall_info) 423 const struct dp_upcall_info *upcall_info)
@@ -492,6 +507,16 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
492 nla_nest_cancel(user_skb, nla); 507 nla_nest_cancel(user_skb, nla);
493 } 508 }
494 509
510 /* Add OVS_PACKET_ATTR_MRU */
511 if (upcall_info->mru) {
512 if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
513 upcall_info->mru)) {
514 err = -ENOBUFS;
515 goto out;
516 }
517 pad_packet(dp, user_skb);
518 }
519
495 /* Only reserve room for attribute header, packet data is added 520 /* Only reserve room for attribute header, packet data is added
496 * in skb_zerocopy() */ 521 * in skb_zerocopy() */
497 if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { 522 if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
@@ -505,12 +530,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
505 goto out; 530 goto out;
506 531
507 /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */ 532 /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
508 if (!(dp->user_features & OVS_DP_F_UNALIGNED)) { 533 pad_packet(dp, user_skb);
509 size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len;
510
511 if (plen > 0)
512 memset(skb_put(user_skb, plen), 0, plen);
513 }
514 534
515 ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len; 535 ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
516 536
@@ -527,6 +547,7 @@ out:
527static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) 547static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
528{ 548{
529 struct ovs_header *ovs_header = info->userhdr; 549 struct ovs_header *ovs_header = info->userhdr;
550 struct net *net = sock_net(skb->sk);
530 struct nlattr **a = info->attrs; 551 struct nlattr **a = info->attrs;
531 struct sw_flow_actions *acts; 552 struct sw_flow_actions *acts;
532 struct sk_buff *packet; 553 struct sk_buff *packet;
@@ -535,6 +556,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
535 struct datapath *dp; 556 struct datapath *dp;
536 struct ethhdr *eth; 557 struct ethhdr *eth;
537 struct vport *input_vport; 558 struct vport *input_vport;
559 u16 mru = 0;
538 int len; 560 int len;
539 int err; 561 int err;
540 bool log = !a[OVS_PACKET_ATTR_PROBE]; 562 bool log = !a[OVS_PACKET_ATTR_PROBE];
@@ -564,6 +586,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
564 else 586 else
565 packet->protocol = htons(ETH_P_802_2); 587 packet->protocol = htons(ETH_P_802_2);
566 588
589 /* Set packet's mru */
590 if (a[OVS_PACKET_ATTR_MRU]) {
591 mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
592 packet->ignore_df = 1;
593 }
594 OVS_CB(packet)->mru = mru;
595
567 /* Build an sw_flow for sending this packet. */ 596 /* Build an sw_flow for sending this packet. */
568 flow = ovs_flow_alloc(); 597 flow = ovs_flow_alloc();
569 err = PTR_ERR(flow); 598 err = PTR_ERR(flow);
@@ -575,7 +604,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
575 if (err) 604 if (err)
576 goto err_flow_free; 605 goto err_flow_free;
577 606
578 err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], 607 err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
579 &flow->key, &acts, log); 608 &flow->key, &acts, log);
580 if (err) 609 if (err)
581 goto err_flow_free; 610 goto err_flow_free;
@@ -586,7 +615,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
586 packet->mark = flow->key.phy.skb_mark; 615 packet->mark = flow->key.phy.skb_mark;
587 616
588 rcu_read_lock(); 617 rcu_read_lock();
589 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); 618 dp = get_dp_rcu(net, ovs_header->dp_ifindex);
590 err = -ENODEV; 619 err = -ENODEV;
591 if (!dp) 620 if (!dp)
592 goto err_unlock; 621 goto err_unlock;
@@ -598,6 +627,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
598 if (!input_vport) 627 if (!input_vport)
599 goto err_unlock; 628 goto err_unlock;
600 629
630 packet->dev = input_vport->dev;
601 OVS_CB(packet)->input_vport = input_vport; 631 OVS_CB(packet)->input_vport = input_vport;
602 sf_acts = rcu_dereference(flow->sf_acts); 632 sf_acts = rcu_dereference(flow->sf_acts);
603 633
@@ -624,6 +654,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
624 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, 654 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
625 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, 655 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
626 [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG }, 656 [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
657 [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
627}; 658};
628 659
629static const struct genl_ops dp_packet_genl_ops[] = { 660static const struct genl_ops dp_packet_genl_ops[] = {
@@ -880,6 +911,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
880 911
881static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) 912static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
882{ 913{
914 struct net *net = sock_net(skb->sk);
883 struct nlattr **a = info->attrs; 915 struct nlattr **a = info->attrs;
884 struct ovs_header *ovs_header = info->userhdr; 916 struct ovs_header *ovs_header = info->userhdr;
885 struct sw_flow *flow = NULL, *new_flow; 917 struct sw_flow *flow = NULL, *new_flow;
@@ -929,8 +961,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
929 goto err_kfree_flow; 961 goto err_kfree_flow;
930 962
931 /* Validate actions. */ 963 /* Validate actions. */
932 error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, 964 error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
933 &acts, log); 965 &new_flow->key, &acts, log);
934 if (error) { 966 if (error) {
935 OVS_NLERR(log, "Flow actions may not be safe on all matching packets."); 967 OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
936 goto err_kfree_flow; 968 goto err_kfree_flow;
@@ -944,7 +976,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
944 } 976 }
945 977
946 ovs_lock(); 978 ovs_lock();
947 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 979 dp = get_dp(net, ovs_header->dp_ifindex);
948 if (unlikely(!dp)) { 980 if (unlikely(!dp)) {
949 error = -ENODEV; 981 error = -ENODEV;
950 goto err_unlock_ovs; 982 goto err_unlock_ovs;
@@ -1038,7 +1070,8 @@ error:
1038} 1070}
1039 1071
1040/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */ 1072/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
1041static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, 1073static struct sw_flow_actions *get_flow_actions(struct net *net,
1074 const struct nlattr *a,
1042 const struct sw_flow_key *key, 1075 const struct sw_flow_key *key,
1043 const struct sw_flow_mask *mask, 1076 const struct sw_flow_mask *mask,
1044 bool log) 1077 bool log)
@@ -1048,7 +1081,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
1048 int error; 1081 int error;
1049 1082
1050 ovs_flow_mask_key(&masked_key, key, mask); 1083 ovs_flow_mask_key(&masked_key, key, mask);
1051 error = ovs_nla_copy_actions(a, &masked_key, &acts, log); 1084 error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
1052 if (error) { 1085 if (error) {
1053 OVS_NLERR(log, 1086 OVS_NLERR(log,
1054 "Actions may not be safe on all matching packets"); 1087 "Actions may not be safe on all matching packets");
@@ -1060,6 +1093,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
1060 1093
1061static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) 1094static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
1062{ 1095{
1096 struct net *net = sock_net(skb->sk);
1063 struct nlattr **a = info->attrs; 1097 struct nlattr **a = info->attrs;
1064 struct ovs_header *ovs_header = info->userhdr; 1098 struct ovs_header *ovs_header = info->userhdr;
1065 struct sw_flow_key key; 1099 struct sw_flow_key key;
@@ -1091,8 +1125,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
1091 1125
1092 /* Validate actions. */ 1126 /* Validate actions. */
1093 if (a[OVS_FLOW_ATTR_ACTIONS]) { 1127 if (a[OVS_FLOW_ATTR_ACTIONS]) {
1094 acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask, 1128 acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key,
1095 log); 1129 &mask, log);
1096 if (IS_ERR(acts)) { 1130 if (IS_ERR(acts)) {
1097 error = PTR_ERR(acts); 1131 error = PTR_ERR(acts);
1098 goto error; 1132 goto error;
@@ -1108,7 +1142,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
1108 } 1142 }
1109 1143
1110 ovs_lock(); 1144 ovs_lock();
1111 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1145 dp = get_dp(net, ovs_header->dp_ifindex);
1112 if (unlikely(!dp)) { 1146 if (unlikely(!dp)) {
1113 error = -ENODEV; 1147 error = -ENODEV;
1114 goto err_unlock_ovs; 1148 goto err_unlock_ovs;
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 487a85f7d967..d24ba98024be 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -27,6 +27,7 @@
27#include <linux/u64_stats_sync.h> 27#include <linux/u64_stats_sync.h>
28#include <net/ip_tunnels.h> 28#include <net/ip_tunnels.h>
29 29
30#include "conntrack.h"
30#include "flow.h" 31#include "flow.h"
31#include "flow_table.h" 32#include "flow_table.h"
32#include "vport.h" 33#include "vport.h"
@@ -97,10 +98,13 @@ struct datapath {
97 * NULL if the packet is not being tunneled. 98 * NULL if the packet is not being tunneled.
98 * @input_vport: The original vport packet came in on. This value is cached 99 * @input_vport: The original vport packet came in on. This value is cached
99 * when a packet is received by OVS. 100 * when a packet is received by OVS.
101 * @mru: The maximum received fragment size; 0 if the packet is not
102 * fragmented.
100 */ 103 */
101struct ovs_skb_cb { 104struct ovs_skb_cb {
102 struct ip_tunnel_info *egress_tun_info; 105 struct ip_tunnel_info *egress_tun_info;
103 struct vport *input_vport; 106 struct vport *input_vport;
107 u16 mru;
104}; 108};
105#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) 109#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
106 110
@@ -113,6 +117,7 @@ struct ovs_skb_cb {
113 * then no packet is sent and the packet is accounted in the datapath's @n_lost 117 * then no packet is sent and the packet is accounted in the datapath's @n_lost
114 * counter. 118 * counter.
115 * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY. 119 * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY.
120 * @mru: If not zero, maximum received IP fragment size.
116 */ 121 */
117struct dp_upcall_info { 122struct dp_upcall_info {
118 const struct ip_tunnel_info *egress_tun_info; 123 const struct ip_tunnel_info *egress_tun_info;
@@ -121,6 +126,7 @@ struct dp_upcall_info {
121 int actions_len; 126 int actions_len;
122 u32 portid; 127 u32 portid;
123 u8 cmd; 128 u8 cmd;
129 u16 mru;
124}; 130};
125 131
126/** 132/**
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 8db22ef73626..376ca8738fd4 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -49,6 +49,7 @@
49#include "datapath.h" 49#include "datapath.h"
50#include "flow.h" 50#include "flow.h"
51#include "flow_netlink.h" 51#include "flow_netlink.h"
52#include "conntrack.h"
52 53
53u64 ovs_flow_used_time(unsigned long flow_jiffies) 54u64 ovs_flow_used_time(unsigned long flow_jiffies)
54{ 55{
@@ -707,6 +708,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
707 key->phy.priority = skb->priority; 708 key->phy.priority = skb->priority;
708 key->phy.in_port = OVS_CB(skb)->input_vport->port_no; 709 key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
709 key->phy.skb_mark = skb->mark; 710 key->phy.skb_mark = skb->mark;
711 ovs_ct_fill_key(skb, key);
710 key->ovs_flow_hash = 0; 712 key->ovs_flow_hash = 0;
711 key->recirc_id = 0; 713 key->recirc_id = 0;
712 714
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 082a87bac819..312c7d755b9b 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -111,6 +111,12 @@ struct sw_flow_key {
111 } nd; 111 } nd;
112 } ipv6; 112 } ipv6;
113 }; 113 };
114 struct {
115 /* Connection tracking fields. */
116 u16 zone;
117 u8 state;
118 } ct;
119
114} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ 120} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
115 121
116struct sw_flow_key_range { 122struct sw_flow_key_range {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index c182b28c0884..4e795b289eb7 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -281,7 +281,7 @@ size_t ovs_key_attr_size(void)
281 /* Whenever adding new OVS_KEY_ FIELDS, we should consider 281 /* Whenever adding new OVS_KEY_ FIELDS, we should consider
282 * updating this function. 282 * updating this function.
283 */ 283 */
284 BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22); 284 BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 24);
285 285
286 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ 286 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
287 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ 287 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
@@ -290,6 +290,8 @@ size_t ovs_key_attr_size(void)
290 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ 290 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
291 + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ 291 + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */
292 + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ 292 + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */
293 + nla_total_size(1) /* OVS_KEY_ATTR_CT_STATE */
294 + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */
293 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ 295 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
294 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 296 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
295 + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ 297 + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
@@ -339,6 +341,8 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
339 [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, 341 [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED,
340 .next = ovs_tunnel_key_lens, }, 342 .next = ovs_tunnel_key_lens, },
341 [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, 343 [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) },
344 [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u8) },
345 [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) },
342}; 346};
343 347
344static bool is_all_zero(const u8 *fp, size_t size) 348static bool is_all_zero(const u8 *fp, size_t size)
@@ -768,6 +772,21 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
768 return -EINVAL; 772 return -EINVAL;
769 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); 773 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
770 } 774 }
775
776 if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
777 ovs_ct_verify(OVS_KEY_ATTR_CT_STATE)) {
778 u8 ct_state = nla_get_u8(a[OVS_KEY_ATTR_CT_STATE]);
779
780 SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask);
781 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
782 }
783 if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
784 ovs_ct_verify(OVS_KEY_ATTR_CT_ZONE)) {
785 u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);
786
787 SW_FLOW_KEY_PUT(match, ct.zone, ct_zone, is_mask);
788 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
789 }
771 return 0; 790 return 0;
772} 791}
773 792
@@ -1266,6 +1285,7 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr,
1266 memset(&match, 0, sizeof(match)); 1285 memset(&match, 0, sizeof(match));
1267 match.key = key; 1286 match.key = key;
1268 1287
1288 memset(&key->ct, 0, sizeof(key->ct));
1269 key->phy.in_port = DP_MAX_PORTS; 1289 key->phy.in_port = DP_MAX_PORTS;
1270 1290
1271 return metadata_from_nlattrs(&match, &attrs, a, false, log); 1291 return metadata_from_nlattrs(&match, &attrs, a, false, log);
@@ -1314,6 +1334,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
1314 if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) 1334 if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
1315 goto nla_put_failure; 1335 goto nla_put_failure;
1316 1336
1337 if (ovs_ct_put_key(output, skb))
1338 goto nla_put_failure;
1339
1317 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 1340 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
1318 if (!nla) 1341 if (!nla)
1319 goto nla_put_failure; 1342 goto nla_put_failure;
@@ -1574,6 +1597,9 @@ void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
1574 case OVS_ACTION_ATTR_SET: 1597 case OVS_ACTION_ATTR_SET:
1575 ovs_nla_free_set_action(a); 1598 ovs_nla_free_set_action(a);
1576 break; 1599 break;
1600 case OVS_ACTION_ATTR_CT:
1601 ovs_ct_free_action(a);
1602 break;
1577 } 1603 }
1578 } 1604 }
1579 1605
@@ -1647,8 +1673,8 @@ static struct nlattr *__add_action(struct sw_flow_actions **sfa,
1647 return a; 1673 return a;
1648} 1674}
1649 1675
1650static int add_action(struct sw_flow_actions **sfa, int attrtype, 1676int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
1651 void *data, int len, bool log) 1677 int len, bool log)
1652{ 1678{
1653 struct nlattr *a; 1679 struct nlattr *a;
1654 1680
@@ -1663,7 +1689,7 @@ static inline int add_nested_action_start(struct sw_flow_actions **sfa,
1663 int used = (*sfa)->actions_len; 1689 int used = (*sfa)->actions_len;
1664 int err; 1690 int err;
1665 1691
1666 err = add_action(sfa, attrtype, NULL, 0, log); 1692 err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
1667 if (err) 1693 if (err)
1668 return err; 1694 return err;
1669 1695
@@ -1679,12 +1705,12 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
1679 a->nla_len = sfa->actions_len - st_offset; 1705 a->nla_len = sfa->actions_len - st_offset;
1680} 1706}
1681 1707
1682static int __ovs_nla_copy_actions(const struct nlattr *attr, 1708static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
1683 const struct sw_flow_key *key, 1709 const struct sw_flow_key *key,
1684 int depth, struct sw_flow_actions **sfa, 1710 int depth, struct sw_flow_actions **sfa,
1685 __be16 eth_type, __be16 vlan_tci, bool log); 1711 __be16 eth_type, __be16 vlan_tci, bool log);
1686 1712
1687static int validate_and_copy_sample(const struct nlattr *attr, 1713static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
1688 const struct sw_flow_key *key, int depth, 1714 const struct sw_flow_key *key, int depth,
1689 struct sw_flow_actions **sfa, 1715 struct sw_flow_actions **sfa,
1690 __be16 eth_type, __be16 vlan_tci, bool log) 1716 __be16 eth_type, __be16 vlan_tci, bool log)
@@ -1716,15 +1742,15 @@ static int validate_and_copy_sample(const struct nlattr *attr,
1716 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log); 1742 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
1717 if (start < 0) 1743 if (start < 0)
1718 return start; 1744 return start;
1719 err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, 1745 err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
1720 nla_data(probability), sizeof(u32), log); 1746 nla_data(probability), sizeof(u32), log);
1721 if (err) 1747 if (err)
1722 return err; 1748 return err;
1723 st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log); 1749 st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log);
1724 if (st_acts < 0) 1750 if (st_acts < 0)
1725 return st_acts; 1751 return st_acts;
1726 1752
1727 err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa, 1753 err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa,
1728 eth_type, vlan_tci, log); 1754 eth_type, vlan_tci, log);
1729 if (err) 1755 if (err)
1730 return err; 1756 return err;
@@ -2058,7 +2084,7 @@ static int copy_action(const struct nlattr *from,
2058 return 0; 2084 return 0;
2059} 2085}
2060 2086
2061static int __ovs_nla_copy_actions(const struct nlattr *attr, 2087static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2062 const struct sw_flow_key *key, 2088 const struct sw_flow_key *key,
2063 int depth, struct sw_flow_actions **sfa, 2089 int depth, struct sw_flow_actions **sfa,
2064 __be16 eth_type, __be16 vlan_tci, bool log) 2090 __be16 eth_type, __be16 vlan_tci, bool log)
@@ -2082,7 +2108,8 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
2082 [OVS_ACTION_ATTR_SET] = (u32)-1, 2108 [OVS_ACTION_ATTR_SET] = (u32)-1,
2083 [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1, 2109 [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
2084 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, 2110 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
2085 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash) 2111 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
2112 [OVS_ACTION_ATTR_CT] = (u32)-1,
2086 }; 2113 };
2087 const struct ovs_action_push_vlan *vlan; 2114 const struct ovs_action_push_vlan *vlan;
2088 int type = nla_type(a); 2115 int type = nla_type(a);
@@ -2189,13 +2216,20 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
2189 break; 2216 break;
2190 2217
2191 case OVS_ACTION_ATTR_SAMPLE: 2218 case OVS_ACTION_ATTR_SAMPLE:
2192 err = validate_and_copy_sample(a, key, depth, sfa, 2219 err = validate_and_copy_sample(net, a, key, depth, sfa,
2193 eth_type, vlan_tci, log); 2220 eth_type, vlan_tci, log);
2194 if (err) 2221 if (err)
2195 return err; 2222 return err;
2196 skip_copy = true; 2223 skip_copy = true;
2197 break; 2224 break;
2198 2225
2226 case OVS_ACTION_ATTR_CT:
2227 err = ovs_ct_copy_action(net, a, key, sfa, log);
2228 if (err)
2229 return err;
2230 skip_copy = true;
2231 break;
2232
2199 default: 2233 default:
2200 OVS_NLERR(log, "Unknown Action type %d", type); 2234 OVS_NLERR(log, "Unknown Action type %d", type);
2201 return -EINVAL; 2235 return -EINVAL;
@@ -2214,7 +2248,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
2214} 2248}
2215 2249
2216/* 'key' must be the masked key. */ 2250/* 'key' must be the masked key. */
2217int ovs_nla_copy_actions(const struct nlattr *attr, 2251int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2218 const struct sw_flow_key *key, 2252 const struct sw_flow_key *key,
2219 struct sw_flow_actions **sfa, bool log) 2253 struct sw_flow_actions **sfa, bool log)
2220{ 2254{
@@ -2225,7 +2259,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
2225 return PTR_ERR(*sfa); 2259 return PTR_ERR(*sfa);
2226 2260
2227 (*sfa)->orig_len = nla_len(attr); 2261 (*sfa)->orig_len = nla_len(attr);
2228 err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type, 2262 err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type,
2229 key->eth.tci, log); 2263 key->eth.tci, log);
2230 if (err) 2264 if (err)
2231 ovs_nla_free_flow_actions(*sfa); 2265 ovs_nla_free_flow_actions(*sfa);
@@ -2350,6 +2384,13 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
2350 if (err) 2384 if (err)
2351 return err; 2385 return err;
2352 break; 2386 break;
2387
2388 case OVS_ACTION_ATTR_CT:
2389 err = ovs_ct_action_to_attr(nla_data(a), skb);
2390 if (err)
2391 return err;
2392 break;
2393
2353 default: 2394 default:
2354 if (nla_put(skb, type, nla_len(a), nla_data(a))) 2395 if (nla_put(skb, type, nla_len(a), nla_data(a)))
2355 return -EMSGSIZE; 2396 return -EMSGSIZE;
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index acd074408f0a..c0b484b237c9 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -62,9 +62,11 @@ int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
62 const struct sw_flow_key *key, bool log); 62 const struct sw_flow_key *key, bool log);
63u32 ovs_nla_get_ufid_flags(const struct nlattr *attr); 63u32 ovs_nla_get_ufid_flags(const struct nlattr *attr);
64 64
65int ovs_nla_copy_actions(const struct nlattr *attr, 65int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
66 const struct sw_flow_key *key, 66 const struct sw_flow_key *key,
67 struct sw_flow_actions **sfa, bool log); 67 struct sw_flow_actions **sfa, bool log);
68int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype,
69 void *data, int len, bool log);
68int ovs_nla_put_actions(const struct nlattr *attr, 70int ovs_nla_put_actions(const struct nlattr *attr,
69 int len, struct sk_buff *skb); 71 int len, struct sk_buff *skb);
70 72
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index d73e5a16e7ca..e2dc9dac59e6 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -484,6 +484,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
484 484
485 OVS_CB(skb)->input_vport = vport; 485 OVS_CB(skb)->input_vport = vport;
486 OVS_CB(skb)->egress_tun_info = NULL; 486 OVS_CB(skb)->egress_tun_info = NULL;
487 OVS_CB(skb)->mru = 0;
487 /* Extract flow from 'skb' into 'key'. */ 488 /* Extract flow from 'skb' into 'key'. */
488 error = ovs_flow_key_extract(tun_info, skb, &key); 489 error = ovs_flow_key_extract(tun_info, skb, &key);
489 if (unlikely(error)) { 490 if (unlikely(error)) {