diff options
author | Pravin B Shelar <pshelar@nicira.com> | 2013-06-17 20:50:33 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-06-19 21:07:42 -0400 |
commit | aa310701e787087dbfbccf1409982a96e16c57a6 (patch) | |
tree | f426945de6694203f2c34218b4e4b06913b8f58c | |
parent | a3e82996a8874c4cfe8c7f1be4d552018d8cba7e (diff) |
openvswitch: Add gre tunnel support.
Add gre vport implementation. Most of the gre protocol processing
is pushed to the gre module. It makes use of the gre demultiplexer,
therefore it can co-exist with Linux device-based gre tunnels.
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/uapi/linux/openvswitch.h | 1 | ||||
-rw-r--r-- | net/openvswitch/Kconfig | 2 | ||||
-rw-r--r-- | net/openvswitch/Makefile | 3 | ||||
-rw-r--r-- | net/openvswitch/datapath.h | 1 | ||||
-rw-r--r-- | net/openvswitch/flow.h | 18 | ||||
-rw-r--r-- | net/openvswitch/vport-gre.c | 274 | ||||
-rw-r--r-- | net/openvswitch/vport.c | 19 | ||||
-rw-r--r-- | net/openvswitch/vport.h | 7 |
8 files changed, 323 insertions, 2 deletions
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index b15a445927d6..c55efaaa9bb4 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h | |||
@@ -164,6 +164,7 @@ enum ovs_vport_type { | |||
164 | OVS_VPORT_TYPE_UNSPEC, | 164 | OVS_VPORT_TYPE_UNSPEC, |
165 | OVS_VPORT_TYPE_NETDEV, /* network device */ | 165 | OVS_VPORT_TYPE_NETDEV, /* network device */ |
166 | OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ | 166 | OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ |
167 | OVS_VPORT_TYPE_GRE, /* GRE tunnel. */ | ||
167 | __OVS_VPORT_TYPE_MAX | 168 | __OVS_VPORT_TYPE_MAX |
168 | }; | 169 | }; |
169 | 170 | ||
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index d9ea33c361be..9fbc04a31ed6 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig | |||
@@ -19,6 +19,8 @@ config OPENVSWITCH | |||
19 | which is able to accept configuration from a variety of sources and | 19 | which is able to accept configuration from a variety of sources and |
20 | translate it into packet processing rules. | 20 | translate it into packet processing rules. |
21 | 21 | ||
22 | Open vSwitch GRE support depends on CONFIG_NET_IPGRE_DEMUX. | ||
23 | |||
22 | See http://openvswitch.org for more information and userspace | 24 | See http://openvswitch.org for more information and userspace |
23 | utilities. | 25 | utilities. |
24 | 26 | ||
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile index 15e7384745c1..01bddb2991e3 100644 --- a/net/openvswitch/Makefile +++ b/net/openvswitch/Makefile | |||
@@ -10,5 +10,6 @@ openvswitch-y := \ | |||
10 | dp_notify.o \ | 10 | dp_notify.o \ |
11 | flow.o \ | 11 | flow.o \ |
12 | vport.o \ | 12 | vport.o \ |
13 | vport-gre.o \ | ||
13 | vport-internal_dev.o \ | 14 | vport-internal_dev.o \ |
14 | vport-netdev.o \ | 15 | vport-netdev.o |
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index e88ebc2f1c54..a91486484916 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h | |||
@@ -122,6 +122,7 @@ struct dp_upcall_info { | |||
122 | struct ovs_net { | 122 | struct ovs_net { |
123 | struct list_head dps; | 123 | struct list_head dps; |
124 | struct work_struct dp_notify_work; | 124 | struct work_struct dp_notify_work; |
125 | struct vport_net vport_net; | ||
125 | }; | 126 | }; |
126 | 127 | ||
127 | extern int ovs_net_id; | 128 | extern int ovs_net_id; |
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 999842f247a0..66ef7220293e 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h | |||
@@ -49,11 +49,27 @@ struct ovs_key_ipv4_tunnel { | |||
49 | __be64 tun_id; | 49 | __be64 tun_id; |
50 | __be32 ipv4_src; | 50 | __be32 ipv4_src; |
51 | __be32 ipv4_dst; | 51 | __be32 ipv4_dst; |
52 | u16 tun_flags; | 52 | __be16 tun_flags; |
53 | u8 ipv4_tos; | 53 | u8 ipv4_tos; |
54 | u8 ipv4_ttl; | 54 | u8 ipv4_ttl; |
55 | }; | 55 | }; |
56 | 56 | ||
57 | static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, | ||
58 | const struct iphdr *iph, __be64 tun_id, | ||
59 | __be16 tun_flags) | ||
60 | { | ||
61 | tun_key->tun_id = tun_id; | ||
62 | tun_key->ipv4_src = iph->saddr; | ||
63 | tun_key->ipv4_dst = iph->daddr; | ||
64 | tun_key->ipv4_tos = iph->tos; | ||
65 | tun_key->ipv4_ttl = iph->ttl; | ||
66 | tun_key->tun_flags = tun_flags; | ||
67 | |||
68 | /* clear struct padding. */ | ||
69 | memset((unsigned char *) tun_key + OVS_TUNNEL_KEY_SIZE, 0, | ||
70 | sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE); | ||
71 | } | ||
72 | |||
57 | struct sw_flow_key { | 73 | struct sw_flow_key { |
58 | struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ | 74 | struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ |
59 | struct { | 75 | struct { |
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c new file mode 100644 index 000000000000..3a8d1900aa78 --- /dev/null +++ b/net/openvswitch/vport-gre.c | |||
@@ -0,0 +1,274 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2007-2013 Nicira, Inc. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of version 2 of the GNU General Public | ||
6 | * License as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program; if not, write to the Free Software | ||
15 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | ||
16 | * 02110-1301, USA | ||
17 | */ | ||
18 | |||
19 | #ifdef CONFIG_NET_IPGRE_DEMUX | ||
20 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
21 | |||
22 | #include <linux/if.h> | ||
23 | #include <linux/skbuff.h> | ||
24 | #include <linux/ip.h> | ||
25 | #include <linux/if_tunnel.h> | ||
26 | #include <linux/if_vlan.h> | ||
27 | #include <linux/in.h> | ||
28 | #include <linux/if_vlan.h> | ||
29 | #include <linux/in.h> | ||
30 | #include <linux/in_route.h> | ||
31 | #include <linux/inetdevice.h> | ||
32 | #include <linux/jhash.h> | ||
33 | #include <linux/list.h> | ||
34 | #include <linux/kernel.h> | ||
35 | #include <linux/workqueue.h> | ||
36 | #include <linux/rculist.h> | ||
37 | #include <net/route.h> | ||
38 | #include <net/xfrm.h> | ||
39 | |||
40 | #include <net/icmp.h> | ||
41 | #include <net/ip.h> | ||
42 | #include <net/ip_tunnels.h> | ||
43 | #include <net/gre.h> | ||
44 | #include <net/net_namespace.h> | ||
45 | #include <net/netns/generic.h> | ||
46 | #include <net/protocol.h> | ||
47 | |||
48 | #include "datapath.h" | ||
49 | #include "vport.h" | ||
50 | |||
51 | /* Returns the least-significant 32 bits of a __be64. */ | ||
52 | static __be32 be64_get_low32(__be64 x) | ||
53 | { | ||
54 | #ifdef __BIG_ENDIAN | ||
55 | return (__force __be32)x; | ||
56 | #else | ||
57 | return (__force __be32)((__force u64)x >> 32); | ||
58 | #endif | ||
59 | } | ||
60 | |||
61 | static __be16 filter_tnl_flags(__be16 flags) | ||
62 | { | ||
63 | return flags & (TUNNEL_CSUM | TUNNEL_KEY); | ||
64 | } | ||
65 | |||
66 | static struct sk_buff *__build_header(struct sk_buff *skb, | ||
67 | int tunnel_hlen) | ||
68 | { | ||
69 | const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; | ||
70 | struct tnl_ptk_info tpi; | ||
71 | |||
72 | skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM)); | ||
73 | if (IS_ERR(skb)) | ||
74 | return NULL; | ||
75 | |||
76 | tpi.flags = filter_tnl_flags(tun_key->tun_flags); | ||
77 | tpi.proto = htons(ETH_P_TEB); | ||
78 | tpi.key = be64_get_low32(tun_key->tun_id); | ||
79 | tpi.seq = 0; | ||
80 | gre_build_header(skb, &tpi, tunnel_hlen); | ||
81 | |||
82 | return skb; | ||
83 | } | ||
84 | |||
85 | static __be64 key_to_tunnel_id(__be32 key, __be32 seq) | ||
86 | { | ||
87 | #ifdef __BIG_ENDIAN | ||
88 | return (__force __be64)((__force u64)seq << 32 | (__force u32)key); | ||
89 | #else | ||
90 | return (__force __be64)((__force u64)key << 32 | (__force u32)seq); | ||
91 | #endif | ||
92 | } | ||
93 | |||
94 | /* Called with rcu_read_lock and BH disabled. */ | ||
95 | static int gre_rcv(struct sk_buff *skb, | ||
96 | const struct tnl_ptk_info *tpi) | ||
97 | { | ||
98 | struct ovs_key_ipv4_tunnel tun_key; | ||
99 | struct ovs_net *ovs_net; | ||
100 | struct vport *vport; | ||
101 | __be64 key; | ||
102 | |||
103 | ovs_net = net_generic(dev_net(skb->dev), ovs_net_id); | ||
104 | vport = rcu_dereference(ovs_net->vport_net.gre_vport); | ||
105 | if (unlikely(!vport)) | ||
106 | return PACKET_REJECT; | ||
107 | |||
108 | key = key_to_tunnel_id(tpi->key, tpi->seq); | ||
109 | ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, | ||
110 | filter_tnl_flags(tpi->flags)); | ||
111 | |||
112 | ovs_vport_receive(vport, skb, &tun_key); | ||
113 | return PACKET_RCVD; | ||
114 | } | ||
115 | |||
116 | static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) | ||
117 | { | ||
118 | struct net *net = ovs_dp_get_net(vport->dp); | ||
119 | struct flowi4 fl; | ||
120 | struct rtable *rt; | ||
121 | int min_headroom; | ||
122 | int tunnel_hlen; | ||
123 | __be16 df; | ||
124 | int err; | ||
125 | |||
126 | if (unlikely(!OVS_CB(skb)->tun_key)) { | ||
127 | err = -EINVAL; | ||
128 | goto error; | ||
129 | } | ||
130 | |||
131 | /* Route lookup */ | ||
132 | memset(&fl, 0, sizeof(fl)); | ||
133 | fl.daddr = OVS_CB(skb)->tun_key->ipv4_dst; | ||
134 | fl.saddr = OVS_CB(skb)->tun_key->ipv4_src; | ||
135 | fl.flowi4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos); | ||
136 | fl.flowi4_mark = skb->mark; | ||
137 | fl.flowi4_proto = IPPROTO_GRE; | ||
138 | |||
139 | rt = ip_route_output_key(net, &fl); | ||
140 | if (IS_ERR(rt)) | ||
141 | return PTR_ERR(rt); | ||
142 | |||
143 | tunnel_hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags); | ||
144 | |||
145 | min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len | ||
146 | + tunnel_hlen + sizeof(struct iphdr) | ||
147 | + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); | ||
148 | if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { | ||
149 | int head_delta = SKB_DATA_ALIGN(min_headroom - | ||
150 | skb_headroom(skb) + | ||
151 | 16); | ||
152 | err = pskb_expand_head(skb, max_t(int, head_delta, 0), | ||
153 | 0, GFP_ATOMIC); | ||
154 | if (unlikely(err)) | ||
155 | goto err_free_rt; | ||
156 | } | ||
157 | |||
158 | if (vlan_tx_tag_present(skb)) { | ||
159 | if (unlikely(!__vlan_put_tag(skb, | ||
160 | skb->vlan_proto, | ||
161 | vlan_tx_tag_get(skb)))) { | ||
162 | err = -ENOMEM; | ||
163 | goto err_free_rt; | ||
164 | } | ||
165 | skb->vlan_tci = 0; | ||
166 | } | ||
167 | |||
168 | /* Push Tunnel header. */ | ||
169 | skb = __build_header(skb, tunnel_hlen); | ||
170 | if (unlikely(!skb)) { | ||
171 | err = 0; | ||
172 | goto err_free_rt; | ||
173 | } | ||
174 | |||
175 | df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? | ||
176 | htons(IP_DF) : 0; | ||
177 | |||
178 | skb->local_df = 1; | ||
179 | |||
180 | return iptunnel_xmit(net, rt, skb, fl.saddr, | ||
181 | OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, | ||
182 | OVS_CB(skb)->tun_key->ipv4_tos, | ||
183 | OVS_CB(skb)->tun_key->ipv4_ttl, df); | ||
184 | err_free_rt: | ||
185 | ip_rt_put(rt); | ||
186 | error: | ||
187 | return err; | ||
188 | } | ||
189 | |||
190 | static struct gre_cisco_protocol gre_protocol = { | ||
191 | .handler = gre_rcv, | ||
192 | .priority = 1, | ||
193 | }; | ||
194 | |||
195 | static int gre_ports; | ||
196 | static int gre_init(void) | ||
197 | { | ||
198 | int err; | ||
199 | |||
200 | gre_ports++; | ||
201 | if (gre_ports > 1) | ||
202 | return 0; | ||
203 | |||
204 | err = gre_cisco_register(&gre_protocol); | ||
205 | if (err) | ||
206 | pr_warn("cannot register gre protocol handler\n"); | ||
207 | |||
208 | return err; | ||
209 | } | ||
210 | |||
211 | static void gre_exit(void) | ||
212 | { | ||
213 | gre_ports--; | ||
214 | if (gre_ports > 0) | ||
215 | return; | ||
216 | |||
217 | gre_cisco_unregister(&gre_protocol); | ||
218 | } | ||
219 | |||
/*
 * Return the name of @vport.  gre_create() stores the name in the vport's
 * private area, so that area is returned directly.
 */
static const char *gre_get_name(const struct vport *vport)
{
	const char *name = vport_priv(vport);

	return name;
}
224 | |||
225 | static struct vport *gre_create(const struct vport_parms *parms) | ||
226 | { | ||
227 | struct net *net = ovs_dp_get_net(parms->dp); | ||
228 | struct ovs_net *ovs_net; | ||
229 | struct vport *vport; | ||
230 | int err; | ||
231 | |||
232 | err = gre_init(); | ||
233 | if (err) | ||
234 | return ERR_PTR(err); | ||
235 | |||
236 | ovs_net = net_generic(net, ovs_net_id); | ||
237 | if (ovsl_dereference(ovs_net->vport_net.gre_vport)) { | ||
238 | vport = ERR_PTR(-EEXIST); | ||
239 | goto error; | ||
240 | } | ||
241 | |||
242 | vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms); | ||
243 | if (IS_ERR(vport)) | ||
244 | goto error; | ||
245 | |||
246 | strncpy(vport_priv(vport), parms->name, IFNAMSIZ); | ||
247 | rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport); | ||
248 | return vport; | ||
249 | |||
250 | error: | ||
251 | gre_exit(); | ||
252 | return vport; | ||
253 | } | ||
254 | |||
255 | static void gre_tnl_destroy(struct vport *vport) | ||
256 | { | ||
257 | struct net *net = ovs_dp_get_net(vport->dp); | ||
258 | struct ovs_net *ovs_net; | ||
259 | |||
260 | ovs_net = net_generic(net, ovs_net_id); | ||
261 | |||
262 | rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL); | ||
263 | ovs_vport_deferred_free(vport); | ||
264 | gre_exit(); | ||
265 | } | ||
266 | |||
267 | const struct vport_ops ovs_gre_vport_ops = { | ||
268 | .type = OVS_VPORT_TYPE_GRE, | ||
269 | .create = gre_create, | ||
270 | .destroy = gre_tnl_destroy, | ||
271 | .get_name = gre_get_name, | ||
272 | .send = gre_tnl_send, | ||
273 | }; | ||
274 | #endif | ||
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 413287a1877f..f52dfb9cb5a7 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c | |||
@@ -38,6 +38,10 @@ | |||
38 | static const struct vport_ops *vport_ops_list[] = { | 38 | static const struct vport_ops *vport_ops_list[] = { |
39 | &ovs_netdev_vport_ops, | 39 | &ovs_netdev_vport_ops, |
40 | &ovs_internal_vport_ops, | 40 | &ovs_internal_vport_ops, |
41 | |||
42 | #ifdef CONFIG_NET_IPGRE_DEMUX | ||
43 | &ovs_gre_vport_ops, | ||
44 | #endif | ||
41 | }; | 45 | }; |
42 | 46 | ||
43 | /* Protected by RCU read lock for reading, ovs_mutex for writing. */ | 47 | /* Protected by RCU read lock for reading, ovs_mutex for writing. */ |
@@ -404,3 +408,18 @@ void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) | |||
404 | 408 | ||
405 | spin_unlock(&vport->stats_lock); | 409 | spin_unlock(&vport->stats_lock); |
406 | } | 410 | } |
411 | |||
412 | static void free_vport_rcu(struct rcu_head *rcu) | ||
413 | { | ||
414 | struct vport *vport = container_of(rcu, struct vport, rcu); | ||
415 | |||
416 | ovs_vport_free(vport); | ||
417 | } | ||
418 | |||
419 | void ovs_vport_deferred_free(struct vport *vport) | ||
420 | { | ||
421 | if (!vport) | ||
422 | return; | ||
423 | |||
424 | call_rcu(&vport->rcu, free_vport_rcu); | ||
425 | } | ||
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 2d961aedd71d..376045c42f8b 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h | |||
@@ -34,6 +34,11 @@ struct vport_parms; | |||
34 | 34 | ||
35 | /* The following definitions are for users of the vport subsytem: */ | 35 | /* The following definitions are for users of the vport subsytem: */ |
36 | 36 | ||
37 | /* The following definitions are for users of the vport subsytem: */ | ||
38 | struct vport_net { | ||
39 | struct vport __rcu *gre_vport; | ||
40 | }; | ||
41 | |||
37 | int ovs_vport_init(void); | 42 | int ovs_vport_init(void); |
38 | void ovs_vport_exit(void); | 43 | void ovs_vport_exit(void); |
39 | 44 | ||
@@ -152,6 +157,7 @@ enum vport_err_type { | |||
152 | struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *, | 157 | struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *, |
153 | const struct vport_parms *); | 158 | const struct vport_parms *); |
154 | void ovs_vport_free(struct vport *); | 159 | void ovs_vport_free(struct vport *); |
160 | void ovs_vport_deferred_free(struct vport *vport); | ||
155 | 161 | ||
156 | #define VPORT_ALIGN 8 | 162 | #define VPORT_ALIGN 8 |
157 | 163 | ||
@@ -192,6 +198,7 @@ void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); | |||
192 | * add yours to the list at the top of vport.c. */ | 198 | * add yours to the list at the top of vport.c. */ |
193 | extern const struct vport_ops ovs_netdev_vport_ops; | 199 | extern const struct vport_ops ovs_netdev_vport_ops; |
194 | extern const struct vport_ops ovs_internal_vport_ops; | 200 | extern const struct vport_ops ovs_internal_vport_ops; |
201 | extern const struct vport_ops ovs_gre_vport_ops; | ||
195 | 202 | ||
196 | static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, | 203 | static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, |
197 | const void *start, unsigned int len) | 204 | const void *start, unsigned int len) |