diff options
author | Andy Zhou <azhou@nicira.com> | 2014-10-03 18:35:28 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-10-06 00:32:20 -0400 |
commit | 0b5e8b8eeae40bae6ad7c7e91c97c3c0d0e57882 (patch) | |
tree | 1e3263634ab52faac57459120033776cf1a08542 /net/ipv4/geneve.c | |
parent | c259c132ad284576ab44308d5d17ea6a16c971b5 (diff) |
net: Add Geneve tunneling protocol driver
This adds device-level support for Geneve -- Generic Network
Virtualization Encapsulation. The protocol is documented at
http://tools.ietf.org/html/draft-gross-geneve-01
Only protocol-layer Geneve support is provided by this driver.
Open vSwitch can be used to configure, set up, and tear down
functional Geneve tunnels.
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/geneve.c')
-rw-r--r-- | net/ipv4/geneve.c | 373 |
1 files changed, 373 insertions, 0 deletions
diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c new file mode 100644 index 000000000000..f008c5515f48 --- /dev/null +++ b/net/ipv4/geneve.c | |||
@@ -0,0 +1,373 @@ | |||
1 | /* | ||
2 | * Geneve: Generic Network Virtualization Encapsulation | ||
3 | * | ||
4 | * Copyright (c) 2014 Nicira, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
13 | |||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/types.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/errno.h> | ||
18 | #include <linux/slab.h> | ||
19 | #include <linux/skbuff.h> | ||
20 | #include <linux/rculist.h> | ||
21 | #include <linux/netdevice.h> | ||
22 | #include <linux/in.h> | ||
23 | #include <linux/ip.h> | ||
24 | #include <linux/udp.h> | ||
25 | #include <linux/igmp.h> | ||
26 | #include <linux/etherdevice.h> | ||
27 | #include <linux/if_ether.h> | ||
28 | #include <linux/if_vlan.h> | ||
29 | #include <linux/hash.h> | ||
30 | #include <linux/ethtool.h> | ||
31 | #include <net/arp.h> | ||
32 | #include <net/ndisc.h> | ||
33 | #include <net/ip.h> | ||
34 | #include <net/ip_tunnels.h> | ||
35 | #include <net/icmp.h> | ||
36 | #include <net/udp.h> | ||
37 | #include <net/rtnetlink.h> | ||
38 | #include <net/route.h> | ||
39 | #include <net/dsfield.h> | ||
40 | #include <net/inet_ecn.h> | ||
41 | #include <net/net_namespace.h> | ||
42 | #include <net/netns/generic.h> | ||
43 | #include <net/geneve.h> | ||
44 | #include <net/protocol.h> | ||
45 | #include <net/udp_tunnel.h> | ||
46 | #if IS_ENABLED(CONFIG_IPV6) | ||
47 | #include <net/ipv6.h> | ||
48 | #include <net/addrconf.h> | ||
49 | #include <net/ip6_tunnel.h> | ||
50 | #include <net/ip6_checksum.h> | ||
51 | #endif | ||
52 | |||
53 | #define PORT_HASH_BITS 8 | ||
54 | #define PORT_HASH_SIZE (1<<PORT_HASH_BITS) | ||
55 | |||
56 | /* per-network namespace private data for this module */ | ||
57 | struct geneve_net { | ||
58 | struct hlist_head sock_list[PORT_HASH_SIZE]; | ||
59 | spinlock_t sock_lock; /* Protects sock_list */ | ||
60 | }; | ||
61 | |||
62 | static int geneve_net_id; | ||
63 | |||
64 | static struct workqueue_struct *geneve_wq; | ||
65 | |||
66 | static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) | ||
67 | { | ||
68 | return (struct genevehdr *)(udp_hdr(skb) + 1); | ||
69 | } | ||
70 | |||
71 | static struct hlist_head *gs_head(struct net *net, __be16 port) | ||
72 | { | ||
73 | struct geneve_net *gn = net_generic(net, geneve_net_id); | ||
74 | |||
75 | return &gn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; | ||
76 | } | ||
77 | |||
78 | /* Find geneve socket based on network namespace and UDP port */ | ||
79 | static struct geneve_sock *geneve_find_sock(struct net *net, __be16 port) | ||
80 | { | ||
81 | struct geneve_sock *gs; | ||
82 | |||
83 | hlist_for_each_entry_rcu(gs, gs_head(net, port), hlist) { | ||
84 | if (inet_sk(gs->sock->sk)->inet_sport == port) | ||
85 | return gs; | ||
86 | } | ||
87 | |||
88 | return NULL; | ||
89 | } | ||
90 | |||
91 | static void geneve_build_header(struct genevehdr *geneveh, | ||
92 | __be16 tun_flags, u8 vni[3], | ||
93 | u8 options_len, u8 *options) | ||
94 | { | ||
95 | geneveh->ver = GENEVE_VER; | ||
96 | geneveh->opt_len = options_len / 4; | ||
97 | geneveh->oam = !!(tun_flags & TUNNEL_OAM); | ||
98 | geneveh->critical = !!(tun_flags & TUNNEL_CRIT_OPT); | ||
99 | geneveh->rsvd1 = 0; | ||
100 | memcpy(geneveh->vni, vni, 3); | ||
101 | geneveh->proto_type = htons(ETH_P_TEB); | ||
102 | geneveh->rsvd2 = 0; | ||
103 | |||
104 | memcpy(geneveh->options, options, options_len); | ||
105 | } | ||
106 | |||
107 | /* Transmit a fully formated Geneve frame. | ||
108 | * | ||
109 | * When calling this function. The skb->data should point | ||
110 | * to the geneve header which is fully formed. | ||
111 | * | ||
112 | * This function will add other UDP tunnel headers. | ||
113 | */ | ||
114 | int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, | ||
115 | struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, | ||
116 | __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, | ||
117 | __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, | ||
118 | bool xnet) | ||
119 | { | ||
120 | struct genevehdr *gnvh; | ||
121 | int min_headroom; | ||
122 | int err; | ||
123 | |||
124 | skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx); | ||
125 | |||
126 | min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len | ||
127 | + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr) | ||
128 | + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); | ||
129 | |||
130 | err = skb_cow_head(skb, min_headroom); | ||
131 | if (unlikely(err)) | ||
132 | return err; | ||
133 | |||
134 | if (vlan_tx_tag_present(skb)) { | ||
135 | if (unlikely(!__vlan_put_tag(skb, | ||
136 | skb->vlan_proto, | ||
137 | vlan_tx_tag_get(skb)))) { | ||
138 | err = -ENOMEM; | ||
139 | return err; | ||
140 | } | ||
141 | skb->vlan_tci = 0; | ||
142 | } | ||
143 | |||
144 | gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); | ||
145 | geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); | ||
146 | |||
147 | return udp_tunnel_xmit_skb(gs->sock, rt, skb, src, dst, | ||
148 | tos, ttl, df, src_port, dst_port, xnet); | ||
149 | } | ||
150 | EXPORT_SYMBOL_GPL(geneve_xmit_skb); | ||
151 | |||
152 | static void geneve_notify_add_rx_port(struct geneve_sock *gs) | ||
153 | { | ||
154 | struct sock *sk = gs->sock->sk; | ||
155 | sa_family_t sa_family = sk->sk_family; | ||
156 | int err; | ||
157 | |||
158 | if (sa_family == AF_INET) { | ||
159 | err = udp_add_offload(&gs->udp_offloads); | ||
160 | if (err) | ||
161 | pr_warn("geneve: udp_add_offload failed with status %d\n", | ||
162 | err); | ||
163 | } | ||
164 | } | ||
165 | |||
166 | /* Callback from net/ipv4/udp.c to receive packets */ | ||
167 | static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) | ||
168 | { | ||
169 | struct genevehdr *geneveh; | ||
170 | struct geneve_sock *gs; | ||
171 | int opts_len; | ||
172 | |||
173 | /* Need Geneve and inner Ethernet header to be present */ | ||
174 | if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) | ||
175 | goto error; | ||
176 | |||
177 | /* Return packets with reserved bits set */ | ||
178 | geneveh = geneve_hdr(skb); | ||
179 | |||
180 | if (unlikely(geneveh->ver != GENEVE_VER)) | ||
181 | goto error; | ||
182 | |||
183 | if (unlikely(geneveh->proto_type != htons(ETH_P_TEB))) | ||
184 | goto error; | ||
185 | |||
186 | opts_len = geneveh->opt_len * 4; | ||
187 | if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, | ||
188 | htons(ETH_P_TEB))) | ||
189 | goto drop; | ||
190 | |||
191 | gs = rcu_dereference_sk_user_data(sk); | ||
192 | if (!gs) | ||
193 | goto drop; | ||
194 | |||
195 | gs->rcv(gs, skb); | ||
196 | return 0; | ||
197 | |||
198 | drop: | ||
199 | /* Consume bad packet */ | ||
200 | kfree_skb(skb); | ||
201 | return 0; | ||
202 | |||
203 | error: | ||
204 | /* Let the UDP layer deal with the skb */ | ||
205 | return 1; | ||
206 | } | ||
207 | |||
208 | static void geneve_del_work(struct work_struct *work) | ||
209 | { | ||
210 | struct geneve_sock *gs = container_of(work, struct geneve_sock, | ||
211 | del_work); | ||
212 | |||
213 | udp_tunnel_sock_release(gs->sock); | ||
214 | kfree_rcu(gs, rcu); | ||
215 | } | ||
216 | |||
217 | static struct socket *geneve_create_sock(struct net *net, bool ipv6, | ||
218 | __be16 port) | ||
219 | { | ||
220 | struct socket *sock; | ||
221 | struct udp_port_cfg udp_conf; | ||
222 | int err; | ||
223 | |||
224 | memset(&udp_conf, 0, sizeof(udp_conf)); | ||
225 | |||
226 | if (ipv6) { | ||
227 | udp_conf.family = AF_INET6; | ||
228 | } else { | ||
229 | udp_conf.family = AF_INET; | ||
230 | udp_conf.local_ip.s_addr = INADDR_ANY; | ||
231 | } | ||
232 | |||
233 | udp_conf.local_udp_port = port; | ||
234 | |||
235 | /* Open UDP socket */ | ||
236 | err = udp_sock_create(net, &udp_conf, &sock); | ||
237 | if (err < 0) | ||
238 | return ERR_PTR(err); | ||
239 | |||
240 | return sock; | ||
241 | } | ||
242 | |||
243 | /* Create new listen socket if needed */ | ||
244 | static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, | ||
245 | geneve_rcv_t *rcv, void *data, | ||
246 | bool ipv6) | ||
247 | { | ||
248 | struct geneve_net *gn = net_generic(net, geneve_net_id); | ||
249 | struct geneve_sock *gs; | ||
250 | struct socket *sock; | ||
251 | struct udp_tunnel_sock_cfg tunnel_cfg; | ||
252 | |||
253 | gs = kzalloc(sizeof(*gs), GFP_KERNEL); | ||
254 | if (!gs) | ||
255 | return ERR_PTR(-ENOMEM); | ||
256 | |||
257 | INIT_WORK(&gs->del_work, geneve_del_work); | ||
258 | |||
259 | sock = geneve_create_sock(net, ipv6, port); | ||
260 | if (IS_ERR(sock)) { | ||
261 | kfree(gs); | ||
262 | return ERR_CAST(sock); | ||
263 | } | ||
264 | |||
265 | gs->sock = sock; | ||
266 | atomic_set(&gs->refcnt, 1); | ||
267 | gs->rcv = rcv; | ||
268 | gs->rcv_data = data; | ||
269 | |||
270 | /* Initialize the geneve udp offloads structure */ | ||
271 | gs->udp_offloads.port = port; | ||
272 | gs->udp_offloads.callbacks.gro_receive = NULL; | ||
273 | gs->udp_offloads.callbacks.gro_complete = NULL; | ||
274 | |||
275 | spin_lock(&gn->sock_lock); | ||
276 | hlist_add_head_rcu(&gs->hlist, gs_head(net, port)); | ||
277 | geneve_notify_add_rx_port(gs); | ||
278 | spin_unlock(&gn->sock_lock); | ||
279 | |||
280 | /* Mark socket as an encapsulation socket */ | ||
281 | tunnel_cfg.sk_user_data = gs; | ||
282 | tunnel_cfg.encap_type = 1; | ||
283 | tunnel_cfg.encap_rcv = geneve_udp_encap_recv; | ||
284 | tunnel_cfg.encap_destroy = NULL; | ||
285 | setup_udp_tunnel_sock(net, sock, &tunnel_cfg); | ||
286 | |||
287 | return gs; | ||
288 | } | ||
289 | |||
290 | struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, | ||
291 | geneve_rcv_t *rcv, void *data, | ||
292 | bool no_share, bool ipv6) | ||
293 | { | ||
294 | struct geneve_sock *gs; | ||
295 | |||
296 | gs = geneve_socket_create(net, port, rcv, data, ipv6); | ||
297 | if (!IS_ERR(gs)) | ||
298 | return gs; | ||
299 | |||
300 | if (no_share) /* Return error if sharing is not allowed. */ | ||
301 | return ERR_PTR(-EINVAL); | ||
302 | |||
303 | gs = geneve_find_sock(net, port); | ||
304 | if (gs) { | ||
305 | if (gs->rcv == rcv) | ||
306 | atomic_inc(&gs->refcnt); | ||
307 | else | ||
308 | gs = ERR_PTR(-EBUSY); | ||
309 | } else { | ||
310 | gs = ERR_PTR(-EINVAL); | ||
311 | } | ||
312 | |||
313 | return gs; | ||
314 | } | ||
315 | EXPORT_SYMBOL_GPL(geneve_sock_add); | ||
316 | |||
317 | void geneve_sock_release(struct geneve_sock *gs) | ||
318 | { | ||
319 | if (!atomic_dec_and_test(&gs->refcnt)) | ||
320 | return; | ||
321 | |||
322 | queue_work(geneve_wq, &gs->del_work); | ||
323 | } | ||
324 | EXPORT_SYMBOL_GPL(geneve_sock_release); | ||
325 | |||
326 | static __net_init int geneve_init_net(struct net *net) | ||
327 | { | ||
328 | struct geneve_net *gn = net_generic(net, geneve_net_id); | ||
329 | unsigned int h; | ||
330 | |||
331 | spin_lock_init(&gn->sock_lock); | ||
332 | |||
333 | for (h = 0; h < PORT_HASH_SIZE; ++h) | ||
334 | INIT_HLIST_HEAD(&gn->sock_list[h]); | ||
335 | |||
336 | return 0; | ||
337 | } | ||
338 | |||
339 | static struct pernet_operations geneve_net_ops = { | ||
340 | .init = geneve_init_net, | ||
341 | .exit = NULL, | ||
342 | .id = &geneve_net_id, | ||
343 | .size = sizeof(struct geneve_net), | ||
344 | }; | ||
345 | |||
346 | static int __init geneve_init_module(void) | ||
347 | { | ||
348 | int rc; | ||
349 | |||
350 | geneve_wq = alloc_workqueue("geneve", 0, 0); | ||
351 | if (!geneve_wq) | ||
352 | return -ENOMEM; | ||
353 | |||
354 | rc = register_pernet_subsys(&geneve_net_ops); | ||
355 | if (rc) | ||
356 | return rc; | ||
357 | |||
358 | pr_info("Geneve driver\n"); | ||
359 | |||
360 | return 0; | ||
361 | } | ||
362 | late_initcall(geneve_init_module); | ||
363 | |||
364 | static void __exit geneve_cleanup_module(void) | ||
365 | { | ||
366 | destroy_workqueue(geneve_wq); | ||
367 | } | ||
368 | module_exit(geneve_cleanup_module); | ||
369 | |||
370 | MODULE_LICENSE("GPL"); | ||
371 | MODULE_AUTHOR("Jesse Gross <jesse@nicira.com>"); | ||
372 | MODULE_DESCRIPTION("Driver for GENEVE encapsulated traffic"); | ||
373 | MODULE_ALIAS_RTNL_LINK("geneve"); | ||