aboutsummaryrefslogtreecommitdiffstats
path: root/net/openvswitch
diff options
context:
space:
mode:
authorJesse Gross <jesse@nicira.com>2014-10-03 18:35:33 -0400
committerDavid S. Miller <davem@davemloft.net>2014-10-06 00:32:21 -0400
commitf5796684069e0c71c65bce6a6d4766114aec1396 (patch)
treea4906c8cba2b6c2d116e7b72a71f9e1020b476cb /net/openvswitch
parent6b205b2ca17e88ef5e10451b720056b790cc63a5 (diff)
openvswitch: Add support for Geneve tunneling.
The Openvswitch implementation is completely agnostic to the options that are in use and can handle newly defined options without further work. It does this by simply matching on a byte array of options and allowing userspace to setup flows on this array. Signed-off-by: Jesse Gross <jesse@nicira.com> Signed-off-by: Ansis Atteka <aatteka@nicira.com> Signed-off-by: Andy Zhou <azhou@nicira.com> Acked-by: Thomas Graf <tgraf@noironetworks.com> Acked-by: Pravin B Shelar <pshelar@nicira.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/openvswitch')
-rw-r--r--net/openvswitch/Kconfig11
-rw-r--r--net/openvswitch/Makefile4
-rw-r--r--net/openvswitch/datapath.c5
-rw-r--r--net/openvswitch/flow.c20
-rw-r--r--net/openvswitch/flow.h20
-rw-r--r--net/openvswitch/flow_netlink.c176
-rw-r--r--net/openvswitch/vport-geneve.c236
-rw-r--r--net/openvswitch/vport-gre.c2
-rw-r--r--net/openvswitch/vport-vxlan.c2
-rw-r--r--net/openvswitch/vport.c3
-rw-r--r--net/openvswitch/vport.h1
11 files changed, 448 insertions, 32 deletions
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 6ecf491ad509..ba3bb8203b99 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -54,3 +54,14 @@ config OPENVSWITCH_VXLAN
54 Say N to exclude this support and reduce the binary size. 54 Say N to exclude this support and reduce the binary size.
55 55
56 If unsure, say Y. 56 If unsure, say Y.
57
58config OPENVSWITCH_GENEVE
59 bool "Open vSwitch Geneve tunneling support"
60 depends on INET
61 depends on OPENVSWITCH
62 depends on GENEVE && !(OPENVSWITCH=y && GENEVE=m)
63 default y
64 ---help---
65 If you say Y here, then Open vSwitch will be able to create Geneve vports.
66
67 Say N to exclude this support and reduce the binary size.
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 3591cb5dae91..9a33a273c375 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -15,6 +15,10 @@ openvswitch-y := \
15 vport-internal_dev.o \ 15 vport-internal_dev.o \
16 vport-netdev.o 16 vport-netdev.o
17 17
18ifneq ($(CONFIG_OPENVSWITCH_GENEVE),)
19openvswitch-y += vport-geneve.o
20endif
21
18ifneq ($(CONFIG_OPENVSWITCH_VXLAN),) 22ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
19openvswitch-y += vport-vxlan.o 23openvswitch-y += vport-vxlan.o
20endif 24endif
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 010125c48244..2e31d9e7f4dc 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -370,6 +370,7 @@ static size_t key_attr_size(void)
370 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ 370 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
371 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ 371 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
372 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ 372 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
373 + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
373 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ 374 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
374 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ 375 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
375 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ 376 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
@@ -556,10 +557,12 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
556 557
557 err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], 558 err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
558 &flow->key, 0, &acts); 559 &flow->key, 0, &acts);
559 rcu_assign_pointer(flow->sf_acts, acts);
560 if (err) 560 if (err)
561 goto err_flow_free; 561 goto err_flow_free;
562 562
563 rcu_assign_pointer(flow->sf_acts, acts);
564
565 OVS_CB(packet)->egress_tun_info = NULL;
563 OVS_CB(packet)->flow = flow; 566 OVS_CB(packet)->flow = flow;
564 packet->priority = flow->key.phy.priority; 567 packet->priority = flow->key.phy.priority;
565 packet->mark = flow->key.phy.skb_mark; 568 packet->mark = flow->key.phy.skb_mark;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 2924cb340868..62db02ba36bc 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -448,6 +448,9 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
448 int error; 448 int error;
449 struct ethhdr *eth; 449 struct ethhdr *eth;
450 450
451 /* Flags are always used as part of stats */
452 key->tp.flags = 0;
453
451 skb_reset_mac_header(skb); 454 skb_reset_mac_header(skb);
452 455
453 /* Link layer. We are guaranteed to have at least the 14 byte Ethernet 456 /* Link layer. We are guaranteed to have at least the 14 byte Ethernet
@@ -646,10 +649,23 @@ int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info,
646 struct sk_buff *skb, struct sw_flow_key *key) 649 struct sk_buff *skb, struct sw_flow_key *key)
647{ 650{
648 /* Extract metadata from packet. */ 651 /* Extract metadata from packet. */
649 if (tun_info) 652 if (tun_info) {
650 memcpy(&key->tun_key, &tun_info->tunnel, sizeof(key->tun_key)); 653 memcpy(&key->tun_key, &tun_info->tunnel, sizeof(key->tun_key));
651 else 654
655 if (tun_info->options) {
656 BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
657 8)) - 1
658 > sizeof(key->tun_opts));
659 memcpy(GENEVE_OPTS(key, tun_info->options_len),
660 tun_info->options, tun_info->options_len);
661 key->tun_opts_len = tun_info->options_len;
662 } else {
663 key->tun_opts_len = 0;
664 }
665 } else {
666 key->tun_opts_len = 0;
652 memset(&key->tun_key, 0, sizeof(key->tun_key)); 667 memset(&key->tun_key, 0, sizeof(key->tun_key));
668 }
653 669
654 key->phy.priority = skb->priority; 670 key->phy.priority = skb->priority;
655 key->phy.in_port = OVS_CB(skb)->input_vport->port_no; 671 key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index fe5a71b81c1f..71813318c8c7 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -51,11 +51,24 @@ struct ovs_key_ipv4_tunnel {
51 51
52struct ovs_tunnel_info { 52struct ovs_tunnel_info {
53 struct ovs_key_ipv4_tunnel tunnel; 53 struct ovs_key_ipv4_tunnel tunnel;
54 struct geneve_opt *options;
55 u8 options_len;
54}; 56};
55 57
/* Store options at the end of the array if they are less than the
 * maximum size. This allows us to get the benefits of variable length
 * matching for small options.
 *
 * @flow_key: pointer to a struct sw_flow_key (or an expression yielding one).
 * @opt_len:  length in bytes of the options stored in @flow_key->tun_opts.
 *
 * Evaluates to a struct geneve_opt pointer @opt_len bytes before the end
 * of the tun_opts array.  Both arguments are parenthesized so that
 * compound expressions (e.g. "a + b") expand with the intended precedence.
 */
#define GENEVE_OPTS(flow_key, opt_len)	\
	((struct geneve_opt *)((flow_key)->tun_opts + \
			       FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
			       (opt_len)))
66
56static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, 67static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
57 const struct iphdr *iph, 68 const struct iphdr *iph,
58 __be64 tun_id, __be16 tun_flags) 69 __be64 tun_id, __be16 tun_flags,
70 struct geneve_opt *opts,
71 u8 opts_len)
59{ 72{
60 tun_info->tunnel.tun_id = tun_id; 73 tun_info->tunnel.tun_id = tun_id;
61 tun_info->tunnel.ipv4_src = iph->saddr; 74 tun_info->tunnel.ipv4_src = iph->saddr;
@@ -67,9 +80,14 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
67 /* clear struct padding. */ 80 /* clear struct padding. */
68 memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0, 81 memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0,
69 sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE); 82 sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE);
83
84 tun_info->options = opts;
85 tun_info->options_len = opts_len;
70} 86}
71 87
72struct sw_flow_key { 88struct sw_flow_key {
89 u8 tun_opts[255];
90 u8 tun_opts_len;
73 struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ 91 struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */
74 struct { 92 struct {
75 u32 priority; /* Packet QoS priority. */ 93 u32 priority; /* Packet QoS priority. */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 5d6194d9dadc..368f23307911 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -42,6 +42,7 @@
42#include <linux/icmp.h> 42#include <linux/icmp.h>
43#include <linux/icmpv6.h> 43#include <linux/icmpv6.h>
44#include <linux/rculist.h> 44#include <linux/rculist.h>
45#include <net/geneve.h>
45#include <net/ip.h> 46#include <net/ip.h>
46#include <net/ipv6.h> 47#include <net/ipv6.h>
47#include <net/ndisc.h> 48#include <net/ndisc.h>
@@ -88,18 +89,20 @@ static void update_range__(struct sw_flow_match *match,
88 } \ 89 } \
89 } while (0) 90 } while (0)
90 91
91#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ 92#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \
92 do { \ 93 do { \
93 update_range__(match, offsetof(struct sw_flow_key, field), \ 94 update_range__(match, offset, len, is_mask); \
94 len, is_mask); \ 95 if (is_mask) \
95 if (is_mask) { \ 96 memcpy((u8 *)&(match)->mask->key + offset, value_p, \
96 if ((match)->mask) \ 97 len); \
97 memcpy(&(match)->mask->key.field, value_p, len);\ 98 else \
98 } else { \ 99 memcpy((u8 *)(match)->key + offset, value_p, len); \
99 memcpy(&(match)->key->field, value_p, len); \
100 } \
101 } while (0) 100 } while (0)
102 101
/* Field-name front end for SW_FLOW_KEY_MEMCPY_OFFSET(): the destination is
 * given as a member of struct sw_flow_key and translated to a byte offset.
 */
#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)		\
	SW_FLOW_KEY_MEMCPY_OFFSET(match,				\
				  offsetof(struct sw_flow_key, field),	\
				  value_p, len, is_mask)
105
103static u16 range_n_bytes(const struct sw_flow_key_range *range) 106static u16 range_n_bytes(const struct sw_flow_key_range *range)
104{ 107{
105 return range->end - range->start; 108 return range->end - range->start;
@@ -335,6 +338,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
335 int rem; 338 int rem;
336 bool ttl = false; 339 bool ttl = false;
337 __be16 tun_flags = 0; 340 __be16 tun_flags = 0;
341 unsigned long opt_key_offset;
338 342
339 nla_for_each_nested(a, attr, rem) { 343 nla_for_each_nested(a, attr, rem) {
340 int type = nla_type(a); 344 int type = nla_type(a);
@@ -347,6 +351,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
347 [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, 351 [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
348 [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, 352 [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
349 [OVS_TUNNEL_KEY_ATTR_OAM] = 0, 353 [OVS_TUNNEL_KEY_ATTR_OAM] = 0,
354 [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
350 }; 355 };
351 356
352 if (type > OVS_TUNNEL_KEY_ATTR_MAX) { 357 if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
@@ -355,7 +360,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
355 return -EINVAL; 360 return -EINVAL;
356 } 361 }
357 362
358 if (ovs_tunnel_key_lens[type] != nla_len(a)) { 363 if (ovs_tunnel_key_lens[type] != nla_len(a) &&
364 ovs_tunnel_key_lens[type] != -1) {
359 OVS_NLERR("IPv4 tunnel attribute type has unexpected " 365 OVS_NLERR("IPv4 tunnel attribute type has unexpected "
360 " length (type=%d, length=%d, expected=%d).\n", 366 " length (type=%d, length=%d, expected=%d).\n",
361 type, nla_len(a), ovs_tunnel_key_lens[type]); 367 type, nla_len(a), ovs_tunnel_key_lens[type]);
@@ -394,7 +400,60 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
394 case OVS_TUNNEL_KEY_ATTR_OAM: 400 case OVS_TUNNEL_KEY_ATTR_OAM:
395 tun_flags |= TUNNEL_OAM; 401 tun_flags |= TUNNEL_OAM;
396 break; 402 break;
403 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
404 tun_flags |= TUNNEL_OPTIONS_PRESENT;
405 if (nla_len(a) > sizeof(match->key->tun_opts)) {
406 OVS_NLERR("Geneve option length exceeds maximum size (len %d, max %zu).\n",
407 nla_len(a),
408 sizeof(match->key->tun_opts));
409 return -EINVAL;
410 }
411
412 if (nla_len(a) % 4 != 0) {
413 OVS_NLERR("Geneve option length is not a multiple of 4 (len %d).\n",
414 nla_len(a));
415 return -EINVAL;
416 }
417
418 /* We need to record the length of the options passed
419 * down, otherwise packets with the same format but
420 * additional options will be silently matched.
421 */
422 if (!is_mask) {
423 SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
424 false);
425 } else {
426 /* This is somewhat unusual because it looks at
427 * both the key and mask while parsing the
428 * attributes (and by extension assumes the key
429 * is parsed first). Normally, we would verify
430 * that each is the correct length and that the
431 * attributes line up in the validate function.
432 * However, that is difficult because this is
433 * variable length and we won't have the
434 * information later.
435 */
436 if (match->key->tun_opts_len != nla_len(a)) {
437 OVS_NLERR("Geneve option key length (%d) is different from mask length (%d).",
438 match->key->tun_opts_len,
439 nla_len(a));
440 return -EINVAL;
441 }
442
443 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff,
444 true);
445 }
446
447 opt_key_offset = (unsigned long)GENEVE_OPTS(
448 (struct sw_flow_key *)0,
449 nla_len(a));
450 SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset,
451 nla_data(a), nla_len(a),
452 is_mask);
453 break;
397 default: 454 default:
455 OVS_NLERR("Unknown IPv4 tunnel attribute (%d).\n",
456 type);
398 return -EINVAL; 457 return -EINVAL;
399 } 458 }
400 } 459 }
@@ -421,16 +480,11 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
421 return 0; 480 return 0;
422} 481}
423 482
424static int ipv4_tun_to_nlattr(struct sk_buff *skb, 483static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
425 const struct ovs_key_ipv4_tunnel *tun_key, 484 const struct ovs_key_ipv4_tunnel *output,
426 const struct ovs_key_ipv4_tunnel *output) 485 const struct geneve_opt *tun_opts,
486 int swkey_tun_opts_len)
427{ 487{
428 struct nlattr *nla;
429
430 nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
431 if (!nla)
432 return -EMSGSIZE;
433
434 if (output->tun_flags & TUNNEL_KEY && 488 if (output->tun_flags & TUNNEL_KEY &&
435 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) 489 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
436 return -EMSGSIZE; 490 return -EMSGSIZE;
@@ -454,12 +508,35 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
454 if ((output->tun_flags & TUNNEL_OAM) && 508 if ((output->tun_flags & TUNNEL_OAM) &&
455 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) 509 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
456 return -EMSGSIZE; 510 return -EMSGSIZE;
511 if (tun_opts &&
512 nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
513 swkey_tun_opts_len, tun_opts))
514 return -EMSGSIZE;
457 515
458 nla_nest_end(skb, nla);
459 return 0; 516 return 0;
460} 517}
461 518
462 519
520static int ipv4_tun_to_nlattr(struct sk_buff *skb,
521 const struct ovs_key_ipv4_tunnel *output,
522 const struct geneve_opt *tun_opts,
523 int swkey_tun_opts_len)
524{
525 struct nlattr *nla;
526 int err;
527
528 nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
529 if (!nla)
530 return -EMSGSIZE;
531
532 err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len);
533 if (err)
534 return err;
535
536 nla_nest_end(skb, nla);
537 return 0;
538}
539
463static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, 540static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
464 const struct nlattr **a, bool is_mask) 541 const struct nlattr **a, bool is_mask)
465{ 542{
@@ -905,9 +982,16 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
905 if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) 982 if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
906 goto nla_put_failure; 983 goto nla_put_failure;
907 984
908 if ((swkey->tun_key.ipv4_dst || is_mask) && 985 if ((swkey->tun_key.ipv4_dst || is_mask)) {
909 ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key)) 986 const struct geneve_opt *opts = NULL;
910 goto nla_put_failure; 987
988 if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
989 opts = GENEVE_OPTS(output, swkey->tun_opts_len);
990
991 if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
992 swkey->tun_opts_len))
993 goto nla_put_failure;
994 }
911 995
912 if (swkey->phy.in_port == DP_MAX_PORTS) { 996 if (swkey->phy.in_port == DP_MAX_PORTS) {
913 if (is_mask && (output->phy.in_port == 0xffff)) 997 if (is_mask && (output->phy.in_port == 0xffff))
@@ -1290,17 +1374,55 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
1290 if (err) 1374 if (err)
1291 return err; 1375 return err;
1292 1376
1377 if (key.tun_opts_len) {
1378 struct geneve_opt *option = GENEVE_OPTS(&key,
1379 key.tun_opts_len);
1380 int opts_len = key.tun_opts_len;
1381 bool crit_opt = false;
1382
1383 while (opts_len > 0) {
1384 int len;
1385
1386 if (opts_len < sizeof(*option))
1387 return -EINVAL;
1388
1389 len = sizeof(*option) + option->length * 4;
1390 if (len > opts_len)
1391 return -EINVAL;
1392
1393 crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
1394
1395 option = (struct geneve_opt *)((u8 *)option + len);
1396 opts_len -= len;
1397 };
1398
1399 key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
1400 };
1401
1293 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); 1402 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
1294 if (start < 0) 1403 if (start < 0)
1295 return start; 1404 return start;
1296 1405
1297 a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, 1406 a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
1298 sizeof(*tun_info)); 1407 sizeof(*tun_info) + key.tun_opts_len);
1299 if (IS_ERR(a)) 1408 if (IS_ERR(a))
1300 return PTR_ERR(a); 1409 return PTR_ERR(a);
1301 1410
1302 tun_info = nla_data(a); 1411 tun_info = nla_data(a);
1303 tun_info->tunnel = key.tun_key; 1412 tun_info->tunnel = key.tun_key;
1413 tun_info->options_len = key.tun_opts_len;
1414
1415 if (tun_info->options_len) {
1416 /* We need to store the options in the action itself since
1417 * everything else will go away after flow setup. We can append
1418 * it to tun_info and then point there.
1419 */
1420 memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len),
1421 key.tun_opts_len);
1422 tun_info->options = (struct geneve_opt *)(tun_info + 1);
1423 } else {
1424 tun_info->options = NULL;
1425 }
1304 1426
1305 add_nested_action_end(*sfa, start); 1427 add_nested_action_end(*sfa, start);
1306 1428
@@ -1592,7 +1714,9 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
1592 return -EMSGSIZE; 1714 return -EMSGSIZE;
1593 1715
1594 err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel, 1716 err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel,
1595 nla_data(ovs_key)); 1717 tun_info->options_len ?
1718 tun_info->options : NULL,
1719 tun_info->options_len);
1596 if (err) 1720 if (err)
1597 return err; 1721 return err;
1598 nla_nest_end(skb, start); 1722 nla_nest_end(skb, start);
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
new file mode 100644
index 000000000000..5572d482f285
--- /dev/null
+++ b/net/openvswitch/vport-geneve.c
@@ -0,0 +1,236 @@
1/*
2 * Copyright (c) 2014 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11
12#include <linux/version.h>
13
14#include <linux/in.h>
15#include <linux/ip.h>
16#include <linux/net.h>
17#include <linux/rculist.h>
18#include <linux/udp.h>
19#include <linux/if_vlan.h>
20
21#include <net/geneve.h>
22#include <net/icmp.h>
23#include <net/ip.h>
24#include <net/route.h>
25#include <net/udp.h>
26#include <net/xfrm.h>
27
28#include "datapath.h"
29#include "vport.h"
30
31/**
32 * struct geneve_port - Keeps track of open UDP ports
33 * @gs: The socket created for this port number.
34 * @name: vport name.
35 */
36struct geneve_port {
37 struct geneve_sock *gs;
38 char name[IFNAMSIZ];
39};
40
41static LIST_HEAD(geneve_ports);
42
/* Return the Geneve private area that belongs to @vport. */
static inline struct geneve_port *geneve_vport(const struct vport *vport)
{
	struct geneve_port *priv = vport_priv(vport);

	return priv;
}
47
48static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
49{
50 return (struct genevehdr *)(udp_hdr(skb) + 1);
51}
52
53/* Convert 64 bit tunnel ID to 24 bit VNI. */
54static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
55{
56#ifdef __BIG_ENDIAN
57 vni[0] = (__force __u8)(tun_id >> 16);
58 vni[1] = (__force __u8)(tun_id >> 8);
59 vni[2] = (__force __u8)tun_id;
60#else
61 vni[0] = (__force __u8)((__force u64)tun_id >> 40);
62 vni[1] = (__force __u8)((__force u64)tun_id >> 48);
63 vni[2] = (__force __u8)((__force u64)tun_id >> 56);
64#endif
65}
66
67/* Convert 24 bit VNI to 64 bit tunnel ID. */
68static __be64 vni_to_tunnel_id(__u8 *vni)
69{
70#ifdef __BIG_ENDIAN
71 return (vni[0] << 16) | (vni[1] << 8) | vni[2];
72#else
73 return (__force __be64)(((__force u64)vni[0] << 40) |
74 ((__force u64)vni[1] << 48) |
75 ((__force u64)vni[2] << 56));
76#endif
77}
78
79static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
80{
81 struct vport *vport = gs->rcv_data;
82 struct genevehdr *geneveh = geneve_hdr(skb);
83 int opts_len;
84 struct ovs_tunnel_info tun_info;
85 __be64 key;
86 __be16 flags;
87
88 opts_len = geneveh->opt_len * 4;
89
90 flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT |
91 (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
92 (geneveh->oam ? TUNNEL_OAM : 0) |
93 (geneveh->critical ? TUNNEL_CRIT_OPT : 0);
94
95 key = vni_to_tunnel_id(geneveh->vni);
96
97 ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, flags,
98 geneveh->options, opts_len);
99
100 ovs_vport_receive(vport, skb, &tun_info);
101}
102
103static int geneve_get_options(const struct vport *vport,
104 struct sk_buff *skb)
105{
106 struct geneve_port *geneve_port = geneve_vport(vport);
107 __be16 sport;
108
109 sport = ntohs(inet_sk(geneve_port->gs->sock->sk)->inet_sport);
110 if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, sport))
111 return -EMSGSIZE;
112 return 0;
113}
114
115static void geneve_tnl_destroy(struct vport *vport)
116{
117 struct geneve_port *geneve_port = geneve_vport(vport);
118
119 geneve_sock_release(geneve_port->gs);
120
121 ovs_vport_deferred_free(vport);
122}
123
124static struct vport *geneve_tnl_create(const struct vport_parms *parms)
125{
126 struct net *net = ovs_dp_get_net(parms->dp);
127 struct nlattr *options = parms->options;
128 struct geneve_port *geneve_port;
129 struct geneve_sock *gs;
130 struct vport *vport;
131 struct nlattr *a;
132 int err;
133 u16 dst_port;
134
135 if (!options) {
136 err = -EINVAL;
137 goto error;
138 }
139
140 a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
141 if (a && nla_len(a) == sizeof(u16)) {
142 dst_port = nla_get_u16(a);
143 } else {
144 /* Require destination port from userspace. */
145 err = -EINVAL;
146 goto error;
147 }
148
149 vport = ovs_vport_alloc(sizeof(struct geneve_port),
150 &ovs_geneve_vport_ops, parms);
151 if (IS_ERR(vport))
152 return vport;
153
154 geneve_port = geneve_vport(vport);
155 strncpy(geneve_port->name, parms->name, IFNAMSIZ);
156
157 gs = geneve_sock_add(net, htons(dst_port), geneve_rcv, vport, true, 0);
158 if (IS_ERR(gs)) {
159 ovs_vport_free(vport);
160 return (void *)gs;
161 }
162 geneve_port->gs = gs;
163
164 return vport;
165error:
166 return ERR_PTR(err);
167}
168
169static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
170{
171 struct ovs_key_ipv4_tunnel *tun_key;
172 struct ovs_tunnel_info *tun_info;
173 struct net *net = ovs_dp_get_net(vport->dp);
174 struct geneve_port *geneve_port = geneve_vport(vport);
175 __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport;
176 __be16 sport;
177 struct rtable *rt;
178 struct flowi4 fl;
179 u8 vni[3];
180 __be16 df;
181 int err;
182
183 tun_info = OVS_CB(skb)->egress_tun_info;
184 if (unlikely(!tun_info)) {
185 err = -EINVAL;
186 goto error;
187 }
188
189 tun_key = &tun_info->tunnel;
190
191 /* Route lookup */
192 memset(&fl, 0, sizeof(fl));
193 fl.daddr = tun_key->ipv4_dst;
194 fl.saddr = tun_key->ipv4_src;
195 fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
196 fl.flowi4_mark = skb->mark;
197 fl.flowi4_proto = IPPROTO_UDP;
198
199 rt = ip_route_output_key(net, &fl);
200 if (IS_ERR(rt)) {
201 err = PTR_ERR(rt);
202 goto error;
203 }
204
205 df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
206 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
207 tunnel_id_to_vni(tun_key->tun_id, vni);
208 skb->ignore_df = 1;
209
210 err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr,
211 tun_key->ipv4_dst, tun_key->ipv4_tos,
212 tun_key->ipv4_ttl, df, sport, dport,
213 tun_key->tun_flags, vni,
214 tun_info->options_len, (u8 *)tun_info->options,
215 false);
216 if (err < 0)
217 ip_rt_put(rt);
218error:
219 return err;
220}
221
222static const char *geneve_get_name(const struct vport *vport)
223{
224 struct geneve_port *geneve_port = geneve_vport(vport);
225
226 return geneve_port->name;
227}
228
229const struct vport_ops ovs_geneve_vport_ops = {
230 .type = OVS_VPORT_TYPE_GENEVE,
231 .create = geneve_tnl_create,
232 .destroy = geneve_tnl_destroy,
233 .get_name = geneve_get_name,
234 .get_options = geneve_get_options,
235 .send = geneve_tnl_send,
236};
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index fe768bd600eb..108b82da2fd9 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -106,7 +106,7 @@ static int gre_rcv(struct sk_buff *skb,
106 106
107 key = key_to_tunnel_id(tpi->key, tpi->seq); 107 key = key_to_tunnel_id(tpi->key, tpi->seq);
108 ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, 108 ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key,
109 filter_tnl_flags(tpi->flags)); 109 filter_tnl_flags(tpi->flags), NULL, 0);
110 110
111 ovs_vport_receive(vport, skb, &tun_info); 111 ovs_vport_receive(vport, skb, &tun_info);
112 return PACKET_RCVD; 112 return PACKET_RCVD;
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 5fbff2c1ee49..2735e01dca73 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -66,7 +66,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
66 /* Save outer tunnel values */ 66 /* Save outer tunnel values */
67 iph = ip_hdr(skb); 67 iph = ip_hdr(skb);
68 key = cpu_to_be64(ntohl(vx_vni) >> 8); 68 key = cpu_to_be64(ntohl(vx_vni) >> 8);
69 ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY); 69 ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0);
70 70
71 ovs_vport_receive(vport, skb, &tun_info); 71 ovs_vport_receive(vport, skb, &tun_info);
72} 72}
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 3e50ee8a218c..53001b020ca7 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -48,6 +48,9 @@ static const struct vport_ops *vport_ops_list[] = {
48#ifdef CONFIG_OPENVSWITCH_VXLAN 48#ifdef CONFIG_OPENVSWITCH_VXLAN
49 &ovs_vxlan_vport_ops, 49 &ovs_vxlan_vport_ops,
50#endif 50#endif
51#ifdef CONFIG_OPENVSWITCH_GENEVE
52 &ovs_geneve_vport_ops,
53#endif
51}; 54};
52 55
53/* Protected by RCU read lock for reading, ovs_mutex for writing. */ 56/* Protected by RCU read lock for reading, ovs_mutex for writing. */
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index e28964aba021..8942125de3a6 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -215,6 +215,7 @@ extern const struct vport_ops ovs_netdev_vport_ops;
215extern const struct vport_ops ovs_internal_vport_ops; 215extern const struct vport_ops ovs_internal_vport_ops;
216extern const struct vport_ops ovs_gre_vport_ops; 216extern const struct vport_ops ovs_gre_vport_ops;
217extern const struct vport_ops ovs_vxlan_vport_ops; 217extern const struct vport_ops ovs_vxlan_vport_ops;
218extern const struct vport_ops ovs_geneve_vport_ops;
218 219
219static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, 220static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
220 const void *start, unsigned int len) 221 const void *start, unsigned int len)