about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- include/net/mpls.h 39
-rw-r--r-- include/uapi/linux/openvswitch.h 32
-rw-r--r-- net/core/dev.c 3
-rw-r--r-- net/openvswitch/Kconfig 1
-rw-r--r-- net/openvswitch/actions.c 106
-rw-r--r-- net/openvswitch/datapath.c 6
-rw-r--r-- net/openvswitch/flow.c 30
-rw-r--r-- net/openvswitch/flow.h 17
-rw-r--r-- net/openvswitch/flow_netlink.c 139
-rw-r--r-- net/openvswitch/flow_netlink.h 2
10 files changed, 345 insertions, 30 deletions
diff --git a/include/net/mpls.h b/include/net/mpls.h
new file mode 100644
index 000000000000..5b3b5addfb08
--- /dev/null
+++ b/include/net/mpls.h
@@ -0,0 +1,39 @@
1/*
2 * Copyright (c) 2014 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13
14#ifndef _NET_MPLS_H
15#define _NET_MPLS_H 1
16
17#include <linux/if_ether.h>
18#include <linux/netdevice.h>
19
20#define MPLS_HLEN 4
21
22static inline bool eth_p_mpls(__be16 eth_type)
23{
24 return eth_type == htons(ETH_P_MPLS_UC) ||
25 eth_type == htons(ETH_P_MPLS_MC);
26}
27
28/*
29 * For non-MPLS skbs this will correspond to the network header.
30 * For MPLS skbs it will be before the network_header as the MPLS
31 * label stack lies between the end of the mac header and the network
32 * header. That is, for MPLS skbs the end of the mac header
33 * is the top of the MPLS label stack.
34 */
35static inline unsigned char *skb_mpls_header(struct sk_buff *skb)
36{
37 return skb_mac_header(skb) + skb->mac_len;
38}
39#endif
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 435eabc5ffaa..631056b66f80 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -293,6 +293,9 @@ enum ovs_key_attr {
293 OVS_KEY_ATTR_DP_HASH, /* u32 hash value. Value 0 indicates the hash 293 OVS_KEY_ATTR_DP_HASH, /* u32 hash value. Value 0 indicates the hash
294 is not computed by the datapath. */ 294 is not computed by the datapath. */
295 OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */ 295 OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */
296 OVS_KEY_ATTR_MPLS, /* array of struct ovs_key_mpls.
297 * The implementation may restrict
298 * the accepted length of the array. */
296 299
297#ifdef __KERNEL__ 300#ifdef __KERNEL__
298 OVS_KEY_ATTR_TUNNEL_INFO, /* struct ovs_tunnel_info */ 301 OVS_KEY_ATTR_TUNNEL_INFO, /* struct ovs_tunnel_info */
@@ -340,6 +343,10 @@ struct ovs_key_ethernet {
340 __u8 eth_dst[ETH_ALEN]; 343 __u8 eth_dst[ETH_ALEN];
341}; 344};
342 345
346struct ovs_key_mpls {
347 __be32 mpls_lse;
348};
349
343struct ovs_key_ipv4 { 350struct ovs_key_ipv4 {
344 __be32 ipv4_src; 351 __be32 ipv4_src;
345 __be32 ipv4_dst; 352 __be32 ipv4_dst;
@@ -484,6 +491,19 @@ enum ovs_userspace_attr {
484#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) 491#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
485 492
486/** 493/**
494 * struct ovs_action_push_mpls - %OVS_ACTION_ATTR_PUSH_MPLS action argument.
495 * @mpls_lse: MPLS label stack entry to push.
496 * @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame.
497 *
498 * The only values @mpls_ethertype should ever be given are %ETH_P_MPLS_UC and
499 * %ETH_P_MPLS_MC, indicating MPLS unicast or multicast. Other values are rejected.
500 */
501struct ovs_action_push_mpls {
502 __be32 mpls_lse;
503 __be16 mpls_ethertype; /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC */
504};
505
506/**
487 * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument. 507 * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
488 * @vlan_tpid: Tag protocol identifier (TPID) to push. 508 * @vlan_tpid: Tag protocol identifier (TPID) to push.
489 * @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set 509 * @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set
@@ -534,6 +554,15 @@ struct ovs_action_hash {
534 * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. 554 * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
535 * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in 555 * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in
536 * the nested %OVS_SAMPLE_ATTR_* attributes. 556 * the nested %OVS_SAMPLE_ATTR_* attributes.
557 * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the
558 * top of the packet's MPLS label stack. Set the ethertype of the
559 * encapsulating frame to either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC to
560 * indicate the new packet contents.
561 * @OVS_ACTION_ATTR_POP_MPLS: Pop an MPLS label stack entry off of the
562 * packet's MPLS label stack. Set the encapsulating frame's ethertype to
563 * indicate the new packet contents. This could still be %ETH_P_MPLS_UC or
564 * %ETH_P_MPLS_MC if the resulting MPLS label stack is not empty. If there
565 * is no MPLS label stack, as determined by ethertype, no action is taken.
537 * 566 *
538 * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all 567 * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
539 * fields within a header are modifiable, e.g. the IPv4 protocol and fragment 568 * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -550,6 +579,9 @@ enum ovs_action_attr {
550 OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ 579 OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */
551 OVS_ACTION_ATTR_RECIRC, /* u32 recirc_id. */ 580 OVS_ACTION_ATTR_RECIRC, /* u32 recirc_id. */
552 OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */ 581 OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */
582 OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */
583 OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */
584
553 __OVS_ACTION_ATTR_MAX 585 __OVS_ACTION_ATTR_MAX
554}; 586};
555 587
diff --git a/net/core/dev.c b/net/core/dev.c
index 40be481268de..70bb609c283d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -118,6 +118,7 @@
118#include <linux/if_vlan.h> 118#include <linux/if_vlan.h>
119#include <linux/ip.h> 119#include <linux/ip.h>
120#include <net/ip.h> 120#include <net/ip.h>
121#include <net/mpls.h>
121#include <linux/ipv6.h> 122#include <linux/ipv6.h>
122#include <linux/in.h> 123#include <linux/in.h>
123#include <linux/jhash.h> 124#include <linux/jhash.h>
@@ -2530,7 +2531,7 @@ static netdev_features_t net_mpls_features(struct sk_buff *skb,
2530 netdev_features_t features, 2531 netdev_features_t features,
2531 __be16 type) 2532 __be16 type)
2532{ 2533{
2533 if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC)) 2534 if (eth_p_mpls(type))
2534 features &= skb->dev->mpls_features; 2535 features &= skb->dev->mpls_features;
2535 2536
2536 return features; 2537 return features;
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 2a9673e39ca1..454ce12efbbf 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -30,6 +30,7 @@ config OPENVSWITCH
30 30
31config OPENVSWITCH_GRE 31config OPENVSWITCH_GRE
32 tristate "Open vSwitch GRE tunneling support" 32 tristate "Open vSwitch GRE tunneling support"
33 select NET_MPLS_GSO
33 depends on INET 34 depends on INET
34 depends on OPENVSWITCH 35 depends on OPENVSWITCH
35 depends on NET_IPGRE_DEMUX 36 depends on NET_IPGRE_DEMUX
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 922c133b1933..930b1b6e4cef 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -28,10 +28,12 @@
28#include <linux/in6.h> 28#include <linux/in6.h>
29#include <linux/if_arp.h> 29#include <linux/if_arp.h>
30#include <linux/if_vlan.h> 30#include <linux/if_vlan.h>
31
31#include <net/ip.h> 32#include <net/ip.h>
32#include <net/ipv6.h> 33#include <net/ipv6.h>
33#include <net/checksum.h> 34#include <net/checksum.h>
34#include <net/dsfield.h> 35#include <net/dsfield.h>
36#include <net/mpls.h>
35#include <net/sctp/checksum.h> 37#include <net/sctp/checksum.h>
36 38
37#include "datapath.h" 39#include "datapath.h"
@@ -118,6 +120,92 @@ static int make_writable(struct sk_buff *skb, int write_len)
118 return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); 120 return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
119} 121}
120 122
123static int push_mpls(struct sk_buff *skb,
124 const struct ovs_action_push_mpls *mpls)
125{
126 __be32 *new_mpls_lse;
127 struct ethhdr *hdr;
128
129 /* Networking stack does not allow simultaneous Tunnel and MPLS GSO. */
130 if (skb->encapsulation)
131 return -ENOTSUPP;
132
133 if (skb_cow_head(skb, MPLS_HLEN) < 0)
134 return -ENOMEM;
135
136 skb_push(skb, MPLS_HLEN);
137 memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
138 skb->mac_len);
139 skb_reset_mac_header(skb);
140
141 new_mpls_lse = (__be32 *)skb_mpls_header(skb);
142 *new_mpls_lse = mpls->mpls_lse;
143
144 if (skb->ip_summed == CHECKSUM_COMPLETE)
145 skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
146 MPLS_HLEN, 0));
147
148 hdr = eth_hdr(skb);
149 hdr->h_proto = mpls->mpls_ethertype;
150
151 skb_set_inner_protocol(skb, skb->protocol);
152 skb->protocol = mpls->mpls_ethertype;
153
154 return 0;
155}
156
157static int pop_mpls(struct sk_buff *skb, const __be16 ethertype)
158{
159 struct ethhdr *hdr;
160 int err;
161
162 err = make_writable(skb, skb->mac_len + MPLS_HLEN);
163 if (unlikely(err))
164 return err;
165
166 if (skb->ip_summed == CHECKSUM_COMPLETE)
167 skb->csum = csum_sub(skb->csum,
168 csum_partial(skb_mpls_header(skb),
169 MPLS_HLEN, 0));
170
171 memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
172 skb->mac_len);
173
174 __skb_pull(skb, MPLS_HLEN);
175 skb_reset_mac_header(skb);
176
177 /* skb_mpls_header() is used to locate the ethertype
178 * field correctly in the presence of VLAN tags.
179 */
180 hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN);
181 hdr->h_proto = ethertype;
182 if (eth_p_mpls(skb->protocol))
183 skb->protocol = ethertype;
184 return 0;
185}
186
187static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
188{
189 __be32 *stack;
190 int err;
191
192 err = make_writable(skb, skb->mac_len + MPLS_HLEN);
193 if (unlikely(err))
194 return err;
195
196 stack = (__be32 *)skb_mpls_header(skb);
197 if (skb->ip_summed == CHECKSUM_COMPLETE) {
198 __be32 diff[] = { ~(*stack), *mpls_lse };
199
200 skb->csum = ~csum_partial((char *)diff, sizeof(diff),
201 ~skb->csum);
202 }
203
204 *stack = *mpls_lse;
205
206 return 0;
207}
208
121/* remove VLAN header from packet and update csum accordingly. */ 209/* remove VLAN header from packet and update csum accordingly. */
122static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) 210static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
123{ 211{
@@ -140,10 +228,12 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
140 228
141 vlan_set_encap_proto(skb, vhdr); 229 vlan_set_encap_proto(skb, vhdr);
142 skb->mac_header += VLAN_HLEN; 230 skb->mac_header += VLAN_HLEN;
231
143 if (skb_network_offset(skb) < ETH_HLEN) 232 if (skb_network_offset(skb) < ETH_HLEN)
144 skb_set_network_header(skb, ETH_HLEN); 233 skb_set_network_header(skb, ETH_HLEN);
145 skb_reset_mac_len(skb);
146 234
235 /* Update mac_len for subsequent MPLS actions */
236 skb_reset_mac_len(skb);
147 return 0; 237 return 0;
148} 238}
149 239
@@ -186,6 +276,8 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
186 276
187 if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag)) 277 if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
188 return -ENOMEM; 278 return -ENOMEM;
279 /* Update mac_len for subsequent MPLS actions */
280 skb->mac_len += VLAN_HLEN;
189 281
190 if (skb->ip_summed == CHECKSUM_COMPLETE) 282 if (skb->ip_summed == CHECKSUM_COMPLETE)
191 skb->csum = csum_add(skb->csum, csum_partial(skb->data 283 skb->csum = csum_add(skb->csum, csum_partial(skb->data
@@ -612,6 +704,10 @@ static int execute_set_action(struct sk_buff *skb,
612 case OVS_KEY_ATTR_SCTP: 704 case OVS_KEY_ATTR_SCTP:
613 err = set_sctp(skb, nla_data(nested_attr)); 705 err = set_sctp(skb, nla_data(nested_attr));
614 break; 706 break;
707
708 case OVS_KEY_ATTR_MPLS:
709 err = set_mpls(skb, nla_data(nested_attr));
710 break;
615 } 711 }
616 712
617 return err; 713 return err;
@@ -690,6 +786,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
690 execute_hash(skb, key, a); 786 execute_hash(skb, key, a);
691 break; 787 break;
692 788
789 case OVS_ACTION_ATTR_PUSH_MPLS:
790 err = push_mpls(skb, nla_data(a));
791 break;
792
793 case OVS_ACTION_ATTR_POP_MPLS:
794 err = pop_mpls(skb, nla_get_be16(a));
795 break;
796
693 case OVS_ACTION_ATTR_PUSH_VLAN: 797 case OVS_ACTION_ATTR_PUSH_VLAN:
694 err = push_vlan(skb, nla_data(a)); 798 err = push_vlan(skb, nla_data(a));
695 if (unlikely(err)) /* skb already freed. */ 799 if (unlikely(err)) /* skb already freed. */
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index f18302f32049..688cb9bc0ef1 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -560,7 +560,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
560 goto err_flow_free; 560 goto err_flow_free;
561 561
562 err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], 562 err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
563 &flow->key, 0, &acts); 563 &flow->key, &acts);
564 if (err) 564 if (err)
565 goto err_flow_free; 565 goto err_flow_free;
566 566
@@ -846,7 +846,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
846 goto err_kfree_flow; 846 goto err_kfree_flow;
847 847
848 error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, 848 error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
849 0, &acts); 849 &acts);
850 if (error) { 850 if (error) {
851 OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); 851 OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
852 goto err_kfree_acts; 852 goto err_kfree_acts;
@@ -953,7 +953,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
953 return acts; 953 return acts;
954 954
955 ovs_flow_mask_key(&masked_key, key, mask); 955 ovs_flow_mask_key(&masked_key, key, mask);
956 error = ovs_nla_copy_actions(a, &masked_key, 0, &acts); 956 error = ovs_nla_copy_actions(a, &masked_key, &acts);
957 if (error) { 957 if (error) {
958 OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); 958 OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
959 kfree(acts); 959 kfree(acts);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 2b78789ea7c5..90a21010fc8f 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -32,6 +32,7 @@
32#include <linux/if_arp.h> 32#include <linux/if_arp.h>
33#include <linux/ip.h> 33#include <linux/ip.h>
34#include <linux/ipv6.h> 34#include <linux/ipv6.h>
35#include <linux/mpls.h>
35#include <linux/sctp.h> 36#include <linux/sctp.h>
36#include <linux/smp.h> 37#include <linux/smp.h>
37#include <linux/tcp.h> 38#include <linux/tcp.h>
@@ -42,6 +43,7 @@
42#include <net/ip.h> 43#include <net/ip.h>
43#include <net/ip_tunnels.h> 44#include <net/ip_tunnels.h>
44#include <net/ipv6.h> 45#include <net/ipv6.h>
46#include <net/mpls.h>
45#include <net/ndisc.h> 47#include <net/ndisc.h>
46 48
47#include "datapath.h" 49#include "datapath.h"
@@ -480,6 +482,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
480 return -ENOMEM; 482 return -ENOMEM;
481 483
482 skb_reset_network_header(skb); 484 skb_reset_network_header(skb);
485 skb_reset_mac_len(skb);
483 __skb_push(skb, skb->data - skb_mac_header(skb)); 486 __skb_push(skb, skb->data - skb_mac_header(skb));
484 487
485 /* Network layer. */ 488 /* Network layer. */
@@ -584,6 +587,33 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
584 memset(&key->ip, 0, sizeof(key->ip)); 587 memset(&key->ip, 0, sizeof(key->ip));
585 memset(&key->ipv4, 0, sizeof(key->ipv4)); 588 memset(&key->ipv4, 0, sizeof(key->ipv4));
586 } 589 }
590 } else if (eth_p_mpls(key->eth.type)) {
591 size_t stack_len = MPLS_HLEN;
592
593 /* In the presence of an MPLS label stack the end of the L2
594 * header and the beginning of the L3 header differ.
595 *
596 * Advance network_header to the beginning of the L3
597 * header. mac_len corresponds to the end of the L2 header.
598 */
599 while (1) {
600 __be32 lse;
601
602 error = check_header(skb, skb->mac_len + stack_len);
603 if (unlikely(error))
604 return 0;
605
606 memcpy(&lse, skb_network_header(skb), MPLS_HLEN);
607
608 if (stack_len == MPLS_HLEN)
609 memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
610
611 skb_set_network_header(skb, skb->mac_len + stack_len);
612 if (lse & htonl(MPLS_LS_S_MASK))
613 break;
614
615 stack_len += MPLS_HLEN;
616 }
587 } else if (key->eth.type == htons(ETH_P_IPV6)) { 617 } else if (key->eth.type == htons(ETH_P_IPV6)) {
588 int nh_len; /* IPv6 Header + Extensions */ 618 int nh_len; /* IPv6 Header + Extensions */
589 619
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 71813318c8c7..4962bee81a11 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -102,12 +102,17 @@ struct sw_flow_key {
102 __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ 102 __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
103 __be16 type; /* Ethernet frame type. */ 103 __be16 type; /* Ethernet frame type. */
104 } eth; 104 } eth;
105 struct { 105 union {
106 u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ 106 struct {
107 u8 tos; /* IP ToS. */ 107 __be32 top_lse; /* top label stack entry */
108 u8 ttl; /* IP TTL/hop limit. */ 108 } mpls;
109 u8 frag; /* One of OVS_FRAG_TYPE_*. */ 109 struct {
110 } ip; 110 u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
111 u8 tos; /* IP ToS. */
112 u8 ttl; /* IP TTL/hop limit. */
113 u8 frag; /* One of OVS_FRAG_TYPE_*. */
114 } ip;
115 };
111 struct { 116 struct {
112 __be16 src; /* TCP/UDP/SCTP source port. */ 117 __be16 src; /* TCP/UDP/SCTP source port. */
113 __be16 dst; /* TCP/UDP/SCTP destination port. */ 118 __be16 dst; /* TCP/UDP/SCTP destination port. */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 939bcb32100f..569309c49cc0 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -46,6 +46,7 @@
46#include <net/ip.h> 46#include <net/ip.h>
47#include <net/ipv6.h> 47#include <net/ipv6.h>
48#include <net/ndisc.h> 48#include <net/ndisc.h>
49#include <net/mpls.h>
49 50
50#include "flow_netlink.h" 51#include "flow_netlink.h"
51 52
@@ -134,7 +135,8 @@ static bool match_validate(const struct sw_flow_match *match,
134 | (1 << OVS_KEY_ATTR_ICMP) 135 | (1 << OVS_KEY_ATTR_ICMP)
135 | (1 << OVS_KEY_ATTR_ICMPV6) 136 | (1 << OVS_KEY_ATTR_ICMPV6)
136 | (1 << OVS_KEY_ATTR_ARP) 137 | (1 << OVS_KEY_ATTR_ARP)
137 | (1 << OVS_KEY_ATTR_ND)); 138 | (1 << OVS_KEY_ATTR_ND)
139 | (1 << OVS_KEY_ATTR_MPLS));
138 140
139 /* Always allowed mask fields. */ 141 /* Always allowed mask fields. */
140 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) 142 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
@@ -149,6 +151,12 @@ static bool match_validate(const struct sw_flow_match *match,
149 mask_allowed |= 1 << OVS_KEY_ATTR_ARP; 151 mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
150 } 152 }
151 153
154 if (eth_p_mpls(match->key->eth.type)) {
155 key_expected |= 1 << OVS_KEY_ATTR_MPLS;
156 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
157 mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
158 }
159
152 if (match->key->eth.type == htons(ETH_P_IP)) { 160 if (match->key->eth.type == htons(ETH_P_IP)) {
153 key_expected |= 1 << OVS_KEY_ATTR_IPV4; 161 key_expected |= 1 << OVS_KEY_ATTR_IPV4;
154 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 162 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
@@ -266,6 +274,7 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
266 [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32), 274 [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
267 [OVS_KEY_ATTR_DP_HASH] = sizeof(u32), 275 [OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
268 [OVS_KEY_ATTR_TUNNEL] = -1, 276 [OVS_KEY_ATTR_TUNNEL] = -1,
277 [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
269}; 278};
270 279
271static bool is_all_zero(const u8 *fp, size_t size) 280static bool is_all_zero(const u8 *fp, size_t size)
@@ -735,6 +744,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
735 attrs &= ~(1 << OVS_KEY_ATTR_ARP); 744 attrs &= ~(1 << OVS_KEY_ATTR_ARP);
736 } 745 }
737 746
747 if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
748 const struct ovs_key_mpls *mpls_key;
749
750 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
751 SW_FLOW_KEY_PUT(match, mpls.top_lse,
752 mpls_key->mpls_lse, is_mask);
753
754 attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
755 }
756
738 if (attrs & (1 << OVS_KEY_ATTR_TCP)) { 757 if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
739 const struct ovs_key_tcp *tcp_key; 758 const struct ovs_key_tcp *tcp_key;
740 759
@@ -1140,6 +1159,14 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
1140 arp_key->arp_op = htons(output->ip.proto); 1159 arp_key->arp_op = htons(output->ip.proto);
1141 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); 1160 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
1142 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); 1161 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
1162 } else if (eth_p_mpls(swkey->eth.type)) {
1163 struct ovs_key_mpls *mpls_key;
1164
1165 nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
1166 if (!nla)
1167 goto nla_put_failure;
1168 mpls_key = nla_data(nla);
1169 mpls_key->mpls_lse = output->mpls.top_lse;
1143 } 1170 }
1144 1171
1145 if ((swkey->eth.type == htons(ETH_P_IP) || 1172 if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -1336,9 +1363,15 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
1336 a->nla_len = sfa->actions_len - st_offset; 1363 a->nla_len = sfa->actions_len - st_offset;
1337} 1364}
1338 1365
1366static int ovs_nla_copy_actions__(const struct nlattr *attr,
1367 const struct sw_flow_key *key,
1368 int depth, struct sw_flow_actions **sfa,
1369 __be16 eth_type, __be16 vlan_tci);
1370
1339static int validate_and_copy_sample(const struct nlattr *attr, 1371static int validate_and_copy_sample(const struct nlattr *attr,
1340 const struct sw_flow_key *key, int depth, 1372 const struct sw_flow_key *key, int depth,
1341 struct sw_flow_actions **sfa) 1373 struct sw_flow_actions **sfa,
1374 __be16 eth_type, __be16 vlan_tci)
1342{ 1375{
1343 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; 1376 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
1344 const struct nlattr *probability, *actions; 1377 const struct nlattr *probability, *actions;
@@ -1375,7 +1408,8 @@ static int validate_and_copy_sample(const struct nlattr *attr,
1375 if (st_acts < 0) 1408 if (st_acts < 0)
1376 return st_acts; 1409 return st_acts;
1377 1410
1378 err = ovs_nla_copy_actions(actions, key, depth + 1, sfa); 1411 err = ovs_nla_copy_actions__(actions, key, depth + 1, sfa,
1412 eth_type, vlan_tci);
1379 if (err) 1413 if (err)
1380 return err; 1414 return err;
1381 1415
@@ -1385,10 +1419,10 @@ static int validate_and_copy_sample(const struct nlattr *attr,
1385 return 0; 1419 return 0;
1386} 1420}
1387 1421
1388static int validate_tp_port(const struct sw_flow_key *flow_key) 1422static int validate_tp_port(const struct sw_flow_key *flow_key,
1423 __be16 eth_type)
1389{ 1424{
1390 if ((flow_key->eth.type == htons(ETH_P_IP) || 1425 if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) &&
1391 flow_key->eth.type == htons(ETH_P_IPV6)) &&
1392 (flow_key->tp.src || flow_key->tp.dst)) 1426 (flow_key->tp.src || flow_key->tp.dst))
1393 return 0; 1427 return 0;
1394 1428
@@ -1483,7 +1517,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
1483static int validate_set(const struct nlattr *a, 1517static int validate_set(const struct nlattr *a,
1484 const struct sw_flow_key *flow_key, 1518 const struct sw_flow_key *flow_key,
1485 struct sw_flow_actions **sfa, 1519 struct sw_flow_actions **sfa,
1486 bool *set_tun) 1520 bool *set_tun, __be16 eth_type)
1487{ 1521{
1488 const struct nlattr *ovs_key = nla_data(a); 1522 const struct nlattr *ovs_key = nla_data(a);
1489 int key_type = nla_type(ovs_key); 1523 int key_type = nla_type(ovs_key);
@@ -1508,6 +1542,9 @@ static int validate_set(const struct nlattr *a,
1508 break; 1542 break;
1509 1543
1510 case OVS_KEY_ATTR_TUNNEL: 1544 case OVS_KEY_ATTR_TUNNEL:
1545 if (eth_p_mpls(eth_type))
1546 return -EINVAL;
1547
1511 *set_tun = true; 1548 *set_tun = true;
1512 err = validate_and_copy_set_tun(a, sfa); 1549 err = validate_and_copy_set_tun(a, sfa);
1513 if (err) 1550 if (err)
@@ -1515,7 +1552,7 @@ static int validate_set(const struct nlattr *a,
1515 break; 1552 break;
1516 1553
1517 case OVS_KEY_ATTR_IPV4: 1554 case OVS_KEY_ATTR_IPV4:
1518 if (flow_key->eth.type != htons(ETH_P_IP)) 1555 if (eth_type != htons(ETH_P_IP))
1519 return -EINVAL; 1556 return -EINVAL;
1520 1557
1521 if (!flow_key->ip.proto) 1558 if (!flow_key->ip.proto)
@@ -1531,7 +1568,7 @@ static int validate_set(const struct nlattr *a,
1531 break; 1568 break;
1532 1569
1533 case OVS_KEY_ATTR_IPV6: 1570 case OVS_KEY_ATTR_IPV6:
1534 if (flow_key->eth.type != htons(ETH_P_IPV6)) 1571 if (eth_type != htons(ETH_P_IPV6))
1535 return -EINVAL; 1572 return -EINVAL;
1536 1573
1537 if (!flow_key->ip.proto) 1574 if (!flow_key->ip.proto)
@@ -1553,19 +1590,24 @@ static int validate_set(const struct nlattr *a,
1553 if (flow_key->ip.proto != IPPROTO_TCP) 1590 if (flow_key->ip.proto != IPPROTO_TCP)
1554 return -EINVAL; 1591 return -EINVAL;
1555 1592
1556 return validate_tp_port(flow_key); 1593 return validate_tp_port(flow_key, eth_type);
1557 1594
1558 case OVS_KEY_ATTR_UDP: 1595 case OVS_KEY_ATTR_UDP:
1559 if (flow_key->ip.proto != IPPROTO_UDP) 1596 if (flow_key->ip.proto != IPPROTO_UDP)
1560 return -EINVAL; 1597 return -EINVAL;
1561 1598
1562 return validate_tp_port(flow_key); 1599 return validate_tp_port(flow_key, eth_type);
1600
1601 case OVS_KEY_ATTR_MPLS:
1602 if (!eth_p_mpls(eth_type))
1603 return -EINVAL;
1604 break;
1563 1605
1564 case OVS_KEY_ATTR_SCTP: 1606 case OVS_KEY_ATTR_SCTP:
1565 if (flow_key->ip.proto != IPPROTO_SCTP) 1607 if (flow_key->ip.proto != IPPROTO_SCTP)
1566 return -EINVAL; 1608 return -EINVAL;
1567 1609
1568 return validate_tp_port(flow_key); 1610 return validate_tp_port(flow_key, eth_type);
1569 1611
1570 default: 1612 default:
1571 return -EINVAL; 1613 return -EINVAL;
@@ -1609,12 +1651,13 @@ static int copy_action(const struct nlattr *from,
1609 return 0; 1651 return 0;
1610} 1652}
1611 1653
1612int ovs_nla_copy_actions(const struct nlattr *attr, 1654static int ovs_nla_copy_actions__(const struct nlattr *attr,
1613 const struct sw_flow_key *key, 1655 const struct sw_flow_key *key,
1614 int depth, 1656 int depth, struct sw_flow_actions **sfa,
1615 struct sw_flow_actions **sfa) 1657 __be16 eth_type, __be16 vlan_tci)
1616{ 1658{
1617 const struct nlattr *a; 1659 const struct nlattr *a;
1660 bool out_tnl_port = false;
1618 int rem, err; 1661 int rem, err;
1619 1662
1620 if (depth >= SAMPLE_ACTION_DEPTH) 1663 if (depth >= SAMPLE_ACTION_DEPTH)
@@ -1626,6 +1669,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
1626 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), 1669 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
1627 [OVS_ACTION_ATTR_RECIRC] = sizeof(u32), 1670 [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
1628 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, 1671 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
1672 [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
1673 [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
1629 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), 1674 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
1630 [OVS_ACTION_ATTR_POP_VLAN] = 0, 1675 [OVS_ACTION_ATTR_POP_VLAN] = 0,
1631 [OVS_ACTION_ATTR_SET] = (u32)-1, 1676 [OVS_ACTION_ATTR_SET] = (u32)-1,
@@ -1655,6 +1700,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
1655 case OVS_ACTION_ATTR_OUTPUT: 1700 case OVS_ACTION_ATTR_OUTPUT:
1656 if (nla_get_u32(a) >= DP_MAX_PORTS) 1701 if (nla_get_u32(a) >= DP_MAX_PORTS)
1657 return -EINVAL; 1702 return -EINVAL;
1703 out_tnl_port = false;
1704
1658 break; 1705 break;
1659 1706
1660 case OVS_ACTION_ATTR_HASH: { 1707 case OVS_ACTION_ATTR_HASH: {
@@ -1671,6 +1718,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
1671 } 1718 }
1672 1719
1673 case OVS_ACTION_ATTR_POP_VLAN: 1720 case OVS_ACTION_ATTR_POP_VLAN:
1721 vlan_tci = htons(0);
1674 break; 1722 break;
1675 1723
1676 case OVS_ACTION_ATTR_PUSH_VLAN: 1724 case OVS_ACTION_ATTR_PUSH_VLAN:
@@ -1679,19 +1727,66 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
1679 return -EINVAL; 1727 return -EINVAL;
1680 if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) 1728 if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
1681 return -EINVAL; 1729 return -EINVAL;
1730 vlan_tci = vlan->vlan_tci;
1682 break; 1731 break;
1683 1732
1684 case OVS_ACTION_ATTR_RECIRC: 1733 case OVS_ACTION_ATTR_RECIRC:
1685 break; 1734 break;
1686 1735
1736 case OVS_ACTION_ATTR_PUSH_MPLS: {
1737 const struct ovs_action_push_mpls *mpls = nla_data(a);
1738
1739 /* Networking stack does not allow simultaneous Tunnel
1740 * and MPLS GSO.
1741 */
1742 if (out_tnl_port)
1743 return -EINVAL;
1744
1745 if (!eth_p_mpls(mpls->mpls_ethertype))
1746 return -EINVAL;
1747 /* Prohibit push MPLS other than to a white list
1748 * for packets that have a known tag order.
1749 */
1750 if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
1751 (eth_type != htons(ETH_P_IP) &&
1752 eth_type != htons(ETH_P_IPV6) &&
1753 eth_type != htons(ETH_P_ARP) &&
1754 eth_type != htons(ETH_P_RARP) &&
1755 !eth_p_mpls(eth_type)))
1756 return -EINVAL;
1757 eth_type = mpls->mpls_ethertype;
1758 break;
1759 }
1760
1761 case OVS_ACTION_ATTR_POP_MPLS:
1762 if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
1763 !eth_p_mpls(eth_type))
1764 return -EINVAL;
1765
1766 /* Disallow subsequent L2.5+ set and mpls_pop actions
1767 * as there is no check here to ensure that the new
1768 * eth_type is valid and thus set actions could
1769 * write off the end of the packet or otherwise
1770 * corrupt it.
1771 *
1772 * Support for these actions is planned using packet
1773 * recirculation.
1774 */
1775 eth_type = htons(0);
1776 break;
1777
1687 case OVS_ACTION_ATTR_SET: 1778 case OVS_ACTION_ATTR_SET:
1688 err = validate_set(a, key, sfa, &skip_copy); 1779 err = validate_set(a, key, sfa,
1780 &out_tnl_port, eth_type);
1689 if (err) 1781 if (err)
1690 return err; 1782 return err;
1783
1784 skip_copy = out_tnl_port;
1691 break; 1785 break;
1692 1786
1693 case OVS_ACTION_ATTR_SAMPLE: 1787 case OVS_ACTION_ATTR_SAMPLE:
1694 err = validate_and_copy_sample(a, key, depth, sfa); 1788 err = validate_and_copy_sample(a, key, depth, sfa,
1789 eth_type, vlan_tci);
1695 if (err) 1790 if (err)
1696 return err; 1791 return err;
1697 skip_copy = true; 1792 skip_copy = true;
@@ -1713,6 +1808,14 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
1713 return 0; 1808 return 0;
1714} 1809}
1715 1810
1811int ovs_nla_copy_actions(const struct nlattr *attr,
1812 const struct sw_flow_key *key,
1813 struct sw_flow_actions **sfa)
1814{
1815 return ovs_nla_copy_actions__(attr, key, 0, sfa, key->eth.type,
1816 key->eth.tci);
1817}
1818
1716static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) 1819static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
1717{ 1820{
1718 const struct nlattr *a; 1821 const struct nlattr *a;
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 206e45add888..6355b1d01329 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -49,7 +49,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
49 const struct nlattr *); 49 const struct nlattr *);
50 50
51int ovs_nla_copy_actions(const struct nlattr *attr, 51int ovs_nla_copy_actions(const struct nlattr *attr,
52 const struct sw_flow_key *key, int depth, 52 const struct sw_flow_key *key,
53 struct sw_flow_actions **sfa); 53 struct sw_flow_actions **sfa);
54int ovs_nla_put_actions(const struct nlattr *attr, 54int ovs_nla_put_actions(const struct nlattr *attr,
55 int len, struct sk_buff *skb); 55 int len, struct sk_buff *skb);