aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-11-13 00:51:04 -0500
committerDavid S. Miller <davem@davemloft.net>2016-11-13 00:51:04 -0500
commitf0a404001204a23f94e1ca0cda8597ce80714f5e (patch)
tree4223e36f63fb87bb4bdc5767dffef1fcbadcf34f /net
parentc540594f864bb4645573c2c0a304919fabb3d7ea (diff)
parent217ac77a3c2524d999730b2a80b61fcc2d0f734a (diff)
Merge branch 'ovs-L3-encap'
Jiri Benc says:

====================
openvswitch: support for layer 3 encapsulated packets

At the core of this patch set is removing the assumption in the Open vSwitch
datapath that all packets have an Ethernet header.

The implementation relies on the presence of pop_eth and push_eth actions
in datapath flows to facilitate adding and removing Ethernet headers as
appropriate. The construction of such flows is left up to user-space.

This series is based on work by Simon Horman, Lorand Jakab, Thomas Morin and
others. I kept Lorand's and Simon's s-o-b in the patches that are derived from
v11 to record their authorship of parts of the code.

Changes from v12 to v13:

* Addressed Pravin's feedback.
* Removed the GRE vport conversion patch; L3 GRE ports should be created by
  rtnetlink instead.

Main changes from v11 to v12:

* The patches were restructured and split differently for easier review.
* They were rebased and adjusted to the current net-next. Especially MPLS
  handling is different (and easier) thanks to the recent MPLS GSO rework.
* Several bugs were discovered and fixed. The most notable is fragment
  handling: header adjustment for ARPHRD_NONE devices on tx needs to be done
  after refragmentation, not before it. This required significant changes in
  the patchset. Another one is stricter checking of attributes (match on L2
  vs. L3 packet) at the kernel level.
* Instead of is_layer3 bool, a mac_proto field is used.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/openvswitch/actions.c111
-rw-r--r--net/openvswitch/datapath.c13
-rw-r--r--net/openvswitch/flow.c105
-rw-r--r--net/openvswitch/flow.h22
-rw-r--r--net/openvswitch/flow_netlink.c179
-rw-r--r--net/openvswitch/vport-netdev.c9
-rw-r--r--net/openvswitch/vport.c31
-rw-r--r--net/openvswitch/vport.h2
8 files changed, 338 insertions, 134 deletions
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 1105c4e29c62..514f7bcf7c63 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -66,6 +66,7 @@ struct ovs_frag_data {
66 u16 vlan_tci; 66 u16 vlan_tci;
67 __be16 vlan_proto; 67 __be16 vlan_proto;
68 unsigned int l2_len; 68 unsigned int l2_len;
69 u8 mac_proto;
69 u8 l2_data[MAX_L2_LEN]; 70 u8 l2_data[MAX_L2_LEN];
70}; 71};
71 72
@@ -137,12 +138,12 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
137 138
138static void invalidate_flow_key(struct sw_flow_key *key) 139static void invalidate_flow_key(struct sw_flow_key *key)
139{ 140{
140 key->eth.type = htons(0); 141 key->mac_proto |= SW_FLOW_KEY_INVALID;
141} 142}
142 143
143static bool is_flow_key_valid(const struct sw_flow_key *key) 144static bool is_flow_key_valid(const struct sw_flow_key *key)
144{ 145{
145 return !!key->eth.type; 146 return !(key->mac_proto & SW_FLOW_KEY_INVALID);
146} 147}
147 148
148static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, 149static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
@@ -186,7 +187,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
186 187
187 skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); 188 skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
188 189
189 update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype); 190 if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET)
191 update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
190 skb->protocol = mpls->mpls_ethertype; 192 skb->protocol = mpls->mpls_ethertype;
191 193
192 invalidate_flow_key(key); 194 invalidate_flow_key(key);
@@ -196,7 +198,6 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
196static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, 198static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
197 const __be16 ethertype) 199 const __be16 ethertype)
198{ 200{
199 struct ethhdr *hdr;
200 int err; 201 int err;
201 202
202 err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); 203 err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
@@ -212,11 +213,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
212 skb_reset_mac_header(skb); 213 skb_reset_mac_header(skb);
213 skb_set_network_header(skb, skb->mac_len); 214 skb_set_network_header(skb, skb->mac_len);
214 215
215 /* mpls_hdr() is used to locate the ethertype field correctly in the 216 if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) {
216 * presence of VLAN tags. 217 struct ethhdr *hdr;
217 */ 218
218 hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN); 219 /* mpls_hdr() is used to locate the ethertype field correctly in the
219 update_ethertype(skb, hdr, ethertype); 220 * presence of VLAN tags.
221 */
222 hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
223 update_ethertype(skb, hdr, ethertype);
224 }
220 if (eth_p_mpls(skb->protocol)) 225 if (eth_p_mpls(skb->protocol))
221 skb->protocol = ethertype; 226 skb->protocol = ethertype;
222 227
@@ -312,6 +317,47 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
312 return 0; 317 return 0;
313} 318}
314 319
320/* pop_eth does not support VLAN packets as this action is never called
321 * for them.
322 */
323static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key)
324{
325 skb_pull_rcsum(skb, ETH_HLEN);
326 skb_reset_mac_header(skb);
327 skb_reset_mac_len(skb);
328
329 /* safe right before invalidate_flow_key */
330 key->mac_proto = MAC_PROTO_NONE;
331 invalidate_flow_key(key);
332 return 0;
333}
334
335static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
336 const struct ovs_action_push_eth *ethh)
337{
338 struct ethhdr *hdr;
339
340 /* Add the new Ethernet header */
341 if (skb_cow_head(skb, ETH_HLEN) < 0)
342 return -ENOMEM;
343
344 skb_push(skb, ETH_HLEN);
345 skb_reset_mac_header(skb);
346 skb_reset_mac_len(skb);
347
348 hdr = eth_hdr(skb);
349 ether_addr_copy(hdr->h_source, ethh->addresses.eth_src);
350 ether_addr_copy(hdr->h_dest, ethh->addresses.eth_dst);
351 hdr->h_proto = skb->protocol;
352
353 skb_postpush_rcsum(skb, hdr, ETH_HLEN);
354
355 /* safe right before invalidate_flow_key */
356 key->mac_proto = MAC_PROTO_ETHERNET;
357 invalidate_flow_key(key);
358 return 0;
359}
360
315static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh, 361static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
316 __be32 addr, __be32 new_addr) 362 __be32 addr, __be32 new_addr)
317{ 363{
@@ -673,7 +719,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk
673 skb_reset_mac_len(skb); 719 skb_reset_mac_len(skb);
674 } 720 }
675 721
676 ovs_vport_send(vport, skb); 722 ovs_vport_send(vport, skb, data->mac_proto);
677 return 0; 723 return 0;
678} 724}
679 725
@@ -692,7 +738,7 @@ static struct dst_ops ovs_dst_ops = {
692 * ovs_vport_output(), which is called once per fragmented packet. 738 * ovs_vport_output(), which is called once per fragmented packet.
693 */ 739 */
694static void prepare_frag(struct vport *vport, struct sk_buff *skb, 740static void prepare_frag(struct vport *vport, struct sk_buff *skb,
695 u16 orig_network_offset) 741 u16 orig_network_offset, u8 mac_proto)
696{ 742{
697 unsigned int hlen = skb_network_offset(skb); 743 unsigned int hlen = skb_network_offset(skb);
698 struct ovs_frag_data *data; 744 struct ovs_frag_data *data;
@@ -705,6 +751,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb,
705 data->network_offset = orig_network_offset; 751 data->network_offset = orig_network_offset;
706 data->vlan_tci = skb->vlan_tci; 752 data->vlan_tci = skb->vlan_tci;
707 data->vlan_proto = skb->vlan_proto; 753 data->vlan_proto = skb->vlan_proto;
754 data->mac_proto = mac_proto;
708 data->l2_len = hlen; 755 data->l2_len = hlen;
709 memcpy(&data->l2_data, skb->data, hlen); 756 memcpy(&data->l2_data, skb->data, hlen);
710 757
@@ -713,7 +760,8 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb,
713} 760}
714 761
715static void ovs_fragment(struct net *net, struct vport *vport, 762static void ovs_fragment(struct net *net, struct vport *vport,
716 struct sk_buff *skb, u16 mru, __be16 ethertype) 763 struct sk_buff *skb, u16 mru,
764 struct sw_flow_key *key)
717{ 765{
718 u16 orig_network_offset = 0; 766 u16 orig_network_offset = 0;
719 767
@@ -727,11 +775,12 @@ static void ovs_fragment(struct net *net, struct vport *vport,
727 goto err; 775 goto err;
728 } 776 }
729 777
730 if (ethertype == htons(ETH_P_IP)) { 778 if (key->eth.type == htons(ETH_P_IP)) {
731 struct dst_entry ovs_dst; 779 struct dst_entry ovs_dst;
732 unsigned long orig_dst; 780 unsigned long orig_dst;
733 781
734 prepare_frag(vport, skb, orig_network_offset); 782 prepare_frag(vport, skb, orig_network_offset,
783 ovs_key_mac_proto(key));
735 dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1, 784 dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
736 DST_OBSOLETE_NONE, DST_NOCOUNT); 785 DST_OBSOLETE_NONE, DST_NOCOUNT);
737 ovs_dst.dev = vport->dev; 786 ovs_dst.dev = vport->dev;
@@ -742,7 +791,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
742 791
743 ip_do_fragment(net, skb->sk, skb, ovs_vport_output); 792 ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
744 refdst_drop(orig_dst); 793 refdst_drop(orig_dst);
745 } else if (ethertype == htons(ETH_P_IPV6)) { 794 } else if (key->eth.type == htons(ETH_P_IPV6)) {
746 const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); 795 const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
747 unsigned long orig_dst; 796 unsigned long orig_dst;
748 struct rt6_info ovs_rt; 797 struct rt6_info ovs_rt;
@@ -751,7 +800,8 @@ static void ovs_fragment(struct net *net, struct vport *vport,
751 goto err; 800 goto err;
752 } 801 }
753 802
754 prepare_frag(vport, skb, orig_network_offset); 803 prepare_frag(vport, skb, orig_network_offset,
804 ovs_key_mac_proto(key));
755 memset(&ovs_rt, 0, sizeof(ovs_rt)); 805 memset(&ovs_rt, 0, sizeof(ovs_rt));
756 dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1, 806 dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
757 DST_OBSOLETE_NONE, DST_NOCOUNT); 807 DST_OBSOLETE_NONE, DST_NOCOUNT);
@@ -765,7 +815,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
765 refdst_drop(orig_dst); 815 refdst_drop(orig_dst);
766 } else { 816 } else {
767 WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", 817 WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
768 ovs_vport_name(vport), ntohs(ethertype), mru, 818 ovs_vport_name(vport), ntohs(key->eth.type), mru,
769 vport->dev->mtu); 819 vport->dev->mtu);
770 goto err; 820 goto err;
771 } 821 }
@@ -785,26 +835,19 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
785 u32 cutlen = OVS_CB(skb)->cutlen; 835 u32 cutlen = OVS_CB(skb)->cutlen;
786 836
787 if (unlikely(cutlen > 0)) { 837 if (unlikely(cutlen > 0)) {
788 if (skb->len - cutlen > ETH_HLEN) 838 if (skb->len - cutlen > ovs_mac_header_len(key))
789 pskb_trim(skb, skb->len - cutlen); 839 pskb_trim(skb, skb->len - cutlen);
790 else 840 else
791 pskb_trim(skb, ETH_HLEN); 841 pskb_trim(skb, ovs_mac_header_len(key));
792 } 842 }
793 843
794 if (likely(!mru || (skb->len <= mru + ETH_HLEN))) { 844 if (likely(!mru ||
795 ovs_vport_send(vport, skb); 845 (skb->len <= mru + vport->dev->hard_header_len))) {
846 ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
796 } else if (mru <= vport->dev->mtu) { 847 } else if (mru <= vport->dev->mtu) {
797 struct net *net = read_pnet(&dp->net); 848 struct net *net = read_pnet(&dp->net);
798 __be16 ethertype = key->eth.type;
799
800 if (!is_flow_key_valid(key)) {
801 if (eth_p_mpls(skb->protocol))
802 ethertype = skb->inner_protocol;
803 else
804 ethertype = vlan_get_protocol(skb);
805 }
806 849
807 ovs_fragment(net, vport, skb, mru, ethertype); 850 ovs_fragment(net, vport, skb, mru, key);
808 } else { 851 } else {
809 kfree_skb(skb); 852 kfree_skb(skb);
810 } 853 }
@@ -1198,6 +1241,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
1198 if (err) 1241 if (err)
1199 return err == -EINPROGRESS ? 0 : err; 1242 return err == -EINPROGRESS ? 0 : err;
1200 break; 1243 break;
1244
1245 case OVS_ACTION_ATTR_PUSH_ETH:
1246 err = push_eth(skb, key, nla_data(a));
1247 break;
1248
1249 case OVS_ACTION_ATTR_POP_ETH:
1250 err = pop_eth(skb, key);
1251 break;
1201 } 1252 }
1202 1253
1203 if (unlikely(err)) { 1254 if (unlikely(err)) {
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index fa8760176b7d..1402f1be642d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -560,7 +560,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
560 struct sw_flow *flow; 560 struct sw_flow *flow;
561 struct sw_flow_actions *sf_acts; 561 struct sw_flow_actions *sf_acts;
562 struct datapath *dp; 562 struct datapath *dp;
563 struct ethhdr *eth;
564 struct vport *input_vport; 563 struct vport *input_vport;
565 u16 mru = 0; 564 u16 mru = 0;
566 int len; 565 int len;
@@ -581,17 +580,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
581 580
582 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len); 581 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
583 582
584 skb_reset_mac_header(packet);
585 eth = eth_hdr(packet);
586
587 /* Normally, setting the skb 'protocol' field would be handled by a
588 * call to eth_type_trans(), but it assumes there's a sending
589 * device, which we may not have. */
590 if (eth_proto_is_802_3(eth->h_proto))
591 packet->protocol = eth->h_proto;
592 else
593 packet->protocol = htons(ETH_P_802_2);
594
595 /* Set packet's mru */ 583 /* Set packet's mru */
596 if (a[OVS_PACKET_ATTR_MRU]) { 584 if (a[OVS_PACKET_ATTR_MRU]) {
597 mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]); 585 mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
@@ -618,6 +606,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
618 rcu_assign_pointer(flow->sf_acts, acts); 606 rcu_assign_pointer(flow->sf_acts, acts);
619 packet->priority = flow->key.phy.priority; 607 packet->priority = flow->key.phy.priority;
620 packet->mark = flow->key.phy.skb_mark; 608 packet->mark = flow->key.phy.skb_mark;
609 packet->protocol = flow->key.eth.type;
621 610
622 rcu_read_lock(); 611 rcu_read_lock();
623 dp = get_dp_rcu(net, ovs_header->dp_ifindex); 612 dp = get_dp_rcu(net, ovs_header->dp_ifindex);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 22087062bd10..08aa926cd5cf 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -334,14 +334,17 @@ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
334 return 1; 334 return 1;
335} 335}
336 336
337static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) 337static void clear_vlan(struct sw_flow_key *key)
338{ 338{
339 int res;
340
341 key->eth.vlan.tci = 0; 339 key->eth.vlan.tci = 0;
342 key->eth.vlan.tpid = 0; 340 key->eth.vlan.tpid = 0;
343 key->eth.cvlan.tci = 0; 341 key->eth.cvlan.tci = 0;
344 key->eth.cvlan.tpid = 0; 342 key->eth.cvlan.tpid = 0;
343}
344
345static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
346{
347 int res;
345 348
346 if (skb_vlan_tag_present(skb)) { 349 if (skb_vlan_tag_present(skb)) {
347 key->eth.vlan.tci = htons(skb->vlan_tci); 350 key->eth.vlan.tci = htons(skb->vlan_tci);
@@ -483,17 +486,20 @@ invalid:
483 * 486 *
484 * Returns 0 if successful, otherwise a negative errno value. 487 * Returns 0 if successful, otherwise a negative errno value.
485 * 488 *
486 * Initializes @skb header pointers as follows: 489 * Initializes @skb header fields as follows:
487 * 490 *
488 * - skb->mac_header: the Ethernet header. 491 * - skb->mac_header: the L2 header.
489 * 492 *
490 * - skb->network_header: just past the Ethernet header, or just past the 493 * - skb->network_header: just past the L2 header, or just past the
491 * VLAN header, to the first byte of the Ethernet payload. 494 * VLAN header, to the first byte of the L2 payload.
492 * 495 *
493 * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 496 * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
494 * on output, then just past the IP header, if one is present and 497 * on output, then just past the IP header, if one is present and
495 * of a correct length, otherwise the same as skb->network_header. 498 * of a correct length, otherwise the same as skb->network_header.
496 * For other key->eth.type values it is left untouched. 499 * For other key->eth.type values it is left untouched.
500 *
501 * - skb->protocol: the type of the data starting at skb->network_header.
502 * Equal to key->eth.type.
497 */ 503 */
498static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) 504static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
499{ 505{
@@ -505,28 +511,35 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
505 511
506 skb_reset_mac_header(skb); 512 skb_reset_mac_header(skb);
507 513
508 /* Link layer. We are guaranteed to have at least the 14 byte Ethernet 514 /* Link layer. */
509 * header in the linear data area. 515 clear_vlan(key);
510 */ 516 if (key->mac_proto == MAC_PROTO_NONE) {
511 eth = eth_hdr(skb); 517 if (unlikely(eth_type_vlan(skb->protocol)))
512 ether_addr_copy(key->eth.src, eth->h_source); 518 return -EINVAL;
513 ether_addr_copy(key->eth.dst, eth->h_dest);
514 519
515 __skb_pull(skb, 2 * ETH_ALEN); 520 skb_reset_network_header(skb);
516 /* We are going to push all headers that we pull, so no need to 521 } else {
517 * update skb->csum here. 522 eth = eth_hdr(skb);
518 */ 523 ether_addr_copy(key->eth.src, eth->h_source);
524 ether_addr_copy(key->eth.dst, eth->h_dest);
519 525
520 if (unlikely(parse_vlan(skb, key))) 526 __skb_pull(skb, 2 * ETH_ALEN);
521 return -ENOMEM; 527 /* We are going to push all headers that we pull, so no need to
528 * update skb->csum here.
529 */
522 530
523 key->eth.type = parse_ethertype(skb); 531 if (unlikely(parse_vlan(skb, key)))
524 if (unlikely(key->eth.type == htons(0))) 532 return -ENOMEM;
525 return -ENOMEM; 533
534 skb->protocol = parse_ethertype(skb);
535 if (unlikely(skb->protocol == htons(0)))
536 return -ENOMEM;
526 537
527 skb_reset_network_header(skb); 538 skb_reset_network_header(skb);
539 __skb_push(skb, skb->data - skb_mac_header(skb));
540 }
528 skb_reset_mac_len(skb); 541 skb_reset_mac_len(skb);
529 __skb_push(skb, skb->data - skb_mac_header(skb)); 542 key->eth.type = skb->protocol;
530 543
531 /* Network layer. */ 544 /* Network layer. */
532 if (key->eth.type == htons(ETH_P_IP)) { 545 if (key->eth.type == htons(ETH_P_IP)) {
@@ -721,9 +734,25 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
721 return key_extract(skb, key); 734 return key_extract(skb, key);
722} 735}
723 736
737static int key_extract_mac_proto(struct sk_buff *skb)
738{
739 switch (skb->dev->type) {
740 case ARPHRD_ETHER:
741 return MAC_PROTO_ETHERNET;
742 case ARPHRD_NONE:
743 if (skb->protocol == htons(ETH_P_TEB))
744 return MAC_PROTO_ETHERNET;
745 return MAC_PROTO_NONE;
746 }
747 WARN_ON_ONCE(1);
748 return -EINVAL;
749}
750
724int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, 751int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
725 struct sk_buff *skb, struct sw_flow_key *key) 752 struct sk_buff *skb, struct sw_flow_key *key)
726{ 753{
754 int res;
755
727 /* Extract metadata from packet. */ 756 /* Extract metadata from packet. */
728 if (tun_info) { 757 if (tun_info) {
729 key->tun_proto = ip_tunnel_info_af(tun_info); 758 key->tun_proto = ip_tunnel_info_af(tun_info);
@@ -751,6 +780,10 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
751 key->phy.skb_mark = skb->mark; 780 key->phy.skb_mark = skb->mark;
752 ovs_ct_fill_key(skb, key); 781 ovs_ct_fill_key(skb, key);
753 key->ovs_flow_hash = 0; 782 key->ovs_flow_hash = 0;
783 res = key_extract_mac_proto(skb);
784 if (res < 0)
785 return res;
786 key->mac_proto = res;
754 key->recirc_id = 0; 787 key->recirc_id = 0;
755 788
756 return key_extract(skb, key); 789 return key_extract(skb, key);
@@ -767,5 +800,29 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
767 if (err) 800 if (err)
768 return err; 801 return err;
769 802
803 if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
804 /* key_extract assumes that skb->protocol is set-up for
805 * layer 3 packets which is the case for other callers,
806 * in particular packets recieved from the network stack.
807 * Here the correct value can be set from the metadata
808 * extracted above.
809 */
810 skb->protocol = key->eth.type;
811 } else {
812 struct ethhdr *eth;
813
814 skb_reset_mac_header(skb);
815 eth = eth_hdr(skb);
816
817 /* Normally, setting the skb 'protocol' field would be
818 * handled by a call to eth_type_trans(), but it assumes
819 * there's a sending device, which we may not have.
820 */
821 if (eth_proto_is_802_3(eth->h_proto))
822 skb->protocol = eth->h_proto;
823 else
824 skb->protocol = htons(ETH_P_802_2);
825 }
826
770 return key_extract(skb, key); 827 return key_extract(skb, key);
771} 828}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index ae783f5c6695..f61cae7f9030 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -37,6 +37,12 @@
37 37
38struct sk_buff; 38struct sk_buff;
39 39
40enum sw_flow_mac_proto {
41 MAC_PROTO_NONE = 0,
42 MAC_PROTO_ETHERNET,
43};
44#define SW_FLOW_KEY_INVALID 0x80
45
40/* Store options at the end of the array if they are less than the 46/* Store options at the end of the array if they are less than the
41 * maximum size. This allows us to get the benefits of variable length 47 * maximum size. This allows us to get the benefits of variable length
42 * matching for small options. 48 * matching for small options.
@@ -68,6 +74,7 @@ struct sw_flow_key {
68 u32 skb_mark; /* SKB mark. */ 74 u32 skb_mark; /* SKB mark. */
69 u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ 75 u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
70 } __packed phy; /* Safe when right after 'tun_key'. */ 76 } __packed phy; /* Safe when right after 'tun_key'. */
77 u8 mac_proto; /* MAC layer protocol (e.g. Ethernet). */
71 u8 tun_proto; /* Protocol of encapsulating tunnel. */ 78 u8 tun_proto; /* Protocol of encapsulating tunnel. */
72 u32 ovs_flow_hash; /* Datapath computed hash value. */ 79 u32 ovs_flow_hash; /* Datapath computed hash value. */
73 u32 recirc_id; /* Recirculation ID. */ 80 u32 recirc_id; /* Recirculation ID. */
@@ -206,6 +213,21 @@ struct arp_eth_header {
206 unsigned char ar_tip[4]; /* target IP address */ 213 unsigned char ar_tip[4]; /* target IP address */
207} __packed; 214} __packed;
208 215
216static inline u8 ovs_key_mac_proto(const struct sw_flow_key *key)
217{
218 return key->mac_proto & ~SW_FLOW_KEY_INVALID;
219}
220
221static inline u16 __ovs_mac_header_len(u8 mac_proto)
222{
223 return mac_proto == MAC_PROTO_ETHERNET ? ETH_HLEN : 0;
224}
225
226static inline u16 ovs_mac_header_len(const struct sw_flow_key *key)
227{
228 return __ovs_mac_header_len(ovs_key_mac_proto(key));
229}
230
209static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid) 231static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid)
210{ 232{
211 return sfid->ufid_len; 233 return sfid->ufid_len;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index ae25ded82b3b..d19044f2b1f4 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -123,7 +123,7 @@ static void update_range(struct sw_flow_match *match,
123static bool match_validate(const struct sw_flow_match *match, 123static bool match_validate(const struct sw_flow_match *match,
124 u64 key_attrs, u64 mask_attrs, bool log) 124 u64 key_attrs, u64 mask_attrs, bool log)
125{ 125{
126 u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; 126 u64 key_expected = 0;
127 u64 mask_allowed = key_attrs; /* At most allow all key attributes */ 127 u64 mask_allowed = key_attrs; /* At most allow all key attributes */
128 128
129 /* The following mask attributes allowed only if they 129 /* The following mask attributes allowed only if they
@@ -969,10 +969,33 @@ static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
969 return 0; 969 return 0;
970} 970}
971 971
972static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
973 u64 *attrs, const struct nlattr **a,
974 bool is_mask, bool log)
975{
976 __be16 eth_type;
977
978 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
979 if (is_mask) {
980 /* Always exact match EtherType. */
981 eth_type = htons(0xffff);
982 } else if (!eth_proto_is_802_3(eth_type)) {
983 OVS_NLERR(log, "EtherType %x is less than min %x",
984 ntohs(eth_type), ETH_P_802_3_MIN);
985 return -EINVAL;
986 }
987
988 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
989 *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
990 return 0;
991}
992
972static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, 993static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
973 u64 *attrs, const struct nlattr **a, 994 u64 *attrs, const struct nlattr **a,
974 bool is_mask, bool log) 995 bool is_mask, bool log)
975{ 996{
997 u8 mac_proto = MAC_PROTO_ETHERNET;
998
976 if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { 999 if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
977 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); 1000 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
978 1001
@@ -1059,6 +1082,21 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
1059 sizeof(*cl), is_mask); 1082 sizeof(*cl), is_mask);
1060 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS); 1083 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
1061 } 1084 }
1085
1086 /* For layer 3 packets the Ethernet type is provided
1087 * and treated as metadata but no MAC addresses are provided.
1088 */
1089 if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
1090 (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)))
1091 mac_proto = MAC_PROTO_NONE;
1092
1093 /* Always exact match mac_proto */
1094 SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask);
1095
1096 if (mac_proto == MAC_PROTO_NONE)
1097 return parse_eth_type_from_nlattrs(match, attrs, a, is_mask,
1098 log);
1099
1062 return 0; 1100 return 0;
1063} 1101}
1064 1102
@@ -1081,33 +1119,26 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
1081 SW_FLOW_KEY_MEMCPY(match, eth.dst, 1119 SW_FLOW_KEY_MEMCPY(match, eth.dst,
1082 eth_key->eth_dst, ETH_ALEN, is_mask); 1120 eth_key->eth_dst, ETH_ALEN, is_mask);
1083 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); 1121 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
1084 }
1085 1122
1086 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { 1123 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
1087 /* VLAN attribute is always parsed before getting here since it 1124 /* VLAN attribute is always parsed before getting here since it
1088 * may occur multiple times. 1125 * may occur multiple times.
1089 */ 1126 */
1090 OVS_NLERR(log, "VLAN attribute unexpected."); 1127 OVS_NLERR(log, "VLAN attribute unexpected.");
1091 return -EINVAL;
1092 }
1093
1094 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
1095 __be16 eth_type;
1096
1097 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1098 if (is_mask) {
1099 /* Always exact match EtherType. */
1100 eth_type = htons(0xffff);
1101 } else if (!eth_proto_is_802_3(eth_type)) {
1102 OVS_NLERR(log, "EtherType %x is less than min %x",
1103 ntohs(eth_type), ETH_P_802_3_MIN);
1104 return -EINVAL; 1128 return -EINVAL;
1105 } 1129 }
1106 1130
1107 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); 1131 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
1108 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1132 err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask,
1109 } else if (!is_mask) { 1133 log);
1110 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); 1134 if (err)
1135 return err;
1136 } else if (!is_mask) {
1137 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
1138 }
1139 } else if (!match->key->eth.type) {
1140 OVS_NLERR(log, "Either Ethernet header or EtherType is required.");
1141 return -EINVAL;
1111 } 1142 }
1112 1143
1113 if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { 1144 if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
@@ -1556,42 +1587,44 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
1556 if (ovs_ct_put_key(output, skb)) 1587 if (ovs_ct_put_key(output, skb))
1557 goto nla_put_failure; 1588 goto nla_put_failure;
1558 1589
1559 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 1590 if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
1560 if (!nla) 1591 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
1561 goto nla_put_failure; 1592 if (!nla)
1562
1563 eth_key = nla_data(nla);
1564 ether_addr_copy(eth_key->eth_src, output->eth.src);
1565 ether_addr_copy(eth_key->eth_dst, output->eth.dst);
1566
1567 if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
1568 if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
1569 goto nla_put_failure; 1593 goto nla_put_failure;
1570 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
1571 if (!swkey->eth.vlan.tci)
1572 goto unencap;
1573 1594
1574 if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { 1595 eth_key = nla_data(nla);
1575 if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) 1596 ether_addr_copy(eth_key->eth_src, output->eth.src);
1597 ether_addr_copy(eth_key->eth_dst, output->eth.dst);
1598
1599 if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
1600 if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
1576 goto nla_put_failure; 1601 goto nla_put_failure;
1577 in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 1602 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
1578 if (!swkey->eth.cvlan.tci) 1603 if (!swkey->eth.vlan.tci)
1579 goto unencap; 1604 goto unencap;
1605
1606 if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
1607 if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
1608 goto nla_put_failure;
1609 in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
1610 if (!swkey->eth.cvlan.tci)
1611 goto unencap;
1612 }
1580 } 1613 }
1581 }
1582 1614
1583 if (swkey->eth.type == htons(ETH_P_802_2)) { 1615 if (swkey->eth.type == htons(ETH_P_802_2)) {
1584 /* 1616 /*
1585 * Ethertype 802.2 is represented in the netlink with omitted 1617 * Ethertype 802.2 is represented in the netlink with omitted
1586 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and 1618 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
1587 * 0xffff in the mask attribute. Ethertype can also 1619 * 0xffff in the mask attribute. Ethertype can also
1588 * be wildcarded. 1620 * be wildcarded.
1589 */ 1621 */
1590 if (is_mask && output->eth.type) 1622 if (is_mask && output->eth.type)
1591 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, 1623 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
1592 output->eth.type)) 1624 output->eth.type))
1593 goto nla_put_failure; 1625 goto nla_put_failure;
1594 goto unencap; 1626 goto unencap;
1627 }
1595 } 1628 }
1596 1629
1597 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) 1630 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
@@ -2126,8 +2159,8 @@ static bool validate_masked(u8 *data, int len)
2126 2159
2127static int validate_set(const struct nlattr *a, 2160static int validate_set(const struct nlattr *a,
2128 const struct sw_flow_key *flow_key, 2161 const struct sw_flow_key *flow_key,
2129 struct sw_flow_actions **sfa, 2162 struct sw_flow_actions **sfa, bool *skip_copy,
2130 bool *skip_copy, __be16 eth_type, bool masked, bool log) 2163 u8 mac_proto, __be16 eth_type, bool masked, bool log)
2131{ 2164{
2132 const struct nlattr *ovs_key = nla_data(a); 2165 const struct nlattr *ovs_key = nla_data(a);
2133 int key_type = nla_type(ovs_key); 2166 int key_type = nla_type(ovs_key);
@@ -2157,9 +2190,12 @@ static int validate_set(const struct nlattr *a,
2157 case OVS_KEY_ATTR_SKB_MARK: 2190 case OVS_KEY_ATTR_SKB_MARK:
2158 case OVS_KEY_ATTR_CT_MARK: 2191 case OVS_KEY_ATTR_CT_MARK:
2159 case OVS_KEY_ATTR_CT_LABELS: 2192 case OVS_KEY_ATTR_CT_LABELS:
2160 case OVS_KEY_ATTR_ETHERNET:
2161 break; 2193 break;
2162 2194
2195 case OVS_KEY_ATTR_ETHERNET:
2196 if (mac_proto != MAC_PROTO_ETHERNET)
2197 return -EINVAL;
2198
2163 case OVS_KEY_ATTR_TUNNEL: 2199 case OVS_KEY_ATTR_TUNNEL:
2164 if (masked) 2200 if (masked)
2165 return -EINVAL; /* Masked tunnel set not supported. */ 2201 return -EINVAL; /* Masked tunnel set not supported. */
@@ -2324,6 +2360,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2324 int depth, struct sw_flow_actions **sfa, 2360 int depth, struct sw_flow_actions **sfa,
2325 __be16 eth_type, __be16 vlan_tci, bool log) 2361 __be16 eth_type, __be16 vlan_tci, bool log)
2326{ 2362{
2363 u8 mac_proto = ovs_key_mac_proto(key);
2327 const struct nlattr *a; 2364 const struct nlattr *a;
2328 int rem, err; 2365 int rem, err;
2329 2366
@@ -2346,6 +2383,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2346 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash), 2383 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
2347 [OVS_ACTION_ATTR_CT] = (u32)-1, 2384 [OVS_ACTION_ATTR_CT] = (u32)-1,
2348 [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc), 2385 [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
2386 [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
2387 [OVS_ACTION_ATTR_POP_ETH] = 0,
2349 }; 2388 };
2350 const struct ovs_action_push_vlan *vlan; 2389 const struct ovs_action_push_vlan *vlan;
2351 int type = nla_type(a); 2390 int type = nla_type(a);
@@ -2394,10 +2433,14 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2394 } 2433 }
2395 2434
2396 case OVS_ACTION_ATTR_POP_VLAN: 2435 case OVS_ACTION_ATTR_POP_VLAN:
2436 if (mac_proto != MAC_PROTO_ETHERNET)
2437 return -EINVAL;
2397 vlan_tci = htons(0); 2438 vlan_tci = htons(0);
2398 break; 2439 break;
2399 2440
2400 case OVS_ACTION_ATTR_PUSH_VLAN: 2441 case OVS_ACTION_ATTR_PUSH_VLAN:
2442 if (mac_proto != MAC_PROTO_ETHERNET)
2443 return -EINVAL;
2401 vlan = nla_data(a); 2444 vlan = nla_data(a);
2402 if (!eth_type_vlan(vlan->vlan_tpid)) 2445 if (!eth_type_vlan(vlan->vlan_tpid))
2403 return -EINVAL; 2446 return -EINVAL;
@@ -2447,14 +2490,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2447 2490
2448 case OVS_ACTION_ATTR_SET: 2491 case OVS_ACTION_ATTR_SET:
2449 err = validate_set(a, key, sfa, 2492 err = validate_set(a, key, sfa,
2450 &skip_copy, eth_type, false, log); 2493 &skip_copy, mac_proto, eth_type,
2494 false, log);
2451 if (err) 2495 if (err)
2452 return err; 2496 return err;
2453 break; 2497 break;
2454 2498
2455 case OVS_ACTION_ATTR_SET_MASKED: 2499 case OVS_ACTION_ATTR_SET_MASKED:
2456 err = validate_set(a, key, sfa, 2500 err = validate_set(a, key, sfa,
2457 &skip_copy, eth_type, true, log); 2501 &skip_copy, mac_proto, eth_type,
2502 true, log);
2458 if (err) 2503 if (err)
2459 return err; 2504 return err;
2460 break; 2505 break;
@@ -2474,6 +2519,22 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2474 skip_copy = true; 2519 skip_copy = true;
2475 break; 2520 break;
2476 2521
2522 case OVS_ACTION_ATTR_PUSH_ETH:
2523 /* Disallow pushing an Ethernet header if one
2524 * is already present */
2525 if (mac_proto != MAC_PROTO_NONE)
2526 return -EINVAL;
2527 mac_proto = MAC_PROTO_NONE;
2528 break;
2529
2530 case OVS_ACTION_ATTR_POP_ETH:
2531 if (mac_proto != MAC_PROTO_ETHERNET)
2532 return -EINVAL;
2533 if (vlan_tci & htons(VLAN_TAG_PRESENT))
2534 return -EINVAL;
2535 mac_proto = MAC_PROTO_ETHERNET;
2536 break;
2537
2477 default: 2538 default:
2478 OVS_NLERR(log, "Unknown Action type %d", type); 2539 OVS_NLERR(log, "Unknown Action type %d", type);
2479 return -EINVAL; 2540 return -EINVAL;
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index e825753de1e0..0389398fa4ab 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -57,8 +57,10 @@ static void netdev_port_receive(struct sk_buff *skb)
57 if (unlikely(!skb)) 57 if (unlikely(!skb))
58 return; 58 return;
59 59
60 skb_push(skb, ETH_HLEN); 60 if (skb->dev->type == ARPHRD_ETHER) {
61 skb_postpush_rcsum(skb, skb->data, ETH_HLEN); 61 skb_push(skb, ETH_HLEN);
62 skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
63 }
62 ovs_vport_receive(vport, skb, skb_tunnel_info(skb)); 64 ovs_vport_receive(vport, skb, skb_tunnel_info(skb));
63 return; 65 return;
64error: 66error:
@@ -97,7 +99,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
97 } 99 }
98 100
99 if (vport->dev->flags & IFF_LOOPBACK || 101 if (vport->dev->flags & IFF_LOOPBACK ||
100 vport->dev->type != ARPHRD_ETHER || 102 (vport->dev->type != ARPHRD_ETHER &&
103 vport->dev->type != ARPHRD_NONE) ||
101 ovs_is_internal_dev(vport->dev)) { 104 ovs_is_internal_dev(vport->dev)) {
102 err = -EINVAL; 105 err = -EINVAL;
103 goto error_put; 106 goto error_put;
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 9bb85b35a1fb..b6c8524032a0 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -464,9 +464,10 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
464 return 0; 464 return 0;
465} 465}
466 466
467static unsigned int packet_length(const struct sk_buff *skb) 467static unsigned int packet_length(const struct sk_buff *skb,
468 struct net_device *dev)
468{ 469{
469 unsigned int length = skb->len - ETH_HLEN; 470 unsigned int length = skb->len - dev->hard_header_len;
470 471
471 if (!skb_vlan_tag_present(skb) && 472 if (!skb_vlan_tag_present(skb) &&
472 eth_type_vlan(skb->protocol)) 473 eth_type_vlan(skb->protocol))
@@ -480,14 +481,34 @@ static unsigned int packet_length(const struct sk_buff *skb)
480 return length; 481 return length;
481} 482}
482 483
483void ovs_vport_send(struct vport *vport, struct sk_buff *skb) 484void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
484{ 485{
485 int mtu = vport->dev->mtu; 486 int mtu = vport->dev->mtu;
486 487
487 if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { 488 switch (vport->dev->type) {
489 case ARPHRD_NONE:
490 if (mac_proto == MAC_PROTO_ETHERNET) {
491 skb_reset_network_header(skb);
492 skb_reset_mac_len(skb);
493 skb->protocol = htons(ETH_P_TEB);
494 } else if (mac_proto != MAC_PROTO_NONE) {
495 WARN_ON_ONCE(1);
496 goto drop;
497 }
498 break;
499 case ARPHRD_ETHER:
500 if (mac_proto != MAC_PROTO_ETHERNET)
501 goto drop;
502 break;
503 default:
504 goto drop;
505 }
506
507 if (unlikely(packet_length(skb, vport->dev) > mtu &&
508 !skb_is_gso(skb))) {
488 net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", 509 net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
489 vport->dev->name, 510 vport->dev->name,
490 packet_length(skb), mtu); 511 packet_length(skb, vport->dev), mtu);
491 vport->dev->stats.tx_errors++; 512 vport->dev->stats.tx_errors++;
492 goto drop; 513 goto drop;
493 } 514 }
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 46e5b69927c7..cda66c26ad08 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -197,6 +197,6 @@ int __ovs_vport_ops_register(struct vport_ops *ops);
197 }) 197 })
198 198
199void ovs_vport_ops_unregister(struct vport_ops *ops); 199void ovs_vport_ops_unregister(struct vport_ops *ops);
200void ovs_vport_send(struct vport *vport, struct sk_buff *skb); 200void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto);
201 201
202#endif /* vport.h */ 202#endif /* vport.h */