aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2016-04-06 16:50:33 -0400
committer David S. Miller <davem@davemloft.net> 2016-04-06 16:50:33 -0400
commit 6f5556356a1ed288fd24f0521a9c606632ad9e1f (patch)
tree 1e18cdabf1c9d9ef17e26c6480e629465447f77f
parent 8a21ec4e0abb99884ef2da3e4f950025f3bf7fd3 (diff)
parent e1e5314de08ba6003b358125eafc9ad9e75a950c (diff)
Merge branch 'vxlan-gpe'
Jiri Benc says:

====================
vxlan: implement Generic Protocol Extension (GPE)

v3: just rebased on top of the current net-next, no changes

This patchset implements VXLAN-GPE. It follows the same model as the tun/tap driver: depending on the chosen mode, the vxlan interface is created either as ARPHRD_ETHER (non-GPE) or ARPHRD_NONE (GPE).

Note that the internal fdb control plane cannot be used together with VXLAN-GPE, and any attempt to configure it will be rejected by the driver. In fact, COLLECT_METADATA is required to be set for now. This can be relaxed in the future by adding support for static PtP configuration; it will be backward compatible and won't affect existing users.

The previous version of the patchset supported two GPE modes, L2 and L3. The L2 mode (now called "ether mode" in the code) was removed from this version. It can be easily added later if there's demand. The L3 mode is now called "raw mode" and also supports encapsulated Ethernet headers (via ETH_P_TEB).

The only limitation of not having "ether mode" for GPE is for ip route based encapsulation: with such a setup, only IP packets can be encapsulated, meaning no Ethernet encapsulation. It seems there's not much use for this, though. If it turns out to be useful, we'll add it.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- drivers/net/vxlan.c 210
-rw-r--r-- include/net/ip_tunnels.h 11
-rw-r--r-- include/net/vxlan.h 68
-rw-r--r-- include/uapi/linux/if_link.h 1
-rw-r--r-- net/ipv4/ip_tunnel_core.c 8
5 files changed, 258 insertions, 40 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 1c0fa364323e..51cccddfe403 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1192,6 +1192,45 @@ out:
1192 unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS; 1192 unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
1193} 1193}
1194 1194
1195static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
1196 __be32 *protocol,
1197 struct sk_buff *skb, u32 vxflags)
1198{
1199 struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;
1200
1201 /* Need to have Next Protocol set for interfaces in GPE mode. */
1202 if (!gpe->np_applied)
1203 return false;
1204 /* "The initial version is 0. If a receiver does not support the
1205 * version indicated it MUST drop the packet.
1206 */
1207 if (gpe->version != 0)
1208 return false;
1209 /* "When the O bit is set to 1, the packet is an OAM packet and OAM
1210 * processing MUST occur." However, we don't implement OAM
1211 * processing, thus drop the packet.
1212 */
1213 if (gpe->oam_flag)
1214 return false;
1215
1216 switch (gpe->next_protocol) {
1217 case VXLAN_GPE_NP_IPV4:
1218 *protocol = htons(ETH_P_IP);
1219 break;
1220 case VXLAN_GPE_NP_IPV6:
1221 *protocol = htons(ETH_P_IPV6);
1222 break;
1223 case VXLAN_GPE_NP_ETHERNET:
1224 *protocol = htons(ETH_P_TEB);
1225 break;
1226 default:
1227 return false;
1228 }
1229
1230 unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
1231 return true;
1232}
1233
1195static bool vxlan_set_mac(struct vxlan_dev *vxlan, 1234static bool vxlan_set_mac(struct vxlan_dev *vxlan,
1196 struct vxlan_sock *vs, 1235 struct vxlan_sock *vs,
1197 struct sk_buff *skb) 1236 struct sk_buff *skb)
@@ -1257,9 +1296,11 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
1257 struct vxlanhdr unparsed; 1296 struct vxlanhdr unparsed;
1258 struct vxlan_metadata _md; 1297 struct vxlan_metadata _md;
1259 struct vxlan_metadata *md = &_md; 1298 struct vxlan_metadata *md = &_md;
1299 __be32 protocol = htons(ETH_P_TEB);
1300 bool raw_proto = false;
1260 void *oiph; 1301 void *oiph;
1261 1302
1262 /* Need Vxlan and inner Ethernet header to be present */ 1303 /* Need UDP and VXLAN header to be present */
1263 if (!pskb_may_pull(skb, VXLAN_HLEN)) 1304 if (!pskb_may_pull(skb, VXLAN_HLEN))
1264 return 1; 1305 return 1;
1265 1306
@@ -1283,9 +1324,18 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
1283 if (!vxlan) 1324 if (!vxlan)
1284 goto drop; 1325 goto drop;
1285 1326
1286 if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB), 1327 /* For backwards compatibility, only allow reserved fields to be
1287 !net_eq(vxlan->net, dev_net(vxlan->dev)))) 1328 * used by VXLAN extensions if explicitly requested.
1288 goto drop; 1329 */
1330 if (vs->flags & VXLAN_F_GPE) {
1331 if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags))
1332 goto drop;
1333 raw_proto = true;
1334 }
1335
1336 if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto,
1337 !net_eq(vxlan->net, dev_net(vxlan->dev))))
1338 goto drop;
1289 1339
1290 if (vxlan_collect_metadata(vs)) { 1340 if (vxlan_collect_metadata(vs)) {
1291 __be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni); 1341 __be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
@@ -1304,14 +1354,14 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
1304 memset(md, 0, sizeof(*md)); 1354 memset(md, 0, sizeof(*md));
1305 } 1355 }
1306 1356
1307 /* For backwards compatibility, only allow reserved fields to be
1308 * used by VXLAN extensions if explicitly requested.
1309 */
1310 if (vs->flags & VXLAN_F_REMCSUM_RX) 1357 if (vs->flags & VXLAN_F_REMCSUM_RX)
1311 if (!vxlan_remcsum(&unparsed, skb, vs->flags)) 1358 if (!vxlan_remcsum(&unparsed, skb, vs->flags))
1312 goto drop; 1359 goto drop;
1313 if (vs->flags & VXLAN_F_GBP) 1360 if (vs->flags & VXLAN_F_GBP)
1314 vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md); 1361 vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md);
1362 /* Note that GBP and GPE can never be active together. This is
1363 * ensured in vxlan_dev_configure.
1364 */
1315 1365
1316 if (unparsed.vx_flags || unparsed.vx_vni) { 1366 if (unparsed.vx_flags || unparsed.vx_vni) {
1317 /* If there are any unprocessed flags remaining treat 1367 /* If there are any unprocessed flags remaining treat
@@ -1325,8 +1375,13 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
1325 goto drop; 1375 goto drop;
1326 } 1376 }
1327 1377
1328 if (!vxlan_set_mac(vxlan, vs, skb)) 1378 if (!raw_proto) {
1329 goto drop; 1379 if (!vxlan_set_mac(vxlan, vs, skb))
1380 goto drop;
1381 } else {
1382 skb->dev = vxlan->dev;
1383 skb->pkt_type = PACKET_HOST;
1384 }
1330 1385
1331 oiph = skb_network_header(skb); 1386 oiph = skb_network_header(skb);
1332 skb_reset_network_header(skb); 1387 skb_reset_network_header(skb);
@@ -1685,6 +1740,27 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
1685 gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); 1740 gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
1686} 1741}
1687 1742
1743static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags,
1744 __be16 protocol)
1745{
1746 struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh;
1747
1748 gpe->np_applied = 1;
1749
1750 switch (protocol) {
1751 case htons(ETH_P_IP):
1752 gpe->next_protocol = VXLAN_GPE_NP_IPV4;
1753 return 0;
1754 case htons(ETH_P_IPV6):
1755 gpe->next_protocol = VXLAN_GPE_NP_IPV6;
1756 return 0;
1757 case htons(ETH_P_TEB):
1758 gpe->next_protocol = VXLAN_GPE_NP_ETHERNET;
1759 return 0;
1760 }
1761 return -EPFNOSUPPORT;
1762}
1763
1688static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, 1764static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
1689 int iphdr_len, __be32 vni, 1765 int iphdr_len, __be32 vni,
1690 struct vxlan_metadata *md, u32 vxflags, 1766 struct vxlan_metadata *md, u32 vxflags,
@@ -1694,6 +1770,7 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
1694 int min_headroom; 1770 int min_headroom;
1695 int err; 1771 int err;
1696 int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; 1772 int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
1773 __be16 inner_protocol = htons(ETH_P_TEB);
1697 1774
1698 if ((vxflags & VXLAN_F_REMCSUM_TX) && 1775 if ((vxflags & VXLAN_F_REMCSUM_TX) &&
1699 skb->ip_summed == CHECKSUM_PARTIAL) { 1776 skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -1712,10 +1789,8 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
1712 1789
1713 /* Need space for new headers (invalidates iph ptr) */ 1790 /* Need space for new headers (invalidates iph ptr) */
1714 err = skb_cow_head(skb, min_headroom); 1791 err = skb_cow_head(skb, min_headroom);
1715 if (unlikely(err)) { 1792 if (unlikely(err))
1716 kfree_skb(skb); 1793 goto out_free;
1717 return err;
1718 }
1719 1794
1720 skb = vlan_hwaccel_push_inside(skb); 1795 skb = vlan_hwaccel_push_inside(skb);
1721 if (WARN_ON(!skb)) 1796 if (WARN_ON(!skb))
@@ -1744,9 +1819,19 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
1744 1819
1745 if (vxflags & VXLAN_F_GBP) 1820 if (vxflags & VXLAN_F_GBP)
1746 vxlan_build_gbp_hdr(vxh, vxflags, md); 1821 vxlan_build_gbp_hdr(vxh, vxflags, md);
1822 if (vxflags & VXLAN_F_GPE) {
1823 err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol);
1824 if (err < 0)
1825 goto out_free;
1826 inner_protocol = skb->protocol;
1827 }
1747 1828
1748 skb_set_inner_protocol(skb, htons(ETH_P_TEB)); 1829 skb_set_inner_protocol(skb, inner_protocol);
1749 return 0; 1830 return 0;
1831
1832out_free:
1833 kfree_skb(skb);
1834 return err;
1750} 1835}
1751 1836
1752static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, 1837static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
@@ -2106,9 +2191,17 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
2106 info = skb_tunnel_info(skb); 2191 info = skb_tunnel_info(skb);
2107 2192
2108 skb_reset_mac_header(skb); 2193 skb_reset_mac_header(skb);
2109 eth = eth_hdr(skb);
2110 2194
2111 if ((vxlan->flags & VXLAN_F_PROXY)) { 2195 if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
2196 if (info && info->mode & IP_TUNNEL_INFO_TX)
2197 vxlan_xmit_one(skb, dev, NULL, false);
2198 else
2199 kfree_skb(skb);
2200 return NETDEV_TX_OK;
2201 }
2202
2203 if (vxlan->flags & VXLAN_F_PROXY) {
2204 eth = eth_hdr(skb);
2112 if (ntohs(eth->h_proto) == ETH_P_ARP) 2205 if (ntohs(eth->h_proto) == ETH_P_ARP)
2113 return arp_reduce(dev, skb); 2206 return arp_reduce(dev, skb);
2114#if IS_ENABLED(CONFIG_IPV6) 2207#if IS_ENABLED(CONFIG_IPV6)
@@ -2123,18 +2216,10 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
2123 msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) 2216 msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
2124 return neigh_reduce(dev, skb); 2217 return neigh_reduce(dev, skb);
2125 } 2218 }
2126 eth = eth_hdr(skb);
2127#endif 2219#endif
2128 } 2220 }
2129 2221
2130 if (vxlan->flags & VXLAN_F_COLLECT_METADATA) { 2222 eth = eth_hdr(skb);
2131 if (info && info->mode & IP_TUNNEL_INFO_TX)
2132 vxlan_xmit_one(skb, dev, NULL, false);
2133 else
2134 kfree_skb(skb);
2135 return NETDEV_TX_OK;
2136 }
2137
2138 f = vxlan_find_mac(vxlan, eth->h_dest); 2223 f = vxlan_find_mac(vxlan, eth->h_dest);
2139 did_rsc = false; 2224 did_rsc = false;
2140 2225
@@ -2404,7 +2489,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
2404 return 0; 2489 return 0;
2405} 2490}
2406 2491
2407static const struct net_device_ops vxlan_netdev_ops = { 2492static const struct net_device_ops vxlan_netdev_ether_ops = {
2408 .ndo_init = vxlan_init, 2493 .ndo_init = vxlan_init,
2409 .ndo_uninit = vxlan_uninit, 2494 .ndo_uninit = vxlan_uninit,
2410 .ndo_open = vxlan_open, 2495 .ndo_open = vxlan_open,
@@ -2421,6 +2506,17 @@ static const struct net_device_ops vxlan_netdev_ops = {
2421 .ndo_fill_metadata_dst = vxlan_fill_metadata_dst, 2506 .ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
2422}; 2507};
2423 2508
2509static const struct net_device_ops vxlan_netdev_raw_ops = {
2510 .ndo_init = vxlan_init,
2511 .ndo_uninit = vxlan_uninit,
2512 .ndo_open = vxlan_open,
2513 .ndo_stop = vxlan_stop,
2514 .ndo_start_xmit = vxlan_xmit,
2515 .ndo_get_stats64 = ip_tunnel_get_stats64,
2516 .ndo_change_mtu = vxlan_change_mtu,
2517 .ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
2518};
2519
2424/* Info for udev, that this is a virtual tunnel endpoint */ 2520/* Info for udev, that this is a virtual tunnel endpoint */
2425static struct device_type vxlan_type = { 2521static struct device_type vxlan_type = {
2426 .name = "vxlan", 2522 .name = "vxlan",
@@ -2458,10 +2554,6 @@ static void vxlan_setup(struct net_device *dev)
2458 struct vxlan_dev *vxlan = netdev_priv(dev); 2554 struct vxlan_dev *vxlan = netdev_priv(dev);
2459 unsigned int h; 2555 unsigned int h;
2460 2556
2461 eth_hw_addr_random(dev);
2462 ether_setup(dev);
2463
2464 dev->netdev_ops = &vxlan_netdev_ops;
2465 dev->destructor = free_netdev; 2557 dev->destructor = free_netdev;
2466 SET_NETDEV_DEVTYPE(dev, &vxlan_type); 2558 SET_NETDEV_DEVTYPE(dev, &vxlan_type);
2467 2559
@@ -2476,8 +2568,7 @@ static void vxlan_setup(struct net_device *dev)
2476 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 2568 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
2477 dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; 2569 dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
2478 netif_keep_dst(dev); 2570 netif_keep_dst(dev);
2479 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 2571 dev->priv_flags |= IFF_NO_QUEUE;
2480 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
2481 2572
2482 INIT_LIST_HEAD(&vxlan->next); 2573 INIT_LIST_HEAD(&vxlan->next);
2483 spin_lock_init(&vxlan->hash_lock); 2574 spin_lock_init(&vxlan->hash_lock);
@@ -2496,6 +2587,26 @@ static void vxlan_setup(struct net_device *dev)
2496 INIT_HLIST_HEAD(&vxlan->fdb_head[h]); 2587 INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
2497} 2588}
2498 2589
2590static void vxlan_ether_setup(struct net_device *dev)
2591{
2592 eth_hw_addr_random(dev);
2593 ether_setup(dev);
2594 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
2595 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
2596 dev->netdev_ops = &vxlan_netdev_ether_ops;
2597}
2598
2599static void vxlan_raw_setup(struct net_device *dev)
2600{
2601 dev->type = ARPHRD_NONE;
2602 dev->hard_header_len = 0;
2603 dev->addr_len = 0;
2604 dev->mtu = ETH_DATA_LEN;
2605 dev->tx_queue_len = 1000;
2606 dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
2607 dev->netdev_ops = &vxlan_netdev_raw_ops;
2608}
2609
2499static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { 2610static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
2500 [IFLA_VXLAN_ID] = { .type = NLA_U32 }, 2611 [IFLA_VXLAN_ID] = { .type = NLA_U32 },
2501 [IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, 2612 [IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
@@ -2522,6 +2633,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
2522 [IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 }, 2633 [IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 },
2523 [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 }, 2634 [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 },
2524 [IFLA_VXLAN_GBP] = { .type = NLA_FLAG, }, 2635 [IFLA_VXLAN_GBP] = { .type = NLA_FLAG, },
2636 [IFLA_VXLAN_GPE] = { .type = NLA_FLAG, },
2525 [IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG }, 2637 [IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
2526}; 2638};
2527 2639
@@ -2722,6 +2834,21 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
2722 __be16 default_port = vxlan->cfg.dst_port; 2834 __be16 default_port = vxlan->cfg.dst_port;
2723 struct net_device *lowerdev = NULL; 2835 struct net_device *lowerdev = NULL;
2724 2836
2837 if (conf->flags & VXLAN_F_GPE) {
2838 if (conf->flags & ~VXLAN_F_ALLOWED_GPE)
2839 return -EINVAL;
2840 /* For now, allow GPE only together with COLLECT_METADATA.
2841 * This can be relaxed later; in such case, the other side
2842 * of the PtP link will have to be provided.
2843 */
2844 if (!(conf->flags & VXLAN_F_COLLECT_METADATA))
2845 return -EINVAL;
2846
2847 vxlan_raw_setup(dev);
2848 } else {
2849 vxlan_ether_setup(dev);
2850 }
2851
2725 vxlan->net = src_net; 2852 vxlan->net = src_net;
2726 2853
2727 dst->remote_vni = conf->vni; 2854 dst->remote_vni = conf->vni;
@@ -2783,8 +2910,12 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
2783 dev->needed_headroom = needed_headroom; 2910 dev->needed_headroom = needed_headroom;
2784 2911
2785 memcpy(&vxlan->cfg, conf, sizeof(*conf)); 2912 memcpy(&vxlan->cfg, conf, sizeof(*conf));
2786 if (!vxlan->cfg.dst_port) 2913 if (!vxlan->cfg.dst_port) {
2787 vxlan->cfg.dst_port = default_port; 2914 if (conf->flags & VXLAN_F_GPE)
2915 vxlan->cfg.dst_port = 4790; /* IANA assigned VXLAN-GPE port */
2916 else
2917 vxlan->cfg.dst_port = default_port;
2918 }
2788 vxlan->flags |= conf->flags; 2919 vxlan->flags |= conf->flags;
2789 2920
2790 if (!vxlan->cfg.age_interval) 2921 if (!vxlan->cfg.age_interval)
@@ -2955,6 +3086,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
2955 if (data[IFLA_VXLAN_GBP]) 3086 if (data[IFLA_VXLAN_GBP])
2956 conf.flags |= VXLAN_F_GBP; 3087 conf.flags |= VXLAN_F_GBP;
2957 3088
3089 if (data[IFLA_VXLAN_GPE])
3090 conf.flags |= VXLAN_F_GPE;
3091
2958 if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) 3092 if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL])
2959 conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL; 3093 conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL;
2960 3094
@@ -2971,6 +3105,10 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
2971 case -EEXIST: 3105 case -EEXIST:
2972 pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni)); 3106 pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni));
2973 break; 3107 break;
3108
3109 case -EINVAL:
3110 pr_info("unsupported combination of extensions\n");
3111 break;
2974 } 3112 }
2975 3113
2976 return err; 3114 return err;
@@ -3098,6 +3236,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
3098 nla_put_flag(skb, IFLA_VXLAN_GBP)) 3236 nla_put_flag(skb, IFLA_VXLAN_GBP))
3099 goto nla_put_failure; 3237 goto nla_put_failure;
3100 3238
3239 if (vxlan->flags & VXLAN_F_GPE &&
3240 nla_put_flag(skb, IFLA_VXLAN_GPE))
3241 goto nla_put_failure;
3242
3101 if (vxlan->flags & VXLAN_F_REMCSUM_NOPARTIAL && 3243 if (vxlan->flags & VXLAN_F_REMCSUM_NOPARTIAL &&
3102 nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL)) 3244 nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
3103 goto nla_put_failure; 3245 goto nla_put_failure;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 56050f913339..16435d8b1f93 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -295,8 +295,15 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
295 return INET_ECN_encapsulate(tos, inner); 295 return INET_ECN_encapsulate(tos, inner);
296} 296}
297 297
298int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, 298int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
299 bool xnet); 299 __be16 inner_proto, bool raw_proto, bool xnet);
300
301static inline int iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
302 __be16 inner_proto, bool xnet)
303{
304 return __iptunnel_pull_header(skb, hdr_len, inner_proto, false, xnet);
305}
306
300void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, 307void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
301 __be32 src, __be32 dst, u8 proto, 308 __be32 src, __be32 dst, u8 proto,
302 u8 tos, u8 ttl, __be16 df, bool xnet); 309 u8 tos, u8 ttl, __be16 df, bool xnet);
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 73ed2e951c02..dcc6f4057115 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -119,6 +119,64 @@ struct vxlanhdr_gbp {
119#define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) 119#define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16)
120#define VXLAN_GBP_ID_MASK (0xFFFF) 120#define VXLAN_GBP_ID_MASK (0xFFFF)
121 121
122/*
123 * VXLAN Generic Protocol Extension (VXLAN_F_GPE):
124 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
125 * |R|R|Ver|I|P|R|O| Reserved |Next Protocol |
126 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
127 * | VXLAN Network Identifier (VNI) | Reserved |
128 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
129 *
130 * Ver = Version. Indicates VXLAN GPE protocol version.
131 *
132 * P = Next Protocol Bit. The P bit is set to indicate that the
133 * Next Protocol field is present.
134 *
135 * O = OAM Flag Bit. The O bit is set to indicate that the packet
136 * is an OAM packet.
137 *
138 * Next Protocol = This 8 bit field indicates the protocol header
139 * immediately following the VXLAN GPE header.
140 *
141 * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01
142 */
143
144struct vxlanhdr_gpe {
145#if defined(__LITTLE_ENDIAN_BITFIELD)
146 u8 oam_flag:1,
147 reserved_flags1:1,
148 np_applied:1,
149 instance_applied:1,
150 version:2,
151reserved_flags2:2;
152#elif defined(__BIG_ENDIAN_BITFIELD)
153 u8 reserved_flags2:2,
154 version:2,
155 instance_applied:1,
156 np_applied:1,
157 reserved_flags1:1,
158 oam_flag:1;
159#endif
160 u8 reserved_flags3;
161 u8 reserved_flags4;
162 u8 next_protocol;
163 __be32 vx_vni;
164};
165
166/* VXLAN-GPE header flags. */
167#define VXLAN_HF_VER cpu_to_be32(BIT(29) | BIT(28))
168#define VXLAN_HF_NP cpu_to_be32(BIT(26))
169#define VXLAN_HF_OAM cpu_to_be32(BIT(24))
170
171#define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \
172 cpu_to_be32(0xff))
173
174/* VXLAN-GPE header Next Protocol. */
175#define VXLAN_GPE_NP_IPV4 0x01
176#define VXLAN_GPE_NP_IPV6 0x02
177#define VXLAN_GPE_NP_ETHERNET 0x03
178#define VXLAN_GPE_NP_NSH 0x04
179
122struct vxlan_metadata { 180struct vxlan_metadata {
123 u32 gbp; 181 u32 gbp;
124}; 182};
@@ -206,16 +264,26 @@ struct vxlan_dev {
206#define VXLAN_F_GBP 0x800 264#define VXLAN_F_GBP 0x800
207#define VXLAN_F_REMCSUM_NOPARTIAL 0x1000 265#define VXLAN_F_REMCSUM_NOPARTIAL 0x1000
208#define VXLAN_F_COLLECT_METADATA 0x2000 266#define VXLAN_F_COLLECT_METADATA 0x2000
267#define VXLAN_F_GPE 0x4000
209 268
210/* Flags that are used in the receive path. These flags must match in 269/* Flags that are used in the receive path. These flags must match in
211 * order for a socket to be shareable 270 * order for a socket to be shareable
212 */ 271 */
213#define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \ 272#define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \
273 VXLAN_F_GPE | \
214 VXLAN_F_UDP_ZERO_CSUM6_RX | \ 274 VXLAN_F_UDP_ZERO_CSUM6_RX | \
215 VXLAN_F_REMCSUM_RX | \ 275 VXLAN_F_REMCSUM_RX | \
216 VXLAN_F_REMCSUM_NOPARTIAL | \ 276 VXLAN_F_REMCSUM_NOPARTIAL | \
217 VXLAN_F_COLLECT_METADATA) 277 VXLAN_F_COLLECT_METADATA)
218 278
279/* Flags that can be set together with VXLAN_F_GPE. */
280#define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \
281 VXLAN_F_IPV6 | \
282 VXLAN_F_UDP_ZERO_CSUM_TX | \
283 VXLAN_F_UDP_ZERO_CSUM6_TX | \
284 VXLAN_F_UDP_ZERO_CSUM6_RX | \
285 VXLAN_F_COLLECT_METADATA)
286
219struct net_device *vxlan_dev_create(struct net *net, const char *name, 287struct net_device *vxlan_dev_create(struct net *net, const char *name,
220 u8 name_assign_type, struct vxlan_config *conf); 288 u8 name_assign_type, struct vxlan_config *conf);
221 289
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index c488066fb53a..9427f17d06d6 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -488,6 +488,7 @@ enum {
488 IFLA_VXLAN_REMCSUM_NOPARTIAL, 488 IFLA_VXLAN_REMCSUM_NOPARTIAL,
489 IFLA_VXLAN_COLLECT_METADATA, 489 IFLA_VXLAN_COLLECT_METADATA,
490 IFLA_VXLAN_LABEL, 490 IFLA_VXLAN_LABEL,
491 IFLA_VXLAN_GPE,
491 __IFLA_VXLAN_MAX 492 __IFLA_VXLAN_MAX
492}; 493};
493#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) 494#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index b3ab1205dfdf..43445df61efd 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -86,15 +86,15 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
86} 86}
87EXPORT_SYMBOL_GPL(iptunnel_xmit); 87EXPORT_SYMBOL_GPL(iptunnel_xmit);
88 88
89int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, 89int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
90 bool xnet) 90 __be16 inner_proto, bool raw_proto, bool xnet)
91{ 91{
92 if (unlikely(!pskb_may_pull(skb, hdr_len))) 92 if (unlikely(!pskb_may_pull(skb, hdr_len)))
93 return -ENOMEM; 93 return -ENOMEM;
94 94
95 skb_pull_rcsum(skb, hdr_len); 95 skb_pull_rcsum(skb, hdr_len);
96 96
97 if (inner_proto == htons(ETH_P_TEB)) { 97 if (!raw_proto && inner_proto == htons(ETH_P_TEB)) {
98 struct ethhdr *eh; 98 struct ethhdr *eh;
99 99
100 if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) 100 if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
@@ -117,7 +117,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto,
117 117
118 return iptunnel_pull_offloads(skb); 118 return iptunnel_pull_offloads(skb);
119} 119}
120EXPORT_SYMBOL_GPL(iptunnel_pull_header); 120EXPORT_SYMBOL_GPL(__iptunnel_pull_header);
121 121
122struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, 122struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
123 gfp_t flags) 123 gfp_t flags)