Diffstat (limited to 'drivers/net/vxlan.c')
-rw-r--r--	drivers/net/vxlan.c | 440
1 file changed, 331 insertions(+), 109 deletions(-)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index a8c755dcab14..0e57e862c399 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -61,12 +61,6 @@
 #define FDB_AGE_DEFAULT 300 /* 5 min */
 #define FDB_AGE_INTERVAL (10 * HZ) /* rescan interval */
 
-#define VXLAN_N_VID	(1u << 24)
-#define VXLAN_VID_MASK	(VXLAN_N_VID - 1)
-#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
-
-#define VXLAN_FLAGS 0x08000000	/* struct vxlanhdr.vx_flags required value. */
-
 /* UDP port for VXLAN traffic.
  * The IANA assigned port is 4789, but the Linux default is 8472
  * for compatibility with early adopters.
@@ -269,15 +263,20 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
 	return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
 }
 
-/* Find VXLAN socket based on network namespace, address family and UDP port */
-static struct vxlan_sock *vxlan_find_sock(struct net *net,
-					  sa_family_t family, __be16 port)
+/* Find VXLAN socket based on network namespace, address family and UDP port
+ * and enabled unshareable flags.
+ */
+static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
+					  __be16 port, u32 flags)
 {
 	struct vxlan_sock *vs;
 
+	flags &= VXLAN_F_RCV_FLAGS;
+
 	hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
 		if (inet_sk(vs->sock->sk)->inet_sport == port &&
-		    inet_sk(vs->sock->sk)->sk.sk_family == family)
+		    inet_sk(vs->sock->sk)->sk.sk_family == family &&
+		    vs->flags == flags)
 			return vs;
 	}
 	return NULL;
@@ -297,11 +296,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
 
 /* Look up VNI in a per net namespace table */
 static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
-					sa_family_t family, __be16 port)
+					sa_family_t family, __be16 port,
+					u32 flags)
 {
 	struct vxlan_sock *vs;
 
-	vs = vxlan_find_sock(net, family, port);
+	vs = vxlan_find_sock(net, family, port, flags);
 	if (!vs)
 		return NULL;
 
@@ -340,6 +340,11 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 	ndm->ndm_flags = fdb->flags;
 	ndm->ndm_type = RTN_UNICAST;
 
+	if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
+	    nla_put_s32(skb, NDA_LINK_NETNSID,
+			peernet2id(dev_net(vxlan->dev), vxlan->net)))
+		goto nla_put_failure;
+
 	if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
 		goto nla_put_failure;
 
@@ -364,7 +369,8 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 	if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
 		goto nla_put_failure;
 
-	return nlmsg_end(skb, nlh);
+	nlmsg_end(skb, nlh);
+	return 0;
 
 nla_put_failure:
 	nlmsg_cancel(skb, nlh);
@@ -379,6 +385,7 @@ static inline size_t vxlan_nlmsg_size(void)
 		+ nla_total_size(sizeof(__be16)) /* NDA_PORT */
 		+ nla_total_size(sizeof(__be32)) /* NDA_VNI */
 		+ nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */
+		+ nla_total_size(sizeof(__s32)) /* NDA_LINK_NETNSID */
 		+ nla_total_size(sizeof(struct nda_cacheinfo));
 }
 
@@ -545,15 +552,51 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
 	return 1;
 }
 
-static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
+					  unsigned int off,
+					  struct vxlanhdr *vh, size_t hdrlen,
+					  u32 data)
+{
+	size_t start, offset, plen;
+
+	if (skb->remcsum_offload)
+		return vh;
+
+	if (!NAPI_GRO_CB(skb)->csum_valid)
+		return NULL;
+
+	start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
+	offset = start + ((data & VXLAN_RCO_UDP) ?
+			  offsetof(struct udphdr, check) :
+			  offsetof(struct tcphdr, check));
+
+	plen = hdrlen + offset + sizeof(u16);
+
+	/* Pull checksum that will be written */
+	if (skb_gro_header_hard(skb, off + plen)) {
+		vh = skb_gro_header_slow(skb, off + plen, off);
+		if (!vh)
+			return NULL;
+	}
+
+	skb_gro_remcsum_process(skb, (void *)vh + hdrlen, start, offset);
+
+	skb->remcsum_offload = 1;
+
+	return vh;
+}
+
+static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
+					  struct sk_buff *skb,
+					  struct udp_offload *uoff)
 {
 	struct sk_buff *p, **pp = NULL;
 	struct vxlanhdr *vh, *vh2;
-	struct ethhdr *eh, *eh2;
-	unsigned int hlen, off_vx, off_eth;
-	const struct packet_offload *ptype;
-	__be16 type;
+	unsigned int hlen, off_vx;
 	int flush = 1;
+	struct vxlan_sock *vs = container_of(uoff, struct vxlan_sock,
+					     udp_offloads);
+	u32 flags;
 
 	off_vx = skb_gro_offset(skb);
 	hlen = off_vx + sizeof(*vh);
@@ -563,15 +606,17 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff
 		if (unlikely(!vh))
 			goto out;
 	}
+
 	skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
 	skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
 
-	off_eth = skb_gro_offset(skb);
-	hlen = off_eth + sizeof(*eh);
-	eh = skb_gro_header_fast(skb, off_eth);
-	if (skb_gro_header_hard(skb, hlen)) {
-		eh = skb_gro_header_slow(skb, hlen, off_eth);
-		if (unlikely(!eh))
-			goto out;
-	}
+	flags = ntohl(vh->vx_flags);
+
+	if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
+		vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
+				       ntohl(vh->vx_vni));
+
+		if (!vh)
+			goto out;
+	}
 
@@ -582,54 +627,27 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff
 			continue;
 
 		vh2 = (struct vxlanhdr *)(p->data + off_vx);
-		eh2 = (struct ethhdr *)(p->data + off_eth);
-		if (vh->vx_vni != vh2->vx_vni || compare_ether_header(eh, eh2)) {
+		if (vh->vx_flags != vh2->vx_flags ||
+		    vh->vx_vni != vh2->vx_vni) {
 			NAPI_GRO_CB(p)->same_flow = 0;
 			continue;
 		}
 	}
 
-	type = eh->h_proto;
-
-	rcu_read_lock();
-	ptype = gro_find_receive_by_type(type);
-	if (ptype == NULL) {
-		flush = 1;
-		goto out_unlock;
-	}
+	pp = eth_gro_receive(head, skb);
 
-	skb_gro_pull(skb, sizeof(*eh)); /* pull inner eth header */
-	skb_gro_postpull_rcsum(skb, eh, sizeof(*eh));
-	pp = ptype->callbacks.gro_receive(head, skb);
-
-out_unlock:
-	rcu_read_unlock();
 out:
 	NAPI_GRO_CB(skb)->flush |= flush;
 
 	return pp;
 }
 
-static int vxlan_gro_complete(struct sk_buff *skb, int nhoff)
+static int vxlan_gro_complete(struct sk_buff *skb, int nhoff,
+			      struct udp_offload *uoff)
 {
-	struct ethhdr *eh;
-	struct packet_offload *ptype;
-	__be16 type;
-	int vxlan_len = sizeof(struct vxlanhdr) + sizeof(struct ethhdr);
-	int err = -ENOSYS;
-
 	udp_tunnel_gro_complete(skb, nhoff);
 
-	eh = (struct ethhdr *)(skb->data + nhoff + sizeof(struct vxlanhdr));
-	type = eh->h_proto;
-
-	rcu_read_lock();
-	ptype = gro_find_complete_by_type(type);
-	if (ptype != NULL)
-		err = ptype->callbacks.gro_complete(skb, nhoff + vxlan_len);
-
-	rcu_read_unlock();
-	return err;
+	return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
 }
 
 /* Notify netdevs that UDP port started listening */
@@ -991,7 +1009,7 @@ static bool vxlan_snoop(struct net_device *dev,
 		if (net_ratelimit())
 			netdev_info(dev,
 				    "%pM migrated from %pIS to %pIS\n",
-				    src_mac, &rdst->remote_ip, &src_ip);
+				    src_mac, &rdst->remote_ip.sa, &src_ip->sa);
 
 		rdst->remote_ip = *src_ip;
 		f->updated = jiffies;
@@ -1131,33 +1149,107 @@ static void vxlan_igmp_leave(struct work_struct *work)
 	dev_put(vxlan->dev);
 }
 
+static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh,
+				      size_t hdrlen, u32 data)
+{
+	size_t start, offset, plen;
+
+	if (skb->remcsum_offload) {
+		/* Already processed in GRO path */
+		skb->remcsum_offload = 0;
+		return vh;
+	}
+
+	start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
+	offset = start + ((data & VXLAN_RCO_UDP) ?
+			  offsetof(struct udphdr, check) :
+			  offsetof(struct tcphdr, check));
+
+	plen = hdrlen + offset + sizeof(u16);
+
+	if (!pskb_may_pull(skb, plen))
+		return NULL;
+
+	vh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
+
+	skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset);
+
+	return vh;
+}
+
 /* Callback from net/ipv4/udp.c to receive packets */
 static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct vxlan_sock *vs;
 	struct vxlanhdr *vxh;
+	u32 flags, vni;
+	struct vxlan_metadata md = {0};
 
 	/* Need Vxlan and inner Ethernet header to be present */
 	if (!pskb_may_pull(skb, VXLAN_HLEN))
 		goto error;
 
-	/* Return packets with reserved bits set */
 	vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
-	if (vxh->vx_flags != htonl(VXLAN_FLAGS) ||
-	    (vxh->vx_vni & htonl(0xff))) {
-		netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
-			   ntohl(vxh->vx_flags), ntohl(vxh->vx_vni));
-		goto error;
+	flags = ntohl(vxh->vx_flags);
+	vni = ntohl(vxh->vx_vni);
+
+	if (flags & VXLAN_HF_VNI) {
+		flags &= ~VXLAN_HF_VNI;
+	} else {
+		/* VNI flag always required to be set */
+		goto bad_flags;
 	}
 
 	if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
 		goto drop;
+	vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
 
 	vs = rcu_dereference_sk_user_data(sk);
 	if (!vs)
 		goto drop;
 
-	vs->rcv(vs, skb, vxh->vx_vni);
+	if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
+		vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni);
+		if (!vxh)
+			goto drop;
+
+		flags &= ~VXLAN_HF_RCO;
+		vni &= VXLAN_VID_MASK;
+	}
+
+	/* For backwards compatibility, only allow reserved fields to be
+	 * used by VXLAN extensions if explicitly requested.
+	 */
+	if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) {
+		struct vxlanhdr_gbp *gbp;
+
+		gbp = (struct vxlanhdr_gbp *)vxh;
+		md.gbp = ntohs(gbp->policy_id);
+
+		if (gbp->dont_learn)
+			md.gbp |= VXLAN_GBP_DONT_LEARN;
+
+		if (gbp->policy_applied)
+			md.gbp |= VXLAN_GBP_POLICY_APPLIED;
+
+		flags &= ~VXLAN_GBP_USED_BITS;
+	}
+
+	if (flags || (vni & ~VXLAN_VID_MASK)) {
+		/* If there are any unprocessed flags remaining treat
+		 * this as a malformed packet. This behavior diverges from
+		 * VXLAN RFC (RFC7348), which stipulates that bits in
+		 * reserved fields are to be ignored. The approach here
+		 * maintains compatibility with previous stack code, and also
+		 * is more robust and provides a little more security in
+		 * adding extensions to VXLAN.
+		 */
+
+		goto bad_flags;
+	}
+
+	md.vni = vxh->vx_vni;
+	vs->rcv(vs, skb, &md);
 	return 0;
 
 drop:
@@ -1165,13 +1257,17 @@ drop:
 	kfree_skb(skb);
 	return 0;
 
+bad_flags:
+	netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
+		   ntohl(vxh->vx_flags), ntohl(vxh->vx_vni));
+
 error:
 	/* Return non vxlan pkt */
 	return 1;
 }
 
-static void vxlan_rcv(struct vxlan_sock *vs,
-		      struct sk_buff *skb, __be32 vx_vni)
+static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
+		      struct vxlan_metadata *md)
 {
 	struct iphdr *oip = NULL;
 	struct ipv6hdr *oip6 = NULL;
@@ -1182,7 +1278,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
 	int err = 0;
 	union vxlan_addr *remote_ip;
 
-	vni = ntohl(vx_vni) >> 8;
+	vni = ntohl(md->vni) >> 8;
 	/* Is this VNI defined? */
 	vxlan = vxlan_vs_find_vni(vs, vni);
 	if (!vxlan)
@@ -1216,6 +1312,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
 		goto drop;
 
 	skb_reset_network_header(skb);
+	skb->mark = md->gbp;
 
 	if (oip6)
 		err = IP6_ECN_decapsulate(oip6, skb);
@@ -1565,20 +1662,54 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
 	return false;
 }
 
+static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
+				struct vxlan_metadata *md)
+{
+	struct vxlanhdr_gbp *gbp;
+
+	if (!md->gbp)
+		return;
+
+	gbp = (struct vxlanhdr_gbp *)vxh;
+	vxh->vx_flags |= htonl(VXLAN_HF_GBP);
+
+	if (md->gbp & VXLAN_GBP_DONT_LEARN)
+		gbp->dont_learn = 1;
+
+	if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
+		gbp->policy_applied = 1;
+
+	gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
+}
+
 #if IS_ENABLED(CONFIG_IPV6)
-static int vxlan6_xmit_skb(struct vxlan_sock *vs,
-			   struct dst_entry *dst, struct sk_buff *skb,
+static int vxlan6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb,
 			   struct net_device *dev, struct in6_addr *saddr,
 			   struct in6_addr *daddr, __u8 prio, __u8 ttl,
-			   __be16 src_port, __be16 dst_port, __be32 vni,
-			   bool xnet)
+			   __be16 src_port, __be16 dst_port,
+			   struct vxlan_metadata *md, bool xnet, u32 vxflags)
 {
 	struct vxlanhdr *vxh;
 	int min_headroom;
 	int err;
-	bool udp_sum = !udp_get_no_check6_tx(vs->sock->sk);
+	bool udp_sum = !(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX);
+	int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+	u16 hdrlen = sizeof(struct vxlanhdr);
+
+	if ((vxflags & VXLAN_F_REMCSUM_TX) &&
+	    skb->ip_summed == CHECKSUM_PARTIAL) {
+		int csum_start = skb_checksum_start_offset(skb);
+
+		if (csum_start <= VXLAN_MAX_REMCSUM_START &&
+		    !(csum_start & VXLAN_RCO_SHIFT_MASK) &&
+		    (skb->csum_offset == offsetof(struct udphdr, check) ||
+		     skb->csum_offset == offsetof(struct tcphdr, check))) {
+			udp_sum = false;
+			type |= SKB_GSO_TUNNEL_REMCSUM;
+		}
+	}
 
-	skb = udp_tunnel_handle_offloads(skb, udp_sum);
+	skb = iptunnel_handle_offloads(skb, udp_sum, type);
 	if (IS_ERR(skb)) {
 		err = -EINVAL;
 		goto err;
@@ -1588,7 +1719,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 
 	min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
 			+ VXLAN_HLEN + sizeof(struct ipv6hdr)
-			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+			+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
 
 	/* Need space for new headers (invalidates iph ptr) */
 	err = skb_cow_head(skb, min_headroom);
@@ -1604,13 +1735,33 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 	}
 
 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
-	vxh->vx_flags = htonl(VXLAN_FLAGS);
-	vxh->vx_vni = vni;
+	vxh->vx_flags = htonl(VXLAN_HF_VNI);
+	vxh->vx_vni = md->vni;
+
+	if (type & SKB_GSO_TUNNEL_REMCSUM) {
+		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
+			   VXLAN_RCO_SHIFT;
+
+		if (skb->csum_offset == offsetof(struct udphdr, check))
+			data |= VXLAN_RCO_UDP;
+
+		vxh->vx_vni |= htonl(data);
+		vxh->vx_flags |= htonl(VXLAN_HF_RCO);
+
+		if (!skb_is_gso(skb)) {
+			skb->ip_summed = CHECKSUM_NONE;
+			skb->encapsulation = 0;
+		}
+	}
+
+	if (vxflags & VXLAN_F_GBP)
+		vxlan_build_gbp_hdr(vxh, vxflags, md);
 
 	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
 
-	udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio,
-			     ttl, src_port, dst_port);
+	udp_tunnel6_xmit_skb(dst, skb, dev, saddr, daddr, prio,
+			     ttl, src_port, dst_port,
+			     !!(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX));
 	return 0;
 err:
 	dst_release(dst);
@@ -1618,23 +1769,38 @@ err:
 }
 #endif
 
-int vxlan_xmit_skb(struct vxlan_sock *vs,
-		   struct rtable *rt, struct sk_buff *skb,
+int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb,
 		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
-		   __be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
+		   __be16 src_port, __be16 dst_port,
+		   struct vxlan_metadata *md, bool xnet, u32 vxflags)
 {
 	struct vxlanhdr *vxh;
 	int min_headroom;
 	int err;
-	bool udp_sum = !vs->sock->sk->sk_no_check_tx;
+	bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM);
+	int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+	u16 hdrlen = sizeof(struct vxlanhdr);
+
+	if ((vxflags & VXLAN_F_REMCSUM_TX) &&
+	    skb->ip_summed == CHECKSUM_PARTIAL) {
+		int csum_start = skb_checksum_start_offset(skb);
+
+		if (csum_start <= VXLAN_MAX_REMCSUM_START &&
+		    !(csum_start & VXLAN_RCO_SHIFT_MASK) &&
+		    (skb->csum_offset == offsetof(struct udphdr, check) ||
+		     skb->csum_offset == offsetof(struct tcphdr, check))) {
+			udp_sum = false;
+			type |= SKB_GSO_TUNNEL_REMCSUM;
+		}
+	}
 
-	skb = udp_tunnel_handle_offloads(skb, udp_sum);
+	skb = iptunnel_handle_offloads(skb, udp_sum, type);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
 	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
 			+ VXLAN_HLEN + sizeof(struct iphdr)
-			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+			+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
 
 	/* Need space for new headers (invalidates iph ptr) */
 	err = skb_cow_head(skb, min_headroom);
@@ -1648,13 +1814,33 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 		return -ENOMEM;
 
 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
-	vxh->vx_flags = htonl(VXLAN_FLAGS);
-	vxh->vx_vni = vni;
+	vxh->vx_flags = htonl(VXLAN_HF_VNI);
+	vxh->vx_vni = md->vni;
+
+	if (type & SKB_GSO_TUNNEL_REMCSUM) {
+		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
+			   VXLAN_RCO_SHIFT;
+
+		if (skb->csum_offset == offsetof(struct udphdr, check))
+			data |= VXLAN_RCO_UDP;
+
+		vxh->vx_vni |= htonl(data);
+		vxh->vx_flags |= htonl(VXLAN_HF_RCO);
+
+		if (!skb_is_gso(skb)) {
+			skb->ip_summed = CHECKSUM_NONE;
+			skb->encapsulation = 0;
+		}
+	}
+
+	if (vxflags & VXLAN_F_GBP)
+		vxlan_build_gbp_hdr(vxh, vxflags, md);
 
 	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
 
-	return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos,
-				   ttl, df, src_port, dst_port, xnet);
+	return udp_tunnel_xmit_skb(rt, skb, src, dst, tos,
+				   ttl, df, src_port, dst_port, xnet,
+				   !(vxflags & VXLAN_F_UDP_CSUM));
 }
 EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
 
@@ -1711,6 +1897,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 	const struct iphdr *old_iph;
 	struct flowi4 fl4;
 	union vxlan_addr *dst;
+	struct vxlan_metadata md;
 	__be16 src_port = 0, dst_port;
 	u32 vni;
 	__be16 df = 0;
@@ -1772,7 +1959,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
 			ip_rt_put(rt);
 			dst_vxlan = vxlan_find_vni(vxlan->net, vni,
-						   dst->sa.sa_family, dst_port);
+						   dst->sa.sa_family, dst_port,
+						   vxlan->flags);
 			if (!dst_vxlan)
 				goto tx_error;
 			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1781,12 +1969,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
-
-		err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
-				     fl4.saddr, dst->sin.sin_addr.s_addr,
-				     tos, ttl, df, src_port, dst_port,
-				     htonl(vni << 8),
-				     !net_eq(vxlan->net, dev_net(vxlan->dev)));
+		md.vni = htonl(vni << 8);
+		md.gbp = skb->mark;
+
+		err = vxlan_xmit_skb(rt, skb, fl4.saddr,
+				     dst->sin.sin_addr.s_addr, tos, ttl, df,
+				     src_port, dst_port, &md,
+				     !net_eq(vxlan->net, dev_net(vxlan->dev)),
+				     vxlan->flags);
 		if (err < 0) {
 			/* skb is already freed. */
 			skb = NULL;
@@ -1830,7 +2020,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
 			dst_release(ndst);
 			dst_vxlan = vxlan_find_vni(vxlan->net, vni,
-						   dst->sa.sa_family, dst_port);
+						   dst->sa.sa_family, dst_port,
+						   vxlan->flags);
 			if (!dst_vxlan)
 				goto tx_error;
 			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1838,11 +2029,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		}
 
 		ttl = ttl ? : ip6_dst_hoplimit(ndst);
+		md.vni = htonl(vni << 8);
+		md.gbp = skb->mark;
 
-		err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb,
-				      dev, &fl6.saddr, &fl6.daddr, 0, ttl,
-				      src_port, dst_port, htonl(vni << 8),
-				      !net_eq(vxlan->net, dev_net(vxlan->dev)));
+		err = vxlan6_xmit_skb(ndst, skb, dev, &fl6.saddr, &fl6.daddr,
+				      0, ttl, src_port, dst_port, &md,
+				      !net_eq(vxlan->net, dev_net(vxlan->dev)),
+				      vxlan->flags);
 #endif
 	}
 
@@ -1998,7 +2191,7 @@ static int vxlan_init(struct net_device *dev)
 
 	spin_lock(&vn->sock_lock);
 	vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
-			     vxlan->dst_port);
+			     vxlan->dst_port, vxlan->flags);
 	if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) {
 		/* If we have a socket with same port already, reuse it */
 		vxlan_vs_add_dev(vs, vxlan);
@@ -2242,6 +2435,9 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
 	[IFLA_VXLAN_UDP_CSUM] = { .type = NLA_U8 },
 	[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 },
 	[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
+	[IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 },
+	[IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 },
+	[IFLA_VXLAN_GBP] = { .type = NLA_FLAG, },
 };
 
 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -2311,15 +2507,11 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
 
 	if (ipv6) {
 		udp_conf.family = AF_INET6;
-		udp_conf.use_udp6_tx_checksums =
-		    !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
 		udp_conf.use_udp6_rx_checksums =
 		    !(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);
 	} else {
 		udp_conf.family = AF_INET;
 		udp_conf.local_ip.s_addr = INADDR_ANY;
-		udp_conf.use_udp_checksums =
-		    !!(flags & VXLAN_F_UDP_CSUM);
 	}
 
 	udp_conf.local_udp_port = port;
@@ -2363,6 +2555,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
 	atomic_set(&vs->refcnt, 1);
 	vs->rcv = rcv;
 	vs->data = data;
+	vs->flags = (flags & VXLAN_F_RCV_FLAGS);
 
 	/* Initialize the vxlan udp offloads structure */
 	vs->udp_offloads.port = port;
@@ -2401,7 +2594,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 		return vs;
 
 	spin_lock(&vn->sock_lock);
-	vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port);
+	vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port, flags);
 	if (vs && ((vs->rcv != rcv) ||
 		   !atomic_add_unless(&vs->refcnt, 1, 0)))
 		vs = ERR_PTR(-EBUSY);
@@ -2557,8 +2750,19 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
 	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
 		vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;
 
+	if (data[IFLA_VXLAN_REMCSUM_TX] &&
+	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX]))
+		vxlan->flags |= VXLAN_F_REMCSUM_TX;
+
+	if (data[IFLA_VXLAN_REMCSUM_RX] &&
+	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
+		vxlan->flags |= VXLAN_F_REMCSUM_RX;
+
+	if (data[IFLA_VXLAN_GBP])
+		vxlan->flags |= VXLAN_F_GBP;
+
 	if (vxlan_find_vni(src_net, vni, use_ipv6 ? AF_INET6 : AF_INET,
-			   vxlan->dst_port)) {
+			   vxlan->dst_port, vxlan->flags)) {
 		pr_info("duplicate VNI %u\n", vni);
 		return -EEXIST;
 	}
@@ -2625,6 +2829,8 @@ static size_t vxlan_get_size(const struct net_device *dev)
 		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */
 		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_TX */
 		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
+		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
+		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
 		0;
 }
 
@@ -2690,18 +2896,33 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
 		       !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
 	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
-		       !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)))
+		       !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
+	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
+		       !!(vxlan->flags & VXLAN_F_REMCSUM_TX)) ||
+	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
+		       !!(vxlan->flags & VXLAN_F_REMCSUM_RX)))
 		goto nla_put_failure;
 
 	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
 		goto nla_put_failure;
 
+	if (vxlan->flags & VXLAN_F_GBP &&
+	    nla_put_flag(skb, IFLA_VXLAN_GBP))
+		goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static struct net *vxlan_get_link_net(const struct net_device *dev)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+
+	return vxlan->net;
+}
+
 static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
 	.kind = "vxlan",
 	.maxtype = IFLA_VXLAN_MAX,
@@ -2713,6 +2934,7 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
 	.dellink = vxlan_dellink,
 	.get_size = vxlan_get_size,
 	.fill_info = vxlan_fill_info,
+	.get_link_net = vxlan_get_link_net,
 };
 
 static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn,
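
As an illustrative aside, not part of the patch itself: the remote checksum offload (RCO) encoding used by vxlan_remcsum()/vxlan_gro_remcsum() above, and mirrored in the two transmit paths, packs a checksum-start offset and a UDP-vs-TCP selector into the low byte of the vx_vni field. Below is a minimal userspace sketch of that bit layout, assuming the constant values VXLAN_RCO_SHIFT = 1, VXLAN_RCO_MASK = 0x7f and VXLAN_RCO_UDP = 0x80 that this series adds to include/net/vxlan.h (treat those values as assumptions drawn from the corresponding header, not from this file).

/* Illustrative sketch of the RCO bit layout used in this patch.
 * Constant values are assumed to mirror include/net/vxlan.h from
 * this series; they are not defined in vxlan.c itself.
 */
#include <stdint.h>
#include <stdio.h>

#define VXLAN_RCO_MASK  0x7f	/* low bits of the vni byte: start >> shift */
#define VXLAN_RCO_UDP   0x80	/* checksum field is UDP's (else TCP's) */
#define VXLAN_RCO_SHIFT 1	/* start offset is stored divided by two */

/* Encode, as the TX paths do: csum_start is the (even) offset past the
 * VXLAN header at which the inner checksum computation starts.
 */
static uint32_t rco_encode(unsigned int csum_start, int is_udp)
{
	uint32_t data = csum_start >> VXLAN_RCO_SHIFT;

	if (is_udp)
		data |= VXLAN_RCO_UDP;
	return data;
}

/* Decode, as vxlan_remcsum() and vxlan_gro_remcsum() do. */
static void rco_decode(uint32_t data, unsigned int *start, int *is_udp)
{
	*start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
	*is_udp = !!(data & VXLAN_RCO_UDP);
}

int main(void)
{
	unsigned int start;
	int is_udp;

	rco_decode(rco_encode(40, 1), &start, &is_udp);
	printf("start=%u udp=%d\n", start, is_udp);	/* start=40 udp=1 */
	return 0;
}

Storing start >> 1 in seven bits is what bounds csum_start by VXLAN_MAX_REMCSUM_START and requires it to be even, which is exactly what the transmit paths check before setting SKB_GSO_TUNNEL_REMCSUM.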