diff options
author | David Stevens <dlstevens@us.ibm.com> | 2013-03-15 00:35:51 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-03-17 12:23:46 -0400 |
commit | 6681712d67eef14c4ce793561c3231659153a320 (patch) | |
tree | 9a4eb4ee55b9b65a7838758e727d266c9a1fbde6 /drivers/net/vxlan.c | |
parent | 7a875903389f3492d4cb06faa1d55a1630e77c11 (diff) |
vxlan: generalize forwarding tables
This patch generalizes VXLAN forwarding table entries allowing an administrator
to:
1) specify multiple destinations for a given MAC
2) specify alternate VNIs in the VXLAN header
3) specify alternate destination UDP ports
4) use multicast MAC addresses as fdb lookup keys
5) specify multicast destinations
6) specify the outgoing interface for forwarded packets
The combination allows configuration of more complex topologies using VXLAN
encapsulation.
Changes since v1: rebase to 3.9.0-rc2
Signed-Off-By: David L Stevens <dlstevens@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/vxlan.c')
-rw-r--r-- | drivers/net/vxlan.c | 263 |
1 files changed, 206 insertions, 57 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index db0df07c18dc..33427fd62515 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c | |||
@@ -81,13 +81,22 @@ struct vxlan_net { | |||
81 | struct hlist_head vni_list[VNI_HASH_SIZE]; | 81 | struct hlist_head vni_list[VNI_HASH_SIZE]; |
82 | }; | 82 | }; |
83 | 83 | ||
84 | struct vxlan_rdst { | ||
85 | struct rcu_head rcu; | ||
86 | __be32 remote_ip; | ||
87 | __be16 remote_port; | ||
88 | u32 remote_vni; | ||
89 | u32 remote_ifindex; | ||
90 | struct vxlan_rdst *remote_next; | ||
91 | }; | ||
92 | |||
84 | /* Forwarding table entry */ | 93 | /* Forwarding table entry */ |
85 | struct vxlan_fdb { | 94 | struct vxlan_fdb { |
86 | struct hlist_node hlist; /* linked list of entries */ | 95 | struct hlist_node hlist; /* linked list of entries */ |
87 | struct rcu_head rcu; | 96 | struct rcu_head rcu; |
88 | unsigned long updated; /* jiffies */ | 97 | unsigned long updated; /* jiffies */ |
89 | unsigned long used; | 98 | unsigned long used; |
90 | __be32 remote_ip; | 99 | struct vxlan_rdst remote; |
91 | u16 state; /* see ndm_state */ | 100 | u16 state; /* see ndm_state */ |
92 | u8 eth_addr[ETH_ALEN]; | 101 | u8 eth_addr[ETH_ALEN]; |
93 | }; | 102 | }; |
@@ -157,7 +166,8 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id) | |||
157 | /* Fill in neighbour message in skbuff. */ | 166 | /* Fill in neighbour message in skbuff. */ |
158 | static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, | 167 | static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, |
159 | const struct vxlan_fdb *fdb, | 168 | const struct vxlan_fdb *fdb, |
160 | u32 portid, u32 seq, int type, unsigned int flags) | 169 | u32 portid, u32 seq, int type, unsigned int flags, |
170 | const struct vxlan_rdst *rdst) | ||
161 | { | 171 | { |
162 | unsigned long now = jiffies; | 172 | unsigned long now = jiffies; |
163 | struct nda_cacheinfo ci; | 173 | struct nda_cacheinfo ci; |
@@ -176,7 +186,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, | |||
176 | 186 | ||
177 | if (type == RTM_GETNEIGH) { | 187 | if (type == RTM_GETNEIGH) { |
178 | ndm->ndm_family = AF_INET; | 188 | ndm->ndm_family = AF_INET; |
179 | send_ip = fdb->remote_ip != 0; | 189 | send_ip = rdst->remote_ip != htonl(INADDR_ANY); |
180 | send_eth = !is_zero_ether_addr(fdb->eth_addr); | 190 | send_eth = !is_zero_ether_addr(fdb->eth_addr); |
181 | } else | 191 | } else |
182 | ndm->ndm_family = AF_BRIDGE; | 192 | ndm->ndm_family = AF_BRIDGE; |
@@ -188,7 +198,17 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, | |||
188 | if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) | 198 | if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) |
189 | goto nla_put_failure; | 199 | goto nla_put_failure; |
190 | 200 | ||
191 | if (send_ip && nla_put_be32(skb, NDA_DST, fdb->remote_ip)) | 201 | if (send_ip && nla_put_be32(skb, NDA_DST, rdst->remote_ip)) |
202 | goto nla_put_failure; | ||
203 | |||
204 | if (rdst->remote_port && rdst->remote_port != vxlan_port && | ||
205 | nla_put_be16(skb, NDA_PORT, rdst->remote_port)) | ||
206 | goto nla_put_failure; | ||
207 | if (rdst->remote_vni != vxlan->vni && | ||
208 | nla_put_be32(skb, NDA_VNI, rdst->remote_vni)) | ||
209 | goto nla_put_failure; | ||
210 | if (rdst->remote_ifindex && | ||
211 | nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex)) | ||
192 | goto nla_put_failure; | 212 | goto nla_put_failure; |
193 | 213 | ||
194 | ci.ndm_used = jiffies_to_clock_t(now - fdb->used); | 214 | ci.ndm_used = jiffies_to_clock_t(now - fdb->used); |
@@ -211,6 +231,9 @@ static inline size_t vxlan_nlmsg_size(void) | |||
211 | return NLMSG_ALIGN(sizeof(struct ndmsg)) | 231 | return NLMSG_ALIGN(sizeof(struct ndmsg)) |
212 | + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ | 232 | + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ |
213 | + nla_total_size(sizeof(__be32)) /* NDA_DST */ | 233 | + nla_total_size(sizeof(__be32)) /* NDA_DST */ |
234 | + nla_total_size(sizeof(__be32)) /* NDA_PORT */ | ||
235 | + nla_total_size(sizeof(__be32)) /* NDA_VNI */ | ||
236 | + nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */ | ||
214 | + nla_total_size(sizeof(struct nda_cacheinfo)); | 237 | + nla_total_size(sizeof(struct nda_cacheinfo)); |
215 | } | 238 | } |
216 | 239 | ||
@@ -225,7 +248,7 @@ static void vxlan_fdb_notify(struct vxlan_dev *vxlan, | |||
225 | if (skb == NULL) | 248 | if (skb == NULL) |
226 | goto errout; | 249 | goto errout; |
227 | 250 | ||
228 | err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0); | 251 | err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, &fdb->remote); |
229 | if (err < 0) { | 252 | if (err < 0) { |
230 | /* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */ | 253 | /* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */ |
231 | WARN_ON(err == -EMSGSIZE); | 254 | WARN_ON(err == -EMSGSIZE); |
@@ -247,7 +270,8 @@ static void vxlan_ip_miss(struct net_device *dev, __be32 ipa) | |||
247 | 270 | ||
248 | memset(&f, 0, sizeof f); | 271 | memset(&f, 0, sizeof f); |
249 | f.state = NUD_STALE; | 272 | f.state = NUD_STALE; |
250 | f.remote_ip = ipa; /* goes to NDA_DST */ | 273 | f.remote.remote_ip = ipa; /* goes to NDA_DST */ |
274 | f.remote.remote_vni = VXLAN_N_VID; | ||
251 | 275 | ||
252 | vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH); | 276 | vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH); |
253 | } | 277 | } |
@@ -300,10 +324,38 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, | |||
300 | return NULL; | 324 | return NULL; |
301 | } | 325 | } |
302 | 326 | ||
327 | /* Add/update destinations for multicast */ | ||
328 | static int vxlan_fdb_append(struct vxlan_fdb *f, | ||
329 | __be32 ip, __u32 port, __u32 vni, __u32 ifindex) | ||
330 | { | ||
331 | struct vxlan_rdst *rd_prev, *rd; | ||
332 | |||
333 | rd_prev = NULL; | ||
334 | for (rd = &f->remote; rd; rd = rd->remote_next) { | ||
335 | if (rd->remote_ip == ip && | ||
336 | rd->remote_port == port && | ||
337 | rd->remote_vni == vni && | ||
338 | rd->remote_ifindex == ifindex) | ||
339 | return 0; | ||
340 | rd_prev = rd; | ||
341 | } | ||
342 | rd = kmalloc(sizeof(*rd), GFP_ATOMIC); | ||
343 | if (rd == NULL) | ||
344 | return -ENOBUFS; | ||
345 | rd->remote_ip = ip; | ||
346 | rd->remote_port = port; | ||
347 | rd->remote_vni = vni; | ||
348 | rd->remote_ifindex = ifindex; | ||
349 | rd->remote_next = NULL; | ||
350 | rd_prev->remote_next = rd; | ||
351 | return 1; | ||
352 | } | ||
353 | |||
303 | /* Add new entry to forwarding table -- assumes lock held */ | 354 | /* Add new entry to forwarding table -- assumes lock held */ |
304 | static int vxlan_fdb_create(struct vxlan_dev *vxlan, | 355 | static int vxlan_fdb_create(struct vxlan_dev *vxlan, |
305 | const u8 *mac, __be32 ip, | 356 | const u8 *mac, __be32 ip, |
306 | __u16 state, __u16 flags) | 357 | __u16 state, __u16 flags, |
358 | __u32 port, __u32 vni, __u32 ifindex) | ||
307 | { | 359 | { |
308 | struct vxlan_fdb *f; | 360 | struct vxlan_fdb *f; |
309 | int notify = 0; | 361 | int notify = 0; |
@@ -320,6 +372,14 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, | |||
320 | f->updated = jiffies; | 372 | f->updated = jiffies; |
321 | notify = 1; | 373 | notify = 1; |
322 | } | 374 | } |
375 | if ((flags & NLM_F_APPEND) && | ||
376 | is_multicast_ether_addr(f->eth_addr)) { | ||
377 | int rc = vxlan_fdb_append(f, ip, port, vni, ifindex); | ||
378 | |||
379 | if (rc < 0) | ||
380 | return rc; | ||
381 | notify |= rc; | ||
382 | } | ||
323 | } else { | 383 | } else { |
324 | if (!(flags & NLM_F_CREATE)) | 384 | if (!(flags & NLM_F_CREATE)) |
325 | return -ENOENT; | 385 | return -ENOENT; |
@@ -333,7 +393,11 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, | |||
333 | return -ENOMEM; | 393 | return -ENOMEM; |
334 | 394 | ||
335 | notify = 1; | 395 | notify = 1; |
336 | f->remote_ip = ip; | 396 | f->remote.remote_ip = ip; |
397 | f->remote.remote_port = port; | ||
398 | f->remote.remote_vni = vni; | ||
399 | f->remote.remote_ifindex = ifindex; | ||
400 | f->remote.remote_next = NULL; | ||
337 | f->state = state; | 401 | f->state = state; |
338 | f->updated = f->used = jiffies; | 402 | f->updated = f->used = jiffies; |
339 | memcpy(f->eth_addr, mac, ETH_ALEN); | 403 | memcpy(f->eth_addr, mac, ETH_ALEN); |
@@ -349,6 +413,19 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, | |||
349 | return 0; | 413 | return 0; |
350 | } | 414 | } |
351 | 415 | ||
416 | void vxlan_fdb_free(struct rcu_head *head) | ||
417 | { | ||
418 | struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu); | ||
419 | |||
420 | while (f->remote.remote_next) { | ||
421 | struct vxlan_rdst *rd = f->remote.remote_next; | ||
422 | |||
423 | f->remote.remote_next = rd->remote_next; | ||
424 | kfree(rd); | ||
425 | } | ||
426 | kfree(f); | ||
427 | } | ||
428 | |||
352 | static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) | 429 | static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) |
353 | { | 430 | { |
354 | netdev_dbg(vxlan->dev, | 431 | netdev_dbg(vxlan->dev, |
@@ -358,7 +435,7 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) | |||
358 | vxlan_fdb_notify(vxlan, f, RTM_DELNEIGH); | 435 | vxlan_fdb_notify(vxlan, f, RTM_DELNEIGH); |
359 | 436 | ||
360 | hlist_del_rcu(&f->hlist); | 437 | hlist_del_rcu(&f->hlist); |
361 | kfree_rcu(f, rcu); | 438 | call_rcu(&f->rcu, vxlan_fdb_free); |
362 | } | 439 | } |
363 | 440 | ||
364 | /* Add static entry (via netlink) */ | 441 | /* Add static entry (via netlink) */ |
@@ -367,7 +444,9 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], | |||
367 | const unsigned char *addr, u16 flags) | 444 | const unsigned char *addr, u16 flags) |
368 | { | 445 | { |
369 | struct vxlan_dev *vxlan = netdev_priv(dev); | 446 | struct vxlan_dev *vxlan = netdev_priv(dev); |
447 | struct net *net = dev_net(vxlan->dev); | ||
370 | __be32 ip; | 448 | __be32 ip; |
449 | u32 port, vni, ifindex; | ||
371 | int err; | 450 | int err; |
372 | 451 | ||
373 | if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) { | 452 | if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) { |
@@ -384,8 +463,36 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], | |||
384 | 463 | ||
385 | ip = nla_get_be32(tb[NDA_DST]); | 464 | ip = nla_get_be32(tb[NDA_DST]); |
386 | 465 | ||
466 | if (tb[NDA_PORT]) { | ||
467 | if (nla_len(tb[NDA_PORT]) != sizeof(u32)) | ||
468 | return -EINVAL; | ||
469 | port = nla_get_u32(tb[NDA_PORT]); | ||
470 | } else | ||
471 | port = vxlan_port; | ||
472 | |||
473 | if (tb[NDA_VNI]) { | ||
474 | if (nla_len(tb[NDA_VNI]) != sizeof(u32)) | ||
475 | return -EINVAL; | ||
476 | vni = nla_get_u32(tb[NDA_VNI]); | ||
477 | } else | ||
478 | vni = vxlan->vni; | ||
479 | |||
480 | if (tb[NDA_IFINDEX]) { | ||
481 | struct net_device *dev; | ||
482 | |||
483 | if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32)) | ||
484 | return -EINVAL; | ||
485 | ifindex = nla_get_u32(tb[NDA_IFINDEX]); | ||
486 | dev = dev_get_by_index(net, ifindex); | ||
487 | if (!dev) | ||
488 | return -EADDRNOTAVAIL; | ||
489 | dev_put(dev); | ||
490 | } else | ||
491 | ifindex = 0; | ||
492 | |||
387 | spin_lock_bh(&vxlan->hash_lock); | 493 | spin_lock_bh(&vxlan->hash_lock); |
388 | err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags); | 494 | err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags, port, |
495 | vni, ifindex); | ||
389 | spin_unlock_bh(&vxlan->hash_lock); | 496 | spin_unlock_bh(&vxlan->hash_lock); |
390 | 497 | ||
391 | return err; | 498 | return err; |
@@ -423,18 +530,21 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, | |||
423 | int err; | 530 | int err; |
424 | 531 | ||
425 | hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { | 532 | hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { |
426 | if (idx < cb->args[0]) | 533 | struct vxlan_rdst *rd; |
427 | goto skip; | 534 | for (rd = &f->remote; rd; rd = rd->remote_next) { |
428 | 535 | if (idx < cb->args[0]) | |
429 | err = vxlan_fdb_info(skb, vxlan, f, | 536 | goto skip; |
430 | NETLINK_CB(cb->skb).portid, | 537 | |
431 | cb->nlh->nlmsg_seq, | 538 | err = vxlan_fdb_info(skb, vxlan, f, |
432 | RTM_NEWNEIGH, | 539 | NETLINK_CB(cb->skb).portid, |
433 | NLM_F_MULTI); | 540 | cb->nlh->nlmsg_seq, |
434 | if (err < 0) | 541 | RTM_NEWNEIGH, |
435 | break; | 542 | NLM_F_MULTI, rd); |
543 | if (err < 0) | ||
544 | break; | ||
436 | skip: | 545 | skip: |
437 | ++idx; | 546 | ++idx; |
547 | } | ||
438 | } | 548 | } |
439 | } | 549 | } |
440 | 550 | ||
@@ -454,22 +564,23 @@ static void vxlan_snoop(struct net_device *dev, | |||
454 | f = vxlan_find_mac(vxlan, src_mac); | 564 | f = vxlan_find_mac(vxlan, src_mac); |
455 | if (likely(f)) { | 565 | if (likely(f)) { |
456 | f->used = jiffies; | 566 | f->used = jiffies; |
457 | if (likely(f->remote_ip == src_ip)) | 567 | if (likely(f->remote.remote_ip == src_ip)) |
458 | return; | 568 | return; |
459 | 569 | ||
460 | if (net_ratelimit()) | 570 | if (net_ratelimit()) |
461 | netdev_info(dev, | 571 | netdev_info(dev, |
462 | "%pM migrated from %pI4 to %pI4\n", | 572 | "%pM migrated from %pI4 to %pI4\n", |
463 | src_mac, &f->remote_ip, &src_ip); | 573 | src_mac, &f->remote.remote_ip, &src_ip); |
464 | 574 | ||
465 | f->remote_ip = src_ip; | 575 | f->remote.remote_ip = src_ip; |
466 | f->updated = jiffies; | 576 | f->updated = jiffies; |
467 | } else { | 577 | } else { |
468 | /* learned new entry */ | 578 | /* learned new entry */ |
469 | spin_lock(&vxlan->hash_lock); | 579 | spin_lock(&vxlan->hash_lock); |
470 | err = vxlan_fdb_create(vxlan, src_mac, src_ip, | 580 | err = vxlan_fdb_create(vxlan, src_mac, src_ip, |
471 | NUD_REACHABLE, | 581 | NUD_REACHABLE, |
472 | NLM_F_EXCL|NLM_F_CREATE); | 582 | NLM_F_EXCL|NLM_F_CREATE, |
583 | vxlan_port, vxlan->vni, 0); | ||
473 | spin_unlock(&vxlan->hash_lock); | 584 | spin_unlock(&vxlan->hash_lock); |
474 | } | 585 | } |
475 | } | 586 | } |
@@ -701,7 +812,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb) | |||
701 | } | 812 | } |
702 | 813 | ||
703 | f = vxlan_find_mac(vxlan, n->ha); | 814 | f = vxlan_find_mac(vxlan, n->ha); |
704 | if (f && f->remote_ip == 0) { | 815 | if (f && f->remote.remote_ip == htonl(INADDR_ANY)) { |
705 | /* bridge-local neighbor */ | 816 | /* bridge-local neighbor */ |
706 | neigh_release(n); | 817 | neigh_release(n); |
707 | goto out; | 818 | goto out; |
@@ -834,47 +945,26 @@ static int handle_offloads(struct sk_buff *skb) | |||
834 | return 0; | 945 | return 0; |
835 | } | 946 | } |
836 | 947 | ||
837 | /* Transmit local packets over Vxlan | 948 | static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, |
838 | * | 949 | struct vxlan_rdst *rdst, bool did_rsc) |
839 | * Outer IP header inherits ECN and DF from inner header. | ||
840 | * Outer UDP destination is the VXLAN assigned port. | ||
841 | * source port is based on hash of flow | ||
842 | */ | ||
843 | static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) | ||
844 | { | 950 | { |
845 | struct vxlan_dev *vxlan = netdev_priv(dev); | 951 | struct vxlan_dev *vxlan = netdev_priv(dev); |
846 | struct rtable *rt; | 952 | struct rtable *rt; |
847 | const struct iphdr *old_iph; | 953 | const struct iphdr *old_iph; |
848 | struct ethhdr *eth; | ||
849 | struct iphdr *iph; | 954 | struct iphdr *iph; |
850 | struct vxlanhdr *vxh; | 955 | struct vxlanhdr *vxh; |
851 | struct udphdr *uh; | 956 | struct udphdr *uh; |
852 | struct flowi4 fl4; | 957 | struct flowi4 fl4; |
853 | unsigned int pkt_len = skb->len; | 958 | unsigned int pkt_len = skb->len; |
854 | __be32 dst; | 959 | __be32 dst; |
855 | __u16 src_port; | 960 | __u16 src_port, dst_port; |
961 | u32 vni; | ||
856 | __be16 df = 0; | 962 | __be16 df = 0; |
857 | __u8 tos, ttl; | 963 | __u8 tos, ttl; |
858 | bool did_rsc = false; | ||
859 | const struct vxlan_fdb *f; | ||
860 | |||
861 | skb_reset_mac_header(skb); | ||
862 | eth = eth_hdr(skb); | ||
863 | |||
864 | if ((vxlan->flags & VXLAN_F_PROXY) && ntohs(eth->h_proto) == ETH_P_ARP) | ||
865 | return arp_reduce(dev, skb); | ||
866 | else if ((vxlan->flags&VXLAN_F_RSC) && ntohs(eth->h_proto) == ETH_P_IP) | ||
867 | did_rsc = route_shortcircuit(dev, skb); | ||
868 | 964 | ||
869 | f = vxlan_find_mac(vxlan, eth->h_dest); | 965 | dst_port = rdst->remote_port ? rdst->remote_port : vxlan_port; |
870 | if (f == NULL) { | 966 | vni = rdst->remote_vni; |
871 | did_rsc = false; | 967 | dst = rdst->remote_ip; |
872 | dst = vxlan->gaddr; | ||
873 | if (!dst && (vxlan->flags & VXLAN_F_L2MISS) && | ||
874 | !is_multicast_ether_addr(eth->h_dest)) | ||
875 | vxlan_fdb_miss(vxlan, eth->h_dest); | ||
876 | } else | ||
877 | dst = f->remote_ip; | ||
878 | 968 | ||
879 | if (!dst) { | 969 | if (!dst) { |
880 | if (did_rsc) { | 970 | if (did_rsc) { |
@@ -922,7 +1012,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) | |||
922 | src_port = vxlan_src_port(vxlan, skb); | 1012 | src_port = vxlan_src_port(vxlan, skb); |
923 | 1013 | ||
924 | memset(&fl4, 0, sizeof(fl4)); | 1014 | memset(&fl4, 0, sizeof(fl4)); |
925 | fl4.flowi4_oif = vxlan->link; | 1015 | fl4.flowi4_oif = rdst->remote_ifindex; |
926 | fl4.flowi4_tos = RT_TOS(tos); | 1016 | fl4.flowi4_tos = RT_TOS(tos); |
927 | fl4.daddr = dst; | 1017 | fl4.daddr = dst; |
928 | fl4.saddr = vxlan->saddr; | 1018 | fl4.saddr = vxlan->saddr; |
@@ -949,13 +1039,13 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) | |||
949 | 1039 | ||
950 | vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); | 1040 | vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); |
951 | vxh->vx_flags = htonl(VXLAN_FLAGS); | 1041 | vxh->vx_flags = htonl(VXLAN_FLAGS); |
952 | vxh->vx_vni = htonl(vxlan->vni << 8); | 1042 | vxh->vx_vni = htonl(vni << 8); |
953 | 1043 | ||
954 | __skb_push(skb, sizeof(*uh)); | 1044 | __skb_push(skb, sizeof(*uh)); |
955 | skb_reset_transport_header(skb); | 1045 | skb_reset_transport_header(skb); |
956 | uh = udp_hdr(skb); | 1046 | uh = udp_hdr(skb); |
957 | 1047 | ||
958 | uh->dest = htons(vxlan_port); | 1048 | uh->dest = htons(dst_port); |
959 | uh->source = htons(src_port); | 1049 | uh->source = htons(src_port); |
960 | 1050 | ||
961 | uh->len = htons(skb->len); | 1051 | uh->len = htons(skb->len); |
@@ -995,6 +1085,64 @@ tx_free: | |||
995 | return NETDEV_TX_OK; | 1085 | return NETDEV_TX_OK; |
996 | } | 1086 | } |
997 | 1087 | ||
1088 | /* Transmit local packets over Vxlan | ||
1089 | * | ||
1090 | * Outer IP header inherits ECN and DF from inner header. | ||
1091 | * Outer UDP destination is the VXLAN assigned port. | ||
1092 | * source port is based on hash of flow | ||
1093 | */ | ||
1094 | static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) | ||
1095 | { | ||
1096 | struct vxlan_dev *vxlan = netdev_priv(dev); | ||
1097 | struct ethhdr *eth; | ||
1098 | bool did_rsc = false; | ||
1099 | struct vxlan_rdst group, *rdst0, *rdst; | ||
1100 | struct vxlan_fdb *f; | ||
1101 | int rc1, rc; | ||
1102 | |||
1103 | skb_reset_mac_header(skb); | ||
1104 | eth = eth_hdr(skb); | ||
1105 | |||
1106 | if ((vxlan->flags & VXLAN_F_PROXY) && ntohs(eth->h_proto) == ETH_P_ARP) | ||
1107 | return arp_reduce(dev, skb); | ||
1108 | else if ((vxlan->flags&VXLAN_F_RSC) && ntohs(eth->h_proto) == ETH_P_IP) | ||
1109 | did_rsc = route_shortcircuit(dev, skb); | ||
1110 | |||
1111 | f = vxlan_find_mac(vxlan, eth->h_dest); | ||
1112 | if (f == NULL) { | ||
1113 | did_rsc = false; | ||
1114 | group.remote_port = vxlan_port; | ||
1115 | group.remote_vni = vxlan->vni; | ||
1116 | group.remote_ip = vxlan->gaddr; | ||
1117 | group.remote_ifindex = vxlan->link; | ||
1118 | group.remote_next = 0; | ||
1119 | rdst0 = &group; | ||
1120 | |||
1121 | if (group.remote_ip == htonl(INADDR_ANY) && | ||
1122 | (vxlan->flags & VXLAN_F_L2MISS) && | ||
1123 | !is_multicast_ether_addr(eth->h_dest)) | ||
1124 | vxlan_fdb_miss(vxlan, eth->h_dest); | ||
1125 | } else | ||
1126 | rdst0 = &f->remote; | ||
1127 | |||
1128 | rc = NETDEV_TX_OK; | ||
1129 | |||
1130 | /* if there are multiple destinations, send copies */ | ||
1131 | for (rdst = rdst0->remote_next; rdst; rdst = rdst->remote_next) { | ||
1132 | struct sk_buff *skb1; | ||
1133 | |||
1134 | skb1 = skb_clone(skb, GFP_ATOMIC); | ||
1135 | rc1 = vxlan_xmit_one(skb1, dev, rdst, did_rsc); | ||
1136 | if (rc == NETDEV_TX_OK) | ||
1137 | rc = rc1; | ||
1138 | } | ||
1139 | |||
1140 | rc1 = vxlan_xmit_one(skb, dev, rdst0, did_rsc); | ||
1141 | if (rc == NETDEV_TX_OK) | ||
1142 | rc = rc1; | ||
1143 | return rc; | ||
1144 | } | ||
1145 | |||
998 | /* Walk the forwarding table and purge stale entries */ | 1146 | /* Walk the forwarding table and purge stale entries */ |
999 | static void vxlan_cleanup(unsigned long arg) | 1147 | static void vxlan_cleanup(unsigned long arg) |
1000 | { | 1148 | { |
@@ -1558,6 +1706,7 @@ static void __exit vxlan_cleanup_module(void) | |||
1558 | { | 1706 | { |
1559 | rtnl_link_unregister(&vxlan_link_ops); | 1707 | rtnl_link_unregister(&vxlan_link_ops); |
1560 | unregister_pernet_device(&vxlan_net_ops); | 1708 | unregister_pernet_device(&vxlan_net_ops); |
1709 | rcu_barrier(); | ||
1561 | } | 1710 | } |
1562 | module_exit(vxlan_cleanup_module); | 1711 | module_exit(vxlan_cleanup_module); |
1563 | 1712 | ||