about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author David Stevens <dlstevens@us.ibm.com> 2013-04-18 20:36:26 -0400
committer David S. Miller <davem@davemloft.net> 2013-04-22 16:19:51 -0400
commit ae8840825605f36f98f247323edc150e761cb64e (patch)
tree f639aa7898c0d6cab0eca99d7b95fa3530a100ba
parent 53759be99772f39db5148a7066a768066592a1e7 (diff)
VXLAN: Allow L2 redirection with L3 switching
Allow L2 redirection when VXLAN L3 switching is enabled

This patch restricts L3 switching to destination MAC addresses that are marked as routers in order to allow virtual IP appliances that do L2 redirection to function with VXLAN L3 switching enabled.

We use L3 switching on VXLAN networks to avoid extra hops when the nominal router for cross-subnet traffic for a VM is remote and the ultimate destination may be local, or closer to the local node. Currently, the destination IP address takes precedence over the MAC address in all cases. Some network appliances receive packets for a virtualized IP address and redirect by changing the destination MAC address (only) to be the final destination for packet processing. VXLAN tunnel endpoints with L3 switching enabled may then overwrite this destination MAC address based on the packet IP address, resulting in potential loops and, at least, breaking L2 redirections that travel through tunnel endpoints.

This patch limits L3 switching to the intended case where the original destination MAC address is a next-hop router and relies on the destination MAC address for all other cases, thus allowing L2 redirection and L3 switching to coexist peacefully.

Signed-Off-By: David L Stevens <dlstevens@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- drivers/net/vxlan.c 30
1 files changed, 23 insertions, 7 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 916a62149a12..a7fd9a089a35 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -98,6 +98,7 @@ struct vxlan_fdb {
98 unsigned long used; 98 unsigned long used;
99 struct vxlan_rdst remote; 99 struct vxlan_rdst remote;
100 u16 state; /* see ndm_state */ 100 u16 state; /* see ndm_state */
101 u8 flags; /* see ndm_flags */
101 u8 eth_addr[ETH_ALEN]; 102 u8 eth_addr[ETH_ALEN];
102}; 103};
103 104
@@ -180,7 +181,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
180 ndm->ndm_family = AF_BRIDGE; 181 ndm->ndm_family = AF_BRIDGE;
181 ndm->ndm_state = fdb->state; 182 ndm->ndm_state = fdb->state;
182 ndm->ndm_ifindex = vxlan->dev->ifindex; 183 ndm->ndm_ifindex = vxlan->dev->ifindex;
183 ndm->ndm_flags = NTF_SELF; 184 ndm->ndm_flags = fdb->flags;
184 ndm->ndm_type = NDA_DST; 185 ndm->ndm_type = NDA_DST;
185 186
186 if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) 187 if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
@@ -343,7 +344,8 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
343static int vxlan_fdb_create(struct vxlan_dev *vxlan, 344static int vxlan_fdb_create(struct vxlan_dev *vxlan,
344 const u8 *mac, __be32 ip, 345 const u8 *mac, __be32 ip,
345 __u16 state, __u16 flags, 346 __u16 state, __u16 flags,
346 __u32 port, __u32 vni, __u32 ifindex) 347 __u32 port, __u32 vni, __u32 ifindex,
348 __u8 ndm_flags)
347{ 349{
348 struct vxlan_fdb *f; 350 struct vxlan_fdb *f;
349 int notify = 0; 351 int notify = 0;
@@ -360,6 +362,11 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
360 f->updated = jiffies; 362 f->updated = jiffies;
361 notify = 1; 363 notify = 1;
362 } 364 }
365 if (f->flags != ndm_flags) {
366 f->flags = ndm_flags;
367 f->updated = jiffies;
368 notify = 1;
369 }
363 if ((flags & NLM_F_APPEND) && 370 if ((flags & NLM_F_APPEND) &&
364 is_multicast_ether_addr(f->eth_addr)) { 371 is_multicast_ether_addr(f->eth_addr)) {
365 int rc = vxlan_fdb_append(f, ip, port, vni, ifindex); 372 int rc = vxlan_fdb_append(f, ip, port, vni, ifindex);
@@ -387,6 +394,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
387 f->remote.remote_ifindex = ifindex; 394 f->remote.remote_ifindex = ifindex;
388 f->remote.remote_next = NULL; 395 f->remote.remote_next = NULL;
389 f->state = state; 396 f->state = state;
397 f->flags = ndm_flags;
390 f->updated = f->used = jiffies; 398 f->updated = f->used = jiffies;
391 memcpy(f->eth_addr, mac, ETH_ALEN); 399 memcpy(f->eth_addr, mac, ETH_ALEN);
392 400
@@ -480,7 +488,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
480 488
481 spin_lock_bh(&vxlan->hash_lock); 489 spin_lock_bh(&vxlan->hash_lock);
482 err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags, port, 490 err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags, port,
483 vni, ifindex); 491 vni, ifindex, ndm->ndm_flags);
484 spin_unlock_bh(&vxlan->hash_lock); 492 spin_unlock_bh(&vxlan->hash_lock);
485 493
486 return err; 494 return err;
@@ -568,7 +576,9 @@ static void vxlan_snoop(struct net_device *dev,
568 err = vxlan_fdb_create(vxlan, src_mac, src_ip, 576 err = vxlan_fdb_create(vxlan, src_mac, src_ip,
569 NUD_REACHABLE, 577 NUD_REACHABLE,
570 NLM_F_EXCL|NLM_F_CREATE, 578 NLM_F_EXCL|NLM_F_CREATE,
571 vxlan_port, vxlan->default_dst.remote_vni, 0); 579 vxlan_port,
580 vxlan->default_dst.remote_vni,
581 0, NTF_SELF);
572 spin_unlock(&vxlan->hash_lock); 582 spin_unlock(&vxlan->hash_lock);
573 } 583 }
574} 584}
@@ -1098,12 +1108,18 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
1098 1108
1099 if ((vxlan->flags & VXLAN_F_PROXY) && ntohs(eth->h_proto) == ETH_P_ARP) 1109 if ((vxlan->flags & VXLAN_F_PROXY) && ntohs(eth->h_proto) == ETH_P_ARP)
1100 return arp_reduce(dev, skb); 1110 return arp_reduce(dev, skb);
1101 else if ((vxlan->flags&VXLAN_F_RSC) && ntohs(eth->h_proto) == ETH_P_IP)
1102 did_rsc = route_shortcircuit(dev, skb);
1103 1111
1104 f = vxlan_find_mac(vxlan, eth->h_dest); 1112 f = vxlan_find_mac(vxlan, eth->h_dest);
1113 did_rsc = false;
1114
1115 if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) &&
1116 ntohs(eth->h_proto) == ETH_P_IP) {
1117 did_rsc = route_shortcircuit(dev, skb);
1118 if (did_rsc)
1119 f = vxlan_find_mac(vxlan, eth->h_dest);
1120 }
1121
1105 if (f == NULL) { 1122 if (f == NULL) {
1106 did_rsc = false;
1107 rdst0 = &vxlan->default_dst; 1123 rdst0 = &vxlan->default_dst;
1108 1124
1109 if (rdst0->remote_ip == htonl(INADDR_ANY) && 1125 if (rdst0->remote_ip == htonl(INADDR_ANY) &&