aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net
diff options
context:
space:
mode:
authorNicolas Dichtel <nicolas.dichtel@6wind.com>2014-04-24 04:02:49 -0400
committerDavid S. Miller <davem@davemloft.net>2014-04-24 16:18:26 -0400
commitf01ec1c017dead42092997a2b8684fcab4cbf126 (patch)
tree8fefd68d91d348f71b472b992021fefbb20add99 /drivers/net
parent6afc0d7a85770abd282ebed41f0f35cde390d861 (diff)
vxlan: add x-netns support
This patch allows switching the netns when a packet is encapsulated or decapsulated. The vxlan socket is opened in the i/o netns, i.e. the netns where encapsulated packets are received. The socket lookup is done in this netns to find the corresponding vxlan tunnel. After decapsulation, the packet is injected into the corresponding interface, which may reside in another netns. When one of the two netns is removed, the tunnel is destroyed.
Configuration example:
  ip netns add netns1
  ip netns exec netns1 ip link set lo up
  ip link add vxlan10 type vxlan id 10 group 239.0.0.10 dev eth0 dstport 0
  ip link set vxlan10 netns netns1
  ip netns exec netns1 ip addr add 192.168.0.249/24 broadcast 192.168.0.255 dev vxlan10
  ip netns exec netns1 ip link set vxlan10 up
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net')
-rw-r--r--drivers/net/vxlan.c63
1 files changed, 47 insertions, 16 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 4dbb2ed85b97..1dfee9a7fbf7 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -127,6 +127,7 @@ struct vxlan_dev {
127 struct list_head next; /* vxlan's per namespace list */ 127 struct list_head next; /* vxlan's per namespace list */
128 struct vxlan_sock *vn_sock; /* listening socket */ 128 struct vxlan_sock *vn_sock; /* listening socket */
129 struct net_device *dev; 129 struct net_device *dev;
130 struct net *net; /* netns for packet i/o */
130 struct vxlan_rdst default_dst; /* default destination */ 131 struct vxlan_rdst default_dst; /* default destination */
131 union vxlan_addr saddr; /* source address */ 132 union vxlan_addr saddr; /* source address */
132 __be16 dst_port; 133 __be16 dst_port;
@@ -1203,6 +1204,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
1203 1204
1204 remote_ip = &vxlan->default_dst.remote_ip; 1205 remote_ip = &vxlan->default_dst.remote_ip;
1205 skb_reset_mac_header(skb); 1206 skb_reset_mac_header(skb);
1207 skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
1206 skb->protocol = eth_type_trans(skb, vxlan->dev); 1208 skb->protocol = eth_type_trans(skb, vxlan->dev);
1207 1209
1208 /* Ignore packet loops (and multicast echo) */ 1210 /* Ignore packet loops (and multicast echo) */
@@ -1618,7 +1620,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
1618 struct dst_entry *dst, struct sk_buff *skb, 1620 struct dst_entry *dst, struct sk_buff *skb,
1619 struct net_device *dev, struct in6_addr *saddr, 1621 struct net_device *dev, struct in6_addr *saddr,
1620 struct in6_addr *daddr, __u8 prio, __u8 ttl, 1622 struct in6_addr *daddr, __u8 prio, __u8 ttl,
1621 __be16 src_port, __be16 dst_port, __be32 vni) 1623 __be16 src_port, __be16 dst_port, __be32 vni,
1624 bool xnet)
1622{ 1625{
1623 struct ipv6hdr *ip6h; 1626 struct ipv6hdr *ip6h;
1624 struct vxlanhdr *vxh; 1627 struct vxlanhdr *vxh;
@@ -1631,7 +1634,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
1631 skb->encapsulation = 1; 1634 skb->encapsulation = 1;
1632 } 1635 }
1633 1636
1634 skb_scrub_packet(skb, false); 1637 skb_scrub_packet(skb, xnet);
1635 1638
1636 min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len 1639 min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
1637 + VXLAN_HLEN + sizeof(struct ipv6hdr) 1640 + VXLAN_HLEN + sizeof(struct ipv6hdr)
@@ -1711,7 +1714,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
1711int vxlan_xmit_skb(struct vxlan_sock *vs, 1714int vxlan_xmit_skb(struct vxlan_sock *vs,
1712 struct rtable *rt, struct sk_buff *skb, 1715 struct rtable *rt, struct sk_buff *skb,
1713 __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, 1716 __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
1714 __be16 src_port, __be16 dst_port, __be32 vni) 1717 __be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
1715{ 1718{
1716 struct vxlanhdr *vxh; 1719 struct vxlanhdr *vxh;
1717 struct udphdr *uh; 1720 struct udphdr *uh;
@@ -1760,7 +1763,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
1760 return err; 1763 return err;
1761 1764
1762 return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP, 1765 return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
1763 tos, ttl, df, false); 1766 tos, ttl, df, xnet);
1764} 1767}
1765EXPORT_SYMBOL_GPL(vxlan_xmit_skb); 1768EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
1766 1769
@@ -1853,7 +1856,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
1853 fl4.daddr = dst->sin.sin_addr.s_addr; 1856 fl4.daddr = dst->sin.sin_addr.s_addr;
1854 fl4.saddr = vxlan->saddr.sin.sin_addr.s_addr; 1857 fl4.saddr = vxlan->saddr.sin.sin_addr.s_addr;
1855 1858
1856 rt = ip_route_output_key(dev_net(dev), &fl4); 1859 rt = ip_route_output_key(vxlan->net, &fl4);
1857 if (IS_ERR(rt)) { 1860 if (IS_ERR(rt)) {
1858 netdev_dbg(dev, "no route to %pI4\n", 1861 netdev_dbg(dev, "no route to %pI4\n",
1859 &dst->sin.sin_addr.s_addr); 1862 &dst->sin.sin_addr.s_addr);
@@ -1874,7 +1877,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
1874 struct vxlan_dev *dst_vxlan; 1877 struct vxlan_dev *dst_vxlan;
1875 1878
1876 ip_rt_put(rt); 1879 ip_rt_put(rt);
1877 dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port); 1880 dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port);
1878 if (!dst_vxlan) 1881 if (!dst_vxlan)
1879 goto tx_error; 1882 goto tx_error;
1880 vxlan_encap_bypass(skb, vxlan, dst_vxlan); 1883 vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1887,7 +1890,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
1887 err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb, 1890 err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
1888 fl4.saddr, dst->sin.sin_addr.s_addr, 1891 fl4.saddr, dst->sin.sin_addr.s_addr,
1889 tos, ttl, df, src_port, dst_port, 1892 tos, ttl, df, src_port, dst_port,
1890 htonl(vni << 8)); 1893 htonl(vni << 8),
1894 !net_eq(vxlan->net, dev_net(vxlan->dev)));
1891 1895
1892 if (err < 0) 1896 if (err < 0)
1893 goto rt_tx_error; 1897 goto rt_tx_error;
@@ -1927,7 +1931,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
1927 struct vxlan_dev *dst_vxlan; 1931 struct vxlan_dev *dst_vxlan;
1928 1932
1929 dst_release(ndst); 1933 dst_release(ndst);
1930 dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port); 1934 dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port);
1931 if (!dst_vxlan) 1935 if (!dst_vxlan)
1932 goto tx_error; 1936 goto tx_error;
1933 vxlan_encap_bypass(skb, vxlan, dst_vxlan); 1937 vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1938,7 +1942,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
1938 1942
1939 err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb, 1943 err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb,
1940 dev, &fl6.saddr, &fl6.daddr, 0, ttl, 1944 dev, &fl6.saddr, &fl6.daddr, 0, ttl,
1941 src_port, dst_port, htonl(vni << 8)); 1945 src_port, dst_port, htonl(vni << 8),
1946 !net_eq(vxlan->net, dev_net(vxlan->dev)));
1942#endif 1947#endif
1943 } 1948 }
1944 1949
@@ -2082,7 +2087,7 @@ static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
2082static int vxlan_init(struct net_device *dev) 2087static int vxlan_init(struct net_device *dev)
2083{ 2088{
2084 struct vxlan_dev *vxlan = netdev_priv(dev); 2089 struct vxlan_dev *vxlan = netdev_priv(dev);
2085 struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); 2090 struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
2086 struct vxlan_sock *vs; 2091 struct vxlan_sock *vs;
2087 2092
2088 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); 2093 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
@@ -2090,7 +2095,7 @@ static int vxlan_init(struct net_device *dev)
2090 return -ENOMEM; 2095 return -ENOMEM;
2091 2096
2092 spin_lock(&vn->sock_lock); 2097 spin_lock(&vn->sock_lock);
2093 vs = vxlan_find_sock(dev_net(dev), vxlan->dst_port); 2098 vs = vxlan_find_sock(vxlan->net, vxlan->dst_port);
2094 if (vs) { 2099 if (vs) {
2095 /* If we have a socket with same port already, reuse it */ 2100 /* If we have a socket with same port already, reuse it */
2096 atomic_inc(&vs->refcnt); 2101 atomic_inc(&vs->refcnt);
@@ -2172,8 +2177,8 @@ static void vxlan_flush(struct vxlan_dev *vxlan)
2172/* Cleanup timer and forwarding table on shutdown */ 2177/* Cleanup timer and forwarding table on shutdown */
2173static int vxlan_stop(struct net_device *dev) 2178static int vxlan_stop(struct net_device *dev)
2174{ 2179{
2175 struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
2176 struct vxlan_dev *vxlan = netdev_priv(dev); 2180 struct vxlan_dev *vxlan = netdev_priv(dev);
2181 struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
2177 struct vxlan_sock *vs = vxlan->vn_sock; 2182 struct vxlan_sock *vs = vxlan->vn_sock;
2178 2183
2179 if (vs && vxlan_addr_multicast(&vxlan->default_dst.remote_ip) && 2184 if (vs && vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
@@ -2202,7 +2207,7 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
2202 struct net_device *lowerdev; 2207 struct net_device *lowerdev;
2203 int max_mtu; 2208 int max_mtu;
2204 2209
2205 lowerdev = __dev_get_by_index(dev_net(dev), dst->remote_ifindex); 2210 lowerdev = __dev_get_by_index(vxlan->net, dst->remote_ifindex);
2206 if (lowerdev == NULL) 2211 if (lowerdev == NULL)
2207 return eth_change_mtu(dev, new_mtu); 2212 return eth_change_mtu(dev, new_mtu);
2208 2213
@@ -2285,7 +2290,6 @@ static void vxlan_setup(struct net_device *dev)
2285 2290
2286 dev->tx_queue_len = 0; 2291 dev->tx_queue_len = 0;
2287 dev->features |= NETIF_F_LLTX; 2292 dev->features |= NETIF_F_LLTX;
2288 dev->features |= NETIF_F_NETNS_LOCAL;
2289 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; 2293 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
2290 dev->features |= NETIF_F_RXCSUM; 2294 dev->features |= NETIF_F_RXCSUM;
2291 dev->features |= NETIF_F_GSO_SOFTWARE; 2295 dev->features |= NETIF_F_GSO_SOFTWARE;
@@ -2578,7 +2582,7 @@ EXPORT_SYMBOL_GPL(vxlan_sock_add);
2578static void vxlan_sock_work(struct work_struct *work) 2582static void vxlan_sock_work(struct work_struct *work)
2579{ 2583{
2580 struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, sock_work); 2584 struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, sock_work);
2581 struct net *net = dev_net(vxlan->dev); 2585 struct net *net = vxlan->net;
2582 struct vxlan_net *vn = net_generic(net, vxlan_net_id); 2586 struct vxlan_net *vn = net_generic(net, vxlan_net_id);
2583 __be16 port = vxlan->dst_port; 2587 __be16 port = vxlan->dst_port;
2584 struct vxlan_sock *nvs; 2588 struct vxlan_sock *nvs;
@@ -2605,6 +2609,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
2605 if (!data[IFLA_VXLAN_ID]) 2609 if (!data[IFLA_VXLAN_ID])
2606 return -EINVAL; 2610 return -EINVAL;
2607 2611
2612 vxlan->net = dev_net(dev);
2613
2608 vni = nla_get_u32(data[IFLA_VXLAN_ID]); 2614 vni = nla_get_u32(data[IFLA_VXLAN_ID]);
2609 dst->remote_vni = vni; 2615 dst->remote_vni = vni;
2610 2616
@@ -2739,8 +2745,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
2739 2745
2740static void vxlan_dellink(struct net_device *dev, struct list_head *head) 2746static void vxlan_dellink(struct net_device *dev, struct list_head *head)
2741{ 2747{
2742 struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
2743 struct vxlan_dev *vxlan = netdev_priv(dev); 2748 struct vxlan_dev *vxlan = netdev_priv(dev);
2749 struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
2744 2750
2745 spin_lock(&vn->sock_lock); 2751 spin_lock(&vn->sock_lock);
2746 if (!hlist_unhashed(&vxlan->hlist)) 2752 if (!hlist_unhashed(&vxlan->hlist))
@@ -2905,8 +2911,33 @@ static __net_init int vxlan_init_net(struct net *net)
2905 return 0; 2911 return 0;
2906} 2912}
2907 2913
2914static void __net_exit vxlan_exit_net(struct net *net)
2915{
2916 struct vxlan_net *vn = net_generic(net, vxlan_net_id);
2917 struct vxlan_dev *vxlan, *next;
2918 struct net_device *dev, *aux;
2919 LIST_HEAD(list);
2920
2921 rtnl_lock();
2922 for_each_netdev_safe(net, dev, aux)
2923 if (dev->rtnl_link_ops == &vxlan_link_ops)
2924 unregister_netdevice_queue(dev, &list);
2925
2926 list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
2927 /* If vxlan->dev is in the same netns, it has already been added
2928 * to the list by the previous loop.
2929 */
2930 if (!net_eq(dev_net(vxlan->dev), net))
2931 unregister_netdevice_queue(dev, &list);
2932 }
2933
2934 unregister_netdevice_many(&list);
2935 rtnl_unlock();
2936}
2937
2908static struct pernet_operations vxlan_net_ops = { 2938static struct pernet_operations vxlan_net_ops = {
2909 .init = vxlan_init_net, 2939 .init = vxlan_init_net,
2940 .exit = vxlan_exit_net,
2910 .id = &vxlan_net_id, 2941 .id = &vxlan_net_id,
2911 .size = sizeof(struct vxlan_net), 2942 .size = sizeof(struct vxlan_net),
2912}; 2943};