about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/net
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net')
-rw-r--r-- drivers/net/vxlan.c 263
1 files changed, 181 insertions, 82 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index ba81f3c39a83..80d359c6310b 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -44,6 +44,8 @@
44 44
45#define VXLAN_VERSION "0.1" 45#define VXLAN_VERSION "0.1"
46 46
47#define PORT_HASH_BITS 8
48#define PORT_HASH_SIZE (1<<PORT_HASH_BITS)
47#define VNI_HASH_BITS 10 49#define VNI_HASH_BITS 10
48#define VNI_HASH_SIZE (1<<VNI_HASH_BITS) 50#define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
49#define FDB_HASH_BITS 8 51#define FDB_HASH_BITS 8
@@ -76,13 +78,24 @@ static bool log_ecn_error = true;
76module_param(log_ecn_error, bool, 0644); 78module_param(log_ecn_error, bool, 0644);
77MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 79MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
78 80
79/* per-net private data for this module */
80static unsigned int vxlan_net_id; 81static unsigned int vxlan_net_id;
81struct vxlan_net { 82
82 struct socket *sock; /* UDP encap socket */ 83/* per UDP socket information */
84struct vxlan_sock {
85 struct hlist_node hlist;
86 struct rcu_head rcu;
87 struct work_struct del_work;
88 unsigned int refcnt;
89 struct socket *sock;
83 struct hlist_head vni_list[VNI_HASH_SIZE]; 90 struct hlist_head vni_list[VNI_HASH_SIZE];
84}; 91};
85 92
93/* per-network namespace private data for this module */
94struct vxlan_net {
95 struct list_head vxlan_list;
96 struct hlist_head sock_list[PORT_HASH_SIZE];
97};
98
86struct vxlan_rdst { 99struct vxlan_rdst {
87 struct rcu_head rcu; 100 struct rcu_head rcu;
88 __be32 remote_ip; 101 __be32 remote_ip;
@@ -106,7 +119,9 @@ struct vxlan_fdb {
106 119
107/* Pseudo network device */ 120/* Pseudo network device */
108struct vxlan_dev { 121struct vxlan_dev {
109 struct hlist_node hlist; 122 struct hlist_node hlist; /* vni hash table */
123 struct list_head next; /* vxlan's per namespace list */
124 struct vxlan_sock *vn_sock; /* listening socket */
110 struct net_device *dev; 125 struct net_device *dev;
111 struct vxlan_rdst default_dst; /* default destination */ 126 struct vxlan_rdst default_dst; /* default destination */
112 __be32 saddr; /* source address */ 127 __be32 saddr; /* source address */
@@ -135,19 +150,43 @@ struct vxlan_dev {
135/* salt for hash table */ 150/* salt for hash table */
136static u32 vxlan_salt __read_mostly; 151static u32 vxlan_salt __read_mostly;
137 152
138static inline struct hlist_head *vni_head(struct net *net, u32 id) 153/* Virtual Network hash table head */
154static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id)
155{
156 return &vs->vni_list[hash_32(id, VNI_HASH_BITS)];
157}
158
159/* Socket hash table head */
160static inline struct hlist_head *vs_head(struct net *net, __be16 port)
139{ 161{
140 struct vxlan_net *vn = net_generic(net, vxlan_net_id); 162 struct vxlan_net *vn = net_generic(net, vxlan_net_id);
141 163
142 return &vn->vni_list[hash_32(id, VNI_HASH_BITS)]; 164 return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
165}
166
167/* Find VXLAN socket based on network namespace and UDP port */
168static struct vxlan_sock *vxlan_find_port(struct net *net, __be16 port)
169{
170 struct vxlan_sock *vs;
171
172 hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
173 if (inet_sk(vs->sock->sk)->inet_sport == port)
174 return vs;
175 }
176 return NULL;
143} 177}
144 178
145/* Look up VNI in a per net namespace table */ 179/* Look up VNI in a per net namespace table */
146static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id) 180static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, __be16 port)
147{ 181{
182 struct vxlan_sock *vs;
148 struct vxlan_dev *vxlan; 183 struct vxlan_dev *vxlan;
149 184
150 hlist_for_each_entry_rcu(vxlan, vni_head(net, id), hlist) { 185 vs = vxlan_find_port(net, port);
186 if (!vs)
187 return NULL;
188
189 hlist_for_each_entry_rcu(vxlan, vni_head(vs, id), hlist) {
151 if (vxlan->default_dst.remote_vni == id) 190 if (vxlan->default_dst.remote_vni == id)
152 return vxlan; 191 return vxlan;
153 } 192 }
@@ -592,20 +631,18 @@ static void vxlan_snoop(struct net_device *dev,
592static bool vxlan_group_used(struct vxlan_net *vn, 631static bool vxlan_group_used(struct vxlan_net *vn,
593 const struct vxlan_dev *this) 632 const struct vxlan_dev *this)
594{ 633{
595 const struct vxlan_dev *vxlan; 634 struct vxlan_dev *vxlan;
596 unsigned h;
597 635
598 for (h = 0; h < VNI_HASH_SIZE; ++h) 636 list_for_each_entry(vxlan, &vn->vxlan_list, next) {
599 hlist_for_each_entry(vxlan, &vn->vni_list[h], hlist) { 637 if (vxlan == this)
600 if (vxlan == this) 638 continue;
601 continue;
602 639
603 if (!netif_running(vxlan->dev)) 640 if (!netif_running(vxlan->dev))
604 continue; 641 continue;
605 642
606 if (vxlan->default_dst.remote_ip == this->default_dst.remote_ip) 643 if (vxlan->default_dst.remote_ip == this->default_dst.remote_ip)
607 return true; 644 return true;
608 } 645 }
609 646
610 return false; 647 return false;
611} 648}
@@ -615,7 +652,7 @@ static int vxlan_join_group(struct net_device *dev)
615{ 652{
616 struct vxlan_dev *vxlan = netdev_priv(dev); 653 struct vxlan_dev *vxlan = netdev_priv(dev);
617 struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); 654 struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
618 struct sock *sk = vn->sock->sk; 655 struct sock *sk = vxlan->vn_sock->sock->sk;
619 struct ip_mreqn mreq = { 656 struct ip_mreqn mreq = {
620 .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip, 657 .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip,
621 .imr_ifindex = vxlan->default_dst.remote_ifindex, 658 .imr_ifindex = vxlan->default_dst.remote_ifindex,
@@ -643,7 +680,7 @@ static int vxlan_leave_group(struct net_device *dev)
643 struct vxlan_dev *vxlan = netdev_priv(dev); 680 struct vxlan_dev *vxlan = netdev_priv(dev);
644 struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); 681 struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
645 int err = 0; 682 int err = 0;
646 struct sock *sk = vn->sock->sk; 683 struct sock *sk = vxlan->vn_sock->sock->sk;
647 struct ip_mreqn mreq = { 684 struct ip_mreqn mreq = {
648 .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip, 685 .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip,
649 .imr_ifindex = vxlan->default_dst.remote_ifindex, 686 .imr_ifindex = vxlan->default_dst.remote_ifindex,
@@ -670,6 +707,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
670 struct vxlanhdr *vxh; 707 struct vxlanhdr *vxh;
671 struct vxlan_dev *vxlan; 708 struct vxlan_dev *vxlan;
672 struct pcpu_tstats *stats; 709 struct pcpu_tstats *stats;
710 __be16 port;
673 __u32 vni; 711 __u32 vni;
674 int err; 712 int err;
675 713
@@ -693,9 +731,11 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
693 731
694 /* Is this VNI defined? */ 732 /* Is this VNI defined? */
695 vni = ntohl(vxh->vx_vni) >> 8; 733 vni = ntohl(vxh->vx_vni) >> 8;
696 vxlan = vxlan_find_vni(sock_net(sk), vni); 734 port = inet_sk(sk)->inet_sport;
735 vxlan = vxlan_find_vni(sock_net(sk), vni, port);
697 if (!vxlan) { 736 if (!vxlan) {
698 netdev_dbg(skb->dev, "unknown vni %d\n", vni); 737 netdev_dbg(skb->dev, "unknown vni %d port %u\n",
738 vni, ntohs(port));
699 goto drop; 739 goto drop;
700 } 740 }
701 741
@@ -875,7 +915,7 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
875 return false; 915 return false;
876} 916}
877 917
878static void vxlan_sock_free(struct sk_buff *skb) 918static void vxlan_sock_put(struct sk_buff *skb)
879{ 919{
880 sock_put(skb->sk); 920 sock_put(skb->sk);
881} 921}
@@ -883,13 +923,13 @@ static void vxlan_sock_free(struct sk_buff *skb)
883/* On transmit, associate with the tunnel socket */ 923/* On transmit, associate with the tunnel socket */
884static void vxlan_set_owner(struct net_device *dev, struct sk_buff *skb) 924static void vxlan_set_owner(struct net_device *dev, struct sk_buff *skb)
885{ 925{
886 struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); 926 struct vxlan_dev *vxlan = netdev_priv(dev);
887 struct sock *sk = vn->sock->sk; 927 struct sock *sk = vxlan->vn_sock->sock->sk;
888 928
889 skb_orphan(skb); 929 skb_orphan(skb);
890 sock_hold(sk); 930 sock_hold(sk);
891 skb->sk = sk; 931 skb->sk = sk;
892 skb->destructor = vxlan_sock_free; 932 skb->destructor = vxlan_sock_put;
893} 933}
894 934
895/* Compute source port for outgoing packet 935/* Compute source port for outgoing packet
@@ -1031,7 +1071,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
1031 struct vxlan_dev *dst_vxlan; 1071 struct vxlan_dev *dst_vxlan;
1032 1072
1033 ip_rt_put(rt); 1073 ip_rt_put(rt);
1034 dst_vxlan = vxlan_find_vni(dev_net(dev), vni); 1074 dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port);
1035 if (!dst_vxlan) 1075 if (!dst_vxlan)
1036 goto tx_error; 1076 goto tx_error;
1037 vxlan_encap_bypass(skb, vxlan, dst_vxlan); 1077 vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1306,6 +1346,7 @@ static void vxlan_setup(struct net_device *dev)
1306 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 1346 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1307 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1347 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1308 1348
1349 INIT_LIST_HEAD(&vxlan->next);
1309 spin_lock_init(&vxlan->hash_lock); 1350 spin_lock_init(&vxlan->hash_lock);
1310 1351
1311 init_timer_deferrable(&vxlan->age_timer); 1352 init_timer_deferrable(&vxlan->age_timer);
@@ -1390,11 +1431,78 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
1390 .get_link = ethtool_op_get_link, 1431 .get_link = ethtool_op_get_link,
1391}; 1432};
1392 1433
1434static void vxlan_del_work(struct work_struct *work)
1435{
1436 struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work);
1437
1438 sk_release_kernel(vs->sock->sk);
1439 kfree_rcu(vs, rcu);
1440}
1441
1442/* Create new listen socket if needed */
1443static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
1444{
1445 struct vxlan_sock *vs;
1446 struct sock *sk;
1447 struct sockaddr_in vxlan_addr = {
1448 .sin_family = AF_INET,
1449 .sin_addr.s_addr = htonl(INADDR_ANY),
1450 };
1451 int rc;
1452 unsigned h;
1453
1454 vs = kmalloc(sizeof(*vs), GFP_KERNEL);
1455 if (!vs)
1456 return ERR_PTR(-ENOMEM);
1457
1458 for (h = 0; h < VNI_HASH_SIZE; ++h)
1459 INIT_HLIST_HEAD(&vs->vni_list[h]);
1460
1461 INIT_WORK(&vs->del_work, vxlan_del_work);
1462
1463 /* Create UDP socket for encapsulation receive. */
1464 rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &vs->sock);
1465 if (rc < 0) {
1466 pr_debug("UDP socket create failed\n");
1467 kfree(vs);
1468 return ERR_PTR(rc);
1469 }
1470
1471 /* Put in proper namespace */
1472 sk = vs->sock->sk;
1473 sk_change_net(sk, net);
1474
1475 vxlan_addr.sin_port = port;
1476
1477 rc = kernel_bind(vs->sock, (struct sockaddr *) &vxlan_addr,
1478 sizeof(vxlan_addr));
1479 if (rc < 0) {
1480 pr_debug("bind for UDP socket %pI4:%u (%d)\n",
1481 &vxlan_addr.sin_addr, ntohs(vxlan_addr.sin_port), rc);
1482 sk_release_kernel(sk);
1483 kfree(vs);
1484 return ERR_PTR(rc);
1485 }
1486
1487 /* Disable multicast loopback */
1488 inet_sk(sk)->mc_loop = 0;
1489
1490 /* Mark socket as an encapsulation socket. */
1491 udp_sk(sk)->encap_type = 1;
1492 udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
1493 udp_encap_enable();
1494
1495 vs->refcnt = 1;
1496 return vs;
1497}
1498
1393static int vxlan_newlink(struct net *net, struct net_device *dev, 1499static int vxlan_newlink(struct net *net, struct net_device *dev,
1394 struct nlattr *tb[], struct nlattr *data[]) 1500 struct nlattr *tb[], struct nlattr *data[])
1395{ 1501{
1502 struct vxlan_net *vn = net_generic(net, vxlan_net_id);
1396 struct vxlan_dev *vxlan = netdev_priv(dev); 1503 struct vxlan_dev *vxlan = netdev_priv(dev);
1397 struct vxlan_rdst *dst = &vxlan->default_dst; 1504 struct vxlan_rdst *dst = &vxlan->default_dst;
1505 struct vxlan_sock *vs;
1398 __u32 vni; 1506 __u32 vni;
1399 int err; 1507 int err;
1400 1508
@@ -1402,10 +1510,6 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
1402 return -EINVAL; 1510 return -EINVAL;
1403 1511
1404 vni = nla_get_u32(data[IFLA_VXLAN_ID]); 1512 vni = nla_get_u32(data[IFLA_VXLAN_ID]);
1405 if (vxlan_find_vni(net, vni)) {
1406 pr_info("duplicate VNI %u\n", vni);
1407 return -EEXIST;
1408 }
1409 dst->remote_vni = vni; 1513 dst->remote_vni = vni;
1410 1514
1411 if (data[IFLA_VXLAN_GROUP]) 1515 if (data[IFLA_VXLAN_GROUP])
@@ -1471,22 +1575,58 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
1471 if (data[IFLA_VXLAN_PORT]) 1575 if (data[IFLA_VXLAN_PORT])
1472 vxlan->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]); 1576 vxlan->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
1473 1577
1578 if (vxlan_find_vni(net, vni, vxlan->dst_port)) {
1579 pr_info("duplicate VNI %u\n", vni);
1580 return -EEXIST;
1581 }
1582
1583 vs = vxlan_find_port(net, vxlan->dst_port);
1584 if (vs)
1585 ++vs->refcnt;
1586 else {
1587 /* Drop lock because socket create acquires RTNL lock */
1588 rtnl_unlock();
1589 vs = vxlan_socket_create(net, vxlan->dst_port);
1590 rtnl_lock();
1591 if (IS_ERR(vs))
1592 return PTR_ERR(vs);
1593
1594 hlist_add_head_rcu(&vs->hlist, vs_head(net, vxlan->dst_port));
1595 }
1596 vxlan->vn_sock = vs;
1597
1474 SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops); 1598 SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops);
1475 1599
1476 err = register_netdevice(dev); 1600 err = register_netdevice(dev);
1477 if (!err) 1601 if (err) {
1478 hlist_add_head_rcu(&vxlan->hlist, vni_head(net, dst->remote_vni)); 1602 if (--vs->refcnt == 0) {
1603 rtnl_unlock();
1604 sk_release_kernel(vs->sock->sk);
1605 kfree(vs);
1606 rtnl_lock();
1607 }
1608 return err;
1609 }
1479 1610
1480 return err; 1611 list_add(&vxlan->next, &vn->vxlan_list);
1612 hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
1613
1614 return 0;
1481} 1615}
1482 1616
1483static void vxlan_dellink(struct net_device *dev, struct list_head *head) 1617static void vxlan_dellink(struct net_device *dev, struct list_head *head)
1484{ 1618{
1485 struct vxlan_dev *vxlan = netdev_priv(dev); 1619 struct vxlan_dev *vxlan = netdev_priv(dev);
1620 struct vxlan_sock *vs = vxlan->vn_sock;
1486 1621
1487 hlist_del_rcu(&vxlan->hlist); 1622 hlist_del_rcu(&vxlan->hlist);
1488 1623 list_del(&vxlan->next);
1489 unregister_netdevice_queue(dev, head); 1624 unregister_netdevice_queue(dev, head);
1625
1626 if (--vs->refcnt == 0) {
1627 hlist_del_rcu(&vs->hlist);
1628 schedule_work(&vs->del_work);
1629 }
1490} 1630}
1491 1631
1492static size_t vxlan_get_size(const struct net_device *dev) 1632static size_t vxlan_get_size(const struct net_device *dev)
@@ -1572,46 +1712,12 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
1572static __net_init int vxlan_init_net(struct net *net) 1712static __net_init int vxlan_init_net(struct net *net)
1573{ 1713{
1574 struct vxlan_net *vn = net_generic(net, vxlan_net_id); 1714 struct vxlan_net *vn = net_generic(net, vxlan_net_id);
1575 struct sock *sk;
1576 struct sockaddr_in vxlan_addr = {
1577 .sin_family = AF_INET,
1578 .sin_addr.s_addr = htonl(INADDR_ANY),
1579 };
1580 int rc;
1581 unsigned h; 1715 unsigned h;
1582 1716
1583 /* Create UDP socket for encapsulation receive. */ 1717 INIT_LIST_HEAD(&vn->vxlan_list);
1584 rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &vn->sock);
1585 if (rc < 0) {
1586 pr_debug("UDP socket create failed\n");
1587 return rc;
1588 }
1589 /* Put in proper namespace */
1590 sk = vn->sock->sk;
1591 sk_change_net(sk, net);
1592
1593 vxlan_addr.sin_port = htons(vxlan_port);
1594
1595 rc = kernel_bind(vn->sock, (struct sockaddr *) &vxlan_addr,
1596 sizeof(vxlan_addr));
1597 if (rc < 0) {
1598 pr_debug("bind for UDP socket %pI4:%u (%d)\n",
1599 &vxlan_addr.sin_addr, ntohs(vxlan_addr.sin_port), rc);
1600 sk_release_kernel(sk);
1601 vn->sock = NULL;
1602 return rc;
1603 }
1604
1605 /* Disable multicast loopback */
1606 inet_sk(sk)->mc_loop = 0;
1607 1718
1608 /* Mark socket as an encapsulation socket. */ 1719 for (h = 0; h < PORT_HASH_SIZE; ++h)
1609 udp_sk(sk)->encap_type = 1; 1720 INIT_HLIST_HEAD(&vn->sock_list[h]);
1610 udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
1611 udp_encap_enable();
1612
1613 for (h = 0; h < VNI_HASH_SIZE; ++h)
1614 INIT_HLIST_HEAD(&vn->vni_list[h]);
1615 1721
1616 return 0; 1722 return 0;
1617} 1723}
@@ -1620,18 +1726,11 @@ static __net_exit void vxlan_exit_net(struct net *net)
1620{ 1726{
1621 struct vxlan_net *vn = net_generic(net, vxlan_net_id); 1727 struct vxlan_net *vn = net_generic(net, vxlan_net_id);
1622 struct vxlan_dev *vxlan; 1728 struct vxlan_dev *vxlan;
1623 unsigned h;
1624 1729
1625 rtnl_lock(); 1730 rtnl_lock();
1626 for (h = 0; h < VNI_HASH_SIZE; ++h) 1731 list_for_each_entry(vxlan, &vn->vxlan_list, next)
1627 hlist_for_each_entry(vxlan, &vn->vni_list[h], hlist) 1732 dev_close(vxlan->dev);
1628 dev_close(vxlan->dev);
1629 rtnl_unlock(); 1733 rtnl_unlock();
1630
1631 if (vn->sock) {
1632 sk_release_kernel(vn->sock->sk);
1633 vn->sock = NULL;
1634 }
1635} 1734}
1636 1735
1637static struct pernet_operations vxlan_net_ops = { 1736static struct pernet_operations vxlan_net_ops = {