aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/vxlan.c
diff options
context:
space:
mode:
authorStephen Hemminger <stephen@networkplumber.org>2013-06-17 17:16:11 -0400
committerStephen Hemminger <stephen@networkplumber.org>2013-06-24 11:40:32 -0400
commit1c51a9159ddefa5119724a4c7da3fd3ef44b68d5 (patch)
treebcc3144d0fcc631d666be990c6bb95b69505b741 /drivers/net/vxlan.c
parent8385f50a03a8ad3d2c6d76b1117c959261ab7a1c (diff)
vxlan: fix race caused by dropping rtnl_unlock
It is possible for two cpu's to race creating vxlan device. For most cases this is harmless, but the ability to assign "next available vxlan device" relies on rtnl lock being held across the whole operation. Therefore two instances of calling: ip li add vxlan%d vxlan ... could collide and create two devices with same name. To fix this defer creation of socket to a work queue, and handle possible races there. Introduce a lock to ensure that changes to vxlan socket hash list is SMP safe. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Diffstat (limited to 'drivers/net/vxlan.c')
-rw-r--r--drivers/net/vxlan.c111
1 file changed, 84 insertions, 27 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 1f2aa26550e9..71da8be98801 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -94,6 +94,7 @@ struct vxlan_sock {
94struct vxlan_net { 94struct vxlan_net {
95 struct list_head vxlan_list; 95 struct list_head vxlan_list;
96 struct hlist_head sock_list[PORT_HASH_SIZE]; 96 struct hlist_head sock_list[PORT_HASH_SIZE];
97 spinlock_t sock_lock;
97}; 98};
98 99
99struct vxlan_rdst { 100struct vxlan_rdst {
@@ -131,7 +132,9 @@ struct vxlan_dev {
131 __u8 ttl; 132 __u8 ttl;
132 u32 flags; /* VXLAN_F_* below */ 133 u32 flags; /* VXLAN_F_* below */
133 134
135 struct work_struct sock_work;
134 struct work_struct igmp_work; 136 struct work_struct igmp_work;
137
135 unsigned long age_interval; 138 unsigned long age_interval;
136 struct timer_list age_timer; 139 struct timer_list age_timer;
137 spinlock_t hash_lock; 140 spinlock_t hash_lock;
@@ -151,6 +154,8 @@ struct vxlan_dev {
151static u32 vxlan_salt __read_mostly; 154static u32 vxlan_salt __read_mostly;
152static struct workqueue_struct *vxlan_wq; 155static struct workqueue_struct *vxlan_wq;
153 156
157static void vxlan_sock_work(struct work_struct *work);
158
154/* Virtual Network hash table head */ 159/* Virtual Network hash table head */
155static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id) 160static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id)
156{ 161{
@@ -670,12 +675,15 @@ static void vxlan_sock_hold(struct vxlan_sock *vs)
670 atomic_inc(&vs->refcnt); 675 atomic_inc(&vs->refcnt);
671} 676}
672 677
673static void vxlan_sock_release(struct vxlan_sock *vs) 678static void vxlan_sock_release(struct vxlan_net *vn, struct vxlan_sock *vs)
674{ 679{
675 if (!atomic_dec_and_test(&vs->refcnt)) 680 if (!atomic_dec_and_test(&vs->refcnt))
676 return; 681 return;
677 682
683 spin_lock(&vn->sock_lock);
678 hlist_del_rcu(&vs->hlist); 684 hlist_del_rcu(&vs->hlist);
685 spin_unlock(&vn->sock_lock);
686
679 queue_work(vxlan_wq, &vs->del_work); 687 queue_work(vxlan_wq, &vs->del_work);
680} 688}
681 689
@@ -700,7 +708,7 @@ static void vxlan_igmp_work(struct work_struct *work)
700 ip_mc_leave_group(sk, &mreq); 708 ip_mc_leave_group(sk, &mreq);
701 release_sock(sk); 709 release_sock(sk);
702 710
703 vxlan_sock_release(vs); 711 vxlan_sock_release(vn, vs);
704 dev_put(vxlan->dev); 712 dev_put(vxlan->dev);
705} 713}
706 714
@@ -1222,10 +1230,29 @@ static void vxlan_cleanup(unsigned long arg)
1222/* Setup stats when device is created */ 1230/* Setup stats when device is created */
1223static int vxlan_init(struct net_device *dev) 1231static int vxlan_init(struct net_device *dev)
1224{ 1232{
1233 struct vxlan_dev *vxlan = netdev_priv(dev);
1234 struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
1235 struct vxlan_sock *vs;
1236 __u32 vni = vxlan->default_dst.remote_vni;
1237
1225 dev->tstats = alloc_percpu(struct pcpu_tstats); 1238 dev->tstats = alloc_percpu(struct pcpu_tstats);
1226 if (!dev->tstats) 1239 if (!dev->tstats)
1227 return -ENOMEM; 1240 return -ENOMEM;
1228 1241
1242 spin_lock(&vn->sock_lock);
1243 vs = vxlan_find_port(dev_net(dev), vxlan->dst_port);
1244 if (vs) {
1245 /* If we have a socket with same port already, reuse it */
1246 atomic_inc(&vs->refcnt);
1247 vxlan->vn_sock = vs;
1248 hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
1249 } else {
1250 /* otherwise make new socket outside of RTNL */
1251 dev_hold(dev);
1252 queue_work(vxlan_wq, &vxlan->sock_work);
1253 }
1254 spin_unlock(&vn->sock_lock);
1255
1229 return 0; 1256 return 0;
1230} 1257}
1231 1258
@@ -1233,9 +1260,14 @@ static int vxlan_init(struct net_device *dev)
1233static int vxlan_open(struct net_device *dev) 1260static int vxlan_open(struct net_device *dev)
1234{ 1261{
1235 struct vxlan_dev *vxlan = netdev_priv(dev); 1262 struct vxlan_dev *vxlan = netdev_priv(dev);
1263 struct vxlan_sock *vs = vxlan->vn_sock;
1264
1265 /* socket hasn't been created */
1266 if (!vs)
1267 return -ENOTCONN;
1236 1268
1237 if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) { 1269 if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
1238 vxlan_sock_hold(vxlan->vn_sock); 1270 vxlan_sock_hold(vs);
1239 dev_hold(dev); 1271 dev_hold(dev);
1240 queue_work(vxlan_wq, &vxlan->igmp_work); 1272 queue_work(vxlan_wq, &vxlan->igmp_work);
1241 } 1273 }
@@ -1267,9 +1299,10 @@ static void vxlan_flush(struct vxlan_dev *vxlan)
1267static int vxlan_stop(struct net_device *dev) 1299static int vxlan_stop(struct net_device *dev)
1268{ 1300{
1269 struct vxlan_dev *vxlan = netdev_priv(dev); 1301 struct vxlan_dev *vxlan = netdev_priv(dev);
1302 struct vxlan_sock *vs = vxlan->vn_sock;
1270 1303
1271 if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) { 1304 if (vs && IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
1272 vxlan_sock_hold(vxlan->vn_sock); 1305 vxlan_sock_hold(vs);
1273 dev_hold(dev); 1306 dev_hold(dev);
1274 queue_work(vxlan_wq, &vxlan->igmp_work); 1307 queue_work(vxlan_wq, &vxlan->igmp_work);
1275 } 1308 }
@@ -1342,6 +1375,7 @@ static void vxlan_setup(struct net_device *dev)
1342 INIT_LIST_HEAD(&vxlan->next); 1375 INIT_LIST_HEAD(&vxlan->next);
1343 spin_lock_init(&vxlan->hash_lock); 1376 spin_lock_init(&vxlan->hash_lock);
1344 INIT_WORK(&vxlan->igmp_work, vxlan_igmp_work); 1377 INIT_WORK(&vxlan->igmp_work, vxlan_igmp_work);
1378 INIT_WORK(&vxlan->sock_work, vxlan_sock_work);
1345 1379
1346 init_timer_deferrable(&vxlan->age_timer); 1380 init_timer_deferrable(&vxlan->age_timer);
1347 vxlan->age_timer.function = vxlan_cleanup; 1381 vxlan->age_timer.function = vxlan_cleanup;
@@ -1433,7 +1467,6 @@ static void vxlan_del_work(struct work_struct *work)
1433 kfree_rcu(vs, rcu); 1467 kfree_rcu(vs, rcu);
1434} 1468}
1435 1469
1436/* Create new listen socket if needed */
1437static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port) 1470static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
1438{ 1471{
1439 struct vxlan_sock *vs; 1472 struct vxlan_sock *vs;
@@ -1490,13 +1523,52 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
1490 return vs; 1523 return vs;
1491} 1524}
1492 1525
1526/* Scheduled at device creation to bind to a socket */
1527static void vxlan_sock_work(struct work_struct *work)
1528{
1529 struct vxlan_dev *vxlan
1530 = container_of(work, struct vxlan_dev, sock_work);
1531 struct net_device *dev = vxlan->dev;
1532 struct net *net = dev_net(dev);
1533 __u32 vni = vxlan->default_dst.remote_vni;
1534 __be16 port = vxlan->dst_port;
1535 struct vxlan_net *vn = net_generic(net, vxlan_net_id);
1536 struct vxlan_sock *nvs, *ovs;
1537
1538 nvs = vxlan_socket_create(net, port);
1539 if (IS_ERR(nvs)) {
1540 netdev_err(vxlan->dev, "Can not create UDP socket, %ld\n",
1541 PTR_ERR(nvs));
1542 goto out;
1543 }
1544
1545 spin_lock(&vn->sock_lock);
1546 /* Look again to see if can reuse socket */
1547 ovs = vxlan_find_port(net, port);
1548 if (ovs) {
1549 atomic_inc(&ovs->refcnt);
1550 vxlan->vn_sock = ovs;
1551 hlist_add_head_rcu(&vxlan->hlist, vni_head(ovs, vni));
1552 spin_unlock(&vn->sock_lock);
1553
1554 sk_release_kernel(nvs->sock->sk);
1555 kfree(nvs);
1556 } else {
1557 vxlan->vn_sock = nvs;
1558 hlist_add_head_rcu(&nvs->hlist, vs_head(net, port));
1559 hlist_add_head_rcu(&vxlan->hlist, vni_head(nvs, vni));
1560 spin_unlock(&vn->sock_lock);
1561 }
1562out:
1563 dev_put(dev);
1564}
1565
1493static int vxlan_newlink(struct net *net, struct net_device *dev, 1566static int vxlan_newlink(struct net *net, struct net_device *dev,
1494 struct nlattr *tb[], struct nlattr *data[]) 1567 struct nlattr *tb[], struct nlattr *data[])
1495{ 1568{
1496 struct vxlan_net *vn = net_generic(net, vxlan_net_id); 1569 struct vxlan_net *vn = net_generic(net, vxlan_net_id);
1497 struct vxlan_dev *vxlan = netdev_priv(dev); 1570 struct vxlan_dev *vxlan = netdev_priv(dev);
1498 struct vxlan_rdst *dst = &vxlan->default_dst; 1571 struct vxlan_rdst *dst = &vxlan->default_dst;
1499 struct vxlan_sock *vs;
1500 __u32 vni; 1572 __u32 vni;
1501 int err; 1573 int err;
1502 1574
@@ -1574,31 +1646,13 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
1574 return -EEXIST; 1646 return -EEXIST;
1575 } 1647 }
1576 1648
1577 vs = vxlan_find_port(net, vxlan->dst_port);
1578 if (vs)
1579 atomic_inc(&vs->refcnt);
1580 else {
1581 /* Drop lock because socket create acquires RTNL lock */
1582 rtnl_unlock();
1583 vs = vxlan_socket_create(net, vxlan->dst_port);
1584 rtnl_lock();
1585 if (IS_ERR(vs))
1586 return PTR_ERR(vs);
1587
1588 hlist_add_head_rcu(&vs->hlist, vs_head(net, vxlan->dst_port));
1589 }
1590 vxlan->vn_sock = vs;
1591
1592 SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops); 1649 SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops);
1593 1650
1594 err = register_netdevice(dev); 1651 err = register_netdevice(dev);
1595 if (err) { 1652 if (err)
1596 vxlan_sock_release(vs);
1597 return err; 1653 return err;
1598 }
1599 1654
1600 list_add(&vxlan->next, &vn->vxlan_list); 1655 list_add(&vxlan->next, &vn->vxlan_list);
1601 hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
1602 1656
1603 return 0; 1657 return 0;
1604} 1658}
@@ -1606,12 +1660,14 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
1606static void vxlan_dellink(struct net_device *dev, struct list_head *head) 1660static void vxlan_dellink(struct net_device *dev, struct list_head *head)
1607{ 1661{
1608 struct vxlan_dev *vxlan = netdev_priv(dev); 1662 struct vxlan_dev *vxlan = netdev_priv(dev);
1663 struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
1609 struct vxlan_sock *vs = vxlan->vn_sock; 1664 struct vxlan_sock *vs = vxlan->vn_sock;
1610 1665
1611 hlist_del_rcu(&vxlan->hlist); 1666 hlist_del_rcu(&vxlan->hlist);
1612 list_del(&vxlan->next); 1667 list_del(&vxlan->next);
1613 unregister_netdevice_queue(dev, head); 1668 unregister_netdevice_queue(dev, head);
1614 vxlan_sock_release(vs); 1669 if (vs)
1670 vxlan_sock_release(vn, vs);
1615} 1671}
1616 1672
1617static size_t vxlan_get_size(const struct net_device *dev) 1673static size_t vxlan_get_size(const struct net_device *dev)
@@ -1700,6 +1756,7 @@ static __net_init int vxlan_init_net(struct net *net)
1700 unsigned int h; 1756 unsigned int h;
1701 1757
1702 INIT_LIST_HEAD(&vn->vxlan_list); 1758 INIT_LIST_HEAD(&vn->vxlan_list);
1759 spin_lock_init(&vn->sock_lock);
1703 1760
1704 for (h = 0; h < PORT_HASH_SIZE; ++h) 1761 for (h = 0; h < PORT_HASH_SIZE; ++h)
1705 INIT_HLIST_HEAD(&vn->sock_list[h]); 1762 INIT_HLIST_HEAD(&vn->sock_list[h]);