aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core/device.c
diff options
context:
space:
mode:
author: Parav Pandit <parav@mellanox.com> 2019-04-15 06:22:50 -0400
committer: Jason Gunthorpe <jgg@mellanox.com> 2019-04-22 13:44:58 -0400
commit: decbc7a6b0073f55b200d80a3ecf5a5e205edd06 (patch)
tree: 1f39165c9656f4829b2b93e1f82157fcfefed175 /drivers/infiniband/core/device.c
parent: 3042492bd1f9a08e9cf4c1a4621e359fb0f9a126 (diff)
RDMA/core: Introduce a helper function to change net namespace of rdma device
Introduce a helper function that changes an rdma device's net namespace. It performs a mini disable/enable sequence so that the device is visible only in the assigned net namespace. Device unregistration, device rename and device net namespace change may be invoked concurrently. (a) Device unregistration needs to wait if a device change (rename or net namespace change) operation is in progress. (b) A device net namespace change should not proceed if the unregistration has started. (c) While one cpu is changing the device's net namespace, another cpu should not be able to rename the device or change its net namespace. To address the above concurrency: (a) Use unreg_mutex to synchronize between ib_unregister_device() and the net namespace change operation. (b) In cases where unregister_device() has started unregistration before change_netns got a chance to acquire unreg_mutex, validate the refcount - if it dropped to zero, abort the net namespace change operation. Finally, use the helper function to change the net namespace of an ib device to move the device back to init_net when such a net is deleted. Signed-off-by: Parav Pandit <parav@mellanox.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'drivers/infiniband/core/device.c')
-rw-r--r-- drivers/infiniband/core/device.c 77
1 files changed, 77 insertions, 0 deletions
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 25f49b646007..7fe4f8b880ee 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -201,6 +201,9 @@ static struct notifier_block ibdev_lsm_nb = {
201 .notifier_call = ib_security_change, 201 .notifier_call = ib_security_change,
202}; 202};
203 203
204static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
205 struct net *net);
206
204/* Pointer to the RCU head at the start of the ib_port_data array */ 207/* Pointer to the RCU head at the start of the ib_port_data array */
205struct ib_port_data_rcu { 208struct ib_port_data_rcu {
206 struct rcu_head rcu_head; 209 struct rcu_head rcu_head;
@@ -861,6 +864,8 @@ static int add_compat_devs(struct ib_device *device)
861 unsigned long index; 864 unsigned long index;
862 int ret = 0; 865 int ret = 0;
863 866
867 lockdep_assert_held(&devices_rwsem);
868
864 down_read(&rdma_nets_rwsem); 869 down_read(&rdma_nets_rwsem);
865 xa_for_each (&rdma_nets, index, rnet) { 870 xa_for_each (&rdma_nets, index, rnet) {
866 ret = add_one_compat_dev(device, rnet); 871 ret = add_one_compat_dev(device, rnet);
@@ -978,6 +983,11 @@ static void rdma_dev_exit_net(struct net *net)
978 983
979 remove_one_compat_dev(dev, rnet->id); 984 remove_one_compat_dev(dev, rnet->id);
980 985
986 /*
987 * If the real device is in the NS then move it back to init.
988 */
989 rdma_dev_change_netns(dev, net, &init_net);
990
981 put_device(&dev->dev); 991 put_device(&dev->dev);
982 down_read(&devices_rwsem); 992 down_read(&devices_rwsem);
983 } 993 }
@@ -1428,6 +1438,73 @@ void ib_unregister_device_queued(struct ib_device *ib_dev)
1428} 1438}
1429EXPORT_SYMBOL(ib_unregister_device_queued); 1439EXPORT_SYMBOL(ib_unregister_device_queued);
1430 1440
1441/*
1442 * Move device from cur_net into net. The caller must pass in a device that has the kref held and the refcount released.
1443 * If the device is unregistered (refcount is zero) or is no longer in cur_net, nothing is changed and -ENODEV is returned.
1444 * Otherwise the device is disabled, moved and re-enabled; the result is the device_rename() error if any, else that of enable_device_and_get().
1445 */
1446static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
1447 struct net *net)
1448{
1449 int ret2 = -EINVAL;
1450 int ret;
1451
1452 mutex_lock(&device->unregistration_lock);
1453
1454 /*
1455 * If a device is not under ib_device_get() or if the unregistration_lock
1456 * is not held, the namespace can be changed, or it can be unregistered.
1457 * Check again under the lock.
1458 */
1459 if (refcount_read(&device->refcount) == 0 ||
1460 !net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) {
1461 ret = -ENODEV;
1462 goto out;
1463 }
1464
1465 kobject_uevent(&device->dev.kobj, KOBJ_REMOVE);
1466 disable_device(device);
1467
1468 /*
1469 * At this point no one can be using the device, so it is safe to
1470 * change the namespace.
1471 */
1472 write_pnet(&device->coredev.rdma_net, net);
1473
1474 /*
1475 * Currently rdma devices are system wide unique. So the device name
1476 * is guaranteed free in the new namespace. Publish the new namespace
1477 * at the sysfs level by renaming the device to its own current name.
1478 */
1479 down_read(&devices_rwsem);
1480 ret = device_rename(&device->dev, dev_name(&device->dev));
1481 up_read(&devices_rwsem);
1482 if (ret) {
1483 dev_warn(&device->dev,
1484 "%s: Couldn't rename device after namespace change\n",
1485 __func__);
1486 /* Try and put things back (restore cur_net) and re-enable the device below */
1487 write_pnet(&device->coredev.rdma_net, cur_net);
1488 }
1489
1490 ret2 = enable_device_and_get(device);
1491 if (ret2)
1492 /*
1493 * This shouldn't really happen, but if it does, let the user
1494 * retry at later point. So don't disable the device.
1495 */
1496 dev_warn(&device->dev,
1497 "%s: Couldn't re-enable device after namespace change\n",
1498 __func__);
1499 kobject_uevent(&device->dev.kobj, KOBJ_ADD);
1500 ib_device_put(device); /* NOTE(review): presumably drops the reference taken by enable_device_and_get() - confirm */
1501out:
1502 mutex_unlock(&device->unregistration_lock);
1503 if (ret) /* the -ENODEV or device_rename() error takes precedence over ret2 */
1504 return ret;
1505 return ret2;
1506}
1507
1431static struct pernet_operations rdma_dev_net_ops = { 1508static struct pernet_operations rdma_dev_net_ops = {
1432 .init = rdma_dev_init_net, 1509 .init = rdma_dev_init_net,
1433 .exit = rdma_dev_exit_net, 1510 .exit = rdma_dev_exit_net,