about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Roland Dreier <rolandd@cisco.com> 2008-08-19 18:01:32 -0400
committer Roland Dreier <rolandd@cisco.com> 2008-08-19 18:01:32 -0400
commit a77a57a1a22afc31891d95879fe3cf2ab03838b0 (patch)
tree 14f0b4a4e50f4e112d3189c75357e34829d78704
parent ffaa5b984a9322bbd5d9a7f0814ca2ce70feebe5 (diff)
IPoIB: Fix deadlock on RTNL in ipoib_stop()
Commit c8c2afe3 ("IPoIB: Use rtnl lock/unlock when changing device flags") added a call to rtnl_lock() in ipoib_mcast_join_task(), which is run from the ipoib_workqueue. However, ipoib_stop() (which is run inside rtnl_lock()) flushes this workqueue, which leads to a deadlock if the join task is pending.

Fix this by simply not flushing the workqueue from ipoib_stop(). It turns out that we really don't care about workqueue tasks running during or after ipoib_stop(), as long as we make sure to flush the workqueue before unregistering a netdev.

This fixes <https://bugs.openfabrics.org/show_bug.cgi?id=1114>.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_main.c 19
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_multicast.c 10
2 files changed, 18 insertions, 11 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index f51201b17bfd..7e9e218738fa 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -156,14 +156,8 @@ static int ipoib_stop(struct net_device *dev)
156 156
157 netif_stop_queue(dev); 157 netif_stop_queue(dev);
158 158
159 /* 159 ipoib_ib_dev_down(dev, 0);
160 * Now flush workqueue to make sure a scheduled task doesn't 160 ipoib_ib_dev_stop(dev, 0);
161 * bring our internal state back up.
162 */
163 flush_workqueue(ipoib_workqueue);
164
165 ipoib_ib_dev_down(dev, 1);
166 ipoib_ib_dev_stop(dev, 1);
167 161
168 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 162 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
169 struct ipoib_dev_priv *cpriv; 163 struct ipoib_dev_priv *cpriv;
@@ -1314,7 +1308,7 @@ sysfs_failed:
1314 1308
1315register_failed: 1309register_failed:
1316 ib_unregister_event_handler(&priv->event_handler); 1310 ib_unregister_event_handler(&priv->event_handler);
1317 flush_scheduled_work(); 1311 flush_workqueue(ipoib_workqueue);
1318 1312
1319event_failed: 1313event_failed:
1320 ipoib_dev_cleanup(priv->dev); 1314 ipoib_dev_cleanup(priv->dev);
@@ -1373,7 +1367,12 @@ static void ipoib_remove_one(struct ib_device *device)
1373 1367
1374 list_for_each_entry_safe(priv, tmp, dev_list, list) { 1368 list_for_each_entry_safe(priv, tmp, dev_list, list) {
1375 ib_unregister_event_handler(&priv->event_handler); 1369 ib_unregister_event_handler(&priv->event_handler);
1376 flush_scheduled_work(); 1370
1371 rtnl_lock();
1372 dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
1373 rtnl_unlock();
1374
1375 flush_workqueue(ipoib_workqueue);
1377 1376
1378 unregister_netdev(priv->dev); 1377 unregister_netdev(priv->dev);
1379 ipoib_dev_cleanup(priv->dev); 1378 ipoib_dev_cleanup(priv->dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 8950e9546f4e..ac33c8f3ea85 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -392,8 +392,16 @@ static int ipoib_mcast_join_complete(int status,
392 &priv->mcast_task, 0); 392 &priv->mcast_task, 0);
393 mutex_unlock(&mcast_mutex); 393 mutex_unlock(&mcast_mutex);
394 394
395 if (mcast == priv->broadcast) 395 if (mcast == priv->broadcast) {
396 /*
397 * Take RTNL lock here to avoid racing with
398 * ipoib_stop() and turning the carrier back
399 * on while a device is being removed.
400 */
401 rtnl_lock();
396 netif_carrier_on(dev); 402 netif_carrier_on(dev);
403 rtnl_unlock();
404 }
397 405
398 return 0; 406 return 0;
399 } 407 }