author     Doug Ledford <dledford@redhat.com>    2014-12-10 11:47:03 -0500
committer  Roland Dreier <roland@purestorage.com>    2014-12-15 21:11:15 -0500
commit     5141861cd5e17eac9676ff49c5abfafbea2b0e98 (patch)
tree       f057f31b5b5b656404c8657c9aa064c272bdc083
parent     3bcce487fda8161597c20ed303d510e41ad7770e (diff)
IPoIB: Use dedicated workqueues per interface
During my recent work on the rtnl lock deadlock in the IPoIB driver, I saw that even once I fixed the apparent races for a single device, as soon as that device had any children, new races popped up. It turns out that this is because, no matter how well we protect against races on a single device, all devices use the same workqueue and flush_workqueue() flushes *everything* from that workqueue. So we can have one device in the middle of a down, holding the rtnl lock, while a totally unrelated device needs to run mcast_restart_task, which wants the rtnl lock and will loop trying to take it unless it sees its own FLAG_ADMIN_UP flag go away. Because the unrelated interface will never see its own ADMIN_UP flag drop, the interface going down will deadlock trying to flush the queue.

There are several possible solutions to this problem:

- Make carrier_on_task and mcast_restart_task try to take the rtnl lock for some set period of time and, if they fail, bail. This runs the real risk of dropping work on the floor, which can end up being its own separate kind of deadlock.

- Set some global flag in the driver that says some device is in the middle of going down, letting all tasks know to bail. Again, this can drop work on the floor. I suppose if our own ADMIN_UP flag doesn't go away, then maybe after a few tries on the rtnl lock we can queue our own task back up as delayed work, return, and avoid dropping work on the floor that way. But I'm not 100% convinced that we won't cause other problems.

- Or the method this patch attempts to use: when we bring an interface up, create a workqueue specifically for that interface, so that when we take it back down we are flushing only those tasks associated with our interface. In addition, keep the global workqueue, but now limit it to only the flush tasks. In this way, the flush tasks can always flush the device-specific workqueues without deadlock issues.

Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
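In outline, the lifecycle the patch introduces looks like the sketch below. This is a condensed paraphrase of the ipoib_verbs.c and ipoib_ib.c hunks that follow, not additional code from the patch: the surrounding functions and error unwinding are elided, and priv->flush_light (one of the flush work items visible in the ipoib.h hunk) is used only to illustrate work that stays on the global queue.

	/* Per-interface queue, created when the transport is initialized.
	 * The IPoIB work items assume they never race against themselves,
	 * so the queue is single threaded. */
	priv->wq = create_singlethread_workqueue("ipoib_wq");
	if (!priv->wq)
		return -ENODEV;

	/* Device-specific work now goes to the interface's own queue ... */
	queue_work(priv->wq, &priv->cm.reap_task);

	/* ... while the global queue is kept only for the flush tasks. */
	queue_work(ipoib_workqueue, &priv->flush_light);

	/* On teardown, flushing priv->wq waits only for this interface's
	 * work, so one device going down can no longer block on work
	 * belonging to an unrelated device. */
	flush_workqueue(priv->wq);
	destroy_workqueue(priv->wq);
	priv->wq = NULL;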
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h            |  1
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c         | 18
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c         |  6
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c       | 19
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c  | 26
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c      | 22
6 files changed, 58 insertions, 34 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index f4c1b20b23b2..45fd10a72ec1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -323,6 +323,7 @@ struct ipoib_dev_priv {
 	struct list_head multicast_list;
 	struct rb_root multicast_tree;
 
+	struct workqueue_struct *wq;
 	struct delayed_work mcast_task;
 	struct work_struct carrier_on_task;
 	struct work_struct flush_light;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 933efcea0d03..56959adb6c7d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -474,7 +474,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
 	}
 
 	spin_lock_irq(&priv->lock);
-	queue_delayed_work(ipoib_workqueue,
+	queue_delayed_work(priv->wq,
 			   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
 	/* Add this entry to passive ids list head, but do not re-add it
 	 * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
@@ -576,7 +576,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 			spin_lock_irqsave(&priv->lock, flags);
 			list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
 			ipoib_cm_start_rx_drain(priv);
-			queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+			queue_work(priv->wq, &priv->cm.rx_reap_task);
 			spin_unlock_irqrestore(&priv->lock, flags);
 		} else
 			ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -603,7 +603,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 				spin_lock_irqsave(&priv->lock, flags);
 				list_move(&p->list, &priv->cm.rx_reap_list);
 				spin_unlock_irqrestore(&priv->lock, flags);
-				queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+				queue_work(priv->wq, &priv->cm.rx_reap_task);
 			}
 			return;
 		}
@@ -827,7 +827,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
 		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
 			list_move(&tx->list, &priv->cm.reap_list);
-			queue_work(ipoib_workqueue, &priv->cm.reap_task);
+			queue_work(priv->wq, &priv->cm.reap_task);
 		}
 
 		clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
@@ -1255,7 +1255,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 
 		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
 			list_move(&tx->list, &priv->cm.reap_list);
-			queue_work(ipoib_workqueue, &priv->cm.reap_task);
+			queue_work(priv->wq, &priv->cm.reap_task);
 		}
 
 		spin_unlock_irqrestore(&priv->lock, flags);
@@ -1284,7 +1284,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
 	tx->dev = dev;
 	list_add(&tx->list, &priv->cm.start_list);
 	set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
-	queue_work(ipoib_workqueue, &priv->cm.start_task);
+	queue_work(priv->wq, &priv->cm.start_task);
 	return tx;
 }
 
@@ -1295,7 +1295,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
 	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
 		spin_lock_irqsave(&priv->lock, flags);
 		list_move(&tx->list, &priv->cm.reap_list);
-		queue_work(ipoib_workqueue, &priv->cm.reap_task);
+		queue_work(priv->wq, &priv->cm.reap_task);
 		ipoib_dbg(priv, "Reap connection for gid %pI6\n",
 			  tx->neigh->daddr + 4);
 		tx->neigh = NULL;
@@ -1417,7 +1417,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
 
 	skb_queue_tail(&priv->cm.skb_queue, skb);
 	if (e)
-		queue_work(ipoib_workqueue, &priv->cm.skb_task);
+		queue_work(priv->wq, &priv->cm.skb_task);
 }
 
 static void ipoib_cm_rx_reap(struct work_struct *work)
@@ -1450,7 +1450,7 @@ static void ipoib_cm_stale_task(struct work_struct *work)
 	}
 
 	if (!list_empty(&priv->cm.passive_ids))
-		queue_delayed_work(ipoib_workqueue,
+		queue_delayed_work(priv->wq,
 				   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
 	spin_unlock_irq(&priv->lock);
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 72626c348174..bfd17d41b5f2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -655,7 +655,7 @@ void ipoib_reap_ah(struct work_struct *work)
 	__ipoib_reap_ah(dev);
 
 	if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
-		queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+		queue_delayed_work(priv->wq, &priv->ah_reap_task,
 				   round_jiffies_relative(HZ));
 }
 
@@ -696,7 +696,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
 	}
 
 	clear_bit(IPOIB_STOP_REAPER, &priv->flags);
-	queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+	queue_delayed_work(priv->wq, &priv->ah_reap_task,
 			   round_jiffies_relative(HZ));
 
 	if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
@@ -881,7 +881,7 @@ timeout:
 	set_bit(IPOIB_STOP_REAPER, &priv->flags);
 	cancel_delayed_work(&priv->ah_reap_task);
 	if (flush)
-		flush_workqueue(ipoib_workqueue);
+		flush_workqueue(priv->wq);
 
 	begin = jiffies;
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 2cf81ef51412..42e5c278f489 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -839,7 +839,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
 		return;
 	}
 
-	queue_work(ipoib_workqueue, &priv->restart_task);
+	queue_work(priv->wq, &priv->restart_task);
 }
 
 static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
@@ -954,7 +954,7 @@ static void ipoib_reap_neigh(struct work_struct *work)
 	__ipoib_reap_neigh(priv);
 
 	if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
-		queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+		queue_delayed_work(priv->wq, &priv->neigh_reap_task,
 				   arp_tbl.gc_interval);
 }
 
@@ -1133,7 +1133,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
 
 	/* start garbage collection */
 	clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-	queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+	queue_delayed_work(priv->wq, &priv->neigh_reap_task,
 			   arp_tbl.gc_interval);
 
 	return 0;
@@ -1293,7 +1293,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 	return 0;
 
 out_dev_uninit:
-	ipoib_ib_dev_cleanup();
+	ipoib_ib_dev_cleanup(dev);
 
 out_tx_ring_cleanup:
 	vfree(priv->tx_ring);
@@ -1646,7 +1646,7 @@ register_failed:
 	/* Stop GC if started before flush */
 	set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
 	cancel_delayed_work(&priv->neigh_reap_task);
-	flush_workqueue(ipoib_workqueue);
+	flush_workqueue(priv->wq);
 
 event_failed:
 	ipoib_dev_cleanup(priv->dev);
@@ -1717,7 +1717,7 @@ static void ipoib_remove_one(struct ib_device *device)
 		/* Stop GC */
 		set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
 		cancel_delayed_work(&priv->neigh_reap_task);
-		flush_workqueue(ipoib_workqueue);
+		flush_workqueue(priv->wq);
 
 		unregister_netdev(priv->dev);
 		free_netdev(priv->dev);
@@ -1758,8 +1758,13 @@ static int __init ipoib_init_module(void)
 	 * unregister_netdev() and linkwatch_event take the rtnl lock,
 	 * so flush_scheduled_work() can deadlock during device
 	 * removal.
+	 *
+	 * In addition, bringing one device up and another down at the
+	 * same time can deadlock a single workqueue, so we have this
+	 * global fallback workqueue, but we also attempt to open a
+	 * per device workqueue each time we bring an interface up
 	 */
-	ipoib_workqueue = create_singlethread_workqueue("ipoib");
+	ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
 	if (!ipoib_workqueue) {
 		ret = -ENOMEM;
 		goto err_fs;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 41325960e4e0..845f910eb214 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -388,7 +388,7 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
 	 * the workqueue while holding the rtnl lock, so loop
 	 * on trylock until either we get the lock or we see
 	 * FLAG_ADMIN_UP go away as that signals that we are bailing
-	 * and can safely ignore the carrier on work
+	 * and can safely ignore the carrier on work.
 	 */
 	while (!rtnl_trylock()) {
 		if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
@@ -432,15 +432,14 @@ static int ipoib_mcast_join_complete(int status,
 	if (!status) {
 		mcast->backoff = 1;
 		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-			queue_delayed_work(ipoib_workqueue,
-					   &priv->mcast_task, 0);
+			queue_delayed_work(priv->wq, &priv->mcast_task, 0);
 
 		/*
-		 * Defer carrier on work to ipoib_workqueue to avoid a
+		 * Defer carrier on work to priv->wq to avoid a
 		 * deadlock on rtnl_lock here.
 		 */
 		if (mcast == priv->broadcast)
-			queue_work(ipoib_workqueue, &priv->carrier_on_task);
+			queue_work(priv->wq, &priv->carrier_on_task);
 	} else {
 		if (mcast->logcount++ < 20) {
 			if (status == -ETIMEDOUT || status == -EAGAIN) {
@@ -465,7 +464,7 @@ out:
 	if (status == -ENETRESET)
 		status = 0;
 	if (status && test_bit(IPOIB_MCAST_RUN, &priv->flags))
-		queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
+		queue_delayed_work(priv->wq, &priv->mcast_task,
 				   mcast->backoff * HZ);
 	spin_unlock_irq(&priv->lock);
 	mutex_unlock(&mcast_mutex);
@@ -535,8 +534,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
 
 		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-			queue_delayed_work(ipoib_workqueue,
-					   &priv->mcast_task,
+			queue_delayed_work(priv->wq, &priv->mcast_task,
 					   mcast->backoff * HZ);
 	}
 	mutex_unlock(&mcast_mutex);
@@ -576,8 +574,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
 			ipoib_warn(priv, "failed to allocate broadcast group\n");
 			mutex_lock(&mcast_mutex);
 			if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-				queue_delayed_work(ipoib_workqueue,
-						   &priv->mcast_task, HZ);
+				queue_delayed_work(priv->wq, &priv->mcast_task,
+						   HZ);
 			mutex_unlock(&mcast_mutex);
 			return;
 		}
@@ -644,7 +642,7 @@ int ipoib_mcast_start_thread(struct net_device *dev)
 
 	mutex_lock(&mcast_mutex);
 	if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
-		queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
+		queue_delayed_work(priv->wq, &priv->mcast_task, 0);
 	mutex_unlock(&mcast_mutex);
 
 	return 0;
@@ -662,7 +660,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
 	mutex_unlock(&mcast_mutex);
 
 	if (flush)
-		flush_workqueue(ipoib_workqueue);
+		flush_workqueue(priv->wq);
 
 	return 0;
 }
@@ -729,7 +727,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
 		__ipoib_mcast_add(dev, mcast);
 		list_add_tail(&mcast->list, &priv->multicast_list);
 		if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
-			queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
+			queue_delayed_work(priv->wq, &priv->mcast_task, 0);
 	}
 
 	if (!mcast->ah) {
@@ -944,7 +942,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 	 * completes. So do like the carrier on task and attempt to
 	 * take the rtnl lock, but if we can't before the ADMIN_UP flag
 	 * goes away, then just return and know that the remove list will
-	 * get flushed later by mcast_dev_flush.
+	 * get flushed later by mcast_stop_thread.
 	 */
 	while (!rtnl_trylock()) {
 		if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index c56d5d44c53b..b72a753eb41d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -145,10 +145,20 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 	int ret, size;
 	int i;
 
+	/*
+	 * the various IPoIB tasks assume they will never race against
+	 * themselves, so always use a single thread workqueue
+	 */
+	priv->wq = create_singlethread_workqueue("ipoib_wq");
+	if (!priv->wq) {
+		printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
+		return -ENODEV;
+	}
+
 	priv->pd = ib_alloc_pd(priv->ca);
 	if (IS_ERR(priv->pd)) {
 		printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
-		return -ENODEV;
+		goto out_free_wq;
 	}
 
 	priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
@@ -242,6 +252,10 @@ out_free_mr:
 
 out_free_pd:
 	ib_dealloc_pd(priv->pd);
+
+out_free_wq:
+	destroy_workqueue(priv->wq);
+	priv->wq = NULL;
 	return -ENODEV;
 }
 
247 261
@@ -270,6 +284,12 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
 
 	if (ib_dealloc_pd(priv->pd))
 		ipoib_warn(priv, "ib_dealloc_pd failed\n");
+
+	if (priv->wq) {
+		flush_workqueue(priv->wq);
+		destroy_workqueue(priv->wq);
+		priv->wq = NULL;
+	}
 }
 
 void ipoib_event(struct ib_event_handler *handler,
275void ipoib_event(struct ib_event_handler *handler, 295void ipoib_event(struct ib_event_handler *handler,