author		Linus Torvalds <torvalds@linux-foundation.org>	2015-02-03 23:12:57 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-02-03 23:12:57 -0500
commit		dc6d6844111d953d3fa2121da28d38be9359c911 (patch)
tree		b09f7d9d3d0b09e338be3a459b7163ba7ed4a53a
parent		59acf65776f8720d5f7c4efec545d1ecec6abca3 (diff)
parent		ecb7b1233c532c60fa7a8927a43944e69f6e6b1a (diff)
Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
Pull infiniband reverts from Roland Dreier:
 "Last minute InfiniBand/RDMA changes for 3.19:

   - Revert IPoIB driver back to 3.18 state.  We had a number of fixes
     go into 3.19, but they introduced regressions.  We tried to get
     everything fixed up but ran out of time, so we'll try again for
     3.20.

   - Similarly, turn off the new "extended query port" verb.  Late in
     the cycle we realized the ABI is not quite right, and rather than
     freeze something in a rush and make a mistake, we'll take a bit
     more time and get it right in 3.20"

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
  IB/core: Temporarily disable ex_query_device uverb
  Revert "IPoIB: Consolidate rtnl_lock tasks in workqueue"
  Revert "IPoIB: Make the carrier_on_task race aware"
  Revert "IPoIB: fix MCAST_FLAG_BUSY usage"
  Revert "IPoIB: fix mcast_dev_flush/mcast_restart_task race"
  Revert "IPoIB: change init sequence ordering"
  Revert "IPoIB: Use dedicated workqueues per interface"
  Revert "IPoIB: Make ipoib_mcast_stop_thread flush the workqueue"
  Revert "IPoIB: No longer use flush as a parameter"
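Most of the IPoIB reverts below are mechanical: every queue_work()/queue_delayed_work() call that targeted the per-device queue priv->wq goes back to the single driver-wide ipoib_workqueue. A minimal sketch of the restored pattern (example_priv is a hypothetical stand-in for the real ipoib_dev_priv):

	#include <linux/workqueue.h>

	/* Hypothetical, cut-down stand-in for ipoib_dev_priv. */
	struct example_priv {
		struct work_struct restart_task;
	};

	/* One single-threaded queue shared by every IPoIB device. */
	static struct workqueue_struct *ipoib_workqueue;

	static void example_queue_restart(struct example_priv *priv)
	{
		/* Post-revert (3.18) style: the global queue, not priv->wq. */
		queue_work(ipoib_workqueue, &priv->restart_task);
	}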
-rw-r--r--	drivers/infiniband/core/uverbs_main.c          |   1
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib.h           |  19
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_cm.c        |  18
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_ib.c        |  27
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_main.c      |  49
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 239
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_verbs.c     |  22
7 files changed, 134 insertions(+), 241 deletions(-)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index e6c23b9eab33..5db1a8cc388d 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -123,7 +123,6 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
 				    struct ib_udata *uhw) = {
 	[IB_USER_VERBS_EX_CMD_CREATE_FLOW]	= ib_uverbs_ex_create_flow,
 	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW]	= ib_uverbs_ex_destroy_flow,
-	[IB_USER_VERBS_EX_CMD_QUERY_DEVICE]	= ib_uverbs_ex_query_device
 };
 
 static void ib_uverbs_add_one(struct ib_device *device);
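Extended uverbs commands are dispatched through uverbs_ex_cmd_table, so removing the entry is enough to make the kernel reject IB_USER_VERBS_EX_CMD_QUERY_DEVICE until the ABI is settled in 3.20. A rough sketch of the table-dispatch idea with hypothetical names (the real checks live in ib_uverbs_write() and are more involved):

	/* Sketch only: a NULL or out-of-range slot means the verb is disabled. */
	typedef int (*ex_cmd_fn)(void *file, void *ucore, void *uhw);

	static int example_dispatch(ex_cmd_fn *table, unsigned int nr_cmds,
				    unsigned int cmd, void *file,
				    void *ucore, void *uhw)
	{
		if (cmd >= nr_cmds || !table[cmd])
			return -EINVAL;	/* unknown or disabled extended verb */
		return table[cmd](file, ucore, uhw);
	}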
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 8ba80a6d3a46..d7562beb5423 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -98,15 +98,9 @@ enum {
 
 	IPOIB_MCAST_FLAG_FOUND	  = 0,	/* used in set_multicast_list */
 	IPOIB_MCAST_FLAG_SENDONLY = 1,
-	/*
-	 * For IPOIB_MCAST_FLAG_BUSY
-	 * When set, in flight join and mcast->mc is unreliable
-	 * When clear and mcast->mc IS_ERR_OR_NULL, need to restart or
-	 * haven't started yet
-	 * When clear and mcast->mc is valid pointer, join was successful
-	 */
-	IPOIB_MCAST_FLAG_BUSY	  = 2,
+	IPOIB_MCAST_FLAG_BUSY	  = 2,	/* joining or already joined */
 	IPOIB_MCAST_FLAG_ATTACHED = 3,
+	IPOIB_MCAST_JOIN_STARTED  = 4,
 
 	MAX_SEND_CQE		  = 16,
 	IPOIB_CM_COPYBREAK	  = 256,
@@ -323,7 +317,6 @@ struct ipoib_dev_priv {
 	struct list_head multicast_list;
 	struct rb_root multicast_tree;
 
-	struct workqueue_struct *wq;
 	struct delayed_work mcast_task;
 	struct work_struct carrier_on_task;
 	struct work_struct flush_light;
@@ -484,10 +477,10 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
 void ipoib_pkey_event(struct work_struct *work);
 void ipoib_ib_dev_cleanup(struct net_device *dev);
 
-int ipoib_ib_dev_open(struct net_device *dev);
+int ipoib_ib_dev_open(struct net_device *dev, int flush);
 int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev);
-int ipoib_ib_dev_stop(struct net_device *dev);
+int ipoib_ib_dev_down(struct net_device *dev, int flush);
+int ipoib_ib_dev_stop(struct net_device *dev, int flush);
 void ipoib_pkey_dev_check_presence(struct net_device *dev);
 
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -499,7 +492,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
 
 void ipoib_mcast_restart_task(struct work_struct *work);
 int ipoib_mcast_start_thread(struct net_device *dev);
-int ipoib_mcast_stop_thread(struct net_device *dev);
+int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
 
 void ipoib_mcast_dev_down(struct net_device *dev);
 void ipoib_mcast_dev_flush(struct net_device *dev);
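This also restores the simpler flag semantics: IPOIB_MCAST_FLAG_BUSY again just means "joining or already joined", and the new IPOIB_MCAST_JOIN_STARTED records that a join was issued, i.e. that mcast->done will eventually complete. A hedged sketch of how a flush path can key off such a flag (mirroring the ipoib_mcast_dev_flush() hunk further down; names here are hypothetical):

	#include <linux/bitops.h>
	#include <linux/completion.h>

	/* Hypothetical, cut-down multicast entry for illustration. */
	struct example_mcast {
		unsigned long flags;
		struct completion done;
	};

	#define EXAMPLE_JOIN_STARTED	4	/* mirrors IPOIB_MCAST_JOIN_STARTED */

	static void example_wait_for_join(struct example_mcast *mcast)
	{
		/* Wait only if a join was started; otherwise done never fires. */
		if (test_bit(EXAMPLE_JOIN_STARTED, &mcast->flags))
			wait_for_completion(&mcast->done);
	}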
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 56959adb6c7d..933efcea0d03 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -474,7 +474,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
 	}
 
 	spin_lock_irq(&priv->lock);
-	queue_delayed_work(priv->wq,
+	queue_delayed_work(ipoib_workqueue,
 			   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
 	/* Add this entry to passive ids list head, but do not re-add it
 	 * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
@@ -576,7 +576,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 		spin_lock_irqsave(&priv->lock, flags);
 		list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
 		ipoib_cm_start_rx_drain(priv);
-		queue_work(priv->wq, &priv->cm.rx_reap_task);
+		queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
 		spin_unlock_irqrestore(&priv->lock, flags);
 	} else
 		ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -603,7 +603,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 		spin_lock_irqsave(&priv->lock, flags);
 		list_move(&p->list, &priv->cm.rx_reap_list);
 		spin_unlock_irqrestore(&priv->lock, flags);
-		queue_work(priv->wq, &priv->cm.rx_reap_task);
+		queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
 	}
 	return;
 }
@@ -827,7 +827,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
 	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
 		list_move(&tx->list, &priv->cm.reap_list);
-		queue_work(priv->wq, &priv->cm.reap_task);
+		queue_work(ipoib_workqueue, &priv->cm.reap_task);
 	}
 
 	clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
@@ -1255,7 +1255,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 
 	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
 		list_move(&tx->list, &priv->cm.reap_list);
-		queue_work(priv->wq, &priv->cm.reap_task);
+		queue_work(ipoib_workqueue, &priv->cm.reap_task);
 	}
 
 	spin_unlock_irqrestore(&priv->lock, flags);
@@ -1284,7 +1284,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
 	tx->dev = dev;
 	list_add(&tx->list, &priv->cm.start_list);
 	set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
-	queue_work(priv->wq, &priv->cm.start_task);
+	queue_work(ipoib_workqueue, &priv->cm.start_task);
 	return tx;
 }
 
@@ -1295,7 +1295,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
 	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
 		spin_lock_irqsave(&priv->lock, flags);
 		list_move(&tx->list, &priv->cm.reap_list);
-		queue_work(priv->wq, &priv->cm.reap_task);
+		queue_work(ipoib_workqueue, &priv->cm.reap_task);
 		ipoib_dbg(priv, "Reap connection for gid %pI6\n",
 			  tx->neigh->daddr + 4);
 		tx->neigh = NULL;
@@ -1417,7 +1417,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
 
 	skb_queue_tail(&priv->cm.skb_queue, skb);
 	if (e)
-		queue_work(priv->wq, &priv->cm.skb_task);
+		queue_work(ipoib_workqueue, &priv->cm.skb_task);
 }
 
 static void ipoib_cm_rx_reap(struct work_struct *work)
@@ -1450,7 +1450,7 @@ static void ipoib_cm_stale_task(struct work_struct *work)
 	}
 
 	if (!list_empty(&priv->cm.passive_ids))
-		queue_delayed_work(priv->wq,
+		queue_delayed_work(ipoib_workqueue,
 				   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
 	spin_unlock_irq(&priv->lock);
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index fe65abb5150c..72626c348174 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -655,7 +655,7 @@ void ipoib_reap_ah(struct work_struct *work)
 	__ipoib_reap_ah(dev);
 
 	if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
-		queue_delayed_work(priv->wq, &priv->ah_reap_task,
+		queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
 				   round_jiffies_relative(HZ));
 }
 
@@ -664,7 +664,7 @@ static void ipoib_ib_tx_timer_func(unsigned long ctx)
 	drain_tx_cq((struct net_device *)ctx);
 }
 
-int ipoib_ib_dev_open(struct net_device *dev)
+int ipoib_ib_dev_open(struct net_device *dev, int flush)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int ret;
@@ -696,7 +696,7 @@ int ipoib_ib_dev_open(struct net_device *dev)
 	}
 
 	clear_bit(IPOIB_STOP_REAPER, &priv->flags);
-	queue_delayed_work(priv->wq, &priv->ah_reap_task,
+	queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
 			   round_jiffies_relative(HZ));
 
 	if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
@@ -706,7 +706,7 @@ int ipoib_ib_dev_open(struct net_device *dev)
 dev_stop:
 	if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
 		napi_enable(&priv->napi);
-	ipoib_ib_dev_stop(dev);
+	ipoib_ib_dev_stop(dev, flush);
 	return -1;
 }
 
@@ -738,7 +738,7 @@ int ipoib_ib_dev_up(struct net_device *dev)
 	return ipoib_mcast_start_thread(dev);
 }
 
-int ipoib_ib_dev_down(struct net_device *dev)
+int ipoib_ib_dev_down(struct net_device *dev, int flush)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
@@ -747,7 +747,7 @@ int ipoib_ib_dev_down(struct net_device *dev)
 	clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
 	netif_carrier_off(dev);
 
-	ipoib_mcast_stop_thread(dev);
+	ipoib_mcast_stop_thread(dev, flush);
 	ipoib_mcast_dev_flush(dev);
 
 	ipoib_flush_paths(dev);
@@ -807,7 +807,7 @@ void ipoib_drain_cq(struct net_device *dev)
 	local_bh_enable();
 }
 
-int ipoib_ib_dev_stop(struct net_device *dev)
+int ipoib_ib_dev_stop(struct net_device *dev, int flush)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_attr qp_attr;
@@ -880,7 +880,8 @@ timeout:
 	/* Wait for all AHs to be reaped */
 	set_bit(IPOIB_STOP_REAPER, &priv->flags);
 	cancel_delayed_work(&priv->ah_reap_task);
-	flush_workqueue(priv->wq);
+	if (flush)
+		flush_workqueue(ipoib_workqueue);
 
 	begin = jiffies;
 
@@ -917,7 +918,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 		    (unsigned long) dev);
 
 	if (dev->flags & IFF_UP) {
-		if (ipoib_ib_dev_open(dev)) {
+		if (ipoib_ib_dev_open(dev, 1)) {
 			ipoib_transport_dev_cleanup(dev);
 			return -ENODEV;
 		}
@@ -1039,12 +1040,12 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
 	}
 
 	if (level >= IPOIB_FLUSH_NORMAL)
-		ipoib_ib_dev_down(dev);
+		ipoib_ib_dev_down(dev, 0);
 
 	if (level == IPOIB_FLUSH_HEAVY) {
 		if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
-			ipoib_ib_dev_stop(dev);
-		if (ipoib_ib_dev_open(dev) != 0)
+			ipoib_ib_dev_stop(dev, 0);
+		if (ipoib_ib_dev_open(dev, 0) != 0)
 			return;
 		if (netif_queue_stopped(dev))
 			netif_start_queue(dev);
@@ -1096,7 +1097,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
 	 */
 	ipoib_flush_paths(dev);
 
-	ipoib_mcast_stop_thread(dev);
+	ipoib_mcast_stop_thread(dev, 1);
 	ipoib_mcast_dev_flush(dev);
 
 	ipoib_transport_dev_cleanup(dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 6bad17d4d588..58b5aa3b6f2d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,7 +108,7 @@ int ipoib_open(struct net_device *dev)
 
 	set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
 
-	if (ipoib_ib_dev_open(dev)) {
+	if (ipoib_ib_dev_open(dev, 1)) {
 		if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
 			return 0;
 		goto err_disable;
@@ -139,7 +139,7 @@ int ipoib_open(struct net_device *dev)
 	return 0;
 
 err_stop:
-	ipoib_ib_dev_stop(dev);
+	ipoib_ib_dev_stop(dev, 1);
 
 err_disable:
 	clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@@ -157,8 +157,8 @@ static int ipoib_stop(struct net_device *dev)
 
 	netif_stop_queue(dev);
 
-	ipoib_ib_dev_down(dev);
-	ipoib_ib_dev_stop(dev);
+	ipoib_ib_dev_down(dev, 1);
+	ipoib_ib_dev_stop(dev, 0);
 
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
 		struct ipoib_dev_priv *cpriv;
@@ -839,7 +839,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
 		return;
 	}
 
-	queue_work(priv->wq, &priv->restart_task);
+	queue_work(ipoib_workqueue, &priv->restart_task);
 }
 
 static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
@@ -954,7 +954,7 @@ static void ipoib_reap_neigh(struct work_struct *work)
 	__ipoib_reap_neigh(priv);
 
 	if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
-		queue_delayed_work(priv->wq, &priv->neigh_reap_task,
+		queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
 				   arp_tbl.gc_interval);
 }
 
@@ -1133,7 +1133,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
 
 	/* start garbage collection */
 	clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-	queue_delayed_work(priv->wq, &priv->neigh_reap_task,
+	queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
 			   arp_tbl.gc_interval);
 
 	return 0;
@@ -1262,13 +1262,15 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
+	if (ipoib_neigh_hash_init(priv) < 0)
+		goto out;
 	/* Allocate RX/TX "rings" to hold queued skbs */
 	priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
 				GFP_KERNEL);
 	if (!priv->rx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
 		       ca->name, ipoib_recvq_size);
-		goto out;
+		goto out_neigh_hash_cleanup;
 	}
 
 	priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -1283,24 +1285,16 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 	if (ipoib_ib_dev_init(dev, ca, port))
 		goto out_tx_ring_cleanup;
 
-	/*
-	 * Must be after ipoib_ib_dev_init so we can allocate a per
-	 * device wq there and use it here
-	 */
-	if (ipoib_neigh_hash_init(priv) < 0)
-		goto out_dev_uninit;
-
 	return 0;
 
-out_dev_uninit:
-	ipoib_ib_dev_cleanup(dev);
-
 out_tx_ring_cleanup:
 	vfree(priv->tx_ring);
 
 out_rx_ring_cleanup:
 	kfree(priv->rx_ring);
 
+out_neigh_hash_cleanup:
+	ipoib_neigh_hash_uninit(dev);
 out:
 	return -ENOMEM;
 }
@@ -1323,12 +1317,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
 	}
 	unregister_netdevice_many(&head);
 
-	/*
-	 * Must be before ipoib_ib_dev_cleanup or we delete an in use
-	 * work queue
-	 */
-	ipoib_neigh_hash_uninit(dev);
-
 	ipoib_ib_dev_cleanup(dev);
 
 	kfree(priv->rx_ring);
@@ -1336,6 +1324,8 @@ void ipoib_dev_cleanup(struct net_device *dev)
 
 	priv->rx_ring = NULL;
 	priv->tx_ring = NULL;
+
+	ipoib_neigh_hash_uninit(dev);
 }
 
 static const struct header_ops ipoib_header_ops = {
@@ -1646,7 +1636,7 @@ register_failed:
 	/* Stop GC if started before flush */
 	set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
 	cancel_delayed_work(&priv->neigh_reap_task);
-	flush_workqueue(priv->wq);
+	flush_workqueue(ipoib_workqueue);
 
 event_failed:
 	ipoib_dev_cleanup(priv->dev);
@@ -1717,7 +1707,7 @@ static void ipoib_remove_one(struct ib_device *device)
 	/* Stop GC */
 	set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
 	cancel_delayed_work(&priv->neigh_reap_task);
-	flush_workqueue(priv->wq);
+	flush_workqueue(ipoib_workqueue);
 
 	unregister_netdev(priv->dev);
 	free_netdev(priv->dev);
@@ -1758,13 +1748,8 @@ static int __init ipoib_init_module(void)
 	 * unregister_netdev() and linkwatch_event take the rtnl lock,
 	 * so flush_scheduled_work() can deadlock during device
 	 * removal.
-	 *
-	 * In addition, bringing one device up and another down at the
-	 * same time can deadlock a single workqueue, so we have this
-	 * global fallback workqueue, but we also attempt to open a
-	 * per device workqueue each time we bring an interface up
 	 */
-	ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
+	ipoib_workqueue = create_singlethread_workqueue("ipoib");
 	if (!ipoib_workqueue) {
 		ret = -ENOMEM;
 		goto err_fs;
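The global queue is recreated under its old name, and it stays single-threaded because the IPoIB work items assume they never race against themselves (the same rationale given in the comment removed from ipoib_verbs.c below). A minimal sketch of the init/teardown pairing, assuming the module-exit path mirrors module init:

	#include <linux/workqueue.h>

	static struct workqueue_struct *ipoib_workqueue;

	static int example_init(void)
	{
		/* Single thread: IPoIB tasks assume no self-racing. */
		ipoib_workqueue = create_singlethread_workqueue("ipoib");
		if (!ipoib_workqueue)
			return -ENOMEM;
		return 0;
	}

	static void example_exit(void)
	{
		flush_workqueue(ipoib_workqueue);
		destroy_workqueue(ipoib_workqueue);
	}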
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index bc50dd0d0e4d..ffb83b5f7e80 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -190,6 +190,12 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 		spin_unlock_irq(&priv->lock);
 		priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
 		set_qkey = 1;
+
+		if (!ipoib_cm_admin_enabled(dev)) {
+			rtnl_lock();
+			dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
+			rtnl_unlock();
+		}
 	}
 
 	if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -271,27 +277,16 @@ ipoib_mcast_sendonly_join_complete(int status,
 	struct ipoib_mcast *mcast = multicast->context;
 	struct net_device *dev = mcast->dev;
 
-	/*
-	 * We have to take the mutex to force mcast_sendonly_join to
-	 * return from ib_sa_multicast_join and set mcast->mc to a
-	 * valid value.  Otherwise we were racing with ourselves in
-	 * that we might fail here, but get a valid return from
-	 * ib_sa_multicast_join after we had cleared mcast->mc here,
-	 * resulting in mis-matched joins and leaves and a deadlock
-	 */
-	mutex_lock(&mcast_mutex);
-
 	/* We trap for port events ourselves. */
 	if (status == -ENETRESET)
-		goto out;
+		return 0;
 
 	if (!status)
 		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
 
 	if (status) {
 		if (mcast->logcount++ < 20)
-			ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast "
-					"join failed for %pI6, status %d\n",
+			ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
 					mcast->mcmember.mgid.raw, status);
 
 		/* Flush out any queued packets */
@@ -301,15 +296,11 @@ ipoib_mcast_sendonly_join_complete(int status,
 			dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
 		}
 		netif_tx_unlock_bh(dev);
+
+		/* Clear the busy flag so we try again */
+		status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
+					    &mcast->flags);
 	}
-out:
-	clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-	if (status)
-		mcast->mc = NULL;
-	complete(&mcast->done);
-	if (status == -ENETRESET)
-		status = 0;
-	mutex_unlock(&mcast_mutex);
 	return status;
 }
 
@@ -327,14 +318,12 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
 	int ret = 0;
 
 	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
-		ipoib_dbg_mcast(priv, "device shutting down, no sendonly "
-				"multicast joins\n");
+		ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
 		return -ENODEV;
 	}
 
-	if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
-		ipoib_dbg_mcast(priv, "multicast entry busy, skipping "
-				"sendonly join\n");
+	if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
+		ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
 		return -EBUSY;
 	}
 
@@ -342,9 +331,6 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
 	rec.port_gid = priv->local_gid;
 	rec.pkey     = cpu_to_be16(priv->pkey);
 
-	mutex_lock(&mcast_mutex);
-	init_completion(&mcast->done);
-	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
 	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
 					 priv->port, &rec,
 					 IB_SA_MCMEMBER_REC_MGID |
@@ -357,14 +343,12 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
 	if (IS_ERR(mcast->mc)) {
 		ret = PTR_ERR(mcast->mc);
 		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-		complete(&mcast->done);
-		ipoib_warn(priv, "ib_sa_join_multicast for sendonly join "
-			   "failed (ret = %d)\n", ret);
+		ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
+			   ret);
 	} else {
-		ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting "
-				"sendonly join\n", mcast->mcmember.mgid.raw);
+		ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
+				mcast->mcmember.mgid.raw);
 	}
-	mutex_unlock(&mcast_mutex);
 
 	return ret;
 }
@@ -375,29 +359,18 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
 					       carrier_on_task);
 	struct ib_port_attr attr;
 
+	/*
+	 * Take rtnl_lock to avoid racing with ipoib_stop() and
+	 * turning the carrier back on while a device is being
+	 * removed.
+	 */
 	if (ib_query_port(priv->ca, priv->port, &attr) ||
 	    attr.state != IB_PORT_ACTIVE) {
 		ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
 		return;
 	}
 
-	/*
-	 * Take rtnl_lock to avoid racing with ipoib_stop() and
-	 * turning the carrier back on while a device is being
-	 * removed. However, ipoib_stop() will attempt to flush
-	 * the workqueue while holding the rtnl lock, so loop
-	 * on trylock until either we get the lock or we see
-	 * FLAG_ADMIN_UP go away as that signals that we are bailing
-	 * and can safely ignore the carrier on work.
-	 */
-	while (!rtnl_trylock()) {
-		if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
-			return;
-		else
-			msleep(20);
-	}
-	if (!ipoib_cm_admin_enabled(priv->dev))
-		dev_set_mtu(priv->dev, min(priv->mcast_mtu, priv->admin_mtu));
+	rtnl_lock();
 	netif_carrier_on(priv->dev);
 	rtnl_unlock();
 }
@@ -412,63 +385,60 @@ static int ipoib_mcast_join_complete(int status,
 	ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
 			mcast->mcmember.mgid.raw, status);
 
-	/*
-	 * We have to take the mutex to force mcast_join to
-	 * return from ib_sa_multicast_join and set mcast->mc to a
-	 * valid value.  Otherwise we were racing with ourselves in
-	 * that we might fail here, but get a valid return from
-	 * ib_sa_multicast_join after we had cleared mcast->mc here,
-	 * resulting in mis-matched joins and leaves and a deadlock
-	 */
-	mutex_lock(&mcast_mutex);
-
 	/* We trap for port events ourselves. */
-	if (status == -ENETRESET)
+	if (status == -ENETRESET) {
+		status = 0;
 		goto out;
+	}
 
 	if (!status)
 		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
 
 	if (!status) {
 		mcast->backoff = 1;
+		mutex_lock(&mcast_mutex);
 		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-			queue_delayed_work(priv->wq, &priv->mcast_task, 0);
+			queue_delayed_work(ipoib_workqueue,
+					   &priv->mcast_task, 0);
+		mutex_unlock(&mcast_mutex);
 
 		/*
-		 * Defer carrier on work to priv->wq to avoid a
+		 * Defer carrier on work to ipoib_workqueue to avoid a
 		 * deadlock on rtnl_lock here.
 		 */
 		if (mcast == priv->broadcast)
-			queue_work(priv->wq, &priv->carrier_on_task);
-	} else {
-		if (mcast->logcount++ < 20) {
-			if (status == -ETIMEDOUT || status == -EAGAIN) {
-				ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
-						mcast->mcmember.mgid.raw, status);
-			} else {
-				ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
-					   mcast->mcmember.mgid.raw, status);
-			}
-		}
+			queue_work(ipoib_workqueue, &priv->carrier_on_task);
 
-		mcast->backoff *= 2;
-		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
-			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
+		status = 0;
+		goto out;
 	}
-out:
+
+	if (mcast->logcount++ < 20) {
+		if (status == -ETIMEDOUT || status == -EAGAIN) {
+			ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
+					mcast->mcmember.mgid.raw, status);
+		} else {
+			ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
+				   mcast->mcmember.mgid.raw, status);
+		}
+	}
+
+	mcast->backoff *= 2;
+	if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
+		mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
+
+	/* Clear the busy flag so we try again */
+	status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+
+	mutex_lock(&mcast_mutex);
 	spin_lock_irq(&priv->lock);
-	clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-	if (status)
-		mcast->mc = NULL;
-	complete(&mcast->done);
-	if (status == -ENETRESET)
-		status = 0;
-	if (status && test_bit(IPOIB_MCAST_RUN, &priv->flags))
-		queue_delayed_work(priv->wq, &priv->mcast_task,
+	if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
+		queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
 				   mcast->backoff * HZ);
 	spin_unlock_irq(&priv->lock);
 	mutex_unlock(&mcast_mutex);
-
+out:
+	complete(&mcast->done);
 	return status;
 }
 
@@ -517,9 +487,10 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
 	}
 
-	mutex_lock(&mcast_mutex);
-	init_completion(&mcast->done);
 	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+	init_completion(&mcast->done);
+	set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
+
 	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
 					 &rec, comp_mask, GFP_KERNEL,
 					 ipoib_mcast_join_complete, mcast);
@@ -533,11 +504,13 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
 			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
 
+		mutex_lock(&mcast_mutex);
 		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-			queue_delayed_work(priv->wq, &priv->mcast_task,
+			queue_delayed_work(ipoib_workqueue,
+					   &priv->mcast_task,
 					   mcast->backoff * HZ);
+		mutex_unlock(&mcast_mutex);
 	}
-	mutex_unlock(&mcast_mutex);
 }
 
 void ipoib_mcast_join_task(struct work_struct *work)
@@ -574,8 +547,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
 			ipoib_warn(priv, "failed to allocate broadcast group\n");
 		mutex_lock(&mcast_mutex);
 		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-			queue_delayed_work(priv->wq, &priv->mcast_task,
-					   HZ);
+			queue_delayed_work(ipoib_workqueue,
+					   &priv->mcast_task, HZ);
 		mutex_unlock(&mcast_mutex);
 		return;
 	}
@@ -590,8 +563,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
 	}
 
 	if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
-		if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
-		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
+		if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
 			ipoib_mcast_join(dev, priv->broadcast, 0);
 		return;
 	}
@@ -599,33 +571,23 @@ void ipoib_mcast_join_task(struct work_struct *work)
 	while (1) {
 		struct ipoib_mcast *mcast = NULL;
 
-		/*
-		 * Need the mutex so our flags are consistent, need the
-		 * priv->lock so we don't race with list removals in either
-		 * mcast_dev_flush or mcast_restart_task
-		 */
-		mutex_lock(&mcast_mutex);
 		spin_lock_irq(&priv->lock);
 		list_for_each_entry(mcast, &priv->multicast_list, list) {
-			if (IS_ERR_OR_NULL(mcast->mc) &&
-			    !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
-			    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
+			if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
+			    && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
+			    && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
 				/* Found the next unjoined group */
 				break;
 			}
 		}
 		spin_unlock_irq(&priv->lock);
-		mutex_unlock(&mcast_mutex);
 
 		if (&mcast->list == &priv->multicast_list) {
 			/* All done */
 			break;
 		}
 
-		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
-			ipoib_mcast_sendonly_join(mcast);
-		else
-			ipoib_mcast_join(dev, mcast, 1);
+		ipoib_mcast_join(dev, mcast, 1);
 		return;
 	}
 
@@ -642,13 +604,13 @@ int ipoib_mcast_start_thread(struct net_device *dev)
 
 	mutex_lock(&mcast_mutex);
 	if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
-		queue_delayed_work(priv->wq, &priv->mcast_task, 0);
+		queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
 	mutex_unlock(&mcast_mutex);
 
 	return 0;
 }
 
-int ipoib_mcast_stop_thread(struct net_device *dev)
+int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
@@ -659,7 +621,8 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
 	cancel_delayed_work(&priv->mcast_task);
 	mutex_unlock(&mcast_mutex);
 
-	flush_workqueue(priv->wq);
+	if (flush)
+		flush_workqueue(ipoib_workqueue);
 
 	return 0;
 }
@@ -670,9 +633,6 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 	int ret = 0;
 
 	if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
-		ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n");
-
-	if (!IS_ERR_OR_NULL(mcast->mc))
 		ib_sa_free_multicast(mcast->mc);
 
 	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
@@ -725,8 +685,6 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
 		memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
 		__ipoib_mcast_add(dev, mcast);
 		list_add_tail(&mcast->list, &priv->multicast_list);
-		if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
-			queue_delayed_work(priv->wq, &priv->mcast_task, 0);
 	}
 
 	if (!mcast->ah) {
@@ -740,6 +698,8 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
 		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
 			ipoib_dbg_mcast(priv, "no address vector, "
 					"but multicast join already started\n");
+		else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+			ipoib_mcast_sendonly_join(mcast);
 
 		/*
 		 * If lookup completes between here and out:, don't
@@ -799,12 +759,9 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/*
-	 * make sure the in-flight joins have finished before we attempt
-	 * to leave
-	 */
+	/* seperate between the wait to the leave*/
 	list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
-		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+		if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
 			wait_for_completion(&mcast->done);
 
 	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
@@ -837,6 +794,8 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 
 	ipoib_dbg_mcast(priv, "restarting multicast task\n");
 
+	ipoib_mcast_stop_thread(dev, 0);
+
 	local_irq_save(flags);
 	netif_addr_lock(dev);
 	spin_lock(&priv->lock);
@@ -921,38 +880,14 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 	netif_addr_unlock(dev);
 	local_irq_restore(flags);
 
-	/*
-	 * make sure the in-flight joins have finished before we attempt
-	 * to leave
-	 */
-	list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
-		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
-			wait_for_completion(&mcast->done);
-
-	/*
-	 * We have to cancel outside of the spinlock, but we have to
-	 * take the rtnl lock or else we race with the removal of
-	 * entries from the remove list in mcast_dev_flush as part
-	 * of ipoib_stop().  We detect the drop of the ADMIN_UP flag
-	 * to signal that we have hit this particular race, and we
-	 * return since we know we don't need to do anything else
-	 * anyway.
-	 */
-	while (!rtnl_trylock()) {
-		if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
-			return;
-		else
-			msleep(20);
-	}
+	/* We have to cancel outside of the spinlock */
 	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
 		ipoib_mcast_leave(mcast->dev, mcast);
 		ipoib_mcast_free(mcast);
 	}
-	/*
-	 * Restart our join task if needed
-	 */
-	ipoib_mcast_start_thread(dev);
-	rtnl_unlock();
+
+	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+		ipoib_mcast_start_thread(dev);
 }
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
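The restored flush parameter exists because a work item must never flush the workqueue it is running on. Callers executing on ipoib_workqueue itself, such as ipoib_mcast_restart_task() above, pass flush = 0; process-context callers pass 1. A skeletal sketch of that contract (the function body is illustrative, not the driver's exact code):

	#include <linux/workqueue.h>

	extern struct workqueue_struct *ipoib_workqueue;

	static int example_stop_thread(int flush)
	{
		/* ... clear the RUN flag and cancel pending mcast work ... */
		if (flush)	/* safe only when not running on ipoib_workqueue */
			flush_workqueue(ipoib_workqueue);
		return 0;
	}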
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index b72a753eb41d..c56d5d44c53b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -145,20 +145,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 	int ret, size;
 	int i;
 
-	/*
-	 * the various IPoIB tasks assume they will never race against
-	 * themselves, so always use a single thread workqueue
-	 */
-	priv->wq = create_singlethread_workqueue("ipoib_wq");
-	if (!priv->wq) {
-		printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
-		return -ENODEV;
-	}
-
 	priv->pd = ib_alloc_pd(priv->ca);
 	if (IS_ERR(priv->pd)) {
 		printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
-		goto out_free_wq;
+		return -ENODEV;
 	}
 
 	priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
@@ -252,10 +242,6 @@ out_free_mr:
 
 out_free_pd:
 	ib_dealloc_pd(priv->pd);
-
-out_free_wq:
-	destroy_workqueue(priv->wq);
-	priv->wq = NULL;
 	return -ENODEV;
 }
 
@@ -284,12 +270,6 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
 
 	if (ib_dealloc_pd(priv->pd))
 		ipoib_warn(priv, "ib_dealloc_pd failed\n");
-
-	if (priv->wq) {
-		flush_workqueue(priv->wq);
-		destroy_workqueue(priv->wq);
-		priv->wq = NULL;
-	}
 }
 
 void ipoib_event(struct ib_event_handler *handler,