author    Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-10-23 12:56:11 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-10-23 12:56:11 -0400
commit    0b776eb5426752d4e53354ac89e3710d857e09a7 (patch)
tree      1eebeeaabab90de5834b32e72d2e259dc8a4a635 /drivers/infiniband
parent    0d6810091cdbd05efeb31654c6a41a6cbdfdd2c8 (diff)
parent    77109cc2823f025ccd66ebd9b88fbab90437b2d8 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
  mlx4_core: Increase command timeout for INIT_HCA to 10 seconds
  IPoIB/cm: Use common CQ for CM send completions
  IB/uverbs: Fix checking of userspace object ownership
  IB/mlx4: Sanity check userspace send queue sizes
  IPoIB: Rewrite "if (!likely(...))" as "if (unlikely(!(...)))"
  IB/ehca: Enable large page MRs by default
  IB/ehca: Change meaning of hca_cap_mr_pgsize
  IB/ehca: Fix ehca_encode_hwpage_size() and alloc_fmr()
  IB/ehca: Fix masking error in {,re}reg_phys_mr()
  IB/ehca: Supply QP token for SRQ base QPs
  IPoIB: Use round_jiffies() for ah_reap_task
  RDMA/cma: Fix deadlock destroying listen requests
  RDMA/cma: Add locking around QP accesses
  IB/mthca: Avoid alignment traps when writing doorbells
  mlx4_core: Kill mlx4_write64_raw()
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/core/cma.c                 | 160
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c          |   8
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_classes.h     |   1
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_hca.c         |   1
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_main.c        |  20
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_mrmw.c        |  57
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_qp.c          |   4
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c               |  16
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cq.c        |  53
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_doorbell.h  |  13
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_eq.c        |  21
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c        |  45
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_srq.c       |  11
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h          |  15
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c       | 114
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c       |  52
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c     |   4
17 files changed, 283 insertions, 312 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index d08fb30768bc..0751697ef984 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -114,13 +114,16 @@ struct rdma_id_private {
 
 	struct rdma_bind_list	*bind_list;
 	struct hlist_node	node;
-	struct list_head	list;
-	struct list_head	listen_list;
+	struct list_head	list; /* listen_any_list or cma_device.list */
+	struct list_head	listen_list; /* per device listens */
 	struct cma_device	*cma_dev;
 	struct list_head	mc_list;
 
+	int			internal_id;
 	enum cma_state		state;
 	spinlock_t		lock;
+	struct mutex		qp_mutex;
+
 	struct completion	comp;
 	atomic_t		refcount;
 	wait_queue_head_t	wait_remove;
@@ -389,6 +392,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
 	id_priv->id.event_handler = event_handler;
 	id_priv->id.ps = ps;
 	spin_lock_init(&id_priv->lock);
+	mutex_init(&id_priv->qp_mutex);
 	init_completion(&id_priv->comp);
 	atomic_set(&id_priv->refcount, 1);
 	init_waitqueue_head(&id_priv->wait_remove);
@@ -474,61 +478,86 @@ EXPORT_SYMBOL(rdma_create_qp);
 
 void rdma_destroy_qp(struct rdma_cm_id *id)
 {
-	ib_destroy_qp(id->qp);
+	struct rdma_id_private *id_priv;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	mutex_lock(&id_priv->qp_mutex);
+	ib_destroy_qp(id_priv->id.qp);
+	id_priv->id.qp = NULL;
+	mutex_unlock(&id_priv->qp_mutex);
 }
 EXPORT_SYMBOL(rdma_destroy_qp);
 
-static int cma_modify_qp_rtr(struct rdma_cm_id *id)
+static int cma_modify_qp_rtr(struct rdma_id_private *id_priv)
 {
 	struct ib_qp_attr qp_attr;
 	int qp_attr_mask, ret;
 
-	if (!id->qp)
-		return 0;
+	mutex_lock(&id_priv->qp_mutex);
+	if (!id_priv->id.qp) {
+		ret = 0;
+		goto out;
+	}
 
 	/* Need to update QP attributes from default values. */
 	qp_attr.qp_state = IB_QPS_INIT;
-	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
 
-	ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
 
 	qp_attr.qp_state = IB_QPS_RTR;
-	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
 
-	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
+out:
+	mutex_unlock(&id_priv->qp_mutex);
+	return ret;
 }
 
-static int cma_modify_qp_rts(struct rdma_cm_id *id)
+static int cma_modify_qp_rts(struct rdma_id_private *id_priv)
 {
 	struct ib_qp_attr qp_attr;
 	int qp_attr_mask, ret;
 
-	if (!id->qp)
-		return 0;
+	mutex_lock(&id_priv->qp_mutex);
+	if (!id_priv->id.qp) {
+		ret = 0;
+		goto out;
+	}
 
 	qp_attr.qp_state = IB_QPS_RTS;
-	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
 
-	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
+out:
+	mutex_unlock(&id_priv->qp_mutex);
+	return ret;
 }
 
-static int cma_modify_qp_err(struct rdma_cm_id *id)
+static int cma_modify_qp_err(struct rdma_id_private *id_priv)
 {
 	struct ib_qp_attr qp_attr;
+	int ret;
 
-	if (!id->qp)
-		return 0;
+	mutex_lock(&id_priv->qp_mutex);
+	if (!id_priv->id.qp) {
+		ret = 0;
+		goto out;
+	}
 
 	qp_attr.qp_state = IB_QPS_ERR;
-	return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
+out:
+	mutex_unlock(&id_priv->qp_mutex);
+	return ret;
 }
 
 static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
@@ -717,50 +746,27 @@ static void cma_cancel_route(struct rdma_id_private *id_priv)
 	}
 }
 
-static inline int cma_internal_listen(struct rdma_id_private *id_priv)
-{
-	return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev &&
-	       cma_any_addr(&id_priv->id.route.addr.src_addr);
-}
-
-static void cma_destroy_listen(struct rdma_id_private *id_priv)
-{
-	cma_exch(id_priv, CMA_DESTROYING);
-
-	if (id_priv->cma_dev) {
-		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
-		case RDMA_TRANSPORT_IB:
-			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
-				ib_destroy_cm_id(id_priv->cm_id.ib);
-			break;
-		case RDMA_TRANSPORT_IWARP:
-			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
-				iw_destroy_cm_id(id_priv->cm_id.iw);
-			break;
-		default:
-			break;
-		}
-		cma_detach_from_dev(id_priv);
-	}
-	list_del(&id_priv->listen_list);
-
-	cma_deref_id(id_priv);
-	wait_for_completion(&id_priv->comp);
-
-	kfree(id_priv);
-}
-
 static void cma_cancel_listens(struct rdma_id_private *id_priv)
 {
 	struct rdma_id_private *dev_id_priv;
 
+	/*
+	 * Remove from listen_any_list to prevent added devices from spawning
+	 * additional listen requests.
	 */
 	mutex_lock(&lock);
 	list_del(&id_priv->list);
 
 	while (!list_empty(&id_priv->listen_list)) {
 		dev_id_priv = list_entry(id_priv->listen_list.next,
 					 struct rdma_id_private, listen_list);
-		cma_destroy_listen(dev_id_priv);
+		/* sync with device removal to avoid duplicate destruction */
+		list_del_init(&dev_id_priv->list);
+		list_del(&dev_id_priv->listen_list);
+		mutex_unlock(&lock);
+
+		rdma_destroy_id(&dev_id_priv->id);
+		mutex_lock(&lock);
 	}
 	mutex_unlock(&lock);
 }
@@ -848,6 +854,9 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 	cma_deref_id(id_priv);
 	wait_for_completion(&id_priv->comp);
 
+	if (id_priv->internal_id)
+		cma_deref_id(id_priv->id.context);
+
 	kfree(id_priv->id.route.path_rec);
 	kfree(id_priv);
 }
@@ -857,11 +866,11 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
 {
 	int ret;
 
-	ret = cma_modify_qp_rtr(&id_priv->id);
+	ret = cma_modify_qp_rtr(id_priv);
 	if (ret)
 		goto reject;
 
-	ret = cma_modify_qp_rts(&id_priv->id);
+	ret = cma_modify_qp_rts(id_priv);
 	if (ret)
 		goto reject;
 
@@ -871,7 +880,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
 
 	return 0;
 reject:
-	cma_modify_qp_err(&id_priv->id);
+	cma_modify_qp_err(id_priv);
 	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
 		       NULL, 0, NULL, 0);
 	return ret;
@@ -947,7 +956,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		/* ignore event */
 		goto out;
 	case IB_CM_REJ_RECEIVED:
-		cma_modify_qp_err(&id_priv->id);
+		cma_modify_qp_err(id_priv);
 		event.status = ib_event->param.rej_rcvd.reason;
 		event.event = RDMA_CM_EVENT_REJECTED;
 		event.param.conn.private_data = ib_event->private_data;
@@ -1404,14 +1413,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 
 	cma_attach_to_dev(dev_id_priv, cma_dev);
 	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
+	atomic_inc(&id_priv->refcount);
+	dev_id_priv->internal_id = 1;
 
 	ret = rdma_listen(id, id_priv->backlog);
 	if (ret)
-		goto err;
-
-	return;
-err:
-	cma_destroy_listen(dev_id_priv);
+		printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
+		       "listening on device %s", ret, cma_dev->device->name);
 }
 
 static void cma_listen_on_all(struct rdma_id_private *id_priv)
@@ -2264,7 +2272,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
 	sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
 	cm_id->remote_addr = *sin;
 
-	ret = cma_modify_qp_rtr(&id_priv->id);
+	ret = cma_modify_qp_rtr(id_priv);
 	if (ret)
 		goto out;
 
@@ -2331,7 +2339,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
 	int qp_attr_mask, ret;
 
 	if (id_priv->id.qp) {
-		ret = cma_modify_qp_rtr(&id_priv->id);
+		ret = cma_modify_qp_rtr(id_priv);
 		if (ret)
 			goto out;
 
@@ -2370,7 +2378,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
 	struct iw_cm_conn_param iw_param;
 	int ret;
 
-	ret = cma_modify_qp_rtr(&id_priv->id);
+	ret = cma_modify_qp_rtr(id_priv);
 	if (ret)
 		return ret;
 
@@ -2442,7 +2450,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 
 	return 0;
 reject:
-	cma_modify_qp_err(id);
+	cma_modify_qp_err(id_priv);
 	rdma_reject(id, NULL, 0);
 	return ret;
 }
@@ -2512,7 +2520,7 @@ int rdma_disconnect(struct rdma_cm_id *id)
 
 	switch (rdma_node_get_transport(id->device->node_type)) {
 	case RDMA_TRANSPORT_IB:
-		ret = cma_modify_qp_err(id);
+		ret = cma_modify_qp_err(id_priv);
 		if (ret)
 			goto out;
 		/* Initiate or respond to a disconnect. */
@@ -2543,9 +2551,11 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 	    cma_disable_remove(id_priv, CMA_ADDR_RESOLVED))
 		return 0;
 
+	mutex_lock(&id_priv->qp_mutex);
 	if (!status && id_priv->id.qp)
 		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
 					 multicast->rec.mlid);
+	mutex_unlock(&id_priv->qp_mutex);
 
 	memset(&event, 0, sizeof event);
 	event.status = status;
@@ -2757,16 +2767,12 @@ static void cma_process_remove(struct cma_device *cma_dev)
 		id_priv = list_entry(cma_dev->id_list.next,
 				     struct rdma_id_private, list);
 
-		if (cma_internal_listen(id_priv)) {
-			cma_destroy_listen(id_priv);
-			continue;
-		}
-
+		list_del(&id_priv->listen_list);
 		list_del_init(&id_priv->list);
 		atomic_inc(&id_priv->refcount);
 		mutex_unlock(&lock);
 
-		ret = cma_remove_id_dev(id_priv);
+		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
 		cma_deref_id(id_priv);
 		if (ret)
 			rdma_destroy_id(&id_priv->id);
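The cma.c hunks above implement two of the fixes named in the shortlog: "RDMA/cma: Add locking around QP accesses" (the new qp_mutex) and "RDMA/cma: Fix deadlock destroying listen requests" (internal listens are now torn down through rdma_destroy_id() with the global lock dropped). The locking half follows a common pattern; a minimal userspace sketch of it, with pthreads standing in for the kernel mutex (illustration only, not the kernel code):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct qp { int num; };

struct id_priv {
	pthread_mutex_t qp_mutex;
	struct qp *qp;			/* freed and NULLed by destroy_qp() */
};

static int modify_qp(struct id_priv *id_priv)
{
	int ret = 0;

	pthread_mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->qp)		/* QP already destroyed: not an error */
		goto out;
	printf("modifying QP %d\n", id_priv->qp->num);
out:
	pthread_mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static void destroy_qp(struct id_priv *id_priv)
{
	pthread_mutex_lock(&id_priv->qp_mutex);
	free(id_priv->qp);
	id_priv->qp = NULL;		/* later accessors see NULL, not garbage */
	pthread_mutex_unlock(&id_priv->qp_mutex);
}

int main(void)
{
	struct id_priv id = { PTHREAD_MUTEX_INITIALIZER, malloc(sizeof(struct qp)) };

	id.qp->num = 42;
	modify_qp(&id);
	destroy_qp(&id);
	modify_qp(&id);			/* safe no-op after destruction */
	return 0;
}

Because every accessor checks the pointer under the same lock the destroy path uses to clear it, a QP can never be touched after rdma_destroy_qp() has torn it down.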
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 01d70084aebe..495c803fb11d 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -147,8 +147,12 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
 
 	spin_lock(&ib_uverbs_idr_lock);
 	uobj = idr_find(idr, id);
-	if (uobj)
-		kref_get(&uobj->ref);
+	if (uobj) {
+		if (uobj->context == context)
+			kref_get(&uobj->ref);
+		else
+			uobj = NULL;
+	}
 	spin_unlock(&ib_uverbs_idr_lock);
 
 	return uobj;
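The uverbs change closes a handle-confusion hole: idr_find() locates any object with a matching id, so the lookup must also verify that the caller's context created it. A standalone sketch of that check, with a plain array standing in for the kernel idr (hypothetical names, illustration only):

#include <stddef.h>
#include <stdio.h>

struct uobject {
	const void *context;	/* owner, recorded at creation time */
	int id;
};

static struct uobject table[4];	/* stand-in for the kernel idr */

static struct uobject *get_uobj(int id, const void *context)
{
	struct uobject *uobj;

	if (id < 0 || id >= 4)
		return NULL;
	uobj = &table[id];
	if (uobj->context != context)	/* the fix: reject foreign handles */
		return NULL;
	return uobj;
}

int main(void)
{
	int ctx_a, ctx_b;

	table[0].context = &ctx_a;
	table[0].id = 0;
	printf("owner lookup:   %p\n", (void *) get_uobj(0, &ctx_a));
	printf("foreign lookup: %p\n", (void *) get_uobj(0, &ctx_b));
	return 0;
}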
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 3f2d68cff764..2d660ae189e5 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -323,7 +323,6 @@ extern int ehca_static_rate;
 extern int ehca_port_act_time;
 extern int ehca_use_hp_mr;
 extern int ehca_scaling_code;
-extern int ehca_mr_largepage;
 
 struct ipzu_queue_resp {
 	u32 qe_size;      /* queue entry size */
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index 4aa3ffa6a19f..15806d140461 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -77,6 +77,7 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
 	}
 
 	memset(props, 0, sizeof(struct ib_device_attr));
+	props->page_size_cap   = shca->hca_cap_mr_pgsize;
 	props->fw_ver          = rblock->hw_ver;
 	props->max_mr_size     = rblock->max_mr_size;
 	props->vendor_id       = rblock->vendor_id >> 8;
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 7a7dab890f6d..c6cd38c5321f 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -65,7 +65,7 @@ int ehca_port_act_time = 30;
 int ehca_poll_all_eqs  = 1;
 int ehca_static_rate   = -1;
 int ehca_scaling_code  = 0;
-int ehca_mr_largepage  = 0;
+int ehca_mr_largepage  = 1;
 
 module_param_named(open_aqp1,   ehca_open_aqp1,   int, S_IRUGO);
 module_param_named(debug_level, ehca_debug_level, int, S_IRUGO);
@@ -260,13 +260,20 @@ static struct cap_descr {
 	{ HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
 };
 
-int ehca_sense_attributes(struct ehca_shca *shca)
+static int ehca_sense_attributes(struct ehca_shca *shca)
 {
 	int i, ret = 0;
 	u64 h_ret;
 	struct hipz_query_hca *rblock;
 	struct hipz_query_port *port;
 
+	static const u32 pgsize_map[] = {
+		HCA_CAP_MR_PGSIZE_4K,  0x1000,
+		HCA_CAP_MR_PGSIZE_64K, 0x10000,
+		HCA_CAP_MR_PGSIZE_1M,  0x100000,
+		HCA_CAP_MR_PGSIZE_16M, 0x1000000,
+	};
+
 	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
 	if (!rblock) {
 		ehca_gen_err("Cannot allocate rblock memory.");
@@ -329,8 +336,15 @@ int ehca_sense_attributes(struct ehca_shca *shca)
 		if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
 			ehca_gen_dbg(" %s", hca_cap_descr[i].descr);
 
-	shca->hca_cap_mr_pgsize = rblock->memory_page_size_supported;
+	/* translate supported MR page sizes; always support 4K */
+	shca->hca_cap_mr_pgsize = EHCA_PAGESIZE;
+	if (ehca_mr_largepage) { /* support extra sizes only if enabled */
+		for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2)
+			if (rblock->memory_page_size_supported & pgsize_map[i])
+				shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
+	}
 
+	/* query max MTU from first port -- it's the same for all ports */
 	port = (struct hipz_query_port *)rblock;
 	h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
 	if (h_ret != H_SUCCESS) {
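With this change hca_cap_mr_pgsize stops being a raw firmware capability word and becomes a bitmap of literal page sizes (hence "Change meaning of hca_cap_mr_pgsize" in the shortlog). Because every page size is a power of two, OR-ing the sizes together yields a word in which each set bit is one supported size, which is what the later `hwpage_size & shca->hca_cap_mr_pgsize` tests rely on. A standalone model of the translation, with made-up capability bit values (the real HCA_CAP_MR_PGSIZE_* masks live in the ehca headers):

#include <stdio.h>
#include <stdint.h>

#define CAP_PGSIZE_4K  0x1	/* hypothetical capability bits */
#define CAP_PGSIZE_64K 0x2
#define CAP_PGSIZE_1M  0x4
#define CAP_PGSIZE_16M 0x8

static const uint32_t pgsize_map[] = {
	CAP_PGSIZE_4K,  0x1000,
	CAP_PGSIZE_64K, 0x10000,
	CAP_PGSIZE_1M,  0x100000,
	CAP_PGSIZE_16M, 0x1000000,
};

/* returns a word where each set bit is one supported page size */
static uint64_t translate_pgsizes(uint32_t fw_caps, int largepage)
{
	uint64_t supported = 0x1000;	/* 4K is always supported */
	size_t i;

	if (largepage)
		for (i = 0; i < sizeof(pgsize_map) / sizeof(pgsize_map[0]); i += 2)
			if (fw_caps & pgsize_map[i])
				supported |= pgsize_map[i + 1];
	return supported;
}

int main(void)
{
	printf("pgsize bitmap: 0x%llx\n", (unsigned long long)
	       translate_pgsizes(CAP_PGSIZE_64K | CAP_PGSIZE_16M, 1));
	return 0;
}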
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index ead7230d7738..e239bbf54da1 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -72,24 +72,14 @@ enum ehca_mr_pgsize {
 
 static u32 ehca_encode_hwpage_size(u32 pgsize)
 {
-	u32 idx = 0;
-	pgsize >>= 12;
-	/*
-	 * map mr page size into hw code:
-	 * 0, 1, 2, 3 for 4K, 64K, 1M, 64M
-	 */
-	while (!(pgsize & 1)) {
-		idx++;
-		pgsize >>= 4;
-	}
-	return idx;
+	int log = ilog2(pgsize);
+	WARN_ON(log < 12 || log > 24 || log & 3);
+	return (log - 12) / 4;
 }
 
 static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
 {
-	if (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)
-		return EHCA_MR_PGSIZE16M;
-	return EHCA_MR_PGSIZE4K;
+	return 1UL << ilog2(shca->hca_cap_mr_pgsize);
 }
 
 static struct ehca_mr *ehca_mr_new(void)
@@ -259,7 +249,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
 		pginfo.u.phy.num_phys_buf = num_phys_buf;
 		pginfo.u.phy.phys_buf_array = phys_buf_array;
 		pginfo.next_hwpage =
-			((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize;
+			((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
 
 		ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
 				  e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
@@ -296,7 +286,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		container_of(pd->device, struct ehca_shca, ib_device);
 	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
 	struct ehca_mr_pginfo pginfo;
-	int ret;
+	int ret, page_shift;
 	u32 num_kpages;
 	u32 num_hwpages;
 	u64 hwpage_size;
@@ -351,19 +341,20 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	/* determine number of MR pages */
 	num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
 	/* select proper hw_pgsize */
-	if (ehca_mr_largepage &&
-	    (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)) {
-		int page_shift = PAGE_SHIFT;
-		if (e_mr->umem->hugetlb) {
-			/* determine page_shift, clamp between 4K and 16M */
-			page_shift = (fls64(length - 1) + 3) & ~3;
-			page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
-					 EHCA_MR_PGSHIFT16M);
-		}
-		hwpage_size = 1UL << page_shift;
-	} else
-		hwpage_size = EHCA_MR_PGSIZE4K; /* ehca1 only supports 4k */
-	ehca_dbg(pd->device, "hwpage_size=%lx", hwpage_size);
+	page_shift = PAGE_SHIFT;
+	if (e_mr->umem->hugetlb) {
+		/* determine page_shift, clamp between 4K and 16M */
+		page_shift = (fls64(length - 1) + 3) & ~3;
+		page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
+				 EHCA_MR_PGSHIFT16M);
+	}
+	hwpage_size = 1UL << page_shift;
+
+	/* now that we have the desired page size, shift until it's
+	 * supported, too. 4K is always supported, so this terminates.
+	 */
+	while (!(hwpage_size & shca->hca_cap_mr_pgsize))
+		hwpage_size >>= 4;
 
 reg_user_mr_fallback:
 	num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
@@ -547,7 +538,7 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
 		pginfo.u.phy.num_phys_buf = num_phys_buf;
 		pginfo.u.phy.phys_buf_array = phys_buf_array;
 		pginfo.next_hwpage =
-			((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize;
+			((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
 	}
 	if (mr_rereg_mask & IB_MR_REREG_ACCESS)
 		new_acl = mr_access_flags;
@@ -809,8 +800,9 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
 		ib_fmr = ERR_PTR(-EINVAL);
 		goto alloc_fmr_exit0;
 	}
-	hw_pgsize = ehca_get_max_hwpage_size(shca);
-	if ((1 << fmr_attr->page_shift) != hw_pgsize) {
+
+	hw_pgsize = 1 << fmr_attr->page_shift;
+	if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
 		ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
 			 fmr_attr->page_shift);
 		ib_fmr = ERR_PTR(-EINVAL);
@@ -826,6 +818,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
 
 	/* register MR on HCA */
 	memset(&pginfo, 0, sizeof(pginfo));
+	pginfo.hwpage_size = hw_pgsize;
 	/*
 	 * pginfo.num_hwpages==0, ie register_rpages() will not be called
 	 * but deferred to map_phys_fmr()
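The rewritten ehca_encode_hwpage_size() exploits the fact that the supported sizes 4K/64K/1M/16M have log2 values 12, 16, 20, 24, so the hardware code 0-3 is simply (log2(pgsize) - 12) / 4. A standalone check of that arithmetic (ilog2() is a kernel helper; a portable substitute is used here, and assert() stands in for WARN_ON):

#include <assert.h>
#include <stdio.h>
#include <stdint.h>

static int ilog2_u32(uint32_t v)	/* stand-in for the kernel's ilog2() */
{
	int log = -1;
	while (v) {
		v >>= 1;
		log++;
	}
	return log;
}

static uint32_t encode_hwpage_size(uint32_t pgsize)
{
	int log = ilog2_u32(pgsize);

	assert(log >= 12 && log <= 24 && !(log & 3));
	return (log - 12) / 4;
}

int main(void)
{
	printf("4K  -> %u\n", encode_hwpage_size(0x1000));	/* 0 */
	printf("64K -> %u\n", encode_hwpage_size(0x10000));	/* 1 */
	printf("1M  -> %u\n", encode_hwpage_size(0x100000));	/* 2 */
	printf("16M -> %u\n", encode_hwpage_size(0x1000000));	/* 3 */
	return 0;
}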
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index e2bd62be11e7..de182648b282 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -451,7 +451,6 @@ static struct ehca_qp *internal_create_qp(
 		has_srq = 1;
 		parms.ext_type = EQPT_SRQBASE;
 		parms.srq_qpn = my_srq->real_qp_num;
-		parms.srq_token = my_srq->token;
 	}
 
 	if (is_llqp && has_srq) {
@@ -583,6 +582,9 @@ static struct ehca_qp *internal_create_qp(
 		goto create_qp_exit1;
 	}
 
+	if (has_srq)
+		parms.srq_token = my_qp->token;
+
 	parms.servicetype = ibqptype2servicetype(qp_type);
 	if (parms.servicetype < 0) {
 		ret = -EINVAL;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 31a480e5b0d0..6b3322486b5e 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -63,6 +63,10 @@ struct mlx4_ib_sqp {
 	u8		header_buf[MLX4_IB_UD_HEADER_SIZE];
 };
 
+enum {
+	MLX4_IB_MIN_SQ_STRIDE = 6
+};
+
 static const __be32 mlx4_ib_opcode[] = {
 	[IB_WR_SEND]		= __constant_cpu_to_be32(MLX4_OPCODE_SEND),
 	[IB_WR_SEND_WITH_IMM]	= __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
@@ -285,9 +289,17 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 	return 0;
 }
 
-static int set_user_sq_size(struct mlx4_ib_qp *qp,
+static int set_user_sq_size(struct mlx4_ib_dev *dev,
+			    struct mlx4_ib_qp *qp,
 			    struct mlx4_ib_create_qp *ucmd)
 {
+	/* Sanity check SQ size before proceeding */
+	if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes	 ||
+	    ucmd->log_sq_stride >
+		ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
+	    ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
+		return -EINVAL;
+
 	qp->sq.wqe_cnt   = 1 << ucmd->log_sq_bb_count;
 	qp->sq.wqe_shift = ucmd->log_sq_stride;
 
@@ -330,7 +342,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 
 		qp->sq_no_prefetch = ucmd.sq_no_prefetch;
 
-		err = set_user_sq_size(qp, &ucmd);
+		err = set_user_sq_size(dev, qp, &ucmd);
 		if (err)
 			goto err;
 
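set_user_sq_size() now validates the log-encoded sizes copied from userspace before shifting by them, so a hostile log_sq_bb_count or log_sq_stride can no longer drive an oversized allocation or a bogus WQE stride. A userspace model of the bounds check, with illustrative (made-up) device limits:

#include <stdio.h>
#include <stdint.h>

#define MIN_SQ_STRIDE	6	/* mirrors MLX4_IB_MIN_SQ_STRIDE */

struct caps { int max_wqes; int max_sq_desc_sz; };

static int ilog2_roundup(int v)	/* ilog2(roundup_pow_of_two(v)) */
{
	int log = 0;
	while ((1 << log) < v)
		log++;
	return log;
}

static int check_user_sq_size(const struct caps *caps,
			      uint8_t log_sq_bb_count, uint8_t log_sq_stride)
{
	if ((1 << log_sq_bb_count) > caps->max_wqes ||
	    log_sq_stride > ilog2_roundup(caps->max_sq_desc_sz) ||
	    log_sq_stride < MIN_SQ_STRIDE)
		return -1;	/* -EINVAL in the kernel */
	return 0;
}

int main(void)
{
	struct caps caps = { 16384, 1008 };	/* made-up device limits */

	printf("sane request:    %d\n", check_user_sq_size(&caps, 10, 6));
	printf("oversized count: %d\n", check_user_sq_size(&caps, 20, 6));
	printf("tiny stride:     %d\n", check_user_sq_size(&caps, 10, 2));
	return 0;
}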
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index be6e1e03bdab..6bd9f1393349 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -204,16 +204,11 @@ static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr)
 static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq,
 				     int incr)
 {
-	__be32 doorbell[2];
-
 	if (mthca_is_memfree(dev)) {
 		*cq->set_ci_db = cpu_to_be32(cq->cons_index);
 		wmb();
 	} else {
-		doorbell[0] = cpu_to_be32(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn);
-		doorbell[1] = cpu_to_be32(incr - 1);
-
-		mthca_write64(doorbell,
+		mthca_write64(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn, incr - 1,
 			      dev->kar + MTHCA_CQ_DOORBELL,
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 		/*
@@ -731,17 +726,12 @@ repoll:
 
 int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
 {
-	__be32 doorbell[2];
+	u32 dbhi = ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+		    MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
+		    MTHCA_TAVOR_CQ_DB_REQ_NOT) |
+		   to_mcq(cq)->cqn;
 
-	doorbell[0] = cpu_to_be32(((flags & IB_CQ_SOLICITED_MASK) ==
-				   IB_CQ_SOLICITED ?
-				   MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
-				   MTHCA_TAVOR_CQ_DB_REQ_NOT) |
-				  to_mcq(cq)->cqn);
-	doorbell[1] = (__force __be32) 0xffffffff;
-
-	mthca_write64(doorbell,
-		      to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL,
+	mthca_write64(dbhi, 0xffffffff, to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL,
 		      MTHCA_GET_DOORBELL_LOCK(&to_mdev(cq->device)->doorbell_lock));
 
 	return 0;
@@ -750,19 +740,16 @@ int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
 int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 {
 	struct mthca_cq *cq = to_mcq(ibcq);
-	__be32 doorbell[2];
-	u32 sn;
-	__be32 ci;
-
-	sn = cq->arm_sn & 3;
-	ci = cpu_to_be32(cq->cons_index);
+	__be32 db_rec[2];
+	u32 dbhi;
+	u32 sn = cq->arm_sn & 3;
 
-	doorbell[0] = ci;
-	doorbell[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
-				  ((flags & IB_CQ_SOLICITED_MASK) ==
-				   IB_CQ_SOLICITED ? 1 : 2));
+	db_rec[0] = cpu_to_be32(cq->cons_index);
+	db_rec[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
+				((flags & IB_CQ_SOLICITED_MASK) ==
+				 IB_CQ_SOLICITED ? 1 : 2));
 
-	mthca_write_db_rec(doorbell, cq->arm_db);
+	mthca_write_db_rec(db_rec, cq->arm_db);
 
 	/*
 	 * Make sure that the doorbell record in host memory is
@@ -770,14 +757,12 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 	 */
 	wmb();
 
-	doorbell[0] = cpu_to_be32((sn << 28) |
-				  ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
-				   MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
-				   MTHCA_ARBEL_CQ_DB_REQ_NOT) |
-				  cq->cqn);
-	doorbell[1] = ci;
+	dbhi = (sn << 28) |
+	       ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+		MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
+		MTHCA_ARBEL_CQ_DB_REQ_NOT) | cq->cqn;
 
-	mthca_write64(doorbell,
+	mthca_write64(dbhi, cq->cons_index,
 		      to_mdev(ibcq->device)->kar + MTHCA_CQ_DOORBELL,
 		      MTHCA_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->doorbell_lock));
 
diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h
index dd9a44d170c9..b374dc395be1 100644
--- a/drivers/infiniband/hw/mthca/mthca_doorbell.h
+++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h
@@ -58,10 +58,10 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
 	__raw_writeq((__force u64) val, dest);
 }
 
-static inline void mthca_write64(__be32 val[2], void __iomem *dest,
+static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest,
 				 spinlock_t *doorbell_lock)
 {
-	__raw_writeq(*(u64 *) val, dest);
+	__raw_writeq((__force u64) cpu_to_be64((u64) hi << 32 | lo), dest);
 }
 
 static inline void mthca_write_db_rec(__be32 val[2], __be32 *db)
@@ -87,14 +87,17 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
 	__raw_writel(((__force u32 *) &val)[1], dest + 4);
 }
 
-static inline void mthca_write64(__be32 val[2], void __iomem *dest,
+static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest,
 				 spinlock_t *doorbell_lock)
 {
 	unsigned long flags;
 
+	hi = (__force u32) cpu_to_be32(hi);
+	lo = (__force u32) cpu_to_be32(lo);
+
 	spin_lock_irqsave(doorbell_lock, flags);
-	__raw_writel((__force u32) val[0], dest);
-	__raw_writel((__force u32) val[1], dest + 4);
+	__raw_writel(hi, dest);
+	__raw_writel(lo, dest + 4);
 	spin_unlock_irqrestore(doorbell_lock, flags);
 }
 
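This is the core of "IB/mthca: Avoid alignment traps when writing doorbells": instead of type-punning a __be32[2] array through a u64 store (whose 4-byte alignment can trap on some 64-bit platforms), the caller passes the two halves and mthca_write64() builds one naturally aligned big-endian u64. A standalone sketch of the byte layout it produces (cpu_to_be64_() is a portable stand-in for the kernel's cpu_to_be64):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint64_t cpu_to_be64_(uint64_t x)
{
	uint8_t b[8];
	int i;

	for (i = 0; i < 8; i++)
		b[i] = x >> (56 - 8 * i);	/* most significant byte first */
	memcpy(&x, b, 8);
	return x;
}

static uint64_t doorbell_word(uint32_t hi, uint32_t lo)
{
	return cpu_to_be64_((uint64_t) hi << 32 | lo);
}

int main(void)
{
	uint64_t db = doorbell_word(0x11223344, 0x55667788);
	const uint8_t *p = (const uint8_t *) &db;
	int i;

	for (i = 0; i < 8; i++)		/* prints 11 22 33 44 55 66 77 88 */
		printf("%02x ", p[i]);
	printf("\n");
	return 0;
}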
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 8592b26dc4e1..b29de51b7f35 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -173,11 +173,6 @@ static inline u64 async_mask(struct mthca_dev *dev)
 
 static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
 {
-	__be32 doorbell[2];
-
-	doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn);
-	doorbell[1] = cpu_to_be32(ci & (eq->nent - 1));
-
 	/*
 	 * This barrier makes sure that all updates to ownership bits
 	 * done by set_eqe_hw() hit memory before the consumer index
@@ -187,7 +182,7 @@ static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
 	 * having set_eqe_hw() overwrite the owner field.
 	 */
 	wmb();
-	mthca_write64(doorbell,
+	mthca_write64(MTHCA_EQ_DB_SET_CI | eq->eqn, ci & (eq->nent - 1),
 		      dev->kar + MTHCA_EQ_DOORBELL,
 		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 }
@@ -212,12 +207,7 @@ static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
 
 static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn)
 {
-	__be32 doorbell[2];
-
-	doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_REQ_NOT | eqn);
-	doorbell[1] = 0;
-
-	mthca_write64(doorbell,
+	mthca_write64(MTHCA_EQ_DB_REQ_NOT | eqn, 0,
 		      dev->kar + MTHCA_EQ_DOORBELL,
 		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 }
@@ -230,12 +220,7 @@ static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask)
 static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn)
 {
 	if (!mthca_is_memfree(dev)) {
-		__be32 doorbell[2];
-
-		doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn);
-		doorbell[1] = cpu_to_be32(cqn);
-
-		mthca_write64(doorbell,
+		mthca_write64(MTHCA_EQ_DB_DISARM_CQ | eqn, cqn,
 			      dev->kar + MTHCA_EQ_DOORBELL,
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 	}
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index df01b2026a64..0e5461c65731 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1799,15 +1799,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 out:
 	if (likely(nreq)) {
-		__be32 doorbell[2];
-
-		doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) +
-					   qp->send_wqe_offset) | f0 | op0);
-		doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
-
 		wmb();
 
-		mthca_write64(doorbell,
+		mthca_write64(((qp->sq.next_ind << qp->sq.wqe_shift) +
+			       qp->send_wqe_offset) | f0 | op0,
+			      (qp->qpn << 8) | size0,
 			      dev->kar + MTHCA_SEND_DOORBELL,
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 		/*
@@ -1829,7 +1825,6 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
-	__be32 doorbell[2];
 	unsigned long flags;
 	int err = 0;
 	int nreq;
@@ -1907,13 +1902,10 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 		if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
 			nreq = 0;
 
-			doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
-			doorbell[1] = cpu_to_be32(qp->qpn << 8);
-
 			wmb();
 
-			mthca_write64(doorbell,
-				      dev->kar + MTHCA_RECEIVE_DOORBELL,
+			mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
+				      qp->qpn << 8, dev->kar + MTHCA_RECEIVE_DOORBELL,
 				      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 
 			qp->rq.next_ind = ind;
@@ -1923,13 +1915,10 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 
 out:
 	if (likely(nreq)) {
-		doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
-		doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq);
-
 		wmb();
 
-		mthca_write64(doorbell,
-			      dev->kar + MTHCA_RECEIVE_DOORBELL,
+		mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
+			      qp->qpn << 8 | nreq, dev->kar + MTHCA_RECEIVE_DOORBELL,
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 	}
 
@@ -1951,7 +1940,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
-	__be32 doorbell[2];
+	u32 dbhi;
 	void *wqe;
 	void *prev_wqe;
 	unsigned long flags;
@@ -1981,10 +1970,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
 			nreq = 0;
 
-			doorbell[0] = cpu_to_be32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
-						  ((qp->sq.head & 0xffff) << 8) |
-						  f0 | op0);
-			doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
+			dbhi = (MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
+				((qp->sq.head & 0xffff) << 8) | f0 | op0;
 
 			qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
 
@@ -2000,7 +1987,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			 * write MMIO send doorbell.
 			 */
 			wmb();
-			mthca_write64(doorbell,
+
+			mthca_write64(dbhi, (qp->qpn << 8) | size0,
 				      dev->kar + MTHCA_SEND_DOORBELL,
 				      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 		}
@@ -2154,10 +2142,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 out:
 	if (likely(nreq)) {
-		doorbell[0] = cpu_to_be32((nreq << 24) |
-					  ((qp->sq.head & 0xffff) << 8) |
-					  f0 | op0);
-		doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
+		dbhi = (nreq << 24) | ((qp->sq.head & 0xffff) << 8) | f0 | op0;
 
 		qp->sq.head += nreq;
 
@@ -2173,8 +2158,8 @@ out:
 		 * write MMIO send doorbell.
 		 */
 		wmb();
-		mthca_write64(doorbell,
-			      dev->kar + MTHCA_SEND_DOORBELL,
+
+		mthca_write64(dbhi, (qp->qpn << 8) | size0, dev->kar + MTHCA_SEND_DOORBELL,
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 	}
 
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 3f58c11a62b7..553d681f6813 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -491,7 +491,6 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 {
 	struct mthca_dev *dev = to_mdev(ibsrq->device);
 	struct mthca_srq *srq = to_msrq(ibsrq);
-	__be32 doorbell[2];
 	unsigned long flags;
 	int err = 0;
 	int first_ind;
@@ -563,16 +562,13 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 		if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
 			nreq = 0;
 
-			doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift);
-			doorbell[1] = cpu_to_be32(srq->srqn << 8);
-
 			/*
 			 * Make sure that descriptors are written
 			 * before doorbell is rung.
 			 */
 			wmb();
 
-			mthca_write64(doorbell,
+			mthca_write64(first_ind << srq->wqe_shift, srq->srqn << 8,
 				      dev->kar + MTHCA_RECEIVE_DOORBELL,
 				      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 
@@ -581,16 +577,13 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 	}
 
 	if (likely(nreq)) {
-		doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift);
-		doorbell[1] = cpu_to_be32((srq->srqn << 8) | nreq);
-
 		/*
 		 * Make sure that descriptors are written before
 		 * doorbell is rung.
 		 */
 		wmb();
 
-		mthca_write64(doorbell,
+		mthca_write64(first_ind << srq->wqe_shift, (srq->srqn << 8) | nreq,
 			      dev->kar + MTHCA_RECEIVE_DOORBELL,
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 	}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 1b3327ad6bc4..eb7edab0e836 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -84,9 +84,8 @@ enum {
 	IPOIB_MCAST_RUN		  = 6,
 	IPOIB_STOP_REAPER	  = 7,
 	IPOIB_MCAST_STARTED	  = 8,
-	IPOIB_FLAG_NETIF_STOPPED  = 9,
-	IPOIB_FLAG_ADMIN_CM	  = 10,
-	IPOIB_FLAG_UMCAST	  = 11,
+	IPOIB_FLAG_ADMIN_CM	  = 9,
+	IPOIB_FLAG_UMCAST	  = 10,
 
 	IPOIB_MAX_BACKOFF_SECONDS = 16,
 
@@ -98,9 +97,9 @@ enum {
 
 #define	IPOIB_OP_RECV   (1ul << 31)
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
-#define	IPOIB_CM_OP_SRQ (1ul << 30)
+#define	IPOIB_OP_CM     (1ul << 30)
 #else
-#define	IPOIB_CM_OP_SRQ (0)
+#define	IPOIB_OP_CM     (0)
 #endif
 
 /* structs */
@@ -197,7 +196,6 @@ struct ipoib_cm_rx {
 
 struct ipoib_cm_tx {
 	struct ib_cm_id     *id;
-	struct ib_cq        *cq;
 	struct ib_qp        *qp;
 	struct list_head     list;
 	struct net_device   *dev;
@@ -294,6 +292,7 @@ struct ipoib_dev_priv {
 	unsigned	     tx_tail;
 	struct ib_sge	     tx_sge;
 	struct ib_send_wr    tx_wr;
+	unsigned	     tx_outstanding;
 
 	struct ib_wc	     ibwc[IPOIB_NUM_WC];
 
@@ -504,6 +503,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx);
 void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
 			   unsigned int mtu);
 void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc);
+void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc);
 #else
 
 struct ipoib_cm_tx;
@@ -592,6 +592,9 @@ static inline void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 {
 }
 
+static inline void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+}
 #endif
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
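Renaming IPOIB_CM_OP_SRQ to IPOIB_OP_CM and pairing it with IPOIB_OP_RECV is what lets connected-mode send completions share the main CQ: the top two wr_id bits now encode both direction and path, and the remaining bits index the ring. A standalone sketch of the demultiplexing (flag values mirror the #defines above):

#include <stdio.h>

#define OP_RECV	(1ul << 31)	/* mirrors IPOIB_OP_RECV */
#define OP_CM	(1ul << 30)	/* mirrors IPOIB_OP_CM */

static void dispatch(unsigned long wr_id)
{
	unsigned long id = wr_id & ~(OP_CM | OP_RECV);

	if (wr_id & OP_RECV)
		printf("%s rx completion, ring slot %lu\n",
		       wr_id & OP_CM ? "CM" : "datagram", id);
	else
		printf("%s tx completion, ring slot %lu\n",
		       wr_id & OP_CM ? "CM" : "datagram", id);
}

int main(void)
{
	dispatch(5 | OP_CM | OP_RECV);	/* CM receive, as posted by ipoib_cm_post_receive() */
	dispatch(7 | OP_CM);		/* CM send */
	dispatch(9 | OP_RECV);		/* datagram receive */
	dispatch(3);			/* datagram send */
	return 0;
}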
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 0a0dcb8fdfd1..87610772a979 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -87,7 +87,7 @@ static int ipoib_cm_post_receive(struct net_device *dev, int id)
87 struct ib_recv_wr *bad_wr; 87 struct ib_recv_wr *bad_wr;
88 int i, ret; 88 int i, ret;
89 89
90 priv->cm.rx_wr.wr_id = id | IPOIB_CM_OP_SRQ; 90 priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
91 91
92 for (i = 0; i < IPOIB_CM_RX_SG; ++i) 92 for (i = 0; i < IPOIB_CM_RX_SG; ++i)
93 priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; 93 priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
@@ -401,7 +401,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
401void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) 401void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
402{ 402{
403 struct ipoib_dev_priv *priv = netdev_priv(dev); 403 struct ipoib_dev_priv *priv = netdev_priv(dev);
404 unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ; 404 unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
405 struct sk_buff *skb, *newskb; 405 struct sk_buff *skb, *newskb;
406 struct ipoib_cm_rx *p; 406 struct ipoib_cm_rx *p;
407 unsigned long flags; 407 unsigned long flags;
@@ -412,7 +412,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
412 wr_id, wc->status); 412 wr_id, wc->status);
413 413
414 if (unlikely(wr_id >= ipoib_recvq_size)) { 414 if (unlikely(wr_id >= ipoib_recvq_size)) {
415 if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~IPOIB_CM_OP_SRQ)) { 415 if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) {
416 spin_lock_irqsave(&priv->lock, flags); 416 spin_lock_irqsave(&priv->lock, flags);
417 list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list); 417 list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
418 ipoib_cm_start_rx_drain(priv); 418 ipoib_cm_start_rx_drain(priv);
@@ -434,7 +434,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
434 goto repost; 434 goto repost;
435 } 435 }
436 436
437 if (!likely(wr_id & IPOIB_CM_RX_UPDATE_MASK)) { 437 if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) {
438 p = wc->qp->qp_context; 438 p = wc->qp->qp_context;
439 if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { 439 if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
440 spin_lock_irqsave(&priv->lock, flags); 440 spin_lock_irqsave(&priv->lock, flags);
@@ -498,7 +498,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
498 priv->tx_sge.addr = addr; 498 priv->tx_sge.addr = addr;
499 priv->tx_sge.length = len; 499 priv->tx_sge.length = len;
500 500
501 priv->tx_wr.wr_id = wr_id; 501 priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM;
502 502
503 return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); 503 return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
504} 504}
@@ -549,20 +549,19 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
549 dev->trans_start = jiffies; 549 dev->trans_start = jiffies;
550 ++tx->tx_head; 550 ++tx->tx_head;
551 551
552 if (tx->tx_head - tx->tx_tail == ipoib_sendq_size) { 552 if (++priv->tx_outstanding == ipoib_sendq_size) {
553 ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", 553 ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
554 tx->qp->qp_num); 554 tx->qp->qp_num);
555 netif_stop_queue(dev); 555 netif_stop_queue(dev);
556 set_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
557 } 556 }
558 } 557 }
559} 558}
560 559
561static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx, 560void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
562 struct ib_wc *wc)
563{ 561{
564 struct ipoib_dev_priv *priv = netdev_priv(dev); 562 struct ipoib_dev_priv *priv = netdev_priv(dev);
565 unsigned int wr_id = wc->wr_id; 563 struct ipoib_cm_tx *tx = wc->qp->qp_context;
564 unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
566 struct ipoib_tx_buf *tx_req; 565 struct ipoib_tx_buf *tx_req;
567 unsigned long flags; 566 unsigned long flags;
568 567
@@ -587,11 +586,10 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
587 586
588 spin_lock_irqsave(&priv->tx_lock, flags); 587 spin_lock_irqsave(&priv->tx_lock, flags);
589 ++tx->tx_tail; 588 ++tx->tx_tail;
590 if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags)) && 589 if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
591 tx->tx_head - tx->tx_tail <= ipoib_sendq_size >> 1) { 590 netif_queue_stopped(dev) &&
592 clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags); 591 test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
593 netif_wake_queue(dev); 592 netif_wake_queue(dev);
594 }
595 593
596 if (wc->status != IB_WC_SUCCESS && 594 if (wc->status != IB_WC_SUCCESS &&
597 wc->status != IB_WC_WR_FLUSH_ERR) { 595 wc->status != IB_WC_WR_FLUSH_ERR) {
@@ -614,11 +612,6 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
614 tx->neigh = NULL; 612 tx->neigh = NULL;
615 } 613 }
616 614
617 /* queue would be re-started anyway when TX is destroyed,
618 * but it makes sense to do it ASAP here. */
619 if (test_and_clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags))
620 netif_wake_queue(dev);
621
622 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { 615 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
623 list_move(&tx->list, &priv->cm.reap_list); 616 list_move(&tx->list, &priv->cm.reap_list);
624 queue_work(ipoib_workqueue, &priv->cm.reap_task); 617 queue_work(ipoib_workqueue, &priv->cm.reap_task);
@@ -632,19 +625,6 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
632 spin_unlock_irqrestore(&priv->tx_lock, flags); 625 spin_unlock_irqrestore(&priv->tx_lock, flags);
633} 626}
634 627
635static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
636{
637 struct ipoib_cm_tx *tx = tx_ptr;
638 int n, i;
639
640 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
641 do {
642 n = ib_poll_cq(cq, IPOIB_NUM_WC, tx->ibwc);
643 for (i = 0; i < n; ++i)
644 ipoib_cm_handle_tx_wc(tx->dev, tx, tx->ibwc + i);
645 } while (n == IPOIB_NUM_WC);
646}
647
648int ipoib_cm_dev_open(struct net_device *dev) 628int ipoib_cm_dev_open(struct net_device *dev)
649{ 629{
650 struct ipoib_dev_priv *priv = netdev_priv(dev); 630 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -807,17 +787,18 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
807 return 0; 787 return 0;
808} 788}
809 789
810static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ib_cq *cq) 790static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx)
811{ 791{
812 struct ipoib_dev_priv *priv = netdev_priv(dev); 792 struct ipoib_dev_priv *priv = netdev_priv(dev);
813 struct ib_qp_init_attr attr = { 793 struct ib_qp_init_attr attr = {
814 .send_cq = cq, 794 .send_cq = priv->cq,
815 .recv_cq = priv->cq, 795 .recv_cq = priv->cq,
816 .srq = priv->cm.srq, 796 .srq = priv->cm.srq,
817 .cap.max_send_wr = ipoib_sendq_size, 797 .cap.max_send_wr = ipoib_sendq_size,
818 .cap.max_send_sge = 1, 798 .cap.max_send_sge = 1,
819 .sq_sig_type = IB_SIGNAL_ALL_WR, 799 .sq_sig_type = IB_SIGNAL_ALL_WR,
820 .qp_type = IB_QPT_RC, 800 .qp_type = IB_QPT_RC,
801 .qp_context = tx
821 }; 802 };
822 803
823 return ib_create_qp(priv->pd, &attr); 804 return ib_create_qp(priv->pd, &attr);
@@ -899,21 +880,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
899 goto err_tx; 880 goto err_tx;
900 } 881 }
901 882
902 p->cq = ib_create_cq(priv->ca, ipoib_cm_tx_completion, NULL, p, 883 p->qp = ipoib_cm_create_tx_qp(p->dev, p);
903 ipoib_sendq_size + 1, 0);
904 if (IS_ERR(p->cq)) {
905 ret = PTR_ERR(p->cq);
906 ipoib_warn(priv, "failed to allocate tx cq: %d\n", ret);
907 goto err_cq;
908 }
909
910 ret = ib_req_notify_cq(p->cq, IB_CQ_NEXT_COMP);
911 if (ret) {
912 ipoib_warn(priv, "failed to request completion notification: %d\n", ret);
913 goto err_req_notify;
914 }
915
916 p->qp = ipoib_cm_create_tx_qp(p->dev, p->cq);
917 if (IS_ERR(p->qp)) { 884 if (IS_ERR(p->qp)) {
918 ret = PTR_ERR(p->qp); 885 ret = PTR_ERR(p->qp);
919 ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret); 886 ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret);
@@ -950,12 +917,8 @@ err_modify:
950err_id: 917err_id:
951 p->id = NULL; 918 p->id = NULL;
952 ib_destroy_qp(p->qp); 919 ib_destroy_qp(p->qp);
953err_req_notify:
954err_qp: 920err_qp:
955 p->qp = NULL; 921 p->qp = NULL;
956 ib_destroy_cq(p->cq);
957err_cq:
958 p->cq = NULL;
959err_tx: 922err_tx:
960 return ret; 923 return ret;
961} 924}
@@ -964,6 +927,8 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
 	struct ipoib_tx_buf *tx_req;
+	unsigned long flags;
+	unsigned long begin;
 
 	ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
 		  p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);
@@ -971,27 +936,40 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
 	if (p->id)
 		ib_destroy_cm_id(p->id);
 
-	if (p->qp)
-		ib_destroy_qp(p->qp);
-
-	if (p->cq)
-		ib_destroy_cq(p->cq);
-
-	if (test_bit(IPOIB_FLAG_NETIF_STOPPED, &p->flags))
-		netif_wake_queue(p->dev);
-
 	if (p->tx_ring) {
+		/* Wait for all sends to complete */
+		begin = jiffies;
 		while ((int) p->tx_tail - (int) p->tx_head < 0) {
-			tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
-			ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
-					    DMA_TO_DEVICE);
-			dev_kfree_skb_any(tx_req->skb);
-			++p->tx_tail;
+			if (time_after(jiffies, begin + 5 * HZ)) {
+				ipoib_warn(priv, "timing out; %d sends not completed\n",
+					   p->tx_head - p->tx_tail);
+				goto timeout;
+			}
+
+			msleep(1);
 		}
+	}
 
-		kfree(p->tx_ring);
+timeout:
+
+	while ((int) p->tx_tail - (int) p->tx_head < 0) {
+		tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
+		ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
+				    DMA_TO_DEVICE);
+		dev_kfree_skb_any(tx_req->skb);
+		++p->tx_tail;
+		spin_lock_irqsave(&priv->tx_lock, flags);
+		if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
+		    netif_queue_stopped(p->dev) &&
+		    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+			netif_wake_queue(p->dev);
+		spin_unlock_irqrestore(&priv->tx_lock, flags);
 	}
 
+	if (p->qp)
+		ib_destroy_qp(p->qp);
+
+	kfree(p->tx_ring);
 	kfree(p);
 }
 
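Destroying the private CQ used to flush completions synchronously; without it, ipoib_cm_tx_destroy() must wait for the hardware, polling for up to five seconds before reclaiming whatever is still posted. A standalone sketch of that bounded-drain idiom, with POSIX clocks standing in for jiffies/time_after (wait_drained and drained are illustrative names):

	#include <stdbool.h>
	#include <stdio.h>
	#include <time.h>
	#include <unistd.h>

	static bool wait_drained(bool (*drained)(void))
	{
		struct timespec begin, now;

		clock_gettime(CLOCK_MONOTONIC, &begin);
		while (!drained()) {
			clock_gettime(CLOCK_MONOTONIC, &now);
			if (now.tv_sec - begin.tv_sec >= 5) {
				fprintf(stderr, "timing out with sends pending\n");
				return false;	/* caller reclaims anyway */
			}
			usleep(1000);		/* like msleep(1) */
		}
		return true;
	}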
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 1a77e79f6b43..5063dd509ad2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -267,11 +267,10 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
 	spin_lock_irqsave(&priv->tx_lock, flags);
 	++priv->tx_tail;
-	if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags)) &&
-	    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) {
-		clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
+	if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
+	    netif_queue_stopped(dev) &&
+	    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
 		netif_wake_queue(dev);
-	}
 	spin_unlock_irqrestore(&priv->tx_lock, flags);
 
 	if (wc->status != IB_WC_SUCCESS &&
@@ -301,14 +300,18 @@ poll_more:
 		for (i = 0; i < n; i++) {
 			struct ib_wc *wc = priv->ibwc + i;
 
-			if (wc->wr_id & IPOIB_CM_OP_SRQ) {
-				++done;
-				ipoib_cm_handle_rx_wc(dev, wc);
-			} else if (wc->wr_id & IPOIB_OP_RECV) {
+			if (wc->wr_id & IPOIB_OP_RECV) {
 				++done;
-				ipoib_ib_handle_rx_wc(dev, wc);
-			} else
-				ipoib_ib_handle_tx_wc(dev, wc);
+				if (wc->wr_id & IPOIB_OP_CM)
+					ipoib_cm_handle_rx_wc(dev, wc);
+				else
+					ipoib_ib_handle_rx_wc(dev, wc);
+			} else {
+				if (wc->wr_id & IPOIB_OP_CM)
+					ipoib_cm_handle_tx_wc(dev, wc);
+				else
+					ipoib_ib_handle_tx_wc(dev, wc);
+			}
 		}
 
 		if (n != t)
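The four-way dispatch above works because two high bits of the 64-bit wr_id now encode direction and transport, replacing the single IPOIB_CM_OP_SRQ special case. A self-contained sketch with illustrative flag values (the driver's real masks are defined in ipoib.h):

	#include <stdint.h>
	#include <stdio.h>

	#define OP_RECV (1ULL << 31)	/* receive (vs. send) work request */
	#define OP_CM	(1ULL << 30)	/* connected mode (vs. datagram) */

	static void dispatch(uint64_t wr_id)
	{
		const char *mode = (wr_id & OP_CM) ? "cm" : "ib";
		unsigned long long slot = wr_id & ~(OP_RECV | OP_CM);

		if (wr_id & OP_RECV)
			printf("%s rx completion, ring slot %llu\n", mode, slot);
		else
			printf("%s tx completion, ring slot %llu\n", mode, slot);
	}

	int main(void)
	{
		dispatch(5 | OP_RECV | OP_CM);	/* CM receive, slot 5 */
		dispatch(7);			/* datagram send, slot 7 */
		return 0;
	}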
@@ -401,10 +404,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		address->last_send = priv->tx_head;
 		++priv->tx_head;
 
-		if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
+		if (++priv->tx_outstanding == ipoib_sendq_size) {
 			ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
 			netif_stop_queue(dev);
-			set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
 		}
 	}
 }
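Queue flow control now pivots on a single tx_outstanding counter kept under priv->tx_lock: stop the net queue when the counter reaches the ring size, wake it once the ring drains back to half full. The hysteresis avoids stop/wake thrashing and retires the IPOIB_FLAG_NETIF_STOPPED flag entirely. A minimal sketch of the two halves (helper names are illustrative; the driver additionally checks IPOIB_FLAG_ADMIN_UP before waking):

	/* caller holds priv->tx_lock; posting one send */
	static void tx_post_accounting(struct ipoib_dev_priv *priv,
				       struct net_device *dev)
	{
		if (++priv->tx_outstanding == ipoib_sendq_size)
			netif_stop_queue(dev);		/* ring is full */
	}

	/* caller holds priv->tx_lock; reaping one completion */
	static void tx_reap_accounting(struct ipoib_dev_priv *priv,
				       struct net_device *dev)
	{
		if (--priv->tx_outstanding == ipoib_sendq_size >> 1 &&
		    netif_queue_stopped(dev))
			netif_wake_queue(dev);		/* half drained */
	}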
@@ -436,7 +438,8 @@ void ipoib_reap_ah(struct work_struct *work)
 	__ipoib_reap_ah(dev);
 
 	if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
-		queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
+		queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+				   round_jiffies_relative(HZ));
 }
 
 int ipoib_ib_dev_open(struct net_device *dev)
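round_jiffies_relative() aligns a relative timeout to the same whole-second jiffy boundary other rounded timers use, so once-a-second housekeeping like the AH reaper shares wakeups instead of firing at scattered offsets; exact timing is irrelevant here. The pattern is simply:

	/* about a second from now, batched with other rounded timers */
	queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
			   round_jiffies_relative(HZ));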
@@ -472,7 +475,8 @@ int ipoib_ib_dev_open(struct net_device *dev)
 	}
 
 	clear_bit(IPOIB_STOP_REAPER, &priv->flags);
-	queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
+	queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+			   round_jiffies_relative(HZ));
 
 	set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
 
@@ -561,12 +565,17 @@ void ipoib_drain_cq(struct net_device *dev)
 			if (priv->ibwc[i].status == IB_WC_SUCCESS)
 				priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR;
 
-			if (priv->ibwc[i].wr_id & IPOIB_CM_OP_SRQ)
-				ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
-			else if (priv->ibwc[i].wr_id & IPOIB_OP_RECV)
-				ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
-			else
-				ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
+			if (priv->ibwc[i].wr_id & IPOIB_OP_RECV) {
+				if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
+					ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
+				else
+					ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
+			} else {
+				if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
+					ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
+				else
+					ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
+			}
 		}
 	} while (n == IPOIB_NUM_WC);
 }
@@ -612,6 +621,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
 					    DMA_TO_DEVICE);
 			dev_kfree_skb_any(tx_req->skb);
 			++priv->tx_tail;
+			--priv->tx_outstanding;
 		}
 
 		for (i = 0; i < ipoib_recvq_size; ++i) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 362610d870e4..a03a65ebcf0c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -148,8 +148,6 @@ static int ipoib_stop(struct net_device *dev)
 
 	netif_stop_queue(dev);
 
-	clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
-
 	/*
 	 * Now flush workqueue to make sure a scheduled task doesn't
 	 * bring our internal state back up.
@@ -902,7 +900,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 		goto out_rx_ring_cleanup;
 	}
 
-	/* priv->tx_head & tx_tail are already 0 */
+	/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
 
 	if (ipoib_ib_dev_init(dev, ca, port))
 		goto out_tx_ring_cleanup;