aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2008-07-24 15:56:07 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-07-24 15:56:07 -0400
commit 5c402355adf8f920531f02099f4ec0d2bccd4c64 (patch)
tree eba326f8df01efc0d384874839384040401f5b45 /drivers
parent ecc8b655b38a880b578146895e0e1e2d477ca2c0 (diff)
parent 2cc177364e4746becdf421f926fb967c047ccc32 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
  MAINTAINERS: Remove Glenn Streiff from NetEffect entry
  mlx4_core: Improve error message when not enough UAR pages are available
  IB/mlx4: Add support for memory management extensions and local DMA L_Key
  IB/mthca: Keep free count for MTT buddy allocator
  mlx4_core: Keep free count for MTT buddy allocator
  mlx4_code: Add missing FW status return code
  IB/mlx4: Rename struct mlx4_lso_seg to mlx4_wqe_lso_seg
  mlx4_core: Add module parameter to enable QoS support
  RDMA/iwcm: Remove IB_ACCESS_LOCAL_WRITE from remote QP attributes
  IPoIB: Include err code in trace message for ib_sa_path_rec_get() failures
  IB/sa_query: Check if sm_ah is NULL in ib_sa_remove_one()
  IB/ehca: Release mutex in error path of alloc_small_queue_page()
  IB/ehca: Use default value for Local CA ACK Delay if FW returns 0
  IB/ehca: Filter PATH_MIG events if QP was never armed
  IB/iser: Add support for RDMA_CM_EVENT_ADDR_CHANGE event
  RDMA/cma: Add RDMA_CM_EVENT_TIMEWAIT_EXIT event
  RDMA/cma: Add RDMA_CM_EVENT_ADDR_CHANGE event
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/infiniband/core/cma.c 99
-rw-r--r-- drivers/infiniband/core/iwcm.c 3
-rw-r--r-- drivers/infiniband/core/sa_query.c 3
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_classes.h 1
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_hca.c 4
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_irq.c 4
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_qp.c 2
-rw-r--r-- drivers/infiniband/hw/ehca/ipz_pt_fn.c 1
-rw-r--r-- drivers/infiniband/hw/mlx4/cq.c 12
-rw-r--r-- drivers/infiniband/hw/mlx4/main.c 11
-rw-r--r-- drivers/infiniband/hw/mlx4/mlx4_ib.h 15
-rw-r--r-- drivers/infiniband/hw/mlx4/mr.c 70
-rw-r--r-- drivers/infiniband/hw/mlx4/qp.c 74
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_dev.h 1
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_mr.c 26
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_main.c 2
-rw-r--r-- drivers/infiniband/ulp/iser/iser_verbs.c 1
-rw-r--r-- drivers/net/mlx4/cmd.c 3
-rw-r--r-- drivers/net/mlx4/fw.c 18
-rw-r--r-- drivers/net/mlx4/fw.h 2
-rw-r--r-- drivers/net/mlx4/main.c 2
-rw-r--r-- drivers/net/mlx4/mlx4.h 1
-rw-r--r-- drivers/net/mlx4/mr.c 49
-rw-r--r-- drivers/net/mlx4/pd.c 7
24 files changed, 370 insertions, 41 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index ae11d5cc74d0..e980ff3335db 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -168,6 +168,12 @@ struct cma_work {
168 struct rdma_cm_event event; 168 struct rdma_cm_event event;
169}; 169};
170 170
171struct cma_ndev_work {
172 struct work_struct work;
173 struct rdma_id_private *id;
174 struct rdma_cm_event event;
175};
176
171union cma_ip_addr { 177union cma_ip_addr {
172 struct in6_addr ip6; 178 struct in6_addr ip6;
173 struct { 179 struct {
@@ -914,7 +920,10 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
914 struct rdma_cm_event event; 920 struct rdma_cm_event event;
915 int ret = 0; 921 int ret = 0;
916 922
917 if (cma_disable_callback(id_priv, CMA_CONNECT)) 923 if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
924 cma_disable_callback(id_priv, CMA_CONNECT)) ||
925 (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
926 cma_disable_callback(id_priv, CMA_DISCONNECT)))
918 return 0; 927 return 0;
919 928
920 memset(&event, 0, sizeof event); 929 memset(&event, 0, sizeof event);
@@ -950,6 +959,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
950 event.event = RDMA_CM_EVENT_DISCONNECTED; 959 event.event = RDMA_CM_EVENT_DISCONNECTED;
951 break; 960 break;
952 case IB_CM_TIMEWAIT_EXIT: 961 case IB_CM_TIMEWAIT_EXIT:
962 event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
963 break;
953 case IB_CM_MRA_RECEIVED: 964 case IB_CM_MRA_RECEIVED:
954 /* ignore event */ 965 /* ignore event */
955 goto out; 966 goto out;
@@ -1598,6 +1609,30 @@ out:
1598 kfree(work); 1609 kfree(work);
1599} 1610}
1600 1611
1612static void cma_ndev_work_handler(struct work_struct *_work)
1613{
1614 struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
1615 struct rdma_id_private *id_priv = work->id;
1616 int destroy = 0;
1617
1618 mutex_lock(&id_priv->handler_mutex);
1619 if (id_priv->state == CMA_DESTROYING ||
1620 id_priv->state == CMA_DEVICE_REMOVAL)
1621 goto out;
1622
1623 if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1624 cma_exch(id_priv, CMA_DESTROYING);
1625 destroy = 1;
1626 }
1627
1628out:
1629 mutex_unlock(&id_priv->handler_mutex);
1630 cma_deref_id(id_priv);
1631 if (destroy)
1632 rdma_destroy_id(&id_priv->id);
1633 kfree(work);
1634}
1635
1601static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) 1636static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
1602{ 1637{
1603 struct rdma_route *route = &id_priv->id.route; 1638 struct rdma_route *route = &id_priv->id.route;
@@ -2723,6 +2758,65 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
2723} 2758}
2724EXPORT_SYMBOL(rdma_leave_multicast); 2759EXPORT_SYMBOL(rdma_leave_multicast);
2725 2760
2761static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
2762{
2763 struct rdma_dev_addr *dev_addr;
2764 struct cma_ndev_work *work;
2765
2766 dev_addr = &id_priv->id.route.addr.dev_addr;
2767
2768 if ((dev_addr->src_dev == ndev) &&
2769 memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
2770 printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
2771 ndev->name, &id_priv->id);
2772 work = kzalloc(sizeof *work, GFP_KERNEL);
2773 if (!work)
2774 return -ENOMEM;
2775
2776 INIT_WORK(&work->work, cma_ndev_work_handler);
2777 work->id = id_priv;
2778 work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
2779 atomic_inc(&id_priv->refcount);
2780 queue_work(cma_wq, &work->work);
2781 }
2782
2783 return 0;
2784}
2785
2786static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
2787 void *ctx)
2788{
2789 struct net_device *ndev = (struct net_device *)ctx;
2790 struct cma_device *cma_dev;
2791 struct rdma_id_private *id_priv;
2792 int ret = NOTIFY_DONE;
2793
2794 if (dev_net(ndev) != &init_net)
2795 return NOTIFY_DONE;
2796
2797 if (event != NETDEV_BONDING_FAILOVER)
2798 return NOTIFY_DONE;
2799
2800 if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
2801 return NOTIFY_DONE;
2802
2803 mutex_lock(&lock);
2804 list_for_each_entry(cma_dev, &dev_list, list)
2805 list_for_each_entry(id_priv, &cma_dev->id_list, list) {
2806 ret = cma_netdev_change(ndev, id_priv);
2807 if (ret)
2808 goto out;
2809 }
2810
2811out:
2812 mutex_unlock(&lock);
2813 return ret;
2814}
2815
2816static struct notifier_block cma_nb = {
2817 .notifier_call = cma_netdev_callback
2818};
2819
2726static void cma_add_one(struct ib_device *device) 2820static void cma_add_one(struct ib_device *device)
2727{ 2821{
2728 struct cma_device *cma_dev; 2822 struct cma_device *cma_dev;
@@ -2831,6 +2925,7 @@ static int cma_init(void)
2831 2925
2832 ib_sa_register_client(&sa_client); 2926 ib_sa_register_client(&sa_client);
2833 rdma_addr_register_client(&addr_client); 2927 rdma_addr_register_client(&addr_client);
2928 register_netdevice_notifier(&cma_nb);
2834 2929
2835 ret = ib_register_client(&cma_client); 2930 ret = ib_register_client(&cma_client);
2836 if (ret) 2931 if (ret)
@@ -2838,6 +2933,7 @@ static int cma_init(void)
2838 return 0; 2933 return 0;
2839 2934
2840err: 2935err:
2936 unregister_netdevice_notifier(&cma_nb);
2841 rdma_addr_unregister_client(&addr_client); 2937 rdma_addr_unregister_client(&addr_client);
2842 ib_sa_unregister_client(&sa_client); 2938 ib_sa_unregister_client(&sa_client);
2843 destroy_workqueue(cma_wq); 2939 destroy_workqueue(cma_wq);
@@ -2847,6 +2943,7 @@ err:
2847static void cma_cleanup(void) 2943static void cma_cleanup(void)
2848{ 2944{
2849 ib_unregister_client(&cma_client); 2945 ib_unregister_client(&cma_client);
2946 unregister_netdevice_notifier(&cma_nb);
2850 rdma_addr_unregister_client(&addr_client); 2947 rdma_addr_unregister_client(&addr_client);
2851 ib_sa_unregister_client(&sa_client); 2948 ib_sa_unregister_client(&sa_client);
2852 destroy_workqueue(cma_wq); 2949 destroy_workqueue(cma_wq);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 81c9195b512a..8f9509e1ebf7 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -942,8 +942,7 @@ static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
942 case IW_CM_STATE_CONN_RECV: 942 case IW_CM_STATE_CONN_RECV:
943 case IW_CM_STATE_ESTABLISHED: 943 case IW_CM_STATE_ESTABLISHED:
944 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 944 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
945 qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | 945 qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE|
946 IB_ACCESS_REMOTE_WRITE|
947 IB_ACCESS_REMOTE_READ; 946 IB_ACCESS_REMOTE_READ;
948 ret = 0; 947 ret = 0;
949 break; 948 break;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 1341de793e51..7863a50d56f2 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1064,7 +1064,8 @@ static void ib_sa_remove_one(struct ib_device *device)
1064 1064
1065 for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { 1065 for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
1066 ib_unregister_mad_agent(sa_dev->port[i].agent); 1066 ib_unregister_mad_agent(sa_dev->port[i].agent);
1067 kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); 1067 if (sa_dev->port[i].sm_ah)
1068 kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
1068 } 1069 }
1069 1070
1070 kfree(sa_dev); 1071 kfree(sa_dev);
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 1e9e99a13933..0b0618edd645 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -194,6 +194,7 @@ struct ehca_qp {
194 u32 packet_count; 194 u32 packet_count;
195 atomic_t nr_events; /* events seen */ 195 atomic_t nr_events; /* events seen */
196 wait_queue_head_t wait_completion; 196 wait_queue_head_t wait_completion;
197 int mig_armed;
197}; 198};
198 199
199#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) 200#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index bc3b37d2070f..46288220cfbb 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -114,7 +114,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
114 } 114 }
115 115
116 props->max_pkeys = 16; 116 props->max_pkeys = 16;
117 props->local_ca_ack_delay = min_t(u8, rblock->local_ca_ack_delay, 255); 117 /* Some FW versions say 0 here; insert sensible value in that case */
118 props->local_ca_ack_delay = rblock->local_ca_ack_delay ?
119 min_t(u8, rblock->local_ca_ack_delay, 255) : 12;
118 props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); 120 props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp);
119 props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); 121 props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp);
120 props->max_mcast_grp = limit_uint(rblock->max_mcast_grp); 122 props->max_mcast_grp = limit_uint(rblock->max_mcast_grp);
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 7a64aa9b51b6..cb55be04442c 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -178,6 +178,10 @@ static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
178{ 178{
179 struct ib_event event; 179 struct ib_event event;
180 180
181 /* PATH_MIG without the QP ever having been armed is false alarm */
182 if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed)
183 return;
184
181 event.device = &shca->ib_device; 185 event.device = &shca->ib_device;
182 event.event = event_type; 186 event.event = event_type;
183 187
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 3f59587338ea..ea13efddf175 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -1460,6 +1460,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
1460 goto modify_qp_exit2; 1460 goto modify_qp_exit2;
1461 } 1461 }
1462 mqpcb->path_migration_state = attr->path_mig_state + 1; 1462 mqpcb->path_migration_state = attr->path_mig_state + 1;
1463 if (attr->path_mig_state == IB_MIG_REARM)
1464 my_qp->mig_armed = 1;
1463 update_mask |= 1465 update_mask |=
1464 EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); 1466 EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
1465 } 1467 }
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
index 661f8db62706..c3a328465431 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -163,6 +163,7 @@ static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
163 163
164out: 164out:
165 ehca_err(pd->ib_pd.device, "failed to allocate small queue page"); 165 ehca_err(pd->ib_pd.device, "failed to allocate small queue page");
166 mutex_unlock(&pd->lock);
166 return 0; 167 return 0;
167} 168}
168 169
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 299f20832ab6..0b191a4842ce 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -637,6 +637,7 @@ repoll:
637 case MLX4_OPCODE_SEND_IMM: 637 case MLX4_OPCODE_SEND_IMM:
638 wc->wc_flags |= IB_WC_WITH_IMM; 638 wc->wc_flags |= IB_WC_WITH_IMM;
639 case MLX4_OPCODE_SEND: 639 case MLX4_OPCODE_SEND:
640 case MLX4_OPCODE_SEND_INVAL:
640 wc->opcode = IB_WC_SEND; 641 wc->opcode = IB_WC_SEND;
641 break; 642 break;
642 case MLX4_OPCODE_RDMA_READ: 643 case MLX4_OPCODE_RDMA_READ:
@@ -657,6 +658,12 @@ repoll:
657 case MLX4_OPCODE_LSO: 658 case MLX4_OPCODE_LSO:
658 wc->opcode = IB_WC_LSO; 659 wc->opcode = IB_WC_LSO;
659 break; 660 break;
661 case MLX4_OPCODE_FMR:
662 wc->opcode = IB_WC_FAST_REG_MR;
663 break;
664 case MLX4_OPCODE_LOCAL_INVAL:
665 wc->opcode = IB_WC_LOCAL_INV;
666 break;
660 } 667 }
661 } else { 668 } else {
662 wc->byte_len = be32_to_cpu(cqe->byte_cnt); 669 wc->byte_len = be32_to_cpu(cqe->byte_cnt);
@@ -667,6 +674,11 @@ repoll:
667 wc->wc_flags = IB_WC_WITH_IMM; 674 wc->wc_flags = IB_WC_WITH_IMM;
668 wc->ex.imm_data = cqe->immed_rss_invalid; 675 wc->ex.imm_data = cqe->immed_rss_invalid;
669 break; 676 break;
677 case MLX4_RECV_OPCODE_SEND_INVAL:
678 wc->opcode = IB_WC_RECV;
679 wc->wc_flags = IB_WC_WITH_INVALIDATE;
680 wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid);
681 break;
670 case MLX4_RECV_OPCODE_SEND: 682 case MLX4_RECV_OPCODE_SEND:
671 wc->opcode = IB_WC_RECV; 683 wc->opcode = IB_WC_RECV;
672 wc->wc_flags = 0; 684 wc->wc_flags = 0;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index bcf50648fa18..38d6907ab521 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -104,6 +104,12 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
104 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; 104 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
105 if (dev->dev->caps.max_gso_sz) 105 if (dev->dev->caps.max_gso_sz)
106 props->device_cap_flags |= IB_DEVICE_UD_TSO; 106 props->device_cap_flags |= IB_DEVICE_UD_TSO;
107 if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
108 props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
109 if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
110 (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
111 (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
112 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
107 113
108 props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 114 props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
109 0xffffff; 115 0xffffff;
@@ -127,6 +133,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
127 props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs; 133 props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
128 props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; 134 props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1;
129 props->max_srq_sge = dev->dev->caps.max_srq_sge; 135 props->max_srq_sge = dev->dev->caps.max_srq_sge;
136 props->max_fast_reg_page_list_len = PAGE_SIZE / sizeof (u64);
130 props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; 137 props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
131 props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? 138 props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
132 IB_ATOMIC_HCA : IB_ATOMIC_NONE; 139 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
@@ -565,6 +572,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
565 strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); 572 strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
566 ibdev->ib_dev.owner = THIS_MODULE; 573 ibdev->ib_dev.owner = THIS_MODULE;
567 ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; 574 ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
575 ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
568 ibdev->ib_dev.phys_port_cnt = dev->caps.num_ports; 576 ibdev->ib_dev.phys_port_cnt = dev->caps.num_ports;
569 ibdev->ib_dev.num_comp_vectors = 1; 577 ibdev->ib_dev.num_comp_vectors = 1;
570 ibdev->ib_dev.dma_device = &dev->pdev->dev; 578 ibdev->ib_dev.dma_device = &dev->pdev->dev;
@@ -627,6 +635,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
627 ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; 635 ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
628 ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; 636 ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
629 ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; 637 ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
638 ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
639 ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
640 ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list;
630 ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; 641 ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
631 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; 642 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
632 ibdev->ib_dev.process_mad = mlx4_ib_process_mad; 643 ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index c4cf5b69eefa..d26a91317d4d 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -83,6 +83,11 @@ struct mlx4_ib_mr {
83 struct ib_umem *umem; 83 struct ib_umem *umem;
84}; 84};
85 85
86struct mlx4_ib_fast_reg_page_list {
87 struct ib_fast_reg_page_list ibfrpl;
88 dma_addr_t map;
89};
90
86struct mlx4_ib_fmr { 91struct mlx4_ib_fmr {
87 struct ib_fmr ibfmr; 92 struct ib_fmr ibfmr;
88 struct mlx4_fmr mfmr; 93 struct mlx4_fmr mfmr;
@@ -199,6 +204,11 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
199 return container_of(ibmr, struct mlx4_ib_mr, ibmr); 204 return container_of(ibmr, struct mlx4_ib_mr, ibmr);
200} 205}
201 206
207static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
208{
209 return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl);
210}
211
202static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) 212static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
203{ 213{
204 return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr); 214 return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
@@ -239,6 +249,11 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
239 u64 virt_addr, int access_flags, 249 u64 virt_addr, int access_flags,
240 struct ib_udata *udata); 250 struct ib_udata *udata);
241int mlx4_ib_dereg_mr(struct ib_mr *mr); 251int mlx4_ib_dereg_mr(struct ib_mr *mr);
252struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
253 int max_page_list_len);
254struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
255 int page_list_len);
256void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
242 257
243int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); 258int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
244int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); 259int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 68e92485fc76..db2086faa4ed 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -183,6 +183,76 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
183 return 0; 183 return 0;
184} 184}
185 185
186struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
187 int max_page_list_len)
188{
189 struct mlx4_ib_dev *dev = to_mdev(pd->device);
190 struct mlx4_ib_mr *mr;
191 int err;
192
193 mr = kmalloc(sizeof *mr, GFP_KERNEL);
194 if (!mr)
195 return ERR_PTR(-ENOMEM);
196
197 err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
198 max_page_list_len, 0, &mr->mmr);
199 if (err)
200 goto err_free;
201
202 err = mlx4_mr_enable(dev->dev, &mr->mmr);
203 if (err)
204 goto err_mr;
205
206 return &mr->ibmr;
207
208err_mr:
209 mlx4_mr_free(dev->dev, &mr->mmr);
210
211err_free:
212 kfree(mr);
213 return ERR_PTR(err);
214}
215
216struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
217 int page_list_len)
218{
219 struct mlx4_ib_dev *dev = to_mdev(ibdev);
220 struct mlx4_ib_fast_reg_page_list *mfrpl;
221 int size = page_list_len * sizeof (u64);
222
223 if (size > PAGE_SIZE)
224 return ERR_PTR(-EINVAL);
225
226 mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
227 if (!mfrpl)
228 return ERR_PTR(-ENOMEM);
229
230 mfrpl->ibfrpl.page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
231 size, &mfrpl->map,
232 GFP_KERNEL);
233 if (!mfrpl->ibfrpl.page_list)
234 goto err_free;
235
236 WARN_ON(mfrpl->map & 0x3f);
237
238 return &mfrpl->ibfrpl;
239
240err_free:
241 kfree(mfrpl);
242 return ERR_PTR(-ENOMEM);
243}
244
245void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
246{
247 struct mlx4_ib_dev *dev = to_mdev(page_list->device);
248 struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
249 int size = page_list->max_page_list_len * sizeof (u64);
250
251 dma_free_coherent(&dev->dev->pdev->dev, size, page_list->page_list,
252 mfrpl->map);
253 kfree(mfrpl);
254}
255
186struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, 256struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
187 struct ib_fmr_attr *fmr_attr) 257 struct ib_fmr_attr *fmr_attr)
188{ 258{
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 89eb6cbe592e..02a99bc4442e 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -78,6 +78,9 @@ static const __be32 mlx4_ib_opcode[] = {
78 [IB_WR_RDMA_READ] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ), 78 [IB_WR_RDMA_READ] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),
79 [IB_WR_ATOMIC_CMP_AND_SWP] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), 79 [IB_WR_ATOMIC_CMP_AND_SWP] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
80 [IB_WR_ATOMIC_FETCH_AND_ADD] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), 80 [IB_WR_ATOMIC_FETCH_AND_ADD] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
81 [IB_WR_SEND_WITH_INV] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
82 [IB_WR_LOCAL_INV] = __constant_cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
83 [IB_WR_FAST_REG_MR] = __constant_cpu_to_be32(MLX4_OPCODE_FMR),
81}; 84};
82 85
83static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) 86static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -976,6 +979,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
976 context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn); 979 context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
977 context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); 980 context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
978 981
982 /* Set "fast registration enabled" for all kernel QPs */
983 if (!qp->ibqp.uobject)
984 context->params1 |= cpu_to_be32(1 << 11);
985
979 if (attr_mask & IB_QP_RNR_RETRY) { 986 if (attr_mask & IB_QP_RNR_RETRY) {
980 context->params1 |= cpu_to_be32(attr->rnr_retry << 13); 987 context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
981 optpar |= MLX4_QP_OPTPAR_RNR_RETRY; 988 optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
@@ -1322,6 +1329,38 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq
1322 return cur + nreq >= wq->max_post; 1329 return cur + nreq >= wq->max_post;
1323} 1330}
1324 1331
1332static __be32 convert_access(int acc)
1333{
1334 return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC) : 0) |
1335 (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) |
1336 (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ) : 0) |
1337 (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) |
1338 cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
1339}
1340
1341static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
1342{
1343 struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
1344
1345 fseg->flags = convert_access(wr->wr.fast_reg.access_flags);
1346 fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey);
1347 fseg->buf_list = cpu_to_be64(mfrpl->map);
1348 fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
1349 fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length);
1350 fseg->offset = 0; /* XXX -- is this just for ZBVA? */
1351 fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift);
1352 fseg->reserved[0] = 0;
1353 fseg->reserved[1] = 0;
1354}
1355
1356static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
1357{
1358 iseg->flags = 0;
1359 iseg->mem_key = cpu_to_be32(rkey);
1360 iseg->guest_id = 0;
1361 iseg->pa = 0;
1362}
1363
1325static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, 1364static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
1326 u64 remote_addr, u32 rkey) 1365 u64 remote_addr, u32 rkey)
1327{ 1366{
@@ -1395,7 +1434,7 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
1395 dseg->addr = cpu_to_be64(sg->addr); 1434 dseg->addr = cpu_to_be64(sg->addr);
1396} 1435}
1397 1436
1398static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr, 1437static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
1399 struct mlx4_ib_qp *qp, unsigned *lso_seg_len) 1438 struct mlx4_ib_qp *qp, unsigned *lso_seg_len)
1400{ 1439{
1401 unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); 1440 unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
@@ -1423,6 +1462,21 @@ static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr,
1423 return 0; 1462 return 0;
1424} 1463}
1425 1464
1465static __be32 send_ieth(struct ib_send_wr *wr)
1466{
1467 switch (wr->opcode) {
1468 case IB_WR_SEND_WITH_IMM:
1469 case IB_WR_RDMA_WRITE_WITH_IMM:
1470 return wr->ex.imm_data;
1471
1472 case IB_WR_SEND_WITH_INV:
1473 return cpu_to_be32(wr->ex.invalidate_rkey);
1474
1475 default:
1476 return 0;
1477 }
1478}
1479
1426int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 1480int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1427 struct ib_send_wr **bad_wr) 1481 struct ib_send_wr **bad_wr)
1428{ 1482{
@@ -1469,11 +1523,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1469 MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) | 1523 MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
1470 qp->sq_signal_bits; 1524 qp->sq_signal_bits;
1471 1525
1472 if (wr->opcode == IB_WR_SEND_WITH_IMM || 1526 ctrl->imm = send_ieth(wr);
1473 wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
1474 ctrl->imm = wr->ex.imm_data;
1475 else
1476 ctrl->imm = 0;
1477 1527
1478 wqe += sizeof *ctrl; 1528 wqe += sizeof *ctrl;
1479 size = sizeof *ctrl / 16; 1529 size = sizeof *ctrl / 16;
@@ -1505,6 +1555,18 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1505 size += sizeof (struct mlx4_wqe_raddr_seg) / 16; 1555 size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
1506 break; 1556 break;
1507 1557
1558 case IB_WR_LOCAL_INV:
1559 set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
1560 wqe += sizeof (struct mlx4_wqe_local_inval_seg);
1561 size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
1562 break;
1563
1564 case IB_WR_FAST_REG_MR:
1565 set_fmr_seg(wqe, wr);
1566 wqe += sizeof (struct mlx4_wqe_fmr_seg);
1567 size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
1568 break;
1569
1508 default: 1570 default:
1509 /* No extra segments required for sends */ 1571 /* No extra segments required for sends */
1510 break; 1572 break;
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index ee4d073c889f..252590116df5 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -202,6 +202,7 @@ struct mthca_pd_table {
202 202
203struct mthca_buddy { 203struct mthca_buddy {
204 unsigned long **bits; 204 unsigned long **bits;
205 int *num_free;
205 int max_order; 206 int max_order;
206 spinlock_t lock; 207 spinlock_t lock;
207}; 208};
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 8489b1e81c0f..882e6b735915 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -89,23 +89,26 @@ static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
89 89
90 spin_lock(&buddy->lock); 90 spin_lock(&buddy->lock);
91 91
92 for (o = order; o <= buddy->max_order; ++o) { 92 for (o = order; o <= buddy->max_order; ++o)
93 m = 1 << (buddy->max_order - o); 93 if (buddy->num_free[o]) {
94 seg = find_first_bit(buddy->bits[o], m); 94 m = 1 << (buddy->max_order - o);
95 if (seg < m) 95 seg = find_first_bit(buddy->bits[o], m);
96 goto found; 96 if (seg < m)
97 } 97 goto found;
98 }
98 99
99 spin_unlock(&buddy->lock); 100 spin_unlock(&buddy->lock);
100 return -1; 101 return -1;
101 102
102 found: 103 found:
103 clear_bit(seg, buddy->bits[o]); 104 clear_bit(seg, buddy->bits[o]);
105 --buddy->num_free[o];
104 106
105 while (o > order) { 107 while (o > order) {
106 --o; 108 --o;
107 seg <<= 1; 109 seg <<= 1;
108 set_bit(seg ^ 1, buddy->bits[o]); 110 set_bit(seg ^ 1, buddy->bits[o]);
111 ++buddy->num_free[o];
109 } 112 }
110 113
111 spin_unlock(&buddy->lock); 114 spin_unlock(&buddy->lock);
@@ -123,11 +126,13 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
123 126
124 while (test_bit(seg ^ 1, buddy->bits[order])) { 127 while (test_bit(seg ^ 1, buddy->bits[order])) {
125 clear_bit(seg ^ 1, buddy->bits[order]); 128 clear_bit(seg ^ 1, buddy->bits[order]);
129 --buddy->num_free[order];
126 seg >>= 1; 130 seg >>= 1;
127 ++order; 131 ++order;
128 } 132 }
129 133
130 set_bit(seg, buddy->bits[order]); 134 set_bit(seg, buddy->bits[order]);
135 ++buddy->num_free[order];
131 136
132 spin_unlock(&buddy->lock); 137 spin_unlock(&buddy->lock);
133} 138}
@@ -141,7 +146,9 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
141 146
142 buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), 147 buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
143 GFP_KERNEL); 148 GFP_KERNEL);
144 if (!buddy->bits) 149 buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *),
150 GFP_KERNEL);
151 if (!buddy->bits || !buddy->num_free)
145 goto err_out; 152 goto err_out;
146 153
147 for (i = 0; i <= buddy->max_order; ++i) { 154 for (i = 0; i <= buddy->max_order; ++i) {
@@ -154,6 +161,7 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
154 } 161 }
155 162
156 set_bit(0, buddy->bits[buddy->max_order]); 163 set_bit(0, buddy->bits[buddy->max_order]);
164 buddy->num_free[buddy->max_order] = 1;
157 165
158 return 0; 166 return 0;
159 167
@@ -161,9 +169,10 @@ err_out_free:
161 for (i = 0; i <= buddy->max_order; ++i) 169 for (i = 0; i <= buddy->max_order; ++i)
162 kfree(buddy->bits[i]); 170 kfree(buddy->bits[i]);
163 171
172err_out:
164 kfree(buddy->bits); 173 kfree(buddy->bits);
174 kfree(buddy->num_free);
165 175
166err_out:
167 return -ENOMEM; 176 return -ENOMEM;
168} 177}
169 178
@@ -175,6 +184,7 @@ static void mthca_buddy_cleanup(struct mthca_buddy *buddy)
175 kfree(buddy->bits[i]); 184 kfree(buddy->bits[i]);
176 185
177 kfree(buddy->bits); 186 kfree(buddy->bits);
187 kfree(buddy->num_free);
178} 188}
179 189
180static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order, 190static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 8be9ea0436e6..f51201b17bfd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -548,7 +548,7 @@ static int path_rec_start(struct net_device *dev,
548 path_rec_completion, 548 path_rec_completion,
549 path, &path->query); 549 path, &path->query);
550 if (path->query_id < 0) { 550 if (path->query_id < 0) {
551 ipoib_warn(priv, "ib_sa_path_rec_get failed\n"); 551 ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id);
552 path->query = NULL; 552 path->query = NULL;
553 return path->query_id; 553 return path->query_id;
554 } 554 }
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 3a917c1f796f..63462ecca147 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -483,6 +483,7 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
483 break; 483 break;
484 case RDMA_CM_EVENT_DISCONNECTED: 484 case RDMA_CM_EVENT_DISCONNECTED:
485 case RDMA_CM_EVENT_DEVICE_REMOVAL: 485 case RDMA_CM_EVENT_DEVICE_REMOVAL:
486 case RDMA_CM_EVENT_ADDR_CHANGE:
486 iser_disconnected_handler(cma_id); 487 iser_disconnected_handler(cma_id);
487 break; 488 break;
488 default: 489 default:
diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c
index 70dff94a8bc6..04d5bc69a6f8 100644
--- a/drivers/net/mlx4/cmd.c
+++ b/drivers/net/mlx4/cmd.c
@@ -67,6 +67,8 @@ enum {
67 CMD_STAT_BAD_INDEX = 0x0a, 67 CMD_STAT_BAD_INDEX = 0x0a,
68 /* FW image corrupted: */ 68 /* FW image corrupted: */
69 CMD_STAT_BAD_NVMEM = 0x0b, 69 CMD_STAT_BAD_NVMEM = 0x0b,
70 /* Error in ICM mapping (e.g. not enough auxiliary ICM pages to execute command): */
71 CMD_STAT_ICM_ERROR = 0x0c,
70 /* Attempt to modify a QP/EE which is not in the presumed state: */ 72 /* Attempt to modify a QP/EE which is not in the presumed state: */
71 CMD_STAT_BAD_QP_STATE = 0x10, 73 CMD_STAT_BAD_QP_STATE = 0x10,
72 /* Bad segment parameters (Address/Size): */ 74 /* Bad segment parameters (Address/Size): */
@@ -119,6 +121,7 @@ static int mlx4_status_to_errno(u8 status)
119 [CMD_STAT_BAD_RES_STATE] = -EBADF, 121 [CMD_STAT_BAD_RES_STATE] = -EBADF,
120 [CMD_STAT_BAD_INDEX] = -EBADF, 122 [CMD_STAT_BAD_INDEX] = -EBADF,
121 [CMD_STAT_BAD_NVMEM] = -EFAULT, 123 [CMD_STAT_BAD_NVMEM] = -EFAULT,
124 [CMD_STAT_ICM_ERROR] = -ENFILE,
122 [CMD_STAT_BAD_QP_STATE] = -EINVAL, 125 [CMD_STAT_BAD_QP_STATE] = -EINVAL,
123 [CMD_STAT_BAD_SEG_PARAM] = -EFAULT, 126 [CMD_STAT_BAD_SEG_PARAM] = -EFAULT,
124 [CMD_STAT_REG_BOUND] = -EBUSY, 127 [CMD_STAT_REG_BOUND] = -EBUSY,
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 2b5006b9be67..57278224ba1e 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -46,6 +46,10 @@ enum {
46extern void __buggy_use_of_MLX4_GET(void); 46extern void __buggy_use_of_MLX4_GET(void);
47extern void __buggy_use_of_MLX4_PUT(void); 47extern void __buggy_use_of_MLX4_PUT(void);
48 48
49static int enable_qos;
50module_param(enable_qos, bool, 0444);
51MODULE_PARM_DESC(enable_qos, "Enable Quality of Service support in the HCA (default: off)");
52
49#define MLX4_GET(dest, source, offset) \ 53#define MLX4_GET(dest, source, offset) \
50 do { \ 54 do { \
51 void *__p = (char *) (source) + (offset); \ 55 void *__p = (char *) (source) + (offset); \
@@ -198,7 +202,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
198#define QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET 0x8e 202#define QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET 0x8e
199#define QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET 0x90 203#define QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET 0x90
200#define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET 0x92 204#define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET 0x92
201#define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x97 205#define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x94
202#define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 206#define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98
203#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 207#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0
204 208
@@ -373,12 +377,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
373 } 377 }
374 } 378 }
375 379
376 if (dev_cap->bmme_flags & 1) 380 mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n",
377 mlx4_dbg(dev, "Base MM extensions: yes " 381 dev_cap->bmme_flags, dev_cap->reserved_lkey);
378 "(flags %d, rsvd L_Key %08x)\n",
379 dev_cap->bmme_flags, dev_cap->reserved_lkey);
380 else
381 mlx4_dbg(dev, "Base MM extensions: no\n");
382 382
383 /* 383 /*
384 * Each UAR has 4 EQ doorbells; so if a UAR is reserved, then 384 * Each UAR has 4 EQ doorbells; so if a UAR is reserved, then
@@ -737,6 +737,10 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
737 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) 737 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
738 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3); 738 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3);
739 739
740 /* Enable QoS support if module parameter set */
741 if (enable_qos)
742 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 2);
743
740 /* QPC/EEC/CQC/EQC/RDMARC attributes */ 744 /* QPC/EEC/CQC/EQC/RDMARC attributes */
741 745
742 MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET); 746 MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET);
diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h
index a0e046c149b7..fbf0e22be122 100644
--- a/drivers/net/mlx4/fw.h
+++ b/drivers/net/mlx4/fw.h
@@ -98,7 +98,7 @@ struct mlx4_dev_cap {
98 int cmpt_entry_sz; 98 int cmpt_entry_sz;
99 int mtt_entry_sz; 99 int mtt_entry_sz;
100 int resize_srq; 100 int resize_srq;
101 u8 bmme_flags; 101 u32 bmme_flags;
102 u32 reserved_lkey; 102 u32 reserved_lkey;
103 u64 max_icm_sz; 103 u64 max_icm_sz;
104 int max_gso_sz; 104 int max_gso_sz;
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index d3736013fe9b..8e1d24cda1b0 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -158,6 +158,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
158 dev->caps.max_msg_sz = dev_cap->max_msg_sz; 158 dev->caps.max_msg_sz = dev_cap->max_msg_sz;
159 dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); 159 dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1);
160 dev->caps.flags = dev_cap->flags; 160 dev->caps.flags = dev_cap->flags;
161 dev->caps.bmme_flags = dev_cap->bmme_flags;
162 dev->caps.reserved_lkey = dev_cap->reserved_lkey;
161 dev->caps.stat_rate_support = dev_cap->stat_rate_support; 163 dev->caps.stat_rate_support = dev_cap->stat_rate_support;
162 dev->caps.max_gso_sz = dev_cap->max_gso_sz; 164 dev->caps.max_gso_sz = dev_cap->max_gso_sz;
163 165
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index a4023c2dd050..78038499cff5 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -118,6 +118,7 @@ struct mlx4_bitmap {
118 118
119struct mlx4_buddy { 119struct mlx4_buddy {
120 unsigned long **bits; 120 unsigned long **bits;
121 unsigned int *num_free;
121 int max_order; 122 int max_order;
122 spinlock_t lock; 123 spinlock_t lock;
123}; 124};
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index 03a9abcce524..a3c04c5f12c2 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -47,7 +47,7 @@ struct mlx4_mpt_entry {
47 __be32 flags; 47 __be32 flags;
48 __be32 qpn; 48 __be32 qpn;
49 __be32 key; 49 __be32 key;
50 __be32 pd; 50 __be32 pd_flags;
51 __be64 start; 51 __be64 start;
52 __be64 length; 52 __be64 length;
53 __be32 lkey; 53 __be32 lkey;
@@ -61,11 +61,15 @@ struct mlx4_mpt_entry {
61} __attribute__((packed)); 61} __attribute__((packed));
62 62
63#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28) 63#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28)
64#define MLX4_MPT_FLAG_FREE (0x3UL << 28)
64#define MLX4_MPT_FLAG_MIO (1 << 17) 65#define MLX4_MPT_FLAG_MIO (1 << 17)
65#define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15) 66#define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15)
66#define MLX4_MPT_FLAG_PHYSICAL (1 << 9) 67#define MLX4_MPT_FLAG_PHYSICAL (1 << 9)
67#define MLX4_MPT_FLAG_REGION (1 << 8) 68#define MLX4_MPT_FLAG_REGION (1 << 8)
68 69
70#define MLX4_MPT_PD_FLAG_FAST_REG (1 << 26)
71#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24)
72
69#define MLX4_MTT_FLAG_PRESENT 1 73#define MLX4_MTT_FLAG_PRESENT 1
70 74
71#define MLX4_MPT_STATUS_SW 0xF0 75#define MLX4_MPT_STATUS_SW 0xF0
@@ -79,23 +83,26 @@ static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order)
79 83
80 spin_lock(&buddy->lock); 84 spin_lock(&buddy->lock);
81 85
82 for (o = order; o <= buddy->max_order; ++o) { 86 for (o = order; o <= buddy->max_order; ++o)
83 m = 1 << (buddy->max_order - o); 87 if (buddy->num_free[o]) {
84 seg = find_first_bit(buddy->bits[o], m); 88 m = 1 << (buddy->max_order - o);
85 if (seg < m) 89 seg = find_first_bit(buddy->bits[o], m);
86 goto found; 90 if (seg < m)
87 } 91 goto found;
92 }
88 93
89 spin_unlock(&buddy->lock); 94 spin_unlock(&buddy->lock);
90 return -1; 95 return -1;
91 96
92 found: 97 found:
93 clear_bit(seg, buddy->bits[o]); 98 clear_bit(seg, buddy->bits[o]);
99 --buddy->num_free[o];
94 100
95 while (o > order) { 101 while (o > order) {
96 --o; 102 --o;
97 seg <<= 1; 103 seg <<= 1;
98 set_bit(seg ^ 1, buddy->bits[o]); 104 set_bit(seg ^ 1, buddy->bits[o]);
105 ++buddy->num_free[o];
99 } 106 }
100 107
101 spin_unlock(&buddy->lock); 108 spin_unlock(&buddy->lock);
@@ -113,11 +120,13 @@ static void mlx4_buddy_free(struct mlx4_buddy *buddy, u32 seg, int order)
113 120
114 while (test_bit(seg ^ 1, buddy->bits[order])) { 121 while (test_bit(seg ^ 1, buddy->bits[order])) {
115 clear_bit(seg ^ 1, buddy->bits[order]); 122 clear_bit(seg ^ 1, buddy->bits[order]);
123 --buddy->num_free[order];
116 seg >>= 1; 124 seg >>= 1;
117 ++order; 125 ++order;
118 } 126 }
119 127
120 set_bit(seg, buddy->bits[order]); 128 set_bit(seg, buddy->bits[order]);
129 ++buddy->num_free[order];
121 130
122 spin_unlock(&buddy->lock); 131 spin_unlock(&buddy->lock);
123} 132}
@@ -131,7 +140,9 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
131 140
132 buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), 141 buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
133 GFP_KERNEL); 142 GFP_KERNEL);
134 if (!buddy->bits) 143 buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *),
144 GFP_KERNEL);
145 if (!buddy->bits || !buddy->num_free)
135 goto err_out; 146 goto err_out;
136 147
137 for (i = 0; i <= buddy->max_order; ++i) { 148 for (i = 0; i <= buddy->max_order; ++i) {
@@ -143,6 +154,7 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
143 } 154 }
144 155
145 set_bit(0, buddy->bits[buddy->max_order]); 156 set_bit(0, buddy->bits[buddy->max_order]);
157 buddy->num_free[buddy->max_order] = 1;
146 158
147 return 0; 159 return 0;
148 160
@@ -150,9 +162,10 @@ err_out_free:
150 for (i = 0; i <= buddy->max_order; ++i) 162 for (i = 0; i <= buddy->max_order; ++i)
151 kfree(buddy->bits[i]); 163 kfree(buddy->bits[i]);
152 164
165err_out:
153 kfree(buddy->bits); 166 kfree(buddy->bits);
167 kfree(buddy->num_free);
154 168
155err_out:
156 return -ENOMEM; 169 return -ENOMEM;
157} 170}
158 171
@@ -164,6 +177,7 @@ static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy)
164 kfree(buddy->bits[i]); 177 kfree(buddy->bits[i]);
165 178
166 kfree(buddy->bits); 179 kfree(buddy->bits);
180 kfree(buddy->num_free);
167} 181}
168 182
169static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order) 183static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
@@ -314,21 +328,30 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
314 328
315 memset(mpt_entry, 0, sizeof *mpt_entry); 329 memset(mpt_entry, 0, sizeof *mpt_entry);
316 330
317 mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS | 331 mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_MIO |
318 MLX4_MPT_FLAG_MIO |
319 MLX4_MPT_FLAG_REGION | 332 MLX4_MPT_FLAG_REGION |
320 mr->access); 333 mr->access);
321 334
322 mpt_entry->key = cpu_to_be32(key_to_hw_index(mr->key)); 335 mpt_entry->key = cpu_to_be32(key_to_hw_index(mr->key));
323 mpt_entry->pd = cpu_to_be32(mr->pd); 336 mpt_entry->pd_flags = cpu_to_be32(mr->pd | MLX4_MPT_PD_FLAG_EN_INV);
324 mpt_entry->start = cpu_to_be64(mr->iova); 337 mpt_entry->start = cpu_to_be64(mr->iova);
325 mpt_entry->length = cpu_to_be64(mr->size); 338 mpt_entry->length = cpu_to_be64(mr->size);
326 mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift); 339 mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);
340
327 if (mr->mtt.order < 0) { 341 if (mr->mtt.order < 0) {
328 mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL); 342 mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
329 mpt_entry->mtt_seg = 0; 343 mpt_entry->mtt_seg = 0;
330 } else 344 } else {
331 mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt)); 345 mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt));
346 }
347
348 if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) {
349 /* fast register MR in free state */
350 mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
351 mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG);
352 } else {
353 mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
354 }
332 355
333 err = mlx4_SW2HW_MPT(dev, mailbox, 356 err = mlx4_SW2HW_MPT(dev, mailbox,
334 key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1)); 357 key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1));
diff --git a/drivers/net/mlx4/pd.c b/drivers/net/mlx4/pd.c
index 3a93c5f0f7ab..aa616892d09c 100644
--- a/drivers/net/mlx4/pd.c
+++ b/drivers/net/mlx4/pd.c
@@ -91,6 +91,13 @@ EXPORT_SYMBOL_GPL(mlx4_uar_free);
91 91
92int mlx4_init_uar_table(struct mlx4_dev *dev) 92int mlx4_init_uar_table(struct mlx4_dev *dev)
93{ 93{
94 if (dev->caps.num_uars <= 128) {
95 mlx4_err(dev, "Only %d UAR pages (need more than 128)\n",
96 dev->caps.num_uars);
97 mlx4_err(dev, "Increase firmware log2_uar_bar_megabytes?\n");
98 return -ENODEV;
99 }
100
94 return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap, 101 return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap,
95 dev->caps.num_uars, dev->caps.num_uars - 1, 102 dev->caps.num_uars, dev->caps.num_uars - 1,
96 max(128, dev->caps.reserved_uars)); 103 max(128, dev->caps.reserved_uars));