Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/core/agent.c | 19
-rw-r--r--  drivers/infiniband/core/cm.c | 42
-rw-r--r--  drivers/infiniband/core/fmr_pool.c | 6
-rw-r--r--  drivers/infiniband/core/mad.c | 307
-rw-r--r--  drivers/infiniband/core/mad_priv.h | 19
-rw-r--r--  drivers/infiniband/core/mad_rmpp.c | 198
-rw-r--r--  drivers/infiniband/core/smi.h | 9
-rw-r--r--  drivers/infiniband/core/sysfs.c | 36
-rw-r--r--  drivers/infiniband/core/user_mad.c | 219
-rw-r--r--  drivers/infiniband/core/uverbs.h | 5
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 202
-rw-r--r--  drivers/infiniband/core/uverbs_main.c | 6
-rw-r--r--  drivers/infiniband/core/verbs.c | 259
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_av.c | 35
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.c | 323
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.h | 14
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cq.c | 163
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_dev.h | 33
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_eq.c | 12
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mad.c | 19
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_main.c | 23
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mcg.c | 4
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.c | 29
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.h | 10
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mr.c | 46
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_pd.c | 5
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_profile.c | 10
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c | 170
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.h | 53
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c | 507
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_srq.c | 51
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_user.h | 7
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h | 27
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c | 54
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c | 38
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 25
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 8
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 230
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.h | 7
39 files changed, 2246 insertions, 984 deletions
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 34b724afd28d..ecd1a3057c61 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -78,25 +78,6 @@ ib_get_agent_port(struct ib_device *device, int port_num)
 	return entry;
 }
 
-int smi_check_local_dr_smp(struct ib_smp *smp,
-			   struct ib_device *device,
-			   int port_num)
-{
-	struct ib_agent_port_private *port_priv;
-
-	if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
-		return 1;
-
-	port_priv = ib_get_agent_port(device, port_num);
-	if (!port_priv) {
-		printk(KERN_DEBUG SPFX "smi_check_local_dr_smp %s port %d "
-		       "not open\n", device->name, port_num);
-		return 1;
-	}
-
-	return smi_check_local_smp(port_priv->agent[0], smp);
-}
-
 int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
 			struct ib_wc *wc, struct ib_device *device,
 			int port_num, int qpn)
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 2514de3480d8..7cfedb8d9bcd 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -121,7 +121,7 @@ struct cm_id_private {
 
 	struct rb_node service_node;
 	struct rb_node sidr_id_node;
-	spinlock_t lock;
+	spinlock_t lock; /* Do not acquire inside cm.lock */
 	wait_queue_head_t wait;
 	atomic_t refcount;
 
@@ -1547,40 +1547,46 @@ static int cm_rep_handler(struct cm_work *work)
 		return -EINVAL;
 	}
 
+	cm_format_rep_event(work);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->id.state) {
+	case IB_CM_REQ_SENT:
+	case IB_CM_MRA_REQ_RCVD:
+		break;
+	default:
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ret = -EINVAL;
+		goto error;
+	}
+
 	cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
 	cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
 	cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg);
 
-	spin_lock_irqsave(&cm.lock, flags);
+	spin_lock(&cm.lock);
 	/* Check for duplicate REP. */
 	if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
-		spin_unlock_irqrestore(&cm.lock, flags);
+		spin_unlock(&cm.lock);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		ret = -EINVAL;
 		goto error;
 	}
 	/* Check for a stale connection. */
 	if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
-		spin_unlock_irqrestore(&cm.lock, flags);
+		rb_erase(&cm_id_priv->timewait_info->remote_id_node,
+			 &cm.remote_id_table);
+		cm_id_priv->timewait_info->inserted_remote_id = 0;
+		spin_unlock(&cm.lock);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		cm_issue_rej(work->port, work->mad_recv_wc,
 			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
 			     NULL, 0);
 		ret = -EINVAL;
 		goto error;
 	}
-	spin_unlock_irqrestore(&cm.lock, flags);
-
-	cm_format_rep_event(work);
+	spin_unlock(&cm.lock);
 
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
-	switch (cm_id_priv->id.state) {
-	case IB_CM_REQ_SENT:
-	case IB_CM_MRA_REQ_RCVD:
-		break;
-	default:
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-		ret = -EINVAL;
-		goto error;
-	}
 	cm_id_priv->id.state = IB_CM_REP_RCVD;
 	cm_id_priv->id.remote_id = rep_msg->local_comm_id;
 	cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg);
@@ -1603,7 +1609,7 @@ static int cm_rep_handler(struct cm_work *work)
 	cm_deref_id(cm_id_priv);
 	return 0;
 
-error:	cm_cleanup_timewait(cm_id_priv->timewait_info);
+error:
 	cm_deref_id(cm_id_priv);
 	return ret;
 }
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index d34a6f1c4f4c..838bf54458d2 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -278,9 +278,9 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
 {
 	struct ib_pool_fmr *fmr;
 	struct ib_fmr_attr attr = {
 		.max_pages = params->max_pages_per_fmr,
 		.max_maps = IB_FMR_MAX_REMAPS,
-		.page_size = PAGE_SHIFT
+		.page_shift = params->page_shift
 	};
 
 	for (i = 0; i < params->pool_size; ++i) {
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index c82f47a66e48..ba54c856b0e5 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -31,7 +31,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: mad.c 2817 2005-07-07 11:29:26Z halr $
+ * $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
  */
 #include <linux/dma-mapping.h>
 
@@ -227,6 +227,14 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 		if (!is_vendor_oui(mad_reg_req->oui))
 			goto error1;
 	}
+	/* Make sure class supplied is consistent with RMPP */
+	if (ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
+		if (!rmpp_version)
+			goto error1;
+	} else {
+		if (rmpp_version)
+			goto error1;
+	}
 	/* Make sure class supplied is consistent with QP type */
 	if (qp_type == IB_QPT_SMI) {
 		if ((mad_reg_req->mgmt_class !=
@@ -679,8 +687,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 		goto out;
 	}
 	/* Check to post send on QP or process locally */
-	ret = smi_check_local_dr_smp(smp, device, port_num);
-	if (!ret || !device->process_mad)
+	ret = smi_check_local_smp(smp, device);
+	if (!ret)
 		goto out;
 
 	local = kmalloc(sizeof *local, GFP_ATOMIC);
@@ -765,18 +773,67 @@ out:
 	return ret;
 }
 
-static int get_buf_length(int hdr_len, int data_len)
+static int get_pad_size(int hdr_len, int data_len)
 {
 	int seg_size, pad;
 
 	seg_size = sizeof(struct ib_mad) - hdr_len;
 	if (data_len && seg_size) {
 		pad = seg_size - data_len % seg_size;
-		if (pad == seg_size)
-			pad = 0;
+		return pad == seg_size ? 0 : pad;
 	} else
-		pad = seg_size;
-	return hdr_len + data_len + pad;
+		return seg_size;
+}
+
+static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr)
+{
+	struct ib_rmpp_segment *s, *t;
+
+	list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) {
+		list_del(&s->list);
+		kfree(s);
+	}
+}
+
+static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
+				gfp_t gfp_mask)
+{
+	struct ib_mad_send_buf *send_buf = &send_wr->send_buf;
+	struct ib_rmpp_mad *rmpp_mad = send_buf->mad;
+	struct ib_rmpp_segment *seg = NULL;
+	int left, seg_size, pad;
+
+	send_buf->seg_size = sizeof (struct ib_mad) - send_buf->hdr_len;
+	seg_size = send_buf->seg_size;
+	pad = send_wr->pad;
+
+	/* Allocate data segments. */
+	for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
+		seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
+		if (!seg) {
+			printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem "
+			       "alloc failed for len %zd, gfp %#x\n",
+			       sizeof (*seg) + seg_size, gfp_mask);
+			free_send_rmpp_list(send_wr);
+			return -ENOMEM;
+		}
+		seg->num = ++send_buf->seg_count;
+		list_add_tail(&seg->list, &send_wr->rmpp_list);
+	}
+
+	/* Zero any padding */
+	if (pad)
+		memset(seg->data + seg_size - pad, 0, pad);
+
+	rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv->
+					  agent.rmpp_version;
+	rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
+	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
+
+	send_wr->cur_seg = container_of(send_wr->rmpp_list.next,
+					struct ib_rmpp_segment, list);
+	send_wr->last_ack_seg = send_wr->cur_seg;
+	return 0;
 }
 
 struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
@@ -787,32 +844,40 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 {
 	struct ib_mad_agent_private *mad_agent_priv;
 	struct ib_mad_send_wr_private *mad_send_wr;
-	int buf_size;
+	int pad, message_size, ret, size;
 	void *buf;
 
 	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
 				      agent);
-	buf_size = get_buf_length(hdr_len, data_len);
+	pad = get_pad_size(hdr_len, data_len);
+	message_size = hdr_len + data_len + pad;
 
 	if ((!mad_agent->rmpp_version &&
-	     (rmpp_active || buf_size > sizeof(struct ib_mad))) ||
-	    (!rmpp_active && buf_size > sizeof(struct ib_mad)))
+	     (rmpp_active || message_size > sizeof(struct ib_mad))) ||
+	    (!rmpp_active && message_size > sizeof(struct ib_mad)))
 		return ERR_PTR(-EINVAL);
 
-	buf = kzalloc(sizeof *mad_send_wr + buf_size, gfp_mask);
+	size = rmpp_active ? hdr_len : sizeof(struct ib_mad);
+	buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
 	if (!buf)
 		return ERR_PTR(-ENOMEM);
 
-	mad_send_wr = buf + buf_size;
+	mad_send_wr = buf + size;
+	INIT_LIST_HEAD(&mad_send_wr->rmpp_list);
 	mad_send_wr->send_buf.mad = buf;
+	mad_send_wr->send_buf.hdr_len = hdr_len;
+	mad_send_wr->send_buf.data_len = data_len;
+	mad_send_wr->pad = pad;
 
 	mad_send_wr->mad_agent_priv = mad_agent_priv;
-	mad_send_wr->sg_list[0].length = buf_size;
+	mad_send_wr->sg_list[0].length = hdr_len;
 	mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
+	mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len;
+	mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey;
 
 	mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
 	mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
-	mad_send_wr->send_wr.num_sge = 1;
+	mad_send_wr->send_wr.num_sge = 2;
 	mad_send_wr->send_wr.opcode = IB_WR_SEND;
 	mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
 	mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
@@ -820,13 +885,11 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 	mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
 
 	if (rmpp_active) {
-		struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad;
-		rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len -
-						   IB_MGMT_RMPP_HDR + data_len);
-		rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version;
-		rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
-		ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr,
-				  IB_MGMT_RMPP_FLAG_ACTIVE);
+		ret = alloc_send_rmpp_list(mad_send_wr, gfp_mask);
+		if (ret) {
+			kfree(buf);
+			return ERR_PTR(ret);
+		}
 	}
 
 	mad_send_wr->send_buf.mad_agent = mad_agent;
@@ -835,14 +898,79 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 }
 EXPORT_SYMBOL(ib_create_send_mad);
 
+int ib_get_mad_data_offset(u8 mgmt_class)
+{
+	if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
+		return IB_MGMT_SA_HDR;
+	else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
+		 (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
+		 (mgmt_class == IB_MGMT_CLASS_BIS))
+		return IB_MGMT_DEVICE_HDR;
+	else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
+		 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
+		return IB_MGMT_VENDOR_HDR;
+	else
+		return IB_MGMT_MAD_HDR;
+}
+EXPORT_SYMBOL(ib_get_mad_data_offset);
+
+int ib_is_mad_class_rmpp(u8 mgmt_class)
+{
+	if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) ||
+	    (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
+	    (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
+	    (mgmt_class == IB_MGMT_CLASS_BIS) ||
+	    ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
+	     (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)))
+		return 1;
+	return 0;
+}
+EXPORT_SYMBOL(ib_is_mad_class_rmpp);
+
+void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num)
+{
+	struct ib_mad_send_wr_private *mad_send_wr;
+	struct list_head *list;
+
+	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
+				   send_buf);
+	list = &mad_send_wr->cur_seg->list;
+
+	if (mad_send_wr->cur_seg->num < seg_num) {
+		list_for_each_entry(mad_send_wr->cur_seg, list, list)
+			if (mad_send_wr->cur_seg->num == seg_num)
+				break;
+	} else if (mad_send_wr->cur_seg->num > seg_num) {
+		list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list)
+			if (mad_send_wr->cur_seg->num == seg_num)
+				break;
+	}
+	return mad_send_wr->cur_seg->data;
+}
+EXPORT_SYMBOL(ib_get_rmpp_segment);
+
+static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr)
+{
+	if (mad_send_wr->send_buf.seg_count)
+		return ib_get_rmpp_segment(&mad_send_wr->send_buf,
+					   mad_send_wr->seg_num);
+	else
+		return mad_send_wr->send_buf.mad +
+		       mad_send_wr->send_buf.hdr_len;
+}
+
 void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
 {
 	struct ib_mad_agent_private *mad_agent_priv;
+	struct ib_mad_send_wr_private *mad_send_wr;
 
 	mad_agent_priv = container_of(send_buf->mad_agent,
 				      struct ib_mad_agent_private, agent);
-	kfree(send_buf->mad);
+	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
				   send_buf);
 
+	free_send_rmpp_list(mad_send_wr);
+	kfree(send_buf->mad);
 	if (atomic_dec_and_test(&mad_agent_priv->refcount))
 		wake_up(&mad_agent_priv->wait);
 }
@@ -865,10 +993,17 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 
 	mad_agent = mad_send_wr->send_buf.mad_agent;
 	sge = mad_send_wr->sg_list;
-	sge->addr = dma_map_single(mad_agent->device->dma_device,
-				   mad_send_wr->send_buf.mad, sge->length,
-				   DMA_TO_DEVICE);
-	pci_unmap_addr_set(mad_send_wr, mapping, sge->addr);
+	sge[0].addr = dma_map_single(mad_agent->device->dma_device,
+				     mad_send_wr->send_buf.mad,
+				     sge[0].length,
+				     DMA_TO_DEVICE);
+	pci_unmap_addr_set(mad_send_wr, header_mapping, sge[0].addr);
+
+	sge[1].addr = dma_map_single(mad_agent->device->dma_device,
+				     ib_get_payload(mad_send_wr),
+				     sge[1].length,
+				     DMA_TO_DEVICE);
+	pci_unmap_addr_set(mad_send_wr, payload_mapping, sge[1].addr);
 
 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
 	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
@@ -885,11 +1020,14 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 		list_add_tail(&mad_send_wr->mad_list.list, list);
 	}
 	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
-	if (ret)
+	if (ret) {
 		dma_unmap_single(mad_agent->device->dma_device,
-				 pci_unmap_addr(mad_send_wr, mapping),
-				 sge->length, DMA_TO_DEVICE);
-
+				 pci_unmap_addr(mad_send_wr, header_mapping),
+				 sge[0].length, DMA_TO_DEVICE);
+		dma_unmap_single(mad_agent->device->dma_device,
+				 pci_unmap_addr(mad_send_wr, payload_mapping),
+				 sge[1].length, DMA_TO_DEVICE);
+	}
 	return ret;
 }
 
@@ -921,6 +1059,13 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
 		goto error;
 	}
 
+	if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) {
+		if (mad_agent_priv->agent.rmpp_version) {
+			ret = -EINVAL;
+			goto error;
+		}
+	}
+
 	/*
 	 * Save pointer to next work request to post in case the
 	 * current one completes, and the user modifies the work
@@ -1517,14 +1662,59 @@ static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv,
 		(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
 }
 
+static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr,
+				     struct ib_mad_recv_wc *rwc)
+{
+	return ((struct ib_mad *)(wr->send_buf.mad))->mad_hdr.mgmt_class ==
+		rwc->recv_buf.mad->mad_hdr.mgmt_class;
+}
+
+static inline int rcv_has_same_gid(struct ib_mad_send_wr_private *wr,
+				   struct ib_mad_recv_wc *rwc )
+{
+	struct ib_ah_attr attr;
+	u8 send_resp, rcv_resp;
+
+	send_resp = ((struct ib_mad *)(wr->send_buf.mad))->
+		     mad_hdr.method & IB_MGMT_METHOD_RESP;
+	rcv_resp = rwc->recv_buf.mad->mad_hdr.method & IB_MGMT_METHOD_RESP;
+
+	if (!send_resp && rcv_resp)
+		/* is request/response. GID/LIDs are both local (same). */
+		return 1;
+
+	if (send_resp == rcv_resp)
+		/* both requests, or both responses. GIDs different */
+		return 0;
+
+	if (ib_query_ah(wr->send_buf.ah, &attr))
+		/* Assume not equal, to avoid false positives. */
+		return 0;
+
+	if (!(attr.ah_flags & IB_AH_GRH) && !(rwc->wc->wc_flags & IB_WC_GRH))
+		return attr.dlid == rwc->wc->slid;
+	else if ((attr.ah_flags & IB_AH_GRH) &&
+		 (rwc->wc->wc_flags & IB_WC_GRH))
+		return memcmp(attr.grh.dgid.raw,
+			      rwc->recv_buf.grh->sgid.raw, 16) == 0;
+	else
+		/* one has GID, other does not. Assume different */
+		return 0;
+}
 struct ib_mad_send_wr_private*
-ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid)
+ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
+		 struct ib_mad_recv_wc *mad_recv_wc)
 {
 	struct ib_mad_send_wr_private *mad_send_wr;
+	struct ib_mad *mad;
+
+	mad = (struct ib_mad *)mad_recv_wc->recv_buf.mad;
 
 	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
 			    agent_list) {
-		if (mad_send_wr->tid == tid)
+		if ((mad_send_wr->tid == mad->mad_hdr.tid) &&
+		    rcv_has_same_class(mad_send_wr, mad_recv_wc) &&
+		    rcv_has_same_gid(mad_send_wr, mad_recv_wc))
 			return mad_send_wr;
 	}
 
@@ -1535,7 +1725,10 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid)
 	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
 			    agent_list) {
 		if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
-		    mad_send_wr->tid == tid && mad_send_wr->timeout) {
+		    mad_send_wr->tid == mad->mad_hdr.tid &&
+		    mad_send_wr->timeout &&
+		    rcv_has_same_class(mad_send_wr, mad_recv_wc) &&
+		    rcv_has_same_gid(mad_send_wr, mad_recv_wc)) {
 			/* Verify request has not been canceled */
 			return (mad_send_wr->status == IB_WC_SUCCESS) ?
 				mad_send_wr : NULL;
@@ -1560,7 +1753,6 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 	struct ib_mad_send_wr_private *mad_send_wr;
 	struct ib_mad_send_wc mad_send_wc;
 	unsigned long flags;
-	__be64 tid;
 
 	INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
 	list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
@@ -1576,9 +1768,8 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 
 	/* Complete corresponding request */
 	if (response_mad(mad_recv_wc->recv_buf.mad)) {
-		tid = mad_recv_wc->recv_buf.mad->mad_hdr.tid;
 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
-		mad_send_wr = ib_find_send_mad(mad_agent_priv, tid);
+		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
 		if (!mad_send_wr) {
 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 			ib_free_recv_mad(mad_recv_wc);
@@ -1661,9 +1852,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
 					    port_priv->device->node_type,
 					    port_priv->port_num))
 			goto out;
-		if (!smi_check_local_dr_smp(&recv->mad.smp,
-					    port_priv->device,
-					    port_priv->port_num))
+		if (!smi_check_local_smp(&recv->mad.smp, port_priv->device))
 			goto out;
 	}
 
@@ -1862,8 +2051,11 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
 
 retry:
 	dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
-			 pci_unmap_addr(mad_send_wr, mapping),
+			 pci_unmap_addr(mad_send_wr, header_mapping),
 			 mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
+	dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
+			 pci_unmap_addr(mad_send_wr, payload_mapping),
+			 mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
 	queued_send_wr = NULL;
 	spin_lock_irqsave(&send_queue->lock, flags);
 	list_del(&mad_list->list);
@@ -2262,8 +2454,12 @@ static void timeout_sends(void *data)
 static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
 {
 	struct ib_mad_port_private *port_priv = cq->cq_context;
+	unsigned long flags;
 
-	queue_work(port_priv->wq, &port_priv->work);
+	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
+	if (!list_empty(&port_priv->port_list))
+		queue_work(port_priv->wq, &port_priv->work);
+	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
 }
 
 /*
@@ -2302,11 +2498,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
 			}
 		}
-		sg_list.addr = dma_map_single(qp_info->port_priv->
-					      device->dma_device,
-					      &mad_priv->grh,
-					      sizeof *mad_priv -
-					      sizeof mad_priv->header,
-					      DMA_FROM_DEVICE);
+		sg_list.addr = dma_map_single(qp_info->port_priv->
-					      device->dma_device,
+					      &mad_priv->grh,
+					      sizeof *mad_priv -
+					      sizeof mad_priv->header,
+					      DMA_FROM_DEVICE);
 		pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr);
 		recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
 		mad_priv->header.mad_list.mad_queue = recv_queue;
@@ -2575,18 +2771,23 @@ static int ib_mad_port_open(struct ib_device *device,
 	}
 	INIT_WORK(&port_priv->work, ib_mad_completion_handler, port_priv);
 
+	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
+	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
+	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
+
 	ret = ib_mad_port_start(port_priv);
 	if (ret) {
 		printk(KERN_ERR PFX "Couldn't start port\n");
 		goto error9;
 	}
 
-	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
-	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
-	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
 	return 0;
 
 error9:
+	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
+	list_del_init(&port_priv->port_list);
+	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
+
 	destroy_workqueue(port_priv->wq);
 error8:
 	destroy_mad_qp(&port_priv->qp_info[1]);
@@ -2623,11 +2824,9 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
 		printk(KERN_ERR PFX "Port %d not found\n", port_num);
 		return -ENODEV;
 	}
-	list_del(&port_priv->port_list);
+	list_del_init(&port_priv->port_list);
 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
 
-	/* Stop processing completions. */
-	flush_workqueue(port_priv->wq);
 	destroy_workqueue(port_priv->wq);
 	destroy_mad_qp(&port_priv->qp_info[1]);
 	destroy_mad_qp(&port_priv->qp_info[0]);
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 570f78682af3..6c9c133d71ef 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -31,7 +31,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: mad_priv.h 2730 2005-06-28 16:43:03Z sean.hefty $
+ * $Id: mad_priv.h 5596 2006-03-03 01:00:07Z sean.hefty $
  */
 
 #ifndef __IB_MAD_PRIV_H__
@@ -85,6 +85,12 @@ struct ib_mad_private {
 	} mad;
 } __attribute__ ((packed));
 
+struct ib_rmpp_segment {
+	struct list_head list;
+	u32 num;
+	u8 data[0];
+};
+
 struct ib_mad_agent_private {
 	struct list_head agent_list;
 	struct ib_mad_agent agent;
@@ -119,7 +125,8 @@ struct ib_mad_send_wr_private {
 	struct list_head agent_list;
 	struct ib_mad_agent_private *mad_agent_priv;
 	struct ib_mad_send_buf send_buf;
-	DECLARE_PCI_UNMAP_ADDR(mapping)
+	DECLARE_PCI_UNMAP_ADDR(header_mapping)
+	DECLARE_PCI_UNMAP_ADDR(payload_mapping)
 	struct ib_send_wr send_wr;
 	struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
 	__be64 tid;
@@ -130,11 +137,12 @@ struct ib_mad_send_wr_private {
 	enum ib_wc_status status;
 
 	/* RMPP control */
+	struct list_head rmpp_list;
+	struct ib_rmpp_segment *last_ack_seg;
+	struct ib_rmpp_segment *cur_seg;
 	int last_ack;
 	int seg_num;
 	int newwin;
-	int total_seg;
-	int data_offset;
 	int pad;
 };
 
@@ -208,7 +216,8 @@ extern kmem_cache_t *ib_mad_cache;
 int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr);
 
 struct ib_mad_send_wr_private *
-ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid);
+ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
+		 struct ib_mad_recv_wc *mad_recv_wc);
 
 void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
 			     struct ib_mad_send_wc *mad_send_wc);
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 3249e1d8c07b..dfd4e588ce03 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2005 Intel Inc. All rights reserved.
- * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005-2006 Voltaire, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
@@ -100,25 +100,14 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
 	}
 }
 
-static int data_offset(u8 mgmt_class)
-{
-	if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
-		return IB_MGMT_SA_HDR;
-	else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
-		 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
-		return IB_MGMT_VENDOR_HDR;
-	else
-		return IB_MGMT_RMPP_HDR;
-}
-
-static void format_ack(struct ib_rmpp_mad *ack,
+static void format_ack(struct ib_mad_send_buf *msg,
 		       struct ib_rmpp_mad *data,
 		       struct mad_rmpp_recv *rmpp_recv)
 {
+	struct ib_rmpp_mad *ack = msg->mad;
 	unsigned long flags;
 
-	memcpy(&ack->mad_hdr, &data->mad_hdr,
-	       data_offset(data->mad_hdr.mgmt_class));
+	memcpy(ack, &data->mad_hdr, msg->hdr_len);
 
 	ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
 	ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK;
@@ -135,16 +124,16 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv,
 		     struct ib_mad_recv_wc *recv_wc)
 {
 	struct ib_mad_send_buf *msg;
-	int ret;
+	int ret, hdr_len;
 
+	hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
 	msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp,
-				 recv_wc->wc->pkey_index, 1, IB_MGMT_RMPP_HDR,
-				 IB_MGMT_RMPP_DATA, GFP_KERNEL);
+				 recv_wc->wc->pkey_index, 1, hdr_len,
+				 0, GFP_KERNEL);
 	if (!msg)
 		return;
 
-	format_ack(msg->mad, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad,
-		   rmpp_recv);
+	format_ack(msg, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv);
 	msg->ah = rmpp_recv->ah;
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
@@ -156,16 +145,17 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
 {
 	struct ib_mad_send_buf *msg;
 	struct ib_ah *ah;
+	int hdr_len;
 
 	ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc,
 				  recv_wc->recv_buf.grh, agent->port_num);
 	if (IS_ERR(ah))
 		return (void *) ah;
 
+	hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
 	msg = ib_create_send_mad(agent, recv_wc->wc->src_qp,
 				 recv_wc->wc->pkey_index, 1,
-				 IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA,
-				 GFP_KERNEL);
+				 hdr_len, 0, GFP_KERNEL);
 	if (IS_ERR(msg))
 		ib_destroy_ah(ah);
 	else
@@ -195,8 +185,7 @@ static void nack_recv(struct ib_mad_agent_private *agent,
 		return;
 
 	rmpp_mad = msg->mad;
-	memcpy(rmpp_mad, recv_wc->recv_buf.mad,
-	       data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class));
+	memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
 
 	rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
 	rmpp_mad->rmpp_hdr.rmpp_version = IB_MGMT_RMPP_VERSION;
@@ -408,7 +397,7 @@ static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv)
 
 	rmpp_mad = (struct ib_rmpp_mad *)rmpp_recv->cur_seg_buf->mad;
 
-	hdr_size = data_offset(rmpp_mad->mad_hdr.mgmt_class);
+	hdr_size = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
 	data_size = sizeof(struct ib_rmpp_mad) - hdr_size;
 	pad = IB_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
 	if (pad > IB_MGMT_RMPP_DATA || pad < 0)
@@ -433,44 +422,6 @@ static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv)
 	return rmpp_wc;
 }
 
-void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf)
-{
-	struct ib_mad_recv_buf *seg_buf;
-	struct ib_rmpp_mad *rmpp_mad;
-	void *data;
-	int size, len, offset;
-	u8 flags;
-
-	len = mad_recv_wc->mad_len;
-	if (len <= sizeof(struct ib_mad)) {
-		memcpy(buf, mad_recv_wc->recv_buf.mad, len);
-		return;
-	}
-
-	offset = data_offset(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
-
-	list_for_each_entry(seg_buf, &mad_recv_wc->rmpp_list, list) {
-		rmpp_mad = (struct ib_rmpp_mad *)seg_buf->mad;
-		flags = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr);
-
-		if (flags & IB_MGMT_RMPP_FLAG_FIRST) {
-			data = rmpp_mad;
-			size = sizeof(*rmpp_mad);
-		} else {
-			data = (void *) rmpp_mad + offset;
-			if (flags & IB_MGMT_RMPP_FLAG_LAST)
-				size = len;
-			else
-				size = sizeof(*rmpp_mad) - offset;
-		}
-
-		memcpy(buf, data, size);
-		len -= size;
-		buf += size;
-	}
-}
-EXPORT_SYMBOL(ib_coalesce_recv_mad);
-
 static struct ib_mad_recv_wc *
 continue_rmpp(struct ib_mad_agent_private *agent,
 	      struct ib_mad_recv_wc *mad_recv_wc)
@@ -570,66 +521,49 @@ start_rmpp(struct ib_mad_agent_private *agent,
 	return mad_recv_wc;
 }
 
-static inline u64 get_seg_addr(struct ib_mad_send_wr_private *mad_send_wr)
-{
-	return mad_send_wr->sg_list[0].addr + mad_send_wr->data_offset +
-	       (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset) *
-	       (mad_send_wr->seg_num - 1);
-}
-
 static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	struct ib_rmpp_mad *rmpp_mad;
 	int timeout;
-	u32 paylen;
+	u32 paylen = 0;
 
 	rmpp_mad = mad_send_wr->send_buf.mad;
 	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
-	rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num);
+	rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(++mad_send_wr->seg_num);
 
 	if (mad_send_wr->seg_num == 1) {
 		rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST;
-		paylen = mad_send_wr->total_seg * IB_MGMT_RMPP_DATA -
+		paylen = mad_send_wr->send_buf.seg_count * IB_MGMT_RMPP_DATA -
 			 mad_send_wr->pad;
-		rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
-		mad_send_wr->sg_list[0].length = sizeof(struct ib_rmpp_mad);
-	} else {
-		mad_send_wr->send_wr.num_sge = 2;
-		mad_send_wr->sg_list[0].length = mad_send_wr->data_offset;
-		mad_send_wr->sg_list[1].addr = get_seg_addr(mad_send_wr);
-		mad_send_wr->sg_list[1].length = sizeof(struct ib_rmpp_mad) -
-						 mad_send_wr->data_offset;
-		mad_send_wr->sg_list[1].lkey = mad_send_wr->sg_list[0].lkey;
-		rmpp_mad->rmpp_hdr.paylen_newwin = 0;
 	}
 
-	if (mad_send_wr->seg_num == mad_send_wr->total_seg) {
+	if (mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) {
 		rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST;
 		paylen = IB_MGMT_RMPP_DATA - mad_send_wr->pad;
-		rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
 	}
+	rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
 
 	/* 2 seconds for an ACK until we can find the packet lifetime */
 	timeout = mad_send_wr->send_buf.timeout_ms;
 	if (!timeout || timeout > 2000)
 		mad_send_wr->timeout = msecs_to_jiffies(2000);
-	mad_send_wr->seg_num++;
+
 	return ib_send_mad(mad_send_wr);
 }
 
-static void abort_send(struct ib_mad_agent_private *agent, __be64 tid,
-		       u8 rmpp_status)
+static void abort_send(struct ib_mad_agent_private *agent,
+		       struct ib_mad_recv_wc *mad_recv_wc, u8 rmpp_status)
 {
 	struct ib_mad_send_wr_private *mad_send_wr;
 	struct ib_mad_send_wc wc;
 	unsigned long flags;
 
 	spin_lock_irqsave(&agent->lock, flags);
-	mad_send_wr = ib_find_send_mad(agent, tid);
+	mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
 	if (!mad_send_wr)
 		goto out;	/* Unmatched send */
 
-	if ((mad_send_wr->last_ack == mad_send_wr->total_seg) ||
+	if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
 	    (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
 		goto out;	/* Send is already done */
 
@@ -645,6 +579,18 @@ out:
 	spin_unlock_irqrestore(&agent->lock, flags);
 }
 
+static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr,
+				   int seg_num)
+{
+	struct list_head *list;
+
+	wr->last_ack = seg_num;
+	list = &wr->last_ack_seg->list;
+	list_for_each_entry(wr->last_ack_seg, list, list)
+		if (wr->last_ack_seg->num == seg_num)
+			break;
+}
+
 static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 			     struct ib_mad_recv_wc *mad_recv_wc)
 {
@@ -655,8 +601,7 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 
 	rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
 	if (rmpp_mad->rmpp_hdr.rmpp_status) {
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_BAD_STATUS);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 		return;
 	}
@@ -664,25 +609,24 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 	seg_num = be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num);
 	newwin = be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
 	if (newwin < seg_num) {
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_W2S);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S);
 		return;
 	}
 
 	spin_lock_irqsave(&agent->lock, flags);
-	mad_send_wr = ib_find_send_mad(agent, rmpp_mad->mad_hdr.tid);
+	mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
 	if (!mad_send_wr)
 		goto out;	/* Unmatched ACK */
 
-	if ((mad_send_wr->last_ack == mad_send_wr->total_seg) ||
+	if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
 	    (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
 		goto out;	/* Send is already done */
 
-	if (seg_num > mad_send_wr->total_seg || seg_num > mad_send_wr->newwin) {
+	if (seg_num > mad_send_wr->send_buf.seg_count ||
+	    seg_num > mad_send_wr->newwin) {
 		spin_unlock_irqrestore(&agent->lock, flags);
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_S2B);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B);
 		return;
 	}
@@ -691,11 +635,11 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 		goto out;	/* Old ACK */
 
 	if (seg_num > mad_send_wr->last_ack) {
-		mad_send_wr->last_ack = seg_num;
+		adjust_last_ack(mad_send_wr, seg_num);
 		mad_send_wr->retries = mad_send_wr->send_buf.retries;
 	}
 	mad_send_wr->newwin = newwin;
-	if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
+	if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) {
 		/* If no response is expected, the ACK completes the send */
 		if (!mad_send_wr->send_buf.timeout_ms) {
 			struct ib_mad_send_wc wc;
@@ -714,7 +658,7 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 					   mad_send_wr->send_buf.timeout_ms);
 	} else if (mad_send_wr->refcount == 1 &&
 		   mad_send_wr->seg_num < mad_send_wr->newwin &&
-		   mad_send_wr->seg_num <= mad_send_wr->total_seg) {
+		   mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
 		/* Send failure will just result in a timeout/retry */
 		ret = send_next_seg(mad_send_wr);
 		if (ret)
@@ -770,12 +714,10 @@ static void process_rmpp_stop(struct ib_mad_agent_private *agent,
 	rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
 
 	if (rmpp_mad->rmpp_hdr.rmpp_status != IB_MGMT_RMPP_STATUS_RESX) {
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_BAD_STATUS);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 	} else
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   rmpp_mad->rmpp_hdr.rmpp_status);
+		abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status);
 }
 
 static void process_rmpp_abort(struct ib_mad_agent_private *agent,
@@ -787,12 +729,10 @@ static void process_rmpp_abort(struct ib_mad_agent_private *agent,
 
 	if (rmpp_mad->rmpp_hdr.rmpp_status < IB_MGMT_RMPP_STATUS_ABORT_MIN ||
 	    rmpp_mad->rmpp_hdr.rmpp_status > IB_MGMT_RMPP_STATUS_ABORT_MAX) {
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_BAD_STATUS);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 	} else
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   rmpp_mad->rmpp_hdr.rmpp_status);
+		abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status);
 }
 
 struct ib_mad_recv_wc *
@@ -806,8 +746,7 @@ ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
 		return mad_recv_wc;
 
 	if (rmpp_mad->rmpp_hdr.rmpp_version != IB_MGMT_RMPP_VERSION) {
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_UNV);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV);
 		goto out;
 	}
@@ -825,8 +764,7 @@ ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
 		process_rmpp_abort(agent, mad_recv_wc);
 		break;
 	default:
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_BADT);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT);
 		break;
 	}
@@ -838,31 +776,19 @@ out:
 int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	struct ib_rmpp_mad *rmpp_mad;
-	int i, total_len, ret;
+	int ret;
 
 	rmpp_mad = mad_send_wr->send_buf.mad;
 	if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
 	      IB_MGMT_RMPP_FLAG_ACTIVE))
 		return IB_RMPP_RESULT_UNHANDLED;
 
-	if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA)
+	if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) {
+		mad_send_wr->seg_num = 1;
 		return IB_RMPP_RESULT_INTERNAL;
+	}
 
-	if (mad_send_wr->send_wr.num_sge > 1)
-		return -EINVAL;		/* TODO: support num_sge > 1 */
-
-	mad_send_wr->seg_num = 1;
 	mad_send_wr->newwin = 1;
-	mad_send_wr->data_offset = data_offset(rmpp_mad->mad_hdr.mgmt_class);
-
-	total_len = 0;
-	for (i = 0; i < mad_send_wr->send_wr.num_sge; i++)
-		total_len += mad_send_wr->send_wr.sg_list[i].length;
-
-	mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) /
-			(sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset);
-	mad_send_wr->pad = total_len - IB_MGMT_RMPP_HDR -
-			   be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
 
 	/* We need to wait for the final ACK even if there isn't a response */
 	mad_send_wr->refcount += (mad_send_wr->timeout == 0);
@@ -893,14 +819,14 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
 	if (!mad_send_wr->timeout)
 		return IB_RMPP_RESULT_PROCESSED; /* Response received */
 
-	if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
+	if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) {
 		mad_send_wr->timeout =
 			msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
 		return IB_RMPP_RESULT_PROCESSED; /* Send done */
 	}
 
-	if (mad_send_wr->seg_num > mad_send_wr->newwin ||
-	    mad_send_wr->seg_num > mad_send_wr->total_seg)
+	if (mad_send_wr->seg_num == mad_send_wr->newwin ||
+	    mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count)
 		return IB_RMPP_RESULT_PROCESSED; /* Wait for ACK */
 
 	ret = send_next_seg(mad_send_wr);
@@ -921,10 +847,12 @@ int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr)
 	      IB_MGMT_RMPP_FLAG_ACTIVE))
 		return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
 
-	if (mad_send_wr->last_ack == mad_send_wr->total_seg)
+	if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count)
 		return IB_RMPP_RESULT_PROCESSED;
 
-	mad_send_wr->seg_num = mad_send_wr->last_ack + 1;
+	mad_send_wr->seg_num = mad_send_wr->last_ack;
+	mad_send_wr->cur_seg = mad_send_wr->last_ack_seg;
+
 	ret = send_next_seg(mad_send_wr);
 	if (ret)
 		return IB_RMPP_RESULT_PROCESSED;
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index 2b3c40198f81..3011bfd86dc5 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -49,19 +49,16 @@ extern int smi_check_forward_dr_smp(struct ib_smp *smp);
49extern int smi_handle_dr_smp_send(struct ib_smp *smp, 49extern int smi_handle_dr_smp_send(struct ib_smp *smp,
50 u8 node_type, 50 u8 node_type,
51 int port_num); 51 int port_num);
52extern int smi_check_local_dr_smp(struct ib_smp *smp,
53 struct ib_device *device,
54 int port_num);
55 52
56/* 53/*
57 * Return 1 if the SMP should be handled by the local SMA/SM via process_mad 54 * Return 1 if the SMP should be handled by the local SMA/SM via process_mad
58 */ 55 */
59static inline int smi_check_local_smp(struct ib_mad_agent *mad_agent, 56static inline int smi_check_local_smp(struct ib_smp *smp,
60 struct ib_smp *smp) 57 struct ib_device *device)
61{ 58{
62 /* C14-9:3 -- We're at the end of the DR segment of path */ 59 /* C14-9:3 -- We're at the end of the DR segment of path */
63 /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */ 60 /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */
64 return ((mad_agent->device->process_mad && 61 return ((device->process_mad &&
65 !ib_get_smp_direction(smp) && 62 !ib_get_smp_direction(smp) &&
66 (smp->hop_ptr == smp->hop_cnt + 1))); 63 (smp->hop_ptr == smp->hop_cnt + 1)));
67} 64}
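
The simplified smi_check_local_smp() keeps the same C14-9:3 / C14-9:4 test but now takes the device directly instead of going through a MAD agent. A small stand-alone sketch of the hop-pointer rule it encodes (illustrative types only):

#include <stdio.h>

/* A directed-route SMP travelling in the outgoing direction is meant for
 * the local SMA/SM once its hop pointer has advanced one past the hop
 * count, i.e. the end of the DR segment of the path. */
struct dr_smp {
	int direction;	/* 0 = outgoing, 1 = returning */
	int hop_ptr;
	int hop_cnt;
};

static int smp_is_for_local_sm(const struct dr_smp *smp, int have_process_mad)
{
	return have_process_mad &&
	       !smp->direction &&
	       smp->hop_ptr == smp->hop_cnt + 1;
}

int main(void)
{
	struct dr_smp smp = { .direction = 0, .hop_ptr = 3, .hop_cnt = 2 };

	printf("%d\n", smp_is_for_local_sm(&smp, 1));	/* 1: hand to SMA/SM */
	return 0;
}
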
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 5982d687a000..15121cb5a1f6 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -112,7 +112,7 @@ static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
112 return ret; 112 return ret;
113 113
114 return sprintf(buf, "%d: %s\n", attr.state, 114 return sprintf(buf, "%d: %s\n", attr.state,
115 attr.state >= 0 && attr.state <= ARRAY_SIZE(state_name) ? 115 attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ?
116 state_name[attr.state] : "UNKNOWN"); 116 state_name[attr.state] : "UNKNOWN");
117} 117}
118 118
@@ -472,8 +472,10 @@ alloc_group_attrs(ssize_t (*show)(struct ib_port *,
472 goto err; 472 goto err;
473 473
474 if (snprintf(element->name, sizeof(element->name), 474 if (snprintf(element->name, sizeof(element->name),
475 "%d", i) >= sizeof(element->name)) 475 "%d", i) >= sizeof(element->name)) {
476 kfree(element);
476 goto err; 477 goto err;
478 }
477 479
478 element->attr.attr.name = element->name; 480 element->attr.attr.name = element->name;
479 element->attr.attr.mode = S_IRUGO; 481 element->attr.attr.mode = S_IRUGO;
@@ -628,14 +630,42 @@ static ssize_t show_node_guid(struct class_device *cdev, char *buf)
628 be16_to_cpu(((__be16 *) &dev->node_guid)[3])); 630 be16_to_cpu(((__be16 *) &dev->node_guid)[3]));
629} 631}
630 632
633static ssize_t show_node_desc(struct class_device *cdev, char *buf)
634{
635 struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
636
637 return sprintf(buf, "%.64s\n", dev->node_desc);
638}
639
640static ssize_t set_node_desc(struct class_device *cdev, const char *buf,
641 size_t count)
642{
643 struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
644 struct ib_device_modify desc = {};
645 int ret;
646
647 if (!dev->modify_device)
648 return -EIO;
649
650 memcpy(desc.node_desc, buf, min_t(int, count, 64));
651 ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc);
652 if (ret)
653 return ret;
654
655 return count;
656}
657
631static CLASS_DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL); 658static CLASS_DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
632static CLASS_DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL); 659static CLASS_DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
633static CLASS_DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL); 660static CLASS_DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
661static CLASS_DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc,
662 set_node_desc);
634 663
635static struct class_device_attribute *ib_class_attributes[] = { 664static struct class_device_attribute *ib_class_attributes[] = {
636 &class_device_attr_node_type, 665 &class_device_attr_node_type,
637 &class_device_attr_sys_image_guid, 666 &class_device_attr_sys_image_guid,
638 &class_device_attr_node_guid 667 &class_device_attr_node_guid,
668 &class_device_attr_node_desc
639}; 669};
640 670
641static struct class ib_class = { 671static struct class ib_class = {
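
With the new node_desc attribute, user space can read and (with write permission, typically root) replace the 64-byte NodeDescription; anything past 64 bytes is silently dropped by the min_t() above. A hedged user-space example -- the /sys/class/infiniband/<device>/node_desc path is assumed from the class device naming:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	/* Path assumed from the IB class device layout, e.g.
	 * /sys/class/infiniband/mthca0/node_desc */
	const char *path = argc > 1 ? argv[1] :
		"/sys/class/infiniband/mthca0/node_desc";
	const char *desc = argc > 2 ? argv[2] : "my node description";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* The kernel copies at most 64 bytes into dev->node_desc. */
	if (write(fd, desc, strlen(desc)) < 0)
		perror("write");
	close(fd);
	return 0;
}
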
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index c908de8db5a9..afe70a549c2f 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -31,7 +31,7 @@
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 * 33 *
34 * $Id: user_mad.c 4010 2005-11-09 23:11:56Z roland $ 34 * $Id: user_mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
35 */ 35 */
36 36
37#include <linux/module.h> 37#include <linux/module.h>
@@ -121,6 +121,7 @@ struct ib_umad_file {
121 121
122struct ib_umad_packet { 122struct ib_umad_packet {
123 struct ib_mad_send_buf *msg; 123 struct ib_mad_send_buf *msg;
124 struct ib_mad_recv_wc *recv_wc;
124 struct list_head list; 125 struct list_head list;
125 int length; 126 int length;
126 struct ib_user_mad mad; 127 struct ib_user_mad mad;
@@ -180,27 +181,17 @@ static void send_handler(struct ib_mad_agent *agent,
180 struct ib_mad_send_wc *send_wc) 181 struct ib_mad_send_wc *send_wc)
181{ 182{
182 struct ib_umad_file *file = agent->context; 183 struct ib_umad_file *file = agent->context;
183 struct ib_umad_packet *timeout;
184 struct ib_umad_packet *packet = send_wc->send_buf->context[0]; 184 struct ib_umad_packet *packet = send_wc->send_buf->context[0];
185 185
186 ib_destroy_ah(packet->msg->ah); 186 ib_destroy_ah(packet->msg->ah);
187 ib_free_send_mad(packet->msg); 187 ib_free_send_mad(packet->msg);
188 188
189 if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { 189 if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
190 timeout = kzalloc(sizeof *timeout + IB_MGMT_MAD_HDR, GFP_KERNEL); 190 packet->length = IB_MGMT_MAD_HDR;
191 if (!timeout) 191 packet->mad.hdr.status = ETIMEDOUT;
192 goto out; 192 if (!queue_packet(file, agent, packet))
193 193 return;
194 timeout->length = IB_MGMT_MAD_HDR;
195 timeout->mad.hdr.id = packet->mad.hdr.id;
196 timeout->mad.hdr.status = ETIMEDOUT;
197 memcpy(timeout->mad.data, packet->mad.data,
198 sizeof (struct ib_mad_hdr));
199
200 if (queue_packet(file, agent, timeout))
201 kfree(timeout);
202 } 194 }
203out:
204 kfree(packet); 195 kfree(packet);
205} 196}
206 197
@@ -209,22 +200,20 @@ static void recv_handler(struct ib_mad_agent *agent,
209{ 200{
210 struct ib_umad_file *file = agent->context; 201 struct ib_umad_file *file = agent->context;
211 struct ib_umad_packet *packet; 202 struct ib_umad_packet *packet;
212 int length;
213 203
214 if (mad_recv_wc->wc->status != IB_WC_SUCCESS) 204 if (mad_recv_wc->wc->status != IB_WC_SUCCESS)
215 goto out; 205 goto err1;
216 206
217 length = mad_recv_wc->mad_len; 207 packet = kzalloc(sizeof *packet, GFP_KERNEL);
218 packet = kzalloc(sizeof *packet + length, GFP_KERNEL);
219 if (!packet) 208 if (!packet)
220 goto out; 209 goto err1;
221 210
222 packet->length = length; 211 packet->length = mad_recv_wc->mad_len;
223 212 packet->recv_wc = mad_recv_wc;
224 ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data);
225 213
226 packet->mad.hdr.status = 0; 214 packet->mad.hdr.status = 0;
227 packet->mad.hdr.length = length + sizeof (struct ib_user_mad); 215 packet->mad.hdr.length = sizeof (struct ib_user_mad) +
216 mad_recv_wc->mad_len;
228 packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); 217 packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp);
229 packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); 218 packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid);
230 packet->mad.hdr.sl = mad_recv_wc->wc->sl; 219 packet->mad.hdr.sl = mad_recv_wc->wc->sl;
@@ -240,12 +229,79 @@ static void recv_handler(struct ib_mad_agent *agent,
240 } 229 }
241 230
242 if (queue_packet(file, agent, packet)) 231 if (queue_packet(file, agent, packet))
243 kfree(packet); 232 goto err2;
233 return;
244 234
245out: 235err2:
236 kfree(packet);
237err1:
246 ib_free_recv_mad(mad_recv_wc); 238 ib_free_recv_mad(mad_recv_wc);
247} 239}
248 240
241static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
242 size_t count)
243{
244 struct ib_mad_recv_buf *recv_buf;
245 int left, seg_payload, offset, max_seg_payload;
246
247 /* We need enough room to copy the first (or only) MAD segment. */
248 recv_buf = &packet->recv_wc->recv_buf;
249 if ((packet->length <= sizeof (*recv_buf->mad) &&
250 count < sizeof (packet->mad) + packet->length) ||
251 (packet->length > sizeof (*recv_buf->mad) &&
252 count < sizeof (packet->mad) + sizeof (*recv_buf->mad)))
253 return -EINVAL;
254
255 if (copy_to_user(buf, &packet->mad, sizeof (packet->mad)))
256 return -EFAULT;
257
258 buf += sizeof (packet->mad);
259 seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad));
260 if (copy_to_user(buf, recv_buf->mad, seg_payload))
261 return -EFAULT;
262
263 if (seg_payload < packet->length) {
264 /*
265 * Multipacket RMPP MAD message. Copy remainder of message.
266 * Note that last segment may have a shorter payload.
267 */
268 if (count < sizeof (packet->mad) + packet->length) {
269 /*
270 * The buffer is too small, return the first RMPP segment,
271 * which includes the RMPP message length.
272 */
273 return -ENOSPC;
274 }
275 offset = ib_get_mad_data_offset(recv_buf->mad->mad_hdr.mgmt_class);
276 max_seg_payload = sizeof (struct ib_mad) - offset;
277
278 for (left = packet->length - seg_payload, buf += seg_payload;
279 left; left -= seg_payload, buf += seg_payload) {
280 recv_buf = container_of(recv_buf->list.next,
281 struct ib_mad_recv_buf, list);
282 seg_payload = min(left, max_seg_payload);
283 if (copy_to_user(buf, ((void *) recv_buf->mad) + offset,
284 seg_payload))
285 return -EFAULT;
286 }
287 }
288 return sizeof (packet->mad) + packet->length;
289}
290
291static ssize_t copy_send_mad(char __user *buf, struct ib_umad_packet *packet,
292 size_t count)
293{
294 ssize_t size = sizeof (packet->mad) + packet->length;
295
296 if (count < size)
297 return -EINVAL;
298
299 if (copy_to_user(buf, &packet->mad, size))
300 return -EFAULT;
301
302 return size;
303}
304
249static ssize_t ib_umad_read(struct file *filp, char __user *buf, 305static ssize_t ib_umad_read(struct file *filp, char __user *buf,
250 size_t count, loff_t *pos) 306 size_t count, loff_t *pos)
251{ 307{
@@ -253,7 +309,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
253 struct ib_umad_packet *packet; 309 struct ib_umad_packet *packet;
254 ssize_t ret; 310 ssize_t ret;
255 311
256 if (count < sizeof (struct ib_user_mad) + sizeof (struct ib_mad)) 312 if (count < sizeof (struct ib_user_mad))
257 return -EINVAL; 313 return -EINVAL;
258 314
259 spin_lock_irq(&file->recv_lock); 315 spin_lock_irq(&file->recv_lock);
@@ -276,28 +332,44 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
276 332
277 spin_unlock_irq(&file->recv_lock); 333 spin_unlock_irq(&file->recv_lock);
278 334
279 if (count < packet->length + sizeof (struct ib_user_mad)) { 335 if (packet->recv_wc)
280 /* Return length needed (and first RMPP segment) if too small */ 336 ret = copy_recv_mad(buf, packet, count);
281 if (copy_to_user(buf, &packet->mad,
282 sizeof (struct ib_user_mad) + sizeof (struct ib_mad)))
283 ret = -EFAULT;
284 else
285 ret = -ENOSPC;
286 } else if (copy_to_user(buf, &packet->mad,
287 packet->length + sizeof (struct ib_user_mad)))
288 ret = -EFAULT;
289 else 337 else
290 ret = packet->length + sizeof (struct ib_user_mad); 338 ret = copy_send_mad(buf, packet, count);
339
291 if (ret < 0) { 340 if (ret < 0) {
292 /* Requeue packet */ 341 /* Requeue packet */
293 spin_lock_irq(&file->recv_lock); 342 spin_lock_irq(&file->recv_lock);
294 list_add(&packet->list, &file->recv_list); 343 list_add(&packet->list, &file->recv_list);
295 spin_unlock_irq(&file->recv_lock); 344 spin_unlock_irq(&file->recv_lock);
296 } else 345 } else {
346 if (packet->recv_wc)
347 ib_free_recv_mad(packet->recv_wc);
297 kfree(packet); 348 kfree(packet);
349 }
298 return ret; 350 return ret;
299} 351}
300 352
353static int copy_rmpp_mad(struct ib_mad_send_buf *msg, const char __user *buf)
354{
355 int left, seg;
356
357 /* Copy class specific header */
358 if ((msg->hdr_len > IB_MGMT_RMPP_HDR) &&
359 copy_from_user(msg->mad + IB_MGMT_RMPP_HDR, buf + IB_MGMT_RMPP_HDR,
360 msg->hdr_len - IB_MGMT_RMPP_HDR))
361 return -EFAULT;
362
363 /* All headers are in place. Copy data segments. */
364 for (seg = 1, left = msg->data_len, buf += msg->hdr_len; left > 0;
365 seg++, left -= msg->seg_size, buf += msg->seg_size) {
366 if (copy_from_user(ib_get_rmpp_segment(msg, seg), buf,
367 min(left, msg->seg_size)))
368 return -EFAULT;
369 }
370 return 0;
371}
372
301static ssize_t ib_umad_write(struct file *filp, const char __user *buf, 373static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
302 size_t count, loff_t *pos) 374 size_t count, loff_t *pos)
303{ 375{
@@ -309,14 +381,12 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
309 struct ib_rmpp_mad *rmpp_mad; 381 struct ib_rmpp_mad *rmpp_mad;
310 u8 method; 382 u8 method;
311 __be64 *tid; 383 __be64 *tid;
312 int ret, length, hdr_len, copy_offset; 384 int ret, data_len, hdr_len, copy_offset, rmpp_active;
313 int rmpp_active, has_rmpp_header;
314 385
315 if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR) 386 if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)
316 return -EINVAL; 387 return -EINVAL;
317 388
318 length = count - sizeof (struct ib_user_mad); 389 packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
319 packet = kmalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
320 if (!packet) 390 if (!packet)
321 return -ENOMEM; 391 return -ENOMEM;
322 392
@@ -360,38 +430,21 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
360 } 430 }
361 431
362 rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; 432 rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data;
363 if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) { 433 hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
364 hdr_len = IB_MGMT_SA_HDR; 434 if (!ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)) {
365 copy_offset = IB_MGMT_RMPP_HDR;
366 has_rmpp_header = 1;
367 } else if (rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START &&
368 rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END) {
369 hdr_len = IB_MGMT_VENDOR_HDR;
370 copy_offset = IB_MGMT_RMPP_HDR;
371 has_rmpp_header = 1;
372 } else {
373 hdr_len = IB_MGMT_MAD_HDR;
374 copy_offset = IB_MGMT_MAD_HDR; 435 copy_offset = IB_MGMT_MAD_HDR;
375 has_rmpp_header = 0; 436 rmpp_active = 0;
376 } 437 } else {
377 438 copy_offset = IB_MGMT_RMPP_HDR;
378 if (has_rmpp_header)
379 rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & 439 rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
380 IB_MGMT_RMPP_FLAG_ACTIVE; 440 IB_MGMT_RMPP_FLAG_ACTIVE;
381 else
382 rmpp_active = 0;
383
384 /* Validate that the management class can support RMPP */
385 if (rmpp_active && !agent->rmpp_version) {
386 ret = -EINVAL;
387 goto err_ah;
388 } 441 }
389 442
443 data_len = count - sizeof (struct ib_user_mad) - hdr_len;
390 packet->msg = ib_create_send_mad(agent, 444 packet->msg = ib_create_send_mad(agent,
391 be32_to_cpu(packet->mad.hdr.qpn), 445 be32_to_cpu(packet->mad.hdr.qpn),
392 0, rmpp_active, 446 0, rmpp_active, hdr_len,
393 hdr_len, length - hdr_len, 447 data_len, GFP_KERNEL);
394 GFP_KERNEL);
395 if (IS_ERR(packet->msg)) { 448 if (IS_ERR(packet->msg)) {
396 ret = PTR_ERR(packet->msg); 449 ret = PTR_ERR(packet->msg);
397 goto err_ah; 450 goto err_ah;
@@ -402,14 +455,21 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
402 packet->msg->retries = packet->mad.hdr.retries; 455 packet->msg->retries = packet->mad.hdr.retries;
403 packet->msg->context[0] = packet; 456 packet->msg->context[0] = packet;
404 457
405 /* Copy MAD headers (RMPP header in place) */ 458 /* Copy MAD header. Any RMPP header is already in place. */
406 memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); 459 memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
407 /* Now, copy rest of message from user into send buffer */ 460 buf += sizeof (struct ib_user_mad);
408 if (copy_from_user(packet->msg->mad + copy_offset, 461
409 buf + sizeof (struct ib_user_mad) + copy_offset, 462 if (!rmpp_active) {
410 length - copy_offset)) { 463 if (copy_from_user(packet->msg->mad + copy_offset,
411 ret = -EFAULT; 464 buf + copy_offset,
412 goto err_msg; 465 hdr_len + data_len - copy_offset)) {
466 ret = -EFAULT;
467 goto err_msg;
468 }
469 } else {
470 ret = copy_rmpp_mad(packet->msg, buf);
471 if (ret)
472 goto err_msg;
413 } 473 }
414 474
415 /* 475 /*
@@ -433,18 +493,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
433 goto err_msg; 493 goto err_msg;
434 494
435 up_read(&file->port->mutex); 495 up_read(&file->port->mutex);
436
437 return count; 496 return count;
438 497
439err_msg: 498err_msg:
440 ib_free_send_mad(packet->msg); 499 ib_free_send_mad(packet->msg);
441
442err_ah: 500err_ah:
443 ib_destroy_ah(ah); 501 ib_destroy_ah(ah);
444
445err_up: 502err_up:
446 up_read(&file->port->mutex); 503 up_read(&file->port->mutex);
447
448err: 504err:
449 kfree(packet); 505 kfree(packet);
450 return ret; 506 return ret;
@@ -627,8 +683,11 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
627 already_dead = file->agents_dead; 683 already_dead = file->agents_dead;
628 file->agents_dead = 1; 684 file->agents_dead = 1;
629 685
630 list_for_each_entry_safe(packet, tmp, &file->recv_list, list) 686 list_for_each_entry_safe(packet, tmp, &file->recv_list, list) {
687 if (packet->recv_wc)
688 ib_free_recv_mad(packet->recv_wc);
631 kfree(packet); 689 kfree(packet);
690 }
632 691
633 list_del(&file->port_list); 692 list_del(&file->port_list);
634 693
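
copy_rmpp_mad() above walks the user buffer in seg_size strides after the class header, and copy_recv_mad() does the inverse using the per-class data offset. A small stand-alone model of that segmentation arithmetic (the real offset comes from ib_get_mad_data_offset(); 256-byte MADs and the 56-byte SA header are used here only as an example):

#include <stdio.h>

#define MAD_SIZE 256	/* total MAD size, header + data */

/* How many RMPP segments a payload of data_len bytes needs when each
 * segment carries (MAD_SIZE - data_offset) bytes of payload. */
static int rmpp_seg_count(int data_len, int data_offset)
{
	int seg_payload = MAD_SIZE - data_offset;

	return (data_len + seg_payload - 1) / seg_payload;
}

int main(void)
{
	/* SA-class MADs leave 256 - 56 = 200 data bytes per segment,
	 * so a 1000-byte payload needs 5 segments. */
	printf("%d\n", rmpp_seg_count(1000, 56));
	/* A payload that fits in one segment still needs one. */
	printf("%d\n", rmpp_seg_count(100, 56));
	return 0;
}
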
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index f7eecbc6af6c..3372d67ff139 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved. 3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
6 * Copyright (c) 2005 PathScale, Inc. All rights reserved. 6 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
@@ -178,10 +178,12 @@ IB_UVERBS_DECLARE_CMD(reg_mr);
178IB_UVERBS_DECLARE_CMD(dereg_mr); 178IB_UVERBS_DECLARE_CMD(dereg_mr);
179IB_UVERBS_DECLARE_CMD(create_comp_channel); 179IB_UVERBS_DECLARE_CMD(create_comp_channel);
180IB_UVERBS_DECLARE_CMD(create_cq); 180IB_UVERBS_DECLARE_CMD(create_cq);
181IB_UVERBS_DECLARE_CMD(resize_cq);
181IB_UVERBS_DECLARE_CMD(poll_cq); 182IB_UVERBS_DECLARE_CMD(poll_cq);
182IB_UVERBS_DECLARE_CMD(req_notify_cq); 183IB_UVERBS_DECLARE_CMD(req_notify_cq);
183IB_UVERBS_DECLARE_CMD(destroy_cq); 184IB_UVERBS_DECLARE_CMD(destroy_cq);
184IB_UVERBS_DECLARE_CMD(create_qp); 185IB_UVERBS_DECLARE_CMD(create_qp);
186IB_UVERBS_DECLARE_CMD(query_qp);
185IB_UVERBS_DECLARE_CMD(modify_qp); 187IB_UVERBS_DECLARE_CMD(modify_qp);
186IB_UVERBS_DECLARE_CMD(destroy_qp); 188IB_UVERBS_DECLARE_CMD(destroy_qp);
187IB_UVERBS_DECLARE_CMD(post_send); 189IB_UVERBS_DECLARE_CMD(post_send);
@@ -193,6 +195,7 @@ IB_UVERBS_DECLARE_CMD(attach_mcast);
193IB_UVERBS_DECLARE_CMD(detach_mcast); 195IB_UVERBS_DECLARE_CMD(detach_mcast);
194IB_UVERBS_DECLARE_CMD(create_srq); 196IB_UVERBS_DECLARE_CMD(create_srq);
195IB_UVERBS_DECLARE_CMD(modify_srq); 197IB_UVERBS_DECLARE_CMD(modify_srq);
198IB_UVERBS_DECLARE_CMD(query_srq);
196IB_UVERBS_DECLARE_CMD(destroy_srq); 199IB_UVERBS_DECLARE_CMD(destroy_srq);
197 200
198#endif /* UVERBS_H */ 201#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 407b6284d7d5..9f69bd48eb1b 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1,7 +1,8 @@
1/* 1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved. 3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
4 * Copyright (c) 2005 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
5 * Copyright (c) 2006 Mellanox Technologies. All rights reserved.
5 * 6 *
6 * This software is available to you under a choice of one of two 7 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU 8 * licenses. You may choose to be licensed under the terms of the GNU
@@ -675,6 +676,46 @@ err:
675 return ret; 676 return ret;
676} 677}
677 678
679ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
680 const char __user *buf, int in_len,
681 int out_len)
682{
683 struct ib_uverbs_resize_cq cmd;
684 struct ib_uverbs_resize_cq_resp resp;
685 struct ib_udata udata;
686 struct ib_cq *cq;
687 int ret = -EINVAL;
688
689 if (copy_from_user(&cmd, buf, sizeof cmd))
690 return -EFAULT;
691
692 INIT_UDATA(&udata, buf + sizeof cmd,
693 (unsigned long) cmd.response + sizeof resp,
694 in_len - sizeof cmd, out_len - sizeof resp);
695
696 mutex_lock(&ib_uverbs_idr_mutex);
697
698 cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
699 if (!cq || cq->uobject->context != file->ucontext || !cq->device->resize_cq)
700 goto out;
701
702 ret = cq->device->resize_cq(cq, cmd.cqe, &udata);
703 if (ret)
704 goto out;
705
706 memset(&resp, 0, sizeof resp);
707 resp.cqe = cq->cqe;
708
709 if (copy_to_user((void __user *) (unsigned long) cmd.response,
710 &resp, sizeof resp))
711 ret = -EFAULT;
712
713out:
714 mutex_unlock(&ib_uverbs_idr_mutex);
715
716 return ret ? ret : in_len;
717}
718
678ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, 719ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
679 const char __user *buf, int in_len, 720 const char __user *buf, int in_len,
680 int out_len) 721 int out_len)
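
The new IB_USER_VERBS_CMD_RESIZE_CQ handler is the path an ibv_resize_cq() call from user space would take, assuming a libibverbs build that implements the command. A hedged user-space sketch, with error handling trimmed to the essentials:

#include <stdio.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **dev_list = ibv_get_device_list(NULL);
	struct ibv_context *ctx;
	struct ibv_cq *cq;

	if (!dev_list || !dev_list[0])
		return 1;
	ctx = ibv_open_device(dev_list[0]);
	if (!ctx)
		return 1;

	/* Create a small CQ, then ask the kernel to resize it; the driver
	 * may round the result up, so re-read cq->cqe afterwards. */
	cq = ibv_create_cq(ctx, 16, NULL, NULL, 0);
	if (!cq)
		return 1;

	if (ibv_resize_cq(cq, 128))
		perror("ibv_resize_cq");
	else
		printf("CQ resized, now %d entries\n", cq->cqe);

	ibv_destroy_cq(cq);
	ibv_close_device(ctx);
	ibv_free_device_list(dev_list);
	return 0;
}
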
@@ -956,6 +997,106 @@ err_up:
956 return ret; 997 return ret;
957} 998}
958 999
1000ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
1001 const char __user *buf, int in_len,
1002 int out_len)
1003{
1004 struct ib_uverbs_query_qp cmd;
1005 struct ib_uverbs_query_qp_resp resp;
1006 struct ib_qp *qp;
1007 struct ib_qp_attr *attr;
1008 struct ib_qp_init_attr *init_attr;
1009 int ret;
1010
1011 if (copy_from_user(&cmd, buf, sizeof cmd))
1012 return -EFAULT;
1013
1014 attr = kmalloc(sizeof *attr, GFP_KERNEL);
1015 init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
1016 if (!attr || !init_attr) {
1017 ret = -ENOMEM;
1018 goto out;
1019 }
1020
1021 mutex_lock(&ib_uverbs_idr_mutex);
1022
1023 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
1024 if (qp && qp->uobject->context == file->ucontext)
1025 ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
1026 else
1027 ret = -EINVAL;
1028
1029 mutex_unlock(&ib_uverbs_idr_mutex);
1030
1031 if (ret)
1032 goto out;
1033
1034 memset(&resp, 0, sizeof resp);
1035
1036 resp.qp_state = attr->qp_state;
1037 resp.cur_qp_state = attr->cur_qp_state;
1038 resp.path_mtu = attr->path_mtu;
1039 resp.path_mig_state = attr->path_mig_state;
1040 resp.qkey = attr->qkey;
1041 resp.rq_psn = attr->rq_psn;
1042 resp.sq_psn = attr->sq_psn;
1043 resp.dest_qp_num = attr->dest_qp_num;
1044 resp.qp_access_flags = attr->qp_access_flags;
1045 resp.pkey_index = attr->pkey_index;
1046 resp.alt_pkey_index = attr->alt_pkey_index;
1047 resp.en_sqd_async_notify = attr->en_sqd_async_notify;
1048 resp.max_rd_atomic = attr->max_rd_atomic;
1049 resp.max_dest_rd_atomic = attr->max_dest_rd_atomic;
1050 resp.min_rnr_timer = attr->min_rnr_timer;
1051 resp.port_num = attr->port_num;
1052 resp.timeout = attr->timeout;
1053 resp.retry_cnt = attr->retry_cnt;
1054 resp.rnr_retry = attr->rnr_retry;
1055 resp.alt_port_num = attr->alt_port_num;
1056 resp.alt_timeout = attr->alt_timeout;
1057
1058 memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
1059 resp.dest.flow_label = attr->ah_attr.grh.flow_label;
1060 resp.dest.sgid_index = attr->ah_attr.grh.sgid_index;
1061 resp.dest.hop_limit = attr->ah_attr.grh.hop_limit;
1062 resp.dest.traffic_class = attr->ah_attr.grh.traffic_class;
1063 resp.dest.dlid = attr->ah_attr.dlid;
1064 resp.dest.sl = attr->ah_attr.sl;
1065 resp.dest.src_path_bits = attr->ah_attr.src_path_bits;
1066 resp.dest.static_rate = attr->ah_attr.static_rate;
1067 resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
1068 resp.dest.port_num = attr->ah_attr.port_num;
1069
1070 memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
1071 resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label;
1072 resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index;
1073 resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit;
1074 resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
1075 resp.alt_dest.dlid = attr->alt_ah_attr.dlid;
1076 resp.alt_dest.sl = attr->alt_ah_attr.sl;
1077 resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
1078 resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate;
1079 resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
1080 resp.alt_dest.port_num = attr->alt_ah_attr.port_num;
1081
1082 resp.max_send_wr = init_attr->cap.max_send_wr;
1083 resp.max_recv_wr = init_attr->cap.max_recv_wr;
1084 resp.max_send_sge = init_attr->cap.max_send_sge;
1085 resp.max_recv_sge = init_attr->cap.max_recv_sge;
1086 resp.max_inline_data = init_attr->cap.max_inline_data;
1087 resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
1088
1089 if (copy_to_user((void __user *) (unsigned long) cmd.response,
1090 &resp, sizeof resp))
1091 ret = -EFAULT;
1092
1093out:
1094 kfree(attr);
1095 kfree(init_attr);
1096
1097 return ret ? ret : in_len;
1098}
1099
959ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, 1100ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
960 const char __user *buf, int in_len, 1101 const char __user *buf, int in_len,
961 int out_len) 1102 int out_len)
@@ -990,7 +1131,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
990 attr->dest_qp_num = cmd.dest_qp_num; 1131 attr->dest_qp_num = cmd.dest_qp_num;
991 attr->qp_access_flags = cmd.qp_access_flags; 1132 attr->qp_access_flags = cmd.qp_access_flags;
992 attr->pkey_index = cmd.pkey_index; 1133 attr->pkey_index = cmd.pkey_index;
993 attr->alt_pkey_index = cmd.pkey_index; 1134 attr->alt_pkey_index = cmd.alt_pkey_index;
994 attr->en_sqd_async_notify = cmd.en_sqd_async_notify; 1135 attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
995 attr->max_rd_atomic = cmd.max_rd_atomic; 1136 attr->max_rd_atomic = cmd.max_rd_atomic;
996 attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; 1137 attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic;
@@ -1094,8 +1235,8 @@ out:
1094} 1235}
1095 1236
1096ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, 1237ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
1097 const char __user *buf, int in_len, 1238 const char __user *buf, int in_len,
1098 int out_len) 1239 int out_len)
1099{ 1240{
1100 struct ib_uverbs_post_send cmd; 1241 struct ib_uverbs_post_send cmd;
1101 struct ib_uverbs_post_send_resp resp; 1242 struct ib_uverbs_post_send_resp resp;
@@ -1323,8 +1464,8 @@ err:
1323} 1464}
1324 1465
1325ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, 1466ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
1326 const char __user *buf, int in_len, 1467 const char __user *buf, int in_len,
1327 int out_len) 1468 int out_len)
1328{ 1469{
1329 struct ib_uverbs_post_recv cmd; 1470 struct ib_uverbs_post_recv cmd;
1330 struct ib_uverbs_post_recv_resp resp; 1471 struct ib_uverbs_post_recv_resp resp;
@@ -1374,8 +1515,8 @@ out:
1374} 1515}
1375 1516
1376ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, 1517ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
1377 const char __user *buf, int in_len, 1518 const char __user *buf, int in_len,
1378 int out_len) 1519 int out_len)
1379{ 1520{
1380 struct ib_uverbs_post_srq_recv cmd; 1521 struct ib_uverbs_post_srq_recv cmd;
1381 struct ib_uverbs_post_srq_recv_resp resp; 1522 struct ib_uverbs_post_srq_recv_resp resp;
@@ -1723,6 +1864,8 @@ retry:
1723 goto err_destroy; 1864 goto err_destroy;
1724 1865
1725 resp.srq_handle = uobj->uobject.id; 1866 resp.srq_handle = uobj->uobject.id;
1867 resp.max_wr = attr.attr.max_wr;
1868 resp.max_sge = attr.attr.max_sge;
1726 1869
1727 if (copy_to_user((void __user *) (unsigned long) cmd.response, 1870 if (copy_to_user((void __user *) (unsigned long) cmd.response,
1728 &resp, sizeof resp)) { 1871 &resp, sizeof resp)) {
@@ -1783,6 +1926,49 @@ out:
1783 return ret ? ret : in_len; 1926 return ret ? ret : in_len;
1784} 1927}
1785 1928
1929ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
1930 const char __user *buf,
1931 int in_len, int out_len)
1932{
1933 struct ib_uverbs_query_srq cmd;
1934 struct ib_uverbs_query_srq_resp resp;
1935 struct ib_srq_attr attr;
1936 struct ib_srq *srq;
1937 int ret;
1938
1939 if (out_len < sizeof resp)
1940 return -ENOSPC;
1941
1942 if (copy_from_user(&cmd, buf, sizeof cmd))
1943 return -EFAULT;
1944
1945 mutex_lock(&ib_uverbs_idr_mutex);
1946
1947 srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
1948 if (srq && srq->uobject->context == file->ucontext)
1949 ret = ib_query_srq(srq, &attr);
1950 else
1951 ret = -EINVAL;
1952
1953 mutex_unlock(&ib_uverbs_idr_mutex);
1954
1955 if (ret)
1956 goto out;
1957
1958 memset(&resp, 0, sizeof resp);
1959
1960 resp.max_wr = attr.max_wr;
1961 resp.max_sge = attr.max_sge;
1962 resp.srq_limit = attr.srq_limit;
1963
1964 if (copy_to_user((void __user *) (unsigned long) cmd.response,
1965 &resp, sizeof resp))
1966 ret = -EFAULT;
1967
1968out:
1969 return ret ? ret : in_len;
1970}
1971
1786ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, 1972ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
1787 const char __user *buf, int in_len, 1973 const char __user *buf, int in_len,
1788 int out_len) 1974 int out_len)
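
ib_uverbs_query_srq() above backs ibv_query_srq() in user space, again assuming libibverbs support for the command. A hedged sketch that creates an SRQ and reads back the attributes the kernel fills in:

#include <stdio.h>
#include <string.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **dev_list = ibv_get_device_list(NULL);
	struct ibv_context *ctx;
	struct ibv_pd *pd;
	struct ibv_srq_init_attr init_attr;
	struct ibv_srq_attr attr;
	struct ibv_srq *srq;

	if (!dev_list || !dev_list[0])
		return 1;
	ctx = ibv_open_device(dev_list[0]);
	if (!ctx)
		return 1;
	pd = ibv_alloc_pd(ctx);
	if (!pd)
		return 1;

	memset(&init_attr, 0, sizeof init_attr);
	init_attr.attr.max_wr  = 64;
	init_attr.attr.max_sge = 1;
	srq = ibv_create_srq(pd, &init_attr);
	if (!srq)
		return 1;

	/* max_wr/max_sge may be rounded up by the driver; srq_limit starts at 0. */
	if (!ibv_query_srq(srq, &attr))
		printf("max_wr %u max_sge %u srq_limit %u\n",
		       attr.max_wr, attr.max_sge, attr.srq_limit);

	ibv_destroy_srq(srq);
	ibv_dealloc_pd(pd);
	ibv_close_device(ctx);
	ibv_free_device_list(dev_list);
	return 0;
}
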
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 903f85a4bc0c..ff092a0a94da 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved. 3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
6 * Copyright (c) 2005 PathScale, Inc. All rights reserved. 6 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
@@ -91,10 +91,12 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
91 [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, 91 [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
92 [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, 92 [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
93 [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, 93 [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
94 [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
94 [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, 95 [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
95 [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, 96 [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
96 [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, 97 [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
97 [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, 98 [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
99 [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
98 [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, 100 [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
99 [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, 101 [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
100 [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, 102 [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
@@ -106,6 +108,7 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
106 [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, 108 [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
107 [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, 109 [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
108 [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, 110 [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
111 [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
109 [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, 112 [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
110}; 113};
111 114
@@ -461,7 +464,6 @@ void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
461 ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle, 464 ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
462 event->event, &uobj->async_list, 465 event->event, &uobj->async_list,
463 &uobj->async_events_reported); 466 &uobj->async_events_reported);
464
465} 467}
466 468
467void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) 469void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index c857361be449..cae0845f472a 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -5,7 +5,7 @@
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved. 6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
7 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 7 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
8 * Copyright (c) 2005 Cisco Systems. All rights reserved. 8 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
9 * 9 *
10 * This software is available to you under a choice of one of two 10 * This software is available to you under a choice of one of two
11 * licenses. You may choose to be licensed under the terms of the GNU 11 * licenses. You may choose to be licensed under the terms of the GNU
@@ -245,6 +245,258 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
245} 245}
246EXPORT_SYMBOL(ib_create_qp); 246EXPORT_SYMBOL(ib_create_qp);
247 247
248static const struct {
249 int valid;
250 enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETY + 1];
251 enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETY + 1];
252} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
253 [IB_QPS_RESET] = {
254 [IB_QPS_RESET] = { .valid = 1 },
255 [IB_QPS_ERR] = { .valid = 1 },
256 [IB_QPS_INIT] = {
257 .valid = 1,
258 .req_param = {
259 [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
260 IB_QP_PORT |
261 IB_QP_QKEY),
262 [IB_QPT_UC] = (IB_QP_PKEY_INDEX |
263 IB_QP_PORT |
264 IB_QP_ACCESS_FLAGS),
265 [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
266 IB_QP_PORT |
267 IB_QP_ACCESS_FLAGS),
268 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
269 IB_QP_QKEY),
270 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
271 IB_QP_QKEY),
272 }
273 },
274 },
275 [IB_QPS_INIT] = {
276 [IB_QPS_RESET] = { .valid = 1 },
277 [IB_QPS_ERR] = { .valid = 1 },
278 [IB_QPS_INIT] = {
279 .valid = 1,
280 .opt_param = {
281 [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
282 IB_QP_PORT |
283 IB_QP_QKEY),
284 [IB_QPT_UC] = (IB_QP_PKEY_INDEX |
285 IB_QP_PORT |
286 IB_QP_ACCESS_FLAGS),
287 [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
288 IB_QP_PORT |
289 IB_QP_ACCESS_FLAGS),
290 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
291 IB_QP_QKEY),
292 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
293 IB_QP_QKEY),
294 }
295 },
296 [IB_QPS_RTR] = {
297 .valid = 1,
298 .req_param = {
299 [IB_QPT_UC] = (IB_QP_AV |
300 IB_QP_PATH_MTU |
301 IB_QP_DEST_QPN |
302 IB_QP_RQ_PSN),
303 [IB_QPT_RC] = (IB_QP_AV |
304 IB_QP_PATH_MTU |
305 IB_QP_DEST_QPN |
306 IB_QP_RQ_PSN |
307 IB_QP_MAX_DEST_RD_ATOMIC |
308 IB_QP_MIN_RNR_TIMER),
309 },
310 .opt_param = {
311 [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
312 IB_QP_QKEY),
313 [IB_QPT_UC] = (IB_QP_ALT_PATH |
314 IB_QP_ACCESS_FLAGS |
315 IB_QP_PKEY_INDEX),
316 [IB_QPT_RC] = (IB_QP_ALT_PATH |
317 IB_QP_ACCESS_FLAGS |
318 IB_QP_PKEY_INDEX),
319 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
320 IB_QP_QKEY),
321 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
322 IB_QP_QKEY),
323 }
324 }
325 },
326 [IB_QPS_RTR] = {
327 [IB_QPS_RESET] = { .valid = 1 },
328 [IB_QPS_ERR] = { .valid = 1 },
329 [IB_QPS_RTS] = {
330 .valid = 1,
331 .req_param = {
332 [IB_QPT_UD] = IB_QP_SQ_PSN,
333 [IB_QPT_UC] = IB_QP_SQ_PSN,
334 [IB_QPT_RC] = (IB_QP_TIMEOUT |
335 IB_QP_RETRY_CNT |
336 IB_QP_RNR_RETRY |
337 IB_QP_SQ_PSN |
338 IB_QP_MAX_QP_RD_ATOMIC),
339 [IB_QPT_SMI] = IB_QP_SQ_PSN,
340 [IB_QPT_GSI] = IB_QP_SQ_PSN,
341 },
342 .opt_param = {
343 [IB_QPT_UD] = (IB_QP_CUR_STATE |
344 IB_QP_QKEY),
345 [IB_QPT_UC] = (IB_QP_CUR_STATE |
346 IB_QP_ALT_PATH |
347 IB_QP_ACCESS_FLAGS |
348 IB_QP_PATH_MIG_STATE),
349 [IB_QPT_RC] = (IB_QP_CUR_STATE |
350 IB_QP_ALT_PATH |
351 IB_QP_ACCESS_FLAGS |
352 IB_QP_MIN_RNR_TIMER |
353 IB_QP_PATH_MIG_STATE),
354 [IB_QPT_SMI] = (IB_QP_CUR_STATE |
355 IB_QP_QKEY),
356 [IB_QPT_GSI] = (IB_QP_CUR_STATE |
357 IB_QP_QKEY),
358 }
359 }
360 },
361 [IB_QPS_RTS] = {
362 [IB_QPS_RESET] = { .valid = 1 },
363 [IB_QPS_ERR] = { .valid = 1 },
364 [IB_QPS_RTS] = {
365 .valid = 1,
366 .opt_param = {
367 [IB_QPT_UD] = (IB_QP_CUR_STATE |
368 IB_QP_QKEY),
369 [IB_QPT_UC] = (IB_QP_CUR_STATE |
370 IB_QP_ACCESS_FLAGS |
371 IB_QP_ALT_PATH |
372 IB_QP_PATH_MIG_STATE),
373 [IB_QPT_RC] = (IB_QP_CUR_STATE |
374 IB_QP_ACCESS_FLAGS |
375 IB_QP_ALT_PATH |
376 IB_QP_PATH_MIG_STATE |
377 IB_QP_MIN_RNR_TIMER),
378 [IB_QPT_SMI] = (IB_QP_CUR_STATE |
379 IB_QP_QKEY),
380 [IB_QPT_GSI] = (IB_QP_CUR_STATE |
381 IB_QP_QKEY),
382 }
383 },
384 [IB_QPS_SQD] = {
385 .valid = 1,
386 .opt_param = {
387 [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY,
388 [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
389 [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
390 [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
391 [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
392 }
393 },
394 },
395 [IB_QPS_SQD] = {
396 [IB_QPS_RESET] = { .valid = 1 },
397 [IB_QPS_ERR] = { .valid = 1 },
398 [IB_QPS_RTS] = {
399 .valid = 1,
400 .opt_param = {
401 [IB_QPT_UD] = (IB_QP_CUR_STATE |
402 IB_QP_QKEY),
403 [IB_QPT_UC] = (IB_QP_CUR_STATE |
404 IB_QP_ALT_PATH |
405 IB_QP_ACCESS_FLAGS |
406 IB_QP_PATH_MIG_STATE),
407 [IB_QPT_RC] = (IB_QP_CUR_STATE |
408 IB_QP_ALT_PATH |
409 IB_QP_ACCESS_FLAGS |
410 IB_QP_MIN_RNR_TIMER |
411 IB_QP_PATH_MIG_STATE),
412 [IB_QPT_SMI] = (IB_QP_CUR_STATE |
413 IB_QP_QKEY),
414 [IB_QPT_GSI] = (IB_QP_CUR_STATE |
415 IB_QP_QKEY),
416 }
417 },
418 [IB_QPS_SQD] = {
419 .valid = 1,
420 .opt_param = {
421 [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
422 IB_QP_QKEY),
423 [IB_QPT_UC] = (IB_QP_AV |
424 IB_QP_ALT_PATH |
425 IB_QP_ACCESS_FLAGS |
426 IB_QP_PKEY_INDEX |
427 IB_QP_PATH_MIG_STATE),
428 [IB_QPT_RC] = (IB_QP_PORT |
429 IB_QP_AV |
430 IB_QP_TIMEOUT |
431 IB_QP_RETRY_CNT |
432 IB_QP_RNR_RETRY |
433 IB_QP_MAX_QP_RD_ATOMIC |
434 IB_QP_MAX_DEST_RD_ATOMIC |
435 IB_QP_ALT_PATH |
436 IB_QP_ACCESS_FLAGS |
437 IB_QP_PKEY_INDEX |
438 IB_QP_MIN_RNR_TIMER |
439 IB_QP_PATH_MIG_STATE),
440 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
441 IB_QP_QKEY),
442 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
443 IB_QP_QKEY),
444 }
445 }
446 },
447 [IB_QPS_SQE] = {
448 [IB_QPS_RESET] = { .valid = 1 },
449 [IB_QPS_ERR] = { .valid = 1 },
450 [IB_QPS_RTS] = {
451 .valid = 1,
452 .opt_param = {
453 [IB_QPT_UD] = (IB_QP_CUR_STATE |
454 IB_QP_QKEY),
455 [IB_QPT_UC] = (IB_QP_CUR_STATE |
456 IB_QP_ACCESS_FLAGS),
457 [IB_QPT_SMI] = (IB_QP_CUR_STATE |
458 IB_QP_QKEY),
459 [IB_QPT_GSI] = (IB_QP_CUR_STATE |
460 IB_QP_QKEY),
461 }
462 }
463 },
464 [IB_QPS_ERR] = {
465 [IB_QPS_RESET] = { .valid = 1 },
466 [IB_QPS_ERR] = { .valid = 1 }
467 }
468};
469
470int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
471 enum ib_qp_type type, enum ib_qp_attr_mask mask)
472{
473 enum ib_qp_attr_mask req_param, opt_param;
474
475 if (cur_state < 0 || cur_state > IB_QPS_ERR ||
476 next_state < 0 || next_state > IB_QPS_ERR)
477 return 0;
478
479 if (mask & IB_QP_CUR_STATE &&
480 cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
481 cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
482 return 0;
483
484 if (!qp_state_table[cur_state][next_state].valid)
485 return 0;
486
487 req_param = qp_state_table[cur_state][next_state].req_param[type];
488 opt_param = qp_state_table[cur_state][next_state].opt_param[type];
489
490 if ((mask & req_param) != req_param)
491 return 0;
492
493 if (mask & ~(req_param | opt_param | IB_QP_STATE))
494 return 0;
495
496 return 1;
497}
498EXPORT_SYMBOL(ib_modify_qp_is_ok);
499
248int ib_modify_qp(struct ib_qp *qp, 500int ib_modify_qp(struct ib_qp *qp,
249 struct ib_qp_attr *qp_attr, 501 struct ib_qp_attr *qp_attr,
250 int qp_attr_mask) 502 int qp_attr_mask)
@@ -322,11 +574,10 @@ int ib_destroy_cq(struct ib_cq *cq)
322} 574}
323EXPORT_SYMBOL(ib_destroy_cq); 575EXPORT_SYMBOL(ib_destroy_cq);
324 576
325int ib_resize_cq(struct ib_cq *cq, 577int ib_resize_cq(struct ib_cq *cq, int cqe)
326 int cqe)
327{ 578{
328 return cq->device->resize_cq ? 579 return cq->device->resize_cq ?
329 cq->device->resize_cq(cq, cqe) : -ENOSYS; 580 cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS;
330} 581}
331EXPORT_SYMBOL(ib_resize_cq); 582EXPORT_SYMBOL(ib_resize_cq);
332 583
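
ib_modify_qp_is_ok() reduces each state transition to two bitmasks: the attributes that must be supplied and the ones that may be. A stand-alone model of that check, using made-up attribute bits rather than the real IB_QP_* flags:

#include <stdio.h>

/* Toy attribute bits standing in for IB_QP_* flags. */
#define ATTR_STATE      (1 << 0)
#define ATTR_PKEY_INDEX (1 << 1)
#define ATTR_PORT       (1 << 2)
#define ATTR_ACCESS     (1 << 3)
#define ATTR_QKEY       (1 << 4)

/* A transition is legal when every required attribute is present and
 * nothing outside required | optional | state is present. */
static int transition_mask_ok(unsigned mask, unsigned req, unsigned opt)
{
	if ((mask & req) != req)
		return 0;
	if (mask & ~(req | opt | ATTR_STATE))
		return 0;
	return 1;
}

int main(void)
{
	/* Shaped like the RC RESET->INIT entry in the table above. */
	unsigned req = ATTR_PKEY_INDEX | ATTR_PORT | ATTR_ACCESS;
	unsigned opt = 0;

	printf("%d\n", transition_mask_ok(ATTR_STATE | req, req, opt));             /* 1 */
	printf("%d\n", transition_mask_ok(ATTR_STATE | ATTR_PKEY_INDEX, req, opt)); /* 0: PORT missing */
	printf("%d\n", transition_mask_ok(ATTR_STATE | req | ATTR_QKEY, req, opt)); /* 0: stray QKEY */
	return 0;
}
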
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index a19e0ed03d7c..bc5bdcbe51b5 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -147,7 +147,7 @@ int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah)
147 switch (ah->type) { 147 switch (ah->type) {
148 case MTHCA_AH_ON_HCA: 148 case MTHCA_AH_ON_HCA:
149 mthca_free(&dev->av_table.alloc, 149 mthca_free(&dev->av_table.alloc,
150 (ah->avdma - dev->av_table.ddr_av_base) / 150 (ah->avdma - dev->av_table.ddr_av_base) /
151 MTHCA_AV_SIZE); 151 MTHCA_AV_SIZE);
152 break; 152 break;
153 153
@@ -193,6 +193,37 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
193 return 0; 193 return 0;
194} 194}
195 195
196int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr)
197{
198 struct mthca_ah *ah = to_mah(ibah);
199 struct mthca_dev *dev = to_mdev(ibah->device);
200
201 /* Only implement for MAD and memfree ah for now. */
202 if (ah->type == MTHCA_AH_ON_HCA)
203 return -ENOSYS;
204
205 memset(attr, 0, sizeof *attr);
206 attr->dlid = be16_to_cpu(ah->av->dlid);
207 attr->sl = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
208 attr->static_rate = ah->av->msg_sr & 0x7;
209 attr->src_path_bits = ah->av->g_slid & 0x7F;
210 attr->port_num = be32_to_cpu(ah->av->port_pd) >> 24;
211 attr->ah_flags = mthca_ah_grh_present(ah) ? IB_AH_GRH : 0;
212
213 if (attr->ah_flags) {
214 attr->grh.traffic_class =
215 be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20;
216 attr->grh.flow_label =
217 be32_to_cpu(ah->av->sl_tclass_flowlabel) & 0xfffff;
218 attr->grh.hop_limit = ah->av->hop_limit;
219 attr->grh.sgid_index = ah->av->gid_index &
220 (dev->limits.gid_table_len - 1);
221 memcpy(attr->grh.dgid.raw, ah->av->dgid, 16);
222 }
223
224 return 0;
225}
226
196int __devinit mthca_init_av_table(struct mthca_dev *dev) 227int __devinit mthca_init_av_table(struct mthca_dev *dev)
197{ 228{
198 int err; 229 int err;
@@ -234,7 +265,7 @@ int __devinit mthca_init_av_table(struct mthca_dev *dev)
234 return -ENOMEM; 265 return -ENOMEM;
235} 266}
236 267
237void __devexit mthca_cleanup_av_table(struct mthca_dev *dev) 268void mthca_cleanup_av_table(struct mthca_dev *dev)
238{ 269{
239 if (mthca_is_memfree(dev)) 270 if (mthca_is_memfree(dev))
240 return; 271 return;
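
mthca_ah_query() above unpacks fields that the hardware AV keeps packed into one 32-bit sl_tclass_flowlabel word: SL in the top 4 bits, traffic class in the next 8, flow label in the low 20. A small host-order sketch of the same unpacking (the kernel does this after be32_to_cpu()):

#include <stdio.h>
#include <stdint.h>

struct decoded_av {
	unsigned sl;
	unsigned traffic_class;
	unsigned flow_label;
};

static struct decoded_av decode_sl_tclass_flowlabel(uint32_t word)
{
	struct decoded_av d;

	d.sl            = word >> 28;
	d.traffic_class = (word >> 20) & 0xff;
	d.flow_label    = word & 0xfffff;
	return d;
}

int main(void)
{
	/* SL 5, traffic class 0x3c, flow label 0x12345 */
	struct decoded_av d =
		decode_sl_tclass_flowlabel((5u << 28) | (0x3cu << 20) | 0x12345);

	printf("sl %u tclass 0x%x flow 0x%x\n", d.sl, d.traffic_class, d.flow_label);
	return 0;
}
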
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 2825615ce81c..343eca507870 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 3 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
4 * Copyright (c) 2005 Cisco Systems. All rights reserved. 4 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
5 * 5 *
6 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU 7 * licenses. You may choose to be licensed under the terms of the GNU
@@ -182,25 +182,58 @@ struct mthca_cmd_context {
182 u8 status; 182 u8 status;
183}; 183};
184 184
185static int fw_cmd_doorbell = 1;
186module_param(fw_cmd_doorbell, int, 0644);
187MODULE_PARM_DESC(fw_cmd_doorbell, "post FW commands through doorbell page if nonzero "
188 "(and supported by FW)");
189
185static inline int go_bit(struct mthca_dev *dev) 190static inline int go_bit(struct mthca_dev *dev)
186{ 191{
187 return readl(dev->hcr + HCR_STATUS_OFFSET) & 192 return readl(dev->hcr + HCR_STATUS_OFFSET) &
188 swab32(1 << HCR_GO_BIT); 193 swab32(1 << HCR_GO_BIT);
189} 194}
190 195
191static int mthca_cmd_post(struct mthca_dev *dev, 196static void mthca_cmd_post_dbell(struct mthca_dev *dev,
192 u64 in_param, 197 u64 in_param,
193 u64 out_param, 198 u64 out_param,
194 u32 in_modifier, 199 u32 in_modifier,
195 u8 op_modifier, 200 u8 op_modifier,
196 u16 op, 201 u16 op,
197 u16 token, 202 u16 token)
198 int event)
199{ 203{
200 int err = 0; 204 void __iomem *ptr = dev->cmd.dbell_map;
205 u16 *offs = dev->cmd.dbell_offsets;
201 206
202 mutex_lock(&dev->cmd.hcr_mutex); 207 __raw_writel((__force u32) cpu_to_be32(in_param >> 32), ptr + offs[0]);
208 wmb();
209 __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), ptr + offs[1]);
210 wmb();
211 __raw_writel((__force u32) cpu_to_be32(in_modifier), ptr + offs[2]);
212 wmb();
213 __raw_writel((__force u32) cpu_to_be32(out_param >> 32), ptr + offs[3]);
214 wmb();
215 __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), ptr + offs[4]);
216 wmb();
217 __raw_writel((__force u32) cpu_to_be32(token << 16), ptr + offs[5]);
218 wmb();
219 __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) |
220 (1 << HCA_E_BIT) |
221 (op_modifier << HCR_OPMOD_SHIFT) |
222 op), ptr + offs[6]);
223 wmb();
224 __raw_writel((__force u32) 0, ptr + offs[7]);
225 wmb();
226}
203 227
228static int mthca_cmd_post_hcr(struct mthca_dev *dev,
229 u64 in_param,
230 u64 out_param,
231 u32 in_modifier,
232 u8 op_modifier,
233 u16 op,
234 u16 token,
235 int event)
236{
204 if (event) { 237 if (event) {
205 unsigned long end = jiffies + GO_BIT_TIMEOUT; 238 unsigned long end = jiffies + GO_BIT_TIMEOUT;
206 239
@@ -210,10 +243,8 @@ static int mthca_cmd_post(struct mthca_dev *dev,
210 } 243 }
211 } 244 }
212 245
213 if (go_bit(dev)) { 246 if (go_bit(dev))
214 err = -EAGAIN; 247 return -EAGAIN;
215 goto out;
216 }
217 248
218 /* 249 /*
219 * We use writel (instead of something like memcpy_toio) 250 * We use writel (instead of something like memcpy_toio)
@@ -236,7 +267,29 @@ static int mthca_cmd_post(struct mthca_dev *dev,
236 (op_modifier << HCR_OPMOD_SHIFT) | 267 (op_modifier << HCR_OPMOD_SHIFT) |
237 op), dev->hcr + 6 * 4); 268 op), dev->hcr + 6 * 4);
238 269
239out: 270 return 0;
271}
272
273static int mthca_cmd_post(struct mthca_dev *dev,
274 u64 in_param,
275 u64 out_param,
276 u32 in_modifier,
277 u8 op_modifier,
278 u16 op,
279 u16 token,
280 int event)
281{
282 int err = 0;
283
284 mutex_lock(&dev->cmd.hcr_mutex);
285
286 if (event && dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS && fw_cmd_doorbell)
287 mthca_cmd_post_dbell(dev, in_param, out_param, in_modifier,
288 op_modifier, op, token);
289 else
290 err = mthca_cmd_post_hcr(dev, in_param, out_param, in_modifier,
291 op_modifier, op, token, event);
292
240 mutex_unlock(&dev->cmd.hcr_mutex); 293 mutex_unlock(&dev->cmd.hcr_mutex);
241 return err; 294 return err;
242} 295}
@@ -275,7 +328,7 @@ static int mthca_cmd_poll(struct mthca_dev *dev,
275 } 328 }
276 329
277 if (out_is_imm) 330 if (out_is_imm)
278 *out_param = 331 *out_param =
279 (u64) be32_to_cpu((__force __be32) 332 (u64) be32_to_cpu((__force __be32)
280 __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 | 333 __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 |
281 (u64) be32_to_cpu((__force __be32) 334 (u64) be32_to_cpu((__force __be32)
@@ -386,7 +439,7 @@ static int mthca_cmd_box(struct mthca_dev *dev,
386 unsigned long timeout, 439 unsigned long timeout,
387 u8 *status) 440 u8 *status)
388{ 441{
389 if (dev->cmd.use_events) 442 if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS)
390 return mthca_cmd_wait(dev, in_param, &out_param, 0, 443 return mthca_cmd_wait(dev, in_param, &out_param, 0,
391 in_modifier, op_modifier, op, 444 in_modifier, op_modifier, op,
392 timeout, status); 445 timeout, status);
@@ -423,7 +476,7 @@ static int mthca_cmd_imm(struct mthca_dev *dev,
423 unsigned long timeout, 476 unsigned long timeout,
424 u8 *status) 477 u8 *status)
425{ 478{
426 if (dev->cmd.use_events) 479 if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS)
427 return mthca_cmd_wait(dev, in_param, out_param, 1, 480 return mthca_cmd_wait(dev, in_param, out_param, 1,
428 in_modifier, op_modifier, op, 481 in_modifier, op_modifier, op,
429 timeout, status); 482 timeout, status);
@@ -437,7 +490,7 @@ int mthca_cmd_init(struct mthca_dev *dev)
437{ 490{
438 mutex_init(&dev->cmd.hcr_mutex); 491 mutex_init(&dev->cmd.hcr_mutex);
439 sema_init(&dev->cmd.poll_sem, 1); 492 sema_init(&dev->cmd.poll_sem, 1);
440 dev->cmd.use_events = 0; 493 dev->cmd.flags = 0;
441 494
442 dev->hcr = ioremap(pci_resource_start(dev->pdev, 0) + MTHCA_HCR_BASE, 495 dev->hcr = ioremap(pci_resource_start(dev->pdev, 0) + MTHCA_HCR_BASE,
443 MTHCA_HCR_SIZE); 496 MTHCA_HCR_SIZE);
@@ -461,6 +514,8 @@ void mthca_cmd_cleanup(struct mthca_dev *dev)
461{ 514{
462 pci_pool_destroy(dev->cmd.pool); 515 pci_pool_destroy(dev->cmd.pool);
463 iounmap(dev->hcr); 516 iounmap(dev->hcr);
517 if (dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS)
518 iounmap(dev->cmd.dbell_map);
464} 519}
465 520
466/* 521/*
@@ -498,7 +553,8 @@ int mthca_cmd_use_events(struct mthca_dev *dev)
498 ; /* nothing */ 553 ; /* nothing */
499 --dev->cmd.token_mask; 554 --dev->cmd.token_mask;
500 555
501 dev->cmd.use_events = 1; 556 dev->cmd.flags |= MTHCA_CMD_USE_EVENTS;
557
502 down(&dev->cmd.poll_sem); 558 down(&dev->cmd.poll_sem);
503 559
504 return 0; 560 return 0;
@@ -511,7 +567,7 @@ void mthca_cmd_use_polling(struct mthca_dev *dev)
511{ 567{
512 int i; 568 int i;
513 569
514 dev->cmd.use_events = 0; 570 dev->cmd.flags &= ~MTHCA_CMD_USE_EVENTS;
515 571
516 for (i = 0; i < dev->cmd.max_cmds; ++i) 572 for (i = 0; i < dev->cmd.max_cmds; ++i)
517 down(&dev->cmd.event_sem); 573 down(&dev->cmd.event_sem);
@@ -596,8 +652,9 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
596 * address or size and use that as our log2 size. 652 * address or size and use that as our log2 size.
597 */ 653 */
598 lg = ffs(mthca_icm_addr(&iter) | mthca_icm_size(&iter)) - 1; 654 lg = ffs(mthca_icm_addr(&iter) | mthca_icm_size(&iter)) - 1;
599 if (lg < 12) { 655 if (lg < MTHCA_ICM_PAGE_SHIFT) {
600 mthca_warn(dev, "Got FW area not aligned to 4K (%llx/%lx).\n", 656 mthca_warn(dev, "Got FW area not aligned to %d (%llx/%lx).\n",
657 MTHCA_ICM_PAGE_SIZE,
601 (unsigned long long) mthca_icm_addr(&iter), 658 (unsigned long long) mthca_icm_addr(&iter),
602 mthca_icm_size(&iter)); 659 mthca_icm_size(&iter));
603 err = -EINVAL; 660 err = -EINVAL;
@@ -609,8 +666,9 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
609 virt += 1 << lg; 666 virt += 1 << lg;
610 } 667 }
611 668
612 pages[nent * 2 + 1] = cpu_to_be64((mthca_icm_addr(&iter) + 669 pages[nent * 2 + 1] =
613 (i << lg)) | (lg - 12)); 670 cpu_to_be64((mthca_icm_addr(&iter) + (i << lg)) |
671 (lg - MTHCA_ICM_PAGE_SHIFT));
614 ts += 1 << (lg - 10); 672 ts += 1 << (lg - 10);
615 ++tc; 673 ++tc;
616 674
@@ -661,12 +719,41 @@ int mthca_RUN_FW(struct mthca_dev *dev, u8 *status)
661 return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A, status); 719 return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A, status);
662} 720}
663 721
722static void mthca_setup_cmd_doorbells(struct mthca_dev *dev, u64 base)
723{
724 unsigned long addr;
725 u16 max_off = 0;
726 int i;
727
728 for (i = 0; i < 8; ++i)
729 max_off = max(max_off, dev->cmd.dbell_offsets[i]);
730
731 if ((base & PAGE_MASK) != ((base + max_off) & PAGE_MASK)) {
732 mthca_warn(dev, "Firmware doorbell region at 0x%016llx, "
733 "length 0x%x crosses a page boundary\n",
734 (unsigned long long) base, max_off);
735 return;
736 }
737
738 addr = pci_resource_start(dev->pdev, 2) +
739 ((pci_resource_len(dev->pdev, 2) - 1) & base);
740 dev->cmd.dbell_map = ioremap(addr, max_off + sizeof(u32));
741 if (!dev->cmd.dbell_map)
742 return;
743
744 dev->cmd.flags |= MTHCA_CMD_POST_DOORBELLS;
745 mthca_dbg(dev, "Mapped doorbell page for posting FW commands\n");
746}
747
664int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) 748int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
665{ 749{
666 struct mthca_mailbox *mailbox; 750 struct mthca_mailbox *mailbox;
667 u32 *outbox; 751 u32 *outbox;
752 u64 base;
753 u32 tmp;
668 int err = 0; 754 int err = 0;
669 u8 lg; 755 u8 lg;
756 int i;
670 757
671#define QUERY_FW_OUT_SIZE 0x100 758#define QUERY_FW_OUT_SIZE 0x100
672#define QUERY_FW_VER_OFFSET 0x00 759#define QUERY_FW_VER_OFFSET 0x00
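
mthca_setup_cmd_doorbells() in the hunk above declines to use doorbells when the eight command words would straddle a page boundary, since the single mapping it creates must cover all of them. A stand-alone sketch of that boundary test (PAGE_MASK assumed to be the usual ~(page_size - 1)):

#include <stdio.h>
#include <stdint.h>

static int crosses_page(uint64_t base, unsigned max_off, uint64_t page_size)
{
	uint64_t mask = ~(page_size - 1);

	/* Same idea as (base & PAGE_MASK) != ((base + max_off) & PAGE_MASK):
	 * the first and last doorbell words must share one page. */
	return (base & mask) != ((base + max_off) & mask);
}

int main(void)
{
	printf("%d\n", crosses_page(0x10000, 0x1c, 4096));	/* 0: fits in one page */
	printf("%d\n", crosses_page(0x10ff0, 0x1c, 4096));	/* 1: straddles a page */
	return 0;
}
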
@@ -674,6 +761,10 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
674#define QUERY_FW_ERR_START_OFFSET 0x30 761#define QUERY_FW_ERR_START_OFFSET 0x30
675#define QUERY_FW_ERR_SIZE_OFFSET 0x38 762#define QUERY_FW_ERR_SIZE_OFFSET 0x38
676 763
764#define QUERY_FW_CMD_DB_EN_OFFSET 0x10
765#define QUERY_FW_CMD_DB_OFFSET 0x50
766#define QUERY_FW_CMD_DB_BASE 0x60
767
677#define QUERY_FW_START_OFFSET 0x20 768#define QUERY_FW_START_OFFSET 0x20
678#define QUERY_FW_END_OFFSET 0x28 769#define QUERY_FW_END_OFFSET 0x28
679 770
@@ -702,16 +793,29 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
702 ((dev->fw_ver & 0xffff0000ull) >> 16) | 793 ((dev->fw_ver & 0xffff0000ull) >> 16) |
703 ((dev->fw_ver & 0x0000ffffull) << 16); 794 ((dev->fw_ver & 0x0000ffffull) << 16);
704 795
796 mthca_dbg(dev, "FW version %012llx, max commands %d\n",
797 (unsigned long long) dev->fw_ver, dev->cmd.max_cmds);
798
705 MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); 799 MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET);
706 dev->cmd.max_cmds = 1 << lg; 800 dev->cmd.max_cmds = 1 << lg;
707 MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET); 801 MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET);
708 MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET); 802 MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET);
709 803
710 mthca_dbg(dev, "FW version %012llx, max commands %d\n",
711 (unsigned long long) dev->fw_ver, dev->cmd.max_cmds);
712 mthca_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x\n", 804 mthca_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x\n",
713 (unsigned long long) dev->catas_err.addr, dev->catas_err.size); 805 (unsigned long long) dev->catas_err.addr, dev->catas_err.size);
714 806
807 MTHCA_GET(tmp, outbox, QUERY_FW_CMD_DB_EN_OFFSET);
808 if (tmp & 0x1) {
809 mthca_dbg(dev, "FW supports commands through doorbells\n");
810
811 MTHCA_GET(base, outbox, QUERY_FW_CMD_DB_BASE);
812 for (i = 0; i < MTHCA_CMD_NUM_DBELL_DWORDS; ++i)
813 MTHCA_GET(dev->cmd.dbell_offsets[i], outbox,
814 QUERY_FW_CMD_DB_OFFSET + (i << 1));
815
816 mthca_setup_cmd_doorbells(dev, base);
817 }
818
715 if (mthca_is_memfree(dev)) { 819 if (mthca_is_memfree(dev)) {
716 MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET); 820 MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET);
717 MTHCA_GET(dev->fw.arbel.clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET); 821 MTHCA_GET(dev->fw.arbel.clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET);
@@ -720,12 +824,12 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
720 mthca_dbg(dev, "FW size %d KB\n", dev->fw.arbel.fw_pages << 2); 824 mthca_dbg(dev, "FW size %d KB\n", dev->fw.arbel.fw_pages << 2);
721 825
722 /* 826 /*
723 * Arbel page size is always 4 KB; round up number of 827 * Round up number of system pages needed in case
724 * system pages needed. 828 * MTHCA_ICM_PAGE_SIZE < PAGE_SIZE.
725 */ 829 */
726 dev->fw.arbel.fw_pages = 830 dev->fw.arbel.fw_pages =
727 ALIGN(dev->fw.arbel.fw_pages, PAGE_SIZE >> 12) >> 831 ALIGN(dev->fw.arbel.fw_pages, PAGE_SIZE / MTHCA_ICM_PAGE_SIZE) >>
728 (PAGE_SHIFT - 12); 832 (PAGE_SHIFT - MTHCA_ICM_PAGE_SHIFT);
729 833
730 mthca_dbg(dev, "Clear int @ %llx, EQ arm @ %llx, EQ set CI @ %llx\n", 834 mthca_dbg(dev, "Clear int @ %llx, EQ arm @ %llx, EQ set CI @ %llx\n",
731 (unsigned long long) dev->fw.arbel.clr_int_base, 835 (unsigned long long) dev->fw.arbel.clr_int_base,
@@ -1173,7 +1277,8 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
1173 int err; 1277 int err;
1174 1278
1175#define INIT_HCA_IN_SIZE 0x200 1279#define INIT_HCA_IN_SIZE 0x200
1176#define INIT_HCA_FLAGS_OFFSET 0x014 1280#define INIT_HCA_FLAGS1_OFFSET 0x00c
1281#define INIT_HCA_FLAGS2_OFFSET 0x014
1177#define INIT_HCA_QPC_OFFSET 0x020 1282#define INIT_HCA_QPC_OFFSET 0x020
1178#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10) 1283#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10)
1179#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17) 1284#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17)
@@ -1216,15 +1321,18 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
1216 1321
1217 memset(inbox, 0, INIT_HCA_IN_SIZE); 1322 memset(inbox, 0, INIT_HCA_IN_SIZE);
1218 1323
1324 if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
1325 MTHCA_PUT(inbox, 0x1, INIT_HCA_FLAGS1_OFFSET);
1326
1219#if defined(__LITTLE_ENDIAN) 1327#if defined(__LITTLE_ENDIAN)
1220 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1); 1328 *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
1221#elif defined(__BIG_ENDIAN) 1329#elif defined(__BIG_ENDIAN)
1222 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1); 1330 *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1 << 1);
1223#else 1331#else
1224#error Host endianness not defined 1332#error Host endianness not defined
1225#endif 1333#endif
1226 /* Check port for UD address vector: */ 1334 /* Check port for UD address vector: */
1227 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1); 1335 *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1);
1228 1336
1229 /* We leave wqe_quota, responder_exu, etc as 0 (default) */ 1337 /* We leave wqe_quota, responder_exu, etc as 0 (default) */
1230 1338
@@ -1438,11 +1546,11 @@ int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
1438 return ret; 1546 return ret;
1439 1547
1440 /* 1548 /*
1441 * Arbel page size is always 4 KB; round up number of system 1549 * Round up number of system pages needed in case
1442 * pages needed. 1550 * MTHCA_ICM_PAGE_SIZE < PAGE_SIZE.
1443 */ 1551 */
1444 *aux_pages = (*aux_pages + (1 << (PAGE_SHIFT - 12)) - 1) >> (PAGE_SHIFT - 12); 1552 *aux_pages = ALIGN(*aux_pages, PAGE_SIZE / MTHCA_ICM_PAGE_SIZE) >>
1445 *aux_pages = ALIGN(*aux_pages, PAGE_SIZE >> 12) >> (PAGE_SHIFT - 12); 1553 (PAGE_SHIFT - MTHCA_ICM_PAGE_SHIFT);
1446 1554
1447 return 0; 1555 return 0;
1448} 1556}
@@ -1514,6 +1622,37 @@ int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
1514 CMD_TIME_CLASS_A, status); 1622 CMD_TIME_CLASS_A, status);
1515} 1623}
1516 1624
1625int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size,
1626 u8 *status)
1627{
1628 struct mthca_mailbox *mailbox;
1629 __be32 *inbox;
1630 int err;
1631
1632#define RESIZE_CQ_IN_SIZE 0x40
1633#define RESIZE_CQ_LOG_SIZE_OFFSET 0x0c
1634#define RESIZE_CQ_LKEY_OFFSET 0x1c
1635
1636 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
1637 if (IS_ERR(mailbox))
1638 return PTR_ERR(mailbox);
1639 inbox = mailbox->buf;
1640
1641 memset(inbox, 0, RESIZE_CQ_IN_SIZE);
1642 /*
1643 * Leave start address fields zeroed out -- mthca assumes that
1644 * MRs for CQs always start at virtual address 0.
1645 */
1646 MTHCA_PUT(inbox, log_size, RESIZE_CQ_LOG_SIZE_OFFSET);
1647 MTHCA_PUT(inbox, lkey, RESIZE_CQ_LKEY_OFFSET);
1648
1649 err = mthca_cmd(dev, mailbox->dma, cq_num, 1, CMD_RESIZE_CQ,
1650 CMD_TIME_CLASS_B, status);
1651
1652 mthca_free_mailbox(dev, mailbox);
1653 return err;
1654}
1655
1517int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, 1656int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
1518 int srq_num, u8 *status) 1657 int srq_num, u8 *status)
1519{ 1658{
@@ -1529,37 +1668,69 @@ int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
1529 CMD_TIME_CLASS_A, status); 1668 CMD_TIME_CLASS_A, status);
1530} 1669}
1531 1670
1671int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num,
1672 struct mthca_mailbox *mailbox, u8 *status)
1673{
1674 return mthca_cmd_box(dev, 0, mailbox->dma, num, 0,
1675 CMD_QUERY_SRQ, CMD_TIME_CLASS_A, status);
1676}
1677
1532int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status) 1678int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status)
1533{ 1679{
1534 return mthca_cmd(dev, limit, srq_num, 0, CMD_ARM_SRQ, 1680 return mthca_cmd(dev, limit, srq_num, 0, CMD_ARM_SRQ,
1535 CMD_TIME_CLASS_B, status); 1681 CMD_TIME_CLASS_B, status);
1536} 1682}
1537 1683
1538int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, 1684int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur,
1539 int is_ee, struct mthca_mailbox *mailbox, u32 optmask, 1685 enum ib_qp_state next, u32 num, int is_ee,
1686 struct mthca_mailbox *mailbox, u32 optmask,
1540 u8 *status) 1687 u8 *status)
1541{ 1688{
1542 static const u16 op[] = { 1689 static const u16 op[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
1543 [MTHCA_TRANS_RST2INIT] = CMD_RST2INIT_QPEE, 1690 [IB_QPS_RESET] = {
1544 [MTHCA_TRANS_INIT2INIT] = CMD_INIT2INIT_QPEE, 1691 [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
1545 [MTHCA_TRANS_INIT2RTR] = CMD_INIT2RTR_QPEE, 1692 [IB_QPS_ERR] = CMD_2ERR_QPEE,
1546 [MTHCA_TRANS_RTR2RTS] = CMD_RTR2RTS_QPEE, 1693 [IB_QPS_INIT] = CMD_RST2INIT_QPEE,
1547 [MTHCA_TRANS_RTS2RTS] = CMD_RTS2RTS_QPEE, 1694 },
1548 [MTHCA_TRANS_SQERR2RTS] = CMD_SQERR2RTS_QPEE, 1695 [IB_QPS_INIT] = {
1549 [MTHCA_TRANS_ANY2ERR] = CMD_2ERR_QPEE, 1696 [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
1550 [MTHCA_TRANS_RTS2SQD] = CMD_RTS2SQD_QPEE, 1697 [IB_QPS_ERR] = CMD_2ERR_QPEE,
1551 [MTHCA_TRANS_SQD2SQD] = CMD_SQD2SQD_QPEE, 1698 [IB_QPS_INIT] = CMD_INIT2INIT_QPEE,
1552 [MTHCA_TRANS_SQD2RTS] = CMD_SQD2RTS_QPEE, 1699 [IB_QPS_RTR] = CMD_INIT2RTR_QPEE,
1553 [MTHCA_TRANS_ANY2RST] = CMD_ERR2RST_QPEE 1700 },
1701 [IB_QPS_RTR] = {
1702 [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
1703 [IB_QPS_ERR] = CMD_2ERR_QPEE,
1704 [IB_QPS_RTS] = CMD_RTR2RTS_QPEE,
1705 },
1706 [IB_QPS_RTS] = {
1707 [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
1708 [IB_QPS_ERR] = CMD_2ERR_QPEE,
1709 [IB_QPS_RTS] = CMD_RTS2RTS_QPEE,
1710 [IB_QPS_SQD] = CMD_RTS2SQD_QPEE,
1711 },
1712 [IB_QPS_SQD] = {
1713 [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
1714 [IB_QPS_ERR] = CMD_2ERR_QPEE,
1715 [IB_QPS_RTS] = CMD_SQD2RTS_QPEE,
1716 [IB_QPS_SQD] = CMD_SQD2SQD_QPEE,
1717 },
1718 [IB_QPS_SQE] = {
1719 [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
1720 [IB_QPS_ERR] = CMD_2ERR_QPEE,
1721 [IB_QPS_RTS] = CMD_SQERR2RTS_QPEE,
1722 },
1723 [IB_QPS_ERR] = {
1724 [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
1725 [IB_QPS_ERR] = CMD_2ERR_QPEE,
1726 }
1554 }; 1727 };
1728
1555 u8 op_mod = 0; 1729 u8 op_mod = 0;
1556 int my_mailbox = 0; 1730 int my_mailbox = 0;
1557 int err; 1731 int err;
1558 1732
1559 if (trans < 0 || trans >= ARRAY_SIZE(op)) 1733 if (op[cur][next] == CMD_ERR2RST_QPEE) {
1560 return -EINVAL;
1561
1562 if (trans == MTHCA_TRANS_ANY2RST) {
1563 op_mod = 3; /* don't write outbox, any->reset */ 1734 op_mod = 3; /* don't write outbox, any->reset */
1564 1735
1565 /* For debugging */ 1736 /* For debugging */
@@ -1571,34 +1742,35 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
1571 } else 1742 } else
1572 mailbox = NULL; 1743 mailbox = NULL;
1573 } 1744 }
1574 } else { 1745
1575 if (0) { 1746 err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
1747 (!!is_ee << 24) | num, op_mod,
1748 op[cur][next], CMD_TIME_CLASS_C, status);
1749
1750 if (0 && mailbox) {
1576 int i; 1751 int i;
1577 mthca_dbg(dev, "Dumping QP context:\n"); 1752 mthca_dbg(dev, "Dumping QP context:\n");
1578 printk(" opt param mask: %08x\n", be32_to_cpup(mailbox->buf)); 1753 printk(" %08x\n", be32_to_cpup(mailbox->buf));
1579 for (i = 0; i < 0x100 / 4; ++i) { 1754 for (i = 0; i < 0x100 / 4; ++i) {
1580 if (i % 8 == 0) 1755 if (i % 8 == 0)
1581 printk(" [%02x] ", i * 4); 1756 printk("[%02x] ", i * 4);
1582 printk(" %08x", 1757 printk(" %08x",
1583 be32_to_cpu(((__be32 *) mailbox->buf)[i + 2])); 1758 be32_to_cpu(((__be32 *) mailbox->buf)[i + 2]));
1584 if ((i + 1) % 8 == 0) 1759 if ((i + 1) % 8 == 0)
1585 printk("\n"); 1760 printk("\n");
1586 } 1761 }
1587 } 1762 }
1588 }
1589
1590 if (trans == MTHCA_TRANS_ANY2RST) {
1591 err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
1592 (!!is_ee << 24) | num, op_mod,
1593 op[trans], CMD_TIME_CLASS_C, status);
1594 1763
1595 if (0 && mailbox) { 1764 if (my_mailbox)
1765 mthca_free_mailbox(dev, mailbox);
1766 } else {
1767 if (0) {
1596 int i; 1768 int i;
1597 mthca_dbg(dev, "Dumping QP context:\n"); 1769 mthca_dbg(dev, "Dumping QP context:\n");
1598 printk(" %08x\n", be32_to_cpup(mailbox->buf)); 1770 printk(" opt param mask: %08x\n", be32_to_cpup(mailbox->buf));
1599 for (i = 0; i < 0x100 / 4; ++i) { 1771 for (i = 0; i < 0x100 / 4; ++i) {
1600 if (i % 8 == 0) 1772 if (i % 8 == 0)
1601 printk("[%02x] ", i * 4); 1773 printk(" [%02x] ", i * 4);
1602 printk(" %08x", 1774 printk(" %08x",
1603 be32_to_cpu(((__be32 *) mailbox->buf)[i + 2])); 1775 be32_to_cpu(((__be32 *) mailbox->buf)[i + 2]));
1604 if ((i + 1) % 8 == 0) 1776 if ((i + 1) % 8 == 0)
@@ -1606,12 +1778,9 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
1606 } 1778 }
1607 } 1779 }
1608 1780
1609 } else 1781 err = mthca_cmd(dev, mailbox->dma, optmask | (!!is_ee << 24) | num,
1610 err = mthca_cmd(dev, mailbox->dma, (!!is_ee << 24) | num, 1782 op_mod, op[cur][next], CMD_TIME_CLASS_C, status);
1611 op_mod, op[trans], CMD_TIME_CLASS_C, status); 1783 }
1612
1613 if (my_mailbox)
1614 mthca_free_mailbox(dev, mailbox);
1615 1784
1616 return err; 1785 return err;
1617} 1786}
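
The fw_pages and aux_pages hunks above both replace a hard-coded 4 KB assumption with MTHCA_ICM_PAGE_SIZE when converting the firmware's ICM page counts into host page counts. A minimal user-space sketch of that rounding, assuming the usual power-of-two ALIGN() macro and the MTHCA_ICM_PAGE_SHIFT value from mthca_memfree.h; this only illustrates the arithmetic, it is not driver code:

#include <stdio.h>

#define MTHCA_ICM_PAGE_SHIFT 12
#define MTHCA_ICM_PAGE_SIZE  (1UL << MTHCA_ICM_PAGE_SHIFT)
/* Round x up to a multiple of a, where a is a power of two. */
#define ALIGN(x, a)          (((x) + (a) - 1) & ~((a) - 1))

/*
 * Convert a count of 4 KB ICM pages reported by the firmware into a
 * count of host pages, rounding up when PAGE_SIZE > MTHCA_ICM_PAGE_SIZE.
 */
static unsigned long icm_to_system_pages(unsigned long icm_pages,
					 unsigned int page_shift)
{
	unsigned long page_size = 1UL << page_shift;

	return ALIGN(icm_pages, page_size / MTHCA_ICM_PAGE_SIZE) >>
		(page_shift - MTHCA_ICM_PAGE_SHIFT);
}

int main(void)
{
	/* 5 ICM pages on a 16 KB-page host fit in 2 host pages. */
	printf("%lu\n", icm_to_system_pages(5, 14));
	/* On a 4 KB-page host the count is unchanged. */
	printf("%lu\n", icm_to_system_pages(5, 12));
	return 0;
}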
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
index 18175bec84c2..e4ec35c40dd3 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.h
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -1,6 +1,7 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 3 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
4 * Copyright (c) 2006 Cisco Systems. All rights reserved.
4 * 5 *
5 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 7 * licenses. You may choose to be licensed under the terms of the GNU
@@ -73,9 +74,9 @@ enum {
73 MTHCA_CMD_STAT_REG_BOUND = 0x21, 74 MTHCA_CMD_STAT_REG_BOUND = 0x21,
74 /* HCA local attached memory not present: */ 75 /* HCA local attached memory not present: */
75 MTHCA_CMD_STAT_LAM_NOT_PRE = 0x22, 76 MTHCA_CMD_STAT_LAM_NOT_PRE = 0x22,
76 /* Bad management packet (silently discarded): */ 77 /* Bad management packet (silently discarded): */
77 MTHCA_CMD_STAT_BAD_PKT = 0x30, 78 MTHCA_CMD_STAT_BAD_PKT = 0x30,
78 /* More outstanding CQEs in CQ than new CQ size: */ 79 /* More outstanding CQEs in CQ than new CQ size: */
79 MTHCA_CMD_STAT_BAD_SIZE = 0x40 80 MTHCA_CMD_STAT_BAD_SIZE = 0x40
80}; 81};
81 82
@@ -298,13 +299,18 @@ int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
298 int cq_num, u8 *status); 299 int cq_num, u8 *status);
299int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, 300int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
300 int cq_num, u8 *status); 301 int cq_num, u8 *status);
302int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size,
303 u8 *status);
301int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, 304int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
302 int srq_num, u8 *status); 305 int srq_num, u8 *status);
303int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, 306int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
304 int srq_num, u8 *status); 307 int srq_num, u8 *status);
308int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num,
309 struct mthca_mailbox *mailbox, u8 *status);
305int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status); 310int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status);
306int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, 311int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur,
307 int is_ee, struct mthca_mailbox *mailbox, u32 optmask, 312 enum ib_qp_state next, u32 num, int is_ee,
313 struct mthca_mailbox *mailbox, u32 optmask,
308 u8 *status); 314 u8 *status);
309int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, 315int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
310 struct mthca_mailbox *mailbox, u8 *status); 316 struct mthca_mailbox *mailbox, u8 *status);
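
With the new prototype, mthca_MODIFY_QP() is driven by the current and next ib_qp_state instead of a driver-private transition enum: the state pair indexes a two-dimensional table of firmware opcodes, and the any-to-reset opcode is special-cased because it uses op_mod 3 and an optional outbox. A standalone sketch of that lookup pattern; the state names mirror enum ib_qp_state, but the opcode numbers and the zero-means-invalid check are illustrative only (the driver relies on the IB core to reject impossible transitions):

#include <stdio.h>

enum qp_state { QPS_RESET, QPS_INIT, QPS_RTR, QPS_RTS,
		QPS_SQD, QPS_SQE, QPS_ERR, QPS_NR };

/* Illustrative opcode numbers -- not the firmware CMD_* values. */
enum { OP_ANY2RST = 1, OP_2ERR, OP_RST2INIT, OP_INIT2RTR, OP_RTR2RTS };

static const unsigned short op[QPS_NR][QPS_NR] = {
	[QPS_RESET] = {
		[QPS_RESET] = OP_ANY2RST,
		[QPS_ERR]   = OP_2ERR,
		[QPS_INIT]  = OP_RST2INIT,
	},
	[QPS_INIT] = {
		[QPS_RESET] = OP_ANY2RST,
		[QPS_ERR]   = OP_2ERR,
		[QPS_RTR]   = OP_INIT2RTR,
	},
	[QPS_RTR] = {
		[QPS_RESET] = OP_ANY2RST,
		[QPS_ERR]   = OP_2ERR,
		[QPS_RTS]   = OP_RTR2RTS,
	},
	/* remaining rows elided */
};

int main(void)
{
	enum qp_state cur = QPS_INIT, next = QPS_RTR;
	unsigned short opcode = op[cur][next];

	if (!opcode)
		printf("no such transition\n");
	else if (opcode == OP_ANY2RST)
		printf("take the any->reset path (op_mod 3, outbox optional)\n");
	else
		printf("post opcode %u for transition %d -> %d\n",
		       opcode, cur, next);
	return 0;
}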
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 96f1a86bf049..312cf90731ea 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4 * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006 Cisco Systems, Inc. All rights reserved.
5 * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 5 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
6 * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 6 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
7 * 7 *
@@ -150,24 +150,29 @@ struct mthca_err_cqe {
150#define MTHCA_ARBEL_CQ_DB_REQ_NOT (2 << 24) 150#define MTHCA_ARBEL_CQ_DB_REQ_NOT (2 << 24)
151#define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24) 151#define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24)
152 152
153static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry) 153static inline struct mthca_cqe *get_cqe_from_buf(struct mthca_cq_buf *buf,
154 int entry)
154{ 155{
155 if (cq->is_direct) 156 if (buf->is_direct)
156 return cq->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE); 157 return buf->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE);
157 else 158 else
158 return cq->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf 159 return buf->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf
159 + (entry * MTHCA_CQ_ENTRY_SIZE) % PAGE_SIZE; 160 + (entry * MTHCA_CQ_ENTRY_SIZE) % PAGE_SIZE;
160} 161}
161 162
162static inline struct mthca_cqe *cqe_sw(struct mthca_cq *cq, int i) 163static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry)
164{
165 return get_cqe_from_buf(&cq->buf, entry);
166}
167
168static inline struct mthca_cqe *cqe_sw(struct mthca_cqe *cqe)
163{ 169{
164 struct mthca_cqe *cqe = get_cqe(cq, i);
165 return MTHCA_CQ_ENTRY_OWNER_HW & cqe->owner ? NULL : cqe; 170 return MTHCA_CQ_ENTRY_OWNER_HW & cqe->owner ? NULL : cqe;
166} 171}
167 172
168static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq) 173static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq)
169{ 174{
170 return cqe_sw(cq, cq->cons_index & cq->ibcq.cqe); 175 return cqe_sw(get_cqe(cq, cq->cons_index & cq->ibcq.cqe));
171} 176}
172 177
173static inline void set_cqe_hw(struct mthca_cqe *cqe) 178static inline void set_cqe_hw(struct mthca_cqe *cqe)
@@ -289,7 +294,7 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
289 * from our QP and therefore don't need to be checked. 294 * from our QP and therefore don't need to be checked.
290 */ 295 */
291 for (prod_index = cq->cons_index; 296 for (prod_index = cq->cons_index;
292 cqe_sw(cq, prod_index & cq->ibcq.cqe); 297 cqe_sw(get_cqe(cq, prod_index & cq->ibcq.cqe));
293 ++prod_index) 298 ++prod_index)
294 if (prod_index == cq->cons_index + cq->ibcq.cqe) 299 if (prod_index == cq->cons_index + cq->ibcq.cqe)
295 break; 300 break;
@@ -324,12 +329,58 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
324 wake_up(&cq->wait); 329 wake_up(&cq->wait);
325} 330}
326 331
327static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, 332void mthca_cq_resize_copy_cqes(struct mthca_cq *cq)
328 struct mthca_qp *qp, int wqe_index, int is_send, 333{
329 struct mthca_err_cqe *cqe, 334 int i;
330 struct ib_wc *entry, int *free_cqe) 335
336 /*
337 * In Tavor mode, the hardware keeps the consumer and producer
338 * indices mod the CQ size. Since we might be making the CQ
339 * bigger, we need to deal with the case where the producer
340 * index wrapped around before the CQ was resized.
341 */
342 if (!mthca_is_memfree(to_mdev(cq->ibcq.device)) &&
343 cq->ibcq.cqe < cq->resize_buf->cqe) {
344 cq->cons_index &= cq->ibcq.cqe;
345 if (cqe_sw(get_cqe(cq, cq->ibcq.cqe)))
346 cq->cons_index -= cq->ibcq.cqe + 1;
347 }
348
349 for (i = cq->cons_index; cqe_sw(get_cqe(cq, i & cq->ibcq.cqe)); ++i)
350 memcpy(get_cqe_from_buf(&cq->resize_buf->buf,
351 i & cq->resize_buf->cqe),
352 get_cqe(cq, i & cq->ibcq.cqe), MTHCA_CQ_ENTRY_SIZE);
353}
354
355int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent)
356{
357 int ret;
358 int i;
359
360 ret = mthca_buf_alloc(dev, nent * MTHCA_CQ_ENTRY_SIZE,
361 MTHCA_MAX_DIRECT_CQ_SIZE,
362 &buf->queue, &buf->is_direct,
363 &dev->driver_pd, 1, &buf->mr);
364 if (ret)
365 return ret;
366
367 for (i = 0; i < nent; ++i)
368 set_cqe_hw(get_cqe_from_buf(buf, i));
369
370 return 0;
371}
372
373void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe)
374{
375 mthca_buf_free(dev, (cqe + 1) * MTHCA_CQ_ENTRY_SIZE, &buf->queue,
376 buf->is_direct, &buf->mr);
377}
378
379static void handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
380 struct mthca_qp *qp, int wqe_index, int is_send,
381 struct mthca_err_cqe *cqe,
382 struct ib_wc *entry, int *free_cqe)
331{ 383{
332 int err;
333 int dbd; 384 int dbd;
334 __be32 new_wqe; 385 __be32 new_wqe;
335 386
@@ -412,11 +463,9 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
412 * error case, so we don't have to check the doorbell count, etc. 463 * error case, so we don't have to check the doorbell count, etc.
413 */ 464 */
414 if (mthca_is_memfree(dev)) 465 if (mthca_is_memfree(dev))
415 return 0; 466 return;
416 467
417 err = mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe); 468 mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe);
418 if (err)
419 return err;
420 469
421 /* 470 /*
422 * If we're at the end of the WQE chain, or we've used up our 471 * If we're at the end of the WQE chain, or we've used up our
@@ -424,15 +473,13 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
424 * the next poll operation. 473 * the next poll operation.
425 */ 474 */
426 if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd)) 475 if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd))
427 return 0; 476 return;
428 477
429 cqe->db_cnt = cpu_to_be16(be16_to_cpu(cqe->db_cnt) - dbd); 478 cqe->db_cnt = cpu_to_be16(be16_to_cpu(cqe->db_cnt) - dbd);
430 cqe->wqe = new_wqe; 479 cqe->wqe = new_wqe;
431 cqe->syndrome = SYNDROME_WR_FLUSH_ERR; 480 cqe->syndrome = SYNDROME_WR_FLUSH_ERR;
432 481
433 *free_cqe = 0; 482 *free_cqe = 0;
434
435 return 0;
436} 483}
437 484
438static inline int mthca_poll_one(struct mthca_dev *dev, 485static inline int mthca_poll_one(struct mthca_dev *dev,
@@ -518,9 +565,9 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
518 } 565 }
519 566
520 if (is_error) { 567 if (is_error) {
521 err = handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send, 568 handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send,
522 (struct mthca_err_cqe *) cqe, 569 (struct mthca_err_cqe *) cqe,
523 entry, &free_cqe); 570 entry, &free_cqe);
524 goto out; 571 goto out;
525 } 572 }
526 573
@@ -614,11 +661,14 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
614 661
615 spin_lock_irqsave(&cq->lock, flags); 662 spin_lock_irqsave(&cq->lock, flags);
616 663
617 for (npolled = 0; npolled < num_entries; ++npolled) { 664 npolled = 0;
665repoll:
666 while (npolled < num_entries) {
618 err = mthca_poll_one(dev, cq, &qp, 667 err = mthca_poll_one(dev, cq, &qp,
619 &freed, entry + npolled); 668 &freed, entry + npolled);
620 if (err) 669 if (err)
621 break; 670 break;
671 ++npolled;
622 } 672 }
623 673
624 if (freed) { 674 if (freed) {
@@ -626,6 +676,42 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
626 update_cons_index(dev, cq, freed); 676 update_cons_index(dev, cq, freed);
627 } 677 }
628 678
679 /*
680 * If a CQ resize is in progress and we discovered that the
681 * old buffer is empty, then peek in the new buffer, and if
682 * it's not empty, switch to the new buffer and continue
683 * polling there.
684 */
685 if (unlikely(err == -EAGAIN && cq->resize_buf &&
686 cq->resize_buf->state == CQ_RESIZE_READY)) {
687 /*
688 * In Tavor mode, the hardware keeps the producer
689 * index modulo the CQ size. Since we might be making
690 * the CQ bigger, we need to mask our consumer index
691 * using the size of the old CQ buffer before looking
692 * in the new CQ buffer.
693 */
694 if (!mthca_is_memfree(dev))
695 cq->cons_index &= cq->ibcq.cqe;
696
697 if (cqe_sw(get_cqe_from_buf(&cq->resize_buf->buf,
698 cq->cons_index & cq->resize_buf->cqe))) {
699 struct mthca_cq_buf tbuf;
700 int tcqe;
701
702 tbuf = cq->buf;
703 tcqe = cq->ibcq.cqe;
704 cq->buf = cq->resize_buf->buf;
705 cq->ibcq.cqe = cq->resize_buf->cqe;
706
707 cq->resize_buf->buf = tbuf;
708 cq->resize_buf->cqe = tcqe;
709 cq->resize_buf->state = CQ_RESIZE_SWAPPED;
710
711 goto repoll;
712 }
713 }
714
629 spin_unlock_irqrestore(&cq->lock, flags); 715 spin_unlock_irqrestore(&cq->lock, flags);
630 716
631 return err == 0 || err == -EAGAIN ? npolled : err; 717 return err == 0 || err == -EAGAIN ? npolled : err;
@@ -684,24 +770,14 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
684 return 0; 770 return 0;
685} 771}
686 772
687static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq)
688{
689 mthca_buf_free(dev, (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE,
690 &cq->queue, cq->is_direct, &cq->mr);
691}
692
693int mthca_init_cq(struct mthca_dev *dev, int nent, 773int mthca_init_cq(struct mthca_dev *dev, int nent,
694 struct mthca_ucontext *ctx, u32 pdn, 774 struct mthca_ucontext *ctx, u32 pdn,
695 struct mthca_cq *cq) 775 struct mthca_cq *cq)
696{ 776{
697 int size = nent * MTHCA_CQ_ENTRY_SIZE;
698 struct mthca_mailbox *mailbox; 777 struct mthca_mailbox *mailbox;
699 struct mthca_cq_context *cq_context; 778 struct mthca_cq_context *cq_context;
700 int err = -ENOMEM; 779 int err = -ENOMEM;
701 u8 status; 780 u8 status;
702 int i;
703
704 might_sleep();
705 781
706 cq->ibcq.cqe = nent - 1; 782 cq->ibcq.cqe = nent - 1;
707 cq->is_kernel = !ctx; 783 cq->is_kernel = !ctx;
@@ -739,14 +815,9 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
739 cq_context = mailbox->buf; 815 cq_context = mailbox->buf;
740 816
741 if (cq->is_kernel) { 817 if (cq->is_kernel) {
742 err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_CQ_SIZE, 818 err = mthca_alloc_cq_buf(dev, &cq->buf, nent);
743 &cq->queue, &cq->is_direct,
744 &dev->driver_pd, 1, &cq->mr);
745 if (err) 819 if (err)
746 goto err_out_mailbox; 820 goto err_out_mailbox;
747
748 for (i = 0; i < nent; ++i)
749 set_cqe_hw(get_cqe(cq, i));
750 } 821 }
751 822
752 spin_lock_init(&cq->lock); 823 spin_lock_init(&cq->lock);
@@ -765,7 +836,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
765 cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); 836 cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
766 cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn); 837 cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
767 cq_context->pd = cpu_to_be32(pdn); 838 cq_context->pd = cpu_to_be32(pdn);
768 cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey); 839 cq_context->lkey = cpu_to_be32(cq->buf.mr.ibmr.lkey);
769 cq_context->cqn = cpu_to_be32(cq->cqn); 840 cq_context->cqn = cpu_to_be32(cq->cqn);
770 841
771 if (mthca_is_memfree(dev)) { 842 if (mthca_is_memfree(dev)) {
@@ -803,7 +874,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
803 874
804err_out_free_mr: 875err_out_free_mr:
805 if (cq->is_kernel) 876 if (cq->is_kernel)
806 mthca_free_cq_buf(dev, cq); 877 mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
807 878
808err_out_mailbox: 879err_out_mailbox:
809 mthca_free_mailbox(dev, mailbox); 880 mthca_free_mailbox(dev, mailbox);
@@ -832,8 +903,6 @@ void mthca_free_cq(struct mthca_dev *dev,
832 int err; 903 int err;
833 u8 status; 904 u8 status;
834 905
835 might_sleep();
836
837 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); 906 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
838 if (IS_ERR(mailbox)) { 907 if (IS_ERR(mailbox)) {
839 mthca_warn(dev, "No memory for mailbox to free CQ.\n"); 908 mthca_warn(dev, "No memory for mailbox to free CQ.\n");
@@ -871,7 +940,7 @@ void mthca_free_cq(struct mthca_dev *dev,
871 wait_event(cq->wait, !atomic_read(&cq->refcount)); 940 wait_event(cq->wait, !atomic_read(&cq->refcount));
872 941
873 if (cq->is_kernel) { 942 if (cq->is_kernel) {
874 mthca_free_cq_buf(dev, cq); 943 mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
875 if (mthca_is_memfree(dev)) { 944 if (mthca_is_memfree(dev)) {
876 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); 945 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
877 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); 946 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
@@ -904,7 +973,7 @@ int __devinit mthca_init_cq_table(struct mthca_dev *dev)
904 return err; 973 return err;
905} 974}
906 975
907void __devexit mthca_cleanup_cq_table(struct mthca_dev *dev) 976void mthca_cleanup_cq_table(struct mthca_dev *dev)
908{ 977{
909 mthca_array_cleanup(&dev->cq_table.cq, dev->limits.num_cqs); 978 mthca_array_cleanup(&dev->cq_table.cq, dev->limits.num_cqs);
910 mthca_alloc_cleanup(&dev->cq_table.alloc); 979 mthca_alloc_cleanup(&dev->cq_table.alloc);
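
Factoring get_cqe_from_buf() out of get_cqe() lets the same addressing serve both the live CQ buffer and the resize buffer that mthca_cq_resize_copy_cqes() and the repoll path peek into. The addressing itself is unchanged: a CQE index becomes a page index plus a byte offset whenever the buffer is built from a page list. A small sketch of that split, with 32-byte CQEs and 4 KB pages assumed as stand-ins for MTHCA_CQ_ENTRY_SIZE and PAGE_SIZE:

#include <stdio.h>

#define CQ_ENTRY_SIZE 32	/* assumed stand-in for MTHCA_CQ_ENTRY_SIZE */
#define BUF_PAGE_SIZE 4096	/* assumed stand-in for PAGE_SIZE */

/* Map a CQE index onto (page index, byte offset) in a paged CQ buffer. */
static void cqe_addr(int entry, int *page, int *offset)
{
	*page   = entry * CQ_ENTRY_SIZE / BUF_PAGE_SIZE;
	*offset = entry * CQ_ENTRY_SIZE % BUF_PAGE_SIZE;
}

int main(void)
{
	int page, offset;

	cqe_addr(130, &page, &offset);	/* 130 * 32 = 4160 bytes in */
	printf("CQE 130 -> page %d, offset %d\n", page, offset);
	return 0;
}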
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index e481037288d6..ad52edbefe98 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4 * Copyright (c) 2005 Cisco Systems. All rights reserved. 4 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
5 * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 5 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
6 * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 6 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
7 * 7 *
@@ -53,8 +53,8 @@
53 53
54#define DRV_NAME "ib_mthca" 54#define DRV_NAME "ib_mthca"
55#define PFX DRV_NAME ": " 55#define PFX DRV_NAME ": "
56#define DRV_VERSION "0.07" 56#define DRV_VERSION "0.08"
57#define DRV_RELDATE "February 13, 2006" 57#define DRV_RELDATE "February 14, 2006"
58 58
59enum { 59enum {
60 MTHCA_FLAG_DDR_HIDDEN = 1 << 1, 60 MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
@@ -64,7 +64,8 @@ enum {
64 MTHCA_FLAG_NO_LAM = 1 << 5, 64 MTHCA_FLAG_NO_LAM = 1 << 5,
65 MTHCA_FLAG_FMR = 1 << 6, 65 MTHCA_FLAG_FMR = 1 << 6,
66 MTHCA_FLAG_MEMFREE = 1 << 7, 66 MTHCA_FLAG_MEMFREE = 1 << 7,
67 MTHCA_FLAG_PCIE = 1 << 8 67 MTHCA_FLAG_PCIE = 1 << 8,
68 MTHCA_FLAG_SINAI_OPT = 1 << 9
68}; 69};
69 70
70enum { 71enum {
@@ -110,9 +111,17 @@ enum {
110 MTHCA_OPCODE_INVALID = 0xff 111 MTHCA_OPCODE_INVALID = 0xff
111}; 112};
112 113
114enum {
115 MTHCA_CMD_USE_EVENTS = 1 << 0,
116 MTHCA_CMD_POST_DOORBELLS = 1 << 1
117};
118
119enum {
120 MTHCA_CMD_NUM_DBELL_DWORDS = 8
121};
122
113struct mthca_cmd { 123struct mthca_cmd {
114 struct pci_pool *pool; 124 struct pci_pool *pool;
115 int use_events;
116 struct mutex hcr_mutex; 125 struct mutex hcr_mutex;
117 struct semaphore poll_sem; 126 struct semaphore poll_sem;
118 struct semaphore event_sem; 127 struct semaphore event_sem;
@@ -121,6 +130,9 @@ struct mthca_cmd {
121 int free_head; 130 int free_head;
122 struct mthca_cmd_context *context; 131 struct mthca_cmd_context *context;
123 u16 token_mask; 132 u16 token_mask;
133 u32 flags;
134 void __iomem *dbell_map;
135 u16 dbell_offsets[MTHCA_CMD_NUM_DBELL_DWORDS];
124}; 136};
125 137
126struct mthca_limits { 138struct mthca_limits {
@@ -470,12 +482,16 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
470 enum ib_event_type event_type); 482 enum ib_event_type event_type);
471void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, 483void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
472 struct mthca_srq *srq); 484 struct mthca_srq *srq);
485void mthca_cq_resize_copy_cqes(struct mthca_cq *cq);
486int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent);
487void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe);
473 488
474int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, 489int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
475 struct ib_srq_attr *attr, struct mthca_srq *srq); 490 struct ib_srq_attr *attr, struct mthca_srq *srq);
476void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); 491void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
477int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, 492int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
478 enum ib_srq_attr_mask attr_mask); 493 enum ib_srq_attr_mask attr_mask);
494int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
479void mthca_srq_event(struct mthca_dev *dev, u32 srqn, 495void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
480 enum ib_event_type event_type); 496 enum ib_event_type event_type);
481void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); 497void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
@@ -486,6 +502,8 @@ int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
486 502
487void mthca_qp_event(struct mthca_dev *dev, u32 qpn, 503void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
488 enum ib_event_type event_type); 504 enum ib_event_type event_type);
505int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
506 struct ib_qp_init_attr *qp_init_attr);
489int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask); 507int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask);
490int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 508int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
491 struct ib_send_wr **bad_wr); 509 struct ib_send_wr **bad_wr);
@@ -495,8 +513,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
495 struct ib_send_wr **bad_wr); 513 struct ib_send_wr **bad_wr);
496int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, 514int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
497 struct ib_recv_wr **bad_wr); 515 struct ib_recv_wr **bad_wr);
498int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, 516void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
499 int index, int *dbd, __be32 *new_wqe); 517 int index, int *dbd, __be32 *new_wqe);
500int mthca_alloc_qp(struct mthca_dev *dev, 518int mthca_alloc_qp(struct mthca_dev *dev,
501 struct mthca_pd *pd, 519 struct mthca_pd *pd,
502 struct mthca_cq *send_cq, 520 struct mthca_cq *send_cq,
@@ -522,6 +540,7 @@ int mthca_create_ah(struct mthca_dev *dev,
522int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah); 540int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah);
523int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, 541int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
524 struct ib_ud_header *header); 542 struct ib_ud_header *header);
543int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr);
525int mthca_ah_grh_present(struct mthca_ah *ah); 544int mthca_ah_grh_present(struct mthca_ah *ah);
526 545
527int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); 546int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
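
struct mthca_cmd now carries a flags word instead of the single use_events int, so the command layer can record both the completion mode and whether QUERY_FW reported doorbell support. A trivial sketch of testing those bits; the enum values are the ones added above, everything else is illustrative:

#include <stdio.h>

enum {
	MTHCA_CMD_USE_EVENTS     = 1 << 0,
	MTHCA_CMD_POST_DOORBELLS = 1 << 1
};

int main(void)
{
	unsigned int flags = 0;

	flags |= MTHCA_CMD_POST_DOORBELLS;	/* QUERY_FW advertised doorbells */
	flags |= MTHCA_CMD_USE_EVENTS;		/* event-driven command completion */

	if (flags & MTHCA_CMD_POST_DOORBELLS)
		printf("post commands through doorbells\n");
	if (flags & MTHCA_CMD_USE_EVENTS)
		printf("wait for command completion events instead of polling\n");
	return 0;
}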
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 2eabb27804cd..99f109c3815d 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -497,7 +497,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
497 497
498 eq->dev = dev; 498 eq->dev = dev;
499 eq->nent = roundup_pow_of_two(max(nent, 2)); 499 eq->nent = roundup_pow_of_two(max(nent, 2));
500 npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE; 500 npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE;
501 501
502 eq->page_list = kmalloc(npages * sizeof *eq->page_list, 502 eq->page_list = kmalloc(npages * sizeof *eq->page_list,
503 GFP_KERNEL); 503 GFP_KERNEL);
@@ -765,7 +765,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
765 765
766} 766}
767 767
768static void __devexit mthca_unmap_eq_regs(struct mthca_dev *dev) 768static void mthca_unmap_eq_regs(struct mthca_dev *dev)
769{ 769{
770 if (mthca_is_memfree(dev)) { 770 if (mthca_is_memfree(dev)) {
771 mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & 771 mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
@@ -821,11 +821,11 @@ int __devinit mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
821 return ret; 821 return ret;
822} 822}
823 823
824void __devexit mthca_unmap_eq_icm(struct mthca_dev *dev) 824void mthca_unmap_eq_icm(struct mthca_dev *dev)
825{ 825{
826 u8 status; 826 u8 status;
827 827
828 mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, PAGE_SIZE / 4096, &status); 828 mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, 1, &status);
829 pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE, 829 pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
830 PCI_DMA_BIDIRECTIONAL); 830 PCI_DMA_BIDIRECTIONAL);
831 __free_page(dev->eq_table.icm_page); 831 __free_page(dev->eq_table.icm_page);
@@ -928,7 +928,7 @@ int __devinit mthca_init_eq_table(struct mthca_dev *dev)
928 mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n", 928 mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n",
929 dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status); 929 dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status);
930 930
931 for (i = 0; i < MTHCA_EQ_CMD; ++i) 931 for (i = 0; i < MTHCA_NUM_EQ; ++i)
932 if (mthca_is_memfree(dev)) 932 if (mthca_is_memfree(dev))
933 arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask); 933 arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask);
934 else 934 else
@@ -954,7 +954,7 @@ err_out_free:
954 return err; 954 return err;
955} 955}
956 956
957void __devexit mthca_cleanup_eq_table(struct mthca_dev *dev) 957void mthca_cleanup_eq_table(struct mthca_dev *dev)
958{ 958{
959 u8 status; 959 u8 status;
960 int i; 960 int i;
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 1229c604c6e0..dfb482eac9a2 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -109,6 +109,19 @@ static void smp_snoop(struct ib_device *ibdev,
109 } 109 }
110} 110}
111 111
112static void node_desc_override(struct ib_device *dev,
113 struct ib_mad *mad)
114{
115 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
116 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
117 mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
118 mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
119 mutex_lock(&to_mdev(dev)->cap_mask_mutex);
120 memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
121 mutex_unlock(&to_mdev(dev)->cap_mask_mutex);
122 }
123}
124
112static void forward_trap(struct mthca_dev *dev, 125static void forward_trap(struct mthca_dev *dev,
113 u8 port_num, 126 u8 port_num,
114 struct ib_mad *mad) 127 struct ib_mad *mad)
@@ -207,8 +220,10 @@ int mthca_process_mad(struct ib_device *ibdev,
207 return IB_MAD_RESULT_FAILURE; 220 return IB_MAD_RESULT_FAILURE;
208 } 221 }
209 222
210 if (!out_mad->mad_hdr.status) 223 if (!out_mad->mad_hdr.status) {
211 smp_snoop(ibdev, port_num, in_mad); 224 smp_snoop(ibdev, port_num, in_mad);
225 node_desc_override(ibdev, out_mad);
226 }
212 227
213 /* set return bit in status of directed route responses */ 228 /* set return bit in status of directed route responses */
214 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) 229 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
@@ -256,7 +271,7 @@ err:
256 return PTR_ERR(agent); 271 return PTR_ERR(agent);
257} 272}
258 273
259void mthca_free_agents(struct mthca_dev *dev) 274void __devexit mthca_free_agents(struct mthca_dev *dev)
260{ 275{
261 struct ib_mad_agent *agent; 276 struct ib_mad_agent *agent;
262 int p, q; 277 int p, q;
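
node_desc_override() replaces the NodeDescription payload of an outgoing GetResp SMP with the string kept in ib_device->node_desc, under cap_mask_mutex. A self-contained sketch of the same check-and-copy; the header layout and the constant values are simplified stand-ins for the ib_mad definitions, not the real wire format:

#include <stdio.h>
#include <string.h>

enum {
	MGMT_CLASS_SUBN_LID_ROUTED     = 0x01,
	MGMT_CLASS_SUBN_DIRECTED_ROUTE = 0x81,
	METHOD_GET_RESP                = 0x81,
	ATTR_NODE_DESC                 = 0x0010
};

struct mad_hdr {
	unsigned char  mgmt_class;
	unsigned char  method;
	unsigned short attr_id;
};

/* Overwrite the 64-byte NodeDescription in a GetResp SMP. */
static void node_desc_override(const struct mad_hdr *hdr, char *smp_data,
			       const char *node_desc)
{
	if ((hdr->mgmt_class == MGMT_CLASS_SUBN_LID_ROUTED ||
	     hdr->mgmt_class == MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
	    hdr->method == METHOD_GET_RESP &&
	    hdr->attr_id == ATTR_NODE_DESC)
		memcpy(smp_data, node_desc, 64);
}

int main(void)
{
	struct mad_hdr hdr = { MGMT_CLASS_SUBN_LID_ROUTED,
			       METHOD_GET_RESP, ATTR_NODE_DESC };
	char smp_data[64]  = "firmware default";
	char node_desc[64] = "my node description";

	node_desc_override(&hdr, smp_data, node_desc);
	printf("%.64s\n", smp_data);
	return 0;
}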
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 9c849d27b06e..266f347c6707 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -935,13 +935,19 @@ enum {
935 935
936static struct { 936static struct {
937 u64 latest_fw; 937 u64 latest_fw;
938 int is_memfree; 938 u32 flags;
939 int is_pcie;
940} mthca_hca_table[] = { 939} mthca_hca_table[] = {
941 [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 3), .is_memfree = 0, .is_pcie = 0 }, 940 [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 4, 0),
942 [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 0), .is_memfree = 0, .is_pcie = 1 }, 941 .flags = 0 },
943 [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0), .is_memfree = 1, .is_pcie = 1 }, 942 [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 400),
944 [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 1), .is_memfree = 1, .is_pcie = 1 } 943 .flags = MTHCA_FLAG_PCIE },
944 [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0),
945 .flags = MTHCA_FLAG_MEMFREE |
946 MTHCA_FLAG_PCIE },
947 [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 800),
948 .flags = MTHCA_FLAG_MEMFREE |
949 MTHCA_FLAG_PCIE |
950 MTHCA_FLAG_SINAI_OPT }
945}; 951};
946 952
947static int __devinit mthca_init_one(struct pci_dev *pdev, 953static int __devinit mthca_init_one(struct pci_dev *pdev,
@@ -1031,12 +1037,9 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
1031 1037
1032 mdev->pdev = pdev; 1038 mdev->pdev = pdev;
1033 1039
1040 mdev->mthca_flags = mthca_hca_table[id->driver_data].flags;
1034 if (ddr_hidden) 1041 if (ddr_hidden)
1035 mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN; 1042 mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
1036 if (mthca_hca_table[id->driver_data].is_memfree)
1037 mdev->mthca_flags |= MTHCA_FLAG_MEMFREE;
1038 if (mthca_hca_table[id->driver_data].is_pcie)
1039 mdev->mthca_flags |= MTHCA_FLAG_PCIE;
1040 1043
1041 /* 1044 /*
1042 * Now reset the HCA before we touch the PCI capabilities or 1045 * Now reset the HCA before we touch the PCI capabilities or
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
index 321f11e707f2..47ca8a9b7247 100644
--- a/drivers/infiniband/hw/mthca/mthca_mcg.c
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -187,7 +187,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
187 187
188 for (i = 0; i < MTHCA_QP_PER_MGM; ++i) 188 for (i = 0; i < MTHCA_QP_PER_MGM; ++i)
189 if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) { 189 if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) {
190 mthca_dbg(dev, "QP %06x already a member of MGM\n", 190 mthca_dbg(dev, "QP %06x already a member of MGM\n",
191 ibqp->qp_num); 191 ibqp->qp_num);
192 err = 0; 192 err = 0;
193 goto out; 193 goto out;
@@ -388,7 +388,7 @@ int __devinit mthca_init_mcg_table(struct mthca_dev *dev)
388 return 0; 388 return 0;
389} 389}
390 390
391void __devexit mthca_cleanup_mcg_table(struct mthca_dev *dev) 391void mthca_cleanup_mcg_table(struct mthca_dev *dev)
392{ 392{
393 mthca_alloc_cleanup(&dev->mcg_table.alloc); 393 mthca_alloc_cleanup(&dev->mcg_table.alloc);
394} 394}
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index d709cb162a72..15cc2f6eb475 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -202,7 +202,8 @@ void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int o
202 202
203 if (--table->icm[i]->refcount == 0) { 203 if (--table->icm[i]->refcount == 0) {
204 mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE, 204 mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
205 MTHCA_TABLE_CHUNK_SIZE >> 12, &status); 205 MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
206 &status);
206 mthca_free_icm(dev, table->icm[i]); 207 mthca_free_icm(dev, table->icm[i]);
207 table->icm[i] = NULL; 208 table->icm[i] = NULL;
208 } 209 }
@@ -336,7 +337,8 @@ err:
336 for (i = 0; i < num_icm; ++i) 337 for (i = 0; i < num_icm; ++i)
337 if (table->icm[i]) { 338 if (table->icm[i]) {
338 mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE, 339 mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE,
339 MTHCA_TABLE_CHUNK_SIZE >> 12, &status); 340 MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
341 &status);
340 mthca_free_icm(dev, table->icm[i]); 342 mthca_free_icm(dev, table->icm[i]);
341 } 343 }
342 344
@@ -353,7 +355,8 @@ void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
353 for (i = 0; i < table->num_icm; ++i) 355 for (i = 0; i < table->num_icm; ++i)
354 if (table->icm[i]) { 356 if (table->icm[i]) {
355 mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE, 357 mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
356 MTHCA_TABLE_CHUNK_SIZE >> 12, &status); 358 MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
359 &status);
357 mthca_free_icm(dev, table->icm[i]); 360 mthca_free_icm(dev, table->icm[i]);
358 } 361 }
359 362
@@ -364,7 +367,7 @@ static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int pag
364{ 367{
365 return dev->uar_table.uarc_base + 368 return dev->uar_table.uarc_base +
366 uar->index * dev->uar_table.uarc_size + 369 uar->index * dev->uar_table.uarc_size +
367 page * 4096; 370 page * MTHCA_ICM_PAGE_SIZE;
368} 371}
369 372
370int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, 373int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
@@ -401,7 +404,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
401 if (ret < 0) 404 if (ret < 0)
402 goto out; 405 goto out;
403 406
404 db_tab->page[i].mem.length = 4096; 407 db_tab->page[i].mem.length = MTHCA_ICM_PAGE_SIZE;
405 db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK; 408 db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK;
406 409
407 ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); 410 ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
@@ -455,7 +458,7 @@ struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev)
455 if (!mthca_is_memfree(dev)) 458 if (!mthca_is_memfree(dev))
456 return NULL; 459 return NULL;
457 460
458 npages = dev->uar_table.uarc_size / 4096; 461 npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;
459 db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL); 462 db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL);
460 if (!db_tab) 463 if (!db_tab)
461 return ERR_PTR(-ENOMEM); 464 return ERR_PTR(-ENOMEM);
@@ -478,7 +481,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
478 if (!mthca_is_memfree(dev)) 481 if (!mthca_is_memfree(dev))
479 return; 482 return;
480 483
481 for (i = 0; i < dev->uar_table.uarc_size / 4096; ++i) { 484 for (i = 0; i < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++i) {
482 if (db_tab->page[i].uvirt) { 485 if (db_tab->page[i].uvirt) {
483 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status); 486 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status);
484 pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); 487 pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
@@ -551,20 +554,20 @@ int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
551 page = dev->db_tab->page + end; 554 page = dev->db_tab->page + end;
552 555
553alloc: 556alloc:
554 page->db_rec = dma_alloc_coherent(&dev->pdev->dev, 4096, 557 page->db_rec = dma_alloc_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
555 &page->mapping, GFP_KERNEL); 558 &page->mapping, GFP_KERNEL);
556 if (!page->db_rec) { 559 if (!page->db_rec) {
557 ret = -ENOMEM; 560 ret = -ENOMEM;
558 goto out; 561 goto out;
559 } 562 }
560 memset(page->db_rec, 0, 4096); 563 memset(page->db_rec, 0, MTHCA_ICM_PAGE_SIZE);
561 564
562 ret = mthca_MAP_ICM_page(dev, page->mapping, 565 ret = mthca_MAP_ICM_page(dev, page->mapping,
563 mthca_uarc_virt(dev, &dev->driver_uar, i), &status); 566 mthca_uarc_virt(dev, &dev->driver_uar, i), &status);
564 if (!ret && status) 567 if (!ret && status)
565 ret = -EINVAL; 568 ret = -EINVAL;
566 if (ret) { 569 if (ret) {
567 dma_free_coherent(&dev->pdev->dev, 4096, 570 dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
568 page->db_rec, page->mapping); 571 page->db_rec, page->mapping);
569 goto out; 572 goto out;
570 } 573 }
@@ -612,7 +615,7 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
612 i >= dev->db_tab->max_group1 - 1) { 615 i >= dev->db_tab->max_group1 - 1) {
613 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); 616 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
614 617
615 dma_free_coherent(&dev->pdev->dev, 4096, 618 dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
616 page->db_rec, page->mapping); 619 page->db_rec, page->mapping);
617 page->db_rec = NULL; 620 page->db_rec = NULL;
618 621
@@ -640,7 +643,7 @@ int mthca_init_db_tab(struct mthca_dev *dev)
640 643
641 mutex_init(&dev->db_tab->mutex); 644 mutex_init(&dev->db_tab->mutex);
642 645
643 dev->db_tab->npages = dev->uar_table.uarc_size / 4096; 646 dev->db_tab->npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;
644 dev->db_tab->max_group1 = 0; 647 dev->db_tab->max_group1 = 0;
645 dev->db_tab->min_group2 = dev->db_tab->npages - 1; 648 dev->db_tab->min_group2 = dev->db_tab->npages - 1;
646 649
@@ -681,7 +684,7 @@ void mthca_cleanup_db_tab(struct mthca_dev *dev)
681 684
682 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); 685 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
683 686
684 dma_free_coherent(&dev->pdev->dev, 4096, 687 dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
685 dev->db_tab->page[i].db_rec, 688 dev->db_tab->page[i].db_rec,
686 dev->db_tab->page[i].mapping); 689 dev->db_tab->page[i].mapping);
687 } 690 }
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index 36f1141a08aa..6d42947e1dc4 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -45,6 +45,12 @@
45 ((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \ 45 ((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \
46 (sizeof (struct scatterlist))) 46 (sizeof (struct scatterlist)))
47 47
48enum {
49 MTHCA_ICM_PAGE_SHIFT = 12,
50 MTHCA_ICM_PAGE_SIZE = 1 << MTHCA_ICM_PAGE_SHIFT,
51 MTHCA_DB_REC_PER_PAGE = MTHCA_ICM_PAGE_SIZE / 8
52};
53
48struct mthca_icm_chunk { 54struct mthca_icm_chunk {
49 struct list_head list; 55 struct list_head list;
50 int npages; 56 int npages;
@@ -131,10 +137,6 @@ static inline unsigned long mthca_icm_size(struct mthca_icm_iter *iter)
131 return sg_dma_len(&iter->chunk->mem[iter->page_idx]); 137 return sg_dma_len(&iter->chunk->mem[iter->page_idx]);
132} 138}
133 139
134enum {
135 MTHCA_DB_REC_PER_PAGE = 4096 / 8
136};
137
138struct mthca_db_page { 140struct mthca_db_page {
139 DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE); 141 DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE);
140 __be64 *db_rec; 142 __be64 *db_rec;
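
The new enum names the 4 KB ICM page size once and derives the doorbell-record count from it, which is what lets the memfree code above drop its scattered 4096 literals even on hosts whose PAGE_SIZE is larger. A quick standalone check of those relationships plus the UAR-context address arithmetic used by mthca_uarc_virt(); the uarc_base and uarc_size numbers below are made up for the example:

#include <stdio.h>

enum {
	MTHCA_ICM_PAGE_SHIFT  = 12,
	MTHCA_ICM_PAGE_SIZE   = 1 << MTHCA_ICM_PAGE_SHIFT,
	MTHCA_DB_REC_PER_PAGE = MTHCA_ICM_PAGE_SIZE / 8	/* 8-byte doorbell records */
};

/* Address of doorbell page 'page' of UAR 'index' inside the UAR context table. */
static unsigned long long uarc_virt(unsigned long long uarc_base,
				    unsigned int uarc_size,
				    unsigned int index, unsigned int page)
{
	return uarc_base + (unsigned long long) index * uarc_size +
	       (unsigned long long) page * MTHCA_ICM_PAGE_SIZE;
}

int main(void)
{
	printf("ICM page %d bytes, %d doorbell records per page\n",
	       MTHCA_ICM_PAGE_SIZE, MTHCA_DB_REC_PER_PAGE);
	printf("uarc virt: 0x%llx\n",
	       uarc_virt(0x100000000ULL, 2 * MTHCA_ICM_PAGE_SIZE, 3, 1));
	return 0;
}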
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index e995e2aa016d..25e1c1db9a40 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -76,6 +76,8 @@ struct mthca_mpt_entry {
76#define MTHCA_MPT_STATUS_SW 0xF0 76#define MTHCA_MPT_STATUS_SW 0xF0
77#define MTHCA_MPT_STATUS_HW 0x00 77#define MTHCA_MPT_STATUS_HW 0x00
78 78
79#define SINAI_FMR_KEY_INC 0x1000000
80
79/* 81/*
80 * Buddy allocator for MTT segments (currently not very efficient 82 * Buddy allocator for MTT segments (currently not very efficient
81 * since it doesn't keep a free list and just searches linearly 83 * since it doesn't keep a free list and just searches linearly
@@ -168,7 +170,7 @@ err_out:
168 return -ENOMEM; 170 return -ENOMEM;
169} 171}
170 172
171static void __devexit mthca_buddy_cleanup(struct mthca_buddy *buddy) 173static void mthca_buddy_cleanup(struct mthca_buddy *buddy)
172{ 174{
173 int i; 175 int i;
174 176
@@ -330,6 +332,14 @@ static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
330 return tavor_key_to_hw_index(key); 332 return tavor_key_to_hw_index(key);
331} 333}
332 334
335static inline u32 adjust_key(struct mthca_dev *dev, u32 key)
336{
337 if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
338 return ((key << 20) & 0x800000) | (key & 0x7fffff);
339 else
340 return key;
341}
342
333int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, 343int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
334 u64 iova, u64 total_size, u32 access, struct mthca_mr *mr) 344 u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
335{ 345{
@@ -340,13 +350,12 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
340 int err; 350 int err;
341 u8 status; 351 u8 status;
342 352
343 might_sleep();
344
345 WARN_ON(buffer_size_shift >= 32); 353 WARN_ON(buffer_size_shift >= 32);
346 354
347 key = mthca_alloc(&dev->mr_table.mpt_alloc); 355 key = mthca_alloc(&dev->mr_table.mpt_alloc);
348 if (key == -1) 356 if (key == -1)
349 return -ENOMEM; 357 return -ENOMEM;
358 key = adjust_key(dev, key);
350 mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key); 359 mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
351 360
352 if (mthca_is_memfree(dev)) { 361 if (mthca_is_memfree(dev)) {
@@ -467,8 +476,6 @@ void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
467 int err; 476 int err;
468 u8 status; 477 u8 status;
469 478
470 might_sleep();
471
472 err = mthca_HW2SW_MPT(dev, NULL, 479 err = mthca_HW2SW_MPT(dev, NULL,
473 key_to_hw_index(dev, mr->ibmr.lkey) & 480 key_to_hw_index(dev, mr->ibmr.lkey) &
474 (dev->limits.num_mpts - 1), 481 (dev->limits.num_mpts - 1),
@@ -495,9 +502,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
495 int err = -ENOMEM; 502 int err = -ENOMEM;
496 int i; 503 int i;
497 504
498 might_sleep(); 505 if (mr->attr.page_shift < 12 || mr->attr.page_shift >= 32)
499
500 if (mr->attr.page_size < 12 || mr->attr.page_size >= 32)
501 return -EINVAL; 506 return -EINVAL;
502 507
503 /* For Arbel, all MTTs must fit in the same page. */ 508 /* For Arbel, all MTTs must fit in the same page. */
@@ -510,6 +515,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
510 key = mthca_alloc(&dev->mr_table.mpt_alloc); 515 key = mthca_alloc(&dev->mr_table.mpt_alloc);
511 if (key == -1) 516 if (key == -1)
512 return -ENOMEM; 517 return -ENOMEM;
518 key = adjust_key(dev, key);
513 519
514 idx = key & (dev->limits.num_mpts - 1); 520 idx = key & (dev->limits.num_mpts - 1);
515 mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key); 521 mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
@@ -523,7 +529,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
523 BUG_ON(!mr->mem.arbel.mpt); 529 BUG_ON(!mr->mem.arbel.mpt);
524 } else 530 } else
525 mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base + 531 mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
526 sizeof *(mr->mem.tavor.mpt) * idx; 532 sizeof *(mr->mem.tavor.mpt) * idx;
527 533
528 mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy); 534 mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
529 if (IS_ERR(mr->mtt)) 535 if (IS_ERR(mr->mtt))
@@ -549,7 +555,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
549 MTHCA_MPT_FLAG_REGION | 555 MTHCA_MPT_FLAG_REGION |
550 access); 556 access);
551 557
552 mpt_entry->page_size = cpu_to_be32(mr->attr.page_size - 12); 558 mpt_entry->page_size = cpu_to_be32(mr->attr.page_shift - 12);
553 mpt_entry->key = cpu_to_be32(key); 559 mpt_entry->key = cpu_to_be32(key);
554 mpt_entry->pd = cpu_to_be32(pd); 560 mpt_entry->pd = cpu_to_be32(pd);
555 memset(&mpt_entry->start, 0, 561 memset(&mpt_entry->start, 0,
@@ -617,7 +623,7 @@ static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
617 if (list_len > fmr->attr.max_pages) 623 if (list_len > fmr->attr.max_pages)
618 return -EINVAL; 624 return -EINVAL;
619 625
620 page_mask = (1 << fmr->attr.page_size) - 1; 626 page_mask = (1 << fmr->attr.page_shift) - 1;
621 627
622 /* We are getting page lists, so va must be page aligned. */ 628 /* We are getting page lists, so va must be page aligned. */
623 if (iova & page_mask) 629 if (iova & page_mask)
@@ -665,7 +671,7 @@ int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
665 } 671 }
666 672
667 mpt_entry.lkey = cpu_to_be32(key); 673 mpt_entry.lkey = cpu_to_be32(key);
668 mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size)); 674 mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
669 mpt_entry.start = cpu_to_be64(iova); 675 mpt_entry.start = cpu_to_be64(iova);
670 676
671 __raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key); 677 __raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
@@ -693,7 +699,10 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
693 ++fmr->maps; 699 ++fmr->maps;
694 700
695 key = arbel_key_to_hw_index(fmr->ibmr.lkey); 701 key = arbel_key_to_hw_index(fmr->ibmr.lkey);
696 key += dev->limits.num_mpts; 702 if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
703 key += SINAI_FMR_KEY_INC;
704 else
705 key += dev->limits.num_mpts;
697 fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key); 706 fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
698 707
699 *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; 708 *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
@@ -706,7 +715,7 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
706 715
707 fmr->mem.arbel.mpt->key = cpu_to_be32(key); 716 fmr->mem.arbel.mpt->key = cpu_to_be32(key);
708 fmr->mem.arbel.mpt->lkey = cpu_to_be32(key); 717 fmr->mem.arbel.mpt->lkey = cpu_to_be32(key);
709 fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size)); 718 fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
710 fmr->mem.arbel.mpt->start = cpu_to_be64(iova); 719 fmr->mem.arbel.mpt->start = cpu_to_be64(iova);
711 720
712 wmb(); 721 wmb();
@@ -766,6 +775,9 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
766 else 775 else
767 dev->mthca_flags |= MTHCA_FLAG_FMR; 776 dev->mthca_flags |= MTHCA_FLAG_FMR;
768 777
778 if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
779 mthca_dbg(dev, "Memory key throughput optimization activated.\n");
780
769 err = mthca_buddy_init(&dev->mr_table.mtt_buddy, 781 err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
770 fls(dev->limits.num_mtt_segs - 1)); 782 fls(dev->limits.num_mtt_segs - 1));
771 783
@@ -785,7 +797,7 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
785 } 797 }
786 798
787 dev->mr_table.tavor_fmr.mpt_base = 799 dev->mr_table.tavor_fmr.mpt_base =
788 ioremap(dev->mr_table.mpt_base, 800 ioremap(dev->mr_table.mpt_base,
789 (1 << i) * sizeof (struct mthca_mpt_entry)); 801 (1 << i) * sizeof (struct mthca_mpt_entry));
790 802
791 if (!dev->mr_table.tavor_fmr.mpt_base) { 803 if (!dev->mr_table.tavor_fmr.mpt_base) {
@@ -813,7 +825,7 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
813 goto err_reserve_fmr; 825 goto err_reserve_fmr;
814 826
815 dev->mr_table.fmr_mtt_buddy = 827 dev->mr_table.fmr_mtt_buddy =
816 &dev->mr_table.tavor_fmr.mtt_buddy; 828 &dev->mr_table.tavor_fmr.mtt_buddy;
817 } else 829 } else
818 dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy; 830 dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy;
819 831
@@ -854,7 +866,7 @@ err_mtt_buddy:
854 return err; 866 return err;
855} 867}
856 868
857void __devexit mthca_cleanup_mr_table(struct mthca_dev *dev) 869void mthca_cleanup_mr_table(struct mthca_dev *dev)
858{ 870{
859 /* XXX check if any MRs are still allocated? */ 871 /* XXX check if any MRs are still allocated? */
860 if (dev->limits.fmr_reserved_mtts) 872 if (dev->limits.fmr_reserved_mtts)
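Aside on the remap arithmetic above (illustrative, not part of the patch): each mthca_*_map_phys_fmr() call derives a fresh lkey/rkey for the same MPT entry, so keys handed out before the previous unmap are unlikely to match any more; the MTHCA_FLAG_SINAI_OPT branch only changes the increment, apparently to keep keys within the range the hardware's "memory key throughput optimization" covers, which is why mthca_profile.c below disables that optimization when the MPT table needs 2^24 or more entries. A minimal standalone C sketch of the generation-tag idea follows; the bit layout and constants are assumptions, not the driver's real key encoding.

/*
 * Toy model of FMR key cycling (not the driver's actual encoding).
 * An lkey is split into an MPT index (low bits) and a generation tag
 * (high bits); every remap bumps the generation so that keys handed
 * out before the previous unmap no longer validate, while the MPT
 * entry being reused stays the same.
 */
#include <stdio.h>
#include <stdint.h>

#define INDEX_BITS 17u                      /* log2(MPT table size), assumed */
#define INDEX_MASK ((1u << INDEX_BITS) - 1)

static uint32_t make_key(uint32_t gen, uint32_t index)
{
    return (gen << INDEX_BITS) | (index & INDEX_MASK);
}

static uint32_t remap(uint32_t key)
{
    uint32_t index = key & INDEX_MASK;
    uint32_t gen   = (key >> INDEX_BITS) + 1;   /* cycle the tag */

    return make_key(gen, index);
}

int main(void)
{
    uint32_t key = make_key(0, 42);         /* FMR backed by MPT entry 42 */
    int i;

    for (i = 0; i < 4; ++i) {
        printf("map %d: key=0x%08x (MPT index %u)\n",
               i, key, key & INDEX_MASK);
        key = remap(key);
    }
    return 0;
}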
diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c
index 3dbf06a6e6f4..59df51614c85 100644
--- a/drivers/infiniband/hw/mthca/mthca_pd.c
+++ b/drivers/infiniband/hw/mthca/mthca_pd.c
@@ -43,8 +43,6 @@ int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd)
43{ 43{
44 int err = 0; 44 int err = 0;
45 45
46 might_sleep();
47
48 pd->privileged = privileged; 46 pd->privileged = privileged;
49 47
50 atomic_set(&pd->sqp_count, 0); 48 atomic_set(&pd->sqp_count, 0);
@@ -66,7 +64,6 @@ int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd)
66 64
67void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd) 65void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd)
68{ 66{
69 might_sleep();
70 if (pd->privileged) 67 if (pd->privileged)
71 mthca_free_mr(dev, &pd->ntmr); 68 mthca_free_mr(dev, &pd->ntmr);
72 mthca_free(&dev->pd_table.alloc, pd->pd_num); 69 mthca_free(&dev->pd_table.alloc, pd->pd_num);
@@ -80,7 +77,7 @@ int __devinit mthca_init_pd_table(struct mthca_dev *dev)
80 dev->limits.reserved_pds); 77 dev->limits.reserved_pds);
81} 78}
82 79
83void __devexit mthca_cleanup_pd_table(struct mthca_dev *dev) 80void mthca_cleanup_pd_table(struct mthca_dev *dev)
84{ 81{
85 /* XXX check if any PDs are still allocated? */ 82 /* XXX check if any PDs are still allocated? */
86 mthca_alloc_cleanup(&dev->pd_table.alloc); 83 mthca_alloc_cleanup(&dev->pd_table.alloc);
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index 08a909371b0a..58d44aa3c302 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -152,7 +152,7 @@ u64 mthca_make_profile(struct mthca_dev *dev,
152 } 152 }
153 if (total_size > mem_avail) { 153 if (total_size > mem_avail) {
154 mthca_err(dev, "Profile requires 0x%llx bytes; " 154 mthca_err(dev, "Profile requires 0x%llx bytes; "
155 "won't in 0x%llx bytes of context memory.\n", 155 "won't fit in 0x%llx bytes of context memory.\n",
156 (unsigned long long) total_size, 156 (unsigned long long) total_size,
157 (unsigned long long) mem_avail); 157 (unsigned long long) mem_avail);
158 kfree(profile); 158 kfree(profile);
@@ -262,6 +262,14 @@ u64 mthca_make_profile(struct mthca_dev *dev,
262 */ 262 */
263 dev->limits.num_pds = MTHCA_NUM_PDS; 263 dev->limits.num_pds = MTHCA_NUM_PDS;
264 264
265 if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT &&
266 init_hca->log_mpt_sz > 23) {
267 mthca_warn(dev, "MPT table too large (requested size 2^%d >= 2^24)\n",
268 init_hca->log_mpt_sz);
269 mthca_warn(dev, "Disabling memory key throughput optimization.\n");
270 dev->mthca_flags &= ~MTHCA_FLAG_SINAI_OPT;
271 }
272
265 /* 273 /*
266 * For Tavor, FMRs use ioremapped PCI memory. For 32 bit 274 * For Tavor, FMRs use ioremapped PCI memory. For 32 bit
267 * systems it may use too much vmalloc space to map all MTT 275 * systems it may use too much vmalloc space to map all MTT
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index e88e39aef85a..2c250bc11c33 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4 * Copyright (c) 2005 Cisco Systems. All rights reserved. 4 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
5 * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 5 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
6 * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 6 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
7 * 7 *
@@ -108,12 +108,12 @@ static int mthca_query_device(struct ib_device *ibdev,
108 props->max_srq_wr = mdev->limits.max_srq_wqes; 108 props->max_srq_wr = mdev->limits.max_srq_wqes;
109 props->max_srq_sge = mdev->limits.max_sg; 109 props->max_srq_sge = mdev->limits.max_sg;
110 props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay; 110 props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay;
111 props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? 111 props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
112 IB_ATOMIC_HCA : IB_ATOMIC_NONE; 112 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
113 props->max_pkeys = mdev->limits.pkey_table_len; 113 props->max_pkeys = mdev->limits.pkey_table_len;
114 props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms; 114 props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms;
115 props->max_mcast_qp_attach = MTHCA_QP_PER_MGM; 115 props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
116 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * 116 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
117 props->max_mcast_grp; 117 props->max_mcast_grp;
118 118
119 err = 0; 119 err = 0;
@@ -176,6 +176,23 @@ static int mthca_query_port(struct ib_device *ibdev,
176 return err; 176 return err;
177} 177}
178 178
179static int mthca_modify_device(struct ib_device *ibdev,
180 int mask,
181 struct ib_device_modify *props)
182{
183 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
184 return -EOPNOTSUPP;
185
186 if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
187 if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
188 return -ERESTARTSYS;
189 memcpy(ibdev->node_desc, props->node_desc, 64);
190 mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
191 }
192
193 return 0;
194}
195
179static int mthca_modify_port(struct ib_device *ibdev, 196static int mthca_modify_port(struct ib_device *ibdev,
180 u8 port, int port_modify_mask, 197 u8 port, int port_modify_mask,
181 struct ib_port_modify *props) 198 struct ib_port_modify *props)
@@ -669,9 +686,9 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
669 } 686 }
670 687
671 if (context) { 688 if (context) {
672 cq->mr.ibmr.lkey = ucmd.lkey; 689 cq->buf.mr.ibmr.lkey = ucmd.lkey;
673 cq->set_ci_db_index = ucmd.set_db_index; 690 cq->set_ci_db_index = ucmd.set_db_index;
674 cq->arm_db_index = ucmd.arm_db_index; 691 cq->arm_db_index = ucmd.arm_db_index;
675 } 692 }
676 693
677 for (nent = 1; nent <= entries; nent <<= 1) 694 for (nent = 1; nent <= entries; nent <<= 1)
@@ -689,6 +706,8 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
689 goto err_free; 706 goto err_free;
690 } 707 }
691 708
709 cq->resize_buf = NULL;
710
692 return &cq->ibcq; 711 return &cq->ibcq;
693 712
694err_free: 713err_free:
@@ -707,6 +726,121 @@ err_unmap_set:
707 return ERR_PTR(err); 726 return ERR_PTR(err);
708} 727}
709 728
729static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq,
730 int entries)
731{
732 int ret;
733
734 spin_lock_irq(&cq->lock);
735 if (cq->resize_buf) {
736 ret = -EBUSY;
737 goto unlock;
738 }
739
740 cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
741 if (!cq->resize_buf) {
742 ret = -ENOMEM;
743 goto unlock;
744 }
745
746 cq->resize_buf->state = CQ_RESIZE_ALLOC;
747
748 ret = 0;
749
750unlock:
751 spin_unlock_irq(&cq->lock);
752
753 if (ret)
754 return ret;
755
756 ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
757 if (ret) {
758 spin_lock_irq(&cq->lock);
759 kfree(cq->resize_buf);
760 cq->resize_buf = NULL;
761 spin_unlock_irq(&cq->lock);
762 return ret;
763 }
764
765 cq->resize_buf->cqe = entries - 1;
766
767 spin_lock_irq(&cq->lock);
768 cq->resize_buf->state = CQ_RESIZE_READY;
769 spin_unlock_irq(&cq->lock);
770
771 return 0;
772}
773
774static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
775{
776 struct mthca_dev *dev = to_mdev(ibcq->device);
777 struct mthca_cq *cq = to_mcq(ibcq);
778 struct mthca_resize_cq ucmd;
779 u32 lkey;
780 u8 status;
781 int ret;
782
783 if (entries < 1 || entries > dev->limits.max_cqes)
784 return -EINVAL;
785
786 entries = roundup_pow_of_two(entries + 1);
787 if (entries == ibcq->cqe + 1)
788 return 0;
789
790 if (cq->is_kernel) {
791 ret = mthca_alloc_resize_buf(dev, cq, entries);
792 if (ret)
793 return ret;
794 lkey = cq->resize_buf->buf.mr.ibmr.lkey;
795 } else {
796 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
797 return -EFAULT;
798 lkey = ucmd.lkey;
799 }
800
801 ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, long_log2(entries), &status);
802 if (status)
803 ret = -EINVAL;
804
805 if (ret) {
806 if (cq->resize_buf) {
807 mthca_free_cq_buf(dev, &cq->resize_buf->buf,
808 cq->resize_buf->cqe);
809 kfree(cq->resize_buf);
810 spin_lock_irq(&cq->lock);
811 cq->resize_buf = NULL;
812 spin_unlock_irq(&cq->lock);
813 }
814 return ret;
815 }
816
817 if (cq->is_kernel) {
818 struct mthca_cq_buf tbuf;
819 int tcqe;
820
821 spin_lock_irq(&cq->lock);
822 if (cq->resize_buf->state == CQ_RESIZE_READY) {
823 mthca_cq_resize_copy_cqes(cq);
824 tbuf = cq->buf;
825 tcqe = cq->ibcq.cqe;
826 cq->buf = cq->resize_buf->buf;
827 cq->ibcq.cqe = cq->resize_buf->cqe;
828 } else {
829 tbuf = cq->resize_buf->buf;
830 tcqe = cq->resize_buf->cqe;
831 }
832
833 kfree(cq->resize_buf);
834 cq->resize_buf = NULL;
835 spin_unlock_irq(&cq->lock);
836
837 mthca_free_cq_buf(dev, &tbuf, tcqe);
838 } else
839 ibcq->cqe = entries - 1;
840
841 return 0;
842}
843
710static int mthca_destroy_cq(struct ib_cq *cq) 844static int mthca_destroy_cq(struct ib_cq *cq)
711{ 845{
712 if (cq->uobject) { 846 if (cq->uobject) {
@@ -1070,6 +1204,20 @@ static int mthca_init_node_data(struct mthca_dev *dev)
1070 goto out; 1204 goto out;
1071 1205
1072 init_query_mad(in_mad); 1206 init_query_mad(in_mad);
1207 in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
1208
1209 err = mthca_MAD_IFC(dev, 1, 1,
1210 1, NULL, NULL, in_mad, out_mad,
1211 &status);
1212 if (err)
1213 goto out;
1214 if (status) {
1215 err = -EINVAL;
1216 goto out;
1217 }
1218
1219 memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
1220
1073 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; 1221 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
1074 1222
1075 err = mthca_MAD_IFC(dev, 1, 1, 1223 err = mthca_MAD_IFC(dev, 1, 1,
@@ -1113,14 +1261,17 @@ int mthca_register_device(struct mthca_dev *dev)
1113 (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 1261 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
1114 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 1262 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
1115 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 1263 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
1264 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
1116 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 1265 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
1117 (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 1266 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
1267 (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
1118 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 1268 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
1119 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 1269 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
1120 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | 1270 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
1121 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | 1271 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
1122 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 1272 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
1123 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 1273 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
1274 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
1124 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); 1275 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
1125 dev->ib_dev.node_type = IB_NODE_CA; 1276 dev->ib_dev.node_type = IB_NODE_CA;
1126 dev->ib_dev.phys_port_cnt = dev->limits.num_ports; 1277 dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
@@ -1128,6 +1279,7 @@ int mthca_register_device(struct mthca_dev *dev)
1128 dev->ib_dev.class_dev.dev = &dev->pdev->dev; 1279 dev->ib_dev.class_dev.dev = &dev->pdev->dev;
1129 dev->ib_dev.query_device = mthca_query_device; 1280 dev->ib_dev.query_device = mthca_query_device;
1130 dev->ib_dev.query_port = mthca_query_port; 1281 dev->ib_dev.query_port = mthca_query_port;
1282 dev->ib_dev.modify_device = mthca_modify_device;
1131 dev->ib_dev.modify_port = mthca_modify_port; 1283 dev->ib_dev.modify_port = mthca_modify_port;
1132 dev->ib_dev.query_pkey = mthca_query_pkey; 1284 dev->ib_dev.query_pkey = mthca_query_pkey;
1133 dev->ib_dev.query_gid = mthca_query_gid; 1285 dev->ib_dev.query_gid = mthca_query_gid;
@@ -1137,11 +1289,13 @@ int mthca_register_device(struct mthca_dev *dev)
1137 dev->ib_dev.alloc_pd = mthca_alloc_pd; 1289 dev->ib_dev.alloc_pd = mthca_alloc_pd;
1138 dev->ib_dev.dealloc_pd = mthca_dealloc_pd; 1290 dev->ib_dev.dealloc_pd = mthca_dealloc_pd;
1139 dev->ib_dev.create_ah = mthca_ah_create; 1291 dev->ib_dev.create_ah = mthca_ah_create;
1292 dev->ib_dev.query_ah = mthca_ah_query;
1140 dev->ib_dev.destroy_ah = mthca_ah_destroy; 1293 dev->ib_dev.destroy_ah = mthca_ah_destroy;
1141 1294
1142 if (dev->mthca_flags & MTHCA_FLAG_SRQ) { 1295 if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
1143 dev->ib_dev.create_srq = mthca_create_srq; 1296 dev->ib_dev.create_srq = mthca_create_srq;
1144 dev->ib_dev.modify_srq = mthca_modify_srq; 1297 dev->ib_dev.modify_srq = mthca_modify_srq;
1298 dev->ib_dev.query_srq = mthca_query_srq;
1145 dev->ib_dev.destroy_srq = mthca_destroy_srq; 1299 dev->ib_dev.destroy_srq = mthca_destroy_srq;
1146 1300
1147 if (mthca_is_memfree(dev)) 1301 if (mthca_is_memfree(dev))
@@ -1152,8 +1306,10 @@ int mthca_register_device(struct mthca_dev *dev)
1152 1306
1153 dev->ib_dev.create_qp = mthca_create_qp; 1307 dev->ib_dev.create_qp = mthca_create_qp;
1154 dev->ib_dev.modify_qp = mthca_modify_qp; 1308 dev->ib_dev.modify_qp = mthca_modify_qp;
1309 dev->ib_dev.query_qp = mthca_query_qp;
1155 dev->ib_dev.destroy_qp = mthca_destroy_qp; 1310 dev->ib_dev.destroy_qp = mthca_destroy_qp;
1156 dev->ib_dev.create_cq = mthca_create_cq; 1311 dev->ib_dev.create_cq = mthca_create_cq;
1312 dev->ib_dev.resize_cq = mthca_resize_cq;
1157 dev->ib_dev.destroy_cq = mthca_destroy_cq; 1313 dev->ib_dev.destroy_cq = mthca_destroy_cq;
1158 dev->ib_dev.poll_cq = mthca_poll_cq; 1314 dev->ib_dev.poll_cq = mthca_poll_cq;
1159 dev->ib_dev.get_dma_mr = mthca_get_dma_mr; 1315 dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 1e73947b4702..2e7f52136965 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved. 3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5 * 5 *
6 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
@@ -164,9 +164,11 @@ struct mthca_ah {
164 * - wait_event until ref count is zero 164 * - wait_event until ref count is zero
165 * 165 *
166 * It is the consumer's responsibility to make sure that no QP 166 * It is the consumer's responsibility to make sure that no QP
167 * operations (WQE posting or state modification) are pending when the 167 * operations (WQE posting or state modification) are pending when a
168 * QP is destroyed. Also, the consumer must make sure that calls to 168 * QP is destroyed. Also, the consumer must make sure that calls to
169 * qp_modify are serialized. 169 * qp_modify are serialized. Similarly, the consumer is responsible
170 * for ensuring that no CQ resize operations are pending when a CQ
171 * is destroyed.
170 * 172 *
171 * Possible optimizations (wait for profile data to see if/where we 173 * Possible optimizations (wait for profile data to see if/where we
172 * have locks bouncing between CPUs): 174 * have locks bouncing between CPUs):
@@ -176,25 +178,40 @@ struct mthca_ah {
176 * send queue and one for the receive queue) 178 * send queue and one for the receive queue)
177 */ 179 */
178 180
181struct mthca_cq_buf {
182 union mthca_buf queue;
183 struct mthca_mr mr;
184 int is_direct;
185};
186
187struct mthca_cq_resize {
188 struct mthca_cq_buf buf;
189 int cqe;
190 enum {
191 CQ_RESIZE_ALLOC,
192 CQ_RESIZE_READY,
193 CQ_RESIZE_SWAPPED
194 } state;
195};
196
179struct mthca_cq { 197struct mthca_cq {
180 struct ib_cq ibcq; 198 struct ib_cq ibcq;
181 spinlock_t lock; 199 spinlock_t lock;
182 atomic_t refcount; 200 atomic_t refcount;
183 int cqn; 201 int cqn;
184 u32 cons_index; 202 u32 cons_index;
185 int is_direct; 203 struct mthca_cq_buf buf;
186 int is_kernel; 204 struct mthca_cq_resize *resize_buf;
205 int is_kernel;
187 206
188 /* Next fields are Arbel only */ 207 /* Next fields are Arbel only */
189 int set_ci_db_index; 208 int set_ci_db_index;
190 __be32 *set_ci_db; 209 __be32 *set_ci_db;
191 int arm_db_index; 210 int arm_db_index;
192 __be32 *arm_db; 211 __be32 *arm_db;
193 int arm_sn; 212 int arm_sn;
194 213
195 union mthca_buf queue; 214 wait_queue_head_t wait;
196 struct mthca_mr mr;
197 wait_queue_head_t wait;
198}; 215};
199 216
200struct mthca_srq { 217struct mthca_srq {
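For orientation (not part of the patch): mthca_resize_cq() above hands off to the completion path through cq->resize_buf and its CQ_RESIZE_* states, allocating the new buffer outside the lock and swapping it in afterwards. Below is a minimal userspace sketch of that handshake, with a pthread mutex standing in for cq->lock, the RESIZE_CQ firmware command reduced to a comment, error handling elided, and the third CQ_RESIZE_SWAPPED state (driven by the poll path) omitted; all names here are stand-ins.

/* Simplified model of the resize_buf handshake (illustrative only). */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

enum resize_state { RESIZE_ALLOC, RESIZE_READY };

struct cq_buf    { int *cqe; int nent; };
struct cq_resize { struct cq_buf buf; enum resize_state state; };

struct cq {
    pthread_mutex_t   lock;
    struct cq_buf     buf;
    struct cq_resize *resize;
};

static int cq_resize(struct cq *cq, int entries)
{
    struct cq_resize *r;

    pthread_mutex_lock(&cq->lock);
    if (cq->resize) {                     /* only one resize at a time */
        pthread_mutex_unlock(&cq->lock);
        return -EBUSY;
    }
    cq->resize = r = calloc(1, sizeof(*r));
    r->state = RESIZE_ALLOC;
    pthread_mutex_unlock(&cq->lock);

    /* Allocate the new queue outside the lock (may sleep in the kernel). */
    r->buf.nent = entries;
    r->buf.cqe  = calloc(entries, sizeof(int));

    pthread_mutex_lock(&cq->lock);
    r->state = RESIZE_READY;
    pthread_mutex_unlock(&cq->lock);

    /* The driver now issues the RESIZE_CQ firmware command; on success
     * it copies outstanding CQEs and swaps buffers under the lock. */
    pthread_mutex_lock(&cq->lock);
    free(cq->buf.cqe);                    /* retire the old buffer */
    cq->buf    = r->buf;
    cq->resize = NULL;
    pthread_mutex_unlock(&cq->lock);
    free(r);

    printf("resized CQ to %d entries\n", cq->buf.nent);
    return 0;
}

int main(void)
{
    struct cq cq = { .lock = PTHREAD_MUTEX_INITIALIZER,
                     .buf  = { .cqe = calloc(64, sizeof(int)), .nent = 64 } };

    cq_resize(&cq, 256);
    free(cq.buf.cqe);
    return 0;
}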
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index fba608ed7df2..057c8e6af87b 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -2,7 +2,7 @@
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved. 3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5 * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 5 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
6 * 6 *
7 * This software is available to you under a choice of one of two 7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU 8 * licenses. You may choose to be licensed under the terms of the GNU
@@ -286,207 +286,6 @@ static int to_mthca_st(int transport)
286 } 286 }
287} 287}
288 288
289static const struct {
290 int trans;
291 u32 req_param[NUM_TRANS];
292 u32 opt_param[NUM_TRANS];
293} state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
294 [IB_QPS_RESET] = {
295 [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
296 [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
297 [IB_QPS_INIT] = {
298 .trans = MTHCA_TRANS_RST2INIT,
299 .req_param = {
300 [UD] = (IB_QP_PKEY_INDEX |
301 IB_QP_PORT |
302 IB_QP_QKEY),
303 [UC] = (IB_QP_PKEY_INDEX |
304 IB_QP_PORT |
305 IB_QP_ACCESS_FLAGS),
306 [RC] = (IB_QP_PKEY_INDEX |
307 IB_QP_PORT |
308 IB_QP_ACCESS_FLAGS),
309 [MLX] = (IB_QP_PKEY_INDEX |
310 IB_QP_QKEY),
311 },
312 /* bug-for-bug compatibility with VAPI: */
313 .opt_param = {
314 [MLX] = IB_QP_PORT
315 }
316 },
317 },
318 [IB_QPS_INIT] = {
319 [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
320 [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
321 [IB_QPS_INIT] = {
322 .trans = MTHCA_TRANS_INIT2INIT,
323 .opt_param = {
324 [UD] = (IB_QP_PKEY_INDEX |
325 IB_QP_PORT |
326 IB_QP_QKEY),
327 [UC] = (IB_QP_PKEY_INDEX |
328 IB_QP_PORT |
329 IB_QP_ACCESS_FLAGS),
330 [RC] = (IB_QP_PKEY_INDEX |
331 IB_QP_PORT |
332 IB_QP_ACCESS_FLAGS),
333 [MLX] = (IB_QP_PKEY_INDEX |
334 IB_QP_QKEY),
335 }
336 },
337 [IB_QPS_RTR] = {
338 .trans = MTHCA_TRANS_INIT2RTR,
339 .req_param = {
340 [UC] = (IB_QP_AV |
341 IB_QP_PATH_MTU |
342 IB_QP_DEST_QPN |
343 IB_QP_RQ_PSN),
344 [RC] = (IB_QP_AV |
345 IB_QP_PATH_MTU |
346 IB_QP_DEST_QPN |
347 IB_QP_RQ_PSN |
348 IB_QP_MAX_DEST_RD_ATOMIC |
349 IB_QP_MIN_RNR_TIMER),
350 },
351 .opt_param = {
352 [UD] = (IB_QP_PKEY_INDEX |
353 IB_QP_QKEY),
354 [UC] = (IB_QP_ALT_PATH |
355 IB_QP_ACCESS_FLAGS |
356 IB_QP_PKEY_INDEX),
357 [RC] = (IB_QP_ALT_PATH |
358 IB_QP_ACCESS_FLAGS |
359 IB_QP_PKEY_INDEX),
360 [MLX] = (IB_QP_PKEY_INDEX |
361 IB_QP_QKEY),
362 }
363 }
364 },
365 [IB_QPS_RTR] = {
366 [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
367 [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
368 [IB_QPS_RTS] = {
369 .trans = MTHCA_TRANS_RTR2RTS,
370 .req_param = {
371 [UD] = IB_QP_SQ_PSN,
372 [UC] = IB_QP_SQ_PSN,
373 [RC] = (IB_QP_TIMEOUT |
374 IB_QP_RETRY_CNT |
375 IB_QP_RNR_RETRY |
376 IB_QP_SQ_PSN |
377 IB_QP_MAX_QP_RD_ATOMIC),
378 [MLX] = IB_QP_SQ_PSN,
379 },
380 .opt_param = {
381 [UD] = (IB_QP_CUR_STATE |
382 IB_QP_QKEY),
383 [UC] = (IB_QP_CUR_STATE |
384 IB_QP_ALT_PATH |
385 IB_QP_ACCESS_FLAGS |
386 IB_QP_PATH_MIG_STATE),
387 [RC] = (IB_QP_CUR_STATE |
388 IB_QP_ALT_PATH |
389 IB_QP_ACCESS_FLAGS |
390 IB_QP_MIN_RNR_TIMER |
391 IB_QP_PATH_MIG_STATE),
392 [MLX] = (IB_QP_CUR_STATE |
393 IB_QP_QKEY),
394 }
395 }
396 },
397 [IB_QPS_RTS] = {
398 [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
399 [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
400 [IB_QPS_RTS] = {
401 .trans = MTHCA_TRANS_RTS2RTS,
402 .opt_param = {
403 [UD] = (IB_QP_CUR_STATE |
404 IB_QP_QKEY),
405 [UC] = (IB_QP_ACCESS_FLAGS |
406 IB_QP_ALT_PATH |
407 IB_QP_PATH_MIG_STATE),
408 [RC] = (IB_QP_ACCESS_FLAGS |
409 IB_QP_ALT_PATH |
410 IB_QP_PATH_MIG_STATE |
411 IB_QP_MIN_RNR_TIMER),
412 [MLX] = (IB_QP_CUR_STATE |
413 IB_QP_QKEY),
414 }
415 },
416 [IB_QPS_SQD] = {
417 .trans = MTHCA_TRANS_RTS2SQD,
418 },
419 },
420 [IB_QPS_SQD] = {
421 [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
422 [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
423 [IB_QPS_RTS] = {
424 .trans = MTHCA_TRANS_SQD2RTS,
425 .opt_param = {
426 [UD] = (IB_QP_CUR_STATE |
427 IB_QP_QKEY),
428 [UC] = (IB_QP_CUR_STATE |
429 IB_QP_ALT_PATH |
430 IB_QP_ACCESS_FLAGS |
431 IB_QP_PATH_MIG_STATE),
432 [RC] = (IB_QP_CUR_STATE |
433 IB_QP_ALT_PATH |
434 IB_QP_ACCESS_FLAGS |
435 IB_QP_MIN_RNR_TIMER |
436 IB_QP_PATH_MIG_STATE),
437 [MLX] = (IB_QP_CUR_STATE |
438 IB_QP_QKEY),
439 }
440 },
441 [IB_QPS_SQD] = {
442 .trans = MTHCA_TRANS_SQD2SQD,
443 .opt_param = {
444 [UD] = (IB_QP_PKEY_INDEX |
445 IB_QP_QKEY),
446 [UC] = (IB_QP_AV |
447 IB_QP_CUR_STATE |
448 IB_QP_ALT_PATH |
449 IB_QP_ACCESS_FLAGS |
450 IB_QP_PKEY_INDEX |
451 IB_QP_PATH_MIG_STATE),
452 [RC] = (IB_QP_AV |
453 IB_QP_TIMEOUT |
454 IB_QP_RETRY_CNT |
455 IB_QP_RNR_RETRY |
456 IB_QP_MAX_QP_RD_ATOMIC |
457 IB_QP_MAX_DEST_RD_ATOMIC |
458 IB_QP_CUR_STATE |
459 IB_QP_ALT_PATH |
460 IB_QP_ACCESS_FLAGS |
461 IB_QP_PKEY_INDEX |
462 IB_QP_MIN_RNR_TIMER |
463 IB_QP_PATH_MIG_STATE),
464 [MLX] = (IB_QP_PKEY_INDEX |
465 IB_QP_QKEY),
466 }
467 }
468 },
469 [IB_QPS_SQE] = {
470 [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
471 [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
472 [IB_QPS_RTS] = {
473 .trans = MTHCA_TRANS_SQERR2RTS,
474 .opt_param = {
475 [UD] = (IB_QP_CUR_STATE |
476 IB_QP_QKEY),
477 [UC] = (IB_QP_CUR_STATE |
478 IB_QP_ACCESS_FLAGS),
479 [MLX] = (IB_QP_CUR_STATE |
480 IB_QP_QKEY),
481 }
482 }
483 },
484 [IB_QPS_ERR] = {
485 [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
486 [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }
487 }
488};
489
490static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr, 289static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr,
491 int attr_mask) 290 int attr_mask)
492{ 291{
@@ -549,23 +348,167 @@ static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr,
549 return cpu_to_be32(hw_access_flags); 348 return cpu_to_be32(hw_access_flags);
550} 349}
551 350
552static void mthca_path_set(struct ib_ah_attr *ah, struct mthca_qp_path *path) 351static inline enum ib_qp_state to_ib_qp_state(int mthca_state)
352{
353 switch (mthca_state) {
354 case MTHCA_QP_STATE_RST: return IB_QPS_RESET;
355 case MTHCA_QP_STATE_INIT: return IB_QPS_INIT;
356 case MTHCA_QP_STATE_RTR: return IB_QPS_RTR;
357 case MTHCA_QP_STATE_RTS: return IB_QPS_RTS;
358 case MTHCA_QP_STATE_DRAINING:
359 case MTHCA_QP_STATE_SQD: return IB_QPS_SQD;
360 case MTHCA_QP_STATE_SQE: return IB_QPS_SQE;
361 case MTHCA_QP_STATE_ERR: return IB_QPS_ERR;
362 default: return -1;
363 }
364}
365
366static inline enum ib_mig_state to_ib_mig_state(int mthca_mig_state)
367{
368 switch (mthca_mig_state) {
369 case 0: return IB_MIG_ARMED;
370 case 1: return IB_MIG_REARM;
371 case 3: return IB_MIG_MIGRATED;
372 default: return -1;
373 }
374}
375
376static int to_ib_qp_access_flags(int mthca_flags)
377{
378 int ib_flags = 0;
379
380 if (mthca_flags & MTHCA_QP_BIT_RRE)
381 ib_flags |= IB_ACCESS_REMOTE_READ;
382 if (mthca_flags & MTHCA_QP_BIT_RWE)
383 ib_flags |= IB_ACCESS_REMOTE_WRITE;
384 if (mthca_flags & MTHCA_QP_BIT_RAE)
385 ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
386
387 return ib_flags;
388}
389
390static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
391 struct mthca_qp_path *path)
392{
393 memset(ib_ah_attr, 0, sizeof *path);
394 ib_ah_attr->port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
395 ib_ah_attr->dlid = be16_to_cpu(path->rlid);
396 ib_ah_attr->sl = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
397 ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
398 ib_ah_attr->static_rate = path->static_rate & 0x7;
399 ib_ah_attr->ah_flags = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
400 if (ib_ah_attr->ah_flags) {
401 ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1);
402 ib_ah_attr->grh.hop_limit = path->hop_limit;
403 ib_ah_attr->grh.traffic_class =
404 (be32_to_cpu(path->sl_tclass_flowlabel) >> 20) & 0xff;
405 ib_ah_attr->grh.flow_label =
406 be32_to_cpu(path->sl_tclass_flowlabel) & 0xfffff;
407 memcpy(ib_ah_attr->grh.dgid.raw,
408 path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
409 }
410}
411
412int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
413 struct ib_qp_init_attr *qp_init_attr)
414{
415 struct mthca_dev *dev = to_mdev(ibqp->device);
416 struct mthca_qp *qp = to_mqp(ibqp);
417 int err;
418 struct mthca_mailbox *mailbox;
419 struct mthca_qp_param *qp_param;
420 struct mthca_qp_context *context;
421 int mthca_state;
422 u8 status;
423
424 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
425 if (IS_ERR(mailbox))
426 return PTR_ERR(mailbox);
427
428 err = mthca_QUERY_QP(dev, qp->qpn, 0, mailbox, &status);
429 if (err)
430 goto out;
431 if (status) {
432 mthca_warn(dev, "QUERY_QP returned status %02x\n", status);
433 err = -EINVAL;
434 goto out;
435 }
436
437 qp_param = mailbox->buf;
438 context = &qp_param->context;
439 mthca_state = be32_to_cpu(context->flags) >> 28;
440
441 qp_attr->qp_state = to_ib_qp_state(mthca_state);
442 qp_attr->cur_qp_state = qp_attr->qp_state;
443 qp_attr->path_mtu = context->mtu_msgmax >> 5;
444 qp_attr->path_mig_state =
445 to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
446 qp_attr->qkey = be32_to_cpu(context->qkey);
447 qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
448 qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff;
449 qp_attr->dest_qp_num = be32_to_cpu(context->remote_qpn) & 0xffffff;
450 qp_attr->qp_access_flags =
451 to_ib_qp_access_flags(be32_to_cpu(context->params2));
452 qp_attr->cap.max_send_wr = qp->sq.max;
453 qp_attr->cap.max_recv_wr = qp->rq.max;
454 qp_attr->cap.max_send_sge = qp->sq.max_gs;
455 qp_attr->cap.max_recv_sge = qp->rq.max_gs;
456 qp_attr->cap.max_inline_data = qp->max_inline_data;
457
458 to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
459 to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
460
461 qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
462 qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
463
464 /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
465 qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING;
466
467 qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
468
469 qp_attr->max_dest_rd_atomic =
470 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
471 qp_attr->min_rnr_timer =
472 (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
473 qp_attr->port_num = qp_attr->ah_attr.port_num;
474 qp_attr->timeout = context->pri_path.ackto >> 3;
475 qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
476 qp_attr->rnr_retry = context->pri_path.rnr_retry >> 5;
477 qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
478 qp_attr->alt_timeout = context->alt_path.ackto >> 3;
479 qp_init_attr->cap = qp_attr->cap;
480
481out:
482 mthca_free_mailbox(dev, mailbox);
483 return err;
484}
485
486static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
487 struct mthca_qp_path *path)
553{ 488{
554 path->g_mylmc = ah->src_path_bits & 0x7f; 489 path->g_mylmc = ah->src_path_bits & 0x7f;
555 path->rlid = cpu_to_be16(ah->dlid); 490 path->rlid = cpu_to_be16(ah->dlid);
556 path->static_rate = !!ah->static_rate; 491 path->static_rate = !!ah->static_rate;
557 492
558 if (ah->ah_flags & IB_AH_GRH) { 493 if (ah->ah_flags & IB_AH_GRH) {
494 if (ah->grh.sgid_index >= dev->limits.gid_table_len) {
495 mthca_dbg(dev, "sgid_index (%u) too large. max is %d\n",
496 ah->grh.sgid_index, dev->limits.gid_table_len-1);
497 return -1;
498 }
499
559 path->g_mylmc |= 1 << 7; 500 path->g_mylmc |= 1 << 7;
560 path->mgid_index = ah->grh.sgid_index; 501 path->mgid_index = ah->grh.sgid_index;
561 path->hop_limit = ah->grh.hop_limit; 502 path->hop_limit = ah->grh.hop_limit;
562 path->sl_tclass_flowlabel = 503 path->sl_tclass_flowlabel =
563 cpu_to_be32((ah->sl << 28) | 504 cpu_to_be32((ah->sl << 28) |
564 (ah->grh.traffic_class << 20) | 505 (ah->grh.traffic_class << 20) |
565 (ah->grh.flow_label)); 506 (ah->grh.flow_label));
566 memcpy(path->rgid, ah->grh.dgid.raw, 16); 507 memcpy(path->rgid, ah->grh.dgid.raw, 16);
567 } else 508 } else
568 path->sl_tclass_flowlabel = cpu_to_be32(ah->sl << 28); 509 path->sl_tclass_flowlabel = cpu_to_be32(ah->sl << 28);
510
511 return 0;
569} 512}
570 513
571int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) 514int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
@@ -576,18 +519,12 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
576 struct mthca_mailbox *mailbox; 519 struct mthca_mailbox *mailbox;
577 struct mthca_qp_param *qp_param; 520 struct mthca_qp_param *qp_param;
578 struct mthca_qp_context *qp_context; 521 struct mthca_qp_context *qp_context;
579 u32 req_param, opt_param; 522 u32 sqd_event = 0;
580 u8 status; 523 u8 status;
581 int err; 524 int err;
582 525
583 if (attr_mask & IB_QP_CUR_STATE) { 526 if (attr_mask & IB_QP_CUR_STATE) {
584 if (attr->cur_qp_state != IB_QPS_RTR && 527 cur_state = attr->cur_qp_state;
585 attr->cur_qp_state != IB_QPS_RTS &&
586 attr->cur_qp_state != IB_QPS_SQD &&
587 attr->cur_qp_state != IB_QPS_SQE)
588 return -EINVAL;
589 else
590 cur_state = attr->cur_qp_state;
591 } else { 528 } else {
592 spin_lock_irq(&qp->sq.lock); 529 spin_lock_irq(&qp->sq.lock);
593 spin_lock(&qp->rq.lock); 530 spin_lock(&qp->rq.lock);
@@ -596,44 +533,20 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
596 spin_unlock_irq(&qp->sq.lock); 533 spin_unlock_irq(&qp->sq.lock);
597 } 534 }
598 535
599 if (attr_mask & IB_QP_STATE) { 536 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
600 if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR)
601 return -EINVAL;
602 new_state = attr->qp_state;
603 } else
604 new_state = cur_state;
605
606 if (state_table[cur_state][new_state].trans == MTHCA_TRANS_INVALID) {
607 mthca_dbg(dev, "Illegal QP transition "
608 "%d->%d\n", cur_state, new_state);
609 return -EINVAL;
610 }
611
612 req_param = state_table[cur_state][new_state].req_param[qp->transport];
613 opt_param = state_table[cur_state][new_state].opt_param[qp->transport];
614
615 if ((req_param & attr_mask) != req_param) {
616 mthca_dbg(dev, "QP transition "
617 "%d->%d missing req attr 0x%08x\n",
618 cur_state, new_state,
619 req_param & ~attr_mask);
620 return -EINVAL;
621 }
622 537
623 if (attr_mask & ~(req_param | opt_param | IB_QP_STATE)) { 538 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
624 mthca_dbg(dev, "QP transition (transport %d) " 539 mthca_dbg(dev, "Bad QP transition (transport %d) "
625 "%d->%d has extra attr 0x%08x\n", 540 "%d->%d with attr 0x%08x\n",
626 qp->transport, 541 qp->transport, cur_state, new_state,
627 cur_state, new_state, 542 attr_mask);
628 attr_mask & ~(req_param | opt_param |
629 IB_QP_STATE));
630 return -EINVAL; 543 return -EINVAL;
631 } 544 }
632 545
633 if ((attr_mask & IB_QP_PKEY_INDEX) && 546 if ((attr_mask & IB_QP_PKEY_INDEX) &&
634 attr->pkey_index >= dev->limits.pkey_table_len) { 547 attr->pkey_index >= dev->limits.pkey_table_len) {
635 mthca_dbg(dev, "PKey index (%u) too large. max is %d\n", 548 mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
636 attr->pkey_index,dev->limits.pkey_table_len-1); 549 attr->pkey_index, dev->limits.pkey_table_len-1);
637 return -EINVAL; 550 return -EINVAL;
638 } 551 }
639 552
@@ -688,8 +601,14 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
688 601
689 if (qp->transport == MLX || qp->transport == UD) 602 if (qp->transport == MLX || qp->transport == UD)
690 qp_context->mtu_msgmax = (IB_MTU_2048 << 5) | 11; 603 qp_context->mtu_msgmax = (IB_MTU_2048 << 5) | 11;
691 else if (attr_mask & IB_QP_PATH_MTU) 604 else if (attr_mask & IB_QP_PATH_MTU) {
605 if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_2048) {
606 mthca_dbg(dev, "path MTU (%u) is invalid\n",
607 attr->path_mtu);
608 return -EINVAL;
609 }
692 qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31; 610 qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31;
611 }
693 612
694 if (mthca_is_memfree(dev)) { 613 if (mthca_is_memfree(dev)) {
695 if (qp->rq.max) 614 if (qp->rq.max)
@@ -733,12 +652,14 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
733 if (attr_mask & IB_QP_RNR_RETRY) { 652 if (attr_mask & IB_QP_RNR_RETRY) {
734 qp_context->alt_path.rnr_retry = qp_context->pri_path.rnr_retry = 653 qp_context->alt_path.rnr_retry = qp_context->pri_path.rnr_retry =
735 attr->rnr_retry << 5; 654 attr->rnr_retry << 5;
736 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY | 655 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY |
737 MTHCA_QP_OPTPAR_ALT_RNR_RETRY); 656 MTHCA_QP_OPTPAR_ALT_RNR_RETRY);
738 } 657 }
739 658
740 if (attr_mask & IB_QP_AV) { 659 if (attr_mask & IB_QP_AV) {
741 mthca_path_set(&attr->ah_attr, &qp_context->pri_path); 660 if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path))
661 return -EINVAL;
662
742 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH); 663 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
743 } 664 }
744 665
@@ -748,14 +669,22 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
748 } 669 }
749 670
750 if (attr_mask & IB_QP_ALT_PATH) { 671 if (attr_mask & IB_QP_ALT_PATH) {
672 if (attr->alt_pkey_index >= dev->limits.pkey_table_len) {
673 mthca_dbg(dev, "Alternate P_Key index (%u) too large. max is %d\n",
674 attr->alt_pkey_index, dev->limits.pkey_table_len-1);
675 return -EINVAL;
676 }
677
751 if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) { 678 if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) {
752 mthca_dbg(dev, "Alternate port number (%u) is invalid\n", 679 mthca_dbg(dev, "Alternate port number (%u) is invalid\n",
753 attr->alt_port_num); 680 attr->alt_port_num);
754 return -EINVAL; 681 return -EINVAL;
755 } 682 }
756 683
757 mthca_path_set(&attr->alt_ah_attr, &qp_context->alt_path); 684 if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path))
758 qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index | 685 return -EINVAL;
686
687 qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
759 attr->alt_port_num << 24); 688 attr->alt_port_num << 24);
760 qp_context->alt_path.ackto = attr->alt_timeout << 3; 689 qp_context->alt_path.ackto = attr->alt_timeout << 3;
761 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ALT_ADDR_PATH); 690 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ALT_ADDR_PATH);
@@ -841,23 +770,27 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
841 qp_context->srqn = cpu_to_be32(1 << 24 | 770 qp_context->srqn = cpu_to_be32(1 << 24 |
842 to_msrq(ibqp->srq)->srqn); 771 to_msrq(ibqp->srq)->srqn);
843 772
844 err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans, 773 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
845 qp->qpn, 0, mailbox, 0, &status); 774 attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY &&
775 attr->en_sqd_async_notify)
776 sqd_event = 1 << 31;
777
778 err = mthca_MODIFY_QP(dev, cur_state, new_state, qp->qpn, 0,
779 mailbox, sqd_event, &status);
780 if (err)
781 goto out;
846 if (status) { 782 if (status) {
847 mthca_warn(dev, "modify QP %d returned status %02x.\n", 783 mthca_warn(dev, "modify QP %d->%d returned status %02x.\n",
848 state_table[cur_state][new_state].trans, status); 784 cur_state, new_state, status);
849 err = -EINVAL; 785 err = -EINVAL;
786 goto out;
850 } 787 }
851 788
852 if (!err) { 789 qp->state = new_state;
853 qp->state = new_state; 790 if (attr_mask & IB_QP_ACCESS_FLAGS)
854 if (attr_mask & IB_QP_ACCESS_FLAGS) 791 qp->atomic_rd_en = attr->qp_access_flags;
855 qp->atomic_rd_en = attr->qp_access_flags; 792 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
856 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) 793 qp->resp_depth = attr->max_dest_rd_atomic;
857 qp->resp_depth = attr->max_dest_rd_atomic;
858 }
859
860 mthca_free_mailbox(dev, mailbox);
861 794
862 if (is_sqp(dev, qp)) 795 if (is_sqp(dev, qp))
863 store_attrs(to_msqp(qp), attr, attr_mask); 796 store_attrs(to_msqp(qp), attr, attr_mask);
@@ -882,7 +815,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
882 * If we moved a kernel QP to RESET, clean up all old CQ 815 * If we moved a kernel QP to RESET, clean up all old CQ
883 * entries and reinitialize the QP. 816 * entries and reinitialize the QP.
884 */ 817 */
885 if (!err && new_state == IB_QPS_RESET && !qp->ibqp.uobject) { 818 if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) {
886 mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn, 819 mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn,
887 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); 820 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
888 if (qp->ibqp.send_cq != qp->ibqp.recv_cq) 821 if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
@@ -901,6 +834,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
901 } 834 }
902 } 835 }
903 836
837out:
838 mthca_free_mailbox(dev, mailbox);
904 return err; 839 return err;
905} 840}
906 841
@@ -1078,10 +1013,10 @@ static int mthca_map_memfree(struct mthca_dev *dev,
1078 if (ret) 1013 if (ret)
1079 goto err_qpc; 1014 goto err_qpc;
1080 1015
1081 ret = mthca_table_get(dev, dev->qp_table.rdb_table, 1016 ret = mthca_table_get(dev, dev->qp_table.rdb_table,
1082 qp->qpn << dev->qp_table.rdb_shift); 1017 qp->qpn << dev->qp_table.rdb_shift);
1083 if (ret) 1018 if (ret)
1084 goto err_eqpc; 1019 goto err_eqpc;
1085 1020
1086 } 1021 }
1087 1022
@@ -1262,10 +1197,6 @@ int mthca_alloc_qp(struct mthca_dev *dev,
1262{ 1197{
1263 int err; 1198 int err;
1264 1199
1265 err = mthca_set_qp_size(dev, cap, pd, qp);
1266 if (err)
1267 return err;
1268
1269 switch (type) { 1200 switch (type) {
1270 case IB_QPT_RC: qp->transport = RC; break; 1201 case IB_QPT_RC: qp->transport = RC; break;
1271 case IB_QPT_UC: qp->transport = UC; break; 1202 case IB_QPT_UC: qp->transport = UC; break;
@@ -1273,6 +1204,10 @@ int mthca_alloc_qp(struct mthca_dev *dev,
1273 default: return -EINVAL; 1204 default: return -EINVAL;
1274 } 1205 }
1275 1206
1207 err = mthca_set_qp_size(dev, cap, pd, qp);
1208 if (err)
1209 return err;
1210
1276 qp->qpn = mthca_alloc(&dev->qp_table.alloc); 1211 qp->qpn = mthca_alloc(&dev->qp_table.alloc);
1277 if (qp->qpn == -1) 1212 if (qp->qpn == -1)
1278 return -ENOMEM; 1213 return -ENOMEM;
@@ -1305,6 +1240,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
1305 u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; 1240 u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
1306 int err; 1241 int err;
1307 1242
1243 sqp->qp.transport = MLX;
1308 err = mthca_set_qp_size(dev, cap, pd, &sqp->qp); 1244 err = mthca_set_qp_size(dev, cap, pd, &sqp->qp);
1309 if (err) 1245 if (err)
1310 return err; 1246 return err;
@@ -1393,7 +1329,8 @@ void mthca_free_qp(struct mthca_dev *dev,
1393 wait_event(qp->wait, !atomic_read(&qp->refcount)); 1329 wait_event(qp->wait, !atomic_read(&qp->refcount));
1394 1330
1395 if (qp->state != IB_QPS_RESET) 1331 if (qp->state != IB_QPS_RESET)
1396 mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status); 1332 mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0,
1333 NULL, 0, &status);
1397 1334
1398 /* 1335 /*
1399 * If this is a userspace QP, the buffers, MR, CQs and so on 1336 * If this is a userspace QP, the buffers, MR, CQs and so on
@@ -1699,7 +1636,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1699 mthca_opcode[wr->opcode]); 1636 mthca_opcode[wr->opcode]);
1700 wmb(); 1637 wmb();
1701 ((struct mthca_next_seg *) prev_wqe)->ee_nds = 1638 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
1702 cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size); 1639 cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size |
1640 ((wr->send_flags & IB_SEND_FENCE) ?
1641 MTHCA_NEXT_FENCE : 0));
1703 1642
1704 if (!size0) { 1643 if (!size0) {
1705 size0 = size; 1644 size0 = size;
@@ -2061,7 +2000,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2061 mthca_opcode[wr->opcode]); 2000 mthca_opcode[wr->opcode]);
2062 wmb(); 2001 wmb();
2063 ((struct mthca_next_seg *) prev_wqe)->ee_nds = 2002 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
2064 cpu_to_be32(MTHCA_NEXT_DBD | size); 2003 cpu_to_be32(MTHCA_NEXT_DBD | size |
2004 ((wr->send_flags & IB_SEND_FENCE) ?
2005 MTHCA_NEXT_FENCE : 0));
2065 2006
2066 if (!size0) { 2007 if (!size0) {
2067 size0 = size; 2008 size0 = size;
@@ -2115,7 +2056,7 @@ int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2115 int i; 2056 int i;
2116 void *wqe; 2057 void *wqe;
2117 2058
2118 spin_lock_irqsave(&qp->rq.lock, flags); 2059 spin_lock_irqsave(&qp->rq.lock, flags);
2119 2060
2120 /* XXX check that state is OK to post receive */ 2061 /* XXX check that state is OK to post receive */
2121 2062
@@ -2182,8 +2123,8 @@ out:
2182 return err; 2123 return err;
2183} 2124}
2184 2125
2185int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, 2126void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
2186 int index, int *dbd, __be32 *new_wqe) 2127 int index, int *dbd, __be32 *new_wqe)
2187{ 2128{
2188 struct mthca_next_seg *next; 2129 struct mthca_next_seg *next;
2189 2130
@@ -2193,7 +2134,7 @@ int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
2193 */ 2134 */
2194 if (qp->ibqp.srq) { 2135 if (qp->ibqp.srq) {
2195 *new_wqe = 0; 2136 *new_wqe = 0;
2196 return 0; 2137 return;
2197 } 2138 }
2198 2139
2199 if (is_send) 2140 if (is_send)
@@ -2207,8 +2148,6 @@ int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
2207 (next->ee_nds & cpu_to_be32(0x3f)); 2148 (next->ee_nds & cpu_to_be32(0x3f));
2208 else 2149 else
2209 *new_wqe = 0; 2150 *new_wqe = 0;
2210
2211 return 0;
2212} 2151}
2213 2152
2214int __devinit mthca_init_qp_table(struct mthca_dev *dev) 2153int __devinit mthca_init_qp_table(struct mthca_dev *dev)
@@ -2265,7 +2204,7 @@ int __devinit mthca_init_qp_table(struct mthca_dev *dev)
2265 return err; 2204 return err;
2266} 2205}
2267 2206
2268void __devexit mthca_cleanup_qp_table(struct mthca_dev *dev) 2207void mthca_cleanup_qp_table(struct mthca_dev *dev)
2269{ 2208{
2270 int i; 2209 int i;
2271 u8 status; 2210 u8 status;
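The large state_table removed above duplicated a check that the IB core now performs through ib_modify_qp_is_ok(): each (current state, next state, QP type) pair has a required and an optional attribute mask, and a modify call must supply every required attribute and nothing outside the two masks. The standalone sketch below shows just that mask test; the attribute bits and example masks are invented for illustration, not the core's actual tables.

/* Sketch of the attribute-mask check behind ib_modify_qp_is_ok(). */
#include <stdio.h>
#include <stdint.h>

#define QP_STATE       (1u << 0)
#define QP_PKEY_INDEX  (1u << 1)
#define QP_PORT        (1u << 2)
#define QP_QKEY        (1u << 3)

static int modify_qp_mask_ok(uint32_t attr_mask, uint32_t req, uint32_t opt)
{
    if ((attr_mask & req) != req)             /* a required attribute missing */
        return 0;
    if (attr_mask & ~(req | opt | QP_STATE))  /* an attribute not allowed here */
        return 0;
    return 1;
}

int main(void)
{
    /* Example: RESET->INIT for a UD QP (assumed masks). */
    uint32_t req = QP_PKEY_INDEX | QP_PORT | QP_QKEY;
    uint32_t opt = 0;

    printf("full mask ok: %d\n",
           modify_qp_mask_ok(QP_STATE | req, req, opt));                     /* 1 */
    printf("missing qkey: %d\n",
           modify_qp_mask_ok(QP_STATE | QP_PKEY_INDEX | QP_PORT, req, opt)); /* 0 */
    return 0;
}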
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index e7e153d9c4c6..2dd3aea05341 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -49,7 +49,8 @@ struct mthca_tavor_srq_context {
49 __be32 state_pd; 49 __be32 state_pd;
50 __be32 lkey; 50 __be32 lkey;
51 __be32 uar; 51 __be32 uar;
52 __be32 wqe_cnt; 52 __be16 limit_watermark;
53 __be16 wqe_cnt;
53 u32 reserved[2]; 54 u32 reserved[2];
54}; 55};
55 56
@@ -204,6 +205,10 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
204 ds = max(64UL, 205 ds = max(64UL,
205 roundup_pow_of_two(sizeof (struct mthca_next_seg) + 206 roundup_pow_of_two(sizeof (struct mthca_next_seg) +
206 srq->max_gs * sizeof (struct mthca_data_seg))); 207 srq->max_gs * sizeof (struct mthca_data_seg)));
208
209 if (!mthca_is_memfree(dev) && (ds > dev->limits.max_desc_sz))
210 return -EINVAL;
211
207 srq->wqe_shift = long_log2(ds); 212 srq->wqe_shift = long_log2(ds);
208 213
209 srq->srqn = mthca_alloc(&dev->srq_table.alloc); 214 srq->srqn = mthca_alloc(&dev->srq_table.alloc);
@@ -271,6 +276,9 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
271 srq->first_free = 0; 276 srq->first_free = 0;
272 srq->last_free = srq->max - 1; 277 srq->last_free = srq->max - 1;
273 278
279 attr->max_wr = (mthca_is_memfree(dev)) ? srq->max - 1 : srq->max;
280 attr->max_sge = srq->max_gs;
281
274 return 0; 282 return 0;
275 283
276err_out_free_srq: 284err_out_free_srq:
@@ -339,7 +347,7 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
339 347
340int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, 348int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
341 enum ib_srq_attr_mask attr_mask) 349 enum ib_srq_attr_mask attr_mask)
342{ 350{
343 struct mthca_dev *dev = to_mdev(ibsrq->device); 351 struct mthca_dev *dev = to_mdev(ibsrq->device);
344 struct mthca_srq *srq = to_msrq(ibsrq); 352 struct mthca_srq *srq = to_msrq(ibsrq);
345 int ret; 353 int ret;
@@ -350,6 +358,8 @@ int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
350 return -EINVAL; 358 return -EINVAL;
351 359
352 if (attr_mask & IB_SRQ_LIMIT) { 360 if (attr_mask & IB_SRQ_LIMIT) {
361 if (attr->srq_limit > srq->max)
362 return -EINVAL;
353 ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status); 363 ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status);
354 if (ret) 364 if (ret)
355 return ret; 365 return ret;
@@ -360,6 +370,41 @@ int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
360 return 0; 370 return 0;
361} 371}
362 372
373int mthca_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
374{
375 struct mthca_dev *dev = to_mdev(ibsrq->device);
376 struct mthca_srq *srq = to_msrq(ibsrq);
377 struct mthca_mailbox *mailbox;
378 struct mthca_arbel_srq_context *arbel_ctx;
379 struct mthca_tavor_srq_context *tavor_ctx;
380 u8 status;
381 int err;
382
383 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
384 if (IS_ERR(mailbox))
385 return PTR_ERR(mailbox);
386
387 err = mthca_QUERY_SRQ(dev, srq->srqn, mailbox, &status);
388 if (err)
389 goto out;
390
391 if (mthca_is_memfree(dev)) {
392 arbel_ctx = mailbox->buf;
393 srq_attr->srq_limit = be16_to_cpu(arbel_ctx->limit_watermark);
394 } else {
395 tavor_ctx = mailbox->buf;
396 srq_attr->srq_limit = be16_to_cpu(tavor_ctx->limit_watermark);
397 }
398
399 srq_attr->max_wr = (mthca_is_memfree(dev)) ? srq->max - 1 : srq->max;
400 srq_attr->max_sge = srq->max_gs;
401
402out:
403 mthca_free_mailbox(dev, mailbox);
404
405 return err;
406}
407
363void mthca_srq_event(struct mthca_dev *dev, u32 srqn, 408void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
364 enum ib_event_type event_type) 409 enum ib_event_type event_type)
365{ 410{
@@ -639,7 +684,7 @@ int __devinit mthca_init_srq_table(struct mthca_dev *dev)
639 return err; 684 return err;
640} 685}
641 686
642void __devexit mthca_cleanup_srq_table(struct mthca_dev *dev) 687void mthca_cleanup_srq_table(struct mthca_dev *dev)
643{ 688{
644 if (!(dev->mthca_flags & MTHCA_FLAG_SRQ)) 689 if (!(dev->mthca_flags & MTHCA_FLAG_SRQ))
645 return; 690 return;
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
index bb015c6494c4..02cc0a766f3a 100644
--- a/drivers/infiniband/hw/mthca/mthca_user.h
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved. 3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
@@ -75,6 +75,11 @@ struct mthca_create_cq_resp {
75 __u32 reserved; 75 __u32 reserved;
76}; 76};
77 77
78struct mthca_resize_cq {
79 __u32 lkey;
80 __u32 reserved;
81};
82
78struct mthca_create_srq { 83struct mthca_create_srq {
79 __u32 lkey; 84 __u32 lkey;
80 __u32 db_index; 85 __u32 db_index;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 2f85a9a831b1..b640107fb732 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -72,13 +72,14 @@ enum {
72 IPOIB_MAX_MCAST_QUEUE = 3, 72 IPOIB_MAX_MCAST_QUEUE = 3,
73 73
74 IPOIB_FLAG_OPER_UP = 0, 74 IPOIB_FLAG_OPER_UP = 0,
75 IPOIB_FLAG_ADMIN_UP = 1, 75 IPOIB_FLAG_INITIALIZED = 1,
76 IPOIB_PKEY_ASSIGNED = 2, 76 IPOIB_FLAG_ADMIN_UP = 2,
77 IPOIB_PKEY_STOP = 3, 77 IPOIB_PKEY_ASSIGNED = 3,
78 IPOIB_FLAG_SUBINTERFACE = 4, 78 IPOIB_PKEY_STOP = 4,
79 IPOIB_MCAST_RUN = 5, 79 IPOIB_FLAG_SUBINTERFACE = 5,
80 IPOIB_STOP_REAPER = 6, 80 IPOIB_MCAST_RUN = 6,
81 IPOIB_MCAST_STARTED = 7, 81 IPOIB_STOP_REAPER = 7,
82 IPOIB_MCAST_STARTED = 8,
82 83
83 IPOIB_MAX_BACKOFF_SECONDS = 16, 84 IPOIB_MAX_BACKOFF_SECONDS = 16,
84 85
@@ -217,10 +218,16 @@ struct ipoib_neigh {
217 struct list_head list; 218 struct list_head list;
218}; 219};
219 220
221/*
222 * We stash a pointer to our private neighbour information after our
223 * hardware address in neigh->ha. The ALIGN() expression here makes
224 * sure that this pointer is stored aligned so that an unaligned
225 * load is not needed to dereference it.
226 */
220static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh) 227static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh)
221{ 228{
222 return (struct ipoib_neigh **) (neigh->ha + 24 - 229 return (void*) neigh + ALIGN(offsetof(struct neighbour, ha) +
223 (offsetof(struct neighbour, ha) & 4)); 230 INFINIBAND_ALEN, sizeof(void *));
224} 231}
225 232
226extern struct workqueue_struct *ipoib_workqueue; 233extern struct workqueue_struct *ipoib_workqueue;
@@ -253,7 +260,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev);
253 260
254int ipoib_ib_dev_open(struct net_device *dev); 261int ipoib_ib_dev_open(struct net_device *dev);
255int ipoib_ib_dev_up(struct net_device *dev); 262int ipoib_ib_dev_up(struct net_device *dev);
256int ipoib_ib_dev_down(struct net_device *dev); 263int ipoib_ib_dev_down(struct net_device *dev, int flush);
257int ipoib_ib_dev_stop(struct net_device *dev); 264int ipoib_ib_dev_stop(struct net_device *dev);
258 265
259int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); 266int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
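The comment added to to_ipoib_neigh() above describes stashing a pointer directly after the hardware address in neigh->ha, at an offset rounded up to the pointer size so it can be dereferenced without an unaligned load. Below is a self-contained toy version of that trick; the struct layout and sizes are stand-ins, not the kernel's struct neighbour.

/* Standalone demo of the aligned pointer-stashing trick. */
#include <stdio.h>
#include <stddef.h>
#include <string.h>

#define INFINIBAND_ALEN 20
#define ALIGN(x, a)     (((x) + (a) - 1) & ~((size_t)(a) - 1))

struct neighbour {                 /* toy layout, not the kernel's */
    void         *parms;           /* keeps the struct pointer-aligned */
    unsigned char ha[32];          /* hardware address storage */
    unsigned char priv[64];        /* room that follows the address */
};

struct ipoib_neigh { int dummy; };

static struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh)
{
    /* First pointer-aligned offset after the 20-byte hardware address. */
    size_t off = ALIGN(offsetof(struct neighbour, ha) + INFINIBAND_ALEN,
                       sizeof(void *));

    return (struct ipoib_neigh **) ((char *) neigh + off);
}

int main(void)
{
    struct neighbour   n;
    struct ipoib_neigh me = { 42 };

    memset(&n, 0, sizeof(n));
    *to_ipoib_neigh(&n) = &me;                           /* stash */
    printf("stashed at offset %zu, got back %d\n",
           (size_t) ((char *) to_ipoib_neigh(&n) - (char *) &n),
           (*to_ipoib_neigh(&n))->dummy);                /* retrieve: 42 */
    return 0;
}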
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 86bcdd72a107..ed65202878d8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -416,25 +416,46 @@ int ipoib_ib_dev_open(struct net_device *dev)
416 ret = ipoib_ib_post_receives(dev); 416 ret = ipoib_ib_post_receives(dev);
417 if (ret) { 417 if (ret) {
418 ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); 418 ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
419 ipoib_ib_dev_stop(dev);
419 return -1; 420 return -1;
420 } 421 }
421 422
422 clear_bit(IPOIB_STOP_REAPER, &priv->flags); 423 clear_bit(IPOIB_STOP_REAPER, &priv->flags);
423 queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ); 424 queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
424 425
426 set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
427
425 return 0; 428 return 0;
426} 429}
427 430
431static void ipoib_pkey_dev_check_presence(struct net_device *dev)
432{
433 struct ipoib_dev_priv *priv = netdev_priv(dev);
434 u16 pkey_index = 0;
435
436 if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index))
437 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
438 else
439 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
440}
441
428int ipoib_ib_dev_up(struct net_device *dev) 442int ipoib_ib_dev_up(struct net_device *dev)
429{ 443{
430 struct ipoib_dev_priv *priv = netdev_priv(dev); 444 struct ipoib_dev_priv *priv = netdev_priv(dev);
431 445
446 ipoib_pkey_dev_check_presence(dev);
447
448 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
449 ipoib_dbg(priv, "PKEY is not assigned.\n");
450 return 0;
451 }
452
432 set_bit(IPOIB_FLAG_OPER_UP, &priv->flags); 453 set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
433 454
434 return ipoib_mcast_start_thread(dev); 455 return ipoib_mcast_start_thread(dev);
435} 456}
436 457
437int ipoib_ib_dev_down(struct net_device *dev) 458int ipoib_ib_dev_down(struct net_device *dev, int flush)
438{ 459{
439 struct ipoib_dev_priv *priv = netdev_priv(dev); 460 struct ipoib_dev_priv *priv = netdev_priv(dev);
440 461
@@ -449,10 +470,11 @@ int ipoib_ib_dev_down(struct net_device *dev)
449 set_bit(IPOIB_PKEY_STOP, &priv->flags); 470 set_bit(IPOIB_PKEY_STOP, &priv->flags);
450 cancel_delayed_work(&priv->pkey_task); 471 cancel_delayed_work(&priv->pkey_task);
451 mutex_unlock(&pkey_mutex); 472 mutex_unlock(&pkey_mutex);
452 flush_workqueue(ipoib_workqueue); 473 if (flush)
474 flush_workqueue(ipoib_workqueue);
453 } 475 }
454 476
455 ipoib_mcast_stop_thread(dev, 1); 477 ipoib_mcast_stop_thread(dev, flush);
456 ipoib_mcast_dev_flush(dev); 478 ipoib_mcast_dev_flush(dev);
457 479
458 ipoib_flush_paths(dev); 480 ipoib_flush_paths(dev);
@@ -481,6 +503,8 @@ int ipoib_ib_dev_stop(struct net_device *dev)
481 struct ipoib_tx_buf *tx_req; 503 struct ipoib_tx_buf *tx_req;
482 int i; 504 int i;
483 505
506 clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
507
484 /* 508 /*
485 * Move our QP to the error state and then reinitialize 509 * Move our QP to the error state and then reinitialize
486 * when all work requests have completed or have been flushed. 510 * when all work requests have completed or have been flushed.
@@ -585,12 +609,19 @@ void ipoib_ib_dev_flush(void *_dev)
585 struct net_device *dev = (struct net_device *)_dev; 609 struct net_device *dev = (struct net_device *)_dev;
586 struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv; 610 struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv;
587 611
588 if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 612 if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) {
613 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
589 return; 614 return;
615 }
616
617 if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
618 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
619 return;
620 }
590 621
591 ipoib_dbg(priv, "flushing\n"); 622 ipoib_dbg(priv, "flushing\n");
592 623
593 ipoib_ib_dev_down(dev); 624 ipoib_ib_dev_down(dev, 0);
594 625
595 /* 626 /*
596 * The device could have been brought down between the start and when 627 * The device could have been brought down between the start and when
@@ -603,7 +634,7 @@ void ipoib_ib_dev_flush(void *_dev)
603 634
604 /* Flush any child interfaces too */ 635 /* Flush any child interfaces too */
605 list_for_each_entry(cpriv, &priv->child_intfs, list) 636 list_for_each_entry(cpriv, &priv->child_intfs, list)
606 ipoib_ib_dev_flush(&cpriv->dev); 637 ipoib_ib_dev_flush(cpriv->dev);
607 638
608 mutex_unlock(&priv->vlan_mutex); 639 mutex_unlock(&priv->vlan_mutex);
609} 640}
@@ -630,17 +661,6 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
630 * change async notification is available. 661 * change async notification is available.
631 */ 662 */
632 663
633static void ipoib_pkey_dev_check_presence(struct net_device *dev)
634{
635 struct ipoib_dev_priv *priv = netdev_priv(dev);
636 u16 pkey_index = 0;
637
638 if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index))
639 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
640 else
641 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
642}
643
644void ipoib_pkey_poll(void *dev_ptr) 664void ipoib_pkey_poll(void *dev_ptr)
645{ 665{
646 struct net_device *dev = dev_ptr; 666 struct net_device *dev = dev_ptr;
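Aside: the ipoib_ib.c hunks above gate ipoib_ib_dev_up() on whether the interface's P_Key is actually present in the port's P_Key table before the interface goes operational. Below is a minimal userspace sketch of that gating pattern; every name here (fake_dev, find_pkey, FLAG_PKEY_ASSIGNED) is a hypothetical stand-in, not the real ib_find_cached_pkey()/IPoIB API.

#include <stdio.h>

/* Hypothetical stand-ins for the kernel pieces used above. */
#define FLAG_PKEY_ASSIGNED 0x1

struct fake_dev {
	unsigned int    flags;
	unsigned short  pkey;           /* P_Key the interface wants to use   */
	unsigned short *pkey_table;     /* P_Key table of the underlying port */
	int             pkey_table_len;
};

/* Mimics ib_find_cached_pkey(): 0 on success, -1 if the pkey is absent. */
static int find_pkey(const struct fake_dev *dev, int *index)
{
	for (int i = 0; i < dev->pkey_table_len; ++i)
		if (dev->pkey_table[i] == dev->pkey) {
			*index = i;
			return 0;
		}
	return -1;
}

static void check_pkey_presence(struct fake_dev *dev)
{
	int index;

	if (find_pkey(dev, &index))
		dev->flags &= ~FLAG_PKEY_ASSIGNED;
	else
		dev->flags |= FLAG_PKEY_ASSIGNED;
}

/* Bring-up path: refuse to go operational until the P_Key shows up. */
static int dev_up(struct fake_dev *dev)
{
	check_pkey_presence(dev);
	if (!(dev->flags & FLAG_PKEY_ASSIGNED)) {
		printf("PKEY is not assigned, deferring bring-up\n");
		return 0;       /* caller is expected to retry/poll later */
	}
	printf("P_Key present, continuing bring-up\n");
	return 1;
}

int main(void)
{
	unsigned short table[] = { 0xffff, 0x8001 };
	struct fake_dev dev = { .pkey = 0x8001, .pkey_table = table,
				.pkey_table_len = 2 };
	dev_up(&dev);
	return 0;
}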
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index c3b5f79d1168..9b0bd7c746ca 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -133,7 +133,13 @@ static int ipoib_stop(struct net_device *dev)
133 133
134 netif_stop_queue(dev); 134 netif_stop_queue(dev);
135 135
136 ipoib_ib_dev_down(dev); 136 /*
137 * Now flush workqueue to make sure a scheduled task doesn't
138 * bring our internal state back up.
139 */
140 flush_workqueue(ipoib_workqueue);
141
142 ipoib_ib_dev_down(dev, 1);
137 ipoib_ib_dev_stop(dev); 143 ipoib_ib_dev_stop(dev);
138 144
139 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 145 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
@@ -247,7 +253,6 @@ static void path_free(struct net_device *dev, struct ipoib_path *path)
247 if (neigh->ah) 253 if (neigh->ah)
248 ipoib_put_ah(neigh->ah); 254 ipoib_put_ah(neigh->ah);
249 *to_ipoib_neigh(neigh->neighbour) = NULL; 255 *to_ipoib_neigh(neigh->neighbour) = NULL;
250 neigh->neighbour->ops->destructor = NULL;
251 kfree(neigh); 256 kfree(neigh);
252 } 257 }
253 258
@@ -513,12 +518,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
513 be32_to_cpup((__be32 *) skb->dst->neighbour->ha)); 518 be32_to_cpup((__be32 *) skb->dst->neighbour->ha));
514 } else { 519 } else {
515 neigh->ah = NULL; 520 neigh->ah = NULL;
516 if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { 521 __skb_queue_tail(&neigh->queue, skb);
517 __skb_queue_tail(&neigh->queue, skb);
518 } else {
519 ++priv->stats.tx_dropped;
520 dev_kfree_skb_any(skb);
521 }
522 522
523 if (!path->query && path_rec_start(dev, path)) 523 if (!path->query && path_rec_start(dev, path))
524 goto err; 524 goto err;
@@ -530,7 +530,6 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
530err: 530err:
531 *to_ipoib_neigh(skb->dst->neighbour) = NULL; 531 *to_ipoib_neigh(skb->dst->neighbour) = NULL;
532 list_del(&neigh->list); 532 list_del(&neigh->list);
533 neigh->neighbour->ops->destructor = NULL;
534 kfree(neigh); 533 kfree(neigh);
535 534
536 ++priv->stats.tx_dropped; 535 ++priv->stats.tx_dropped;
@@ -724,7 +723,7 @@ static int ipoib_hard_header(struct sk_buff *skb,
724 * destination address onto the front of the skb so we can 723 * destination address onto the front of the skb so we can
725 * figure out where to send the packet later. 724 * figure out where to send the packet later.
726 */ 725 */
727 if (!skb->dst || !skb->dst->neighbour) { 726 if ((!skb->dst || !skb->dst->neighbour) && daddr) {
728 struct ipoib_pseudoheader *phdr = 727 struct ipoib_pseudoheader *phdr =
729 (struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr); 728 (struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr);
730 memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN); 729 memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
@@ -737,6 +736,11 @@ static void ipoib_set_mcast_list(struct net_device *dev)
737{ 736{
738 struct ipoib_dev_priv *priv = netdev_priv(dev); 737 struct ipoib_dev_priv *priv = netdev_priv(dev);
739 738
739 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
 740 		ipoib_dbg(priv, "IPOIB_FLAG_OPER_UP not set\n");
741 return;
742 }
743
740 queue_work(ipoib_workqueue, &priv->restart_task); 744 queue_work(ipoib_workqueue, &priv->restart_task);
741} 745}
742 746
@@ -769,21 +773,9 @@ static void ipoib_neigh_destructor(struct neighbour *n)
769 ipoib_put_ah(ah); 773 ipoib_put_ah(ah);
770} 774}
771 775
772static int ipoib_neigh_setup(struct neighbour *neigh)
773{
774 /*
775 * Is this kosher? I can't find anybody in the kernel that
776 * sets neigh->destructor, so we should be able to set it here
777 * without trouble.
778 */
779 neigh->ops->destructor = ipoib_neigh_destructor;
780
781 return 0;
782}
783
784static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms) 776static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms)
785{ 777{
786 parms->neigh_setup = ipoib_neigh_setup; 778 parms->neigh_destructor = ipoib_neigh_destructor;
787 779
788 return 0; 780 return 0;
789} 781}
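Aside: ipoib_stop() above now flushes the driver workqueue before calling ipoib_ib_dev_down(), so a task queued earlier cannot run after teardown and bring the internal state back up. Below is a self-contained sketch of that stop ordering with a toy single-slot work queue; the pthread-based queue is an illustration only, not the kernel workqueue API.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy single-slot work queue: one worker thread, at most one pending task. */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static void (*pending)(void);
static bool busy, stopping;

static void *worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	for (;;) {
		while (!pending && !stopping)
			pthread_cond_wait(&cond, &lock);
		if (!pending && stopping)
			break;
		void (*fn)(void) = pending;
		pending = NULL;
		busy = true;
		pthread_mutex_unlock(&lock);
		fn();                           /* run the queued task */
		pthread_mutex_lock(&lock);
		busy = false;
		pthread_cond_broadcast(&cond);  /* wake any flusher */
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

static void queue_task(void (*fn)(void))
{
	pthread_mutex_lock(&lock);
	pending = fn;
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
}

/* Like flush_workqueue(): returns only once nothing is queued or running. */
static void flush_tasks(void)
{
	pthread_mutex_lock(&lock);
	while (pending || busy)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
}

static void bring_up(void)  { puts("deferred bring-up task ran"); }
static void tear_down(void) { puts("tearing down"); }

int main(void)
{
	pthread_t t;
	pthread_create(&t, NULL, worker, NULL);

	queue_task(bring_up);

	/* Stop path: flush first so a queued task cannot run after teardown. */
	flush_tasks();
	tear_down();

	pthread_mutex_lock(&lock);
	stopping = true;
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	return 0;
}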
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index a2408d7ec598..93c462eaf4fd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -115,7 +115,6 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
115 if (neigh->ah) 115 if (neigh->ah)
116 ipoib_put_ah(neigh->ah); 116 ipoib_put_ah(neigh->ah);
117 *to_ipoib_neigh(neigh->neighbour) = NULL; 117 *to_ipoib_neigh(neigh->neighbour) = NULL;
118 neigh->neighbour->ops->destructor = NULL;
119 kfree(neigh); 118 kfree(neigh);
120 } 119 }
121 120
@@ -213,6 +212,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
213{ 212{
214 struct net_device *dev = mcast->dev; 213 struct net_device *dev = mcast->dev;
215 struct ipoib_dev_priv *priv = netdev_priv(dev); 214 struct ipoib_dev_priv *priv = netdev_priv(dev);
215 struct ipoib_ah *ah;
216 int ret; 216 int ret;
217 217
218 mcast->mcmember = *mcmember; 218 mcast->mcmember = *mcmember;
@@ -269,8 +269,8 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
269 av.static_rate, priv->local_rate, 269 av.static_rate, priv->local_rate,
270 ib_sa_rate_enum_to_int(mcast->mcmember.rate)); 270 ib_sa_rate_enum_to_int(mcast->mcmember.rate));
271 271
272 mcast->ah = ipoib_create_ah(dev, priv->pd, &av); 272 ah = ipoib_create_ah(dev, priv->pd, &av);
273 if (!mcast->ah) { 273 if (!ah) {
274 ipoib_warn(priv, "ib_address_create failed\n"); 274 ipoib_warn(priv, "ib_address_create failed\n");
275 } else { 275 } else {
276 ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT 276 ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT
@@ -280,6 +280,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
280 be16_to_cpu(mcast->mcmember.mlid), 280 be16_to_cpu(mcast->mcmember.mlid),
281 mcast->mcmember.sl); 281 mcast->mcmember.sl);
282 } 282 }
283
284 spin_lock_irq(&priv->lock);
285 mcast->ah = ah;
286 spin_unlock_irq(&priv->lock);
283 } 287 }
284 288
285 /* actually send any queued packets */ 289 /* actually send any queued packets */
@@ -432,9 +436,11 @@ static void ipoib_mcast_join_complete(int status,
432 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) 436 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
433 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; 437 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
434 438
439 mutex_lock(&mcast_mutex);
440
441 spin_lock_irq(&priv->lock);
435 mcast->query = NULL; 442 mcast->query = NULL;
436 443
437 mutex_lock(&mcast_mutex);
438 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) { 444 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) {
439 if (status == -ETIMEDOUT) 445 if (status == -ETIMEDOUT)
440 queue_work(ipoib_workqueue, &priv->mcast_task); 446 queue_work(ipoib_workqueue, &priv->mcast_task);
@@ -443,6 +449,7 @@ static void ipoib_mcast_join_complete(int status,
443 mcast->backoff * HZ); 449 mcast->backoff * HZ);
444 } else 450 } else
445 complete(&mcast->done); 451 complete(&mcast->done);
452 spin_unlock_irq(&priv->lock);
446 mutex_unlock(&mcast_mutex); 453 mutex_unlock(&mcast_mutex);
447 454
448 return; 455 return;
@@ -630,21 +637,27 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
630 if (flush) 637 if (flush)
631 flush_workqueue(ipoib_workqueue); 638 flush_workqueue(ipoib_workqueue);
632 639
640 spin_lock_irq(&priv->lock);
633 if (priv->broadcast && priv->broadcast->query) { 641 if (priv->broadcast && priv->broadcast->query) {
634 ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query); 642 ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query);
635 priv->broadcast->query = NULL; 643 priv->broadcast->query = NULL;
644 spin_unlock_irq(&priv->lock);
636 ipoib_dbg_mcast(priv, "waiting for bcast\n"); 645 ipoib_dbg_mcast(priv, "waiting for bcast\n");
637 wait_for_completion(&priv->broadcast->done); 646 wait_for_completion(&priv->broadcast->done);
638 } 647 } else
648 spin_unlock_irq(&priv->lock);
639 649
640 list_for_each_entry(mcast, &priv->multicast_list, list) { 650 list_for_each_entry(mcast, &priv->multicast_list, list) {
651 spin_lock_irq(&priv->lock);
641 if (mcast->query) { 652 if (mcast->query) {
642 ib_sa_cancel_query(mcast->query_id, mcast->query); 653 ib_sa_cancel_query(mcast->query_id, mcast->query);
643 mcast->query = NULL; 654 mcast->query = NULL;
655 spin_unlock_irq(&priv->lock);
644 ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n", 656 ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
645 IPOIB_GID_ARG(mcast->mcmember.mgid)); 657 IPOIB_GID_ARG(mcast->mcmember.mgid));
646 wait_for_completion(&mcast->done); 658 wait_for_completion(&mcast->done);
647 } 659 } else
660 spin_unlock_irq(&priv->lock);
648 } 661 }
649 662
650 return 0; 663 return 0;
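Aside: the multicast hunks above construct the address handle outside priv->lock and only then publish mcast->ah while holding the lock, so readers that take the same lock never observe a half-initialized pointer. A minimal sketch of that publish-under-lock pattern, with hypothetical ah/mcast types standing in for the IPoIB structures:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-ins: an "address handle" and the object that owns it. */
struct ah    { int lid; };
struct mcast { struct ah *ah; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;   /* plays priv->lock */

static struct ah *create_ah(int lid)
{
	struct ah *ah = malloc(sizeof(*ah));
	if (ah)
		ah->lid = lid;
	return ah;
}

/* Writer: fully construct the AH first, then publish it under the lock. */
static void join_finish(struct mcast *mcast, int lid)
{
	struct ah *ah = create_ah(lid); /* may fail; done outside the lock */

	pthread_mutex_lock(&lock);
	mcast->ah = ah;                 /* readers see either NULL or a complete AH */
	pthread_mutex_unlock(&lock);
}

/* Reader: takes the same lock, so it never sees a half-built AH. */
static int mcast_lid(struct mcast *mcast)
{
	int lid = -1;

	pthread_mutex_lock(&lock);
	if (mcast->ah)
		lid = mcast->ah->lid;
	pthread_mutex_unlock(&lock);
	return lid;
}

int main(void)
{
	struct mcast m = { .ah = NULL };
	join_finish(&m, 0x1234);
	printf("lid %#x\n", mcast_lid(&m));
	return 0;
}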
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index faaf10e5fc7b..5f0388027b25 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -251,10 +251,12 @@ void ipoib_event(struct ib_event_handler *handler,
251 struct ipoib_dev_priv *priv = 251 struct ipoib_dev_priv *priv =
252 container_of(handler, struct ipoib_dev_priv, event_handler); 252 container_of(handler, struct ipoib_dev_priv, event_handler);
253 253
254 if (record->event == IB_EVENT_PORT_ACTIVE || 254 if (record->event == IB_EVENT_PORT_ERR ||
255 record->event == IB_EVENT_PKEY_CHANGE ||
256 record->event == IB_EVENT_PORT_ACTIVE ||
255 record->event == IB_EVENT_LID_CHANGE || 257 record->event == IB_EVENT_LID_CHANGE ||
256 record->event == IB_EVENT_SM_CHANGE) { 258 record->event == IB_EVENT_SM_CHANGE) {
257 ipoib_dbg(priv, "Port active event\n"); 259 ipoib_dbg(priv, "Port state change event\n");
258 schedule_work(&priv->flush_task); 260 queue_work(ipoib_workqueue, &priv->flush_task);
259 } 261 }
260} 262}
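Aside: the event handler above now treats IB_EVENT_PORT_ERR and IB_EVENT_PKEY_CHANGE as flush triggers as well, and queues the flush on the driver's own workqueue instead of the shared one. A tiny sketch of the widened event filter, using made-up event codes rather than the real IB_EVENT_* values:

#include <stdio.h>

/* Hypothetical event codes standing in for the IB_EVENT_* values above. */
enum port_event {
	EV_PORT_ERR, EV_PKEY_CHANGE, EV_PORT_ACTIVE,
	EV_LID_CHANGE, EV_SM_CHANGE, EV_QP_FATAL /* ...anything else */
};

/* Events that invalidate cached paths/membership and so require a flush. */
static int needs_flush(enum port_event ev)
{
	switch (ev) {
	case EV_PORT_ERR:
	case EV_PKEY_CHANGE:
	case EV_PORT_ACTIVE:
	case EV_LID_CHANGE:
	case EV_SM_CHANGE:
		return 1;
	default:
		return 0;
	}
}

int main(void)
{
	printf("PORT_ERR -> flush: %d\n", needs_flush(EV_PORT_ERR));
	printf("QP_FATAL -> flush: %d\n", needs_flush(EV_QP_FATAL));
	return 0;
}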
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 960dae5c87d1..fd8a95a9c5d3 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -503,8 +503,10 @@ err:
503static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, 503static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
504 struct srp_request *req) 504 struct srp_request *req)
505{ 505{
506 struct scatterlist *scat;
506 struct srp_cmd *cmd = req->cmd->buf; 507 struct srp_cmd *cmd = req->cmd->buf;
507 int len; 508 int len, nents, count;
509 int i;
508 u8 fmt; 510 u8 fmt;
509 511
510 if (!scmnd->request_buffer || scmnd->sc_data_direction == DMA_NONE) 512 if (!scmnd->request_buffer || scmnd->sc_data_direction == DMA_NONE)
@@ -517,82 +519,66 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
517 return -EINVAL; 519 return -EINVAL;
518 } 520 }
519 521
520 if (scmnd->use_sg) { 522 /*
521 struct scatterlist *scat = scmnd->request_buffer; 523 * This handling of non-SG commands can be killed when the
522 int n; 524 * SCSI midlayer no longer generates non-SG commands.
523 int i; 525 */
524 526 if (likely(scmnd->use_sg)) {
525 n = dma_map_sg(target->srp_host->dev->dma_device, 527 nents = scmnd->use_sg;
526 scat, scmnd->use_sg, scmnd->sc_data_direction); 528 scat = scmnd->request_buffer;
529 } else {
530 nents = 1;
531 scat = &req->fake_sg;
532 sg_init_one(scat, scmnd->request_buffer, scmnd->request_bufflen);
533 }
527 534
528 if (n == 1) { 535 count = dma_map_sg(target->srp_host->dev->dma_device, scat, nents,
529 struct srp_direct_buf *buf = (void *) cmd->add_data; 536 scmnd->sc_data_direction);
530 537
531 fmt = SRP_DATA_DESC_DIRECT; 538 if (count == 1) {
539 struct srp_direct_buf *buf = (void *) cmd->add_data;
532 540
533 buf->va = cpu_to_be64(sg_dma_address(scat)); 541 fmt = SRP_DATA_DESC_DIRECT;
534 buf->key = cpu_to_be32(target->srp_host->mr->rkey);
535 buf->len = cpu_to_be32(sg_dma_len(scat));
536 542
537 len = sizeof (struct srp_cmd) + 543 buf->va = cpu_to_be64(sg_dma_address(scat));
538 sizeof (struct srp_direct_buf); 544 buf->key = cpu_to_be32(target->srp_host->mr->rkey);
539 } else { 545 buf->len = cpu_to_be32(sg_dma_len(scat));
540 struct srp_indirect_buf *buf = (void *) cmd->add_data;
541 u32 datalen = 0;
542 546
543 fmt = SRP_DATA_DESC_INDIRECT; 547 len = sizeof (struct srp_cmd) +
548 sizeof (struct srp_direct_buf);
549 } else {
550 struct srp_indirect_buf *buf = (void *) cmd->add_data;
551 u32 datalen = 0;
544 552
545 if (scmnd->sc_data_direction == DMA_TO_DEVICE) 553 fmt = SRP_DATA_DESC_INDIRECT;
546 cmd->data_out_desc_cnt = n;
547 else
548 cmd->data_in_desc_cnt = n;
549 554
550 buf->table_desc.va = cpu_to_be64(req->cmd->dma + 555 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
551 sizeof *cmd + 556 cmd->data_out_desc_cnt = count;
552 sizeof *buf); 557 else
553 buf->table_desc.key = 558 cmd->data_in_desc_cnt = count;
559
560 buf->table_desc.va = cpu_to_be64(req->cmd->dma +
561 sizeof *cmd +
562 sizeof *buf);
563 buf->table_desc.key =
564 cpu_to_be32(target->srp_host->mr->rkey);
565 buf->table_desc.len =
566 cpu_to_be32(count * sizeof (struct srp_direct_buf));
567
568 for (i = 0; i < count; ++i) {
569 buf->desc_list[i].va = cpu_to_be64(sg_dma_address(&scat[i]));
570 buf->desc_list[i].key =
554 cpu_to_be32(target->srp_host->mr->rkey); 571 cpu_to_be32(target->srp_host->mr->rkey);
555 buf->table_desc.len = 572 buf->desc_list[i].len = cpu_to_be32(sg_dma_len(&scat[i]));
556 cpu_to_be32(n * sizeof (struct srp_direct_buf));
557
558 for (i = 0; i < n; ++i) {
559 buf->desc_list[i].va = cpu_to_be64(sg_dma_address(&scat[i]));
560 buf->desc_list[i].key =
561 cpu_to_be32(target->srp_host->mr->rkey);
562 buf->desc_list[i].len = cpu_to_be32(sg_dma_len(&scat[i]));
563
564 datalen += sg_dma_len(&scat[i]);
565 }
566
567 buf->len = cpu_to_be32(datalen);
568 573
569 len = sizeof (struct srp_cmd) + 574 datalen += sg_dma_len(&scat[i]);
570 sizeof (struct srp_indirect_buf) +
571 n * sizeof (struct srp_direct_buf);
572 }
573 } else {
574 struct srp_direct_buf *buf = (void *) cmd->add_data;
575 dma_addr_t dma;
576
577 dma = dma_map_single(target->srp_host->dev->dma_device,
578 scmnd->request_buffer, scmnd->request_bufflen,
579 scmnd->sc_data_direction);
580 if (dma_mapping_error(dma)) {
581 printk(KERN_WARNING PFX "unable to map %p/%d (dir %d)\n",
582 scmnd->request_buffer, (int) scmnd->request_bufflen,
583 scmnd->sc_data_direction);
584 return -EINVAL;
585 } 575 }
586 576
587 pci_unmap_addr_set(req, direct_mapping, dma); 577 buf->len = cpu_to_be32(datalen);
588
589 buf->va = cpu_to_be64(dma);
590 buf->key = cpu_to_be32(target->srp_host->mr->rkey);
591 buf->len = cpu_to_be32(scmnd->request_bufflen);
592 578
593 fmt = SRP_DATA_DESC_DIRECT; 579 len = sizeof (struct srp_cmd) +
594 580 sizeof (struct srp_indirect_buf) +
595 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); 581 count * sizeof (struct srp_direct_buf);
596 } 582 }
597 583
598 if (scmnd->sc_data_direction == DMA_TO_DEVICE) 584 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
@@ -600,7 +586,6 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
600 else 586 else
601 cmd->buf_fmt = fmt; 587 cmd->buf_fmt = fmt;
602 588
603
604 return len; 589 return len;
605} 590}
606 591
@@ -608,20 +593,28 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
608 struct srp_target_port *target, 593 struct srp_target_port *target,
609 struct srp_request *req) 594 struct srp_request *req)
610{ 595{
596 struct scatterlist *scat;
597 int nents;
598
611 if (!scmnd->request_buffer || 599 if (!scmnd->request_buffer ||
612 (scmnd->sc_data_direction != DMA_TO_DEVICE && 600 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
613 scmnd->sc_data_direction != DMA_FROM_DEVICE)) 601 scmnd->sc_data_direction != DMA_FROM_DEVICE))
614 return; 602 return;
615 603
616 if (scmnd->use_sg) 604 /*
617 dma_unmap_sg(target->srp_host->dev->dma_device, 605 * This handling of non-SG commands can be killed when the
618 (struct scatterlist *) scmnd->request_buffer, 606 * SCSI midlayer no longer generates non-SG commands.
619 scmnd->use_sg, scmnd->sc_data_direction); 607 */
620 else 608 if (likely(scmnd->use_sg)) {
621 dma_unmap_single(target->srp_host->dev->dma_device, 609 nents = scmnd->use_sg;
622 pci_unmap_addr(req, direct_mapping), 610 scat = scmnd->request_buffer;
623 scmnd->request_bufflen, 611 } else {
624 scmnd->sc_data_direction); 612 nents = 1;
613 scat = &req->fake_sg;
614 }
615
616 dma_unmap_sg(target->srp_host->dev->dma_device, scat, nents,
617 scmnd->sc_data_direction);
625} 618}
626 619
627static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) 620static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
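Aside: the srp_map_data()/srp_unmap_data() rework above funnels non-scatter/gather commands through a one-entry "fake" scatterlist (req->fake_sg set up with sg_init_one) so a single dma_map_sg()/dma_unmap_sg() path handles both cases. Below is a standalone sketch of that normalization idea; struct seg, struct request and get_segments() are hypothetical stand-ins, not the scatterlist API.

#include <stddef.h>
#include <stdio.h>

/* Hypothetical segment type playing the role of struct scatterlist. */
struct seg {
	void   *addr;
	size_t  len;
};

struct request {
	struct seg fake_sg;     /* one-entry fallback, like req->fake_sg above */
};

/*
 * Normalize either form of buffer into (segments, count) so the rest of
 * the mapping/unmapping code has exactly one path to deal with.
 */
static struct seg *get_segments(struct request *req,
				struct seg *sg_list, int sg_count,
				void *flat_buf, size_t flat_len,
				int *count)
{
	if (sg_list && sg_count) {      /* already scatter/gather */
		*count = sg_count;
		return sg_list;
	}

	/* Flat buffer: fake a one-entry list, as sg_init_one() does above. */
	req->fake_sg.addr = flat_buf;
	req->fake_sg.len  = flat_len;
	*count = 1;
	return &req->fake_sg;
}

int main(void)
{
	struct request req;
	char buf[512];
	int count;

	struct seg *sg = get_segments(&req, NULL, 0, buf, sizeof buf, &count);
	printf("%d segment(s), first len %zu\n", count, sg[0].len);
	return 0;
}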
@@ -1237,6 +1230,87 @@ static int srp_reset_host(struct scsi_cmnd *scmnd)
1237 return ret; 1230 return ret;
1238} 1231}
1239 1232
1233static ssize_t show_id_ext(struct class_device *cdev, char *buf)
1234{
1235 struct srp_target_port *target = host_to_target(class_to_shost(cdev));
1236
1237 if (target->state == SRP_TARGET_DEAD ||
1238 target->state == SRP_TARGET_REMOVED)
1239 return -ENODEV;
1240
1241 return sprintf(buf, "0x%016llx\n",
1242 (unsigned long long) be64_to_cpu(target->id_ext));
1243}
1244
1245static ssize_t show_ioc_guid(struct class_device *cdev, char *buf)
1246{
1247 struct srp_target_port *target = host_to_target(class_to_shost(cdev));
1248
1249 if (target->state == SRP_TARGET_DEAD ||
1250 target->state == SRP_TARGET_REMOVED)
1251 return -ENODEV;
1252
1253 return sprintf(buf, "0x%016llx\n",
1254 (unsigned long long) be64_to_cpu(target->ioc_guid));
1255}
1256
1257static ssize_t show_service_id(struct class_device *cdev, char *buf)
1258{
1259 struct srp_target_port *target = host_to_target(class_to_shost(cdev));
1260
1261 if (target->state == SRP_TARGET_DEAD ||
1262 target->state == SRP_TARGET_REMOVED)
1263 return -ENODEV;
1264
1265 return sprintf(buf, "0x%016llx\n",
1266 (unsigned long long) be64_to_cpu(target->service_id));
1267}
1268
1269static ssize_t show_pkey(struct class_device *cdev, char *buf)
1270{
1271 struct srp_target_port *target = host_to_target(class_to_shost(cdev));
1272
1273 if (target->state == SRP_TARGET_DEAD ||
1274 target->state == SRP_TARGET_REMOVED)
1275 return -ENODEV;
1276
1277 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey));
1278}
1279
1280static ssize_t show_dgid(struct class_device *cdev, char *buf)
1281{
1282 struct srp_target_port *target = host_to_target(class_to_shost(cdev));
1283
1284 if (target->state == SRP_TARGET_DEAD ||
1285 target->state == SRP_TARGET_REMOVED)
1286 return -ENODEV;
1287
1288 return sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
1289 be16_to_cpu(((__be16 *) target->path.dgid.raw)[0]),
1290 be16_to_cpu(((__be16 *) target->path.dgid.raw)[1]),
1291 be16_to_cpu(((__be16 *) target->path.dgid.raw)[2]),
1292 be16_to_cpu(((__be16 *) target->path.dgid.raw)[3]),
1293 be16_to_cpu(((__be16 *) target->path.dgid.raw)[4]),
1294 be16_to_cpu(((__be16 *) target->path.dgid.raw)[5]),
1295 be16_to_cpu(((__be16 *) target->path.dgid.raw)[6]),
1296 be16_to_cpu(((__be16 *) target->path.dgid.raw)[7]));
1297}
1298
1299static CLASS_DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
1300static CLASS_DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
1301static CLASS_DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
1302static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
1303static CLASS_DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
1304
1305static struct class_device_attribute *srp_host_attrs[] = {
1306 &class_device_attr_id_ext,
1307 &class_device_attr_ioc_guid,
1308 &class_device_attr_service_id,
1309 &class_device_attr_pkey,
1310 &class_device_attr_dgid,
1311 NULL
1312};
1313
1240static struct scsi_host_template srp_template = { 1314static struct scsi_host_template srp_template = {
1241 .module = THIS_MODULE, 1315 .module = THIS_MODULE,
1242 .name = DRV_NAME, 1316 .name = DRV_NAME,
@@ -1249,7 +1323,8 @@ static struct scsi_host_template srp_template = {
1249 .this_id = -1, 1323 .this_id = -1,
1250 .sg_tablesize = SRP_MAX_INDIRECT, 1324 .sg_tablesize = SRP_MAX_INDIRECT,
1251 .cmd_per_lun = SRP_SQ_SIZE, 1325 .cmd_per_lun = SRP_SQ_SIZE,
1252 .use_clustering = ENABLE_CLUSTERING 1326 .use_clustering = ENABLE_CLUSTERING,
1327 .shost_attrs = srp_host_attrs
1253}; 1328};
1254 1329
1255static int srp_add_target(struct srp_host *host, struct srp_target_port *target) 1330static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
@@ -1366,6 +1441,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
1366 strlcpy(dgid, p + i * 2, 3); 1441 strlcpy(dgid, p + i * 2, 3);
1367 target->path.dgid.raw[i] = simple_strtoul(dgid, NULL, 16); 1442 target->path.dgid.raw[i] = simple_strtoul(dgid, NULL, 16);
1368 } 1443 }
1444 kfree(p);
1369 break; 1445 break;
1370 1446
1371 case SRP_OPT_PKEY: 1447 case SRP_OPT_PKEY:
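Aside: the added kfree(p) above releases the token duplicated while parsing the destination GID, closing a small memory leak in srp_parse_options(). A plain-C sketch of the same parse-then-free pattern, using strdup()/free() in place of match_strdup()/kfree():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Parse a hex GID-like string into bytes, freeing the duplicated token on
 * every exit path (the pattern the kfree() above restores).
 */
static int parse_gid(const char *token, unsigned char *raw, int raw_len)
{
	char *p = strdup(token);        /* plays match_strdup() */
	char byte[3] = { 0 };
	int ret = 0;

	if (!p)
		return -1;

	if ((int)strlen(p) != raw_len * 2) {
		ret = -1;
		goto out;
	}

	for (int i = 0; i < raw_len; ++i) {
		memcpy(byte, p + i * 2, 2);
		raw[i] = (unsigned char)strtoul(byte, NULL, 16);
	}

out:
	free(p);                        /* don't leak the copy */
	return ret;
}

int main(void)
{
	unsigned char raw[16];

	if (!parse_gid("fe800000000000000002c90200402bd5", raw, 16))
		printf("first byte %#x\n", raw[0]);
	return 0;
}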
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 4e7727df32f1..bd7f7c3115de 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -38,6 +38,7 @@
38#include <linux/types.h> 38#include <linux/types.h>
39#include <linux/list.h> 39#include <linux/list.h>
40#include <linux/mutex.h> 40#include <linux/mutex.h>
41#include <linux/scatterlist.h>
41 42
42#include <scsi/scsi_host.h> 43#include <scsi/scsi_host.h>
43#include <scsi/scsi_cmnd.h> 44#include <scsi/scsi_cmnd.h>
@@ -94,7 +95,11 @@ struct srp_request {
94 struct scsi_cmnd *scmnd; 95 struct scsi_cmnd *scmnd;
95 struct srp_iu *cmd; 96 struct srp_iu *cmd;
96 struct srp_iu *tsk_mgmt; 97 struct srp_iu *tsk_mgmt;
97 DECLARE_PCI_UNMAP_ADDR(direct_mapping) 98 /*
99 * Fake scatterlist used when scmnd->use_sg==0. Can be killed
100 * when the SCSI midlayer no longer generates non-SG commands.
101 */
102 struct scatterlist fake_sg;
98 struct completion done; 103 struct completion done;
99 short next; 104 short next;
100 u8 cmd_done; 105 u8 cmd_done;