path: root/drivers/infiniband
author	Steve Wise <swise@opengridcomputing.com>	2008-04-29 16:46:52 -0400
committer	Roland Dreier <rolandd@cisco.com>	2008-04-29 16:46:52 -0400
commit	f8b0dfd15277974b5c9f3ff17f9e3ab6fdbe45ee
tree	34e393cd342578f9ff223be2b631af7ab9b418aa /drivers/infiniband
parent	ccaf10d0ad17bf755750160ebe594de7261a893e
RDMA/cxgb3: Support peer-2-peer connection setup
Open MPI, Intel MPI and other applications don't respect the iWARP
requirement that the client (active) side of the connection send the
first RDMA message.  This class of application connection setup is
called peer-to-peer.  Typically once the connection is set up, _both_
sides want to send data.

This patch enables supporting peer-to-peer over the Chelsio RNIC by
enforcing this iWARP requirement in the driver itself as part of RDMA
connection setup.

Connection setup is extended, when the peer2peer module option is 1,
such that the MPA initiator will send a 0B Read (the RTR) just after
connection setup.  The MPA responder will suspend SQ processing until
the RTR message is received and replied to.

In the longer term, this will be handled in a standardized way by
enhancing the MPA negotiation so peers can indicate whether they
want/need the RTR and what type of RTR (0B read, 0B write, or 0B send)
should be sent.  This will be done by standardizing a few bits of the
private data in order to negotiate all this.  However, this patch
enables peer-to-peer applications now and allows most of the required
firmware and driver changes to be done and tested now.

Design:

 - Add a module option, peer2peer, to enable this mode.

 - New firmware support for peer-to-peer mode:

   - a new bit in the rdma_init WR to tell it to do peer-2-peer
     and what form of RTR message to send or expect.

   - process _all_ preposted recvs before moving the connection
     into rdma mode.

   - passive side: defer completing the rdma_init WR until all
     pre-posted recvs are processed.  Suspend SQ processing until
     the RTR is received.

   - active side: expect and process the 0B read WR on the offload TX
     queue.  Defer completing the rdma_init WR until all pre-posted
     recvs are processed.  Suspend SQ processing until the 0B read WR
     is processed from the offload TX queue.

 - If peer2peer is set, the driver posts a 0B read request on the
   offload TX queue just after posting the rdma_init WR to the
   offload TX queue.

 - Add CQ poll logic to ignore unsolicited read responses.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
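As a reading aid for the design notes above, here is a minimal stand-alone
sketch (not part of the patch) of how the new 16-bit flags_rtr_type field of
the rdma_init WR packs the existing flags together with the RTR type.  It
reuses the S_RTR_TYPE/M_RTR_TYPE/V_RTR_TYPE/G_RTR_TYPE macros and the
rdma_init_rtr_types and rdma_init_wr_flags enums added to cxio_wr.h below;
the stdint/stdio scaffolding and main() are purely illustrative.

#include <stdint.h>
#include <stdio.h>

/* Macros and enums as added to cxio_wr.h by this patch. */
#define S_RTR_TYPE	2
#define M_RTR_TYPE	0x3
#define V_RTR_TYPE(x)	((x) << S_RTR_TYPE)
#define G_RTR_TYPE(x)	((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)

enum rdma_init_rtr_types { RTR_READ = 1, RTR_WRITE = 2, RTR_SEND = 3 };
enum rdma_init_wr_flags { MPA_INITIATOR = (1 << 0), PRIV_QP = (1 << 1) };

int main(void)
{
	/*
	 * Active (MPA initiator) side of a privileged QP asking the
	 * firmware for a 0B read RTR, as rdma_init() does when the
	 * peer2peer option is set.
	 */
	uint16_t flags_rtr_type = (MPA_INITIATOR | PRIV_QP) |
				  V_RTR_TYPE(RTR_READ);

	printf("flags_rtr_type = 0x%04x, rtr_type = %u\n",
	       (unsigned)flags_rtr_type,
	       (unsigned)G_RTR_TYPE(flags_rtr_type));
	/* Prints: flags_rtr_type = 0x0007, rtr_type = 1 (RTR_READ) */
	return 0;
}

At runtime the mode is selected via the new peer2peer module parameter added
to iwch_cm.c (default 0, permissions 0644), so it would typically be enabled
at module load time, e.g. with something like "modprobe iw_cxgb3 peer2peer=1",
assuming the usual iw_cxgb3 module name for this driver.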
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--	drivers/infiniband/hw/cxgb3/cxio_hal.c		18
-rw-r--r--	drivers/infiniband/hw/cxgb3/cxio_wr.h		21
-rw-r--r--	drivers/infiniband/hw/cxgb3/iwch_cm.c		67
-rw-r--r--	drivers/infiniband/hw/cxgb3/iwch_cm.h		 1
-rw-r--r--	drivers/infiniband/hw/cxgb3/iwch_provider.h	 3
-rw-r--r--	drivers/infiniband/hw/cxgb3/iwch_qp.c		54
6 files changed, 136 insertions, 28 deletions
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 66eb7030aea8..ed2ee4ba4b7c 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -456,7 +456,8 @@ void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
 	ptr = cq->sw_rptr;
 	while (!Q_EMPTY(ptr, cq->sw_wptr)) {
 		cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
-		if ((SQ_TYPE(*cqe) || (CQE_OPCODE(*cqe) == T3_READ_RESP)) &&
+		if ((SQ_TYPE(*cqe) ||
+		     ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) &&
 		    (CQE_QPID(*cqe) == wq->qpid))
 			(*count)++;
 		ptr++;
@@ -829,7 +830,8 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
 	wqe->mpaattrs = attr->mpaattrs;
 	wqe->qpcaps = attr->qpcaps;
 	wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
-	wqe->flags = cpu_to_be32(attr->flags);
+	wqe->rqe_count = cpu_to_be16(attr->rqe_count);
+	wqe->flags_rtr_type = cpu_to_be16(attr->flags|V_RTR_TYPE(attr->rtr_type));
 	wqe->ord = cpu_to_be32(attr->ord);
 	wqe->ird = cpu_to_be32(attr->ird);
 	wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
@@ -1135,6 +1137,18 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
 	if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) {
 
 		/*
+		 * If this is an unsolicited read response, then the read
+		 * was generated by the kernel driver as part of peer-2-peer
+		 * connection setup. So ignore the completion.
+		 */
+		if (!wq->oldest_read) {
+			if (CQE_STATUS(*hw_cqe))
+				wq->error = 1;
+			ret = -1;
+			goto skip_cqe;
+		}
+
+		/*
 		 * Don't write to the HWCQ, so create a new read req CQE
 		 * in local memory.
 		 */
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index 969d4d928455..f1a25a821a45 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -278,6 +278,17 @@ enum t3_qp_caps {
 	uP_RI_QP_STAG0_ENABLE = 0x10
 } __attribute__ ((packed));
 
+enum rdma_init_rtr_types {
+	RTR_READ = 1,
+	RTR_WRITE = 2,
+	RTR_SEND = 3,
+};
+
+#define S_RTR_TYPE	2
+#define M_RTR_TYPE	0x3
+#define V_RTR_TYPE(x)	((x) << S_RTR_TYPE)
+#define G_RTR_TYPE(x)	((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
+
 struct t3_rdma_init_attr {
 	u32 tid;
 	u32 qpid;
@@ -293,7 +304,9 @@ struct t3_rdma_init_attr {
 	u32 ird;
 	u64 qp_dma_addr;
 	u32 qp_dma_size;
-	u32 flags;
+	enum rdma_init_rtr_types rtr_type;
+	u16 flags;
+	u16 rqe_count;
 	u32 irs;
 };
 
@@ -309,8 +322,8 @@ struct t3_rdma_init_wr {
 	u8 mpaattrs;		/* 5 */
 	u8 qpcaps;
 	__be16 ulpdu_size;
-	__be32 flags;		/* bits 31-1 - reservered */
-	/* bit 0 - set if RECV posted */
+	__be16 flags_rtr_type;
+	__be16 rqe_count;
 	__be32 ord;		/* 6 */
 	__be32 ird;
 	__be64 qp_dma_addr;	/* 7 */
@@ -324,7 +337,7 @@ struct t3_genbit {
 };
 
 enum rdma_init_wr_flags {
-	RECVS_POSTED = (1<<0),
+	MPA_INITIATOR = (1<<0),
 	PRIV_QP = (1<<1),
 };
 
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 0b515d899f6c..d44a6df9ad8c 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -63,6 +63,10 @@ static char *states[] = {
 	NULL,
 };
 
+int peer2peer = 0;
+module_param(peer2peer, int, 0644);
+MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
+
 static int ep_timeout_secs = 10;
 module_param(ep_timeout_secs, int, 0644);
 MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
@@ -514,7 +518,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
 	skb_reset_transport_header(skb);
 	len = skb->len;
 	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
-	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
 	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
 	req->len = htonl(len);
 	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
@@ -565,7 +569,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
 	set_arp_failure_handler(skb, arp_failure_discard);
 	skb_reset_transport_header(skb);
 	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
-	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
 	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
 	req->len = htonl(mpalen);
 	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
@@ -617,7 +621,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
 	skb_reset_transport_header(skb);
 	len = skb->len;
 	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
-	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
 	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
 	req->len = htonl(len);
 	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
@@ -885,6 +889,7 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
 	 * the MPA header is valid.
 	 */
 	state_set(&ep->com, FPDU_MODE);
+	ep->mpa_attr.initiator = 1;
 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@@ -907,8 +912,14 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
 	/* bind QP and TID with INIT_WR */
 	err = iwch_modify_qp(ep->com.qp->rhp,
 			     ep->com.qp, mask, &attrs, 1);
-	if (!err)
-		goto out;
+	if (err)
+		goto err;
+
+	if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) {
+		iwch_post_zb_read(ep->com.qp);
+	}
+
+	goto out;
 err:
 	abort_connection(ep, skb, GFP_KERNEL);
 out:
@@ -1001,6 +1012,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
 	 * If we get here we have accumulated the entire mpa
 	 * start reply message including private data.
 	 */
+	ep->mpa_attr.initiator = 0;
 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@@ -1071,17 +1083,33 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 
 	PDBG("%s ep %p credits %u\n", __func__, ep, credits);
 
-	if (credits == 0)
+	if (credits == 0) {
+		PDBG(KERN_ERR "%s 0 credit ack ep %p state %u\n",
+		     __func__, ep, state_read(&ep->com));
 		return CPL_RET_BUF_DONE;
+	}
+
 	BUG_ON(credits != 1);
-	BUG_ON(ep->mpa_skb == NULL);
-	kfree_skb(ep->mpa_skb);
-	ep->mpa_skb = NULL;
 	dst_confirm(ep->dst);
-	if (state_read(&ep->com) == MPA_REP_SENT) {
-		ep->com.rpl_done = 1;
-		PDBG("waking up ep %p\n", ep);
-		wake_up(&ep->com.waitq);
+	if (!ep->mpa_skb) {
+		PDBG("%s rdma_init wr_ack ep %p state %u\n",
+		     __func__, ep, state_read(&ep->com));
+		if (ep->mpa_attr.initiator) {
+			PDBG("%s initiator ep %p state %u\n",
+			     __func__, ep, state_read(&ep->com));
+			if (peer2peer)
+				iwch_post_zb_read(ep->com.qp);
+		} else {
+			PDBG("%s responder ep %p state %u\n",
+			     __func__, ep, state_read(&ep->com));
+			ep->com.rpl_done = 1;
+			wake_up(&ep->com.waitq);
+		}
+	} else {
+		PDBG("%s lsm ack ep %p state %u freeing skb\n",
+		     __func__, ep, state_read(&ep->com));
+		kfree_skb(ep->mpa_skb);
+		ep->mpa_skb = NULL;
 	}
 	return CPL_RET_BUF_DONE;
 }
@@ -1795,16 +1823,19 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	if (err)
 		goto err;
 
+	/* if needed, wait for wr_ack */
+	if (iwch_rqes_posted(qp)) {
+		wait_event(ep->com.waitq, ep->com.rpl_done);
+		err = ep->com.rpl_err;
+		if (err)
+			goto err;
+	}
+
 	err = send_mpa_reply(ep, conn_param->private_data,
 			     conn_param->private_data_len);
 	if (err)
 		goto err;
 
-	/* wait for wr_ack */
-	wait_event(ep->com.waitq, ep->com.rpl_done);
-	err = ep->com.rpl_err;
-	if (err)
-		goto err;
 
 	state_set(&ep->com, FPDU_MODE);
 	established_upcall(ep);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index a2f1b787d970..d7c7e09f0996 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -226,5 +226,6 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, st
 
 int __init iwch_cm_init(void);
 void __exit iwch_cm_term(void);
+extern int peer2peer;
 
 #endif /* _IWCH_CM_H_ */
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 61356f91109d..db5100d27ca2 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -118,6 +118,7 @@ enum IWCH_QP_FLAGS {
 };
 
 struct iwch_mpa_attributes {
+	u8 initiator;
 	u8 recv_marker_enabled;
 	u8 xmit_marker_enabled;	/* iWARP: enable inbound Read Resp. */
 	u8 crc_enabled;
@@ -322,6 +323,7 @@ enum iwch_qp_query_flags {
 	IWCH_QP_QUERY_TEST_USERWRITE = 0x32	/* Test special */
 };
 
+u16 iwch_rqes_posted(struct iwch_qp *qhp);
 int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		   struct ib_send_wr **bad_wr);
 int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
@@ -331,6 +333,7 @@ int iwch_bind_mw(struct ib_qp *qp,
 		struct ib_mw_bind *mw_bind);
 int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
 int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
+int iwch_post_zb_read(struct iwch_qp *qhp);
 int iwch_register_device(struct iwch_dev *dev);
 void iwch_unregister_device(struct iwch_dev *dev);
 int iwch_quiesce_qps(struct iwch_cq *chp);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 6cd484e11c11..9b4be889c58e 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -586,6 +586,36 @@ static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
 	}
 }
 
+int iwch_post_zb_read(struct iwch_qp *qhp)
+{
+	union t3_wr *wqe;
+	struct sk_buff *skb;
+	u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
+
+	PDBG("%s enter\n", __func__);
+	skb = alloc_skb(40, GFP_KERNEL);
+	if (!skb) {
+		printk(KERN_ERR "%s cannot send zb_read!!\n", __func__);
+		return -ENOMEM;
+	}
+	wqe = (union t3_wr *)skb_put(skb, sizeof(struct t3_rdma_read_wr));
+	memset(wqe, 0, sizeof(struct t3_rdma_read_wr));
+	wqe->read.rdmaop = T3_READ_REQ;
+	wqe->read.reserved[0] = 0;
+	wqe->read.reserved[1] = 0;
+	wqe->read.reserved[2] = 0;
+	wqe->read.rem_stag = cpu_to_be32(1);
+	wqe->read.rem_to = cpu_to_be64(1);
+	wqe->read.local_stag = cpu_to_be32(1);
+	wqe->read.local_len = cpu_to_be32(0);
+	wqe->read.local_to = cpu_to_be64(1);
+	wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ));
+	wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)|
+						V_FW_RIWR_LEN(flit_cnt));
+	skb->priority = CPL_PRIORITY_DATA;
+	return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
+}
+
 /*
  * This posts a TERMINATE with layer=RDMA, type=catastrophic.
  */
@@ -671,11 +701,18 @@ static void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
 
 
 /*
- * Return non zero if at least one RECV was pre-posted.
+ * Return count of RECV WRs posted
  */
-static int rqes_posted(struct iwch_qp *qhp)
+u16 iwch_rqes_posted(struct iwch_qp *qhp)
 {
-	return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV;
+	union t3_wr *wqe = qhp->wq.queue;
+	u16 count = 0;
+	while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
+		count++;
+		wqe++;
+	}
+	PDBG("%s qhp %p count %u\n", __func__, qhp, count);
+	return count;
 }
 
 static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
@@ -716,8 +753,17 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
 	init_attr.ird = qhp->attr.max_ird;
 	init_attr.qp_dma_addr = qhp->wq.dma_addr;
 	init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
-	init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
+	init_attr.rqe_count = iwch_rqes_posted(qhp);
+	init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
 	init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0;
+	if (peer2peer) {
+		init_attr.rtr_type = RTR_READ;
+		if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
+			init_attr.ord = 1;
+		if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator)
+			init_attr.ird = 1;
+	} else
+		init_attr.rtr_type = 0;
 	init_attr.irs = qhp->ep->rcv_seq;
 	PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
 	     "flags 0x%x qpcaps 0x%x\n", __func__,