author    Steve Wise <swise@opengridcomputing.com>  2008-04-29 16:46:52 -0400
committer Roland Dreier <rolandd@cisco.com>  2008-04-29 16:46:52 -0400
commit    f8b0dfd15277974b5c9f3ff17f9e3ab6fdbe45ee
tree      34e393cd342578f9ff223be2b631af7ab9b418aa  /drivers/infiniband/hw/cxgb3/iwch_qp.c
parent    ccaf10d0ad17bf755750160ebe594de7261a893e
RDMA/cxgb3: Support peer-2-peer connection setup
Open MPI, Intel MPI and other applications don't respect the iWARP
requirement that the client (active) side of the connection send the first
RDMA message.  This class of application connection setup is called
peer-to-peer.  Typically, once the connection is set up, _both_ sides want
to send data.

This patch enables supporting peer-to-peer over the Chelsio RNIC by
enforcing this iWARP requirement in the driver itself as part of RDMA
connection setup.

Connection setup is extended, when the peer2peer module option is 1, such
that the MPA initiator will send a 0B Read (the RTR) just after connection
setup.  The MPA responder will suspend SQ processing until the RTR message
is received and replied to.

In the longer term, this will be handled in a standardized way by enhancing
the MPA negotiation so peers can indicate whether they want/need the RTR
and what type of RTR (0B read, 0B write, or 0B send) should be sent.  This
will be done by standardizing a few bits of the private data in order to
negotiate all this.  However, this patch enables peer-to-peer applications
now and allows most of the required firmware and driver changes to be done
and tested now.

Design:

 - Add a module option, peer2peer, to enable this mode.

 - New firmware support for peer-to-peer mode:

   - a new bit in the rdma_init WR to tell it to do peer-2-peer and what
     form of RTR message to send or expect.

   - process _all_ preposted recvs before moving the connection into rdma
     mode.

   - passive side: defer completing the rdma_init WR until all pre-posted
     recvs are processed.  Suspend SQ processing until the RTR is received.

   - active side: expect and process the 0B read WR on the offload TX
     queue.  Defer completing the rdma_init WR until all pre-posted recvs
     are processed.  Suspend SQ processing until the 0B read WR is
     processed from the offload TX queue.

 - If peer2peer is set, the driver posts a 0B read request on the offload
   TX queue just after posting the rdma_init WR to the offload TX queue.

 - Add CQ poll logic to ignore unsolicited read responses.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
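For illustration only: a minimal sketch of how the active (MPA initiator)
side would use the new helper once the QP is initialized.  The wrapper
function below is hypothetical; in the patch, the actual call site lives in
the cxgb3 connection manager, not in iwch_qp.c.

    /* Hypothetical wrapper: when the peer2peer module option is set,
     * post the 0B read (the RTR) on the offload TX queue right after
     * the rdma_init WR, so the active side sends the first RDMA
     * message as iWARP requires.  Assumes the endpoint keeps its QP
     * at ep->com.qp, as this driver does. */
    static int example_send_rtr(struct iwch_ep *ep)
    {
            if (!peer2peer)
                    return 0;       /* RTR exchange disabled */
            return iwch_post_zb_read(ep->com.qp);
    }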
Diffstat (limited to 'drivers/infiniband/hw/cxgb3/iwch_qp.c')
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_qp.c | 54
1 file changed, 50 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 6cd484e11c11..9b4be889c58e 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -586,6 +586,36 @@ static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
 	}
 }
 
+int iwch_post_zb_read(struct iwch_qp *qhp)
+{
+	union t3_wr *wqe;
+	struct sk_buff *skb;
+	u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
+
+	PDBG("%s enter\n", __func__);
+	skb = alloc_skb(40, GFP_KERNEL);
+	if (!skb) {
+		printk(KERN_ERR "%s cannot send zb_read!!\n", __func__);
+		return -ENOMEM;
+	}
+	wqe = (union t3_wr *)skb_put(skb, sizeof(struct t3_rdma_read_wr));
+	memset(wqe, 0, sizeof(struct t3_rdma_read_wr));
+	wqe->read.rdmaop = T3_READ_REQ;
+	wqe->read.reserved[0] = 0;
+	wqe->read.reserved[1] = 0;
+	wqe->read.reserved[2] = 0;
+	wqe->read.rem_stag = cpu_to_be32(1);
+	wqe->read.rem_to = cpu_to_be64(1);
+	wqe->read.local_stag = cpu_to_be32(1);
+	wqe->read.local_len = cpu_to_be32(0);
+	wqe->read.local_to = cpu_to_be64(1);
+	wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ));
+	wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)|
+						V_FW_RIWR_LEN(flit_cnt));
+	skb->priority = CPL_PRIORITY_DATA;
+	return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
+}
+
 /*
  * This posts a TERMINATE with layer=RDMA, type=catastrophic.
  */
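A note on the WR fields above: local_len is 0, which is what makes this a
zero-byte read, while the STag and offset fields are arbitrary nonzero
placeholders (no data is actually transferred).  The flit count is the WR
size expressed in 8-byte flits; assuming sizeof(struct t3_rdma_read_wr) is
the 40 bytes that the alloc_skb() call reserves, the arithmetic works out
to:

    /* Sketch of the flit_cnt arithmetic, assuming a 40-byte WR
     * (matching alloc_skb(40, GFP_KERNEL) above). */
    u8 flit_cnt = 40 >> 3;      /* 40 bytes / 8 bytes per flit = 5 flits */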
@@ -671,11 +701,18 @@ static void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
 
 
 /*
- * Return non zero if at least one RECV was pre-posted.
+ * Return count of RECV WRs posted
  */
-static int rqes_posted(struct iwch_qp *qhp)
+u16 iwch_rqes_posted(struct iwch_qp *qhp)
 {
-	return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV;
+	union t3_wr *wqe = qhp->wq.queue;
+	u16 count = 0;
+	while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
+		count++;
+		wqe++;
+	}
+	PDBG("%s qhp %p count %u\n", __func__, qhp, count);
+	return count;
 }
 
 static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
@@ -716,8 +753,17 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
 	init_attr.ird = qhp->attr.max_ird;
 	init_attr.qp_dma_addr = qhp->wq.dma_addr;
 	init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
-	init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
+	init_attr.rqe_count = iwch_rqes_posted(qhp);
+	init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
 	init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0;
+	if (peer2peer) {
+		init_attr.rtr_type = RTR_READ;
+		if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
+			init_attr.ord = 1;
+		if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator)
+			init_attr.ird = 1;
+	} else
+		init_attr.rtr_type = 0;
 	init_attr.irs = qhp->ep->rcv_seq;
 	PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
 	     "flags 0x%x qpcaps 0x%x\n", __func__,