author		Kumar Sanghvi <kumaras@chelsio.com>	2011-09-25 10:47:44 -0400
committer	Roland Dreier <roland@purestorage.com>	2011-10-06 12:39:24 -0400
commit		d2fe99e86bb2ccbb87df20b0136d5983b6a4cc09 (patch)
tree		2978723397dc5f036f30a1dcf316854310001b62 /drivers/infiniband/hw/cxgb4/cm.c
parent		56da00fc92e6f227874bba36f127ffc8847ee1f8 (diff)
RDMA/cxgb4: Add support for MPAv2 Enhanced RDMA Negotiation
This patch adds support for Enhanced RDMA Connection Establishment
(draft-ietf-storm-mpa-peer-connect-06), aka MPAv2. Details of the draft can be
obtained from: <http://www.ietf.org/id/draft-ietf-storm-mpa-peer-connect-06.txt>

The patch updates the following functions for the initiator perspective:
 - send_mpa_request
 - process_mpa_reply
 - post_terminate for TERM error codes
 - destroy_qp for TERM related change
 - adds layer/etype/ecode to c4iw_qp_attrs for sending with TERM
 - peer_abort for retrying connection attempt with MPA_v1 message
 - added c4iw_reconnect function

The patch updates the following functions for the responder perspective:
 - process_mpa_request
 - send_mpa_reply
 - c4iw_accept_cr
 - passes ird/ord to upper layers

Signed-off-by: Kumar Sanghvi <kumaras@chelsio.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
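For orientation, enhanced negotiation works by prepending a small IRD/ORD parameter block to the MPA private data whenever the MPA_ENHANCED_RDMA_CONN flag is set. The sketch below illustrates the layout this patch relies on; the struct and flag names follow the driver code in the diff, but the numeric bit values are assumptions shown only for illustration (the authoritative definitions live in the driver header, iw_cxgb4.h):

/*
 * Illustrative sketch only: field/flag names follow the driver code in this
 * patch; the bit values below are assumed for the example and are not taken
 * from this diff.
 */
#include <linux/types.h>

struct mpa_v2_conn_params {
	__be16 ird;		/* low bits: IRD value, high bits: flags */
	__be16 ord;		/* low bits: ORD value, high bits: RTR flags */
};

#define MPA_V2_PEER2PEER_MODEL	0x8000	/* in ird: peer-to-peer mode requested */
#define MPA_V2_RDMA_WRITE_RTR	0x8000	/* in ord: RTR carried by an RDMA Write */
#define MPA_V2_RDMA_READ_RTR	0x4000	/* in ord: RTR carried by an RDMA Read */
#define MPA_V2_IRD_ORD_MASK	0x3fff	/* low bits carry the actual IRD/ORD */

/*
 * When negotiating with mpa_rev == 2, send_mpa_req()/send_mpa_reply() place
 * this block at the start of mpa->private_data and grow private_data_size by
 * sizeof(struct mpa_v2_conn_params); any ULP private data follows it.
 * process_mpa_reply()/process_mpa_request() strip the block again before the
 * private data is handed to the upper layer, which is why the upcall helpers
 * subtract sizeof(struct mpa_v2_conn_params) in the MPA_v2 case.
 */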
Diffstat (limited to 'drivers/infiniband/hw/cxgb4/cm.c')
-rw-r--r--	drivers/infiniband/hw/cxgb4/cm.c	469
1 file changed, 433 insertions, 36 deletions
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 77f769d9227d..b36cdac9c558 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -103,7 +103,8 @@ MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
 static int mpa_rev = 1;
 module_param(mpa_rev, int, 0644);
 MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
-		 "1 is spec compliant. (default=1)");
+		 "1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft"
+		 " compliant (default=1)");
 
 static int markers_enabled;
 module_param(markers_enabled, int, 0644);
@@ -497,17 +498,21 @@ static int send_connect(struct c4iw_ep *ep)
 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
-static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb)
+static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
+		u8 mpa_rev_to_use)
 {
 	int mpalen, wrlen;
 	struct fw_ofld_tx_data_wr *req;
 	struct mpa_message *mpa;
+	struct mpa_v2_conn_params mpa_v2_params;
 
 	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
 
 	BUG_ON(skb_cloned(skb));
 
 	mpalen = sizeof(*mpa) + ep->plen;
+	if (mpa_rev_to_use == 2)
+		mpalen += sizeof(struct mpa_v2_conn_params);
 	wrlen = roundup(mpalen + sizeof *req, 16);
 	skb = get_skb(skb, wrlen, GFP_KERNEL);
 	if (!skb) {
@@ -533,12 +538,39 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb)
 	mpa = (struct mpa_message *)(req + 1);
 	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
 	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
-		     (markers_enabled ? MPA_MARKERS : 0);
+		     (markers_enabled ? MPA_MARKERS : 0) |
+		     (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
 	mpa->private_data_size = htons(ep->plen);
-	mpa->revision = mpa_rev;
+	mpa->revision = mpa_rev_to_use;
+	if (mpa_rev_to_use == 1)
+		ep->tried_with_mpa_v1 = 1;
+
+	if (mpa_rev_to_use == 2) {
+		mpa->private_data_size +=
+			htons(sizeof(struct mpa_v2_conn_params));
+		mpa_v2_params.ird = htons((u16)ep->ird);
+		mpa_v2_params.ord = htons((u16)ep->ord);
+
+		if (peer2peer) {
+			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
+			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
+				mpa_v2_params.ord |=
+					htons(MPA_V2_RDMA_WRITE_RTR);
+			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
+				mpa_v2_params.ord |=
+					htons(MPA_V2_RDMA_READ_RTR);
+		}
+		memcpy(mpa->private_data, &mpa_v2_params,
+		       sizeof(struct mpa_v2_conn_params));
 
-	if (ep->plen)
-		memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
+		if (ep->plen)
+			memcpy(mpa->private_data +
+			       sizeof(struct mpa_v2_conn_params),
+			       ep->mpa_pkt + sizeof(*mpa), ep->plen);
+	} else
+		if (ep->plen)
+			memcpy(mpa->private_data,
+			       ep->mpa_pkt + sizeof(*mpa), ep->plen);
 
 	/*
 	 * Reference the mpa skb. This ensures the data area
@@ -562,10 +594,13 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
 	struct fw_ofld_tx_data_wr *req;
 	struct mpa_message *mpa;
 	struct sk_buff *skb;
+	struct mpa_v2_conn_params mpa_v2_params;
 
 	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
 
 	mpalen = sizeof(*mpa) + plen;
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
+		mpalen += sizeof(struct mpa_v2_conn_params);
 	wrlen = roundup(mpalen + sizeof *req, 16);
 
 	skb = get_skb(NULL, wrlen, GFP_KERNEL);
@@ -595,8 +630,29 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
 	mpa->flags = MPA_REJECT;
 	mpa->revision = mpa_rev;
 	mpa->private_data_size = htons(plen);
-	if (plen)
-		memcpy(mpa->private_data, pdata, plen);
+
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
+		mpa->private_data_size +=
+			htons(sizeof(struct mpa_v2_conn_params));
+		mpa_v2_params.ird = htons(((u16)ep->ird) |
+					  (peer2peer ? MPA_V2_PEER2PEER_MODEL :
+					   0));
+		mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
+					  (p2p_type ==
+					   FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
+					   MPA_V2_RDMA_WRITE_RTR : p2p_type ==
+					   FW_RI_INIT_P2PTYPE_READ_REQ ?
+					   MPA_V2_RDMA_READ_RTR : 0) : 0));
+		memcpy(mpa->private_data, &mpa_v2_params,
+		       sizeof(struct mpa_v2_conn_params));
+
+		if (ep->plen)
+			memcpy(mpa->private_data +
+			       sizeof(struct mpa_v2_conn_params), pdata, plen);
+	} else
+		if (plen)
+			memcpy(mpa->private_data, pdata, plen);
 
 	/*
 	 * Reference the mpa skb again. This ensures the data area
@@ -617,10 +673,13 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
 	struct fw_ofld_tx_data_wr *req;
 	struct mpa_message *mpa;
 	struct sk_buff *skb;
+	struct mpa_v2_conn_params mpa_v2_params;
 
 	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
 
 	mpalen = sizeof(*mpa) + plen;
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
+		mpalen += sizeof(struct mpa_v2_conn_params);
 	wrlen = roundup(mpalen + sizeof *req, 16);
 
 	skb = get_skb(NULL, wrlen, GFP_KERNEL);
@@ -649,10 +708,36 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
 	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
 	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
 		     (markers_enabled ? MPA_MARKERS : 0);
-	mpa->revision = mpa_rev;
+	mpa->revision = ep->mpa_attr.version;
 	mpa->private_data_size = htons(plen);
-	if (plen)
-		memcpy(mpa->private_data, pdata, plen);
+
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
+		mpa->private_data_size +=
+			htons(sizeof(struct mpa_v2_conn_params));
+		mpa_v2_params.ird = htons((u16)ep->ird);
+		mpa_v2_params.ord = htons((u16)ep->ord);
+		if (peer2peer && (ep->mpa_attr.p2p_type !=
+					FW_RI_INIT_P2PTYPE_DISABLED)) {
+			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
+
+			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
+				mpa_v2_params.ord |=
+					htons(MPA_V2_RDMA_WRITE_RTR);
+			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
+				mpa_v2_params.ord |=
+					htons(MPA_V2_RDMA_READ_RTR);
+		}
+
+		memcpy(mpa->private_data, &mpa_v2_params,
+		       sizeof(struct mpa_v2_conn_params));
+
+		if (ep->plen)
+			memcpy(mpa->private_data +
+			       sizeof(struct mpa_v2_conn_params), pdata, plen);
+	} else
+		if (plen)
+			memcpy(mpa->private_data, pdata, plen);
 
 	/*
 	 * Reference the mpa skb. This ensures the data area
@@ -695,7 +780,10 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
 
 	/* start MPA negotiation */
 	send_flowc(ep, NULL);
-	send_mpa_req(ep, skb);
+	if (ep->retry_with_mpa_v1)
+		send_mpa_req(ep, skb, 1);
+	else
+		send_mpa_req(ep, skb, mpa_rev);
 
 	return 0;
 }
@@ -769,8 +857,19 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
 	event.remote_addr = ep->com.remote_addr;
 
 	if ((status == 0) || (status == -ECONNREFUSED)) {
-		event.private_data_len = ep->plen;
-		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+		if (!ep->tried_with_mpa_v1) {
+			/* this means MPA_v2 is used */
+			event.private_data_len = ep->plen -
+				sizeof(struct mpa_v2_conn_params);
+			event.private_data = ep->mpa_pkt +
+				sizeof(struct mpa_message) +
+				sizeof(struct mpa_v2_conn_params);
+		} else {
+			/* this means MPA_v1 is used */
+			event.private_data_len = ep->plen;
+			event.private_data = ep->mpa_pkt +
+				sizeof(struct mpa_message);
+		}
 	}
 
 	PDBG("%s ep %p tid %u status %d\n", __func__, ep,
@@ -793,9 +892,22 @@ static void connect_request_upcall(struct c4iw_ep *ep)
 	event.event = IW_CM_EVENT_CONNECT_REQUEST;
 	event.local_addr = ep->com.local_addr;
 	event.remote_addr = ep->com.remote_addr;
-	event.private_data_len = ep->plen;
-	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
 	event.provider_data = ep;
+	if (!ep->tried_with_mpa_v1) {
+		/* this means MPA_v2 is used */
+		event.ord = ep->ord;
+		event.ird = ep->ird;
+		event.private_data_len = ep->plen -
+			sizeof(struct mpa_v2_conn_params);
+		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
+			sizeof(struct mpa_v2_conn_params);
+	} else {
+		/* this means MPA_v1 is used. Send max supported */
+		event.ord = c4iw_max_read_depth;
+		event.ird = c4iw_max_read_depth;
+		event.private_data_len = ep->plen;
+		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+	}
 	if (state_read(&ep->parent_ep->com) != DEAD) {
 		c4iw_get_ep(&ep->com);
 		ep->parent_ep->com.cm_id->event_handler(
@@ -813,6 +925,8 @@ static void established_upcall(struct c4iw_ep *ep)
 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_ESTABLISHED;
+	event.ird = ep->ird;
+	event.ord = ep->ord;
 	if (ep->com.cm_id) {
 		PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
@@ -848,7 +962,10 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
 static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
 {
 	struct mpa_message *mpa;
+	struct mpa_v2_conn_params *mpa_v2_params;
 	u16 plen;
+	u16 resp_ird, resp_ord;
+	u8 rtr_mismatch = 0, insuff_ird = 0;
 	struct c4iw_qp_attributes attrs;
 	enum c4iw_qp_attr_mask mask;
 	int err;
@@ -888,7 +1005,9 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
 	mpa = (struct mpa_message *) ep->mpa_pkt;
 
 	/* Validate MPA header. */
-	if (mpa->revision != mpa_rev) {
+	if (mpa->revision > mpa_rev) {
+		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
+		       " Received = %d\n", __func__, mpa_rev, mpa->revision);
 		err = -EPROTO;
 		goto err;
 	}
@@ -938,13 +1057,66 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
-	ep->mpa_attr.version = mpa_rev;
-	ep->mpa_attr.p2p_type = peer2peer ? p2p_type :
-			        FW_RI_INIT_P2PTYPE_DISABLED;
+	ep->mpa_attr.version = mpa->revision;
+	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+
+	if (mpa->revision == 2) {
+		ep->mpa_attr.enhanced_rdma_conn =
+			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
+		if (ep->mpa_attr.enhanced_rdma_conn) {
+			mpa_v2_params = (struct mpa_v2_conn_params *)
+				(ep->mpa_pkt + sizeof(*mpa));
+			resp_ird = ntohs(mpa_v2_params->ird) &
+				MPA_V2_IRD_ORD_MASK;
+			resp_ord = ntohs(mpa_v2_params->ord) &
+				MPA_V2_IRD_ORD_MASK;
+
+			/*
+			 * This is a double-check. Ideally, below checks are
+			 * not required since ird/ord stuff has been taken
+			 * care of in c4iw_accept_cr
+			 */
+			if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) {
+				err = -ENOMEM;
+				ep->ird = resp_ord;
+				ep->ord = resp_ird;
+				insuff_ird = 1;
+			}
+
+			if (ntohs(mpa_v2_params->ird) &
+					MPA_V2_PEER2PEER_MODEL) {
+				if (ntohs(mpa_v2_params->ord) &
+						MPA_V2_RDMA_WRITE_RTR)
+					ep->mpa_attr.p2p_type =
+						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
+				else if (ntohs(mpa_v2_params->ord) &
+						MPA_V2_RDMA_READ_RTR)
+					ep->mpa_attr.p2p_type =
+						FW_RI_INIT_P2PTYPE_READ_REQ;
+			}
+		}
+	} else if (mpa->revision == 1)
+		if (peer2peer)
+			ep->mpa_attr.p2p_type = p2p_type;
+
 	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
-	     "xmit_marker_enabled=%d, version=%d\n", __func__,
-	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
-	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+	     "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = "
+	     "%d\n", __func__, ep->mpa_attr.crc_enabled,
+	     ep->mpa_attr.recv_marker_enabled,
+	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
+	     ep->mpa_attr.p2p_type, p2p_type);
+
+	/*
+	 * If responder's RTR does not match with that of initiator, assign
+	 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
+	 * generated when moving QP to RTS state.
+	 * A TERM message will be sent after QP has moved to RTS state
+	 */
+	if ((ep->mpa_attr.version == 2) &&
+	    (ep->mpa_attr.p2p_type != p2p_type)) {
+		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+		rtr_mismatch = 1;
+	}
 
 	attrs.mpa_attr = ep->mpa_attr;
 	attrs.max_ird = ep->ird;
@@ -961,6 +1133,39 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
 			     ep->com.qp, mask, &attrs, 1);
 	if (err)
 		goto err;
+
+	/*
+	 * If responder's RTR requirement did not match with what initiator
+	 * supports, generate TERM message
+	 */
+	if (rtr_mismatch) {
+		printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
+		attrs.layer_etype = LAYER_MPA | DDP_LLP;
+		attrs.ecode = MPA_NOMATCH_RTR;
+		attrs.next_state = C4IW_QP_STATE_TERMINATE;
+		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+				C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Generate TERM if initiator IRD is not sufficient for responder
+	 * provided ORD. Currently, we do the same behaviour even when
+	 * responder provided IRD is also not sufficient as regards to
+	 * initiator ORD.
+	 */
+	if (insuff_ird) {
+		printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
+				__func__);
+		attrs.layer_etype = LAYER_MPA | DDP_LLP;
+		attrs.ecode = MPA_INSUFF_IRD;
+		attrs.next_state = C4IW_QP_STATE_TERMINATE;
+		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+				C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+		err = -ENOMEM;
+		goto out;
+	}
 	goto out;
 err:
 	state_set(&ep->com, ABORTING);
@@ -973,6 +1178,7 @@ out:
 static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
 {
 	struct mpa_message *mpa;
+	struct mpa_v2_conn_params *mpa_v2_params;
 	u16 plen;
 
 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
@@ -1013,7 +1219,9 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
 	/*
 	 * Validate MPA Header.
 	 */
-	if (mpa->revision != mpa_rev) {
+	if (mpa->revision > mpa_rev) {
+		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
+		       " Received = %d\n", __func__, mpa_rev, mpa->revision);
 		abort_connection(ep, skb, GFP_KERNEL);
 		return;
 	}
@@ -1056,9 +1264,37 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
-	ep->mpa_attr.version = mpa_rev;
-	ep->mpa_attr.p2p_type = peer2peer ? p2p_type :
-			        FW_RI_INIT_P2PTYPE_DISABLED;
+	ep->mpa_attr.version = mpa->revision;
+	if (mpa->revision == 1)
+		ep->tried_with_mpa_v1 = 1;
+	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+
+	if (mpa->revision == 2) {
+		ep->mpa_attr.enhanced_rdma_conn =
+			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
+		if (ep->mpa_attr.enhanced_rdma_conn) {
+			mpa_v2_params = (struct mpa_v2_conn_params *)
+				(ep->mpa_pkt + sizeof(*mpa));
+			ep->ird = ntohs(mpa_v2_params->ird) &
+				MPA_V2_IRD_ORD_MASK;
+			ep->ord = ntohs(mpa_v2_params->ord) &
+				MPA_V2_IRD_ORD_MASK;
+			if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
+				if (peer2peer) {
+					if (ntohs(mpa_v2_params->ord) &
+							MPA_V2_RDMA_WRITE_RTR)
+						ep->mpa_attr.p2p_type =
+						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
+					else if (ntohs(mpa_v2_params->ord) &
+							MPA_V2_RDMA_READ_RTR)
+						ep->mpa_attr.p2p_type =
+						FW_RI_INIT_P2PTYPE_READ_REQ;
+				}
+		}
+	} else if (mpa->revision == 1)
+		if (peer2peer)
+			ep->mpa_attr.p2p_type = p2p_type;
+
 	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
 	     "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__,
 	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
@@ -1550,6 +1786,112 @@ static int is_neg_adv_abort(unsigned int status)
 	       status == CPL_ERR_PERSIST_NEG_ADVICE;
 }
 
+static int c4iw_reconnect(struct c4iw_ep *ep)
+{
+	int err = 0;
+	struct rtable *rt;
+	struct net_device *pdev;
+	struct neighbour *neigh;
+	int step;
+
+	PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
+	init_timer(&ep->timer);
+
+	/*
+	 * Allocate an active TID to initiate a TCP connection.
+	 */
+	ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
+	if (ep->atid == -1) {
+		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
+		err = -ENOMEM;
+		goto fail2;
+	}
+
+	/* find a route */
+	rt = find_route(ep->com.dev,
+			ep->com.cm_id->local_addr.sin_addr.s_addr,
+			ep->com.cm_id->remote_addr.sin_addr.s_addr,
+			ep->com.cm_id->local_addr.sin_port,
+			ep->com.cm_id->remote_addr.sin_port, 0);
+	if (!rt) {
+		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
+		err = -EHOSTUNREACH;
+		goto fail3;
+	}
+	ep->dst = &rt->dst;
+
+	neigh = dst_get_neighbour(ep->dst);
+
+	/* get a l2t entry */
+	if (neigh->dev->flags & IFF_LOOPBACK) {
+		PDBG("%s LOOPBACK\n", __func__);
+		pdev = ip_dev_find(&init_net,
+				   ep->com.cm_id->remote_addr.sin_addr.s_addr);
+		ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
+					neigh, pdev, 0);
+		ep->mtu = pdev->mtu;
+		ep->tx_chan = cxgb4_port_chan(pdev);
+		ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
+		step = ep->com.dev->rdev.lldi.ntxq /
+			ep->com.dev->rdev.lldi.nchan;
+		ep->txq_idx = cxgb4_port_idx(pdev) * step;
+		step = ep->com.dev->rdev.lldi.nrxq /
+			ep->com.dev->rdev.lldi.nchan;
+		ep->ctrlq_idx = cxgb4_port_idx(pdev);
+		ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
+			cxgb4_port_idx(pdev) * step];
+		dev_put(pdev);
+	} else {
+		ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
+					neigh, neigh->dev, 0);
+		ep->mtu = dst_mtu(ep->dst);
+		ep->tx_chan = cxgb4_port_chan(neigh->dev);
+		ep->smac_idx = (cxgb4_port_viid(neigh->dev) & 0x7F) << 1;
+		step = ep->com.dev->rdev.lldi.ntxq /
+			ep->com.dev->rdev.lldi.nchan;
+		ep->txq_idx = cxgb4_port_idx(neigh->dev) * step;
+		ep->ctrlq_idx = cxgb4_port_idx(neigh->dev);
+		step = ep->com.dev->rdev.lldi.nrxq /
+			ep->com.dev->rdev.lldi.nchan;
+		ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
+			cxgb4_port_idx(neigh->dev) * step];
+	}
+	if (!ep->l2t) {
+		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
+		err = -ENOMEM;
+		goto fail4;
+	}
+
+	PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
+	     __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
+	     ep->l2t->idx);
+
+	state_set(&ep->com, CONNECTING);
+	ep->tos = 0;
+
+	/* send connect request to rnic */
+	err = send_connect(ep);
+	if (!err)
+		goto out;
+
+	cxgb4_l2t_release(ep->l2t);
+fail4:
+	dst_release(ep->dst);
+fail3:
+	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
+fail2:
+	/*
+	 * remember to send notification to upper layer.
+	 * We are in here so the upper layer is not aware that this is
+	 * re-connect attempt and so, upper layer is still waiting for
+	 * response of 1st connect request.
+	 */
+	connect_reply_upcall(ep, -ECONNRESET);
+	c4iw_put_ep(&ep->com);
+out:
+	return err;
+}
+
 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 {
 	struct cpl_abort_req_rss *req = cplhdr(skb);
@@ -1573,8 +1915,11 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 
 	/*
 	 * Wake up any threads in rdma_init() or rdma_fini().
+	 * However, this is not needed if com state is just
+	 * MPA_REQ_SENT
 	 */
-	c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+	if (ep->com.state != MPA_REQ_SENT)
+		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
 
 	mutex_lock(&ep->com.mutex);
 	switch (ep->com.state) {
@@ -1585,7 +1930,21 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 		break;
 	case MPA_REQ_SENT:
 		stop_ep_timer(ep);
-		connect_reply_upcall(ep, -ECONNRESET);
+		if (mpa_rev == 2 && ep->tried_with_mpa_v1)
+			connect_reply_upcall(ep, -ECONNRESET);
+		else {
+			/*
+			 * we just don't send notification upwards because we
+			 * want to retry with mpa_v1 without upper layers even
+			 * knowing it.
+			 *
+			 * do some housekeeping so as to re-initiate the
+			 * connection
+			 */
+			PDBG("%s: mpa_rev=%d. Retrying with mpav1\n", __func__,
+			     mpa_rev);
+			ep->retry_with_mpa_v1 = 1;
+		}
 		break;
 	case MPA_REP_SENT:
 		break;
@@ -1621,7 +1980,9 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 	dst_confirm(ep->dst);
 	if (ep->com.state != ABORTING) {
 		__state_set(&ep->com, DEAD);
-		release = 1;
+		/* we don't release if we want to retry with mpa_v1 */
+		if (!ep->retry_with_mpa_v1)
+			release = 1;
 	}
 	mutex_unlock(&ep->com.mutex);
 
@@ -1641,6 +2002,15 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 out:
 	if (release)
 		release_ep_resources(ep);
+
+	/* retry with mpa-v1 */
+	if (ep && ep->retry_with_mpa_v1) {
+		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
+		dst_release(ep->dst);
+		cxgb4_l2t_release(ep->l2t);
+		c4iw_reconnect(ep);
+	}
+
 	return 0;
 }
 
@@ -1792,18 +2162,40 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		goto err;
 	}
 
-	cm_id->add_ref(cm_id);
-	ep->com.cm_id = cm_id;
-	ep->com.qp = qp;
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+		if (conn_param->ord > ep->ird) {
+			ep->ird = conn_param->ird;
+			ep->ord = conn_param->ord;
+			send_mpa_reject(ep, conn_param->private_data,
+					conn_param->private_data_len);
+			abort_connection(ep, NULL, GFP_KERNEL);
+			err = -ENOMEM;
+			goto err;
+		}
+		if (conn_param->ird > ep->ord) {
+			if (!ep->ord)
+				conn_param->ird = 1;
+			else {
+				abort_connection(ep, NULL, GFP_KERNEL);
+				err = -ENOMEM;
+				goto err;
+			}
+		}
 
+	}
 	ep->ird = conn_param->ird;
 	ep->ord = conn_param->ord;
 
-	if (peer2peer && ep->ird == 0)
-		ep->ird = 1;
+	if (ep->mpa_attr.version != 2)
+		if (peer2peer && ep->ird == 0)
+			ep->ird = 1;
 
 	PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
 
+	cm_id->add_ref(cm_id);
+	ep->com.cm_id = cm_id;
+	ep->com.qp = qp;
+
 	/* bind QP to EP and move to RTS */
 	attrs.mpa_attr = ep->mpa_attr;
 	attrs.max_ird = ep->ird;
@@ -1944,6 +2336,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 			ep->com.dev->rdev.lldi.nchan;
 		ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
 			cxgb4_port_idx(neigh->dev) * step];
+		ep->retry_with_mpa_v1 = 0;
+		ep->tried_with_mpa_v1 = 0;
 	}
 	if (!ep->l2t) {
 		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
@@ -2323,8 +2717,11 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
 
 	/*
 	 * Wake up any threads in rdma_init() or rdma_fini().
+	 * However, this is not needed if com state is just
+	 * MPA_REQ_SENT
 	 */
-	c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+	if (ep->com.state != MPA_REQ_SENT)
+		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
 	sched(dev, skb);
 	return 0;
 }