author     Hariprasad Shenai <hariprasad@chelsio.com>    2014-06-06 12:10:43 -0400
committer  David S. Miller <davem@davemloft.net>         2014-06-11 01:49:54 -0400
commit     92e7ae71726ca9e16a8a88ebeee14eb5177575a1 (patch)
tree       5dc54ac594b4e1a50c90f38518d16b18269030a4 /drivers/infiniband
parent     cf38be6d61001b234d5b980d6e98702587638190 (diff)
iw_cxgb4: Choose appropriate hw mtu index and ISS for iWARP connections
Select the appropriate hw mtu index and initial sequence number to optimize
hw memory performance.

Add a new cxgb4_best_aligned_mtu() which allows callers to provide enough
information to [possibly] select an MTU that results in the TCP Data Segment
Size (AKA Maximum Segment Size) being an aligned value.

If an RTR message exchange is required, align the ISS to 8B - 1 + 4, so that
after the SYN the send seqno will align on a 4B boundary. The RTR message
exchange will leave the send seqno aligned on an 8B boundary. If an RTR is
not required, align the ISS to 8B - 1. The goal is to have the send seqno be
8B aligned when we send the first FPDU.

Based on original work by Casey Leedom <leedom@chelsio.com> and
Steve Wise <swise@opengridcomputing.com>

Signed-off-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
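As a quick illustration of the ISS arithmetic described above, here is a small
stand-alone user-space sketch (not part of the patch; rand() stands in for the
kernel's prandom_u32()). It prints the residue mod 8 of the ISS and of the
first post-SYN send seqno for both the RTR (peer2peer) and non-RTR cases:

    #include <stdio.h>
    #include <stdint.h>
    #include <stdlib.h>

    int main(void)
    {
            int peer2peer;

            for (peer2peer = 0; peer2peer <= 1; peer2peer++) {
                    /* ISS ends in ...111b, i.e. "8B - 1" */
                    uint32_t isn = ((uint32_t)rand() & ~7U) - 1;

                    /* "8B - 1 + 4" when an RTR exchange will follow */
                    if (peer2peer)
                            isn += 4;

                    /* The SYN consumes one seqno, so the first byte
                     * sent after the SYN goes out at isn + 1. */
                    printf("peer2peer=%d iss%%8=%u first_send_seqno%%8=%u\n",
                           peer2peer, isn % 8, (isn + 1) % 8);
            }
            return 0;
    }

Without peer2peer the first send seqno is already 8B aligned; with peer2peer it
is 4B aligned, and per the description above the RTR message exchange then
leaves the send seqno 8B aligned for the first FPDU.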
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c           73
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4fw_ri_api.h   1
2 files changed, 63 insertions(+), 11 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 1f863a96a480..d2e9f72ec9bf 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -232,12 +232,16 @@ static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
 
 static void set_emss(struct c4iw_ep *ep, u16 opt)
 {
-        ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - 40;
+        ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] -
+                   sizeof(struct iphdr) - sizeof(struct tcphdr);
         ep->mss = ep->emss;
         if (GET_TCPOPT_TSTAMP(opt))
                 ep->emss -= 12;
         if (ep->emss < 128)
                 ep->emss = 128;
+        if (ep->emss & 7)
+                PDBG("Warning: misaligned mtu idx %u mss %u emss=%u\n",
+                     GET_TCPOPT_MSS(opt), ep->mss, ep->emss);
         PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, GET_TCPOPT_MSS(opt),
              ep->mss, ep->emss);
 }
@@ -528,6 +532,17 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
         return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
+static void best_mtu(const unsigned short *mtus, unsigned short mtu,
+                     unsigned int *idx, int use_ts)
+{
+        unsigned short hdr_size = sizeof(struct iphdr) +
+                                  sizeof(struct tcphdr) +
+                                  (use_ts ? 12 : 0);
+        unsigned short data_size = mtu - hdr_size;
+
+        cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
+}
+
 static int send_connect(struct c4iw_ep *ep)
 {
         struct cpl_act_open_req *req;
@@ -565,7 +580,8 @@ static int send_connect(struct c4iw_ep *ep)
         }
         set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
 
-        cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
+        best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+                 enable_tcp_timestamps);
         wscale = compute_wscale(rcv_win);
         opt0 = (nocong ? NO_CONG(1) : 0) |
                KEEP_ALIVE(1) |
@@ -633,6 +649,13 @@ static int send_connect(struct c4iw_ep *ep)
                         req6->opt2 = cpu_to_be32(opt2);
                 }
         } else {
+                u32 isn = (prandom_u32() & ~7UL) - 1;
+
+                opt2 |= T5_OPT_2_VALID;
+                opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
+                if (peer2peer)
+                        isn += 4;
+
                 if (ep->com.remote_addr.ss_family == AF_INET) {
                         t5_req = (struct cpl_t5_act_open_req *)
                                  skb_put(skb, wrlen);
@@ -649,6 +672,9 @@ static int send_connect(struct c4iw_ep *ep)
                                         cxgb4_select_ntuple(
                                         ep->com.dev->rdev.lldi.ports[0],
                                         ep->l2t)));
+                        t5_req->rsvd = cpu_to_be32(isn);
+                        PDBG("%s snd_isn %u\n", __func__,
+                             be32_to_cpu(t5_req->rsvd));
                         t5_req->opt2 = cpu_to_be32(opt2);
                 } else {
                         t5_req6 = (struct cpl_t5_act_open_req6 *)
@@ -672,6 +698,9 @@ static int send_connect(struct c4iw_ep *ep)
                                         cxgb4_select_ntuple(
                                         ep->com.dev->rdev.lldi.ports[0],
                                         ep->l2t));
+                        t5_req6->rsvd = cpu_to_be32(isn);
+                        PDBG("%s snd_isn %u\n", __func__,
+                             be32_to_cpu(t5_req6->rsvd));
                         t5_req6->opt2 = cpu_to_be32(opt2);
                 }
         }
@@ -1640,7 +1669,8 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
                 htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK);
         req->tcb.tx_max = (__force __be32) jiffies;
         req->tcb.rcv_adv = htons(1);
-        cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
+        best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+                 enable_tcp_timestamps);
         wscale = compute_wscale(rcv_win);
         req->tcb.opt0 = (__force __be64) (TCAM_BYPASS(1) |
                 (nocong ? NO_CONG(1) : 0) |
@@ -1986,12 +2016,26 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
         u64 opt0;
         u32 opt2;
         int wscale;
+        struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
 
         PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
         BUG_ON(skb_cloned(skb));
-        skb_trim(skb, sizeof(*rpl));
+
         skb_get(skb);
-        cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
+        rpl = cplhdr(skb);
+        if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+                skb_trim(skb, roundup(sizeof(*rpl5), 16));
+                rpl5 = (void *)rpl;
+                INIT_TP_WR(rpl5, ep->hwtid);
+        } else {
+                skb_trim(skb, sizeof(*rpl));
+                INIT_TP_WR(rpl, ep->hwtid);
+        }
+        OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
+                                                    ep->hwtid));
+
+        best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+                 enable_tcp_timestamps && req->tcpopt.tstamp);
         wscale = compute_wscale(rcv_win);
         opt0 = (nocong ? NO_CONG(1) : 0) |
                KEEP_ALIVE(1) |
@@ -2023,14 +2067,18 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
                 opt2 |= CCTRL_ECN(1);
         }
         if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+                u32 isn = (prandom_u32() & ~7UL) - 1;
                 opt2 |= T5_OPT_2_VALID;
                 opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
+                opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
+                rpl5 = (void *)rpl;
+                memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16));
+                if (peer2peer)
+                        isn += 4;
+                rpl5->iss = cpu_to_be32(isn);
+                PDBG("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss));
         }
 
-        rpl = cplhdr(skb);
-        INIT_TP_WR(rpl, ep->hwtid);
-        OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
-                                                    ep->hwtid));
         rpl->opt0 = cpu_to_be64(opt0);
         rpl->opt2 = cpu_to_be32(opt2);
         set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
@@ -2095,6 +2143,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
         int err;
         u16 peer_mss = ntohs(req->tcpopt.mss);
         int iptype;
+        unsigned short hdrs;
 
         parent_ep = lookup_stid(t, stid);
         if (!parent_ep) {
@@ -2152,8 +2201,10 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
                 goto reject;
         }
 
-        if (peer_mss && child_ep->mtu > (peer_mss + 40))
-                child_ep->mtu = peer_mss + 40;
+        hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) +
+               ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
+        if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
+                child_ep->mtu = peer_mss + hdrs;
 
         state_set(&child_ep->com, CONNECTING);
         child_ep->com.dev = dev;
diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
index 6121ca08fe58..91289a051af9 100644
--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
@@ -848,6 +848,7 @@ enum { /* TCP congestion control algorithms */
 #define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL)
 #define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL)
 
+#define CONG_CNTRL_VALID   (1 << 18)
 #define T5_OPT_2_VALID       (1 << 31)
 
 #endif /* _T4FW_RI_API_H_ */