diff options
author | Ralph Campbell <ralph.campbell@qlogic.com> | 2007-03-15 17:44:51 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2007-04-18 23:20:55 -0400 |
commit | 3859e39d75b72f35f7d38c618fbbacb39a440c22 (patch) | |
tree | 51d57723574395b54914c08260b9d0a8467a91b1 /drivers/infiniband/hw/ipath | |
parent | 7b21d26ddad6912bf345e8e88a51a5ce98a036ad (diff) |
IB/ipath: Support larger IB_QP_MAX_DEST_RD_ATOMIC and IB_QP_MAX_QP_RD_ATOMIC
This patch adds support for multiple RDMA reads and atomics to be sent
before an ACK is required to be seen by the requester.
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/ipath')
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_qp.c | 26 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_rc.c | 800 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_ruc.c | 58 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_uc.c | 6 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_ud.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_verbs.c | 7 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_verbs.h | 52 |
7 files changed, 548 insertions, 403 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 64f07b19349f..c122fea9145b 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c | |||
@@ -320,7 +320,8 @@ static void ipath_reset_qp(struct ipath_qp *qp) | |||
320 | qp->remote_qpn = 0; | 320 | qp->remote_qpn = 0; |
321 | qp->qkey = 0; | 321 | qp->qkey = 0; |
322 | qp->qp_access_flags = 0; | 322 | qp->qp_access_flags = 0; |
323 | clear_bit(IPATH_S_BUSY, &qp->s_flags); | 323 | qp->s_busy = 0; |
324 | qp->s_flags &= ~IPATH_S_SIGNAL_REQ_WR; | ||
324 | qp->s_hdrwords = 0; | 325 | qp->s_hdrwords = 0; |
325 | qp->s_psn = 0; | 326 | qp->s_psn = 0; |
326 | qp->r_psn = 0; | 327 | qp->r_psn = 0; |
@@ -333,7 +334,6 @@ static void ipath_reset_qp(struct ipath_qp *qp) | |||
333 | qp->r_state = IB_OPCODE_UC_SEND_LAST; | 334 | qp->r_state = IB_OPCODE_UC_SEND_LAST; |
334 | } | 335 | } |
335 | qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; | 336 | qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; |
336 | qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; | ||
337 | qp->r_nak_state = 0; | 337 | qp->r_nak_state = 0; |
338 | qp->r_wrid_valid = 0; | 338 | qp->r_wrid_valid = 0; |
339 | qp->s_rnr_timeout = 0; | 339 | qp->s_rnr_timeout = 0; |
@@ -344,6 +344,10 @@ static void ipath_reset_qp(struct ipath_qp *qp) | |||
344 | qp->s_ssn = 1; | 344 | qp->s_ssn = 1; |
345 | qp->s_lsn = 0; | 345 | qp->s_lsn = 0; |
346 | qp->s_wait_credit = 0; | 346 | qp->s_wait_credit = 0; |
347 | memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); | ||
348 | qp->r_head_ack_queue = 0; | ||
349 | qp->s_tail_ack_queue = 0; | ||
350 | qp->s_num_rd_atomic = 0; | ||
347 | if (qp->r_rq.wq) { | 351 | if (qp->r_rq.wq) { |
348 | qp->r_rq.wq->head = 0; | 352 | qp->r_rq.wq->head = 0; |
349 | qp->r_rq.wq->tail = 0; | 353 | qp->r_rq.wq->tail = 0; |
@@ -503,6 +507,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, | |||
503 | attr->path_mig_state != IB_MIG_REARM) | 507 | attr->path_mig_state != IB_MIG_REARM) |
504 | goto inval; | 508 | goto inval; |
505 | 509 | ||
510 | if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) | ||
511 | if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC) | ||
512 | goto inval; | ||
513 | |||
506 | switch (new_state) { | 514 | switch (new_state) { |
507 | case IB_QPS_RESET: | 515 | case IB_QPS_RESET: |
508 | ipath_reset_qp(qp); | 516 | ipath_reset_qp(qp); |
@@ -559,6 +567,12 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, | |||
559 | if (attr_mask & IB_QP_QKEY) | 567 | if (attr_mask & IB_QP_QKEY) |
560 | qp->qkey = attr->qkey; | 568 | qp->qkey = attr->qkey; |
561 | 569 | ||
570 | if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) | ||
571 | qp->r_max_rd_atomic = attr->max_dest_rd_atomic; | ||
572 | |||
573 | if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) | ||
574 | qp->s_max_rd_atomic = attr->max_rd_atomic; | ||
575 | |||
562 | qp->state = new_state; | 576 | qp->state = new_state; |
563 | spin_unlock_irqrestore(&qp->s_lock, flags); | 577 | spin_unlock_irqrestore(&qp->s_lock, flags); |
564 | 578 | ||
@@ -598,8 +612,8 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, | |||
598 | attr->alt_pkey_index = 0; | 612 | attr->alt_pkey_index = 0; |
599 | attr->en_sqd_async_notify = 0; | 613 | attr->en_sqd_async_notify = 0; |
600 | attr->sq_draining = 0; | 614 | attr->sq_draining = 0; |
601 | attr->max_rd_atomic = 1; | 615 | attr->max_rd_atomic = qp->s_max_rd_atomic; |
602 | attr->max_dest_rd_atomic = 1; | 616 | attr->max_dest_rd_atomic = qp->r_max_rd_atomic; |
603 | attr->min_rnr_timer = qp->r_min_rnr_timer; | 617 | attr->min_rnr_timer = qp->r_min_rnr_timer; |
604 | attr->port_num = 1; | 618 | attr->port_num = 1; |
605 | attr->timeout = qp->timeout; | 619 | attr->timeout = qp->timeout; |
@@ -614,7 +628,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, | |||
614 | init_attr->recv_cq = qp->ibqp.recv_cq; | 628 | init_attr->recv_cq = qp->ibqp.recv_cq; |
615 | init_attr->srq = qp->ibqp.srq; | 629 | init_attr->srq = qp->ibqp.srq; |
616 | init_attr->cap = attr->cap; | 630 | init_attr->cap = attr->cap; |
617 | if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR)) | 631 | if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR) |
618 | init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; | 632 | init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; |
619 | else | 633 | else |
620 | init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; | 634 | init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; |
@@ -786,7 +800,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, | |||
786 | qp->s_size = init_attr->cap.max_send_wr + 1; | 800 | qp->s_size = init_attr->cap.max_send_wr + 1; |
787 | qp->s_max_sge = init_attr->cap.max_send_sge; | 801 | qp->s_max_sge = init_attr->cap.max_send_sge; |
788 | if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) | 802 | if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) |
789 | qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR; | 803 | qp->s_flags = IPATH_S_SIGNAL_REQ_WR; |
790 | else | 804 | else |
791 | qp->s_flags = 0; | 805 | qp->s_flags = 0; |
792 | dev = to_idev(ibpd->device); | 806 | dev = to_idev(ibpd->device); |
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 5ff20cb04494..c9c3d7cd2923 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c | |||
@@ -37,6 +37,19 @@ | |||
37 | /* cut down ridiculously long IB macro names */ | 37 | /* cut down ridiculously long IB macro names */ |
38 | #define OP(x) IB_OPCODE_RC_##x | 38 | #define OP(x) IB_OPCODE_RC_##x |
39 | 39 | ||
40 | static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe, | ||
41 | u32 psn, u32 pmtu) | ||
42 | { | ||
43 | u32 len; | ||
44 | |||
45 | len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu; | ||
46 | ss->sge = wqe->sg_list[0]; | ||
47 | ss->sg_list = wqe->sg_list + 1; | ||
48 | ss->num_sge = wqe->wr.num_sge; | ||
49 | ipath_skip_sge(ss, len); | ||
50 | return wqe->length - len; | ||
51 | } | ||
52 | |||
40 | /** | 53 | /** |
41 | * ipath_init_restart- initialize the qp->s_sge after a restart | 54 | * ipath_init_restart- initialize the qp->s_sge after a restart |
42 | * @qp: the QP who's SGE we're restarting | 55 | * @qp: the QP who's SGE we're restarting |
@@ -47,15 +60,9 @@ | |||
47 | static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) | 60 | static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) |
48 | { | 61 | { |
49 | struct ipath_ibdev *dev; | 62 | struct ipath_ibdev *dev; |
50 | u32 len; | ||
51 | 63 | ||
52 | len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * | 64 | qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, |
53 | ib_mtu_enum_to_int(qp->path_mtu); | 65 | ib_mtu_enum_to_int(qp->path_mtu)); |
54 | qp->s_sge.sge = wqe->sg_list[0]; | ||
55 | qp->s_sge.sg_list = wqe->sg_list + 1; | ||
56 | qp->s_sge.num_sge = wqe->wr.num_sge; | ||
57 | ipath_skip_sge(&qp->s_sge, len); | ||
58 | qp->s_len = wqe->length - len; | ||
59 | dev = to_idev(qp->ibqp.device); | 66 | dev = to_idev(qp->ibqp.device); |
60 | spin_lock(&dev->pending_lock); | 67 | spin_lock(&dev->pending_lock); |
61 | if (list_empty(&qp->timerwait)) | 68 | if (list_empty(&qp->timerwait)) |
@@ -70,107 +77,123 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) | |||
70 | * @ohdr: a pointer to the IB header being constructed | 77 | * @ohdr: a pointer to the IB header being constructed |
71 | * @pmtu: the path MTU | 78 | * @pmtu: the path MTU |
72 | * | 79 | * |
73 | * Return bth0 if constructed; otherwise, return 0. | 80 | * Return 1 if constructed; otherwise, return 0. |
81 | * Note that we are in the responder's side of the QP context. | ||
74 | * Note the QP s_lock must be held. | 82 | * Note the QP s_lock must be held. |
75 | */ | 83 | */ |
76 | u32 ipath_make_rc_ack(struct ipath_qp *qp, | 84 | static int ipath_make_rc_ack(struct ipath_qp *qp, |
77 | struct ipath_other_headers *ohdr, | 85 | struct ipath_other_headers *ohdr, |
78 | u32 pmtu) | 86 | u32 pmtu, u32 *bth0p, u32 *bth2p) |
79 | { | 87 | { |
88 | struct ipath_ack_entry *e; | ||
80 | u32 hwords; | 89 | u32 hwords; |
81 | u32 len; | 90 | u32 len; |
82 | u32 bth0; | 91 | u32 bth0; |
92 | u32 bth2; | ||
83 | 93 | ||
84 | /* header size in 32-bit words LRH+BTH = (8+12)/4. */ | 94 | /* header size in 32-bit words LRH+BTH = (8+12)/4. */ |
85 | hwords = 5; | 95 | hwords = 5; |
86 | 96 | ||
87 | /* | ||
88 | * Send a response. Note that we are in the responder's | ||
89 | * side of the QP context. | ||
90 | */ | ||
91 | switch (qp->s_ack_state) { | 97 | switch (qp->s_ack_state) { |
92 | case OP(RDMA_READ_REQUEST): | 98 | case OP(RDMA_READ_RESPONSE_LAST): |
93 | qp->s_cur_sge = &qp->s_rdma_sge; | 99 | case OP(RDMA_READ_RESPONSE_ONLY): |
94 | len = qp->s_rdma_len; | 100 | case OP(ATOMIC_ACKNOWLEDGE): |
95 | if (len > pmtu) { | 101 | qp->s_ack_state = OP(ACKNOWLEDGE); |
96 | len = pmtu; | 102 | /* FALLTHROUGH */ |
97 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); | 103 | case OP(ACKNOWLEDGE): |
98 | } else | 104 | /* Check for no next entry in the queue. */ |
99 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); | 105 | if (qp->r_head_ack_queue == qp->s_tail_ack_queue) { |
100 | qp->s_rdma_len -= len; | 106 | if (qp->s_flags & IPATH_S_ACK_PENDING) |
107 | goto normal; | ||
108 | goto bail; | ||
109 | } | ||
110 | |||
111 | e = &qp->s_ack_queue[qp->s_tail_ack_queue]; | ||
112 | if (e->opcode == OP(RDMA_READ_REQUEST)) { | ||
113 | /* Copy SGE state in case we need to resend */ | ||
114 | qp->s_ack_rdma_sge = e->rdma_sge; | ||
115 | qp->s_cur_sge = &qp->s_ack_rdma_sge; | ||
116 | len = e->rdma_sge.sge.sge_length; | ||
117 | if (len > pmtu) { | ||
118 | len = pmtu; | ||
119 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); | ||
120 | } else { | ||
121 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); | ||
122 | if (++qp->s_tail_ack_queue > | ||
123 | IPATH_MAX_RDMA_ATOMIC) | ||
124 | qp->s_tail_ack_queue = 0; | ||
125 | } | ||
126 | ohdr->u.aeth = ipath_compute_aeth(qp); | ||
127 | hwords++; | ||
128 | qp->s_ack_rdma_psn = e->psn; | ||
129 | bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; | ||
130 | } else { | ||
131 | /* COMPARE_SWAP or FETCH_ADD */ | ||
132 | qp->s_cur_sge = NULL; | ||
133 | len = 0; | ||
134 | qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); | ||
135 | ohdr->u.at.aeth = ipath_compute_aeth(qp); | ||
136 | ohdr->u.at.atomic_ack_eth[0] = | ||
137 | cpu_to_be32(e->atomic_data >> 32); | ||
138 | ohdr->u.at.atomic_ack_eth[1] = | ||
139 | cpu_to_be32(e->atomic_data); | ||
140 | hwords += sizeof(ohdr->u.at) / sizeof(u32); | ||
141 | bth2 = e->psn; | ||
142 | if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC) | ||
143 | qp->s_tail_ack_queue = 0; | ||
144 | } | ||
101 | bth0 = qp->s_ack_state << 24; | 145 | bth0 = qp->s_ack_state << 24; |
102 | ohdr->u.aeth = ipath_compute_aeth(qp); | ||
103 | hwords++; | ||
104 | break; | 146 | break; |
105 | 147 | ||
106 | case OP(RDMA_READ_RESPONSE_FIRST): | 148 | case OP(RDMA_READ_RESPONSE_FIRST): |
107 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); | 149 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); |
108 | /* FALLTHROUGH */ | 150 | /* FALLTHROUGH */ |
109 | case OP(RDMA_READ_RESPONSE_MIDDLE): | 151 | case OP(RDMA_READ_RESPONSE_MIDDLE): |
110 | qp->s_cur_sge = &qp->s_rdma_sge; | 152 | len = qp->s_ack_rdma_sge.sge.sge_length; |
111 | len = qp->s_rdma_len; | ||
112 | if (len > pmtu) | 153 | if (len > pmtu) |
113 | len = pmtu; | 154 | len = pmtu; |
114 | else { | 155 | else { |
115 | ohdr->u.aeth = ipath_compute_aeth(qp); | 156 | ohdr->u.aeth = ipath_compute_aeth(qp); |
116 | hwords++; | 157 | hwords++; |
117 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); | 158 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); |
159 | if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC) | ||
160 | qp->s_tail_ack_queue = 0; | ||
118 | } | 161 | } |
119 | qp->s_rdma_len -= len; | ||
120 | bth0 = qp->s_ack_state << 24; | 162 | bth0 = qp->s_ack_state << 24; |
121 | break; | 163 | bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; |
122 | |||
123 | case OP(RDMA_READ_RESPONSE_LAST): | ||
124 | case OP(RDMA_READ_RESPONSE_ONLY): | ||
125 | /* | ||
126 | * We have to prevent new requests from changing | ||
127 | * the r_sge state while a ipath_verbs_send() | ||
128 | * is in progress. | ||
129 | */ | ||
130 | qp->s_ack_state = OP(ACKNOWLEDGE); | ||
131 | bth0 = 0; | ||
132 | goto bail; | ||
133 | |||
134 | case OP(COMPARE_SWAP): | ||
135 | case OP(FETCH_ADD): | ||
136 | qp->s_cur_sge = NULL; | ||
137 | len = 0; | ||
138 | /* | ||
139 | * Set the s_ack_state so the receive interrupt handler | ||
140 | * won't try to send an ACK (out of order) until this one | ||
141 | * is actually sent. | ||
142 | */ | ||
143 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); | ||
144 | bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24; | ||
145 | ohdr->u.at.aeth = ipath_compute_aeth(qp); | ||
146 | ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data); | ||
147 | hwords += sizeof(ohdr->u.at) / 4; | ||
148 | break; | 164 | break; |
149 | 165 | ||
150 | default: | 166 | default: |
151 | /* Send a regular ACK. */ | 167 | normal: |
152 | qp->s_cur_sge = NULL; | ||
153 | len = 0; | ||
154 | /* | 168 | /* |
155 | * Set the s_ack_state so the receive interrupt handler | 169 | * Send a regular ACK. |
156 | * won't try to send an ACK (out of order) until this one | 170 | * Set the s_ack_state so we wait until after sending |
157 | * is actually sent. | 171 | * the ACK before setting s_ack_state to ACKNOWLEDGE |
172 | * (see above). | ||
158 | */ | 173 | */ |
159 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); | 174 | qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); |
160 | bth0 = OP(ACKNOWLEDGE) << 24; | 175 | qp->s_flags &= ~IPATH_S_ACK_PENDING; |
176 | qp->s_cur_sge = NULL; | ||
161 | if (qp->s_nak_state) | 177 | if (qp->s_nak_state) |
162 | ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | | 178 | ohdr->u.aeth = |
163 | (qp->s_nak_state << | 179 | cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | |
164 | IPATH_AETH_CREDIT_SHIFT)); | 180 | (qp->s_nak_state << |
181 | IPATH_AETH_CREDIT_SHIFT)); | ||
165 | else | 182 | else |
166 | ohdr->u.aeth = ipath_compute_aeth(qp); | 183 | ohdr->u.aeth = ipath_compute_aeth(qp); |
167 | hwords++; | 184 | hwords++; |
185 | len = 0; | ||
186 | bth0 = OP(ACKNOWLEDGE) << 24; | ||
187 | bth2 = qp->s_ack_psn & IPATH_PSN_MASK; | ||
168 | } | 188 | } |
169 | qp->s_hdrwords = hwords; | 189 | qp->s_hdrwords = hwords; |
170 | qp->s_cur_size = len; | 190 | qp->s_cur_size = len; |
191 | *bth0p = bth0; | ||
192 | *bth2p = bth2; | ||
193 | return 1; | ||
171 | 194 | ||
172 | bail: | 195 | bail: |
173 | return bth0; | 196 | return 0; |
174 | } | 197 | } |
175 | 198 | ||
176 | /** | 199 | /** |
@@ -197,9 +220,16 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
197 | u32 bth2; | 220 | u32 bth2; |
198 | char newreq; | 221 | char newreq; |
199 | 222 | ||
223 | /* Sending responses has higher priority over sending requests. */ | ||
224 | if ((qp->r_head_ack_queue != qp->s_tail_ack_queue || | ||
225 | (qp->s_flags & IPATH_S_ACK_PENDING) || | ||
226 | qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE) && | ||
227 | ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p)) | ||
228 | goto done; | ||
229 | |||
200 | if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || | 230 | if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || |
201 | qp->s_rnr_timeout) | 231 | qp->s_rnr_timeout) |
202 | goto done; | 232 | goto bail; |
203 | 233 | ||
204 | /* Limit the number of packets sent without an ACK. */ | 234 | /* Limit the number of packets sent without an ACK. */ |
205 | if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { | 235 | if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { |
@@ -210,7 +240,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
210 | list_add_tail(&qp->timerwait, | 240 | list_add_tail(&qp->timerwait, |
211 | &dev->pending[dev->pending_index]); | 241 | &dev->pending[dev->pending_index]); |
212 | spin_unlock(&dev->pending_lock); | 242 | spin_unlock(&dev->pending_lock); |
213 | goto done; | 243 | goto bail; |
214 | } | 244 | } |
215 | 245 | ||
216 | /* header size in 32-bit words LRH+BTH = (8+12)/4. */ | 246 | /* header size in 32-bit words LRH+BTH = (8+12)/4. */ |
@@ -232,7 +262,16 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
232 | if (qp->s_cur == qp->s_tail) { | 262 | if (qp->s_cur == qp->s_tail) { |
233 | /* Check if send work queue is empty. */ | 263 | /* Check if send work queue is empty. */ |
234 | if (qp->s_tail == qp->s_head) | 264 | if (qp->s_tail == qp->s_head) |
235 | goto done; | 265 | goto bail; |
266 | /* | ||
267 | * If a fence is requested, wait for previous | ||
268 | * RDMA read and atomic operations to finish. | ||
269 | */ | ||
270 | if ((wqe->wr.send_flags & IB_SEND_FENCE) && | ||
271 | qp->s_num_rd_atomic) { | ||
272 | qp->s_flags |= IPATH_S_FENCE_PENDING; | ||
273 | goto bail; | ||
274 | } | ||
236 | wqe->psn = qp->s_next_psn; | 275 | wqe->psn = qp->s_next_psn; |
237 | newreq = 1; | 276 | newreq = 1; |
238 | } | 277 | } |
@@ -250,7 +289,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
250 | /* If no credit, return. */ | 289 | /* If no credit, return. */ |
251 | if (qp->s_lsn != (u32) -1 && | 290 | if (qp->s_lsn != (u32) -1 && |
252 | ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) | 291 | ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) |
253 | goto done; | 292 | goto bail; |
254 | wqe->lpsn = wqe->psn; | 293 | wqe->lpsn = wqe->psn; |
255 | if (len > pmtu) { | 294 | if (len > pmtu) { |
256 | wqe->lpsn += (len - 1) / pmtu; | 295 | wqe->lpsn += (len - 1) / pmtu; |
@@ -281,13 +320,13 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
281 | /* If no credit, return. */ | 320 | /* If no credit, return. */ |
282 | if (qp->s_lsn != (u32) -1 && | 321 | if (qp->s_lsn != (u32) -1 && |
283 | ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) | 322 | ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) |
284 | goto done; | 323 | goto bail; |
285 | ohdr->u.rc.reth.vaddr = | 324 | ohdr->u.rc.reth.vaddr = |
286 | cpu_to_be64(wqe->wr.wr.rdma.remote_addr); | 325 | cpu_to_be64(wqe->wr.wr.rdma.remote_addr); |
287 | ohdr->u.rc.reth.rkey = | 326 | ohdr->u.rc.reth.rkey = |
288 | cpu_to_be32(wqe->wr.wr.rdma.rkey); | 327 | cpu_to_be32(wqe->wr.wr.rdma.rkey); |
289 | ohdr->u.rc.reth.length = cpu_to_be32(len); | 328 | ohdr->u.rc.reth.length = cpu_to_be32(len); |
290 | hwords += sizeof(struct ib_reth) / 4; | 329 | hwords += sizeof(struct ib_reth) / sizeof(u32); |
291 | wqe->lpsn = wqe->psn; | 330 | wqe->lpsn = wqe->psn; |
292 | if (len > pmtu) { | 331 | if (len > pmtu) { |
293 | wqe->lpsn += (len - 1) / pmtu; | 332 | wqe->lpsn += (len - 1) / pmtu; |
@@ -312,14 +351,17 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
312 | break; | 351 | break; |
313 | 352 | ||
314 | case IB_WR_RDMA_READ: | 353 | case IB_WR_RDMA_READ: |
315 | ohdr->u.rc.reth.vaddr = | 354 | /* |
316 | cpu_to_be64(wqe->wr.wr.rdma.remote_addr); | 355 | * Don't allow more operations to be started |
317 | ohdr->u.rc.reth.rkey = | 356 | * than the QP limits allow. |
318 | cpu_to_be32(wqe->wr.wr.rdma.rkey); | 357 | */ |
319 | ohdr->u.rc.reth.length = cpu_to_be32(len); | ||
320 | qp->s_state = OP(RDMA_READ_REQUEST); | ||
321 | hwords += sizeof(ohdr->u.rc.reth) / 4; | ||
322 | if (newreq) { | 358 | if (newreq) { |
359 | if (qp->s_num_rd_atomic >= | ||
360 | qp->s_max_rd_atomic) { | ||
361 | qp->s_flags |= IPATH_S_RDMAR_PENDING; | ||
362 | goto bail; | ||
363 | } | ||
364 | qp->s_num_rd_atomic++; | ||
323 | if (qp->s_lsn != (u32) -1) | 365 | if (qp->s_lsn != (u32) -1) |
324 | qp->s_lsn++; | 366 | qp->s_lsn++; |
325 | /* | 367 | /* |
@@ -330,6 +372,13 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
330 | qp->s_next_psn += (len - 1) / pmtu; | 372 | qp->s_next_psn += (len - 1) / pmtu; |
331 | wqe->lpsn = qp->s_next_psn++; | 373 | wqe->lpsn = qp->s_next_psn++; |
332 | } | 374 | } |
375 | ohdr->u.rc.reth.vaddr = | ||
376 | cpu_to_be64(wqe->wr.wr.rdma.remote_addr); | ||
377 | ohdr->u.rc.reth.rkey = | ||
378 | cpu_to_be32(wqe->wr.wr.rdma.rkey); | ||
379 | ohdr->u.rc.reth.length = cpu_to_be32(len); | ||
380 | qp->s_state = OP(RDMA_READ_REQUEST); | ||
381 | hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); | ||
333 | ss = NULL; | 382 | ss = NULL; |
334 | len = 0; | 383 | len = 0; |
335 | if (++qp->s_cur == qp->s_size) | 384 | if (++qp->s_cur == qp->s_size) |
@@ -338,32 +387,48 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
338 | 387 | ||
339 | case IB_WR_ATOMIC_CMP_AND_SWP: | 388 | case IB_WR_ATOMIC_CMP_AND_SWP: |
340 | case IB_WR_ATOMIC_FETCH_AND_ADD: | 389 | case IB_WR_ATOMIC_FETCH_AND_ADD: |
341 | if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) | 390 | /* |
342 | qp->s_state = OP(COMPARE_SWAP); | 391 | * Don't allow more operations to be started |
343 | else | 392 | * than the QP limits allow. |
344 | qp->s_state = OP(FETCH_ADD); | 393 | */ |
345 | ohdr->u.atomic_eth.vaddr = cpu_to_be64( | ||
346 | wqe->wr.wr.atomic.remote_addr); | ||
347 | ohdr->u.atomic_eth.rkey = cpu_to_be32( | ||
348 | wqe->wr.wr.atomic.rkey); | ||
349 | ohdr->u.atomic_eth.swap_data = cpu_to_be64( | ||
350 | wqe->wr.wr.atomic.swap); | ||
351 | ohdr->u.atomic_eth.compare_data = cpu_to_be64( | ||
352 | wqe->wr.wr.atomic.compare_add); | ||
353 | hwords += sizeof(struct ib_atomic_eth) / 4; | ||
354 | if (newreq) { | 394 | if (newreq) { |
395 | if (qp->s_num_rd_atomic >= | ||
396 | qp->s_max_rd_atomic) { | ||
397 | qp->s_flags |= IPATH_S_RDMAR_PENDING; | ||
398 | goto bail; | ||
399 | } | ||
400 | qp->s_num_rd_atomic++; | ||
355 | if (qp->s_lsn != (u32) -1) | 401 | if (qp->s_lsn != (u32) -1) |
356 | qp->s_lsn++; | 402 | qp->s_lsn++; |
357 | wqe->lpsn = wqe->psn; | 403 | wqe->lpsn = wqe->psn; |
358 | } | 404 | } |
359 | if (++qp->s_cur == qp->s_size) | 405 | if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { |
360 | qp->s_cur = 0; | 406 | qp->s_state = OP(COMPARE_SWAP); |
407 | ohdr->u.atomic_eth.swap_data = cpu_to_be64( | ||
408 | wqe->wr.wr.atomic.swap); | ||
409 | ohdr->u.atomic_eth.compare_data = cpu_to_be64( | ||
410 | wqe->wr.wr.atomic.compare_add); | ||
411 | } else { | ||
412 | qp->s_state = OP(FETCH_ADD); | ||
413 | ohdr->u.atomic_eth.swap_data = cpu_to_be64( | ||
414 | wqe->wr.wr.atomic.compare_add); | ||
415 | ohdr->u.atomic_eth.compare_data = 0; | ||
416 | } | ||
417 | ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( | ||
418 | wqe->wr.wr.atomic.remote_addr >> 32); | ||
419 | ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( | ||
420 | wqe->wr.wr.atomic.remote_addr); | ||
421 | ohdr->u.atomic_eth.rkey = cpu_to_be32( | ||
422 | wqe->wr.wr.atomic.rkey); | ||
423 | hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); | ||
361 | ss = NULL; | 424 | ss = NULL; |
362 | len = 0; | 425 | len = 0; |
426 | if (++qp->s_cur == qp->s_size) | ||
427 | qp->s_cur = 0; | ||
363 | break; | 428 | break; |
364 | 429 | ||
365 | default: | 430 | default: |
366 | goto done; | 431 | goto bail; |
367 | } | 432 | } |
368 | qp->s_sge.sge = wqe->sg_list[0]; | 433 | qp->s_sge.sge = wqe->sg_list[0]; |
369 | qp->s_sge.sg_list = wqe->sg_list + 1; | 434 | qp->s_sge.sg_list = wqe->sg_list + 1; |
@@ -479,7 +544,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
479 | cpu_to_be32(wqe->wr.wr.rdma.rkey); | 544 | cpu_to_be32(wqe->wr.wr.rdma.rkey); |
480 | ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); | 545 | ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); |
481 | qp->s_state = OP(RDMA_READ_REQUEST); | 546 | qp->s_state = OP(RDMA_READ_REQUEST); |
482 | hwords += sizeof(ohdr->u.rc.reth) / 4; | 547 | hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); |
483 | bth2 = qp->s_psn++ & IPATH_PSN_MASK; | 548 | bth2 = qp->s_psn++ & IPATH_PSN_MASK; |
484 | if ((int)(qp->s_psn - qp->s_next_psn) > 0) | 549 | if ((int)(qp->s_psn - qp->s_next_psn) > 0) |
485 | qp->s_next_psn = qp->s_psn; | 550 | qp->s_next_psn = qp->s_psn; |
@@ -489,20 +554,6 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
489 | if (qp->s_cur == qp->s_size) | 554 | if (qp->s_cur == qp->s_size) |
490 | qp->s_cur = 0; | 555 | qp->s_cur = 0; |
491 | break; | 556 | break; |
492 | |||
493 | case OP(RDMA_READ_REQUEST): | ||
494 | case OP(COMPARE_SWAP): | ||
495 | case OP(FETCH_ADD): | ||
496 | /* | ||
497 | * We shouldn't start anything new until this request is | ||
498 | * finished. The ACK will handle rescheduling us. XXX The | ||
499 | * number of outstanding ones is negotiated at connection | ||
500 | * setup time (see pg. 258,289)? XXX Also, if we support | ||
501 | * multiple outstanding requests, we need to check the WQE | ||
502 | * IB_SEND_FENCE flag and not send a new request if a RDMA | ||
503 | * read or atomic is pending. | ||
504 | */ | ||
505 | goto done; | ||
506 | } | 557 | } |
507 | if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) | 558 | if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) |
508 | bth2 |= 1 << 31; /* Request ACK. */ | 559 | bth2 |= 1 << 31; /* Request ACK. */ |
@@ -512,9 +563,10 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
512 | qp->s_cur_size = len; | 563 | qp->s_cur_size = len; |
513 | *bth0p = bth0 | (qp->s_state << 24); | 564 | *bth0p = bth0 | (qp->s_state << 24); |
514 | *bth2p = bth2; | 565 | *bth2p = bth2; |
566 | done: | ||
515 | return 1; | 567 | return 1; |
516 | 568 | ||
517 | done: | 569 | bail: |
518 | return 0; | 570 | return 0; |
519 | } | 571 | } |
520 | 572 | ||
@@ -524,7 +576,8 @@ done: | |||
524 | * | 576 | * |
525 | * This is called from ipath_rc_rcv() and only uses the receive | 577 | * This is called from ipath_rc_rcv() and only uses the receive |
526 | * side QP state. | 578 | * side QP state. |
527 | * Note that RDMA reads are handled in the send side QP state and tasklet. | 579 | * Note that RDMA reads and atomics are handled in the |
580 | * send side QP state and tasklet. | ||
528 | */ | 581 | */ |
529 | static void send_rc_ack(struct ipath_qp *qp) | 582 | static void send_rc_ack(struct ipath_qp *qp) |
530 | { | 583 | { |
@@ -535,6 +588,10 @@ static void send_rc_ack(struct ipath_qp *qp) | |||
535 | struct ipath_ib_header hdr; | 588 | struct ipath_ib_header hdr; |
536 | struct ipath_other_headers *ohdr; | 589 | struct ipath_other_headers *ohdr; |
537 | 590 | ||
591 | /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ | ||
592 | if (qp->r_head_ack_queue != qp->s_tail_ack_queue) | ||
593 | goto queue_ack; | ||
594 | |||
538 | /* Construct the header. */ | 595 | /* Construct the header. */ |
539 | ohdr = &hdr.u.oth; | 596 | ohdr = &hdr.u.oth; |
540 | lrh0 = IPATH_LRH_BTH; | 597 | lrh0 = IPATH_LRH_BTH; |
@@ -548,19 +605,14 @@ static void send_rc_ack(struct ipath_qp *qp) | |||
548 | lrh0 = IPATH_LRH_GRH; | 605 | lrh0 = IPATH_LRH_GRH; |
549 | } | 606 | } |
550 | /* read pkey_index w/o lock (its atomic) */ | 607 | /* read pkey_index w/o lock (its atomic) */ |
551 | bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index); | 608 | bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) | |
609 | OP(ACKNOWLEDGE) << 24; | ||
552 | if (qp->r_nak_state) | 610 | if (qp->r_nak_state) |
553 | ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | | 611 | ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | |
554 | (qp->r_nak_state << | 612 | (qp->r_nak_state << |
555 | IPATH_AETH_CREDIT_SHIFT)); | 613 | IPATH_AETH_CREDIT_SHIFT)); |
556 | else | 614 | else |
557 | ohdr->u.aeth = ipath_compute_aeth(qp); | 615 | ohdr->u.aeth = ipath_compute_aeth(qp); |
558 | if (qp->r_ack_state >= OP(COMPARE_SWAP)) { | ||
559 | bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24; | ||
560 | ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data); | ||
561 | hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4; | ||
562 | } else | ||
563 | bth0 |= OP(ACKNOWLEDGE) << 24; | ||
564 | lrh0 |= qp->remote_ah_attr.sl << 4; | 616 | lrh0 |= qp->remote_ah_attr.sl << 4; |
565 | hdr.lrh[0] = cpu_to_be16(lrh0); | 617 | hdr.lrh[0] = cpu_to_be16(lrh0); |
566 | hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); | 618 | hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); |
@@ -574,31 +626,31 @@ static void send_rc_ack(struct ipath_qp *qp) | |||
574 | * If we can send the ACK, clear the ACK state. | 626 | * If we can send the ACK, clear the ACK state. |
575 | */ | 627 | */ |
576 | if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) { | 628 | if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) { |
577 | qp->r_ack_state = OP(ACKNOWLEDGE); | ||
578 | dev->n_unicast_xmit++; | 629 | dev->n_unicast_xmit++; |
579 | } else { | 630 | goto done; |
580 | /* | ||
581 | * We are out of PIO buffers at the moment. | ||
582 | * Pass responsibility for sending the ACK to the | ||
583 | * send tasklet so that when a PIO buffer becomes | ||
584 | * available, the ACK is sent ahead of other outgoing | ||
585 | * packets. | ||
586 | */ | ||
587 | dev->n_rc_qacks++; | ||
588 | spin_lock_irq(&qp->s_lock); | ||
589 | /* Don't coalesce if a RDMA read or atomic is pending. */ | ||
590 | if (qp->s_ack_state == OP(ACKNOWLEDGE) || | ||
591 | qp->s_ack_state < OP(RDMA_READ_REQUEST)) { | ||
592 | qp->s_ack_state = qp->r_ack_state; | ||
593 | qp->s_nak_state = qp->r_nak_state; | ||
594 | qp->s_ack_psn = qp->r_ack_psn; | ||
595 | qp->r_ack_state = OP(ACKNOWLEDGE); | ||
596 | } | ||
597 | spin_unlock_irq(&qp->s_lock); | ||
598 | |||
599 | /* Call ipath_do_rc_send() in another thread. */ | ||
600 | tasklet_hi_schedule(&qp->s_task); | ||
601 | } | 631 | } |
632 | |||
633 | /* | ||
634 | * We are out of PIO buffers at the moment. | ||
635 | * Pass responsibility for sending the ACK to the | ||
636 | * send tasklet so that when a PIO buffer becomes | ||
637 | * available, the ACK is sent ahead of other outgoing | ||
638 | * packets. | ||
639 | */ | ||
640 | dev->n_rc_qacks++; | ||
641 | |||
642 | queue_ack: | ||
643 | spin_lock_irq(&qp->s_lock); | ||
644 | qp->s_flags |= IPATH_S_ACK_PENDING; | ||
645 | qp->s_nak_state = qp->r_nak_state; | ||
646 | qp->s_ack_psn = qp->r_ack_psn; | ||
647 | spin_unlock_irq(&qp->s_lock); | ||
648 | |||
649 | /* Call ipath_do_rc_send() in another thread. */ | ||
650 | tasklet_hi_schedule(&qp->s_task); | ||
651 | |||
652 | done: | ||
653 | return; | ||
602 | } | 654 | } |
603 | 655 | ||
604 | /** | 656 | /** |
@@ -775,10 +827,6 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) | |||
775 | list_del_init(&qp->timerwait); | 827 | list_del_init(&qp->timerwait); |
776 | spin_unlock(&dev->pending_lock); | 828 | spin_unlock(&dev->pending_lock); |
777 | 829 | ||
778 | /* Nothing is pending to ACK/NAK. */ | ||
779 | if (unlikely(qp->s_last == qp->s_tail)) | ||
780 | goto bail; | ||
781 | |||
782 | /* | 830 | /* |
783 | * Note that NAKs implicitly ACK outstanding SEND and RDMA write | 831 | * Note that NAKs implicitly ACK outstanding SEND and RDMA write |
784 | * requests and implicitly NAK RDMA read and atomic requests issued | 832 | * requests and implicitly NAK RDMA read and atomic requests issued |
@@ -806,7 +854,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) | |||
806 | */ | 854 | */ |
807 | if ((wqe->wr.opcode == IB_WR_RDMA_READ && | 855 | if ((wqe->wr.opcode == IB_WR_RDMA_READ && |
808 | (opcode != OP(RDMA_READ_RESPONSE_LAST) || | 856 | (opcode != OP(RDMA_READ_RESPONSE_LAST) || |
809 | ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || | 857 | ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || |
810 | ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || | 858 | ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || |
811 | wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && | 859 | wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && |
812 | (opcode != OP(ATOMIC_ACKNOWLEDGE) || | 860 | (opcode != OP(ATOMIC_ACKNOWLEDGE) || |
@@ -824,12 +872,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) | |||
824 | */ | 872 | */ |
825 | goto bail; | 873 | goto bail; |
826 | } | 874 | } |
827 | if (wqe->wr.opcode == IB_WR_RDMA_READ || | 875 | if (qp->s_num_rd_atomic && |
828 | wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || | 876 | (wqe->wr.opcode == IB_WR_RDMA_READ || |
829 | wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) | 877 | wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || |
830 | tasklet_hi_schedule(&qp->s_task); | 878 | wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { |
879 | qp->s_num_rd_atomic--; | ||
880 | /* Restart sending task if fence is complete */ | ||
881 | if ((qp->s_flags & IPATH_S_FENCE_PENDING) && | ||
882 | !qp->s_num_rd_atomic) { | ||
883 | qp->s_flags &= ~IPATH_S_FENCE_PENDING; | ||
884 | tasklet_hi_schedule(&qp->s_task); | ||
885 | } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) { | ||
886 | qp->s_flags &= ~IPATH_S_RDMAR_PENDING; | ||
887 | tasklet_hi_schedule(&qp->s_task); | ||
888 | } | ||
889 | } | ||
831 | /* Post a send completion queue entry if requested. */ | 890 | /* Post a send completion queue entry if requested. */ |
832 | if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || | 891 | if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || |
833 | (wqe->wr.send_flags & IB_SEND_SIGNALED)) { | 892 | (wqe->wr.send_flags & IB_SEND_SIGNALED)) { |
834 | wc.wr_id = wqe->wr.wr_id; | 893 | wc.wr_id = wqe->wr.wr_id; |
835 | wc.status = IB_WC_SUCCESS; | 894 | wc.status = IB_WC_SUCCESS; |
@@ -1003,6 +1062,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, | |||
1003 | u32 psn, u32 hdrsize, u32 pmtu, | 1062 | u32 psn, u32 hdrsize, u32 pmtu, |
1004 | int header_in_data) | 1063 | int header_in_data) |
1005 | { | 1064 | { |
1065 | struct ipath_swqe *wqe; | ||
1006 | unsigned long flags; | 1066 | unsigned long flags; |
1007 | struct ib_wc wc; | 1067 | struct ib_wc wc; |
1008 | int diff; | 1068 | int diff; |
@@ -1032,6 +1092,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, | |||
1032 | goto ack_done; | 1092 | goto ack_done; |
1033 | } | 1093 | } |
1034 | 1094 | ||
1095 | if (unlikely(qp->s_last == qp->s_tail)) | ||
1096 | goto ack_done; | ||
1097 | wqe = get_swqe_ptr(qp, qp->s_last); | ||
1098 | |||
1035 | switch (opcode) { | 1099 | switch (opcode) { |
1036 | case OP(ACKNOWLEDGE): | 1100 | case OP(ACKNOWLEDGE): |
1037 | case OP(ATOMIC_ACKNOWLEDGE): | 1101 | case OP(ATOMIC_ACKNOWLEDGE): |
@@ -1042,38 +1106,49 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, | |||
1042 | aeth = be32_to_cpu(((__be32 *) data)[0]); | 1106 | aeth = be32_to_cpu(((__be32 *) data)[0]); |
1043 | data += sizeof(__be32); | 1107 | data += sizeof(__be32); |
1044 | } | 1108 | } |
1045 | if (opcode == OP(ATOMIC_ACKNOWLEDGE)) | 1109 | if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { |
1046 | *(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data; | 1110 | u64 val; |
1111 | |||
1112 | if (!header_in_data) { | ||
1113 | __be32 *p = ohdr->u.at.atomic_ack_eth; | ||
1114 | |||
1115 | val = ((u64) be32_to_cpu(p[0]) << 32) | | ||
1116 | be32_to_cpu(p[1]); | ||
1117 | } else | ||
1118 | val = be64_to_cpu(((__be64 *) data)[0]); | ||
1119 | *(u64 *) wqe->sg_list[0].vaddr = val; | ||
1120 | } | ||
1047 | if (!do_rc_ack(qp, aeth, psn, opcode) || | 1121 | if (!do_rc_ack(qp, aeth, psn, opcode) || |
1048 | opcode != OP(RDMA_READ_RESPONSE_FIRST)) | 1122 | opcode != OP(RDMA_READ_RESPONSE_FIRST)) |
1049 | goto ack_done; | 1123 | goto ack_done; |
1050 | hdrsize += 4; | 1124 | hdrsize += 4; |
1125 | if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) | ||
1126 | goto ack_done; | ||
1051 | /* | 1127 | /* |
1052 | * do_rc_ack() has already checked the PSN so skip | 1128 | * If this is a response to a resent RDMA read, we |
1053 | * the sequence check. | 1129 | * have to be careful to copy the data to the right |
1130 | * location. | ||
1054 | */ | 1131 | */ |
1055 | goto rdma_read; | 1132 | qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, |
1133 | wqe, psn, pmtu); | ||
1134 | goto read_middle; | ||
1056 | 1135 | ||
1057 | case OP(RDMA_READ_RESPONSE_MIDDLE): | 1136 | case OP(RDMA_READ_RESPONSE_MIDDLE): |
1058 | /* no AETH, no ACK */ | 1137 | /* no AETH, no ACK */ |
1059 | if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { | 1138 | if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { |
1060 | dev->n_rdma_seq++; | 1139 | dev->n_rdma_seq++; |
1061 | if (qp->s_last != qp->s_tail) | 1140 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); |
1062 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); | ||
1063 | goto ack_done; | 1141 | goto ack_done; |
1064 | } | 1142 | } |
1065 | rdma_read: | 1143 | if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) |
1066 | if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) | ||
1067 | goto ack_done; | 1144 | goto ack_done; |
1145 | read_middle: | ||
1068 | if (unlikely(tlen != (hdrsize + pmtu + 4))) | 1146 | if (unlikely(tlen != (hdrsize + pmtu + 4))) |
1069 | goto ack_done; | 1147 | goto ack_done; |
1070 | if (unlikely(pmtu >= qp->s_len)) | 1148 | if (unlikely(pmtu >= qp->s_rdma_read_len)) |
1071 | goto ack_done; | 1149 | goto ack_done; |
1150 | |||
1072 | /* We got a response so update the timeout. */ | 1151 | /* We got a response so update the timeout. */ |
1073 | if (unlikely(qp->s_last == qp->s_tail || | ||
1074 | get_swqe_ptr(qp, qp->s_last)->wr.opcode != | ||
1075 | IB_WR_RDMA_READ)) | ||
1076 | goto ack_done; | ||
1077 | spin_lock(&dev->pending_lock); | 1152 | spin_lock(&dev->pending_lock); |
1078 | if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait)) | 1153 | if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait)) |
1079 | list_move_tail(&qp->timerwait, | 1154 | list_move_tail(&qp->timerwait, |
@@ -1082,27 +1157,41 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, | |||
1082 | /* | 1157 | /* |
1083 | * Update the RDMA receive state but do the copy w/o | 1158 | * Update the RDMA receive state but do the copy w/o |
1084 | * holding the locks and blocking interrupts. | 1159 | * holding the locks and blocking interrupts. |
1085 | * XXX Yet another place that affects relaxed RDMA order | ||
1086 | * since we don't want s_sge modified. | ||
1087 | */ | 1160 | */ |
1088 | qp->s_len -= pmtu; | 1161 | qp->s_rdma_read_len -= pmtu; |
1089 | update_last_psn(qp, psn); | 1162 | update_last_psn(qp, psn); |
1090 | spin_unlock_irqrestore(&qp->s_lock, flags); | 1163 | spin_unlock_irqrestore(&qp->s_lock, flags); |
1091 | ipath_copy_sge(&qp->s_sge, data, pmtu); | 1164 | ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu); |
1092 | goto bail; | 1165 | goto bail; |
1093 | 1166 | ||
1167 | case OP(RDMA_READ_RESPONSE_ONLY): | ||
1168 | if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { | ||
1169 | dev->n_rdma_seq++; | ||
1170 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); | ||
1171 | goto ack_done; | ||
1172 | } | ||
1173 | if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) | ||
1174 | goto ack_done; | ||
1175 | /* | ||
1176 | * If this is a response to a resent RDMA read, we | ||
1177 | * have to be careful to copy the data to the right | ||
1178 | * location. | ||
1179 | * XXX should check PSN and wqe opcode first. | ||
1180 | */ | ||
1181 | qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, | ||
1182 | wqe, psn, pmtu); | ||
1183 | goto read_last; | ||
1184 | |||
1094 | case OP(RDMA_READ_RESPONSE_LAST): | 1185 | case OP(RDMA_READ_RESPONSE_LAST): |
1095 | /* ACKs READ req. */ | 1186 | /* ACKs READ req. */ |
1096 | if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { | 1187 | if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { |
1097 | dev->n_rdma_seq++; | 1188 | dev->n_rdma_seq++; |
1098 | if (qp->s_last != qp->s_tail) | 1189 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); |
1099 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); | ||
1100 | goto ack_done; | 1190 | goto ack_done; |
1101 | } | 1191 | } |
1102 | /* FALLTHROUGH */ | 1192 | if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) |
1103 | case OP(RDMA_READ_RESPONSE_ONLY): | ||
1104 | if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) | ||
1105 | goto ack_done; | 1193 | goto ack_done; |
1194 | read_last: | ||
1106 | /* | 1195 | /* |
1107 | * Get the number of bytes the message was padded by. | 1196 | * Get the number of bytes the message was padded by. |
1108 | */ | 1197 | */ |
@@ -1117,7 +1206,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, | |||
1117 | goto ack_done; | 1206 | goto ack_done; |
1118 | } | 1207 | } |
1119 | tlen -= hdrsize + pad + 8; | 1208 | tlen -= hdrsize + pad + 8; |
1120 | if (unlikely(tlen != qp->s_len)) { | 1209 | if (unlikely(tlen != qp->s_rdma_read_len)) { |
1121 | /* XXX Need to generate an error CQ entry. */ | 1210 | /* XXX Need to generate an error CQ entry. */ |
1122 | goto ack_done; | 1211 | goto ack_done; |
1123 | } | 1212 | } |
@@ -1127,17 +1216,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, | |||
1127 | aeth = be32_to_cpu(((__be32 *) data)[0]); | 1216 | aeth = be32_to_cpu(((__be32 *) data)[0]); |
1128 | data += sizeof(__be32); | 1217 | data += sizeof(__be32); |
1129 | } | 1218 | } |
1130 | ipath_copy_sge(&qp->s_sge, data, tlen); | 1219 | ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen); |
1131 | if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) { | 1220 | (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST)); |
1132 | /* | ||
1133 | * Change the state so we contimue | ||
1134 | * processing new requests and wake up the | ||
1135 | * tasklet if there are posted sends. | ||
1136 | */ | ||
1137 | qp->s_state = OP(SEND_LAST); | ||
1138 | if (qp->s_tail != qp->s_head) | ||
1139 | tasklet_hi_schedule(&qp->s_task); | ||
1140 | } | ||
1141 | goto ack_done; | 1221 | goto ack_done; |
1142 | } | 1222 | } |
1143 | 1223 | ||
@@ -1162,7 +1242,7 @@ bail: | |||
1162 | * incoming RC packet for the given QP. | 1242 | * incoming RC packet for the given QP. |
1163 | * Called at interrupt level. | 1243 | * Called at interrupt level. |
1164 | * Return 1 if no more processing is needed; otherwise return 0 to | 1244 | * Return 1 if no more processing is needed; otherwise return 0 to |
1165 | * schedule a response to be sent and the s_lock unlocked. | 1245 | * schedule a response to be sent. |
1166 | */ | 1246 | */ |
1167 | static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | 1247 | static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, |
1168 | struct ipath_other_headers *ohdr, | 1248 | struct ipath_other_headers *ohdr, |
@@ -1173,25 +1253,23 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | |||
1173 | int diff, | 1253 | int diff, |
1174 | int header_in_data) | 1254 | int header_in_data) |
1175 | { | 1255 | { |
1176 | struct ib_reth *reth; | 1256 | struct ipath_ack_entry *e; |
1257 | u8 i, prev; | ||
1258 | int old_req; | ||
1177 | 1259 | ||
1178 | if (diff > 0) { | 1260 | if (diff > 0) { |
1179 | /* | 1261 | /* |
1180 | * Packet sequence error. | 1262 | * Packet sequence error. |
1181 | * A NAK will ACK earlier sends and RDMA writes. | 1263 | * A NAK will ACK earlier sends and RDMA writes. |
1182 | * Don't queue the NAK if a RDMA read, atomic, or | 1264 | * Don't queue the NAK if we already sent one. |
1183 | * NAK is pending though. | ||
1184 | */ | 1265 | */ |
1185 | if (qp->s_ack_state != OP(ACKNOWLEDGE) || | 1266 | if (!qp->r_nak_state) { |
1186 | qp->r_nak_state != 0) | ||
1187 | goto done; | ||
1188 | if (qp->r_ack_state < OP(COMPARE_SWAP)) { | ||
1189 | qp->r_ack_state = OP(SEND_ONLY); | ||
1190 | qp->r_nak_state = IB_NAK_PSN_ERROR; | 1267 | qp->r_nak_state = IB_NAK_PSN_ERROR; |
1191 | /* Use the expected PSN. */ | 1268 | /* Use the expected PSN. */ |
1192 | qp->r_ack_psn = qp->r_psn; | 1269 | qp->r_ack_psn = qp->r_psn; |
1270 | goto send_ack; | ||
1193 | } | 1271 | } |
1194 | goto send_ack; | 1272 | goto done; |
1195 | } | 1273 | } |
1196 | 1274 | ||
1197 | /* | 1275 | /* |
@@ -1204,8 +1282,46 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | |||
1204 | * can coalesce an outstanding duplicate ACK. We have to | 1282 | * can coalesce an outstanding duplicate ACK. We have to |
1205 | * send the earliest so that RDMA reads can be restarted at | 1283 | * send the earliest so that RDMA reads can be restarted at |
1206 | * the requester's expected PSN. | 1284 | * the requester's expected PSN. |
1285 | * | ||
1286 | * First, find where this duplicate PSN falls within the | ||
1287 | * ACKs previously sent. | ||
1207 | */ | 1288 | */ |
1208 | if (opcode == OP(RDMA_READ_REQUEST)) { | 1289 | psn &= IPATH_PSN_MASK; |
1290 | e = NULL; | ||
1291 | old_req = 1; | ||
1292 | spin_lock_irq(&qp->s_lock); | ||
1293 | for (i = qp->r_head_ack_queue; ; i = prev) { | ||
1294 | if (i == qp->s_tail_ack_queue) | ||
1295 | old_req = 0; | ||
1296 | if (i) | ||
1297 | prev = i - 1; | ||
1298 | else | ||
1299 | prev = IPATH_MAX_RDMA_ATOMIC; | ||
1300 | if (prev == qp->r_head_ack_queue) { | ||
1301 | e = NULL; | ||
1302 | break; | ||
1303 | } | ||
1304 | e = &qp->s_ack_queue[prev]; | ||
1305 | if (!e->opcode) { | ||
1306 | e = NULL; | ||
1307 | break; | ||
1308 | } | ||
1309 | if (ipath_cmp24(psn, e->psn) >= 0) | ||
1310 | break; | ||
1311 | } | ||
1312 | switch (opcode) { | ||
1313 | case OP(RDMA_READ_REQUEST): { | ||
1314 | struct ib_reth *reth; | ||
1315 | u32 offset; | ||
1316 | u32 len; | ||
1317 | |||
1318 | /* | ||
1319 | * If we didn't find the RDMA read request in the ack queue, | ||
1320 | * or the send tasklet is already backed up to send an | ||
1321 | * earlier entry, we can ignore this request. | ||
1322 | */ | ||
1323 | if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req) | ||
1324 | goto unlock_done; | ||
1209 | /* RETH comes after BTH */ | 1325 | /* RETH comes after BTH */ |
1210 | if (!header_in_data) | 1326 | if (!header_in_data) |
1211 | reth = &ohdr->u.rc.reth; | 1327 | reth = &ohdr->u.rc.reth; |
@@ -1214,88 +1330,87 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | |||
1214 | data += sizeof(*reth); | 1330 | data += sizeof(*reth); |
1215 | } | 1331 | } |
1216 | /* | 1332 | /* |
1217 | * If we receive a duplicate RDMA request, it means the | 1333 | * Address range must be a subset of the original |
1218 | * requester saw a sequence error and needs to restart | 1334 | * request and start on pmtu boundaries. |
1219 | * from an earlier point. We can abort the current | 1335 | * We reuse the old ack_queue slot since the requester |
1220 | * RDMA read send in that case. | 1336 | * should not back up and request an earlier PSN for the |
1337 | * same request. | ||
1221 | */ | 1338 | */ |
1222 | spin_lock_irq(&qp->s_lock); | 1339 | offset = ((psn - e->psn) & IPATH_PSN_MASK) * |
1223 | if (qp->s_ack_state != OP(ACKNOWLEDGE) && | 1340 | ib_mtu_enum_to_int(qp->path_mtu); |
1224 | (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) { | 1341 | len = be32_to_cpu(reth->length); |
1225 | /* | 1342 | if (unlikely(offset + len > e->rdma_sge.sge.sge_length)) |
1226 | * We are already sending earlier requested data. | 1343 | goto unlock_done; |
1227 | * Don't abort it to send later out of sequence data. | 1344 | if (len != 0) { |
1228 | */ | ||
1229 | spin_unlock_irq(&qp->s_lock); | ||
1230 | goto done; | ||
1231 | } | ||
1232 | qp->s_rdma_len = be32_to_cpu(reth->length); | ||
1233 | if (qp->s_rdma_len != 0) { | ||
1234 | u32 rkey = be32_to_cpu(reth->rkey); | 1345 | u32 rkey = be32_to_cpu(reth->rkey); |
1235 | u64 vaddr = be64_to_cpu(reth->vaddr); | 1346 | u64 vaddr = be64_to_cpu(reth->vaddr); |
1236 | int ok; | 1347 | int ok; |
1237 | 1348 | ||
1238 | /* | 1349 | ok = ipath_rkey_ok(qp, &e->rdma_sge, |
1239 | * Address range must be a subset of the original | 1350 | len, vaddr, rkey, |
1240 | * request and start on pmtu boundaries. | ||
1241 | */ | ||
1242 | ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, | ||
1243 | qp->s_rdma_len, vaddr, rkey, | ||
1244 | IB_ACCESS_REMOTE_READ); | 1351 | IB_ACCESS_REMOTE_READ); |
1245 | if (unlikely(!ok)) { | 1352 | if (unlikely(!ok)) |
1246 | spin_unlock_irq(&qp->s_lock); | 1353 | goto unlock_done; |
1247 | goto done; | ||
1248 | } | ||
1249 | } else { | 1354 | } else { |
1250 | qp->s_rdma_sge.sg_list = NULL; | 1355 | e->rdma_sge.sg_list = NULL; |
1251 | qp->s_rdma_sge.num_sge = 0; | 1356 | e->rdma_sge.num_sge = 0; |
1252 | qp->s_rdma_sge.sge.mr = NULL; | 1357 | e->rdma_sge.sge.mr = NULL; |
1253 | qp->s_rdma_sge.sge.vaddr = NULL; | 1358 | e->rdma_sge.sge.vaddr = NULL; |
1254 | qp->s_rdma_sge.sge.length = 0; | 1359 | e->rdma_sge.sge.length = 0; |
1255 | qp->s_rdma_sge.sge.sge_length = 0; | 1360 | e->rdma_sge.sge.sge_length = 0; |
1256 | } | 1361 | } |
1257 | qp->s_ack_state = opcode; | 1362 | e->psn = psn; |
1258 | qp->s_ack_psn = psn; | 1363 | qp->s_ack_state = OP(ACKNOWLEDGE); |
1259 | spin_unlock_irq(&qp->s_lock); | 1364 | qp->s_tail_ack_queue = prev; |
1260 | tasklet_hi_schedule(&qp->s_task); | 1365 | break; |
1261 | goto send_ack; | ||
1262 | } | 1366 | } |
1263 | 1367 | ||
1264 | /* | ||
1265 | * A pending RDMA read will ACK anything before it so | ||
1266 | * ignore earlier duplicate requests. | ||
1267 | */ | ||
1268 | if (qp->s_ack_state != OP(ACKNOWLEDGE)) | ||
1269 | goto done; | ||
1270 | |||
1271 | /* | ||
1272 | * If an ACK is pending, don't replace the pending ACK | ||
1273 | * with an earlier one since the later one will ACK the earlier. | ||
1274 | * Also, if we already have a pending atomic, send it. | ||
1275 | */ | ||
1276 | if (qp->r_ack_state != OP(ACKNOWLEDGE) && | ||
1277 | (ipath_cmp24(psn, qp->r_ack_psn) <= 0 || | ||
1278 | qp->r_ack_state >= OP(COMPARE_SWAP))) | ||
1279 | goto send_ack; | ||
1280 | switch (opcode) { | ||
1281 | case OP(COMPARE_SWAP): | 1368 | case OP(COMPARE_SWAP): |
1282 | case OP(FETCH_ADD): | 1369 | case OP(FETCH_ADD): { |
1283 | /* | 1370 | /* |
1284 | * Check for the PSN of the last atomic operation | 1371 | * If we didn't find the atomic request in the ack queue |
1285 | * performed and resend the result if found. | 1372 | * or the send tasklet is already backed up to send an |
1373 | * earlier entry, we can ignore this request. | ||
1286 | */ | 1374 | */ |
1287 | if ((psn & IPATH_PSN_MASK) != qp->r_atomic_psn) | 1375 | if (!e || e->opcode != (u8) opcode || old_req) |
1288 | goto done; | 1376 | goto unlock_done; |
1377 | qp->s_ack_state = OP(ACKNOWLEDGE); | ||
1378 | qp->s_tail_ack_queue = prev; | ||
1379 | break; | ||
1380 | } | ||
1381 | |||
1382 | default: | ||
1383 | if (old_req) | ||
1384 | goto unlock_done; | ||
1385 | /* | ||
1386 | * Resend the most recent ACK if this request is | ||
1387 | * after all the previous RDMA reads and atomics. | ||
1388 | */ | ||
1389 | if (i == qp->r_head_ack_queue) { | ||
1390 | spin_unlock_irq(&qp->s_lock); | ||
1391 | qp->r_nak_state = 0; | ||
1392 | qp->r_ack_psn = qp->r_psn - 1; | ||
1393 | goto send_ack; | ||
1394 | } | ||
1395 | /* | ||
1396 | * Resend the RDMA read or atomic op which | ||
1397 | * ACKs this duplicate request. | ||
1398 | */ | ||
1399 | qp->s_ack_state = OP(ACKNOWLEDGE); | ||
1400 | qp->s_tail_ack_queue = i; | ||
1289 | break; | 1401 | break; |
1290 | } | 1402 | } |
1291 | qp->r_ack_state = opcode; | ||
1292 | qp->r_nak_state = 0; | 1403 | qp->r_nak_state = 0; |
1293 | qp->r_ack_psn = psn; | 1404 | spin_unlock_irq(&qp->s_lock); |
1294 | send_ack: | 1405 | tasklet_hi_schedule(&qp->s_task); |
1295 | return 0; | ||
1296 | 1406 | ||
1407 | unlock_done: | ||
1408 | spin_unlock_irq(&qp->s_lock); | ||
1297 | done: | 1409 | done: |
1298 | return 1; | 1410 | return 1; |
1411 | |||
1412 | send_ack: | ||
1413 | return 0; | ||
1299 | } | 1414 | } |
1300 | 1415 | ||
1301 | static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) | 1416 | static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) |
@@ -1391,15 +1506,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1391 | opcode == OP(SEND_LAST_WITH_IMMEDIATE)) | 1506 | opcode == OP(SEND_LAST_WITH_IMMEDIATE)) |
1392 | break; | 1507 | break; |
1393 | nack_inv: | 1508 | nack_inv: |
1394 | /* | ||
1395 | * A NAK will ACK earlier sends and RDMA writes. | ||
1396 | * Don't queue the NAK if a RDMA read, atomic, or NAK | ||
1397 | * is pending though. | ||
1398 | */ | ||
1399 | if (qp->r_ack_state >= OP(COMPARE_SWAP)) | ||
1400 | goto send_ack; | ||
1401 | ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); | 1509 | ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); |
1402 | qp->r_ack_state = OP(SEND_ONLY); | ||
1403 | qp->r_nak_state = IB_NAK_INVALID_REQUEST; | 1510 | qp->r_nak_state = IB_NAK_INVALID_REQUEST; |
1404 | qp->r_ack_psn = qp->r_psn; | 1511 | qp->r_ack_psn = qp->r_psn; |
1405 | goto send_ack; | 1512 | goto send_ack; |
@@ -1441,9 +1548,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1441 | * Don't queue the NAK if a RDMA read or atomic | 1548 | * Don't queue the NAK if a RDMA read or atomic |
1442 | * is pending though. | 1549 | * is pending though. |
1443 | */ | 1550 | */ |
1444 | if (qp->r_ack_state >= OP(COMPARE_SWAP)) | 1551 | if (qp->r_nak_state) |
1445 | goto send_ack; | 1552 | goto done; |
1446 | qp->r_ack_state = OP(SEND_ONLY); | ||
1447 | qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; | 1553 | qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; |
1448 | qp->r_ack_psn = qp->r_psn; | 1554 | qp->r_ack_psn = qp->r_psn; |
1449 | goto send_ack; | 1555 | goto send_ack; |
@@ -1567,7 +1673,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1567 | goto rnr_nak; | 1673 | goto rnr_nak; |
1568 | goto send_last_imm; | 1674 | goto send_last_imm; |
1569 | 1675 | ||
1570 | case OP(RDMA_READ_REQUEST): | 1676 | case OP(RDMA_READ_REQUEST): { |
1677 | struct ipath_ack_entry *e; | ||
1678 | u32 len; | ||
1679 | u8 next; | ||
1680 | |||
1681 | if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) | ||
1682 | goto nack_acc; | ||
1683 | next = qp->r_head_ack_queue + 1; | ||
1684 | if (next > IPATH_MAX_RDMA_ATOMIC) | ||
1685 | next = 0; | ||
1686 | if (unlikely(next == qp->s_tail_ack_queue)) | ||
1687 | goto nack_inv; | ||
1688 | e = &qp->s_ack_queue[qp->r_head_ack_queue]; | ||
1571 | /* RETH comes after BTH */ | 1689 | /* RETH comes after BTH */ |
1572 | if (!header_in_data) | 1690 | if (!header_in_data) |
1573 | reth = &ohdr->u.rc.reth; | 1691 | reth = &ohdr->u.rc.reth; |
@@ -1575,72 +1693,75 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1575 | reth = (struct ib_reth *)data; | 1693 | reth = (struct ib_reth *)data; |
1576 | data += sizeof(*reth); | 1694 | data += sizeof(*reth); |
1577 | } | 1695 | } |
1578 | if (unlikely(!(qp->qp_access_flags & | 1696 | len = be32_to_cpu(reth->length); |
1579 | IB_ACCESS_REMOTE_READ))) | 1697 | if (len) { |
1580 | goto nack_acc; | ||
1581 | spin_lock_irq(&qp->s_lock); | ||
1582 | qp->s_rdma_len = be32_to_cpu(reth->length); | ||
1583 | if (qp->s_rdma_len != 0) { | ||
1584 | u32 rkey = be32_to_cpu(reth->rkey); | 1698 | u32 rkey = be32_to_cpu(reth->rkey); |
1585 | u64 vaddr = be64_to_cpu(reth->vaddr); | 1699 | u64 vaddr = be64_to_cpu(reth->vaddr); |
1586 | int ok; | 1700 | int ok; |
1587 | 1701 | ||
1588 | /* Check rkey & NAK */ | 1702 | /* Check rkey & NAK */ |
1589 | ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, | 1703 | ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, |
1590 | qp->s_rdma_len, vaddr, rkey, | 1704 | rkey, IB_ACCESS_REMOTE_READ); |
1591 | IB_ACCESS_REMOTE_READ); | 1705 | if (unlikely(!ok)) |
1592 | if (unlikely(!ok)) { | ||
1593 | spin_unlock_irq(&qp->s_lock); | ||
1594 | goto nack_acc; | 1706 | goto nack_acc; |
1595 | } | ||
1596 | /* | 1707 | /* |
1597 | * Update the next expected PSN. We add 1 later | 1708 | * Update the next expected PSN. We add 1 later |
1598 | * below, so only add the remainder here. | 1709 | * below, so only add the remainder here. |
1599 | */ | 1710 | */ |
1600 | if (qp->s_rdma_len > pmtu) | 1711 | if (len > pmtu) |
1601 | qp->r_psn += (qp->s_rdma_len - 1) / pmtu; | 1712 | qp->r_psn += (len - 1) / pmtu; |
1602 | } else { | 1713 | } else { |
1603 | qp->s_rdma_sge.sg_list = NULL; | 1714 | e->rdma_sge.sg_list = NULL; |
1604 | qp->s_rdma_sge.num_sge = 0; | 1715 | e->rdma_sge.num_sge = 0; |
1605 | qp->s_rdma_sge.sge.mr = NULL; | 1716 | e->rdma_sge.sge.mr = NULL; |
1606 | qp->s_rdma_sge.sge.vaddr = NULL; | 1717 | e->rdma_sge.sge.vaddr = NULL; |
1607 | qp->s_rdma_sge.sge.length = 0; | 1718 | e->rdma_sge.sge.length = 0; |
1608 | qp->s_rdma_sge.sge.sge_length = 0; | 1719 | e->rdma_sge.sge.sge_length = 0; |
1609 | } | 1720 | } |
1721 | e->opcode = opcode; | ||
1722 | e->psn = psn; | ||
1610 | /* | 1723 | /* |
1611 | * We need to increment the MSN here instead of when we | 1724 | * We need to increment the MSN here instead of when we |
1612 | * finish sending the result since a duplicate request would | 1725 | * finish sending the result since a duplicate request would |
1613 | * increment it more than once. | 1726 | * increment it more than once. |
1614 | */ | 1727 | */ |
1615 | qp->r_msn++; | 1728 | qp->r_msn++; |
1616 | |||
1617 | qp->s_ack_state = opcode; | ||
1618 | qp->s_ack_psn = psn; | ||
1619 | spin_unlock_irq(&qp->s_lock); | ||
1620 | |||
1621 | qp->r_psn++; | 1729 | qp->r_psn++; |
1622 | qp->r_state = opcode; | 1730 | qp->r_state = opcode; |
1623 | qp->r_nak_state = 0; | 1731 | qp->r_nak_state = 0; |
1732 | barrier(); | ||
1733 | qp->r_head_ack_queue = next; | ||
1624 | 1734 | ||
1625 | /* Call ipath_do_rc_send() in another thread. */ | 1735 | /* Call ipath_do_rc_send() in another thread. */ |
1626 | tasklet_hi_schedule(&qp->s_task); | 1736 | tasklet_hi_schedule(&qp->s_task); |
1627 | 1737 | ||
1628 | goto done; | 1738 | goto done; |
1739 | } | ||
1629 | 1740 | ||
1630 | case OP(COMPARE_SWAP): | 1741 | case OP(COMPARE_SWAP): |
1631 | case OP(FETCH_ADD): { | 1742 | case OP(FETCH_ADD): { |
1632 | struct ib_atomic_eth *ateth; | 1743 | struct ib_atomic_eth *ateth; |
1744 | struct ipath_ack_entry *e; | ||
1633 | u64 vaddr; | 1745 | u64 vaddr; |
1746 | atomic64_t *maddr; | ||
1634 | u64 sdata; | 1747 | u64 sdata; |
1635 | u32 rkey; | 1748 | u32 rkey; |
1749 | u8 next; | ||
1636 | 1750 | ||
1751 | if (unlikely(!(qp->qp_access_flags & | ||
1752 | IB_ACCESS_REMOTE_ATOMIC))) | ||
1753 | goto nack_acc; | ||
1754 | next = qp->r_head_ack_queue + 1; | ||
1755 | if (next > IPATH_MAX_RDMA_ATOMIC) | ||
1756 | next = 0; | ||
1757 | if (unlikely(next == qp->s_tail_ack_queue)) | ||
1758 | goto nack_inv; | ||
1637 | if (!header_in_data) | 1759 | if (!header_in_data) |
1638 | ateth = &ohdr->u.atomic_eth; | 1760 | ateth = &ohdr->u.atomic_eth; |
1639 | else { | 1761 | else |
1640 | ateth = (struct ib_atomic_eth *)data; | 1762 | ateth = (struct ib_atomic_eth *)data; |
1641 | data += sizeof(*ateth); | 1763 | vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | |
1642 | } | 1764 | be32_to_cpu(ateth->vaddr[1]); |
1643 | vaddr = be64_to_cpu(ateth->vaddr); | ||
1644 | if (unlikely(vaddr & (sizeof(u64) - 1))) | 1765 | if (unlikely(vaddr & (sizeof(u64) - 1))) |
1645 | goto nack_inv; | 1766 | goto nack_inv; |
1646 | rkey = be32_to_cpu(ateth->rkey); | 1767 | rkey = be32_to_cpu(ateth->rkey); |
@@ -1649,63 +1770,50 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1649 | sizeof(u64), vaddr, rkey, | 1770 | sizeof(u64), vaddr, rkey, |
1650 | IB_ACCESS_REMOTE_ATOMIC))) | 1771 | IB_ACCESS_REMOTE_ATOMIC))) |
1651 | goto nack_acc; | 1772 | goto nack_acc; |
1652 | if (unlikely(!(qp->qp_access_flags & | ||
1653 | IB_ACCESS_REMOTE_ATOMIC))) | ||
1654 | goto nack_acc; | ||
1655 | /* Perform atomic OP and save result. */ | 1773 | /* Perform atomic OP and save result. */ |
1774 | maddr = (atomic64_t *) qp->r_sge.sge.vaddr; | ||
1656 | sdata = be64_to_cpu(ateth->swap_data); | 1775 | sdata = be64_to_cpu(ateth->swap_data); |
1657 | spin_lock_irq(&dev->pending_lock); | 1776 | e = &qp->s_ack_queue[qp->r_head_ack_queue]; |
1658 | qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; | 1777 | e->atomic_data = (opcode == OP(FETCH_ADD)) ? |
1659 | if (opcode == OP(FETCH_ADD)) | 1778 | (u64) atomic64_add_return(sdata, maddr) - sdata : |
1660 | *(u64 *) qp->r_sge.sge.vaddr = | 1779 | (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, |
1661 | qp->r_atomic_data + sdata; | 1780 | be64_to_cpu(ateth->compare_data), |
1662 | else if (qp->r_atomic_data == | 1781 | sdata); |
1663 | be64_to_cpu(ateth->compare_data)) | 1782 | e->opcode = opcode; |
1664 | *(u64 *) qp->r_sge.sge.vaddr = sdata; | 1783 | e->psn = psn & IPATH_PSN_MASK; |
1665 | spin_unlock_irq(&dev->pending_lock); | ||
1666 | qp->r_msn++; | 1784 | qp->r_msn++; |
1667 | qp->r_atomic_psn = psn & IPATH_PSN_MASK; | 1785 | qp->r_psn++; |
1668 | psn |= 1 << 31; | 1786 | qp->r_state = opcode; |
1669 | break; | 1787 | qp->r_nak_state = 0; |
1788 | barrier(); | ||
1789 | qp->r_head_ack_queue = next; | ||
1790 | |||
1791 | /* Call ipath_do_rc_send() in another thread. */ | ||
1792 | tasklet_hi_schedule(&qp->s_task); | ||
1793 | |||
1794 | goto done; | ||
1670 | } | 1795 | } |
1671 | 1796 | ||
1672 | default: | 1797 | default: |
1673 | /* Drop packet for unknown opcodes. */ | 1798 | /* NAK unknown opcodes. */ |
1674 | goto done; | 1799 | goto nack_inv; |
1675 | } | 1800 | } |
1676 | qp->r_psn++; | 1801 | qp->r_psn++; |
1677 | qp->r_state = opcode; | 1802 | qp->r_state = opcode; |
1803 | qp->r_ack_psn = psn; | ||
1678 | qp->r_nak_state = 0; | 1804 | qp->r_nak_state = 0; |
1679 | /* Send an ACK if requested or required. */ | 1805 | /* Send an ACK if requested or required. */ |
1680 | if (psn & (1 << 31)) { | 1806 | if (psn & (1 << 31)) |
1681 | /* | ||
1682 | * Coalesce ACKs unless there is a RDMA READ or | ||
1683 | * ATOMIC pending. | ||
1684 | */ | ||
1685 | if (qp->r_ack_state < OP(COMPARE_SWAP)) { | ||
1686 | qp->r_ack_state = opcode; | ||
1687 | qp->r_ack_psn = psn; | ||
1688 | } | ||
1689 | goto send_ack; | 1807 | goto send_ack; |
1690 | } | ||
1691 | goto done; | 1808 | goto done; |
1692 | 1809 | ||
1693 | nack_acc: | 1810 | nack_acc: |
1694 | /* | 1811 | ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); |
1695 | * A NAK will ACK earlier sends and RDMA writes. | 1812 | qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; |
1696 | * Don't queue the NAK if a RDMA read, atomic, or NAK | 1813 | qp->r_ack_psn = qp->r_psn; |
1697 | * is pending though. | 1814 | |
1698 | */ | ||
1699 | if (qp->r_ack_state < OP(COMPARE_SWAP)) { | ||
1700 | ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); | ||
1701 | qp->r_ack_state = OP(RDMA_WRITE_ONLY); | ||
1702 | qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; | ||
1703 | qp->r_ack_psn = qp->r_psn; | ||
1704 | } | ||
1705 | send_ack: | 1815 | send_ack: |
1706 | /* Send ACK right away unless the send tasklet has a pending ACK. */ | 1816 | send_rc_ack(qp); |
1707 | if (qp->s_ack_state == OP(ACKNOWLEDGE)) | ||
1708 | send_rc_ack(qp); | ||
1709 | 1817 | ||
1710 | done: | 1818 | done: |
1711 | return; | 1819 | return; |
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index cda84933bb43..d9c2a9b15d86 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c | |||
@@ -255,6 +255,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) | |||
255 | unsigned long flags; | 255 | unsigned long flags; |
256 | struct ib_wc wc; | 256 | struct ib_wc wc; |
257 | u64 sdata; | 257 | u64 sdata; |
258 | atomic64_t *maddr; | ||
258 | 259 | ||
259 | qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); | 260 | qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); |
260 | if (!qp) { | 261 | if (!qp) { |
@@ -311,7 +312,7 @@ again: | |||
311 | sqp->s_rnr_retry--; | 312 | sqp->s_rnr_retry--; |
312 | dev->n_rnr_naks++; | 313 | dev->n_rnr_naks++; |
313 | sqp->s_rnr_timeout = | 314 | sqp->s_rnr_timeout = |
314 | ib_ipath_rnr_table[sqp->r_min_rnr_timer]; | 315 | ib_ipath_rnr_table[qp->r_min_rnr_timer]; |
315 | ipath_insert_rnr_queue(sqp); | 316 | ipath_insert_rnr_queue(sqp); |
316 | goto done; | 317 | goto done; |
317 | } | 318 | } |
@@ -344,20 +345,22 @@ again: | |||
344 | wc.sl = sqp->remote_ah_attr.sl; | 345 | wc.sl = sqp->remote_ah_attr.sl; |
345 | wc.dlid_path_bits = 0; | 346 | wc.dlid_path_bits = 0; |
346 | wc.port_num = 0; | 347 | wc.port_num = 0; |
348 | spin_lock_irqsave(&sqp->s_lock, flags); | ||
347 | ipath_sqerror_qp(sqp, &wc); | 349 | ipath_sqerror_qp(sqp, &wc); |
350 | spin_unlock_irqrestore(&sqp->s_lock, flags); | ||
348 | goto done; | 351 | goto done; |
349 | } | 352 | } |
350 | break; | 353 | break; |
351 | 354 | ||
352 | case IB_WR_RDMA_READ: | 355 | case IB_WR_RDMA_READ: |
356 | if (unlikely(!(qp->qp_access_flags & | ||
357 | IB_ACCESS_REMOTE_READ))) | ||
358 | goto acc_err; | ||
353 | if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, | 359 | if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, |
354 | wqe->wr.wr.rdma.remote_addr, | 360 | wqe->wr.wr.rdma.remote_addr, |
355 | wqe->wr.wr.rdma.rkey, | 361 | wqe->wr.wr.rdma.rkey, |
356 | IB_ACCESS_REMOTE_READ))) | 362 | IB_ACCESS_REMOTE_READ))) |
357 | goto acc_err; | 363 | goto acc_err; |
358 | if (unlikely(!(qp->qp_access_flags & | ||
359 | IB_ACCESS_REMOTE_READ))) | ||
360 | goto acc_err; | ||
361 | qp->r_sge.sge = wqe->sg_list[0]; | 364 | qp->r_sge.sge = wqe->sg_list[0]; |
362 | qp->r_sge.sg_list = wqe->sg_list + 1; | 365 | qp->r_sge.sg_list = wqe->sg_list + 1; |
363 | qp->r_sge.num_sge = wqe->wr.num_sge; | 366 | qp->r_sge.num_sge = wqe->wr.num_sge; |
@@ -365,22 +368,22 @@ again: | |||
365 | 368 | ||
366 | case IB_WR_ATOMIC_CMP_AND_SWP: | 369 | case IB_WR_ATOMIC_CMP_AND_SWP: |
367 | case IB_WR_ATOMIC_FETCH_AND_ADD: | 370 | case IB_WR_ATOMIC_FETCH_AND_ADD: |
371 | if (unlikely(!(qp->qp_access_flags & | ||
372 | IB_ACCESS_REMOTE_ATOMIC))) | ||
373 | goto acc_err; | ||
368 | if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), | 374 | if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), |
369 | wqe->wr.wr.rdma.remote_addr, | 375 | wqe->wr.wr.atomic.remote_addr, |
370 | wqe->wr.wr.rdma.rkey, | 376 | wqe->wr.wr.atomic.rkey, |
371 | IB_ACCESS_REMOTE_ATOMIC))) | 377 | IB_ACCESS_REMOTE_ATOMIC))) |
372 | goto acc_err; | 378 | goto acc_err; |
373 | /* Perform atomic OP and save result. */ | 379 | /* Perform atomic OP and save result. */ |
374 | sdata = wqe->wr.wr.atomic.swap; | 380 | maddr = (atomic64_t *) qp->r_sge.sge.vaddr; |
375 | spin_lock_irqsave(&dev->pending_lock, flags); | 381 | sdata = wqe->wr.wr.atomic.compare_add; |
376 | qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; | 382 | *(u64 *) sqp->s_sge.sge.vaddr = |
377 | if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) | 383 | (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? |
378 | *(u64 *) qp->r_sge.sge.vaddr = | 384 | (u64) atomic64_add_return(sdata, maddr) - sdata : |
379 | qp->r_atomic_data + sdata; | 385 | (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, |
380 | else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add) | 386 | sdata, wqe->wr.wr.atomic.swap); |
381 | *(u64 *) qp->r_sge.sge.vaddr = sdata; | ||
382 | spin_unlock_irqrestore(&dev->pending_lock, flags); | ||
383 | *(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data; | ||
384 | goto send_comp; | 387 | goto send_comp; |
385 | 388 | ||
386 | default: | 389 | default: |
@@ -441,7 +444,7 @@ again: | |||
441 | send_comp: | 444 | send_comp: |
442 | sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; | 445 | sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; |
443 | 446 | ||
444 | if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) || | 447 | if (!(sqp->s_flags & IPATH_S_SIGNAL_REQ_WR) || |
445 | (wqe->wr.send_flags & IB_SEND_SIGNALED)) { | 448 | (wqe->wr.send_flags & IB_SEND_SIGNALED)) { |
446 | wc.wr_id = wqe->wr.wr_id; | 449 | wc.wr_id = wqe->wr.wr_id; |
447 | wc.status = IB_WC_SUCCESS; | 450 | wc.status = IB_WC_SUCCESS; |
@@ -503,7 +506,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) | |||
503 | * We clear the tasklet flag now since we are committing to return | 506 | * We clear the tasklet flag now since we are committing to return |
504 | * from the tasklet function. | 507 | * from the tasklet function. |
505 | */ | 508 | */ |
506 | clear_bit(IPATH_S_BUSY, &qp->s_flags); | 509 | clear_bit(IPATH_S_BUSY, &qp->s_busy); |
507 | tasklet_unlock(&qp->s_task); | 510 | tasklet_unlock(&qp->s_task); |
508 | want_buffer(dev->dd); | 511 | want_buffer(dev->dd); |
509 | dev->n_piowait++; | 512 | dev->n_piowait++; |
@@ -542,6 +545,9 @@ int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr) | |||
542 | wr->sg_list[0].addr & (sizeof(u64) - 1))) { | 545 | wr->sg_list[0].addr & (sizeof(u64) - 1))) { |
543 | ret = -EINVAL; | 546 | ret = -EINVAL; |
544 | goto bail; | 547 | goto bail; |
548 | } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { | ||
549 | ret = -EINVAL; | ||
550 | goto bail; | ||
545 | } | 551 | } |
546 | /* IB spec says that num_sge == 0 is OK. */ | 552 | /* IB spec says that num_sge == 0 is OK. */ |
547 | if (wr->num_sge > qp->s_max_sge) { | 553 | if (wr->num_sge > qp->s_max_sge) { |
@@ -648,7 +654,7 @@ void ipath_do_ruc_send(unsigned long data) | |||
648 | u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); | 654 | u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); |
649 | struct ipath_other_headers *ohdr; | 655 | struct ipath_other_headers *ohdr; |
650 | 656 | ||
651 | if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags)) | 657 | if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy)) |
652 | goto bail; | 658 | goto bail; |
653 | 659 | ||
654 | if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) { | 660 | if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) { |
@@ -684,19 +690,15 @@ again: | |||
684 | */ | 690 | */ |
685 | spin_lock_irqsave(&qp->s_lock, flags); | 691 | spin_lock_irqsave(&qp->s_lock, flags); |
686 | 692 | ||
687 | /* Sending responses has higher priority over sending requests. */ | 693 | if (!((qp->ibqp.qp_type == IB_QPT_RC) ? |
688 | if (qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE && | 694 | ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) : |
689 | (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0) | 695 | ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) { |
690 | bth2 = qp->s_ack_psn++ & IPATH_PSN_MASK; | ||
691 | else if (!((qp->ibqp.qp_type == IB_QPT_RC) ? | ||
692 | ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) : | ||
693 | ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) { | ||
694 | /* | 696 | /* |
695 | * Clear the busy bit before unlocking to avoid races with | 697 | * Clear the busy bit before unlocking to avoid races with |
696 | * adding new work queue items and then failing to process | 698 | * adding new work queue items and then failing to process |
697 | * them. | 699 | * them. |
698 | */ | 700 | */ |
699 | clear_bit(IPATH_S_BUSY, &qp->s_flags); | 701 | clear_bit(IPATH_S_BUSY, &qp->s_busy); |
700 | spin_unlock_irqrestore(&qp->s_lock, flags); | 702 | spin_unlock_irqrestore(&qp->s_lock, flags); |
701 | goto bail; | 703 | goto bail; |
702 | } | 704 | } |
@@ -729,7 +731,7 @@ again: | |||
729 | goto again; | 731 | goto again; |
730 | 732 | ||
731 | clear: | 733 | clear: |
732 | clear_bit(IPATH_S_BUSY, &qp->s_flags); | 734 | clear_bit(IPATH_S_BUSY, &qp->s_busy); |
733 | bail: | 735 | bail: |
734 | return; | 736 | return; |
735 | } | 737 | } |
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index 325d6634ff53..1c2b03c2ef5e 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c | |||
@@ -42,7 +42,7 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe, | |||
42 | { | 42 | { |
43 | if (++qp->s_last == qp->s_size) | 43 | if (++qp->s_last == qp->s_size) |
44 | qp->s_last = 0; | 44 | qp->s_last = 0; |
45 | if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || | 45 | if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || |
46 | (wqe->wr.send_flags & IB_SEND_SIGNALED)) { | 46 | (wqe->wr.send_flags & IB_SEND_SIGNALED)) { |
47 | wc->wr_id = wqe->wr.wr_id; | 47 | wc->wr_id = wqe->wr.wr_id; |
48 | wc->status = IB_WC_SUCCESS; | 48 | wc->status = IB_WC_SUCCESS; |
@@ -344,13 +344,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
344 | send_first: | 344 | send_first: |
345 | if (qp->r_reuse_sge) { | 345 | if (qp->r_reuse_sge) { |
346 | qp->r_reuse_sge = 0; | 346 | qp->r_reuse_sge = 0; |
347 | qp->r_sge = qp->s_rdma_sge; | 347 | qp->r_sge = qp->s_rdma_read_sge; |
348 | } else if (!ipath_get_rwqe(qp, 0)) { | 348 | } else if (!ipath_get_rwqe(qp, 0)) { |
349 | dev->n_pkt_drops++; | 349 | dev->n_pkt_drops++; |
350 | goto done; | 350 | goto done; |
351 | } | 351 | } |
352 | /* Save the WQE so we can reuse it in case of an error. */ | 352 | /* Save the WQE so we can reuse it in case of an error. */ |
353 | qp->s_rdma_sge = qp->r_sge; | 353 | qp->s_rdma_read_sge = qp->r_sge; |
354 | qp->r_rcv_len = 0; | 354 | qp->r_rcv_len = 0; |
355 | if (opcode == OP(SEND_ONLY)) | 355 | if (opcode == OP(SEND_ONLY)) |
356 | goto send_last; | 356 | goto send_last; |
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 9a3e54664ee4..a20261c0b4f8 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c | |||
@@ -467,7 +467,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) | |||
467 | 467 | ||
468 | done: | 468 | done: |
469 | /* Queue the completion status entry. */ | 469 | /* Queue the completion status entry. */ |
470 | if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || | 470 | if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || |
471 | (wr->send_flags & IB_SEND_SIGNALED)) { | 471 | (wr->send_flags & IB_SEND_SIGNALED)) { |
472 | wc.wr_id = wr->wr_id; | 472 | wc.wr_id = wr->wr_id; |
473 | wc.status = IB_WC_SUCCESS; | 473 | wc.status = IB_WC_SUCCESS; |
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 2aaacdb7e52a..9bec5a9b4557 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c | |||
@@ -773,7 +773,6 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, | |||
773 | /* +1 is for the qword padding of pbc */ | 773 | /* +1 is for the qword padding of pbc */ |
774 | plen = hdrwords + ((len + 3) >> 2) + 1; | 774 | plen = hdrwords + ((len + 3) >> 2) + 1; |
775 | if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) { | 775 | if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) { |
776 | ipath_dbg("packet len 0x%x too long, failing\n", plen); | ||
777 | ret = -EINVAL; | 776 | ret = -EINVAL; |
778 | goto bail; | 777 | goto bail; |
779 | } | 778 | } |
@@ -980,14 +979,14 @@ static int ipath_query_device(struct ib_device *ibdev, | |||
980 | props->max_cqe = ib_ipath_max_cqes; | 979 | props->max_cqe = ib_ipath_max_cqes; |
981 | props->max_mr = dev->lk_table.max; | 980 | props->max_mr = dev->lk_table.max; |
982 | props->max_pd = ib_ipath_max_pds; | 981 | props->max_pd = ib_ipath_max_pds; |
983 | props->max_qp_rd_atom = 1; | 982 | props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC; |
984 | props->max_qp_init_rd_atom = 1; | 983 | props->max_qp_init_rd_atom = 255; |
985 | /* props->max_res_rd_atom */ | 984 | /* props->max_res_rd_atom */ |
986 | props->max_srq = ib_ipath_max_srqs; | 985 | props->max_srq = ib_ipath_max_srqs; |
987 | props->max_srq_wr = ib_ipath_max_srq_wrs; | 986 | props->max_srq_wr = ib_ipath_max_srq_wrs; |
988 | props->max_srq_sge = ib_ipath_max_srq_sges; | 987 | props->max_srq_sge = ib_ipath_max_srq_sges; |
989 | /* props->local_ca_ack_delay */ | 988 | /* props->local_ca_ack_delay */ |
990 | props->atomic_cap = IB_ATOMIC_HCA; | 989 | props->atomic_cap = IB_ATOMIC_GLOB; |
991 | props->max_pkeys = ipath_get_npkeys(dev->dd); | 990 | props->max_pkeys = ipath_get_npkeys(dev->dd); |
992 | props->max_mcast_grp = ib_ipath_max_mcast_grps; | 991 | props->max_mcast_grp = ib_ipath_max_mcast_grps; |
993 | props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached; | 992 | props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached; |
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index c0c8d5b24a7d..b0b29d97d56e 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h | |||
@@ -43,6 +43,8 @@ | |||
43 | 43 | ||
44 | #include "ipath_layer.h" | 44 | #include "ipath_layer.h" |
45 | 45 | ||
46 | #define IPATH_MAX_RDMA_ATOMIC 4 | ||
47 | |||
46 | #define QPN_MAX (1 << 24) | 48 | #define QPN_MAX (1 << 24) |
47 | #define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) | 49 | #define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) |
48 | 50 | ||
@@ -89,7 +91,7 @@ struct ib_reth { | |||
89 | } __attribute__ ((packed)); | 91 | } __attribute__ ((packed)); |
90 | 92 | ||
91 | struct ib_atomic_eth { | 93 | struct ib_atomic_eth { |
92 | __be64 vaddr; | 94 | __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ |
93 | __be32 rkey; | 95 | __be32 rkey; |
94 | __be64 swap_data; | 96 | __be64 swap_data; |
95 | __be64 compare_data; | 97 | __be64 compare_data; |
@@ -108,7 +110,7 @@ struct ipath_other_headers { | |||
108 | } rc; | 110 | } rc; |
109 | struct { | 111 | struct { |
110 | __be32 aeth; | 112 | __be32 aeth; |
111 | __be64 atomic_ack_eth; | 113 | __be32 atomic_ack_eth[2]; |
112 | } at; | 114 | } at; |
113 | __be32 imm_data; | 115 | __be32 imm_data; |
114 | __be32 aeth; | 116 | __be32 aeth; |
@@ -312,6 +314,19 @@ struct ipath_sge_state { | |||
312 | }; | 314 | }; |
313 | 315 | ||
314 | /* | 316 | /* |
317 | * This structure holds the information that the send tasklet needs | ||
318 | * to send a RDMA read response or atomic operation. | ||
319 | */ | ||
320 | struct ipath_ack_entry { | ||
321 | u8 opcode; | ||
322 | u32 psn; | ||
323 | union { | ||
324 | struct ipath_sge_state rdma_sge; | ||
325 | u64 atomic_data; | ||
326 | }; | ||
327 | }; | ||
328 | |||
329 | /* | ||
315 | * Variables prefixed with s_ are for the requester (sender). | 330 | * Variables prefixed with s_ are for the requester (sender). |
316 | * Variables prefixed with r_ are for the responder (receiver). | 331 | * Variables prefixed with r_ are for the responder (receiver). |
317 | * Variables prefixed with ack_ are for responder replies. | 332 | * Variables prefixed with ack_ are for responder replies. |
@@ -333,24 +348,24 @@ struct ipath_qp { | |||
333 | struct ipath_mmap_info *ip; | 348 | struct ipath_mmap_info *ip; |
334 | struct ipath_sge_state *s_cur_sge; | 349 | struct ipath_sge_state *s_cur_sge; |
335 | struct ipath_sge_state s_sge; /* current send request data */ | 350 | struct ipath_sge_state s_sge; /* current send request data */ |
336 | /* current RDMA read send data */ | 351 | struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1]; |
337 | struct ipath_sge_state s_rdma_sge; | 352 | struct ipath_sge_state s_ack_rdma_sge; |
353 | struct ipath_sge_state s_rdma_read_sge; | ||
338 | struct ipath_sge_state r_sge; /* current receive data */ | 354 | struct ipath_sge_state r_sge; /* current receive data */ |
339 | spinlock_t s_lock; | 355 | spinlock_t s_lock; |
340 | unsigned long s_flags; | 356 | unsigned long s_busy; |
341 | u32 s_hdrwords; /* size of s_hdr in 32 bit words */ | 357 | u32 s_hdrwords; /* size of s_hdr in 32 bit words */ |
342 | u32 s_cur_size; /* size of send packet in bytes */ | 358 | u32 s_cur_size; /* size of send packet in bytes */ |
343 | u32 s_len; /* total length of s_sge */ | 359 | u32 s_len; /* total length of s_sge */ |
344 | u32 s_rdma_len; /* total length of s_rdma_sge */ | 360 | u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ |
345 | u32 s_next_psn; /* PSN for next request */ | 361 | u32 s_next_psn; /* PSN for next request */ |
346 | u32 s_last_psn; /* last response PSN processed */ | 362 | u32 s_last_psn; /* last response PSN processed */ |
347 | u32 s_psn; /* current packet sequence number */ | 363 | u32 s_psn; /* current packet sequence number */ |
348 | u32 s_ack_psn; /* PSN for RDMA_READ */ | 364 | u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ |
365 | u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ | ||
349 | u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ | 366 | u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ |
350 | u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ | 367 | u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ |
351 | u64 r_wr_id; /* ID for current receive WQE */ | 368 | u64 r_wr_id; /* ID for current receive WQE */ |
352 | u64 r_atomic_data; /* data for last atomic op */ | ||
353 | u32 r_atomic_psn; /* PSN of last atomic op */ | ||
354 | u32 r_len; /* total length of r_sge */ | 369 | u32 r_len; /* total length of r_sge */ |
355 | u32 r_rcv_len; /* receive data len processed */ | 370 | u32 r_rcv_len; /* receive data len processed */ |
356 | u32 r_psn; /* expected rcv packet sequence number */ | 371 | u32 r_psn; /* expected rcv packet sequence number */ |
@@ -360,12 +375,13 @@ struct ipath_qp { | |||
360 | u8 s_ack_state; /* opcode of packet to ACK */ | 375 | u8 s_ack_state; /* opcode of packet to ACK */ |
361 | u8 s_nak_state; /* non-zero if NAK is pending */ | 376 | u8 s_nak_state; /* non-zero if NAK is pending */ |
362 | u8 r_state; /* opcode of last packet received */ | 377 | u8 r_state; /* opcode of last packet received */ |
363 | u8 r_ack_state; /* opcode of packet to ACK */ | ||
364 | u8 r_nak_state; /* non-zero if NAK is pending */ | 378 | u8 r_nak_state; /* non-zero if NAK is pending */ |
365 | u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ | 379 | u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ |
366 | u8 r_reuse_sge; /* for UC receive errors */ | 380 | u8 r_reuse_sge; /* for UC receive errors */ |
367 | u8 r_sge_inx; /* current index into sg_list */ | 381 | u8 r_sge_inx; /* current index into sg_list */ |
368 | u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */ | 382 | u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */ |
383 | u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ | ||
384 | u8 r_head_ack_queue; /* index into s_ack_queue[] */ | ||
369 | u8 qp_access_flags; | 385 | u8 qp_access_flags; |
370 | u8 s_max_sge; /* size of s_wq->sg_list */ | 386 | u8 s_max_sge; /* size of s_wq->sg_list */ |
371 | u8 s_retry_cnt; /* number of times to retry */ | 387 | u8 s_retry_cnt; /* number of times to retry */ |
@@ -374,6 +390,10 @@ struct ipath_qp { | |||
374 | u8 s_rnr_retry; /* requester RNR retry counter */ | 390 | u8 s_rnr_retry; /* requester RNR retry counter */ |
375 | u8 s_wait_credit; /* limit number of unacked packets sent */ | 391 | u8 s_wait_credit; /* limit number of unacked packets sent */ |
376 | u8 s_pkey_index; /* PKEY index to use */ | 392 | u8 s_pkey_index; /* PKEY index to use */ |
393 | u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ | ||
394 | u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ | ||
395 | u8 s_tail_ack_queue; /* index into s_ack_queue[] */ | ||
396 | u8 s_flags; | ||
377 | u8 timeout; /* Timeout for this QP */ | 397 | u8 timeout; /* Timeout for this QP */ |
378 | enum ib_mtu path_mtu; | 398 | enum ib_mtu path_mtu; |
379 | u32 remote_qpn; | 399 | u32 remote_qpn; |
@@ -390,11 +410,16 @@ struct ipath_qp { | |||
390 | struct ipath_sge r_sg_list[0]; /* verified SGEs */ | 410 | struct ipath_sge r_sg_list[0]; /* verified SGEs */ |
391 | }; | 411 | }; |
392 | 412 | ||
413 | /* Bit definition for s_busy. */ | ||
414 | #define IPATH_S_BUSY 0 | ||
415 | |||
393 | /* | 416 | /* |
394 | * Bit definitions for s_flags. | 417 | * Bit definitions for s_flags. |
395 | */ | 418 | */ |
396 | #define IPATH_S_BUSY 0 | 419 | #define IPATH_S_SIGNAL_REQ_WR 0x01 |
397 | #define IPATH_S_SIGNAL_REQ_WR 1 | 420 | #define IPATH_S_FENCE_PENDING 0x02 |
421 | #define IPATH_S_RDMAR_PENDING 0x04 | ||
422 | #define IPATH_S_ACK_PENDING 0x08 | ||
398 | 423 | ||
399 | #define IPATH_PSN_CREDIT 2048 | 424 | #define IPATH_PSN_CREDIT 2048 |
400 | 425 | ||
@@ -757,9 +782,6 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr, | |||
757 | 782 | ||
758 | void ipath_do_ruc_send(unsigned long data); | 783 | void ipath_do_ruc_send(unsigned long data); |
759 | 784 | ||
760 | u32 ipath_make_rc_ack(struct ipath_qp *qp, struct ipath_other_headers *ohdr, | ||
761 | u32 pmtu); | ||
762 | |||
763 | int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr, | 785 | int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr, |
764 | u32 pmtu, u32 *bth0p, u32 *bth2p); | 786 | u32 pmtu, u32 *bth0p, u32 *bth2p); |
765 | 787 | ||