aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/ipath/ipath_rc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/ipath/ipath_rc.c')
-rw-r--r--drivers/infiniband/hw/ipath/ipath_rc.c383
1 files changed, 197 insertions, 186 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index bd2c405c4bf0..8568dd0538cf 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -42,7 +42,7 @@
42 * @qp: the QP whose SGE we're restarting 42 * @qp: the QP whose SGE we're restarting
43 * @wqe: the work queue to initialize the QP's SGE from 43 * @wqe: the work queue to initialize the QP's SGE from
44 * 44 *
45 * The QP s_lock should be held. 45 * The QP s_lock should be held and interrupts disabled.
46 */ 46 */
47static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) 47static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
48{ 48{
@@ -77,7 +77,6 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp,
77 struct ipath_other_headers *ohdr, 77 struct ipath_other_headers *ohdr,
78 u32 pmtu) 78 u32 pmtu)
79{ 79{
80 struct ipath_sge_state *ss;
81 u32 hwords; 80 u32 hwords;
82 u32 len; 81 u32 len;
83 u32 bth0; 82 u32 bth0;
@@ -91,7 +90,7 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp,
91 */ 90 */
92 switch (qp->s_ack_state) { 91 switch (qp->s_ack_state) {
93 case OP(RDMA_READ_REQUEST): 92 case OP(RDMA_READ_REQUEST):
94 ss = &qp->s_rdma_sge; 93 qp->s_cur_sge = &qp->s_rdma_sge;
95 len = qp->s_rdma_len; 94 len = qp->s_rdma_len;
96 if (len > pmtu) { 95 if (len > pmtu) {
97 len = pmtu; 96 len = pmtu;
@@ -108,7 +107,7 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp,
108 qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); 107 qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
109 /* FALLTHROUGH */ 108 /* FALLTHROUGH */
110 case OP(RDMA_READ_RESPONSE_MIDDLE): 109 case OP(RDMA_READ_RESPONSE_MIDDLE):
111 ss = &qp->s_rdma_sge; 110 qp->s_cur_sge = &qp->s_rdma_sge;
112 len = qp->s_rdma_len; 111 len = qp->s_rdma_len;
113 if (len > pmtu) 112 if (len > pmtu)
114 len = pmtu; 113 len = pmtu;
@@ -127,41 +126,50 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp,
127 * We have to prevent new requests from changing 126 * We have to prevent new requests from changing
128 * the r_sge state while a ipath_verbs_send() 127 * the r_sge state while a ipath_verbs_send()
129 * is in progress. 128 * is in progress.
130 * Changing r_state allows the receiver
131 * to continue processing new packets.
132 * We do it here now instead of above so
133 * that we are sure the packet was sent before
134 * changing the state.
135 */ 129 */
136 qp->r_state = OP(RDMA_READ_RESPONSE_LAST);
137 qp->s_ack_state = OP(ACKNOWLEDGE); 130 qp->s_ack_state = OP(ACKNOWLEDGE);
138 return 0; 131 bth0 = 0;
132 goto bail;
139 133
140 case OP(COMPARE_SWAP): 134 case OP(COMPARE_SWAP):
141 case OP(FETCH_ADD): 135 case OP(FETCH_ADD):
142 ss = NULL; 136 qp->s_cur_sge = NULL;
143 len = 0; 137 len = 0;
144 qp->r_state = OP(SEND_LAST); 138 /*
145 qp->s_ack_state = OP(ACKNOWLEDGE); 139 * Set the s_ack_state so the receive interrupt handler
146 bth0 = IB_OPCODE_ATOMIC_ACKNOWLEDGE << 24; 140 * won't try to send an ACK (out of order) until this one
141 * is actually sent.
142 */
143 qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
144 bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24;
147 ohdr->u.at.aeth = ipath_compute_aeth(qp); 145 ohdr->u.at.aeth = ipath_compute_aeth(qp);
148 ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->s_ack_atomic); 146 ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
149 hwords += sizeof(ohdr->u.at) / 4; 147 hwords += sizeof(ohdr->u.at) / 4;
150 break; 148 break;
151 149
152 default: 150 default:
153 /* Send a regular ACK. */ 151 /* Send a regular ACK. */
154 ss = NULL; 152 qp->s_cur_sge = NULL;
155 len = 0; 153 len = 0;
156 qp->s_ack_state = OP(ACKNOWLEDGE); 154 /*
157 bth0 = qp->s_ack_state << 24; 155 * Set the s_ack_state so the receive interrupt handler
158 ohdr->u.aeth = ipath_compute_aeth(qp); 156 * won't try to send an ACK (out of order) until this one
157 * is actually sent.
158 */
159 qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
160 bth0 = OP(ACKNOWLEDGE) << 24;
161 if (qp->s_nak_state)
162 ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPS_MSN_MASK) |
163 (qp->s_nak_state <<
164 IPS_AETH_CREDIT_SHIFT));
165 else
166 ohdr->u.aeth = ipath_compute_aeth(qp);
159 hwords++; 167 hwords++;
160 } 168 }
161 qp->s_hdrwords = hwords; 169 qp->s_hdrwords = hwords;
162 qp->s_cur_sge = ss;
163 qp->s_cur_size = len; 170 qp->s_cur_size = len;
164 171
172bail:
165 return bth0; 173 return bth0;
166} 174}
167 175
@@ -174,7 +182,7 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp,
174 * @bth2p: pointer to the BTH PSN word 182 * @bth2p: pointer to the BTH PSN word
175 * 183 *
176 * Return 1 if constructed; otherwise, return 0. 184 * Return 1 if constructed; otherwise, return 0.
177 * Note the QP s_lock must be held. 185 * Note the QP s_lock must be held and interrupts disabled.
178 */ 186 */
179int ipath_make_rc_req(struct ipath_qp *qp, 187int ipath_make_rc_req(struct ipath_qp *qp,
180 struct ipath_other_headers *ohdr, 188 struct ipath_other_headers *ohdr,
@@ -356,6 +364,11 @@ int ipath_make_rc_req(struct ipath_qp *qp,
356 bth2 |= qp->s_psn++ & IPS_PSN_MASK; 364 bth2 |= qp->s_psn++ & IPS_PSN_MASK;
357 if ((int)(qp->s_psn - qp->s_next_psn) > 0) 365 if ((int)(qp->s_psn - qp->s_next_psn) > 0)
358 qp->s_next_psn = qp->s_psn; 366 qp->s_next_psn = qp->s_psn;
367 /*
368 * Put the QP on the pending list so lost ACKs will cause
369 * a retry. More than one request can be pending so the
370 * QP may already be on the dev->pending list.
371 */
359 spin_lock(&dev->pending_lock); 372 spin_lock(&dev->pending_lock);
360 if (list_empty(&qp->timerwait)) 373 if (list_empty(&qp->timerwait))
361 list_add_tail(&qp->timerwait, 374 list_add_tail(&qp->timerwait,
@@ -365,8 +378,8 @@ int ipath_make_rc_req(struct ipath_qp *qp,
365 378
366 case OP(RDMA_READ_RESPONSE_FIRST): 379 case OP(RDMA_READ_RESPONSE_FIRST):
367 /* 380 /*
368 * This case can only happen if a send is restarted. See 381 * This case can only happen if a send is restarted.
369 * ipath_restart_rc(). 382 * See ipath_restart_rc().
370 */ 383 */
371 ipath_init_restart(qp, wqe); 384 ipath_init_restart(qp, wqe);
372 /* FALLTHROUGH */ 385 /* FALLTHROUGH */
@@ -526,11 +539,17 @@ static void send_rc_ack(struct ipath_qp *qp)
526 ohdr = &hdr.u.l.oth; 539 ohdr = &hdr.u.l.oth;
527 lrh0 = IPS_LRH_GRH; 540 lrh0 = IPS_LRH_GRH;
528 } 541 }
542 /* read pkey_index w/o lock (it's atomic) */
529 bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); 543 bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
530 ohdr->u.aeth = ipath_compute_aeth(qp); 544 if (qp->r_nak_state)
531 if (qp->s_ack_state >= OP(COMPARE_SWAP)) { 545 ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPS_MSN_MASK) |
532 bth0 |= IB_OPCODE_ATOMIC_ACKNOWLEDGE << 24; 546 (qp->r_nak_state <<
533 ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->s_ack_atomic); 547 IPS_AETH_CREDIT_SHIFT));
548 else
549 ohdr->u.aeth = ipath_compute_aeth(qp);
550 if (qp->r_ack_state >= OP(COMPARE_SWAP)) {
551 bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24;
552 ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
534 hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4; 553 hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4;
535 } else 554 } else
536 bth0 |= OP(ACKNOWLEDGE) << 24; 555 bth0 |= OP(ACKNOWLEDGE) << 24;
@@ -541,15 +560,36 @@ static void send_rc_ack(struct ipath_qp *qp)
541 hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd)); 560 hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
542 ohdr->bth[0] = cpu_to_be32(bth0); 561 ohdr->bth[0] = cpu_to_be32(bth0);
543 ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); 562 ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
544 ohdr->bth[2] = cpu_to_be32(qp->s_ack_psn & IPS_PSN_MASK); 563 ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPS_PSN_MASK);
545 564
546 /* 565 /*
547 * If we can send the ACK, clear the ACK state. 566 * If we can send the ACK, clear the ACK state.
548 */ 567 */
549 if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) { 568 if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) {
550 qp->s_ack_state = OP(ACKNOWLEDGE); 569 qp->r_ack_state = OP(ACKNOWLEDGE);
551 dev->n_rc_qacks++;
552 dev->n_unicast_xmit++; 570 dev->n_unicast_xmit++;
571 } else {
572 /*
573 * We are out of PIO buffers at the moment.
574 * Pass responsibility for sending the ACK to the
575 * send tasklet so that when a PIO buffer becomes
576 * available, the ACK is sent ahead of other outgoing
577 * packets.
578 */
579 dev->n_rc_qacks++;
580 spin_lock_irq(&qp->s_lock);
581 /* Don't coalesce if a RDMA read or atomic is pending. */
582 if (qp->s_ack_state == OP(ACKNOWLEDGE) ||
583 qp->s_ack_state < OP(RDMA_READ_REQUEST)) {
584 qp->s_ack_state = qp->r_ack_state;
585 qp->s_nak_state = qp->r_nak_state;
586 qp->s_ack_psn = qp->r_ack_psn;
587 qp->r_ack_state = OP(ACKNOWLEDGE);
588 }
589 spin_unlock_irq(&qp->s_lock);
590
591 /* Call ipath_do_rc_send() in another thread. */
592 tasklet_hi_schedule(&qp->s_task);
553 } 593 }
554} 594}
555 595
@@ -641,7 +681,7 @@ done:
641 * @psn: packet sequence number for the request 681 * @psn: packet sequence number for the request
642 * @wc: the work completion request 682 * @wc: the work completion request
643 * 683 *
644 * The QP s_lock should be held. 684 * The QP s_lock should be held and interrupts disabled.
645 */ 685 */
646void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) 686void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
647{ 687{
@@ -705,7 +745,7 @@ bail:
705 * 745 *
706 * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK 746 * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK
707 * for the given QP. 747 * for the given QP.
708 * Called at interrupt level with the QP s_lock held. 748 * Called at interrupt level with the QP s_lock held and interrupts disabled.
709 * Returns 1 if OK, 0 if current operation should be aborted (NAK). 749 * Returns 1 if OK, 0 if current operation should be aborted (NAK).
710 */ 750 */
711static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) 751static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
@@ -1126,18 +1166,16 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1126 * Don't queue the NAK if a RDMA read, atomic, or 1166 * Don't queue the NAK if a RDMA read, atomic, or
1127 * NAK is pending though. 1167 * NAK is pending though.
1128 */ 1168 */
1129 spin_lock(&qp->s_lock); 1169 if (qp->s_ack_state != OP(ACKNOWLEDGE) ||
1130 if ((qp->s_ack_state >= OP(RDMA_READ_REQUEST) && 1170 qp->r_nak_state != 0)
1131 qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) ||
1132 qp->s_nak_state != 0) {
1133 spin_unlock(&qp->s_lock);
1134 goto done; 1171 goto done;
1172 if (qp->r_ack_state < OP(COMPARE_SWAP)) {
1173 qp->r_ack_state = OP(SEND_ONLY);
1174 qp->r_nak_state = IB_NAK_PSN_ERROR;
1175 /* Use the expected PSN. */
1176 qp->r_ack_psn = qp->r_psn;
1135 } 1177 }
1136 qp->s_ack_state = OP(SEND_ONLY); 1178 goto send_ack;
1137 qp->s_nak_state = IB_NAK_PSN_ERROR;
1138 /* Use the expected PSN. */
1139 qp->s_ack_psn = qp->r_psn;
1140 goto resched;
1141 } 1179 }
1142 1180
1143 /* 1181 /*
@@ -1151,27 +1189,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1151 * send the earliest so that RDMA reads can be restarted at 1189 * send the earliest so that RDMA reads can be restarted at
1152 * the requester's expected PSN. 1190 * the requester's expected PSN.
1153 */ 1191 */
1154 spin_lock(&qp->s_lock); 1192 if (opcode == OP(RDMA_READ_REQUEST)) {
1155 if (qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE &&
1156 ipath_cmp24(psn, qp->s_ack_psn) >= 0) {
1157 if (qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST)
1158 qp->s_ack_psn = psn;
1159 spin_unlock(&qp->s_lock);
1160 goto done;
1161 }
1162 switch (opcode) {
1163 case OP(RDMA_READ_REQUEST):
1164 /*
1165 * We have to be careful to not change s_rdma_sge
1166 * while ipath_do_rc_send() is using it and not
1167 * holding the s_lock.
1168 */
1169 if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
1170 qp->s_ack_state >= IB_OPCODE_RDMA_READ_REQUEST) {
1171 spin_unlock(&qp->s_lock);
1172 dev->n_rdma_dup_busy++;
1173 goto done;
1174 }
1175 /* RETH comes after BTH */ 1193 /* RETH comes after BTH */
1176 if (!header_in_data) 1194 if (!header_in_data)
1177 reth = &ohdr->u.rc.reth; 1195 reth = &ohdr->u.rc.reth;
@@ -1179,6 +1197,22 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1179 reth = (struct ib_reth *)data; 1197 reth = (struct ib_reth *)data;
1180 data += sizeof(*reth); 1198 data += sizeof(*reth);
1181 } 1199 }
1200 /*
1201 * If we receive a duplicate RDMA request, it means the
1202 * requester saw a sequence error and needs to restart
1203 * from an earlier point. We can abort the current
1204 * RDMA read send in that case.
1205 */
1206 spin_lock_irq(&qp->s_lock);
1207 if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
1208 (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) {
1209 /*
1210 * We are already sending earlier requested data.
1211 * Don't abort it to send later out of sequence data.
1212 */
1213 spin_unlock_irq(&qp->s_lock);
1214 goto done;
1215 }
1182 qp->s_rdma_len = be32_to_cpu(reth->length); 1216 qp->s_rdma_len = be32_to_cpu(reth->length);
1183 if (qp->s_rdma_len != 0) { 1217 if (qp->s_rdma_len != 0) {
1184 u32 rkey = be32_to_cpu(reth->rkey); 1218 u32 rkey = be32_to_cpu(reth->rkey);
@@ -1192,8 +1226,10 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1192 ok = ipath_rkey_ok(dev, &qp->s_rdma_sge, 1226 ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
1193 qp->s_rdma_len, vaddr, rkey, 1227 qp->s_rdma_len, vaddr, rkey,
1194 IB_ACCESS_REMOTE_READ); 1228 IB_ACCESS_REMOTE_READ);
1195 if (unlikely(!ok)) 1229 if (unlikely(!ok)) {
1230 spin_unlock_irq(&qp->s_lock);
1196 goto done; 1231 goto done;
1232 }
1197 } else { 1233 } else {
1198 qp->s_rdma_sge.sg_list = NULL; 1234 qp->s_rdma_sge.sg_list = NULL;
1199 qp->s_rdma_sge.num_sge = 0; 1235 qp->s_rdma_sge.num_sge = 0;
@@ -1202,25 +1238,44 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1202 qp->s_rdma_sge.sge.length = 0; 1238 qp->s_rdma_sge.sge.length = 0;
1203 qp->s_rdma_sge.sge.sge_length = 0; 1239 qp->s_rdma_sge.sge.sge_length = 0;
1204 } 1240 }
1205 break; 1241 qp->s_ack_state = opcode;
1242 qp->s_ack_psn = psn;
1243 spin_unlock_irq(&qp->s_lock);
1244 tasklet_hi_schedule(&qp->s_task);
1245 goto send_ack;
1246 }
1247
1248 /*
1249 * A pending RDMA read will ACK anything before it so
1250 * ignore earlier duplicate requests.
1251 */
1252 if (qp->s_ack_state != OP(ACKNOWLEDGE))
1253 goto done;
1206 1254
1255 /*
1256 * If an ACK is pending, don't replace the pending ACK
1257 * with an earlier one since the later one will ACK the earlier.
1258 * Also, if we already have a pending atomic, send it.
1259 */
1260 if (qp->r_ack_state != OP(ACKNOWLEDGE) &&
1261 (ipath_cmp24(psn, qp->r_ack_psn) <= 0 ||
1262 qp->r_ack_state >= OP(COMPARE_SWAP)))
1263 goto send_ack;
1264 switch (opcode) {
1207 case OP(COMPARE_SWAP): 1265 case OP(COMPARE_SWAP):
1208 case OP(FETCH_ADD): 1266 case OP(FETCH_ADD):
1209 /* 1267 /*
1210 * Check for the PSN of the last atomic operation 1268 * Check for the PSN of the last atomic operation
1211 * performed and resend the result if found. 1269 * performed and resend the result if found.
1212 */ 1270 */
1213 if ((psn & IPS_PSN_MASK) != qp->r_atomic_psn) { 1271 if ((psn & IPS_PSN_MASK) != qp->r_atomic_psn)
1214 spin_unlock(&qp->s_lock);
1215 goto done; 1272 goto done;
1216 }
1217 qp->s_ack_atomic = qp->r_atomic_data;
1218 break; 1273 break;
1219 } 1274 }
1220 qp->s_ack_state = opcode; 1275 qp->r_ack_state = opcode;
1221 qp->s_nak_state = 0; 1276 qp->r_nak_state = 0;
1222 qp->s_ack_psn = psn; 1277 qp->r_ack_psn = psn;
1223resched: 1278send_ack:
1224 return 0; 1279 return 0;
1225 1280
1226done: 1281done:
@@ -1248,7 +1303,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1248 u32 hdrsize; 1303 u32 hdrsize;
1249 u32 psn; 1304 u32 psn;
1250 u32 pad; 1305 u32 pad;
1251 unsigned long flags;
1252 struct ib_wc wc; 1306 struct ib_wc wc;
1253 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); 1307 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
1254 int diff; 1308 int diff;
@@ -1289,18 +1343,16 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1289 opcode <= OP(ATOMIC_ACKNOWLEDGE)) { 1343 opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
1290 ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn, 1344 ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn,
1291 hdrsize, pmtu, header_in_data); 1345 hdrsize, pmtu, header_in_data);
1292 goto bail; 1346 goto done;
1293 } 1347 }
1294 1348
1295 spin_lock_irqsave(&qp->r_rq.lock, flags);
1296
1297 /* Compute 24 bits worth of difference. */ 1349 /* Compute 24 bits worth of difference. */
1298 diff = ipath_cmp24(psn, qp->r_psn); 1350 diff = ipath_cmp24(psn, qp->r_psn);
1299 if (unlikely(diff)) { 1351 if (unlikely(diff)) {
1300 if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode, 1352 if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
1301 psn, diff, header_in_data)) 1353 psn, diff, header_in_data))
1302 goto done; 1354 goto done;
1303 goto resched; 1355 goto send_ack;
1304 } 1356 }
1305 1357
1306 /* Check for opcode sequence errors. */ 1358 /* Check for opcode sequence errors. */
@@ -1312,22 +1364,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1312 opcode == OP(SEND_LAST_WITH_IMMEDIATE)) 1364 opcode == OP(SEND_LAST_WITH_IMMEDIATE))
1313 break; 1365 break;
1314 nack_inv: 1366 nack_inv:
1315 /* 1367 /*
1316 * A NAK will ACK earlier sends and RDMA writes. Don't queue the 1368 * A NAK will ACK earlier sends and RDMA writes.
1317 * NAK if a RDMA read, atomic, or NAK is pending though. 1369 * Don't queue the NAK if a RDMA read, atomic, or NAK
1318 */ 1370 * is pending though.
1319 spin_lock(&qp->s_lock); 1371 */
1320 if (qp->s_ack_state >= OP(RDMA_READ_REQUEST) && 1372 if (qp->r_ack_state >= OP(COMPARE_SWAP))
1321 qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) { 1373 goto send_ack;
1322 spin_unlock(&qp->s_lock); 1374 /* XXX Flush WQEs */
1323 goto done; 1375 qp->state = IB_QPS_ERR;
1324 } 1376 qp->r_ack_state = OP(SEND_ONLY);
1325 /* XXX Flush WQEs */ 1377 qp->r_nak_state = IB_NAK_INVALID_REQUEST;
1326 qp->state = IB_QPS_ERR; 1378 qp->r_ack_psn = qp->r_psn;
1327 qp->s_ack_state = OP(SEND_ONLY); 1379 goto send_ack;
1328 qp->s_nak_state = IB_NAK_INVALID_REQUEST;
1329 qp->s_ack_psn = qp->r_psn;
1330 goto resched;
1331 1380
1332 case OP(RDMA_WRITE_FIRST): 1381 case OP(RDMA_WRITE_FIRST):
1333 case OP(RDMA_WRITE_MIDDLE): 1382 case OP(RDMA_WRITE_MIDDLE):
@@ -1337,20 +1386,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1337 break; 1386 break;
1338 goto nack_inv; 1387 goto nack_inv;
1339 1388
1340 case OP(RDMA_READ_REQUEST):
1341 case OP(COMPARE_SWAP):
1342 case OP(FETCH_ADD):
1343 /*
1344 * Drop all new requests until a response has been sent. A
1345 * new request then ACKs the RDMA response we sent. Relaxed
1346 * ordering would allow new requests to be processed but we
1347 * would need to keep a queue of rwqe's for all that are in
1348 * progress. Note that we can't RNR NAK this request since
1349 * the RDMA READ or atomic response is already queued to be
1350 * sent (unless we implement a response send queue).
1351 */
1352 goto done;
1353
1354 default: 1389 default:
1355 if (opcode == OP(SEND_MIDDLE) || 1390 if (opcode == OP(SEND_MIDDLE) ||
1356 opcode == OP(SEND_LAST) || 1391 opcode == OP(SEND_LAST) ||
@@ -1359,6 +1394,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1359 opcode == OP(RDMA_WRITE_LAST) || 1394 opcode == OP(RDMA_WRITE_LAST) ||
1360 opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) 1395 opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1361 goto nack_inv; 1396 goto nack_inv;
1397 /*
1398 * Note that it is up to the requester to not send a new
1399 * RDMA read or atomic operation before receiving an ACK
1400 * for the previous operation.
1401 */
1362 break; 1402 break;
1363 } 1403 }
1364 1404
@@ -1375,17 +1415,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1375 * Don't queue the NAK if a RDMA read or atomic 1415 * Don't queue the NAK if a RDMA read or atomic
1376 * is pending though. 1416 * is pending though.
1377 */ 1417 */
1378 spin_lock(&qp->s_lock); 1418 if (qp->r_ack_state >= OP(COMPARE_SWAP))
1379 if (qp->s_ack_state >= 1419 goto send_ack;
1380 OP(RDMA_READ_REQUEST) && 1420 qp->r_ack_state = OP(SEND_ONLY);
1381 qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) { 1421 qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
1382 spin_unlock(&qp->s_lock); 1422 qp->r_ack_psn = qp->r_psn;
1383 goto done; 1423 goto send_ack;
1384 }
1385 qp->s_ack_state = OP(SEND_ONLY);
1386 qp->s_nak_state = IB_RNR_NAK | qp->s_min_rnr_timer;
1387 qp->s_ack_psn = qp->r_psn;
1388 goto resched;
1389 } 1424 }
1390 qp->r_rcv_len = 0; 1425 qp->r_rcv_len = 0;
1391 /* FALLTHROUGH */ 1426 /* FALLTHROUGH */
@@ -1442,7 +1477,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1442 if (unlikely(wc.byte_len > qp->r_len)) 1477 if (unlikely(wc.byte_len > qp->r_len))
1443 goto nack_inv; 1478 goto nack_inv;
1444 ipath_copy_sge(&qp->r_sge, data, tlen); 1479 ipath_copy_sge(&qp->r_sge, data, tlen);
1445 atomic_inc(&qp->msn); 1480 qp->r_msn++;
1446 if (opcode == OP(RDMA_WRITE_LAST) || 1481 if (opcode == OP(RDMA_WRITE_LAST) ||
1447 opcode == OP(RDMA_WRITE_ONLY)) 1482 opcode == OP(RDMA_WRITE_ONLY))
1448 break; 1483 break;
@@ -1486,29 +1521,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1486 ok = ipath_rkey_ok(dev, &qp->r_sge, 1521 ok = ipath_rkey_ok(dev, &qp->r_sge,
1487 qp->r_len, vaddr, rkey, 1522 qp->r_len, vaddr, rkey,
1488 IB_ACCESS_REMOTE_WRITE); 1523 IB_ACCESS_REMOTE_WRITE);
1489 if (unlikely(!ok)) { 1524 if (unlikely(!ok))
1490 nack_acc: 1525 goto nack_acc;
1491 /*
1492 * A NAK will ACK earlier sends and RDMA
1493 * writes. Don't queue the NAK if a RDMA
1494 * read, atomic, or NAK is pending though.
1495 */
1496 spin_lock(&qp->s_lock);
1497 if (qp->s_ack_state >=
1498 OP(RDMA_READ_REQUEST) &&
1499 qp->s_ack_state !=
1500 IB_OPCODE_ACKNOWLEDGE) {
1501 spin_unlock(&qp->s_lock);
1502 goto done;
1503 }
1504 /* XXX Flush WQEs */
1505 qp->state = IB_QPS_ERR;
1506 qp->s_ack_state = OP(RDMA_WRITE_ONLY);
1507 qp->s_nak_state =
1508 IB_NAK_REMOTE_ACCESS_ERROR;
1509 qp->s_ack_psn = qp->r_psn;
1510 goto resched;
1511 }
1512 } else { 1526 } else {
1513 qp->r_sge.sg_list = NULL; 1527 qp->r_sge.sg_list = NULL;
1514 qp->r_sge.sge.mr = NULL; 1528 qp->r_sge.sge.mr = NULL;
@@ -1535,12 +1549,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1535 reth = (struct ib_reth *)data; 1549 reth = (struct ib_reth *)data;
1536 data += sizeof(*reth); 1550 data += sizeof(*reth);
1537 } 1551 }
1538 spin_lock(&qp->s_lock); 1552 if (unlikely(!(qp->qp_access_flags &
1539 if (qp->s_ack_state != OP(ACKNOWLEDGE) && 1553 IB_ACCESS_REMOTE_READ)))
1540 qp->s_ack_state >= IB_OPCODE_RDMA_READ_REQUEST) { 1554 goto nack_acc;
1541 spin_unlock(&qp->s_lock); 1555 spin_lock_irq(&qp->s_lock);
1542 goto done;
1543 }
1544 qp->s_rdma_len = be32_to_cpu(reth->length); 1556 qp->s_rdma_len = be32_to_cpu(reth->length);
1545 if (qp->s_rdma_len != 0) { 1557 if (qp->s_rdma_len != 0) {
1546 u32 rkey = be32_to_cpu(reth->rkey); 1558 u32 rkey = be32_to_cpu(reth->rkey);
@@ -1552,7 +1564,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1552 qp->s_rdma_len, vaddr, rkey, 1564 qp->s_rdma_len, vaddr, rkey,
1553 IB_ACCESS_REMOTE_READ); 1565 IB_ACCESS_REMOTE_READ);
1554 if (unlikely(!ok)) { 1566 if (unlikely(!ok)) {
1555 spin_unlock(&qp->s_lock); 1567 spin_unlock_irq(&qp->s_lock);
1556 goto nack_acc; 1568 goto nack_acc;
1557 } 1569 }
1558 /* 1570 /*
@@ -1569,21 +1581,25 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1569 qp->s_rdma_sge.sge.length = 0; 1581 qp->s_rdma_sge.sge.length = 0;
1570 qp->s_rdma_sge.sge.sge_length = 0; 1582 qp->s_rdma_sge.sge.sge_length = 0;
1571 } 1583 }
1572 if (unlikely(!(qp->qp_access_flags &
1573 IB_ACCESS_REMOTE_READ)))
1574 goto nack_acc;
1575 /* 1584 /*
1576 * We need to increment the MSN here instead of when we 1585 * We need to increment the MSN here instead of when we
1577 * finish sending the result since a duplicate request would 1586 * finish sending the result since a duplicate request would
1578 * increment it more than once. 1587 * increment it more than once.
1579 */ 1588 */
1580 atomic_inc(&qp->msn); 1589 qp->r_msn++;
1590
1581 qp->s_ack_state = opcode; 1591 qp->s_ack_state = opcode;
1582 qp->s_nak_state = 0;
1583 qp->s_ack_psn = psn; 1592 qp->s_ack_psn = psn;
1593 spin_unlock_irq(&qp->s_lock);
1594
1584 qp->r_psn++; 1595 qp->r_psn++;
1585 qp->r_state = opcode; 1596 qp->r_state = opcode;
1586 goto rdmadone; 1597 qp->r_nak_state = 0;
1598
1599 /* Call ipath_do_rc_send() in another thread. */
1600 tasklet_hi_schedule(&qp->s_task);
1601
1602 goto done;
1587 1603
1588 case OP(COMPARE_SWAP): 1604 case OP(COMPARE_SWAP):
1589 case OP(FETCH_ADD): { 1605 case OP(FETCH_ADD): {
@@ -1612,7 +1628,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1612 goto nack_acc; 1628 goto nack_acc;
1613 /* Perform atomic OP and save result. */ 1629 /* Perform atomic OP and save result. */
1614 sdata = be64_to_cpu(ateth->swap_data); 1630 sdata = be64_to_cpu(ateth->swap_data);
1615 spin_lock(&dev->pending_lock); 1631 spin_lock_irq(&dev->pending_lock);
1616 qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; 1632 qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
1617 if (opcode == OP(FETCH_ADD)) 1633 if (opcode == OP(FETCH_ADD))
1618 *(u64 *) qp->r_sge.sge.vaddr = 1634 *(u64 *) qp->r_sge.sge.vaddr =
@@ -1620,8 +1636,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1620 else if (qp->r_atomic_data == 1636 else if (qp->r_atomic_data ==
1621 be64_to_cpu(ateth->compare_data)) 1637 be64_to_cpu(ateth->compare_data))
1622 *(u64 *) qp->r_sge.sge.vaddr = sdata; 1638 *(u64 *) qp->r_sge.sge.vaddr = sdata;
1623 spin_unlock(&dev->pending_lock); 1639 spin_unlock_irq(&dev->pending_lock);
1624 atomic_inc(&qp->msn); 1640 qp->r_msn++;
1625 qp->r_atomic_psn = psn & IPS_PSN_MASK; 1641 qp->r_atomic_psn = psn & IPS_PSN_MASK;
1626 psn |= 1 << 31; 1642 psn |= 1 << 31;
1627 break; 1643 break;
@@ -1633,44 +1649,39 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1633 } 1649 }
1634 qp->r_psn++; 1650 qp->r_psn++;
1635 qp->r_state = opcode; 1651 qp->r_state = opcode;
1652 qp->r_nak_state = 0;
1636 /* Send an ACK if requested or required. */ 1653 /* Send an ACK if requested or required. */
1637 if (psn & (1 << 31)) { 1654 if (psn & (1 << 31)) {
1638 /* 1655 /*
1639 * Coalesce ACKs unless there is a RDMA READ or 1656 * Coalesce ACKs unless there is a RDMA READ or
1640 * ATOMIC pending. 1657 * ATOMIC pending.
1641 */ 1658 */
1642 spin_lock(&qp->s_lock); 1659 if (qp->r_ack_state < OP(COMPARE_SWAP)) {
1643 if (qp->s_ack_state == OP(ACKNOWLEDGE) || 1660 qp->r_ack_state = opcode;
1644 qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST) { 1661 qp->r_ack_psn = psn;
1645 qp->s_ack_state = opcode;
1646 qp->s_nak_state = 0;
1647 qp->s_ack_psn = psn;
1648 qp->s_ack_atomic = qp->r_atomic_data;
1649 goto resched;
1650 } 1662 }
1651 spin_unlock(&qp->s_lock); 1663 goto send_ack;
1652 } 1664 }
1653done: 1665 goto done;
1654 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
1655 goto bail;
1656 1666
1657resched: 1667nack_acc:
1658 /* 1668 /*
1659 * Try to send ACK right away but not if ipath_do_rc_send() is 1669 * A NAK will ACK earlier sends and RDMA writes.
1660 * active. 1670 * Don't queue the NAK if a RDMA read, atomic, or NAK
1671 * is pending though.
1661 */ 1672 */
1662 if (qp->s_hdrwords == 0 && 1673 if (qp->r_ack_state < OP(COMPARE_SWAP)) {
1663 (qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST || 1674 /* XXX Flush WQEs */
1664 qp->s_ack_state >= IB_OPCODE_COMPARE_SWAP)) 1675 qp->state = IB_QPS_ERR;
1676 qp->r_ack_state = OP(RDMA_WRITE_ONLY);
1677 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
1678 qp->r_ack_psn = qp->r_psn;
1679 }
1680send_ack:
1681 /* Send ACK right away unless the send tasklet has a pending ACK. */
1682 if (qp->s_ack_state == OP(ACKNOWLEDGE))
1665 send_rc_ack(qp); 1683 send_rc_ack(qp);
1666 1684
1667rdmadone: 1685done:
1668 spin_unlock(&qp->s_lock);
1669 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
1670
1671 /* Call ipath_do_rc_send() in another thread. */
1672 tasklet_hi_schedule(&qp->s_task);
1673
1674bail:
1675 return; 1686 return;
1676} 1687}