path: root/drivers/infiniband/hw/ipath/ipath_rc.c
author    Bryan O'Sullivan <bos@pathscale.com>  2006-07-01 07:36:10 -0400
committer Linus Torvalds <torvalds@g5.osdl.org>  2006-07-01 12:56:01 -0400
commit    12eef41f8b72b6e11e36b48c78849c17e49781c8 (patch)
tree      3a8bd77d77c7542e19d200d3abf25fc07f0d8f51 /drivers/infiniband/hw/ipath/ipath_rc.c
parent    fba75200ad92892bf32d8d6f1443c6f1e4f48676 (diff)
[PATCH] IB/ipath: RC receive interrupt performance changes
This patch separates QP state used for sending and receiving RC packets so the
processing in the receive interrupt handler can be done mostly without locks
being held. ACK packets are now sent without requiring synchronization with
the send tasklet.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Cc: "Michael S. Tsirkin" <mst@mellanox.co.il>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/infiniband/hw/ipath/ipath_rc.c')
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_rc.c | 383
1 file changed, 197 insertions, 186 deletions
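
The heart of the change, visible in the diff below, is that responder ACK/NAK state moves out of the send-side s_ack_* fields (which require s_lock) into receive-side r_ack_state, r_nak_state, and r_ack_psn fields that the receive interrupt handler can update on its own; s_lock is only taken for a short hand-off when an ACK has to be deferred to the send tasklet. The following is a minimal sketch of that hand-off pattern using a hypothetical toy_qp structure and toy_queue_ack() helper (not the real struct ipath_qp layout or driver API), just to illustrate the split:

#include <linux/spinlock.h>
#include <linux/types.h>

#define TOY_OP_ACKNOWLEDGE 17           /* stand-in for OP(ACKNOWLEDGE) */

/* Hypothetical QP with the receive/send ACK state split used by this patch. */
struct toy_qp {
        spinlock_t s_lock;      /* protects the send-side (s_*) fields */
        u8 r_ack_state;         /* receive side: ACK opcode to send, no lock needed */
        u8 r_nak_state;         /* receive side: pending NAK code, if any */
        u32 r_ack_psn;          /* receive side: PSN being acknowledged */
        u8 s_ack_state;         /* send side: ACK the send tasklet is working on */
        u8 s_nak_state;
        u32 s_ack_psn;
};

/*
 * Called from the receive interrupt path when an ACK cannot be sent
 * immediately (e.g. no PIO buffer is free).  Only this short critical
 * section takes s_lock; normal receive processing touches r_* only.
 */
static void toy_queue_ack(struct toy_qp *qp)
{
        spin_lock_irq(&qp->s_lock);
        /* Only hand off if the send side has no ACK of its own pending. */
        if (qp->s_ack_state == TOY_OP_ACKNOWLEDGE) {
                qp->s_ack_state = qp->r_ack_state;
                qp->s_nak_state = qp->r_nak_state;
                qp->s_ack_psn = qp->r_ack_psn;
                qp->r_ack_state = TOY_OP_ACKNOWLEDGE;
        }
        spin_unlock_irq(&qp->s_lock);
}

In the actual patch this hand-off lives in send_rc_ack(), which also schedules the send tasklet so the deferred ACK goes out ahead of other queued packets as soon as a PIO buffer becomes available.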
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index bd2c405c4bf0..8568dd0538cf 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -42,7 +42,7 @@
  * @qp: the QP who's SGE we're restarting
  * @wqe: the work queue to initialize the QP's SGE from
  *
- * The QP s_lock should be held.
+ * The QP s_lock should be held and interrupts disabled.
  */
 static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
 {
@@ -77,7 +77,6 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp,
                       struct ipath_other_headers *ohdr,
                       u32 pmtu)
 {
-        struct ipath_sge_state *ss;
         u32 hwords;
         u32 len;
         u32 bth0;
@@ -91,7 +90,7 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp,
          */
         switch (qp->s_ack_state) {
         case OP(RDMA_READ_REQUEST):
-                ss = &qp->s_rdma_sge;
+                qp->s_cur_sge = &qp->s_rdma_sge;
                 len = qp->s_rdma_len;
                 if (len > pmtu) {
                         len = pmtu;
@@ -108,7 +107,7 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp,
                 qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
                 /* FALLTHROUGH */
         case OP(RDMA_READ_RESPONSE_MIDDLE):
-                ss = &qp->s_rdma_sge;
+                qp->s_cur_sge = &qp->s_rdma_sge;
                 len = qp->s_rdma_len;
                 if (len > pmtu)
                         len = pmtu;
@@ -127,41 +126,50 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp,
                  * We have to prevent new requests from changing
                  * the r_sge state while a ipath_verbs_send()
                  * is in progress.
-                 * Changing r_state allows the receiver
-                 * to continue processing new packets.
-                 * We do it here now instead of above so
-                 * that we are sure the packet was sent before
-                 * changing the state.
                  */
-                qp->r_state = OP(RDMA_READ_RESPONSE_LAST);
                 qp->s_ack_state = OP(ACKNOWLEDGE);
-                return 0;
+                bth0 = 0;
+                goto bail;
 
         case OP(COMPARE_SWAP):
         case OP(FETCH_ADD):
-                ss = NULL;
+                qp->s_cur_sge = NULL;
                 len = 0;
-                qp->r_state = OP(SEND_LAST);
-                qp->s_ack_state = OP(ACKNOWLEDGE);
-                bth0 = IB_OPCODE_ATOMIC_ACKNOWLEDGE << 24;
+                /*
+                 * Set the s_ack_state so the receive interrupt handler
+                 * won't try to send an ACK (out of order) until this one
+                 * is actually sent.
+                 */
+                qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+                bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24;
                 ohdr->u.at.aeth = ipath_compute_aeth(qp);
-                ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->s_ack_atomic);
+                ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
                 hwords += sizeof(ohdr->u.at) / 4;
                 break;
 
         default:
                 /* Send a regular ACK. */
-                ss = NULL;
+                qp->s_cur_sge = NULL;
                 len = 0;
-                qp->s_ack_state = OP(ACKNOWLEDGE);
-                bth0 = qp->s_ack_state << 24;
-                ohdr->u.aeth = ipath_compute_aeth(qp);
+                /*
+                 * Set the s_ack_state so the receive interrupt handler
+                 * won't try to send an ACK (out of order) until this one
+                 * is actually sent.
+                 */
+                qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+                bth0 = OP(ACKNOWLEDGE) << 24;
+                if (qp->s_nak_state)
+                        ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPS_MSN_MASK) |
+                                                   (qp->s_nak_state <<
+                                                    IPS_AETH_CREDIT_SHIFT));
+                else
+                        ohdr->u.aeth = ipath_compute_aeth(qp);
                 hwords++;
         }
         qp->s_hdrwords = hwords;
-        qp->s_cur_sge = ss;
         qp->s_cur_size = len;
 
+bail:
         return bth0;
 }
 
@@ -174,7 +182,7 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp,
  * @bth2p: pointer to the BTH PSN word
  *
  * Return 1 if constructed; otherwise, return 0.
- * Note the QP s_lock must be held.
+ * Note the QP s_lock must be held and interrupts disabled.
  */
 int ipath_make_rc_req(struct ipath_qp *qp,
                       struct ipath_other_headers *ohdr,
@@ -356,6 +364,11 @@ int ipath_make_rc_req(struct ipath_qp *qp,
                 bth2 |= qp->s_psn++ & IPS_PSN_MASK;
                 if ((int)(qp->s_psn - qp->s_next_psn) > 0)
                         qp->s_next_psn = qp->s_psn;
+                /*
+                 * Put the QP on the pending list so lost ACKs will cause
+                 * a retry. More than one request can be pending so the
+                 * QP may already be on the dev->pending list.
+                 */
                 spin_lock(&dev->pending_lock);
                 if (list_empty(&qp->timerwait))
                         list_add_tail(&qp->timerwait,
@@ -365,8 +378,8 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 
         case OP(RDMA_READ_RESPONSE_FIRST):
                 /*
-                 * This case can only happen if a send is restarted. See
-                 * ipath_restart_rc().
+                 * This case can only happen if a send is restarted.
+                 * See ipath_restart_rc().
                  */
                 ipath_init_restart(qp, wqe);
                 /* FALLTHROUGH */
@@ -526,11 +539,17 @@ static void send_rc_ack(struct ipath_qp *qp)
                 ohdr = &hdr.u.l.oth;
                 lrh0 = IPS_LRH_GRH;
         }
+        /* read pkey_index w/o lock (its atomic) */
         bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
-        ohdr->u.aeth = ipath_compute_aeth(qp);
-        if (qp->s_ack_state >= OP(COMPARE_SWAP)) {
-                bth0 |= IB_OPCODE_ATOMIC_ACKNOWLEDGE << 24;
-                ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->s_ack_atomic);
+        if (qp->r_nak_state)
+                ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPS_MSN_MASK) |
+                                           (qp->r_nak_state <<
+                                            IPS_AETH_CREDIT_SHIFT));
+        else
+                ohdr->u.aeth = ipath_compute_aeth(qp);
+        if (qp->r_ack_state >= OP(COMPARE_SWAP)) {
+                bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24;
+                ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
                 hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4;
         } else
                 bth0 |= OP(ACKNOWLEDGE) << 24;
@@ -541,15 +560,36 @@ static void send_rc_ack(struct ipath_qp *qp)
         hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
         ohdr->bth[0] = cpu_to_be32(bth0);
         ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
-        ohdr->bth[2] = cpu_to_be32(qp->s_ack_psn & IPS_PSN_MASK);
+        ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPS_PSN_MASK);
 
         /*
          * If we can send the ACK, clear the ACK state.
          */
         if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) {
-                qp->s_ack_state = OP(ACKNOWLEDGE);
-                dev->n_rc_qacks++;
+                qp->r_ack_state = OP(ACKNOWLEDGE);
                 dev->n_unicast_xmit++;
+        } else {
+                /*
+                 * We are out of PIO buffers at the moment.
+                 * Pass responsibility for sending the ACK to the
+                 * send tasklet so that when a PIO buffer becomes
+                 * available, the ACK is sent ahead of other outgoing
+                 * packets.
+                 */
+                dev->n_rc_qacks++;
+                spin_lock_irq(&qp->s_lock);
+                /* Don't coalesce if a RDMA read or atomic is pending. */
+                if (qp->s_ack_state == OP(ACKNOWLEDGE) ||
+                    qp->s_ack_state < OP(RDMA_READ_REQUEST)) {
+                        qp->s_ack_state = qp->r_ack_state;
+                        qp->s_nak_state = qp->r_nak_state;
+                        qp->s_ack_psn = qp->r_ack_psn;
+                        qp->r_ack_state = OP(ACKNOWLEDGE);
+                }
+                spin_unlock_irq(&qp->s_lock);
+
+                /* Call ipath_do_rc_send() in another thread. */
+                tasklet_hi_schedule(&qp->s_task);
         }
 }
 
@@ -641,7 +681,7 @@ done:
  * @psn: packet sequence number for the request
  * @wc: the work completion request
  *
- * The QP s_lock should be held.
+ * The QP s_lock should be held and interrupts disabled.
  */
 void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
 {
@@ -705,7 +745,7 @@ bail:
  *
  * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK
  * for the given QP.
- * Called at interrupt level with the QP s_lock held.
+ * Called at interrupt level with the QP s_lock held and interrupts disabled.
  * Returns 1 if OK, 0 if current operation should be aborted (NAK).
  */
 static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
@@ -1126,18 +1166,16 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
                  * Don't queue the NAK if a RDMA read, atomic, or
                  * NAK is pending though.
                  */
-                spin_lock(&qp->s_lock);
-                if ((qp->s_ack_state >= OP(RDMA_READ_REQUEST) &&
-                     qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) ||
-                    qp->s_nak_state != 0) {
-                        spin_unlock(&qp->s_lock);
+                if (qp->s_ack_state != OP(ACKNOWLEDGE) ||
+                    qp->r_nak_state != 0)
                         goto done;
+                if (qp->r_ack_state < OP(COMPARE_SWAP)) {
+                        qp->r_ack_state = OP(SEND_ONLY);
+                        qp->r_nak_state = IB_NAK_PSN_ERROR;
+                        /* Use the expected PSN. */
+                        qp->r_ack_psn = qp->r_psn;
                 }
-                qp->s_ack_state = OP(SEND_ONLY);
-                qp->s_nak_state = IB_NAK_PSN_ERROR;
-                /* Use the expected PSN. */
-                qp->s_ack_psn = qp->r_psn;
-                goto resched;
+                goto send_ack;
         }
 
         /*
@@ -1151,27 +1189,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
          * send the earliest so that RDMA reads can be restarted at
          * the requester's expected PSN.
          */
-        spin_lock(&qp->s_lock);
-        if (qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE &&
-            ipath_cmp24(psn, qp->s_ack_psn) >= 0) {
-                if (qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST)
-                        qp->s_ack_psn = psn;
-                spin_unlock(&qp->s_lock);
-                goto done;
-        }
-        switch (opcode) {
-        case OP(RDMA_READ_REQUEST):
-                /*
-                 * We have to be careful to not change s_rdma_sge
-                 * while ipath_do_rc_send() is using it and not
-                 * holding the s_lock.
-                 */
-                if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
-                    qp->s_ack_state >= IB_OPCODE_RDMA_READ_REQUEST) {
-                        spin_unlock(&qp->s_lock);
-                        dev->n_rdma_dup_busy++;
-                        goto done;
-                }
+        if (opcode == OP(RDMA_READ_REQUEST)) {
                 /* RETH comes after BTH */
                 if (!header_in_data)
                         reth = &ohdr->u.rc.reth;
@@ -1179,6 +1197,22 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
                         reth = (struct ib_reth *)data;
                         data += sizeof(*reth);
                 }
+                /*
+                 * If we receive a duplicate RDMA request, it means the
+                 * requester saw a sequence error and needs to restart
+                 * from an earlier point. We can abort the current
+                 * RDMA read send in that case.
+                 */
+                spin_lock_irq(&qp->s_lock);
+                if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
+                    (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) {
+                        /*
+                         * We are already sending earlier requested data.
+                         * Don't abort it to send later out of sequence data.
+                         */
+                        spin_unlock_irq(&qp->s_lock);
+                        goto done;
+                }
                 qp->s_rdma_len = be32_to_cpu(reth->length);
                 if (qp->s_rdma_len != 0) {
                         u32 rkey = be32_to_cpu(reth->rkey);
@@ -1192,8 +1226,10 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
                         ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
                                            qp->s_rdma_len, vaddr, rkey,
                                            IB_ACCESS_REMOTE_READ);
-                        if (unlikely(!ok))
+                        if (unlikely(!ok)) {
+                                spin_unlock_irq(&qp->s_lock);
                                 goto done;
+                        }
                 } else {
                         qp->s_rdma_sge.sg_list = NULL;
                         qp->s_rdma_sge.num_sge = 0;
@@ -1202,25 +1238,44 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
                         qp->s_rdma_sge.sge.length = 0;
                         qp->s_rdma_sge.sge.sge_length = 0;
                 }
-                break;
+                qp->s_ack_state = opcode;
+                qp->s_ack_psn = psn;
+                spin_unlock_irq(&qp->s_lock);
+                tasklet_hi_schedule(&qp->s_task);
+                goto send_ack;
+        }
+
+        /*
+         * A pending RDMA read will ACK anything before it so
+         * ignore earlier duplicate requests.
+         */
+        if (qp->s_ack_state != OP(ACKNOWLEDGE))
+                goto done;
 
+        /*
+         * If an ACK is pending, don't replace the pending ACK
+         * with an earlier one since the later one will ACK the earlier.
+         * Also, if we already have a pending atomic, send it.
+         */
+        if (qp->r_ack_state != OP(ACKNOWLEDGE) &&
+            (ipath_cmp24(psn, qp->r_ack_psn) <= 0 ||
+             qp->r_ack_state >= OP(COMPARE_SWAP)))
+                goto send_ack;
+        switch (opcode) {
         case OP(COMPARE_SWAP):
         case OP(FETCH_ADD):
                 /*
                  * Check for the PSN of the last atomic operation
                  * performed and resend the result if found.
                  */
-                if ((psn & IPS_PSN_MASK) != qp->r_atomic_psn) {
-                        spin_unlock(&qp->s_lock);
+                if ((psn & IPS_PSN_MASK) != qp->r_atomic_psn)
                         goto done;
-                }
-                qp->s_ack_atomic = qp->r_atomic_data;
                 break;
         }
-        qp->s_ack_state = opcode;
-        qp->s_nak_state = 0;
-        qp->s_ack_psn = psn;
-resched:
+        qp->r_ack_state = opcode;
+        qp->r_nak_state = 0;
+        qp->r_ack_psn = psn;
+send_ack:
         return 0;
 
 done:
@@ -1248,7 +1303,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
         u32 hdrsize;
         u32 psn;
         u32 pad;
-        unsigned long flags;
         struct ib_wc wc;
         u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
         int diff;
@@ -1289,18 +1343,16 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
             opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
                 ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn,
                                   hdrsize, pmtu, header_in_data);
-                goto bail;
+                goto done;
         }
 
-        spin_lock_irqsave(&qp->r_rq.lock, flags);
-
         /* Compute 24 bits worth of difference. */
         diff = ipath_cmp24(psn, qp->r_psn);
         if (unlikely(diff)) {
                 if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
                                        psn, diff, header_in_data))
                         goto done;
-                goto resched;
+                goto send_ack;
         }
 
         /* Check for opcode sequence errors. */
@@ -1312,22 +1364,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                     opcode == OP(SEND_LAST_WITH_IMMEDIATE))
                         break;
         nack_inv:
                 /*
-                 * A NAK will ACK earlier sends and RDMA writes. Don't queue the
-                 * NAK if a RDMA read, atomic, or NAK is pending though.
+                 * A NAK will ACK earlier sends and RDMA writes.
+                 * Don't queue the NAK if a RDMA read, atomic, or NAK
+                 * is pending though.
                  */
-                spin_lock(&qp->s_lock);
-                if (qp->s_ack_state >= OP(RDMA_READ_REQUEST) &&
-                    qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) {
-                        spin_unlock(&qp->s_lock);
-                        goto done;
-                }
+                if (qp->r_ack_state >= OP(COMPARE_SWAP))
+                        goto send_ack;
                 /* XXX Flush WQEs */
                 qp->state = IB_QPS_ERR;
-                qp->s_ack_state = OP(SEND_ONLY);
-                qp->s_nak_state = IB_NAK_INVALID_REQUEST;
-                qp->s_ack_psn = qp->r_psn;
-                goto resched;
+                qp->r_ack_state = OP(SEND_ONLY);
+                qp->r_nak_state = IB_NAK_INVALID_REQUEST;
+                qp->r_ack_psn = qp->r_psn;
+                goto send_ack;
 
         case OP(RDMA_WRITE_FIRST):
         case OP(RDMA_WRITE_MIDDLE):
@@ -1337,20 +1386,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                         break;
                 goto nack_inv;
 
-        case OP(RDMA_READ_REQUEST):
-        case OP(COMPARE_SWAP):
-        case OP(FETCH_ADD):
-                /*
-                 * Drop all new requests until a response has been sent. A
-                 * new request then ACKs the RDMA response we sent. Relaxed
-                 * ordering would allow new requests to be processed but we
-                 * would need to keep a queue of rwqe's for all that are in
-                 * progress. Note that we can't RNR NAK this request since
-                 * the RDMA READ or atomic response is already queued to be
-                 * sent (unless we implement a response send queue).
-                 */
-                goto done;
-
         default:
                 if (opcode == OP(SEND_MIDDLE) ||
                     opcode == OP(SEND_LAST) ||
@@ -1359,6 +1394,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                     opcode == OP(RDMA_WRITE_LAST) ||
                     opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
                         goto nack_inv;
+                /*
+                 * Note that it is up to the requester to not send a new
+                 * RDMA read or atomic operation before receiving an ACK
+                 * for the previous operation.
+                 */
                 break;
         }
 
@@ -1375,17 +1415,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                          * Don't queue the NAK if a RDMA read or atomic
                          * is pending though.
                          */
-                        spin_lock(&qp->s_lock);
-                        if (qp->s_ack_state >=
-                            OP(RDMA_READ_REQUEST) &&
-                            qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) {
-                                spin_unlock(&qp->s_lock);
-                                goto done;
-                        }
-                        qp->s_ack_state = OP(SEND_ONLY);
-                        qp->s_nak_state = IB_RNR_NAK | qp->s_min_rnr_timer;
-                        qp->s_ack_psn = qp->r_psn;
-                        goto resched;
+                        if (qp->r_ack_state >= OP(COMPARE_SWAP))
+                                goto send_ack;
+                        qp->r_ack_state = OP(SEND_ONLY);
+                        qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
+                        qp->r_ack_psn = qp->r_psn;
+                        goto send_ack;
                 }
                 qp->r_rcv_len = 0;
                 /* FALLTHROUGH */
@@ -1442,7 +1477,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                 if (unlikely(wc.byte_len > qp->r_len))
                         goto nack_inv;
                 ipath_copy_sge(&qp->r_sge, data, tlen);
-                atomic_inc(&qp->msn);
+                qp->r_msn++;
                 if (opcode == OP(RDMA_WRITE_LAST) ||
                     opcode == OP(RDMA_WRITE_ONLY))
                         break;
@@ -1486,29 +1521,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                         ok = ipath_rkey_ok(dev, &qp->r_sge,
                                            qp->r_len, vaddr, rkey,
                                            IB_ACCESS_REMOTE_WRITE);
-                        if (unlikely(!ok)) {
-                        nack_acc:
-                                /*
-                                 * A NAK will ACK earlier sends and RDMA
-                                 * writes. Don't queue the NAK if a RDMA
-                                 * read, atomic, or NAK is pending though.
-                                 */
-                                spin_lock(&qp->s_lock);
-                                if (qp->s_ack_state >=
-                                    OP(RDMA_READ_REQUEST) &&
-                                    qp->s_ack_state !=
-                                    IB_OPCODE_ACKNOWLEDGE) {
-                                        spin_unlock(&qp->s_lock);
-                                        goto done;
-                                }
-                                /* XXX Flush WQEs */
-                                qp->state = IB_QPS_ERR;
-                                qp->s_ack_state = OP(RDMA_WRITE_ONLY);
-                                qp->s_nak_state =
-                                        IB_NAK_REMOTE_ACCESS_ERROR;
-                                qp->s_ack_psn = qp->r_psn;
-                                goto resched;
-                        }
+                        if (unlikely(!ok))
+                                goto nack_acc;
                 } else {
                         qp->r_sge.sg_list = NULL;
                         qp->r_sge.sge.mr = NULL;
@@ -1535,12 +1549,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                         reth = (struct ib_reth *)data;
                         data += sizeof(*reth);
                 }
-                spin_lock(&qp->s_lock);
-                if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
-                    qp->s_ack_state >= IB_OPCODE_RDMA_READ_REQUEST) {
-                        spin_unlock(&qp->s_lock);
-                        goto done;
-                }
+                if (unlikely(!(qp->qp_access_flags &
+                               IB_ACCESS_REMOTE_READ)))
+                        goto nack_acc;
+                spin_lock_irq(&qp->s_lock);
                 qp->s_rdma_len = be32_to_cpu(reth->length);
                 if (qp->s_rdma_len != 0) {
                         u32 rkey = be32_to_cpu(reth->rkey);
@@ -1552,7 +1564,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                                            qp->s_rdma_len, vaddr, rkey,
                                            IB_ACCESS_REMOTE_READ);
                         if (unlikely(!ok)) {
-                                spin_unlock(&qp->s_lock);
+                                spin_unlock_irq(&qp->s_lock);
                                 goto nack_acc;
                         }
                         /*
@@ -1569,21 +1581,25 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                         qp->s_rdma_sge.sge.length = 0;
                         qp->s_rdma_sge.sge.sge_length = 0;
                 }
-                if (unlikely(!(qp->qp_access_flags &
-                               IB_ACCESS_REMOTE_READ)))
-                        goto nack_acc;
                 /*
                  * We need to increment the MSN here instead of when we
                  * finish sending the result since a duplicate request would
                  * increment it more than once.
                  */
-                atomic_inc(&qp->msn);
+                qp->r_msn++;
+
                 qp->s_ack_state = opcode;
-                qp->s_nak_state = 0;
                 qp->s_ack_psn = psn;
+                spin_unlock_irq(&qp->s_lock);
+
                 qp->r_psn++;
                 qp->r_state = opcode;
-                goto rdmadone;
+                qp->r_nak_state = 0;
+
+                /* Call ipath_do_rc_send() in another thread. */
+                tasklet_hi_schedule(&qp->s_task);
+
+                goto done;
 
         case OP(COMPARE_SWAP):
         case OP(FETCH_ADD): {
@@ -1612,7 +1628,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                         goto nack_acc;
                 /* Perform atomic OP and save result. */
                 sdata = be64_to_cpu(ateth->swap_data);
-                spin_lock(&dev->pending_lock);
+                spin_lock_irq(&dev->pending_lock);
                 qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
                 if (opcode == OP(FETCH_ADD))
                         *(u64 *) qp->r_sge.sge.vaddr =
@@ -1620,8 +1636,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                 else if (qp->r_atomic_data ==
                          be64_to_cpu(ateth->compare_data))
                         *(u64 *) qp->r_sge.sge.vaddr = sdata;
-                spin_unlock(&dev->pending_lock);
-                atomic_inc(&qp->msn);
+                spin_unlock_irq(&dev->pending_lock);
+                qp->r_msn++;
                 qp->r_atomic_psn = psn & IPS_PSN_MASK;
                 psn |= 1 << 31;
                 break;
@@ -1633,44 +1649,39 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
         }
         qp->r_psn++;
         qp->r_state = opcode;
+        qp->r_nak_state = 0;
         /* Send an ACK if requested or required. */
         if (psn & (1 << 31)) {
                 /*
                  * Coalesce ACKs unless there is a RDMA READ or
                  * ATOMIC pending.
                  */
-                spin_lock(&qp->s_lock);
-                if (qp->s_ack_state == OP(ACKNOWLEDGE) ||
-                    qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST) {
-                        qp->s_ack_state = opcode;
-                        qp->s_nak_state = 0;
-                        qp->s_ack_psn = psn;
-                        qp->s_ack_atomic = qp->r_atomic_data;
-                        goto resched;
+                if (qp->r_ack_state < OP(COMPARE_SWAP)) {
+                        qp->r_ack_state = opcode;
+                        qp->r_ack_psn = psn;
                 }
-                spin_unlock(&qp->s_lock);
+                goto send_ack;
         }
-done:
-        spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-        goto bail;
+        goto done;
 
-resched:
+nack_acc:
         /*
-         * Try to send ACK right away but not if ipath_do_rc_send() is
-         * active.
+         * A NAK will ACK earlier sends and RDMA writes.
+         * Don't queue the NAK if a RDMA read, atomic, or NAK
+         * is pending though.
          */
-        if (qp->s_hdrwords == 0 &&
-            (qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST ||
-             qp->s_ack_state >= IB_OPCODE_COMPARE_SWAP))
+        if (qp->r_ack_state < OP(COMPARE_SWAP)) {
+                /* XXX Flush WQEs */
+                qp->state = IB_QPS_ERR;
+                qp->r_ack_state = OP(RDMA_WRITE_ONLY);
+                qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
+                qp->r_ack_psn = qp->r_psn;
+        }
+send_ack:
+        /* Send ACK right away unless the send tasklet has a pending ACK. */
+        if (qp->s_ack_state == OP(ACKNOWLEDGE))
                 send_rc_ack(qp);
 
-rdmadone:
-        spin_unlock(&qp->s_lock);
-        spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-
-        /* Call ipath_do_rc_send() in another thread. */
-        tasklet_hi_schedule(&qp->s_task);
-
-bail:
+done:
         return;
 }