diff options
author | Bryan O'Sullivan <bos@pathscale.com> | 2006-07-01 07:36:10 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-07-01 12:56:01 -0400 |
commit | 12eef41f8b72b6e11e36b48c78849c17e49781c8 (patch) | |
tree | 3a8bd77d77c7542e19d200d3abf25fc07f0d8f51 /drivers/infiniband/hw/ipath/ipath_rc.c | |
parent | fba75200ad92892bf32d8d6f1443c6f1e4f48676 (diff) |
[PATCH] IB/ipath: RC receive interrupt performance changes
This patch separates QP state used for sending and receiving RC packets so the
processing in the receive interrupt handler can be done mostly without locks
being held. ACK packets are now sent without requiring synchronization with
the send tasklet.
Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Cc: "Michael S. Tsirkin" <mst@mellanox.co.il>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/infiniband/hw/ipath/ipath_rc.c')
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_rc.c | 383 |
1 files changed, 197 insertions, 186 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index bd2c405c4bf0..8568dd0538cf 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c | |||
@@ -42,7 +42,7 @@ | |||
42 | * @qp: the QP who's SGE we're restarting | 42 | * @qp: the QP who's SGE we're restarting |
43 | * @wqe: the work queue to initialize the QP's SGE from | 43 | * @wqe: the work queue to initialize the QP's SGE from |
44 | * | 44 | * |
45 | * The QP s_lock should be held. | 45 | * The QP s_lock should be held and interrupts disabled. |
46 | */ | 46 | */ |
47 | static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) | 47 | static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) |
48 | { | 48 | { |
@@ -77,7 +77,6 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp, | |||
77 | struct ipath_other_headers *ohdr, | 77 | struct ipath_other_headers *ohdr, |
78 | u32 pmtu) | 78 | u32 pmtu) |
79 | { | 79 | { |
80 | struct ipath_sge_state *ss; | ||
81 | u32 hwords; | 80 | u32 hwords; |
82 | u32 len; | 81 | u32 len; |
83 | u32 bth0; | 82 | u32 bth0; |
@@ -91,7 +90,7 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp, | |||
91 | */ | 90 | */ |
92 | switch (qp->s_ack_state) { | 91 | switch (qp->s_ack_state) { |
93 | case OP(RDMA_READ_REQUEST): | 92 | case OP(RDMA_READ_REQUEST): |
94 | ss = &qp->s_rdma_sge; | 93 | qp->s_cur_sge = &qp->s_rdma_sge; |
95 | len = qp->s_rdma_len; | 94 | len = qp->s_rdma_len; |
96 | if (len > pmtu) { | 95 | if (len > pmtu) { |
97 | len = pmtu; | 96 | len = pmtu; |
@@ -108,7 +107,7 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp, | |||
108 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); | 107 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); |
109 | /* FALLTHROUGH */ | 108 | /* FALLTHROUGH */ |
110 | case OP(RDMA_READ_RESPONSE_MIDDLE): | 109 | case OP(RDMA_READ_RESPONSE_MIDDLE): |
111 | ss = &qp->s_rdma_sge; | 110 | qp->s_cur_sge = &qp->s_rdma_sge; |
112 | len = qp->s_rdma_len; | 111 | len = qp->s_rdma_len; |
113 | if (len > pmtu) | 112 | if (len > pmtu) |
114 | len = pmtu; | 113 | len = pmtu; |
@@ -127,41 +126,50 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp, | |||
127 | * We have to prevent new requests from changing | 126 | * We have to prevent new requests from changing |
128 | * the r_sge state while a ipath_verbs_send() | 127 | * the r_sge state while a ipath_verbs_send() |
129 | * is in progress. | 128 | * is in progress. |
130 | * Changing r_state allows the receiver | ||
131 | * to continue processing new packets. | ||
132 | * We do it here now instead of above so | ||
133 | * that we are sure the packet was sent before | ||
134 | * changing the state. | ||
135 | */ | 129 | */ |
136 | qp->r_state = OP(RDMA_READ_RESPONSE_LAST); | ||
137 | qp->s_ack_state = OP(ACKNOWLEDGE); | 130 | qp->s_ack_state = OP(ACKNOWLEDGE); |
138 | return 0; | 131 | bth0 = 0; |
132 | goto bail; | ||
139 | 133 | ||
140 | case OP(COMPARE_SWAP): | 134 | case OP(COMPARE_SWAP): |
141 | case OP(FETCH_ADD): | 135 | case OP(FETCH_ADD): |
142 | ss = NULL; | 136 | qp->s_cur_sge = NULL; |
143 | len = 0; | 137 | len = 0; |
144 | qp->r_state = OP(SEND_LAST); | 138 | /* |
145 | qp->s_ack_state = OP(ACKNOWLEDGE); | 139 | * Set the s_ack_state so the receive interrupt handler |
146 | bth0 = IB_OPCODE_ATOMIC_ACKNOWLEDGE << 24; | 140 | * won't try to send an ACK (out of order) until this one |
141 | * is actually sent. | ||
142 | */ | ||
143 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); | ||
144 | bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24; | ||
147 | ohdr->u.at.aeth = ipath_compute_aeth(qp); | 145 | ohdr->u.at.aeth = ipath_compute_aeth(qp); |
148 | ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->s_ack_atomic); | 146 | ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data); |
149 | hwords += sizeof(ohdr->u.at) / 4; | 147 | hwords += sizeof(ohdr->u.at) / 4; |
150 | break; | 148 | break; |
151 | 149 | ||
152 | default: | 150 | default: |
153 | /* Send a regular ACK. */ | 151 | /* Send a regular ACK. */ |
154 | ss = NULL; | 152 | qp->s_cur_sge = NULL; |
155 | len = 0; | 153 | len = 0; |
156 | qp->s_ack_state = OP(ACKNOWLEDGE); | 154 | /* |
157 | bth0 = qp->s_ack_state << 24; | 155 | * Set the s_ack_state so the receive interrupt handler |
158 | ohdr->u.aeth = ipath_compute_aeth(qp); | 156 | * won't try to send an ACK (out of order) until this one |
157 | * is actually sent. | ||
158 | */ | ||
159 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); | ||
160 | bth0 = OP(ACKNOWLEDGE) << 24; | ||
161 | if (qp->s_nak_state) | ||
162 | ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPS_MSN_MASK) | | ||
163 | (qp->s_nak_state << | ||
164 | IPS_AETH_CREDIT_SHIFT)); | ||
165 | else | ||
166 | ohdr->u.aeth = ipath_compute_aeth(qp); | ||
159 | hwords++; | 167 | hwords++; |
160 | } | 168 | } |
161 | qp->s_hdrwords = hwords; | 169 | qp->s_hdrwords = hwords; |
162 | qp->s_cur_sge = ss; | ||
163 | qp->s_cur_size = len; | 170 | qp->s_cur_size = len; |
164 | 171 | ||
172 | bail: | ||
165 | return bth0; | 173 | return bth0; |
166 | } | 174 | } |
167 | 175 | ||
@@ -174,7 +182,7 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp, | |||
174 | * @bth2p: pointer to the BTH PSN word | 182 | * @bth2p: pointer to the BTH PSN word |
175 | * | 183 | * |
176 | * Return 1 if constructed; otherwise, return 0. | 184 | * Return 1 if constructed; otherwise, return 0. |
177 | * Note the QP s_lock must be held. | 185 | * Note the QP s_lock must be held and interrupts disabled. |
178 | */ | 186 | */ |
179 | int ipath_make_rc_req(struct ipath_qp *qp, | 187 | int ipath_make_rc_req(struct ipath_qp *qp, |
180 | struct ipath_other_headers *ohdr, | 188 | struct ipath_other_headers *ohdr, |
@@ -356,6 +364,11 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
356 | bth2 |= qp->s_psn++ & IPS_PSN_MASK; | 364 | bth2 |= qp->s_psn++ & IPS_PSN_MASK; |
357 | if ((int)(qp->s_psn - qp->s_next_psn) > 0) | 365 | if ((int)(qp->s_psn - qp->s_next_psn) > 0) |
358 | qp->s_next_psn = qp->s_psn; | 366 | qp->s_next_psn = qp->s_psn; |
367 | /* | ||
368 | * Put the QP on the pending list so lost ACKs will cause | ||
369 | * a retry. More than one request can be pending so the | ||
370 | * QP may already be on the dev->pending list. | ||
371 | */ | ||
359 | spin_lock(&dev->pending_lock); | 372 | spin_lock(&dev->pending_lock); |
360 | if (list_empty(&qp->timerwait)) | 373 | if (list_empty(&qp->timerwait)) |
361 | list_add_tail(&qp->timerwait, | 374 | list_add_tail(&qp->timerwait, |
@@ -365,8 +378,8 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
365 | 378 | ||
366 | case OP(RDMA_READ_RESPONSE_FIRST): | 379 | case OP(RDMA_READ_RESPONSE_FIRST): |
367 | /* | 380 | /* |
368 | * This case can only happen if a send is restarted. See | 381 | * This case can only happen if a send is restarted. |
369 | * ipath_restart_rc(). | 382 | * See ipath_restart_rc(). |
370 | */ | 383 | */ |
371 | ipath_init_restart(qp, wqe); | 384 | ipath_init_restart(qp, wqe); |
372 | /* FALLTHROUGH */ | 385 | /* FALLTHROUGH */ |
@@ -526,11 +539,17 @@ static void send_rc_ack(struct ipath_qp *qp) | |||
526 | ohdr = &hdr.u.l.oth; | 539 | ohdr = &hdr.u.l.oth; |
527 | lrh0 = IPS_LRH_GRH; | 540 | lrh0 = IPS_LRH_GRH; |
528 | } | 541 | } |
542 | /* read pkey_index w/o lock (its atomic) */ | ||
529 | bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); | 543 | bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); |
530 | ohdr->u.aeth = ipath_compute_aeth(qp); | 544 | if (qp->r_nak_state) |
531 | if (qp->s_ack_state >= OP(COMPARE_SWAP)) { | 545 | ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPS_MSN_MASK) | |
532 | bth0 |= IB_OPCODE_ATOMIC_ACKNOWLEDGE << 24; | 546 | (qp->r_nak_state << |
533 | ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->s_ack_atomic); | 547 | IPS_AETH_CREDIT_SHIFT)); |
548 | else | ||
549 | ohdr->u.aeth = ipath_compute_aeth(qp); | ||
550 | if (qp->r_ack_state >= OP(COMPARE_SWAP)) { | ||
551 | bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24; | ||
552 | ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data); | ||
534 | hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4; | 553 | hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4; |
535 | } else | 554 | } else |
536 | bth0 |= OP(ACKNOWLEDGE) << 24; | 555 | bth0 |= OP(ACKNOWLEDGE) << 24; |
@@ -541,15 +560,36 @@ static void send_rc_ack(struct ipath_qp *qp) | |||
541 | hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd)); | 560 | hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd)); |
542 | ohdr->bth[0] = cpu_to_be32(bth0); | 561 | ohdr->bth[0] = cpu_to_be32(bth0); |
543 | ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); | 562 | ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); |
544 | ohdr->bth[2] = cpu_to_be32(qp->s_ack_psn & IPS_PSN_MASK); | 563 | ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPS_PSN_MASK); |
545 | 564 | ||
546 | /* | 565 | /* |
547 | * If we can send the ACK, clear the ACK state. | 566 | * If we can send the ACK, clear the ACK state. |
548 | */ | 567 | */ |
549 | if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) { | 568 | if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) { |
550 | qp->s_ack_state = OP(ACKNOWLEDGE); | 569 | qp->r_ack_state = OP(ACKNOWLEDGE); |
551 | dev->n_rc_qacks++; | ||
552 | dev->n_unicast_xmit++; | 570 | dev->n_unicast_xmit++; |
571 | } else { | ||
572 | /* | ||
573 | * We are out of PIO buffers at the moment. | ||
574 | * Pass responsibility for sending the ACK to the | ||
575 | * send tasklet so that when a PIO buffer becomes | ||
576 | * available, the ACK is sent ahead of other outgoing | ||
577 | * packets. | ||
578 | */ | ||
579 | dev->n_rc_qacks++; | ||
580 | spin_lock_irq(&qp->s_lock); | ||
581 | /* Don't coalesce if a RDMA read or atomic is pending. */ | ||
582 | if (qp->s_ack_state == OP(ACKNOWLEDGE) || | ||
583 | qp->s_ack_state < OP(RDMA_READ_REQUEST)) { | ||
584 | qp->s_ack_state = qp->r_ack_state; | ||
585 | qp->s_nak_state = qp->r_nak_state; | ||
586 | qp->s_ack_psn = qp->r_ack_psn; | ||
587 | qp->r_ack_state = OP(ACKNOWLEDGE); | ||
588 | } | ||
589 | spin_unlock_irq(&qp->s_lock); | ||
590 | |||
591 | /* Call ipath_do_rc_send() in another thread. */ | ||
592 | tasklet_hi_schedule(&qp->s_task); | ||
553 | } | 593 | } |
554 | } | 594 | } |
555 | 595 | ||
@@ -641,7 +681,7 @@ done: | |||
641 | * @psn: packet sequence number for the request | 681 | * @psn: packet sequence number for the request |
642 | * @wc: the work completion request | 682 | * @wc: the work completion request |
643 | * | 683 | * |
644 | * The QP s_lock should be held. | 684 | * The QP s_lock should be held and interrupts disabled. |
645 | */ | 685 | */ |
646 | void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) | 686 | void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) |
647 | { | 687 | { |
@@ -705,7 +745,7 @@ bail: | |||
705 | * | 745 | * |
706 | * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK | 746 | * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK |
707 | * for the given QP. | 747 | * for the given QP. |
708 | * Called at interrupt level with the QP s_lock held. | 748 | * Called at interrupt level with the QP s_lock held and interrupts disabled. |
709 | * Returns 1 if OK, 0 if current operation should be aborted (NAK). | 749 | * Returns 1 if OK, 0 if current operation should be aborted (NAK). |
710 | */ | 750 | */ |
711 | static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) | 751 | static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) |
@@ -1126,18 +1166,16 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | |||
1126 | * Don't queue the NAK if a RDMA read, atomic, or | 1166 | * Don't queue the NAK if a RDMA read, atomic, or |
1127 | * NAK is pending though. | 1167 | * NAK is pending though. |
1128 | */ | 1168 | */ |
1129 | spin_lock(&qp->s_lock); | 1169 | if (qp->s_ack_state != OP(ACKNOWLEDGE) || |
1130 | if ((qp->s_ack_state >= OP(RDMA_READ_REQUEST) && | 1170 | qp->r_nak_state != 0) |
1131 | qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) || | ||
1132 | qp->s_nak_state != 0) { | ||
1133 | spin_unlock(&qp->s_lock); | ||
1134 | goto done; | 1171 | goto done; |
1172 | if (qp->r_ack_state < OP(COMPARE_SWAP)) { | ||
1173 | qp->r_ack_state = OP(SEND_ONLY); | ||
1174 | qp->r_nak_state = IB_NAK_PSN_ERROR; | ||
1175 | /* Use the expected PSN. */ | ||
1176 | qp->r_ack_psn = qp->r_psn; | ||
1135 | } | 1177 | } |
1136 | qp->s_ack_state = OP(SEND_ONLY); | 1178 | goto send_ack; |
1137 | qp->s_nak_state = IB_NAK_PSN_ERROR; | ||
1138 | /* Use the expected PSN. */ | ||
1139 | qp->s_ack_psn = qp->r_psn; | ||
1140 | goto resched; | ||
1141 | } | 1179 | } |
1142 | 1180 | ||
1143 | /* | 1181 | /* |
@@ -1151,27 +1189,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | |||
1151 | * send the earliest so that RDMA reads can be restarted at | 1189 | * send the earliest so that RDMA reads can be restarted at |
1152 | * the requester's expected PSN. | 1190 | * the requester's expected PSN. |
1153 | */ | 1191 | */ |
1154 | spin_lock(&qp->s_lock); | 1192 | if (opcode == OP(RDMA_READ_REQUEST)) { |
1155 | if (qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE && | ||
1156 | ipath_cmp24(psn, qp->s_ack_psn) >= 0) { | ||
1157 | if (qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST) | ||
1158 | qp->s_ack_psn = psn; | ||
1159 | spin_unlock(&qp->s_lock); | ||
1160 | goto done; | ||
1161 | } | ||
1162 | switch (opcode) { | ||
1163 | case OP(RDMA_READ_REQUEST): | ||
1164 | /* | ||
1165 | * We have to be careful to not change s_rdma_sge | ||
1166 | * while ipath_do_rc_send() is using it and not | ||
1167 | * holding the s_lock. | ||
1168 | */ | ||
1169 | if (qp->s_ack_state != OP(ACKNOWLEDGE) && | ||
1170 | qp->s_ack_state >= IB_OPCODE_RDMA_READ_REQUEST) { | ||
1171 | spin_unlock(&qp->s_lock); | ||
1172 | dev->n_rdma_dup_busy++; | ||
1173 | goto done; | ||
1174 | } | ||
1175 | /* RETH comes after BTH */ | 1193 | /* RETH comes after BTH */ |
1176 | if (!header_in_data) | 1194 | if (!header_in_data) |
1177 | reth = &ohdr->u.rc.reth; | 1195 | reth = &ohdr->u.rc.reth; |
@@ -1179,6 +1197,22 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | |||
1179 | reth = (struct ib_reth *)data; | 1197 | reth = (struct ib_reth *)data; |
1180 | data += sizeof(*reth); | 1198 | data += sizeof(*reth); |
1181 | } | 1199 | } |
1200 | /* | ||
1201 | * If we receive a duplicate RDMA request, it means the | ||
1202 | * requester saw a sequence error and needs to restart | ||
1203 | * from an earlier point. We can abort the current | ||
1204 | * RDMA read send in that case. | ||
1205 | */ | ||
1206 | spin_lock_irq(&qp->s_lock); | ||
1207 | if (qp->s_ack_state != OP(ACKNOWLEDGE) && | ||
1208 | (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) { | ||
1209 | /* | ||
1210 | * We are already sending earlier requested data. | ||
1211 | * Don't abort it to send later out of sequence data. | ||
1212 | */ | ||
1213 | spin_unlock_irq(&qp->s_lock); | ||
1214 | goto done; | ||
1215 | } | ||
1182 | qp->s_rdma_len = be32_to_cpu(reth->length); | 1216 | qp->s_rdma_len = be32_to_cpu(reth->length); |
1183 | if (qp->s_rdma_len != 0) { | 1217 | if (qp->s_rdma_len != 0) { |
1184 | u32 rkey = be32_to_cpu(reth->rkey); | 1218 | u32 rkey = be32_to_cpu(reth->rkey); |
@@ -1192,8 +1226,10 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | |||
1192 | ok = ipath_rkey_ok(dev, &qp->s_rdma_sge, | 1226 | ok = ipath_rkey_ok(dev, &qp->s_rdma_sge, |
1193 | qp->s_rdma_len, vaddr, rkey, | 1227 | qp->s_rdma_len, vaddr, rkey, |
1194 | IB_ACCESS_REMOTE_READ); | 1228 | IB_ACCESS_REMOTE_READ); |
1195 | if (unlikely(!ok)) | 1229 | if (unlikely(!ok)) { |
1230 | spin_unlock_irq(&qp->s_lock); | ||
1196 | goto done; | 1231 | goto done; |
1232 | } | ||
1197 | } else { | 1233 | } else { |
1198 | qp->s_rdma_sge.sg_list = NULL; | 1234 | qp->s_rdma_sge.sg_list = NULL; |
1199 | qp->s_rdma_sge.num_sge = 0; | 1235 | qp->s_rdma_sge.num_sge = 0; |
@@ -1202,25 +1238,44 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | |||
1202 | qp->s_rdma_sge.sge.length = 0; | 1238 | qp->s_rdma_sge.sge.length = 0; |
1203 | qp->s_rdma_sge.sge.sge_length = 0; | 1239 | qp->s_rdma_sge.sge.sge_length = 0; |
1204 | } | 1240 | } |
1205 | break; | 1241 | qp->s_ack_state = opcode; |
1242 | qp->s_ack_psn = psn; | ||
1243 | spin_unlock_irq(&qp->s_lock); | ||
1244 | tasklet_hi_schedule(&qp->s_task); | ||
1245 | goto send_ack; | ||
1246 | } | ||
1247 | |||
1248 | /* | ||
1249 | * A pending RDMA read will ACK anything before it so | ||
1250 | * ignore earlier duplicate requests. | ||
1251 | */ | ||
1252 | if (qp->s_ack_state != OP(ACKNOWLEDGE)) | ||
1253 | goto done; | ||
1206 | 1254 | ||
1255 | /* | ||
1256 | * If an ACK is pending, don't replace the pending ACK | ||
1257 | * with an earlier one since the later one will ACK the earlier. | ||
1258 | * Also, if we already have a pending atomic, send it. | ||
1259 | */ | ||
1260 | if (qp->r_ack_state != OP(ACKNOWLEDGE) && | ||
1261 | (ipath_cmp24(psn, qp->r_ack_psn) <= 0 || | ||
1262 | qp->r_ack_state >= OP(COMPARE_SWAP))) | ||
1263 | goto send_ack; | ||
1264 | switch (opcode) { | ||
1207 | case OP(COMPARE_SWAP): | 1265 | case OP(COMPARE_SWAP): |
1208 | case OP(FETCH_ADD): | 1266 | case OP(FETCH_ADD): |
1209 | /* | 1267 | /* |
1210 | * Check for the PSN of the last atomic operation | 1268 | * Check for the PSN of the last atomic operation |
1211 | * performed and resend the result if found. | 1269 | * performed and resend the result if found. |
1212 | */ | 1270 | */ |
1213 | if ((psn & IPS_PSN_MASK) != qp->r_atomic_psn) { | 1271 | if ((psn & IPS_PSN_MASK) != qp->r_atomic_psn) |
1214 | spin_unlock(&qp->s_lock); | ||
1215 | goto done; | 1272 | goto done; |
1216 | } | ||
1217 | qp->s_ack_atomic = qp->r_atomic_data; | ||
1218 | break; | 1273 | break; |
1219 | } | 1274 | } |
1220 | qp->s_ack_state = opcode; | 1275 | qp->r_ack_state = opcode; |
1221 | qp->s_nak_state = 0; | 1276 | qp->r_nak_state = 0; |
1222 | qp->s_ack_psn = psn; | 1277 | qp->r_ack_psn = psn; |
1223 | resched: | 1278 | send_ack: |
1224 | return 0; | 1279 | return 0; |
1225 | 1280 | ||
1226 | done: | 1281 | done: |
@@ -1248,7 +1303,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1248 | u32 hdrsize; | 1303 | u32 hdrsize; |
1249 | u32 psn; | 1304 | u32 psn; |
1250 | u32 pad; | 1305 | u32 pad; |
1251 | unsigned long flags; | ||
1252 | struct ib_wc wc; | 1306 | struct ib_wc wc; |
1253 | u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); | 1307 | u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); |
1254 | int diff; | 1308 | int diff; |
@@ -1289,18 +1343,16 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1289 | opcode <= OP(ATOMIC_ACKNOWLEDGE)) { | 1343 | opcode <= OP(ATOMIC_ACKNOWLEDGE)) { |
1290 | ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn, | 1344 | ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn, |
1291 | hdrsize, pmtu, header_in_data); | 1345 | hdrsize, pmtu, header_in_data); |
1292 | goto bail; | 1346 | goto done; |
1293 | } | 1347 | } |
1294 | 1348 | ||
1295 | spin_lock_irqsave(&qp->r_rq.lock, flags); | ||
1296 | |||
1297 | /* Compute 24 bits worth of difference. */ | 1349 | /* Compute 24 bits worth of difference. */ |
1298 | diff = ipath_cmp24(psn, qp->r_psn); | 1350 | diff = ipath_cmp24(psn, qp->r_psn); |
1299 | if (unlikely(diff)) { | 1351 | if (unlikely(diff)) { |
1300 | if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode, | 1352 | if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode, |
1301 | psn, diff, header_in_data)) | 1353 | psn, diff, header_in_data)) |
1302 | goto done; | 1354 | goto done; |
1303 | goto resched; | 1355 | goto send_ack; |
1304 | } | 1356 | } |
1305 | 1357 | ||
1306 | /* Check for opcode sequence errors. */ | 1358 | /* Check for opcode sequence errors. */ |
@@ -1312,22 +1364,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1312 | opcode == OP(SEND_LAST_WITH_IMMEDIATE)) | 1364 | opcode == OP(SEND_LAST_WITH_IMMEDIATE)) |
1313 | break; | 1365 | break; |
1314 | nack_inv: | 1366 | nack_inv: |
1315 | /* | 1367 | /* |
1316 | * A NAK will ACK earlier sends and RDMA writes. Don't queue the | 1368 | * A NAK will ACK earlier sends and RDMA writes. |
1317 | * NAK if a RDMA read, atomic, or NAK is pending though. | 1369 | * Don't queue the NAK if a RDMA read, atomic, or NAK |
1318 | */ | 1370 | * is pending though. |
1319 | spin_lock(&qp->s_lock); | 1371 | */ |
1320 | if (qp->s_ack_state >= OP(RDMA_READ_REQUEST) && | 1372 | if (qp->r_ack_state >= OP(COMPARE_SWAP)) |
1321 | qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) { | 1373 | goto send_ack; |
1322 | spin_unlock(&qp->s_lock); | 1374 | /* XXX Flush WQEs */ |
1323 | goto done; | 1375 | qp->state = IB_QPS_ERR; |
1324 | } | 1376 | qp->r_ack_state = OP(SEND_ONLY); |
1325 | /* XXX Flush WQEs */ | 1377 | qp->r_nak_state = IB_NAK_INVALID_REQUEST; |
1326 | qp->state = IB_QPS_ERR; | 1378 | qp->r_ack_psn = qp->r_psn; |
1327 | qp->s_ack_state = OP(SEND_ONLY); | 1379 | goto send_ack; |
1328 | qp->s_nak_state = IB_NAK_INVALID_REQUEST; | ||
1329 | qp->s_ack_psn = qp->r_psn; | ||
1330 | goto resched; | ||
1331 | 1380 | ||
1332 | case OP(RDMA_WRITE_FIRST): | 1381 | case OP(RDMA_WRITE_FIRST): |
1333 | case OP(RDMA_WRITE_MIDDLE): | 1382 | case OP(RDMA_WRITE_MIDDLE): |
@@ -1337,20 +1386,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1337 | break; | 1386 | break; |
1338 | goto nack_inv; | 1387 | goto nack_inv; |
1339 | 1388 | ||
1340 | case OP(RDMA_READ_REQUEST): | ||
1341 | case OP(COMPARE_SWAP): | ||
1342 | case OP(FETCH_ADD): | ||
1343 | /* | ||
1344 | * Drop all new requests until a response has been sent. A | ||
1345 | * new request then ACKs the RDMA response we sent. Relaxed | ||
1346 | * ordering would allow new requests to be processed but we | ||
1347 | * would need to keep a queue of rwqe's for all that are in | ||
1348 | * progress. Note that we can't RNR NAK this request since | ||
1349 | * the RDMA READ or atomic response is already queued to be | ||
1350 | * sent (unless we implement a response send queue). | ||
1351 | */ | ||
1352 | goto done; | ||
1353 | |||
1354 | default: | 1389 | default: |
1355 | if (opcode == OP(SEND_MIDDLE) || | 1390 | if (opcode == OP(SEND_MIDDLE) || |
1356 | opcode == OP(SEND_LAST) || | 1391 | opcode == OP(SEND_LAST) || |
@@ -1359,6 +1394,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1359 | opcode == OP(RDMA_WRITE_LAST) || | 1394 | opcode == OP(RDMA_WRITE_LAST) || |
1360 | opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) | 1395 | opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) |
1361 | goto nack_inv; | 1396 | goto nack_inv; |
1397 | /* | ||
1398 | * Note that it is up to the requester to not send a new | ||
1399 | * RDMA read or atomic operation before receiving an ACK | ||
1400 | * for the previous operation. | ||
1401 | */ | ||
1362 | break; | 1402 | break; |
1363 | } | 1403 | } |
1364 | 1404 | ||
@@ -1375,17 +1415,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1375 | * Don't queue the NAK if a RDMA read or atomic | 1415 | * Don't queue the NAK if a RDMA read or atomic |
1376 | * is pending though. | 1416 | * is pending though. |
1377 | */ | 1417 | */ |
1378 | spin_lock(&qp->s_lock); | 1418 | if (qp->r_ack_state >= OP(COMPARE_SWAP)) |
1379 | if (qp->s_ack_state >= | 1419 | goto send_ack; |
1380 | OP(RDMA_READ_REQUEST) && | 1420 | qp->r_ack_state = OP(SEND_ONLY); |
1381 | qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) { | 1421 | qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; |
1382 | spin_unlock(&qp->s_lock); | 1422 | qp->r_ack_psn = qp->r_psn; |
1383 | goto done; | 1423 | goto send_ack; |
1384 | } | ||
1385 | qp->s_ack_state = OP(SEND_ONLY); | ||
1386 | qp->s_nak_state = IB_RNR_NAK | qp->s_min_rnr_timer; | ||
1387 | qp->s_ack_psn = qp->r_psn; | ||
1388 | goto resched; | ||
1389 | } | 1424 | } |
1390 | qp->r_rcv_len = 0; | 1425 | qp->r_rcv_len = 0; |
1391 | /* FALLTHROUGH */ | 1426 | /* FALLTHROUGH */ |
@@ -1442,7 +1477,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1442 | if (unlikely(wc.byte_len > qp->r_len)) | 1477 | if (unlikely(wc.byte_len > qp->r_len)) |
1443 | goto nack_inv; | 1478 | goto nack_inv; |
1444 | ipath_copy_sge(&qp->r_sge, data, tlen); | 1479 | ipath_copy_sge(&qp->r_sge, data, tlen); |
1445 | atomic_inc(&qp->msn); | 1480 | qp->r_msn++; |
1446 | if (opcode == OP(RDMA_WRITE_LAST) || | 1481 | if (opcode == OP(RDMA_WRITE_LAST) || |
1447 | opcode == OP(RDMA_WRITE_ONLY)) | 1482 | opcode == OP(RDMA_WRITE_ONLY)) |
1448 | break; | 1483 | break; |
@@ -1486,29 +1521,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1486 | ok = ipath_rkey_ok(dev, &qp->r_sge, | 1521 | ok = ipath_rkey_ok(dev, &qp->r_sge, |
1487 | qp->r_len, vaddr, rkey, | 1522 | qp->r_len, vaddr, rkey, |
1488 | IB_ACCESS_REMOTE_WRITE); | 1523 | IB_ACCESS_REMOTE_WRITE); |
1489 | if (unlikely(!ok)) { | 1524 | if (unlikely(!ok)) |
1490 | nack_acc: | 1525 | goto nack_acc; |
1491 | /* | ||
1492 | * A NAK will ACK earlier sends and RDMA | ||
1493 | * writes. Don't queue the NAK if a RDMA | ||
1494 | * read, atomic, or NAK is pending though. | ||
1495 | */ | ||
1496 | spin_lock(&qp->s_lock); | ||
1497 | if (qp->s_ack_state >= | ||
1498 | OP(RDMA_READ_REQUEST) && | ||
1499 | qp->s_ack_state != | ||
1500 | IB_OPCODE_ACKNOWLEDGE) { | ||
1501 | spin_unlock(&qp->s_lock); | ||
1502 | goto done; | ||
1503 | } | ||
1504 | /* XXX Flush WQEs */ | ||
1505 | qp->state = IB_QPS_ERR; | ||
1506 | qp->s_ack_state = OP(RDMA_WRITE_ONLY); | ||
1507 | qp->s_nak_state = | ||
1508 | IB_NAK_REMOTE_ACCESS_ERROR; | ||
1509 | qp->s_ack_psn = qp->r_psn; | ||
1510 | goto resched; | ||
1511 | } | ||
1512 | } else { | 1526 | } else { |
1513 | qp->r_sge.sg_list = NULL; | 1527 | qp->r_sge.sg_list = NULL; |
1514 | qp->r_sge.sge.mr = NULL; | 1528 | qp->r_sge.sge.mr = NULL; |
@@ -1535,12 +1549,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1535 | reth = (struct ib_reth *)data; | 1549 | reth = (struct ib_reth *)data; |
1536 | data += sizeof(*reth); | 1550 | data += sizeof(*reth); |
1537 | } | 1551 | } |
1538 | spin_lock(&qp->s_lock); | 1552 | if (unlikely(!(qp->qp_access_flags & |
1539 | if (qp->s_ack_state != OP(ACKNOWLEDGE) && | 1553 | IB_ACCESS_REMOTE_READ))) |
1540 | qp->s_ack_state >= IB_OPCODE_RDMA_READ_REQUEST) { | 1554 | goto nack_acc; |
1541 | spin_unlock(&qp->s_lock); | 1555 | spin_lock_irq(&qp->s_lock); |
1542 | goto done; | ||
1543 | } | ||
1544 | qp->s_rdma_len = be32_to_cpu(reth->length); | 1556 | qp->s_rdma_len = be32_to_cpu(reth->length); |
1545 | if (qp->s_rdma_len != 0) { | 1557 | if (qp->s_rdma_len != 0) { |
1546 | u32 rkey = be32_to_cpu(reth->rkey); | 1558 | u32 rkey = be32_to_cpu(reth->rkey); |
@@ -1552,7 +1564,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1552 | qp->s_rdma_len, vaddr, rkey, | 1564 | qp->s_rdma_len, vaddr, rkey, |
1553 | IB_ACCESS_REMOTE_READ); | 1565 | IB_ACCESS_REMOTE_READ); |
1554 | if (unlikely(!ok)) { | 1566 | if (unlikely(!ok)) { |
1555 | spin_unlock(&qp->s_lock); | 1567 | spin_unlock_irq(&qp->s_lock); |
1556 | goto nack_acc; | 1568 | goto nack_acc; |
1557 | } | 1569 | } |
1558 | /* | 1570 | /* |
@@ -1569,21 +1581,25 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1569 | qp->s_rdma_sge.sge.length = 0; | 1581 | qp->s_rdma_sge.sge.length = 0; |
1570 | qp->s_rdma_sge.sge.sge_length = 0; | 1582 | qp->s_rdma_sge.sge.sge_length = 0; |
1571 | } | 1583 | } |
1572 | if (unlikely(!(qp->qp_access_flags & | ||
1573 | IB_ACCESS_REMOTE_READ))) | ||
1574 | goto nack_acc; | ||
1575 | /* | 1584 | /* |
1576 | * We need to increment the MSN here instead of when we | 1585 | * We need to increment the MSN here instead of when we |
1577 | * finish sending the result since a duplicate request would | 1586 | * finish sending the result since a duplicate request would |
1578 | * increment it more than once. | 1587 | * increment it more than once. |
1579 | */ | 1588 | */ |
1580 | atomic_inc(&qp->msn); | 1589 | qp->r_msn++; |
1590 | |||
1581 | qp->s_ack_state = opcode; | 1591 | qp->s_ack_state = opcode; |
1582 | qp->s_nak_state = 0; | ||
1583 | qp->s_ack_psn = psn; | 1592 | qp->s_ack_psn = psn; |
1593 | spin_unlock_irq(&qp->s_lock); | ||
1594 | |||
1584 | qp->r_psn++; | 1595 | qp->r_psn++; |
1585 | qp->r_state = opcode; | 1596 | qp->r_state = opcode; |
1586 | goto rdmadone; | 1597 | qp->r_nak_state = 0; |
1598 | |||
1599 | /* Call ipath_do_rc_send() in another thread. */ | ||
1600 | tasklet_hi_schedule(&qp->s_task); | ||
1601 | |||
1602 | goto done; | ||
1587 | 1603 | ||
1588 | case OP(COMPARE_SWAP): | 1604 | case OP(COMPARE_SWAP): |
1589 | case OP(FETCH_ADD): { | 1605 | case OP(FETCH_ADD): { |
@@ -1612,7 +1628,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1612 | goto nack_acc; | 1628 | goto nack_acc; |
1613 | /* Perform atomic OP and save result. */ | 1629 | /* Perform atomic OP and save result. */ |
1614 | sdata = be64_to_cpu(ateth->swap_data); | 1630 | sdata = be64_to_cpu(ateth->swap_data); |
1615 | spin_lock(&dev->pending_lock); | 1631 | spin_lock_irq(&dev->pending_lock); |
1616 | qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; | 1632 | qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; |
1617 | if (opcode == OP(FETCH_ADD)) | 1633 | if (opcode == OP(FETCH_ADD)) |
1618 | *(u64 *) qp->r_sge.sge.vaddr = | 1634 | *(u64 *) qp->r_sge.sge.vaddr = |
@@ -1620,8 +1636,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1620 | else if (qp->r_atomic_data == | 1636 | else if (qp->r_atomic_data == |
1621 | be64_to_cpu(ateth->compare_data)) | 1637 | be64_to_cpu(ateth->compare_data)) |
1622 | *(u64 *) qp->r_sge.sge.vaddr = sdata; | 1638 | *(u64 *) qp->r_sge.sge.vaddr = sdata; |
1623 | spin_unlock(&dev->pending_lock); | 1639 | spin_unlock_irq(&dev->pending_lock); |
1624 | atomic_inc(&qp->msn); | 1640 | qp->r_msn++; |
1625 | qp->r_atomic_psn = psn & IPS_PSN_MASK; | 1641 | qp->r_atomic_psn = psn & IPS_PSN_MASK; |
1626 | psn |= 1 << 31; | 1642 | psn |= 1 << 31; |
1627 | break; | 1643 | break; |
@@ -1633,44 +1649,39 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1633 | } | 1649 | } |
1634 | qp->r_psn++; | 1650 | qp->r_psn++; |
1635 | qp->r_state = opcode; | 1651 | qp->r_state = opcode; |
1652 | qp->r_nak_state = 0; | ||
1636 | /* Send an ACK if requested or required. */ | 1653 | /* Send an ACK if requested or required. */ |
1637 | if (psn & (1 << 31)) { | 1654 | if (psn & (1 << 31)) { |
1638 | /* | 1655 | /* |
1639 | * Coalesce ACKs unless there is a RDMA READ or | 1656 | * Coalesce ACKs unless there is a RDMA READ or |
1640 | * ATOMIC pending. | 1657 | * ATOMIC pending. |
1641 | */ | 1658 | */ |
1642 | spin_lock(&qp->s_lock); | 1659 | if (qp->r_ack_state < OP(COMPARE_SWAP)) { |
1643 | if (qp->s_ack_state == OP(ACKNOWLEDGE) || | 1660 | qp->r_ack_state = opcode; |
1644 | qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST) { | 1661 | qp->r_ack_psn = psn; |
1645 | qp->s_ack_state = opcode; | ||
1646 | qp->s_nak_state = 0; | ||
1647 | qp->s_ack_psn = psn; | ||
1648 | qp->s_ack_atomic = qp->r_atomic_data; | ||
1649 | goto resched; | ||
1650 | } | 1662 | } |
1651 | spin_unlock(&qp->s_lock); | 1663 | goto send_ack; |
1652 | } | 1664 | } |
1653 | done: | 1665 | goto done; |
1654 | spin_unlock_irqrestore(&qp->r_rq.lock, flags); | ||
1655 | goto bail; | ||
1656 | 1666 | ||
1657 | resched: | 1667 | nack_acc: |
1658 | /* | 1668 | /* |
1659 | * Try to send ACK right away but not if ipath_do_rc_send() is | 1669 | * A NAK will ACK earlier sends and RDMA writes. |
1660 | * active. | 1670 | * Don't queue the NAK if a RDMA read, atomic, or NAK |
1671 | * is pending though. | ||
1661 | */ | 1672 | */ |
1662 | if (qp->s_hdrwords == 0 && | 1673 | if (qp->r_ack_state < OP(COMPARE_SWAP)) { |
1663 | (qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST || | 1674 | /* XXX Flush WQEs */ |
1664 | qp->s_ack_state >= IB_OPCODE_COMPARE_SWAP)) | 1675 | qp->state = IB_QPS_ERR; |
1676 | qp->r_ack_state = OP(RDMA_WRITE_ONLY); | ||
1677 | qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; | ||
1678 | qp->r_ack_psn = qp->r_psn; | ||
1679 | } | ||
1680 | send_ack: | ||
1681 | /* Send ACK right away unless the send tasklet has a pending ACK. */ | ||
1682 | if (qp->s_ack_state == OP(ACKNOWLEDGE)) | ||
1665 | send_rc_ack(qp); | 1683 | send_rc_ack(qp); |
1666 | 1684 | ||
1667 | rdmadone: | 1685 | done: |
1668 | spin_unlock(&qp->s_lock); | ||
1669 | spin_unlock_irqrestore(&qp->r_rq.lock, flags); | ||
1670 | |||
1671 | /* Call ipath_do_rc_send() in another thread. */ | ||
1672 | tasklet_hi_schedule(&qp->s_task); | ||
1673 | |||
1674 | bail: | ||
1675 | return; | 1686 | return; |
1676 | } | 1687 | } |