diff options
author | Ralph Campbell <ralph.campbell@qlogic.com> | 2007-03-15 17:44:51 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2007-04-18 23:20:55 -0400 |
commit | 3859e39d75b72f35f7d38c618fbbacb39a440c22 (patch) | |
tree | 51d57723574395b54914c08260b9d0a8467a91b1 /drivers/infiniband/hw/ipath/ipath_rc.c | |
parent | 7b21d26ddad6912bf345e8e88a51a5ce98a036ad (diff) |
IB/ipath: Support larger IB_QP_MAX_DEST_RD_ATOMIC and IB_QP_MAX_QP_RD_ATOMIC
This patch adds support for multiple RDMA reads and atomics to be sent
before an ACK is required to be seen by the requester.
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/ipath/ipath_rc.c')
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_rc.c | 800 |
1 files changed, 454 insertions, 346 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 5ff20cb0449..c9c3d7cd292 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c | |||
@@ -37,6 +37,19 @@ | |||
37 | /* cut down ridiculously long IB macro names */ | 37 | /* cut down ridiculously long IB macro names */ |
38 | #define OP(x) IB_OPCODE_RC_##x | 38 | #define OP(x) IB_OPCODE_RC_##x |
39 | 39 | ||
40 | static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe, | ||
41 | u32 psn, u32 pmtu) | ||
42 | { | ||
43 | u32 len; | ||
44 | |||
45 | len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu; | ||
46 | ss->sge = wqe->sg_list[0]; | ||
47 | ss->sg_list = wqe->sg_list + 1; | ||
48 | ss->num_sge = wqe->wr.num_sge; | ||
49 | ipath_skip_sge(ss, len); | ||
50 | return wqe->length - len; | ||
51 | } | ||
52 | |||
40 | /** | 53 | /** |
41 | * ipath_init_restart- initialize the qp->s_sge after a restart | 54 | * ipath_init_restart- initialize the qp->s_sge after a restart |
42 | * @qp: the QP who's SGE we're restarting | 55 | * @qp: the QP who's SGE we're restarting |
@@ -47,15 +60,9 @@ | |||
47 | static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) | 60 | static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) |
48 | { | 61 | { |
49 | struct ipath_ibdev *dev; | 62 | struct ipath_ibdev *dev; |
50 | u32 len; | ||
51 | 63 | ||
52 | len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * | 64 | qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, |
53 | ib_mtu_enum_to_int(qp->path_mtu); | 65 | ib_mtu_enum_to_int(qp->path_mtu)); |
54 | qp->s_sge.sge = wqe->sg_list[0]; | ||
55 | qp->s_sge.sg_list = wqe->sg_list + 1; | ||
56 | qp->s_sge.num_sge = wqe->wr.num_sge; | ||
57 | ipath_skip_sge(&qp->s_sge, len); | ||
58 | qp->s_len = wqe->length - len; | ||
59 | dev = to_idev(qp->ibqp.device); | 66 | dev = to_idev(qp->ibqp.device); |
60 | spin_lock(&dev->pending_lock); | 67 | spin_lock(&dev->pending_lock); |
61 | if (list_empty(&qp->timerwait)) | 68 | if (list_empty(&qp->timerwait)) |
@@ -70,107 +77,123 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) | |||
70 | * @ohdr: a pointer to the IB header being constructed | 77 | * @ohdr: a pointer to the IB header being constructed |
71 | * @pmtu: the path MTU | 78 | * @pmtu: the path MTU |
72 | * | 79 | * |
73 | * Return bth0 if constructed; otherwise, return 0. | 80 | * Return 1 if constructed; otherwise, return 0. |
81 | * Note that we are in the responder's side of the QP context. | ||
74 | * Note the QP s_lock must be held. | 82 | * Note the QP s_lock must be held. |
75 | */ | 83 | */ |
76 | u32 ipath_make_rc_ack(struct ipath_qp *qp, | 84 | static int ipath_make_rc_ack(struct ipath_qp *qp, |
77 | struct ipath_other_headers *ohdr, | 85 | struct ipath_other_headers *ohdr, |
78 | u32 pmtu) | 86 | u32 pmtu, u32 *bth0p, u32 *bth2p) |
79 | { | 87 | { |
88 | struct ipath_ack_entry *e; | ||
80 | u32 hwords; | 89 | u32 hwords; |
81 | u32 len; | 90 | u32 len; |
82 | u32 bth0; | 91 | u32 bth0; |
92 | u32 bth2; | ||
83 | 93 | ||
84 | /* header size in 32-bit words LRH+BTH = (8+12)/4. */ | 94 | /* header size in 32-bit words LRH+BTH = (8+12)/4. */ |
85 | hwords = 5; | 95 | hwords = 5; |
86 | 96 | ||
87 | /* | ||
88 | * Send a response. Note that we are in the responder's | ||
89 | * side of the QP context. | ||
90 | */ | ||
91 | switch (qp->s_ack_state) { | 97 | switch (qp->s_ack_state) { |
92 | case OP(RDMA_READ_REQUEST): | 98 | case OP(RDMA_READ_RESPONSE_LAST): |
93 | qp->s_cur_sge = &qp->s_rdma_sge; | 99 | case OP(RDMA_READ_RESPONSE_ONLY): |
94 | len = qp->s_rdma_len; | 100 | case OP(ATOMIC_ACKNOWLEDGE): |
95 | if (len > pmtu) { | 101 | qp->s_ack_state = OP(ACKNOWLEDGE); |
96 | len = pmtu; | 102 | /* FALLTHROUGH */ |
97 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); | 103 | case OP(ACKNOWLEDGE): |
98 | } else | 104 | /* Check for no next entry in the queue. */ |
99 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); | 105 | if (qp->r_head_ack_queue == qp->s_tail_ack_queue) { |
100 | qp->s_rdma_len -= len; | 106 | if (qp->s_flags & IPATH_S_ACK_PENDING) |
107 | goto normal; | ||
108 | goto bail; | ||
109 | } | ||
110 | |||
111 | e = &qp->s_ack_queue[qp->s_tail_ack_queue]; | ||
112 | if (e->opcode == OP(RDMA_READ_REQUEST)) { | ||
113 | /* Copy SGE state in case we need to resend */ | ||
114 | qp->s_ack_rdma_sge = e->rdma_sge; | ||
115 | qp->s_cur_sge = &qp->s_ack_rdma_sge; | ||
116 | len = e->rdma_sge.sge.sge_length; | ||
117 | if (len > pmtu) { | ||
118 | len = pmtu; | ||
119 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); | ||
120 | } else { | ||
121 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); | ||
122 | if (++qp->s_tail_ack_queue > | ||
123 | IPATH_MAX_RDMA_ATOMIC) | ||
124 | qp->s_tail_ack_queue = 0; | ||
125 | } | ||
126 | ohdr->u.aeth = ipath_compute_aeth(qp); | ||
127 | hwords++; | ||
128 | qp->s_ack_rdma_psn = e->psn; | ||
129 | bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; | ||
130 | } else { | ||
131 | /* COMPARE_SWAP or FETCH_ADD */ | ||
132 | qp->s_cur_sge = NULL; | ||
133 | len = 0; | ||
134 | qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); | ||
135 | ohdr->u.at.aeth = ipath_compute_aeth(qp); | ||
136 | ohdr->u.at.atomic_ack_eth[0] = | ||
137 | cpu_to_be32(e->atomic_data >> 32); | ||
138 | ohdr->u.at.atomic_ack_eth[1] = | ||
139 | cpu_to_be32(e->atomic_data); | ||
140 | hwords += sizeof(ohdr->u.at) / sizeof(u32); | ||
141 | bth2 = e->psn; | ||
142 | if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC) | ||
143 | qp->s_tail_ack_queue = 0; | ||
144 | } | ||
101 | bth0 = qp->s_ack_state << 24; | 145 | bth0 = qp->s_ack_state << 24; |
102 | ohdr->u.aeth = ipath_compute_aeth(qp); | ||
103 | hwords++; | ||
104 | break; | 146 | break; |
105 | 147 | ||
106 | case OP(RDMA_READ_RESPONSE_FIRST): | 148 | case OP(RDMA_READ_RESPONSE_FIRST): |
107 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); | 149 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); |
108 | /* FALLTHROUGH */ | 150 | /* FALLTHROUGH */ |
109 | case OP(RDMA_READ_RESPONSE_MIDDLE): | 151 | case OP(RDMA_READ_RESPONSE_MIDDLE): |
110 | qp->s_cur_sge = &qp->s_rdma_sge; | 152 | len = qp->s_ack_rdma_sge.sge.sge_length; |
111 | len = qp->s_rdma_len; | ||
112 | if (len > pmtu) | 153 | if (len > pmtu) |
113 | len = pmtu; | 154 | len = pmtu; |
114 | else { | 155 | else { |
115 | ohdr->u.aeth = ipath_compute_aeth(qp); | 156 | ohdr->u.aeth = ipath_compute_aeth(qp); |
116 | hwords++; | 157 | hwords++; |
117 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); | 158 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); |
159 | if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC) | ||
160 | qp->s_tail_ack_queue = 0; | ||
118 | } | 161 | } |
119 | qp->s_rdma_len -= len; | ||
120 | bth0 = qp->s_ack_state << 24; | 162 | bth0 = qp->s_ack_state << 24; |
121 | break; | 163 | bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; |
122 | |||
123 | case OP(RDMA_READ_RESPONSE_LAST): | ||
124 | case OP(RDMA_READ_RESPONSE_ONLY): | ||
125 | /* | ||
126 | * We have to prevent new requests from changing | ||
127 | * the r_sge state while a ipath_verbs_send() | ||
128 | * is in progress. | ||
129 | */ | ||
130 | qp->s_ack_state = OP(ACKNOWLEDGE); | ||
131 | bth0 = 0; | ||
132 | goto bail; | ||
133 | |||
134 | case OP(COMPARE_SWAP): | ||
135 | case OP(FETCH_ADD): | ||
136 | qp->s_cur_sge = NULL; | ||
137 | len = 0; | ||
138 | /* | ||
139 | * Set the s_ack_state so the receive interrupt handler | ||
140 | * won't try to send an ACK (out of order) until this one | ||
141 | * is actually sent. | ||
142 | */ | ||
143 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); | ||
144 | bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24; | ||
145 | ohdr->u.at.aeth = ipath_compute_aeth(qp); | ||
146 | ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data); | ||
147 | hwords += sizeof(ohdr->u.at) / 4; | ||
148 | break; | 164 | break; |
149 | 165 | ||
150 | default: | 166 | default: |
151 | /* Send a regular ACK. */ | 167 | normal: |
152 | qp->s_cur_sge = NULL; | ||
153 | len = 0; | ||
154 | /* | 168 | /* |
155 | * Set the s_ack_state so the receive interrupt handler | 169 | * Send a regular ACK. |
156 | * won't try to send an ACK (out of order) until this one | 170 | * Set the s_ack_state so we wait until after sending |
157 | * is actually sent. | 171 | * the ACK before setting s_ack_state to ACKNOWLEDGE |
172 | * (see above). | ||
158 | */ | 173 | */ |
159 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); | 174 | qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); |
160 | bth0 = OP(ACKNOWLEDGE) << 24; | 175 | qp->s_flags &= ~IPATH_S_ACK_PENDING; |
176 | qp->s_cur_sge = NULL; | ||
161 | if (qp->s_nak_state) | 177 | if (qp->s_nak_state) |
162 | ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | | 178 | ohdr->u.aeth = |
163 | (qp->s_nak_state << | 179 | cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | |
164 | IPATH_AETH_CREDIT_SHIFT)); | 180 | (qp->s_nak_state << |
181 | IPATH_AETH_CREDIT_SHIFT)); | ||
165 | else | 182 | else |
166 | ohdr->u.aeth = ipath_compute_aeth(qp); | 183 | ohdr->u.aeth = ipath_compute_aeth(qp); |
167 | hwords++; | 184 | hwords++; |
185 | len = 0; | ||
186 | bth0 = OP(ACKNOWLEDGE) << 24; | ||
187 | bth2 = qp->s_ack_psn & IPATH_PSN_MASK; | ||
168 | } | 188 | } |
169 | qp->s_hdrwords = hwords; | 189 | qp->s_hdrwords = hwords; |
170 | qp->s_cur_size = len; | 190 | qp->s_cur_size = len; |
191 | *bth0p = bth0; | ||
192 | *bth2p = bth2; | ||
193 | return 1; | ||
171 | 194 | ||
172 | bail: | 195 | bail: |
173 | return bth0; | 196 | return 0; |
174 | } | 197 | } |
175 | 198 | ||
176 | /** | 199 | /** |
@@ -197,9 +220,16 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
197 | u32 bth2; | 220 | u32 bth2; |
198 | char newreq; | 221 | char newreq; |
199 | 222 | ||
223 | /* Sending responses has higher priority over sending requests. */ | ||
224 | if ((qp->r_head_ack_queue != qp->s_tail_ack_queue || | ||
225 | (qp->s_flags & IPATH_S_ACK_PENDING) || | ||
226 | qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE) && | ||
227 | ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p)) | ||
228 | goto done; | ||
229 | |||
200 | if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || | 230 | if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || |
201 | qp->s_rnr_timeout) | 231 | qp->s_rnr_timeout) |
202 | goto done; | 232 | goto bail; |
203 | 233 | ||
204 | /* Limit the number of packets sent without an ACK. */ | 234 | /* Limit the number of packets sent without an ACK. */ |
205 | if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { | 235 | if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { |
@@ -210,7 +240,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
210 | list_add_tail(&qp->timerwait, | 240 | list_add_tail(&qp->timerwait, |
211 | &dev->pending[dev->pending_index]); | 241 | &dev->pending[dev->pending_index]); |
212 | spin_unlock(&dev->pending_lock); | 242 | spin_unlock(&dev->pending_lock); |
213 | goto done; | 243 | goto bail; |
214 | } | 244 | } |
215 | 245 | ||
216 | /* header size in 32-bit words LRH+BTH = (8+12)/4. */ | 246 | /* header size in 32-bit words LRH+BTH = (8+12)/4. */ |
@@ -232,7 +262,16 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
232 | if (qp->s_cur == qp->s_tail) { | 262 | if (qp->s_cur == qp->s_tail) { |
233 | /* Check if send work queue is empty. */ | 263 | /* Check if send work queue is empty. */ |
234 | if (qp->s_tail == qp->s_head) | 264 | if (qp->s_tail == qp->s_head) |
235 | goto done; | 265 | goto bail; |
266 | /* | ||
267 | * If a fence is requested, wait for previous | ||
268 | * RDMA read and atomic operations to finish. | ||
269 | */ | ||
270 | if ((wqe->wr.send_flags & IB_SEND_FENCE) && | ||
271 | qp->s_num_rd_atomic) { | ||
272 | qp->s_flags |= IPATH_S_FENCE_PENDING; | ||
273 | goto bail; | ||
274 | } | ||
236 | wqe->psn = qp->s_next_psn; | 275 | wqe->psn = qp->s_next_psn; |
237 | newreq = 1; | 276 | newreq = 1; |
238 | } | 277 | } |
@@ -250,7 +289,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
250 | /* If no credit, return. */ | 289 | /* If no credit, return. */ |
251 | if (qp->s_lsn != (u32) -1 && | 290 | if (qp->s_lsn != (u32) -1 && |
252 | ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) | 291 | ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) |
253 | goto done; | 292 | goto bail; |
254 | wqe->lpsn = wqe->psn; | 293 | wqe->lpsn = wqe->psn; |
255 | if (len > pmtu) { | 294 | if (len > pmtu) { |
256 | wqe->lpsn += (len - 1) / pmtu; | 295 | wqe->lpsn += (len - 1) / pmtu; |
@@ -281,13 +320,13 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
281 | /* If no credit, return. */ | 320 | /* If no credit, return. */ |
282 | if (qp->s_lsn != (u32) -1 && | 321 | if (qp->s_lsn != (u32) -1 && |
283 | ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) | 322 | ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) |
284 | goto done; | 323 | goto bail; |
285 | ohdr->u.rc.reth.vaddr = | 324 | ohdr->u.rc.reth.vaddr = |
286 | cpu_to_be64(wqe->wr.wr.rdma.remote_addr); | 325 | cpu_to_be64(wqe->wr.wr.rdma.remote_addr); |
287 | ohdr->u.rc.reth.rkey = | 326 | ohdr->u.rc.reth.rkey = |
288 | cpu_to_be32(wqe->wr.wr.rdma.rkey); | 327 | cpu_to_be32(wqe->wr.wr.rdma.rkey); |
289 | ohdr->u.rc.reth.length = cpu_to_be32(len); | 328 | ohdr->u.rc.reth.length = cpu_to_be32(len); |
290 | hwords += sizeof(struct ib_reth) / 4; | 329 | hwords += sizeof(struct ib_reth) / sizeof(u32); |
291 | wqe->lpsn = wqe->psn; | 330 | wqe->lpsn = wqe->psn; |
292 | if (len > pmtu) { | 331 | if (len > pmtu) { |
293 | wqe->lpsn += (len - 1) / pmtu; | 332 | wqe->lpsn += (len - 1) / pmtu; |
@@ -312,14 +351,17 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
312 | break; | 351 | break; |
313 | 352 | ||
314 | case IB_WR_RDMA_READ: | 353 | case IB_WR_RDMA_READ: |
315 | ohdr->u.rc.reth.vaddr = | 354 | /* |
316 | cpu_to_be64(wqe->wr.wr.rdma.remote_addr); | 355 | * Don't allow more operations to be started |
317 | ohdr->u.rc.reth.rkey = | 356 | * than the QP limits allow. |
318 | cpu_to_be32(wqe->wr.wr.rdma.rkey); | 357 | */ |
319 | ohdr->u.rc.reth.length = cpu_to_be32(len); | ||
320 | qp->s_state = OP(RDMA_READ_REQUEST); | ||
321 | hwords += sizeof(ohdr->u.rc.reth) / 4; | ||
322 | if (newreq) { | 358 | if (newreq) { |
359 | if (qp->s_num_rd_atomic >= | ||
360 | qp->s_max_rd_atomic) { | ||
361 | qp->s_flags |= IPATH_S_RDMAR_PENDING; | ||
362 | goto bail; | ||
363 | } | ||
364 | qp->s_num_rd_atomic++; | ||
323 | if (qp->s_lsn != (u32) -1) | 365 | if (qp->s_lsn != (u32) -1) |
324 | qp->s_lsn++; | 366 | qp->s_lsn++; |
325 | /* | 367 | /* |
@@ -330,6 +372,13 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
330 | qp->s_next_psn += (len - 1) / pmtu; | 372 | qp->s_next_psn += (len - 1) / pmtu; |
331 | wqe->lpsn = qp->s_next_psn++; | 373 | wqe->lpsn = qp->s_next_psn++; |
332 | } | 374 | } |
375 | ohdr->u.rc.reth.vaddr = | ||
376 | cpu_to_be64(wqe->wr.wr.rdma.remote_addr); | ||
377 | ohdr->u.rc.reth.rkey = | ||
378 | cpu_to_be32(wqe->wr.wr.rdma.rkey); | ||
379 | ohdr->u.rc.reth.length = cpu_to_be32(len); | ||
380 | qp->s_state = OP(RDMA_READ_REQUEST); | ||
381 | hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); | ||
333 | ss = NULL; | 382 | ss = NULL; |
334 | len = 0; | 383 | len = 0; |
335 | if (++qp->s_cur == qp->s_size) | 384 | if (++qp->s_cur == qp->s_size) |
@@ -338,32 +387,48 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
338 | 387 | ||
339 | case IB_WR_ATOMIC_CMP_AND_SWP: | 388 | case IB_WR_ATOMIC_CMP_AND_SWP: |
340 | case IB_WR_ATOMIC_FETCH_AND_ADD: | 389 | case IB_WR_ATOMIC_FETCH_AND_ADD: |
341 | if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) | 390 | /* |
342 | qp->s_state = OP(COMPARE_SWAP); | 391 | * Don't allow more operations to be started |
343 | else | 392 | * than the QP limits allow. |
344 | qp->s_state = OP(FETCH_ADD); | 393 | */ |
345 | ohdr->u.atomic_eth.vaddr = cpu_to_be64( | ||
346 | wqe->wr.wr.atomic.remote_addr); | ||
347 | ohdr->u.atomic_eth.rkey = cpu_to_be32( | ||
348 | wqe->wr.wr.atomic.rkey); | ||
349 | ohdr->u.atomic_eth.swap_data = cpu_to_be64( | ||
350 | wqe->wr.wr.atomic.swap); | ||
351 | ohdr->u.atomic_eth.compare_data = cpu_to_be64( | ||
352 | wqe->wr.wr.atomic.compare_add); | ||
353 | hwords += sizeof(struct ib_atomic_eth) / 4; | ||
354 | if (newreq) { | 394 | if (newreq) { |
395 | if (qp->s_num_rd_atomic >= | ||
396 | qp->s_max_rd_atomic) { | ||
397 | qp->s_flags |= IPATH_S_RDMAR_PENDING; | ||
398 | goto bail; | ||
399 | } | ||
400 | qp->s_num_rd_atomic++; | ||
355 | if (qp->s_lsn != (u32) -1) | 401 | if (qp->s_lsn != (u32) -1) |
356 | qp->s_lsn++; | 402 | qp->s_lsn++; |
357 | wqe->lpsn = wqe->psn; | 403 | wqe->lpsn = wqe->psn; |
358 | } | 404 | } |
359 | if (++qp->s_cur == qp->s_size) | 405 | if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { |
360 | qp->s_cur = 0; | 406 | qp->s_state = OP(COMPARE_SWAP); |
407 | ohdr->u.atomic_eth.swap_data = cpu_to_be64( | ||
408 | wqe->wr.wr.atomic.swap); | ||
409 | ohdr->u.atomic_eth.compare_data = cpu_to_be64( | ||
410 | wqe->wr.wr.atomic.compare_add); | ||
411 | } else { | ||
412 | qp->s_state = OP(FETCH_ADD); | ||
413 | ohdr->u.atomic_eth.swap_data = cpu_to_be64( | ||
414 | wqe->wr.wr.atomic.compare_add); | ||
415 | ohdr->u.atomic_eth.compare_data = 0; | ||
416 | } | ||
417 | ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( | ||
418 | wqe->wr.wr.atomic.remote_addr >> 32); | ||
419 | ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( | ||
420 | wqe->wr.wr.atomic.remote_addr); | ||
421 | ohdr->u.atomic_eth.rkey = cpu_to_be32( | ||
422 | wqe->wr.wr.atomic.rkey); | ||
423 | hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); | ||
361 | ss = NULL; | 424 | ss = NULL; |
362 | len = 0; | 425 | len = 0; |
426 | if (++qp->s_cur == qp->s_size) | ||
427 | qp->s_cur = 0; | ||
363 | break; | 428 | break; |
364 | 429 | ||
365 | default: | 430 | default: |
366 | goto done; | 431 | goto bail; |
367 | } | 432 | } |
368 | qp->s_sge.sge = wqe->sg_list[0]; | 433 | qp->s_sge.sge = wqe->sg_list[0]; |
369 | qp->s_sge.sg_list = wqe->sg_list + 1; | 434 | qp->s_sge.sg_list = wqe->sg_list + 1; |
@@ -479,7 +544,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
479 | cpu_to_be32(wqe->wr.wr.rdma.rkey); | 544 | cpu_to_be32(wqe->wr.wr.rdma.rkey); |
480 | ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); | 545 | ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); |
481 | qp->s_state = OP(RDMA_READ_REQUEST); | 546 | qp->s_state = OP(RDMA_READ_REQUEST); |
482 | hwords += sizeof(ohdr->u.rc.reth) / 4; | 547 | hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); |
483 | bth2 = qp->s_psn++ & IPATH_PSN_MASK; | 548 | bth2 = qp->s_psn++ & IPATH_PSN_MASK; |
484 | if ((int)(qp->s_psn - qp->s_next_psn) > 0) | 549 | if ((int)(qp->s_psn - qp->s_next_psn) > 0) |
485 | qp->s_next_psn = qp->s_psn; | 550 | qp->s_next_psn = qp->s_psn; |
@@ -489,20 +554,6 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
489 | if (qp->s_cur == qp->s_size) | 554 | if (qp->s_cur == qp->s_size) |
490 | qp->s_cur = 0; | 555 | qp->s_cur = 0; |
491 | break; | 556 | break; |
492 | |||
493 | case OP(RDMA_READ_REQUEST): | ||
494 | case OP(COMPARE_SWAP): | ||
495 | case OP(FETCH_ADD): | ||
496 | /* | ||
497 | * We shouldn't start anything new until this request is | ||
498 | * finished. The ACK will handle rescheduling us. XXX The | ||
499 | * number of outstanding ones is negotiated at connection | ||
500 | * setup time (see pg. 258,289)? XXX Also, if we support | ||
501 | * multiple outstanding requests, we need to check the WQE | ||
502 | * IB_SEND_FENCE flag and not send a new request if a RDMA | ||
503 | * read or atomic is pending. | ||
504 | */ | ||
505 | goto done; | ||
506 | } | 557 | } |
507 | if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) | 558 | if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) |
508 | bth2 |= 1 << 31; /* Request ACK. */ | 559 | bth2 |= 1 << 31; /* Request ACK. */ |
@@ -512,9 +563,10 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
512 | qp->s_cur_size = len; | 563 | qp->s_cur_size = len; |
513 | *bth0p = bth0 | (qp->s_state << 24); | 564 | *bth0p = bth0 | (qp->s_state << 24); |
514 | *bth2p = bth2; | 565 | *bth2p = bth2; |
566 | done: | ||
515 | return 1; | 567 | return 1; |
516 | 568 | ||
517 | done: | 569 | bail: |
518 | return 0; | 570 | return 0; |
519 | } | 571 | } |
520 | 572 | ||
@@ -524,7 +576,8 @@ done: | |||
524 | * | 576 | * |
525 | * This is called from ipath_rc_rcv() and only uses the receive | 577 | * This is called from ipath_rc_rcv() and only uses the receive |
526 | * side QP state. | 578 | * side QP state. |
527 | * Note that RDMA reads are handled in the send side QP state and tasklet. | 579 | * Note that RDMA reads and atomics are handled in the |
580 | * send side QP state and tasklet. | ||
528 | */ | 581 | */ |
529 | static void send_rc_ack(struct ipath_qp *qp) | 582 | static void send_rc_ack(struct ipath_qp *qp) |
530 | { | 583 | { |
@@ -535,6 +588,10 @@ static void send_rc_ack(struct ipath_qp *qp) | |||
535 | struct ipath_ib_header hdr; | 588 | struct ipath_ib_header hdr; |
536 | struct ipath_other_headers *ohdr; | 589 | struct ipath_other_headers *ohdr; |
537 | 590 | ||
591 | /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ | ||
592 | if (qp->r_head_ack_queue != qp->s_tail_ack_queue) | ||
593 | goto queue_ack; | ||
594 | |||
538 | /* Construct the header. */ | 595 | /* Construct the header. */ |
539 | ohdr = &hdr.u.oth; | 596 | ohdr = &hdr.u.oth; |
540 | lrh0 = IPATH_LRH_BTH; | 597 | lrh0 = IPATH_LRH_BTH; |
@@ -548,19 +605,14 @@ static void send_rc_ack(struct ipath_qp *qp) | |||
548 | lrh0 = IPATH_LRH_GRH; | 605 | lrh0 = IPATH_LRH_GRH; |
549 | } | 606 | } |
550 | /* read pkey_index w/o lock (its atomic) */ | 607 | /* read pkey_index w/o lock (its atomic) */ |
551 | bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index); | 608 | bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) | |
609 | OP(ACKNOWLEDGE) << 24; | ||
552 | if (qp->r_nak_state) | 610 | if (qp->r_nak_state) |
553 | ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | | 611 | ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | |
554 | (qp->r_nak_state << | 612 | (qp->r_nak_state << |
555 | IPATH_AETH_CREDIT_SHIFT)); | 613 | IPATH_AETH_CREDIT_SHIFT)); |
556 | else | 614 | else |
557 | ohdr->u.aeth = ipath_compute_aeth(qp); | 615 | ohdr->u.aeth = ipath_compute_aeth(qp); |
558 | if (qp->r_ack_state >= OP(COMPARE_SWAP)) { | ||
559 | bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24; | ||
560 | ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data); | ||
561 | hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4; | ||
562 | } else | ||
563 | bth0 |= OP(ACKNOWLEDGE) << 24; | ||
564 | lrh0 |= qp->remote_ah_attr.sl << 4; | 616 | lrh0 |= qp->remote_ah_attr.sl << 4; |
565 | hdr.lrh[0] = cpu_to_be16(lrh0); | 617 | hdr.lrh[0] = cpu_to_be16(lrh0); |
566 | hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); | 618 | hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); |
@@ -574,31 +626,31 @@ static void send_rc_ack(struct ipath_qp *qp) | |||
574 | * If we can send the ACK, clear the ACK state. | 626 | * If we can send the ACK, clear the ACK state. |
575 | */ | 627 | */ |
576 | if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) { | 628 | if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) { |
577 | qp->r_ack_state = OP(ACKNOWLEDGE); | ||
578 | dev->n_unicast_xmit++; | 629 | dev->n_unicast_xmit++; |
579 | } else { | 630 | goto done; |
580 | /* | ||
581 | * We are out of PIO buffers at the moment. | ||
582 | * Pass responsibility for sending the ACK to the | ||
583 | * send tasklet so that when a PIO buffer becomes | ||
584 | * available, the ACK is sent ahead of other outgoing | ||
585 | * packets. | ||
586 | */ | ||
587 | dev->n_rc_qacks++; | ||
588 | spin_lock_irq(&qp->s_lock); | ||
589 | /* Don't coalesce if a RDMA read or atomic is pending. */ | ||
590 | if (qp->s_ack_state == OP(ACKNOWLEDGE) || | ||
591 | qp->s_ack_state < OP(RDMA_READ_REQUEST)) { | ||
592 | qp->s_ack_state = qp->r_ack_state; | ||
593 | qp->s_nak_state = qp->r_nak_state; | ||
594 | qp->s_ack_psn = qp->r_ack_psn; | ||
595 | qp->r_ack_state = OP(ACKNOWLEDGE); | ||
596 | } | ||
597 | spin_unlock_irq(&qp->s_lock); | ||
598 | |||
599 | /* Call ipath_do_rc_send() in another thread. */ | ||
600 | tasklet_hi_schedule(&qp->s_task); | ||
601 | } | 631 | } |
632 | |||
633 | /* | ||
634 | * We are out of PIO buffers at the moment. | ||
635 | * Pass responsibility for sending the ACK to the | ||
636 | * send tasklet so that when a PIO buffer becomes | ||
637 | * available, the ACK is sent ahead of other outgoing | ||
638 | * packets. | ||
639 | */ | ||
640 | dev->n_rc_qacks++; | ||
641 | |||
642 | queue_ack: | ||
643 | spin_lock_irq(&qp->s_lock); | ||
644 | qp->s_flags |= IPATH_S_ACK_PENDING; | ||
645 | qp->s_nak_state = qp->r_nak_state; | ||
646 | qp->s_ack_psn = qp->r_ack_psn; | ||
647 | spin_unlock_irq(&qp->s_lock); | ||
648 | |||
649 | /* Call ipath_do_rc_send() in another thread. */ | ||
650 | tasklet_hi_schedule(&qp->s_task); | ||
651 | |||
652 | done: | ||
653 | return; | ||
602 | } | 654 | } |
603 | 655 | ||
604 | /** | 656 | /** |
@@ -775,10 +827,6 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) | |||
775 | list_del_init(&qp->timerwait); | 827 | list_del_init(&qp->timerwait); |
776 | spin_unlock(&dev->pending_lock); | 828 | spin_unlock(&dev->pending_lock); |
777 | 829 | ||
778 | /* Nothing is pending to ACK/NAK. */ | ||
779 | if (unlikely(qp->s_last == qp->s_tail)) | ||
780 | goto bail; | ||
781 | |||
782 | /* | 830 | /* |
783 | * Note that NAKs implicitly ACK outstanding SEND and RDMA write | 831 | * Note that NAKs implicitly ACK outstanding SEND and RDMA write |
784 | * requests and implicitly NAK RDMA read and atomic requests issued | 832 | * requests and implicitly NAK RDMA read and atomic requests issued |
@@ -806,7 +854,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) | |||
806 | */ | 854 | */ |
807 | if ((wqe->wr.opcode == IB_WR_RDMA_READ && | 855 | if ((wqe->wr.opcode == IB_WR_RDMA_READ && |
808 | (opcode != OP(RDMA_READ_RESPONSE_LAST) || | 856 | (opcode != OP(RDMA_READ_RESPONSE_LAST) || |
809 | ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || | 857 | ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || |
810 | ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || | 858 | ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || |
811 | wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && | 859 | wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && |
812 | (opcode != OP(ATOMIC_ACKNOWLEDGE) || | 860 | (opcode != OP(ATOMIC_ACKNOWLEDGE) || |
@@ -824,12 +872,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) | |||
824 | */ | 872 | */ |
825 | goto bail; | 873 | goto bail; |
826 | } | 874 | } |
827 | if (wqe->wr.opcode == IB_WR_RDMA_READ || | 875 | if (qp->s_num_rd_atomic && |
828 | wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || | 876 | (wqe->wr.opcode == IB_WR_RDMA_READ || |
829 | wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) | 877 | wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || |
830 | tasklet_hi_schedule(&qp->s_task); | 878 | wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { |
879 | qp->s_num_rd_atomic--; | ||
880 | /* Restart sending task if fence is complete */ | ||
881 | if ((qp->s_flags & IPATH_S_FENCE_PENDING) && | ||
882 | !qp->s_num_rd_atomic) { | ||
883 | qp->s_flags &= ~IPATH_S_FENCE_PENDING; | ||
884 | tasklet_hi_schedule(&qp->s_task); | ||
885 | } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) { | ||
886 | qp->s_flags &= ~IPATH_S_RDMAR_PENDING; | ||
887 | tasklet_hi_schedule(&qp->s_task); | ||
888 | } | ||
889 | } | ||
831 | /* Post a send completion queue entry if requested. */ | 890 | /* Post a send completion queue entry if requested. */ |
832 | if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || | 891 | if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || |
833 | (wqe->wr.send_flags & IB_SEND_SIGNALED)) { | 892 | (wqe->wr.send_flags & IB_SEND_SIGNALED)) { |
834 | wc.wr_id = wqe->wr.wr_id; | 893 | wc.wr_id = wqe->wr.wr_id; |
835 | wc.status = IB_WC_SUCCESS; | 894 | wc.status = IB_WC_SUCCESS; |
@@ -1003,6 +1062,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, | |||
1003 | u32 psn, u32 hdrsize, u32 pmtu, | 1062 | u32 psn, u32 hdrsize, u32 pmtu, |
1004 | int header_in_data) | 1063 | int header_in_data) |
1005 | { | 1064 | { |
1065 | struct ipath_swqe *wqe; | ||
1006 | unsigned long flags; | 1066 | unsigned long flags; |
1007 | struct ib_wc wc; | 1067 | struct ib_wc wc; |
1008 | int diff; | 1068 | int diff; |
@@ -1032,6 +1092,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, | |||
1032 | goto ack_done; | 1092 | goto ack_done; |
1033 | } | 1093 | } |
1034 | 1094 | ||
1095 | if (unlikely(qp->s_last == qp->s_tail)) | ||
1096 | goto ack_done; | ||
1097 | wqe = get_swqe_ptr(qp, qp->s_last); | ||
1098 | |||
1035 | switch (opcode) { | 1099 | switch (opcode) { |
1036 | case OP(ACKNOWLEDGE): | 1100 | case OP(ACKNOWLEDGE): |
1037 | case OP(ATOMIC_ACKNOWLEDGE): | 1101 | case OP(ATOMIC_ACKNOWLEDGE): |
@@ -1042,38 +1106,49 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, | |||
1042 | aeth = be32_to_cpu(((__be32 *) data)[0]); | 1106 | aeth = be32_to_cpu(((__be32 *) data)[0]); |
1043 | data += sizeof(__be32); | 1107 | data += sizeof(__be32); |
1044 | } | 1108 | } |
1045 | if (opcode == OP(ATOMIC_ACKNOWLEDGE)) | 1109 | if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { |
1046 | *(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data; | 1110 | u64 val; |
1111 | |||
1112 | if (!header_in_data) { | ||
1113 | __be32 *p = ohdr->u.at.atomic_ack_eth; | ||
1114 | |||
1115 | val = ((u64) be32_to_cpu(p[0]) << 32) | | ||
1116 | be32_to_cpu(p[1]); | ||
1117 | } else | ||
1118 | val = be64_to_cpu(((__be64 *) data)[0]); | ||
1119 | *(u64 *) wqe->sg_list[0].vaddr = val; | ||
1120 | } | ||
1047 | if (!do_rc_ack(qp, aeth, psn, opcode) || | 1121 | if (!do_rc_ack(qp, aeth, psn, opcode) || |
1048 | opcode != OP(RDMA_READ_RESPONSE_FIRST)) | 1122 | opcode != OP(RDMA_READ_RESPONSE_FIRST)) |
1049 | goto ack_done; | 1123 | goto ack_done; |
1050 | hdrsize += 4; | 1124 | hdrsize += 4; |
1125 | if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) | ||
1126 | goto ack_done; | ||
1051 | /* | 1127 | /* |
1052 | * do_rc_ack() has already checked the PSN so skip | 1128 | * If this is a response to a resent RDMA read, we |
1053 | * the sequence check. | 1129 | * have to be careful to copy the data to the right |
1130 | * location. | ||
1054 | */ | 1131 | */ |
1055 | goto rdma_read; | 1132 | qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, |
1133 | wqe, psn, pmtu); | ||
1134 | goto read_middle; | ||
1056 | 1135 | ||
1057 | case OP(RDMA_READ_RESPONSE_MIDDLE): | 1136 | case OP(RDMA_READ_RESPONSE_MIDDLE): |
1058 | /* no AETH, no ACK */ | 1137 | /* no AETH, no ACK */ |
1059 | if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { | 1138 | if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { |
1060 | dev->n_rdma_seq++; | 1139 | dev->n_rdma_seq++; |
1061 | if (qp->s_last != qp->s_tail) | 1140 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); |
1062 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); | ||
1063 | goto ack_done; | 1141 | goto ack_done; |
1064 | } | 1142 | } |
1065 | rdma_read: | 1143 | if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) |
1066 | if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) | ||
1067 | goto ack_done; | 1144 | goto ack_done; |
1145 | read_middle: | ||
1068 | if (unlikely(tlen != (hdrsize + pmtu + 4))) | 1146 | if (unlikely(tlen != (hdrsize + pmtu + 4))) |
1069 | goto ack_done; | 1147 | goto ack_done; |
1070 | if (unlikely(pmtu >= qp->s_len)) | 1148 | if (unlikely(pmtu >= qp->s_rdma_read_len)) |
1071 | goto ack_done; | 1149 | goto ack_done; |
1150 | |||
1072 | /* We got a response so update the timeout. */ | 1151 | /* We got a response so update the timeout. */ |
1073 | if (unlikely(qp->s_last == qp->s_tail || | ||
1074 | get_swqe_ptr(qp, qp->s_last)->wr.opcode != | ||
1075 | IB_WR_RDMA_READ)) | ||
1076 | goto ack_done; | ||
1077 | spin_lock(&dev->pending_lock); | 1152 | spin_lock(&dev->pending_lock); |
1078 | if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait)) | 1153 | if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait)) |
1079 | list_move_tail(&qp->timerwait, | 1154 | list_move_tail(&qp->timerwait, |
@@ -1082,27 +1157,41 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, | |||
1082 | /* | 1157 | /* |
1083 | * Update the RDMA receive state but do the copy w/o | 1158 | * Update the RDMA receive state but do the copy w/o |
1084 | * holding the locks and blocking interrupts. | 1159 | * holding the locks and blocking interrupts. |
1085 | * XXX Yet another place that affects relaxed RDMA order | ||
1086 | * since we don't want s_sge modified. | ||
1087 | */ | 1160 | */ |
1088 | qp->s_len -= pmtu; | 1161 | qp->s_rdma_read_len -= pmtu; |
1089 | update_last_psn(qp, psn); | 1162 | update_last_psn(qp, psn); |
1090 | spin_unlock_irqrestore(&qp->s_lock, flags); | 1163 | spin_unlock_irqrestore(&qp->s_lock, flags); |
1091 | ipath_copy_sge(&qp->s_sge, data, pmtu); | 1164 | ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu); |
1092 | goto bail; | 1165 | goto bail; |
1093 | 1166 | ||
1167 | case OP(RDMA_READ_RESPONSE_ONLY): | ||
1168 | if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { | ||
1169 | dev->n_rdma_seq++; | ||
1170 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); | ||
1171 | goto ack_done; | ||
1172 | } | ||
1173 | if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) | ||
1174 | goto ack_done; | ||
1175 | /* | ||
1176 | * If this is a response to a resent RDMA read, we | ||
1177 | * have to be careful to copy the data to the right | ||
1178 | * location. | ||
1179 | * XXX should check PSN and wqe opcode first. | ||
1180 | */ | ||
1181 | qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, | ||
1182 | wqe, psn, pmtu); | ||
1183 | goto read_last; | ||
1184 | |||
1094 | case OP(RDMA_READ_RESPONSE_LAST): | 1185 | case OP(RDMA_READ_RESPONSE_LAST): |
1095 | /* ACKs READ req. */ | 1186 | /* ACKs READ req. */ |
1096 | if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { | 1187 | if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { |
1097 | dev->n_rdma_seq++; | 1188 | dev->n_rdma_seq++; |
1098 | if (qp->s_last != qp->s_tail) | 1189 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); |
1099 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); | ||
1100 | goto ack_done; | 1190 | goto ack_done; |
1101 | } | 1191 | } |
1102 | /* FALLTHROUGH */ | 1192 | if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) |
1103 | case OP(RDMA_READ_RESPONSE_ONLY): | ||
1104 | if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) | ||
1105 | goto ack_done; | 1193 | goto ack_done; |
1194 | read_last: | ||
1106 | /* | 1195 | /* |
1107 | * Get the number of bytes the message was padded by. | 1196 | * Get the number of bytes the message was padded by. |
1108 | */ | 1197 | */ |
@@ -1117,7 +1206,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, | |||
1117 | goto ack_done; | 1206 | goto ack_done; |
1118 | } | 1207 | } |
1119 | tlen -= hdrsize + pad + 8; | 1208 | tlen -= hdrsize + pad + 8; |
1120 | if (unlikely(tlen != qp->s_len)) { | 1209 | if (unlikely(tlen != qp->s_rdma_read_len)) { |
1121 | /* XXX Need to generate an error CQ entry. */ | 1210 | /* XXX Need to generate an error CQ entry. */ |
1122 | goto ack_done; | 1211 | goto ack_done; |
1123 | } | 1212 | } |
@@ -1127,17 +1216,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, | |||
1127 | aeth = be32_to_cpu(((__be32 *) data)[0]); | 1216 | aeth = be32_to_cpu(((__be32 *) data)[0]); |
1128 | data += sizeof(__be32); | 1217 | data += sizeof(__be32); |
1129 | } | 1218 | } |
1130 | ipath_copy_sge(&qp->s_sge, data, tlen); | 1219 | ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen); |
1131 | if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) { | 1220 | (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST)); |
1132 | /* | ||
1133 | * Change the state so we contimue | ||
1134 | * processing new requests and wake up the | ||
1135 | * tasklet if there are posted sends. | ||
1136 | */ | ||
1137 | qp->s_state = OP(SEND_LAST); | ||
1138 | if (qp->s_tail != qp->s_head) | ||
1139 | tasklet_hi_schedule(&qp->s_task); | ||
1140 | } | ||
1141 | goto ack_done; | 1221 | goto ack_done; |
1142 | } | 1222 | } |
1143 | 1223 | ||
@@ -1162,7 +1242,7 @@ bail: | |||
1162 | * incoming RC packet for the given QP. | 1242 | * incoming RC packet for the given QP. |
1163 | * Called at interrupt level. | 1243 | * Called at interrupt level. |
1164 | * Return 1 if no more processing is needed; otherwise return 0 to | 1244 | * Return 1 if no more processing is needed; otherwise return 0 to |
1165 | * schedule a response to be sent and the s_lock unlocked. | 1245 | * schedule a response to be sent. |
1166 | */ | 1246 | */ |
1167 | static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | 1247 | static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, |
1168 | struct ipath_other_headers *ohdr, | 1248 | struct ipath_other_headers *ohdr, |
@@ -1173,25 +1253,23 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | |||
1173 | int diff, | 1253 | int diff, |
1174 | int header_in_data) | 1254 | int header_in_data) |
1175 | { | 1255 | { |
1176 | struct ib_reth *reth; | 1256 | struct ipath_ack_entry *e; |
1257 | u8 i, prev; | ||
1258 | int old_req; | ||
1177 | 1259 | ||
1178 | if (diff > 0) { | 1260 | if (diff > 0) { |
1179 | /* | 1261 | /* |
1180 | * Packet sequence error. | 1262 | * Packet sequence error. |
1181 | * A NAK will ACK earlier sends and RDMA writes. | 1263 | * A NAK will ACK earlier sends and RDMA writes. |
1182 | * Don't queue the NAK if a RDMA read, atomic, or | 1264 | * Don't queue the NAK if we already sent one. |
1183 | * NAK is pending though. | ||
1184 | */ | 1265 | */ |
1185 | if (qp->s_ack_state != OP(ACKNOWLEDGE) || | 1266 | if (!qp->r_nak_state) { |
1186 | qp->r_nak_state != 0) | ||
1187 | goto done; | ||
1188 | if (qp->r_ack_state < OP(COMPARE_SWAP)) { | ||
1189 | qp->r_ack_state = OP(SEND_ONLY); | ||
1190 | qp->r_nak_state = IB_NAK_PSN_ERROR; | 1267 | qp->r_nak_state = IB_NAK_PSN_ERROR; |
1191 | /* Use the expected PSN. */ | 1268 | /* Use the expected PSN. */ |
1192 | qp->r_ack_psn = qp->r_psn; | 1269 | qp->r_ack_psn = qp->r_psn; |
1270 | goto send_ack; | ||
1193 | } | 1271 | } |
1194 | goto send_ack; | 1272 | goto done; |
1195 | } | 1273 | } |
1196 | 1274 | ||
1197 | /* | 1275 | /* |
@@ -1204,8 +1282,46 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | |||
1204 | * can coalesce an outstanding duplicate ACK. We have to | 1282 | * can coalesce an outstanding duplicate ACK. We have to |
1205 | * send the earliest so that RDMA reads can be restarted at | 1283 | * send the earliest so that RDMA reads can be restarted at |
1206 | * the requester's expected PSN. | 1284 | * the requester's expected PSN. |
1285 | * | ||
1286 | * First, find where this duplicate PSN falls within the | ||
1287 | * ACKs previously sent. | ||
1207 | */ | 1288 | */ |
1208 | if (opcode == OP(RDMA_READ_REQUEST)) { | 1289 | psn &= IPATH_PSN_MASK; |
1290 | e = NULL; | ||
1291 | old_req = 1; | ||
1292 | spin_lock_irq(&qp->s_lock); | ||
1293 | for (i = qp->r_head_ack_queue; ; i = prev) { | ||
1294 | if (i == qp->s_tail_ack_queue) | ||
1295 | old_req = 0; | ||
1296 | if (i) | ||
1297 | prev = i - 1; | ||
1298 | else | ||
1299 | prev = IPATH_MAX_RDMA_ATOMIC; | ||
1300 | if (prev == qp->r_head_ack_queue) { | ||
1301 | e = NULL; | ||
1302 | break; | ||
1303 | } | ||
1304 | e = &qp->s_ack_queue[prev]; | ||
1305 | if (!e->opcode) { | ||
1306 | e = NULL; | ||
1307 | break; | ||
1308 | } | ||
1309 | if (ipath_cmp24(psn, e->psn) >= 0) | ||
1310 | break; | ||
1311 | } | ||
1312 | switch (opcode) { | ||
1313 | case OP(RDMA_READ_REQUEST): { | ||
1314 | struct ib_reth *reth; | ||
1315 | u32 offset; | ||
1316 | u32 len; | ||
1317 | |||
1318 | /* | ||
1319 | * If we didn't find the RDMA read request in the ack queue, | ||
1320 | * or the send tasklet is already backed up to send an | ||
1321 | * earlier entry, we can ignore this request. | ||
1322 | */ | ||
1323 | if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req) | ||
1324 | goto unlock_done; | ||
1209 | /* RETH comes after BTH */ | 1325 | /* RETH comes after BTH */ |
1210 | if (!header_in_data) | 1326 | if (!header_in_data) |
1211 | reth = &ohdr->u.rc.reth; | 1327 | reth = &ohdr->u.rc.reth; |
@@ -1214,88 +1330,87 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | |||
1214 | data += sizeof(*reth); | 1330 | data += sizeof(*reth); |
1215 | } | 1331 | } |
1216 | /* | 1332 | /* |
1217 | * If we receive a duplicate RDMA request, it means the | 1333 | * Address range must be a subset of the original |
1218 | * requester saw a sequence error and needs to restart | 1334 | * request and start on pmtu boundaries. |
1219 | * from an earlier point. We can abort the current | 1335 | * We reuse the old ack_queue slot since the requester |
1220 | * RDMA read send in that case. | 1336 | * should not back up and request an earlier PSN for the |
1337 | * same request. | ||
1221 | */ | 1338 | */ |
1222 | spin_lock_irq(&qp->s_lock); | 1339 | offset = ((psn - e->psn) & IPATH_PSN_MASK) * |
1223 | if (qp->s_ack_state != OP(ACKNOWLEDGE) && | 1340 | ib_mtu_enum_to_int(qp->path_mtu); |
1224 | (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) { | 1341 | len = be32_to_cpu(reth->length); |
1225 | /* | 1342 | if (unlikely(offset + len > e->rdma_sge.sge.sge_length)) |
1226 | * We are already sending earlier requested data. | 1343 | goto unlock_done; |
1227 | * Don't abort it to send later out of sequence data. | 1344 | if (len != 0) { |
1228 | */ | ||
1229 | spin_unlock_irq(&qp->s_lock); | ||
1230 | goto done; | ||
1231 | } | ||
1232 | qp->s_rdma_len = be32_to_cpu(reth->length); | ||
1233 | if (qp->s_rdma_len != 0) { | ||
1234 | u32 rkey = be32_to_cpu(reth->rkey); | 1345 | u32 rkey = be32_to_cpu(reth->rkey); |
1235 | u64 vaddr = be64_to_cpu(reth->vaddr); | 1346 | u64 vaddr = be64_to_cpu(reth->vaddr); |
1236 | int ok; | 1347 | int ok; |
1237 | 1348 | ||
1238 | /* | 1349 | ok = ipath_rkey_ok(qp, &e->rdma_sge, |
1239 | * Address range must be a subset of the original | 1350 | len, vaddr, rkey, |
1240 | * request and start on pmtu boundaries. | ||
1241 | */ | ||
1242 | ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, | ||
1243 | qp->s_rdma_len, vaddr, rkey, | ||
1244 | IB_ACCESS_REMOTE_READ); | 1351 | IB_ACCESS_REMOTE_READ); |
1245 | if (unlikely(!ok)) { | 1352 | if (unlikely(!ok)) |
1246 | spin_unlock_irq(&qp->s_lock); | 1353 | goto unlock_done; |
1247 | goto done; | ||
1248 | } | ||
1249 | } else { | 1354 | } else { |
1250 | qp->s_rdma_sge.sg_list = NULL; | 1355 | e->rdma_sge.sg_list = NULL; |
1251 | qp->s_rdma_sge.num_sge = 0; | 1356 | e->rdma_sge.num_sge = 0; |
1252 | qp->s_rdma_sge.sge.mr = NULL; | 1357 | e->rdma_sge.sge.mr = NULL; |
1253 | qp->s_rdma_sge.sge.vaddr = NULL; | 1358 | e->rdma_sge.sge.vaddr = NULL; |
1254 | qp->s_rdma_sge.sge.length = 0; | 1359 | e->rdma_sge.sge.length = 0; |
1255 | qp->s_rdma_sge.sge.sge_length = 0; | 1360 | e->rdma_sge.sge.sge_length = 0; |
1256 | } | 1361 | } |
1257 | qp->s_ack_state = opcode; | 1362 | e->psn = psn; |
1258 | qp->s_ack_psn = psn; | 1363 | qp->s_ack_state = OP(ACKNOWLEDGE); |
1259 | spin_unlock_irq(&qp->s_lock); | 1364 | qp->s_tail_ack_queue = prev; |
1260 | tasklet_hi_schedule(&qp->s_task); | 1365 | break; |
1261 | goto send_ack; | ||
1262 | } | 1366 | } |
1263 | 1367 | ||
1264 | /* | ||
1265 | * A pending RDMA read will ACK anything before it so | ||
1266 | * ignore earlier duplicate requests. | ||
1267 | */ | ||
1268 | if (qp->s_ack_state != OP(ACKNOWLEDGE)) | ||
1269 | goto done; | ||
1270 | |||
1271 | /* | ||
1272 | * If an ACK is pending, don't replace the pending ACK | ||
1273 | * with an earlier one since the later one will ACK the earlier. | ||
1274 | * Also, if we already have a pending atomic, send it. | ||
1275 | */ | ||
1276 | if (qp->r_ack_state != OP(ACKNOWLEDGE) && | ||
1277 | (ipath_cmp24(psn, qp->r_ack_psn) <= 0 || | ||
1278 | qp->r_ack_state >= OP(COMPARE_SWAP))) | ||
1279 | goto send_ack; | ||
1280 | switch (opcode) { | ||
1281 | case OP(COMPARE_SWAP): | 1368 | case OP(COMPARE_SWAP): |
1282 | case OP(FETCH_ADD): | 1369 | case OP(FETCH_ADD): { |
1283 | /* | 1370 | /* |
1284 | * Check for the PSN of the last atomic operation | 1371 | * If we didn't find the atomic request in the ack queue |
1285 | * performed and resend the result if found. | 1372 | * or the send tasklet is already backed up to send an |
1373 | * earlier entry, we can ignore this request. | ||
1286 | */ | 1374 | */ |
1287 | if ((psn & IPATH_PSN_MASK) != qp->r_atomic_psn) | 1375 | if (!e || e->opcode != (u8) opcode || old_req) |
1288 | goto done; | 1376 | goto unlock_done; |
1377 | qp->s_ack_state = OP(ACKNOWLEDGE); | ||
1378 | qp->s_tail_ack_queue = prev; | ||
1379 | break; | ||
1380 | } | ||
1381 | |||
1382 | default: | ||
1383 | if (old_req) | ||
1384 | goto unlock_done; | ||
1385 | /* | ||
1386 | * Resend the most recent ACK if this request is | ||
1387 | * after all the previous RDMA reads and atomics. | ||
1388 | */ | ||
1389 | if (i == qp->r_head_ack_queue) { | ||
1390 | spin_unlock_irq(&qp->s_lock); | ||
1391 | qp->r_nak_state = 0; | ||
1392 | qp->r_ack_psn = qp->r_psn - 1; | ||
1393 | goto send_ack; | ||
1394 | } | ||
1395 | /* | ||
1396 | * Resend the RDMA read or atomic op which | ||
1397 | * ACKs this duplicate request. | ||
1398 | */ | ||
1399 | qp->s_ack_state = OP(ACKNOWLEDGE); | ||
1400 | qp->s_tail_ack_queue = i; | ||
1289 | break; | 1401 | break; |
1290 | } | 1402 | } |
1291 | qp->r_ack_state = opcode; | ||
1292 | qp->r_nak_state = 0; | 1403 | qp->r_nak_state = 0; |
1293 | qp->r_ack_psn = psn; | 1404 | spin_unlock_irq(&qp->s_lock); |
1294 | send_ack: | 1405 | tasklet_hi_schedule(&qp->s_task); |
1295 | return 0; | ||
1296 | 1406 | ||
1407 | unlock_done: | ||
1408 | spin_unlock_irq(&qp->s_lock); | ||
1297 | done: | 1409 | done: |
1298 | return 1; | 1410 | return 1; |
1411 | |||
1412 | send_ack: | ||
1413 | return 0; | ||
1299 | } | 1414 | } |
1300 | 1415 | ||
1301 | static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) | 1416 | static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) |
@@ -1391,15 +1506,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1391 | opcode == OP(SEND_LAST_WITH_IMMEDIATE)) | 1506 | opcode == OP(SEND_LAST_WITH_IMMEDIATE)) |
1392 | break; | 1507 | break; |
1393 | nack_inv: | 1508 | nack_inv: |
1394 | /* | ||
1395 | * A NAK will ACK earlier sends and RDMA writes. | ||
1396 | * Don't queue the NAK if a RDMA read, atomic, or NAK | ||
1397 | * is pending though. | ||
1398 | */ | ||
1399 | if (qp->r_ack_state >= OP(COMPARE_SWAP)) | ||
1400 | goto send_ack; | ||
1401 | ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); | 1509 | ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); |
1402 | qp->r_ack_state = OP(SEND_ONLY); | ||
1403 | qp->r_nak_state = IB_NAK_INVALID_REQUEST; | 1510 | qp->r_nak_state = IB_NAK_INVALID_REQUEST; |
1404 | qp->r_ack_psn = qp->r_psn; | 1511 | qp->r_ack_psn = qp->r_psn; |
1405 | goto send_ack; | 1512 | goto send_ack; |
@@ -1441,9 +1548,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1441 | * Don't queue the NAK if a RDMA read or atomic | 1548 | * Don't queue the NAK if a RDMA read or atomic |
1442 | * is pending though. | 1549 | * is pending though. |
1443 | */ | 1550 | */ |
1444 | if (qp->r_ack_state >= OP(COMPARE_SWAP)) | 1551 | if (qp->r_nak_state) |
1445 | goto send_ack; | 1552 | goto done; |
1446 | qp->r_ack_state = OP(SEND_ONLY); | ||
1447 | qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; | 1553 | qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; |
1448 | qp->r_ack_psn = qp->r_psn; | 1554 | qp->r_ack_psn = qp->r_psn; |
1449 | goto send_ack; | 1555 | goto send_ack; |
@@ -1567,7 +1673,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1567 | goto rnr_nak; | 1673 | goto rnr_nak; |
1568 | goto send_last_imm; | 1674 | goto send_last_imm; |
1569 | 1675 | ||
1570 | case OP(RDMA_READ_REQUEST): | 1676 | case OP(RDMA_READ_REQUEST): { |
1677 | struct ipath_ack_entry *e; | ||
1678 | u32 len; | ||
1679 | u8 next; | ||
1680 | |||
1681 | if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) | ||
1682 | goto nack_acc; | ||
1683 | next = qp->r_head_ack_queue + 1; | ||
1684 | if (next > IPATH_MAX_RDMA_ATOMIC) | ||
1685 | next = 0; | ||
1686 | if (unlikely(next == qp->s_tail_ack_queue)) | ||
1687 | goto nack_inv; | ||
1688 | e = &qp->s_ack_queue[qp->r_head_ack_queue]; | ||
1571 | /* RETH comes after BTH */ | 1689 | /* RETH comes after BTH */ |
1572 | if (!header_in_data) | 1690 | if (!header_in_data) |
1573 | reth = &ohdr->u.rc.reth; | 1691 | reth = &ohdr->u.rc.reth; |
@@ -1575,72 +1693,75 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1575 | reth = (struct ib_reth *)data; | 1693 | reth = (struct ib_reth *)data; |
1576 | data += sizeof(*reth); | 1694 | data += sizeof(*reth); |
1577 | } | 1695 | } |
1578 | if (unlikely(!(qp->qp_access_flags & | 1696 | len = be32_to_cpu(reth->length); |
1579 | IB_ACCESS_REMOTE_READ))) | 1697 | if (len) { |
1580 | goto nack_acc; | ||
1581 | spin_lock_irq(&qp->s_lock); | ||
1582 | qp->s_rdma_len = be32_to_cpu(reth->length); | ||
1583 | if (qp->s_rdma_len != 0) { | ||
1584 | u32 rkey = be32_to_cpu(reth->rkey); | 1698 | u32 rkey = be32_to_cpu(reth->rkey); |
1585 | u64 vaddr = be64_to_cpu(reth->vaddr); | 1699 | u64 vaddr = be64_to_cpu(reth->vaddr); |
1586 | int ok; | 1700 | int ok; |
1587 | 1701 | ||
1588 | /* Check rkey & NAK */ | 1702 | /* Check rkey & NAK */ |
1589 | ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, | 1703 | ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, |
1590 | qp->s_rdma_len, vaddr, rkey, | 1704 | rkey, IB_ACCESS_REMOTE_READ); |
1591 | IB_ACCESS_REMOTE_READ); | 1705 | if (unlikely(!ok)) |
1592 | if (unlikely(!ok)) { | ||
1593 | spin_unlock_irq(&qp->s_lock); | ||
1594 | goto nack_acc; | 1706 | goto nack_acc; |
1595 | } | ||
1596 | /* | 1707 | /* |
1597 | * Update the next expected PSN. We add 1 later | 1708 | * Update the next expected PSN. We add 1 later |
1598 | * below, so only add the remainder here. | 1709 | * below, so only add the remainder here. |
1599 | */ | 1710 | */ |
1600 | if (qp->s_rdma_len > pmtu) | 1711 | if (len > pmtu) |
1601 | qp->r_psn += (qp->s_rdma_len - 1) / pmtu; | 1712 | qp->r_psn += (len - 1) / pmtu; |
1602 | } else { | 1713 | } else { |
1603 | qp->s_rdma_sge.sg_list = NULL; | 1714 | e->rdma_sge.sg_list = NULL; |
1604 | qp->s_rdma_sge.num_sge = 0; | 1715 | e->rdma_sge.num_sge = 0; |
1605 | qp->s_rdma_sge.sge.mr = NULL; | 1716 | e->rdma_sge.sge.mr = NULL; |
1606 | qp->s_rdma_sge.sge.vaddr = NULL; | 1717 | e->rdma_sge.sge.vaddr = NULL; |
1607 | qp->s_rdma_sge.sge.length = 0; | 1718 | e->rdma_sge.sge.length = 0; |
1608 | qp->s_rdma_sge.sge.sge_length = 0; | 1719 | e->rdma_sge.sge.sge_length = 0; |
1609 | } | 1720 | } |
1721 | e->opcode = opcode; | ||
1722 | e->psn = psn; | ||
1610 | /* | 1723 | /* |
1611 | * We need to increment the MSN here instead of when we | 1724 | * We need to increment the MSN here instead of when we |
1612 | * finish sending the result since a duplicate request would | 1725 | * finish sending the result since a duplicate request would |
1613 | * increment it more than once. | 1726 | * increment it more than once. |
1614 | */ | 1727 | */ |
1615 | qp->r_msn++; | 1728 | qp->r_msn++; |
1616 | |||
1617 | qp->s_ack_state = opcode; | ||
1618 | qp->s_ack_psn = psn; | ||
1619 | spin_unlock_irq(&qp->s_lock); | ||
1620 | |||
1621 | qp->r_psn++; | 1729 | qp->r_psn++; |
1622 | qp->r_state = opcode; | 1730 | qp->r_state = opcode; |
1623 | qp->r_nak_state = 0; | 1731 | qp->r_nak_state = 0; |
1732 | barrier(); | ||
1733 | qp->r_head_ack_queue = next; | ||
1624 | 1734 | ||
1625 | /* Call ipath_do_rc_send() in another thread. */ | 1735 | /* Call ipath_do_rc_send() in another thread. */ |
1626 | tasklet_hi_schedule(&qp->s_task); | 1736 | tasklet_hi_schedule(&qp->s_task); |
1627 | 1737 | ||
1628 | goto done; | 1738 | goto done; |
1739 | } | ||
1629 | 1740 | ||
1630 | case OP(COMPARE_SWAP): | 1741 | case OP(COMPARE_SWAP): |
1631 | case OP(FETCH_ADD): { | 1742 | case OP(FETCH_ADD): { |
1632 | struct ib_atomic_eth *ateth; | 1743 | struct ib_atomic_eth *ateth; |
1744 | struct ipath_ack_entry *e; | ||
1633 | u64 vaddr; | 1745 | u64 vaddr; |
1746 | atomic64_t *maddr; | ||
1634 | u64 sdata; | 1747 | u64 sdata; |
1635 | u32 rkey; | 1748 | u32 rkey; |
1749 | u8 next; | ||
1636 | 1750 | ||
1751 | if (unlikely(!(qp->qp_access_flags & | ||
1752 | IB_ACCESS_REMOTE_ATOMIC))) | ||
1753 | goto nack_acc; | ||
1754 | next = qp->r_head_ack_queue + 1; | ||
1755 | if (next > IPATH_MAX_RDMA_ATOMIC) | ||
1756 | next = 0; | ||
1757 | if (unlikely(next == qp->s_tail_ack_queue)) | ||
1758 | goto nack_inv; | ||
1637 | if (!header_in_data) | 1759 | if (!header_in_data) |
1638 | ateth = &ohdr->u.atomic_eth; | 1760 | ateth = &ohdr->u.atomic_eth; |
1639 | else { | 1761 | else |
1640 | ateth = (struct ib_atomic_eth *)data; | 1762 | ateth = (struct ib_atomic_eth *)data; |
1641 | data += sizeof(*ateth); | 1763 | vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | |
1642 | } | 1764 | be32_to_cpu(ateth->vaddr[1]); |
1643 | vaddr = be64_to_cpu(ateth->vaddr); | ||
1644 | if (unlikely(vaddr & (sizeof(u64) - 1))) | 1765 | if (unlikely(vaddr & (sizeof(u64) - 1))) |
1645 | goto nack_inv; | 1766 | goto nack_inv; |
1646 | rkey = be32_to_cpu(ateth->rkey); | 1767 | rkey = be32_to_cpu(ateth->rkey); |
@@ -1649,63 +1770,50 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1649 | sizeof(u64), vaddr, rkey, | 1770 | sizeof(u64), vaddr, rkey, |
1650 | IB_ACCESS_REMOTE_ATOMIC))) | 1771 | IB_ACCESS_REMOTE_ATOMIC))) |
1651 | goto nack_acc; | 1772 | goto nack_acc; |
1652 | if (unlikely(!(qp->qp_access_flags & | ||
1653 | IB_ACCESS_REMOTE_ATOMIC))) | ||
1654 | goto nack_acc; | ||
1655 | /* Perform atomic OP and save result. */ | 1773 | /* Perform atomic OP and save result. */ |
1774 | maddr = (atomic64_t *) qp->r_sge.sge.vaddr; | ||
1656 | sdata = be64_to_cpu(ateth->swap_data); | 1775 | sdata = be64_to_cpu(ateth->swap_data); |
1657 | spin_lock_irq(&dev->pending_lock); | 1776 | e = &qp->s_ack_queue[qp->r_head_ack_queue]; |
1658 | qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; | 1777 | e->atomic_data = (opcode == OP(FETCH_ADD)) ? |
1659 | if (opcode == OP(FETCH_ADD)) | 1778 | (u64) atomic64_add_return(sdata, maddr) - sdata : |
1660 | *(u64 *) qp->r_sge.sge.vaddr = | 1779 | (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, |
1661 | qp->r_atomic_data + sdata; | 1780 | be64_to_cpu(ateth->compare_data), |
1662 | else if (qp->r_atomic_data == | 1781 | sdata); |
1663 | be64_to_cpu(ateth->compare_data)) | 1782 | e->opcode = opcode; |
1664 | *(u64 *) qp->r_sge.sge.vaddr = sdata; | 1783 | e->psn = psn & IPATH_PSN_MASK; |
1665 | spin_unlock_irq(&dev->pending_lock); | ||
1666 | qp->r_msn++; | 1784 | qp->r_msn++; |
1667 | qp->r_atomic_psn = psn & IPATH_PSN_MASK; | 1785 | qp->r_psn++; |
1668 | psn |= 1 << 31; | 1786 | qp->r_state = opcode; |
1669 | break; | 1787 | qp->r_nak_state = 0; |
1788 | barrier(); | ||
1789 | qp->r_head_ack_queue = next; | ||
1790 | |||
1791 | /* Call ipath_do_rc_send() in another thread. */ | ||
1792 | tasklet_hi_schedule(&qp->s_task); | ||
1793 | |||
1794 | goto done; | ||
1670 | } | 1795 | } |
1671 | 1796 | ||
1672 | default: | 1797 | default: |
1673 | /* Drop packet for unknown opcodes. */ | 1798 | /* NAK unknown opcodes. */ |
1674 | goto done; | 1799 | goto nack_inv; |
1675 | } | 1800 | } |
1676 | qp->r_psn++; | 1801 | qp->r_psn++; |
1677 | qp->r_state = opcode; | 1802 | qp->r_state = opcode; |
1803 | qp->r_ack_psn = psn; | ||
1678 | qp->r_nak_state = 0; | 1804 | qp->r_nak_state = 0; |
1679 | /* Send an ACK if requested or required. */ | 1805 | /* Send an ACK if requested or required. */ |
1680 | if (psn & (1 << 31)) { | 1806 | if (psn & (1 << 31)) |
1681 | /* | ||
1682 | * Coalesce ACKs unless there is a RDMA READ or | ||
1683 | * ATOMIC pending. | ||
1684 | */ | ||
1685 | if (qp->r_ack_state < OP(COMPARE_SWAP)) { | ||
1686 | qp->r_ack_state = opcode; | ||
1687 | qp->r_ack_psn = psn; | ||
1688 | } | ||
1689 | goto send_ack; | 1807 | goto send_ack; |
1690 | } | ||
1691 | goto done; | 1808 | goto done; |
1692 | 1809 | ||
1693 | nack_acc: | 1810 | nack_acc: |
1694 | /* | 1811 | ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); |
1695 | * A NAK will ACK earlier sends and RDMA writes. | 1812 | qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; |
1696 | * Don't queue the NAK if a RDMA read, atomic, or NAK | 1813 | qp->r_ack_psn = qp->r_psn; |
1697 | * is pending though. | 1814 | |
1698 | */ | ||
1699 | if (qp->r_ack_state < OP(COMPARE_SWAP)) { | ||
1700 | ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); | ||
1701 | qp->r_ack_state = OP(RDMA_WRITE_ONLY); | ||
1702 | qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; | ||
1703 | qp->r_ack_psn = qp->r_psn; | ||
1704 | } | ||
1705 | send_ack: | 1815 | send_ack: |
1706 | /* Send ACK right away unless the send tasklet has a pending ACK. */ | 1816 | send_rc_ack(qp); |
1707 | if (qp->s_ack_state == OP(ACKNOWLEDGE)) | ||
1708 | send_rc_ack(qp); | ||
1709 | 1817 | ||
1710 | done: | 1818 | done: |
1711 | return; | 1819 | return; |