diff options
Diffstat (limited to 'drivers/infiniband/hw/ipath/ipath_rc.c')
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_rc.c | 920 |
1 files changed, 533 insertions, 387 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 5ff20cb04494..b4b88d0b53f5 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c | |||
@@ -37,6 +37,19 @@ | |||
37 | /* cut down ridiculously long IB macro names */ | 37 | /* cut down ridiculously long IB macro names */ |
38 | #define OP(x) IB_OPCODE_RC_##x | 38 | #define OP(x) IB_OPCODE_RC_##x |
39 | 39 | ||
40 | static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe, | ||
41 | u32 psn, u32 pmtu) | ||
42 | { | ||
43 | u32 len; | ||
44 | |||
45 | len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu; | ||
46 | ss->sge = wqe->sg_list[0]; | ||
47 | ss->sg_list = wqe->sg_list + 1; | ||
48 | ss->num_sge = wqe->wr.num_sge; | ||
49 | ipath_skip_sge(ss, len); | ||
50 | return wqe->length - len; | ||
51 | } | ||
52 | |||
40 | /** | 53 | /** |
41 | * ipath_init_restart- initialize the qp->s_sge after a restart | 54 | * ipath_init_restart- initialize the qp->s_sge after a restart |
42 | * @qp: the QP who's SGE we're restarting | 55 | * @qp: the QP who's SGE we're restarting |
@@ -47,15 +60,9 @@ | |||
47 | static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) | 60 | static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) |
48 | { | 61 | { |
49 | struct ipath_ibdev *dev; | 62 | struct ipath_ibdev *dev; |
50 | u32 len; | ||
51 | 63 | ||
52 | len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * | 64 | qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, |
53 | ib_mtu_enum_to_int(qp->path_mtu); | 65 | ib_mtu_enum_to_int(qp->path_mtu)); |
54 | qp->s_sge.sge = wqe->sg_list[0]; | ||
55 | qp->s_sge.sg_list = wqe->sg_list + 1; | ||
56 | qp->s_sge.num_sge = wqe->wr.num_sge; | ||
57 | ipath_skip_sge(&qp->s_sge, len); | ||
58 | qp->s_len = wqe->length - len; | ||
59 | dev = to_idev(qp->ibqp.device); | 66 | dev = to_idev(qp->ibqp.device); |
60 | spin_lock(&dev->pending_lock); | 67 | spin_lock(&dev->pending_lock); |
61 | if (list_empty(&qp->timerwait)) | 68 | if (list_empty(&qp->timerwait)) |
@@ -70,107 +77,123 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) | |||
70 | * @ohdr: a pointer to the IB header being constructed | 77 | * @ohdr: a pointer to the IB header being constructed |
71 | * @pmtu: the path MTU | 78 | * @pmtu: the path MTU |
72 | * | 79 | * |
73 | * Return bth0 if constructed; otherwise, return 0. | 80 | * Return 1 if constructed; otherwise, return 0. |
81 | * Note that we are in the responder's side of the QP context. | ||
74 | * Note the QP s_lock must be held. | 82 | * Note the QP s_lock must be held. |
75 | */ | 83 | */ |
76 | u32 ipath_make_rc_ack(struct ipath_qp *qp, | 84 | static int ipath_make_rc_ack(struct ipath_qp *qp, |
77 | struct ipath_other_headers *ohdr, | 85 | struct ipath_other_headers *ohdr, |
78 | u32 pmtu) | 86 | u32 pmtu, u32 *bth0p, u32 *bth2p) |
79 | { | 87 | { |
88 | struct ipath_ack_entry *e; | ||
80 | u32 hwords; | 89 | u32 hwords; |
81 | u32 len; | 90 | u32 len; |
82 | u32 bth0; | 91 | u32 bth0; |
92 | u32 bth2; | ||
83 | 93 | ||
84 | /* header size in 32-bit words LRH+BTH = (8+12)/4. */ | 94 | /* header size in 32-bit words LRH+BTH = (8+12)/4. */ |
85 | hwords = 5; | 95 | hwords = 5; |
86 | 96 | ||
87 | /* | ||
88 | * Send a response. Note that we are in the responder's | ||
89 | * side of the QP context. | ||
90 | */ | ||
91 | switch (qp->s_ack_state) { | 97 | switch (qp->s_ack_state) { |
92 | case OP(RDMA_READ_REQUEST): | 98 | case OP(RDMA_READ_RESPONSE_LAST): |
93 | qp->s_cur_sge = &qp->s_rdma_sge; | 99 | case OP(RDMA_READ_RESPONSE_ONLY): |
94 | len = qp->s_rdma_len; | 100 | case OP(ATOMIC_ACKNOWLEDGE): |
95 | if (len > pmtu) { | 101 | qp->s_ack_state = OP(ACKNOWLEDGE); |
96 | len = pmtu; | 102 | /* FALLTHROUGH */ |
97 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); | 103 | case OP(ACKNOWLEDGE): |
98 | } else | 104 | /* Check for no next entry in the queue. */ |
99 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); | 105 | if (qp->r_head_ack_queue == qp->s_tail_ack_queue) { |
100 | qp->s_rdma_len -= len; | 106 | if (qp->s_flags & IPATH_S_ACK_PENDING) |
107 | goto normal; | ||
108 | goto bail; | ||
109 | } | ||
110 | |||
111 | e = &qp->s_ack_queue[qp->s_tail_ack_queue]; | ||
112 | if (e->opcode == OP(RDMA_READ_REQUEST)) { | ||
113 | /* Copy SGE state in case we need to resend */ | ||
114 | qp->s_ack_rdma_sge = e->rdma_sge; | ||
115 | qp->s_cur_sge = &qp->s_ack_rdma_sge; | ||
116 | len = e->rdma_sge.sge.sge_length; | ||
117 | if (len > pmtu) { | ||
118 | len = pmtu; | ||
119 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); | ||
120 | } else { | ||
121 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); | ||
122 | if (++qp->s_tail_ack_queue > | ||
123 | IPATH_MAX_RDMA_ATOMIC) | ||
124 | qp->s_tail_ack_queue = 0; | ||
125 | } | ||
126 | ohdr->u.aeth = ipath_compute_aeth(qp); | ||
127 | hwords++; | ||
128 | qp->s_ack_rdma_psn = e->psn; | ||
129 | bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; | ||
130 | } else { | ||
131 | /* COMPARE_SWAP or FETCH_ADD */ | ||
132 | qp->s_cur_sge = NULL; | ||
133 | len = 0; | ||
134 | qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); | ||
135 | ohdr->u.at.aeth = ipath_compute_aeth(qp); | ||
136 | ohdr->u.at.atomic_ack_eth[0] = | ||
137 | cpu_to_be32(e->atomic_data >> 32); | ||
138 | ohdr->u.at.atomic_ack_eth[1] = | ||
139 | cpu_to_be32(e->atomic_data); | ||
140 | hwords += sizeof(ohdr->u.at) / sizeof(u32); | ||
141 | bth2 = e->psn; | ||
142 | if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC) | ||
143 | qp->s_tail_ack_queue = 0; | ||
144 | } | ||
101 | bth0 = qp->s_ack_state << 24; | 145 | bth0 = qp->s_ack_state << 24; |
102 | ohdr->u.aeth = ipath_compute_aeth(qp); | ||
103 | hwords++; | ||
104 | break; | 146 | break; |
105 | 147 | ||
106 | case OP(RDMA_READ_RESPONSE_FIRST): | 148 | case OP(RDMA_READ_RESPONSE_FIRST): |
107 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); | 149 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); |
108 | /* FALLTHROUGH */ | 150 | /* FALLTHROUGH */ |
109 | case OP(RDMA_READ_RESPONSE_MIDDLE): | 151 | case OP(RDMA_READ_RESPONSE_MIDDLE): |
110 | qp->s_cur_sge = &qp->s_rdma_sge; | 152 | len = qp->s_ack_rdma_sge.sge.sge_length; |
111 | len = qp->s_rdma_len; | ||
112 | if (len > pmtu) | 153 | if (len > pmtu) |
113 | len = pmtu; | 154 | len = pmtu; |
114 | else { | 155 | else { |
115 | ohdr->u.aeth = ipath_compute_aeth(qp); | 156 | ohdr->u.aeth = ipath_compute_aeth(qp); |
116 | hwords++; | 157 | hwords++; |
117 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); | 158 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); |
159 | if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC) | ||
160 | qp->s_tail_ack_queue = 0; | ||
118 | } | 161 | } |
119 | qp->s_rdma_len -= len; | ||
120 | bth0 = qp->s_ack_state << 24; | 162 | bth0 = qp->s_ack_state << 24; |
121 | break; | 163 | bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; |
122 | |||
123 | case OP(RDMA_READ_RESPONSE_LAST): | ||
124 | case OP(RDMA_READ_RESPONSE_ONLY): | ||
125 | /* | ||
126 | * We have to prevent new requests from changing | ||
127 | * the r_sge state while a ipath_verbs_send() | ||
128 | * is in progress. | ||
129 | */ | ||
130 | qp->s_ack_state = OP(ACKNOWLEDGE); | ||
131 | bth0 = 0; | ||
132 | goto bail; | ||
133 | |||
134 | case OP(COMPARE_SWAP): | ||
135 | case OP(FETCH_ADD): | ||
136 | qp->s_cur_sge = NULL; | ||
137 | len = 0; | ||
138 | /* | ||
139 | * Set the s_ack_state so the receive interrupt handler | ||
140 | * won't try to send an ACK (out of order) until this one | ||
141 | * is actually sent. | ||
142 | */ | ||
143 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); | ||
144 | bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24; | ||
145 | ohdr->u.at.aeth = ipath_compute_aeth(qp); | ||
146 | ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data); | ||
147 | hwords += sizeof(ohdr->u.at) / 4; | ||
148 | break; | 164 | break; |
149 | 165 | ||
150 | default: | 166 | default: |
151 | /* Send a regular ACK. */ | 167 | normal: |
152 | qp->s_cur_sge = NULL; | ||
153 | len = 0; | ||
154 | /* | 168 | /* |
155 | * Set the s_ack_state so the receive interrupt handler | 169 | * Send a regular ACK. |
156 | * won't try to send an ACK (out of order) until this one | 170 | * Set the s_ack_state so we wait until after sending |
157 | * is actually sent. | 171 | * the ACK before setting s_ack_state to ACKNOWLEDGE |
172 | * (see above). | ||
158 | */ | 173 | */ |
159 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); | 174 | qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); |
160 | bth0 = OP(ACKNOWLEDGE) << 24; | 175 | qp->s_flags &= ~IPATH_S_ACK_PENDING; |
176 | qp->s_cur_sge = NULL; | ||
161 | if (qp->s_nak_state) | 177 | if (qp->s_nak_state) |
162 | ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | | 178 | ohdr->u.aeth = |
163 | (qp->s_nak_state << | 179 | cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | |
164 | IPATH_AETH_CREDIT_SHIFT)); | 180 | (qp->s_nak_state << |
181 | IPATH_AETH_CREDIT_SHIFT)); | ||
165 | else | 182 | else |
166 | ohdr->u.aeth = ipath_compute_aeth(qp); | 183 | ohdr->u.aeth = ipath_compute_aeth(qp); |
167 | hwords++; | 184 | hwords++; |
185 | len = 0; | ||
186 | bth0 = OP(ACKNOWLEDGE) << 24; | ||
187 | bth2 = qp->s_ack_psn & IPATH_PSN_MASK; | ||
168 | } | 188 | } |
169 | qp->s_hdrwords = hwords; | 189 | qp->s_hdrwords = hwords; |
170 | qp->s_cur_size = len; | 190 | qp->s_cur_size = len; |
191 | *bth0p = bth0; | ||
192 | *bth2p = bth2; | ||
193 | return 1; | ||
171 | 194 | ||
172 | bail: | 195 | bail: |
173 | return bth0; | 196 | return 0; |
174 | } | 197 | } |
175 | 198 | ||
176 | /** | 199 | /** |
@@ -197,9 +220,16 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
197 | u32 bth2; | 220 | u32 bth2; |
198 | char newreq; | 221 | char newreq; |
199 | 222 | ||
223 | /* Sending responses has higher priority over sending requests. */ | ||
224 | if ((qp->r_head_ack_queue != qp->s_tail_ack_queue || | ||
225 | (qp->s_flags & IPATH_S_ACK_PENDING) || | ||
226 | qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE) && | ||
227 | ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p)) | ||
228 | goto done; | ||
229 | |||
200 | if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || | 230 | if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || |
201 | qp->s_rnr_timeout) | 231 | qp->s_rnr_timeout) |
202 | goto done; | 232 | goto bail; |
203 | 233 | ||
204 | /* Limit the number of packets sent without an ACK. */ | 234 | /* Limit the number of packets sent without an ACK. */ |
205 | if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { | 235 | if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { |
@@ -210,7 +240,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
210 | list_add_tail(&qp->timerwait, | 240 | list_add_tail(&qp->timerwait, |
211 | &dev->pending[dev->pending_index]); | 241 | &dev->pending[dev->pending_index]); |
212 | spin_unlock(&dev->pending_lock); | 242 | spin_unlock(&dev->pending_lock); |
213 | goto done; | 243 | goto bail; |
214 | } | 244 | } |
215 | 245 | ||
216 | /* header size in 32-bit words LRH+BTH = (8+12)/4. */ | 246 | /* header size in 32-bit words LRH+BTH = (8+12)/4. */ |
@@ -232,7 +262,16 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
232 | if (qp->s_cur == qp->s_tail) { | 262 | if (qp->s_cur == qp->s_tail) { |
233 | /* Check if send work queue is empty. */ | 263 | /* Check if send work queue is empty. */ |
234 | if (qp->s_tail == qp->s_head) | 264 | if (qp->s_tail == qp->s_head) |
235 | goto done; | 265 | goto bail; |
266 | /* | ||
267 | * If a fence is requested, wait for previous | ||
268 | * RDMA read and atomic operations to finish. | ||
269 | */ | ||
270 | if ((wqe->wr.send_flags & IB_SEND_FENCE) && | ||
271 | qp->s_num_rd_atomic) { | ||
272 | qp->s_flags |= IPATH_S_FENCE_PENDING; | ||
273 | goto bail; | ||
274 | } | ||
236 | wqe->psn = qp->s_next_psn; | 275 | wqe->psn = qp->s_next_psn; |
237 | newreq = 1; | 276 | newreq = 1; |
238 | } | 277 | } |
@@ -250,7 +289,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
250 | /* If no credit, return. */ | 289 | /* If no credit, return. */ |
251 | if (qp->s_lsn != (u32) -1 && | 290 | if (qp->s_lsn != (u32) -1 && |
252 | ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) | 291 | ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) |
253 | goto done; | 292 | goto bail; |
254 | wqe->lpsn = wqe->psn; | 293 | wqe->lpsn = wqe->psn; |
255 | if (len > pmtu) { | 294 | if (len > pmtu) { |
256 | wqe->lpsn += (len - 1) / pmtu; | 295 | wqe->lpsn += (len - 1) / pmtu; |
@@ -281,13 +320,13 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
281 | /* If no credit, return. */ | 320 | /* If no credit, return. */ |
282 | if (qp->s_lsn != (u32) -1 && | 321 | if (qp->s_lsn != (u32) -1 && |
283 | ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) | 322 | ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) |
284 | goto done; | 323 | goto bail; |
285 | ohdr->u.rc.reth.vaddr = | 324 | ohdr->u.rc.reth.vaddr = |
286 | cpu_to_be64(wqe->wr.wr.rdma.remote_addr); | 325 | cpu_to_be64(wqe->wr.wr.rdma.remote_addr); |
287 | ohdr->u.rc.reth.rkey = | 326 | ohdr->u.rc.reth.rkey = |
288 | cpu_to_be32(wqe->wr.wr.rdma.rkey); | 327 | cpu_to_be32(wqe->wr.wr.rdma.rkey); |
289 | ohdr->u.rc.reth.length = cpu_to_be32(len); | 328 | ohdr->u.rc.reth.length = cpu_to_be32(len); |
290 | hwords += sizeof(struct ib_reth) / 4; | 329 | hwords += sizeof(struct ib_reth) / sizeof(u32); |
291 | wqe->lpsn = wqe->psn; | 330 | wqe->lpsn = wqe->psn; |
292 | if (len > pmtu) { | 331 | if (len > pmtu) { |
293 | wqe->lpsn += (len - 1) / pmtu; | 332 | wqe->lpsn += (len - 1) / pmtu; |
@@ -312,14 +351,17 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
312 | break; | 351 | break; |
313 | 352 | ||
314 | case IB_WR_RDMA_READ: | 353 | case IB_WR_RDMA_READ: |
315 | ohdr->u.rc.reth.vaddr = | 354 | /* |
316 | cpu_to_be64(wqe->wr.wr.rdma.remote_addr); | 355 | * Don't allow more operations to be started |
317 | ohdr->u.rc.reth.rkey = | 356 | * than the QP limits allow. |
318 | cpu_to_be32(wqe->wr.wr.rdma.rkey); | 357 | */ |
319 | ohdr->u.rc.reth.length = cpu_to_be32(len); | ||
320 | qp->s_state = OP(RDMA_READ_REQUEST); | ||
321 | hwords += sizeof(ohdr->u.rc.reth) / 4; | ||
322 | if (newreq) { | 358 | if (newreq) { |
359 | if (qp->s_num_rd_atomic >= | ||
360 | qp->s_max_rd_atomic) { | ||
361 | qp->s_flags |= IPATH_S_RDMAR_PENDING; | ||
362 | goto bail; | ||
363 | } | ||
364 | qp->s_num_rd_atomic++; | ||
323 | if (qp->s_lsn != (u32) -1) | 365 | if (qp->s_lsn != (u32) -1) |
324 | qp->s_lsn++; | 366 | qp->s_lsn++; |
325 | /* | 367 | /* |
@@ -330,6 +372,13 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
330 | qp->s_next_psn += (len - 1) / pmtu; | 372 | qp->s_next_psn += (len - 1) / pmtu; |
331 | wqe->lpsn = qp->s_next_psn++; | 373 | wqe->lpsn = qp->s_next_psn++; |
332 | } | 374 | } |
375 | ohdr->u.rc.reth.vaddr = | ||
376 | cpu_to_be64(wqe->wr.wr.rdma.remote_addr); | ||
377 | ohdr->u.rc.reth.rkey = | ||
378 | cpu_to_be32(wqe->wr.wr.rdma.rkey); | ||
379 | ohdr->u.rc.reth.length = cpu_to_be32(len); | ||
380 | qp->s_state = OP(RDMA_READ_REQUEST); | ||
381 | hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); | ||
333 | ss = NULL; | 382 | ss = NULL; |
334 | len = 0; | 383 | len = 0; |
335 | if (++qp->s_cur == qp->s_size) | 384 | if (++qp->s_cur == qp->s_size) |
@@ -338,32 +387,48 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
338 | 387 | ||
339 | case IB_WR_ATOMIC_CMP_AND_SWP: | 388 | case IB_WR_ATOMIC_CMP_AND_SWP: |
340 | case IB_WR_ATOMIC_FETCH_AND_ADD: | 389 | case IB_WR_ATOMIC_FETCH_AND_ADD: |
341 | if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) | 390 | /* |
342 | qp->s_state = OP(COMPARE_SWAP); | 391 | * Don't allow more operations to be started |
343 | else | 392 | * than the QP limits allow. |
344 | qp->s_state = OP(FETCH_ADD); | 393 | */ |
345 | ohdr->u.atomic_eth.vaddr = cpu_to_be64( | ||
346 | wqe->wr.wr.atomic.remote_addr); | ||
347 | ohdr->u.atomic_eth.rkey = cpu_to_be32( | ||
348 | wqe->wr.wr.atomic.rkey); | ||
349 | ohdr->u.atomic_eth.swap_data = cpu_to_be64( | ||
350 | wqe->wr.wr.atomic.swap); | ||
351 | ohdr->u.atomic_eth.compare_data = cpu_to_be64( | ||
352 | wqe->wr.wr.atomic.compare_add); | ||
353 | hwords += sizeof(struct ib_atomic_eth) / 4; | ||
354 | if (newreq) { | 394 | if (newreq) { |
395 | if (qp->s_num_rd_atomic >= | ||
396 | qp->s_max_rd_atomic) { | ||
397 | qp->s_flags |= IPATH_S_RDMAR_PENDING; | ||
398 | goto bail; | ||
399 | } | ||
400 | qp->s_num_rd_atomic++; | ||
355 | if (qp->s_lsn != (u32) -1) | 401 | if (qp->s_lsn != (u32) -1) |
356 | qp->s_lsn++; | 402 | qp->s_lsn++; |
357 | wqe->lpsn = wqe->psn; | 403 | wqe->lpsn = wqe->psn; |
358 | } | 404 | } |
359 | if (++qp->s_cur == qp->s_size) | 405 | if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { |
360 | qp->s_cur = 0; | 406 | qp->s_state = OP(COMPARE_SWAP); |
407 | ohdr->u.atomic_eth.swap_data = cpu_to_be64( | ||
408 | wqe->wr.wr.atomic.swap); | ||
409 | ohdr->u.atomic_eth.compare_data = cpu_to_be64( | ||
410 | wqe->wr.wr.atomic.compare_add); | ||
411 | } else { | ||
412 | qp->s_state = OP(FETCH_ADD); | ||
413 | ohdr->u.atomic_eth.swap_data = cpu_to_be64( | ||
414 | wqe->wr.wr.atomic.compare_add); | ||
415 | ohdr->u.atomic_eth.compare_data = 0; | ||
416 | } | ||
417 | ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( | ||
418 | wqe->wr.wr.atomic.remote_addr >> 32); | ||
419 | ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( | ||
420 | wqe->wr.wr.atomic.remote_addr); | ||
421 | ohdr->u.atomic_eth.rkey = cpu_to_be32( | ||
422 | wqe->wr.wr.atomic.rkey); | ||
423 | hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); | ||
361 | ss = NULL; | 424 | ss = NULL; |
362 | len = 0; | 425 | len = 0; |
426 | if (++qp->s_cur == qp->s_size) | ||
427 | qp->s_cur = 0; | ||
363 | break; | 428 | break; |
364 | 429 | ||
365 | default: | 430 | default: |
366 | goto done; | 431 | goto bail; |
367 | } | 432 | } |
368 | qp->s_sge.sge = wqe->sg_list[0]; | 433 | qp->s_sge.sge = wqe->sg_list[0]; |
369 | qp->s_sge.sg_list = wqe->sg_list + 1; | 434 | qp->s_sge.sg_list = wqe->sg_list + 1; |
@@ -379,7 +444,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
379 | qp->s_psn = wqe->lpsn + 1; | 444 | qp->s_psn = wqe->lpsn + 1; |
380 | else { | 445 | else { |
381 | qp->s_psn++; | 446 | qp->s_psn++; |
382 | if ((int)(qp->s_psn - qp->s_next_psn) > 0) | 447 | if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) |
383 | qp->s_next_psn = qp->s_psn; | 448 | qp->s_next_psn = qp->s_psn; |
384 | } | 449 | } |
385 | /* | 450 | /* |
@@ -406,7 +471,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
406 | /* FALLTHROUGH */ | 471 | /* FALLTHROUGH */ |
407 | case OP(SEND_MIDDLE): | 472 | case OP(SEND_MIDDLE): |
408 | bth2 = qp->s_psn++ & IPATH_PSN_MASK; | 473 | bth2 = qp->s_psn++ & IPATH_PSN_MASK; |
409 | if ((int)(qp->s_psn - qp->s_next_psn) > 0) | 474 | if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) |
410 | qp->s_next_psn = qp->s_psn; | 475 | qp->s_next_psn = qp->s_psn; |
411 | ss = &qp->s_sge; | 476 | ss = &qp->s_sge; |
412 | len = qp->s_len; | 477 | len = qp->s_len; |
@@ -442,7 +507,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
442 | /* FALLTHROUGH */ | 507 | /* FALLTHROUGH */ |
443 | case OP(RDMA_WRITE_MIDDLE): | 508 | case OP(RDMA_WRITE_MIDDLE): |
444 | bth2 = qp->s_psn++ & IPATH_PSN_MASK; | 509 | bth2 = qp->s_psn++ & IPATH_PSN_MASK; |
445 | if ((int)(qp->s_psn - qp->s_next_psn) > 0) | 510 | if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) |
446 | qp->s_next_psn = qp->s_psn; | 511 | qp->s_next_psn = qp->s_psn; |
447 | ss = &qp->s_sge; | 512 | ss = &qp->s_sge; |
448 | len = qp->s_len; | 513 | len = qp->s_len; |
@@ -479,9 +544,9 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
479 | cpu_to_be32(wqe->wr.wr.rdma.rkey); | 544 | cpu_to_be32(wqe->wr.wr.rdma.rkey); |
480 | ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); | 545 | ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); |
481 | qp->s_state = OP(RDMA_READ_REQUEST); | 546 | qp->s_state = OP(RDMA_READ_REQUEST); |
482 | hwords += sizeof(ohdr->u.rc.reth) / 4; | 547 | hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); |
483 | bth2 = qp->s_psn++ & IPATH_PSN_MASK; | 548 | bth2 = qp->s_psn++ & IPATH_PSN_MASK; |
484 | if ((int)(qp->s_psn - qp->s_next_psn) > 0) | 549 | if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) |
485 | qp->s_next_psn = qp->s_psn; | 550 | qp->s_next_psn = qp->s_psn; |
486 | ss = NULL; | 551 | ss = NULL; |
487 | len = 0; | 552 | len = 0; |
@@ -489,20 +554,6 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
489 | if (qp->s_cur == qp->s_size) | 554 | if (qp->s_cur == qp->s_size) |
490 | qp->s_cur = 0; | 555 | qp->s_cur = 0; |
491 | break; | 556 | break; |
492 | |||
493 | case OP(RDMA_READ_REQUEST): | ||
494 | case OP(COMPARE_SWAP): | ||
495 | case OP(FETCH_ADD): | ||
496 | /* | ||
497 | * We shouldn't start anything new until this request is | ||
498 | * finished. The ACK will handle rescheduling us. XXX The | ||
499 | * number of outstanding ones is negotiated at connection | ||
500 | * setup time (see pg. 258,289)? XXX Also, if we support | ||
501 | * multiple outstanding requests, we need to check the WQE | ||
502 | * IB_SEND_FENCE flag and not send a new request if a RDMA | ||
503 | * read or atomic is pending. | ||
504 | */ | ||
505 | goto done; | ||
506 | } | 557 | } |
507 | if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) | 558 | if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) |
508 | bth2 |= 1 << 31; /* Request ACK. */ | 559 | bth2 |= 1 << 31; /* Request ACK. */ |
@@ -512,9 +563,10 @@ int ipath_make_rc_req(struct ipath_qp *qp, | |||
512 | qp->s_cur_size = len; | 563 | qp->s_cur_size = len; |
513 | *bth0p = bth0 | (qp->s_state << 24); | 564 | *bth0p = bth0 | (qp->s_state << 24); |
514 | *bth2p = bth2; | 565 | *bth2p = bth2; |
566 | done: | ||
515 | return 1; | 567 | return 1; |
516 | 568 | ||
517 | done: | 569 | bail: |
518 | return 0; | 570 | return 0; |
519 | } | 571 | } |
520 | 572 | ||
@@ -524,7 +576,8 @@ done: | |||
524 | * | 576 | * |
525 | * This is called from ipath_rc_rcv() and only uses the receive | 577 | * This is called from ipath_rc_rcv() and only uses the receive |
526 | * side QP state. | 578 | * side QP state. |
527 | * Note that RDMA reads are handled in the send side QP state and tasklet. | 579 | * Note that RDMA reads and atomics are handled in the |
580 | * send side QP state and tasklet. | ||
528 | */ | 581 | */ |
529 | static void send_rc_ack(struct ipath_qp *qp) | 582 | static void send_rc_ack(struct ipath_qp *qp) |
530 | { | 583 | { |
@@ -535,6 +588,10 @@ static void send_rc_ack(struct ipath_qp *qp) | |||
535 | struct ipath_ib_header hdr; | 588 | struct ipath_ib_header hdr; |
536 | struct ipath_other_headers *ohdr; | 589 | struct ipath_other_headers *ohdr; |
537 | 590 | ||
591 | /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ | ||
592 | if (qp->r_head_ack_queue != qp->s_tail_ack_queue) | ||
593 | goto queue_ack; | ||
594 | |||
538 | /* Construct the header. */ | 595 | /* Construct the header. */ |
539 | ohdr = &hdr.u.oth; | 596 | ohdr = &hdr.u.oth; |
540 | lrh0 = IPATH_LRH_BTH; | 597 | lrh0 = IPATH_LRH_BTH; |
@@ -548,19 +605,14 @@ static void send_rc_ack(struct ipath_qp *qp) | |||
548 | lrh0 = IPATH_LRH_GRH; | 605 | lrh0 = IPATH_LRH_GRH; |
549 | } | 606 | } |
550 | /* read pkey_index w/o lock (its atomic) */ | 607 | /* read pkey_index w/o lock (its atomic) */ |
551 | bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index); | 608 | bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) | |
609 | OP(ACKNOWLEDGE) << 24; | ||
552 | if (qp->r_nak_state) | 610 | if (qp->r_nak_state) |
553 | ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | | 611 | ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | |
554 | (qp->r_nak_state << | 612 | (qp->r_nak_state << |
555 | IPATH_AETH_CREDIT_SHIFT)); | 613 | IPATH_AETH_CREDIT_SHIFT)); |
556 | else | 614 | else |
557 | ohdr->u.aeth = ipath_compute_aeth(qp); | 615 | ohdr->u.aeth = ipath_compute_aeth(qp); |
558 | if (qp->r_ack_state >= OP(COMPARE_SWAP)) { | ||
559 | bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24; | ||
560 | ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data); | ||
561 | hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4; | ||
562 | } else | ||
563 | bth0 |= OP(ACKNOWLEDGE) << 24; | ||
564 | lrh0 |= qp->remote_ah_attr.sl << 4; | 616 | lrh0 |= qp->remote_ah_attr.sl << 4; |
565 | hdr.lrh[0] = cpu_to_be16(lrh0); | 617 | hdr.lrh[0] = cpu_to_be16(lrh0); |
566 | hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); | 618 | hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); |
@@ -574,31 +626,31 @@ static void send_rc_ack(struct ipath_qp *qp) | |||
574 | * If we can send the ACK, clear the ACK state. | 626 | * If we can send the ACK, clear the ACK state. |
575 | */ | 627 | */ |
576 | if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) { | 628 | if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) { |
577 | qp->r_ack_state = OP(ACKNOWLEDGE); | ||
578 | dev->n_unicast_xmit++; | 629 | dev->n_unicast_xmit++; |
579 | } else { | 630 | goto done; |
580 | /* | ||
581 | * We are out of PIO buffers at the moment. | ||
582 | * Pass responsibility for sending the ACK to the | ||
583 | * send tasklet so that when a PIO buffer becomes | ||
584 | * available, the ACK is sent ahead of other outgoing | ||
585 | * packets. | ||
586 | */ | ||
587 | dev->n_rc_qacks++; | ||
588 | spin_lock_irq(&qp->s_lock); | ||
589 | /* Don't coalesce if a RDMA read or atomic is pending. */ | ||
590 | if (qp->s_ack_state == OP(ACKNOWLEDGE) || | ||
591 | qp->s_ack_state < OP(RDMA_READ_REQUEST)) { | ||
592 | qp->s_ack_state = qp->r_ack_state; | ||
593 | qp->s_nak_state = qp->r_nak_state; | ||
594 | qp->s_ack_psn = qp->r_ack_psn; | ||
595 | qp->r_ack_state = OP(ACKNOWLEDGE); | ||
596 | } | ||
597 | spin_unlock_irq(&qp->s_lock); | ||
598 | |||
599 | /* Call ipath_do_rc_send() in another thread. */ | ||
600 | tasklet_hi_schedule(&qp->s_task); | ||
601 | } | 631 | } |
632 | |||
633 | /* | ||
634 | * We are out of PIO buffers at the moment. | ||
635 | * Pass responsibility for sending the ACK to the | ||
636 | * send tasklet so that when a PIO buffer becomes | ||
637 | * available, the ACK is sent ahead of other outgoing | ||
638 | * packets. | ||
639 | */ | ||
640 | dev->n_rc_qacks++; | ||
641 | |||
642 | queue_ack: | ||
643 | spin_lock_irq(&qp->s_lock); | ||
644 | qp->s_flags |= IPATH_S_ACK_PENDING; | ||
645 | qp->s_nak_state = qp->r_nak_state; | ||
646 | qp->s_ack_psn = qp->r_ack_psn; | ||
647 | spin_unlock_irq(&qp->s_lock); | ||
648 | |||
649 | /* Call ipath_do_rc_send() in another thread. */ | ||
650 | tasklet_hi_schedule(&qp->s_task); | ||
651 | |||
652 | done: | ||
653 | return; | ||
602 | } | 654 | } |
603 | 655 | ||
604 | /** | 656 | /** |
@@ -727,7 +779,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) | |||
727 | if (wqe->wr.opcode == IB_WR_RDMA_READ) | 779 | if (wqe->wr.opcode == IB_WR_RDMA_READ) |
728 | dev->n_rc_resends++; | 780 | dev->n_rc_resends++; |
729 | else | 781 | else |
730 | dev->n_rc_resends += (int)qp->s_psn - (int)psn; | 782 | dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK; |
731 | 783 | ||
732 | reset_psn(qp, psn); | 784 | reset_psn(qp, psn); |
733 | tasklet_hi_schedule(&qp->s_task); | 785 | tasklet_hi_schedule(&qp->s_task); |
@@ -775,10 +827,6 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) | |||
775 | list_del_init(&qp->timerwait); | 827 | list_del_init(&qp->timerwait); |
776 | spin_unlock(&dev->pending_lock); | 828 | spin_unlock(&dev->pending_lock); |
777 | 829 | ||
778 | /* Nothing is pending to ACK/NAK. */ | ||
779 | if (unlikely(qp->s_last == qp->s_tail)) | ||
780 | goto bail; | ||
781 | |||
782 | /* | 830 | /* |
783 | * Note that NAKs implicitly ACK outstanding SEND and RDMA write | 831 | * Note that NAKs implicitly ACK outstanding SEND and RDMA write |
784 | * requests and implicitly NAK RDMA read and atomic requests issued | 832 | * requests and implicitly NAK RDMA read and atomic requests issued |
@@ -806,7 +854,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) | |||
806 | */ | 854 | */ |
807 | if ((wqe->wr.opcode == IB_WR_RDMA_READ && | 855 | if ((wqe->wr.opcode == IB_WR_RDMA_READ && |
808 | (opcode != OP(RDMA_READ_RESPONSE_LAST) || | 856 | (opcode != OP(RDMA_READ_RESPONSE_LAST) || |
809 | ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || | 857 | ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || |
810 | ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || | 858 | ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || |
811 | wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && | 859 | wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && |
812 | (opcode != OP(ATOMIC_ACKNOWLEDGE) || | 860 | (opcode != OP(ATOMIC_ACKNOWLEDGE) || |
@@ -824,20 +872,33 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) | |||
824 | */ | 872 | */ |
825 | goto bail; | 873 | goto bail; |
826 | } | 874 | } |
827 | if (wqe->wr.opcode == IB_WR_RDMA_READ || | 875 | if (qp->s_num_rd_atomic && |
828 | wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || | 876 | (wqe->wr.opcode == IB_WR_RDMA_READ || |
829 | wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) | 877 | wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || |
830 | tasklet_hi_schedule(&qp->s_task); | 878 | wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { |
879 | qp->s_num_rd_atomic--; | ||
880 | /* Restart sending task if fence is complete */ | ||
881 | if ((qp->s_flags & IPATH_S_FENCE_PENDING) && | ||
882 | !qp->s_num_rd_atomic) { | ||
883 | qp->s_flags &= ~IPATH_S_FENCE_PENDING; | ||
884 | tasklet_hi_schedule(&qp->s_task); | ||
885 | } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) { | ||
886 | qp->s_flags &= ~IPATH_S_RDMAR_PENDING; | ||
887 | tasklet_hi_schedule(&qp->s_task); | ||
888 | } | ||
889 | } | ||
831 | /* Post a send completion queue entry if requested. */ | 890 | /* Post a send completion queue entry if requested. */ |
832 | if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || | 891 | if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || |
833 | (wqe->wr.send_flags & IB_SEND_SIGNALED)) { | 892 | (wqe->wr.send_flags & IB_SEND_SIGNALED)) { |
834 | wc.wr_id = wqe->wr.wr_id; | 893 | wc.wr_id = wqe->wr.wr_id; |
835 | wc.status = IB_WC_SUCCESS; | 894 | wc.status = IB_WC_SUCCESS; |
836 | wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; | 895 | wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; |
837 | wc.vendor_err = 0; | 896 | wc.vendor_err = 0; |
838 | wc.byte_len = wqe->length; | 897 | wc.byte_len = wqe->length; |
898 | wc.imm_data = 0; | ||
839 | wc.qp = &qp->ibqp; | 899 | wc.qp = &qp->ibqp; |
840 | wc.src_qp = qp->remote_qpn; | 900 | wc.src_qp = qp->remote_qpn; |
901 | wc.wc_flags = 0; | ||
841 | wc.pkey_index = 0; | 902 | wc.pkey_index = 0; |
842 | wc.slid = qp->remote_ah_attr.dlid; | 903 | wc.slid = qp->remote_ah_attr.dlid; |
843 | wc.sl = qp->remote_ah_attr.sl; | 904 | wc.sl = qp->remote_ah_attr.sl; |
@@ -854,15 +915,19 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) | |||
854 | if (qp->s_last == qp->s_cur) { | 915 | if (qp->s_last == qp->s_cur) { |
855 | if (++qp->s_cur >= qp->s_size) | 916 | if (++qp->s_cur >= qp->s_size) |
856 | qp->s_cur = 0; | 917 | qp->s_cur = 0; |
918 | qp->s_last = qp->s_cur; | ||
919 | if (qp->s_last == qp->s_tail) | ||
920 | break; | ||
857 | wqe = get_swqe_ptr(qp, qp->s_cur); | 921 | wqe = get_swqe_ptr(qp, qp->s_cur); |
858 | qp->s_state = OP(SEND_LAST); | 922 | qp->s_state = OP(SEND_LAST); |
859 | qp->s_psn = wqe->psn; | 923 | qp->s_psn = wqe->psn; |
924 | } else { | ||
925 | if (++qp->s_last >= qp->s_size) | ||
926 | qp->s_last = 0; | ||
927 | if (qp->s_last == qp->s_tail) | ||
928 | break; | ||
929 | wqe = get_swqe_ptr(qp, qp->s_last); | ||
860 | } | 930 | } |
861 | if (++qp->s_last >= qp->s_size) | ||
862 | qp->s_last = 0; | ||
863 | wqe = get_swqe_ptr(qp, qp->s_last); | ||
864 | if (qp->s_last == qp->s_tail) | ||
865 | break; | ||
866 | } | 931 | } |
867 | 932 | ||
868 | switch (aeth >> 29) { | 933 | switch (aeth >> 29) { |
@@ -874,6 +939,18 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) | |||
874 | list_add_tail(&qp->timerwait, | 939 | list_add_tail(&qp->timerwait, |
875 | &dev->pending[dev->pending_index]); | 940 | &dev->pending[dev->pending_index]); |
876 | spin_unlock(&dev->pending_lock); | 941 | spin_unlock(&dev->pending_lock); |
942 | /* | ||
943 | * If we get a partial ACK for a resent operation, | ||
944 | * we can stop resending the earlier packets and | ||
945 | * continue with the next packet the receiver wants. | ||
946 | */ | ||
947 | if (ipath_cmp24(qp->s_psn, psn) <= 0) { | ||
948 | reset_psn(qp, psn + 1); | ||
949 | tasklet_hi_schedule(&qp->s_task); | ||
950 | } | ||
951 | } else if (ipath_cmp24(qp->s_psn, psn) <= 0) { | ||
952 | qp->s_state = OP(SEND_LAST); | ||
953 | qp->s_psn = psn + 1; | ||
877 | } | 954 | } |
878 | ipath_get_credit(qp, aeth); | 955 | ipath_get_credit(qp, aeth); |
879 | qp->s_rnr_retry = qp->s_rnr_retry_cnt; | 956 | qp->s_rnr_retry = qp->s_rnr_retry_cnt; |
@@ -884,22 +961,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) | |||
884 | 961 | ||
885 | case 1: /* RNR NAK */ | 962 | case 1: /* RNR NAK */ |
886 | dev->n_rnr_naks++; | 963 | dev->n_rnr_naks++; |
964 | if (qp->s_last == qp->s_tail) | ||
965 | goto bail; | ||
887 | if (qp->s_rnr_retry == 0) { | 966 | if (qp->s_rnr_retry == 0) { |
888 | if (qp->s_last == qp->s_tail) | ||
889 | goto bail; | ||
890 | |||
891 | wc.status = IB_WC_RNR_RETRY_EXC_ERR; | 967 | wc.status = IB_WC_RNR_RETRY_EXC_ERR; |
892 | goto class_b; | 968 | goto class_b; |
893 | } | 969 | } |
894 | if (qp->s_rnr_retry_cnt < 7) | 970 | if (qp->s_rnr_retry_cnt < 7) |
895 | qp->s_rnr_retry--; | 971 | qp->s_rnr_retry--; |
896 | if (qp->s_last == qp->s_tail) | ||
897 | goto bail; | ||
898 | 972 | ||
899 | /* The last valid PSN is the previous PSN. */ | 973 | /* The last valid PSN is the previous PSN. */ |
900 | update_last_psn(qp, psn - 1); | 974 | update_last_psn(qp, psn - 1); |
901 | 975 | ||
902 | dev->n_rc_resends += (int)qp->s_psn - (int)psn; | 976 | if (wqe->wr.opcode == IB_WR_RDMA_READ) |
977 | dev->n_rc_resends++; | ||
978 | else | ||
979 | dev->n_rc_resends += | ||
980 | (qp->s_psn - psn) & IPATH_PSN_MASK; | ||
903 | 981 | ||
904 | reset_psn(qp, psn); | 982 | reset_psn(qp, psn); |
905 | 983 | ||
@@ -910,26 +988,20 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) | |||
910 | goto bail; | 988 | goto bail; |
911 | 989 | ||
912 | case 3: /* NAK */ | 990 | case 3: /* NAK */ |
913 | /* The last valid PSN seen is the previous request's. */ | 991 | if (qp->s_last == qp->s_tail) |
914 | if (qp->s_last != qp->s_tail) | 992 | goto bail; |
915 | update_last_psn(qp, wqe->psn - 1); | 993 | /* The last valid PSN is the previous PSN. */ |
994 | update_last_psn(qp, psn - 1); | ||
916 | switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) & | 995 | switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) & |
917 | IPATH_AETH_CREDIT_MASK) { | 996 | IPATH_AETH_CREDIT_MASK) { |
918 | case 0: /* PSN sequence error */ | 997 | case 0: /* PSN sequence error */ |
919 | dev->n_seq_naks++; | 998 | dev->n_seq_naks++; |
920 | /* | 999 | /* |
921 | * Back up to the responder's expected PSN. XXX | 1000 | * Back up to the responder's expected PSN. |
922 | * Note that we might get a NAK in the middle of an | 1001 | * Note that we might get a NAK in the middle of an |
923 | * RDMA READ response which terminates the RDMA | 1002 | * RDMA READ response which terminates the RDMA |
924 | * READ. | 1003 | * READ. |
925 | */ | 1004 | */ |
926 | if (qp->s_last == qp->s_tail) | ||
927 | break; | ||
928 | |||
929 | if (ipath_cmp24(psn, wqe->psn) < 0) | ||
930 | break; | ||
931 | |||
932 | /* Retry the request. */ | ||
933 | ipath_restart_rc(qp, psn, &wc); | 1005 | ipath_restart_rc(qp, psn, &wc); |
934 | break; | 1006 | break; |
935 | 1007 | ||
@@ -1003,6 +1075,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, | |||
1003 | u32 psn, u32 hdrsize, u32 pmtu, | 1075 | u32 psn, u32 hdrsize, u32 pmtu, |
1004 | int header_in_data) | 1076 | int header_in_data) |
1005 | { | 1077 | { |
1078 | struct ipath_swqe *wqe; | ||
1006 | unsigned long flags; | 1079 | unsigned long flags; |
1007 | struct ib_wc wc; | 1080 | struct ib_wc wc; |
1008 | int diff; | 1081 | int diff; |
@@ -1032,6 +1105,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, | |||
1032 | goto ack_done; | 1105 | goto ack_done; |
1033 | } | 1106 | } |
1034 | 1107 | ||
1108 | if (unlikely(qp->s_last == qp->s_tail)) | ||
1109 | goto ack_done; | ||
1110 | wqe = get_swqe_ptr(qp, qp->s_last); | ||
1111 | |||
1035 | switch (opcode) { | 1112 | switch (opcode) { |
1036 | case OP(ACKNOWLEDGE): | 1113 | case OP(ACKNOWLEDGE): |
1037 | case OP(ATOMIC_ACKNOWLEDGE): | 1114 | case OP(ATOMIC_ACKNOWLEDGE): |
@@ -1042,38 +1119,49 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, | |||
1042 | aeth = be32_to_cpu(((__be32 *) data)[0]); | 1119 | aeth = be32_to_cpu(((__be32 *) data)[0]); |
1043 | data += sizeof(__be32); | 1120 | data += sizeof(__be32); |
1044 | } | 1121 | } |
1045 | if (opcode == OP(ATOMIC_ACKNOWLEDGE)) | 1122 | if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { |
1046 | *(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data; | 1123 | u64 val; |
1124 | |||
1125 | if (!header_in_data) { | ||
1126 | __be32 *p = ohdr->u.at.atomic_ack_eth; | ||
1127 | |||
1128 | val = ((u64) be32_to_cpu(p[0]) << 32) | | ||
1129 | be32_to_cpu(p[1]); | ||
1130 | } else | ||
1131 | val = be64_to_cpu(((__be64 *) data)[0]); | ||
1132 | *(u64 *) wqe->sg_list[0].vaddr = val; | ||
1133 | } | ||
1047 | if (!do_rc_ack(qp, aeth, psn, opcode) || | 1134 | if (!do_rc_ack(qp, aeth, psn, opcode) || |
1048 | opcode != OP(RDMA_READ_RESPONSE_FIRST)) | 1135 | opcode != OP(RDMA_READ_RESPONSE_FIRST)) |
1049 | goto ack_done; | 1136 | goto ack_done; |
1050 | hdrsize += 4; | 1137 | hdrsize += 4; |
1138 | if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) | ||
1139 | goto ack_op_err; | ||
1051 | /* | 1140 | /* |
1052 | * do_rc_ack() has already checked the PSN so skip | 1141 | * If this is a response to a resent RDMA read, we |
1053 | * the sequence check. | 1142 | * have to be careful to copy the data to the right |
1143 | * location. | ||
1054 | */ | 1144 | */ |
1055 | goto rdma_read; | 1145 | qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, |
1146 | wqe, psn, pmtu); | ||
1147 | goto read_middle; | ||
1056 | 1148 | ||
1057 | case OP(RDMA_READ_RESPONSE_MIDDLE): | 1149 | case OP(RDMA_READ_RESPONSE_MIDDLE): |
1058 | /* no AETH, no ACK */ | 1150 | /* no AETH, no ACK */ |
1059 | if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { | 1151 | if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { |
1060 | dev->n_rdma_seq++; | 1152 | dev->n_rdma_seq++; |
1061 | if (qp->s_last != qp->s_tail) | 1153 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); |
1062 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); | ||
1063 | goto ack_done; | 1154 | goto ack_done; |
1064 | } | 1155 | } |
1065 | rdma_read: | 1156 | if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) |
1066 | if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) | 1157 | goto ack_op_err; |
1067 | goto ack_done; | 1158 | read_middle: |
1068 | if (unlikely(tlen != (hdrsize + pmtu + 4))) | 1159 | if (unlikely(tlen != (hdrsize + pmtu + 4))) |
1069 | goto ack_done; | 1160 | goto ack_len_err; |
1070 | if (unlikely(pmtu >= qp->s_len)) | 1161 | if (unlikely(pmtu >= qp->s_rdma_read_len)) |
1071 | goto ack_done; | 1162 | goto ack_len_err; |
1163 | |||
1072 | /* We got a response so update the timeout. */ | 1164 | /* We got a response so update the timeout. */ |
1073 | if (unlikely(qp->s_last == qp->s_tail || | ||
1074 | get_swqe_ptr(qp, qp->s_last)->wr.opcode != | ||
1075 | IB_WR_RDMA_READ)) | ||
1076 | goto ack_done; | ||
1077 | spin_lock(&dev->pending_lock); | 1165 | spin_lock(&dev->pending_lock); |
1078 | if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait)) | 1166 | if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait)) |
1079 | list_move_tail(&qp->timerwait, | 1167 | list_move_tail(&qp->timerwait, |
@@ -1082,67 +1170,97 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, | |||
1082 | /* | 1170 | /* |
1083 | * Update the RDMA receive state but do the copy w/o | 1171 | * Update the RDMA receive state but do the copy w/o |
1084 | * holding the locks and blocking interrupts. | 1172 | * holding the locks and blocking interrupts. |
1085 | * XXX Yet another place that affects relaxed RDMA order | ||
1086 | * since we don't want s_sge modified. | ||
1087 | */ | 1173 | */ |
1088 | qp->s_len -= pmtu; | 1174 | qp->s_rdma_read_len -= pmtu; |
1089 | update_last_psn(qp, psn); | 1175 | update_last_psn(qp, psn); |
1090 | spin_unlock_irqrestore(&qp->s_lock, flags); | 1176 | spin_unlock_irqrestore(&qp->s_lock, flags); |
1091 | ipath_copy_sge(&qp->s_sge, data, pmtu); | 1177 | ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu); |
1092 | goto bail; | 1178 | goto bail; |
1093 | 1179 | ||
1094 | case OP(RDMA_READ_RESPONSE_LAST): | 1180 | case OP(RDMA_READ_RESPONSE_ONLY): |
1095 | /* ACKs READ req. */ | ||
1096 | if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { | 1181 | if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { |
1097 | dev->n_rdma_seq++; | 1182 | dev->n_rdma_seq++; |
1098 | if (qp->s_last != qp->s_tail) | 1183 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); |
1099 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); | ||
1100 | goto ack_done; | 1184 | goto ack_done; |
1101 | } | 1185 | } |
1102 | /* FALLTHROUGH */ | 1186 | if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) |
1103 | case OP(RDMA_READ_RESPONSE_ONLY): | 1187 | goto ack_op_err; |
1104 | if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) | 1188 | /* Get the number of bytes the message was padded by. */ |
1105 | goto ack_done; | 1189 | pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; |
1190 | /* | ||
1191 | * Check that the data size is >= 0 && <= pmtu. | ||
1192 | * Remember to account for the AETH header (4) and | ||
1193 | * ICRC (4). | ||
1194 | */ | ||
1195 | if (unlikely(tlen < (hdrsize + pad + 8))) | ||
1196 | goto ack_len_err; | ||
1106 | /* | 1197 | /* |
1107 | * Get the number of bytes the message was padded by. | 1198 | * If this is a response to a resent RDMA read, we |
1199 | * have to be careful to copy the data to the right | ||
1200 | * location. | ||
1108 | */ | 1201 | */ |
1202 | qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, | ||
1203 | wqe, psn, pmtu); | ||
1204 | goto read_last; | ||
1205 | |||
1206 | case OP(RDMA_READ_RESPONSE_LAST): | ||
1207 | /* ACKs READ req. */ | ||
1208 | if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { | ||
1209 | dev->n_rdma_seq++; | ||
1210 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); | ||
1211 | goto ack_done; | ||
1212 | } | ||
1213 | if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) | ||
1214 | goto ack_op_err; | ||
1215 | /* Get the number of bytes the message was padded by. */ | ||
1109 | pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; | 1216 | pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; |
1110 | /* | 1217 | /* |
1111 | * Check that the data size is >= 1 && <= pmtu. | 1218 | * Check that the data size is >= 1 && <= pmtu. |
1112 | * Remember to account for the AETH header (4) and | 1219 | * Remember to account for the AETH header (4) and |
1113 | * ICRC (4). | 1220 | * ICRC (4). |
1114 | */ | 1221 | */ |
1115 | if (unlikely(tlen <= (hdrsize + pad + 8))) { | 1222 | if (unlikely(tlen <= (hdrsize + pad + 8))) |
1116 | /* XXX Need to generate an error CQ entry. */ | 1223 | goto ack_len_err; |
1117 | goto ack_done; | 1224 | read_last: |
1118 | } | ||
1119 | tlen -= hdrsize + pad + 8; | 1225 | tlen -= hdrsize + pad + 8; |
1120 | if (unlikely(tlen != qp->s_len)) { | 1226 | if (unlikely(tlen != qp->s_rdma_read_len)) |
1121 | /* XXX Need to generate an error CQ entry. */ | 1227 | goto ack_len_err; |
1122 | goto ack_done; | ||
1123 | } | ||
1124 | if (!header_in_data) | 1228 | if (!header_in_data) |
1125 | aeth = be32_to_cpu(ohdr->u.aeth); | 1229 | aeth = be32_to_cpu(ohdr->u.aeth); |
1126 | else { | 1230 | else { |
1127 | aeth = be32_to_cpu(((__be32 *) data)[0]); | 1231 | aeth = be32_to_cpu(((__be32 *) data)[0]); |
1128 | data += sizeof(__be32); | 1232 | data += sizeof(__be32); |
1129 | } | 1233 | } |
1130 | ipath_copy_sge(&qp->s_sge, data, tlen); | 1234 | ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen); |
1131 | if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) { | 1235 | (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST)); |
1132 | /* | ||
1133 | * Change the state so we contimue | ||
1134 | * processing new requests and wake up the | ||
1135 | * tasklet if there are posted sends. | ||
1136 | */ | ||
1137 | qp->s_state = OP(SEND_LAST); | ||
1138 | if (qp->s_tail != qp->s_head) | ||
1139 | tasklet_hi_schedule(&qp->s_task); | ||
1140 | } | ||
1141 | goto ack_done; | 1236 | goto ack_done; |
1142 | } | 1237 | } |
1143 | 1238 | ||
1144 | ack_done: | 1239 | ack_done: |
1145 | spin_unlock_irqrestore(&qp->s_lock, flags); | 1240 | spin_unlock_irqrestore(&qp->s_lock, flags); |
1241 | goto bail; | ||
1242 | |||
1243 | ack_op_err: | ||
1244 | wc.status = IB_WC_LOC_QP_OP_ERR; | ||
1245 | goto ack_err; | ||
1246 | |||
1247 | ack_len_err: | ||
1248 | wc.status = IB_WC_LOC_LEN_ERR; | ||
1249 | ack_err: | ||
1250 | wc.wr_id = wqe->wr.wr_id; | ||
1251 | wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; | ||
1252 | wc.vendor_err = 0; | ||
1253 | wc.byte_len = 0; | ||
1254 | wc.imm_data = 0; | ||
1255 | wc.qp = &qp->ibqp; | ||
1256 | wc.src_qp = qp->remote_qpn; | ||
1257 | wc.wc_flags = 0; | ||
1258 | wc.pkey_index = 0; | ||
1259 | wc.slid = qp->remote_ah_attr.dlid; | ||
1260 | wc.sl = qp->remote_ah_attr.sl; | ||
1261 | wc.dlid_path_bits = 0; | ||
1262 | wc.port_num = 0; | ||
1263 | ipath_sqerror_qp(qp, &wc); | ||
1146 | bail: | 1264 | bail: |
1147 | return; | 1265 | return; |
1148 | } | 1266 | } |
@@ -1162,7 +1280,7 @@ bail: | |||
1162 | * incoming RC packet for the given QP. | 1280 | * incoming RC packet for the given QP. |
1163 | * Called at interrupt level. | 1281 | * Called at interrupt level. |
1164 | * Return 1 if no more processing is needed; otherwise return 0 to | 1282 | * Return 1 if no more processing is needed; otherwise return 0 to |
1165 | * schedule a response to be sent and the s_lock unlocked. | 1283 | * schedule a response to be sent. |
1166 | */ | 1284 | */ |
1167 | static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | 1285 | static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, |
1168 | struct ipath_other_headers *ohdr, | 1286 | struct ipath_other_headers *ohdr, |
@@ -1173,25 +1291,23 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | |||
1173 | int diff, | 1291 | int diff, |
1174 | int header_in_data) | 1292 | int header_in_data) |
1175 | { | 1293 | { |
1176 | struct ib_reth *reth; | 1294 | struct ipath_ack_entry *e; |
1295 | u8 i, prev; | ||
1296 | int old_req; | ||
1177 | 1297 | ||
1178 | if (diff > 0) { | 1298 | if (diff > 0) { |
1179 | /* | 1299 | /* |
1180 | * Packet sequence error. | 1300 | * Packet sequence error. |
1181 | * A NAK will ACK earlier sends and RDMA writes. | 1301 | * A NAK will ACK earlier sends and RDMA writes. |
1182 | * Don't queue the NAK if a RDMA read, atomic, or | 1302 | * Don't queue the NAK if we already sent one. |
1183 | * NAK is pending though. | ||
1184 | */ | 1303 | */ |
1185 | if (qp->s_ack_state != OP(ACKNOWLEDGE) || | 1304 | if (!qp->r_nak_state) { |
1186 | qp->r_nak_state != 0) | ||
1187 | goto done; | ||
1188 | if (qp->r_ack_state < OP(COMPARE_SWAP)) { | ||
1189 | qp->r_ack_state = OP(SEND_ONLY); | ||
1190 | qp->r_nak_state = IB_NAK_PSN_ERROR; | 1305 | qp->r_nak_state = IB_NAK_PSN_ERROR; |
1191 | /* Use the expected PSN. */ | 1306 | /* Use the expected PSN. */ |
1192 | qp->r_ack_psn = qp->r_psn; | 1307 | qp->r_ack_psn = qp->r_psn; |
1308 | goto send_ack; | ||
1193 | } | 1309 | } |
1194 | goto send_ack; | 1310 | goto done; |
1195 | } | 1311 | } |
1196 | 1312 | ||
1197 | /* | 1313 | /* |
@@ -1204,8 +1320,46 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | |||
1204 | * can coalesce an outstanding duplicate ACK. We have to | 1320 | * can coalesce an outstanding duplicate ACK. We have to |
1205 | * send the earliest so that RDMA reads can be restarted at | 1321 | * send the earliest so that RDMA reads can be restarted at |
1206 | * the requester's expected PSN. | 1322 | * the requester's expected PSN. |
1323 | * | ||
1324 | * First, find where this duplicate PSN falls within the | ||
1325 | * ACKs previously sent. | ||
1207 | */ | 1326 | */ |
1208 | if (opcode == OP(RDMA_READ_REQUEST)) { | 1327 | psn &= IPATH_PSN_MASK; |
1328 | e = NULL; | ||
1329 | old_req = 1; | ||
1330 | spin_lock_irq(&qp->s_lock); | ||
1331 | for (i = qp->r_head_ack_queue; ; i = prev) { | ||
1332 | if (i == qp->s_tail_ack_queue) | ||
1333 | old_req = 0; | ||
1334 | if (i) | ||
1335 | prev = i - 1; | ||
1336 | else | ||
1337 | prev = IPATH_MAX_RDMA_ATOMIC; | ||
1338 | if (prev == qp->r_head_ack_queue) { | ||
1339 | e = NULL; | ||
1340 | break; | ||
1341 | } | ||
1342 | e = &qp->s_ack_queue[prev]; | ||
1343 | if (!e->opcode) { | ||
1344 | e = NULL; | ||
1345 | break; | ||
1346 | } | ||
1347 | if (ipath_cmp24(psn, e->psn) >= 0) | ||
1348 | break; | ||
1349 | } | ||
1350 | switch (opcode) { | ||
1351 | case OP(RDMA_READ_REQUEST): { | ||
1352 | struct ib_reth *reth; | ||
1353 | u32 offset; | ||
1354 | u32 len; | ||
1355 | |||
1356 | /* | ||
1357 | * If we didn't find the RDMA read request in the ack queue, | ||
1358 | * or the send tasklet is already backed up to send an | ||
1359 | * earlier entry, we can ignore this request. | ||
1360 | */ | ||
1361 | if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req) | ||
1362 | goto unlock_done; | ||
1209 | /* RETH comes after BTH */ | 1363 | /* RETH comes after BTH */ |
1210 | if (!header_in_data) | 1364 | if (!header_in_data) |
1211 | reth = &ohdr->u.rc.reth; | 1365 | reth = &ohdr->u.rc.reth; |
@@ -1214,88 +1368,87 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | |||
1214 | data += sizeof(*reth); | 1368 | data += sizeof(*reth); |
1215 | } | 1369 | } |
1216 | /* | 1370 | /* |
1217 | * If we receive a duplicate RDMA request, it means the | 1371 | * Address range must be a subset of the original |
1218 | * requester saw a sequence error and needs to restart | 1372 | * request and start on pmtu boundaries. |
1219 | * from an earlier point. We can abort the current | 1373 | * We reuse the old ack_queue slot since the requester |
1220 | * RDMA read send in that case. | 1374 | * should not back up and request an earlier PSN for the |
1375 | * same request. | ||
1221 | */ | 1376 | */ |
1222 | spin_lock_irq(&qp->s_lock); | 1377 | offset = ((psn - e->psn) & IPATH_PSN_MASK) * |
1223 | if (qp->s_ack_state != OP(ACKNOWLEDGE) && | 1378 | ib_mtu_enum_to_int(qp->path_mtu); |
1224 | (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) { | 1379 | len = be32_to_cpu(reth->length); |
1225 | /* | 1380 | if (unlikely(offset + len > e->rdma_sge.sge.sge_length)) |
1226 | * We are already sending earlier requested data. | 1381 | goto unlock_done; |
1227 | * Don't abort it to send later out of sequence data. | 1382 | if (len != 0) { |
1228 | */ | ||
1229 | spin_unlock_irq(&qp->s_lock); | ||
1230 | goto done; | ||
1231 | } | ||
1232 | qp->s_rdma_len = be32_to_cpu(reth->length); | ||
1233 | if (qp->s_rdma_len != 0) { | ||
1234 | u32 rkey = be32_to_cpu(reth->rkey); | 1383 | u32 rkey = be32_to_cpu(reth->rkey); |
1235 | u64 vaddr = be64_to_cpu(reth->vaddr); | 1384 | u64 vaddr = be64_to_cpu(reth->vaddr); |
1236 | int ok; | 1385 | int ok; |
1237 | 1386 | ||
1238 | /* | 1387 | ok = ipath_rkey_ok(qp, &e->rdma_sge, |
1239 | * Address range must be a subset of the original | 1388 | len, vaddr, rkey, |
1240 | * request and start on pmtu boundaries. | ||
1241 | */ | ||
1242 | ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, | ||
1243 | qp->s_rdma_len, vaddr, rkey, | ||
1244 | IB_ACCESS_REMOTE_READ); | 1389 | IB_ACCESS_REMOTE_READ); |
1245 | if (unlikely(!ok)) { | 1390 | if (unlikely(!ok)) |
1246 | spin_unlock_irq(&qp->s_lock); | 1391 | goto unlock_done; |
1247 | goto done; | ||
1248 | } | ||
1249 | } else { | 1392 | } else { |
1250 | qp->s_rdma_sge.sg_list = NULL; | 1393 | e->rdma_sge.sg_list = NULL; |
1251 | qp->s_rdma_sge.num_sge = 0; | 1394 | e->rdma_sge.num_sge = 0; |
1252 | qp->s_rdma_sge.sge.mr = NULL; | 1395 | e->rdma_sge.sge.mr = NULL; |
1253 | qp->s_rdma_sge.sge.vaddr = NULL; | 1396 | e->rdma_sge.sge.vaddr = NULL; |
1254 | qp->s_rdma_sge.sge.length = 0; | 1397 | e->rdma_sge.sge.length = 0; |
1255 | qp->s_rdma_sge.sge.sge_length = 0; | 1398 | e->rdma_sge.sge.sge_length = 0; |
1256 | } | 1399 | } |
1257 | qp->s_ack_state = opcode; | 1400 | e->psn = psn; |
1258 | qp->s_ack_psn = psn; | 1401 | qp->s_ack_state = OP(ACKNOWLEDGE); |
1259 | spin_unlock_irq(&qp->s_lock); | 1402 | qp->s_tail_ack_queue = prev; |
1260 | tasklet_hi_schedule(&qp->s_task); | 1403 | break; |
1261 | goto send_ack; | ||
1262 | } | 1404 | } |
1263 | 1405 | ||
1264 | /* | ||
1265 | * A pending RDMA read will ACK anything before it so | ||
1266 | * ignore earlier duplicate requests. | ||
1267 | */ | ||
1268 | if (qp->s_ack_state != OP(ACKNOWLEDGE)) | ||
1269 | goto done; | ||
1270 | |||
1271 | /* | ||
1272 | * If an ACK is pending, don't replace the pending ACK | ||
1273 | * with an earlier one since the later one will ACK the earlier. | ||
1274 | * Also, if we already have a pending atomic, send it. | ||
1275 | */ | ||
1276 | if (qp->r_ack_state != OP(ACKNOWLEDGE) && | ||
1277 | (ipath_cmp24(psn, qp->r_ack_psn) <= 0 || | ||
1278 | qp->r_ack_state >= OP(COMPARE_SWAP))) | ||
1279 | goto send_ack; | ||
1280 | switch (opcode) { | ||
1281 | case OP(COMPARE_SWAP): | 1406 | case OP(COMPARE_SWAP): |
1282 | case OP(FETCH_ADD): | 1407 | case OP(FETCH_ADD): { |
1283 | /* | 1408 | /* |
1284 | * Check for the PSN of the last atomic operation | 1409 | * If we didn't find the atomic request in the ack queue |
1285 | * performed and resend the result if found. | 1410 | * or the send tasklet is already backed up to send an |
1411 | * earlier entry, we can ignore this request. | ||
1286 | */ | 1412 | */ |
1287 | if ((psn & IPATH_PSN_MASK) != qp->r_atomic_psn) | 1413 | if (!e || e->opcode != (u8) opcode || old_req) |
1288 | goto done; | 1414 | goto unlock_done; |
1415 | qp->s_ack_state = OP(ACKNOWLEDGE); | ||
1416 | qp->s_tail_ack_queue = prev; | ||
1417 | break; | ||
1418 | } | ||
1419 | |||
1420 | default: | ||
1421 | if (old_req) | ||
1422 | goto unlock_done; | ||
1423 | /* | ||
1424 | * Resend the most recent ACK if this request is | ||
1425 | * after all the previous RDMA reads and atomics. | ||
1426 | */ | ||
1427 | if (i == qp->r_head_ack_queue) { | ||
1428 | spin_unlock_irq(&qp->s_lock); | ||
1429 | qp->r_nak_state = 0; | ||
1430 | qp->r_ack_psn = qp->r_psn - 1; | ||
1431 | goto send_ack; | ||
1432 | } | ||
1433 | /* | ||
1434 | * Resend the RDMA read or atomic op which | ||
1435 | * ACKs this duplicate request. | ||
1436 | */ | ||
1437 | qp->s_ack_state = OP(ACKNOWLEDGE); | ||
1438 | qp->s_tail_ack_queue = i; | ||
1289 | break; | 1439 | break; |
1290 | } | 1440 | } |
1291 | qp->r_ack_state = opcode; | ||
1292 | qp->r_nak_state = 0; | 1441 | qp->r_nak_state = 0; |
1293 | qp->r_ack_psn = psn; | 1442 | spin_unlock_irq(&qp->s_lock); |
1294 | send_ack: | 1443 | tasklet_hi_schedule(&qp->s_task); |
1295 | return 0; | ||
1296 | 1444 | ||
1445 | unlock_done: | ||
1446 | spin_unlock_irq(&qp->s_lock); | ||
1297 | done: | 1447 | done: |
1298 | return 1; | 1448 | return 1; |
1449 | |||
1450 | send_ack: | ||
1451 | return 0; | ||
1299 | } | 1452 | } |
1300 | 1453 | ||
1301 | static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) | 1454 | static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) |
@@ -1391,15 +1544,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1391 | opcode == OP(SEND_LAST_WITH_IMMEDIATE)) | 1544 | opcode == OP(SEND_LAST_WITH_IMMEDIATE)) |
1392 | break; | 1545 | break; |
1393 | nack_inv: | 1546 | nack_inv: |
1394 | /* | ||
1395 | * A NAK will ACK earlier sends and RDMA writes. | ||
1396 | * Don't queue the NAK if a RDMA read, atomic, or NAK | ||
1397 | * is pending though. | ||
1398 | */ | ||
1399 | if (qp->r_ack_state >= OP(COMPARE_SWAP)) | ||
1400 | goto send_ack; | ||
1401 | ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); | 1547 | ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); |
1402 | qp->r_ack_state = OP(SEND_ONLY); | ||
1403 | qp->r_nak_state = IB_NAK_INVALID_REQUEST; | 1548 | qp->r_nak_state = IB_NAK_INVALID_REQUEST; |
1404 | qp->r_ack_psn = qp->r_psn; | 1549 | qp->r_ack_psn = qp->r_psn; |
1405 | goto send_ack; | 1550 | goto send_ack; |
@@ -1441,9 +1586,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1441 | * Don't queue the NAK if a RDMA read or atomic | 1586 | * Don't queue the NAK if a RDMA read or atomic |
1442 | * is pending though. | 1587 | * is pending though. |
1443 | */ | 1588 | */ |
1444 | if (qp->r_ack_state >= OP(COMPARE_SWAP)) | 1589 | if (qp->r_nak_state) |
1445 | goto send_ack; | 1590 | goto done; |
1446 | qp->r_ack_state = OP(SEND_ONLY); | ||
1447 | qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; | 1591 | qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; |
1448 | qp->r_ack_psn = qp->r_psn; | 1592 | qp->r_ack_psn = qp->r_psn; |
1449 | goto send_ack; | 1593 | goto send_ack; |
@@ -1567,7 +1711,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1567 | goto rnr_nak; | 1711 | goto rnr_nak; |
1568 | goto send_last_imm; | 1712 | goto send_last_imm; |
1569 | 1713 | ||
1570 | case OP(RDMA_READ_REQUEST): | 1714 | case OP(RDMA_READ_REQUEST): { |
1715 | struct ipath_ack_entry *e; | ||
1716 | u32 len; | ||
1717 | u8 next; | ||
1718 | |||
1719 | if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) | ||
1720 | goto nack_acc; | ||
1721 | next = qp->r_head_ack_queue + 1; | ||
1722 | if (next > IPATH_MAX_RDMA_ATOMIC) | ||
1723 | next = 0; | ||
1724 | if (unlikely(next == qp->s_tail_ack_queue)) | ||
1725 | goto nack_inv; | ||
1726 | e = &qp->s_ack_queue[qp->r_head_ack_queue]; | ||
1571 | /* RETH comes after BTH */ | 1727 | /* RETH comes after BTH */ |
1572 | if (!header_in_data) | 1728 | if (!header_in_data) |
1573 | reth = &ohdr->u.rc.reth; | 1729 | reth = &ohdr->u.rc.reth; |
@@ -1575,72 +1731,75 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1575 | reth = (struct ib_reth *)data; | 1731 | reth = (struct ib_reth *)data; |
1576 | data += sizeof(*reth); | 1732 | data += sizeof(*reth); |
1577 | } | 1733 | } |
1578 | if (unlikely(!(qp->qp_access_flags & | 1734 | len = be32_to_cpu(reth->length); |
1579 | IB_ACCESS_REMOTE_READ))) | 1735 | if (len) { |
1580 | goto nack_acc; | ||
1581 | spin_lock_irq(&qp->s_lock); | ||
1582 | qp->s_rdma_len = be32_to_cpu(reth->length); | ||
1583 | if (qp->s_rdma_len != 0) { | ||
1584 | u32 rkey = be32_to_cpu(reth->rkey); | 1736 | u32 rkey = be32_to_cpu(reth->rkey); |
1585 | u64 vaddr = be64_to_cpu(reth->vaddr); | 1737 | u64 vaddr = be64_to_cpu(reth->vaddr); |
1586 | int ok; | 1738 | int ok; |
1587 | 1739 | ||
1588 | /* Check rkey & NAK */ | 1740 | /* Check rkey & NAK */ |
1589 | ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, | 1741 | ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, |
1590 | qp->s_rdma_len, vaddr, rkey, | 1742 | rkey, IB_ACCESS_REMOTE_READ); |
1591 | IB_ACCESS_REMOTE_READ); | 1743 | if (unlikely(!ok)) |
1592 | if (unlikely(!ok)) { | ||
1593 | spin_unlock_irq(&qp->s_lock); | ||
1594 | goto nack_acc; | 1744 | goto nack_acc; |
1595 | } | ||
1596 | /* | 1745 | /* |
1597 | * Update the next expected PSN. We add 1 later | 1746 | * Update the next expected PSN. We add 1 later |
1598 | * below, so only add the remainder here. | 1747 | * below, so only add the remainder here. |
1599 | */ | 1748 | */ |
1600 | if (qp->s_rdma_len > pmtu) | 1749 | if (len > pmtu) |
1601 | qp->r_psn += (qp->s_rdma_len - 1) / pmtu; | 1750 | qp->r_psn += (len - 1) / pmtu; |
1602 | } else { | 1751 | } else { |
1603 | qp->s_rdma_sge.sg_list = NULL; | 1752 | e->rdma_sge.sg_list = NULL; |
1604 | qp->s_rdma_sge.num_sge = 0; | 1753 | e->rdma_sge.num_sge = 0; |
1605 | qp->s_rdma_sge.sge.mr = NULL; | 1754 | e->rdma_sge.sge.mr = NULL; |
1606 | qp->s_rdma_sge.sge.vaddr = NULL; | 1755 | e->rdma_sge.sge.vaddr = NULL; |
1607 | qp->s_rdma_sge.sge.length = 0; | 1756 | e->rdma_sge.sge.length = 0; |
1608 | qp->s_rdma_sge.sge.sge_length = 0; | 1757 | e->rdma_sge.sge.sge_length = 0; |
1609 | } | 1758 | } |
1759 | e->opcode = opcode; | ||
1760 | e->psn = psn; | ||
1610 | /* | 1761 | /* |
1611 | * We need to increment the MSN here instead of when we | 1762 | * We need to increment the MSN here instead of when we |
1612 | * finish sending the result since a duplicate request would | 1763 | * finish sending the result since a duplicate request would |
1613 | * increment it more than once. | 1764 | * increment it more than once. |
1614 | */ | 1765 | */ |
1615 | qp->r_msn++; | 1766 | qp->r_msn++; |
1616 | |||
1617 | qp->s_ack_state = opcode; | ||
1618 | qp->s_ack_psn = psn; | ||
1619 | spin_unlock_irq(&qp->s_lock); | ||
1620 | |||
1621 | qp->r_psn++; | 1767 | qp->r_psn++; |
1622 | qp->r_state = opcode; | 1768 | qp->r_state = opcode; |
1623 | qp->r_nak_state = 0; | 1769 | qp->r_nak_state = 0; |
1770 | barrier(); | ||
1771 | qp->r_head_ack_queue = next; | ||
1624 | 1772 | ||
1625 | /* Call ipath_do_rc_send() in another thread. */ | 1773 | /* Call ipath_do_rc_send() in another thread. */ |
1626 | tasklet_hi_schedule(&qp->s_task); | 1774 | tasklet_hi_schedule(&qp->s_task); |
1627 | 1775 | ||
1628 | goto done; | 1776 | goto done; |
1777 | } | ||
1629 | 1778 | ||
1630 | case OP(COMPARE_SWAP): | 1779 | case OP(COMPARE_SWAP): |
1631 | case OP(FETCH_ADD): { | 1780 | case OP(FETCH_ADD): { |
1632 | struct ib_atomic_eth *ateth; | 1781 | struct ib_atomic_eth *ateth; |
1782 | struct ipath_ack_entry *e; | ||
1633 | u64 vaddr; | 1783 | u64 vaddr; |
1784 | atomic64_t *maddr; | ||
1634 | u64 sdata; | 1785 | u64 sdata; |
1635 | u32 rkey; | 1786 | u32 rkey; |
1787 | u8 next; | ||
1636 | 1788 | ||
1789 | if (unlikely(!(qp->qp_access_flags & | ||
1790 | IB_ACCESS_REMOTE_ATOMIC))) | ||
1791 | goto nack_acc; | ||
1792 | next = qp->r_head_ack_queue + 1; | ||
1793 | if (next > IPATH_MAX_RDMA_ATOMIC) | ||
1794 | next = 0; | ||
1795 | if (unlikely(next == qp->s_tail_ack_queue)) | ||
1796 | goto nack_inv; | ||
1637 | if (!header_in_data) | 1797 | if (!header_in_data) |
1638 | ateth = &ohdr->u.atomic_eth; | 1798 | ateth = &ohdr->u.atomic_eth; |
1639 | else { | 1799 | else |
1640 | ateth = (struct ib_atomic_eth *)data; | 1800 | ateth = (struct ib_atomic_eth *)data; |
1641 | data += sizeof(*ateth); | 1801 | vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | |
1642 | } | 1802 | be32_to_cpu(ateth->vaddr[1]); |
1643 | vaddr = be64_to_cpu(ateth->vaddr); | ||
1644 | if (unlikely(vaddr & (sizeof(u64) - 1))) | 1803 | if (unlikely(vaddr & (sizeof(u64) - 1))) |
1645 | goto nack_inv; | 1804 | goto nack_inv; |
1646 | rkey = be32_to_cpu(ateth->rkey); | 1805 | rkey = be32_to_cpu(ateth->rkey); |
@@ -1649,63 +1808,50 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | |||
1649 | sizeof(u64), vaddr, rkey, | 1808 | sizeof(u64), vaddr, rkey, |
1650 | IB_ACCESS_REMOTE_ATOMIC))) | 1809 | IB_ACCESS_REMOTE_ATOMIC))) |
1651 | goto nack_acc; | 1810 | goto nack_acc; |
1652 | if (unlikely(!(qp->qp_access_flags & | ||
1653 | IB_ACCESS_REMOTE_ATOMIC))) | ||
1654 | goto nack_acc; | ||
1655 | /* Perform atomic OP and save result. */ | 1811 | /* Perform atomic OP and save result. */ |
1812 | maddr = (atomic64_t *) qp->r_sge.sge.vaddr; | ||
1656 | sdata = be64_to_cpu(ateth->swap_data); | 1813 | sdata = be64_to_cpu(ateth->swap_data); |
1657 | spin_lock_irq(&dev->pending_lock); | 1814 | e = &qp->s_ack_queue[qp->r_head_ack_queue]; |
1658 | qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; | 1815 | e->atomic_data = (opcode == OP(FETCH_ADD)) ? |
1659 | if (opcode == OP(FETCH_ADD)) | 1816 | (u64) atomic64_add_return(sdata, maddr) - sdata : |
1660 | *(u64 *) qp->r_sge.sge.vaddr = | 1817 | (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, |
1661 | qp->r_atomic_data + sdata; | 1818 | be64_to_cpu(ateth->compare_data), |
1662 | else if (qp->r_atomic_data == | 1819 | sdata); |
1663 | be64_to_cpu(ateth->compare_data)) | 1820 | e->opcode = opcode; |
1664 | *(u64 *) qp->r_sge.sge.vaddr = sdata; | 1821 | e->psn = psn & IPATH_PSN_MASK; |
1665 | spin_unlock_irq(&dev->pending_lock); | ||
1666 | qp->r_msn++; | 1822 | qp->r_msn++; |
1667 | qp->r_atomic_psn = psn & IPATH_PSN_MASK; | 1823 | qp->r_psn++; |
1668 | psn |= 1 << 31; | 1824 | qp->r_state = opcode; |
1669 | break; | 1825 | qp->r_nak_state = 0; |
1826 | barrier(); | ||
1827 | qp->r_head_ack_queue = next; | ||
1828 | |||
1829 | /* Call ipath_do_rc_send() in another thread. */ | ||
1830 | tasklet_hi_schedule(&qp->s_task); | ||
1831 | |||
1832 | goto done; | ||
1670 | } | 1833 | } |
1671 | 1834 | ||
1672 | default: | 1835 | default: |
1673 | /* Drop packet for unknown opcodes. */ | 1836 | /* NAK unknown opcodes. */ |
1674 | goto done; | 1837 | goto nack_inv; |
1675 | } | 1838 | } |
1676 | qp->r_psn++; | 1839 | qp->r_psn++; |
1677 | qp->r_state = opcode; | 1840 | qp->r_state = opcode; |
1841 | qp->r_ack_psn = psn; | ||
1678 | qp->r_nak_state = 0; | 1842 | qp->r_nak_state = 0; |
1679 | /* Send an ACK if requested or required. */ | 1843 | /* Send an ACK if requested or required. */ |
1680 | if (psn & (1 << 31)) { | 1844 | if (psn & (1 << 31)) |
1681 | /* | ||
1682 | * Coalesce ACKs unless there is a RDMA READ or | ||
1683 | * ATOMIC pending. | ||
1684 | */ | ||
1685 | if (qp->r_ack_state < OP(COMPARE_SWAP)) { | ||
1686 | qp->r_ack_state = opcode; | ||
1687 | qp->r_ack_psn = psn; | ||
1688 | } | ||
1689 | goto send_ack; | 1845 | goto send_ack; |
1690 | } | ||
1691 | goto done; | 1846 | goto done; |
1692 | 1847 | ||
1693 | nack_acc: | 1848 | nack_acc: |
1694 | /* | 1849 | ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); |
1695 | * A NAK will ACK earlier sends and RDMA writes. | 1850 | qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; |
1696 | * Don't queue the NAK if a RDMA read, atomic, or NAK | 1851 | qp->r_ack_psn = qp->r_psn; |
1697 | * is pending though. | 1852 | |
1698 | */ | ||
1699 | if (qp->r_ack_state < OP(COMPARE_SWAP)) { | ||
1700 | ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); | ||
1701 | qp->r_ack_state = OP(RDMA_WRITE_ONLY); | ||
1702 | qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; | ||
1703 | qp->r_ack_psn = qp->r_psn; | ||
1704 | } | ||
1705 | send_ack: | 1853 | send_ack: |
1706 | /* Send ACK right away unless the send tasklet has a pending ACK. */ | 1854 | send_rc_ack(qp); |
1707 | if (qp->s_ack_state == OP(ACKNOWLEDGE)) | ||
1708 | send_rc_ack(qp); | ||
1709 | 1855 | ||
1710 | done: | 1856 | done: |
1711 | return; | 1857 | return; |