diff options
author | Steve Wise <swise@opengridcomputing.com> | 2010-06-10 15:03:00 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2010-07-21 14:16:20 -0400 |
commit | d37ac31ddc24c1a0beed134278bc074c98812210 (patch) | |
tree | 20b61b408fb31cd4b16d50c73d0445784a1255cd /drivers/infiniband | |
parent | d3c814e8b2a094dc3bcbe6a0d93ec4824b26e86a (diff) |
RDMA/cxgb4: Support variable sized work requests
T4 EQ entries are in multiples of 64 bytes. Currently the RDMA SQ and
RQ use fixed sized entries composed of 4 EQ entries for the SQ and 2
EQ entries for the RQ. For optimal latency with small IO, we need to
change this so the HW only needs to DMA the EQ entries actually used
by a given work request.
Implementation:
- add wq_pidx counter to track where we are in the EQ. cidx/pidx are
used for the sw sq/rq tracking and flow control.
- the variable part of work requests is the SGL. Add new functions to
build the SGL and/or immediate data directly in the EQ memory
wrapping when needed.
- adjust the min burst size for the EQ contexts to 64B.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/hw/cxgb4/qp.c | 220 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/t4.h | 32 |
2 files changed, 130 insertions, 122 deletions
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index b88b1af28c30..657a5b300b23 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c | |||
@@ -162,7 +162,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, | |||
162 | res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( | 162 | res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( |
163 | V_FW_RI_RES_WR_DCAEN(0) | | 163 | V_FW_RI_RES_WR_DCAEN(0) | |
164 | V_FW_RI_RES_WR_DCACPU(0) | | 164 | V_FW_RI_RES_WR_DCACPU(0) | |
165 | V_FW_RI_RES_WR_FBMIN(3) | | 165 | V_FW_RI_RES_WR_FBMIN(2) | |
166 | V_FW_RI_RES_WR_FBMAX(3) | | 166 | V_FW_RI_RES_WR_FBMAX(3) | |
167 | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | | 167 | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | |
168 | V_FW_RI_RES_WR_CIDXFTHRESH(0) | | 168 | V_FW_RI_RES_WR_CIDXFTHRESH(0) | |
@@ -185,7 +185,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, | |||
185 | res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( | 185 | res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( |
186 | V_FW_RI_RES_WR_DCAEN(0) | | 186 | V_FW_RI_RES_WR_DCAEN(0) | |
187 | V_FW_RI_RES_WR_DCACPU(0) | | 187 | V_FW_RI_RES_WR_DCACPU(0) | |
188 | V_FW_RI_RES_WR_FBMIN(3) | | 188 | V_FW_RI_RES_WR_FBMIN(2) | |
189 | V_FW_RI_RES_WR_FBMAX(3) | | 189 | V_FW_RI_RES_WR_FBMAX(3) | |
190 | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | | 190 | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | |
191 | V_FW_RI_RES_WR_CIDXFTHRESH(0) | | 191 | V_FW_RI_RES_WR_CIDXFTHRESH(0) | |
@@ -235,12 +235,78 @@ err1: | |||
235 | return -ENOMEM; | 235 | return -ENOMEM; |
236 | } | 236 | } |
237 | 237 | ||
238 | static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) | 238 | static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, |
239 | struct ib_send_wr *wr, int max, u32 *plenp) | ||
239 | { | 240 | { |
241 | u8 *dstp, *srcp; | ||
242 | u32 plen = 0; | ||
240 | int i; | 243 | int i; |
244 | int rem, len; | ||
245 | |||
246 | dstp = (u8 *)immdp->data; | ||
247 | for (i = 0; i < wr->num_sge; i++) { | ||
248 | if ((plen + wr->sg_list[i].length) > max) | ||
249 | return -EMSGSIZE; | ||
250 | srcp = (u8 *)(unsigned long)wr->sg_list[i].addr; | ||
251 | plen += wr->sg_list[i].length; | ||
252 | rem = wr->sg_list[i].length; | ||
253 | while (rem) { | ||
254 | if (dstp == (u8 *)&sq->queue[sq->size]) | ||
255 | dstp = (u8 *)sq->queue; | ||
256 | if (rem <= (u8 *)&sq->queue[sq->size] - dstp) | ||
257 | len = rem; | ||
258 | else | ||
259 | len = (u8 *)&sq->queue[sq->size] - dstp; | ||
260 | memcpy(dstp, srcp, len); | ||
261 | dstp += len; | ||
262 | srcp += len; | ||
263 | rem -= len; | ||
264 | } | ||
265 | } | ||
266 | immdp->op = FW_RI_DATA_IMMD; | ||
267 | immdp->r1 = 0; | ||
268 | immdp->r2 = 0; | ||
269 | immdp->immdlen = cpu_to_be32(plen); | ||
270 | *plenp = plen; | ||
271 | return 0; | ||
272 | } | ||
273 | |||
274 | static int build_isgl(__be64 *queue_start, __be64 *queue_end, | ||
275 | struct fw_ri_isgl *isglp, struct ib_sge *sg_list, | ||
276 | int num_sge, u32 *plenp) | ||
277 | |||
278 | { | ||
279 | int i; | ||
280 | u32 plen = 0; | ||
281 | __be64 *flitp = (__be64 *)isglp->sge; | ||
282 | |||
283 | for (i = 0; i < num_sge; i++) { | ||
284 | if ((plen + sg_list[i].length) < plen) | ||
285 | return -EMSGSIZE; | ||
286 | plen += sg_list[i].length; | ||
287 | *flitp = cpu_to_be64(((u64)sg_list[i].lkey << 32) | | ||
288 | sg_list[i].length); | ||
289 | if (++flitp == queue_end) | ||
290 | flitp = queue_start; | ||
291 | *flitp = cpu_to_be64(sg_list[i].addr); | ||
292 | if (++flitp == queue_end) | ||
293 | flitp = queue_start; | ||
294 | } | ||
295 | isglp->op = FW_RI_DATA_ISGL; | ||
296 | isglp->r1 = 0; | ||
297 | isglp->nsge = cpu_to_be16(num_sge); | ||
298 | isglp->r2 = 0; | ||
299 | if (plenp) | ||
300 | *plenp = plen; | ||
301 | return 0; | ||
302 | } | ||
303 | |||
304 | static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, | ||
305 | struct ib_send_wr *wr, u8 *len16) | ||
306 | { | ||
241 | u32 plen; | 307 | u32 plen; |
242 | int size; | 308 | int size; |
243 | u8 *datap; | 309 | int ret; |
244 | 310 | ||
245 | if (wr->num_sge > T4_MAX_SEND_SGE) | 311 | if (wr->num_sge > T4_MAX_SEND_SGE) |
246 | return -EINVAL; | 312 | return -EINVAL; |
@@ -267,43 +333,23 @@ static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) | |||
267 | default: | 333 | default: |
268 | return -EINVAL; | 334 | return -EINVAL; |
269 | } | 335 | } |
336 | |||
270 | plen = 0; | 337 | plen = 0; |
271 | if (wr->num_sge) { | 338 | if (wr->num_sge) { |
272 | if (wr->send_flags & IB_SEND_INLINE) { | 339 | if (wr->send_flags & IB_SEND_INLINE) { |
273 | datap = (u8 *)wqe->send.u.immd_src[0].data; | 340 | ret = build_immd(sq, wqe->send.u.immd_src, wr, |
274 | for (i = 0; i < wr->num_sge; i++) { | 341 | T4_MAX_SEND_INLINE, &plen); |
275 | if ((plen + wr->sg_list[i].length) > | 342 | if (ret) |
276 | T4_MAX_SEND_INLINE) { | 343 | return ret; |
277 | return -EMSGSIZE; | ||
278 | } | ||
279 | plen += wr->sg_list[i].length; | ||
280 | memcpy(datap, | ||
281 | (void *)(unsigned long)wr->sg_list[i].addr, | ||
282 | wr->sg_list[i].length); | ||
283 | datap += wr->sg_list[i].length; | ||
284 | } | ||
285 | wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD; | ||
286 | wqe->send.u.immd_src[0].r1 = 0; | ||
287 | wqe->send.u.immd_src[0].r2 = 0; | ||
288 | wqe->send.u.immd_src[0].immdlen = cpu_to_be32(plen); | ||
289 | size = sizeof wqe->send + sizeof(struct fw_ri_immd) + | 344 | size = sizeof wqe->send + sizeof(struct fw_ri_immd) + |
290 | plen; | 345 | plen; |
291 | } else { | 346 | } else { |
292 | for (i = 0; i < wr->num_sge; i++) { | 347 | ret = build_isgl((__be64 *)sq->queue, |
293 | if ((plen + wr->sg_list[i].length) < plen) | 348 | (__be64 *)&sq->queue[sq->size], |
294 | return -EMSGSIZE; | 349 | wqe->send.u.isgl_src, |
295 | plen += wr->sg_list[i].length; | 350 | wr->sg_list, wr->num_sge, &plen); |
296 | wqe->send.u.isgl_src[0].sge[i].stag = | 351 | if (ret) |
297 | cpu_to_be32(wr->sg_list[i].lkey); | 352 | return ret; |
298 | wqe->send.u.isgl_src[0].sge[i].len = | ||
299 | cpu_to_be32(wr->sg_list[i].length); | ||
300 | wqe->send.u.isgl_src[0].sge[i].to = | ||
301 | cpu_to_be64(wr->sg_list[i].addr); | ||
302 | } | ||
303 | wqe->send.u.isgl_src[0].op = FW_RI_DATA_ISGL; | ||
304 | wqe->send.u.isgl_src[0].r1 = 0; | ||
305 | wqe->send.u.isgl_src[0].nsge = cpu_to_be16(wr->num_sge); | ||
306 | wqe->send.u.isgl_src[0].r2 = 0; | ||
307 | size = sizeof wqe->send + sizeof(struct fw_ri_isgl) + | 353 | size = sizeof wqe->send + sizeof(struct fw_ri_isgl) + |
308 | wr->num_sge * sizeof(struct fw_ri_sge); | 354 | wr->num_sge * sizeof(struct fw_ri_sge); |
309 | } | 355 | } |
@@ -313,62 +359,40 @@ static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) | |||
313 | wqe->send.u.immd_src[0].r2 = 0; | 359 | wqe->send.u.immd_src[0].r2 = 0; |
314 | wqe->send.u.immd_src[0].immdlen = 0; | 360 | wqe->send.u.immd_src[0].immdlen = 0; |
315 | size = sizeof wqe->send + sizeof(struct fw_ri_immd); | 361 | size = sizeof wqe->send + sizeof(struct fw_ri_immd); |
362 | plen = 0; | ||
316 | } | 363 | } |
317 | *len16 = DIV_ROUND_UP(size, 16); | 364 | *len16 = DIV_ROUND_UP(size, 16); |
318 | wqe->send.plen = cpu_to_be32(plen); | 365 | wqe->send.plen = cpu_to_be32(plen); |
319 | return 0; | 366 | return 0; |
320 | } | 367 | } |
321 | 368 | ||
322 | static int build_rdma_write(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) | 369 | static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, |
370 | struct ib_send_wr *wr, u8 *len16) | ||
323 | { | 371 | { |
324 | int i; | ||
325 | u32 plen; | 372 | u32 plen; |
326 | int size; | 373 | int size; |
327 | u8 *datap; | 374 | int ret; |
328 | 375 | ||
329 | if (wr->num_sge > T4_MAX_WRITE_SGE) | 376 | if (wr->num_sge > T4_MAX_SEND_SGE) |
330 | return -EINVAL; | 377 | return -EINVAL; |
331 | wqe->write.r2 = 0; | 378 | wqe->write.r2 = 0; |
332 | wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); | 379 | wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); |
333 | wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); | 380 | wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); |
334 | plen = 0; | ||
335 | if (wr->num_sge) { | 381 | if (wr->num_sge) { |
336 | if (wr->send_flags & IB_SEND_INLINE) { | 382 | if (wr->send_flags & IB_SEND_INLINE) { |
337 | datap = (u8 *)wqe->write.u.immd_src[0].data; | 383 | ret = build_immd(sq, wqe->write.u.immd_src, wr, |
338 | for (i = 0; i < wr->num_sge; i++) { | 384 | T4_MAX_WRITE_INLINE, &plen); |
339 | if ((plen + wr->sg_list[i].length) > | 385 | if (ret) |
340 | T4_MAX_WRITE_INLINE) { | 386 | return ret; |
341 | return -EMSGSIZE; | ||
342 | } | ||
343 | plen += wr->sg_list[i].length; | ||
344 | memcpy(datap, | ||
345 | (void *)(unsigned long)wr->sg_list[i].addr, | ||
346 | wr->sg_list[i].length); | ||
347 | datap += wr->sg_list[i].length; | ||
348 | } | ||
349 | wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD; | ||
350 | wqe->write.u.immd_src[0].r1 = 0; | ||
351 | wqe->write.u.immd_src[0].r2 = 0; | ||
352 | wqe->write.u.immd_src[0].immdlen = cpu_to_be32(plen); | ||
353 | size = sizeof wqe->write + sizeof(struct fw_ri_immd) + | 387 | size = sizeof wqe->write + sizeof(struct fw_ri_immd) + |
354 | plen; | 388 | plen; |
355 | } else { | 389 | } else { |
356 | for (i = 0; i < wr->num_sge; i++) { | 390 | ret = build_isgl((__be64 *)sq->queue, |
357 | if ((plen + wr->sg_list[i].length) < plen) | 391 | (__be64 *)&sq->queue[sq->size], |
358 | return -EMSGSIZE; | 392 | wqe->write.u.isgl_src, |
359 | plen += wr->sg_list[i].length; | 393 | wr->sg_list, wr->num_sge, &plen); |
360 | wqe->write.u.isgl_src[0].sge[i].stag = | 394 | if (ret) |
361 | cpu_to_be32(wr->sg_list[i].lkey); | 395 | return ret; |
362 | wqe->write.u.isgl_src[0].sge[i].len = | ||
363 | cpu_to_be32(wr->sg_list[i].length); | ||
364 | wqe->write.u.isgl_src[0].sge[i].to = | ||
365 | cpu_to_be64(wr->sg_list[i].addr); | ||
366 | } | ||
367 | wqe->write.u.isgl_src[0].op = FW_RI_DATA_ISGL; | ||
368 | wqe->write.u.isgl_src[0].r1 = 0; | ||
369 | wqe->write.u.isgl_src[0].nsge = | ||
370 | cpu_to_be16(wr->num_sge); | ||
371 | wqe->write.u.isgl_src[0].r2 = 0; | ||
372 | size = sizeof wqe->write + sizeof(struct fw_ri_isgl) + | 396 | size = sizeof wqe->write + sizeof(struct fw_ri_isgl) + |
373 | wr->num_sge * sizeof(struct fw_ri_sge); | 397 | wr->num_sge * sizeof(struct fw_ri_sge); |
374 | } | 398 | } |
@@ -378,6 +402,7 @@ static int build_rdma_write(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) | |||
378 | wqe->write.u.immd_src[0].r2 = 0; | 402 | wqe->write.u.immd_src[0].r2 = 0; |
379 | wqe->write.u.immd_src[0].immdlen = 0; | 403 | wqe->write.u.immd_src[0].immdlen = 0; |
380 | size = sizeof wqe->write + sizeof(struct fw_ri_immd); | 404 | size = sizeof wqe->write + sizeof(struct fw_ri_immd); |
405 | plen = 0; | ||
381 | } | 406 | } |
382 | *len16 = DIV_ROUND_UP(size, 16); | 407 | *len16 = DIV_ROUND_UP(size, 16); |
383 | wqe->write.plen = cpu_to_be32(plen); | 408 | wqe->write.plen = cpu_to_be32(plen); |
@@ -416,29 +441,13 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) | |||
416 | static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, | 441 | static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, |
417 | struct ib_recv_wr *wr, u8 *len16) | 442 | struct ib_recv_wr *wr, u8 *len16) |
418 | { | 443 | { |
419 | int i; | 444 | int ret; |
420 | int plen = 0; | ||
421 | 445 | ||
422 | for (i = 0; i < wr->num_sge; i++) { | 446 | ret = build_isgl((__be64 *)qhp->wq.rq.queue, |
423 | if ((plen + wr->sg_list[i].length) < plen) | 447 | (__be64 *)&qhp->wq.rq.queue[qhp->wq.rq.size], |
424 | return -EMSGSIZE; | 448 | &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL); |
425 | plen += wr->sg_list[i].length; | 449 | if (ret) |
426 | wqe->recv.isgl.sge[i].stag = | 450 | return ret; |
427 | cpu_to_be32(wr->sg_list[i].lkey); | ||
428 | wqe->recv.isgl.sge[i].len = | ||
429 | cpu_to_be32(wr->sg_list[i].length); | ||
430 | wqe->recv.isgl.sge[i].to = | ||
431 | cpu_to_be64(wr->sg_list[i].addr); | ||
432 | } | ||
433 | for (; i < T4_MAX_RECV_SGE; i++) { | ||
434 | wqe->recv.isgl.sge[i].stag = 0; | ||
435 | wqe->recv.isgl.sge[i].len = 0; | ||
436 | wqe->recv.isgl.sge[i].to = 0; | ||
437 | } | ||
438 | wqe->recv.isgl.op = FW_RI_DATA_ISGL; | ||
439 | wqe->recv.isgl.r1 = 0; | ||
440 | wqe->recv.isgl.nsge = cpu_to_be16(wr->num_sge); | ||
441 | wqe->recv.isgl.r2 = 0; | ||
442 | *len16 = DIV_ROUND_UP(sizeof wqe->recv + | 451 | *len16 = DIV_ROUND_UP(sizeof wqe->recv + |
443 | wr->num_sge * sizeof(struct fw_ri_sge), 16); | 452 | wr->num_sge * sizeof(struct fw_ri_sge), 16); |
444 | return 0; | 453 | return 0; |
@@ -547,7 +556,9 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
547 | *bad_wr = wr; | 556 | *bad_wr = wr; |
548 | break; | 557 | break; |
549 | } | 558 | } |
550 | wqe = &qhp->wq.sq.queue[qhp->wq.sq.pidx]; | 559 | wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue + |
560 | qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE); | ||
561 | |||
551 | fw_flags = 0; | 562 | fw_flags = 0; |
552 | if (wr->send_flags & IB_SEND_SOLICITED) | 563 | if (wr->send_flags & IB_SEND_SOLICITED) |
553 | fw_flags |= FW_RI_SOLICITED_EVENT_FLAG; | 564 | fw_flags |= FW_RI_SOLICITED_EVENT_FLAG; |
@@ -564,12 +575,12 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
564 | swsqe->opcode = FW_RI_SEND; | 575 | swsqe->opcode = FW_RI_SEND; |
565 | else | 576 | else |
566 | swsqe->opcode = FW_RI_SEND_WITH_INV; | 577 | swsqe->opcode = FW_RI_SEND_WITH_INV; |
567 | err = build_rdma_send(wqe, wr, &len16); | 578 | err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16); |
568 | break; | 579 | break; |
569 | case IB_WR_RDMA_WRITE: | 580 | case IB_WR_RDMA_WRITE: |
570 | fw_opcode = FW_RI_RDMA_WRITE_WR; | 581 | fw_opcode = FW_RI_RDMA_WRITE_WR; |
571 | swsqe->opcode = FW_RI_RDMA_WRITE; | 582 | swsqe->opcode = FW_RI_RDMA_WRITE; |
572 | err = build_rdma_write(wqe, wr, &len16); | 583 | err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16); |
573 | break; | 584 | break; |
574 | case IB_WR_RDMA_READ: | 585 | case IB_WR_RDMA_READ: |
575 | case IB_WR_RDMA_READ_WITH_INV: | 586 | case IB_WR_RDMA_READ_WITH_INV: |
@@ -619,8 +630,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
619 | swsqe->opcode, swsqe->read_len); | 630 | swsqe->opcode, swsqe->read_len); |
620 | wr = wr->next; | 631 | wr = wr->next; |
621 | num_wrs--; | 632 | num_wrs--; |
622 | t4_sq_produce(&qhp->wq); | 633 | t4_sq_produce(&qhp->wq, len16); |
623 | idx++; | 634 | idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); |
624 | } | 635 | } |
625 | if (t4_wq_db_enabled(&qhp->wq)) | 636 | if (t4_wq_db_enabled(&qhp->wq)) |
626 | t4_ring_sq_db(&qhp->wq, idx); | 637 | t4_ring_sq_db(&qhp->wq, idx); |
@@ -656,7 +667,9 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, | |||
656 | *bad_wr = wr; | 667 | *bad_wr = wr; |
657 | break; | 668 | break; |
658 | } | 669 | } |
659 | wqe = &qhp->wq.rq.queue[qhp->wq.rq.pidx]; | 670 | wqe = (union t4_recv_wr *)((u8 *)qhp->wq.rq.queue + |
671 | qhp->wq.rq.wq_pidx * | ||
672 | T4_EQ_ENTRY_SIZE); | ||
660 | if (num_wrs) | 673 | if (num_wrs) |
661 | err = build_rdma_recv(qhp, wqe, wr, &len16); | 674 | err = build_rdma_recv(qhp, wqe, wr, &len16); |
662 | else | 675 | else |
@@ -675,15 +688,12 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, | |||
675 | wqe->recv.r2[1] = 0; | 688 | wqe->recv.r2[1] = 0; |
676 | wqe->recv.r2[2] = 0; | 689 | wqe->recv.r2[2] = 0; |
677 | wqe->recv.len16 = len16; | 690 | wqe->recv.len16 = len16; |
678 | if (len16 < 5) | ||
679 | wqe->flits[8] = 0; | ||
680 | |||
681 | PDBG("%s cookie 0x%llx pidx %u\n", __func__, | 691 | PDBG("%s cookie 0x%llx pidx %u\n", __func__, |
682 | (unsigned long long) wr->wr_id, qhp->wq.rq.pidx); | 692 | (unsigned long long) wr->wr_id, qhp->wq.rq.pidx); |
683 | t4_rq_produce(&qhp->wq); | 693 | t4_rq_produce(&qhp->wq, len16); |
694 | idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); | ||
684 | wr = wr->next; | 695 | wr = wr->next; |
685 | num_wrs--; | 696 | num_wrs--; |
686 | idx++; | ||
687 | } | 697 | } |
688 | if (t4_wq_db_enabled(&qhp->wq)) | 698 | if (t4_wq_db_enabled(&qhp->wq)) |
689 | t4_ring_rq_db(&qhp->wq, idx); | 699 | t4_ring_rq_db(&qhp->wq, idx); |
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 9cf8d85bfcff..aef55f42bea4 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h | |||
@@ -65,10 +65,10 @@ struct t4_status_page { | |||
65 | u8 db_off; | 65 | u8 db_off; |
66 | }; | 66 | }; |
67 | 67 | ||
68 | #define T4_EQ_SIZE 64 | 68 | #define T4_EQ_ENTRY_SIZE 64 |
69 | 69 | ||
70 | #define T4_SQ_NUM_SLOTS 4 | 70 | #define T4_SQ_NUM_SLOTS 4 |
71 | #define T4_SQ_NUM_BYTES (T4_EQ_SIZE * T4_SQ_NUM_SLOTS) | 71 | #define T4_SQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_SQ_NUM_SLOTS) |
72 | #define T4_MAX_SEND_SGE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \ | 72 | #define T4_MAX_SEND_SGE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \ |
73 | sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) | 73 | sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) |
74 | #define T4_MAX_SEND_INLINE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \ | 74 | #define T4_MAX_SEND_INLINE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \ |
@@ -84,7 +84,7 @@ struct t4_status_page { | |||
84 | #define T4_MAX_FR_DEPTH (T4_MAX_FR_IMMD / sizeof(u64)) | 84 | #define T4_MAX_FR_DEPTH (T4_MAX_FR_IMMD / sizeof(u64)) |
85 | 85 | ||
86 | #define T4_RQ_NUM_SLOTS 2 | 86 | #define T4_RQ_NUM_SLOTS 2 |
87 | #define T4_RQ_NUM_BYTES (T4_EQ_SIZE * T4_RQ_NUM_SLOTS) | 87 | #define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS) |
88 | #define T4_MAX_RECV_SGE 4 | 88 | #define T4_MAX_RECV_SGE 4 |
89 | 89 | ||
90 | union t4_wr { | 90 | union t4_wr { |
@@ -97,20 +97,18 @@ union t4_wr { | |||
97 | struct fw_ri_fr_nsmr_wr fr; | 97 | struct fw_ri_fr_nsmr_wr fr; |
98 | struct fw_ri_inv_lstag_wr inv; | 98 | struct fw_ri_inv_lstag_wr inv; |
99 | struct t4_status_page status; | 99 | struct t4_status_page status; |
100 | __be64 flits[T4_EQ_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; | 100 | __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; |
101 | }; | 101 | }; |
102 | 102 | ||
103 | union t4_recv_wr { | 103 | union t4_recv_wr { |
104 | struct fw_ri_recv_wr recv; | 104 | struct fw_ri_recv_wr recv; |
105 | struct t4_status_page status; | 105 | struct t4_status_page status; |
106 | __be64 flits[T4_EQ_SIZE / sizeof(__be64) * T4_RQ_NUM_SLOTS]; | 106 | __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_RQ_NUM_SLOTS]; |
107 | }; | 107 | }; |
108 | 108 | ||
109 | static inline void init_wr_hdr(union t4_wr *wqe, u16 wrid, | 109 | static inline void init_wr_hdr(union t4_wr *wqe, u16 wrid, |
110 | enum fw_wr_opcodes opcode, u8 flags, u8 len16) | 110 | enum fw_wr_opcodes opcode, u8 flags, u8 len16) |
111 | { | 111 | { |
112 | int slots_used; | ||
113 | |||
114 | wqe->send.opcode = (u8)opcode; | 112 | wqe->send.opcode = (u8)opcode; |
115 | wqe->send.flags = flags; | 113 | wqe->send.flags = flags; |
116 | wqe->send.wrid = wrid; | 114 | wqe->send.wrid = wrid; |
@@ -118,12 +116,6 @@ static inline void init_wr_hdr(union t4_wr *wqe, u16 wrid, | |||
118 | wqe->send.r1[1] = 0; | 116 | wqe->send.r1[1] = 0; |
119 | wqe->send.r1[2] = 0; | 117 | wqe->send.r1[2] = 0; |
120 | wqe->send.len16 = len16; | 118 | wqe->send.len16 = len16; |
121 | |||
122 | slots_used = DIV_ROUND_UP(len16*16, T4_EQ_SIZE); | ||
123 | while (slots_used < T4_SQ_NUM_SLOTS) { | ||
124 | wqe->flits[slots_used * T4_EQ_SIZE / sizeof(__be64)] = 0; | ||
125 | slots_used++; | ||
126 | } | ||
127 | } | 119 | } |
128 | 120 | ||
129 | /* CQE/AE status codes */ | 121 | /* CQE/AE status codes */ |
@@ -289,6 +281,7 @@ struct t4_sq { | |||
289 | u16 size; | 281 | u16 size; |
290 | u16 cidx; | 282 | u16 cidx; |
291 | u16 pidx; | 283 | u16 pidx; |
284 | u16 wq_pidx; | ||
292 | }; | 285 | }; |
293 | 286 | ||
294 | struct t4_swrqe { | 287 | struct t4_swrqe { |
@@ -310,6 +303,7 @@ struct t4_rq { | |||
310 | u16 size; | 303 | u16 size; |
311 | u16 cidx; | 304 | u16 cidx; |
312 | u16 pidx; | 305 | u16 pidx; |
306 | u16 wq_pidx; | ||
313 | }; | 307 | }; |
314 | 308 | ||
315 | struct t4_wq { | 309 | struct t4_wq { |
@@ -340,11 +334,14 @@ static inline u32 t4_rq_avail(struct t4_wq *wq) | |||
340 | return wq->rq.size - 1 - wq->rq.in_use; | 334 | return wq->rq.size - 1 - wq->rq.in_use; |
341 | } | 335 | } |
342 | 336 | ||
343 | static inline void t4_rq_produce(struct t4_wq *wq) | 337 | static inline void t4_rq_produce(struct t4_wq *wq, u8 len16) |
344 | { | 338 | { |
345 | wq->rq.in_use++; | 339 | wq->rq.in_use++; |
346 | if (++wq->rq.pidx == wq->rq.size) | 340 | if (++wq->rq.pidx == wq->rq.size) |
347 | wq->rq.pidx = 0; | 341 | wq->rq.pidx = 0; |
342 | wq->rq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); | ||
343 | if (wq->rq.wq_pidx >= wq->rq.size * T4_RQ_NUM_SLOTS) | ||
344 | wq->rq.wq_pidx %= wq->rq.size * T4_RQ_NUM_SLOTS; | ||
348 | } | 345 | } |
349 | 346 | ||
350 | static inline void t4_rq_consume(struct t4_wq *wq) | 347 | static inline void t4_rq_consume(struct t4_wq *wq) |
@@ -370,11 +367,14 @@ static inline u32 t4_sq_avail(struct t4_wq *wq) | |||
370 | return wq->sq.size - 1 - wq->sq.in_use; | 367 | return wq->sq.size - 1 - wq->sq.in_use; |
371 | } | 368 | } |
372 | 369 | ||
373 | static inline void t4_sq_produce(struct t4_wq *wq) | 370 | static inline void t4_sq_produce(struct t4_wq *wq, u8 len16) |
374 | { | 371 | { |
375 | wq->sq.in_use++; | 372 | wq->sq.in_use++; |
376 | if (++wq->sq.pidx == wq->sq.size) | 373 | if (++wq->sq.pidx == wq->sq.size) |
377 | wq->sq.pidx = 0; | 374 | wq->sq.pidx = 0; |
375 | wq->sq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); | ||
376 | if (wq->sq.wq_pidx >= wq->sq.size * T4_SQ_NUM_SLOTS) | ||
377 | wq->sq.wq_pidx %= wq->sq.size * T4_SQ_NUM_SLOTS; | ||
378 | } | 378 | } |
379 | 379 | ||
380 | static inline void t4_sq_consume(struct t4_wq *wq) | 380 | static inline void t4_sq_consume(struct t4_wq *wq) |
@@ -386,14 +386,12 @@ static inline void t4_sq_consume(struct t4_wq *wq) | |||
386 | 386 | ||
387 | static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc) | 387 | static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc) |
388 | { | 388 | { |
389 | inc *= T4_SQ_NUM_SLOTS; | ||
390 | wmb(); | 389 | wmb(); |
391 | writel(QID(wq->sq.qid) | PIDX(inc), wq->db); | 390 | writel(QID(wq->sq.qid) | PIDX(inc), wq->db); |
392 | } | 391 | } |
393 | 392 | ||
394 | static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc) | 393 | static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc) |
395 | { | 394 | { |
396 | inc *= T4_RQ_NUM_SLOTS; | ||
397 | wmb(); | 395 | wmb(); |
398 | writel(QID(wq->rq.qid) | PIDX(inc), wq->db); | 396 | writel(QID(wq->rq.qid) | PIDX(inc), wq->db); |
399 | } | 397 | } |