author     Steve Wise <swise@opengridcomputing.com>  2010-06-10 15:03:00 -0400
committer  Roland Dreier <rolandd@cisco.com>  2010-07-21 14:16:20 -0400
commit     d37ac31ddc24c1a0beed134278bc074c98812210 (patch)
tree       20b61b408fb31cd4b16d50c73d0445784a1255cd /drivers
parent     d3c814e8b2a094dc3bcbe6a0d93ec4824b26e86a (diff)
RDMA/cxgb4: Support variable sized work requests
T4 EQ entries are in multiples of 64 bytes.  Currently the RDMA SQ and
RQ use fixed-size entries composed of 4 EQ entries for the SQ and 2 EQ
entries for the RQ.  For optimal latency with small IO, we need to
change this so the HW only needs to DMA the EQ entries actually used
by a given work request.

Implementation:

- add a wq_pidx counter to track where we are in the EQ.  cidx/pidx
  are used for the sw sq/rq tracking and flow control.

- the variable part of work requests is the SGL.  Add new functions to
  build the SGL and/or immediate data directly in the EQ memory,
  wrapping when needed.

- adjust the min burst size for the EQ contexts to 64B.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
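For orientation, here is a minimal, illustrative sketch (not part of the patch) of the slot arithmetic the change relies on. The helper names are hypothetical; the math mirrors the new t4_sq_produce()/t4_rq_produce() in t4.h below: a work request of len16 16-byte units occupies DIV_ROUND_UP(len16*16, 64) 64-byte EQ entries, and the slot-granular producer index wraps modulo the queue size in slots.

/*
 * Illustrative sketch only -- not part of the patch.  The helper names
 * are made up; the arithmetic mirrors t4_sq_produce()/t4_rq_produce().
 */
#define EQ_ENTRY_SIZE 64        /* T4 EQ entries are 64 bytes */

/* Number of 64-byte EQ entries consumed by a WR of len16 16-byte units. */
static inline unsigned int eq_slots_for_wr(unsigned int len16)
{
        /* e.g. len16 = 3 (48 bytes) -> 1 slot; len16 = 5 (80 bytes) -> 2 slots */
        return (len16 * 16 + EQ_ENTRY_SIZE - 1) / EQ_ENTRY_SIZE;
}

/* Advance a slot-granular producer index, wrapping at the EQ size in slots. */
static inline unsigned int advance_wq_pidx(unsigned int wq_pidx,
                                           unsigned int len16,
                                           unsigned int eq_size_in_slots)
{
        return (wq_pidx + eq_slots_for_wr(len16)) % eq_size_in_slots;
}

With the old fixed-size scheme a send WR always consumed T4_SQ_NUM_SLOTS (4) EQ entries, i.e. 256 bytes of EQ; after this patch a small send that fits in 64 bytes occupies a single entry.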
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c  | 220
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4.h  |  32
2 files changed, 130 insertions, 122 deletions
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index b88b1af28c30..657a5b300b23 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -162,7 +162,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
         res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
                 V_FW_RI_RES_WR_DCAEN(0) |
                 V_FW_RI_RES_WR_DCACPU(0) |
-                V_FW_RI_RES_WR_FBMIN(3) |
+                V_FW_RI_RES_WR_FBMIN(2) |
                 V_FW_RI_RES_WR_FBMAX(3) |
                 V_FW_RI_RES_WR_CIDXFTHRESHO(0) |
                 V_FW_RI_RES_WR_CIDXFTHRESH(0) |
@@ -185,7 +185,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
         res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
                 V_FW_RI_RES_WR_DCAEN(0) |
                 V_FW_RI_RES_WR_DCACPU(0) |
-                V_FW_RI_RES_WR_FBMIN(3) |
+                V_FW_RI_RES_WR_FBMIN(2) |
                 V_FW_RI_RES_WR_FBMAX(3) |
                 V_FW_RI_RES_WR_CIDXFTHRESHO(0) |
                 V_FW_RI_RES_WR_CIDXFTHRESH(0) |
@@ -235,12 +235,78 @@ err1:
         return -ENOMEM;
 }
 
-static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
+static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
+                      struct ib_send_wr *wr, int max, u32 *plenp)
 {
+        u8 *dstp, *srcp;
+        u32 plen = 0;
         int i;
+        int rem, len;
+
+        dstp = (u8 *)immdp->data;
+        for (i = 0; i < wr->num_sge; i++) {
+                if ((plen + wr->sg_list[i].length) > max)
+                        return -EMSGSIZE;
+                srcp = (u8 *)(unsigned long)wr->sg_list[i].addr;
+                plen += wr->sg_list[i].length;
+                rem = wr->sg_list[i].length;
+                while (rem) {
+                        if (dstp == (u8 *)&sq->queue[sq->size])
+                                dstp = (u8 *)sq->queue;
+                        if (rem <= (u8 *)&sq->queue[sq->size] - dstp)
+                                len = rem;
+                        else
+                                len = (u8 *)&sq->queue[sq->size] - dstp;
+                        memcpy(dstp, srcp, len);
+                        dstp += len;
+                        srcp += len;
+                        rem -= len;
+                }
+        }
+        immdp->op = FW_RI_DATA_IMMD;
+        immdp->r1 = 0;
+        immdp->r2 = 0;
+        immdp->immdlen = cpu_to_be32(plen);
+        *plenp = plen;
+        return 0;
+}
+
+static int build_isgl(__be64 *queue_start, __be64 *queue_end,
+                      struct fw_ri_isgl *isglp, struct ib_sge *sg_list,
+                      int num_sge, u32 *plenp)
+
+{
+        int i;
+        u32 plen = 0;
+        __be64 *flitp = (__be64 *)isglp->sge;
+
+        for (i = 0; i < num_sge; i++) {
+                if ((plen + sg_list[i].length) < plen)
+                        return -EMSGSIZE;
+                plen += sg_list[i].length;
+                *flitp = cpu_to_be64(((u64)sg_list[i].lkey << 32) |
+                                     sg_list[i].length);
+                if (++flitp == queue_end)
+                        flitp = queue_start;
+                *flitp = cpu_to_be64(sg_list[i].addr);
+                if (++flitp == queue_end)
+                        flitp = queue_start;
+        }
+        isglp->op = FW_RI_DATA_ISGL;
+        isglp->r1 = 0;
+        isglp->nsge = cpu_to_be16(num_sge);
+        isglp->r2 = 0;
+        if (plenp)
+                *plenp = plen;
+        return 0;
+}
+
+static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
+                           struct ib_send_wr *wr, u8 *len16)
+{
         u32 plen;
         int size;
-        u8 *datap;
+        int ret;
 
         if (wr->num_sge > T4_MAX_SEND_SGE)
                 return -EINVAL;
@@ -267,43 +333,23 @@ static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
         default:
                 return -EINVAL;
         }
+
         plen = 0;
         if (wr->num_sge) {
                 if (wr->send_flags & IB_SEND_INLINE) {
-                        datap = (u8 *)wqe->send.u.immd_src[0].data;
-                        for (i = 0; i < wr->num_sge; i++) {
-                                if ((plen + wr->sg_list[i].length) >
-                                    T4_MAX_SEND_INLINE) {
-                                        return -EMSGSIZE;
-                                }
-                                plen += wr->sg_list[i].length;
-                                memcpy(datap,
-                                       (void *)(unsigned long)wr->sg_list[i].addr,
-                                       wr->sg_list[i].length);
-                                datap += wr->sg_list[i].length;
-                        }
-                        wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD;
-                        wqe->send.u.immd_src[0].r1 = 0;
-                        wqe->send.u.immd_src[0].r2 = 0;
-                        wqe->send.u.immd_src[0].immdlen = cpu_to_be32(plen);
+                        ret = build_immd(sq, wqe->send.u.immd_src, wr,
+                                         T4_MAX_SEND_INLINE, &plen);
+                        if (ret)
+                                return ret;
                         size = sizeof wqe->send + sizeof(struct fw_ri_immd) +
                                plen;
                 } else {
-                        for (i = 0; i < wr->num_sge; i++) {
-                                if ((plen + wr->sg_list[i].length) < plen)
-                                        return -EMSGSIZE;
-                                plen += wr->sg_list[i].length;
-                                wqe->send.u.isgl_src[0].sge[i].stag =
-                                        cpu_to_be32(wr->sg_list[i].lkey);
-                                wqe->send.u.isgl_src[0].sge[i].len =
-                                        cpu_to_be32(wr->sg_list[i].length);
-                                wqe->send.u.isgl_src[0].sge[i].to =
-                                        cpu_to_be64(wr->sg_list[i].addr);
-                        }
-                        wqe->send.u.isgl_src[0].op = FW_RI_DATA_ISGL;
-                        wqe->send.u.isgl_src[0].r1 = 0;
-                        wqe->send.u.isgl_src[0].nsge = cpu_to_be16(wr->num_sge);
-                        wqe->send.u.isgl_src[0].r2 = 0;
+                        ret = build_isgl((__be64 *)sq->queue,
+                                         (__be64 *)&sq->queue[sq->size],
+                                         wqe->send.u.isgl_src,
+                                         wr->sg_list, wr->num_sge, &plen);
+                        if (ret)
+                                return ret;
                         size = sizeof wqe->send + sizeof(struct fw_ri_isgl) +
                                wr->num_sge * sizeof(struct fw_ri_sge);
                 }
@@ -313,62 +359,40 @@ static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
                 wqe->send.u.immd_src[0].r2 = 0;
                 wqe->send.u.immd_src[0].immdlen = 0;
                 size = sizeof wqe->send + sizeof(struct fw_ri_immd);
+                plen = 0;
         }
         *len16 = DIV_ROUND_UP(size, 16);
         wqe->send.plen = cpu_to_be32(plen);
         return 0;
 }
 
-static int build_rdma_write(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
+static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
+                            struct ib_send_wr *wr, u8 *len16)
 {
-        int i;
         u32 plen;
         int size;
-        u8 *datap;
+        int ret;
 
-        if (wr->num_sge > T4_MAX_WRITE_SGE)
+        if (wr->num_sge > T4_MAX_SEND_SGE)
                 return -EINVAL;
         wqe->write.r2 = 0;
         wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
         wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
-        plen = 0;
         if (wr->num_sge) {
                 if (wr->send_flags & IB_SEND_INLINE) {
-                        datap = (u8 *)wqe->write.u.immd_src[0].data;
-                        for (i = 0; i < wr->num_sge; i++) {
-                                if ((plen + wr->sg_list[i].length) >
-                                    T4_MAX_WRITE_INLINE) {
-                                        return -EMSGSIZE;
-                                }
-                                plen += wr->sg_list[i].length;
-                                memcpy(datap,
-                                       (void *)(unsigned long)wr->sg_list[i].addr,
-                                       wr->sg_list[i].length);
-                                datap += wr->sg_list[i].length;
-                        }
-                        wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD;
-                        wqe->write.u.immd_src[0].r1 = 0;
-                        wqe->write.u.immd_src[0].r2 = 0;
-                        wqe->write.u.immd_src[0].immdlen = cpu_to_be32(plen);
+                        ret = build_immd(sq, wqe->write.u.immd_src, wr,
+                                         T4_MAX_WRITE_INLINE, &plen);
+                        if (ret)
+                                return ret;
                         size = sizeof wqe->write + sizeof(struct fw_ri_immd) +
                                plen;
                 } else {
-                        for (i = 0; i < wr->num_sge; i++) {
-                                if ((plen + wr->sg_list[i].length) < plen)
-                                        return -EMSGSIZE;
-                                plen += wr->sg_list[i].length;
-                                wqe->write.u.isgl_src[0].sge[i].stag =
-                                        cpu_to_be32(wr->sg_list[i].lkey);
-                                wqe->write.u.isgl_src[0].sge[i].len =
-                                        cpu_to_be32(wr->sg_list[i].length);
-                                wqe->write.u.isgl_src[0].sge[i].to =
-                                        cpu_to_be64(wr->sg_list[i].addr);
-                        }
-                        wqe->write.u.isgl_src[0].op = FW_RI_DATA_ISGL;
-                        wqe->write.u.isgl_src[0].r1 = 0;
-                        wqe->write.u.isgl_src[0].nsge =
-                                cpu_to_be16(wr->num_sge);
-                        wqe->write.u.isgl_src[0].r2 = 0;
+                        ret = build_isgl((__be64 *)sq->queue,
+                                         (__be64 *)&sq->queue[sq->size],
+                                         wqe->write.u.isgl_src,
+                                         wr->sg_list, wr->num_sge, &plen);
+                        if (ret)
+                                return ret;
                         size = sizeof wqe->write + sizeof(struct fw_ri_isgl) +
                                wr->num_sge * sizeof(struct fw_ri_sge);
                 }
@@ -378,6 +402,7 @@ static int build_rdma_write(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
                 wqe->write.u.immd_src[0].r2 = 0;
                 wqe->write.u.immd_src[0].immdlen = 0;
                 size = sizeof wqe->write + sizeof(struct fw_ri_immd);
+                plen = 0;
         }
         *len16 = DIV_ROUND_UP(size, 16);
         wqe->write.plen = cpu_to_be32(plen);
@@ -416,29 +441,13 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
 static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
                            struct ib_recv_wr *wr, u8 *len16)
 {
-        int i;
-        int plen = 0;
+        int ret;
 
-        for (i = 0; i < wr->num_sge; i++) {
-                if ((plen + wr->sg_list[i].length) < plen)
-                        return -EMSGSIZE;
-                plen += wr->sg_list[i].length;
-                wqe->recv.isgl.sge[i].stag =
-                        cpu_to_be32(wr->sg_list[i].lkey);
-                wqe->recv.isgl.sge[i].len =
-                        cpu_to_be32(wr->sg_list[i].length);
-                wqe->recv.isgl.sge[i].to =
-                        cpu_to_be64(wr->sg_list[i].addr);
-        }
-        for (; i < T4_MAX_RECV_SGE; i++) {
-                wqe->recv.isgl.sge[i].stag = 0;
-                wqe->recv.isgl.sge[i].len = 0;
-                wqe->recv.isgl.sge[i].to = 0;
-        }
-        wqe->recv.isgl.op = FW_RI_DATA_ISGL;
-        wqe->recv.isgl.r1 = 0;
-        wqe->recv.isgl.nsge = cpu_to_be16(wr->num_sge);
-        wqe->recv.isgl.r2 = 0;
+        ret = build_isgl((__be64 *)qhp->wq.rq.queue,
+                         (__be64 *)&qhp->wq.rq.queue[qhp->wq.rq.size],
+                         &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
+        if (ret)
+                return ret;
         *len16 = DIV_ROUND_UP(sizeof wqe->recv +
                               wr->num_sge * sizeof(struct fw_ri_sge), 16);
         return 0;
@@ -547,7 +556,9 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                         *bad_wr = wr;
                         break;
                 }
-                wqe = &qhp->wq.sq.queue[qhp->wq.sq.pidx];
+                wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue +
+                      qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE);
+
                 fw_flags = 0;
                 if (wr->send_flags & IB_SEND_SOLICITED)
                         fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
@@ -564,12 +575,12 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                 swsqe->opcode = FW_RI_SEND;
                         else
                                 swsqe->opcode = FW_RI_SEND_WITH_INV;
-                        err = build_rdma_send(wqe, wr, &len16);
+                        err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
                         break;
                 case IB_WR_RDMA_WRITE:
                         fw_opcode = FW_RI_RDMA_WRITE_WR;
                         swsqe->opcode = FW_RI_RDMA_WRITE;
-                        err = build_rdma_write(wqe, wr, &len16);
+                        err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16);
                         break;
                 case IB_WR_RDMA_READ:
                 case IB_WR_RDMA_READ_WITH_INV:
@@ -619,8 +630,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                      swsqe->opcode, swsqe->read_len);
                 wr = wr->next;
                 num_wrs--;
-                t4_sq_produce(&qhp->wq);
-                idx++;
+                t4_sq_produce(&qhp->wq, len16);
+                idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
         }
         if (t4_wq_db_enabled(&qhp->wq))
                 t4_ring_sq_db(&qhp->wq, idx);
@@ -656,7 +667,9 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                         *bad_wr = wr;
                         break;
                 }
-                wqe = &qhp->wq.rq.queue[qhp->wq.rq.pidx];
+                wqe = (union t4_recv_wr *)((u8 *)qhp->wq.rq.queue +
+                      qhp->wq.rq.wq_pidx *
+                      T4_EQ_ENTRY_SIZE);
                 if (num_wrs)
                         err = build_rdma_recv(qhp, wqe, wr, &len16);
                 else
@@ -675,15 +688,12 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                 wqe->recv.r2[1] = 0;
                 wqe->recv.r2[2] = 0;
                 wqe->recv.len16 = len16;
-                if (len16 < 5)
-                        wqe->flits[8] = 0;
-
                 PDBG("%s cookie 0x%llx pidx %u\n", __func__,
                      (unsigned long long) wr->wr_id, qhp->wq.rq.pidx);
-                t4_rq_produce(&qhp->wq);
+                t4_rq_produce(&qhp->wq, len16);
+                idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
                 wr = wr->next;
                 num_wrs--;
-                idx++;
         }
         if (t4_wq_db_enabled(&qhp->wq))
                 t4_ring_rq_db(&qhp->wq, idx);
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 9cf8d85bfcff..aef55f42bea4 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -65,10 +65,10 @@ struct t4_status_page {
         u8 db_off;
 };
 
-#define T4_EQ_SIZE 64
+#define T4_EQ_ENTRY_SIZE 64
 
 #define T4_SQ_NUM_SLOTS 4
-#define T4_SQ_NUM_BYTES (T4_EQ_SIZE * T4_SQ_NUM_SLOTS)
+#define T4_SQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_SQ_NUM_SLOTS)
 #define T4_MAX_SEND_SGE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \
                          sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge))
 #define T4_MAX_SEND_INLINE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \
@@ -84,7 +84,7 @@ struct t4_status_page {
 #define T4_MAX_FR_DEPTH (T4_MAX_FR_IMMD / sizeof(u64))
 
 #define T4_RQ_NUM_SLOTS 2
-#define T4_RQ_NUM_BYTES (T4_EQ_SIZE * T4_RQ_NUM_SLOTS)
+#define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS)
 #define T4_MAX_RECV_SGE 4
 
 union t4_wr {
@@ -97,20 +97,18 @@ union t4_wr {
         struct fw_ri_fr_nsmr_wr fr;
         struct fw_ri_inv_lstag_wr inv;
         struct t4_status_page status;
-        __be64 flits[T4_EQ_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS];
+        __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS];
 };
 
 union t4_recv_wr {
         struct fw_ri_recv_wr recv;
         struct t4_status_page status;
-        __be64 flits[T4_EQ_SIZE / sizeof(__be64) * T4_RQ_NUM_SLOTS];
+        __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_RQ_NUM_SLOTS];
 };
 
 static inline void init_wr_hdr(union t4_wr *wqe, u16 wrid,
                                enum fw_wr_opcodes opcode, u8 flags, u8 len16)
 {
-        int slots_used;
-
         wqe->send.opcode = (u8)opcode;
         wqe->send.flags = flags;
         wqe->send.wrid = wrid;
@@ -118,12 +116,6 @@ static inline void init_wr_hdr(union t4_wr *wqe, u16 wrid,
         wqe->send.r1[1] = 0;
         wqe->send.r1[2] = 0;
         wqe->send.len16 = len16;
-
-        slots_used = DIV_ROUND_UP(len16*16, T4_EQ_SIZE);
-        while (slots_used < T4_SQ_NUM_SLOTS) {
-                wqe->flits[slots_used * T4_EQ_SIZE / sizeof(__be64)] = 0;
-                slots_used++;
-        }
 }
 
 /* CQE/AE status codes */
@@ -289,6 +281,7 @@ struct t4_sq {
         u16 size;
         u16 cidx;
         u16 pidx;
+        u16 wq_pidx;
 };
 
 struct t4_swrqe {
@@ -310,6 +303,7 @@ struct t4_rq {
         u16 size;
         u16 cidx;
         u16 pidx;
+        u16 wq_pidx;
 };
 
 struct t4_wq {
@@ -340,11 +334,14 @@ static inline u32 t4_rq_avail(struct t4_wq *wq)
         return wq->rq.size - 1 - wq->rq.in_use;
 }
 
-static inline void t4_rq_produce(struct t4_wq *wq)
+static inline void t4_rq_produce(struct t4_wq *wq, u8 len16)
 {
         wq->rq.in_use++;
         if (++wq->rq.pidx == wq->rq.size)
                 wq->rq.pidx = 0;
+        wq->rq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
+        if (wq->rq.wq_pidx >= wq->rq.size * T4_RQ_NUM_SLOTS)
+                wq->rq.wq_pidx %= wq->rq.size * T4_RQ_NUM_SLOTS;
 }
 
 static inline void t4_rq_consume(struct t4_wq *wq)
@@ -370,11 +367,14 @@ static inline u32 t4_sq_avail(struct t4_wq *wq)
         return wq->sq.size - 1 - wq->sq.in_use;
 }
 
-static inline void t4_sq_produce(struct t4_wq *wq)
+static inline void t4_sq_produce(struct t4_wq *wq, u8 len16)
 {
         wq->sq.in_use++;
         if (++wq->sq.pidx == wq->sq.size)
                 wq->sq.pidx = 0;
+        wq->sq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
+        if (wq->sq.wq_pidx >= wq->sq.size * T4_SQ_NUM_SLOTS)
+                wq->sq.wq_pidx %= wq->sq.size * T4_SQ_NUM_SLOTS;
 }
 
 static inline void t4_sq_consume(struct t4_wq *wq)
@@ -386,14 +386,12 @@ static inline void t4_sq_consume(struct t4_wq *wq)
 
 static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc)
 {
-        inc *= T4_SQ_NUM_SLOTS;
         wmb();
         writel(QID(wq->sq.qid) | PIDX(inc), wq->db);
 }
 
 static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc)
 {
-        inc *= T4_RQ_NUM_SLOTS;
         wmb();
         writel(QID(wq->rq.qid) | PIDX(inc), wq->db);
 }