aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/cxgb4/qp.c
diff options
context:
space:
mode:
authorSteve Wise <swise@opengridcomputing.com>2010-06-10 15:03:00 -0400
committerRoland Dreier <rolandd@cisco.com>2010-07-21 14:16:20 -0400
commitd37ac31ddc24c1a0beed134278bc074c98812210 (patch)
tree20b61b408fb31cd4b16d50c73d0445784a1255cd /drivers/infiniband/hw/cxgb4/qp.c
parentd3c814e8b2a094dc3bcbe6a0d93ec4824b26e86a (diff)
RDMA/cxgb4: Support variable sized work requests
T4 EQ entries are in multiples of 64 bytes. Currently the RDMA SQ and RQ use fixed sized entries composed of 4 EQ entries for the SQ and 2 EQ entries for the RQ. For optimal latency with small IO, we need to change this so the HW only needs to DMA the EQ entries actually used by a given work request. Implementation: - add wq_pidx counter to track where we are in the EQ. cidx/pidx are used for the sw sq/rq tracking and flow control. - the variable part of work requests is the SGL. Add new functions to build the SGL and/or immediate data directly in the EQ memory, wrapping when needed. - adjust the min burst size for the EQ contexts to 64B. Signed-off-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/cxgb4/qp.c')
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c220
1 files changed, 115 insertions, 105 deletions
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index b88b1af28c30..657a5b300b23 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -162,7 +162,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
162 res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( 162 res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
163 V_FW_RI_RES_WR_DCAEN(0) | 163 V_FW_RI_RES_WR_DCAEN(0) |
164 V_FW_RI_RES_WR_DCACPU(0) | 164 V_FW_RI_RES_WR_DCACPU(0) |
165 V_FW_RI_RES_WR_FBMIN(3) | 165 V_FW_RI_RES_WR_FBMIN(2) |
166 V_FW_RI_RES_WR_FBMAX(3) | 166 V_FW_RI_RES_WR_FBMAX(3) |
167 V_FW_RI_RES_WR_CIDXFTHRESHO(0) | 167 V_FW_RI_RES_WR_CIDXFTHRESHO(0) |
168 V_FW_RI_RES_WR_CIDXFTHRESH(0) | 168 V_FW_RI_RES_WR_CIDXFTHRESH(0) |
@@ -185,7 +185,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
185 res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( 185 res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
186 V_FW_RI_RES_WR_DCAEN(0) | 186 V_FW_RI_RES_WR_DCAEN(0) |
187 V_FW_RI_RES_WR_DCACPU(0) | 187 V_FW_RI_RES_WR_DCACPU(0) |
188 V_FW_RI_RES_WR_FBMIN(3) | 188 V_FW_RI_RES_WR_FBMIN(2) |
189 V_FW_RI_RES_WR_FBMAX(3) | 189 V_FW_RI_RES_WR_FBMAX(3) |
190 V_FW_RI_RES_WR_CIDXFTHRESHO(0) | 190 V_FW_RI_RES_WR_CIDXFTHRESHO(0) |
191 V_FW_RI_RES_WR_CIDXFTHRESH(0) | 191 V_FW_RI_RES_WR_CIDXFTHRESH(0) |
@@ -235,12 +235,78 @@ err1:
235 return -ENOMEM; 235 return -ENOMEM;
236} 236}
237 237
238static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) 238static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
239 struct ib_send_wr *wr, int max, u32 *plenp)
239{ 240{
241 u8 *dstp, *srcp;
242 u32 plen = 0;
240 int i; 243 int i;
244 int rem, len;
245
246 dstp = (u8 *)immdp->data;
247 for (i = 0; i < wr->num_sge; i++) {
248 if ((plen + wr->sg_list[i].length) > max)
249 return -EMSGSIZE;
250 srcp = (u8 *)(unsigned long)wr->sg_list[i].addr;
251 plen += wr->sg_list[i].length;
252 rem = wr->sg_list[i].length;
253 while (rem) {
254 if (dstp == (u8 *)&sq->queue[sq->size])
255 dstp = (u8 *)sq->queue;
256 if (rem <= (u8 *)&sq->queue[sq->size] - dstp)
257 len = rem;
258 else
259 len = (u8 *)&sq->queue[sq->size] - dstp;
260 memcpy(dstp, srcp, len);
261 dstp += len;
262 srcp += len;
263 rem -= len;
264 }
265 }
266 immdp->op = FW_RI_DATA_IMMD;
267 immdp->r1 = 0;
268 immdp->r2 = 0;
269 immdp->immdlen = cpu_to_be32(plen);
270 *plenp = plen;
271 return 0;
272}
273
274static int build_isgl(__be64 *queue_start, __be64 *queue_end,
275 struct fw_ri_isgl *isglp, struct ib_sge *sg_list,
276 int num_sge, u32 *plenp)
277
278{
279 int i;
280 u32 plen = 0;
281 __be64 *flitp = (__be64 *)isglp->sge;
282
283 for (i = 0; i < num_sge; i++) {
284 if ((plen + sg_list[i].length) < plen)
285 return -EMSGSIZE;
286 plen += sg_list[i].length;
287 *flitp = cpu_to_be64(((u64)sg_list[i].lkey << 32) |
288 sg_list[i].length);
289 if (++flitp == queue_end)
290 flitp = queue_start;
291 *flitp = cpu_to_be64(sg_list[i].addr);
292 if (++flitp == queue_end)
293 flitp = queue_start;
294 }
295 isglp->op = FW_RI_DATA_ISGL;
296 isglp->r1 = 0;
297 isglp->nsge = cpu_to_be16(num_sge);
298 isglp->r2 = 0;
299 if (plenp)
300 *plenp = plen;
301 return 0;
302}
303
304static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
305 struct ib_send_wr *wr, u8 *len16)
306{
241 u32 plen; 307 u32 plen;
242 int size; 308 int size;
243 u8 *datap; 309 int ret;
244 310
245 if (wr->num_sge > T4_MAX_SEND_SGE) 311 if (wr->num_sge > T4_MAX_SEND_SGE)
246 return -EINVAL; 312 return -EINVAL;
@@ -267,43 +333,23 @@ static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
267 default: 333 default:
268 return -EINVAL; 334 return -EINVAL;
269 } 335 }
336
270 plen = 0; 337 plen = 0;
271 if (wr->num_sge) { 338 if (wr->num_sge) {
272 if (wr->send_flags & IB_SEND_INLINE) { 339 if (wr->send_flags & IB_SEND_INLINE) {
273 datap = (u8 *)wqe->send.u.immd_src[0].data; 340 ret = build_immd(sq, wqe->send.u.immd_src, wr,
274 for (i = 0; i < wr->num_sge; i++) { 341 T4_MAX_SEND_INLINE, &plen);
275 if ((plen + wr->sg_list[i].length) > 342 if (ret)
276 T4_MAX_SEND_INLINE) { 343 return ret;
277 return -EMSGSIZE;
278 }
279 plen += wr->sg_list[i].length;
280 memcpy(datap,
281 (void *)(unsigned long)wr->sg_list[i].addr,
282 wr->sg_list[i].length);
283 datap += wr->sg_list[i].length;
284 }
285 wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD;
286 wqe->send.u.immd_src[0].r1 = 0;
287 wqe->send.u.immd_src[0].r2 = 0;
288 wqe->send.u.immd_src[0].immdlen = cpu_to_be32(plen);
289 size = sizeof wqe->send + sizeof(struct fw_ri_immd) + 344 size = sizeof wqe->send + sizeof(struct fw_ri_immd) +
290 plen; 345 plen;
291 } else { 346 } else {
292 for (i = 0; i < wr->num_sge; i++) { 347 ret = build_isgl((__be64 *)sq->queue,
293 if ((plen + wr->sg_list[i].length) < plen) 348 (__be64 *)&sq->queue[sq->size],
294 return -EMSGSIZE; 349 wqe->send.u.isgl_src,
295 plen += wr->sg_list[i].length; 350 wr->sg_list, wr->num_sge, &plen);
296 wqe->send.u.isgl_src[0].sge[i].stag = 351 if (ret)
297 cpu_to_be32(wr->sg_list[i].lkey); 352 return ret;
298 wqe->send.u.isgl_src[0].sge[i].len =
299 cpu_to_be32(wr->sg_list[i].length);
300 wqe->send.u.isgl_src[0].sge[i].to =
301 cpu_to_be64(wr->sg_list[i].addr);
302 }
303 wqe->send.u.isgl_src[0].op = FW_RI_DATA_ISGL;
304 wqe->send.u.isgl_src[0].r1 = 0;
305 wqe->send.u.isgl_src[0].nsge = cpu_to_be16(wr->num_sge);
306 wqe->send.u.isgl_src[0].r2 = 0;
307 size = sizeof wqe->send + sizeof(struct fw_ri_isgl) + 353 size = sizeof wqe->send + sizeof(struct fw_ri_isgl) +
308 wr->num_sge * sizeof(struct fw_ri_sge); 354 wr->num_sge * sizeof(struct fw_ri_sge);
309 } 355 }
@@ -313,62 +359,40 @@ static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
313 wqe->send.u.immd_src[0].r2 = 0; 359 wqe->send.u.immd_src[0].r2 = 0;
314 wqe->send.u.immd_src[0].immdlen = 0; 360 wqe->send.u.immd_src[0].immdlen = 0;
315 size = sizeof wqe->send + sizeof(struct fw_ri_immd); 361 size = sizeof wqe->send + sizeof(struct fw_ri_immd);
362 plen = 0;
316 } 363 }
317 *len16 = DIV_ROUND_UP(size, 16); 364 *len16 = DIV_ROUND_UP(size, 16);
318 wqe->send.plen = cpu_to_be32(plen); 365 wqe->send.plen = cpu_to_be32(plen);
319 return 0; 366 return 0;
320} 367}
321 368
322static int build_rdma_write(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) 369static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
370 struct ib_send_wr *wr, u8 *len16)
323{ 371{
324 int i;
325 u32 plen; 372 u32 plen;
326 int size; 373 int size;
327 u8 *datap; 374 int ret;
328 375
329 if (wr->num_sge > T4_MAX_WRITE_SGE) 376 if (wr->num_sge > T4_MAX_SEND_SGE)
330 return -EINVAL; 377 return -EINVAL;
331 wqe->write.r2 = 0; 378 wqe->write.r2 = 0;
332 wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); 379 wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
333 wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); 380 wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
334 plen = 0;
335 if (wr->num_sge) { 381 if (wr->num_sge) {
336 if (wr->send_flags & IB_SEND_INLINE) { 382 if (wr->send_flags & IB_SEND_INLINE) {
337 datap = (u8 *)wqe->write.u.immd_src[0].data; 383 ret = build_immd(sq, wqe->write.u.immd_src, wr,
338 for (i = 0; i < wr->num_sge; i++) { 384 T4_MAX_WRITE_INLINE, &plen);
339 if ((plen + wr->sg_list[i].length) > 385 if (ret)
340 T4_MAX_WRITE_INLINE) { 386 return ret;
341 return -EMSGSIZE;
342 }
343 plen += wr->sg_list[i].length;
344 memcpy(datap,
345 (void *)(unsigned long)wr->sg_list[i].addr,
346 wr->sg_list[i].length);
347 datap += wr->sg_list[i].length;
348 }
349 wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD;
350 wqe->write.u.immd_src[0].r1 = 0;
351 wqe->write.u.immd_src[0].r2 = 0;
352 wqe->write.u.immd_src[0].immdlen = cpu_to_be32(plen);
353 size = sizeof wqe->write + sizeof(struct fw_ri_immd) + 387 size = sizeof wqe->write + sizeof(struct fw_ri_immd) +
354 plen; 388 plen;
355 } else { 389 } else {
356 for (i = 0; i < wr->num_sge; i++) { 390 ret = build_isgl((__be64 *)sq->queue,
357 if ((plen + wr->sg_list[i].length) < plen) 391 (__be64 *)&sq->queue[sq->size],
358 return -EMSGSIZE; 392 wqe->write.u.isgl_src,
359 plen += wr->sg_list[i].length; 393 wr->sg_list, wr->num_sge, &plen);
360 wqe->write.u.isgl_src[0].sge[i].stag = 394 if (ret)
361 cpu_to_be32(wr->sg_list[i].lkey); 395 return ret;
362 wqe->write.u.isgl_src[0].sge[i].len =
363 cpu_to_be32(wr->sg_list[i].length);
364 wqe->write.u.isgl_src[0].sge[i].to =
365 cpu_to_be64(wr->sg_list[i].addr);
366 }
367 wqe->write.u.isgl_src[0].op = FW_RI_DATA_ISGL;
368 wqe->write.u.isgl_src[0].r1 = 0;
369 wqe->write.u.isgl_src[0].nsge =
370 cpu_to_be16(wr->num_sge);
371 wqe->write.u.isgl_src[0].r2 = 0;
372 size = sizeof wqe->write + sizeof(struct fw_ri_isgl) + 396 size = sizeof wqe->write + sizeof(struct fw_ri_isgl) +
373 wr->num_sge * sizeof(struct fw_ri_sge); 397 wr->num_sge * sizeof(struct fw_ri_sge);
374 } 398 }
@@ -378,6 +402,7 @@ static int build_rdma_write(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
378 wqe->write.u.immd_src[0].r2 = 0; 402 wqe->write.u.immd_src[0].r2 = 0;
379 wqe->write.u.immd_src[0].immdlen = 0; 403 wqe->write.u.immd_src[0].immdlen = 0;
380 size = sizeof wqe->write + sizeof(struct fw_ri_immd); 404 size = sizeof wqe->write + sizeof(struct fw_ri_immd);
405 plen = 0;
381 } 406 }
382 *len16 = DIV_ROUND_UP(size, 16); 407 *len16 = DIV_ROUND_UP(size, 16);
383 wqe->write.plen = cpu_to_be32(plen); 408 wqe->write.plen = cpu_to_be32(plen);
@@ -416,29 +441,13 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
416static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, 441static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
417 struct ib_recv_wr *wr, u8 *len16) 442 struct ib_recv_wr *wr, u8 *len16)
418{ 443{
419 int i; 444 int ret;
420 int plen = 0;
421 445
422 for (i = 0; i < wr->num_sge; i++) { 446 ret = build_isgl((__be64 *)qhp->wq.rq.queue,
423 if ((plen + wr->sg_list[i].length) < plen) 447 (__be64 *)&qhp->wq.rq.queue[qhp->wq.rq.size],
424 return -EMSGSIZE; 448 &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
425 plen += wr->sg_list[i].length; 449 if (ret)
426 wqe->recv.isgl.sge[i].stag = 450 return ret;
427 cpu_to_be32(wr->sg_list[i].lkey);
428 wqe->recv.isgl.sge[i].len =
429 cpu_to_be32(wr->sg_list[i].length);
430 wqe->recv.isgl.sge[i].to =
431 cpu_to_be64(wr->sg_list[i].addr);
432 }
433 for (; i < T4_MAX_RECV_SGE; i++) {
434 wqe->recv.isgl.sge[i].stag = 0;
435 wqe->recv.isgl.sge[i].len = 0;
436 wqe->recv.isgl.sge[i].to = 0;
437 }
438 wqe->recv.isgl.op = FW_RI_DATA_ISGL;
439 wqe->recv.isgl.r1 = 0;
440 wqe->recv.isgl.nsge = cpu_to_be16(wr->num_sge);
441 wqe->recv.isgl.r2 = 0;
442 *len16 = DIV_ROUND_UP(sizeof wqe->recv + 451 *len16 = DIV_ROUND_UP(sizeof wqe->recv +
443 wr->num_sge * sizeof(struct fw_ri_sge), 16); 452 wr->num_sge * sizeof(struct fw_ri_sge), 16);
444 return 0; 453 return 0;
@@ -547,7 +556,9 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
547 *bad_wr = wr; 556 *bad_wr = wr;
548 break; 557 break;
549 } 558 }
550 wqe = &qhp->wq.sq.queue[qhp->wq.sq.pidx]; 559 wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue +
560 qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE);
561
551 fw_flags = 0; 562 fw_flags = 0;
552 if (wr->send_flags & IB_SEND_SOLICITED) 563 if (wr->send_flags & IB_SEND_SOLICITED)
553 fw_flags |= FW_RI_SOLICITED_EVENT_FLAG; 564 fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
@@ -564,12 +575,12 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
564 swsqe->opcode = FW_RI_SEND; 575 swsqe->opcode = FW_RI_SEND;
565 else 576 else
566 swsqe->opcode = FW_RI_SEND_WITH_INV; 577 swsqe->opcode = FW_RI_SEND_WITH_INV;
567 err = build_rdma_send(wqe, wr, &len16); 578 err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
568 break; 579 break;
569 case IB_WR_RDMA_WRITE: 580 case IB_WR_RDMA_WRITE:
570 fw_opcode = FW_RI_RDMA_WRITE_WR; 581 fw_opcode = FW_RI_RDMA_WRITE_WR;
571 swsqe->opcode = FW_RI_RDMA_WRITE; 582 swsqe->opcode = FW_RI_RDMA_WRITE;
572 err = build_rdma_write(wqe, wr, &len16); 583 err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16);
573 break; 584 break;
574 case IB_WR_RDMA_READ: 585 case IB_WR_RDMA_READ:
575 case IB_WR_RDMA_READ_WITH_INV: 586 case IB_WR_RDMA_READ_WITH_INV:
@@ -619,8 +630,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
619 swsqe->opcode, swsqe->read_len); 630 swsqe->opcode, swsqe->read_len);
620 wr = wr->next; 631 wr = wr->next;
621 num_wrs--; 632 num_wrs--;
622 t4_sq_produce(&qhp->wq); 633 t4_sq_produce(&qhp->wq, len16);
623 idx++; 634 idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
624 } 635 }
625 if (t4_wq_db_enabled(&qhp->wq)) 636 if (t4_wq_db_enabled(&qhp->wq))
626 t4_ring_sq_db(&qhp->wq, idx); 637 t4_ring_sq_db(&qhp->wq, idx);
@@ -656,7 +667,9 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
656 *bad_wr = wr; 667 *bad_wr = wr;
657 break; 668 break;
658 } 669 }
659 wqe = &qhp->wq.rq.queue[qhp->wq.rq.pidx]; 670 wqe = (union t4_recv_wr *)((u8 *)qhp->wq.rq.queue +
671 qhp->wq.rq.wq_pidx *
672 T4_EQ_ENTRY_SIZE);
660 if (num_wrs) 673 if (num_wrs)
661 err = build_rdma_recv(qhp, wqe, wr, &len16); 674 err = build_rdma_recv(qhp, wqe, wr, &len16);
662 else 675 else
@@ -675,15 +688,12 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
675 wqe->recv.r2[1] = 0; 688 wqe->recv.r2[1] = 0;
676 wqe->recv.r2[2] = 0; 689 wqe->recv.r2[2] = 0;
677 wqe->recv.len16 = len16; 690 wqe->recv.len16 = len16;
678 if (len16 < 5)
679 wqe->flits[8] = 0;
680
681 PDBG("%s cookie 0x%llx pidx %u\n", __func__, 691 PDBG("%s cookie 0x%llx pidx %u\n", __func__,
682 (unsigned long long) wr->wr_id, qhp->wq.rq.pidx); 692 (unsigned long long) wr->wr_id, qhp->wq.rq.pidx);
683 t4_rq_produce(&qhp->wq); 693 t4_rq_produce(&qhp->wq, len16);
694 idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
684 wr = wr->next; 695 wr = wr->next;
685 num_wrs--; 696 num_wrs--;
686 idx++;
687 } 697 }
688 if (t4_wq_db_enabled(&qhp->wq)) 698 if (t4_wq_db_enabled(&qhp->wq))
689 t4_ring_rq_db(&qhp->wq, idx); 699 t4_ring_rq_db(&qhp->wq, idx);