diff options
author | Ralph Campbell <ralph.campbell@qlogic.com> | 2007-03-15 17:44:51 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2007-04-18 23:20:55 -0400 |
commit | 3859e39d75b72f35f7d38c618fbbacb39a440c22 (patch) | |
tree | 51d57723574395b54914c08260b9d0a8467a91b1 /drivers/infiniband/hw/ipath/ipath_ruc.c | |
parent | 7b21d26ddad6912bf345e8e88a51a5ce98a036ad (diff) |
IB/ipath: Support larger IB_QP_MAX_DEST_RD_ATOMIC and IB_QP_MAX_QP_RD_ATOMIC
This patch adds support for multiple RDMA reads and atomics to be sent
before an ACK is required to be seen by the requester.
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/ipath/ipath_ruc.c')
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_ruc.c | 58 |
1 file changed, 30 insertions(+), 28 deletions(-)
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index cda84933bb43..d9c2a9b15d86 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c | |||
@@ -255,6 +255,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) | |||
255 | unsigned long flags; | 255 | unsigned long flags; |
256 | struct ib_wc wc; | 256 | struct ib_wc wc; |
257 | u64 sdata; | 257 | u64 sdata; |
258 | atomic64_t *maddr; | ||
258 | 259 | ||
259 | qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); | 260 | qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); |
260 | if (!qp) { | 261 | if (!qp) { |
@@ -311,7 +312,7 @@ again: | |||
311 | sqp->s_rnr_retry--; | 312 | sqp->s_rnr_retry--; |
312 | dev->n_rnr_naks++; | 313 | dev->n_rnr_naks++; |
313 | sqp->s_rnr_timeout = | 314 | sqp->s_rnr_timeout = |
314 | ib_ipath_rnr_table[sqp->r_min_rnr_timer]; | 315 | ib_ipath_rnr_table[qp->r_min_rnr_timer]; |
315 | ipath_insert_rnr_queue(sqp); | 316 | ipath_insert_rnr_queue(sqp); |
316 | goto done; | 317 | goto done; |
317 | } | 318 | } |
@@ -344,20 +345,22 @@ again: | |||
344 | wc.sl = sqp->remote_ah_attr.sl; | 345 | wc.sl = sqp->remote_ah_attr.sl; |
345 | wc.dlid_path_bits = 0; | 346 | wc.dlid_path_bits = 0; |
346 | wc.port_num = 0; | 347 | wc.port_num = 0; |
348 | spin_lock_irqsave(&sqp->s_lock, flags); | ||
347 | ipath_sqerror_qp(sqp, &wc); | 349 | ipath_sqerror_qp(sqp, &wc); |
350 | spin_unlock_irqrestore(&sqp->s_lock, flags); | ||
348 | goto done; | 351 | goto done; |
349 | } | 352 | } |
350 | break; | 353 | break; |
351 | 354 | ||
352 | case IB_WR_RDMA_READ: | 355 | case IB_WR_RDMA_READ: |
356 | if (unlikely(!(qp->qp_access_flags & | ||
357 | IB_ACCESS_REMOTE_READ))) | ||
358 | goto acc_err; | ||
353 | if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, | 359 | if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, |
354 | wqe->wr.wr.rdma.remote_addr, | 360 | wqe->wr.wr.rdma.remote_addr, |
355 | wqe->wr.wr.rdma.rkey, | 361 | wqe->wr.wr.rdma.rkey, |
356 | IB_ACCESS_REMOTE_READ))) | 362 | IB_ACCESS_REMOTE_READ))) |
357 | goto acc_err; | 363 | goto acc_err; |
358 | if (unlikely(!(qp->qp_access_flags & | ||
359 | IB_ACCESS_REMOTE_READ))) | ||
360 | goto acc_err; | ||
361 | qp->r_sge.sge = wqe->sg_list[0]; | 364 | qp->r_sge.sge = wqe->sg_list[0]; |
362 | qp->r_sge.sg_list = wqe->sg_list + 1; | 365 | qp->r_sge.sg_list = wqe->sg_list + 1; |
363 | qp->r_sge.num_sge = wqe->wr.num_sge; | 366 | qp->r_sge.num_sge = wqe->wr.num_sge; |
@@ -365,22 +368,22 @@ again: | |||
365 | 368 | ||
366 | case IB_WR_ATOMIC_CMP_AND_SWP: | 369 | case IB_WR_ATOMIC_CMP_AND_SWP: |
367 | case IB_WR_ATOMIC_FETCH_AND_ADD: | 370 | case IB_WR_ATOMIC_FETCH_AND_ADD: |
371 | if (unlikely(!(qp->qp_access_flags & | ||
372 | IB_ACCESS_REMOTE_ATOMIC))) | ||
373 | goto acc_err; | ||
368 | if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), | 374 | if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), |
369 | wqe->wr.wr.rdma.remote_addr, | 375 | wqe->wr.wr.atomic.remote_addr, |
370 | wqe->wr.wr.rdma.rkey, | 376 | wqe->wr.wr.atomic.rkey, |
371 | IB_ACCESS_REMOTE_ATOMIC))) | 377 | IB_ACCESS_REMOTE_ATOMIC))) |
372 | goto acc_err; | 378 | goto acc_err; |
373 | /* Perform atomic OP and save result. */ | 379 | /* Perform atomic OP and save result. */ |
374 | sdata = wqe->wr.wr.atomic.swap; | 380 | maddr = (atomic64_t *) qp->r_sge.sge.vaddr; |
375 | spin_lock_irqsave(&dev->pending_lock, flags); | 381 | sdata = wqe->wr.wr.atomic.compare_add; |
376 | qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; | 382 | *(u64 *) sqp->s_sge.sge.vaddr = |
377 | if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) | 383 | (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? |
378 | *(u64 *) qp->r_sge.sge.vaddr = | 384 | (u64) atomic64_add_return(sdata, maddr) - sdata : |
379 | qp->r_atomic_data + sdata; | 385 | (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, |
380 | else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add) | 386 | sdata, wqe->wr.wr.atomic.swap); |
381 | *(u64 *) qp->r_sge.sge.vaddr = sdata; | ||
382 | spin_unlock_irqrestore(&dev->pending_lock, flags); | ||
383 | *(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data; | ||
384 | goto send_comp; | 387 | goto send_comp; |
385 | 388 | ||
386 | default: | 389 | default: |
@@ -441,7 +444,7 @@ again: | |||
441 | send_comp: | 444 | send_comp: |
442 | sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; | 445 | sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; |
443 | 446 | ||
444 | if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) || | 447 | if (!(sqp->s_flags & IPATH_S_SIGNAL_REQ_WR) || |
445 | (wqe->wr.send_flags & IB_SEND_SIGNALED)) { | 448 | (wqe->wr.send_flags & IB_SEND_SIGNALED)) { |
446 | wc.wr_id = wqe->wr.wr_id; | 449 | wc.wr_id = wqe->wr.wr_id; |
447 | wc.status = IB_WC_SUCCESS; | 450 | wc.status = IB_WC_SUCCESS; |
@@ -503,7 +506,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) | |||
503 | * We clear the tasklet flag now since we are committing to return | 506 | * We clear the tasklet flag now since we are committing to return |
504 | * from the tasklet function. | 507 | * from the tasklet function. |
505 | */ | 508 | */ |
506 | clear_bit(IPATH_S_BUSY, &qp->s_flags); | 509 | clear_bit(IPATH_S_BUSY, &qp->s_busy); |
507 | tasklet_unlock(&qp->s_task); | 510 | tasklet_unlock(&qp->s_task); |
508 | want_buffer(dev->dd); | 511 | want_buffer(dev->dd); |
509 | dev->n_piowait++; | 512 | dev->n_piowait++; |
@@ -542,6 +545,9 @@ int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr) | |||
542 | wr->sg_list[0].addr & (sizeof(u64) - 1))) { | 545 | wr->sg_list[0].addr & (sizeof(u64) - 1))) { |
543 | ret = -EINVAL; | 546 | ret = -EINVAL; |
544 | goto bail; | 547 | goto bail; |
548 | } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { | ||
549 | ret = -EINVAL; | ||
550 | goto bail; | ||
545 | } | 551 | } |
546 | /* IB spec says that num_sge == 0 is OK. */ | 552 | /* IB spec says that num_sge == 0 is OK. */ |
547 | if (wr->num_sge > qp->s_max_sge) { | 553 | if (wr->num_sge > qp->s_max_sge) { |
@@ -648,7 +654,7 @@ void ipath_do_ruc_send(unsigned long data) | |||
648 | u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); | 654 | u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); |
649 | struct ipath_other_headers *ohdr; | 655 | struct ipath_other_headers *ohdr; |
650 | 656 | ||
651 | if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags)) | 657 | if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy)) |
652 | goto bail; | 658 | goto bail; |
653 | 659 | ||
654 | if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) { | 660 | if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) { |
@@ -684,19 +690,15 @@ again: | |||
684 | */ | 690 | */ |
685 | spin_lock_irqsave(&qp->s_lock, flags); | 691 | spin_lock_irqsave(&qp->s_lock, flags); |
686 | 692 | ||
687 | /* Sending responses has higher priority over sending requests. */ | 693 | if (!((qp->ibqp.qp_type == IB_QPT_RC) ? |
688 | if (qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE && | 694 | ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) : |
689 | (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0) | 695 | ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) { |
690 | bth2 = qp->s_ack_psn++ & IPATH_PSN_MASK; | ||
691 | else if (!((qp->ibqp.qp_type == IB_QPT_RC) ? | ||
692 | ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) : | ||
693 | ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) { | ||
694 | /* | 696 | /* |
695 | * Clear the busy bit before unlocking to avoid races with | 697 | * Clear the busy bit before unlocking to avoid races with |
696 | * adding new work queue items and then failing to process | 698 | * adding new work queue items and then failing to process |
697 | * them. | 699 | * them. |
698 | */ | 700 | */ |
699 | clear_bit(IPATH_S_BUSY, &qp->s_flags); | 701 | clear_bit(IPATH_S_BUSY, &qp->s_busy); |
700 | spin_unlock_irqrestore(&qp->s_lock, flags); | 702 | spin_unlock_irqrestore(&qp->s_lock, flags); |
701 | goto bail; | 703 | goto bail; |
702 | } | 704 | } |
@@ -729,7 +731,7 @@ again: | |||
729 | goto again; | 731 | goto again; |
730 | 732 | ||
731 | clear: | 733 | clear: |
732 | clear_bit(IPATH_S_BUSY, &qp->s_flags); | 734 | clear_bit(IPATH_S_BUSY, &qp->s_busy); |
733 | bail: | 735 | bail: |
734 | return; | 736 | return; |
735 | } | 737 | } |