author     Steve Wise <swise@opengridcomputing.com>    2014-04-09 10:38:25 -0400
committer  Roland Dreier <roland@purestorage.com>      2014-04-11 14:36:01 -0400
commit     fa658a98a2d08352c514758b3394caf91360aa44
tree       78793d424958259fe51c859141d2f42f45a45501
parent     877f075aac900288ce2e6a64075cceff09210a7e
RDMA/cxgb4: Use the BAR2/WC path for kernel QPs and T5 devices
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
[ Fix cast from u64* to integer. - Roland ]
Signed-off-by: Roland Dreier <roland@purestorage.com>
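
In short: on T5 the driver maps BAR2 write-combined and rings doorbells through per-QP BAR2 pages, and when exactly one coalesced 64-byte WR is posted, the WR itself is pushed through the WC window instead of being fetched by DMA. A condensed sketch of the reworked SQ doorbell (ours, compressing the t4.h hunk below; not code added by the patch):

/*
 * Condensed sketch of the new t4_ring_sq_db() logic -- see the real
 * function in the t4.h diff below.
 */
static inline void ring_sq_db_sketch(struct t4_wq *wq, u16 inc, u8 t5,
				     union t4_wr *wqe)
{
	wmb();		/* order WR stores before the doorbell */
	if (t5 && inc == 1 && wqe) {
		/* Single WR: copy all 64 bytes through the write-combined
		 * BAR2 window so the SGE executes it without a DMA fetch
		 * from host memory. */
		pio_copy(wq->sq.udb + 7, (void *)wqe);
		wmb();	/* flush the WC doorbell area */
	} else if (t5) {
		writel(PIDX_T5(inc), wq->sq.udb);	/* per-QP T5 doorbell */
		wmb();
	} else {
		writel(QID(wq->sq.qid) | PIDX(inc), wq->db); /* legacy T4 DB */
	}
}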
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c    41
-rw-r--r--  drivers/infiniband/hw/cxgb4/iw_cxgb4.h   2
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c        57
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4.h        60
4 files changed, 127 insertions(+), 33 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 9489a388376c..f4fa50a609e2 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -682,7 +682,10 @@ static void c4iw_dealloc(struct uld_ctx *ctx)
 	idr_destroy(&ctx->dev->hwtid_idr);
 	idr_destroy(&ctx->dev->stid_idr);
 	idr_destroy(&ctx->dev->atid_idr);
-	iounmap(ctx->dev->rdev.oc_mw_kva);
+	if (ctx->dev->rdev.bar2_kva)
+		iounmap(ctx->dev->rdev.bar2_kva);
+	if (ctx->dev->rdev.oc_mw_kva)
+		iounmap(ctx->dev->rdev.oc_mw_kva);
 	ib_dealloc_device(&ctx->dev->ibdev);
 	ctx->dev = NULL;
 }
@@ -722,11 +725,31 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 	}
 	devp->rdev.lldi = *infop;
 
-	devp->rdev.oc_mw_pa = pci_resource_start(devp->rdev.lldi.pdev, 2) +
-		(pci_resource_len(devp->rdev.lldi.pdev, 2) -
-		 roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size));
-	devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
-					  devp->rdev.lldi.vr->ocq.size);
+	/*
+	 * For T5 devices, we map all of BAR2 with WC.
+	 * For T4 devices with onchip qp mem, we map only that part
+	 * of BAR2 with WC.
+	 */
+	devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2);
+	if (is_t5(devp->rdev.lldi.adapter_type)) {
+		devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
+			pci_resource_len(devp->rdev.lldi.pdev, 2));
+		if (!devp->rdev.bar2_kva) {
+			pr_err(MOD "Unable to ioremap BAR2\n");
+			return ERR_PTR(-EINVAL);
+		}
+	} else if (ocqp_supported(infop)) {
+		devp->rdev.oc_mw_pa =
+			pci_resource_start(devp->rdev.lldi.pdev, 2) +
+			pci_resource_len(devp->rdev.lldi.pdev, 2) -
+			roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size);
+		devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
+			devp->rdev.lldi.vr->ocq.size);
+		if (!devp->rdev.oc_mw_kva) {
+			pr_err(MOD "Unable to ioremap onchip mem\n");
+			return ERR_PTR(-EINVAL);
+		}
+	}
 
 	PDBG(KERN_INFO MOD "ocq memory: "
 	     "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
@@ -1003,9 +1026,11 @@ static int enable_qp_db(int id, void *p, void *data)
 static void resume_rc_qp(struct c4iw_qp *qp)
 {
 	spin_lock(&qp->lock);
-	t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc);
+	t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc,
+		      is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
 	qp->wq.sq.wq_pidx_inc = 0;
-	t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc);
+	t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc,
+		      is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
 	qp->wq.rq.wq_pidx_inc = 0;
 	spin_unlock(&qp->lock);
 }
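
A worked example of the T4 on-chip window placement above. The sizes are hypothetical; only the arithmetic comes from the patch:

#include <stdio.h>

/* Models the oc_mw_pa computation from c4iw_alloc() above with made-up
 * sizes; roundup_pow_of_two() is open-coded as a loop. */
int main(void)
{
	unsigned long bar2_start = 0xd0000000UL;   /* hypothetical BAR2 base */
	unsigned long bar2_len   = 64UL << 20;     /* hypothetical 64 MB BAR2 */
	unsigned long ocq_size   = 6UL << 20;      /* hypothetical OCQ size */
	unsigned long rounded    = 1;
	unsigned long oc_mw_pa;

	while (rounded < ocq_size)
		rounded <<= 1;                     /* roundup_pow_of_two: 8 MB */

	/* T4 maps only this last, power-of-two sized slice of BAR2 with
	 * WC; T5 instead maps all of BAR2 with WC. */
	oc_mw_pa = bar2_start + bar2_len - rounded;
	printf("OCQ WC window: 0x%lx (+%lu MB into BAR2)\n",
	       oc_mw_pa, (oc_mw_pa - bar2_start) >> 20);   /* +56 MB */
	return 0;
}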
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index e872203c5424..7b8c5806a09d 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -149,6 +149,8 @@ struct c4iw_rdev {
 	struct gen_pool *ocqp_pool;
 	u32 flags;
 	struct cxgb4_lld_info lldi;
+	unsigned long bar2_pa;
+	void __iomem *bar2_kva;
 	unsigned long oc_mw_pa;
 	void __iomem *oc_mw_kva;
 	struct c4iw_stats stats;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index cb76eb5eee1f..e2fcbf4814f2 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -212,13 +212,23 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 
 	wq->db = rdev->lldi.db_reg;
 	wq->gts = rdev->lldi.gts_reg;
-	if (user) {
-		wq->sq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) +
-			(wq->sq.qid << rdev->qpshift);
-		wq->sq.udb &= PAGE_MASK;
-		wq->rq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) +
-			(wq->rq.qid << rdev->qpshift);
-		wq->rq.udb &= PAGE_MASK;
+	if (user || is_t5(rdev->lldi.adapter_type)) {
+		u32 off;
+
+		off = (wq->sq.qid << rdev->qpshift) & PAGE_MASK;
+		if (user) {
+			wq->sq.udb = (u64 __iomem *)(rdev->bar2_pa + off);
+		} else {
+			off += 128 * (wq->sq.qid & rdev->qpmask) + 8;
+			wq->sq.udb = (u64 __iomem *)(rdev->bar2_kva + off);
+		}
+		off = (wq->rq.qid << rdev->qpshift) & PAGE_MASK;
+		if (user) {
+			wq->rq.udb = (u64 __iomem *)(rdev->bar2_pa + off);
+		} else {
+			off += 128 * (wq->rq.qid & rdev->qpmask) + 8;
+			wq->rq.udb = (u64 __iomem *)(rdev->bar2_kva + off);
+		}
 	}
 	wq->rdev = rdev;
 	wq->rq.msn = 1;
@@ -299,9 +309,10 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	if (ret)
 		goto free_dma;
 
-	PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n",
+	PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%lx rqudb 0x%lx\n",
 	     __func__, wq->sq.qid, wq->rq.qid, wq->db,
-	     (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb);
+	     (__force unsigned long) wq->sq.udb,
+	     (__force unsigned long) wq->rq.udb);
 
 	return 0;
 free_dma:
@@ -650,9 +661,10 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc)
 
 	spin_lock_irqsave(&qhp->rhp->lock, flags);
 	spin_lock(&qhp->lock);
-	if (qhp->rhp->db_state == NORMAL) {
-		t4_ring_sq_db(&qhp->wq, inc);
-	} else {
+	if (qhp->rhp->db_state == NORMAL)
+		t4_ring_sq_db(&qhp->wq, inc,
+			      is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+	else {
 		add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
 		qhp->wq.sq.wq_pidx_inc += inc;
 	}
@@ -667,9 +679,10 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
 
 	spin_lock_irqsave(&qhp->rhp->lock, flags);
 	spin_lock(&qhp->lock);
-	if (qhp->rhp->db_state == NORMAL) {
-		t4_ring_rq_db(&qhp->wq, inc);
-	} else {
+	if (qhp->rhp->db_state == NORMAL)
+		t4_ring_rq_db(&qhp->wq, inc,
+			      is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+	else {
 		add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
 		qhp->wq.rq.wq_pidx_inc += inc;
 	}
@@ -686,7 +699,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	enum fw_wr_opcodes fw_opcode = 0;
 	enum fw_ri_wr_flags fw_flags;
 	struct c4iw_qp *qhp;
-	union t4_wr *wqe;
+	union t4_wr *wqe = NULL;
 	u32 num_wrs;
 	struct t4_swsqe *swsqe;
 	unsigned long flag;
@@ -792,7 +805,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
 	}
 	if (!qhp->rhp->rdev.status_page->db_off) {
-		t4_ring_sq_db(&qhp->wq, idx);
+		t4_ring_sq_db(&qhp->wq, idx,
+			      is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
 		spin_unlock_irqrestore(&qhp->lock, flag);
 	} else {
 		spin_unlock_irqrestore(&qhp->lock, flag);
@@ -806,7 +820,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 {
 	int err = 0;
 	struct c4iw_qp *qhp;
-	union t4_recv_wr *wqe;
+	union t4_recv_wr *wqe = NULL;
 	u32 num_wrs;
 	u8 len16 = 0;
 	unsigned long flag;
@@ -858,7 +872,8 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 		num_wrs--;
 	}
 	if (!qhp->rhp->rdev.status_page->db_off) {
-		t4_ring_rq_db(&qhp->wq, idx);
+		t4_ring_rq_db(&qhp->wq, idx,
+			      is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
 		spin_unlock_irqrestore(&qhp->lock, flag);
 	} else {
 		spin_unlock_irqrestore(&qhp->lock, flag);
@@ -1677,11 +1692,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 		mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize);
 		insert_mmap(ucontext, mm2);
 		mm3->key = uresp.sq_db_gts_key;
-		mm3->addr = qhp->wq.sq.udb;
+		mm3->addr = (__force unsigned long) qhp->wq.sq.udb;
 		mm3->len = PAGE_SIZE;
 		insert_mmap(ucontext, mm3);
 		mm4->key = uresp.rq_db_gts_key;
-		mm4->addr = qhp->wq.rq.udb;
+		mm4->addr = (__force unsigned long) qhp->wq.rq.udb;
 		mm4->len = PAGE_SIZE;
 		insert_mmap(ucontext, mm4);
 		if (mm5) {
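
A worked example of the kernel-mode udb computation in create_qp() above. The qpshift/qpmask values here are hypothetical; the real ones come from the cxgb4 LLD's doorbell-density configuration:

#include <stdio.h>

/* Reproduces the off/udb arithmetic from create_qp() with made-up
 * parameters: page base for user mmap, then the qid's 128-byte slot
 * within the page, then +8 to the T5 doorbell register. */
int main(void)
{
	unsigned long page_mask = ~0xfffUL;              /* 4 KB pages */
	unsigned int  qid       = 1030;                  /* hypothetical QP id */
	unsigned int  qpshift   = 5;                     /* hypothetical */
	unsigned int  qpmask    = (1 << (12 - 5)) - 1;   /* 127, hypothetical */
	unsigned long off;

	/* Page holding this qid's doorbell area (what user mode mmaps). */
	off = ((unsigned long)qid << qpshift) & page_mask;

	/* Kernel mode steps to the qid's 128-byte slot inside that page,
	 * then 8 bytes further to the doorbell register itself. */
	off += 128 * (qid & qpmask) + 8;

	printf("udb = bar2_kva + 0x%lx\n", off);         /* 0x8308 here */
	return 0;
}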
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index eeca8b1e6376..931bfd105c49 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -292,7 +292,7 @@ struct t4_sq {
 	unsigned long phys_addr;
 	struct t4_swsqe *sw_sq;
 	struct t4_swsqe *oldest_read;
-	u64 udb;
+	u64 __iomem *udb;
 	size_t memsize;
 	u32 qid;
 	u16 in_use;
@@ -314,7 +314,7 @@ struct t4_rq {
 	dma_addr_t dma_addr;
 	DEFINE_DMA_UNMAP_ADDR(mapping);
 	struct t4_swrqe *sw_rq;
-	u64 udb;
+	u64 __iomem *udb;
 	size_t memsize;
 	u32 qid;
 	u32 msn;
@@ -435,15 +435,67 @@ static inline u16 t4_sq_wq_size(struct t4_wq *wq)
 	return wq->sq.size * T4_SQ_NUM_SLOTS;
 }
 
-static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc)
+/* This function copies 64 byte coalesced work request to memory
+ * mapped BAR2 space. For coalesced WRs, the SGE fetches data
+ * from the FIFO instead of from Host.
+ */
+static inline void pio_copy(u64 __iomem *dst, u64 *src)
+{
+	int count = 8;
+
+	while (count) {
+		writeq(*src, dst);
+		src++;
+		dst++;
+		count--;
+	}
+}
+
+static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5,
+				 union t4_wr *wqe)
 {
+
+	/* Flush host queue memory writes. */
 	wmb();
+	if (t5) {
+		if (inc == 1 && wqe) {
+			PDBG("%s: WC wq->sq.pidx = %d\n",
+			     __func__, wq->sq.pidx);
+			pio_copy(wq->sq.udb + 7, (void *)wqe);
+		} else {
+			PDBG("%s: DB wq->sq.pidx = %d\n",
+			     __func__, wq->sq.pidx);
+			writel(PIDX_T5(inc), wq->sq.udb);
+		}
+
+		/* Flush user doorbell area writes. */
+		wmb();
+		return;
+	}
 	writel(QID(wq->sq.qid) | PIDX(inc), wq->db);
 }
 
-static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc)
+static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5,
+				 union t4_recv_wr *wqe)
 {
+
+	/* Flush host queue memory writes. */
 	wmb();
+	if (t5) {
+		if (inc == 1 && wqe) {
+			PDBG("%s: WC wq->rq.pidx = %d\n",
+			     __func__, wq->rq.pidx);
+			pio_copy(wq->rq.udb + 7, (void *)wqe);
+		} else {
+			PDBG("%s: DB wq->rq.pidx = %d\n",
+			     __func__, wq->rq.pidx);
+			writel(PIDX_T5(inc), wq->rq.udb);
+		}
+
+		/* Flush user doorbell area writes. */
+		wmb();
+		return;
+	}
 	writel(QID(wq->rq.qid) | PIDX(inc), wq->db);
 }
 
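
For reference, the byte arithmetic behind "udb + 7" in the WC path above. This is our reading, derived from the kernel-mode offsets in create_qp(), not text from the patch:

#include <stdio.h>

/* Kernel udb points 8 bytes into the qid's 128-byte BAR2 doorbell
 * slot, and "udb + 7" advances 7 u64s (56 bytes) more, so pio_copy()
 * writes the 64-byte WR starting at byte offset 64 of the slot. */
int main(void)
{
	unsigned long slot = 0;                        /* start of 128 B slot */
	unsigned long udb  = slot + 8;                 /* from create_qp() */
	unsigned long wc   = udb + 7 * sizeof(unsigned long long);

	printf("pio_copy target = slot offset %lu\n", wc - slot);   /* 64 */
	return 0;
}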