author	Steve Wise <swise@opengridcomputing.com>	2014-04-09 10:38:25 -0400
committer	Roland Dreier <roland@purestorage.com>	2014-04-11 14:36:01 -0400
commit	fa658a98a2d08352c514758b3394caf91360aa44
tree	78793d424958259fe51c859141d2f42f45a45501
parent	877f075aac900288ce2e6a64075cceff09210a7e

RDMA/cxgb4: Use the BAR2/WC path for kernel QPs and T5 devices

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
[ Fix cast from u64* to integer. - Roland ]
Signed-off-by: Roland Dreier <roland@purestorage.com>
-rw-r--r--	drivers/infiniband/hw/cxgb4/device.c	41
-rw-r--r--	drivers/infiniband/hw/cxgb4/iw_cxgb4.h	2
-rw-r--r--	drivers/infiniband/hw/cxgb4/qp.c	57
-rw-r--r--	drivers/infiniband/hw/cxgb4/t4.h	60
4 files changed, 127 insertions(+), 33 deletions(-)
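In brief: T5 places per-QP doorbells in BAR2, which this patch maps write-combined (WC) and now uses for kernel QPs as well as user QPs. When exactly one work request has been posted, the driver pushes the whole 64-byte WR through the WC mapping so the SGE reads it from its FIFO instead of DMA-fetching it from host memory; otherwise it falls back to a plain producer-index doorbell write. A minimal sketch of the two flavors (illustration only, not the patch itself; the real helpers are in t4.h below, and the PIDX_T5() wrapping is omitted here):

	/* Hedged sketch: ring a T5 SQ doorbell at "udb", optionally
	 * pushing the 64-byte WR through the WC mapping. */
	static void t5_db_sketch(u64 __iomem *udb, u64 *wr64, u16 inc)
	{
		wmb();				/* order queue memory writes */
		if (inc == 1 && wr64) {
			int i;

			for (i = 0; i < 8; i++)	/* 8 x 8 bytes = one WR */
				writeq(wr64[i], udb + 7 + i);
		} else {
			writel(inc, udb);	/* real code uses PIDX_T5(inc) */
		}
		wmb();				/* flush the WC doorbell area */
	}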
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 9489a388376c..f4fa50a609e2 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -682,7 +682,10 @@ static void c4iw_dealloc(struct uld_ctx *ctx)
 	idr_destroy(&ctx->dev->hwtid_idr);
 	idr_destroy(&ctx->dev->stid_idr);
 	idr_destroy(&ctx->dev->atid_idr);
-	iounmap(ctx->dev->rdev.oc_mw_kva);
+	if (ctx->dev->rdev.bar2_kva)
+		iounmap(ctx->dev->rdev.bar2_kva);
+	if (ctx->dev->rdev.oc_mw_kva)
+		iounmap(ctx->dev->rdev.oc_mw_kva);
 	ib_dealloc_device(&ctx->dev->ibdev);
 	ctx->dev = NULL;
 }
@@ -722,11 +725,31 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 	}
 	devp->rdev.lldi = *infop;
 
-	devp->rdev.oc_mw_pa = pci_resource_start(devp->rdev.lldi.pdev, 2) +
-		(pci_resource_len(devp->rdev.lldi.pdev, 2) -
-		 roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size));
-	devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
-					  devp->rdev.lldi.vr->ocq.size);
+	/*
+	 * For T5 devices, we map all of BAR2 with WC.
+	 * For T4 devices with onchip qp mem, we map only that part
+	 * of BAR2 with WC.
+	 */
+	devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2);
+	if (is_t5(devp->rdev.lldi.adapter_type)) {
+		devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
+			pci_resource_len(devp->rdev.lldi.pdev, 2));
+		if (!devp->rdev.bar2_kva) {
+			pr_err(MOD "Unable to ioremap BAR2\n");
+			return ERR_PTR(-EINVAL);
+		}
+	} else if (ocqp_supported(infop)) {
+		devp->rdev.oc_mw_pa =
+			pci_resource_start(devp->rdev.lldi.pdev, 2) +
+			pci_resource_len(devp->rdev.lldi.pdev, 2) -
+			roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size);
+		devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
+			devp->rdev.lldi.vr->ocq.size);
+		if (!devp->rdev.oc_mw_kva) {
+			pr_err(MOD "Unable to ioremap onchip mem\n");
+			return ERR_PTR(-EINVAL);
+		}
+	}
 
 	PDBG(KERN_INFO MOD "ocq memory: "
 	     "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
@@ -1003,9 +1026,11 @@ static int enable_qp_db(int id, void *p, void *data)
 static void resume_rc_qp(struct c4iw_qp *qp)
 {
 	spin_lock(&qp->lock);
-	t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc);
+	t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc,
+		      is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
 	qp->wq.sq.wq_pidx_inc = 0;
-	t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc);
+	t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc,
+		      is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
 	qp->wq.rq.wq_pidx_inc = 0;
 	spin_unlock(&qp->lock);
 }
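Note that resume_rc_qp() passes NULL for the new wqe argument: after doorbell flow control, the accumulated wq_pidx_inc may cover several WRs and no single WR is at hand, so the WC fast path (which t4_ring_sq_db() below only takes when inc == 1 and a WR pointer is supplied) is deliberately skipped in favor of the plain doorbell write.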
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index e872203c5424..7b8c5806a09d 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -149,6 +149,8 @@ struct c4iw_rdev {
 	struct gen_pool *ocqp_pool;
 	u32 flags;
 	struct cxgb4_lld_info lldi;
+	unsigned long bar2_pa;
+	void __iomem *bar2_kva;
 	unsigned long oc_mw_pa;
 	void __iomem *oc_mw_kva;
 	struct c4iw_stats stats;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index cb76eb5eee1f..e2fcbf4814f2 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -212,13 +212,23 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 
 	wq->db = rdev->lldi.db_reg;
 	wq->gts = rdev->lldi.gts_reg;
-	if (user) {
-		wq->sq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) +
-					(wq->sq.qid << rdev->qpshift);
-		wq->sq.udb &= PAGE_MASK;
-		wq->rq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) +
-					(wq->rq.qid << rdev->qpshift);
-		wq->rq.udb &= PAGE_MASK;
+	if (user || is_t5(rdev->lldi.adapter_type)) {
+		u32 off;
+
+		off = (wq->sq.qid << rdev->qpshift) & PAGE_MASK;
+		if (user) {
+			wq->sq.udb = (u64 __iomem *)(rdev->bar2_pa + off);
+		} else {
+			off += 128 * (wq->sq.qid & rdev->qpmask) + 8;
+			wq->sq.udb = (u64 __iomem *)(rdev->bar2_kva + off);
+		}
+		off = (wq->rq.qid << rdev->qpshift) & PAGE_MASK;
+		if (user) {
+			wq->rq.udb = (u64 __iomem *)(rdev->bar2_pa + off);
+		} else {
+			off += 128 * (wq->rq.qid & rdev->qpmask) + 8;
+			wq->rq.udb = (u64 __iomem *)(rdev->bar2_kva + off);
+		}
 	}
 	wq->rdev = rdev;
 	wq->rq.msn = 1;
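The udb math above selects, per QP, the BAR2 doorbell page ((qid << qpshift) & PAGE_MASK) and, for kernel QPs, the QP's 128-byte slot within that page plus the 8-byte offset of the doorbell register. A worked example with assumed values (qpshift, qpmask, and the qid are hypothetical, not from the patch):

	/* Assume 4 KB pages, qpshift = 7 and qpmask = 31 (32 QPs of
	 * 128 bytes each per doorbell page), and qid = 0x41. */
	unsigned long off;

	off = (0x41UL << 7) & ~0xfffUL;		/* 0x2000: this QP's DB page */
	/* user QP:   mmap()s  bar2_pa  + 0x2000 (the whole page)          */
	off += 128 * (0x41 & 31) + 8;		/* 0x2088: kernel DB slot    */
	/* kernel QP: writes to bar2_kva + 0x2088 directly                 */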
@@ -299,9 +309,10 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	if (ret)
 		goto free_dma;
 
-	PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n",
+	PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%lx rqudb 0x%lx\n",
 	     __func__, wq->sq.qid, wq->rq.qid, wq->db,
-	     (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb);
+	     (__force unsigned long) wq->sq.udb,
+	     (__force unsigned long) wq->rq.udb);
 
 	return 0;
 free_dma:
@@ -650,9 +661,10 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc)
 
 	spin_lock_irqsave(&qhp->rhp->lock, flags);
 	spin_lock(&qhp->lock);
-	if (qhp->rhp->db_state == NORMAL) {
-		t4_ring_sq_db(&qhp->wq, inc);
-	} else {
+	if (qhp->rhp->db_state == NORMAL)
+		t4_ring_sq_db(&qhp->wq, inc,
+			      is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+	else {
 		add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
 		qhp->wq.sq.wq_pidx_inc += inc;
 	}
@@ -667,9 +679,10 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
 
 	spin_lock_irqsave(&qhp->rhp->lock, flags);
 	spin_lock(&qhp->lock);
-	if (qhp->rhp->db_state == NORMAL) {
-		t4_ring_rq_db(&qhp->wq, inc);
-	} else {
+	if (qhp->rhp->db_state == NORMAL)
+		t4_ring_rq_db(&qhp->wq, inc,
+			      is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+	else {
 		add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
 		qhp->wq.rq.wq_pidx_inc += inc;
 	}
@@ -686,7 +699,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	enum fw_wr_opcodes fw_opcode = 0;
 	enum fw_ri_wr_flags fw_flags;
 	struct c4iw_qp *qhp;
-	union t4_wr *wqe;
+	union t4_wr *wqe = NULL;
 	u32 num_wrs;
 	struct t4_swsqe *swsqe;
 	unsigned long flag;
@@ -792,7 +805,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
 	}
 	if (!qhp->rhp->rdev.status_page->db_off) {
-		t4_ring_sq_db(&qhp->wq, idx);
+		t4_ring_sq_db(&qhp->wq, idx,
+			      is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
 		spin_unlock_irqrestore(&qhp->lock, flag);
 	} else {
 		spin_unlock_irqrestore(&qhp->lock, flag);
@@ -806,7 +820,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 {
 	int err = 0;
 	struct c4iw_qp *qhp;
-	union t4_recv_wr *wqe;
+	union t4_recv_wr *wqe = NULL;
 	u32 num_wrs;
 	u8 len16 = 0;
 	unsigned long flag;
@@ -858,7 +872,8 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 		num_wrs--;
 	}
 	if (!qhp->rhp->rdev.status_page->db_off) {
-		t4_ring_rq_db(&qhp->wq, idx);
+		t4_ring_rq_db(&qhp->wq, idx,
+			      is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
 		spin_unlock_irqrestore(&qhp->lock, flag);
 	} else {
 		spin_unlock_irqrestore(&qhp->lock, flag);
@@ -1677,11 +1692,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize);
 	insert_mmap(ucontext, mm2);
 	mm3->key = uresp.sq_db_gts_key;
-	mm3->addr = qhp->wq.sq.udb;
+	mm3->addr = (__force unsigned long) qhp->wq.sq.udb;
 	mm3->len = PAGE_SIZE;
 	insert_mmap(ucontext, mm3);
 	mm4->key = uresp.rq_db_gts_key;
-	mm4->addr = qhp->wq.rq.udb;
+	mm4->addr = (__force unsigned long) qhp->wq.rq.udb;
 	mm4->len = PAGE_SIZE;
 	insert_mmap(ucontext, mm4);
 	if (mm5) {
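The __force casts here and in the PDBG above are needed because udb is now typed u64 __iomem * in both cases, yet for a user QP it actually holds a physical BAR2 address (bar2_pa + off) that is only handed to insert_mmap() for the user library to mmap(), never dereferenced in the kernel; this is the cast fixup Roland notes in the commit message.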
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index eeca8b1e6376..931bfd105c49 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -292,7 +292,7 @@ struct t4_sq {
 	unsigned long phys_addr;
 	struct t4_swsqe *sw_sq;
 	struct t4_swsqe *oldest_read;
-	u64 udb;
+	u64 __iomem *udb;
 	size_t memsize;
 	u32 qid;
 	u16 in_use;
@@ -314,7 +314,7 @@ struct t4_rq {
 	dma_addr_t dma_addr;
 	DEFINE_DMA_UNMAP_ADDR(mapping);
 	struct t4_swrqe *sw_rq;
-	u64 udb;
+	u64 __iomem *udb;
 	size_t memsize;
 	u32 qid;
 	u32 msn;
@@ -435,15 +435,67 @@ static inline u16 t4_sq_wq_size(struct t4_wq *wq)
 	return wq->sq.size * T4_SQ_NUM_SLOTS;
 }
 
-static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc)
+/* This function copies 64 byte coalesced work request to memory
+ * mapped BAR2 space. For coalesced WRs, the SGE fetches data
+ * from the FIFO instead of from Host.
+ */
+static inline void pio_copy(u64 __iomem *dst, u64 *src)
+{
+	int count = 8;
+
+	while (count) {
+		writeq(*src, dst);
+		src++;
+		dst++;
+		count--;
+	}
+}
+
+static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5,
+				 union t4_wr *wqe)
 {
+
+	/* Flush host queue memory writes. */
 	wmb();
+	if (t5) {
+		if (inc == 1 && wqe) {
+			PDBG("%s: WC wq->sq.pidx = %d\n",
+			     __func__, wq->sq.pidx);
+			pio_copy(wq->sq.udb + 7, (void *)wqe);
+		} else {
+			PDBG("%s: DB wq->sq.pidx = %d\n",
+			     __func__, wq->sq.pidx);
+			writel(PIDX_T5(inc), wq->sq.udb);
+		}
+
+		/* Flush user doorbell area writes. */
+		wmb();
+		return;
+	}
 	writel(QID(wq->sq.qid) | PIDX(inc), wq->db);
 }
 
-static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc)
+static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5,
+				 union t4_recv_wr *wqe)
 {
+
+	/* Flush host queue memory writes. */
 	wmb();
+	if (t5) {
+		if (inc == 1 && wqe) {
+			PDBG("%s: WC wq->rq.pidx = %d\n",
+			     __func__, wq->rq.pidx);
+			pio_copy(wq->rq.udb + 7, (void *)wqe);
+		} else {
+			PDBG("%s: DB wq->rq.pidx = %d\n",
+			     __func__, wq->rq.pidx);
+			writel(PIDX_T5(inc), wq->rq.udb);
+		}
+
+		/* Flush user doorbell area writes. */
+		wmb();
+		return;
+	}
 	writel(QID(wq->rq.qid) | PIDX(inc), wq->db);
 }
 
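A note on pio_copy(wq->sq.udb + 7, ...): for kernel QPs, udb already points 8 bytes into the QP's 128-byte doorbell region (see create_qp() above), so advancing by seven u64s lands at byte offset 64, which appears to be the start of the T5 WC write buffer within that region. Callers follow one pattern, sketched here as an assumed wrapper mirroring the qp.c call sites:

	/* Hedged usage sketch: ring after posting "idx" SQ slots. Passing
	 * the first WR enables the WC push only when idx == 1; recovery
	 * paths such as resume_rc_qp() pass NULL to force a plain writel. */
	static void ring_sq_after_post(struct c4iw_qp *qhp, u16 idx,
				       union t4_wr *wqe)
	{
		t4_ring_sq_db(&qhp->wq, idx,
			      is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
	}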