diff options
author | Steve Wise <swise@opengridcomputing.com> | 2010-09-13 12:23:57 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2010-09-28 13:46:35 -0400 |
commit | c6d7b26791a2aefdf97f2af1e93161ed05acd631 (patch) | |
tree | 61b9b3af001fe5983aa84034d20e44eb43c039a8 /drivers/infiniband/hw | |
parent | aadc4df3087a33ca7fc37f91a024e7b7efdafa75 (diff) |
RDMA/cxgb4: Support on-chip SQs
T4 supports on-chip SQs to reduce latency. This patch adds support for
this in iw_cxgb4:
- Manage ocqp memory like other adapter mem resources.
- Allocate user mode SQs from ocqp mem if available.
- Map ocqp mem to user process using write combining.
- Map PCIE_MA_SYNC reg to user process.
Bump uverbs ABI.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r-- | drivers/infiniband/hw/cxgb4/device.c | 19 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 7 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/provider.c | 28 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/qp.c | 98 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/resource.c | 56 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/t4.h | 40 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/user.h | 7 |
7 files changed, 226 insertions, 29 deletions
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 2851bf831fb..986cfd76502 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c | |||
@@ -364,7 +364,14 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) | |||
364 | printk(KERN_ERR MOD "error %d initializing rqt pool\n", err); | 364 | printk(KERN_ERR MOD "error %d initializing rqt pool\n", err); |
365 | goto err3; | 365 | goto err3; |
366 | } | 366 | } |
367 | err = c4iw_ocqp_pool_create(rdev); | ||
368 | if (err) { | ||
369 | printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err); | ||
370 | goto err4; | ||
371 | } | ||
367 | return 0; | 372 | return 0; |
373 | err4: | ||
374 | c4iw_rqtpool_destroy(rdev); | ||
368 | err3: | 375 | err3: |
369 | c4iw_pblpool_destroy(rdev); | 376 | c4iw_pblpool_destroy(rdev); |
370 | err2: | 377 | err2: |
@@ -391,6 +398,7 @@ static void c4iw_remove(struct c4iw_dev *dev) | |||
391 | idr_destroy(&dev->cqidr); | 398 | idr_destroy(&dev->cqidr); |
392 | idr_destroy(&dev->qpidr); | 399 | idr_destroy(&dev->qpidr); |
393 | idr_destroy(&dev->mmidr); | 400 | idr_destroy(&dev->mmidr); |
401 | iounmap(dev->rdev.oc_mw_kva); | ||
394 | ib_dealloc_device(&dev->ibdev); | 402 | ib_dealloc_device(&dev->ibdev); |
395 | } | 403 | } |
396 | 404 | ||
@@ -406,6 +414,17 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) | |||
406 | } | 414 | } |
407 | devp->rdev.lldi = *infop; | 415 | devp->rdev.lldi = *infop; |
408 | 416 | ||
417 | devp->rdev.oc_mw_pa = pci_resource_start(devp->rdev.lldi.pdev, 2) + | ||
418 | (pci_resource_len(devp->rdev.lldi.pdev, 2) - | ||
419 | roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size)); | ||
420 | devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa, | ||
421 | devp->rdev.lldi.vr->ocq.size); | ||
422 | |||
423 | printk(KERN_INFO MOD "ocq memory: " | ||
424 | "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n", | ||
425 | devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size, | ||
426 | devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva); | ||
427 | |||
409 | mutex_lock(&dev_mutex); | 428 | mutex_lock(&dev_mutex); |
410 | 429 | ||
411 | ret = c4iw_rdev_open(&devp->rdev); | 430 | ret = c4iw_rdev_open(&devp->rdev); |
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 77801163cd0..1c269223945 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h | |||
@@ -112,8 +112,11 @@ struct c4iw_rdev { | |||
112 | struct c4iw_dev_ucontext uctx; | 112 | struct c4iw_dev_ucontext uctx; |
113 | struct gen_pool *pbl_pool; | 113 | struct gen_pool *pbl_pool; |
114 | struct gen_pool *rqt_pool; | 114 | struct gen_pool *rqt_pool; |
115 | struct gen_pool *ocqp_pool; | ||
115 | u32 flags; | 116 | u32 flags; |
116 | struct cxgb4_lld_info lldi; | 117 | struct cxgb4_lld_info lldi; |
118 | unsigned long oc_mw_pa; | ||
119 | void __iomem *oc_mw_kva; | ||
117 | }; | 120 | }; |
118 | 121 | ||
119 | static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) | 122 | static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) |
@@ -675,8 +678,10 @@ int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid); | |||
675 | int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev); | 678 | int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev); |
676 | int c4iw_pblpool_create(struct c4iw_rdev *rdev); | 679 | int c4iw_pblpool_create(struct c4iw_rdev *rdev); |
677 | int c4iw_rqtpool_create(struct c4iw_rdev *rdev); | 680 | int c4iw_rqtpool_create(struct c4iw_rdev *rdev); |
681 | int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev); | ||
678 | void c4iw_pblpool_destroy(struct c4iw_rdev *rdev); | 682 | void c4iw_pblpool_destroy(struct c4iw_rdev *rdev); |
679 | void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev); | 683 | void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev); |
684 | void c4iw_ocqp_pool_destroy(struct c4iw_rdev *rdev); | ||
680 | void c4iw_destroy_resource(struct c4iw_resource *rscp); | 685 | void c4iw_destroy_resource(struct c4iw_resource *rscp); |
681 | int c4iw_destroy_ctrl_qp(struct c4iw_rdev *rdev); | 686 | int c4iw_destroy_ctrl_qp(struct c4iw_rdev *rdev); |
682 | int c4iw_register_device(struct c4iw_dev *dev); | 687 | int c4iw_register_device(struct c4iw_dev *dev); |
@@ -742,6 +747,8 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size); | |||
742 | void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size); | 747 | void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size); |
743 | u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size); | 748 | u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size); |
744 | void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size); | 749 | void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size); |
750 | u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size); | ||
751 | void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size); | ||
745 | int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb); | 752 | int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb); |
746 | void c4iw_flush_hw_cq(struct t4_cq *cq); | 753 | void c4iw_flush_hw_cq(struct t4_cq *cq); |
747 | void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count); | 754 | void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count); |
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 8f645c83a12..a49a9c1275a 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c | |||
@@ -149,19 +149,28 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) | |||
149 | addr = mm->addr; | 149 | addr = mm->addr; |
150 | kfree(mm); | 150 | kfree(mm); |
151 | 151 | ||
152 | if ((addr >= pci_resource_start(rdev->lldi.pdev, 2)) && | 152 | if ((addr >= pci_resource_start(rdev->lldi.pdev, 0)) && |
153 | (addr < (pci_resource_start(rdev->lldi.pdev, 2) + | 153 | (addr < (pci_resource_start(rdev->lldi.pdev, 0) + |
154 | pci_resource_len(rdev->lldi.pdev, 2)))) { | 154 | pci_resource_len(rdev->lldi.pdev, 0)))) { |
155 | 155 | ||
156 | /* | 156 | /* |
157 | * Map T4 DB register. | 157 | * MA_SYNC register... |
158 | */ | 158 | */ |
159 | if (vma->vm_flags & VM_READ) | ||
160 | return -EPERM; | ||
161 | |||
162 | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); | 159 | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); |
163 | vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; | 160 | ret = io_remap_pfn_range(vma, vma->vm_start, |
164 | vma->vm_flags &= ~VM_MAYREAD; | 161 | addr >> PAGE_SHIFT, |
162 | len, vma->vm_page_prot); | ||
163 | } else if ((addr >= pci_resource_start(rdev->lldi.pdev, 2)) && | ||
164 | (addr < (pci_resource_start(rdev->lldi.pdev, 2) + | ||
165 | pci_resource_len(rdev->lldi.pdev, 2)))) { | ||
166 | |||
167 | /* | ||
168 | * Map user DB or OCQP memory... | ||
169 | */ | ||
170 | if (addr >= rdev->oc_mw_pa) | ||
171 | vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); | ||
172 | else | ||
173 | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); | ||
165 | ret = io_remap_pfn_range(vma, vma->vm_start, | 174 | ret = io_remap_pfn_range(vma, vma->vm_start, |
166 | addr >> PAGE_SHIFT, | 175 | addr >> PAGE_SHIFT, |
167 | len, vma->vm_page_prot); | 176 | len, vma->vm_page_prot); |
@@ -472,6 +481,7 @@ int c4iw_register_device(struct c4iw_dev *dev) | |||
472 | dev->ibdev.post_send = c4iw_post_send; | 481 | dev->ibdev.post_send = c4iw_post_send; |
473 | dev->ibdev.post_recv = c4iw_post_receive; | 482 | dev->ibdev.post_recv = c4iw_post_receive; |
474 | dev->ibdev.get_protocol_stats = c4iw_get_mib; | 483 | dev->ibdev.get_protocol_stats = c4iw_get_mib; |
484 | dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; | ||
475 | 485 | ||
476 | dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); | 486 | dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); |
477 | if (!dev->ibdev.iwcm) | 487 | if (!dev->ibdev.iwcm) |
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 40187e26d2b..7e45f733428 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c | |||
@@ -31,6 +31,55 @@ | |||
31 | */ | 31 | */ |
32 | #include "iw_cxgb4.h" | 32 | #include "iw_cxgb4.h" |
33 | 33 | ||
34 | static int ocqp_support; | ||
35 | module_param(ocqp_support, int, 0644); | ||
36 | MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=0)"); | ||
37 | |||
38 | static void dealloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) | ||
39 | { | ||
40 | c4iw_ocqp_pool_free(rdev, sq->dma_addr, sq->memsize); | ||
41 | } | ||
42 | |||
43 | static void dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) | ||
44 | { | ||
45 | dma_free_coherent(&(rdev->lldi.pdev->dev), sq->memsize, sq->queue, | ||
46 | pci_unmap_addr(sq, mapping)); | ||
47 | } | ||
48 | |||
49 | static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) | ||
50 | { | ||
51 | if (t4_sq_onchip(sq)) | ||
52 | dealloc_oc_sq(rdev, sq); | ||
53 | else | ||
54 | dealloc_host_sq(rdev, sq); | ||
55 | } | ||
56 | |||
57 | static int alloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) | ||
58 | { | ||
59 | if (!ocqp_support || !t4_ocqp_supported()) | ||
60 | return -ENOSYS; | ||
61 | sq->dma_addr = c4iw_ocqp_pool_alloc(rdev, sq->memsize); | ||
62 | if (!sq->dma_addr) | ||
63 | return -ENOMEM; | ||
64 | sq->phys_addr = rdev->oc_mw_pa + sq->dma_addr - | ||
65 | rdev->lldi.vr->ocq.start; | ||
66 | sq->queue = (__force union t4_wr *)(rdev->oc_mw_kva + sq->dma_addr - | ||
67 | rdev->lldi.vr->ocq.start); | ||
68 | sq->flags |= T4_SQ_ONCHIP; | ||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) | ||
73 | { | ||
74 | sq->queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), sq->memsize, | ||
75 | &(sq->dma_addr), GFP_KERNEL); | ||
76 | if (!sq->queue) | ||
77 | return -ENOMEM; | ||
78 | sq->phys_addr = virt_to_phys(sq->queue); | ||
79 | pci_unmap_addr_set(sq, mapping, sq->dma_addr); | ||
80 | return 0; | ||
81 | } | ||
82 | |||
34 | static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, | 83 | static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, |
35 | struct c4iw_dev_ucontext *uctx) | 84 | struct c4iw_dev_ucontext *uctx) |
36 | { | 85 | { |
@@ -41,9 +90,7 @@ static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, | |||
41 | dma_free_coherent(&(rdev->lldi.pdev->dev), | 90 | dma_free_coherent(&(rdev->lldi.pdev->dev), |
42 | wq->rq.memsize, wq->rq.queue, | 91 | wq->rq.memsize, wq->rq.queue, |
43 | dma_unmap_addr(&wq->rq, mapping)); | 92 | dma_unmap_addr(&wq->rq, mapping)); |
44 | dma_free_coherent(&(rdev->lldi.pdev->dev), | 93 | dealloc_sq(rdev, &wq->sq); |
45 | wq->sq.memsize, wq->sq.queue, | ||
46 | dma_unmap_addr(&wq->sq, mapping)); | ||
47 | c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); | 94 | c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); |
48 | kfree(wq->rq.sw_rq); | 95 | kfree(wq->rq.sw_rq); |
49 | kfree(wq->sq.sw_sq); | 96 | kfree(wq->sq.sw_sq); |
@@ -93,11 +140,12 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, | |||
93 | if (!wq->rq.rqt_hwaddr) | 140 | if (!wq->rq.rqt_hwaddr) |
94 | goto err4; | 141 | goto err4; |
95 | 142 | ||
96 | wq->sq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), | 143 | if (user) { |
97 | wq->sq.memsize, &(wq->sq.dma_addr), | 144 | if (alloc_oc_sq(rdev, &wq->sq) && alloc_host_sq(rdev, &wq->sq)) |
98 | GFP_KERNEL); | 145 | goto err5; |
99 | if (!wq->sq.queue) | 146 | } else |
100 | goto err5; | 147 | if (alloc_host_sq(rdev, &wq->sq)) |
148 | goto err5; | ||
101 | memset(wq->sq.queue, 0, wq->sq.memsize); | 149 | memset(wq->sq.queue, 0, wq->sq.memsize); |
102 | dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); | 150 | dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); |
103 | 151 | ||
@@ -158,6 +206,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, | |||
158 | V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ | 206 | V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ |
159 | V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ | 207 | V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ |
160 | V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ | 208 | V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ |
209 | t4_sq_onchip(&wq->sq) ? F_FW_RI_RES_WR_ONCHIP : 0 | | ||
161 | V_FW_RI_RES_WR_IQID(scq->cqid)); | 210 | V_FW_RI_RES_WR_IQID(scq->cqid)); |
162 | res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( | 211 | res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( |
163 | V_FW_RI_RES_WR_DCAEN(0) | | 212 | V_FW_RI_RES_WR_DCAEN(0) | |
@@ -212,9 +261,7 @@ err7: | |||
212 | wq->rq.memsize, wq->rq.queue, | 261 | wq->rq.memsize, wq->rq.queue, |
213 | dma_unmap_addr(&wq->rq, mapping)); | 262 | dma_unmap_addr(&wq->rq, mapping)); |
214 | err6: | 263 | err6: |
215 | dma_free_coherent(&(rdev->lldi.pdev->dev), | 264 | dealloc_sq(rdev, &wq->sq); |
216 | wq->sq.memsize, wq->sq.queue, | ||
217 | dma_unmap_addr(&wq->sq, mapping)); | ||
218 | err5: | 265 | err5: |
219 | c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); | 266 | c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); |
220 | err4: | 267 | err4: |
@@ -1361,7 +1408,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, | |||
1361 | int sqsize, rqsize; | 1408 | int sqsize, rqsize; |
1362 | struct c4iw_ucontext *ucontext; | 1409 | struct c4iw_ucontext *ucontext; |
1363 | int ret; | 1410 | int ret; |
1364 | struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4; | 1411 | struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL; |
1365 | 1412 | ||
1366 | PDBG("%s ib_pd %p\n", __func__, pd); | 1413 | PDBG("%s ib_pd %p\n", __func__, pd); |
1367 | 1414 | ||
@@ -1459,7 +1506,15 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, | |||
1459 | ret = -ENOMEM; | 1506 | ret = -ENOMEM; |
1460 | goto err6; | 1507 | goto err6; |
1461 | } | 1508 | } |
1462 | 1509 | if (t4_sq_onchip(&qhp->wq.sq)) { | |
1510 | mm5 = kmalloc(sizeof *mm5, GFP_KERNEL); | ||
1511 | if (!mm5) { | ||
1512 | ret = -ENOMEM; | ||
1513 | goto err7; | ||
1514 | } | ||
1515 | uresp.flags = C4IW_QPF_ONCHIP; | ||
1516 | } else | ||
1517 | uresp.flags = 0; | ||
1463 | uresp.qid_mask = rhp->rdev.qpmask; | 1518 | uresp.qid_mask = rhp->rdev.qpmask; |
1464 | uresp.sqid = qhp->wq.sq.qid; | 1519 | uresp.sqid = qhp->wq.sq.qid; |
1465 | uresp.sq_size = qhp->wq.sq.size; | 1520 | uresp.sq_size = qhp->wq.sq.size; |
@@ -1468,6 +1523,10 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, | |||
1468 | uresp.rq_size = qhp->wq.rq.size; | 1523 | uresp.rq_size = qhp->wq.rq.size; |
1469 | uresp.rq_memsize = qhp->wq.rq.memsize; | 1524 | uresp.rq_memsize = qhp->wq.rq.memsize; |
1470 | spin_lock(&ucontext->mmap_lock); | 1525 | spin_lock(&ucontext->mmap_lock); |
1526 | if (mm5) { | ||
1527 | uresp.ma_sync_key = ucontext->key; | ||
1528 | ucontext->key += PAGE_SIZE; | ||
1529 | } | ||
1471 | uresp.sq_key = ucontext->key; | 1530 | uresp.sq_key = ucontext->key; |
1472 | ucontext->key += PAGE_SIZE; | 1531 | ucontext->key += PAGE_SIZE; |
1473 | uresp.rq_key = ucontext->key; | 1532 | uresp.rq_key = ucontext->key; |
@@ -1479,9 +1538,9 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, | |||
1479 | spin_unlock(&ucontext->mmap_lock); | 1538 | spin_unlock(&ucontext->mmap_lock); |
1480 | ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); | 1539 | ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); |
1481 | if (ret) | 1540 | if (ret) |
1482 | goto err7; | 1541 | goto err8; |
1483 | mm1->key = uresp.sq_key; | 1542 | mm1->key = uresp.sq_key; |
1484 | mm1->addr = virt_to_phys(qhp->wq.sq.queue); | 1543 | mm1->addr = qhp->wq.sq.phys_addr; |
1485 | mm1->len = PAGE_ALIGN(qhp->wq.sq.memsize); | 1544 | mm1->len = PAGE_ALIGN(qhp->wq.sq.memsize); |
1486 | insert_mmap(ucontext, mm1); | 1545 | insert_mmap(ucontext, mm1); |
1487 | mm2->key = uresp.rq_key; | 1546 | mm2->key = uresp.rq_key; |
@@ -1496,6 +1555,13 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, | |||
1496 | mm4->addr = qhp->wq.rq.udb; | 1555 | mm4->addr = qhp->wq.rq.udb; |
1497 | mm4->len = PAGE_SIZE; | 1556 | mm4->len = PAGE_SIZE; |
1498 | insert_mmap(ucontext, mm4); | 1557 | insert_mmap(ucontext, mm4); |
1558 | if (mm5) { | ||
1559 | mm5->key = uresp.ma_sync_key; | ||
1560 | mm5->addr = (pci_resource_start(rhp->rdev.lldi.pdev, 0) | ||
1561 | + A_PCIE_MA_SYNC) & PAGE_MASK; | ||
1562 | mm5->len = PAGE_SIZE; | ||
1563 | insert_mmap(ucontext, mm5); | ||
1564 | } | ||
1499 | } | 1565 | } |
1500 | qhp->ibqp.qp_num = qhp->wq.sq.qid; | 1566 | qhp->ibqp.qp_num = qhp->wq.sq.qid; |
1501 | init_timer(&(qhp->timer)); | 1567 | init_timer(&(qhp->timer)); |
@@ -1503,6 +1569,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, | |||
1503 | __func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, | 1569 | __func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, |
1504 | qhp->wq.sq.qid); | 1570 | qhp->wq.sq.qid); |
1505 | return &qhp->ibqp; | 1571 | return &qhp->ibqp; |
1572 | err8: | ||
1573 | kfree(mm5); | ||
1506 | err7: | 1574 | err7: |
1507 | kfree(mm4); | 1575 | kfree(mm4); |
1508 | err6: | 1576 | err6: |
diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c index 26365f6ed84..4fb50d58b49 100644 --- a/drivers/infiniband/hw/cxgb4/resource.c +++ b/drivers/infiniband/hw/cxgb4/resource.c | |||
@@ -422,3 +422,59 @@ void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev) | |||
422 | { | 422 | { |
423 | gen_pool_destroy(rdev->rqt_pool); | 423 | gen_pool_destroy(rdev->rqt_pool); |
424 | } | 424 | } |
425 | |||
426 | /* | ||
427 | * On-Chip QP Memory. | ||
428 | */ | ||
429 | #define MIN_OCQP_SHIFT 12 /* 4KB == min ocqp size */ | ||
430 | |||
431 | u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size) | ||
432 | { | ||
433 | unsigned long addr = gen_pool_alloc(rdev->ocqp_pool, size); | ||
434 | PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size); | ||
435 | return (u32)addr; | ||
436 | } | ||
437 | |||
438 | void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size) | ||
439 | { | ||
440 | PDBG("%s addr 0x%x size %d\n", __func__, addr, size); | ||
441 | gen_pool_free(rdev->ocqp_pool, (unsigned long)addr, size); | ||
442 | } | ||
443 | |||
444 | int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev) | ||
445 | { | ||
446 | unsigned start, chunk, top; | ||
447 | |||
448 | rdev->ocqp_pool = gen_pool_create(MIN_OCQP_SHIFT, -1); | ||
449 | if (!rdev->ocqp_pool) | ||
450 | return -ENOMEM; | ||
451 | |||
452 | start = rdev->lldi.vr->ocq.start; | ||
453 | chunk = rdev->lldi.vr->ocq.size; | ||
454 | top = start + chunk; | ||
455 | |||
456 | while (start < top) { | ||
457 | chunk = min(top - start + 1, chunk); | ||
458 | if (gen_pool_add(rdev->ocqp_pool, start, chunk, -1)) { | ||
459 | PDBG("%s failed to add OCQP chunk (%x/%x)\n", | ||
460 | __func__, start, chunk); | ||
461 | if (chunk <= 1024 << MIN_OCQP_SHIFT) { | ||
462 | printk(KERN_WARNING MOD | ||
463 | "Failed to add all OCQP chunks (%x/%x)\n", | ||
464 | start, top - start); | ||
465 | return 0; | ||
466 | } | ||
467 | chunk >>= 1; | ||
468 | } else { | ||
469 | PDBG("%s added OCQP chunk (%x/%x)\n", | ||
470 | __func__, start, chunk); | ||
471 | start += chunk; | ||
472 | } | ||
473 | } | ||
474 | return 0; | ||
475 | } | ||
476 | |||
477 | void c4iw_ocqp_pool_destroy(struct c4iw_rdev *rdev) | ||
478 | { | ||
479 | gen_pool_destroy(rdev->ocqp_pool); | ||
480 | } | ||
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 24f369046ef..17ea5fcb37e 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h | |||
@@ -52,6 +52,7 @@ | |||
52 | #define T4_STAG_UNSET 0xffffffff | 52 | #define T4_STAG_UNSET 0xffffffff |
53 | #define T4_FW_MAJ 0 | 53 | #define T4_FW_MAJ 0 |
54 | #define T4_EQ_STATUS_ENTRIES (L1_CACHE_BYTES > 64 ? 2 : 1) | 54 | #define T4_EQ_STATUS_ENTRIES (L1_CACHE_BYTES > 64 ? 2 : 1) |
55 | #define A_PCIE_MA_SYNC 0x30b4 | ||
55 | 56 | ||
56 | struct t4_status_page { | 57 | struct t4_status_page { |
57 | __be32 rsvd1; /* flit 0 - hw owns */ | 58 | __be32 rsvd1; /* flit 0 - hw owns */ |
@@ -266,10 +267,36 @@ struct t4_swsqe { | |||
266 | u16 idx; | 267 | u16 idx; |
267 | }; | 268 | }; |
268 | 269 | ||
270 | static inline pgprot_t t4_pgprot_wc(pgprot_t prot) | ||
271 | { | ||
272 | #if defined(__i386__) || defined(__x86_64__) | ||
273 | return pgprot_writecombine(prot); | ||
274 | #elif defined(CONFIG_PPC64) | ||
275 | return __pgprot((pgprot_val(prot) | _PAGE_NO_CACHE) & | ||
276 | ~(pgprot_t)_PAGE_GUARDED); | ||
277 | #else | ||
278 | return pgprot_noncached(prot); | ||
279 | #endif | ||
280 | } | ||
281 | |||
282 | static inline int t4_ocqp_supported(void) | ||
283 | { | ||
284 | #if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) | ||
285 | return 1; | ||
286 | #else | ||
287 | return 0; | ||
288 | #endif | ||
289 | } | ||
290 | |||
291 | enum { | ||
292 | T4_SQ_ONCHIP = (1<<0), | ||
293 | }; | ||
294 | |||
269 | struct t4_sq { | 295 | struct t4_sq { |
270 | union t4_wr *queue; | 296 | union t4_wr *queue; |
271 | dma_addr_t dma_addr; | 297 | dma_addr_t dma_addr; |
272 | DEFINE_DMA_UNMAP_ADDR(mapping); | 298 | DEFINE_DMA_UNMAP_ADDR(mapping); |
299 | unsigned long phys_addr; | ||
273 | struct t4_swsqe *sw_sq; | 300 | struct t4_swsqe *sw_sq; |
274 | struct t4_swsqe *oldest_read; | 301 | struct t4_swsqe *oldest_read; |
275 | u64 udb; | 302 | u64 udb; |
@@ -280,6 +307,7 @@ struct t4_sq { | |||
280 | u16 cidx; | 307 | u16 cidx; |
281 | u16 pidx; | 308 | u16 pidx; |
282 | u16 wq_pidx; | 309 | u16 wq_pidx; |
310 | u16 flags; | ||
283 | }; | 311 | }; |
284 | 312 | ||
285 | struct t4_swrqe { | 313 | struct t4_swrqe { |
@@ -350,6 +378,11 @@ static inline void t4_rq_consume(struct t4_wq *wq) | |||
350 | wq->rq.cidx = 0; | 378 | wq->rq.cidx = 0; |
351 | } | 379 | } |
352 | 380 | ||
381 | static inline int t4_sq_onchip(struct t4_sq *sq) | ||
382 | { | ||
383 | return sq->flags & T4_SQ_ONCHIP; | ||
384 | } | ||
385 | |||
353 | static inline int t4_sq_empty(struct t4_wq *wq) | 386 | static inline int t4_sq_empty(struct t4_wq *wq) |
354 | { | 387 | { |
355 | return wq->sq.in_use == 0; | 388 | return wq->sq.in_use == 0; |
@@ -396,30 +429,27 @@ static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc) | |||
396 | 429 | ||
397 | static inline int t4_wq_in_error(struct t4_wq *wq) | 430 | static inline int t4_wq_in_error(struct t4_wq *wq) |
398 | { | 431 | { |
399 | return wq->sq.queue[wq->sq.size].status.qp_err; | 432 | return wq->rq.queue[wq->rq.size].status.qp_err; |
400 | } | 433 | } |
401 | 434 | ||
402 | static inline void t4_set_wq_in_error(struct t4_wq *wq) | 435 | static inline void t4_set_wq_in_error(struct t4_wq *wq) |
403 | { | 436 | { |
404 | wq->sq.queue[wq->sq.size].status.qp_err = 1; | ||
405 | wq->rq.queue[wq->rq.size].status.qp_err = 1; | 437 | wq->rq.queue[wq->rq.size].status.qp_err = 1; |
406 | } | 438 | } |
407 | 439 | ||
408 | static inline void t4_disable_wq_db(struct t4_wq *wq) | 440 | static inline void t4_disable_wq_db(struct t4_wq *wq) |
409 | { | 441 | { |
410 | wq->sq.queue[wq->sq.size].status.db_off = 1; | ||
411 | wq->rq.queue[wq->rq.size].status.db_off = 1; | 442 | wq->rq.queue[wq->rq.size].status.db_off = 1; |
412 | } | 443 | } |
413 | 444 | ||
414 | static inline void t4_enable_wq_db(struct t4_wq *wq) | 445 | static inline void t4_enable_wq_db(struct t4_wq *wq) |
415 | { | 446 | { |
416 | wq->sq.queue[wq->sq.size].status.db_off = 0; | ||
417 | wq->rq.queue[wq->rq.size].status.db_off = 0; | 447 | wq->rq.queue[wq->rq.size].status.db_off = 0; |
418 | } | 448 | } |
419 | 449 | ||
420 | static inline int t4_wq_db_enabled(struct t4_wq *wq) | 450 | static inline int t4_wq_db_enabled(struct t4_wq *wq) |
421 | { | 451 | { |
422 | return !wq->sq.queue[wq->sq.size].status.db_off; | 452 | return !wq->rq.queue[wq->rq.size].status.db_off; |
423 | } | 453 | } |
424 | 454 | ||
425 | struct t4_cq { | 455 | struct t4_cq { |
diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h index ed6414abde0..e6669d54770 100644 --- a/drivers/infiniband/hw/cxgb4/user.h +++ b/drivers/infiniband/hw/cxgb4/user.h | |||
@@ -50,7 +50,13 @@ struct c4iw_create_cq_resp { | |||
50 | __u32 qid_mask; | 50 | __u32 qid_mask; |
51 | }; | 51 | }; |
52 | 52 | ||
53 | |||
54 | enum { | ||
55 | C4IW_QPF_ONCHIP = (1<<0) | ||
56 | }; | ||
57 | |||
53 | struct c4iw_create_qp_resp { | 58 | struct c4iw_create_qp_resp { |
59 | __u64 ma_sync_key; | ||
54 | __u64 sq_key; | 60 | __u64 sq_key; |
55 | __u64 rq_key; | 61 | __u64 rq_key; |
56 | __u64 sq_db_gts_key; | 62 | __u64 sq_db_gts_key; |
@@ -62,5 +68,6 @@ struct c4iw_create_qp_resp { | |||
62 | __u32 sq_size; | 68 | __u32 sq_size; |
63 | __u32 rq_size; | 69 | __u32 rq_size; |
64 | __u32 qid_mask; | 70 | __u32 qid_mask; |
71 | __u32 flags; | ||
65 | }; | 72 | }; |
66 | #endif | 73 | #endif |