author		Steve Wise <swise@opengridcomputing.com>	2010-06-10 15:03:06 -0400
committer	Roland Dreier <rolandd@cisco.com>	2010-07-06 17:04:04 -0400
commit		1973e8b8edea68d2408328d25b318ee7401293be
tree		1bd38f30660409b7aa77a7801d0525a4247c2e3c /drivers/infiniband/hw
parent		b21ef16a8b956aee2fb3d7fc9d24a0b4dae2ae72
RDMA/cxgb4: Avoid false GTS CIDX_INC overflows
The T4 IQ hw design assumes CIDX_INC credits will be returned on a regular basis and always before the CIDX counter crosses over the PIDX counter. For RDMA CQs, however, returning CIDX_INC credits is only needed and desired when and if the CQ is armed for notification. This can lead to a GTS write returning credits that causes the HW to reject the credit update because it causes CIDX to pass PIDX. Once this happens, the CIDX/PIDX counters get out of whack and an application can miss a notification and get stuck blocked awaiting one.

To avoid this, allocate the HW IQ at 2x the requested size. This seems to avoid the false overflow failures. If we see more issues with this, then we'll have to add code in the poll path to return credits periodically (e.g. when the amount reaches 1/2 the queue depth). I would like to avoid that, as it adds a PCI write transaction for applications that never arm the CQ (like most MPIs).

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
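For illustration only, here is a minimal stand-alone sketch of the sizing arithmetic this patch introduces (the struct and function names are hypothetical, not the driver's actual code; the real logic lives in c4iw_create_cq() in the diff below):

	#include <stddef.h>

	/*
	 * Hypothetical sketch: the HW IQ is allocated at twice the requested
	 * depth (minimum 64 entries) so that unreturned CIDX_INC credits
	 * cannot push CIDX past PIDX, while the CQE count reported to the
	 * verbs consumer stays based on the requested size (entries - 2,
	 * mirroring chp->ibcq.cqe = entries - 2 in the patch).
	 */
	struct cq_sizes {
		size_t hwentries;	/* entries actually allocated for the HW IQ */
		size_t memsize;		/* bytes to allocate for the queue */
		int ibcq_cqe;		/* depth reported back to the consumer */
	};

	static struct cq_sizes compute_cq_sizes(int entries, size_t entry_size)
	{
		struct cq_sizes s;

		/* HW requires the entry count to be a multiple of 16. */
		entries = (entries + 15) & ~15;

		/* Double the HW queue to avoid false CIDX_INC overflows. */
		s.hwentries = (size_t)entries * 2;

		/* At least 64 entries so GTS updates aren't too frequent. */
		if (s.hwentries < 64)
			s.hwentries = 64;

		s.memsize = s.hwentries * entry_size;
		s.ibcq_cqe = entries - 2;
		return s;
	}

Note that for user CQs the driver additionally rounds memsize up to a page boundary and re-derives hwentries from the rounded size, as the second hunk below shows.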
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--	drivers/infiniband/hw/cxgb4/cq.c	25
1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index e1317f581168..fac5c6e68011 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -764,7 +764,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
 	struct c4iw_create_cq_resp uresp;
 	struct c4iw_ucontext *ucontext = NULL;
 	int ret;
-	size_t memsize;
+	size_t memsize, hwentries;
 	struct c4iw_mm_entry *mm, *mm2;
 
 	PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
@@ -788,14 +788,29 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
 	 * entries must be multiple of 16 for HW.
 	 */
 	entries = roundup(entries, 16);
-	memsize = entries * sizeof *chp->cq.queue;
+
+	/*
+	 * Make actual HW queue 2x to avoid cdix_inc overflows.
+	 */
+	hwentries = entries * 2;
+
+	/*
+	 * Make HW queue at least 64 entries so GTS updates aren't too
+	 * frequent.
+	 */
+	if (hwentries < 64)
+		hwentries = 64;
+
+	memsize = hwentries * sizeof *chp->cq.queue;
 
 	/*
 	 * memsize must be a multiple of the page size if its a user cq.
 	 */
-	if (ucontext)
+	if (ucontext) {
 		memsize = roundup(memsize, PAGE_SIZE);
-	chp->cq.size = entries;
+		hwentries = memsize / sizeof *chp->cq.queue;
+	}
+	chp->cq.size = hwentries;
 	chp->cq.memsize = memsize;
 
 	ret = create_cq(&rhp->rdev, &chp->cq,
@@ -805,7 +820,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
 
 	chp->rhp = rhp;
 	chp->cq.size--;			/* status page */
-	chp->ibcq.cqe = chp->cq.size - 1;
+	chp->ibcq.cqe = entries - 2;
 	spin_lock_init(&chp->lock);
 	atomic_set(&chp->refcnt, 1);
 	init_waitqueue_head(&chp->wait);