aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteve Wise <swise@opengridcomputing.com>2010-06-10 15:03:06 -0400
committerRoland Dreier <rolandd@cisco.com>2010-07-06 17:04:04 -0400
commit1973e8b8edea68d2408328d25b318ee7401293be (patch)
tree1bd38f30660409b7aa77a7801d0525a4247c2e3c
parentb21ef16a8b956aee2fb3d7fc9d24a0b4dae2ae72 (diff)
RDMA/cxgb4: Avoid false GTS CIDX_INC overflows
The T4 IQ hw design assumes CIDX_INC credits will be returned on a regular basis and always before the CIDX counter crosses over the PIDX counter. For RDMA CQs, however, returning CIDX_INC credits is only needed, and only desired, if and when the CQ is armed for notification. This can lead to a GTS write that returns credits being rejected by the HW, because the credit update would cause CIDX to pass PIDX. Once this happens, the CIDX/PIDX counters get out of sync, and an application can miss a notification and get stuck blocked awaiting it. To avoid this, we allocate the HW IQ at 2x the requested size. This seems to avoid the false overflow failures. If we see more issues with this, then we'll have to add code in the poll path to return credits periodically (like when the amount reaches 1/2 the queue depth). I would like to avoid this as it adds a PCI write transaction for applications that never arm the CQ (like most MPIs). Signed-off-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c25
1 files changed, 20 insertions, 5 deletions
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index e1317f581168..fac5c6e68011 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -764,7 +764,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
764 struct c4iw_create_cq_resp uresp; 764 struct c4iw_create_cq_resp uresp;
765 struct c4iw_ucontext *ucontext = NULL; 765 struct c4iw_ucontext *ucontext = NULL;
766 int ret; 766 int ret;
767 size_t memsize; 767 size_t memsize, hwentries;
768 struct c4iw_mm_entry *mm, *mm2; 768 struct c4iw_mm_entry *mm, *mm2;
769 769
770 PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries); 770 PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
@@ -788,14 +788,29 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
788 * entries must be multiple of 16 for HW. 788 * entries must be multiple of 16 for HW.
789 */ 789 */
790 entries = roundup(entries, 16); 790 entries = roundup(entries, 16);
791 memsize = entries * sizeof *chp->cq.queue; 791
792 /*
793 * Make actual HW queue 2x to avoid cdix_inc overflows.
794 */
795 hwentries = entries * 2;
796
797 /*
798 * Make HW queue at least 64 entries so GTS updates aren't too
799 * frequent.
800 */
801 if (hwentries < 64)
802 hwentries = 64;
803
804 memsize = hwentries * sizeof *chp->cq.queue;
792 805
793 /* 806 /*
794 * memsize must be a multiple of the page size if its a user cq. 807 * memsize must be a multiple of the page size if its a user cq.
795 */ 808 */
796 if (ucontext) 809 if (ucontext) {
797 memsize = roundup(memsize, PAGE_SIZE); 810 memsize = roundup(memsize, PAGE_SIZE);
798 chp->cq.size = entries; 811 hwentries = memsize / sizeof *chp->cq.queue;
812 }
813 chp->cq.size = hwentries;
799 chp->cq.memsize = memsize; 814 chp->cq.memsize = memsize;
800 815
801 ret = create_cq(&rhp->rdev, &chp->cq, 816 ret = create_cq(&rhp->rdev, &chp->cq,
@@ -805,7 +820,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
805 820
806 chp->rhp = rhp; 821 chp->rhp = rhp;
807 chp->cq.size--; /* status page */ 822 chp->cq.size--; /* status page */
808 chp->ibcq.cqe = chp->cq.size - 1; 823 chp->ibcq.cqe = entries - 2;
809 spin_lock_init(&chp->lock); 824 spin_lock_init(&chp->lock);
810 atomic_set(&chp->refcnt, 1); 825 atomic_set(&chp->refcnt, 1);
811 init_waitqueue_head(&chp->wait); 826 init_waitqueue_head(&chp->wait);