aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
authorSteve Wise <swise@opengridcomputing.com>2010-01-27 12:03:34 -0500
committerRoland Dreier <rolandd@cisco.com>2010-02-24 13:40:28 -0500
commite998f245c4b2d36ae2c35446e54ccbf1fb29d9de (patch)
tree9a0232f2d99d37f8d47ca1f7b4dbe5e9bcde5642 /drivers/infiniband/hw
parent2542322485be45853cc72d542d8ed84fae82c981 (diff)
RDMA/cxgb3: Doorbell overflow avoidance and recovery
T3 hardware doorbell FIFO overflows can cause application stalls due to lost doorbell ring events. This has been seen when running large NP IMB alltoall MPI jobs. The T3 hardware supports an xon/xoff-type flow control mechanism to help avoid overflowing the HW doorbell FIFO. This patch uses these interrupts to disable RDMA QP doorbell rings when we near an overflow condition, and then turn them back on (and ring all the active QP doorbells) when when the doorbell FIFO empties out. In addition if an doorbell ring is dropped by the hardware, the code will now recover. Design: cxgb3: - enable these DB interrupts - in the interrupt handler, schedule work tasks to call the ULPs event handlers with the new events. - ring all the qset txqs when an overflow is detected. iw_cxgb3: - disable db ringing on all active qps when we get the DB_FULL event - enable db ringing on all active qps and ring all active dbs when we get the DB_EMPTY event - On DB_DROP event: - disable db rings in the event handler - delay-schedule a work task which rings and enables the dbs on all active qps. - in post_send and post_recv logic, don't ring the db if it's disabled. Signed-off-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_wr.h17
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.c79
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.h2
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c9
4 files changed, 99 insertions, 8 deletions
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index a197a5b7ac7f..15073b2da1c5 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -730,7 +730,22 @@ struct t3_cq {
730 730
731static inline void cxio_set_wq_in_error(struct t3_wq *wq) 731static inline void cxio_set_wq_in_error(struct t3_wq *wq)
732{ 732{
733 wq->queue->wq_in_err.err = 1; 733 wq->queue->wq_in_err.err |= 1;
734}
735
736static inline void cxio_disable_wq_db(struct t3_wq *wq)
737{
738 wq->queue->wq_in_err.err |= 2;
739}
740
741static inline void cxio_enable_wq_db(struct t3_wq *wq)
742{
743 wq->queue->wq_in_err.err &= ~2;
744}
745
746static inline int cxio_wq_db_enabled(struct t3_wq *wq)
747{
748 return !(wq->queue->wq_in_err.err & 2);
734} 749}
735 750
736static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq) 751static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index b0ea0105ddf6..d992543890ee 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -65,6 +65,46 @@ struct cxgb3_client t3c_client = {
65static LIST_HEAD(dev_list); 65static LIST_HEAD(dev_list);
66static DEFINE_MUTEX(dev_mutex); 66static DEFINE_MUTEX(dev_mutex);
67 67
68static int disable_qp_db(int id, void *p, void *data)
69{
70 struct iwch_qp *qhp = p;
71
72 cxio_disable_wq_db(&qhp->wq);
73 return 0;
74}
75
76static int enable_qp_db(int id, void *p, void *data)
77{
78 struct iwch_qp *qhp = p;
79
80 if (data)
81 ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, qhp->wq.qpid);
82 cxio_enable_wq_db(&qhp->wq);
83 return 0;
84}
85
86static void disable_dbs(struct iwch_dev *rnicp)
87{
88 spin_lock_irq(&rnicp->lock);
89 idr_for_each(&rnicp->qpidr, disable_qp_db, NULL);
90 spin_unlock_irq(&rnicp->lock);
91}
92
93static void enable_dbs(struct iwch_dev *rnicp, int ring_db)
94{
95 spin_lock_irq(&rnicp->lock);
96 idr_for_each(&rnicp->qpidr, enable_qp_db,
97 (void *)(unsigned long)ring_db);
98 spin_unlock_irq(&rnicp->lock);
99}
100
101static void iwch_db_drop_task(struct work_struct *work)
102{
103 struct iwch_dev *rnicp = container_of(work, struct iwch_dev,
104 db_drop_task.work);
105 enable_dbs(rnicp, 1);
106}
107
68static void rnic_init(struct iwch_dev *rnicp) 108static void rnic_init(struct iwch_dev *rnicp)
69{ 109{
70 PDBG("%s iwch_dev %p\n", __func__, rnicp); 110 PDBG("%s iwch_dev %p\n", __func__, rnicp);
@@ -72,6 +112,7 @@ static void rnic_init(struct iwch_dev *rnicp)
72 idr_init(&rnicp->qpidr); 112 idr_init(&rnicp->qpidr);
73 idr_init(&rnicp->mmidr); 113 idr_init(&rnicp->mmidr);
74 spin_lock_init(&rnicp->lock); 114 spin_lock_init(&rnicp->lock);
115 INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task);
75 116
76 rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; 117 rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
77 rnicp->attr.max_wrs = T3_MAX_QP_DEPTH; 118 rnicp->attr.max_wrs = T3_MAX_QP_DEPTH;
@@ -147,6 +188,7 @@ static void close_rnic_dev(struct t3cdev *tdev)
147 mutex_lock(&dev_mutex); 188 mutex_lock(&dev_mutex);
148 list_for_each_entry_safe(dev, tmp, &dev_list, entry) { 189 list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
149 if (dev->rdev.t3cdev_p == tdev) { 190 if (dev->rdev.t3cdev_p == tdev) {
191 cancel_delayed_work_sync(&dev->db_drop_task);
150 list_del(&dev->entry); 192 list_del(&dev->entry);
151 iwch_unregister_device(dev); 193 iwch_unregister_device(dev);
152 cxio_rdev_close(&dev->rdev); 194 cxio_rdev_close(&dev->rdev);
@@ -165,7 +207,8 @@ static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
165 struct cxio_rdev *rdev = tdev->ulp; 207 struct cxio_rdev *rdev = tdev->ulp;
166 struct iwch_dev *rnicp; 208 struct iwch_dev *rnicp;
167 struct ib_event event; 209 struct ib_event event;
168 u32 portnum = port_id + 1; 210 u32 portnum = port_id + 1;
211 int dispatch = 0;
169 212
170 if (!rdev) 213 if (!rdev)
171 return; 214 return;
@@ -174,21 +217,49 @@ static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
174 case OFFLOAD_STATUS_DOWN: { 217 case OFFLOAD_STATUS_DOWN: {
175 rdev->flags = CXIO_ERROR_FATAL; 218 rdev->flags = CXIO_ERROR_FATAL;
176 event.event = IB_EVENT_DEVICE_FATAL; 219 event.event = IB_EVENT_DEVICE_FATAL;
220 dispatch = 1;
177 break; 221 break;
178 } 222 }
179 case OFFLOAD_PORT_DOWN: { 223 case OFFLOAD_PORT_DOWN: {
180 event.event = IB_EVENT_PORT_ERR; 224 event.event = IB_EVENT_PORT_ERR;
225 dispatch = 1;
181 break; 226 break;
182 } 227 }
183 case OFFLOAD_PORT_UP: { 228 case OFFLOAD_PORT_UP: {
184 event.event = IB_EVENT_PORT_ACTIVE; 229 event.event = IB_EVENT_PORT_ACTIVE;
230 dispatch = 1;
231 break;
232 }
233 case OFFLOAD_DB_FULL: {
234 disable_dbs(rnicp);
235 break;
236 }
237 case OFFLOAD_DB_EMPTY: {
238 enable_dbs(rnicp, 1);
239 break;
240 }
241 case OFFLOAD_DB_DROP: {
242 unsigned long delay = 1000;
243 unsigned short r;
244
245 disable_dbs(rnicp);
246 get_random_bytes(&r, 2);
247 delay += r & 1023;
248
249 /*
250 * delay is between 1000-2023 usecs.
251 */
252 schedule_delayed_work(&rnicp->db_drop_task,
253 usecs_to_jiffies(delay));
185 break; 254 break;
186 } 255 }
187 } 256 }
188 257
189 event.device = &rnicp->ibdev; 258 if (dispatch) {
190 event.element.port_num = portnum; 259 event.device = &rnicp->ibdev;
191 ib_dispatch_event(&event); 260 event.element.port_num = portnum;
261 ib_dispatch_event(&event);
262 }
192 263
193 return; 264 return;
194} 265}
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
index 84735506333f..a1c44578e039 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -36,6 +36,7 @@
36#include <linux/list.h> 36#include <linux/list.h>
37#include <linux/spinlock.h> 37#include <linux/spinlock.h>
38#include <linux/idr.h> 38#include <linux/idr.h>
39#include <linux/workqueue.h>
39 40
40#include <rdma/ib_verbs.h> 41#include <rdma/ib_verbs.h>
41 42
@@ -110,6 +111,7 @@ struct iwch_dev {
110 struct idr mmidr; 111 struct idr mmidr;
111 spinlock_t lock; 112 spinlock_t lock;
112 struct list_head entry; 113 struct list_head entry;
114 struct delayed_work db_drop_task;
113}; 115};
114 116
115static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev) 117static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 3eb8cecf81d7..b4d893de3650 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -452,7 +452,8 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
452 ++(qhp->wq.sq_wptr); 452 ++(qhp->wq.sq_wptr);
453 } 453 }
454 spin_unlock_irqrestore(&qhp->lock, flag); 454 spin_unlock_irqrestore(&qhp->lock, flag);
455 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); 455 if (cxio_wq_db_enabled(&qhp->wq))
456 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
456 457
457out: 458out:
458 if (err) 459 if (err)
@@ -514,7 +515,8 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
514 num_wrs--; 515 num_wrs--;
515 } 516 }
516 spin_unlock_irqrestore(&qhp->lock, flag); 517 spin_unlock_irqrestore(&qhp->lock, flag);
517 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); 518 if (cxio_wq_db_enabled(&qhp->wq))
519 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
518 520
519out: 521out:
520 if (err) 522 if (err)
@@ -597,7 +599,8 @@ int iwch_bind_mw(struct ib_qp *qp,
597 ++(qhp->wq.sq_wptr); 599 ++(qhp->wq.sq_wptr);
598 spin_unlock_irqrestore(&qhp->lock, flag); 600 spin_unlock_irqrestore(&qhp->lock, flag);
599 601
600 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); 602 if (cxio_wq_db_enabled(&qhp->wq))
603 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
601 604
602 return err; 605 return err;
603} 606}