path: root/drivers/infiniband/hw/cxgb3/iwch.c
author		Steve Wise <swise@opengridcomputing.com>	2010-01-27 12:03:34 -0500
committer	Roland Dreier <rolandd@cisco.com>	2010-02-24 13:40:28 -0500
commit		e998f245c4b2d36ae2c35446e54ccbf1fb29d9de (patch)
tree		9a0232f2d99d37f8d47ca1f7b4dbe5e9bcde5642 /drivers/infiniband/hw/cxgb3/iwch.c
parent		2542322485be45853cc72d542d8ed84fae82c981 (diff)
RDMA/cxgb3: Doorbell overflow avoidance and recovery
T3 hardware doorbell FIFO overflows can cause application stalls due to lost doorbell ring events. This has been seen when running large NP IMB alltoall MPI jobs.

The T3 hardware supports an xon/xoff-type flow control mechanism to help avoid overflowing the HW doorbell FIFO. This patch uses these interrupts to disable RDMA QP doorbell rings when we near an overflow condition, and then turn them back on (and ring all the active QP doorbells) when the doorbell FIFO empties out. In addition, if a doorbell ring is dropped by the hardware, the code will now recover.

Design:

cxgb3:
- enable these DB interrupts
- in the interrupt handler, schedule work tasks to call the ULPs' event handlers with the new events
- ring all the qset txqs when an overflow is detected

iw_cxgb3:
- disable db ringing on all active qps when we get the DB_FULL event
- enable db ringing on all active qps and ring all active dbs when we get the DB_EMPTY event
- on the DB_DROP event:
  - disable db rings in the event handler
  - delay-schedule a work task which rings and enables the dbs on all active qps
- in the post_send and post_recv logic, don't ring the db if it's disabled

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
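The post_send/post_recv change mentioned in the design notes is not part of the iwch.c diff shown on this page. As a rough illustration only, the guarded ring in the post path could look like the sketch below; cxio_wq_db_enabled() and the wq.doorbell field are assumed names for the companion changes, not taken from this diff.

	/* Sketch of the guarded doorbell ring in the post path.  The helper
	 * cxio_wq_db_enabled() and the wq.doorbell field are assumptions
	 * about the companion cxio/iwch_qp changes, which are outside the
	 * iwch.c diff shown here.
	 */
	if (cxio_wq_db_enabled(&qhp->wq))
		ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);

If doorbells are currently disabled, the ring is simply skipped; the DB_EMPTY/DB_DROP handling in the diff below re-rings every active QP once the FIFO has drained, so the suppressed ring is not lost.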
Diffstat (limited to 'drivers/infiniband/hw/cxgb3/iwch.c')
-rw-r--r--	drivers/infiniband/hw/cxgb3/iwch.c	79
1 file changed, 75 insertions, 4 deletions
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index b0ea0105ddf6..d992543890ee 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -65,6 +65,46 @@ struct cxgb3_client t3c_client = {
 static LIST_HEAD(dev_list);
 static DEFINE_MUTEX(dev_mutex);
 
+static int disable_qp_db(int id, void *p, void *data)
+{
+	struct iwch_qp *qhp = p;
+
+	cxio_disable_wq_db(&qhp->wq);
+	return 0;
+}
+
+static int enable_qp_db(int id, void *p, void *data)
+{
+	struct iwch_qp *qhp = p;
+
+	if (data)
+		ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, qhp->wq.qpid);
+	cxio_enable_wq_db(&qhp->wq);
+	return 0;
+}
+
+static void disable_dbs(struct iwch_dev *rnicp)
+{
+	spin_lock_irq(&rnicp->lock);
+	idr_for_each(&rnicp->qpidr, disable_qp_db, NULL);
+	spin_unlock_irq(&rnicp->lock);
+}
+
+static void enable_dbs(struct iwch_dev *rnicp, int ring_db)
+{
+	spin_lock_irq(&rnicp->lock);
+	idr_for_each(&rnicp->qpidr, enable_qp_db,
+		     (void *)(unsigned long)ring_db);
+	spin_unlock_irq(&rnicp->lock);
+}
+
+static void iwch_db_drop_task(struct work_struct *work)
+{
+	struct iwch_dev *rnicp = container_of(work, struct iwch_dev,
+					      db_drop_task.work);
+	enable_dbs(rnicp, 1);
+}
+
 static void rnic_init(struct iwch_dev *rnicp)
 {
 	PDBG("%s iwch_dev %p\n", __func__, rnicp);
@@ -72,6 +112,7 @@ static void rnic_init(struct iwch_dev *rnicp)
 	idr_init(&rnicp->qpidr);
 	idr_init(&rnicp->mmidr);
 	spin_lock_init(&rnicp->lock);
+	INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task);
 
 	rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
 	rnicp->attr.max_wrs = T3_MAX_QP_DEPTH;
@@ -147,6 +188,7 @@ static void close_rnic_dev(struct t3cdev *tdev)
 	mutex_lock(&dev_mutex);
 	list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
 		if (dev->rdev.t3cdev_p == tdev) {
+			cancel_delayed_work_sync(&dev->db_drop_task);
 			list_del(&dev->entry);
 			iwch_unregister_device(dev);
 			cxio_rdev_close(&dev->rdev);
@@ -165,7 +207,8 @@ static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
 	struct cxio_rdev *rdev = tdev->ulp;
 	struct iwch_dev *rnicp;
 	struct ib_event event;
 	u32 portnum = port_id + 1;
+	int dispatch = 0;
 
 	if (!rdev)
 		return;
@@ -174,21 +217,49 @@ static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
 	case OFFLOAD_STATUS_DOWN: {
 		rdev->flags = CXIO_ERROR_FATAL;
 		event.event = IB_EVENT_DEVICE_FATAL;
+		dispatch = 1;
 		break;
 	}
 	case OFFLOAD_PORT_DOWN: {
 		event.event = IB_EVENT_PORT_ERR;
+		dispatch = 1;
 		break;
 	}
 	case OFFLOAD_PORT_UP: {
 		event.event = IB_EVENT_PORT_ACTIVE;
+		dispatch = 1;
+		break;
+	}
+	case OFFLOAD_DB_FULL: {
+		disable_dbs(rnicp);
+		break;
+	}
+	case OFFLOAD_DB_EMPTY: {
+		enable_dbs(rnicp, 1);
+		break;
+	}
+	case OFFLOAD_DB_DROP: {
+		unsigned long delay = 1000;
+		unsigned short r;
+
+		disable_dbs(rnicp);
+		get_random_bytes(&r, 2);
+		delay += r & 1023;
+
+		/*
+		 * delay is between 1000-2023 usecs.
+		 */
+		schedule_delayed_work(&rnicp->db_drop_task,
+			usecs_to_jiffies(delay));
 		break;
 	}
 	}
 
-	event.device = &rnicp->ibdev;
-	event.element.port_num = portnum;
-	ib_dispatch_event(&event);
+	if (dispatch) {
+		event.device = &rnicp->ibdev;
+		event.element.port_num = portnum;
+		ib_dispatch_event(&event);
+	}
 
 	return;
 }
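For context, cxio_disable_wq_db() and cxio_enable_wq_db() used above are defined elsewhere in the series; this page only shows their callers. A minimal sketch of what such per-WQ gating amounts to is shown below, with hypothetical struct and helper names that are not the actual cxio_wr.h definitions.

	/* Minimal sketch of a per-WQ doorbell gate.  Names here are
	 * hypothetical; only the callers in iwch.c appear in this diff.
	 */
	struct sketch_wq {
		int db_enabled;		/* hypothetical per-WQ doorbell gate */
	};

	static inline void sketch_disable_wq_db(struct sketch_wq *wq)
	{
		wq->db_enabled = 0;	/* post paths skip ring_doorbell() */
	}

	static inline void sketch_enable_wq_db(struct sketch_wq *wq)
	{
		wq->db_enabled = 1;	/* rings are honored again */
	}

Because enable_qp_db() rings the doorbell for each QP before re-enabling it, any ring suppressed (or dropped by the hardware) while doorbells were disabled is replayed when the DB_EMPTY or delayed DB_DROP path runs.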