diff options
author | Steve Wise <swise@opengridcomputing.com> | 2010-01-27 12:03:34 -0500 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2010-02-24 13:40:28 -0500 |
commit | e998f245c4b2d36ae2c35446e54ccbf1fb29d9de (patch) | |
tree | 9a0232f2d99d37f8d47ca1f7b4dbe5e9bcde5642 /drivers/net/cxgb3/adapter.h | |
parent | 2542322485be45853cc72d542d8ed84fae82c981 (diff) |
RDMA/cxgb3: Doorbell overflow avoidance and recovery
T3 hardware doorbell FIFO overflows can cause application stalls due
to lost doorbell ring events. This has been seen when running large
NP IMB alltoall MPI jobs. The T3 hardware supports an xon/xoff-type
flow control mechanism to help avoid overflowing the HW doorbell FIFO.
This patch uses these interrupts to disable RDMA QP doorbell rings
when we near an overflow condition, and then turn them back on (and
ring all the active QP doorbells) when when the doorbell FIFO empties
out. In addition if an doorbell ring is dropped by the hardware, the
code will now recover.
Design:
cxgb3:
- enable these DB interrupts
- in the interrupt handler, schedule work tasks to call the ULPs event
handlers with the new events.
- ring all the qset txqs when an overflow is detected.
iw_cxgb3:
- disable db ringing on all active qps when we get the DB_FULL event
- enable db ringing on all active qps and ring all active dbs when we get
the DB_EMPTY event
- On DB_DROP event:
- disable db rings in the event handler
- delay-schedule a work task which rings and enables the dbs on
all active qps.
- in post_send and post_recv logic, don't ring the db if it's disabled.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/net/cxgb3/adapter.h')
-rw-r--r-- | drivers/net/cxgb3/adapter.h | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h index 3e8618b4efbc..4cd7f420766a 100644 --- a/drivers/net/cxgb3/adapter.h +++ b/drivers/net/cxgb3/adapter.h | |||
@@ -264,6 +264,10 @@ struct adapter { | |||
264 | struct work_struct fatal_error_handler_task; | 264 | struct work_struct fatal_error_handler_task; |
265 | struct work_struct link_fault_handler_task; | 265 | struct work_struct link_fault_handler_task; |
266 | 266 | ||
267 | struct work_struct db_full_task; | ||
268 | struct work_struct db_empty_task; | ||
269 | struct work_struct db_drop_task; | ||
270 | |||
267 | struct dentry *debugfs_root; | 271 | struct dentry *debugfs_root; |
268 | 272 | ||
269 | struct mutex mdio_lock; | 273 | struct mutex mdio_lock; |
@@ -335,6 +339,7 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, | |||
335 | int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, | 339 | int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, |
336 | unsigned char *data); | 340 | unsigned char *data); |
337 | irqreturn_t t3_sge_intr_msix(int irq, void *cookie); | 341 | irqreturn_t t3_sge_intr_msix(int irq, void *cookie); |
342 | extern struct workqueue_struct *cxgb3_wq; | ||
338 | 343 | ||
339 | int t3_get_edc_fw(struct cphy *phy, int edc_idx, int size); | 344 | int t3_get_edc_fw(struct cphy *phy, int edc_idx, int size); |
340 | 345 | ||