author	Vipul Pandya <vipul@chelsio.com>	2012-05-18 05:59:28 -0400
committer	Roland Dreier <roland@purestorage.com>	2012-05-18 16:22:31 -0400
commit	2c97478106880a5fb241a473252e61845a69386e (patch)
tree	b17a7486865794efacf6409263ba0462a1d112e0
parent	8d81ef34b249109084b2f3c4bb826d0417ef5814 (diff)
RDMA/cxgb4: Add DB Overflow Avoidance
Get FULL/EMPTY/DROP events from LLD.  On FULL event, disable normal user
mode DB rings.

Add modify_qp semantics to allow user processes to call into the kernel
to ring doorbells without overflowing.

Add DB Full/Empty/Drop stats.

Mark queues when created indicating the doorbell state.  If we're in the
middle of db overflow avoidance, then newly created queues should start
out in this mode.

Bump the C4IW_UVERBS_ABI_VERSION to 2 so the user mode library can know
if the driver supports the kernel mode db ringing.

Signed-off-by: Vipul Pandya <vipul@chelsio.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c   | 84
-rw-r--r--  drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 37
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c       | 51
-rw-r--r--  drivers/infiniband/hw/cxgb4/user.h     |  2
4 files changed, 162 insertions(+), 12 deletions(-)
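For reference, the user-mode side of this scheme rings its doorbells through the kernel by way of the overloaded PSN attributes handled in c4iw_ib_modify_qp() below. The following is only an illustrative sketch, not libcxgb4 source: it goes through the generic libibverbs ibv_modify_qp() entry point, and the ring_sq_db_via_kernel()/ring_rq_db_via_kernel() helper names and their arguments are invented for the example.

/*
 * Illustrative sketch (not libcxgb4 code): while user-mode DB rings are
 * disabled, hand the SQ/RQ doorbell index increments to the kernel via
 * ibv_modify_qp().  iw_cxgb4 interprets sq_psn/rq_psn as sq_db_inc/rq_db_inc
 * when IB_QP_SQ_PSN/IB_QP_RQ_PSN are set (see c4iw_ib_modify_qp below).
 */
#include <stdint.h>
#include <string.h>
#include <infiniband/verbs.h>

static int ring_sq_db_via_kernel(struct ibv_qp *qp, uint16_t idx_inc)
{
	struct ibv_qp_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.sq_psn = idx_inc;		/* carries sq_db_inc */
	return ibv_modify_qp(qp, &attr, IBV_QP_SQ_PSN);
}

static int ring_rq_db_via_kernel(struct ibv_qp *qp, uint16_t idx_inc)
{
	struct ibv_qp_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.rq_psn = idx_inc;		/* carries rq_db_inc */
	return ibv_modify_qp(qp, &attr, IBV_QP_RQ_PSN);
}

On the kernel side, ring_kernel_db() in qp.c then serializes these requests under db_mutex and paces them against the hardware DB FIFO.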
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 84831119c596..9062ed90ea93 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -44,6 +44,12 @@ MODULE_DESCRIPTION("Chelsio T4 RDMA Driver");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRV_VERSION);
 
+struct uld_ctx {
+	struct list_head entry;
+	struct cxgb4_lld_info lldi;
+	struct c4iw_dev *dev;
+};
+
 static LIST_HEAD(uld_ctx_list);
 static DEFINE_MUTEX(dev_mutex);
 
@@ -263,6 +269,9 @@ static int stats_show(struct seq_file *seq, void *v)
 	seq_printf(seq, " OCQPMEM: %10llu %10llu %10llu\n",
 			dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur,
 			dev->rdev.stats.ocqp.max);
+	seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full);
+	seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
+	seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop);
 	return 0;
 }
 
@@ -283,6 +292,9 @@ static ssize_t stats_clear(struct file *file, const char __user *buf,
 	dev->rdev.stats.pbl.max = 0;
 	dev->rdev.stats.rqt.max = 0;
 	dev->rdev.stats.ocqp.max = 0;
+	dev->rdev.stats.db_full = 0;
+	dev->rdev.stats.db_empty = 0;
+	dev->rdev.stats.db_drop = 0;
 	mutex_unlock(&dev->rdev.stats.lock);
 	return count;
 }
@@ -443,12 +455,6 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
 	c4iw_destroy_resource(&rdev->resource);
 }
 
-struct uld_ctx {
-	struct list_head entry;
-	struct cxgb4_lld_info lldi;
-	struct c4iw_dev *dev;
-};
-
 static void c4iw_dealloc(struct uld_ctx *ctx)
 {
 	c4iw_rdev_close(&ctx->dev->rdev);
@@ -514,6 +520,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 	idr_init(&devp->mmidr);
 	spin_lock_init(&devp->lock);
 	mutex_init(&devp->rdev.stats.lock);
+	mutex_init(&devp->db_mutex);
 
 	if (c4iw_debugfs_root) {
 		devp->debugfs_root = debugfs_create_dir(
@@ -659,11 +666,76 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
 	return 0;
 }
 
+static int disable_qp_db(int id, void *p, void *data)
+{
+	struct c4iw_qp *qp = p;
+
+	t4_disable_wq_db(&qp->wq);
+	return 0;
+}
+
+static void stop_queues(struct uld_ctx *ctx)
+{
+	spin_lock_irq(&ctx->dev->lock);
+	ctx->dev->db_state = FLOW_CONTROL;
+	idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
+	spin_unlock_irq(&ctx->dev->lock);
+}
+
+static int enable_qp_db(int id, void *p, void *data)
+{
+	struct c4iw_qp *qp = p;
+
+	t4_enable_wq_db(&qp->wq);
+	return 0;
+}
+
+static void resume_queues(struct uld_ctx *ctx)
+{
+	spin_lock_irq(&ctx->dev->lock);
+	ctx->dev->db_state = NORMAL;
+	idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
+	spin_unlock_irq(&ctx->dev->lock);
+}
+
+static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
+{
+	struct uld_ctx *ctx = handle;
+
+	switch (control) {
+	case CXGB4_CONTROL_DB_FULL:
+		stop_queues(ctx);
+		mutex_lock(&ctx->dev->rdev.stats.lock);
+		ctx->dev->rdev.stats.db_full++;
+		mutex_unlock(&ctx->dev->rdev.stats.lock);
+		break;
+	case CXGB4_CONTROL_DB_EMPTY:
+		resume_queues(ctx);
+		mutex_lock(&ctx->dev->rdev.stats.lock);
+		ctx->dev->rdev.stats.db_empty++;
+		mutex_unlock(&ctx->dev->rdev.stats.lock);
+		break;
+	case CXGB4_CONTROL_DB_DROP:
+		printk(KERN_WARNING MOD "%s: Fatal DB DROP\n",
+		       pci_name(ctx->lldi.pdev));
+		mutex_lock(&ctx->dev->rdev.stats.lock);
+		ctx->dev->rdev.stats.db_drop++;
+		mutex_unlock(&ctx->dev->rdev.stats.lock);
+		break;
+	default:
+		printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
+		       pci_name(ctx->lldi.pdev), control);
+		break;
+	}
+	return 0;
+}
+
 static struct cxgb4_uld_info c4iw_uld_info = {
 	.name = DRV_NAME,
 	.add = c4iw_uld_add,
 	.rx_handler = c4iw_uld_rx_handler,
 	.state_change = c4iw_uld_state_change,
+	.control = c4iw_uld_control,
 };
 
 static int __init c4iw_init_module(void)
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index a8490746d86c..a11ed5ce536a 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -117,6 +117,9 @@ struct c4iw_stats {
 	struct c4iw_stat pbl;
 	struct c4iw_stat rqt;
 	struct c4iw_stat ocqp;
+	u64 db_full;
+	u64 db_empty;
+	u64 db_drop;
 };
 
 struct c4iw_rdev {
@@ -192,6 +195,12 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
 	return wr_waitp->ret;
 }
 
+enum db_state {
+	NORMAL = 0,
+	FLOW_CONTROL = 1,
+	RECOVERY = 2
+};
+
 struct c4iw_dev {
 	struct ib_device ibdev;
 	struct c4iw_rdev rdev;
@@ -200,7 +209,9 @@ struct c4iw_dev {
 	struct idr qpidr;
 	struct idr mmidr;
 	spinlock_t lock;
+	struct mutex db_mutex;
 	struct dentry *debugfs_root;
+	enum db_state db_state;
 };
 
 static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
@@ -228,8 +239,8 @@ static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid)
 	return idr_find(&rhp->mmidr, mmid);
 }
 
-static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
-				void *handle, u32 id)
+static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr,
+				 void *handle, u32 id, int lock)
 {
 	int ret;
 	int newid;
@@ -237,15 +248,29 @@ static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
 	do {
 		if (!idr_pre_get(idr, GFP_KERNEL))
 			return -ENOMEM;
-		spin_lock_irq(&rhp->lock);
+		if (lock)
+			spin_lock_irq(&rhp->lock);
 		ret = idr_get_new_above(idr, handle, id, &newid);
 		BUG_ON(newid != id);
-		spin_unlock_irq(&rhp->lock);
+		if (lock)
+			spin_unlock_irq(&rhp->lock);
 	} while (ret == -EAGAIN);
 
 	return ret;
 }
 
+static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
+				void *handle, u32 id)
+{
+	return _insert_handle(rhp, idr, handle, id, 1);
+}
+
+static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr,
+				       void *handle, u32 id)
+{
+	return _insert_handle(rhp, idr, handle, id, 0);
+}
+
 static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id)
 {
 	spin_lock_irq(&rhp->lock);
@@ -370,6 +395,8 @@ struct c4iw_qp_attributes {
 	struct c4iw_ep *llp_stream_handle;
 	u8 layer_etype;
 	u8 ecode;
+	u16 sq_db_inc;
+	u16 rq_db_inc;
 };
 
 struct c4iw_qp {
@@ -444,6 +471,8 @@ static inline void insert_mmap(struct c4iw_ucontext *ucontext,
 
 enum c4iw_qp_attr_mask {
 	C4IW_QP_ATTR_NEXT_STATE = 1 << 0,
+	C4IW_QP_ATTR_SQ_DB = 1<<1,
+	C4IW_QP_ATTR_RQ_DB = 1<<2,
 	C4IW_QP_ATTR_ENABLE_RDMA_READ = 1 << 7,
 	C4IW_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8,
 	C4IW_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9,
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 5f940aeaab1e..beec66758aec 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -34,6 +34,10 @@
 
 #include "iw_cxgb4.h"
 
+static int db_delay_usecs = 1;
+module_param(db_delay_usecs, int, 0644);
+MODULE_PARM_DESC(db_delay_usecs, "Usecs to delay awaiting db fifo to drain");
+
 static int ocqp_support = 1;
 module_param(ocqp_support, int, 0644);
 MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
@@ -1128,6 +1132,29 @@ out:
 	return ret;
 }
 
+/*
+ * Called by the library when the qp has user dbs disabled due to
+ * a DB_FULL condition.  This function will single-thread all user
+ * DB rings to avoid overflowing the hw db-fifo.
+ */
+static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc)
+{
+	int delay = db_delay_usecs;
+
+	mutex_lock(&qhp->rhp->db_mutex);
+	do {
+		if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) < 768) {
+			writel(V_QID(qid) | V_PIDX(inc), qhp->wq.db);
+			break;
+		}
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(usecs_to_jiffies(delay));
+		delay = min(delay << 1, 200000);
+	} while (1);
+	mutex_unlock(&qhp->rhp->db_mutex);
+	return 0;
+}
+
 int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 		   enum c4iw_qp_attr_mask mask,
 		   struct c4iw_qp_attributes *attrs,
@@ -1176,6 +1203,15 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 		qhp->attr = newattr;
 	}
 
+	if (mask & C4IW_QP_ATTR_SQ_DB) {
+		ret = ring_kernel_db(qhp, qhp->wq.sq.qid, attrs->sq_db_inc);
+		goto out;
+	}
+	if (mask & C4IW_QP_ATTR_RQ_DB) {
+		ret = ring_kernel_db(qhp, qhp->wq.rq.qid, attrs->rq_db_inc);
+		goto out;
+	}
+
 	if (!(mask & C4IW_QP_ATTR_NEXT_STATE))
 		goto out;
 	if (qhp->attr.state == attrs->next_state)
@@ -1469,7 +1505,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	init_waitqueue_head(&qhp->wait);
 	atomic_set(&qhp->refcnt, 1);
 
-	ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
+	spin_lock_irq(&rhp->lock);
+	if (rhp->db_state != NORMAL)
+		t4_disable_wq_db(&qhp->wq);
+	ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
+	spin_unlock_irq(&rhp->lock);
 	if (ret)
 		goto err2;
 
@@ -1613,6 +1653,15 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		 C4IW_QP_ATTR_ENABLE_RDMA_WRITE |
 		 C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0;
 
+	/*
+	 * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for
+	 * ringing the queue db when we're in DB_FULL mode.
+	 */
+	attrs.sq_db_inc = attr->sq_psn;
+	attrs.rq_db_inc = attr->rq_psn;
+	mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
+	mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
+
 	return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0);
 }
 
diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h
index e6669d54770e..32b754c35ab7 100644
--- a/drivers/infiniband/hw/cxgb4/user.h
+++ b/drivers/infiniband/hw/cxgb4/user.h
@@ -32,7 +32,7 @@
 #ifndef __C4IW_USER_H__
 #define __C4IW_USER_H__
 
-#define C4IW_UVERBS_ABI_VERSION 1
+#define C4IW_UVERBS_ABI_VERSION 2
 
 /*
  * Make sure that all structs defined in this file remain laid out so
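The ABI bump above is how user space learns that the kernel-mediated doorbell ring exists. The check below is a hypothetical sketch only; how a provider library actually obtains the device ABI version depends on the libibverbs plumbing and is not part of this patch.

/*
 * Hypothetical gate (not from this patch): only fall back to kernel-mode
 * doorbell ringing when the device ABI is at least 2, i.e. the driver
 * implements the C4IW_QP_ATTR_SQ_DB/RQ_DB modify_qp semantics added here.
 */
static int kernel_db_ring_supported(int dev_abi_version)
{
	return dev_abi_version >= 2;	/* C4IW_UVERBS_ABI_VERSION >= 2 */
}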