aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorVipul Pandya <vipul@chelsio.com>2012-05-18 05:59:30 -0400
committerRoland Dreier <roland@purestorage.com>2012-05-18 16:22:33 -0400
commit422eea0a8cf658bc9564726d74e8384b89a8f4fa (patch)
tree6c2ad0bf5112cfef2cd5bb025468cc3318d337d0 /drivers/infiniband
parent4984037bef54253d4d010d3e57f175ab694bee26 (diff)
RDMA/cxgb4: DB Drop Recovery for RDMA and LLD queues
Add module option db_fc_threshold which is the count of active QPs that trigger automatic db flow control mode. Automatically transition to/from flow control mode when the active qp count crosses db_fc_threshold. Add more db debugfs stats. On DB DROP event from the LLD, recover all the iwarp queues. Signed-off-by: Vipul Pandya <vipul@chelsio.com> Signed-off-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c176
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h24
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c47
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h24
4 files changed, 259 insertions, 12 deletions
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 9062ed90ea93..bdb398f54a64 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -246,6 +246,8 @@ static const struct file_operations stag_debugfs_fops = {
246 .llseek = default_llseek, 246 .llseek = default_llseek,
247}; 247};
248 248
249static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY"};
250
249static int stats_show(struct seq_file *seq, void *v) 251static int stats_show(struct seq_file *seq, void *v)
250{ 252{
251 struct c4iw_dev *dev = seq->private; 253 struct c4iw_dev *dev = seq->private;
@@ -272,6 +274,9 @@ static int stats_show(struct seq_file *seq, void *v)
272 seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full); 274 seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full);
273 seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty); 275 seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
274 seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop); 276 seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop);
277 seq_printf(seq, " DB State: %s Transitions %llu\n",
278 db_state_str[dev->db_state],
279 dev->rdev.stats.db_state_transitions);
275 return 0; 280 return 0;
276} 281}
277 282
@@ -295,6 +300,7 @@ static ssize_t stats_clear(struct file *file, const char __user *buf,
295 dev->rdev.stats.db_full = 0; 300 dev->rdev.stats.db_full = 0;
296 dev->rdev.stats.db_empty = 0; 301 dev->rdev.stats.db_empty = 0;
297 dev->rdev.stats.db_drop = 0; 302 dev->rdev.stats.db_drop = 0;
303 dev->rdev.stats.db_state_transitions = 0;
298 mutex_unlock(&dev->rdev.stats.lock); 304 mutex_unlock(&dev->rdev.stats.lock);
299 return count; 305 return count;
300} 306}
@@ -677,8 +683,11 @@ static int disable_qp_db(int id, void *p, void *data)
677static void stop_queues(struct uld_ctx *ctx) 683static void stop_queues(struct uld_ctx *ctx)
678{ 684{
679 spin_lock_irq(&ctx->dev->lock); 685 spin_lock_irq(&ctx->dev->lock);
680 ctx->dev->db_state = FLOW_CONTROL; 686 if (ctx->dev->db_state == NORMAL) {
681 idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL); 687 ctx->dev->rdev.stats.db_state_transitions++;
688 ctx->dev->db_state = FLOW_CONTROL;
689 idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
690 }
682 spin_unlock_irq(&ctx->dev->lock); 691 spin_unlock_irq(&ctx->dev->lock);
683} 692}
684 693
@@ -693,9 +702,165 @@ static int enable_qp_db(int id, void *p, void *data)
693static void resume_queues(struct uld_ctx *ctx) 702static void resume_queues(struct uld_ctx *ctx)
694{ 703{
695 spin_lock_irq(&ctx->dev->lock); 704 spin_lock_irq(&ctx->dev->lock);
696 ctx->dev->db_state = NORMAL; 705 if (ctx->dev->qpcnt <= db_fc_threshold &&
697 idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL); 706 ctx->dev->db_state == FLOW_CONTROL) {
707 ctx->dev->db_state = NORMAL;
708 ctx->dev->rdev.stats.db_state_transitions++;
709 idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
710 }
711 spin_unlock_irq(&ctx->dev->lock);
712}
713
714struct qp_list {
715 unsigned idx;
716 struct c4iw_qp **qps;
717};
718
719static int add_and_ref_qp(int id, void *p, void *data)
720{
721 struct qp_list *qp_listp = data;
722 struct c4iw_qp *qp = p;
723
724 c4iw_qp_add_ref(&qp->ibqp);
725 qp_listp->qps[qp_listp->idx++] = qp;
726 return 0;
727}
728
729static int count_qps(int id, void *p, void *data)
730{
731 unsigned *countp = data;
732 (*countp)++;
733 return 0;
734}
735
736static void deref_qps(struct qp_list qp_list)
737{
738 int idx;
739
740 for (idx = 0; idx < qp_list.idx; idx++)
741 c4iw_qp_rem_ref(&qp_list.qps[idx]->ibqp);
742}
743
744static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
745{
746 int idx;
747 int ret;
748
749 for (idx = 0; idx < qp_list->idx; idx++) {
750 struct c4iw_qp *qp = qp_list->qps[idx];
751
752 ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
753 qp->wq.sq.qid,
754 t4_sq_host_wq_pidx(&qp->wq),
755 t4_sq_wq_size(&qp->wq));
756 if (ret) {
757 printk(KERN_ERR MOD "%s: Fatal error - "
758 "DB overflow recovery failed - "
759 "error syncing SQ qid %u\n",
760 pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
761 return;
762 }
763
764 ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
765 qp->wq.rq.qid,
766 t4_rq_host_wq_pidx(&qp->wq),
767 t4_rq_wq_size(&qp->wq));
768
769 if (ret) {
770 printk(KERN_ERR MOD "%s: Fatal error - "
771 "DB overflow recovery failed - "
772 "error syncing RQ qid %u\n",
773 pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
774 return;
775 }
776
777 /* Wait for the dbfifo to drain */
778 while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
779 set_current_state(TASK_UNINTERRUPTIBLE);
780 schedule_timeout(usecs_to_jiffies(10));
781 }
782 }
783}
784
785static void recover_queues(struct uld_ctx *ctx)
786{
787 int count = 0;
788 struct qp_list qp_list;
789 int ret;
790
791 /* lock out kernel db ringers */
792 mutex_lock(&ctx->dev->db_mutex);
793
794 /* put all queues in to recovery mode */
795 spin_lock_irq(&ctx->dev->lock);
796 ctx->dev->db_state = RECOVERY;
797 ctx->dev->rdev.stats.db_state_transitions++;
798 idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
799 spin_unlock_irq(&ctx->dev->lock);
800
801 /* slow everybody down */
802 set_current_state(TASK_UNINTERRUPTIBLE);
803 schedule_timeout(usecs_to_jiffies(1000));
804
805 /* Wait for the dbfifo to completely drain. */
806 while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
807 set_current_state(TASK_UNINTERRUPTIBLE);
808 schedule_timeout(usecs_to_jiffies(10));
809 }
810
811 /* flush the SGE contexts */
812 ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
813 if (ret) {
814 printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
815 pci_name(ctx->lldi.pdev));
816 goto out;
817 }
818
819 /* Count active queues so we can build a list of queues to recover */
820 spin_lock_irq(&ctx->dev->lock);
821 idr_for_each(&ctx->dev->qpidr, count_qps, &count);
822
823 qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
824 if (!qp_list.qps) {
825 printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
826 pci_name(ctx->lldi.pdev));
827 spin_unlock_irq(&ctx->dev->lock);
828 goto out;
829 }
830 qp_list.idx = 0;
831
832 /* add and ref each qp so it doesn't get freed */
833 idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list);
834
698 spin_unlock_irq(&ctx->dev->lock); 835 spin_unlock_irq(&ctx->dev->lock);
836
837 /* now traverse the list in a safe context to recover the db state*/
838 recover_lost_dbs(ctx, &qp_list);
839
840 /* we're almost done! deref the qps and clean up */
841 deref_qps(qp_list);
842 kfree(qp_list.qps);
843
844 /* Wait for the dbfifo to completely drain again */
845 while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
846 set_current_state(TASK_UNINTERRUPTIBLE);
847 schedule_timeout(usecs_to_jiffies(10));
848 }
849
850 /* resume the queues */
851 spin_lock_irq(&ctx->dev->lock);
852 if (ctx->dev->qpcnt > db_fc_threshold)
853 ctx->dev->db_state = FLOW_CONTROL;
854 else {
855 ctx->dev->db_state = NORMAL;
856 idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
857 }
858 ctx->dev->rdev.stats.db_state_transitions++;
859 spin_unlock_irq(&ctx->dev->lock);
860
861out:
862 /* start up kernel db ringers again */
863 mutex_unlock(&ctx->dev->db_mutex);
699} 864}
700 865
701static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...) 866static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
@@ -716,8 +881,7 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
716 mutex_unlock(&ctx->dev->rdev.stats.lock); 881 mutex_unlock(&ctx->dev->rdev.stats.lock);
717 break; 882 break;
718 case CXGB4_CONTROL_DB_DROP: 883 case CXGB4_CONTROL_DB_DROP:
719 printk(KERN_WARNING MOD "%s: Fatal DB DROP\n", 884 recover_queues(ctx);
720 pci_name(ctx->lldi.pdev));
721 mutex_lock(&ctx->dev->rdev.stats.lock); 885 mutex_lock(&ctx->dev->rdev.stats.lock);
722 ctx->dev->rdev.stats.db_drop++; 886 ctx->dev->rdev.stats.db_drop++;
723 mutex_unlock(&ctx->dev->rdev.stats.lock); 887 mutex_unlock(&ctx->dev->rdev.stats.lock);
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index e8b88a02cc77..6818659f2617 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -120,6 +120,7 @@ struct c4iw_stats {
120 u64 db_full; 120 u64 db_full;
121 u64 db_empty; 121 u64 db_empty;
122 u64 db_drop; 122 u64 db_drop;
123 u64 db_state_transitions;
123}; 124};
124 125
125struct c4iw_rdev { 126struct c4iw_rdev {
@@ -212,6 +213,7 @@ struct c4iw_dev {
212 struct mutex db_mutex; 213 struct mutex db_mutex;
213 struct dentry *debugfs_root; 214 struct dentry *debugfs_root;
214 enum db_state db_state; 215 enum db_state db_state;
216 int qpcnt;
215}; 217};
216 218
217static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) 219static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
@@ -271,11 +273,25 @@ static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr,
271 return _insert_handle(rhp, idr, handle, id, 0); 273 return _insert_handle(rhp, idr, handle, id, 0);
272} 274}
273 275
274static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id) 276static inline void _remove_handle(struct c4iw_dev *rhp, struct idr *idr,
277 u32 id, int lock)
275{ 278{
276 spin_lock_irq(&rhp->lock); 279 if (lock)
280 spin_lock_irq(&rhp->lock);
277 idr_remove(idr, id); 281 idr_remove(idr, id);
278 spin_unlock_irq(&rhp->lock); 282 if (lock)
283 spin_unlock_irq(&rhp->lock);
284}
285
286static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id)
287{
288 _remove_handle(rhp, idr, id, 1);
289}
290
291static inline void remove_handle_nolock(struct c4iw_dev *rhp,
292 struct idr *idr, u32 id)
293{
294 _remove_handle(rhp, idr, id, 0);
279} 295}
280 296
281struct c4iw_pd { 297struct c4iw_pd {
@@ -843,5 +859,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe);
843extern struct cxgb4_client t4c_client; 859extern struct cxgb4_client t4c_client;
844extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; 860extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS];
845extern int c4iw_max_read_depth; 861extern int c4iw_max_read_depth;
862extern int db_fc_threshold;
863
846 864
847#endif 865#endif
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index beec66758aec..ba1343ee1414 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -42,6 +42,11 @@ static int ocqp_support = 1;
42module_param(ocqp_support, int, 0644); 42module_param(ocqp_support, int, 0644);
43MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)"); 43MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
44 44
45int db_fc_threshold = 2000;
46module_param(db_fc_threshold, int, 0644);
47MODULE_PARM_DESC(db_fc_threshold, "QP count/threshold that triggers automatic "
48 "db flow control mode (default = 2000)");
49
45static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) 50static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
46{ 51{
47 unsigned long flag; 52 unsigned long flag;
@@ -1143,13 +1148,19 @@ static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc)
1143 1148
1144 mutex_lock(&qhp->rhp->db_mutex); 1149 mutex_lock(&qhp->rhp->db_mutex);
1145 do { 1150 do {
1146 if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) < 768) { 1151
1152 /*
1153 * The interrupt threshold is dbfifo_int_thresh << 6. So
1154 * make sure we don't cross that and generate an interrupt.
1155 */
1156 if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) <
1157 (qhp->rhp->rdev.lldi.dbfifo_int_thresh << 5)) {
1147 writel(V_QID(qid) | V_PIDX(inc), qhp->wq.db); 1158 writel(V_QID(qid) | V_PIDX(inc), qhp->wq.db);
1148 break; 1159 break;
1149 } 1160 }
1150 set_current_state(TASK_UNINTERRUPTIBLE); 1161 set_current_state(TASK_UNINTERRUPTIBLE);
1151 schedule_timeout(usecs_to_jiffies(delay)); 1162 schedule_timeout(usecs_to_jiffies(delay));
1152 delay = min(delay << 1, 200000); 1163 delay = min(delay << 1, 2000);
1153 } while (1); 1164 } while (1);
1154 mutex_unlock(&qhp->rhp->db_mutex); 1165 mutex_unlock(&qhp->rhp->db_mutex);
1155 return 0; 1166 return 0;
@@ -1388,6 +1399,14 @@ out:
1388 return ret; 1399 return ret;
1389} 1400}
1390 1401
1402static int enable_qp_db(int id, void *p, void *data)
1403{
1404 struct c4iw_qp *qp = p;
1405
1406 t4_enable_wq_db(&qp->wq);
1407 return 0;
1408}
1409
1391int c4iw_destroy_qp(struct ib_qp *ib_qp) 1410int c4iw_destroy_qp(struct ib_qp *ib_qp)
1392{ 1411{
1393 struct c4iw_dev *rhp; 1412 struct c4iw_dev *rhp;
@@ -1405,7 +1424,16 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
1405 c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); 1424 c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
1406 wait_event(qhp->wait, !qhp->ep); 1425 wait_event(qhp->wait, !qhp->ep);
1407 1426
1408 remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); 1427 spin_lock_irq(&rhp->lock);
1428 remove_handle_nolock(rhp, &rhp->qpidr, qhp->wq.sq.qid);
1429 rhp->qpcnt--;
1430 BUG_ON(rhp->qpcnt < 0);
1431 if (rhp->qpcnt <= db_fc_threshold && rhp->db_state == FLOW_CONTROL) {
1432 rhp->rdev.stats.db_state_transitions++;
1433 rhp->db_state = NORMAL;
1434 idr_for_each(&rhp->qpidr, enable_qp_db, NULL);
1435 }
1436 spin_unlock_irq(&rhp->lock);
1409 atomic_dec(&qhp->refcnt); 1437 atomic_dec(&qhp->refcnt);
1410 wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); 1438 wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
1411 1439
@@ -1419,6 +1447,14 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
1419 return 0; 1447 return 0;
1420} 1448}
1421 1449
1450static int disable_qp_db(int id, void *p, void *data)
1451{
1452 struct c4iw_qp *qp = p;
1453
1454 t4_disable_wq_db(&qp->wq);
1455 return 0;
1456}
1457
1422struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, 1458struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1423 struct ib_udata *udata) 1459 struct ib_udata *udata)
1424{ 1460{
@@ -1508,6 +1544,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1508 spin_lock_irq(&rhp->lock); 1544 spin_lock_irq(&rhp->lock);
1509 if (rhp->db_state != NORMAL) 1545 if (rhp->db_state != NORMAL)
1510 t4_disable_wq_db(&qhp->wq); 1546 t4_disable_wq_db(&qhp->wq);
1547 if (++rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) {
1548 rhp->rdev.stats.db_state_transitions++;
1549 rhp->db_state = FLOW_CONTROL;
1550 idr_for_each(&rhp->qpidr, disable_qp_db, NULL);
1551 }
1511 ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); 1552 ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
1512 spin_unlock_irq(&rhp->lock); 1553 spin_unlock_irq(&rhp->lock);
1513 if (ret) 1554 if (ret)
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index c0221eec8817..16f26ab29302 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -62,6 +62,10 @@ struct t4_status_page {
62 __be16 pidx; 62 __be16 pidx;
63 u8 qp_err; /* flit 1 - sw owns */ 63 u8 qp_err; /* flit 1 - sw owns */
64 u8 db_off; 64 u8 db_off;
65 u8 pad;
66 u16 host_wq_pidx;
67 u16 host_cidx;
68 u16 host_pidx;
65}; 69};
66 70
67#define T4_EQ_ENTRY_SIZE 64 71#define T4_EQ_ENTRY_SIZE 64
@@ -375,6 +379,16 @@ static inline void t4_rq_consume(struct t4_wq *wq)
375 wq->rq.cidx = 0; 379 wq->rq.cidx = 0;
376} 380}
377 381
382static inline u16 t4_rq_host_wq_pidx(struct t4_wq *wq)
383{
384 return wq->rq.queue[wq->rq.size].status.host_wq_pidx;
385}
386
387static inline u16 t4_rq_wq_size(struct t4_wq *wq)
388{
389 return wq->rq.size * T4_RQ_NUM_SLOTS;
390}
391
378static inline int t4_sq_onchip(struct t4_sq *sq) 392static inline int t4_sq_onchip(struct t4_sq *sq)
379{ 393{
380 return sq->flags & T4_SQ_ONCHIP; 394 return sq->flags & T4_SQ_ONCHIP;
@@ -412,6 +426,16 @@ static inline void t4_sq_consume(struct t4_wq *wq)
412 wq->sq.cidx = 0; 426 wq->sq.cidx = 0;
413} 427}
414 428
429static inline u16 t4_sq_host_wq_pidx(struct t4_wq *wq)
430{
431 return wq->sq.queue[wq->sq.size].status.host_wq_pidx;
432}
433
434static inline u16 t4_sq_wq_size(struct t4_wq *wq)
435{
436 return wq->sq.size * T4_SQ_NUM_SLOTS;
437}
438
415static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc) 439static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc)
416{ 440{
417 wmb(); 441 wmb();