author     Vipul Pandya <vipul@chelsio.com>    2012-05-18 05:59:30 -0400
committer  Roland Dreier <roland@purestorage.com>    2012-05-18 16:22:33 -0400
commit     422eea0a8cf658bc9564726d74e8384b89a8f4fa (patch)
tree       6c2ad0bf5112cfef2cd5bb025468cc3318d337d0 /drivers/infiniband/hw/cxgb4/device.c
parent     4984037bef54253d4d010d3e57f175ab694bee26 (diff)
RDMA/cxgb4: DB Drop Recovery for RDMA and LLD queues
Add a module option, db_fc_threshold, which is the count of active QPs
that triggers automatic db flow control mode. Automatically transition
to/from flow control mode when the active QP count crosses
db_fc_threshold.
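
As a rough sketch, such an option would be declared with the standard
module-parameter macros. The actual declaration lives elsewhere in this
series, not in device.c, so it does not appear in the diffstat below;
the default value and permissions here are assumptions for illustration:

    /* Sketch only: db_fc_threshold as a writable module parameter.
     * The default of 2000 and the 0644 permissions are assumed, not
     * taken from this diff. */
    int db_fc_threshold = 2000;
    module_param(db_fc_threshold, int, 0644);
    MODULE_PARM_DESC(db_fc_threshold,
                     "QP count that triggers automatic db flow control mode");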
Add more db debugfs stats.

On a DB DROP event from the LLD, recover all the iwarp queues.
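
The recovery path below drives a three-state machine per device, named
by db_state_str in the diff (whose indexing implies NORMAL = 0,
FLOW_CONTROL = 1, RECOVERY = 2). A minimal sketch of the assumed enum;
it would live in the driver header, which is outside this
device.c-only diffstat:

    /* Assumed shape of the per-device doorbell state machine; the
     * real enum is declared in the driver header, not in this diff. */
    enum db_state {
            NORMAL = 0,       /* QPs ring their doorbells directly */
            FLOW_CONTROL = 1, /* QP doorbells disabled; driver paces DB writes */
            RECOVERY = 2      /* DBs dropped; queues quiesced while pidx resyncs */
    };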
Signed-off-by: Vipul Pandya <vipul@chelsio.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband/hw/cxgb4/device.c')
-rw-r--r--    drivers/infiniband/hw/cxgb4/device.c    176
1 file changed, 170 insertions, 6 deletions
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 9062ed90ea93..bdb398f54a64 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -246,6 +246,8 @@ static const struct file_operations stag_debugfs_fops = {
         .llseek  = default_llseek,
 };
 
+static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY"};
+
 static int stats_show(struct seq_file *seq, void *v)
 {
         struct c4iw_dev *dev = seq->private;
@@ -272,6 +274,9 @@ static int stats_show(struct seq_file *seq, void *v)
         seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full);
         seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
         seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop);
+        seq_printf(seq, " DB State: %s Transitions %llu\n",
+                   db_state_str[dev->db_state],
+                   dev->rdev.stats.db_state_transitions);
         return 0;
 }
 
@@ -295,6 +300,7 @@ static ssize_t stats_clear(struct file *file, const char __user *buf,
         dev->rdev.stats.db_full = 0;
         dev->rdev.stats.db_empty = 0;
         dev->rdev.stats.db_drop = 0;
+        dev->rdev.stats.db_state_transitions = 0;
         mutex_unlock(&dev->rdev.stats.lock);
         return count;
 }
@@ -677,8 +683,11 @@ static int disable_qp_db(int id, void *p, void *data)
 static void stop_queues(struct uld_ctx *ctx)
 {
         spin_lock_irq(&ctx->dev->lock);
-        ctx->dev->db_state = FLOW_CONTROL;
-        idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
+        if (ctx->dev->db_state == NORMAL) {
+                ctx->dev->rdev.stats.db_state_transitions++;
+                ctx->dev->db_state = FLOW_CONTROL;
+                idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
+        }
         spin_unlock_irq(&ctx->dev->lock);
 }
 
@@ -693,9 +702,165 @@ static int enable_qp_db(int id, void *p, void *data)
 static void resume_queues(struct uld_ctx *ctx)
 {
         spin_lock_irq(&ctx->dev->lock);
-        ctx->dev->db_state = NORMAL;
-        idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
+        if (ctx->dev->qpcnt <= db_fc_threshold &&
+            ctx->dev->db_state == FLOW_CONTROL) {
+                ctx->dev->db_state = NORMAL;
+                ctx->dev->rdev.stats.db_state_transitions++;
+                idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
+        }
+        spin_unlock_irq(&ctx->dev->lock);
+}
+
+struct qp_list {
+        unsigned idx;
+        struct c4iw_qp **qps;
+};
+
+static int add_and_ref_qp(int id, void *p, void *data)
+{
+        struct qp_list *qp_listp = data;
+        struct c4iw_qp *qp = p;
+
+        c4iw_qp_add_ref(&qp->ibqp);
+        qp_listp->qps[qp_listp->idx++] = qp;
+        return 0;
+}
+
+static int count_qps(int id, void *p, void *data)
+{
+        unsigned *countp = data;
+        (*countp)++;
+        return 0;
+}
+
+static void deref_qps(struct qp_list qp_list)
+{
+        int idx;
+
+        for (idx = 0; idx < qp_list.idx; idx++)
+                c4iw_qp_rem_ref(&qp_list.qps[idx]->ibqp);
+}
+
+static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
+{
+        int idx;
+        int ret;
+
+        for (idx = 0; idx < qp_list->idx; idx++) {
+                struct c4iw_qp *qp = qp_list->qps[idx];
+
+                ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
+                                          qp->wq.sq.qid,
+                                          t4_sq_host_wq_pidx(&qp->wq),
+                                          t4_sq_wq_size(&qp->wq));
+                if (ret) {
+                        printk(KERN_ERR MOD "%s: Fatal error - "
+                               "DB overflow recovery failed - "
+                               "error syncing SQ qid %u\n",
+                               pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
+                        return;
+                }
+
+                ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
+                                          qp->wq.rq.qid,
+                                          t4_rq_host_wq_pidx(&qp->wq),
+                                          t4_rq_wq_size(&qp->wq));
+
+                if (ret) {
+                        printk(KERN_ERR MOD "%s: Fatal error - "
+                               "DB overflow recovery failed - "
+                               "error syncing RQ qid %u\n",
+                               pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
+                        return;
+                }
+
+                /* Wait for the dbfifo to drain */
+                while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
+                        set_current_state(TASK_UNINTERRUPTIBLE);
+                        schedule_timeout(usecs_to_jiffies(10));
+                }
+        }
+}
+
+static void recover_queues(struct uld_ctx *ctx)
+{
+        int count = 0;
+        struct qp_list qp_list;
+        int ret;
+
+        /* lock out kernel db ringers */
+        mutex_lock(&ctx->dev->db_mutex);
+
+        /* put all queues in to recovery mode */
+        spin_lock_irq(&ctx->dev->lock);
+        ctx->dev->db_state = RECOVERY;
+        ctx->dev->rdev.stats.db_state_transitions++;
+        idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
+        spin_unlock_irq(&ctx->dev->lock);
+
+        /* slow everybody down */
+        set_current_state(TASK_UNINTERRUPTIBLE);
+        schedule_timeout(usecs_to_jiffies(1000));
+
+        /* Wait for the dbfifo to completely drain. */
+        while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
+                set_current_state(TASK_UNINTERRUPTIBLE);
+                schedule_timeout(usecs_to_jiffies(10));
+        }
+
+        /* flush the SGE contexts */
+        ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
+        if (ret) {
+                printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
+                       pci_name(ctx->lldi.pdev));
+                goto out;
+        }
+
+        /* Count active queues so we can build a list of queues to recover */
+        spin_lock_irq(&ctx->dev->lock);
+        idr_for_each(&ctx->dev->qpidr, count_qps, &count);
+
+        qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
+        if (!qp_list.qps) {
+                printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
+                       pci_name(ctx->lldi.pdev));
+                spin_unlock_irq(&ctx->dev->lock);
+                goto out;
+        }
+        qp_list.idx = 0;
+
+        /* add and ref each qp so it doesn't get freed */
+        idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list);
+
         spin_unlock_irq(&ctx->dev->lock);
+
+        /* now traverse the list in a safe context to recover the db state */
+        recover_lost_dbs(ctx, &qp_list);
+
+        /* we're almost done!  deref the qps and clean up */
+        deref_qps(qp_list);
+        kfree(qp_list.qps);
+
+        /* Wait for the dbfifo to completely drain again */
+        while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
+                set_current_state(TASK_UNINTERRUPTIBLE);
+                schedule_timeout(usecs_to_jiffies(10));
+        }
+
+        /* resume the queues */
+        spin_lock_irq(&ctx->dev->lock);
+        if (ctx->dev->qpcnt > db_fc_threshold)
+                ctx->dev->db_state = FLOW_CONTROL;
+        else {
+                ctx->dev->db_state = NORMAL;
+                idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
+        }
+        ctx->dev->rdev.stats.db_state_transitions++;
+        spin_unlock_irq(&ctx->dev->lock);
+
+out:
+        /* start up kernel db ringers again */
+        mutex_unlock(&ctx->dev->db_mutex);
 }
 
 static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
@@ -716,8 +881,7 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
                 mutex_unlock(&ctx->dev->rdev.stats.lock);
                 break;
         case CXGB4_CONTROL_DB_DROP:
-                printk(KERN_WARNING MOD "%s: Fatal DB DROP\n",
-                       pci_name(ctx->lldi.pdev));
+                recover_queues(ctx);
                 mutex_lock(&ctx->dev->rdev.stats.lock);
                 ctx->dev->rdev.stats.db_drop++;
                 mutex_unlock(&ctx->dev->rdev.stats.lock);
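
For context, recover_queues() is reached through the LLD's control
upcall. A sketch of how the ULD is assumed to wire c4iw_uld_control
into cxgb4 (this registration is pre-existing code, not part of the
diff above; the field layout is assumed from this era of the driver):

    /* Assumed, for illustration: the cxgb4_uld_info hookup that routes
     * CXGB4_CONTROL_DB_DROP events from the cxgb4 LLD into
     * c4iw_uld_control() above. */
    static struct cxgb4_uld_info c4iw_uld_info = {
            .name = DRV_NAME,
            .add = c4iw_uld_add,
            .rx_handler = c4iw_uld_rx_handler,
            .state_change = c4iw_uld_state_change,
            .control = c4iw_uld_control,
    };

    /* Registered once at module init: */
    cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);

With this wiring, a doorbell drop detected by the hardware propagates
from the LLD into the handler above, which runs the full
recover_queues() sequence before counting the event in db_drop.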