author	Seema Khowala <seemaj@nvidia.com>	2018-05-07 18:08:56 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-06-24 12:53:40 -0400
commit	5cf1eb145fef763f7153e449be60f1a7602e2c81 (patch)
tree	a1d27a3ef1273626485c5ca02bcdecf509ede21a /drivers
parent	a8d6f31bde3ccef22ee77023eaff4a62f6f88199 (diff)
gpu: nvgpu: gv11b: preempt runlist if tsg preempt times out
- Even though a TSG preempt timed out, the teardown sequence would by
  design require s/w to issue another preempt. If recovery includes an
  ENGINE_RESET, to not have race conditions, use RUNLIST_PREEMPT to
  kick all work off and cancel any context load which may be pending.
  This is also needed to make sure that all PBDMAs serving the engine
  are not loaded when the engine is reset.
- Add max retries for pre-si platforms to the runlist preempt done
  polling loop.

Bug 2125776
Bug 2108544
Bug 2105322
Bug 2092051
Bug 2048824
Bug 2043838
Bug 2039587
Bug 2028993
Bug 2029245
Bug 2065990
Bug 1945121
Bug 200401707
Bug 200393631
Bug 200327596

Change-Id: If9d1731fc17e7e7281b24a696ea0917cd269498c
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1709902
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
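In outline, the recovery ordering described above reduces to the sketch
below. This is a simplified illustration only, assembled from the
identifiers that appear in the diff that follows; locking,
deferred-reset handling, and the engine-reset step itself are omitted.

	/* Simplified recovery sketch; not the exact nvgpu code. */
	gk20a_fifo_set_runlist_state(g, runlists_mask, RUNLIST_DISABLED);

	if (id_type == ID_TYPE_TSG || id_type == ID_TYPE_CHANNEL) {
		/* Try to preempt just the faulting TSG/channel first. */
		if (g->ops.fifo.preempt_ch_tsg(g, id, id_type) != 0) {
			/*
			 * Preempt timed out: escalate to RUNLIST_PREEMPT
			 * so that no PBDMA serving the engine is still
			 * loaded when ENGINE_RESET is applied.
			 */
			gv11b_fifo_locked_preempt_runlists(g, runlists_mask);
		}
	} else {
		gv11b_fifo_locked_preempt_runlists(g, runlists_mask);
	}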
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/gpu/nvgpu/gv11b/fifo_gv11b.c	| 34 ++++++++++++++++++++++++++++++----
1 file changed, 30 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index 47c34d32..8f0f6b0c 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -755,10 +755,20 @@ static int gv11b_fifo_poll_runlist_preempt_pending(struct gk20a *g,
 	struct nvgpu_timeout timeout;
 	u32 delay = GR_IDLE_CHECK_DEFAULT;
 	int ret = -EBUSY;
+	unsigned int loop_count = 0;
 
 	nvgpu_timeout_init(g, &timeout, g->ops.fifo.get_preempt_timeout(g),
 			   NVGPU_TIMER_CPU_TIMER);
 	do {
+		if (!nvgpu_platform_is_silicon(g)) {
+			if (loop_count >= MAX_PRE_SI_RETRIES) {
+				nvgpu_err(g, "preempt runlist retries: %u",
+					loop_count);
+				break;
+			}
+			loop_count++;
+		}
+
 		if (!((gk20a_readl(g, fifo_runlist_preempt_r())) &
 				runlists_mask)) {
 			ret = 0;
@@ -768,8 +778,11 @@ static int gv11b_fifo_poll_runlist_preempt_pending(struct gk20a *g,
 		nvgpu_usleep_range(delay, delay * 2);
 		delay = min_t(unsigned long,
 				delay << 1, GR_IDLE_CHECK_MAX);
-	} while (!nvgpu_timeout_expired_msg(&timeout,
-				"runlist preempt timeout"));
+	} while (!nvgpu_timeout_expired(&timeout));
+
+	if (ret)
+		nvgpu_err(g, "preempt runlist timeout, runlists_mask:0x%08x",
+				runlists_mask);
 	return ret;
 }
 
@@ -1068,6 +1081,7 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 	runlists_mask = gv11b_fifo_get_runlists_mask(g, act_eng_bitmask, id,
 			id_type, rc_type, mmfault);
 
+	/* Disable runlist scheduler */
 	gk20a_fifo_set_runlist_state(g, runlists_mask, RUNLIST_DISABLED);
 
 	g->fifo.deferred_reset_pending = false;
@@ -1111,9 +1125,21 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 	else if (refch)
 		g->ops.fifo.disable_channel(refch);
 
-	/* Preempt tsg/ch */
+	/*
+	 * Even though TSG preempt timed out, the RC sequence would by design
+	 * require s/w to issue another preempt.
+	 * If recovery includes an ENGINE_RESET, to not have race conditions,
+	 * use RUNLIST_PREEMPT to kick all work off, and cancel any context
+	 * load which may be pending. This is also needed to make sure
+	 * that all PBDMAs serving the engine are not loaded when engine is
+	 * reset.
+	 */
 	if (id_type == ID_TYPE_TSG || id_type == ID_TYPE_CHANNEL) {
-		g->ops.fifo.preempt_ch_tsg(g, id, id_type);
+		int preempt_failed;
+
+		preempt_failed = g->ops.fifo.preempt_ch_tsg(g, id, id_type);
+		if (preempt_failed)
+			gv11b_fifo_locked_preempt_runlists(g, runlists_mask);
 	} else {
 		gv11b_fifo_locked_preempt_runlists(g, runlists_mask);
 	}
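For reference, here is roughly how gv11b_fifo_poll_runlist_preempt_pending()
reads once both hunks of the first change are applied. The unchanged lines
between the two hunks (breaking out of the loop once the preempt-pending
bits clear), the u32 type of the runlists_mask parameter, and the value of
MAX_PRE_SI_RETRIES (defined elsewhere in fifo_gv11b.c) are assumptions not
visible in this diff.

static int gv11b_fifo_poll_runlist_preempt_pending(struct gk20a *g,
		u32 runlists_mask)
{
	struct nvgpu_timeout timeout;
	u32 delay = GR_IDLE_CHECK_DEFAULT;
	int ret = -EBUSY;
	unsigned int loop_count = 0;

	nvgpu_timeout_init(g, &timeout, g->ops.fifo.get_preempt_timeout(g),
			   NVGPU_TIMER_CPU_TIMER);
	do {
		/*
		 * On pre-silicon platforms, bound the loop by iteration
		 * count rather than relying only on the CPU timer.
		 */
		if (!nvgpu_platform_is_silicon(g)) {
			if (loop_count >= MAX_PRE_SI_RETRIES) {
				nvgpu_err(g, "preempt runlist retries: %u",
					loop_count);
				break;
			}
			loop_count++;
		}

		/* Done once no requested runlist has a preempt pending. */
		if (!((gk20a_readl(g, fifo_runlist_preempt_r())) &
				runlists_mask)) {
			ret = 0;
			break;	/* assumed; between the two hunks */
		}

		nvgpu_usleep_range(delay, delay * 2);
		delay = min_t(unsigned long,
				delay << 1, GR_IDLE_CHECK_MAX);
	} while (!nvgpu_timeout_expired(&timeout));

	if (ret)
		nvgpu_err(g, "preempt runlist timeout, runlists_mask:0x%08x",
				runlists_mask);
	return ret;
}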