From 4cbec6b2c7f74f309fb4bc5d4981c42ae6ea3bcd Mon Sep 17 00:00:00 2001
From: Seema Khowala
Date: Tue, 26 Jun 2018 12:33:02 -0700
Subject: gpu: nvgpu: set preempt timeout

-For Si platforms, gk20a_get_gr_idle_timeout returns 3000 ms, i.e. 3 sec.
 Currently this time is used for preempt polling, and it conflicts with
 the channel timeout if polling times out. Use fifo_eng_timeout_us
 converted to ms for preempt polling.

-In case of preempt timeout, do not issue recovery for Si platforms;
 ctxsw timeout will trigger recovery if needed. For non-Si platforms,
 issue preempt timeout rc if preempt times out.

Bug 2113657
Bug 2064553
Bug 2038366
Bug 2028993
Bug 200426402

Change-Id: I8d9f58be9ac634e94defa92a20fb737bf256d841
Signed-off-by: Seema Khowala
Reviewed-on: https://git-master.nvidia.com/r/1762076
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 46 +++++++++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 1bf38080..306f05a7 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -2743,6 +2743,17 @@ void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg)
 			fifo_preempt_type_channel_f());
 }
 
+static u32 gk20a_fifo_get_preempt_timeout(struct gk20a *g)
+{
+	/* Use fifo_eng_timeout_us converted to ms for preempt
+	 * polling. gr_idle_timeout, i.e. 3000 ms, is not appropriate
+	 * for polling preempt done, as context switch timeout gets
+	 * triggered every 100 ms and context switch recovery
+	 * happens every 3000 ms */
+
+	return g->fifo_eng_timeout_us / 1000;
+}
+
 int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 		unsigned int id_type)
 {
@@ -2750,7 +2761,7 @@ int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 	u32 delay = GR_IDLE_CHECK_DEFAULT;
 	int ret = -EBUSY;
 
-	nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
+	nvgpu_timeout_init(g, &timeout, gk20a_fifo_get_preempt_timeout(g),
 			NVGPU_TIMER_CPU_TIMER);
 	do {
 		if (!(gk20a_readl(g, fifo_preempt_r()) &
@@ -2761,8 +2772,12 @@ int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 
 		nvgpu_usleep_range(delay, delay * 2);
 		delay = min_t(u32, delay << 1,
 			GR_IDLE_CHECK_MAX);
-	} while (!nvgpu_timeout_expired_msg(&timeout, "preempt timeout"));
+	} while (!nvgpu_timeout_expired(&timeout));
 
+	if (ret) {
+		nvgpu_err(g, "preempt timeout: id: %u id_type: %d ",
+			id, id_type);
+	}
 	return ret;
 }
 
@@ -2848,8 +2863,16 @@ int gk20a_fifo_preempt_channel(struct gk20a *g, u32 chid)
 
 	for (i = 0; i < g->fifo.max_runlists; i++)
 		nvgpu_mutex_release(&f->runlist_info[i].runlist_lock);
 
-	if (ret)
-		gk20a_fifo_preempt_timeout_rc(g, chid, false);
+	if (ret) {
+		if (nvgpu_platform_is_silicon(g)) {
+			nvgpu_err(g, "preempt timed out for chid: %u, "
+			"ctxsw timeout will trigger recovery if needed",
+			chid);
+		} else {
+			gk20a_fifo_preempt_timeout_rc(g, chid, false);
+		}
+	}
+
 	return ret;
 }
 
@@ -2880,8 +2903,14 @@ int gk20a_fifo_preempt_tsg(struct gk20a *g, u32 tsgid)
 
 	for (i = 0; i < g->fifo.max_runlists; i++)
 		nvgpu_mutex_release(&f->runlist_info[i].runlist_lock);
 
-	if (ret)
-		gk20a_fifo_preempt_timeout_rc(g, tsgid, true);
+	if (ret) {
+		if (nvgpu_platform_is_silicon(g)) {
+			nvgpu_err(g, "preempt timed out for tsgid: %u, "
+			"ctxsw timeout will trigger recovery if needed", tsgid);
+		} else {
+			gk20a_fifo_preempt_timeout_rc(g, tsgid, true);
+		}
+	}
 	return ret;
 }
 
@@ -3121,6 +3150,11 @@ int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id)
 		delay = min_t(u32, delay << 1,
 			GR_IDLE_CHECK_MAX);
 	} while (!nvgpu_timeout_expired(&timeout));
 
+	if (ret) {
+		nvgpu_err(g, "runlist wait timeout: runlist id: %u",
+			runlist_id);
+	}
+
 	return ret;
 }
--
cgit v1.2.2
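
The core of this patch is a single timing decision: poll for preempt
completion for fifo_eng_timeout_us / 1000 ms instead of the 3000 ms
gr_idle_timeout, because 3000 ms is also the period at which ctxsw
timeout recovery fires, so the two timeouts would expire together and
race. The fragment below is a minimal user-space sketch of that
poll-with-exponential-backoff loop, for readers who want to see the
pattern in isolation. read_preempt_pending(), sleep_us(), now_ms() and
the timeout constants are hypothetical stand-ins, not nvgpu APIs.

/*
 * Standalone sketch of the poll-with-backoff pattern that
 * gk20a_fifo_is_preempt_pending() follows after this patch.
 * All names and values here are illustrative stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define POLL_DELAY_MIN_US	10U	/* stand-in for GR_IDLE_CHECK_DEFAULT */
#define POLL_DELAY_MAX_US	200U	/* stand-in for GR_IDLE_CHECK_MAX */
#define FIFO_ENG_TIMEOUT_US	100000U	/* assumed engine timeout, in us */

/* Pretend hardware: preemption stays pending for the first few polls. */
static bool read_preempt_pending(void)
{
	static int pending_polls = 5;

	return pending_polls-- > 0;
}

static long long now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000LL + ts.tv_nsec / 1000000LL;
}

static void sleep_us(unsigned int us)
{
	struct timespec ts = { us / 1000000U, (long)(us % 1000000U) * 1000L };

	nanosleep(&ts, NULL);
}

int main(void)
{
	/* The patch's key change: poll for fifo_eng_timeout_us / 1000 ms
	 * rather than the 3000 ms gr_idle_timeout, which is the same
	 * period at which ctxsw timeout recovery runs. */
	long long deadline = now_ms() + FIFO_ENG_TIMEOUT_US / 1000U;
	unsigned int delay = POLL_DELAY_MIN_US;
	int ret = -1;	/* -EBUSY in the driver */

	do {
		if (!read_preempt_pending()) {
			ret = 0;
			break;
		}
		sleep_us(delay);
		/* Exponential backoff, capped like min_t() in the driver. */
		delay = (delay << 1 > POLL_DELAY_MAX_US) ?
			POLL_DELAY_MAX_US : delay << 1;
	} while (now_ms() < deadline);

	if (ret)
		printf("preempt timeout\n");
	else
		printf("preempt done\n");
	return 0;
}

For the same purpose the driver itself uses nvgpu_timeout_init() /
nvgpu_timeout_expired() with NVGPU_TIMER_CPU_TIMER and sleeps with
nvgpu_usleep_range(), as shown in the hunks above.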