From 5e440e63d67058834b17e4cd28d3e5c9e9b8c6e2 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Mon, 7 Jan 2019 15:39:09 +0200
Subject: gpu: nvgpu: abstract out timeout rewinding

The channel timeout ends up in a strange state during timeout handling
for a brief moment; it can become stopped and started again, and the
timeout lock is released in the middle. Add a more explicit rewind
function that resets the timeout to its start if it is active. The
active check allows this to be used from
gk20a_channel_timeout_restart_all_channels(), so that is also modified.

Also replace the return statements with more readable control flow in
gk20a_channel_timeout_handler().

Bug 200484795

Change-Id: Ia7d67242dfc149ace1f4f841a837e90b6c985308
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1989327
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
(cherry picked from commit 8979a97af3dbb65904f9db8a22a6a168a3f41447
in dev-kernel)
Reviewed-on: https://git-master.nvidia.com/r/2017922
Reviewed-by: Debarshi Dutta <ddutta@nvidia.com>
Tested-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/fifo/channel.c | 56 +++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 23 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index 0174e369..fc82748b 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -1473,6 +1473,25 @@ static void gk20a_channel_timeout_continue(struct channel_gk20a *ch)
 	nvgpu_raw_spinlock_release(&ch->timeout.lock);
 }
 
+/**
+ * Reset the counter of a timeout that is in effect.
+ *
+ * Takes the timeout lock; if this channel's timeout is running, restart it as
+ * if something happened on the channel right now.
+ *
+ * Rewinding a stopped counter is irrelevant; this is a no-op for non-running
+ * timeouts. Stopped timeouts can only be started (which is technically a
+ * rewind too) or continued (where the stop is actually a pause).
+ */
+static void gk20a_channel_timeout_rewind(struct channel_gk20a *ch)
+{
+	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
+	if (ch->timeout.running) {
+		__gk20a_channel_timeout_start(ch);
+	}
+	nvgpu_raw_spinlock_release(&ch->timeout.lock);
+}
+
 /**
  * Rewind the timeout on each non-dormant channel.
  *
@@ -1491,11 +1510,7 @@ void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
 
 		if (ch != NULL) {
 			if (!gk20a_channel_check_timedout(ch)) {
-				nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
-				if (ch->timeout.running) {
-					__gk20a_channel_timeout_start(ch);
-				}
-				nvgpu_raw_spinlock_release(&ch->timeout.lock);
+				gk20a_channel_timeout_rewind(ch);
 			}
 			gk20a_channel_put(ch);
 		}
@@ -1538,28 +1553,23 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
 	new_pb_get = g->ops.fifo.userd_pb_get(ch->g, ch);
 
 	if (new_gp_get != gp_get || new_pb_get != pb_get) {
-		/* Channel has advanced, rewind timer */
-		gk20a_channel_timeout_stop(ch);
-		gk20a_channel_timeout_start(ch);
-		return;
-	}
-
-	if (nvgpu_timeout_peek_expired(&ch->timeout.timer) == 0) {
+		/* Channel has advanced, timer keeps going but resets */
+		gk20a_channel_timeout_rewind(ch);
+	} else if (nvgpu_timeout_peek_expired(&ch->timeout.timer) == 0) {
 		/* Seems stuck but waiting to time out */
-		return;
-	}
+	} else {
+		nvgpu_err(g, "Job on channel %d timed out",
+			  ch->chid);
 
-	nvgpu_err(g, "Job on channel %d timed out",
-		  ch->chid);
+		/* force reset calls gk20a_debug_dump but not this */
+		if (ch->timeout.debug_dump) {
+			gk20a_gr_debug_dump(g);
+		}
 
-	/* force reset calls gk20a_debug_dump but not this */
-	if (ch->timeout.debug_dump) {
-		gk20a_gr_debug_dump(g);
+		g->ops.fifo.force_reset_ch(ch,
+			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT,
+			ch->timeout.debug_dump);
 	}
-
-	g->ops.fifo.force_reset_ch(ch,
-		NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT,
-		ch->timeout.debug_dump);
 }
 
 /**
-- 
cgit v1.2.2