From b8faddfe2ad3d52837b0f766d74feb8e6d6f4ce5 Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Tue, 4 Aug 2015 17:14:54 +0530
Subject: gpu: nvgpu: fix runlist update timeout handling

Bug 1625901

1) Disable ELPG before doing a GR reset when a runlist update times out.
2) Add a mutex for GR reset to prevent multiple threads from resetting GR
   at the same time.
3) Protect the GR reset with the FECS mutex so that no one else submits
   methods while the reset is in progress.

Change-Id: I02993fd1eabe6875ab1c58a40a06e6c79fcdeeae
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/793643
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 12 ++++++++++--
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c    | 31 ++++++++++++++++++++-----------
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.h    |  1 +
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c      | 22 ++++++++++++++++------
 4 files changed, 47 insertions(+), 19 deletions(-)

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index dae9c8cb..c18a4e5d 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -719,7 +719,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch)
 	struct vm_gk20a *ch_vm = ch->vm;
 	unsigned long timeout = gk20a_get_gr_idle_timeout(g);
 	struct dbg_session_gk20a *dbg_s;
-
+	bool was_reset;
 	gk20a_dbg_fn("");
 
 	WARN_ON(ch->g == NULL);
@@ -764,7 +764,15 @@ static void gk20a_free_channel(struct channel_gk20a *ch)
 	if (g->fifo.deferred_reset_pending) {
 		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
 			   " deferred, running now");
-		gk20a_fifo_reset_engine(g, g->fifo.deferred_fault_engines);
+		was_reset = mutex_is_locked(&g->fifo.gr_reset_mutex);
+		mutex_lock(&g->fifo.gr_reset_mutex);
+		/* if lock is already taken, a reset is taking place
+		so no need to repeat */
+		if (!was_reset) {
+			gk20a_fifo_reset_engine(g,
+				g->fifo.deferred_fault_engines);
+		}
+		mutex_unlock(&g->fifo.gr_reset_mutex);
 		g->fifo.deferred_fault_engines = 0;
 		g->fifo.deferred_reset_pending = false;
 	}
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 68c0ddcb..0bd75026 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -476,6 +476,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 	f->g = g;
 
 	mutex_init(&f->intr.isr.mutex);
+	mutex_init(&f->gr_reset_mutex);
 	gk20a_init_fifo_pbdma_intr_descs(f); /* just filling in data/tables */
 
 	f->num_channels = g->ops.fifo.get_num_fifos(g);
@@ -767,12 +768,15 @@ void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
 	gk20a_dbg_fn("");
 
 	if (engine_id == top_device_info_type_enum_graphics_v()) {
-		/*HALT_PIPELINE method, halt GR engine*/
-		if (gr_gk20a_halt_pipe(g))
-			gk20a_err(dev_from_gk20a(g), "failed to HALT gr pipe");
-		/* resetting engine using mc_enable_r() is not enough,
-		 * we do full init sequence */
-		gk20a_gr_reset(g);
+		if (support_gk20a_pmu(g->dev) && g->elpg_enabled)
+			gk20a_pmu_disable_elpg(g);
+			/*HALT_PIPELINE method, halt GR engine*/
+			if (gr_gk20a_halt_pipe(g))
+				gk20a_err(dev_from_gk20a(g),
+					"failed to HALT gr pipe");
+			/* resetting engine using mc_enable_r() is not
+			enough, we do full init sequence */
+			gk20a_gr_reset(g);
 	}
 	if (engine_id == top_device_info_type_enum_copy0_v())
 		gk20a_reset(g, mc_enable_ce2_m());
@@ -950,6 +954,7 @@ static bool gk20a_fifo_handle_mmu_fault(
 		struct channel_gk20a *ch = NULL;
 		struct tsg_gk20a *tsg = NULL;
 		struct channel_gk20a *referenced_channel = NULL;
+		bool was_reset;
 		/* read and parse engine status */
 		u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
 		u32 ctx_status = fifo_engine_status_ctx_status_v(status);
@@ -1029,9 +1034,15 @@ static bool gk20a_fifo_handle_mmu_fault(
 
 			/* handled during channel free */
 			g->fifo.deferred_reset_pending = true;
-		} else if (engine_id != ~0)
-			gk20a_fifo_reset_engine(g, engine_id);
-
+		} else if (engine_id != ~0) {
+			was_reset = mutex_is_locked(&g->fifo.gr_reset_mutex);
+			mutex_lock(&g->fifo.gr_reset_mutex);
+			/* if lock is already taken, a reset is taking place
+			so no need to repeat */
+			if (!was_reset)
+				gk20a_fifo_reset_engine(g, engine_id);
+			mutex_unlock(&g->fifo.gr_reset_mutex);
+		}
 		/* disable the channel/TSG from hw and increment
 		 * syncpoints */
 
@@ -2120,12 +2131,10 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 			gk20a_fifo_runlist_reset_engines(g, runlist_id);
 
 			/* engine reset needs the lock. drop it */
-			mutex_unlock(&runlist->mutex);
 			/* wait until the runlist is active again */
 			ret = gk20a_fifo_runlist_wait_pending(g, runlist_id);
 			/* get the lock back. at this point everything should
 			 * should be fine */
-			mutex_lock(&runlist->mutex);
 
 			if (ret)
 				gk20a_err(dev_from_gk20a(g),
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 7385f9be..3eb193f6 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -109,6 +109,7 @@ struct fifo_gk20a {
 	/* zero-kref'd channels here */
 	struct list_head free_chs;
 	struct mutex free_chs_mutex;
+	struct mutex gr_reset_mutex;
 
 	struct tsg_gk20a *tsg;
 	struct mutex tsg_inuse_mutex;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 512a7d6b..0ae44c6f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -4320,12 +4320,6 @@ static int gr_gk20a_init_ctxsw(struct gk20a *g)
 	if (err)
 		goto out;
 
-	/* this appears query for sw states but fecs actually init
-	   ramchain, etc so this is hw init */
-	err = g->ops.gr.init_ctx_state(g);
-	if (err)
-		goto out;
-
 out:
 	if (err)
 		gk20a_err(dev_from_gk20a(g), "fail");
@@ -4553,6 +4547,12 @@ int gk20a_init_gr_support(struct gk20a *g)
 	if (err)
 		return err;
 
+	/* this appears query for sw states but fecs actually init
+	   ramchain, etc so this is hw init */
+	err = g->ops.gr.init_ctx_state(g);
+	if (err)
+		return err;
+
 	err = gk20a_init_gr_setup_sw(g);
 	if (err)
 		return err;
@@ -4776,6 +4776,8 @@ int gk20a_gr_reset(struct gk20a *g)
 	int err;
 	u32 size;
 
+	mutex_lock(&g->gr.fecs_mutex);
+
 	err = gk20a_enable_gr_hw(g);
 	if (err)
 		return err;
@@ -4788,6 +4790,14 @@ int gk20a_gr_reset(struct gk20a *g)
 	if (err)
 		return err;
 
+	mutex_unlock(&g->gr.fecs_mutex);
+
+	/* this appears query for sw states but fecs actually init
+	   ramchain, etc so this is hw init */
+	err = g->ops.gr.init_ctx_state(g);
+	if (err)
+		return err;
+
 	size = 0;
 	err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
 	if (err) {
-- 
cgit v1.2.2