From 0852c9f1aba1654e380ccdd13cd0540fbb5a8ab0 Mon Sep 17 00:00:00 2001
From: Seema Khowala <seemaj@nvidia.com>
Date: Thu, 22 Jun 2017 11:53:04 -0700
Subject: gpu: nvgpu: add sm lock_down gr ops

Add lock_down_sm and wait_for_sm_lock_down gr ops.
These are required to support multiple SMs and the t19x SM
register address changes.

JIRA GPUT19X-75

Change-Id: I529babde51d9b2143fe3740a4f67c582b7eb404b
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master/r/1514042
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/gk20a.h    |  4 +++
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 55 +++++++++++++++++++-------------------
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h |  8 +++---
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c |  2 ++
 4 files changed, 37 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index a0a67332..483cdd6e 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -296,6 +296,10 @@ struct gpu_ops {
 		u32 (*get_sm_hww_global_esr)(struct gk20a *g,
 						u32 gpc, u32 tpc, u32 sm);
 		u32 (*get_sm_no_lock_down_hww_global_esr_mask)(struct gk20a *g);
+		int  (*lock_down_sm)(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
+				u32 global_esr_mask, bool check_errors);
+		int  (*wait_for_sm_lock_down)(struct gk20a *g, u32 gpc, u32 tpc,
+				u32 sm, u32 global_esr_mask, bool check_errors);
 		void (*get_esr_sm_sel)(struct gk20a *g, u32 gpc, u32 tpc,
 					 u32 *esr_sm_sel);
 		int (*handle_sm_exception)(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index d325f794..d2c77c93 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5467,16 +5467,14 @@ unlock:
 }
 
 int gk20a_gr_lock_down_sm(struct gk20a *g,
-				 u32 gpc, u32 tpc, u32 global_esr_mask,
-				 bool check_errors)
+			 u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
+			 bool check_errors)
 {
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
-	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
+	u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
 	u32 dbgr_control0;
 
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
-			"GPC%d TPC%d: locking down SM", gpc, tpc);
+			"GPC%d TPC%d SM%d: assert stop trigger", gpc, tpc, sm);
 
 	/* assert stop trigger */
 	dbgr_control0 =
@@ -5485,7 +5483,7 @@ int gk20a_gr_lock_down_sm(struct gk20a *g,
 	gk20a_writel(g,
 		gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
 
-	return gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, global_esr_mask,
+	return g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, global_esr_mask,
 			check_errors);
 }
 
@@ -5598,7 +5596,8 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 	}
 
 	if (do_warp_sync) {
-		ret = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, true);
+		ret = g->ops.gr.lock_down_sm(g, gpc, tpc, sm,
+				 global_mask, true);
 		if (ret) {
 			nvgpu_err(g, "sm did not lock down!");
 			return ret;
@@ -7906,25 +7905,21 @@ void gk20a_init_gr(struct gk20a *g)
 	nvgpu_cond_init(&g->gr.init_wq);
 }
 
-int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
+int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 		u32 global_esr_mask, bool check_errors)
 {
 	bool locked_down;
 	bool no_error_pending;
 	u32 delay = GR_IDLE_CHECK_DEFAULT;
 	bool mmu_debug_mode_enabled = g->ops.fb.is_debug_mode_enabled(g);
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
-	u32 offset =
-		gpc_stride * gpc + tpc_in_gpc_stride * tpc;
+	u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
 	u32 dbgr_status0 = 0, dbgr_control0 = 0;
 	u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
 	struct nvgpu_timeout timeout;
 	u32 warp_esr;
-	u32 sm = 0;
 
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
-		"GPC%d TPC%d: locking down SM", gpc, tpc);
+		"GPC%d TPC%d SM%d: locking down SM", gpc, tpc, sm);
 
 	nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
 			   NVGPU_TIMER_CPU_TIMER);
@@ -7949,7 +7944,8 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
 
 		if (locked_down || no_error_pending) {
 			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
-				  "GPC%d TPC%d: locked down SM", gpc, tpc);
+				  "GPC%d TPC%d SM%d: locked down SM",
+					gpc, tpc, sm);
 			return 0;
 		}
 
@@ -7959,7 +7955,7 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
 		     (g->ops.mm.mmu_fault_pending(g))) {
 			nvgpu_err(g,
 				"GPC%d TPC%d: mmu fault pending,"
-				" sm will never lock down!", gpc, tpc);
+				" SM%d will never lock down!", gpc, tpc, sm);
 			return -EFAULT;
 		}
 
@@ -8017,7 +8013,7 @@ void gk20a_gr_suspend_single_sm(struct gk20a *g,
 	gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset,
 			dbgr_control0);
 
-	err = gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc,
+	err = g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm,
 			global_esr_mask, check_errors);
 	if (err) {
 		nvgpu_err(g,
@@ -8030,9 +8026,10 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g,
 		u32 global_esr_mask, bool check_errors)
 {
 	struct gr_gk20a *gr = &g->gr;
-	u32 gpc, tpc;
+	u32 gpc, tpc, sm;
 	int err;
 	u32 dbgr_control0;
+	u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
 
 	/* if an SM debugger isn't attached, skip suspend */
 	if (!g->ops.gr.sm_debugger_attached(g)) {
@@ -8055,13 +8052,14 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g,
 
 	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
 		for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) {
-			err =
-			 gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc,
+			for (sm = 0; sm < sm_per_tpc; sm++) {
+				err = g->ops.gr.wait_for_sm_lock_down(g,
+					gpc, tpc, sm,
 					global_esr_mask, check_errors);
-			if (err) {
-				nvgpu_err(g,
-					"SuspendAllSms failed");
-				return;
+				if (err) {
+					nvgpu_err(g, "SuspendAllSms failed");
+					return;
+				}
 			}
 		}
 	}
@@ -8371,7 +8369,7 @@ int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state)
 {
 	int err = 0;
 	struct gr_gk20a *gr = &g->gr;
-	u32 gpc, tpc, sm_id;
+	u32 gpc, tpc, sm, sm_id;
 	u32 global_mask;
 
 	/* Wait for the SMs to reach full stop. This condition is:
@@ -8386,9 +8384,10 @@ int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state)
 
 		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
 		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+		sm = g->gr.sm_to_cluster[sm_id].sm_index;
 
-		err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false);
-
+		err = g->ops.gr.lock_down_sm(g, gpc, tpc, sm,
+				global_mask, false);
 		if (err) {
 			nvgpu_err(g, "sm did not lock down!");
 			return err;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 8230ba72..77db5cf6 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -622,9 +622,6 @@ void gk20a_gr_suspend_single_sm(struct gk20a *g,
 void gk20a_gr_suspend_all_sms(struct gk20a *g,
 		u32 global_esr_mask, bool check_errors);
 u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index);
-int gk20a_gr_lock_down_sm(struct gk20a *g,
-				 u32 gpc, u32 tpc, u32 global_esr_mask,
-				 bool check_errors);
 int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
 	struct channel_gk20a *ch, u64 sms, bool enable);
 bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);
@@ -681,7 +678,10 @@ static inline void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
 
 int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
 		struct gr_gk20a_isr_data *isr_data);
-int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
+int gk20a_gr_lock_down_sm(struct gk20a *g,
+			 u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
+			 bool check_errors);
+int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 		u32 global_esr_mask, bool check_errors);
 void gk20a_gr_clear_sm_hww(struct gk20a *g,
 		u32 gpc, u32 tpc, u32 global_esr);
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index fecd487f..333f0340 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1638,4 +1638,6 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr;
 	gops->gr.get_sm_no_lock_down_hww_global_esr_mask =
 			 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask;
+	gops->gr.lock_down_sm = gk20a_gr_lock_down_sm;
+	gops->gr.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down;
 }
-- 
cgit v1.2.2