From 9891cb117e538f1ea5d19171a3c88422cdce7162 Mon Sep 17 00:00:00 2001
From: Seema Khowala
Date: Wed, 21 Jun 2017 21:28:10 -0700
Subject: gpu: nvgpu: add gr ops get_sm_hww_global_esr

Required for multiple SM support and t19x sm register address changes

JIRA GPUT19X-75

Change-Id: I437095cb8f8d2ba31b85594a7609532991441a37
Signed-off-by: Seema Khowala
Reviewed-on: https://git-master/r/1514040
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/gk20a/gk20a.h    |  2 +
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 87 +++++++++++++++++++-------------------
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h |  1 +
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c |  1 +
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c |  3 +-
 5 files changed, 50 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index d45477fc..70b1ac5f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -293,6 +293,8 @@ struct gpu_ops {
 				bool *early_exit, bool *ignore_debugger);
 		u32 (*get_sm_hww_warp_esr)(struct gk20a *g,
 			u32 gpc, u32 tpc, u32 sm);
+		u32 (*get_sm_hww_global_esr)(struct gk20a *g,
+			u32 gpc, u32 tpc, u32 sm);
 		void (*get_esr_sm_sel)(struct gk20a *g, u32 gpc, u32 tpc,
 				u32 *esr_sm_sel);
 		int (*handle_sm_exception)(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 6fe330c0..b714b2e2 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5525,9 +5525,9 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 	int ret = 0;
 	bool do_warp_sync = false, early_exit = false, ignore_debugger = false;
 	bool disable_sm_exceptions = true;
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
-	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
+	u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
+	bool sm_debugger_attached;
+	u32 global_esr, warp_esr;
 
 	/* these three interrupts don't require locking down the SM. They can
 	 * be handled by usermode clients as they aren't fatal. Additionally,
@@ -5537,27 +5537,26 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 	u32 global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()
 			| gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f()
 			| gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
-	u32 global_esr, warp_esr;
-	bool sm_debugger_attached = g->ops.gr.sm_debugger_attached(g);
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 
-	global_esr = gk20a_readl(g,
-			gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+	sm_debugger_attached = g->ops.gr.sm_debugger_attached(g);
+
+	global_esr = g->ops.gr.get_sm_hww_global_esr(g, gpc, tpc, sm);
+	*hww_global_esr = global_esr;
 	warp_esr = g->ops.gr.get_sm_hww_warp_esr(g, gpc, tpc, sm);
 
 	if (!sm_debugger_attached) {
-		nvgpu_err(g, "sm hww global %08x warp %08x",
+		nvgpu_err(g, "sm hww global 0x%08x warp 0x%08x",
 			  global_esr, warp_esr);
 		return -EFAULT;
 	}
 
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
-		"sm hww global %08x warp %08x", global_esr, warp_esr);
+		"sm hww global 0x%08x warp 0x%08x", global_esr, warp_esr);
 
 	gr_gk20a_elpg_protected_call(g,
 		g->ops.gr.record_sm_error_state(g, gpc, tpc));
-	*hww_global_esr = global_esr;
 
 	if (g->ops.gr.pre_process_sm_exception) {
 		ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc, sm,
@@ -5615,7 +5614,8 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 	}
 
 	if (ignore_debugger)
-		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "ignore_debugger set, skipping event posting");
+		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
+			"ignore_debugger set, skipping event posting");
 	else
 		*post_event |= true;
 
@@ -5655,15 +5655,14 @@ static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
 		u32 *hww_global_esr)
 {
 	int ret = 0;
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
-	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
+	u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
 	u32 tpc_exception = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_r()
 			+ offset);
 	u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
 
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "");
 
+	/* check if an sm exception is pending */
 	if (gr_gpc0_tpc0_tpccs_tpc_exception_sm_v(tpc_exception) ==
 			gr_gpc0_tpc0_tpccs_tpc_exception_sm_pending_v()) {
@@ -5685,6 +5684,11 @@ static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
 			ret = g->ops.gr.handle_sm_exception(g,
 				gpc, tpc, sm, post_event, fault_ch,
 				hww_global_esr);
+			/* clear the hwws; this also causes tpc and gpc
+			 * exceptions to be cleared
+			 */
+			gk20a_gr_clear_sm_hww(g, gpc, tpc, *hww_global_esr);
+
 		}
 	}
 
@@ -5704,12 +5708,10 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
 		struct channel_gk20a *fault_ch, u32 *hww_global_esr)
 {
 	int ret = 0;
-	u32 gpc_offset, tpc_offset, gpc, tpc;
+	u32 gpc_offset, gpc, tpc;
 	struct gr_gk20a *gr = &g->gr;
 	u32 exception1 = gk20a_readl(g, gr_exception1_r());
-	u32 gpc_exception, global_esr;
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 gpc_exception;
 
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "");
 
@@ -5720,7 +5722,7 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
 		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
 				"GPC%d exception pending", gpc);
 
-		gpc_offset = gpc_stride * gpc;
+		gpc_offset = gk20a_gr_gpc_offset(g, gpc);
 
 		gpc_exception = gk20a_readl(g,
 				gr_gpc0_gpccs_gpc_exception_r() + gpc_offset);
@@ -5734,18 +5736,9 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
 			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
 				"GPC%d: TPC%d exception pending", gpc, tpc);
 
-			tpc_offset = tpc_in_gpc_stride * tpc;
-
-			global_esr = gk20a_readl(g,
-					gr_gpc0_tpc0_sm_hww_global_esr_r() +
-					gpc_offset + tpc_offset);
-
 			ret = gk20a_gr_handle_tpc_exception(g, gpc, tpc,
 					post_event, fault_ch, hww_global_esr);
 
-			/* clear the hwws, also causes tpc and gpc
-			 * exceptions to be cleared */
-			gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr);
 		}
 
 		/* Handle GCC exception */
@@ -7946,8 +7939,8 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
 
 	/* wait for the sm to lock down */
 	do {
-		u32 global_esr = gk20a_readl(g,
-				gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+		u32 global_esr = g->ops.gr.get_sm_hww_global_esr(g,
+				gpc, tpc, sm);
 		dbgr_status0 = gk20a_readl(g,
 				gr_gpc0_tpc0_sm_dbgr_status0_r() + offset);
 
@@ -8440,28 +8433,25 @@ int gr_gk20a_resume_from_pause(struct gk20a *g)
 int gr_gk20a_clear_sm_errors(struct gk20a *g)
 {
 	int ret = 0;
-	u32 gpc_offset, tpc_offset, gpc, tpc;
+	u32 gpc, tpc, sm;
 	struct gr_gk20a *gr = &g->gr;
 	u32 global_esr;
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
 
 	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
 
-		gpc_offset = gpc_stride * gpc;
-
 		/* check if any tpc has an exception */
 		for (tpc = 0; tpc < gr->tpc_count; tpc++) {
 
-			tpc_offset = tpc_in_gpc_stride * tpc;
+			for (sm = 0; sm < sm_per_tpc; sm++) {
+				global_esr = g->ops.gr.get_sm_hww_global_esr(g,
+						gpc, tpc, sm);
 
-			global_esr = gk20a_readl(g,
-					gr_gpc0_tpc0_sm_hww_global_esr_r() +
-					gpc_offset + tpc_offset);
-
-			/* clear the hwws, also causes tpc and gpc
-			 * exceptions to be cleared */
-			gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr);
+				/* clearing the hwws also causes tpc and gpc
+				 * exceptions to be cleared
+				 */
+				gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr);
+			}
 		}
 	}
 
@@ -8498,3 +8488,14 @@ u32 gk20a_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm)
 			gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
 	return hww_warp_esr;
 }
+
+u32 gk20a_gr_get_sm_hww_global_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm)
+{
+	u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
+
+	u32 hww_global_esr = gk20a_readl(g,
+			 gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+
+	return hww_global_esr;
+}
+
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 924410c3..3dbf5697 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -694,6 +694,7 @@ int gr_gk20a_get_ctx_id(struct gk20a *g, u32 *ctx_id);
 
 u32 gk20a_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);
+u32 gk20a_gr_get_sm_hww_global_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);
 
 int gr_gk20a_wait_fe_idle(struct gk20a *g, unsigned long duration_ms,
 		u32 expect_delay);
 
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index f3ba11b7..3a1d579a 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1635,4 +1635,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.resume_single_sm = gk20a_gr_resume_single_sm;
 	gops->gr.resume_all_sms = gk20a_gr_resume_all_sms;
 	gops->gr.get_sm_hww_warp_esr = gk20a_gr_get_sm_hww_warp_esr;
+	gops->gr.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 64ec5e1a..27d609d1 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1833,7 +1833,8 @@ static int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
 	}
 
 	/* reset the HWW errors after locking down */
-	global_esr_copy = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+	global_esr_copy = g->ops.gr.get_sm_hww_global_esr(g,
+			gpc, tpc, sm);
 	gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr_copy);
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: HWWs cleared for gpc %d tpc %d\n",
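Note on the new op: gk20a_gr_get_sm_hww_global_esr() above computes its
register offset from gpc and tpc only and ignores its sm argument, which
is sufficient on chips with a single SM per TPC. The point of routing
every read through g->ops.gr.get_sm_hww_global_esr is that a t19x-style
chip with multiple SMs per TPC can fold the SM index into the offset as
well. A minimal sketch of such an override follows; the gv11b_* names
and the GPU_LIT_SM_PER_TPC_BASE_STRIDE litter value are illustrative
assumptions, not something this commit defines.

	/*
	 * Hypothetical override for a chip with more than one SM per TPC.
	 * The per-SM stride litter value and the function names are
	 * assumptions for illustration; only the op signature comes from
	 * this commit.
	 */
	static u32 gv11b_gr_sm_offset(struct gk20a *g, u32 sm)
	{
		/* assumed litter value: register stride between SMs in a TPC */
		u32 sm_stride = nvgpu_get_litter_value(g,
					GPU_LIT_SM_PER_TPC_BASE_STRIDE);

		return sm * sm_stride;
	}

	static u32 gv11b_gr_get_sm_hww_global_esr(struct gk20a *g,
			u32 gpc, u32 tpc, u32 sm)
	{
		/* rebase the broadcast gpc0/tpc0 register by gpc, tpc and sm */
		u32 offset = gk20a_gr_gpc_offset(g, gpc) +
				gk20a_gr_tpc_offset(g, tpc) +
				gv11b_gr_sm_offset(g, sm);

		return gk20a_readl(g,
				gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
	}

Such a chip's init_gr would then install the override exactly as gm20b
does above, e.g. gops->gr.get_sm_hww_global_esr =
gv11b_gr_get_sm_hww_global_esr;.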