From 6a46965eb3b7b657c089142579ab20d6efefc0fc Mon Sep 17 00:00:00 2001
From: Richard Zhao <rizhao@nvidia.com>
Date: Wed, 6 Jun 2018 20:46:03 -0700
Subject: gpu: nvgpu: correct calculation of sm_id for .record_sm_error_state

Starting with Volta, one TPC can have more than one SM, so
.record_sm_error_state needs to take the SM number as a parameter.
The logical TPC id should be read from gr_gpc0_gpm_pd_sm_id_r.

Let the function return the logical sm_id. The RM server will need it to
notify the client.

Jira EVLR-2643
Bug 200405202

Change-Id: Iffaff05b89b1c5058616b8a6bf50dd73bd4e52f6
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1742165
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 18 ++++++++----------
 drivers/gpu/nvgpu/gv11b/gr_gv11b.h |  2 +-
 2 files changed, 9 insertions(+), 11 deletions(-)

(limited to 'drivers/gpu/nvgpu/gv11b')

diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 9bd48fdc..f57be9dd 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -3263,24 +3263,22 @@ int gv11b_gr_set_sm_debug_mode(struct gk20a *g,
 	return err;
 }
 
-int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc,
+int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 				struct channel_gk20a *fault_ch)
 {
 	int sm_id;
 	struct gr_gk20a *gr = &g->gr;
-	u32 offset, sm, sm_per_tpc;
-	u32 gpc_tpc_offset;
+	u32 offset, sm_per_tpc, tpc_id;
+	u32 gpc_offset, gpc_tpc_offset;
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 
 	sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
-	gpc_tpc_offset = gk20a_gr_gpc_offset(g, gpc) +
-				 gk20a_gr_tpc_offset(g, tpc);
+	gpc_offset = gk20a_gr_gpc_offset(g, gpc);
+	gpc_tpc_offset = gpc_offset + gk20a_gr_tpc_offset(g, tpc);
 
-	sm_id = gr_gpc0_tpc0_sm_cfg_tpc_id_v(gk20a_readl(g,
-			gr_gpc0_tpc0_sm_cfg_r() + gpc_tpc_offset));
-
-	sm = sm_id % sm_per_tpc;
+	tpc_id = gk20a_readl(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset);
+	sm_id = tpc_id * sm_per_tpc + sm;
 
 	offset = gpc_tpc_offset + gv11b_gr_sm_offset(g, sm);
 
@@ -3301,7 +3299,7 @@ int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc,
 
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 
-	return 0;
+	return sm_id;
 }
 
 void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index b4a7e411..f6f05a3b 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -171,7 +171,7 @@ int gv11b_gr_update_sm_error_state(struct gk20a *g,
 		struct nvgpu_gr_sm_error_state *sm_error_state);
 int gv11b_gr_set_sm_debug_mode(struct gk20a *g,
 	struct channel_gk20a *ch, u64 sms, bool enable);
-int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc,
+int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 		struct channel_gk20a *fault_ch);
 void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g);
 bool gv11b_gr_sm_debugger_attached(struct gk20a *g);
-- 
cgit v1.2.2