gpu: nvgpu: correct calculation of sm_id for .record_sm_error_state

Starting with Volta, one TPC can have more than one SM, so .record_sm_error_state needs to take the SM number as a parameter. The logical TPC id should be read from gr_gpc0_gpm_pd_sm_id_r. Let the function return the logical sm_id; the RM server will need it to notify the client. Jira EVLR-2643 Bug 200405202 Change-Id: Iffaff05b89b1c5058616b8a6bf50dd73bd4e52f6 Signed-off-by: Richard Zhao <rizhao@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1742165 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Richard Zhao <rizhao@nvidia.com> 2018-06-06 23:46:03 -0400
committer: Tejal Kudav <tkudav@nvidia.com> 2018-06-14 09:44:08 -0400
commit: 6a46965eb3b7b657c089142579ab20d6efefc0fc (patch)
tree: 60aa4098d4b50af9db21e316098cfbe35c9a4797 /drivers/gpu/nvgpu/gv11b
parent: 7a5d498a711833990a9d8fc3f5d3f3e26bee301c (diff)
2 files changed, 9 insertions, 11 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 9bd48fdc..f57be9dd 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -3263,24 +3263,22 @@ int gv11b_gr_set_sm_debug_mode(struct gk20a *g,
        return err;
 }
-int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc,
+int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
                                struct channel_gk20a *fault_ch)
 {
        int sm_id;
        struct gr_gk20a *gr = &g->gr;
-        u32 offset, sm, sm_per_tpc;
+        u32 offset, sm_per_tpc, tpc_id;
-        u32 gpc_tpc_offset;
+        u32 gpc_offset, gpc_tpc_offset;
        nvgpu_mutex_acquire(&g->dbg_sessions_lock);
        sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
-        gpc_tpc_offset = gk20a_gr_gpc_offset(g, gpc) +
+        gpc_offset = gk20a_gr_gpc_offset(g, gpc);
-                                 gk20a_gr_tpc_offset(g, tpc);
+        gpc_tpc_offset = gpc_offset + gk20a_gr_tpc_offset(g, tpc);
-        sm_id = gr_gpc0_tpc0_sm_cfg_tpc_id_v(gk20a_readl(g,
+        tpc_id = gk20a_readl(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset);
-                        gr_gpc0_tpc0_sm_cfg_r() + gpc_tpc_offset));
+        sm_id = tpc_id * sm_per_tpc + sm;
-        sm = sm_id % sm_per_tpc;
        offset = gpc_tpc_offset + gv11b_gr_sm_offset(g, sm);
@@ -3301,7 +3299,7 @@ int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc,
        nvgpu_mutex_release(&g->dbg_sessions_lock);
-        return 0;
+        return sm_id;
 }
 void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index b4a7e411..f6f05a3b 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -171,7 +171,7 @@ int gv11b_gr_update_sm_error_state(struct gk20a *g,
                struct nvgpu_gr_sm_error_state *sm_error_state);
 int gv11b_gr_set_sm_debug_mode(struct gk20a *g,
        struct channel_gk20a *ch, u64 sms, bool enable);
-int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc,
+int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
                struct channel_gk20a *fault_ch);
 void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g);
 bool gv11b_gr_sm_debugger_attached(struct gk20a *g);
author	Richard Zhao <rizhao@nvidia.com>	2018-06-06 23:46:03 -0400
committer	Tejal Kudav <tkudav@nvidia.com>	2018-06-14 09:44:08 -0400
commit	6a46965eb3b7b657c089142579ab20d6efefc0fc (patch)
tree	60aa4098d4b50af9db21e316098cfbe35c9a4797 /drivers/gpu/nvgpu/gv11b
parent	7a5d498a711833990a9d8fc3f5d3f3e26bee301c (diff)