From 6a46965eb3b7b657c089142579ab20d6efefc0fc Mon Sep 17 00:00:00 2001 From: Richard Zhao Date: Wed, 6 Jun 2018 20:46:03 -0700 Subject: gpu: nvgpu: correct calculation of sm_id for .record_sm_error_state Starting with Volta, one TPC could have more than one SM. So .record_sm_error_state needs to take the SM number as a parameter. The logical TPC id should be read from gr_gpc0_gpm_pd_sm_id_r. Let the function return the logical sm_id. The RM server will need it to notify the client. Jira EVLR-2643 Bug 200405202 Change-Id: Iffaff05b89b1c5058616b8a6bf50dd73bd4e52f6 Signed-off-by: Richard Zhao Reviewed-on: https://git-master.nvidia.com/r/1742165 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 18 ++++++++---------- drivers/gpu/nvgpu/gv11b/gr_gv11b.h | 2 +- 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 9bd48fdc..f57be9dd 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -3263,24 +3263,22 @@ int gv11b_gr_set_sm_debug_mode(struct gk20a *g, return err; } -int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, +int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, struct channel_gk20a *fault_ch) { int sm_id; struct gr_gk20a *gr = &g->gr; - u32 offset, sm, sm_per_tpc; - u32 gpc_tpc_offset; + u32 offset, sm_per_tpc, tpc_id; + u32 gpc_offset, gpc_tpc_offset; nvgpu_mutex_acquire(&g->dbg_sessions_lock); sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); - gpc_tpc_offset = gk20a_gr_gpc_offset(g, gpc) + - gk20a_gr_tpc_offset(g, tpc); + gpc_offset = gk20a_gr_gpc_offset(g, gpc); + gpc_tpc_offset = gpc_offset + gk20a_gr_tpc_offset(g, tpc); - sm_id = gr_gpc0_tpc0_sm_cfg_tpc_id_v(gk20a_readl(g, - gr_gpc0_tpc0_sm_cfg_r() + gpc_tpc_offset)); - - sm = sm_id % sm_per_tpc; + tpc_id = gk20a_readl(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset);
+ sm_id = tpc_id * sm_per_tpc + sm; offset = gpc_tpc_offset + gv11b_gr_sm_offset(g, sm); @@ -3301,7 +3299,7 @@ int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, nvgpu_mutex_release(&g->dbg_sessions_lock); - return 0; + return sm_id; } void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index b4a7e411..f6f05a3b 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h @@ -171,7 +171,7 @@ int gv11b_gr_update_sm_error_state(struct gk20a *g, struct nvgpu_gr_sm_error_state *sm_error_state); int gv11b_gr_set_sm_debug_mode(struct gk20a *g, struct channel_gk20a *ch, u64 sms, bool enable); -int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, +int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, struct channel_gk20a *fault_ch); void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g); bool gv11b_gr_sm_debugger_attached(struct gk20a *g); -- cgit v1.2.2