summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gv11b
diff options
context:
space:
mode:
authorRichard Zhao <rizhao@nvidia.com>2018-06-06 23:46:03 -0400
committerTejal Kudav <tkudav@nvidia.com>2018-06-14 09:44:08 -0400
commit6a46965eb3b7b657c089142579ab20d6efefc0fc (patch)
tree60aa4098d4b50af9db21e316098cfbe35c9a4797 /drivers/gpu/nvgpu/gv11b
parent7a5d498a711833990a9d8fc3f5d3f3e26bee301c (diff)
gpu: nvgpu: correct calculation of sm_id for .record_sm_error_state
Starting with Volta, one TPC can have more than one SM, so .record_sm_error_state needs to take the SM number as a parameter. The logical TPC id should be read from gr_gpc0_gpm_pd_sm_id_r. Let the function return the logical sm_id; the RM server will need it to notify the client. Jira EVLR-2643 Bug 200405202 Change-Id: Iffaff05b89b1c5058616b8a6bf50dd73bd4e52f6 Signed-off-by: Richard Zhao <rizhao@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1742165 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b')
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.c18
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.h2
2 files changed, 9 insertions, 11 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 9bd48fdc..f57be9dd 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -3263,24 +3263,22 @@ int gv11b_gr_set_sm_debug_mode(struct gk20a *g,
3263 return err; 3263 return err;
3264} 3264}
3265 3265
3266int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, 3266int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
3267 struct channel_gk20a *fault_ch) 3267 struct channel_gk20a *fault_ch)
3268{ 3268{
3269 int sm_id; 3269 int sm_id;
3270 struct gr_gk20a *gr = &g->gr; 3270 struct gr_gk20a *gr = &g->gr;
3271 u32 offset, sm, sm_per_tpc; 3271 u32 offset, sm_per_tpc, tpc_id;
3272 u32 gpc_tpc_offset; 3272 u32 gpc_offset, gpc_tpc_offset;
3273 3273
3274 nvgpu_mutex_acquire(&g->dbg_sessions_lock); 3274 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
3275 3275
3276 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); 3276 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
3277 gpc_tpc_offset = gk20a_gr_gpc_offset(g, gpc) + 3277 gpc_offset = gk20a_gr_gpc_offset(g, gpc);
3278 gk20a_gr_tpc_offset(g, tpc); 3278 gpc_tpc_offset = gpc_offset + gk20a_gr_tpc_offset(g, tpc);
3279 3279
3280 sm_id = gr_gpc0_tpc0_sm_cfg_tpc_id_v(gk20a_readl(g, 3280 tpc_id = gk20a_readl(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset);
3281 gr_gpc0_tpc0_sm_cfg_r() + gpc_tpc_offset)); 3281 sm_id = tpc_id * sm_per_tpc + sm;
3282
3283 sm = sm_id % sm_per_tpc;
3284 3282
3285 offset = gpc_tpc_offset + gv11b_gr_sm_offset(g, sm); 3283 offset = gpc_tpc_offset + gv11b_gr_sm_offset(g, sm);
3286 3284
@@ -3301,7 +3299,7 @@ int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc,
3301 3299
3302 nvgpu_mutex_release(&g->dbg_sessions_lock); 3300 nvgpu_mutex_release(&g->dbg_sessions_lock);
3303 3301
3304 return 0; 3302 return sm_id;
3305} 3303}
3306 3304
3307void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g) 3305void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index b4a7e411..f6f05a3b 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -171,7 +171,7 @@ int gv11b_gr_update_sm_error_state(struct gk20a *g,
171 struct nvgpu_gr_sm_error_state *sm_error_state); 171 struct nvgpu_gr_sm_error_state *sm_error_state);
172int gv11b_gr_set_sm_debug_mode(struct gk20a *g, 172int gv11b_gr_set_sm_debug_mode(struct gk20a *g,
173 struct channel_gk20a *ch, u64 sms, bool enable); 173 struct channel_gk20a *ch, u64 sms, bool enable);
174int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, 174int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
175 struct channel_gk20a *fault_ch); 175 struct channel_gk20a *fault_ch);
176void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g); 176void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g);
177bool gv11b_gr_sm_debugger_attached(struct gk20a *g); 177bool gv11b_gr_sm_debugger_attached(struct gk20a *g);