diff options
author | Richard Zhao <rizhao@nvidia.com> | 2018-06-06 23:46:03 -0400 |
---|---|---|
committer | Tejal Kudav <tkudav@nvidia.com> | 2018-06-14 09:44:08 -0400 |
commit | 6a46965eb3b7b657c089142579ab20d6efefc0fc (patch) | |
tree | 60aa4098d4b50af9db21e316098cfbe35c9a4797 /drivers/gpu/nvgpu/gv11b | |
parent | 7a5d498a711833990a9d8fc3f5d3f3e26bee301c (diff) |
gpu: nvgpu: correct calculation of sm_id for .record_sm_error_state
Starting with Volta, one TPC can have more than one SM, so
.record_sm_error_state needs to take the SM number as a parameter.
The logical TPC id should be read from gr_gpc0_gpm_pd_sm_id_r.
Let the function return the logical sm_id. The RM server will need it to notify
the client.
Jira EVLR-2643
Bug 200405202
Change-Id: Iffaff05b89b1c5058616b8a6bf50dd73bd4e52f6
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1742165
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b')
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 18 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.h | 2 |
2 files changed, 9 insertions, 11 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 9bd48fdc..f57be9dd 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -3263,24 +3263,22 @@ int gv11b_gr_set_sm_debug_mode(struct gk20a *g, | |||
3263 | return err; | 3263 | return err; |
3264 | } | 3264 | } |
3265 | 3265 | ||
3266 | int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, | 3266 | int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, |
3267 | struct channel_gk20a *fault_ch) | 3267 | struct channel_gk20a *fault_ch) |
3268 | { | 3268 | { |
3269 | int sm_id; | 3269 | int sm_id; |
3270 | struct gr_gk20a *gr = &g->gr; | 3270 | struct gr_gk20a *gr = &g->gr; |
3271 | u32 offset, sm, sm_per_tpc; | 3271 | u32 offset, sm_per_tpc, tpc_id; |
3272 | u32 gpc_tpc_offset; | 3272 | u32 gpc_offset, gpc_tpc_offset; |
3273 | 3273 | ||
3274 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | 3274 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); |
3275 | 3275 | ||
3276 | sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); | 3276 | sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); |
3277 | gpc_tpc_offset = gk20a_gr_gpc_offset(g, gpc) + | 3277 | gpc_offset = gk20a_gr_gpc_offset(g, gpc); |
3278 | gk20a_gr_tpc_offset(g, tpc); | 3278 | gpc_tpc_offset = gpc_offset + gk20a_gr_tpc_offset(g, tpc); |
3279 | 3279 | ||
3280 | sm_id = gr_gpc0_tpc0_sm_cfg_tpc_id_v(gk20a_readl(g, | 3280 | tpc_id = gk20a_readl(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset); |
3281 | gr_gpc0_tpc0_sm_cfg_r() + gpc_tpc_offset)); | 3281 | sm_id = tpc_id * sm_per_tpc + sm; |
3282 | |||
3283 | sm = sm_id % sm_per_tpc; | ||
3284 | 3282 | ||
3285 | offset = gpc_tpc_offset + gv11b_gr_sm_offset(g, sm); | 3283 | offset = gpc_tpc_offset + gv11b_gr_sm_offset(g, sm); |
3286 | 3284 | ||
@@ -3301,7 +3299,7 @@ int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, | |||
3301 | 3299 | ||
3302 | nvgpu_mutex_release(&g->dbg_sessions_lock); | 3300 | nvgpu_mutex_release(&g->dbg_sessions_lock); |
3303 | 3301 | ||
3304 | return 0; | 3302 | return sm_id; |
3305 | } | 3303 | } |
3306 | 3304 | ||
3307 | void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g) | 3305 | void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g) |
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index b4a7e411..f6f05a3b 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h | |||
@@ -171,7 +171,7 @@ int gv11b_gr_update_sm_error_state(struct gk20a *g, | |||
171 | struct nvgpu_gr_sm_error_state *sm_error_state); | 171 | struct nvgpu_gr_sm_error_state *sm_error_state); |
172 | int gv11b_gr_set_sm_debug_mode(struct gk20a *g, | 172 | int gv11b_gr_set_sm_debug_mode(struct gk20a *g, |
173 | struct channel_gk20a *ch, u64 sms, bool enable); | 173 | struct channel_gk20a *ch, u64 sms, bool enable); |
174 | int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, | 174 | int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, |
175 | struct channel_gk20a *fault_ch); | 175 | struct channel_gk20a *fault_ch); |
176 | void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g); | 176 | void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g); |
177 | bool gv11b_gr_sm_debugger_attached(struct gk20a *g); | 177 | bool gv11b_gr_sm_debugger_attached(struct gk20a *g); |