summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gv11b
diff options
context:
space:
mode:
authorAnup Mahindre <amahindre@nvidia.com>2018-09-21 02:22:47 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-09-27 18:06:37 -0400
commit30b571e31c7f8ee26fc58864272bc7c7e083d377 (patch)
tree9c45ffa9c6df41880163884ebdbab57a9920d041 /drivers/gpu/nvgpu/gv11b
parentf89ea110417e1c83cf21f495d6da7cdc8138a29f (diff)
gpu: nvgpu: Add gv11b_gr_clear_sm_error_state
All chips were using gm20b_gr_clear_sm_error_state. This was wrong for chips based on Volta and later, as that implementation didn't consider the non-PES-aware VSMS mapping. Add a new HAL implementation of clear_sm_error_state for Volta-based and later chips to fix this. Bug 200448172 Change-Id: I65988c8cbb35d13089ac628e8333d9a3b58e0eb1 Signed-off-by: Anup Mahindre <amahindre@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1837188 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b')
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.c53
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.h2
-rw-r--r--drivers/gpu/nvgpu/gv11b/hal_gv11b.c2
3 files changed, 56 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 288bd583..bb76178e 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -5010,3 +5010,56 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
5010 *num_registers = t; 5010 *num_registers = t;
5011 return 0; 5011 return 0;
5012} 5012}
5013
/*
 * Clear the SM error state for SM sm_id on behalf of channel ch.
 *
 * The saved entry tsg->sm_error_states[sm_id] of the channel's TSG is zeroed.
 * If the channel's context is currently resident, the SM's HWW global ESR
 * and warp ESR registers are also reset, with context switching disabled
 * around the register accesses.
 *
 * Returns -EINVAL if the channel has no TSG, the error code from
 * gr_gk20a_disable_ctxsw() if context switching could not be stopped, and
 * otherwise the result of gr_gk20a_enable_ctxsw().
 *
 * NOTE(review): sm_id is used as an array index without a range check in
 * this function; presumably the caller validates it -- confirm.
 */
5014int gv11b_gr_clear_sm_error_state(struct gk20a *g,
5015 struct channel_gk20a *ch, u32 sm_id)
5016{
5017 u32 gpc, tpc, sm, offset;
5018 u32 val;
5019 struct tsg_gk20a *tsg;
5020
5021 int err = 0;
5022
5023 tsg = tsg_gk20a_from_ch(ch);
5024 if (tsg == NULL) {
5025 return -EINVAL;
5026 }
5027
/* Everything below runs with dbg_sessions_lock held; released at "fail". */
5028 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
5029
/*
 * Zero the saved error state first. This happens even if disabling ctxsw
 * fails below or the context turns out not to be resident.
 */
5030 (void)memset(&tsg->sm_error_states[sm_id], 0, sizeof(*tsg->sm_error_states));
5031
5032 err = gr_gk20a_disable_ctxsw(g);
5033 if (err != 0) {
5034 nvgpu_err(g, "unable to stop gr ctxsw");
5035 goto fail;
5036 }
5037
/* Hardware registers are only touched when ch's context is resident. */
5038 if (gk20a_is_channel_ctx_resident(ch)) {
5039 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
/*
 * When the HAL provides get_nonpes_aware_tpc, the TPC index from
 * sm_to_cluster is translated through it; otherwise it is used as-is.
 */
5040 if (g->ops.gr.get_nonpes_aware_tpc != NULL) {
5041 tpc = g->ops.gr.get_nonpes_aware_tpc(g,
5042 g->gr.sm_to_cluster[sm_id].gpc_index,
5043 g->gr.sm_to_cluster[sm_id].tpc_index);
5044 } else {
5045 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
5046 }
5047 sm = g->gr.sm_to_cluster[sm_id].sm_index;
5048
/* Offset of this SM's registers relative to the gpc0/tpc0/sm0 addresses. */
5049 offset = gk20a_gr_gpc_offset(g, gpc) +
5050 gk20a_gr_tpc_offset(g, tpc) +
5051 gv11b_gr_sm_offset(g, sm);
5052
/*
 * The global ESR is read and the same value written back.
 * NOTE(review): presumably the bits are write-1-to-clear -- confirm
 * against the register manual.
 */
5053 val = gk20a_readl(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset);
5054 gk20a_writel(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset,
5055 val);
/* The warp ESR is overwritten with 0. */
5056 gk20a_writel(g, gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset,
5057 0);
5058 }
5059
/* The result of re-enabling ctxsw becomes the function's return value. */
5060 err = gr_gk20a_enable_ctxsw(g);
5061
5062fail:
5063 nvgpu_mutex_release(&g->dbg_sessions_lock);
5064 return err;
5065}
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index 20377acf..2f765336 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -170,6 +170,8 @@ int gv11b_gr_set_sm_debug_mode(struct gk20a *g,
170 struct channel_gk20a *ch, u64 sms, bool enable); 170 struct channel_gk20a *ch, u64 sms, bool enable);
171int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, 171int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
172 struct channel_gk20a *fault_ch); 172 struct channel_gk20a *fault_ch);
173int gv11b_gr_clear_sm_error_state(struct gk20a *g,
174 struct channel_gk20a *ch, u32 sm_id);
173void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g); 175void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g);
174bool gv11b_gr_sm_debugger_attached(struct gk20a *g); 176bool gv11b_gr_sm_debugger_attached(struct gk20a *g);
175void gv11b_gr_suspend_single_sm(struct gk20a *g, 177void gv11b_gr_suspend_single_sm(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 6a2dae77..2548cd16 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -335,7 +335,7 @@ static const struct gpu_ops gv11b_ops = {
335 .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, 335 .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
336 .init_hwpm_pmm_register = gr_gv100_init_hwpm_pmm_register, 336 .init_hwpm_pmm_register = gr_gv100_init_hwpm_pmm_register,
337 .record_sm_error_state = gv11b_gr_record_sm_error_state, 337 .record_sm_error_state = gv11b_gr_record_sm_error_state,
338 .clear_sm_error_state = gm20b_gr_clear_sm_error_state, 338 .clear_sm_error_state = gv11b_gr_clear_sm_error_state,
339 .suspend_contexts = gr_gp10b_suspend_contexts, 339 .suspend_contexts = gr_gp10b_suspend_contexts,
340 .resume_contexts = gr_gk20a_resume_contexts, 340 .resume_contexts = gr_gk20a_resume_contexts,
341 .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, 341 .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,