From bfe65407bde2b5d0776724301e215c6553c989f3 Mon Sep 17 00:00:00 2001 From: Vinod G Date: Tue, 7 Aug 2018 23:09:30 -0700 Subject: gpu: nvgpu: Read sm error ioctl support for tsg Add READ_SM_ERROR IOCTL support to TSG level. Moved the struct to save the sm_error details from gr to tsg as the sm_error support is context based, not global. Also corrected MISRA 21.1 error in header file. nvgpu_dbg_gpu_ioctl_write_single_sm_error_state and nvgpu_dbg_gpu_ioctl_read_single_sm_error_state functions are modified to use the tsg struct nvgpu_tsg_sm_error_state. Bug 200412642 Change-Id: I9e334b059078a4bb0e360b945444cc4bf1cc56ec Signed-off-by: Vinod G Reviewed-on: https://git-master.nvidia.com/r/1794856 Reviewed-by: svc-misra-checker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/vgpu/gr_vgpu.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/nvgpu/vgpu') diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c index fa64cb82..9ee57fb4 100644 --- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c @@ -882,9 +882,6 @@ static void vgpu_remove_gr_support(struct gr_gk20a *gr) gk20a_comptag_allocator_destroy(gr->g, &gr->comp_tags); - nvgpu_kfree(gr->g, gr->sm_error_states); - gr->sm_error_states = NULL; - nvgpu_kfree(gr->g, gr->gpc_tpc_mask); gr->gpc_tpc_mask = NULL; @@ -935,14 +932,6 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) nvgpu_mutex_init(&gr->ctx_mutex); nvgpu_spinlock_init(&gr->ch_tlb_lock); - gr->sm_error_states = nvgpu_kzalloc(g, - sizeof(struct nvgpu_gr_sm_error_state) * - gr->no_of_sm); - if (!gr->sm_error_states) { - err = -ENOMEM; - goto clean_up; - } - gr->remove_support = vgpu_remove_gr_support; gr->sw_ready = true; @@ -1152,12 +1141,17 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, int vgpu_gr_clear_sm_error_state(struct gk20a *g, struct channel_gk20a *ch, u32 sm_id) { - struct gr_gk20a *gr = &g->gr; struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_clear_sm_error_state *p = &msg.params.clear_sm_error_state; + struct tsg_gk20a *tsg; int err; + tsg = tsg_gk20a_from_ch(ch); + if (!tsg) { + return -EINVAL; + } + nvgpu_mutex_acquire(&g->dbg_sessions_lock); msg.cmd = TEGRA_VGPU_CMD_CLEAR_SM_ERROR_STATE; msg.handle = vgpu_get_handle(g); @@ -1167,7 +1161,7 @@ int vgpu_gr_clear_sm_error_state(struct gk20a *g, err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); WARN_ON(err || msg.ret); - memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states)); + memset(&tsg->sm_error_states[sm_id], 0, sizeof(*tsg->sm_error_states)); nvgpu_mutex_release(&g->dbg_sessions_lock); return err ? err : msg.ret; @@ -1264,7 +1258,8 @@ int vgpu_gr_resume_contexts(struct gk20a *g, void vgpu_gr_handle_sm_esr_event(struct gk20a *g, struct tegra_vgpu_sm_esr_info *info) { - struct nvgpu_gr_sm_error_state *sm_error_states; + struct nvgpu_tsg_sm_error_state *sm_error_states; + struct tsg_gk20a *tsg; if (info->sm_id >= g->gr.no_of_sm) { nvgpu_err(g, "invalid smd_id %d / %d", @@ -1272,9 +1267,20 @@ void vgpu_gr_handle_sm_esr_event(struct gk20a *g, return; } + if (info->tsg_id >= g->fifo.num_channels) { + nvgpu_err(g, "invalid tsg_id in sm esr event"); + return; + } + + tsg = &g->fifo.tsg[info->tsg_id]; + if (tsg == NULL) { + nvgpu_err(g, "invalid tsg"); + return; + } + nvgpu_mutex_acquire(&g->dbg_sessions_lock); - sm_error_states = &g->gr.sm_error_states[info->sm_id]; + sm_error_states = &tsg->sm_error_states[info->sm_id]; sm_error_states->hww_global_esr = info->hww_global_esr; sm_error_states->hww_warp_esr = info->hww_warp_esr; -- cgit v1.2.2