From f187e0bf442c3b0a08c46b21196f06a18c8220a0 Mon Sep 17 00:00:00 2001
From: Vinod G
Date: Wed, 29 Aug 2018 12:32:25 -0700
Subject: gpu: nvgpu: Move SM_MASK_TYPE setting to TSG level

Move the SM_MASK_TYPE variable from the GR struct to the TSG
struct, since the SM error registers are context based.

In the dbg_session SET_SM_MASK_TYPE IOCTL, the kernel code now
looks up the TSG associated with the session's first channel and
sets the mask_type on that context.

Bug 200412641

Change-Id: Ic91944037ad2447f403b4803d5266ae6250ba4c9
Signed-off-by: Vinod G
Reviewed-on: https://git-master.nvidia.com/r/1809322
Reviewed-by: svc-misra-checker
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h |  6 ---
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h      |  5 ---
 drivers/gpu/nvgpu/gk20a/tsg_gk20a.c     |  2 +
 drivers/gpu/nvgpu/gk20a/tsg_gk20a.h     |  4 ++
 drivers/gpu/nvgpu/gv11b/gr_gv11b.c      | 37 ++++++++---------
 drivers/gpu/nvgpu/os/linux/ioctl_dbg.c  | 71 ++++++++++++++++++---------------
 6 files changed, 64 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
index 4d3c4d74..50002557 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
@@ -72,12 +72,6 @@ struct dbg_session_gk20a {
 	bool broadcast_stop_trigger;
 
 	struct nvgpu_mutex ioctl_lock;
-
-	/*
-	 * sm set exception type mask flag, to check whether
-	 * exception type mask is requested or not.
-	 */
-	bool is_sm_exception_type_mask_set;
 };
 
 struct dbg_session_data {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 0d32cca3..303e1f53 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -420,11 +420,6 @@ struct gr_gk20a {
 	u32 no_of_sm;
 	struct sm_info *sm_to_cluster;
 
-#define NVGPU_SM_EXCEPTION_TYPE_MASK_NONE (0x0U)
-#define NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL (0x1U << 0)
-	u32 sm_exception_mask_type;
-	u32 sm_exception_mask_refcount;
-
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 	struct nvgpu_mutex cs_lock;
 	struct gk20a_cs_snapshot *cs_data;
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
index 624ee1d7..506d4330 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
@@ -304,6 +304,7 @@ struct tsg_gk20a *gk20a_tsg_open(struct gk20a *g, pid_t pid)
 	tsg->timeslice_scale = 0;
 	tsg->runlist_id = ~0;
 	tsg->tgid = pid;
+	tsg->sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE;
 
 	if (g->ops.fifo.init_eng_method_buffers)
 		g->ops.fifo.init_eng_method_buffers(g, tsg);
@@ -373,6 +374,7 @@ void gk20a_tsg_release(struct nvgpu_ref *ref)
 		release_used_tsg(&g->fifo, tsg);
 
 	tsg->runlist_id = ~0;
+	tsg->sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE;
 
 	nvgpu_log(g, gpu_dbg_fn, "tsg released %d\n", tsg->tsgid);
 }
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
index 67ccb9f5..1e3be553 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
@@ -78,6 +78,10 @@ struct tsg_gk20a {
 	bool in_use;
 
 	struct nvgpu_tsg_sm_error_state *sm_error_states;
+
+#define NVGPU_SM_EXCEPTION_TYPE_MASK_NONE (0x0U)
+#define NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL (0x1U << 0)
+	u32 sm_exception_mask_type;
 };
 
 int gk20a_enable_tsg(struct tsg_gk20a *tsg);
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 9a6afa3e..aeb49982 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -2239,7 +2239,7 @@ static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g,
 static bool gr_gv11b_check_warp_esr_error(struct gk20a *g, u32 warp_esr_error)
 {
 	u32 index = 0U;
-	u32 esr_err = gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_f();
+	bool esr_err = false;
 
 	struct warp_esr_error_table_s {
 		u32 error_value;
@@ -2285,7 +2285,7 @@ static bool gr_gv11b_check_warp_esr_error(struct gk20a *g, u32 warp_esr_error)
 
 	for (index = 0; index < ARRAY_SIZE(warp_esr_error_table); index++) {
 		if (warp_esr_error_table[index].error_value == warp_esr_error) {
-			esr_err = warp_esr_error_table[index].error_value;
+			esr_err = true;
 			nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
 				"WARP_ESR %s(0x%x)",
 				warp_esr_error_table[index].error_name,
@@ -2294,8 +2294,9 @@ static bool gr_gv11b_check_warp_esr_error(struct gk20a *g, u32 warp_esr_error)
 		}
 	}
 
-	return (esr_err == 0U) ? false : true;
+	return esr_err;
 }
+
 static int gr_gv11b_handle_all_warp_esr_errors(struct gk20a *g,
 					u32 gpc, u32 tpc, u32 sm,
 					u32 warp_esr_error,
@@ -2316,24 +2317,24 @@ static int gr_gv11b_handle_all_warp_esr_errors(struct gk20a *g,
 		return 0;
 	}
 
-	/*
-	 * Check SET_EXCEPTION_TYPE_MASK is being set.
-	 * If set, skip the recovery and trigger CILP
-	 * If not set, trigger the recovery.
-	 */
-	if ((g->gr.sm_exception_mask_type &
-			NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL) ==
-			NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL) {
-		nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
-			"SM Exception Type Mask set %d,"
-			"skip recovery",
-			g->gr.sm_exception_mask_type);
-		return 0;
-	}
-
 	if (fault_ch) {
 		tsg = &g->fifo.tsg[fault_ch->tsgid];
 
+		/*
+		 * Check whether SET_EXCEPTION_TYPE_MASK is set.
+		 * If set, skip the recovery and trigger CILP.
+		 * If not set, trigger the recovery.
+		 */
+		if ((tsg->sm_exception_mask_type &
+				NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL) ==
+				NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL) {
+			nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
+				"SM Exception Type Mask set %d, "
+				"skip recovery",
+				tsg->sm_exception_mask_type);
+			return 0;
+		}
+
 		nvgpu_rwsem_down_read(&tsg->ch_list_lock);
 		nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
 				channel_gk20a, ch_entry) {
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index 4ac4fb62..3931ab12 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -223,10 +223,6 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 			nvgpu_kfree(g, prof_obj);
 		}
 	}
-
-	nvgpu_set_sm_exception_type_mask_locked(dbg_s,
-			NVGPU_SM_EXCEPTION_TYPE_MASK_NONE);
-
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 
 	nvgpu_mutex_destroy(&dbg_s->ch_list_lock);
@@ -499,7 +495,6 @@ static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
 	dbg_s->is_profiler = is_profiler;
 	dbg_s->is_pg_disabled = false;
 	dbg_s->is_timeout_disabled = false;
-	dbg_s->is_sm_exception_type_mask_set = false;
 
 	nvgpu_cond_init(&dbg_s->dbg_events.wait_queue);
 	nvgpu_init_list_node(&dbg_s->ch_list);
@@ -512,9 +507,6 @@ static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
 	dbg_s->dbg_events.events_enabled = false;
 	dbg_s->dbg_events.num_pending_events = 0;
 
-	nvgpu_set_sm_exception_type_mask_locked(dbg_s,
-			NVGPU_SM_EXCEPTION_TYPE_MASK_NONE);
-
 	return 0;
 
 err_destroy_lock:
@@ -1887,34 +1879,29 @@ static int nvgpu_set_sm_exception_type_mask_locked(
 		u32 exception_mask)
 {
 	struct gk20a *g = dbg_s->g;
-	struct gr_gk20a *gr = &g->gr;
 	int err = 0;
+	struct channel_gk20a *ch = NULL;
 
-	switch (exception_mask) {
-	case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL:
-		gr->sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL;
-		if (dbg_s->is_sm_exception_type_mask_set == false) {
-			gr->sm_exception_mask_refcount++;
-			dbg_s->is_sm_exception_type_mask_set = true;
-		}
-		break;
-	case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_NONE:
-		if (dbg_s->is_sm_exception_type_mask_set) {
-			gr->sm_exception_mask_refcount--;
-			dbg_s->is_sm_exception_type_mask_set = false;
+	/*
+	 * Obtain the first channel from the channel list in
+	 * dbg_session, find the context associated with the
+	 * channel, and set the sm_mask_type on that context.
+	 */
+	ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
+	if (ch != NULL) {
+		struct tsg_gk20a *tsg;
+
+		tsg = tsg_gk20a_from_ch(ch);
+		if (tsg != NULL) {
+			tsg->sm_exception_mask_type = exception_mask;
+			goto type_mask_end;
 		}
-		if (gr->sm_exception_mask_refcount == 0)
-			gr->sm_exception_mask_type =
-				NVGPU_SM_EXCEPTION_TYPE_MASK_NONE;
-		break;
-	default:
-		nvgpu_err(g,
-			"unrecognized dbg sm exception type mask: 0x%x",
-			exception_mask);
-		err = -EINVAL;
-		break;
 	}
 
+	nvgpu_log_fn(g, "unable to find the TSG\n");
+	err = -EINVAL;
+
+type_mask_end:
 	return err;
 }
 
@@ -1924,10 +1911,30 @@ static int nvgpu_dbg_gpu_set_sm_exception_type_mask(
 {
 	int err = 0;
 	struct gk20a *g = dbg_s->g;
+	u32 sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE;
+
+	switch (args->exception_type_mask) {
+	case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL:
+		sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL;
+		break;
+	case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_NONE:
+		sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE;
+		break;
+	default:
+		nvgpu_err(g,
+			"unrecognized dbg sm exception type mask: 0x%x",
+			args->exception_type_mask);
+		err = -EINVAL;
+		break;
+	}
+
+	if (err != 0) {
+		return err;
+	}
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 	err = nvgpu_set_sm_exception_type_mask_locked(dbg_s,
-			args->exception_type_mask);
+			sm_exception_mask_type);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 
 	return err;
-- 
cgit v1.2.2
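
For context, here is a minimal user-space sketch of how a debugger client might drive this path after the change. The device node path, the args struct layout, and the ioctl request macro below are illustrative assumptions, not taken from this patch; in a real client they come from the nvgpu uapi header. Only the two mask values mirror constants that appear in the patch.

/*
 * Illustrative only -- not part of this change.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Assumed mirror of the uapi args struct. */
struct dbg_set_sm_exception_type_mask_args {
	uint32_t exception_type_mask;
};

/* These two values mirror the uapi constants used in this patch. */
#define SET_SM_EXCEPTION_TYPE_MASK_NONE  (0x0U)
#define SET_SM_EXCEPTION_TYPE_MASK_FATAL (0x1U << 0)

/* Assumed request code; the real macro is defined in the nvgpu
 * uapi header with its own magic number and command index. */
#define DBG_IOCTL_SET_SM_EXCEPTION_TYPE_MASK \
	_IOWR('D', 23, struct dbg_set_sm_exception_type_mask_args)

int main(void)
{
	/* Assumed node name; it varies across nvgpu platforms. */
	int fd = open("/dev/nvhost-dbg-gpu", O_RDWR);
	struct dbg_set_sm_exception_type_mask_args args = {
		.exception_type_mask = SET_SM_EXCEPTION_TYPE_MASK_FATAL,
	};

	if (fd < 0) {
		perror("open dbg node");
		return 1;
	}

	/*
	 * After this patch the mask is stored on the TSG of the
	 * session's first bound channel, so fatal SM exceptions
	 * skip recovery (and trigger CILP) only for that context,
	 * not for every context on the GPU.
	 */
	if (ioctl(fd, DBG_IOCTL_SET_SM_EXCEPTION_TYPE_MASK, &args) != 0)
		perror("SET_SM_EXCEPTION_TYPE_MASK");

	close(fd);
	return 0;
}

Note that the session must already have a channel bound before this call: the kernel now resolves the target TSG through the session's first channel, and with no channel (or no TSG) the ioctl fails with -EINVAL.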