From 04e45bc943e9703c26f229dfbe558d94418acbe1 Mon Sep 17 00:00:00 2001
From: Deepak Nibade
Date: Wed, 9 Mar 2016 14:51:43 +0530
Subject: gpu: nvgpu: support storing/reading single SM error state

Add support to store error state of single SM before
preprocessing SM exception

Error state is stored as :
struct nvgpu_dbg_gpu_sm_error_state_record {
	u32 hww_global_esr;
	u32 hww_warp_esr;
	u64 hww_warp_esr_pc;
	u32 hww_global_esr_report_mask;
	u32 hww_warp_esr_report_mask;
}

Note that we can safely append new fields to above structure
in the future if required

Also, add IOCTL NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE
to support reading SM's error state by user space

Bug 200156699

Change-Id: I9a62cb01e8a35c720b52d5d202986347706c7308
Signed-off-by: Deepak Nibade
Reviewed-on: http://git-master/r/1120329
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
---
Review notes (below the "---" marker, so not part of the commit message):

 * dbg_gpu_gk20a.c: copy_to_user() returns the number of bytes it could not
   copy, not an errno. The ioctl now returns -EFAULT instead of passing that
   count back to the caller.
 * gr_gk20a.c, gr_gm20b.c: sm_id is read from a 16-bit register field and was
   used unchecked as an index into gr->sm_error_states[], which only has
   gr->no_of_sm entries. Both record functions now reject an out-of-range id
   and a NULL array, and the ioctl rejects a NULL array too.
 * Not changed, please confirm: on kzalloc() failure the setup code sets
   err = -ENOMEM and jumps back to restore_fe_go_idle, but the "out:" path
   shown in that hunk returns 0, so the failure looks invisible to the caller.
 * Not changed, please confirm: the report masks are read through the
   gr_gpcs_tpcs_* accessors plus a per-GPC/TPC offset. The names suggest
   broadcast registers, for which a unicast offset would be wrong.
 * Not changed, please confirm: NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE is still
   sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args); it must be at least the
   size of struct nvgpu_dbg_gpu_read_single_sm_error_state_args.
 * Hunk headers and the diffstat were updated for the added lines; the
   post-image blob ids on the "index" lines were not regenerated.

 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/gk20a.h         |  2 ++
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c      | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h      |  1 +
 drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h   |  4 ++++
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c      | 43 +++++++++++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h   |  8 ++++++++
 include/uapi/linux/nvgpu.h              | 29 ++++++++++++++++++++++++++++-
 8 files changed, 197 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 95957788..d9c96417 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -525,6 +525,54 @@ static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(
 	return 0;
 }
 
+/*
+ * Copy the recorded error state of SM args->sm_id to the user buffer at
+ * args->sm_error_state_record_mem. At most sizeof(record) bytes are
+ * written, clamped to args->sm_error_state_record_size, and the number of
+ * bytes written is stored back into that field.
+ *
+ * Returns 0 on success, -EINVAL for a bad sm_id or missing record storage,
+ * -EFAULT if copy_to_user() left any byte uncopied.
+ */
+static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
+		struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args)
+{
+	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct gr_gk20a *gr = &g->gr;
+	struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state;
+	u32 sm_id;
+	int err = 0;
+
+	sm_id = args->sm_id;
+	if (sm_id >= gr->no_of_sm || !gr->sm_error_states)
+		return -EINVAL;
+
+	sm_error_state = gr->sm_error_states + sm_id;
+
+	if (args->sm_error_state_record_size > 0) {
+		size_t write_size = sizeof(*sm_error_state);
+
+		if (write_size > args->sm_error_state_record_size)
+			write_size = args->sm_error_state_record_size;
+
+		mutex_lock(&g->dbg_sessions_lock);
+		err = copy_to_user((void __user *)(uintptr_t)
+					args->sm_error_state_record_mem,
+				   sm_error_state,
+				   write_size);
+		mutex_unlock(&g->dbg_sessions_lock);
+		if (err) {
+			gk20a_err(dev_from_gk20a(g), "copy_to_user failed!\n");
+			return -EFAULT;
+		}
+
+		args->sm_error_state_record_size = write_size;
+	}
+
+	return 0;
+}
+
 long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 			     unsigned long arg)
 {
@@ -622,6 +670,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 		       (struct nvgpu_dbg_gpu_timeout_args *)buf);
 		break;
 
+	case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE:
+		err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s,
+		   (struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf);
+		break;
+
 	default:
 		gk20a_err(dev_from_gk20a(g),
 			  "unrecognized dbg gpu ioctl cmd: 0x%x",
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 61e8e641..c70217ea 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -240,6 +240,8 @@ struct gpu_ops {
 					bool *post_event);
 		void (*create_gr_sysfs)(struct device *dev);
 		u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g);
+		int (*record_sm_error_state)(struct gk20a *g,
+				u32 gpc, u32 tpc);
 	} gr;
 	const char *name;
 	struct {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 734552a1..c0a25e68 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -58,6 +58,7 @@
 #include "semaphore_gk20a.h"
 #include "platform_gk20a.h"
 #include "ctxsw_trace_gk20a.h"
+#include "hw_proj_gk20a.h"
 
 #define BLK_SIZE (256)
 #define NV_PMM_FBP_STRIDE	0x1000
@@ -3129,6 +3130,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
 
 	memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));
 
+	kfree(gr->sm_error_states);
 	kfree(gr->gpc_tpc_count);
 	kfree(gr->gpc_zcb_count);
 	kfree(gr->gpc_ppc_count);
@@ -4426,6 +4428,19 @@ restore_fe_go_idle:
 	if (err)
 		goto out;
 
+	kfree(gr->sm_error_states);
+
+	/* we need to allocate this after g->ops.gr.init_fs_state() since
+	 * we initialize gr->no_of_sm in this function
+	 */
+	gr->sm_error_states = kzalloc(
+			sizeof(struct nvgpu_dbg_gpu_sm_error_state_record)
+			* gr->no_of_sm, GFP_KERNEL);
+	if (!gr->sm_error_states) {
+		err = -ENOMEM;
+		goto restore_fe_go_idle;
+	}
+
 out:
 	gk20a_dbg_fn("done");
 	return 0;
@@ -5494,6 +5509,45 @@ u32 gk20a_mask_hww_warp_esr(u32 hww_warp_esr)
 	return hww_warp_esr;
 }
 
+/*
+ * Snapshot the SM HWW error registers of (gpc, tpc) into the
+ * gr->sm_error_states[] slot selected by the SM id read from hardware.
+ * hww_warp_esr_pc is not written by this variant.
+ *
+ * Returns 0 on success, -EINVAL if there is no slot for that SM id.
+ */
+static int gk20a_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
+{
+	u32 sm_id;
+	struct gr_gk20a *gr = &g->gr;
+	u32 offset = proj_gpc_stride_v() * gpc +
+		     proj_tpc_in_gpc_stride_v() * tpc;
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g,
+			gr_gpc0_tpc0_sm_cfg_r() + offset));
+
+	/* sm_id is a raw 16-bit HW field: never index outside the array */
+	if (!gr->sm_error_states || sm_id >= gr->no_of_sm) {
+		mutex_unlock(&g->dbg_sessions_lock);
+		return -EINVAL;
+	}
+
+	gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g,
+			gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+	gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g,
+			gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
+	gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g,
+		       gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset);
+	gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g,
+			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset);
+
+	mutex_unlock(&g->dbg_sessions_lock);
+
+	return 0;
+}
+
 int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
 		bool *post_event, struct channel_gk20a *fault_ch)
 {
@@ -5554,6 +5608,9 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
 		  "sm hww global %08x warp %08x", global_esr, warp_esr);
 
+	gr_gk20a_elpg_protected_call(g,
+		g->ops.gr.record_sm_error_state(g, gpc, tpc));
+
 	if (g->ops.gr.pre_process_sm_exception) {
 		ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc,
 				global_esr, warp_esr,
@@ -8370,4 +8427,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.get_lrf_tex_ltc_dram_override = NULL;
 	gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
 	gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
+	gops->gr.record_sm_error_state = gk20a_gr_record_sm_error_state;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index c82cf75c..22ff1351 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -329,6 +329,7 @@ struct gr_gk20a {
 	u32 fbp_en_mask;
 	u32 no_of_sm;
 	struct sm_info *sm_to_cluster;
+	struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_states;
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 	struct mutex cs_lock;
 	struct gk20a_cs_snapshot *cs_data;
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
index 48aa1524..ab2a975b 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
@@ -2122,6 +2122,10 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
 {
 	return (v & 0xffff) << 0;
 }
+static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
 static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
 {
 	return 0x0050469c;
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index b49f2301..eeb70d76 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -31,6 +31,7 @@
 #include "hw_fuse_gm20b.h"
 #include "pmu_gm20b.h"
 #include "acr_gm20b.h"
+#include "hw_proj_gm20b.h"
 
 static void gr_gm20b_init_gpc_mmu(struct gk20a *g)
 {
@@ -1190,6 +1191,47 @@ static void gr_gm20b_get_access_map(struct gk20a *g,
 	*num_entries = ARRAY_SIZE(wl_addr_gm20b);
 }
 
+/*
+ * Snapshot the SM HWW error registers of (gpc, tpc), including the warp
+ * ESR PC, into the gr->sm_error_states[] slot selected by the SM id read
+ * from hardware.
+ *
+ * Returns 0 on success, -EINVAL if there is no slot for that SM id.
+ */
+static int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
+{
+	u32 sm_id;
+	struct gr_gk20a *gr = &g->gr;
+	u32 offset = proj_gpc_stride_v() * gpc +
+		     proj_tpc_in_gpc_stride_v() * tpc;
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g,
+			gr_gpc0_tpc0_sm_cfg_r() + offset));
+
+	/* sm_id is a raw 16-bit HW field: never index outside the array */
+	if (!gr->sm_error_states || sm_id >= gr->no_of_sm) {
+		mutex_unlock(&g->dbg_sessions_lock);
+		return -EINVAL;
+	}
+
+	gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g,
+			gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+	gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g,
+			gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
+	gr->sm_error_states[sm_id].hww_warp_esr_pc = gk20a_readl(g,
+			gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset);
+	gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g,
+		       gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset);
+	gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g,
+			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset);
+
+	mutex_unlock(&g->dbg_sessions_lock);
+
+	return 0;
+}
+
 void gm20b_init_gr(struct gpu_ops *gops)
 {
 	gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -1256,4 +1298,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.get_lrf_tex_ltc_dram_override = NULL;
 	gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
 	gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
+	gops->gr.record_sm_error_state = gm20b_gr_record_sm_error_state;
 }
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
index dbe54860..b796e2d3 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
@@ -2130,6 +2130,10 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
 {
 	return (v & 0xffff) << 0;
 }
+static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
 static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
 {
 	return 0x0050469c;
@@ -3270,6 +3274,10 @@ static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f(void)
 {
 	return 0x0;
 }
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_pc_r(void)
+{
+	return 0x00504654;
+}
 static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void)
 {
 	return 0x00504770;
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 16d60261..96619015 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -676,8 +676,35 @@ struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args {
 #define NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE \
 	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 13, struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args)
 
+
+struct nvgpu_dbg_gpu_sm_error_state_record {
+	__u32 hww_global_esr;
+	__u32 hww_warp_esr;
+	__u64 hww_warp_esr_pc;
+	__u32 hww_global_esr_report_mask;
+	__u32 hww_warp_esr_report_mask;
+
+	/*
+	 * Notes
+	 * - This struct can be safely appended with new fields. However, always
+	 *   keep the structure size multiple of 8 and make sure that the binary
+	 *   layout does not change between 32-bit and 64-bit architectures.
+	 */
+};
+
+struct nvgpu_dbg_gpu_read_single_sm_error_state_args {
+	__u32 sm_id;
+	__u32 padding;
+	__u64 sm_error_state_record_mem;
+	__u64 sm_error_state_record_size;
+};
+
+#define NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE \
+	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 14, struct nvgpu_dbg_gpu_read_single_sm_error_state_args)
+
+
 #define NVGPU_DBG_GPU_IOCTL_LAST		\
-	_IOC_NR(NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE)
+	_IOC_NR(NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE)
 #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE		\
 	sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args)
 
-- 
cgit v1.2.2