From d84e822128a224eda4a703dad530716331dd36bd Mon Sep 17 00:00:00 2001
From: Vinod G
Date: Wed, 23 May 2018 17:22:03 -0700
Subject: gpu: nvgpu: Add Ctrl API to read SM error state

Expose IOCTL to Ctrl node to read Single SM error under
NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE

bug 200412642
JIRA NVGPU-700

Change-Id: I3cbcf4d7f23a53dbd2350b38a5e259559d5fd3af
Signed-off-by: Vinod G
Reviewed-on: https://git-master.nvidia.com/r/1728931
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

(limited to 'drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c')

diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
index b40efc0f..ee0739c9 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
@@ -1575,6 +1575,84 @@ out:
 	return err;
 }
 
+/*
+ * Read the recorded error state of one SM and copy it to user space.
+ *
+ * Serves NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE on the ctrl node.
+ *
+ * @g:    GPU whose gr.sm_error_states table is read.
+ * @args: ioctl arguments. sm_id selects the SM, record_mem is the user
+ *        address of the destination buffer and record_size its size in
+ *        bytes. When record_size is non-zero and the copy succeeds,
+ *        record_size is updated to the number of bytes written.
+ *
+ * Returns 0 on success, -EINVAL if sm_id is not below gr.no_of_sm, or
+ * -EFAULT if the record could not be copied to user space.
+ */
+static int nvgpu_gpu_read_single_sm_error_state(struct gk20a *g,
+		struct nvgpu_gpu_read_single_sm_error_state_args *args)
+{
+	struct gr_gk20a *gr = &g->gr;
+	struct nvgpu_gr_sm_error_state *sm_error_state;
+	/* Zero-initialised so no unassigned member carries stale stack data. */
+	struct nvgpu_gpu_sm_error_state_record sm_error_state_record = { 0 };
+	unsigned long not_copied;
+	size_t write_size;
+	u32 sm_id;
+
+	sm_id = args->sm_id;
+	if (sm_id >= gr->no_of_sm)
+		return -EINVAL;
+
+	/* Barrier sits between the bounds check and the indexed access. */
+	nvgpu_speculation_barrier();
+
+	sm_error_state = gr->sm_error_states + sm_id;
+	sm_error_state_record.global_esr =
+		sm_error_state->hww_global_esr;
+	sm_error_state_record.warp_esr =
+		sm_error_state->hww_warp_esr;
+	sm_error_state_record.warp_esr_pc =
+		sm_error_state->hww_warp_esr_pc;
+	sm_error_state_record.global_esr_report_mask =
+		sm_error_state->hww_global_esr_report_mask;
+	sm_error_state_record.warp_esr_report_mask =
+		sm_error_state->hww_warp_esr_report_mask;
+
+	/* A zero-sized user buffer is accepted; nothing is copied. */
+	if (args->record_size == 0)
+		return 0;
+
+	/*
+	 * Size the copy from the object that is actually copied out.
+	 * sizeof(*sm_error_state) describes a different struct and would
+	 * over-read the local record if that struct is the larger one.
+	 */
+	write_size = sizeof(sm_error_state_record);
+	if (write_size > args->record_size)
+		write_size = args->record_size;
+
+	/*
+	 * NOTE(review): the lock covers only the copy of the local snapshot;
+	 * the reads from gr->sm_error_states above happen outside it.
+	 * Confirm whether those reads need the same lock.
+	 */
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+	not_copied = copy_to_user((void __user *)(uintptr_t)args->record_mem,
+				  &sm_error_state_record, write_size);
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+
+	/* copy_to_user() returns the number of bytes it could not copy. */
+	if (not_copied) {
+		nvgpu_err(g, "copy_to_user failed!");
+		return -EFAULT;
+	}
+
+	args->record_size = write_size;
+
+	return 0;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct gk20a_ctrl_priv *priv = filp->private_data;
@@ -1887,6 +1965,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 			(struct nvgpu_gpu_set_deterministic_opts_args *)buf);
 		break;
 
+	case NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE:
+		err = nvgpu_gpu_read_single_sm_error_state(g,
+			(struct nvgpu_gpu_read_single_sm_error_state_args *)buf);
+		break;
+
 	default:
 		nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
-- 
cgit v1.2.2