From d84e822128a224eda4a703dad530716331dd36bd Mon Sep 17 00:00:00 2001 From: Vinod G Date: Wed, 23 May 2018 17:22:03 -0700 Subject: gpu: nvgpu: Add Ctrl API to read SM error state Expose IOCTL to Ctrl node to read Single SM error under NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE bug 200412642 JIRA NVGPU-700 Change-Id: I3cbcf4d7f23a53dbd2350b38a5e259559d5fd3af Signed-off-by: Vinod G Reviewed-on: https://git-master.nvidia.com/r/1728931 Reviewed-by: mobile promotions Tested-by: mobile promotions --- include/uapi/linux/nvgpu.h | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index b36509b0..908e5c57 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -864,6 +864,38 @@ struct nvgpu_gpu_set_deterministic_opts_args { __u64 channels; /* in */ }; +/* + * This struct helps to report the SM error state of a single SM. + * This acts upon the currently resident GR context. + * Global Error status register + * Warp Error status register + * Warp Error status register PC + * Global Error status register Report Mask + * Warp Error status register Report Mask + */ +struct nvgpu_gpu_sm_error_state_record { + __u32 global_esr; + __u32 warp_esr; + __u64 warp_esr_pc; + __u32 global_esr_report_mask; + __u32 warp_esr_report_mask; +}; + +/* + * This struct helps to read the SM error state. 
+ */ +struct nvgpu_gpu_read_single_sm_error_state_args { + /* Valid SM ID */ + __u32 sm_id; + __u32 reserved; + /* + * This is a pointer to the struct nvgpu_gpu_sm_error_state_record + */ + __u64 record_mem; + /* size of the record to read */ + __u64 record_size; +}; + #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \ _IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args) #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \ @@ -949,8 +981,11 @@ struct nvgpu_gpu_set_deterministic_opts_args { #define NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS \ _IOWR(NVGPU_GPU_IOCTL_MAGIC, 40, \ struct nvgpu_gpu_set_deterministic_opts_args) +#define NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE \ + _IOWR(NVGPU_GPU_IOCTL_MAGIC, 41, \ + struct nvgpu_gpu_read_single_sm_error_state_args) #define NVGPU_GPU_IOCTL_LAST \ - _IOC_NR(NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS) + _IOC_NR(NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE) #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \ sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args) -- cgit v1.2.2