diff options
author | Vinod G <vinodg@nvidia.com> | 2018-05-23 20:22:03 -0400 |
---|---|---|
committer | Tejal Kudav <tkudav@nvidia.com> | 2018-06-14 09:44:06 -0400 |
commit | d84e822128a224eda4a703dad530716331dd36bd (patch) | |
tree | 1be84de0d8e1407fa109b1e51e89cf8f5fa94b82 | |
parent | 40cefb666f3767059383052346d4c0faa9195a48 (diff) |
gpu: nvgpu: Add Ctrl API to read SM error state
Expose an IOCTL on the ctrl node to read the error state of a single SM,
as NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE.
bug 200412642
JIRA NVGPU-700
Change-Id: I3cbcf4d7f23a53dbd2350b38a5e259559d5fd3af
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1728931
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c | 55 | ||||
-rw-r--r-- | include/uapi/linux/nvgpu.h | 37 |
2 files changed, 91 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c index b40efc0f..ee0739c9 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c | |||
@@ -1575,6 +1575,56 @@ out: | |||
1575 | return err; | 1575 | return err; |
1576 | } | 1576 | } |
1577 | 1577 | ||
1578 | static int nvgpu_gpu_read_single_sm_error_state(struct gk20a *g, | ||
1579 | struct nvgpu_gpu_read_single_sm_error_state_args *args) | ||
1580 | { | ||
1581 | struct gr_gk20a *gr = &g->gr; | ||
1582 | struct nvgpu_gr_sm_error_state *sm_error_state; | ||
1583 | struct nvgpu_gpu_sm_error_state_record sm_error_state_record; | ||
1584 | u32 sm_id; | ||
1585 | int err = 0; | ||
1586 | |||
1587 | sm_id = args->sm_id; | ||
1588 | if (sm_id >= gr->no_of_sm) | ||
1589 | return -EINVAL; | ||
1590 | |||
1591 | nvgpu_speculation_barrier(); | ||
1592 | |||
1593 | sm_error_state = gr->sm_error_states + sm_id; | ||
1594 | sm_error_state_record.global_esr = | ||
1595 | sm_error_state->hww_global_esr; | ||
1596 | sm_error_state_record.warp_esr = | ||
1597 | sm_error_state->hww_warp_esr; | ||
1598 | sm_error_state_record.warp_esr_pc = | ||
1599 | sm_error_state->hww_warp_esr_pc; | ||
1600 | sm_error_state_record.global_esr_report_mask = | ||
1601 | sm_error_state->hww_global_esr_report_mask; | ||
1602 | sm_error_state_record.warp_esr_report_mask = | ||
1603 | sm_error_state->hww_warp_esr_report_mask; | ||
1604 | |||
1605 | if (args->record_size > 0) { | ||
1606 | size_t write_size = sizeof(*sm_error_state); | ||
1607 | |||
1608 | if (write_size > args->record_size) | ||
1609 | write_size = args->record_size; | ||
1610 | |||
1611 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1612 | err = copy_to_user((void __user *)(uintptr_t) | ||
1613 | args->record_mem, | ||
1614 | &sm_error_state_record, | ||
1615 | write_size); | ||
1616 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1617 | if (err) { | ||
1618 | nvgpu_err(g, "copy_to_user failed!"); | ||
1619 | return err; | ||
1620 | } | ||
1621 | |||
1622 | args->record_size = write_size; | ||
1623 | } | ||
1624 | |||
1625 | return 0; | ||
1626 | } | ||
1627 | |||
1578 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 1628 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
1579 | { | 1629 | { |
1580 | struct gk20a_ctrl_priv *priv = filp->private_data; | 1630 | struct gk20a_ctrl_priv *priv = filp->private_data; |
@@ -1887,6 +1937,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg | |||
1887 | (struct nvgpu_gpu_set_deterministic_opts_args *)buf); | 1937 | (struct nvgpu_gpu_set_deterministic_opts_args *)buf); |
1888 | break; | 1938 | break; |
1889 | 1939 | ||
1940 | case NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE: | ||
1941 | err = nvgpu_gpu_read_single_sm_error_state(g, | ||
1942 | (struct nvgpu_gpu_read_single_sm_error_state_args *)buf); | ||
1943 | break; | ||
1944 | |||
1890 | default: | 1945 | default: |
1891 | nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd); | 1946 | nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd); |
1892 | err = -ENOTTY; | 1947 | err = -ENOTTY; |
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index b36509b0..908e5c57 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h | |||
@@ -864,6 +864,38 @@ struct nvgpu_gpu_set_deterministic_opts_args { | |||
864 | __u64 channels; /* in */ | 864 | __u64 channels; /* in */ |
865 | }; | 865 | }; |
866 | 866 | ||
867 | /* | ||
868 | * Error state record of a single SM, as reported to user space. | ||
869 | * This acts upon the currently resident GR context. | ||
870 | * global_esr: Global Error status register | ||
871 | * warp_esr: Warp Error status register | ||
872 | * warp_esr_pc: Warp Error status register PC | ||
873 | * global_esr_report_mask: Global Error status register Report Mask | ||
874 | * warp_esr_report_mask: Warp Error status register Report Mask | ||
875 | */ | ||
876 | struct nvgpu_gpu_sm_error_state_record { | ||
877 | __u32 global_esr; | ||
878 | __u32 warp_esr; | ||
879 | __u64 warp_esr_pc; | ||
880 | __u32 global_esr_report_mask; | ||
881 | __u32 warp_esr_report_mask; | ||
882 | }; | ||
883 | |||
884 | /* | ||
885 | * Arguments for NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE. | ||
886 | */ | ||
887 | struct nvgpu_gpu_read_single_sm_error_state_args { | ||
888 | /* in: ID of the SM to read; must be a valid SM ID */ | ||
889 | __u32 sm_id; | ||
890 | __u32 reserved; | ||
891 | /* | ||
892 | * in: user pointer to a struct nvgpu_gpu_sm_error_state_record | ||
893 | */ | ||
894 | __u64 record_mem; | ||
895 | /* in: size in bytes of the record_mem buffer; out: bytes written */ | ||
896 | __u64 record_size; | ||
897 | }; | ||
898 | |||
867 | #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \ | 899 | #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \ |
868 | _IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args) | 900 | _IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args) |
869 | #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \ | 901 | #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \ |
@@ -949,8 +981,11 @@ struct nvgpu_gpu_set_deterministic_opts_args { | |||
949 | #define NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS \ | 981 | #define NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS \ |
950 | _IOWR(NVGPU_GPU_IOCTL_MAGIC, 40, \ | 982 | _IOWR(NVGPU_GPU_IOCTL_MAGIC, 40, \ |
951 | struct nvgpu_gpu_set_deterministic_opts_args) | 983 | struct nvgpu_gpu_set_deterministic_opts_args) |
984 | #define NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE \ | ||
985 | _IOWR(NVGPU_GPU_IOCTL_MAGIC, 41, \ | ||
986 | struct nvgpu_gpu_read_single_sm_error_state_args) | ||
952 | #define NVGPU_GPU_IOCTL_LAST \ | 987 | #define NVGPU_GPU_IOCTL_LAST \ |
953 | _IOC_NR(NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS) | 988 | _IOC_NR(NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE) |
954 | #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \ | 989 | #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \ |
955 | sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args) | 990 | sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args) |
956 | 991 | ||