From 04e45bc943e9703c26f229dfbe558d94418acbe1 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Wed, 9 Mar 2016 14:51:43 +0530
Subject: gpu: nvgpu: support storing/reading single SM error state

Add support for storing the error state of a single SM before
preprocessing the SM exception.

The error state is stored as:
struct nvgpu_dbg_gpu_sm_error_state_record {
u32 hww_global_esr;
u32 hww_warp_esr;
u64 hww_warp_esr_pc;
u32 hww_global_esr_report_mask;
u32 hww_warp_esr_report_mask;
}

Note that new fields can safely be appended to the above
structure in the future if required.

Also add the NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE IOCTL
so that user space can read an SM's error state.

Bug 200156699

Change-Id: I9a62cb01e8a35c720b52d5d202986347706c7308
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1120329
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 44 ++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/gk20a.h         |  2 ++
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c      | 45 +++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h      |  1 +
 drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h   |  4 +++
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c      | 30 ++++++++++++++++++++++
 drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h   |  8 ++++++
 include/uapi/linux/nvgpu.h              | 29 ++++++++++++++++++++-
 8 files changed, 162 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 95957788..d9c96417 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -525,6 +525,45 @@ static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(
 	return 0;
 }
 
+/*
+ * Copy the recorded error state of SM args->sm_id to user memory, truncated
+ * to args->sm_error_state_record_size, which is updated to the bytes written.
+ * Returns 0 on success, -EINVAL for a bad sm_id, -EFAULT on copy failure.
+ */
+static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
+		struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args)
+{
+	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct gr_gk20a *gr = &g->gr;
+	struct nvgpu_dbg_gpu_sm_error_state_record record;
+	size_t write_size = sizeof(record);
+
+	if (args->sm_id >= gr->no_of_sm)
+		return -EINVAL;
+
+	if (args->sm_error_state_record_size == 0)
+		return 0;
+
+	if (write_size > args->sm_error_state_record_size)
+		write_size = args->sm_error_state_record_size;
+
+	/* snapshot under the lock; never fault on user memory while locked */
+	mutex_lock(&g->dbg_sessions_lock);
+	record = gr->sm_error_states[args->sm_id];
+	mutex_unlock(&g->dbg_sessions_lock);
+
+	/* copy_to_user() returns the count of bytes NOT copied, not an errno */
+	if (copy_to_user((void __user *)(uintptr_t)
+			 args->sm_error_state_record_mem, &record, write_size)) {
+		gk20a_err(dev_from_gk20a(g), "copy_to_user failed!\n");
+		return -EFAULT;
+	}
+
+	args->sm_error_state_record_size = write_size;
+	return 0;
+}
+
 long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 			     unsigned long arg)
 {
@@ -622,6 +661,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 			   (struct nvgpu_dbg_gpu_timeout_args *)buf);
 		break;
 
+	case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE:
+		err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s,
+		   (struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf);
+		break;
+
 	default:
 		gk20a_err(dev_from_gk20a(g),
 			   "unrecognized dbg gpu ioctl cmd: 0x%x",
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 61e8e641..c70217ea 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -240,6 +240,8 @@ struct gpu_ops {
 						bool *post_event);
 		void (*create_gr_sysfs)(struct device *dev);
 		u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g);
+		int (*record_sm_error_state)(struct gk20a *g,
+				u32 gpc, u32 tpc);
 	} gr;
 	const char *name;
 	struct {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 734552a1..c0a25e68 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -58,6 +58,7 @@
 #include "semaphore_gk20a.h"
 #include "platform_gk20a.h"
 #include "ctxsw_trace_gk20a.h"
+#include "hw_proj_gk20a.h"
 
 #define BLK_SIZE (256)
 #define NV_PMM_FBP_STRIDE	0x1000
@@ -3129,6 +3130,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
 
 	memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));
 
+	kfree(gr->sm_error_states);
 	kfree(gr->gpc_tpc_count);
 	kfree(gr->gpc_zcb_count);
 	kfree(gr->gpc_ppc_count);
@@ -4426,6 +4428,19 @@ restore_fe_go_idle:
 	if (err)
 		goto out;
 
+	kfree(gr->sm_error_states);
+
+	/* we need to allocate this after g->ops.gr.init_fs_state() since
+	 * we initialize gr->no_of_sm in this function
+	 */
+	gr->sm_error_states = kzalloc(
+			sizeof(struct nvgpu_dbg_gpu_sm_error_state_record)
+			* gr->no_of_sm, GFP_KERNEL);
+	if (!gr->sm_error_states) {
+		err = -ENOMEM;
+		goto restore_fe_go_idle;
+	}
+
 out:
 	gk20a_dbg_fn("done");
 	return 0;
@@ -5494,6 +5509,32 @@ u32 gk20a_mask_hww_warp_esr(u32 hww_warp_esr)
 	return hww_warp_esr;
 }
 
+/* Latch the SM HWW error registers at (gpc, tpc) into gr->sm_error_states */
+static int gk20a_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
+{
+	struct gr_gk20a *gr = &g->gr;
+	u32 offset = proj_gpc_stride_v() * gpc +
+		     proj_tpc_in_gpc_stride_v() * tpc;
+	u32 sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g,
+			gr_gpc0_tpc0_sm_cfg_r() + offset));
+
+	/* sm_id is read from hardware: never index past the allocation */
+	if (sm_id >= gr->no_of_sm)
+		return -EINVAL;
+
+	mutex_lock(&g->dbg_sessions_lock);
+	gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g,
+			gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+	gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g,
+			gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
+	gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g,
+		       gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset);
+	gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g,
+			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset);
+	mutex_unlock(&g->dbg_sessions_lock);
+	return 0;
+}
+
 int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
 		bool *post_event, struct channel_gk20a *fault_ch)
 {
@@ -5554,6 +5595,9 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
 		  "sm hww global %08x warp %08x", global_esr, warp_esr);
 
+	gr_gk20a_elpg_protected_call(g,
+		g->ops.gr.record_sm_error_state(g, gpc, tpc));
+
 	if (g->ops.gr.pre_process_sm_exception) {
 		ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc,
 				global_esr, warp_esr,
@@ -8370,4 +8414,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.get_lrf_tex_ltc_dram_override = NULL;
 	gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
 	gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
+	gops->gr.record_sm_error_state = gk20a_gr_record_sm_error_state;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index c82cf75c..22ff1351 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -329,6 +329,7 @@ struct gr_gk20a {
 	u32 fbp_en_mask;
 	u32 no_of_sm;
 	struct sm_info *sm_to_cluster;
+	struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_states;
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 	struct mutex			cs_lock;
 	struct gk20a_cs_snapshot	*cs_data;
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
index 48aa1524..ab2a975b 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
@@ -2122,6 +2122,10 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
 {
 	return (v & 0xffff) << 0;
 }
+static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_v(u32 r)
+{
+	return (r >> 0) & 0xffff; /* extract bits 15:0 of register value r */
+}
 static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
 {
 	return 0x0050469c;
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index b49f2301..eeb70d76 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -31,6 +31,7 @@
 #include "hw_fuse_gm20b.h"
 #include "pmu_gm20b.h"
 #include "acr_gm20b.h"
+#include "hw_proj_gm20b.h"
 
 static void gr_gm20b_init_gpc_mmu(struct gk20a *g)
 {
@@ -1190,6 +1191,34 @@ static void gr_gm20b_get_access_map(struct gk20a *g,
 	*num_entries = ARRAY_SIZE(wl_addr_gm20b);
 }
 
+/* Latch the SM HWW error registers at (gpc, tpc) into gr->sm_error_states */
+static int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
+{
+	struct gr_gk20a *gr = &g->gr;
+	u32 offset = proj_gpc_stride_v() * gpc +
+		     proj_tpc_in_gpc_stride_v() * tpc;
+	u32 sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g,
+			gr_gpc0_tpc0_sm_cfg_r() + offset));
+
+	/* sm_id is read from hardware: never index past the allocation */
+	if (sm_id >= gr->no_of_sm)
+		return -EINVAL;
+
+	mutex_lock(&g->dbg_sessions_lock);
+	gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g,
+			gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+	gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g,
+			gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
+	gr->sm_error_states[sm_id].hww_warp_esr_pc = gk20a_readl(g,
+			gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset);
+	gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g,
+		       gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset);
+	gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g,
+			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset);
+	mutex_unlock(&g->dbg_sessions_lock);
+	return 0;
+}
+
 void gm20b_init_gr(struct gpu_ops *gops)
 {
 	gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -1256,4 +1285,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.get_lrf_tex_ltc_dram_override = NULL;
 	gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
 	gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
+	gops->gr.record_sm_error_state = gm20b_gr_record_sm_error_state;
 }
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
index dbe54860..b796e2d3 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
@@ -2130,6 +2130,10 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
 {
 	return (v & 0xffff) << 0;
 }
+static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_v(u32 r)
+{
+	return (r >> 0) & 0xffff; /* extract bits 15:0 of register value r */
+}
 static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
 {
 	return 0x0050469c;
@@ -3270,6 +3274,10 @@ static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f(void)
 {
 	return 0x0;
 }
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_pc_r(void)
+{
+	return 0x00504654; /* register address; callers add a per-TPC offset */
+}
 static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void)
 {
 	return 0x00504770;
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 16d60261..96619015 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -676,8 +676,35 @@ struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args {
 #define NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE \
 	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 13, struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args)
 
+
+struct nvgpu_dbg_gpu_sm_error_state_record {
+	__u32 hww_global_esr;	/* SM HWW global ESR register value */
+	__u32 hww_warp_esr;	/* SM HWW warp ESR register value */
+	__u64 hww_warp_esr_pc;	/* SM HWW warp ESR PC; not recorded on gk20a */
+	__u32 hww_global_esr_report_mask;	/* global ESR report mask value */
+	__u32 hww_warp_esr_report_mask;	/* warp ESR report mask value */
+
+	/*
+	 * Notes
+	 * - New fields may safely be appended to this struct. However, always
+	 *   keep the structure size a multiple of 8 and make sure that the
+	 *   binary layout is identical on 32-bit and 64-bit architectures.
+	 */
+};
+
+struct nvgpu_dbg_gpu_read_single_sm_error_state_args {
+	__u32 sm_id;	/* in: index of the SM to query; -EINVAL if out of range */
+	__u32 padding;
+	__u64 sm_error_state_record_mem;	/* in: user address of output buffer */
+	__u64 sm_error_state_record_size;	/* in: buffer size; out: bytes written */
+};
+
+#define NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE			\
+	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 14, struct nvgpu_dbg_gpu_read_single_sm_error_state_args)
+
+
 #define NVGPU_DBG_GPU_IOCTL_LAST		\
-	_IOC_NR(NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE)
+	_IOC_NR(NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE)
 
 #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE		\
 	sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args)
-- 
cgit v1.2.2