8 files changed, 162 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 95957788..d9c96417 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -525,6 +525,45 @@ static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(
        return 0;
 }
+/*
+ * Handle NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE: copy the recorded
+ * error state of a single SM into a user-supplied buffer.
+ *
+ * @dbg_s: debug session the ioctl was issued on; used to look up the gk20a.
+ * @args:  in/out ioctl arguments. args->sm_id selects the record,
+ *         args->sm_error_state_record_mem is the user destination address and
+ *         args->sm_error_state_record_size is its size in bytes. On success
+ *         the size field is overwritten with the number of bytes copied; it
+ *         is left untouched when the caller passed a size of 0.
+ *
+ * Returns 0 on success, -EINVAL if sm_id is not below gr->no_of_sm or no
+ * record array has been allocated, and -EFAULT if the user buffer could not
+ * be written.
+ */
+static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
+                struct dbg_session_gk20a *dbg_s,
+                struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args)
+{
+        struct gk20a *g = get_gk20a(dbg_s->dev);
+        struct gr_gk20a *gr = &g->gr;
+        u32 sm_id = args->sm_id;
+        size_t write_size;
+        unsigned long not_copied;
+
+        if (sm_id >= gr->no_of_sm)
+                return -EINVAL;
+
+        /* A zero-sized buffer is accepted and simply copies nothing. */
+        if (args->sm_error_state_record_size == 0)
+                return 0;
+
+        /* Copy at most one record, truncated to what the caller has room for. */
+        write_size = sizeof(*gr->sm_error_states);
+        if (write_size > args->sm_error_state_record_size)
+                write_size = args->sm_error_state_record_size;
+
+        /* The record_sm_error_state hooks fill the array under this lock. */
+        mutex_lock(&g->dbg_sessions_lock);
+
+        /* The array is kzalloc'ed during GR init and may be absent. */
+        if (!gr->sm_error_states) {
+                mutex_unlock(&g->dbg_sessions_lock);
+                return -EINVAL;
+        }
+
+        not_copied = copy_to_user((void __user *)(uintptr_t)
+                                                args->sm_error_state_record_mem,
+                                  gr->sm_error_states + sm_id,
+                                  write_size);
+        mutex_unlock(&g->dbg_sessions_lock);
+
+        /*
+         * copy_to_user() reports the number of bytes it could NOT copy, not
+         * an errno; returning it directly would hand a positive value back
+         * to user space as the ioctl result.
+         */
+        if (not_copied) {
+                gk20a_err(dev_from_gk20a(g), "copy_to_user failed!\n");
+                return -EFAULT;
+        }
+
+        args->sm_error_state_record_size = write_size;
+
+        return 0;
+}
 long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
                             unsigned long arg)
 {
@@ -622,6 +661,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
                           (struct nvgpu_dbg_gpu_timeout_args *)buf);
                break;
+        case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE:
+                err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s,
+                   (struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf);
+                break;
        default:
                gk20a_err(dev_from_gk20a(g),
                           "unrecognized dbg gpu ioctl cmd: 0x%x",
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 61e8e641..c70217ea 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -240,6 +240,8 @@ struct gpu_ops {
                                                bool *post_event);
                void (*create_gr_sysfs)(struct device *dev);
                u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g);
+                int (*record_sm_error_state)(struct gk20a *g,
+                                u32 gpc, u32 tpc);
        } gr;
        const char *name;
        struct {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 734552a1..c0a25e68 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -58,6 +58,7 @@
 #include "semaphore_gk20a.h"
 #include "platform_gk20a.h"
 #include "ctxsw_trace_gk20a.h"
+#include "hw_proj_gk20a.h"
 #define BLK_SIZE (256)
 #define NV_PMM_FBP_STRIDE       0x1000
@@ -3129,6 +3130,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
        memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));
+        kfree(gr->sm_error_states);
        kfree(gr->gpc_tpc_count);
        kfree(gr->gpc_zcb_count);
        kfree(gr->gpc_ppc_count);
@@ -4426,6 +4428,19 @@ restore_fe_go_idle:
        if (err)
                goto out;
+        kfree(gr->sm_error_states);
+        /* This must be allocated after g->ops.gr.init_fs_state() has run,
+         * because gr->no_of_sm is initialized in that function.
+         */
+        gr->sm_error_states = kzalloc(
+                        sizeof(struct nvgpu_dbg_gpu_sm_error_state_record)
+                        * gr->no_of_sm, GFP_KERNEL);
+        if (!gr->sm_error_states) {
+                err = -ENOMEM;
+                goto restore_fe_go_idle;
+        }
 out:
        gk20a_dbg_fn("done");
        return 0;
@@ -5494,6 +5509,32 @@ u32 gk20a_mask_hww_warp_esr(u32 hww_warp_esr)
        return hww_warp_esr;
 }
+/*
+ * Snapshot the SM HWW error registers of the SM at (gpc, tpc) into
+ * gr->sm_error_states[], indexed by the SM id read back from that SM's
+ * SM_CFG register. The whole update runs under g->dbg_sessions_lock, the
+ * same lock the debugger ioctl holds while copying a record to user space.
+ *
+ * This gk20a variant does not write the hww_warp_esr_pc field of the record.
+ *
+ * @g:   GPU instance.
+ * @gpc: GPC index, scaled by proj_gpc_stride_v() to form the register offset.
+ * @tpc: TPC index within the GPC, scaled by proj_tpc_in_gpc_stride_v().
+ *
+ * Returns 0 on success, or -EINVAL if no record array is allocated or the
+ * SM id reported by hardware does not fit inside it.
+ */
+static int gk20a_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
+{
+        struct gr_gk20a *gr = &g->gr;
+        struct nvgpu_dbg_gpu_sm_error_state_record *rec;
+        u32 offset = proj_gpc_stride_v() * gpc +
+                     proj_tpc_in_gpc_stride_v() * tpc;
+        u32 sm_id;
+        int err = 0;
+
+        mutex_lock(&g->dbg_sessions_lock);
+
+        sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g,
+                        gr_gpc0_tpc0_sm_cfg_r() + offset));
+
+        /*
+         * sm_id is a 16-bit field read from a register, while the array only
+         * holds gr->no_of_sm entries: validate before using it as an index.
+         */
+        if (!gr->sm_error_states || sm_id >= gr->no_of_sm) {
+                err = -EINVAL;
+                goto unlock;
+        }
+
+        rec = &gr->sm_error_states[sm_id];
+
+        rec->hww_global_esr = gk20a_readl(g,
+                        gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+        rec->hww_warp_esr = gk20a_readl(g,
+                        gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
+
+        /*
+         * NOTE(review): the report masks are read through the gr_gpcs_tpcs_*
+         * accessors plus the per-TPC offset, unlike the gr_gpc0_tpc0_*
+         * accessors used above - confirm this is the intended address.
+         */
+        rec->hww_global_esr_report_mask = gk20a_readl(g,
+                       gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset);
+        rec->hww_warp_esr_report_mask = gk20a_readl(g,
+                        gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset);
+
+unlock:
+        mutex_unlock(&g->dbg_sessions_lock);
+
+        return err;
+}
 int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
                bool *post_event, struct channel_gk20a *fault_ch)
 {
@@ -5554,6 +5595,9 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
        gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
                  "sm hww global %08x warp %08x", global_esr, warp_esr);
+        gr_gk20a_elpg_protected_call(g,
+                g->ops.gr.record_sm_error_state(g, gpc, tpc));
        if (g->ops.gr.pre_process_sm_exception) {
                ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc,
                                global_esr, warp_esr,
@@ -8370,4 +8414,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
        gops->gr.get_lrf_tex_ltc_dram_override = NULL;
        gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
        gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
+        gops->gr.record_sm_error_state = gk20a_gr_record_sm_error_state;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index c82cf75c..22ff1351 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -329,6 +329,7 @@ struct gr_gk20a {
        u32 fbp_en_mask;
        u32 no_of_sm;
        struct sm_info *sm_to_cluster;
+        struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_states;
 #if defined(CONFIG_GK20A_CYCLE_STATS)
        struct mutex                    cs_lock;
        struct gk20a_cs_snapshot        *cs_data;
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
index 48aa1524..ab2a975b 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
@@ -2122,6 +2122,10 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
 {
        return (v & 0xffff) << 0;
 }
+static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_v(u32 r)
+{
+        /* The sm_id field is the low 16 bits of r, so no shift is needed. */
+        const u32 sm_id_mask = 0xffff;
+
+        return r & sm_id_mask;
+}
 static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
 {
        return 0x0050469c;
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index b49f2301..eeb70d76 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -31,6 +31,7 @@
 #include "hw_fuse_gm20b.h"
 #include "pmu_gm20b.h"
 #include "acr_gm20b.h"
+#include "hw_proj_gm20b.h"
 static void gr_gm20b_init_gpc_mmu(struct gk20a *g)
 {
@@ -1190,6 +1191,34 @@ static void gr_gm20b_get_access_map(struct gk20a *g,
        *num_entries = ARRAY_SIZE(wl_addr_gm20b);
 }
+/*
+ * Snapshot the SM HWW error registers of the SM at (gpc, tpc) into
+ * gr->sm_error_states[], indexed by the SM id read back from that SM's
+ * SM_CFG register. In addition to the global/warp ESR values and their
+ * report masks, this gm20b variant also records the warp ESR PC register.
+ * The whole update runs under g->dbg_sessions_lock.
+ *
+ * @g:   GPU instance.
+ * @gpc: GPC index, scaled by proj_gpc_stride_v() to form the register offset.
+ * @tpc: TPC index within the GPC, scaled by proj_tpc_in_gpc_stride_v().
+ *
+ * Returns 0 on success, or -EINVAL if no record array is allocated or the
+ * SM id reported by hardware does not fit inside it.
+ */
+static int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
+{
+        struct gr_gk20a *gr = &g->gr;
+        struct nvgpu_dbg_gpu_sm_error_state_record *rec;
+        u32 offset = proj_gpc_stride_v() * gpc +
+                     proj_tpc_in_gpc_stride_v() * tpc;
+        u32 sm_id;
+        int err = 0;
+
+        mutex_lock(&g->dbg_sessions_lock);
+
+        sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g,
+                        gr_gpc0_tpc0_sm_cfg_r() + offset));
+
+        /*
+         * sm_id is a 16-bit field read from a register, while the array only
+         * holds gr->no_of_sm entries: validate before using it as an index.
+         */
+        if (!gr->sm_error_states || sm_id >= gr->no_of_sm) {
+                err = -EINVAL;
+                goto unlock;
+        }
+
+        rec = &gr->sm_error_states[sm_id];
+
+        rec->hww_global_esr = gk20a_readl(g,
+                        gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+        rec->hww_warp_esr = gk20a_readl(g,
+                        gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
+        /* 32-bit register value stored into the 64-bit hww_warp_esr_pc field. */
+        rec->hww_warp_esr_pc = gk20a_readl(g,
+                        gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset);
+
+        /*
+         * NOTE(review): the report masks are read through the gr_gpcs_tpcs_*
+         * accessors plus the per-TPC offset, unlike the gr_gpc0_tpc0_*
+         * accessors used above - confirm this is the intended address.
+         */
+        rec->hww_global_esr_report_mask = gk20a_readl(g,
+                       gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset);
+        rec->hww_warp_esr_report_mask = gk20a_readl(g,
+                        gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset);
+
+unlock:
+        mutex_unlock(&g->dbg_sessions_lock);
+
+        return err;
+}
 void gm20b_init_gr(struct gpu_ops *gops)
 {
        gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -1256,4 +1285,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
        gops->gr.get_lrf_tex_ltc_dram_override = NULL;
        gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
        gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
+        gops->gr.record_sm_error_state = gm20b_gr_record_sm_error_state;
 }
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
index dbe54860..b796e2d3 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
@@ -2130,6 +2130,10 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
 {
        return (v & 0xffff) << 0;
 }
+static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_v(u32 r)
+{
+        /* The sm_id field is the low 16 bits of r, so no shift is needed. */
+        const u32 sm_id_mask = 0xffff;
+
+        return r & sm_id_mask;
+}
 static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
 {
        return 0x0050469c;
@@ -3270,6 +3274,10 @@ static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f(void)
 {
        return 0x0;
 }
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_pc_r(void)
+{
+        return 0x00504654;      /* base offset; gm20b_gr_record_sm_error_state() adds the per-GPC/TPC stride offset */
+}
 static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void)
 {
        return 0x00504770;
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 16d60261..96619015 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -676,8 +676,35 @@ struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args {
 #define NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE \
        _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 13, struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args)
+struct nvgpu_dbg_gpu_sm_error_state_record {
+        __u32 hww_global_esr;   /* value read at gr_gpc0_tpc0_sm_hww_global_esr_r() + offset */
+        __u32 hww_warp_esr;     /* value read at gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset */
+        __u64 hww_warp_esr_pc;  /* written only by the gm20b record function, from a 32-bit register read */
+        __u32 hww_global_esr_report_mask;       /* value read at gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset */
+        __u32 hww_warp_esr_report_mask;         /* value read at gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset */
+        /*
+         * Notes
+         * - One record is kept per SM; the driver allocates gr->no_of_sm of
+         *   them, zero-initialized, and the READ_SINGLE_SM_ERROR_STATE ioctl
+         *   copies a single record (possibly truncated) to user space.
+         * - This struct can be safely appended with new fields. However, always
+         *   keep the structure size a multiple of 8 and make sure that the
+         *   binary layout does not change between 32-bit and 64-bit
+         *   architectures.
+         */
+};
+struct nvgpu_dbg_gpu_read_single_sm_error_state_args {  /* argument block of NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE */
+        __u32 sm_id;    /* in: index of the SM record to read; the handler returns -EINVAL unless it is below gr->no_of_sm */
+        __u32 padding;  /* not read by the handler; places the __u64 members at offset 8 */
+        __u64 sm_error_state_record_mem;        /* in: user-space address the record is copied to */
+        __u64 sm_error_state_record_size;       /* in: size of that buffer in bytes; out: bytes copied (unchanged when 0 was passed) */
+};
+#define NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE                  \
+        _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 14, struct nvgpu_dbg_gpu_read_single_sm_error_state_args) /* dispatched to nvgpu_dbg_gpu_ioctl_read_single_sm_error_state() */
 #define NVGPU_DBG_GPU_IOCTL_LAST                \
-        _IOC_NR(NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE)
+        _IOC_NR(NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE)
 #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE                \
        sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args)

diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 95957788..d9c96417 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -525,6 +525,45 @@ static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(
525	return 0;	525	return 0;
526	}	526	}
527		527
		528	static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
		529	struct dbg_session_gk20a *dbg_s,
		530	struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args)
		531	{
		532	struct gk20a *g = get_gk20a(dbg_s->dev);
		533	struct gr_gk20a *gr = &g->gr;
		534	struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state;
		535	u32 sm_id;
		536	int err = 0;
		537
		538	sm_id = args->sm_id;
		539	if (sm_id >= gr->no_of_sm)
		540	return -EINVAL;
		541
		542	sm_error_state = gr->sm_error_states + sm_id;
		543
		544	if (args->sm_error_state_record_size > 0) {
		545	size_t write_size = sizeof(*sm_error_state);
		546
		547	if (write_size > args->sm_error_state_record_size)
		548	write_size = args->sm_error_state_record_size;
		549
		550	mutex_lock(&g->dbg_sessions_lock);
		551	err = copy_to_user((void __user *)(uintptr_t)
		552	args->sm_error_state_record_mem,
		553	sm_error_state,
		554	write_size);
		555	mutex_unlock(&g->dbg_sessions_lock);
		556	if (err) {
		557	gk20a_err(dev_from_gk20a(g), "copy_to_user failed!\n");
		558	return err;
		559	}
		560
		561	args->sm_error_state_record_size = write_size;
		562	}
		563
		564	return 0;
		565	}
		566
528	long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,	567	long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
529	unsigned long arg)	568	unsigned long arg)
530	{	569	{
@@ -622,6 +661,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
622	(struct nvgpu_dbg_gpu_timeout_args *)buf);	661	(struct nvgpu_dbg_gpu_timeout_args *)buf);
623	break;	662	break;
624		663
		664	case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE:
		665	err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s,
		666	(struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf);
		667	break;
		668
625	default:	669	default:
626	gk20a_err(dev_from_gk20a(g),	670	gk20a_err(dev_from_gk20a(g),
627	"unrecognized dbg gpu ioctl cmd: 0x%x",	671	"unrecognized dbg gpu ioctl cmd: 0x%x",


diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 61e8e641..c70217ea 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -240,6 +240,8 @@ struct gpu_ops {
240	bool *post_event);	240	bool *post_event);
241	void (*create_gr_sysfs)(struct device *dev);	241	void (*create_gr_sysfs)(struct device *dev);
242	u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g);	242	u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g);
		243	int (*record_sm_error_state)(struct gk20a *g,
		244	u32 gpc, u32 tpc);
243	} gr;	245	} gr;
244	const char *name;	246	const char *name;
245	struct {	247	struct {


diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 734552a1..c0a25e68 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -58,6 +58,7 @@
58	#include "semaphore_gk20a.h"	58	#include "semaphore_gk20a.h"
59	#include "platform_gk20a.h"	59	#include "platform_gk20a.h"
60	#include "ctxsw_trace_gk20a.h"	60	#include "ctxsw_trace_gk20a.h"
		61	#include "hw_proj_gk20a.h"
61		62
62	#define BLK_SIZE (256)	63	#define BLK_SIZE (256)
63	#define NV_PMM_FBP_STRIDE 0x1000	64	#define NV_PMM_FBP_STRIDE 0x1000
@@ -3129,6 +3130,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
3129		3130
3130	memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));	3131	memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));
3131		3132
		3133	kfree(gr->sm_error_states);
3132	kfree(gr->gpc_tpc_count);	3134	kfree(gr->gpc_tpc_count);
3133	kfree(gr->gpc_zcb_count);	3135	kfree(gr->gpc_zcb_count);
3134	kfree(gr->gpc_ppc_count);	3136	kfree(gr->gpc_ppc_count);
@@ -4426,6 +4428,19 @@ restore_fe_go_idle:
4426	if (err)	4428	if (err)
4427	goto out;	4429	goto out;
4428		4430
		4431	kfree(gr->sm_error_states);
		4432
		4433	/* we need to allocate this after g->ops.gr.init_fs_state() since
		4434	* we initialize gr->no_of_sm in this function
		4435	*/
		4436	gr->sm_error_states = kzalloc(
		4437	sizeof(struct nvgpu_dbg_gpu_sm_error_state_record)
		4438	* gr->no_of_sm, GFP_KERNEL);
		4439	if (!gr->sm_error_states) {
		4440	err = -ENOMEM;
		4441	goto restore_fe_go_idle;
		4442	}
		4443
4429	out:	4444	out:
4430	gk20a_dbg_fn("done");	4445	gk20a_dbg_fn("done");
4431	return 0;	4446	return 0;
@@ -5494,6 +5509,32 @@ u32 gk20a_mask_hww_warp_esr(u32 hww_warp_esr)
5494	return hww_warp_esr;	5509	return hww_warp_esr;
5495	}	5510	}
5496		5511
		5512	static int gk20a_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
		5513	{
		5514	int sm_id;
		5515	struct gr_gk20a *gr = &g->gr;
		5516	u32 offset = proj_gpc_stride_v() * gpc +
		5517	proj_tpc_in_gpc_stride_v() * tpc;
		5518
		5519	mutex_lock(&g->dbg_sessions_lock);
		5520
		5521	sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g,
		5522	gr_gpc0_tpc0_sm_cfg_r() + offset));
		5523
		5524	gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g,
		5525	gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
		5526	gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g,
		5527	gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
		5528	gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g,
		5529	gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset);
		5530	gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g,
		5531	gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset);
		5532
		5533	mutex_unlock(&g->dbg_sessions_lock);
		5534
		5535	return 0;
		5536	}
		5537
5497	int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,	5538	int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
5498	bool *post_event, struct channel_gk20a *fault_ch)	5539	bool *post_event, struct channel_gk20a *fault_ch)
5499	{	5540	{
@@ -5554,6 +5595,9 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
5554	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,	5595	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
5555	"sm hww global %08x warp %08x", global_esr, warp_esr);	5596	"sm hww global %08x warp %08x", global_esr, warp_esr);
5556		5597
		5598	gr_gk20a_elpg_protected_call(g,
		5599	g->ops.gr.record_sm_error_state(g, gpc, tpc));
		5600
5557	if (g->ops.gr.pre_process_sm_exception) {	5601	if (g->ops.gr.pre_process_sm_exception) {
5558	ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc,	5602	ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc,
5559	global_esr, warp_esr,	5603	global_esr, warp_esr,
@@ -8370,4 +8414,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
8370	gops->gr.get_lrf_tex_ltc_dram_override = NULL;	8414	gops->gr.get_lrf_tex_ltc_dram_override = NULL;
8371	gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;	8415	gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
8372	gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;	8416	gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
		8417	gops->gr.record_sm_error_state = gk20a_gr_record_sm_error_state;
8373	}	8418	}


diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index c82cf75c..22ff1351 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -329,6 +329,7 @@ struct gr_gk20a {
329	u32 fbp_en_mask;	329	u32 fbp_en_mask;
330	u32 no_of_sm;	330	u32 no_of_sm;
331	struct sm_info *sm_to_cluster;	331	struct sm_info *sm_to_cluster;
		332	struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_states;
332	#if defined(CONFIG_GK20A_CYCLE_STATS)	333	#if defined(CONFIG_GK20A_CYCLE_STATS)
333	struct mutex cs_lock;	334	struct mutex cs_lock;
334	struct gk20a_cs_snapshot *cs_data;	335	struct gk20a_cs_snapshot *cs_data;


diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h index 48aa1524..ab2a975b 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
@@ -2122,6 +2122,10 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
2122	{	2122	{
2123	return (v & 0xffff) << 0;	2123	return (v & 0xffff) << 0;
2124	}	2124	}
		2125	static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_v(u32 r)
		2126	{
		2127	return (r >> 0) & 0xffff;
		2128	}
2125	static inline u32 gr_gpc0_tpc0_sm_arch_r(void)	2129	static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
2126	{	2130	{
2127	return 0x0050469c;	2131	return 0x0050469c;


diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index b49f2301..eeb70d76 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -31,6 +31,7 @@
31	#include "hw_fuse_gm20b.h"	31	#include "hw_fuse_gm20b.h"
32	#include "pmu_gm20b.h"	32	#include "pmu_gm20b.h"
33	#include "acr_gm20b.h"	33	#include "acr_gm20b.h"
		34	#include "hw_proj_gm20b.h"
34		35
35	static void gr_gm20b_init_gpc_mmu(struct gk20a *g)	36	static void gr_gm20b_init_gpc_mmu(struct gk20a *g)
36	{	37	{
@@ -1190,6 +1191,34 @@ static void gr_gm20b_get_access_map(struct gk20a *g,
1190	*num_entries = ARRAY_SIZE(wl_addr_gm20b);	1191	*num_entries = ARRAY_SIZE(wl_addr_gm20b);
1191	}	1192	}
1192		1193
		1194	static int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
		1195	{
		1196	int sm_id;
		1197	struct gr_gk20a *gr = &g->gr;
		1198	u32 offset = proj_gpc_stride_v() * gpc +
		1199	proj_tpc_in_gpc_stride_v() * tpc;
		1200
		1201	mutex_lock(&g->dbg_sessions_lock);
		1202
		1203	sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g,
		1204	gr_gpc0_tpc0_sm_cfg_r() + offset));
		1205
		1206	gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g,
		1207	gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
		1208	gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g,
		1209	gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
		1210	gr->sm_error_states[sm_id].hww_warp_esr_pc = gk20a_readl(g,
		1211	gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset);
		1212	gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g,
		1213	gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset);
		1214	gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g,
		1215	gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset);
		1216
		1217	mutex_unlock(&g->dbg_sessions_lock);
		1218
		1219	return 0;
		1220	}
		1221
1193	void gm20b_init_gr(struct gpu_ops *gops)	1222	void gm20b_init_gr(struct gpu_ops *gops)
1194	{	1223	{
1195	gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;	1224	gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -1256,4 +1285,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
1256	gops->gr.get_lrf_tex_ltc_dram_override = NULL;	1285	gops->gr.get_lrf_tex_ltc_dram_override = NULL;
1257	gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;	1286	gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
1258	gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;	1287	gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
		1288	gops->gr.record_sm_error_state = gm20b_gr_record_sm_error_state;
1259	}	1289	}


diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h index dbe54860..b796e2d3 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
@@ -2130,6 +2130,10 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
2130	{	2130	{
2131	return (v & 0xffff) << 0;	2131	return (v & 0xffff) << 0;
2132	}	2132	}
		2133	static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_v(u32 r)
		2134	{
		2135	return (r >> 0) & 0xffff;
		2136	}
2133	static inline u32 gr_gpc0_tpc0_sm_arch_r(void)	2137	static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
2134	{	2138	{
2135	return 0x0050469c;	2139	return 0x0050469c;
@@ -3270,6 +3274,10 @@ static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f(void)
3270	{	3274	{
3271	return 0x0;	3275	return 0x0;
3272	}	3276	}
		3277	static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_pc_r(void)
		3278	{
		3279	return 0x00504654;
		3280	}
3273	static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void)	3281	static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void)
3274	{	3282	{
3275	return 0x00504770;	3283	return 0x00504770;


diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 16d60261..96619015 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h
@@ -676,8 +676,35 @@ struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args {
676	#define NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE \	676	#define NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE \
677	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 13, struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args)	677	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 13, struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args)
678		678
		679
		680	struct nvgpu_dbg_gpu_sm_error_state_record {
		681	__u32 hww_global_esr;
		682	__u32 hww_warp_esr;
		683	__u64 hww_warp_esr_pc;
		684	__u32 hww_global_esr_report_mask;
		685	__u32 hww_warp_esr_report_mask;
		686
		687	/*
		688	* Notes
		689	* - This struct can be safely appended with new fields. However, always
		690	* keep the structure size multiple of 8 and make sure that the binary
		691	* layout does not change between 32-bit and 64-bit architectures.
		692	*/
		693	};
		694
		695	struct nvgpu_dbg_gpu_read_single_sm_error_state_args {
		696	__u32 sm_id;
		697	__u32 padding;
		698	__u64 sm_error_state_record_mem;
		699	__u64 sm_error_state_record_size;
		700	};
		701
		702	#define NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE \
		703	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 14, struct nvgpu_dbg_gpu_read_single_sm_error_state_args)
		704
		705
679	#define NVGPU_DBG_GPU_IOCTL_LAST \	706	#define NVGPU_DBG_GPU_IOCTL_LAST \
680	_IOC_NR(NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE)	707	_IOC_NR(NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE)
681		708
682	#define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \	709	#define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
683	sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args)	710	sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args)