author		Vinod G <vinodg@nvidia.com>	2018-08-08 02:09:30 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-08-25 05:10:43 -0400
commit		bfe65407bde2b5d0776724301e215c6553c989f3 (patch)
tree		f68a01361052afe1c30a0c6dcd5d359b762e647a /drivers/gpu/nvgpu/gm20b
parent		3bd47da0954d3486d9ccd3c396f84445918f82b4 (diff)
gpu: nvgpu: Read sm error ioctl support for tsg
Add READ_SM_ERROR IOCTL support at the TSG level.

Move the struct that saves the sm_error details from gr to tsg, since
sm_error support is context based, not global. Also correct a MISRA 21.1
violation in the header file.

The nvgpu_dbg_gpu_ioctl_write_single_sm_error_state and
nvgpu_dbg_gpu_ioctl_read_single_sm_error_state functions are modified to
use the tsg struct nvgpu_tsg_sm_error_state.

Bug 200412642

Change-Id: I9e334b059078a4bb0e360b945444cc4bf1cc56ec
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1794856
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
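For reference, the per-SM record that this change relocates under the TSG is sketched below. The field names and widths are inferred from the register reads/writes in the diff that follows; the authoritative definitions of struct nvgpu_tsg_sm_error_state and of the sm_error_states member of struct tsg_gk20a live in the tsg header, which is outside this diff, so treat this as an illustrative sketch rather than the exact source.

#include <nvgpu/types.h>	/* u32/u64 typedefs used throughout nvgpu */

/*
 * Sketch of the per-SM error record now owned by the TSG instead of
 * struct gr_gk20a. Fields mirror the accesses in
 * gm20b_gr_read_sm_error_state() and gm20b_gr_write_sm_error_state()
 * below; hww_warp_esr_pc is widened to 64 bits (the 32-bit register value
 * is cast on read, and u64_lo32() is written back).
 */
struct nvgpu_tsg_sm_error_state {
	u32 hww_global_esr;
	u32 hww_warp_esr;
	u64 hww_warp_esr_pc;
	u32 hww_global_esr_report_mask;
	u32 hww_warp_esr_report_mask;
};

/* Assumed fragment of struct tsg_gk20a: one record per SM, indexed by sm_id. */
struct tsg_gk20a {
	/* ... other members elided ... */
	struct nvgpu_tsg_sm_error_state *sm_error_states;
};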
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b')
-rw-r--r--	drivers/gpu/nvgpu/gm20b/gr_gm20b.c	106
-rw-r--r--	drivers/gpu/nvgpu/gm20b/gr_gm20b.h	2
2 files changed, 66 insertions, 42 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 68ae91e8..fc4ab3dd 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1268,32 +1268,68 @@ void gr_gm20b_get_access_map(struct gk20a *g,
 	*num_entries = ARRAY_SIZE(wl_addr_gm20b);
 }
 
+static void gm20b_gr_read_sm_error_state(struct gk20a *g,
+		u32 offset,
+		struct nvgpu_tsg_sm_error_state *sm_error_states)
+{
+	sm_error_states->hww_global_esr = gk20a_readl(g,
+		gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+	sm_error_states->hww_warp_esr = gk20a_readl(g,
+		gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
+	sm_error_states->hww_warp_esr_pc = (u64)(gk20a_readl(g,
+		gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset));
+	sm_error_states->hww_global_esr_report_mask = gk20a_readl(g,
+		gr_gpc0_tpc0_sm_hww_global_esr_report_mask_r() + offset);
+	sm_error_states->hww_warp_esr_report_mask = gk20a_readl(g,
+		gr_gpc0_tpc0_sm_hww_warp_esr_report_mask_r() + offset);
+
+}
+
+static void gm20b_gr_write_sm_error_state(struct gk20a *g,
+		u32 offset,
+		struct nvgpu_tsg_sm_error_state *sm_error_states)
+{
+	gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
+			sm_error_states->hww_global_esr);
+	gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset,
+			sm_error_states->hww_warp_esr);
+	gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset,
+			u64_lo32(sm_error_states->hww_warp_esr_pc));
+	gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset,
+			sm_error_states->hww_global_esr_report_mask);
+	gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
+			sm_error_states->hww_warp_esr_report_mask);
+}
+
 int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 		struct channel_gk20a *fault_ch)
 {
 	int sm_id;
-	struct gr_gk20a *gr = &g->gr;
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
 					GPU_LIT_TPC_IN_GPC_STRIDE);
 	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
+	struct nvgpu_tsg_sm_error_state *sm_error_states = NULL;
+	struct tsg_gk20a *tsg = NULL;
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 
 	sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g,
 			gr_gpc0_tpc0_sm_cfg_r() + offset));
 
-	gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g,
-			gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
-	gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g,
-			gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
-	gr->sm_error_states[sm_id].hww_warp_esr_pc = gk20a_readl(g,
-			gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset);
-	gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g,
-			gr_gpc0_tpc0_sm_hww_global_esr_report_mask_r() + offset);
-	gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g,
-			gr_gpc0_tpc0_sm_hww_warp_esr_report_mask_r() + offset);
+	if (fault_ch != NULL) {
+		tsg = tsg_gk20a_from_ch(fault_ch);
+	}
+
+	if (tsg == NULL) {
+		nvgpu_err(g, "no valid tsg");
+		goto record_fail;
+	}
+
+	sm_error_states = tsg->sm_error_states + sm_id;
+	gm20b_gr_read_sm_error_state(g, offset, sm_error_states);
 
+record_fail:
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 
 	return sm_id;
@@ -1301,12 +1337,12 @@ int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 
 int gm20b_gr_update_sm_error_state(struct gk20a *g,
 		struct channel_gk20a *ch, u32 sm_id,
-		struct nvgpu_gr_sm_error_state *sm_error_state)
+		struct nvgpu_tsg_sm_error_state *sm_error_state)
 {
 	u32 gpc, tpc, offset;
-	struct gr_gk20a *gr = &g->gr;
 	struct tsg_gk20a *tsg;
 	struct nvgpu_gr_ctx *ch_ctx;
+	struct nvgpu_tsg_sm_error_state *tsg_sm_error_states;
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
 					GPU_LIT_TPC_IN_GPC_STRIDE);
@@ -1320,16 +1356,8 @@ int gm20b_gr_update_sm_error_state(struct gk20a *g,
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 
-	gr->sm_error_states[sm_id].hww_global_esr =
-			sm_error_state->hww_global_esr;
-	gr->sm_error_states[sm_id].hww_warp_esr =
-			sm_error_state->hww_warp_esr;
-	gr->sm_error_states[sm_id].hww_warp_esr_pc =
-			sm_error_state->hww_warp_esr_pc;
-	gr->sm_error_states[sm_id].hww_global_esr_report_mask =
-			sm_error_state->hww_global_esr_report_mask;
-	gr->sm_error_states[sm_id].hww_warp_esr_report_mask =
-			sm_error_state->hww_warp_esr_report_mask;
+	tsg_sm_error_states = tsg->sm_error_states + sm_id;
+	gk20a_tsg_update_sm_error_state_locked(tsg, sm_id, sm_error_state);
 
 	err = gr_gk20a_disable_ctxsw(g);
 	if (err) {
@@ -1343,29 +1371,20 @@ int gm20b_gr_update_sm_error_state(struct gk20a *g,
 	offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
 
 	if (gk20a_is_channel_ctx_resident(ch)) {
-		gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
-				gr->sm_error_states[sm_id].hww_global_esr);
-		gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset,
-				gr->sm_error_states[sm_id].hww_warp_esr);
-		gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset,
-				gr->sm_error_states[sm_id].hww_warp_esr_pc);
-		gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset,
-				gr->sm_error_states[sm_id].hww_global_esr_report_mask);
-		gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
-				gr->sm_error_states[sm_id].hww_warp_esr_report_mask);
+		gm20b_gr_write_sm_error_state(g, offset, tsg_sm_error_states);
 	} else {
 		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 		if (err)
 			goto enable_ctxsw;
 
 		gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset,
-				gr->sm_error_states[sm_id].hww_global_esr_report_mask,
+				tsg_sm_error_states->hww_global_esr_report_mask,
 				true);
 		gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
-				gr->sm_error_states[sm_id].hww_warp_esr_report_mask,
+				tsg_sm_error_states->hww_warp_esr_report_mask,
 				true);
 
 		gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
 	}
@@ -1383,15 +1402,20 @@ int gm20b_gr_clear_sm_error_state(struct gk20a *g,
 {
 	u32 gpc, tpc, offset;
 	u32 val;
-	struct gr_gk20a *gr = &g->gr;
+	struct tsg_gk20a *tsg;
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
 					GPU_LIT_TPC_IN_GPC_STRIDE);
 	int err = 0;
 
+	tsg = tsg_gk20a_from_ch(ch);
+	if (tsg == NULL) {
+		return -EINVAL;
+	}
+
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 
-	memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states));
+	memset(&tsg->sm_error_states[sm_id], 0, sizeof(*tsg->sm_error_states));
 
 	err = gr_gk20a_disable_ctxsw(g);
 	if (err) {
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
index 9d8e5cdf..7c3baa59 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
@@ -119,7 +119,7 @@ int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc,
 		u32 tpc, u32 sm, struct channel_gk20a *fault_ch);
 int gm20b_gr_update_sm_error_state(struct gk20a *g,
 		struct channel_gk20a *ch, u32 sm_id,
-		struct nvgpu_gr_sm_error_state *sm_error_state);
+		struct nvgpu_tsg_sm_error_state *sm_error_state);
 int gm20b_gr_clear_sm_error_state(struct gk20a *g,
 		struct channel_gk20a *ch, u32 sm_id);
 int gr_gm20b_get_preemption_mode_flags(struct gk20a *g,
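
To round out the picture, here is a hedged sketch of how a TSG-level read path (for example the READ_SM_ERROR ioctl handler named in the commit message) could consume the relocated state. Only tsg->sm_error_states, the sm_id indexing, and g->dbg_sessions_lock come from the diff above; the function name, the NULL check, and the copy-out-by-value approach are assumptions for illustration.

/*
 * Illustrative only: snapshot one SM's error record from the TSG under the
 * debug-sessions lock. The real handler,
 * nvgpu_dbg_gpu_ioctl_read_single_sm_error_state (per the commit message),
 * lives outside this diff; bounds-checking sm_id against the SM count is
 * left to the caller here.
 */
static int example_tsg_read_sm_error_state(struct gk20a *g,
		struct tsg_gk20a *tsg, u32 sm_id,
		struct nvgpu_tsg_sm_error_state *out)
{
	if (tsg->sm_error_states == NULL) {
		return -EINVAL;
	}

	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
	*out = tsg->sm_error_states[sm_id];
	nvgpu_mutex_release(&g->dbg_sessions_lock);

	return 0;
}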