summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu
diff options
context:
space:
mode:
authorSeema Khowala <seemaj@nvidia.com>2017-06-22 14:53:04 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2017-07-06 15:04:43 -0400
commit0852c9f1aba1654e380ccdd13cd0540fbb5a8ab0 (patch)
tree7f5a797567b0c76f89b8e8241f794b432129d8bc /drivers/gpu/nvgpu
parent4728761b6cd002404b9ccd8b318cc9f772962efb (diff)
gpu: nvgpu: add sm lock_down gr ops
Add lock_down_sm and wait_for_sm_lock_down gr ops.

Required to support multiple SM and t19x SM register address changes.

JIRA GPUT19X-75

Change-Id: I529babde51d9b2143fe3740a4f67c582b7eb404b
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master/r/1514042
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h4
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c55
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.h8
-rw-r--r--drivers/gpu/nvgpu/gm20b/gr_gm20b.c2
4 files changed, 37 insertions, 32 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index a0a67332..483cdd6e 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -296,6 +296,10 @@ struct gpu_ops {
296 u32 (*get_sm_hww_global_esr)(struct gk20a *g, 296 u32 (*get_sm_hww_global_esr)(struct gk20a *g,
297 u32 gpc, u32 tpc, u32 sm); 297 u32 gpc, u32 tpc, u32 sm);
298 u32 (*get_sm_no_lock_down_hww_global_esr_mask)(struct gk20a *g); 298 u32 (*get_sm_no_lock_down_hww_global_esr_mask)(struct gk20a *g);
299 int (*lock_down_sm)(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
300 u32 global_esr_mask, bool check_errors);
301 int (*wait_for_sm_lock_down)(struct gk20a *g, u32 gpc, u32 tpc,
302 u32 sm, u32 global_esr_mask, bool check_errors);
299 void (*get_esr_sm_sel)(struct gk20a *g, u32 gpc, u32 tpc, 303 void (*get_esr_sm_sel)(struct gk20a *g, u32 gpc, u32 tpc,
300 u32 *esr_sm_sel); 304 u32 *esr_sm_sel);
301 int (*handle_sm_exception)(struct gk20a *g, 305 int (*handle_sm_exception)(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index d325f794..d2c77c93 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5467,16 +5467,14 @@ unlock:
5467} 5467}
5468 5468
5469int gk20a_gr_lock_down_sm(struct gk20a *g, 5469int gk20a_gr_lock_down_sm(struct gk20a *g,
5470 u32 gpc, u32 tpc, u32 global_esr_mask, 5470 u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
5471 bool check_errors) 5471 bool check_errors)
5472{ 5472{
5473 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); 5473 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
5474 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
5475 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
5476 u32 dbgr_control0; 5474 u32 dbgr_control0;
5477 5475
5478 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, 5476 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
5479 "GPC%d TPC%d: locking down SM", gpc, tpc); 5477 "GPC%d TPC%d SM%d: assert stop trigger", gpc, tpc, sm);
5480 5478
5481 /* assert stop trigger */ 5479 /* assert stop trigger */
5482 dbgr_control0 = 5480 dbgr_control0 =
@@ -5485,7 +5483,7 @@ int gk20a_gr_lock_down_sm(struct gk20a *g,
5485 gk20a_writel(g, 5483 gk20a_writel(g,
5486 gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0); 5484 gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
5487 5485
5488 return gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, global_esr_mask, 5486 return g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, global_esr_mask,
5489 check_errors); 5487 check_errors);
5490} 5488}
5491 5489
@@ -5598,7 +5596,8 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
5598 } 5596 }
5599 5597
5600 if (do_warp_sync) { 5598 if (do_warp_sync) {
5601 ret = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, true); 5599 ret = g->ops.gr.lock_down_sm(g, gpc, tpc, sm,
5600 global_mask, true);
5602 if (ret) { 5601 if (ret) {
5603 nvgpu_err(g, "sm did not lock down!"); 5602 nvgpu_err(g, "sm did not lock down!");
5604 return ret; 5603 return ret;
@@ -7906,25 +7905,21 @@ void gk20a_init_gr(struct gk20a *g)
7906 nvgpu_cond_init(&g->gr.init_wq); 7905 nvgpu_cond_init(&g->gr.init_wq);
7907} 7906}
7908 7907
7909int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, 7908int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
7910 u32 global_esr_mask, bool check_errors) 7909 u32 global_esr_mask, bool check_errors)
7911{ 7910{
7912 bool locked_down; 7911 bool locked_down;
7913 bool no_error_pending; 7912 bool no_error_pending;
7914 u32 delay = GR_IDLE_CHECK_DEFAULT; 7913 u32 delay = GR_IDLE_CHECK_DEFAULT;
7915 bool mmu_debug_mode_enabled = g->ops.fb.is_debug_mode_enabled(g); 7914 bool mmu_debug_mode_enabled = g->ops.fb.is_debug_mode_enabled(g);
7916 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); 7915 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
7917 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
7918 u32 offset =
7919 gpc_stride * gpc + tpc_in_gpc_stride * tpc;
7920 u32 dbgr_status0 = 0, dbgr_control0 = 0; 7916 u32 dbgr_status0 = 0, dbgr_control0 = 0;
7921 u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; 7917 u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
7922 struct nvgpu_timeout timeout; 7918 struct nvgpu_timeout timeout;
7923 u32 warp_esr; 7919 u32 warp_esr;
7924 u32 sm = 0;
7925 7920
7926 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, 7921 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
7927 "GPC%d TPC%d: locking down SM", gpc, tpc); 7922 "GPC%d TPC%d SM%d: locking down SM", gpc, tpc, sm);
7928 7923
7929 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), 7924 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
7930 NVGPU_TIMER_CPU_TIMER); 7925 NVGPU_TIMER_CPU_TIMER);
@@ -7949,7 +7944,8 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
7949 7944
7950 if (locked_down || no_error_pending) { 7945 if (locked_down || no_error_pending) {
7951 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, 7946 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
7952 "GPC%d TPC%d: locked down SM", gpc, tpc); 7947 "GPC%d TPC%d SM%d: locked down SM",
7948 gpc, tpc, sm);
7953 return 0; 7949 return 0;
7954 } 7950 }
7955 7951
@@ -7959,7 +7955,7 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
7959 (g->ops.mm.mmu_fault_pending(g))) { 7955 (g->ops.mm.mmu_fault_pending(g))) {
7960 nvgpu_err(g, 7956 nvgpu_err(g,
7961 "GPC%d TPC%d: mmu fault pending," 7957 "GPC%d TPC%d: mmu fault pending,"
7962 " sm will never lock down!", gpc, tpc); 7958 " SM%d will never lock down!", gpc, tpc, sm);
7963 return -EFAULT; 7959 return -EFAULT;
7964 } 7960 }
7965 7961
@@ -8017,7 +8013,7 @@ void gk20a_gr_suspend_single_sm(struct gk20a *g,
8017 gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, 8013 gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset,
8018 dbgr_control0); 8014 dbgr_control0);
8019 8015
8020 err = gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, 8016 err = g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm,
8021 global_esr_mask, check_errors); 8017 global_esr_mask, check_errors);
8022 if (err) { 8018 if (err) {
8023 nvgpu_err(g, 8019 nvgpu_err(g,
@@ -8030,9 +8026,10 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g,
8030 u32 global_esr_mask, bool check_errors) 8026 u32 global_esr_mask, bool check_errors)
8031{ 8027{
8032 struct gr_gk20a *gr = &g->gr; 8028 struct gr_gk20a *gr = &g->gr;
8033 u32 gpc, tpc; 8029 u32 gpc, tpc, sm;
8034 int err; 8030 int err;
8035 u32 dbgr_control0; 8031 u32 dbgr_control0;
8032 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
8036 8033
8037 /* if an SM debugger isn't attached, skip suspend */ 8034 /* if an SM debugger isn't attached, skip suspend */
8038 if (!g->ops.gr.sm_debugger_attached(g)) { 8035 if (!g->ops.gr.sm_debugger_attached(g)) {
@@ -8055,13 +8052,14 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g,
8055 8052
8056 for (gpc = 0; gpc < gr->gpc_count; gpc++) { 8053 for (gpc = 0; gpc < gr->gpc_count; gpc++) {
8057 for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) { 8054 for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) {
8058 err = 8055 for (sm = 0; sm < sm_per_tpc; sm++) {
8059 gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, 8056 err = g->ops.gr.wait_for_sm_lock_down(g,
8057 gpc, tpc, sm,
8060 global_esr_mask, check_errors); 8058 global_esr_mask, check_errors);
8061 if (err) { 8059 if (err) {
8062 nvgpu_err(g, 8060 nvgpu_err(g, "SuspendAllSms failed");
8063 "SuspendAllSms failed"); 8061 return;
8064 return; 8062 }
8065 } 8063 }
8066 } 8064 }
8067 } 8065 }
@@ -8371,7 +8369,7 @@ int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state)
8371{ 8369{
8372 int err = 0; 8370 int err = 0;
8373 struct gr_gk20a *gr = &g->gr; 8371 struct gr_gk20a *gr = &g->gr;
8374 u32 gpc, tpc, sm_id; 8372 u32 gpc, tpc, sm, sm_id;
8375 u32 global_mask; 8373 u32 global_mask;
8376 8374
8377 /* Wait for the SMs to reach full stop. This condition is: 8375 /* Wait for the SMs to reach full stop. This condition is:
@@ -8386,9 +8384,10 @@ int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state)
8386 8384
8387 gpc = g->gr.sm_to_cluster[sm_id].gpc_index; 8385 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
8388 tpc = g->gr.sm_to_cluster[sm_id].tpc_index; 8386 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
8387 sm = g->gr.sm_to_cluster[sm_id].sm_index;
8389 8388
8390 err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false); 8389 err = g->ops.gr.lock_down_sm(g, gpc, tpc, sm,
8391 8390 global_mask, false);
8392 if (err) { 8391 if (err) {
8393 nvgpu_err(g, "sm did not lock down!"); 8392 nvgpu_err(g, "sm did not lock down!");
8394 return err; 8393 return err;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 8230ba72..77db5cf6 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -622,9 +622,6 @@ void gk20a_gr_suspend_single_sm(struct gk20a *g,
622void gk20a_gr_suspend_all_sms(struct gk20a *g, 622void gk20a_gr_suspend_all_sms(struct gk20a *g,
623 u32 global_esr_mask, bool check_errors); 623 u32 global_esr_mask, bool check_errors);
624u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index); 624u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index);
625int gk20a_gr_lock_down_sm(struct gk20a *g,
626 u32 gpc, u32 tpc, u32 global_esr_mask,
627 bool check_errors);
628int gr_gk20a_set_sm_debug_mode(struct gk20a *g, 625int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
629 struct channel_gk20a *ch, u64 sms, bool enable); 626 struct channel_gk20a *ch, u64 sms, bool enable);
630bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch); 627bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);
@@ -681,7 +678,10 @@ static inline void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
681 678
682int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch, 679int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
683 struct gr_gk20a_isr_data *isr_data); 680 struct gr_gk20a_isr_data *isr_data);
684int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, 681int gk20a_gr_lock_down_sm(struct gk20a *g,
682 u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
683 bool check_errors);
684int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
685 u32 global_esr_mask, bool check_errors); 685 u32 global_esr_mask, bool check_errors);
686void gk20a_gr_clear_sm_hww(struct gk20a *g, 686void gk20a_gr_clear_sm_hww(struct gk20a *g,
687 u32 gpc, u32 tpc, u32 global_esr); 687 u32 gpc, u32 tpc, u32 global_esr);
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index fecd487f..333f0340 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1638,4 +1638,6 @@ void gm20b_init_gr(struct gpu_ops *gops)
1638 gops->gr.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr; 1638 gops->gr.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr;
1639 gops->gr.get_sm_no_lock_down_hww_global_esr_mask = 1639 gops->gr.get_sm_no_lock_down_hww_global_esr_mask =
1640 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask; 1640 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask;
1641 gops->gr.lock_down_sm = gk20a_gr_lock_down_sm;
1642 gops->gr.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down;
1641} 1643}