Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 187
 1 file changed, 133 insertions(+), 54 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 452560d8..b3fc8ae1 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -79,6 +79,10 @@ static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c);
 static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 					   struct channel_gk20a *c);
 
+/* sm lock down */
+static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
+		u32 global_esr_mask, bool check_errors);
+
 void gk20a_fecs_dump_falcon_stats(struct gk20a *g)
 {
 	int i;
@@ -5365,13 +5369,9 @@ unlock:
 	return chid;
 }
 
-static int gk20a_gr_lock_down_sm(struct gk20a *g,
+int gk20a_gr_lock_down_sm(struct gk20a *g,
 		u32 gpc, u32 tpc, u32 global_esr_mask)
 {
-	unsigned long end_jiffies = jiffies +
-		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
-	u32 delay = GR_IDLE_CHECK_DEFAULT;
-	bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g);
 	u32 offset =
 		proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc;
 	u32 dbgr_control0;
@@ -5386,55 +5386,8 @@ static int gk20a_gr_lock_down_sm(struct gk20a *g,
 	gk20a_writel(g,
 		gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
 
-	/* wait for the sm to lock down */
-	do {
-		u32 global_esr = gk20a_readl(g,
-				gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
-		u32 warp_esr = gk20a_readl(g,
-				gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
-		u32 dbgr_status0 = gk20a_readl(g,
-				gr_gpc0_tpc0_sm_dbgr_status0_r() + offset);
-		bool locked_down =
-			(gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) ==
-			 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v());
-		bool error_pending =
-			(gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) !=
-			 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) ||
-			((global_esr & ~global_esr_mask) != 0);
-
-		if (locked_down || !error_pending) {
-			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
-					"GPC%d TPC%d: locked down SM", gpc, tpc);
-
-			/* de-assert stop trigger */
-			dbgr_control0 &= ~gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f();
-			gk20a_writel(g,
-				gr_gpc0_tpc0_sm_dbgr_control0_r() + offset,
-				dbgr_control0);
-
-			return 0;
-		}
-
-		/* if an mmu fault is pending and mmu debug mode is not
-		 * enabled, the sm will never lock down. */
-		if (!mmu_debug_mode_enabled && gk20a_fifo_mmu_fault_pending(g)) {
-			gk20a_err(dev_from_gk20a(g),
-				"GPC%d TPC%d: mmu fault pending,"
-				" sm will never lock down!", gpc, tpc);
-			return -EFAULT;
-		}
-
-		usleep_range(delay, delay * 2);
-		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
-
-	} while (time_before(jiffies, end_jiffies)
-			|| !tegra_platform_is_silicon());
-
-	gk20a_err(dev_from_gk20a(g),
-		"GPC%d TPC%d: timed out while trying to lock down SM",
-		gpc, tpc);
-
-	return -EAGAIN;
+	return gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, global_esr_mask,
+			true);
 }
 
 bool gk20a_gr_sm_debugger_attached(struct gk20a *g)
@@ -7198,6 +7151,131 @@ static u32 gr_gk20a_get_tpc_num(u32 addr)
 	return 0;
 }
 
+static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
+		u32 global_esr_mask, bool check_errors)
+{
+	unsigned long end_jiffies = jiffies +
+		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+	u32 delay = GR_IDLE_CHECK_DEFAULT;
+	bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g);
+	u32 offset =
+		proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc;
+
+	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
+		"GPC%d TPC%d: locking down SM", gpc, tpc);
+
+	/* wait for the sm to lock down */
+	do {
+		u32 global_esr = gk20a_readl(g,
+				gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+		u32 warp_esr = gk20a_readl(g,
+				gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
+		u32 dbgr_status0 = gk20a_readl(g,
+				gr_gpc0_tpc0_sm_dbgr_status0_r() + offset);
+		bool locked_down =
+			(gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) ==
+			 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v());
+		bool no_error_pending =
+			check_errors &&
+			(gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) ==
+			 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) &&
+			((global_esr & ~global_esr_mask) == 0);
+
+		if (locked_down || no_error_pending) {
+			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
+				"GPC%d TPC%d: locked down SM", gpc, tpc);
+			return 0;
+		}
+
+		/* if an mmu fault is pending and mmu debug mode is not
+		 * enabled, the sm will never lock down. */
+		if (!mmu_debug_mode_enabled &&
+		     gk20a_fifo_mmu_fault_pending(g)) {
+			gk20a_err(dev_from_gk20a(g),
+				"GPC%d TPC%d: mmu fault pending,"
+				" sm will never lock down!", gpc, tpc);
+			return -EFAULT;
+		}
+
+		usleep_range(delay, delay * 2);
+		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
+
+	} while (time_before(jiffies, end_jiffies)
+			|| !tegra_platform_is_silicon());
+
+	gk20a_err(dev_from_gk20a(g),
+		"GPC%d TPC%d: timed out while trying to lock down SM",
+		gpc, tpc);
+
+	return -EAGAIN;
+}
+
+void gk20a_suspend_all_sms(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+	u32 gpc, tpc;
+	int err;
+	u32 dbgr_control0;
+
+	/* if an SM debugger isn't attached, skip suspend */
+	if (!gk20a_gr_sm_debugger_attached(g)) {
+		gk20a_err(dev_from_gk20a(g), "SM debugger not attached, "
+				"skipping suspend!\n");
+		return;
+	}
+
+	/* assert stop trigger. uniformity assumption: all SMs will have
+	 * the same state in dbg_control0. */
+	dbgr_control0 =
+		gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
+	dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
+
+	/* broadcast write */
+	gk20a_writel(g,
+		gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
+
+	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
+		for (tpc = 0; tpc < gr->tpc_count; tpc++) {
+			err =
+			 gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, 0, false);
+			if (err) {
+				gk20a_err(dev_from_gk20a(g),
+					"SuspendAllSms failed\n");
+				return;
+			}
+		}
+	}
+}
+
+void gk20a_resume_all_sms(struct gk20a *g)
+{
+	u32 dbgr_control0;
+	/*
+	 * The following requires some clarification. Despite the fact that both
+	 * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their
+	 * names, only one is actually a trigger, and that is the STOP_TRIGGER.
+	 * Merely writing a 1(_TASK) to the RUN_TRIGGER is not sufficient to
+	 * resume the gpu - the _STOP_TRIGGER must explicitly be set to 0
+	 * (_DISABLE) as well.
+
+	 * Advice from the arch group: Disable the stop trigger first, as a
+	 * separate operation, in order to ensure that the trigger has taken
+	 * effect, before enabling the run trigger.
+	 */
+
+	/*De-assert stop trigger */
+	dbgr_control0 =
+		gk20a_readl(g, gr_gpcs_tpcs_sm_dbgr_control0_r());
+	dbgr_control0 &= ~gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
+	gk20a_writel(g,
+		gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
+
+	/* Run trigger */
+	dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_enable_f();
+	gk20a_writel(g,
+		gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
+}
+
 void gk20a_init_gr_ops(struct gpu_ops *gops)
 {
 	gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg;
@@ -7232,3 +7310,4 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.is_tpc_addr = gr_gk20a_is_tpc_addr;
 	gops->gr.get_tpc_num = gr_gk20a_get_tpc_num;
 }
+
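
The change exports gk20a_gr_lock_down_sm(), factors the lock-down polling loop into gk20a_gr_wait_for_sm_lock_down(), and adds the broadcast gk20a_suspend_all_sms()/gk20a_resume_all_sms() pair. Below is a minimal sketch of how a caller outside gr_gk20a.c might use the new suspend/resume helpers; the wrapper function and its error handling are illustrative assumptions and are not part of this commit.

/*
 * Illustrative only: a hypothetical debugger-side caller using the
 * helpers added by this change. Only the gk20a_* calls shown here are
 * taken from the diff; everything else is a sketch.
 */
static int example_dbg_halt_and_resume_sms(struct gk20a *g)
{
	/* Suspend is only meaningful while an SM debugger is attached. */
	if (!gk20a_gr_sm_debugger_attached(g))
		return -EINVAL;

	/* Broadcast the stop trigger and wait for every GPC/TPC SM to
	 * report locked_down in dbgr_status0. */
	gk20a_suspend_all_sms(g);

	/* ... inspect or modify SM state here while the SMs are halted ... */

	/* De-assert the stop trigger, then issue the run trigger. */
	gk20a_resume_all_sms(g);

	return 0;
}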