From 5febd08ae76cbd4042e53ad70f062cd491b7e8b6 Mon Sep 17 00:00:00 2001 From: sujeet baranwal Date: Tue, 30 Sep 2014 10:54:57 -0700 Subject: gpu: kernel support for suspending/resuming SMs Kernel support for allowing a GPU debugger to suspend and resume SMs. Invocation of "suspend" on a given channel will suspend all SMs if the channel is resident, else remove the channel from the runlist. Similarly, "resume" will either resume all SMs if the channel was resident, or re-enable the channel in the runlist. Change-Id: I3b4ae21dc1b91c1059c828ec6db8125f8a0ce194 Signed-off-by: sujeet baranwal Signed-off-by: Mayank Kaushik Reviewed-on: http://git-master/r/552115 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 8 ++ drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 2 +- drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 73 ++++++++++++- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 187 +++++++++++++++++++++++--------- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 9 +- drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h | 62 +++++++++++ 6 files changed, 284 insertions(+), 57 deletions(-) (limited to 'drivers/gpu/nvgpu/gk20a') diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 9f2e0017..f554cf77 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -396,6 +396,14 @@ static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add) return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true); } +void channel_gk20a_enable(struct channel_gk20a *ch) +{ + /* enable channel */ + gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid), + gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) | + ccsr_channel_enable_set_true_f()); +} + void channel_gk20a_disable(struct channel_gk20a *ch) { /* disable channel */ diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index aa87464b..a028b6f3 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h 
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -223,5 +223,5 @@ int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch); void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch); int channel_gk20a_setup_ramfc(struct channel_gk20a *c, u64 gpfifo_base, u32 gpfifo_entries); - +void channel_gk20a_enable(struct channel_gk20a *ch); #endif /* CHANNEL_GK20A_H */ diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 94486064..39941aae 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -28,6 +28,7 @@ #include "dbg_gpu_gk20a.h" #include "regops_gk20a.h" #include "hw_therm_gk20a.h" +#include "hw_gr_gk20a.h" struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = { .exec_reg_ops = exec_regops_gk20a, @@ -359,6 +360,11 @@ static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args); +static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( + struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); + + long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -418,8 +424,13 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, (struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *)buf); break; + case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS: + err = nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s, + (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); + break; + default: - dev_dbg(dev_from_gk20a(g), + gk20a_err(dev_from_gk20a(g), "unrecognized dbg gpu ioctl cmd: 0x%x", cmd); err = -ENOTTY; @@ -693,3 +704,63 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, mutex_unlock(&g->dbg_sessions_lock); return err; } + +static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( + struct dbg_session_gk20a *dbg_s, + struct 
nvgpu_dbg_gpu_suspend_resume_all_sms_args *args) +{ + struct gk20a *g = get_gk20a(dbg_s->pdev); + struct channel_gk20a *ch = dbg_s->ch; + bool ch_is_curr_ctx; + int err = 0, action = args->mode; + + mutex_lock(&g->dbg_sessions_lock); + + /* Suspend GPU context switching */ + /* Disable channel switching. + * at that point the hardware state can be inspected to + * determine if the context we're interested in is current. + */ + err = gr_gk20a_disable_ctxsw(g); + if (err) { + gk20a_err(dev_from_gk20a(g), "unable to stop gr ctxsw"); + /* this should probably be ctx-fatal... */ + goto clean_up; + } + + /* find out whether the current channel is resident */ + ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch); + + if (ch_is_curr_ctx) { + switch (action) { + case NVGPU_DBG_GPU_SUSPEND_ALL_SMS: + gk20a_suspend_all_sms(g); + break; + + case NVGPU_DBG_GPU_RESUME_ALL_SMS: + gk20a_resume_all_sms(g); + break; + } + } else { + switch (action) { + case NVGPU_DBG_GPU_SUSPEND_ALL_SMS: + /* Disable the channel */ + channel_gk20a_disable(ch); + break; + + case NVGPU_DBG_GPU_RESUME_ALL_SMS: + /* Enable the channel */ + channel_gk20a_enable(ch); + break; + } + } + + /* Resume GPU context switching */ + err = gr_gk20a_enable_ctxsw(g); + if (err) + gk20a_err(dev_from_gk20a(g), "unable to restart ctxsw!\n"); + + clean_up: + mutex_unlock(&g->dbg_sessions_lock); + return err; +} diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 452560d8..b3fc8ae1 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -79,6 +79,10 @@ static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c); static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, struct channel_gk20a *c); +/* sm lock down */ +static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, + u32 global_esr_mask, bool check_errors); + void gk20a_fecs_dump_falcon_stats(struct gk20a *g) { int i; @@ -5365,13 +5369,9 @@ unlock: return chid; } 
-static int gk20a_gr_lock_down_sm(struct gk20a *g, +int gk20a_gr_lock_down_sm(struct gk20a *g, u32 gpc, u32 tpc, u32 global_esr_mask) { - unsigned long end_jiffies = jiffies + - msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); - u32 delay = GR_IDLE_CHECK_DEFAULT; - bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g); u32 offset = proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc; u32 dbgr_control0; @@ -5386,55 +5386,8 @@ static int gk20a_gr_lock_down_sm(struct gk20a *g, gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0); - /* wait for the sm to lock down */ - do { - u32 global_esr = gk20a_readl(g, - gr_gpc0_tpc0_sm_hww_global_esr_r() + offset); - u32 warp_esr = gk20a_readl(g, - gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset); - u32 dbgr_status0 = gk20a_readl(g, - gr_gpc0_tpc0_sm_dbgr_status0_r() + offset); - bool locked_down = - (gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) == - gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v()); - bool error_pending = - (gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) != - gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) || - ((global_esr & ~global_esr_mask) != 0); - - if (locked_down || !error_pending) { - gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, - "GPC%d TPC%d: locked down SM", gpc, tpc); - - /* de-assert stop trigger */ - dbgr_control0 &= ~gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f(); - gk20a_writel(g, - gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, - dbgr_control0); - - return 0; - } - - /* if an mmu fault is pending and mmu debug mode is not - * enabled, the sm will never lock down. 
*/ - if (!mmu_debug_mode_enabled && gk20a_fifo_mmu_fault_pending(g)) { - gk20a_err(dev_from_gk20a(g), - "GPC%d TPC%d: mmu fault pending," - " sm will never lock down!", gpc, tpc); - return -EFAULT; - } - - usleep_range(delay, delay * 2); - delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); - - } while (time_before(jiffies, end_jiffies) - || !tegra_platform_is_silicon()); - - gk20a_err(dev_from_gk20a(g), - "GPC%d TPC%d: timed out while trying to lock down SM", - gpc, tpc); - - return -EAGAIN; + return gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, global_esr_mask, + true); } bool gk20a_gr_sm_debugger_attached(struct gk20a *g) @@ -7198,6 +7151,131 @@ static u32 gr_gk20a_get_tpc_num(u32 addr) return 0; } +static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, + u32 global_esr_mask, bool check_errors) +{ + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); + u32 delay = GR_IDLE_CHECK_DEFAULT; + bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g); + u32 offset = + proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc; + + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, + "GPC%d TPC%d: locking down SM", gpc, tpc); + + /* wait for the sm to lock down */ + do { + u32 global_esr = gk20a_readl(g, + gr_gpc0_tpc0_sm_hww_global_esr_r() + offset); + u32 warp_esr = gk20a_readl(g, + gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset); + u32 dbgr_status0 = gk20a_readl(g, + gr_gpc0_tpc0_sm_dbgr_status0_r() + offset); + bool locked_down = + (gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) == + gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v()); + bool no_error_pending = + check_errors && + (gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) == + gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) && + ((global_esr & ~global_esr_mask) == 0); + + if (locked_down || no_error_pending) { + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, + "GPC%d TPC%d: locked down SM", gpc, tpc); + return 0; + } + + /* if an mmu fault is pending and mmu 
debug mode is not + * enabled, the sm will never lock down. */ + if (!mmu_debug_mode_enabled && + gk20a_fifo_mmu_fault_pending(g)) { + gk20a_err(dev_from_gk20a(g), + "GPC%d TPC%d: mmu fault pending," + " sm will never lock down!", gpc, tpc); + return -EFAULT; + } + + usleep_range(delay, delay * 2); + delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); + + } while (time_before(jiffies, end_jiffies) + || !tegra_platform_is_silicon()); + + gk20a_err(dev_from_gk20a(g), + "GPC%d TPC%d: timed out while trying to lock down SM", + gpc, tpc); + + return -EAGAIN; +} + +void gk20a_suspend_all_sms(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + u32 gpc, tpc; + int err; + u32 dbgr_control0; + + /* if an SM debugger isn't attached, skip suspend */ + if (!gk20a_gr_sm_debugger_attached(g)) { + gk20a_err(dev_from_gk20a(g), "SM debugger not attached, " + "skipping suspend!\n"); + return; + } + + /* assert stop trigger. uniformity assumption: all SMs will have + * the same state in dbg_control0. */ + dbgr_control0 = + gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r()); + dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f(); + + /* broadcast write */ + gk20a_writel(g, + gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); + + for (gpc = 0; gpc < gr->gpc_count; gpc++) { + for (tpc = 0; tpc < gr->tpc_count; tpc++) { + err = + gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, 0, false); + if (err) { + gk20a_err(dev_from_gk20a(g), + "SuspendAllSms failed\n"); + return; + } + } + } +} + +void gk20a_resume_all_sms(struct gk20a *g) +{ + u32 dbgr_control0; + /* + * The following requires some clarification. Despite the fact that both + * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their + * names, only one is actually a trigger, and that is the STOP_TRIGGER. + * Merely writing a 1(_TASK) to the RUN_TRIGGER is not sufficient to + * resume the gpu - the _STOP_TRIGGER must explicitly be set to 0 + * (_DISABLE) as well. 
+ + * Advice from the arch group: Disable the stop trigger first, as a + * separate operation, in order to ensure that the trigger has taken + * effect, before enabling the run trigger. + */ + + /*De-assert stop trigger */ + dbgr_control0 = + gk20a_readl(g, gr_gpcs_tpcs_sm_dbgr_control0_r()); + dbgr_control0 &= ~gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f(); + gk20a_writel(g, + gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); + + /* Run trigger */ + dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_enable_f(); + gk20a_writel(g, + gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); +} + void gk20a_init_gr_ops(struct gpu_ops *gops) { gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; @@ -7232,3 +7310,4 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) gops->gr.is_tpc_addr = gr_gk20a_is_tpc_addr; gops->gr.get_tpc_num = gr_gk20a_get_tpc_num; } + diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 4b1f6de2..72642a41 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -449,4 +449,11 @@ void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base, void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *c); -#endif /* GR_GK20A_H */ +int gr_gk20a_disable_ctxsw(struct gk20a *g); +int gr_gk20a_enable_ctxsw(struct gk20a *g); +void gk20a_resume_all_sms(struct gk20a *g); +void gk20a_suspend_all_sms(struct gk20a *g); +int gk20a_gr_lock_down_sm(struct gk20a *g, + u32 gpc, u32 tpc, u32 global_esr_mask); +bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch); +#endif /*__GR_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h index 463443d6..65a3072c 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h @@ -2810,6 +2810,14 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f(void) { return 0x80000000; } +static inline u32 
gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_disable_f(void) +{ + return 0x00000000; +} +static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_run_trigger_task_f(void) +{ + return 0x40000000; +} static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void) { return 0x0050460c; @@ -2822,6 +2830,22 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v(void) { return 0x00000001; } +static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_r(void) +{ + return 0x00419e50; +} +static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_bpt_int_pending_f(void) +{ + return 0x10; +} +static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_bpt_pause_pending_f(void) +{ + return 0x20; +} +static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_single_step_complete_pending_f(void) +{ + return 0x40; +} static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_r(void) { return 0x00504650; @@ -3206,4 +3230,42 @@ static inline u32 gr_gpcs_tpcs_sm_sch_texlock_dot_t_unlock_disable_f(void) { return 0x0; } +static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_r(void) +{ + return 0x00419e10; +} + +static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_r_debugger_mode_v(u32 r) +{ + return (r >> 0) & 0x1; +} + +static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_v(u32 r) +{ + return (r >> 31) & 0x1; +} +static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_m(void) +{ + return 0x1 << 31; +} +static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f(void) +{ + return 0x80000000; +} +static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_v(u32 r) +{ + return (r >> 30) & 0x1; +} +static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_m(void) +{ + return 0x1 << 30; +} +static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_enable_f(void) +{ + return 0x40000000; +} +static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_debugger_mode_on_f(void) +{ + return 0x1; +} #endif -- cgit v1.2.2