From 6eeabfbdd08e48f924885952c80ff41aa2b534b7 Mon Sep 17 00:00:00 2001
From: Peter Daifuku <pdaifuku@nvidia.com>
Date: Wed, 23 Mar 2016 09:43:43 -0700
Subject: gpu: nvgpu: vgpu: virtualized SMPC/HWPM ctx switch

Add support for SMPC and HWPM context switching when the GPU is virtualized.

Bug 1648200
JIRASW EVLR-219
JIRASW EVLR-253

Change-Id: I80a1613eaad87d8510f00d9aef001400d642ecdf
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1122034
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c |  8 ++--
 drivers/gpu/nvgpu/gk20a/gk20a.h         |  6 +++
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c      |  2 +
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h      |  1 +
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c      |  2 +
 drivers/gpu/nvgpu/vgpu/gr_vgpu.c        | 83 +++++++++++++++++++++++++++++++++
 include/linux/tegra_vgpu.h              | 25 +++++++++-
 7 files changed, 122 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 321cebb2..309fe75a 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -891,8 +891,8 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 		goto clean_up;
 	}
 
-	err = gr_gk20a_update_smpc_ctxsw_mode(g, ch_gk20a,
-		      args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
+	err = g->ops.gr.update_smpc_ctxsw_mode(g, ch_gk20a,
+				args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
 	if (err) {
 		gk20a_err(dev_from_gk20a(g),
 			  "error (%d) during smpc ctxsw mode update\n", err);
@@ -927,8 +927,8 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 		goto clean_up;
 	}
 
-	err = gr_gk20a_update_hwpm_ctxsw_mode(g, ch_gk20a,
-		      args->mode == NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW);
+	err = g->ops.gr.update_hwpm_ctxsw_mode(g, ch_gk20a,
+				args->mode == NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW);
 	if (err)
 		gk20a_err(dev_from_gk20a(g),
 			  "error (%d) during pm ctxsw mode update\n", err);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 9d8dc5f7..d9cc3d4f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -172,6 +172,12 @@ struct gpu_ops {
 		void (*update_ctxsw_preemption_mode)(struct gk20a *g,
 				struct channel_ctx_gk20a *ch_ctx,
 				void *ctx_ptr);
+		int (*update_smpc_ctxsw_mode)(struct gk20a *g,
+				struct channel_gk20a *c,
+				bool enable);
+		int (*update_hwpm_ctxsw_mode)(struct gk20a *g,
+				struct channel_gk20a *c,
+				bool enable);
 		int (*dump_gr_regs)(struct gk20a *g,
 				struct gk20a_debug_output *o);
 		int (*update_pc_sampling)(struct channel_gk20a *ch,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index b0b5571f..ada67edd 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -8308,4 +8308,6 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.handle_sm_exception = gr_gk20a_handle_sm_exception;
 	gops->gr.handle_tex_exception = gr_gk20a_handle_tex_exception;
 	gops->gr.get_lrf_tex_ltc_dram_override = NULL;
+	gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
+	gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 52d6c4e5..5390536e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -76,6 +76,7 @@ struct zcull_ctx_desc {
 struct pm_ctx_desc {
 	struct mem_desc mem;
 	u32 pm_mode;
+	bool ctx_was_enabled;	/* Used in the virtual case only: set when HWPM ctxsw is enabled, cleared when the HWPM ctx is freed */
 };
 
 struct gk20a;
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 67654cba..6a79b1ac 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1233,4 +1233,6 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.handle_sm_exception = gr_gk20a_handle_sm_exception;
 	gops->gr.handle_tex_exception = gr_gk20a_handle_tex_exception;
 	gops->gr.get_lrf_tex_ltc_dram_override = NULL;
+	gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
+	gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
 }
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index 32e451ed..16d51ad3 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -402,12 +402,36 @@ static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c)
 	}
 }
 
+static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c)
+{	/* Sends TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX for channel c if HWPM ctxsw was ever enabled on it. */
+	struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
+	struct tegra_vgpu_cmd_msg msg;	/* NOTE(review): not zero-initialized; bytes not set below are sent as-is */
+	struct tegra_vgpu_channel_free_hwpm_ctx *p = &msg.params.free_hwpm_ctx;
+	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	/* ctx_was_enabled is set by the HWPM ctxsw mode update; if it is clear, nothing to do */
+	if (ch_ctx->pm_ctx.ctx_was_enabled == false)
+		return;
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX;
+	msg.handle = platform->virt_handle;
+	p->handle = c->virt_ctx;	/* the channel's virt_ctx is passed as the request handle */
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	WARN_ON(err || msg.ret);	/* void function: a failure is only warned about */
+
+	ch_ctx->pm_ctx.ctx_was_enabled = false;	/* cleared regardless of the outcome above */
+}
+
 static void vgpu_gr_free_channel_ctx(struct channel_gk20a *c)
 {
 	gk20a_dbg_fn("");
 
 	vgpu_gr_unmap_global_ctx_buffers(c);
 	vgpu_gr_free_channel_patch_ctx(c);
+	vgpu_gr_free_channel_pm_ctx(c);
 	if (!gk20a_is_channel_marked_as_tsg(c))
 		vgpu_gr_free_channel_gr_ctx(c);
 
@@ -950,6 +974,63 @@ static int vgpu_gr_set_sm_debug_mode(struct gk20a *g,
 	return err ? err : msg.ret;
 }
 
+static int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
+	struct channel_gk20a *ch, bool enable)
+{	/* Sends TEGRA_VGPU_CMD_CHANNEL_SET_SMPC_CTXSW_MODE for ch; returns 0, err, or msg.ret. */
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	struct tegra_vgpu_cmd_msg msg;	/* NOTE(review): not zero-initialized; bytes not set below are sent as-is */
+	struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_SMPC_CTXSW_MODE;
+	msg.handle = platform->virt_handle;
+	p->handle = ch->virt_ctx;	/* the channel's virt_ctx is passed as the request handle */
+
+	if (enable)	/* map the boolean onto the TEGRA_VGPU_CTXSW_MODE_* value carried in the message */
+		p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
+	else
+		p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
+
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	WARN_ON(err || msg.ret);
+
+	return err ? err : msg.ret;	/* err from vgpu_comm_sendrecv takes precedence over msg.ret */
+}
+
+static int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
+	struct channel_gk20a *ch, bool enable)
+{	/* Sends TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE for ch; returns 0, err, or msg.ret. */
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	struct tegra_vgpu_cmd_msg msg;	/* NOTE(review): not zero-initialized; bytes not set below are sent as-is */
+	struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE;
+	msg.handle = platform->virt_handle;
+	p->handle = ch->virt_ctx;	/* the channel's virt_ctx is passed as the request handle */
+
+	/* Flag that HWPM ctxsw was enabled so vgpu_gr_free_channel_pm_ctx()
+	 * sends the free request at channel ctx cleanup. NOTE(review): set
+	 * before the request is sent, so it stays set if the request fails.
+	 */
+	if (enable) {
+		struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
+		ch_ctx->pm_ctx.ctx_was_enabled = true;
+
+		p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
+	} else	/* disabling leaves ctx_was_enabled untouched */
+		p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
+
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	WARN_ON(err || msg.ret);
+
+	return err ? err : msg.ret;	/* err from vgpu_comm_sendrecv takes precedence over msg.ret */
+}
+
 void vgpu_init_gr_ops(struct gpu_ops *gops)
 {
 	gops->gr.free_channel_ctx = vgpu_gr_free_channel_ctx;
@@ -969,4 +1050,6 @@ void vgpu_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.zbc_query_table = vgpu_gr_query_zbc;
 	gops->gr.init_ctx_state = vgpu_gr_init_ctx_state;
 	gops->gr.set_sm_debug_mode = vgpu_gr_set_sm_debug_mode;
+	gops->gr.update_smpc_ctxsw_mode = vgpu_gr_update_smpc_ctxsw_mode;
+	gops->gr.update_hwpm_ctxsw_mode = vgpu_gr_update_hwpm_ctxsw_mode;
 }
diff --git a/include/linux/tegra_vgpu.h b/include/linux/tegra_vgpu.h
index c4dd81dd..979d454e 100644
--- a/include/linux/tegra_vgpu.h
+++ b/include/linux/tegra_vgpu.h
@@ -76,7 +76,14 @@ enum {
 	TEGRA_VGPU_CMD_REG_OPS,
 	TEGRA_VGPU_CMD_CHANNEL_SET_PRIORITY,
 	TEGRA_VGPU_CMD_CHANNEL_SET_RUNLIST_INTERLEAVE,
-	TEGRA_VGPU_CMD_CHANNEL_SET_TIMESLICE
+	TEGRA_VGPU_CMD_CHANNEL_SET_TIMESLICE,
+	RESVD1,
+	RESVD2,
+	RESVD3,
+	RESVD4,
+	TEGRA_VGPU_CMD_CHANNEL_SET_SMPC_CTXSW_MODE,
+	TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE,
+	TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX,
 };
 
 struct tegra_vgpu_connect_params {
@@ -312,6 +319,20 @@ struct tegra_vgpu_channel_timeslice_params {
 	u32 timeslice_us;
 };
 
+enum {	/* values for tegra_vgpu_channel_set_ctxsw_mode.mode */
+	TEGRA_VGPU_CTXSW_MODE_NO_CTXSW = 0,	/* sent when the caller passes enable == false */
+	TEGRA_VGPU_CTXSW_MODE_CTXSW,	/* sent when the caller passes enable == true */
+};
+
+struct tegra_vgpu_channel_set_ctxsw_mode {	/* params for the SET_SMPC_CTXSW_MODE and SET_HWPM_CTXSW_MODE commands */
+	u64 handle;	/* filled with the channel's virt_ctx */
+	u32 mode;	/* one of TEGRA_VGPU_CTXSW_MODE_* */
+};
+
+struct tegra_vgpu_channel_free_hwpm_ctx {	/* params for TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX */
+	u64 handle;	/* filled with the channel's virt_ctx */
+};
+
 struct tegra_vgpu_cmd_msg {
 	u32 cmd;
 	int ret;
@@ -342,6 +363,8 @@ struct tegra_vgpu_cmd_msg {
 		struct tegra_vgpu_channel_priority_params channel_priority;
 		struct tegra_vgpu_channel_runlist_interleave_params channel_interleave;
 		struct tegra_vgpu_channel_timeslice_params channel_timeslice;
+		struct tegra_vgpu_channel_set_ctxsw_mode set_ctxsw_mode;
+		struct tegra_vgpu_channel_free_hwpm_ctx free_hwpm_ctx;
 		char padding[192];
 	} params;
 };
-- 
cgit v1.2.2