From 1ba169d7dbea1f0e9254cbd285e92ff51235b7c0 Mon Sep 17 00:00:00 2001
From: Peter Daifuku <pdaifuku@nvidia.com>
Date: Fri, 4 Nov 2016 17:35:11 -0700
Subject: Revert "Revert "gpu: nvgpu: vgpu: alloc hwpm ctxt buf on client""

This reverts commit 5f1c2bc27fb9dd66ed046b0590afc365be5011bf.

Re-applied now that the matching RM server has been updated:

In hypervisor mode, all GPU VA allocations must be done by the client;
fix this for the allocation of the hwpm ctxt buffer.

Bug 200231611

Change-Id: Ie5ce2c2562401b1f00821231d37608e3fc30d4a4
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1252138
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h |  1 -
 drivers/gpu/nvgpu/vgpu/gr_vgpu.c   | 39 ++++++++++++++++++++++++--------------
 include/linux/tegra_vgpu.h         |  2 ++
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 1a9bee5f..512d32e9 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -110,7 +110,6 @@ struct zcull_ctx_desc {
 struct pm_ctx_desc {
 	struct mem_desc mem;
 	u32 pm_mode;
-	bool ctx_was_enabled;	/* Used in the virtual case only */
 };
 
 struct gk20a;
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index 01f5e1a5..65e3589b 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -91,8 +91,10 @@ int vgpu_gr_init_ctx_state(struct gk20a *g)
 
 	g->gr.ctx_vars.golden_image_size = priv->constants.golden_ctx_size;
 	g->gr.ctx_vars.zcull_ctxsw_image_size = priv->constants.zcull_ctx_size;
+	g->gr.ctx_vars.pm_ctxsw_image_size = priv->constants.hwpm_ctx_size;
 	if (!g->gr.ctx_vars.golden_image_size ||
-		!g->gr.ctx_vars.zcull_ctxsw_image_size)
+		!g->gr.ctx_vars.zcull_ctxsw_image_size ||
+		!g->gr.ctx_vars.pm_ctxsw_image_size)
 		return -ENXIO;
 
 	gr->ctx_vars.buffer_size = g->gr.ctx_vars.golden_image_size;
@@ -390,12 +392,13 @@ static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c)
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_channel_free_hwpm_ctx *p = &msg.params.free_hwpm_ctx;
 	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+	struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
 	int err;
 
 	gk20a_dbg_fn("");
 
 	/* check if hwpm was ever initialized. If not, nothing to do */
-	if (ch_ctx->pm_ctx.ctx_was_enabled == false)
+	if (pm_ctx->mem.gpu_va == 0)
 		return;
 
 	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX;
@@ -404,7 +407,8 @@ static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c)
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
 
-	ch_ctx->pm_ctx.ctx_was_enabled = false;
+	gk20a_vm_free_va(c->vm, pm_ctx->mem.gpu_va, pm_ctx->mem.size, 0);
+	pm_ctx->mem.gpu_va = 0;
 }
 
 static void vgpu_gr_free_channel_ctx(struct channel_gk20a *c)
@@ -1019,28 +1023,35 @@ static int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
 static int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
 	struct channel_gk20a *ch, bool enable)
 {
+	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
+	struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode;
 	int err;
 
 	gk20a_dbg_fn("");
 
-	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE;
-	msg.handle = vgpu_get_handle(g);
-	p->handle = ch->virt_ctx;
-
-	/* If we just enabled HWPM context switching, flag this
-	 * so we know we need to free the buffer when channel contexts
-	 * are cleaned up.
-	 */
 	if (enable) {
-		struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
-		ch_ctx->pm_ctx.ctx_was_enabled = true;
-
 		p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
+
+		/* Allocate buffer if necessary */
+		if (pm_ctx->mem.gpu_va == 0) {
+			pm_ctx->mem.gpu_va = gk20a_vm_alloc_va(ch->vm,
+					g->gr.ctx_vars.pm_ctxsw_image_size,
+					gmmu_page_size_kernel);
+
+			if (!pm_ctx->mem.gpu_va)
+				return -ENOMEM;
+			pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size;
+		}
 	} else
 		p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
 
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE;
+	msg.handle = vgpu_get_handle(g);
+	p->handle = ch->virt_ctx;
+	p->gpu_va = pm_ctx->mem.gpu_va;
+
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
 
diff --git a/include/linux/tegra_vgpu.h b/include/linux/tegra_vgpu.h
index be8b9ad1..456622a4 100644
--- a/include/linux/tegra_vgpu.h
+++ b/include/linux/tegra_vgpu.h
@@ -348,6 +348,7 @@ enum {
 
 struct tegra_vgpu_channel_set_ctxsw_mode {
 	u64 handle;
+	u64 gpu_va;
 	u32 mode;
 };
 
@@ -436,6 +437,7 @@ struct tegra_vgpu_constants_params {
 	 * TEGRA_VGPU_MAX_TPC_COUNT_PER_GPC
 	 */
 	u16 gpc_tpc_mask[TEGRA_VGPU_MAX_GPC_COUNT];
+	u32 hwpm_ctx_size;
 };
 
 struct tegra_vgpu_channel_cyclestats_snapshot_params {
-- 
cgit v1.2.2