From 67b54c3c76cfa488e1d102e5c74a32b3aaba9287 Mon Sep 17 00:00:00 2001
From: seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Wed, 16 Nov 2016 12:25:08 -0800
Subject: gpu: nvgpu: legacy support with subcontext

gv11b needs at least one subcontext to submit work. To support
legacy in gv11b, the main context is currently always copied into
subcontext0 (veid0) during channel commit instance.

As part of channel commit instance, veid0 for that channel is
created and the relevant pdb and context info is copied to veid0.

JIRA GV11B-21

Change-Id: I5147a1708b5e94202fa55e73fa0e53199ab7fced
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1231169
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile             |   3 +-
 drivers/gpu/nvgpu/gv11b/fifo_gv11b.c   |  12 +++
 drivers/gpu/nvgpu/gv11b/gr_gv11b.c     |  42 +++++++++-
 drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | 147 +++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gv11b/subctx_gv11b.h |  27 ++++++
 5 files changed, 228 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
 create mode 100644 drivers/gpu/nvgpu/gv11b/subctx_gv11b.h

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 4ea56d8e..c059e464 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -13,6 +13,7 @@ nvgpu-y += \
 	$(nvgpu-t19x)/gv11b/ce_gv11b.o \
 	$(nvgpu-t19x)/gv11b/gr_ctx_gv11b.o \
 	$(nvgpu-t19x)/gv11b/pmu_gv11b.o \
-	$(nvgpu-t19x)/gv11b/therm_gv11b.o
+	$(nvgpu-t19x)/gv11b/therm_gv11b.o \
+	$(nvgpu-t19x)/gv11b/subctx_gv11b.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t19x)/gv11b/platform_gv11b_tegra.o
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index bc413a9e..b9276e09 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -19,6 +19,8 @@
 #include "gp10b/fifo_gp10b.h"
 #include "hw_pbdma_gv11b.h"
 #include "fifo_gv11b.h"
+#include "subctx_gv11b.h"
+#include "gr_gv11b.h"
 #include "hw_fifo_gv11b.h"
 #include "hw_ram_gv11b.h"
 #include "hw_ccsr_gv11b.h"
@@ -200,6 +202,15 @@ static void gv11b_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
 
 }
 
+/* gv11b unbind: free the subctx header, then run the common unbind. */
+static void channel_gv11b_unbind(struct channel_gk20a *ch)
+{
+	gk20a_dbg_fn("");
+
+	gv11b_free_subctx_header(ch);
+
+	channel_gk20a_unbind(ch);
+}
 
 static u32 gv11b_fifo_get_num_fifos(struct gk20a *g)
 {
@@ -218,4 +229,5 @@ void gv11b_init_fifo(struct gpu_ops *gops)
 	gops->fifo.userd_gp_get = gv11b_userd_gp_get;
 	gops->fifo.userd_gp_put = gv11b_userd_gp_put;
 	gops->fifo.setup_ramfc = channel_gv11b_setup_ramfc;
+	gops->fifo.unbind_channel = channel_gv11b_unbind;
 }
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 7f5b8d3f..bdb96329 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -13,6 +13,7 @@
  * more details.
  */
 
+#include <linux/tegra_gpu_t19x.h>
 #include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */
 #include <linux/delay.h>
 #include <linux/tegra-fuse.h>
@@ -24,12 +25,16 @@
 
 #include "gm20b/gr_gm20b.h"
 #include "gv11b/gr_gv11b.h"
+#include "gv11b/mm_gv11b.h"
+#include "gv11b/subctx_gv11b.h"
 #include "hw_gr_gv11b.h"
 #include "hw_fifo_gv11b.h"
 #include "hw_proj_gv11b.h"
 #include "hw_ctxsw_prog_gv11b.h"
 #include "hw_mc_gv11b.h"
 #include "hw_gr_gv11b.h"
+#include "hw_ram_gv11b.h"
+#include "hw_pbdma_gv11b.h"
 #include <linux/vmalloc.h>
 #include <linux/tegra_gpu_t19x.h>
 
@@ -1583,7 +1588,6 @@ static int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
 	return 0;
 }
 
-
 static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index)
 {
 	struct av_list_gk20a *sw_veid_bundle_init =
@@ -1766,12 +1770,43 @@ static int gr_gv11b_load_smid_config(struct gk20a *g)
 
 	for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
 		gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
-
 	kfree(tpc_sm_id);
 
 	return 0;
 }
 
+/*
+ * Set up the channel's subcontext header with gpu_va and point the instance
+ * block's engine WFI pointer at it.
+ * Returns 0, or the error reported by header allocation/update.
+ */
+static int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
+{
+	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	u32 addr_lo, addr_hi;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	err = gv11b_alloc_subctx_header(c);
+	if (!err)
+		err = gv11b_update_subctx_header(c, gpu_va);
+	if (err)
+		return err;	/* never point WFI at an unset header */
+
+	addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v();
+	addr_hi = u64_hi32(ctx->mem.gpu_va);
+	/* point this address to engine_wfi_ptr */
+	gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_target_w(),
+		ram_in_engine_cs_wfi_v() |
+		ram_in_engine_wfi_target_f(
+			ram_in_engine_wfi_target_sys_mem_ncoh_v()) |
+		ram_in_engine_wfi_mode_f(ram_in_engine_wfi_mode_virtual_v()) |
+		ram_in_engine_wfi_ptr_lo_f(addr_lo));
+	gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_ptr_hi_w(),
+		ram_in_engine_wfi_ptr_hi_f(addr_hi));
+	return 0;
+}
+
 static int gr_gv11b_commit_global_timeslice(struct gk20a *g,
 					struct channel_gk20a *c, bool patch)
 {
@@ -1828,6 +1863,7 @@ static int gr_gv11b_commit_global_timeslice(struct gk20a *g,
 void gv11b_init_gr(struct gpu_ops *gops)
 {
 	gp10b_init_gr(gops);
+	gops->gr.init_preemption_state = NULL;
 	gops->gr.init_fs_state = gr_gv11b_init_fs_state;
 	gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch;
 	gops->gr.is_valid_class = gr_gv11b_is_valid_class;
@@ -1872,4 +1908,6 @@ void gv11b_init_gr(struct gpu_ops *gops)
 	gops->gr.load_smid_config = gr_gv11b_load_smid_config;
 	gops->gr.program_sm_id_numbering =
 			gr_gv11b_program_sm_id_numbering;
+	gops->gr.commit_inst = gr_gv11b_commit_inst;
+
 }
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
new file mode 100644
index 00000000..3acc53f6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -0,0 +1,147 @@
+/*
+ * Volta GPU series Subcontext
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.
+ */
+
+#include "gk20a/gk20a.h"
+#include "gk20a/semaphore_gk20a.h"
+#include "gv11b/subctx_gv11b.h"
+#include "gv11b/hw_ram_gv11b.h"
+#include "gv11b/hw_ctxsw_prog_gv11b.h"
+
+static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
+				struct mem_desc *inst_block);
+
+/* Unmap and free the channel's subcontext header, if one was set up. */
+void gv11b_free_subctx_header(struct channel_gk20a *c)
+{
+	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	struct gk20a *g = c->g;
+
+	gk20a_dbg_fn("");
+	if (!ctx->mem.gpu_va)
+		return;
+	gk20a_gmmu_unmap(c->vm, ctx->mem.gpu_va, ctx->mem.size,
+			gk20a_mem_flag_none);
+	ctx->mem.gpu_va = 0;	/* 0 means "no header" to the alloc path */
+	gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &ctx->mem);
+}
+
+/* Allocate, map and zero the channel's subctx header once; 0 or -errno. */
+int gv11b_alloc_subctx_header(struct channel_gk20a *c)
+{
+	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	struct gk20a *g = c->g;
+	int ret;
+
+	gk20a_dbg_fn("");
+
+	if (ctx->mem.gpu_va)	/* header already set up by an earlier call */
+		return 0;
+
+	ret = gk20a_gmmu_alloc_attr_sys(g, DMA_ATTR_NO_KERNEL_MAPPING,
+			ctxsw_prog_fecs_header_v(), &ctx->mem);
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+			"failed to allocate sub ctx header");
+		return ret;
+	}
+	ctx->mem.gpu_va = gk20a_gmmu_map(c->vm, &ctx->mem.sgt, ctx->mem.size,
+				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+				gk20a_mem_flag_none, true, ctx->mem.aperture);
+	if (!ctx->mem.gpu_va) {
+		gk20a_err(dev_from_gk20a(g), "failed to map ctx header");
+		goto free_mem;
+	}
+	/* Now clear the buffer */
+	if (gk20a_mem_begin(g, &ctx->mem))
+		goto unmap;
+	gk20a_memset(g, &ctx->mem, 0, 0, ctx->mem.size);
+	gk20a_mem_end(g, &ctx->mem);
+
+	gv11b_init_subcontext_pdb(c, &c->inst_block);
+	return 0;
+
+unmap:
+	gk20a_gmmu_unmap(c->vm, ctx->mem.gpu_va, ctx->mem.size,
+			gk20a_mem_flag_none);
+	ctx->mem.gpu_va = 0;	/* so a later call retries from scratch */
+free_mem:
+	gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &ctx->mem);
+	return -ENOMEM;
+}
+
+/* Load the channel VM's PDB into subcontext 0 (veid0) of inst_block. */
+static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
+				struct mem_desc *inst_block)
+{
+	struct gk20a *g = c->g;
+	struct vm_gk20a *vm;
+	u64 pdb_addr, pdb_addr_lo, pdb_addr_hi;
+	u32 format_word;
+	u32 lo, hi;	/* word offsets into inst_block */
+
+	gk20a_dbg_fn("");
+	/* load main pdb as veid0 pdb also */
+	vm = c->vm;
+	pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
+	pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
+	pdb_addr_hi = u64_hi32(pdb_addr);
+	format_word = ram_in_sc_page_dir_base_target_f(
+		ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) |
+		ram_in_sc_page_dir_base_vol_f(
+		ram_in_sc_page_dir_base_vol_true_v(), 0) |
+		ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) |
+		ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) |
+		ram_in_sc_use_ver2_pt_format_f(1, 0) |
+		ram_in_sc_big_page_size_f(1, 0) |
+		ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo);
+	lo = ram_in_sc_page_dir_base_vol_0_w();
+	hi = ram_in_sc_page_dir_base_hi_0_w();
+	gk20a_mem_wr32(g, inst_block, lo, format_word);
+	gk20a_mem_wr32(g, inst_block, hi, pdb_addr_hi);
+
+	/* mark the subcontext0 address space as valid */
+	/* TODO: use proper hw register definitions instead of raw offsets */
+	gk20a_mem_wr32(g, inst_block, 166, 0x1);
+	gk20a_mem_wr32(g, inst_block, 167, 0);
+	gk20a_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(),
+			ram_in_engine_wfi_veid_f(0));
+}
+
+/*
+ * Store the context buffer GPU VA (gpu_va) in the channel's subcontext
+ * header. Returns 0, or -ENOMEM if gk20a_mem_begin() fails on the header.
+ */
+int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
+{
+	struct mem_desc *hdr = &c->ch_ctx.ctx_header.mem;
+	struct gk20a *g = c->g;
+	const u32 va_hi = u64_hi32(gpu_va);
+	const u32 va_lo = u64_lo32(gpu_va);
+
+	g->ops.mm.l2_flush(g, true);
+	if (gk20a_mem_begin(g, hdr))
+		return -ENOMEM;
+
+	gk20a_mem_wr(g, hdr,
+		ctxsw_prog_main_image_context_buffer_ptr_hi_o(), va_hi);
+	gk20a_mem_wr(g, hdr,
+		ctxsw_prog_main_image_context_buffer_ptr_o(), va_lo);
+	gk20a_mem_end(g, hdr);
+
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h
new file mode 100644
index 00000000..357cd254
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * Volta GPU series Subcontext
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.
+ */
+#ifndef __SUBCONTEXT_GV11B_H__
+#define __SUBCONTEXT_GV11B_H__
+/* Allocate and map the channel's subcontext header; 0 or negative errno. */
+int gv11b_alloc_subctx_header(struct channel_gk20a *c);
+/* Unmap and free the channel's subcontext header, if one is mapped. */
+void gv11b_free_subctx_header(struct channel_gk20a *c);
+/* Write gpu_va into the channel's subcontext header; 0 or -ENOMEM. */
+int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va);
+#endif /* __SUBCONTEXT_GV11B_H__ */
-- 
cgit v1.2.2