1 files changed, 147 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
new file mode 100644
index 00000000..3acc53f6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -0,0 +1,147 @@
+/*
+ * Volta GPU series Subcontext
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.
+ */
+#include "gk20a/gk20a.h"
+#include "gk20a/semaphore_gk20a.h"
+#include "gv11b/subctx_gv11b.h"
+#include "gv11b/hw_ram_gv11b.h"
+#include "gv11b/hw_ctxsw_prog_gv11b.h"
+static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
+                                struct mem_desc *inst_block);
+/*
+ * Release the subcontext header buffer of channel @c.
+ *
+ * When the header is mapped (non-zero gpu_va) it is unmapped from the
+ * channel's VM and its backing memory is freed. When it is not mapped the
+ * function does nothing, so it is safe to call more than once.
+ */
+void gv11b_free_subctx_header(struct channel_gk20a *c)
+{
+        struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+        struct gk20a *g = c->g;
+        gk20a_dbg_fn("");
+        /* Nothing was mapped, so there is nothing to release. */
+        if (!ctx->mem.gpu_va)
+                return;
+        /* Unmap first: the unmap needs the size recorded in the descriptor. */
+        gk20a_gmmu_unmap(c->vm, ctx->mem.gpu_va,
+                        ctx->mem.size, gk20a_mem_flag_none);
+        gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &ctx->mem);
+        /*
+         * gpu_va doubles as the "header present" flag, both here and in
+         * gv11b_alloc_subctx_header(). Nothing visible in this file clears
+         * it on release, so reset it explicitly; otherwise a repeated free
+         * would unmap/free again and a later alloc would skip re-creation.
+         */
+        ctx->mem.gpu_va = 0;
+}
+/*
+ * Allocate, map and initialise the subcontext header buffer of channel @c.
+ *
+ * If the header is already mapped (non-zero gpu_va) this is a no-op.
+ * Otherwise a buffer of ctxsw_prog_fecs_header_v() bytes is allocated in
+ * system memory, mapped into the channel's VM, zero-filled, and the
+ * subcontext PDB entries of the channel's instance block are programmed.
+ *
+ * Returns 0 on success (or when the header already exists), the allocator's
+ * error code when the allocation fails, and -ENOMEM when the GPU mapping or
+ * the CPU access to the buffer cannot be set up. On every failure the
+ * partially built header is torn down again and gpu_va is left at 0, so the
+ * call can be retried.
+ */
+int gv11b_alloc_subctx_header(struct channel_gk20a *c)
+{
+        struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+        struct gk20a *g = c->g;
+        int ret;
+        gk20a_dbg_fn("");
+        /* A non-zero gpu_va marks an already existing header. */
+        if (ctx->mem.gpu_va != 0)
+                return 0;
+        ret = gk20a_gmmu_alloc_attr_sys(g,
+                        DMA_ATTR_NO_KERNEL_MAPPING,
+                        ctxsw_prog_fecs_header_v(),
+                        &ctx->mem);
+        if (ret) {
+                gk20a_err(dev_from_gk20a(g),
+                        "failed to allocate sub ctx header");
+                return ret;
+        }
+        ctx->mem.gpu_va = gk20a_gmmu_map(c->vm,
+                                &ctx->mem.sgt,
+                                ctx->mem.size,
+                                NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+                                gk20a_mem_flag_none, true,
+                                ctx->mem.aperture);
+        if (!ctx->mem.gpu_va) {
+                gk20a_err(dev_from_gk20a(g),
+                        "failed to map ctx header");
+                ret = -ENOMEM;
+                goto err_free;
+        }
+        /* Now clear the buffer */
+        if (gk20a_mem_begin(g, &ctx->mem)) {
+                /*
+                 * Do not leave a mapped but uninitialised header behind:
+                 * with gpu_va still set, a retry would return success
+                 * without clearing the buffer or programming the PDB.
+                 */
+                ret = -ENOMEM;
+                goto err_unmap;
+        }
+        gk20a_memset(g, &ctx->mem, 0, 0, ctx->mem.size);
+        gk20a_mem_end(g, &ctx->mem);
+        gv11b_init_subcontext_pdb(c, &c->inst_block);
+        return 0;
+err_unmap:
+        /* Unmap before freeing: the unmap uses the size in the descriptor. */
+        gk20a_gmmu_unmap(c->vm, ctx->mem.gpu_va,
+                        ctx->mem.size, gk20a_mem_flag_none);
+        ctx->mem.gpu_va = 0;
+err_free:
+        gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &ctx->mem);
+        return ret;
+}
+/*
+ * Program the subcontext-0 (VEID0) page directory entries in @inst_block.
+ *
+ * The IOVA of the page directory of the channel's VM is looked up and
+ * written, together with the format/attribute bits, into the subcontext
+ * PDB words of the instance block. The WFI VEID field is set to 0.
+ */
+static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
+                                struct mem_desc *inst_block)
+{
+        struct gk20a *g = c->g;
+        struct vm_gk20a *vm = c->vm;
+        u64 pdb_iova;
+        u32 pdb_lo, pdb_hi;
+        u32 pdb_word;
+        gk20a_dbg_fn("");
+        /* VEID0 uses the channel's main page directory as well */
+        pdb_iova = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
+        pdb_lo = u64_lo32(pdb_iova >> ram_in_base_shift_v());
+        pdb_hi = u64_hi32(pdb_iova);
+        /* Attribute bits first, then the shifted low part of the address. */
+        pdb_word = ram_in_sc_page_dir_base_target_f(
+                        ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0);
+        pdb_word |= ram_in_sc_page_dir_base_vol_f(
+                        ram_in_sc_page_dir_base_vol_true_v(), 0);
+        pdb_word |= ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0);
+        pdb_word |= ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0);
+        pdb_word |= ram_in_sc_use_ver2_pt_format_f(1, 0);
+        pdb_word |= ram_in_sc_big_page_size_f(1, 0);
+        pdb_word |= ram_in_sc_page_dir_base_lo_0_f(pdb_lo);
+        gk20a_mem_wr32(g, inst_block,
+                        ram_in_sc_page_dir_base_vol_0_w(), pdb_word);
+        gk20a_mem_wr32(g, inst_block,
+                        ram_in_sc_page_dir_base_hi_0_w(), pdb_hi);
+        /* mark the subcontext0 address space as valid */
+        /*
+         * TODO: replace the raw word indices 166/167 with proper hw
+         * register definitions. NOTE(review): presumably these are the
+         * per-subcontext PDB valid mask words - confirm against the hw
+         * manual.
+         */
+        gk20a_mem_wr32(g, inst_block, 166, 0x1);
+        gk20a_mem_wr32(g, inst_block, 167, 0);
+        gk20a_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(),
+                        ram_in_engine_wfi_veid_f(0));
+}
+/*
+ * Store the context buffer address @gpu_va in the subcontext header of
+ * channel @c.
+ *
+ * L2 is flushed first, then the upper and lower 32 bits of @gpu_va are
+ * written to the context buffer pointer fields of the header.
+ *
+ * Returns 0 on success, -ENOMEM when CPU access to the header cannot be
+ * started.
+ */
+int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
+{
+        struct gk20a *g = c->g;
+        struct mem_desc *hdr = &c->ch_ctx.ctx_header.mem;
+        u32 va_hi = u64_hi32(gpu_va);
+        u32 va_lo = u64_lo32(gpu_va);
+        g->ops.mm.l2_flush(g, true);
+        if (gk20a_mem_begin(g, hdr))
+                return -ENOMEM;
+        /* high word first, then low word */
+        gk20a_mem_wr(g, hdr,
+                ctxsw_prog_main_image_context_buffer_ptr_hi_o(), va_hi);
+        gk20a_mem_wr(g, hdr,
+                ctxsw_prog_main_image_context_buffer_ptr_o(), va_lo);
+        gk20a_mem_end(g, hdr);
+        return 0;
+}

diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c new file mode 100644 index 00000000..3acc53f6 --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -0,0 +1,147 @@
	1	/*
	2	* Volta GPU series Subcontext
	3	*
	4	* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
	5	*
	6	* This program is free software; you can redistribute it and/or modify it
	7	* under the terms and conditions of the GNU General Public License,
	8	* version 2, as published by the Free Software Foundation.
	9	*
	10	* This program is distributed in the hope it will be useful, but WITHOUT
	11	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	12	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
	13	* more details.
	14	*
	15	* You should have received a copy of the GNU General Public License along with
	16	* this program.
	17	*/
	18
	19	#include "gk20a/gk20a.h"
	20	#include "gk20a/semaphore_gk20a.h"
	21	#include "gv11b/subctx_gv11b.h"
	22	#include "gv11b/hw_ram_gv11b.h"
	23	#include "gv11b/hw_ctxsw_prog_gv11b.h"
	24
	25	static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
	26	struct mem_desc *inst_block);
	27
	28	void gv11b_free_subctx_header(struct channel_gk20a *c)
	29	{
	30	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
	31	struct gk20a *g = c->g;
	32
	33	gk20a_dbg_fn("");
	34
	35	if (ctx->mem.gpu_va) {
	36	gk20a_gmmu_unmap(c->vm, ctx->mem.gpu_va,
	37	ctx->mem.size, gk20a_mem_flag_none);
	38
	39	gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &ctx->mem);
	40	}
	41	}
	42
	43	int gv11b_alloc_subctx_header(struct channel_gk20a *c)
	44	{
	45	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
	46	struct gk20a *g = c->g;
	47	int ret = 0;
	48
	49	gk20a_dbg_fn("");
	50
	51	if (ctx->mem.gpu_va == 0) {
	52	ret = gk20a_gmmu_alloc_attr_sys(g,
	53	DMA_ATTR_NO_KERNEL_MAPPING,
	54	ctxsw_prog_fecs_header_v(),
	55	&ctx->mem);
	56	if (ret) {
	57	gk20a_err(dev_from_gk20a(g),
	58	"failed to allocate sub ctx header");
	59	return ret;
	60	}
	61	ctx->mem.gpu_va = gk20a_gmmu_map(c->vm,
	62	&ctx->mem.sgt,
	63	ctx->mem.size,
	64	NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
	65	gk20a_mem_flag_none, true,
	66	ctx->mem.aperture);
	67	if (!ctx->mem.gpu_va) {
	68	gk20a_err(dev_from_gk20a(g),
	69	"failed to map ctx header");
	70	gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING,
	71	&ctx->mem);
	72	return -ENOMEM;
	73	}
	74	/* Now clear the buffer */
	75	if (gk20a_mem_begin(g, &ctx->mem))
	76	return -ENOMEM;
	77
	78	gk20a_memset(g, &ctx->mem, 0, 0, ctx->mem.size);
	79	gk20a_mem_end(g, &ctx->mem);
	80
	81	gv11b_init_subcontext_pdb(c, &c->inst_block);
	82
	83	}
	84	return ret;
	85	}
	86
	87	static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
	88	struct mem_desc *inst_block)
	89	{
	90	struct gk20a *g = c->g;
	91	struct vm_gk20a *vm;
	92	u64 pdb_addr, pdb_addr_lo, pdb_addr_hi;
	93	u32 format_word;
	94	u32 lo, hi;
	95
	96	gk20a_dbg_fn("");
	97	/* load main pdb as veid0 pdb also */
	98	vm = c->vm;
	99	pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
	100	pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
	101	pdb_addr_hi = u64_hi32(pdb_addr);
	102	format_word = ram_in_sc_page_dir_base_target_f(
	103	ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) \|
	104	ram_in_sc_page_dir_base_vol_f(
	105	ram_in_sc_page_dir_base_vol_true_v(), 0) \|
	106	ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) \|
	107	ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) \|
	108	ram_in_sc_use_ver2_pt_format_f(1, 0) \|
	109	ram_in_sc_big_page_size_f(1, 0) \|
	110	ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo);
	111	lo = ram_in_sc_page_dir_base_vol_0_w();
	112	hi = ram_in_sc_page_dir_base_hi_0_w();
	113	gk20a_mem_wr32(g, inst_block, lo, format_word);
	114	gk20a_mem_wr32(g, inst_block, hi, pdb_addr_hi);
	115
	116	/* make subcontext0 address space to valid */
	117	/* TODO fix proper hw register definations */
	118	gk20a_mem_wr32(g, inst_block, 166, 0x1);
	119	gk20a_mem_wr32(g, inst_block, 167, 0);
	120	gk20a_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(),
	121	ram_in_engine_wfi_veid_f(0));
	122
	123	}
	124
	125	int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
	126	{
	127	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
	128	struct mem_desc *gr_mem;
	129	struct gk20a *g = c->g;
	130	int ret = 0;
	131	u32 addr_lo, addr_hi;
	132
	133	addr_lo = u64_lo32(gpu_va);
	134	addr_hi = u64_hi32(gpu_va);
	135
	136	gr_mem = &ctx->mem;
	137	g->ops.mm.l2_flush(g, true);
	138	if (gk20a_mem_begin(g, gr_mem))
	139	return -ENOMEM;
	140
	141	gk20a_mem_wr(g, gr_mem,
	142	ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi);
	143	gk20a_mem_wr(g, gr_mem,
	144	ctxsw_prog_main_image_context_buffer_ptr_o(), addr_lo);
	145	gk20a_mem_end(g, gr_mem);
	146	return ret;
	147	}