gpu: nvgpu: legacy support with subcontext

gv11b needs at least one subcontext to submit work. To support legacy usage on gv11b, the main context is currently always copied into subcontext0 (veid0) during channel commit instance. As part of channel commit instance, veid0 for that channel is created and the relevant pdb and context info are copied to veid0. JIRA GV11B-21 Change-Id: I5147a1708b5e94202fa55e73fa0e53199ab7fced Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com> Reviewed-on: http://git-master/r/1231169 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
author: seshendra Gadagottu <sgadagottu@nvidia.com> 2016-11-16 15:25:08 -0500
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2016-12-20 18:15:14 -0500
commit: 67b54c3c76cfa488e1d102e5c74a32b3aaba9287 (patch)
tree: e56f4b6b11327710cd5665c241c4e454f16626e2 /drivers/gpu/nvgpu/gv11b
parent: 26199ffdf5bb5cf1d5a810c63904cc950314fe7b (diff)
4 files changed, 226 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index bc413a9e..b9276e09 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -19,6 +19,8 @@
 #include "gp10b/fifo_gp10b.h"
 #include "hw_pbdma_gv11b.h"
 #include "fifo_gv11b.h"
+#include "subctx_gv11b.h"
+#include "gr_gv11b.h"
 #include "hw_fifo_gv11b.h"
 #include "hw_ram_gv11b.h"
 #include "hw_ccsr_gv11b.h"
@@ -200,6 +202,15 @@ static void gv11b_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
 }
+/*
+ * gv11b unbind hook for a channel: releases the channel's subcontext header
+ * with gv11b_free_subctx_header() and then runs the common
+ * channel_gk20a_unbind().
+ *
+ * NOTE(review): the header is freed before the channel is unbound; confirm
+ * that nothing can still reference the header at that point.
+ */
+static void channel_gv11b_unbind(struct channel_gk20a *ch)
+{
+        gk20a_dbg_fn("");
+        gv11b_free_subctx_header(ch);
+        channel_gk20a_unbind(ch);
+}
 static u32 gv11b_fifo_get_num_fifos(struct gk20a *g)
 {
@@ -218,4 +229,5 @@ void gv11b_init_fifo(struct gpu_ops *gops)
        gops->fifo.userd_gp_get = gv11b_userd_gp_get;
        gops->fifo.userd_gp_put = gv11b_userd_gp_put;
        gops->fifo.setup_ramfc = channel_gv11b_setup_ramfc;
+        gops->fifo.unbind_channel = channel_gv11b_unbind;
 }
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 7f5b8d3f..bdb96329 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -13,6 +13,7 @@
 * more details.
 */
+#include <linux/tegra_gpu_t19x.h>
 #include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */
 #include <linux/delay.h>
 #include <linux/tegra-fuse.h>
@@ -24,12 +25,16 @@
 #include "gm20b/gr_gm20b.h"
 #include "gv11b/gr_gv11b.h"
+#include "gv11b/mm_gv11b.h"
+#include "gv11b/subctx_gv11b.h"
 #include "hw_gr_gv11b.h"
 #include "hw_fifo_gv11b.h"
 #include "hw_proj_gv11b.h"
 #include "hw_ctxsw_prog_gv11b.h"
 #include "hw_mc_gv11b.h"
 #include "hw_gr_gv11b.h"
+#include "hw_ram_gv11b.h"
+#include "hw_pbdma_gv11b.h"
 #include <linux/vmalloc.h>
 #include <linux/tegra_gpu_t19x.h>
@@ -1583,7 +1588,6 @@ static int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
        return 0;
 }
 static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index)
 {
        struct av_list_gk20a *sw_veid_bundle_init =
@@ -1766,12 +1770,43 @@ static int gr_gv11b_load_smid_config(struct gk20a *g)
        for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
                gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
        kfree(tpc_sm_id);
        return 0;
 }
+/*
+ * Commit the graphics context at @gpu_va for channel @c.
+ *
+ * Makes sure the channel's subcontext header exists, stores @gpu_va in it,
+ * and then points the instance block's engine WFI pointer at the header's
+ * GPU virtual address (sys_mem_ncoh target, virtual mode).
+ *
+ * Returns 0 on success, or the error reported by the header allocation or
+ * update. On error the instance block is left untouched, so a header
+ * address of 0 is never programmed into the WFI pointer.
+ */
+static int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
+{
+        u32 addr_lo;
+        u32 addr_hi;
+        struct ctx_header_desc *ctx;
+        int err;
+        gk20a_dbg_fn("");
+        /* no-op when the header was already set up by an earlier commit */
+        err = gv11b_alloc_subctx_header(c);
+        if (err)
+                return err;
+        err = gv11b_update_subctx_header(c, gpu_va);
+        if (err)
+                return err;
+        ctx = &c->ch_ctx.ctx_header;
+        addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v();
+        addr_hi = u64_hi32(ctx->mem.gpu_va);
+        /* point this address to engine_wfi_ptr */
+        gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_target_w(),
+                ram_in_engine_cs_wfi_v() |
+                ram_in_engine_wfi_target_f(
+                        ram_in_engine_wfi_target_sys_mem_ncoh_v()) |
+                ram_in_engine_wfi_mode_f(ram_in_engine_wfi_mode_virtual_v()) |
+                ram_in_engine_wfi_ptr_lo_f(addr_lo));
+        gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_ptr_hi_w(),
+                ram_in_engine_wfi_ptr_hi_f(addr_hi));
+        return 0;
+}
 static int gr_gv11b_commit_global_timeslice(struct gk20a *g,
                                        struct channel_gk20a *c, bool patch)
 {
@@ -1828,6 +1863,7 @@ static int gr_gv11b_commit_global_timeslice(struct gk20a *g,
 void gv11b_init_gr(struct gpu_ops *gops)
 {
        gp10b_init_gr(gops);
+        gops->gr.init_preemption_state = NULL;
        gops->gr.init_fs_state = gr_gv11b_init_fs_state;
        gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch;
        gops->gr.is_valid_class = gr_gv11b_is_valid_class;
@@ -1872,4 +1908,6 @@ void gv11b_init_gr(struct gpu_ops *gops)
        gops->gr.load_smid_config = gr_gv11b_load_smid_config;
        gops->gr.program_sm_id_numbering =
                        gr_gv11b_program_sm_id_numbering;
+        gops->gr.commit_inst = gr_gv11b_commit_inst;
 }
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
new file mode 100644
index 00000000..3acc53f6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -0,0 +1,147 @@
+/*
+ * Volta GPU series Subcontext
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.
+ */
+#include "gk20a/gk20a.h"
+#include "gk20a/semaphore_gk20a.h"
+#include "gv11b/subctx_gv11b.h"
+#include "gv11b/hw_ram_gv11b.h"
+#include "gv11b/hw_ctxsw_prog_gv11b.h"
+static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
+                                struct mem_desc *inst_block);
+/*
+ * Unmap and free the channel's subcontext header, if one is mapped.
+ *
+ * A zero ctx->mem.gpu_va means no header is mapped and nothing is done.
+ * gpu_va is cleared once the mapping is gone so that a repeated call is a
+ * no-op and a later gv11b_alloc_subctx_header() sees the header as absent
+ * and sets it up again instead of reusing a stale address.
+ */
+void gv11b_free_subctx_header(struct channel_gk20a *c)
+{
+        struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+        struct gk20a *g = c->g;
+        gk20a_dbg_fn("");
+        if (!ctx->mem.gpu_va)
+                return;
+        gk20a_gmmu_unmap(c->vm, ctx->mem.gpu_va,
+                ctx->mem.size, gk20a_mem_flag_none);
+        /* gpu_va doubles as the "header present" flag, keep it truthful */
+        ctx->mem.gpu_va = 0;
+        gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &ctx->mem);
+}
+/*
+ * Allocate, map and initialise the channel's subcontext header.
+ *
+ * Does nothing when the header is already mapped (non-zero
+ * ctx->mem.gpu_va). Otherwise it allocates sysmem sized by
+ * ctxsw_prog_fecs_header_v(), maps it into the channel's vm, zeroes it and
+ * programs the subcontext page directory into the channel's instance block.
+ *
+ * Returns 0 on success, the allocator's error code when the allocation
+ * fails, or -ENOMEM when the buffer cannot be mapped or accessed. Every
+ * failure path releases what was acquired and leaves gpu_va at 0, so a
+ * later call retries the full setup.
+ */
+int gv11b_alloc_subctx_header(struct channel_gk20a *c)
+{
+        struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+        struct gk20a *g = c->g;
+        int ret;
+        gk20a_dbg_fn("");
+        /* header already set up by an earlier call */
+        if (ctx->mem.gpu_va)
+                return 0;
+        ret = gk20a_gmmu_alloc_attr_sys(g,
+                        DMA_ATTR_NO_KERNEL_MAPPING,
+                        ctxsw_prog_fecs_header_v(),
+                        &ctx->mem);
+        if (ret) {
+                gk20a_err(dev_from_gk20a(g),
+                        "failed to allocate sub ctx header");
+                return ret;
+        }
+        ctx->mem.gpu_va = gk20a_gmmu_map(c->vm,
+                                &ctx->mem.sgt,
+                                ctx->mem.size,
+                                NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+                                gk20a_mem_flag_none, true,
+                                ctx->mem.aperture);
+        if (!ctx->mem.gpu_va) {
+                gk20a_err(dev_from_gk20a(g),
+                        "failed to map ctx header");
+                ret = -ENOMEM;
+                goto free_mem;
+        }
+        /* Now clear the buffer */
+        if (gk20a_mem_begin(g, &ctx->mem)) {
+                ret = -ENOMEM;
+                goto unmap;
+        }
+        gk20a_memset(g, &ctx->mem, 0, 0, ctx->mem.size);
+        gk20a_mem_end(g, &ctx->mem);
+        gv11b_init_subcontext_pdb(c, &c->inst_block);
+        return 0;
+unmap:
+        gk20a_gmmu_unmap(c->vm, ctx->mem.gpu_va,
+                ctx->mem.size, gk20a_mem_flag_none);
+        /* keep gpu_va usable as the "header present" flag */
+        ctx->mem.gpu_va = 0;
+free_mem:
+        gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &ctx->mem);
+        return ret;
+}
+/*
+ * Program subcontext 0 (veid0) in @inst_block so that it uses the page
+ * directory of the channel's own vm.
+ *
+ * Writes the subcontext 0 page directory base words (sys_mem_ncoh target,
+ * volatile, ver2 page table format, big page size field set to 1), two
+ * raw words at indices 166/167, and finally selects veid 0 in the engine
+ * WFI veid word.
+ */
+static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
+                                struct mem_desc *inst_block)
+{
+        struct gk20a *g = c->g;
+        u64 pdb_iova;
+        u32 base_lo, base_hi;
+        u32 pdb_word;
+        gk20a_dbg_fn("");
+        /* veid0 reuses the main page directory of the channel's vm */
+        pdb_iova = g->ops.mm.get_iova_addr(g, c->vm->pdb.mem.sgt->sgl, 0);
+        base_lo = u64_lo32(pdb_iova >> ram_in_base_shift_v());
+        base_hi = u64_hi32(pdb_iova);
+        pdb_word = ram_in_sc_page_dir_base_target_f(
+                        ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0);
+        pdb_word |= ram_in_sc_page_dir_base_vol_f(
+                        ram_in_sc_page_dir_base_vol_true_v(), 0);
+        pdb_word |= ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0);
+        pdb_word |= ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0);
+        pdb_word |= ram_in_sc_use_ver2_pt_format_f(1, 0);
+        pdb_word |= ram_in_sc_big_page_size_f(1, 0);
+        pdb_word |= ram_in_sc_page_dir_base_lo_0_f(base_lo);
+        gk20a_mem_wr32(g, inst_block,
+                ram_in_sc_page_dir_base_vol_0_w(), pdb_word);
+        gk20a_mem_wr32(g, inst_block,
+                ram_in_sc_page_dir_base_hi_0_w(), base_hi);
+        /*
+         * Mark the subcontext0 address space as valid.
+         * TODO: replace the raw word indices 166/167 with proper hw
+         * register definitions once they are available.
+         */
+        gk20a_mem_wr32(g, inst_block, 166, 0x1);
+        gk20a_mem_wr32(g, inst_block, 167, 0);
+        gk20a_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(),
+                        ram_in_engine_wfi_veid_f(0));
+}
+/*
+ * Store @gpu_va in the channel's subcontext header.
+ *
+ * Calls the mm l2_flush op, then writes the upper and lower 32 bits of
+ * @gpu_va into the header's context buffer pointer fields (hi first, then
+ * lo).
+ *
+ * Returns 0 on success or -ENOMEM when the header memory cannot be
+ * accessed.
+ */
+int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
+{
+        struct gk20a *g = c->g;
+        struct mem_desc *hdr_mem = &c->ch_ctx.ctx_header.mem;
+        u32 va_lo = u64_lo32(gpu_va);
+        u32 va_hi = u64_hi32(gpu_va);
+        g->ops.mm.l2_flush(g, true);
+        if (gk20a_mem_begin(g, hdr_mem))
+                return -ENOMEM;
+        gk20a_mem_wr(g, hdr_mem,
+                ctxsw_prog_main_image_context_buffer_ptr_hi_o(), va_hi);
+        gk20a_mem_wr(g, hdr_mem,
+                ctxsw_prog_main_image_context_buffer_ptr_o(), va_lo);
+        gk20a_mem_end(g, hdr_mem);
+        return 0;
+}
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h
new file mode 100644
index 00000000..357cd254
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * Volta GPU series Subcontext
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.
+ */
+#ifndef __SUBCONTEXT_GV11B_H__
+#define __SUBCONTEXT_GV11B_H__
+#include <linux/types.h>
+/* Only pointers to the channel are used here; a forward declaration is enough. */
+struct channel_gk20a;
+/*
+ * Allocate, map and initialise the channel's subcontext header if it is not
+ * mapped yet. Returns 0 on success or a negative error code.
+ */
+int gv11b_alloc_subctx_header(struct channel_gk20a *c);
+/* Unmap and free the channel's subcontext header; no-op if none is mapped. */
+void gv11b_free_subctx_header(struct channel_gk20a *c);
+/*
+ * Write @gpu_va into the context buffer pointer fields of the channel's
+ * subcontext header. Returns 0 on success or -ENOMEM.
+ */
+int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va);
+#endif /* __SUBCONTEXT_GV11B_H__ */
author	seshendra Gadagottu <sgadagottu@nvidia.com>	2016-11-16 15:25:08 -0500
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2016-12-20 18:15:14 -0500
commit	67b54c3c76cfa488e1d102e5c74a32b3aaba9287 (patch)
tree	e56f4b6b11327710cd5665c241c4e454f16626e2 /drivers/gpu/nvgpu/gv11b
parent	26199ffdf5bb5cf1d5a810c63904cc950314fe7b (diff)

diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c index bc413a9e..b9276e09 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -19,6 +19,8 @@
19	#include "gp10b/fifo_gp10b.h"	19	#include "gp10b/fifo_gp10b.h"
20	#include "hw_pbdma_gv11b.h"	20	#include "hw_pbdma_gv11b.h"
21	#include "fifo_gv11b.h"	21	#include "fifo_gv11b.h"
		22	#include "subctx_gv11b.h"
		23	#include "gr_gv11b.h"
22	#include "hw_fifo_gv11b.h"	24	#include "hw_fifo_gv11b.h"
23	#include "hw_ram_gv11b.h"	25	#include "hw_ram_gv11b.h"
24	#include "hw_ccsr_gv11b.h"	26	#include "hw_ccsr_gv11b.h"
@@ -200,6 +202,15 @@ static void gv11b_userd_gp_put(struct gk20a g, struct channel_gk20a c)
200		202
201	}	203	}
202		204
		205	static void channel_gv11b_unbind(struct channel_gk20a *ch)
		206	{
		207	gk20a_dbg_fn("");
		208
		209	gv11b_free_subctx_header(ch);
		210
		211	channel_gk20a_unbind(ch);
		212
		213	}
203		214
204	static u32 gv11b_fifo_get_num_fifos(struct gk20a *g)	215	static u32 gv11b_fifo_get_num_fifos(struct gk20a *g)
205	{	216	{
@@ -218,4 +229,5 @@ void gv11b_init_fifo(struct gpu_ops *gops)
218	gops->fifo.userd_gp_get = gv11b_userd_gp_get;	229	gops->fifo.userd_gp_get = gv11b_userd_gp_get;
219	gops->fifo.userd_gp_put = gv11b_userd_gp_put;	230	gops->fifo.userd_gp_put = gv11b_userd_gp_put;
220	gops->fifo.setup_ramfc = channel_gv11b_setup_ramfc;	231	gops->fifo.setup_ramfc = channel_gv11b_setup_ramfc;
		232	gops->fifo.unbind_channel = channel_gv11b_unbind;
221	}	233	}


diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 7f5b8d3f..bdb96329 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -13,6 +13,7 @@
13	* more details.	13	* more details.
14	*/	14	*/
15		15
		16	#include <linux/tegra_gpu_t19x.h>
16	#include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */	17	#include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */
17	#include <linux/delay.h>	18	#include <linux/delay.h>
18	#include <linux/tegra-fuse.h>	19	#include <linux/tegra-fuse.h>
@@ -24,12 +25,16 @@
24		25
25	#include "gm20b/gr_gm20b.h"	26	#include "gm20b/gr_gm20b.h"
26	#include "gv11b/gr_gv11b.h"	27	#include "gv11b/gr_gv11b.h"
		28	#include "gv11b/mm_gv11b.h"
		29	#include "gv11b/subctx_gv11b.h"
27	#include "hw_gr_gv11b.h"	30	#include "hw_gr_gv11b.h"
28	#include "hw_fifo_gv11b.h"	31	#include "hw_fifo_gv11b.h"
29	#include "hw_proj_gv11b.h"	32	#include "hw_proj_gv11b.h"
30	#include "hw_ctxsw_prog_gv11b.h"	33	#include "hw_ctxsw_prog_gv11b.h"
31	#include "hw_mc_gv11b.h"	34	#include "hw_mc_gv11b.h"
32	#include "hw_gr_gv11b.h"	35	#include "hw_gr_gv11b.h"
		36	#include "hw_ram_gv11b.h"
		37	#include "hw_pbdma_gv11b.h"
33	#include <linux/vmalloc.h>	38	#include <linux/vmalloc.h>
34	#include <linux/tegra_gpu_t19x.h>	39	#include <linux/tegra_gpu_t19x.h>
35		40
@@ -1583,7 +1588,6 @@ static int gr_gv11b_setup_rop_mapping(struct gk20a g, struct gr_gk20a gr)
1583	return 0;	1588	return 0;
1584	}	1589	}
1585		1590
1586
1587	static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index)	1591	static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index)
1588	{	1592	{
1589	struct av_list_gk20a *sw_veid_bundle_init =	1593	struct av_list_gk20a *sw_veid_bundle_init =
@@ -1766,12 +1770,43 @@ static int gr_gv11b_load_smid_config(struct gk20a *g)
1766		1770
1767	for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)	1771	for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
1768	gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);	1772	gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
1769
1770	kfree(tpc_sm_id);	1773	kfree(tpc_sm_id);
1771		1774
1772	return 0;	1775	return 0;
1773	}	1776	}
1774		1777
		1778	static int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
		1779	{
		1780	u32 addr_lo;
		1781	u32 addr_hi;
		1782	struct ctx_header_desc *ctx;
		1783
		1784	gk20a_dbg_fn("");
		1785
		1786	gv11b_alloc_subctx_header(c);
		1787
		1788	gv11b_update_subctx_header(c, gpu_va);
		1789
		1790	ctx = &c->ch_ctx.ctx_header;
		1791	addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v();
		1792	addr_hi = u64_hi32(ctx->mem.gpu_va);
		1793
		1794	/* point this address to engine_wfi_ptr */
		1795	gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_target_w(),
		1796	ram_in_engine_cs_wfi_v() \|
		1797	ram_in_engine_wfi_target_f(
		1798	ram_in_engine_wfi_target_sys_mem_ncoh_v()) \|
		1799	ram_in_engine_wfi_mode_f(ram_in_engine_wfi_mode_virtual_v()) \|
		1800	ram_in_engine_wfi_ptr_lo_f(addr_lo));
		1801
		1802	gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_ptr_hi_w(),
		1803	ram_in_engine_wfi_ptr_hi_f(addr_hi));
		1804
		1805	return 0;
		1806	}
		1807
		1808
		1809
1775	static int gr_gv11b_commit_global_timeslice(struct gk20a *g,	1810	static int gr_gv11b_commit_global_timeslice(struct gk20a *g,
1776	struct channel_gk20a *c, bool patch)	1811	struct channel_gk20a *c, bool patch)
1777	{	1812	{
@@ -1828,6 +1863,7 @@ static int gr_gv11b_commit_global_timeslice(struct gk20a *g,
1828	void gv11b_init_gr(struct gpu_ops *gops)	1863	void gv11b_init_gr(struct gpu_ops *gops)
1829	{	1864	{
1830	gp10b_init_gr(gops);	1865	gp10b_init_gr(gops);
		1866	gops->gr.init_preemption_state = NULL;
1831	gops->gr.init_fs_state = gr_gv11b_init_fs_state;	1867	gops->gr.init_fs_state = gr_gv11b_init_fs_state;
1832	gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch;	1868	gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch;
1833	gops->gr.is_valid_class = gr_gv11b_is_valid_class;	1869	gops->gr.is_valid_class = gr_gv11b_is_valid_class;
@@ -1872,4 +1908,6 @@ void gv11b_init_gr(struct gpu_ops *gops)
1872	gops->gr.load_smid_config = gr_gv11b_load_smid_config;	1908	gops->gr.load_smid_config = gr_gv11b_load_smid_config;
1873	gops->gr.program_sm_id_numbering =	1909	gops->gr.program_sm_id_numbering =
1874	gr_gv11b_program_sm_id_numbering;	1910	gr_gv11b_program_sm_id_numbering;
		1911	gops->gr.commit_inst = gr_gv11b_commit_inst;
		1912
1875	}	1913	}


diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c new file mode 100644 index 00000000..3acc53f6 --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -0,0 +1,147 @@
		1	/*
		2	* Volta GPU series Subcontext
		3	*
		4	* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
		5	*
		6	* This program is free software; you can redistribute it and/or modify it
		7	* under the terms and conditions of the GNU General Public License,
		8	* version 2, as published by the Free Software Foundation.
		9	*
		10	* This program is distributed in the hope it will be useful, but WITHOUT
		11	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
		12	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
		13	* more details.
		14	*
		15	* You should have received a copy of the GNU General Public License along with
		16	* this program.
		17	*/
		18
		19	#include "gk20a/gk20a.h"
		20	#include "gk20a/semaphore_gk20a.h"
		21	#include "gv11b/subctx_gv11b.h"
		22	#include "gv11b/hw_ram_gv11b.h"
		23	#include "gv11b/hw_ctxsw_prog_gv11b.h"
		24
		25	static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
		26	struct mem_desc *inst_block);
		27
		28	void gv11b_free_subctx_header(struct channel_gk20a *c)
		29	{
		30	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
		31	struct gk20a *g = c->g;
		32
		33	gk20a_dbg_fn("");
		34
		35	if (ctx->mem.gpu_va) {
		36	gk20a_gmmu_unmap(c->vm, ctx->mem.gpu_va,
		37	ctx->mem.size, gk20a_mem_flag_none);
		38
		39	gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &ctx->mem);
		40	}
		41	}
		42
		43	int gv11b_alloc_subctx_header(struct channel_gk20a *c)
		44	{
		45	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
		46	struct gk20a *g = c->g;
		47	int ret = 0;
		48
		49	gk20a_dbg_fn("");
		50
		51	if (ctx->mem.gpu_va == 0) {
		52	ret = gk20a_gmmu_alloc_attr_sys(g,
		53	DMA_ATTR_NO_KERNEL_MAPPING,
		54	ctxsw_prog_fecs_header_v(),
		55	&ctx->mem);
		56	if (ret) {
		57	gk20a_err(dev_from_gk20a(g),
		58	"failed to allocate sub ctx header");
		59	return ret;
		60	}
		61	ctx->mem.gpu_va = gk20a_gmmu_map(c->vm,
		62	&ctx->mem.sgt,
		63	ctx->mem.size,
		64	NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
		65	gk20a_mem_flag_none, true,
		66	ctx->mem.aperture);
		67	if (!ctx->mem.gpu_va) {
		68	gk20a_err(dev_from_gk20a(g),
		69	"failed to map ctx header");
		70	gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING,
		71	&ctx->mem);
		72	return -ENOMEM;
		73	}
		74	/* Now clear the buffer */
		75	if (gk20a_mem_begin(g, &ctx->mem))
		76	return -ENOMEM;
		77
		78	gk20a_memset(g, &ctx->mem, 0, 0, ctx->mem.size);
		79	gk20a_mem_end(g, &ctx->mem);
		80
		81	gv11b_init_subcontext_pdb(c, &c->inst_block);
		82
		83	}
		84	return ret;
		85	}
		86
		87	static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
		88	struct mem_desc *inst_block)
		89	{
		90	struct gk20a *g = c->g;
		91	struct vm_gk20a *vm;
		92	u64 pdb_addr, pdb_addr_lo, pdb_addr_hi;
		93	u32 format_word;
		94	u32 lo, hi;
		95
		96	gk20a_dbg_fn("");
		97	/* load main pdb as veid0 pdb also */
		98	vm = c->vm;
		99	pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
		100	pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
		101	pdb_addr_hi = u64_hi32(pdb_addr);
		102	format_word = ram_in_sc_page_dir_base_target_f(
		103	ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) \|
		104	ram_in_sc_page_dir_base_vol_f(
		105	ram_in_sc_page_dir_base_vol_true_v(), 0) \|
		106	ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) \|
		107	ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) \|
		108	ram_in_sc_use_ver2_pt_format_f(1, 0) \|
		109	ram_in_sc_big_page_size_f(1, 0) \|
		110	ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo);
		111	lo = ram_in_sc_page_dir_base_vol_0_w();
		112	hi = ram_in_sc_page_dir_base_hi_0_w();
		113	gk20a_mem_wr32(g, inst_block, lo, format_word);
		114	gk20a_mem_wr32(g, inst_block, hi, pdb_addr_hi);
		115
		116	/* make subcontext0 address space to valid */
		117	/* TODO fix proper hw register definations */
		118	gk20a_mem_wr32(g, inst_block, 166, 0x1);
		119	gk20a_mem_wr32(g, inst_block, 167, 0);
		120	gk20a_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(),
		121	ram_in_engine_wfi_veid_f(0));
		122
		123	}
		124
		125	int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
		126	{
		127	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
		128	struct mem_desc *gr_mem;
		129	struct gk20a *g = c->g;
		130	int ret = 0;
		131	u32 addr_lo, addr_hi;
		132
		133	addr_lo = u64_lo32(gpu_va);
		134	addr_hi = u64_hi32(gpu_va);
		135
		136	gr_mem = &ctx->mem;
		137	g->ops.mm.l2_flush(g, true);
		138	if (gk20a_mem_begin(g, gr_mem))
		139	return -ENOMEM;
		140
		141	gk20a_mem_wr(g, gr_mem,
		142	ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi);
		143	gk20a_mem_wr(g, gr_mem,
		144	ctxsw_prog_main_image_context_buffer_ptr_o(), addr_lo);
		145	gk20a_mem_end(g, gr_mem);
		146	return ret;
		147	}


diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h new file mode 100644 index 00000000..357cd254 --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h
@@ -0,0 +1,27 @@
		1	/*
		2	*
		3	* Volta GPU series Subcontext
		4	*
		5	* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
		6	*
		7	* This program is free software; you can redistribute it and/or modify it
		8	* under the terms and conditions of the GNU General Public License,
		9	* version 2, as published by the Free Software Foundation.
		10	*
		11	* This program is distributed in the hope it will be useful, but WITHOUT
		12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
		13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
		14	* more details.
		15	*
		16	* You should have received a copy of the GNU General Public License along with
		17	* this program.
		18	*/
		19	#ifndef __SUBCONTEXT_GV11B_H__
		20	#define __SUBCONTEXT_GV11B_H__
		21
		22	int gv11b_alloc_subctx_header(struct channel_gk20a *c);
		23
		24	void gv11b_free_subctx_header(struct channel_gk20a *c);
		25
		26	int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va);
		27	#endif /* __SUBCONTEXT_GV11B_H__ */