5 files changed, 228 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 4ea56d8e..c059e464 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -13,6 +13,7 @@ nvgpu-y += \
        $(nvgpu-t19x)/gv11b/ce_gv11b.o \
        $(nvgpu-t19x)/gv11b/gr_ctx_gv11b.o \
        $(nvgpu-t19x)/gv11b/pmu_gv11b.o \
-        $(nvgpu-t19x)/gv11b/therm_gv11b.o
+        $(nvgpu-t19x)/gv11b/therm_gv11b.o \
+        $(nvgpu-t19x)/gv11b/subctx_gv11b.o
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t19x)/gv11b/platform_gv11b_tegra.o
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index bc413a9e..b9276e09 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -19,6 +19,8 @@
 #include "gp10b/fifo_gp10b.h"
 #include "hw_pbdma_gv11b.h"
 #include "fifo_gv11b.h"
+#include "subctx_gv11b.h"
+#include "gr_gv11b.h"
 #include "hw_fifo_gv11b.h"
 #include "hw_ram_gv11b.h"
 #include "hw_ccsr_gv11b.h"
@@ -200,6 +202,15 @@ static void gv11b_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
 }
+/*
+ * gv11b unbind hook: release the channel's subcontext header, then run the
+ * common gk20a channel unbind.
+ *
+ * NOTE(review): the header is freed before channel_gk20a_unbind() is called;
+ * this presumably relies on the channel already being idle at this point --
+ * confirm against the caller.
+ */
+static void channel_gv11b_unbind(struct channel_gk20a *ch)
+{
+        gk20a_dbg_fn("");
+        /* unmaps and frees ch->ch_ctx.ctx_header if one is mapped */
+        gv11b_free_subctx_header(ch);
+        channel_gk20a_unbind(ch);
+}
 static u32 gv11b_fifo_get_num_fifos(struct gk20a *g)
 {
@@ -218,4 +229,5 @@ void gv11b_init_fifo(struct gpu_ops *gops)
        gops->fifo.userd_gp_get = gv11b_userd_gp_get;
        gops->fifo.userd_gp_put = gv11b_userd_gp_put;
        gops->fifo.setup_ramfc = channel_gv11b_setup_ramfc;
+        gops->fifo.unbind_channel = channel_gv11b_unbind;
 }
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 7f5b8d3f..bdb96329 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -13,6 +13,7 @@
 * more details.
 */
+#include <linux/tegra_gpu_t19x.h>
 #include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */
 #include <linux/delay.h>
 #include <linux/tegra-fuse.h>
@@ -24,12 +25,16 @@
 #include "gm20b/gr_gm20b.h"
 #include "gv11b/gr_gv11b.h"
+#include "gv11b/mm_gv11b.h"
+#include "gv11b/subctx_gv11b.h"
 #include "hw_gr_gv11b.h"
 #include "hw_fifo_gv11b.h"
 #include "hw_proj_gv11b.h"
 #include "hw_ctxsw_prog_gv11b.h"
 #include "hw_mc_gv11b.h"
 #include "hw_gr_gv11b.h"
+#include "hw_ram_gv11b.h"
+#include "hw_pbdma_gv11b.h"
 #include <linux/vmalloc.h>
 #include <linux/tegra_gpu_t19x.h>
@@ -1583,7 +1588,6 @@ static int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
        return 0;
 }
 static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index)
 {
        struct av_list_gk20a *sw_veid_bundle_init =
@@ -1766,12 +1770,43 @@ static int gr_gv11b_load_smid_config(struct gk20a *g)
        for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
                gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
        kfree(tpc_sm_id);
        return 0;
 }
+/*
+ * Point the channel's instance block at its subcontext header.
+ *
+ * Makes sure the subcontext header exists (gv11b_alloc_subctx_header),
+ * records @gpu_va in it (gv11b_update_subctx_header) and then programs the
+ * header's GPU virtual address into the engine WFI pointer words of
+ * c->inst_block.
+ *
+ * @c:      channel whose instance block is updated
+ * @gpu_va: GPU virtual address stored in the subcontext header
+ *
+ * Returns 0 on success, or the negative error code reported by the header
+ * allocation/update step; in that case the WFI pointer words are not written.
+ */
+static int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
+{
+        struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+        u32 addr_lo;
+        u32 addr_hi;
+        int err;
+        gk20a_dbg_fn("");
+        /* without a mapped header there is no valid address to commit */
+        err = gv11b_alloc_subctx_header(c);
+        if (err)
+                return err;
+        err = gv11b_update_subctx_header(c, gpu_va);
+        if (err)
+                return err;
+        addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v();
+        addr_hi = u64_hi32(ctx->mem.gpu_va);
+        /* point this address to engine_wfi_ptr */
+        gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_target_w(),
+                ram_in_engine_cs_wfi_v() |
+                ram_in_engine_wfi_target_f(
+                        ram_in_engine_wfi_target_sys_mem_ncoh_v()) |
+                ram_in_engine_wfi_mode_f(ram_in_engine_wfi_mode_virtual_v()) |
+                ram_in_engine_wfi_ptr_lo_f(addr_lo));
+        gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_ptr_hi_w(),
+                ram_in_engine_wfi_ptr_hi_f(addr_hi));
+        return 0;
+}
 static int gr_gv11b_commit_global_timeslice(struct gk20a *g,
                                        struct channel_gk20a *c, bool patch)
 {
@@ -1828,6 +1863,7 @@ static int gr_gv11b_commit_global_timeslice(struct gk20a *g,
 void gv11b_init_gr(struct gpu_ops *gops)
 {
        gp10b_init_gr(gops);
+        gops->gr.init_preemption_state = NULL;
        gops->gr.init_fs_state = gr_gv11b_init_fs_state;
        gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch;
        gops->gr.is_valid_class = gr_gv11b_is_valid_class;
@@ -1872,4 +1908,6 @@ void gv11b_init_gr(struct gpu_ops *gops)
        gops->gr.load_smid_config = gr_gv11b_load_smid_config;
        gops->gr.program_sm_id_numbering =
                        gr_gv11b_program_sm_id_numbering;
+        gops->gr.commit_inst = gr_gv11b_commit_inst;
 }
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
new file mode 100644
index 00000000..3acc53f6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -0,0 +1,147 @@
+/*
+ * Volta GPU series Subcontext
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.
+ */
+#include "gk20a/gk20a.h"
+#include "gk20a/semaphore_gk20a.h"
+#include "gv11b/subctx_gv11b.h"
+#include "gv11b/hw_ram_gv11b.h"
+#include "gv11b/hw_ctxsw_prog_gv11b.h"
+static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
+                                struct mem_desc *inst_block);
+/*
+ * Release the subcontext header of channel @c.
+ *
+ * Does nothing when no header is mapped (ctx_header.mem.gpu_va == 0).
+ * Otherwise the GPU mapping is removed, the backing memory is freed and
+ * gpu_va is cleared. gv11b_alloc_subctx_header() treats a zero gpu_va as
+ * "not allocated", so clearing it keeps a second free from touching released
+ * memory and lets a later allocation start from scratch.
+ */
+void gv11b_free_subctx_header(struct channel_gk20a *c)
+{
+        struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+        struct gk20a *g = c->g;
+        gk20a_dbg_fn("");
+        if (ctx->mem.gpu_va) {
+                gk20a_gmmu_unmap(c->vm, ctx->mem.gpu_va,
+                        ctx->mem.size, gk20a_mem_flag_none);
+                gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &ctx->mem);
+                /* mark the header as gone for the alloc/free guards */
+                ctx->mem.gpu_va = 0;
+        }
+}
+/*
+ * Allocate and initialise the subcontext header of channel @c.
+ *
+ * A non-zero ctx_header.mem.gpu_va means the header is already set up and
+ * the call returns 0 immediately. Otherwise the header memory is allocated
+ * in sysmem, mapped into the channel's VM, zeroed, and the subcontext page
+ * directory entry of c->inst_block is programmed.
+ *
+ * Returns 0 on success, the allocator's error code if the allocation fails,
+ * or -ENOMEM if mapping or CPU access to the buffer fails. On every failure
+ * path the partially built header is torn down again and gpu_va is left at
+ * zero, so a later call retries the whole sequence instead of reusing a
+ * half-initialised header.
+ */
+int gv11b_alloc_subctx_header(struct channel_gk20a *c)
+{
+        struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+        struct gk20a *g = c->g;
+        int ret;
+        gk20a_dbg_fn("");
+        /* already allocated and mapped: nothing to do */
+        if (ctx->mem.gpu_va != 0)
+                return 0;
+        ret = gk20a_gmmu_alloc_attr_sys(g,
+                        DMA_ATTR_NO_KERNEL_MAPPING,
+                        ctxsw_prog_fecs_header_v(),
+                        &ctx->mem);
+        if (ret) {
+                gk20a_err(dev_from_gk20a(g),
+                        "failed to allocate sub ctx header");
+                return ret;
+        }
+        ctx->mem.gpu_va = gk20a_gmmu_map(c->vm,
+                                &ctx->mem.sgt,
+                                ctx->mem.size,
+                                NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+                                gk20a_mem_flag_none, true,
+                                ctx->mem.aperture);
+        if (!ctx->mem.gpu_va) {
+                gk20a_err(dev_from_gk20a(g),
+                        "failed to map ctx header");
+                ret = -ENOMEM;
+                goto err_free;
+        }
+        /* Now clear the buffer */
+        if (gk20a_mem_begin(g, &ctx->mem)) {
+                ret = -ENOMEM;
+                goto err_unmap;
+        }
+        gk20a_memset(g, &ctx->mem, 0, 0, ctx->mem.size);
+        gk20a_mem_end(g, &ctx->mem);
+        gv11b_init_subcontext_pdb(c, &c->inst_block);
+        return 0;
+err_unmap:
+        gk20a_gmmu_unmap(c->vm, ctx->mem.gpu_va,
+                ctx->mem.size, gk20a_mem_flag_none);
+        /* zero gpu_va is the "not allocated" marker checked on entry */
+        ctx->mem.gpu_va = 0;
+err_free:
+        gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &ctx->mem);
+        return ret;
+}
+/*
+ * Program the subcontext page directory entry of @inst_block from the
+ * channel's VM.
+ *
+ * The IOVA of c->vm's page directory is split into a low part (shifted by
+ * ram_in_base_shift_v()) and a high part. The low part is combined with the
+ * target/volatile/fault-replay/format/big-page fields into one word, and
+ * both words are written to the instance block. Two further raw words and
+ * the engine WFI VEID word are written afterwards.
+ *
+ * @c:          channel providing the gk20a device and the VM
+ * @inst_block: instance block memory that receives the writes
+ */
+static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
+                                struct mem_desc *inst_block)
+{
+        struct gk20a *g = c->g;
+        struct vm_gk20a *vm = c->vm;
+        u64 pdb_iova;
+        u32 pdb_lo, pdb_hi;
+        u32 pdb_word;
+        gk20a_dbg_fn("");
+        /* the channel's main page directory is reused as the veid0 one */
+        pdb_iova = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
+        pdb_lo = u64_lo32(pdb_iova >> ram_in_base_shift_v());
+        pdb_hi = u64_hi32(pdb_iova);
+        /* build the low word field by field */
+        pdb_word = ram_in_sc_page_dir_base_target_f(
+                        ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0);
+        pdb_word |= ram_in_sc_page_dir_base_vol_f(
+                        ram_in_sc_page_dir_base_vol_true_v(), 0);
+        pdb_word |= ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0);
+        pdb_word |= ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0);
+        pdb_word |= ram_in_sc_use_ver2_pt_format_f(1, 0);
+        pdb_word |= ram_in_sc_big_page_size_f(1, 0);
+        pdb_word |= ram_in_sc_page_dir_base_lo_0_f(pdb_lo);
+        gk20a_mem_wr32(g, inst_block,
+                        ram_in_sc_page_dir_base_vol_0_w(), pdb_word);
+        gk20a_mem_wr32(g, inst_block,
+                        ram_in_sc_page_dir_base_hi_0_w(), pdb_hi);
+        /*
+         * Mark the subcontext 0 address space as valid.
+         * TODO: replace the raw word offsets 166/167 with proper hw
+         * register definitions once they are available.
+         */
+        gk20a_mem_wr32(g, inst_block, 166, 0x1);
+        gk20a_mem_wr32(g, inst_block, 167, 0);
+        gk20a_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(),
+                        ram_in_engine_wfi_veid_f(0));
+}
+/*
+ * Store @gpu_va in the channel's subcontext header.
+ *
+ * After an L2 flush, the upper and lower 32 bits of @gpu_va are written to
+ * the header at the ctxsw_prog_main_image_context_buffer_ptr_hi/_ptr
+ * offsets.
+ *
+ * @c:      channel owning the header (c->ch_ctx.ctx_header)
+ * @gpu_va: 64-bit GPU virtual address to record
+ *
+ * Returns 0 on success or -ENOMEM if CPU access to the header memory cannot
+ * be started.
+ */
+int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
+{
+        struct gk20a *g = c->g;
+        struct mem_desc *hdr = &c->ch_ctx.ctx_header.mem;
+        u32 va_lo = u64_lo32(gpu_va);
+        u32 va_hi = u64_hi32(gpu_va);
+        g->ops.mm.l2_flush(g, true);
+        if (gk20a_mem_begin(g, hdr))
+                return -ENOMEM;
+        /* high word first, then low word, as two separate 32-bit writes */
+        gk20a_mem_wr(g, hdr,
+                ctxsw_prog_main_image_context_buffer_ptr_hi_o(), va_hi);
+        gk20a_mem_wr(g, hdr,
+                ctxsw_prog_main_image_context_buffer_ptr_o(), va_lo);
+        gk20a_mem_end(g, hdr);
+        return 0;
+}
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h
new file mode 100644
index 00000000..357cd254
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * Volta GPU series Subcontext
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.
+ */
+#ifndef __SUBCONTEXT_GV11B_H__
+#define __SUBCONTEXT_GV11B_H__
+/*
+ * Only pointers to the channel are passed below, so a forward declaration
+ * is enough and keeps this header usable regardless of include order.
+ */
+struct channel_gk20a;
+/*
+ * Allocate, GPU-map and zero the channel's subcontext header if it is not
+ * mapped yet, then program the subcontext page directory entry in the
+ * channel's instance block. Returns 0 on success or a negative error code.
+ */
+int gv11b_alloc_subctx_header(struct channel_gk20a *c);
+/* Unmap and free the channel's subcontext header if one is mapped. */
+void gv11b_free_subctx_header(struct channel_gk20a *c);
+/*
+ * Write @gpu_va (split into high and low 32-bit words) into the channel's
+ * subcontext header. Returns 0 on success or -ENOMEM if the header memory
+ * cannot be accessed.
+ */
+int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va);
+#endif /* __SUBCONTEXT_GV11B_H__ */

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 4ea56d8e..c059e464 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile
@@ -13,6 +13,7 @@ nvgpu-y += \
13	$(nvgpu-t19x)/gv11b/ce_gv11b.o \	13	$(nvgpu-t19x)/gv11b/ce_gv11b.o \
14	$(nvgpu-t19x)/gv11b/gr_ctx_gv11b.o \	14	$(nvgpu-t19x)/gv11b/gr_ctx_gv11b.o \
15	$(nvgpu-t19x)/gv11b/pmu_gv11b.o \	15	$(nvgpu-t19x)/gv11b/pmu_gv11b.o \
16	$(nvgpu-t19x)/gv11b/therm_gv11b.o	16	$(nvgpu-t19x)/gv11b/therm_gv11b.o \
		17	$(nvgpu-t19x)/gv11b/subctx_gv11b.o
17		18
18	nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t19x)/gv11b/platform_gv11b_tegra.o	19	nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t19x)/gv11b/platform_gv11b_tegra.o


diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c index bc413a9e..b9276e09 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -19,6 +19,8 @@
19	#include "gp10b/fifo_gp10b.h"	19	#include "gp10b/fifo_gp10b.h"
20	#include "hw_pbdma_gv11b.h"	20	#include "hw_pbdma_gv11b.h"
21	#include "fifo_gv11b.h"	21	#include "fifo_gv11b.h"
		22	#include "subctx_gv11b.h"
		23	#include "gr_gv11b.h"
22	#include "hw_fifo_gv11b.h"	24	#include "hw_fifo_gv11b.h"
23	#include "hw_ram_gv11b.h"	25	#include "hw_ram_gv11b.h"
24	#include "hw_ccsr_gv11b.h"	26	#include "hw_ccsr_gv11b.h"
@@ -200,6 +202,15 @@ static void gv11b_userd_gp_put(struct gk20a g, struct channel_gk20a c)
200		202
201	}	203	}
202		204
		205	static void channel_gv11b_unbind(struct channel_gk20a *ch)
		206	{
		207	gk20a_dbg_fn("");
		208
		209	gv11b_free_subctx_header(ch);
		210
		211	channel_gk20a_unbind(ch);
		212
		213	}
203		214
204	static u32 gv11b_fifo_get_num_fifos(struct gk20a *g)	215	static u32 gv11b_fifo_get_num_fifos(struct gk20a *g)
205	{	216	{
@@ -218,4 +229,5 @@ void gv11b_init_fifo(struct gpu_ops *gops)
218	gops->fifo.userd_gp_get = gv11b_userd_gp_get;	229	gops->fifo.userd_gp_get = gv11b_userd_gp_get;
219	gops->fifo.userd_gp_put = gv11b_userd_gp_put;	230	gops->fifo.userd_gp_put = gv11b_userd_gp_put;
220	gops->fifo.setup_ramfc = channel_gv11b_setup_ramfc;	231	gops->fifo.setup_ramfc = channel_gv11b_setup_ramfc;
		232	gops->fifo.unbind_channel = channel_gv11b_unbind;
221	}	233	}


diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 7f5b8d3f..bdb96329 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -13,6 +13,7 @@
13	* more details.	13	* more details.
14	*/	14	*/
15		15
		16	#include <linux/tegra_gpu_t19x.h>
16	#include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */	17	#include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */
17	#include <linux/delay.h>	18	#include <linux/delay.h>
18	#include <linux/tegra-fuse.h>	19	#include <linux/tegra-fuse.h>
@@ -24,12 +25,16 @@
24		25
25	#include "gm20b/gr_gm20b.h"	26	#include "gm20b/gr_gm20b.h"
26	#include "gv11b/gr_gv11b.h"	27	#include "gv11b/gr_gv11b.h"
		28	#include "gv11b/mm_gv11b.h"
		29	#include "gv11b/subctx_gv11b.h"
27	#include "hw_gr_gv11b.h"	30	#include "hw_gr_gv11b.h"
28	#include "hw_fifo_gv11b.h"	31	#include "hw_fifo_gv11b.h"
29	#include "hw_proj_gv11b.h"	32	#include "hw_proj_gv11b.h"
30	#include "hw_ctxsw_prog_gv11b.h"	33	#include "hw_ctxsw_prog_gv11b.h"
31	#include "hw_mc_gv11b.h"	34	#include "hw_mc_gv11b.h"
32	#include "hw_gr_gv11b.h"	35	#include "hw_gr_gv11b.h"
		36	#include "hw_ram_gv11b.h"
		37	#include "hw_pbdma_gv11b.h"
33	#include <linux/vmalloc.h>	38	#include <linux/vmalloc.h>
34	#include <linux/tegra_gpu_t19x.h>	39	#include <linux/tegra_gpu_t19x.h>
35		40
@@ -1583,7 +1588,6 @@ static int gr_gv11b_setup_rop_mapping(struct gk20a g, struct gr_gk20a gr)
1583	return 0;	1588	return 0;
1584	}	1589	}
1585		1590
1586
1587	static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index)	1591	static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index)
1588	{	1592	{
1589	struct av_list_gk20a *sw_veid_bundle_init =	1593	struct av_list_gk20a *sw_veid_bundle_init =
@@ -1766,12 +1770,43 @@ static int gr_gv11b_load_smid_config(struct gk20a *g)
1766		1770
1767	for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)	1771	for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
1768	gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);	1772	gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
1769
1770	kfree(tpc_sm_id);	1773	kfree(tpc_sm_id);
1771		1774
1772	return 0;	1775	return 0;
1773	}	1776	}
1774		1777
		1778	static int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
		1779	{
		1780	u32 addr_lo;
		1781	u32 addr_hi;
		1782	struct ctx_header_desc *ctx;
		1783
		1784	gk20a_dbg_fn("");
		1785
		1786	gv11b_alloc_subctx_header(c);
		1787
		1788	gv11b_update_subctx_header(c, gpu_va);
		1789
		1790	ctx = &c->ch_ctx.ctx_header;
		1791	addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v();
		1792	addr_hi = u64_hi32(ctx->mem.gpu_va);
		1793
		1794	/* point this address to engine_wfi_ptr */
		1795	gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_target_w(),
		1796	ram_in_engine_cs_wfi_v() \|
		1797	ram_in_engine_wfi_target_f(
		1798	ram_in_engine_wfi_target_sys_mem_ncoh_v()) \|
		1799	ram_in_engine_wfi_mode_f(ram_in_engine_wfi_mode_virtual_v()) \|
		1800	ram_in_engine_wfi_ptr_lo_f(addr_lo));
		1801
		1802	gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_ptr_hi_w(),
		1803	ram_in_engine_wfi_ptr_hi_f(addr_hi));
		1804
		1805	return 0;
		1806	}
		1807
		1808
		1809
1775	static int gr_gv11b_commit_global_timeslice(struct gk20a *g,	1810	static int gr_gv11b_commit_global_timeslice(struct gk20a *g,
1776	struct channel_gk20a *c, bool patch)	1811	struct channel_gk20a *c, bool patch)
1777	{	1812	{
@@ -1828,6 +1863,7 @@ static int gr_gv11b_commit_global_timeslice(struct gk20a *g,
1828	void gv11b_init_gr(struct gpu_ops *gops)	1863	void gv11b_init_gr(struct gpu_ops *gops)
1829	{	1864	{
1830	gp10b_init_gr(gops);	1865	gp10b_init_gr(gops);
		1866	gops->gr.init_preemption_state = NULL;
1831	gops->gr.init_fs_state = gr_gv11b_init_fs_state;	1867	gops->gr.init_fs_state = gr_gv11b_init_fs_state;
1832	gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch;	1868	gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch;
1833	gops->gr.is_valid_class = gr_gv11b_is_valid_class;	1869	gops->gr.is_valid_class = gr_gv11b_is_valid_class;
@@ -1872,4 +1908,6 @@ void gv11b_init_gr(struct gpu_ops *gops)
1872	gops->gr.load_smid_config = gr_gv11b_load_smid_config;	1908	gops->gr.load_smid_config = gr_gv11b_load_smid_config;
1873	gops->gr.program_sm_id_numbering =	1909	gops->gr.program_sm_id_numbering =
1874	gr_gv11b_program_sm_id_numbering;	1910	gr_gv11b_program_sm_id_numbering;
		1911	gops->gr.commit_inst = gr_gv11b_commit_inst;
		1912
1875	}	1913	}


diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c new file mode 100644 index 00000000..3acc53f6 --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -0,0 +1,147 @@
		1	/*
		2	* Volta GPU series Subcontext
		3	*
		4	* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
		5	*
		6	* This program is free software; you can redistribute it and/or modify it
		7	* under the terms and conditions of the GNU General Public License,
		8	* version 2, as published by the Free Software Foundation.
		9	*
		10	* This program is distributed in the hope it will be useful, but WITHOUT
		11	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
		12	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
		13	* more details.
		14	*
		15	* You should have received a copy of the GNU General Public License along with
		16	* this program.
		17	*/
		18
		19	#include "gk20a/gk20a.h"
		20	#include "gk20a/semaphore_gk20a.h"
		21	#include "gv11b/subctx_gv11b.h"
		22	#include "gv11b/hw_ram_gv11b.h"
		23	#include "gv11b/hw_ctxsw_prog_gv11b.h"
		24
		25	static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
		26	struct mem_desc *inst_block);
		27
		28	void gv11b_free_subctx_header(struct channel_gk20a *c)
		29	{
		30	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
		31	struct gk20a *g = c->g;
		32
		33	gk20a_dbg_fn("");
		34
		35	if (ctx->mem.gpu_va) {
		36	gk20a_gmmu_unmap(c->vm, ctx->mem.gpu_va,
		37	ctx->mem.size, gk20a_mem_flag_none);
		38
		39	gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &ctx->mem);
		40	}
		41	}
		42
		43	int gv11b_alloc_subctx_header(struct channel_gk20a *c)
		44	{
		45	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
		46	struct gk20a *g = c->g;
		47	int ret = 0;
		48
		49	gk20a_dbg_fn("");
		50
		51	if (ctx->mem.gpu_va == 0) {
		52	ret = gk20a_gmmu_alloc_attr_sys(g,
		53	DMA_ATTR_NO_KERNEL_MAPPING,
		54	ctxsw_prog_fecs_header_v(),
		55	&ctx->mem);
		56	if (ret) {
		57	gk20a_err(dev_from_gk20a(g),
		58	"failed to allocate sub ctx header");
		59	return ret;
		60	}
		61	ctx->mem.gpu_va = gk20a_gmmu_map(c->vm,
		62	&ctx->mem.sgt,
		63	ctx->mem.size,
		64	NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
		65	gk20a_mem_flag_none, true,
		66	ctx->mem.aperture);
		67	if (!ctx->mem.gpu_va) {
		68	gk20a_err(dev_from_gk20a(g),
		69	"failed to map ctx header");
		70	gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING,
		71	&ctx->mem);
		72	return -ENOMEM;
		73	}
		74	/* Now clear the buffer */
		75	if (gk20a_mem_begin(g, &ctx->mem))
		76	return -ENOMEM;
		77
		78	gk20a_memset(g, &ctx->mem, 0, 0, ctx->mem.size);
		79	gk20a_mem_end(g, &ctx->mem);
		80
		81	gv11b_init_subcontext_pdb(c, &c->inst_block);
		82
		83	}
		84	return ret;
		85	}
		86
		87	static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
		88	struct mem_desc *inst_block)
		89	{
		90	struct gk20a *g = c->g;
		91	struct vm_gk20a *vm;
		92	u64 pdb_addr, pdb_addr_lo, pdb_addr_hi;
		93	u32 format_word;
		94	u32 lo, hi;
		95
		96	gk20a_dbg_fn("");
		97	/* load main pdb as veid0 pdb also */
		98	vm = c->vm;
		99	pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
		100	pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
		101	pdb_addr_hi = u64_hi32(pdb_addr);
		102	format_word = ram_in_sc_page_dir_base_target_f(
		103	ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) \|
		104	ram_in_sc_page_dir_base_vol_f(
		105	ram_in_sc_page_dir_base_vol_true_v(), 0) \|
		106	ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) \|
		107	ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) \|
		108	ram_in_sc_use_ver2_pt_format_f(1, 0) \|
		109	ram_in_sc_big_page_size_f(1, 0) \|
		110	ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo);
		111	lo = ram_in_sc_page_dir_base_vol_0_w();
		112	hi = ram_in_sc_page_dir_base_hi_0_w();
		113	gk20a_mem_wr32(g, inst_block, lo, format_word);
		114	gk20a_mem_wr32(g, inst_block, hi, pdb_addr_hi);
		115
		116	/* make subcontext0 address space to valid */
		117	/* TODO fix proper hw register definations */
		118	gk20a_mem_wr32(g, inst_block, 166, 0x1);
		119	gk20a_mem_wr32(g, inst_block, 167, 0);
		120	gk20a_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(),
		121	ram_in_engine_wfi_veid_f(0));
		122
		123	}
		124
		125	int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
		126	{
		127	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
		128	struct mem_desc *gr_mem;
		129	struct gk20a *g = c->g;
		130	int ret = 0;
		131	u32 addr_lo, addr_hi;
		132
		133	addr_lo = u64_lo32(gpu_va);
		134	addr_hi = u64_hi32(gpu_va);
		135
		136	gr_mem = &ctx->mem;
		137	g->ops.mm.l2_flush(g, true);
		138	if (gk20a_mem_begin(g, gr_mem))
		139	return -ENOMEM;
		140
		141	gk20a_mem_wr(g, gr_mem,
		142	ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi);
		143	gk20a_mem_wr(g, gr_mem,
		144	ctxsw_prog_main_image_context_buffer_ptr_o(), addr_lo);
		145	gk20a_mem_end(g, gr_mem);
		146	return ret;
		147	}


diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h new file mode 100644 index 00000000..357cd254 --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h
@@ -0,0 +1,27 @@
		1	/*
		2	*
		3	* Volta GPU series Subcontext
		4	*
		5	* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
		6	*
		7	* This program is free software; you can redistribute it and/or modify it
		8	* under the terms and conditions of the GNU General Public License,
		9	* version 2, as published by the Free Software Foundation.
		10	*
		11	* This program is distributed in the hope it will be useful, but WITHOUT
		12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
		13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
		14	* more details.
		15	*
		16	* You should have received a copy of the GNU General Public License along with
		17	* this program.
		18	*/
		19	#ifndef __SUBCONTEXT_GV11B_H__
		20	#define __SUBCONTEXT_GV11B_H__
		21
		22	int gv11b_alloc_subctx_header(struct channel_gk20a *c);
		23
		24	void gv11b_free_subctx_header(struct channel_gk20a *c);
		25
		26	int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va);
		27	#endif /* __SUBCONTEXT_GV11B_H__ */