summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/gpu/nvgpu/Makefile 3
-rw-r--r-- drivers/gpu/nvgpu/gv11b/fifo_gv11b.c 12
-rw-r--r-- drivers/gpu/nvgpu/gv11b/gr_gv11b.c 42
-rw-r--r-- drivers/gpu/nvgpu/gv11b/subctx_gv11b.c 147
-rw-r--r-- drivers/gpu/nvgpu/gv11b/subctx_gv11b.h 27
5 files changed, 228 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 4ea56d8e..c059e464 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -13,6 +13,7 @@ nvgpu-y += \
13 $(nvgpu-t19x)/gv11b/ce_gv11b.o \ 13 $(nvgpu-t19x)/gv11b/ce_gv11b.o \
14 $(nvgpu-t19x)/gv11b/gr_ctx_gv11b.o \ 14 $(nvgpu-t19x)/gv11b/gr_ctx_gv11b.o \
15 $(nvgpu-t19x)/gv11b/pmu_gv11b.o \ 15 $(nvgpu-t19x)/gv11b/pmu_gv11b.o \
16 $(nvgpu-t19x)/gv11b/therm_gv11b.o 16 $(nvgpu-t19x)/gv11b/therm_gv11b.o \
17 $(nvgpu-t19x)/gv11b/subctx_gv11b.o
17 18
18nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t19x)/gv11b/platform_gv11b_tegra.o 19nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t19x)/gv11b/platform_gv11b_tegra.o
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index bc413a9e..b9276e09 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -19,6 +19,8 @@
19#include "gp10b/fifo_gp10b.h" 19#include "gp10b/fifo_gp10b.h"
20#include "hw_pbdma_gv11b.h" 20#include "hw_pbdma_gv11b.h"
21#include "fifo_gv11b.h" 21#include "fifo_gv11b.h"
22#include "subctx_gv11b.h"
23#include "gr_gv11b.h"
22#include "hw_fifo_gv11b.h" 24#include "hw_fifo_gv11b.h"
23#include "hw_ram_gv11b.h" 25#include "hw_ram_gv11b.h"
24#include "hw_ccsr_gv11b.h" 26#include "hw_ccsr_gv11b.h"
@@ -200,6 +202,15 @@ static void gv11b_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
200 202
201} 203}
202 204
205static void channel_gv11b_unbind(struct channel_gk20a *ch)
206{
207 gk20a_dbg_fn("");
208
209 gv11b_free_subctx_header(ch);
210
211 channel_gk20a_unbind(ch);
212
213}
203 214
204static u32 gv11b_fifo_get_num_fifos(struct gk20a *g) 215static u32 gv11b_fifo_get_num_fifos(struct gk20a *g)
205{ 216{
@@ -218,4 +229,5 @@ void gv11b_init_fifo(struct gpu_ops *gops)
218 gops->fifo.userd_gp_get = gv11b_userd_gp_get; 229 gops->fifo.userd_gp_get = gv11b_userd_gp_get;
219 gops->fifo.userd_gp_put = gv11b_userd_gp_put; 230 gops->fifo.userd_gp_put = gv11b_userd_gp_put;
220 gops->fifo.setup_ramfc = channel_gv11b_setup_ramfc; 231 gops->fifo.setup_ramfc = channel_gv11b_setup_ramfc;
232 gops->fifo.unbind_channel = channel_gv11b_unbind;
221} 233}
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 7f5b8d3f..bdb96329 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -13,6 +13,7 @@
13 * more details. 13 * more details.
14 */ 14 */
15 15
16#include <linux/tegra_gpu_t19x.h>
16#include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */ 17#include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */
17#include <linux/delay.h> 18#include <linux/delay.h>
18#include <linux/tegra-fuse.h> 19#include <linux/tegra-fuse.h>
@@ -24,12 +25,16 @@
24 25
25#include "gm20b/gr_gm20b.h" 26#include "gm20b/gr_gm20b.h"
26#include "gv11b/gr_gv11b.h" 27#include "gv11b/gr_gv11b.h"
28#include "gv11b/mm_gv11b.h"
29#include "gv11b/subctx_gv11b.h"
27#include "hw_gr_gv11b.h" 30#include "hw_gr_gv11b.h"
28#include "hw_fifo_gv11b.h" 31#include "hw_fifo_gv11b.h"
29#include "hw_proj_gv11b.h" 32#include "hw_proj_gv11b.h"
30#include "hw_ctxsw_prog_gv11b.h" 33#include "hw_ctxsw_prog_gv11b.h"
31#include "hw_mc_gv11b.h" 34#include "hw_mc_gv11b.h"
32#include "hw_gr_gv11b.h" 35#include "hw_gr_gv11b.h"
36#include "hw_ram_gv11b.h"
37#include "hw_pbdma_gv11b.h"
33#include <linux/vmalloc.h> 38#include <linux/vmalloc.h>
34#include <linux/tegra_gpu_t19x.h> 39#include <linux/tegra_gpu_t19x.h>
35 40
@@ -1583,7 +1588,6 @@ static int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
1583 return 0; 1588 return 0;
1584} 1589}
1585 1590
1586
1587static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) 1591static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index)
1588{ 1592{
1589 struct av_list_gk20a *sw_veid_bundle_init = 1593 struct av_list_gk20a *sw_veid_bundle_init =
@@ -1766,12 +1770,43 @@ static int gr_gv11b_load_smid_config(struct gk20a *g)
1766 1770
1767 for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) 1771 for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
1768 gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]); 1772 gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
1769
1770 kfree(tpc_sm_id); 1773 kfree(tpc_sm_id);
1771 1774
1772 return 0; 1775 return 0;
1773} 1776}
1774 1777
1778static int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
1779{
1780 u32 addr_lo;
1781 u32 addr_hi;
1782 struct ctx_header_desc *ctx;
1783
1784 gk20a_dbg_fn("");
1785
1786 gv11b_alloc_subctx_header(c);
1787
1788 gv11b_update_subctx_header(c, gpu_va);
1789
1790 ctx = &c->ch_ctx.ctx_header;
1791 addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v();
1792 addr_hi = u64_hi32(ctx->mem.gpu_va);
1793
1794 /* point engine_wfi_ptr at the context header's GPU virtual address */
1795 gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_target_w(),
1796 ram_in_engine_cs_wfi_v() |
1797 ram_in_engine_wfi_target_f(
1798 ram_in_engine_wfi_target_sys_mem_ncoh_v()) |
1799 ram_in_engine_wfi_mode_f(ram_in_engine_wfi_mode_virtual_v()) |
1800 ram_in_engine_wfi_ptr_lo_f(addr_lo));
1801
1802 gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_ptr_hi_w(),
1803 ram_in_engine_wfi_ptr_hi_f(addr_hi));
1804
1805 return 0;
1806}
1807
1808
1809
1775static int gr_gv11b_commit_global_timeslice(struct gk20a *g, 1810static int gr_gv11b_commit_global_timeslice(struct gk20a *g,
1776 struct channel_gk20a *c, bool patch) 1811 struct channel_gk20a *c, bool patch)
1777{ 1812{
@@ -1828,6 +1863,7 @@ static int gr_gv11b_commit_global_timeslice(struct gk20a *g,
1828void gv11b_init_gr(struct gpu_ops *gops) 1863void gv11b_init_gr(struct gpu_ops *gops)
1829{ 1864{
1830 gp10b_init_gr(gops); 1865 gp10b_init_gr(gops);
1866 gops->gr.init_preemption_state = NULL;
1831 gops->gr.init_fs_state = gr_gv11b_init_fs_state; 1867 gops->gr.init_fs_state = gr_gv11b_init_fs_state;
1832 gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch; 1868 gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch;
1833 gops->gr.is_valid_class = gr_gv11b_is_valid_class; 1869 gops->gr.is_valid_class = gr_gv11b_is_valid_class;
@@ -1872,4 +1908,6 @@ void gv11b_init_gr(struct gpu_ops *gops)
1872 gops->gr.load_smid_config = gr_gv11b_load_smid_config; 1908 gops->gr.load_smid_config = gr_gv11b_load_smid_config;
1873 gops->gr.program_sm_id_numbering = 1909 gops->gr.program_sm_id_numbering =
1874 gr_gv11b_program_sm_id_numbering; 1910 gr_gv11b_program_sm_id_numbering;
1911 gops->gr.commit_inst = gr_gv11b_commit_inst;
1912
1875} 1913}
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
new file mode 100644
index 00000000..3acc53f6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -0,0 +1,147 @@
1/*
2 * Volta GPU series Subcontext
3 *
4 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program.
17 */
18
19#include "gk20a/gk20a.h"
20#include "gk20a/semaphore_gk20a.h"
21#include "gv11b/subctx_gv11b.h"
22#include "gv11b/hw_ram_gv11b.h"
23#include "gv11b/hw_ctxsw_prog_gv11b.h"
24
25static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
26 struct mem_desc *inst_block);
27
28void gv11b_free_subctx_header(struct channel_gk20a *c)
29{
30 struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
31 struct gk20a *g = c->g;
32
33 gk20a_dbg_fn("");
34
35 if (ctx->mem.gpu_va) {
36 gk20a_gmmu_unmap(c->vm, ctx->mem.gpu_va,
37 ctx->mem.size, gk20a_mem_flag_none);
38
39 gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &ctx->mem);
40 }
41}
42
43int gv11b_alloc_subctx_header(struct channel_gk20a *c)
44{
45 struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
46 struct gk20a *g = c->g;
47 int ret = 0;
48
49 gk20a_dbg_fn("");
50
51 if (ctx->mem.gpu_va == 0) {
52 ret = gk20a_gmmu_alloc_attr_sys(g,
53 DMA_ATTR_NO_KERNEL_MAPPING,
54 ctxsw_prog_fecs_header_v(),
55 &ctx->mem);
56 if (ret) {
57 gk20a_err(dev_from_gk20a(g),
58 "failed to allocate sub ctx header");
59 return ret;
60 }
61 ctx->mem.gpu_va = gk20a_gmmu_map(c->vm,
62 &ctx->mem.sgt,
63 ctx->mem.size,
64 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
65 gk20a_mem_flag_none, true,
66 ctx->mem.aperture);
67 if (!ctx->mem.gpu_va) {
68 gk20a_err(dev_from_gk20a(g),
69 "failed to map ctx header");
70 gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING,
71 &ctx->mem);
72 return -ENOMEM;
73 }
74 /* Now clear the buffer */
75 if (gk20a_mem_begin(g, &ctx->mem))
76 return -ENOMEM;
77
78 gk20a_memset(g, &ctx->mem, 0, 0, ctx->mem.size);
79 gk20a_mem_end(g, &ctx->mem);
80
81 gv11b_init_subcontext_pdb(c, &c->inst_block);
82
83 }
84 return ret;
85}
86
87static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
88 struct mem_desc *inst_block)
89{
90 struct gk20a *g = c->g;
91 struct vm_gk20a *vm;
92 u64 pdb_addr, pdb_addr_lo, pdb_addr_hi;
93 u32 format_word;
94 u32 lo, hi;
95
96 gk20a_dbg_fn("");
97 /* also load the main pdb as the veid0 pdb */
98 vm = c->vm;
99 pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
100 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
101 pdb_addr_hi = u64_hi32(pdb_addr);
102 format_word = ram_in_sc_page_dir_base_target_f(
103 ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) |
104 ram_in_sc_page_dir_base_vol_f(
105 ram_in_sc_page_dir_base_vol_true_v(), 0) |
106 ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) |
107 ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) |
108 ram_in_sc_use_ver2_pt_format_f(1, 0) |
109 ram_in_sc_big_page_size_f(1, 0) |
110 ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo);
111 lo = ram_in_sc_page_dir_base_vol_0_w();
112 hi = ram_in_sc_page_dir_base_hi_0_w();
113 gk20a_mem_wr32(g, inst_block, lo, format_word);
114 gk20a_mem_wr32(g, inst_block, hi, pdb_addr_hi);
115
116 /* mark the subcontext0 address space as valid */
117 /* TODO: replace the raw word offsets below with proper hw register definitions */
118 gk20a_mem_wr32(g, inst_block, 166, 0x1);
119 gk20a_mem_wr32(g, inst_block, 167, 0);
120 gk20a_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(),
121 ram_in_engine_wfi_veid_f(0));
122
123}
124
125int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
126{
127 struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
128 struct mem_desc *gr_mem;
129 struct gk20a *g = c->g;
130 int ret = 0;
131 u32 addr_lo, addr_hi;
132
133 addr_lo = u64_lo32(gpu_va);
134 addr_hi = u64_hi32(gpu_va);
135
136 gr_mem = &ctx->mem;
137 g->ops.mm.l2_flush(g, true);
138 if (gk20a_mem_begin(g, gr_mem))
139 return -ENOMEM;
140
141 gk20a_mem_wr(g, gr_mem,
142 ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi);
143 gk20a_mem_wr(g, gr_mem,
144 ctxsw_prog_main_image_context_buffer_ptr_o(), addr_lo);
145 gk20a_mem_end(g, gr_mem);
146 return ret;
147}
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h
new file mode 100644
index 00000000..357cd254
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h
@@ -0,0 +1,27 @@
1/*
2 *
3 * Volta GPU series Subcontext
4 *
5 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms and conditions of the GNU General Public License,
9 * version 2, as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program.
18 */
19#ifndef __SUBCONTEXT_GV11B_H__
20#define __SUBCONTEXT_GV11B_H__
21
22int gv11b_alloc_subctx_header(struct channel_gk20a *c);
23
24void gv11b_free_subctx_header(struct channel_gk20a *c);
25
26int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va);
27#endif /* __SUBCONTEXT_GV11B_H__ */