summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gv11b
diff options
context:
space:
mode:
authorseshendra Gadagottu <sgadagottu@nvidia.com>2016-11-16 15:25:08 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2016-12-20 18:15:14 -0500
commit67b54c3c76cfa488e1d102e5c74a32b3aaba9287 (patch)
treee56f4b6b11327710cd5665c241c4e454f16626e2 /drivers/gpu/nvgpu/gv11b
parent26199ffdf5bb5cf1d5a810c63904cc950314fe7b (diff)
gpu: nvgpu: legacy support with subcontext
gv11b needs at least one subcontext to submit work. To support legacy mode on gv11b, the main context is currently always copied into subcontext0 (veid0) during channel commit instance. As part of channel commit instance, veid0 for that channel is created and the relevant pdb and context info are copied to veid0. JIRA GV11B-21 Change-Id: I5147a1708b5e94202fa55e73fa0e53199ab7fced Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com> Reviewed-on: http://git-master/r/1231169 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b')
-rw-r--r--drivers/gpu/nvgpu/gv11b/fifo_gv11b.c12
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.c42
-rw-r--r--drivers/gpu/nvgpu/gv11b/subctx_gv11b.c147
-rw-r--r--drivers/gpu/nvgpu/gv11b/subctx_gv11b.h27
4 files changed, 226 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index bc413a9e..b9276e09 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -19,6 +19,8 @@
19#include "gp10b/fifo_gp10b.h" 19#include "gp10b/fifo_gp10b.h"
20#include "hw_pbdma_gv11b.h" 20#include "hw_pbdma_gv11b.h"
21#include "fifo_gv11b.h" 21#include "fifo_gv11b.h"
22#include "subctx_gv11b.h"
23#include "gr_gv11b.h"
22#include "hw_fifo_gv11b.h" 24#include "hw_fifo_gv11b.h"
23#include "hw_ram_gv11b.h" 25#include "hw_ram_gv11b.h"
24#include "hw_ccsr_gv11b.h" 26#include "hw_ccsr_gv11b.h"
@@ -200,6 +202,15 @@ static void gv11b_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
200 202
201} 203}
202 204
/*
 * gv11b channel unbind hook.
 *
 * Releases the channel's subcontext header first and then hands off to the
 * common gk20a unbind path.
 */
static void channel_gv11b_unbind(struct channel_gk20a *ch)
{
	gk20a_dbg_fn("");

	/* Drop the per-channel subcontext header before the common unbind. */
	gv11b_free_subctx_header(ch);
	channel_gk20a_unbind(ch);
}
203 214
204static u32 gv11b_fifo_get_num_fifos(struct gk20a *g) 215static u32 gv11b_fifo_get_num_fifos(struct gk20a *g)
205{ 216{
@@ -218,4 +229,5 @@ void gv11b_init_fifo(struct gpu_ops *gops)
218 gops->fifo.userd_gp_get = gv11b_userd_gp_get; 229 gops->fifo.userd_gp_get = gv11b_userd_gp_get;
219 gops->fifo.userd_gp_put = gv11b_userd_gp_put; 230 gops->fifo.userd_gp_put = gv11b_userd_gp_put;
220 gops->fifo.setup_ramfc = channel_gv11b_setup_ramfc; 231 gops->fifo.setup_ramfc = channel_gv11b_setup_ramfc;
232 gops->fifo.unbind_channel = channel_gv11b_unbind;
221} 233}
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 7f5b8d3f..bdb96329 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -13,6 +13,7 @@
13 * more details. 13 * more details.
14 */ 14 */
15 15
16#include <linux/tegra_gpu_t19x.h>
16#include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */ 17#include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */
17#include <linux/delay.h> 18#include <linux/delay.h>
18#include <linux/tegra-fuse.h> 19#include <linux/tegra-fuse.h>
@@ -24,12 +25,16 @@
24 25
25#include "gm20b/gr_gm20b.h" 26#include "gm20b/gr_gm20b.h"
26#include "gv11b/gr_gv11b.h" 27#include "gv11b/gr_gv11b.h"
28#include "gv11b/mm_gv11b.h"
29#include "gv11b/subctx_gv11b.h"
27#include "hw_gr_gv11b.h" 30#include "hw_gr_gv11b.h"
28#include "hw_fifo_gv11b.h" 31#include "hw_fifo_gv11b.h"
29#include "hw_proj_gv11b.h" 32#include "hw_proj_gv11b.h"
30#include "hw_ctxsw_prog_gv11b.h" 33#include "hw_ctxsw_prog_gv11b.h"
31#include "hw_mc_gv11b.h" 34#include "hw_mc_gv11b.h"
32#include "hw_gr_gv11b.h" 35#include "hw_gr_gv11b.h"
36#include "hw_ram_gv11b.h"
37#include "hw_pbdma_gv11b.h"
33#include <linux/vmalloc.h> 38#include <linux/vmalloc.h>
34#include <linux/tegra_gpu_t19x.h> 39#include <linux/tegra_gpu_t19x.h>
35 40
@@ -1583,7 +1588,6 @@ static int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
1583 return 0; 1588 return 0;
1584} 1589}
1585 1590
1586
1587static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) 1591static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index)
1588{ 1592{
1589 struct av_list_gk20a *sw_veid_bundle_init = 1593 struct av_list_gk20a *sw_veid_bundle_init =
@@ -1766,12 +1770,43 @@ static int gr_gv11b_load_smid_config(struct gk20a *g)
1766 1770
1767 for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) 1771 for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
1768 gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]); 1772 gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
1769
1770 kfree(tpc_sm_id); 1773 kfree(tpc_sm_id);
1771 1774
1772 return 0; 1775 return 0;
1773} 1776}
1774 1777
1778static int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
1779{
1780 u32 addr_lo;
1781 u32 addr_hi;
1782 struct ctx_header_desc *ctx;
1783
1784 gk20a_dbg_fn("");
1785
1786 gv11b_alloc_subctx_header(c);
1787
1788 gv11b_update_subctx_header(c, gpu_va);
1789
1790 ctx = &c->ch_ctx.ctx_header;
1791 addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v();
1792 addr_hi = u64_hi32(ctx->mem.gpu_va);
1793
1794 /* point this address to engine_wfi_ptr */
1795 gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_target_w(),
1796 ram_in_engine_cs_wfi_v() |
1797 ram_in_engine_wfi_target_f(
1798 ram_in_engine_wfi_target_sys_mem_ncoh_v()) |
1799 ram_in_engine_wfi_mode_f(ram_in_engine_wfi_mode_virtual_v()) |
1800 ram_in_engine_wfi_ptr_lo_f(addr_lo));
1801
1802 gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_ptr_hi_w(),
1803 ram_in_engine_wfi_ptr_hi_f(addr_hi));
1804
1805 return 0;
1806}
1807
1808
1809
1775static int gr_gv11b_commit_global_timeslice(struct gk20a *g, 1810static int gr_gv11b_commit_global_timeslice(struct gk20a *g,
1776 struct channel_gk20a *c, bool patch) 1811 struct channel_gk20a *c, bool patch)
1777{ 1812{
@@ -1828,6 +1863,7 @@ static int gr_gv11b_commit_global_timeslice(struct gk20a *g,
1828void gv11b_init_gr(struct gpu_ops *gops) 1863void gv11b_init_gr(struct gpu_ops *gops)
1829{ 1864{
1830 gp10b_init_gr(gops); 1865 gp10b_init_gr(gops);
1866 gops->gr.init_preemption_state = NULL;
1831 gops->gr.init_fs_state = gr_gv11b_init_fs_state; 1867 gops->gr.init_fs_state = gr_gv11b_init_fs_state;
1832 gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch; 1868 gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch;
1833 gops->gr.is_valid_class = gr_gv11b_is_valid_class; 1869 gops->gr.is_valid_class = gr_gv11b_is_valid_class;
@@ -1872,4 +1908,6 @@ void gv11b_init_gr(struct gpu_ops *gops)
1872 gops->gr.load_smid_config = gr_gv11b_load_smid_config; 1908 gops->gr.load_smid_config = gr_gv11b_load_smid_config;
1873 gops->gr.program_sm_id_numbering = 1909 gops->gr.program_sm_id_numbering =
1874 gr_gv11b_program_sm_id_numbering; 1910 gr_gv11b_program_sm_id_numbering;
1911 gops->gr.commit_inst = gr_gv11b_commit_inst;
1912
1875} 1913}
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
new file mode 100644
index 00000000..3acc53f6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -0,0 +1,147 @@
1/*
2 * Volta GPU series Subcontext
3 *
4 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program.
17 */
18
19#include "gk20a/gk20a.h"
20#include "gk20a/semaphore_gk20a.h"
21#include "gv11b/subctx_gv11b.h"
22#include "gv11b/hw_ram_gv11b.h"
23#include "gv11b/hw_ctxsw_prog_gv11b.h"
24
25static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
26 struct mem_desc *inst_block);
27
28void gv11b_free_subctx_header(struct channel_gk20a *c)
29{
30 struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
31 struct gk20a *g = c->g;
32
33 gk20a_dbg_fn("");
34
35 if (ctx->mem.gpu_va) {
36 gk20a_gmmu_unmap(c->vm, ctx->mem.gpu_va,
37 ctx->mem.size, gk20a_mem_flag_none);
38
39 gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &ctx->mem);
40 }
41}
42
43int gv11b_alloc_subctx_header(struct channel_gk20a *c)
44{
45 struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
46 struct gk20a *g = c->g;
47 int ret = 0;
48
49 gk20a_dbg_fn("");
50
51 if (ctx->mem.gpu_va == 0) {
52 ret = gk20a_gmmu_alloc_attr_sys(g,
53 DMA_ATTR_NO_KERNEL_MAPPING,
54 ctxsw_prog_fecs_header_v(),
55 &ctx->mem);
56 if (ret) {
57 gk20a_err(dev_from_gk20a(g),
58 "failed to allocate sub ctx header");
59 return ret;
60 }
61 ctx->mem.gpu_va = gk20a_gmmu_map(c->vm,
62 &ctx->mem.sgt,
63 ctx->mem.size,
64 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
65 gk20a_mem_flag_none, true,
66 ctx->mem.aperture);
67 if (!ctx->mem.gpu_va) {
68 gk20a_err(dev_from_gk20a(g),
69 "failed to map ctx header");
70 gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING,
71 &ctx->mem);
72 return -ENOMEM;
73 }
74 /* Now clear the buffer */
75 if (gk20a_mem_begin(g, &ctx->mem))
76 return -ENOMEM;
77
78 gk20a_memset(g, &ctx->mem, 0, 0, ctx->mem.size);
79 gk20a_mem_end(g, &ctx->mem);
80
81 gv11b_init_subcontext_pdb(c, &c->inst_block);
82
83 }
84 return ret;
85}
86
87static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
88 struct mem_desc *inst_block)
89{
90 struct gk20a *g = c->g;
91 struct vm_gk20a *vm;
92 u64 pdb_addr, pdb_addr_lo, pdb_addr_hi;
93 u32 format_word;
94 u32 lo, hi;
95
96 gk20a_dbg_fn("");
97 /* load main pdb as veid0 pdb also */
98 vm = c->vm;
99 pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
100 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
101 pdb_addr_hi = u64_hi32(pdb_addr);
102 format_word = ram_in_sc_page_dir_base_target_f(
103 ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) |
104 ram_in_sc_page_dir_base_vol_f(
105 ram_in_sc_page_dir_base_vol_true_v(), 0) |
106 ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) |
107 ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) |
108 ram_in_sc_use_ver2_pt_format_f(1, 0) |
109 ram_in_sc_big_page_size_f(1, 0) |
110 ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo);
111 lo = ram_in_sc_page_dir_base_vol_0_w();
112 hi = ram_in_sc_page_dir_base_hi_0_w();
113 gk20a_mem_wr32(g, inst_block, lo, format_word);
114 gk20a_mem_wr32(g, inst_block, hi, pdb_addr_hi);
115
116 /* make subcontext0 address space to valid */
117 /* TODO fix proper hw register definations */
118 gk20a_mem_wr32(g, inst_block, 166, 0x1);
119 gk20a_mem_wr32(g, inst_block, 167, 0);
120 gk20a_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(),
121 ram_in_engine_wfi_veid_f(0));
122
123}
124
125int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
126{
127 struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
128 struct mem_desc *gr_mem;
129 struct gk20a *g = c->g;
130 int ret = 0;
131 u32 addr_lo, addr_hi;
132
133 addr_lo = u64_lo32(gpu_va);
134 addr_hi = u64_hi32(gpu_va);
135
136 gr_mem = &ctx->mem;
137 g->ops.mm.l2_flush(g, true);
138 if (gk20a_mem_begin(g, gr_mem))
139 return -ENOMEM;
140
141 gk20a_mem_wr(g, gr_mem,
142 ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi);
143 gk20a_mem_wr(g, gr_mem,
144 ctxsw_prog_main_image_context_buffer_ptr_o(), addr_lo);
145 gk20a_mem_end(g, gr_mem);
146 return ret;
147}
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h
new file mode 100644
index 00000000..357cd254
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h
@@ -0,0 +1,27 @@
1/*
2 *
3 * Volta GPU series Subcontext
4 *
5 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms and conditions of the GNU General Public License,
9 * version 2, as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program.
18 */
19#ifndef __SUBCONTEXT_GV11B_H__
20#define __SUBCONTEXT_GV11B_H__
21
22int gv11b_alloc_subctx_header(struct channel_gk20a *c);
23
24void gv11b_free_subctx_header(struct channel_gk20a *c);
25
26int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va);
27#endif /* __SUBCONTEXT_GV11B_H__ */