author		seshendra Gadagottu <sgadagottu@nvidia.com>	2017-06-29 18:59:05 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-07-05 18:40:25 -0400
commit		6d758eb81bcbff4e50df5c9fa67a369a4e1f2074 (patch)
tree		8b6f62c1cb64f878c13746b8f14d42a2d45105e6 /drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
parent		37fa5128ec260bc9ebb2e902ac2dfe9baead4656 (diff)
gpu: nvgpu: gv11b: support for full subcontext
Changes to enable 64 subcontexts: 1 SYNC + 63 ASYNC. Currently all
subcontexts within a TSG can have only a single address space. Add
support for NVGPU_TSG_IOCTL_BIND_CHANNEL_EX so that the client can
select the subctx id.

Bug 1842197

Change-Id: Icf56a41303bd1ad7fc6f2a6fbc691bb7b4a01d22
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master/r/1511145
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
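For context, a minimal user-space sketch of the bind path this patch enables.
The ioctl name comes from the commit message itself; the header path and the
nvgpu_tsg_bind_channel_ex_args field names (channel_fd, subcontext_id) are
assumptions based on the L4T uapi header, not part of this diff:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed uapi header exposing the TSG ioctls */

/* Bind an already-open channel into a TSG on a caller-chosen subctx id
 * (VEID). Subctx id 0 is the single SYNC slot; 1..63 are ASYNC. */
static int bind_channel_to_subctx(int tsg_fd, int channel_fd,
				  unsigned int subctx_id)
{
	struct nvgpu_tsg_bind_channel_ex_args args;

	memset(&args, 0, sizeof(args));
	args.channel_fd = channel_fd;		/* assumed field name */
	args.subcontext_id = subctx_id;		/* assumed field name */

	return ioctl(tsg_fd, NVGPU_TSG_IOCTL_BIND_CHANNEL_EX, &args);
}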
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/subctx_gv11b.c')
-rw-r--r--	drivers/gpu/nvgpu/gv11b/subctx_gv11b.c	90
1 file changed, 57 insertions(+), 33 deletions(-)
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
index 79ed0d1e..cb042f87 100644
--- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -31,12 +31,17 @@
 static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
 		struct nvgpu_mem *inst_block);
 
+static void gv11b_subctx_commit_valid_mask(struct channel_gk20a *c,
+		struct nvgpu_mem *inst_block);
+static void gv11b_subctx_commit_pdb(struct channel_gk20a *c,
+		struct nvgpu_mem *inst_block);
+
 void gv11b_free_subctx_header(struct channel_gk20a *c)
 {
 	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
 	struct gk20a *g = c->g;
 
-	gk20a_dbg_fn("");
+	nvgpu_log(g, gpu_dbg_fn, "gv11b_free_subctx_header");
 
 	if (ctx->mem.gpu_va) {
 		nvgpu_gmmu_unmap(c->vm, &ctx->mem, ctx->mem.gpu_va);
@@ -52,7 +57,7 @@ int gv11b_alloc_subctx_header(struct channel_gk20a *c)
 	struct gr_gk20a *gr = &g->gr;
 	int ret = 0;
 
-	gk20a_dbg_fn("");
+	nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header");
 
 	if (ctx->mem.gpu_va == 0) {
 		ret = nvgpu_dma_alloc_flags_sys(g,
@@ -82,7 +87,6 @@ int gv11b_alloc_subctx_header(struct channel_gk20a *c)
 		nvgpu_mem_end(g, &ctx->mem);
 
 		gv11b_init_subcontext_pdb(c, &c->inst_block);
-
 	}
 	return ret;
 }
@@ -91,37 +95,13 @@ static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
 		struct nvgpu_mem *inst_block)
 {
 	struct gk20a *g = c->g;
-	struct vm_gk20a *vm;
-	u64 pdb_addr, pdb_addr_lo, pdb_addr_hi;
-	u32 format_word;
-	u32 lo, hi;
 
-	gk20a_dbg_fn("");
-	/* load main pdb as veid0 pdb also */
-	vm = c->vm;
-	pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.priv.sgt->sgl, 0);
-	pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
-	pdb_addr_hi = u64_hi32(pdb_addr);
-	format_word = ram_in_sc_page_dir_base_target_f(
-		ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) |
-		ram_in_sc_page_dir_base_vol_f(
-		ram_in_sc_page_dir_base_vol_true_v(), 0) |
-		ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) |
-		ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) |
-		ram_in_sc_use_ver2_pt_format_f(1, 0) |
-		ram_in_sc_big_page_size_f(1, 0) |
-		ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo);
-	lo = ram_in_sc_page_dir_base_vol_0_w();
-	hi = ram_in_sc_page_dir_base_hi_0_w();
-	nvgpu_mem_wr32(g, inst_block, lo, format_word);
-	nvgpu_mem_wr32(g, inst_block, hi, pdb_addr_hi);
-
-	/* make subcontext0 address space to valid */
-	/* TODO fix proper hw register definations */
-	nvgpu_mem_wr32(g, inst_block, 166, 0x1);
-	nvgpu_mem_wr32(g, inst_block, 167, 0);
+	gv11b_subctx_commit_pdb(c, inst_block);
+	gv11b_subctx_commit_valid_mask(c, inst_block);
+
+	nvgpu_log(g, gpu_dbg_info, " subctx %d instblk set", c->t19x.subctx_id);
 	nvgpu_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(),
-		ram_in_engine_wfi_veid_f(0));
+		ram_in_engine_wfi_veid_f(c->t19x.subctx_id));
 
 }
 
@@ -149,7 +129,51 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
 	return ret;
 }
 
-int gv11b_get_max_subctx_count(struct gk20a *g)
+void gv11b_subctx_commit_valid_mask(struct channel_gk20a *c,
+		struct nvgpu_mem *inst_block)
+{
+	struct gk20a *g = c->g;
+
+	/* Make all subctx pdbs valid */
+	nvgpu_mem_wr32(g, inst_block, 166, 0xffffffff);
+	nvgpu_mem_wr32(g, inst_block, 167, 0xffffffff);
+}
+
+void gv11b_subctx_commit_pdb(struct channel_gk20a *c,
+		struct nvgpu_mem *inst_block)
+{
+	struct gk20a *g = c->g;
+	u32 lo, hi;
+	u32 subctx_id = 0;
+	u32 format_word;
+	u32 pdb_addr_lo, pdb_addr_hi;
+	u64 pdb_addr;
+
+	pdb_addr = g->ops.mm.get_iova_addr(g, c->vm->pdb.mem.priv.sgt->sgl, 0);
+	pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
+	pdb_addr_hi = u64_hi32(pdb_addr);
+	format_word = ram_in_sc_page_dir_base_target_f(
+		ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) |
+		ram_in_sc_page_dir_base_vol_f(
+		ram_in_sc_page_dir_base_vol_true_v(), 0) |
+		ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) |
+		ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) |
+		ram_in_sc_use_ver2_pt_format_f(1, 0) |
+		ram_in_sc_big_page_size_f(1, 0) |
+		ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo);
+	nvgpu_log(g, gpu_dbg_info, " pdb info lo %x hi %x",
+		format_word, pdb_addr_hi);
+	for (subctx_id = 0; subctx_id < gv11b_get_max_subctx_count(g);
+		subctx_id++) {
+		lo = ram_in_sc_page_dir_base_vol_0_w() + (4 * subctx_id);
+		hi = ram_in_sc_page_dir_base_hi_0_w() + (4 * subctx_id);
+		nvgpu_mem_wr32(g, inst_block, lo, format_word);
+		nvgpu_mem_wr32(g, inst_block, hi, pdb_addr_hi);
+	}
+}
+
+
+u32 gv11b_get_max_subctx_count(struct gk20a *g)
 {
 	u32 data = gk20a_readl(g, gr_pri_fe_chip_def_info_r());
 
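A note on the arithmetic in the new functions, as a standalone sketch rather
than driver code: the two writes at word offsets 166 and 167 (still magic
numbers here, per the removed "TODO fix proper hw register definations"
comment) form a 64-bit valid mask with one bit per subcontext, so 0xffffffff
in both words marks all 64 (1 SYNC + 63 ASYNC) address spaces valid, where
the old code's 0x1/0 validated only VEID0. Likewise, each subcontext's
page-directory-base entry sits a fixed 4-word stride past the VEID0 entry,
which is what the "4 * subctx_id" offsets in gv11b_subctx_commit_pdb compute.
The helper names below are hypothetical:

#include <stdint.h>

#define SUBCTX_VALID_MASK_LO_W	166U	/* word offsets taken from the diff */
#define SUBCTX_VALID_MASK_HI_W	167U
#define SUBCTX_PDB_STRIDE_W	4U	/* words per subctx PDB entry */

/* Word offset of the format/lo word for a given subctx id, relative to
 * the VEID0 entry (ram_in_sc_page_dir_base_vol_0_w() in the driver). */
static uint32_t subctx_pdb_lo_word(uint32_t veid0_word, uint32_t subctx_id)
{
	return veid0_word + SUBCTX_PDB_STRIDE_W * subctx_id;
}

/* 64 subcontexts need 64 valid bits: two 32-bit words, all-ones each
 * when every VEID is to be marked valid. */
static uint64_t subctx_valid_mask(uint32_t num_subctx)
{
	return (num_subctx >= 64) ? ~0ULL : ((1ULL << num_subctx) - 1);
}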