diff options
author | seshendra Gadagottu <sgadagottu@nvidia.com> | 2017-06-29 18:59:05 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-07-05 18:40:25 -0400 |
commit | 6d758eb81bcbff4e50df5c9fa67a369a4e1f2074 (patch) | |
tree | 8b6f62c1cb64f878c13746b8f14d42a2d45105e6 /drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | |
parent | 37fa5128ec260bc9ebb2e902ac2dfe9baead4656 (diff) |
gpu: nvgpu: gv11b: support for full subcontext
Changes to enable 64 subcontexts: 1 SYNC + 63 ASYNC
Currently all subcontexts within a tsg can have only
a single address space.
Add support for NVGPU_TSG_IOCTL_BIND_CHANNEL_EX for
selecting subctx id by client.
Bug 1842197
Change-Id: Icf56a41303bd1ad7fc6f2a6fbc691bb7b4a01d22
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master/r/1511145
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/subctx_gv11b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | 90 |
1 files changed, 57 insertions, 33 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c index 79ed0d1e..cb042f87 100644 --- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | |||
@@ -31,12 +31,17 @@ | |||
31 | static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, | 31 | static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, |
32 | struct nvgpu_mem *inst_block); | 32 | struct nvgpu_mem *inst_block); |
33 | 33 | ||
34 | static void gv11b_subctx_commit_valid_mask(struct channel_gk20a *c, | ||
35 | struct nvgpu_mem *inst_block); | ||
36 | static void gv11b_subctx_commit_pdb(struct channel_gk20a *c, | ||
37 | struct nvgpu_mem *inst_block); | ||
38 | |||
34 | void gv11b_free_subctx_header(struct channel_gk20a *c) | 39 | void gv11b_free_subctx_header(struct channel_gk20a *c) |
35 | { | 40 | { |
36 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | 41 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; |
37 | struct gk20a *g = c->g; | 42 | struct gk20a *g = c->g; |
38 | 43 | ||
39 | gk20a_dbg_fn(""); | 44 | nvgpu_log(g, gpu_dbg_fn, "gv11b_free_subctx_header"); |
40 | 45 | ||
41 | if (ctx->mem.gpu_va) { | 46 | if (ctx->mem.gpu_va) { |
42 | nvgpu_gmmu_unmap(c->vm, &ctx->mem, ctx->mem.gpu_va); | 47 | nvgpu_gmmu_unmap(c->vm, &ctx->mem, ctx->mem.gpu_va); |
@@ -52,7 +57,7 @@ int gv11b_alloc_subctx_header(struct channel_gk20a *c) | |||
52 | struct gr_gk20a *gr = &g->gr; | 57 | struct gr_gk20a *gr = &g->gr; |
53 | int ret = 0; | 58 | int ret = 0; |
54 | 59 | ||
55 | gk20a_dbg_fn(""); | 60 | nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header"); |
56 | 61 | ||
57 | if (ctx->mem.gpu_va == 0) { | 62 | if (ctx->mem.gpu_va == 0) { |
58 | ret = nvgpu_dma_alloc_flags_sys(g, | 63 | ret = nvgpu_dma_alloc_flags_sys(g, |
@@ -82,7 +87,6 @@ int gv11b_alloc_subctx_header(struct channel_gk20a *c) | |||
82 | nvgpu_mem_end(g, &ctx->mem); | 87 | nvgpu_mem_end(g, &ctx->mem); |
83 | 88 | ||
84 | gv11b_init_subcontext_pdb(c, &c->inst_block); | 89 | gv11b_init_subcontext_pdb(c, &c->inst_block); |
85 | |||
86 | } | 90 | } |
87 | return ret; | 91 | return ret; |
88 | } | 92 | } |
@@ -91,37 +95,13 @@ static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, | |||
91 | struct nvgpu_mem *inst_block) | 95 | struct nvgpu_mem *inst_block) |
92 | { | 96 | { |
93 | struct gk20a *g = c->g; | 97 | struct gk20a *g = c->g; |
94 | struct vm_gk20a *vm; | ||
95 | u64 pdb_addr, pdb_addr_lo, pdb_addr_hi; | ||
96 | u32 format_word; | ||
97 | u32 lo, hi; | ||
98 | 98 | ||
99 | gk20a_dbg_fn(""); | 99 | gv11b_subctx_commit_pdb(c, inst_block); |
100 | /* load main pdb as veid0 pdb also */ | 100 | gv11b_subctx_commit_valid_mask(c, inst_block); |
101 | vm = c->vm; | 101 | |
102 | pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.priv.sgt->sgl, 0); | 102 | nvgpu_log(g, gpu_dbg_info, " subctx %d instblk set", c->t19x.subctx_id); |
103 | pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); | ||
104 | pdb_addr_hi = u64_hi32(pdb_addr); | ||
105 | format_word = ram_in_sc_page_dir_base_target_f( | ||
106 | ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) | | ||
107 | ram_in_sc_page_dir_base_vol_f( | ||
108 | ram_in_sc_page_dir_base_vol_true_v(), 0) | | ||
109 | ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) | | ||
110 | ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) | | ||
111 | ram_in_sc_use_ver2_pt_format_f(1, 0) | | ||
112 | ram_in_sc_big_page_size_f(1, 0) | | ||
113 | ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); | ||
114 | lo = ram_in_sc_page_dir_base_vol_0_w(); | ||
115 | hi = ram_in_sc_page_dir_base_hi_0_w(); | ||
116 | nvgpu_mem_wr32(g, inst_block, lo, format_word); | ||
117 | nvgpu_mem_wr32(g, inst_block, hi, pdb_addr_hi); | ||
118 | |||
119 | /* make subcontext0 address space to valid */ | ||
120 | /* TODO fix proper hw register definations */ | ||
121 | nvgpu_mem_wr32(g, inst_block, 166, 0x1); | ||
122 | nvgpu_mem_wr32(g, inst_block, 167, 0); | ||
123 | nvgpu_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(), | 103 | nvgpu_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(), |
124 | ram_in_engine_wfi_veid_f(0)); | 104 | ram_in_engine_wfi_veid_f(c->t19x.subctx_id)); |
125 | 105 | ||
126 | } | 106 | } |
127 | 107 | ||
@@ -149,7 +129,51 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va) | |||
149 | return ret; | 129 | return ret; |
150 | } | 130 | } |
151 | 131 | ||
152 | int gv11b_get_max_subctx_count(struct gk20a *g) | 132 | void gv11b_subctx_commit_valid_mask(struct channel_gk20a *c, |
133 | struct nvgpu_mem *inst_block) | ||
134 | { | ||
135 | struct gk20a *g = c->g; | ||
136 | |||
137 | /* Make all subctx pdbs valid */ | ||
138 | nvgpu_mem_wr32(g, inst_block, 166, 0xffffffff); | ||
139 | nvgpu_mem_wr32(g, inst_block, 167, 0xffffffff); | ||
140 | } | ||
141 | |||
142 | void gv11b_subctx_commit_pdb(struct channel_gk20a *c, | ||
143 | struct nvgpu_mem *inst_block) | ||
144 | { | ||
145 | struct gk20a *g = c->g; | ||
146 | u32 lo, hi; | ||
147 | u32 subctx_id = 0; | ||
148 | u32 format_word; | ||
149 | u32 pdb_addr_lo, pdb_addr_hi; | ||
150 | u64 pdb_addr; | ||
151 | |||
152 | pdb_addr = g->ops.mm.get_iova_addr(g, c->vm->pdb.mem.priv.sgt->sgl, 0); | ||
153 | pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); | ||
154 | pdb_addr_hi = u64_hi32(pdb_addr); | ||
155 | format_word = ram_in_sc_page_dir_base_target_f( | ||
156 | ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) | | ||
157 | ram_in_sc_page_dir_base_vol_f( | ||
158 | ram_in_sc_page_dir_base_vol_true_v(), 0) | | ||
159 | ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) | | ||
160 | ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) | | ||
161 | ram_in_sc_use_ver2_pt_format_f(1, 0) | | ||
162 | ram_in_sc_big_page_size_f(1, 0) | | ||
163 | ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); | ||
164 | nvgpu_log(g, gpu_dbg_info, " pdb info lo %x hi %x", | ||
165 | format_word, pdb_addr_hi); | ||
166 | for (subctx_id = 0; subctx_id < gv11b_get_max_subctx_count(g); | ||
167 | subctx_id++) { | ||
168 | lo = ram_in_sc_page_dir_base_vol_0_w() + (4 * subctx_id); | ||
169 | hi = ram_in_sc_page_dir_base_hi_0_w() + (4 * subctx_id); | ||
170 | nvgpu_mem_wr32(g, inst_block, lo, format_word); | ||
171 | nvgpu_mem_wr32(g, inst_block, hi, pdb_addr_hi); | ||
172 | } | ||
173 | } | ||
174 | |||
175 | |||
176 | u32 gv11b_get_max_subctx_count(struct gk20a *g) | ||
153 | { | 177 | { |
154 | u32 data = gk20a_readl(g, gr_pri_fe_chip_def_info_r()); | 178 | u32 data = gk20a_readl(g, gr_pri_fe_chip_def_info_r()); |
155 | 179 | ||