From 6d758eb81bcbff4e50df5c9fa67a369a4e1f2074 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Thu, 29 Jun 2017 15:59:05 -0700 Subject: gpu: nvgpu: gv11b: support for full subcontext Changes to enable 64 subcontexts: 1 SYNC + 63 ASYNC Currently all subcontexts with in a tsg can have only single address space. Add support for NVGPU_TSG_IOCTL_BIND_CHANNEL_EX for selecting subctx id by client. Bug 1842197 Change-Id: Icf56a41303bd1ad7fc6f2a6fbc691bb7b4a01d22 Signed-off-by: seshendra Gadagottu Reviewed-on: https://git-master/r/1511145 Reviewed-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | 90 +++++++++++++++++++++------------- 1 file changed, 57 insertions(+), 33 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/subctx_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c index 79ed0d1e..cb042f87 100644 --- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c @@ -31,12 +31,17 @@ static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, struct nvgpu_mem *inst_block); +static void gv11b_subctx_commit_valid_mask(struct channel_gk20a *c, + struct nvgpu_mem *inst_block); +static void gv11b_subctx_commit_pdb(struct channel_gk20a *c, + struct nvgpu_mem *inst_block); + void gv11b_free_subctx_header(struct channel_gk20a *c) { struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; struct gk20a *g = c->g; - gk20a_dbg_fn(""); + nvgpu_log(g, gpu_dbg_fn, "gv11b_free_subctx_header"); if (ctx->mem.gpu_va) { nvgpu_gmmu_unmap(c->vm, &ctx->mem, ctx->mem.gpu_va); @@ -52,7 +57,7 @@ int gv11b_alloc_subctx_header(struct channel_gk20a *c) struct gr_gk20a *gr = &g->gr; int ret = 0; - gk20a_dbg_fn(""); + nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header"); if (ctx->mem.gpu_va == 0) { ret = nvgpu_dma_alloc_flags_sys(g, @@ -82,7 +87,6 @@ int gv11b_alloc_subctx_header(struct channel_gk20a *c) nvgpu_mem_end(g, &ctx->mem); gv11b_init_subcontext_pdb(c, &c->inst_block); - } return ret; } @@ -91,37 +95,13 @@ static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, struct nvgpu_mem *inst_block) { struct gk20a *g = c->g; - struct vm_gk20a *vm; - u64 pdb_addr, pdb_addr_lo, pdb_addr_hi; - u32 format_word; - u32 lo, hi; - gk20a_dbg_fn(""); - /* load main pdb as veid0 pdb also */ - vm = c->vm; - pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.priv.sgt->sgl, 0); - pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); - pdb_addr_hi = u64_hi32(pdb_addr); - format_word = ram_in_sc_page_dir_base_target_f( - ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) | - ram_in_sc_page_dir_base_vol_f( - ram_in_sc_page_dir_base_vol_true_v(), 0) | - ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) | - ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) | - ram_in_sc_use_ver2_pt_format_f(1, 0) | - ram_in_sc_big_page_size_f(1, 0) | - ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); - lo = ram_in_sc_page_dir_base_vol_0_w(); - hi = ram_in_sc_page_dir_base_hi_0_w(); - nvgpu_mem_wr32(g, inst_block, lo, format_word); - nvgpu_mem_wr32(g, inst_block, hi, pdb_addr_hi); - - /* make subcontext0 address space to valid */ - /* TODO fix proper hw register definations */ - nvgpu_mem_wr32(g, inst_block, 166, 0x1); - nvgpu_mem_wr32(g, inst_block, 167, 0); + gv11b_subctx_commit_pdb(c, inst_block); + gv11b_subctx_commit_valid_mask(c, inst_block); + + nvgpu_log(g, gpu_dbg_info, " subctx %d instblk set", c->t19x.subctx_id); nvgpu_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(), - ram_in_engine_wfi_veid_f(0)); + ram_in_engine_wfi_veid_f(c->t19x.subctx_id)); } @@ -149,7 +129,51 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va) return ret; } -int gv11b_get_max_subctx_count(struct gk20a *g) +void gv11b_subctx_commit_valid_mask(struct channel_gk20a *c, + struct nvgpu_mem *inst_block) +{ + struct gk20a *g = c->g; + + /* Make all subctx pdbs valid */ + nvgpu_mem_wr32(g, inst_block, 166, 0xffffffff); + nvgpu_mem_wr32(g, inst_block, 167, 0xffffffff); +} + +void gv11b_subctx_commit_pdb(struct channel_gk20a *c, + struct nvgpu_mem *inst_block) +{ + struct gk20a *g = c->g; + u32 lo, hi; + u32 subctx_id = 0; + u32 format_word; + u32 pdb_addr_lo, pdb_addr_hi; + u64 pdb_addr; + + pdb_addr = g->ops.mm.get_iova_addr(g, c->vm->pdb.mem.priv.sgt->sgl, 0); + pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); + pdb_addr_hi = u64_hi32(pdb_addr); + format_word = ram_in_sc_page_dir_base_target_f( + ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) | + ram_in_sc_page_dir_base_vol_f( + ram_in_sc_page_dir_base_vol_true_v(), 0) | + ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) | + ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) | + ram_in_sc_use_ver2_pt_format_f(1, 0) | + ram_in_sc_big_page_size_f(1, 0) | + ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); + nvgpu_log(g, gpu_dbg_info, " pdb info lo %x hi %x", + format_word, pdb_addr_hi); + for (subctx_id = 0; subctx_id < gv11b_get_max_subctx_count(g); + subctx_id++) { + lo = ram_in_sc_page_dir_base_vol_0_w() + (4 * subctx_id); + hi = ram_in_sc_page_dir_base_hi_0_w() + (4 * subctx_id); + nvgpu_mem_wr32(g, inst_block, lo, format_word); + nvgpu_mem_wr32(g, inst_block, hi, pdb_addr_hi); + } +} + + +u32 gv11b_get_max_subctx_count(struct gk20a *g) { u32 data = gk20a_readl(g, gr_pri_fe_chip_def_info_r()); -- cgit v1.2.2