From 6d758eb81bcbff4e50df5c9fa67a369a4e1f2074 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Thu, 29 Jun 2017 15:59:05 -0700 Subject: gpu: nvgpu: gv11b: support for full subcontext Changes to enable 64 subcontexts: 1 SYNC + 63 ASYNC Currently all subcontexts within a tsg can have only a single address space. Add support for NVGPU_TSG_IOCTL_BIND_CHANNEL_EX so that the client can select the subctx id. Bug 1842197 Change-Id: Icf56a41303bd1ad7fc6f2a6fbc691bb7b4a01d22 Signed-off-by: seshendra Gadagottu Reviewed-on: https://git-master/r/1511145 Reviewed-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | 16 +++--- drivers/gpu/nvgpu/gv11b/fifo_gv11b.h | 4 +- drivers/gpu/nvgpu/gv11b/gv11b.c | 4 +- drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | 90 +++++++++++++++++++++------------- drivers/gpu/nvgpu/gv11b/subctx_gv11b.h | 2 +- 5 files changed, 74 insertions(+), 42 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b') diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c index a1f6d258..ace873e9 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c @@ -45,7 +45,6 @@ #include "subctx_gv11b.h" #include "gr_gv11b.h" -#define CHANNEL_INFO_VEID0 0 #define PBDMA_SUBDEVICE_ID 1 static void gv11b_fifo_init_ramfc_eng_method_buffer(struct gk20a *g, @@ -94,7 +93,8 @@ static void gv11b_get_ch_runlist_entry(struct channel_gk20a *c, u32 *runlist) /* Time being use 0 pbdma sequencer */ runlist_entry = ram_rl_entry_type_channel_v() | - ram_rl_entry_chan_runqueue_selector_f(0) | + ram_rl_entry_chan_runqueue_selector_f( + c->t19x.runqueue_sel) | ram_rl_entry_chan_userd_target_f( ram_rl_entry_chan_userd_target_sys_mem_ncoh_v()) | ram_rl_entry_chan_inst_target_f( @@ -178,10 +178,14 @@ static int channel_gv11b_setup_ramfc(struct channel_gk20a *c, nvgpu_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->chid)); - /* Until full subcontext is supported, always use VEID0 */ - nvgpu_mem_wr32(g, 
mem, ram_fc_set_channel_info_w(), - pbdma_set_channel_info_scg_type_graphics_compute0_f() | - pbdma_set_channel_info_veid_f(CHANNEL_INFO_VEID0)); + if (c->t19x.subctx_id == CHANNEL_INFO_VEID0) + nvgpu_mem_wr32(g, mem, ram_fc_set_channel_info_w(), + pbdma_set_channel_info_scg_type_graphics_compute0_f() | + pbdma_set_channel_info_veid_f(c->t19x.subctx_id)); + else + nvgpu_mem_wr32(g, mem, ram_fc_set_channel_info_w(), + pbdma_set_channel_info_scg_type_compute1_f() | + pbdma_set_channel_info_veid_f(c->t19x.subctx_id)); gv11b_fifo_init_ramfc_eng_method_buffer(g, c, mem); diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h index 03cca839..032342b2 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h @@ -1,7 +1,7 @@ /* * GV11B Fifo * - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -36,6 +36,8 @@ #define GR_RUNQUE 0 /* pbdma 0 */ #define ASYNC_CE_RUNQUE 2 /* pbdma 2 */ +#define CHANNEL_INFO_VEID0 0 + struct gpu_ops; void gv11b_init_fifo(struct gpu_ops *gops); void gv11b_fifo_reset_pbdma_and_eng_faulted(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/gv11b/gv11b.c b/drivers/gpu/nvgpu/gv11b/gv11b.c index 09628940..69fd0cf4 100644 --- a/drivers/gpu/nvgpu/gv11b/gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gv11b.c @@ -1,7 +1,7 @@ /* * GV11B Graphics * - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -23,5 +23,7 @@ int gv11b_init_gpu_characteristics(struct gk20a *g) { gk20a_init_gpu_characteristics(g); + g->gpu_characteristics.flags |= + NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS; return 0; } diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c index 79ed0d1e..cb042f87 100644 --- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c @@ -31,12 +31,17 @@ static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, struct nvgpu_mem *inst_block); +static void gv11b_subctx_commit_valid_mask(struct channel_gk20a *c, + struct nvgpu_mem *inst_block); +static void gv11b_subctx_commit_pdb(struct channel_gk20a *c, + struct nvgpu_mem *inst_block); + void gv11b_free_subctx_header(struct channel_gk20a *c) { struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; struct gk20a *g = c->g; - gk20a_dbg_fn(""); + nvgpu_log(g, gpu_dbg_fn, "gv11b_free_subctx_header"); if (ctx->mem.gpu_va) { nvgpu_gmmu_unmap(c->vm, &ctx->mem, ctx->mem.gpu_va); @@ -52,7 +57,7 @@ int gv11b_alloc_subctx_header(struct channel_gk20a *c) struct gr_gk20a *gr = &g->gr; int ret = 0; - gk20a_dbg_fn(""); + nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header"); if (ctx->mem.gpu_va == 0) { ret = nvgpu_dma_alloc_flags_sys(g, @@ -82,7 +87,6 @@ int gv11b_alloc_subctx_header(struct channel_gk20a *c) nvgpu_mem_end(g, &ctx->mem); gv11b_init_subcontext_pdb(c, &c->inst_block); - } return ret; } @@ -91,37 +95,13 @@ static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, struct nvgpu_mem *inst_block) { struct gk20a *g = c->g; - struct vm_gk20a *vm; - u64 pdb_addr, pdb_addr_lo, pdb_addr_hi; - u32 format_word; - u32 lo, hi; - gk20a_dbg_fn(""); - /* load main pdb as veid0 pdb also */ - vm = c->vm; - pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.priv.sgt->sgl, 0); - pdb_addr_lo = u64_lo32(pdb_addr >> 
ram_in_base_shift_v()); - pdb_addr_hi = u64_hi32(pdb_addr); - format_word = ram_in_sc_page_dir_base_target_f( - ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) | - ram_in_sc_page_dir_base_vol_f( - ram_in_sc_page_dir_base_vol_true_v(), 0) | - ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) | - ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) | - ram_in_sc_use_ver2_pt_format_f(1, 0) | - ram_in_sc_big_page_size_f(1, 0) | - ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); - lo = ram_in_sc_page_dir_base_vol_0_w(); - hi = ram_in_sc_page_dir_base_hi_0_w(); - nvgpu_mem_wr32(g, inst_block, lo, format_word); - nvgpu_mem_wr32(g, inst_block, hi, pdb_addr_hi); - - /* make subcontext0 address space to valid */ - /* TODO fix proper hw register definations */ - nvgpu_mem_wr32(g, inst_block, 166, 0x1); - nvgpu_mem_wr32(g, inst_block, 167, 0); + gv11b_subctx_commit_pdb(c, inst_block); + gv11b_subctx_commit_valid_mask(c, inst_block); + + nvgpu_log(g, gpu_dbg_info, " subctx %d instblk set", c->t19x.subctx_id); nvgpu_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(), - ram_in_engine_wfi_veid_f(0)); + ram_in_engine_wfi_veid_f(c->t19x.subctx_id)); } @@ -149,7 +129,51 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va) return ret; } -int gv11b_get_max_subctx_count(struct gk20a *g) +void gv11b_subctx_commit_valid_mask(struct channel_gk20a *c, + struct nvgpu_mem *inst_block) +{ + struct gk20a *g = c->g; + + /* Make all subctx pdbs valid */ + nvgpu_mem_wr32(g, inst_block, 166, 0xffffffff); + nvgpu_mem_wr32(g, inst_block, 167, 0xffffffff); +} + +void gv11b_subctx_commit_pdb(struct channel_gk20a *c, + struct nvgpu_mem *inst_block) +{ + struct gk20a *g = c->g; + u32 lo, hi; + u32 subctx_id = 0; + u32 format_word; + u32 pdb_addr_lo, pdb_addr_hi; + u64 pdb_addr; + + pdb_addr = g->ops.mm.get_iova_addr(g, c->vm->pdb.mem.priv.sgt->sgl, 0); + pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); + pdb_addr_hi = u64_hi32(pdb_addr); + format_word = 
ram_in_sc_page_dir_base_target_f( + ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) | + ram_in_sc_page_dir_base_vol_f( + ram_in_sc_page_dir_base_vol_true_v(), 0) | + ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) | + ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) | + ram_in_sc_use_ver2_pt_format_f(1, 0) | + ram_in_sc_big_page_size_f(1, 0) | + ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); + nvgpu_log(g, gpu_dbg_info, " pdb info lo %x hi %x", + format_word, pdb_addr_hi); + for (subctx_id = 0; subctx_id < gv11b_get_max_subctx_count(g); + subctx_id++) { + lo = ram_in_sc_page_dir_base_vol_0_w() + (4 * subctx_id); + hi = ram_in_sc_page_dir_base_hi_0_w() + (4 * subctx_id); + nvgpu_mem_wr32(g, inst_block, lo, format_word); + nvgpu_mem_wr32(g, inst_block, hi, pdb_addr_hi); + } +} + + +u32 gv11b_get_max_subctx_count(struct gk20a *g) { u32 data = gk20a_readl(g, gr_pri_fe_chip_def_info_r()); diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h index fdfe9e3b..5e4e99f5 100644 --- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h @@ -25,5 +25,5 @@ void gv11b_free_subctx_header(struct channel_gk20a *c); int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va); -int gv11b_get_max_subctx_count(struct gk20a *g); +u32 gv11b_get_max_subctx_count(struct gk20a *g); #endif /* __SUBCONTEXT_GV11B_H__ */ -- cgit v1.2.2