From f7e37e6847896992077fe8d434ea14a751318175 Mon Sep 17 00:00:00 2001
From: Peter Daifuku
Date: Wed, 21 Jun 2017 16:44:55 -0700
Subject: gpu: nvgpu: vgpu: perfbuffer support

Add vgpu support for ModeE perfbuffers

- VM allocation is handled by the kernel, with final mapping
  handled by the RM server
- Enabling/disabling the perfbuffer is handled by the RM server

Bug 1880196
JIRA EVLR-1074

Change-Id: Ifbeb5ede6b07e2e112b930c602c22b66a58ac920
Signed-off-by: Peter Daifuku
Reviewed-on: https://git-master/r/1506747
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 99 +++++++++++++++++++++------------
 drivers/gpu/nvgpu/gk20a/gk20a.h         |  2 +
 drivers/gpu/nvgpu/vgpu/dbg_vgpu.c       | 37 ++++++++++++
 3 files changed, 101 insertions(+), 37 deletions(-)

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 84edacbb..9b0d9456 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -1834,6 +1834,62 @@ static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s,
 	return nvgpu_profiler_reserve_release(dbg_s, args->profiler_handle);
 }
 
+/*
+ * Bind the perfbuf instance block and program the PMA output buffer
+ * registers with the buffer of @size at GPU virtual address @offset.
+ *
+ * The gk20a_busy() reference is held only while the registers are programmed
+ * and is dropped again on every exit path. Returns 0 on success, otherwise
+ * the error reported by gk20a_busy() or gk20a_alloc_inst_block().
+ */
+static int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
+{
+	struct mm_gk20a *mm = &g->mm;
+	u32 virt_addr_lo;
+	u32 virt_addr_hi;
+	u32 inst_pa_page;
+	int err;
+
+	err = gk20a_busy(g);
+	if (err) {
+		nvgpu_err(g, "failed to poweron");
+		return err;
+	}
+
+	err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block);
+	if (err) {
+		/* balance the gk20a_busy() above before bailing out */
+		gk20a_idle(g);
+		return err;
+	}
+
+	g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0);
+
+	virt_addr_lo = u64_lo32(offset);
+	virt_addr_hi = u64_hi32(offset);
+
+	/* address and size are aligned to 32 bytes, the lowest bits read back
+	 * as zeros */
+	gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
+	gk20a_writel(g, perf_pmasys_outbaseupper_r(),
+			perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
+	gk20a_writel(g, perf_pmasys_outsize_r(), size);
+
+	/* this field is aligned to 4K */
+	inst_pa_page = gk20a_mm_inst_block_addr(g,
+						&mm->perfbuf.inst_block) >> 12;
+
+	/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
+	 * should be written last */
+	gk20a_writel(g, perf_pmasys_mem_block_r(),
+			perf_pmasys_mem_block_base_f(inst_pa_page) |
+			perf_pmasys_mem_block_valid_true_f() |
+			perf_pmasys_mem_block_target_lfb_f());
+
+	gk20a_idle(g);
+	return 0;
+}
+
 static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_perfbuf_map_args *args)
 {
@@ -1841,9 +1897,6 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 	struct mm_gk20a *mm = &g->mm;
 	int err;
 	u32 virt_size;
-	u32 virt_addr_lo;
-	u32 virt_addr_hi;
-	u32 inst_pa_page;
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
@@ -1863,12 +1916,6 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		return -ENOMEM;
 	}
 
-	err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block);
-	if (err)
-		goto err_remove_vm;
-
-	g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0);
-
 	err = nvgpu_vm_map_buffer(mm->perfbuf.vm,
 			args->dmabuf_fd,
 			&args->offset,
@@ -1882,38 +1929,15 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 
 	/* perf output buffer may not cross a 4GB boundary */
 	virt_size = u64_lo32(args->mapping_size);
-	virt_addr_lo = u64_lo32(args->offset);
-	virt_addr_hi = u64_hi32(args->offset);
 	if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size)) {
 		err = -EINVAL;
 		goto err_unmap;
 	}
 
-	err = gk20a_busy(g);
-	if (err) {
-		nvgpu_err(g, "failed to poweron");
+	err = g->ops.dbg_session_ops.perfbuffer_enable(g,
+			args->offset, virt_size);
+	if (err)
 		goto err_unmap;
-	}
-
-	/* address and size are aligned to 32 bytes, the lowest bits read back
-	 * as zeros */
-	gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
-	gk20a_writel(g, perf_pmasys_outbaseupper_r(),
-			perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
-	gk20a_writel(g, perf_pmasys_outsize_r(), virt_size);
-
-	/* this field is aligned to 4K */
-	inst_pa_page = gk20a_mm_inst_block_addr(g,
-						&mm->perfbuf.inst_block) >> 12;
-
-	/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
-	 * should be written last */
-	gk20a_writel(g, perf_pmasys_mem_block_r(),
-			perf_pmasys_mem_block_base_f(inst_pa_page) |
-			perf_pmasys_mem_block_valid_true_f() |
-			perf_pmasys_mem_block_target_lfb_f());
-
-	gk20a_idle(g);
 
 	g->perfbuf.owner = dbg_s;
 	g->perfbuf.offset = args->offset;
@@ -1924,7 +1948,6 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 err_unmap:
 	nvgpu_vm_unmap_buffer(mm->perfbuf.vm, args->offset, NULL);
 err_remove_vm:
-	gk20a_free_inst_block(g, &mm->perfbuf.inst_block);
 	nvgpu_vm_put(mm->perfbuf.vm);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
@@ -1960,7 +1983,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
 	struct vm_gk20a *vm = mm->perfbuf.vm;
 	int err;
 
-	err = gk20a_perfbuf_disable_locked(g);
+	err = g->ops.dbg_session_ops.perfbuffer_disable(g);
 
 	nvgpu_vm_unmap_buffer(vm, offset, NULL);
 	gk20a_free_inst_block(g, &mm->perfbuf.inst_block);
@@ -2001,4 +2024,6 @@ void gk20a_init_dbg_session_ops(struct gpu_ops *gops)
 		nvgpu_check_and_set_context_reservation;
 	gops->dbg_session_ops.release_profiler_reservation =
 		nvgpu_release_profiler_reservation;
+	gops->dbg_session_ops.perfbuffer_enable = gk20a_perfbuf_enable_locked;
+	gops->dbg_session_ops.perfbuffer_disable = gk20a_perfbuf_disable_locked;
 };
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index acfb6144..8f291f92 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -869,6 +869,8 @@ struct gpu_ops {
 		void (*release_profiler_reservation)(
 				struct dbg_session_gk20a *dbg_s,
 				struct dbg_profiler_object_data *prof_obj);
+		int (*perfbuffer_enable)(struct gk20a *g, u64 offset, u32 size);
+		int (*perfbuffer_disable)(struct gk20a *g);
 	} dbg_session_ops;
 	struct {
 		void (*get_program_numbers)(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/vgpu/dbg_vgpu.c b/drivers/gpu/nvgpu/vgpu/dbg_vgpu.c
index b9cbcead..72faf76c 100644
--- a/drivers/gpu/nvgpu/vgpu/dbg_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/dbg_vgpu.c
@@ -178,6 +178,41 @@ static void vgpu_release_profiler_reservation(
 	vgpu_sendrecv_prof_cmd(dbg_s, TEGRA_VGPU_PROF_RELEASE);
 }
 
+/*
+ * Send a TEGRA_VGPU_CMD_PERFBUF_MGT request for the perfbuf VM to the RM
+ * server. vgpu_perfbuffer_disable() passes offset == 0 and size == 0.
+ */
+static int vgpu_sendrecv_perfbuf_cmd(struct gk20a *g, u64 offset, u32 size)
+{
+	struct mm_gk20a *mm = &g->mm;
+	struct vm_gk20a *vm = mm->perfbuf.vm;
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_perfbuf_mgt_params *p =
+						&msg.params.perfbuf_management;
+	int err;
+
+	msg.cmd = TEGRA_VGPU_CMD_PERFBUF_MGT;
+	msg.handle = vgpu_get_handle(g);
+
+	p->vm_handle = vm->handle;
+	p->offset = offset;
+	p->size = size;
+
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	err = err ? err : msg.ret;
+	return err;
+}
+
+static int vgpu_perfbuffer_enable(struct gk20a *g, u64 offset, u32 size)
+{
+	return vgpu_sendrecv_perfbuf_cmd(g, offset, size);
+}
+
+static int vgpu_perfbuffer_disable(struct gk20a *g)
+{
+	return vgpu_sendrecv_perfbuf_cmd(g, 0, 0);
+}
+
 void vgpu_init_dbg_session_ops(struct gpu_ops *gops)
 {
 	gops->dbg_session_ops.exec_reg_ops = vgpu_exec_regops;
@@ -188,4 +223,6 @@ void vgpu_init_dbg_session_ops(struct gpu_ops *gops)
 		vgpu_check_and_set_context_reservation;
 	gops->dbg_session_ops.release_profiler_reservation =
 		vgpu_release_profiler_reservation;
+	gops->dbg_session_ops.perfbuffer_enable = vgpu_perfbuffer_enable;
+	gops->dbg_session_ops.perfbuffer_disable = vgpu_perfbuffer_disable;
 }
-- 
cgit v1.2.2