From 0d8f5f3fdb87b818b6239efaf91822c01cb3d859 Mon Sep 17 00:00:00 2001
From: Peter Daifuku
Date: Thu, 6 Apr 2017 16:39:30 -0700
Subject: gpu: nvgpu: ModeE perfbuffer feature development

perfbuf mapping fixes:
- Allocate VM specifically for perfbuf use: using the PMU's
  results in mmu faults for larger buffers where 64k pages are used.
- Make 4GB boundary check work for large address spaces
- remove requirement to have allow_all flag set
- track perfbuf ownership and clean up appropriately

Bug 1880196
JIRA EVLR-1074

Change-Id: Ieee4eb17b64acf9b6ede37bf8e6a91892cda4a7e
Signed-off-by: Peter Daifuku
Reviewed-on: http://git-master/r/1460809
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 111 +++++++++++++++++++++++++-------
 1 file changed, 87 insertions(+), 24 deletions(-)

(limited to 'drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c')

diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index bc3f67c4..2988fdec 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -514,6 +514,8 @@ static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
 	return err;
 }
 
+static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset);
+
 int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 {
 	struct dbg_session_gk20a *dbg_s = filp->private_data;
@@ -534,6 +536,10 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 				NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE);
 	nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE);
 
+	/* If this session owned the perf buffer, release it */
+	if (g->perfbuf.owner == dbg_s)
+		gk20a_perfbuf_release_locked(g, g->perfbuf.offset);
+
 	/* Per-context profiler objects were released when we called
 	 * dbg_unbind_all_channels. We could still have global ones.
 	 */
@@ -1821,16 +1827,39 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_perfbuf_map_args *args)
 {
 	struct gk20a *g = dbg_s->g;
+	struct mm_gk20a *mm = &g->mm;
+	struct vm_gk20a *vm = &mm->perfbuf.vm;
 	int err;
 	u32 virt_size;
 	u32 virt_addr_lo;
 	u32 virt_addr_hi;
 	u32 inst_pa_page;
+	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	if (g->perfbuf.owner) {
+		nvgpu_mutex_release(&g->dbg_sessions_lock);
+		return -EBUSY;
+	}
+
+	err = gk20a_init_vm(mm, vm, big_page_size,
+			big_page_size << 10,
+			NV_MM_DEFAULT_KERNEL_SIZE,
+			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
+			false, false, "perfbuf");
+	if (err) {
+		nvgpu_mutex_release(&g->dbg_sessions_lock);
+		return err;
+	}
+
+	err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block);
+	if (err)
+		goto err_remove_vm;
 
-	if (!g->allow_all)
-		return -EACCES;
+	g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, vm, 0);
 
-	err = gk20a_vm_map_buffer(&g->mm.pmu.vm,
+	err = gk20a_vm_map_buffer(vm,
 			args->dmabuf_fd,
 			&args->offset,
 			0,
@@ -1839,23 +1868,21 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 			args->mapping_size,
 			NULL);
 	if (err)
-		return err;
+		goto err_remove_vm;
 
-	/* perf output buffer may not cross a 4GB boundary - with a separate va
-	 * smaller than that, it won't */
+	/* perf output buffer may not cross a 4GB boundary */
 	virt_size = u64_lo32(args->mapping_size);
 	virt_addr_lo = u64_lo32(args->offset);
 	virt_addr_hi = u64_hi32(args->offset);
-	/* but check anyway */
-	if (args->offset + virt_size > SZ_4G) {
+	if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size)) {
 		err = -EINVAL;
-		goto fail_unmap;
+		goto err_unmap;
 	}
 
 	err = gk20a_busy(g);
 	if (err) {
 		nvgpu_err(g, "failed to poweron");
-		goto fail_unmap;
+		goto err_unmap;
 	}
 
 	/* address and size are aligned to 32 bytes, the lowest bits read back
@@ -1866,7 +1893,8 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 	gk20a_writel(g, perf_pmasys_outsize_r(), virt_size);
 
 	/* this field is aligned to 4K */
-	inst_pa_page = gk20a_mm_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12;
+	inst_pa_page = gk20a_mm_inst_block_addr(g,
+			&mm->perfbuf.inst_block) >> 12;
 
 	/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
 	 * should be written last */
@@ -1877,23 +1905,24 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 
 	gk20a_idle(g);
 
+	g->perfbuf.owner = dbg_s;
+	g->perfbuf.offset = args->offset;
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+
 	return 0;
 
-fail_unmap:
-	gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL);
+err_unmap:
+	gk20a_vm_unmap_buffer(vm, args->offset, NULL);
+err_remove_vm:
+	gk20a_remove_vm(vm, &mm->perfbuf.inst_block);
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
 }
 
-static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
-		struct nvgpu_dbg_gpu_perfbuf_unmap_args *args)
+/* must be called with dbg_sessions_lock held */
+static int gk20a_perfbuf_disable_locked(struct gk20a *g)
 {
-	struct gk20a *g = dbg_s->g;
-	int err;
-
-	if (!g->allow_all)
-		return -EACCES;
-
-	err = gk20a_busy(g);
+	int err = gk20a_busy(g);
 	if (err) {
 		nvgpu_err(g, "failed to poweron");
 		return err;
@@ -1911,11 +1940,45 @@ static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
 
 	gk20a_idle(g);
 
-	gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL);
-
 	return 0;
 }
 
+static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
+{
+	struct mm_gk20a *mm = &g->mm;
+	struct vm_gk20a *vm = &mm->perfbuf.vm;
+	int err;
+
+	err = gk20a_perfbuf_disable_locked(g);
+
+	gk20a_vm_unmap_buffer(vm, offset, NULL);
+	gk20a_remove_vm(vm, &mm->perfbuf.inst_block);
+
+	g->perfbuf.owner = NULL;
+	g->perfbuf.offset = 0;
+	return err;
+}
+
+static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_perfbuf_unmap_args *args)
+{
+	struct gk20a *g = dbg_s->g;
+	int err;
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+	if ((g->perfbuf.owner != dbg_s) ||
+			(g->perfbuf.offset != args->offset)) {
+		nvgpu_mutex_release(&g->dbg_sessions_lock);
+		return -EINVAL;
+	}
+
+	err = gk20a_perfbuf_release_locked(g, args->offset);
+
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+
+	return err;
+}
+
 void gk20a_init_dbg_session_ops(struct gpu_ops *gops)
 {
 	gops->dbg_session_ops.exec_reg_ops = exec_regops_gk20a;
-- 
cgit v1.2.2