path: root/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
author	Peter Daifuku <pdaifuku@nvidia.com>	2017-04-06 19:39:30 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-04-19 18:44:12 -0400
commit	0d8f5f3fdb87b818b6239efaf91822c01cb3d859 (patch)
tree	29a00ecfe46c35b853da0a9c5846f33f53461f3e /drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
parent	6df49a63ca5f9d77a6d7d6c7dbaaa0fba1b707ca (diff)
gpu: nvgpu: ModeE perfbuffer feature development
perfbuf mapping fixes:
- Allocate a VM specifically for perfbuf use: using the PMU's VM
  results in MMU faults for larger buffers where 64k pages are used.
- Make the 4GB boundary check work for large address spaces
- Remove the requirement to have the allow_all flag set
- Track perfbuf ownership and clean up appropriately

Bug 1880196
JIRA EVLR-1074

Change-Id: Ieee4eb17b64acf9b6ede37bf8e6a91892cda4a7e
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1460809
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
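[Editor's note] The boundary-check bullet is easiest to see in isolation. The old check rejected any mapping whose end address exceeded 4GB, which is wrong once the buffer's VA can legitimately sit above 4GB; the patch instead compares the high 32 bits of the first and one-past-last addresses, so only genuine 4GB-window crossings are refused. The sketch below illustrates that logic; u64_hi32()/u64_lo32() are minimal stand-ins for the kernel helpers, and the addresses in main() are made-up values, not from the patch.

	#include <stdint.h>
	#include <stdio.h>

	/* Stand-ins for the kernel's u64_hi32()/u64_lo32() helpers. */
	static inline uint32_t u64_hi32(uint64_t v) { return (uint32_t)(v >> 32); }
	static inline uint32_t u64_lo32(uint64_t v) { return (uint32_t)v; }

	/* Old check: "offset + size > 4GB" rejects every mapping placed
	 * above the first 4GB window, even ones crossing no boundary. */
	static int rejected_old(uint64_t offset, uint32_t size)
	{
		return offset + size > (1ULL << 32); /* SZ_4G */
	}

	/* New check (as in the patch): the buffer stays inside one 4GB
	 * window iff the high 32 bits of its start and end agree. */
	static int rejected_new(uint64_t offset, uint32_t size)
	{
		return u64_hi32(offset) != u64_hi32(offset + size);
	}

	int main(void)
	{
		/* A 64KB buffer at VA 0x1_0001_0000, wholly inside the second
		 * 4GB window: fine for the hardware, yet the old check refused it. */
		uint64_t offset = 0x100010000ULL;
		uint32_t size = 0x10000;

		printf("old: %d  new: %d\n", rejected_old(offset, size),
		       rejected_new(offset, size)); /* prints "old: 1  new: 0" */
		return 0;
	}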
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c	111
1 file changed, 87 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index bc3f67c4..2988fdec 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -514,6 +514,8 @@ static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
 	return err;
 }
 
+static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset);
+
 int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 {
 	struct dbg_session_gk20a *dbg_s = filp->private_data;
@@ -534,6 +536,10 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 			NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE);
 	nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE);
 
+	/* If this session owned the perf buffer, release it */
+	if (g->perfbuf.owner == dbg_s)
+		gk20a_perfbuf_release_locked(g, g->perfbuf.offset);
+
 	/* Per-context profiler objects were released when we called
 	 * dbg_unbind_all_channels. We could still have global ones.
 	 */
@@ -1821,16 +1827,39 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_perfbuf_map_args *args)
 {
 	struct gk20a *g = dbg_s->g;
+	struct mm_gk20a *mm = &g->mm;
+	struct vm_gk20a *vm = &mm->perfbuf.vm;
 	int err;
 	u32 virt_size;
 	u32 virt_addr_lo;
 	u32 virt_addr_hi;
 	u32 inst_pa_page;
+	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	if (g->perfbuf.owner) {
+		nvgpu_mutex_release(&g->dbg_sessions_lock);
+		return -EBUSY;
+	}
+
+	err = gk20a_init_vm(mm, vm, big_page_size,
+			big_page_size << 10,
+			NV_MM_DEFAULT_KERNEL_SIZE,
+			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
+			false, false, "perfbuf");
+	if (err) {
+		nvgpu_mutex_release(&g->dbg_sessions_lock);
+		return err;
+	}
+
+	err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block);
+	if (err)
+		goto err_remove_vm;
 
-	if (!g->allow_all)
-		return -EACCES;
+	g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, vm, 0);
 
-	err = gk20a_vm_map_buffer(&g->mm.pmu.vm,
+	err = gk20a_vm_map_buffer(vm,
 			args->dmabuf_fd,
 			&args->offset,
 			0,
@@ -1839,23 +1868,21 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 			args->mapping_size,
 			NULL);
 	if (err)
-		return err;
+		goto err_remove_vm;
 
-	/* perf output buffer may not cross a 4GB boundary - with a separate va
-	 * smaller than that, it won't */
+	/* perf output buffer may not cross a 4GB boundary */
 	virt_size = u64_lo32(args->mapping_size);
 	virt_addr_lo = u64_lo32(args->offset);
 	virt_addr_hi = u64_hi32(args->offset);
-	/* but check anyway */
-	if (args->offset + virt_size > SZ_4G) {
+	if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size)) {
 		err = -EINVAL;
-		goto fail_unmap;
+		goto err_unmap;
 	}
 
 	err = gk20a_busy(g);
 	if (err) {
 		nvgpu_err(g, "failed to poweron");
-		goto fail_unmap;
+		goto err_unmap;
 	}
 
 	/* address and size are aligned to 32 bytes, the lowest bits read back
@@ -1866,7 +1893,8 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 	gk20a_writel(g, perf_pmasys_outsize_r(), virt_size);
 
 	/* this field is aligned to 4K */
-	inst_pa_page = gk20a_mm_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12;
+	inst_pa_page = gk20a_mm_inst_block_addr(g,
+			&mm->perfbuf.inst_block) >> 12;
 
 	/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
 	 * should be written last */
@@ -1877,23 +1905,24 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 
 	gk20a_idle(g);
 
+	g->perfbuf.owner = dbg_s;
+	g->perfbuf.offset = args->offset;
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+
 	return 0;
 
-fail_unmap:
-	gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL);
+err_unmap:
+	gk20a_vm_unmap_buffer(vm, args->offset, NULL);
+err_remove_vm:
+	gk20a_remove_vm(vm, &mm->perfbuf.inst_block);
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
 }
 
-static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
-		struct nvgpu_dbg_gpu_perfbuf_unmap_args *args)
+/* must be called with dbg_sessions_lock held */
+static int gk20a_perfbuf_disable_locked(struct gk20a *g)
 {
-	struct gk20a *g = dbg_s->g;
-	int err;
-
-	if (!g->allow_all)
-		return -EACCES;
-
-	err = gk20a_busy(g);
+	int err = gk20a_busy(g);
 	if (err) {
 		nvgpu_err(g, "failed to poweron");
 		return err;
@@ -1911,11 +1940,45 @@ static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
 
 	gk20a_idle(g);
 
-	gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL);
-
 	return 0;
 }
 
+static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
+{
+	struct mm_gk20a *mm = &g->mm;
+	struct vm_gk20a *vm = &mm->perfbuf.vm;
+	int err;
+
+	err = gk20a_perfbuf_disable_locked(g);
+
+	gk20a_vm_unmap_buffer(vm, offset, NULL);
+	gk20a_remove_vm(vm, &mm->perfbuf.inst_block);
+
+	g->perfbuf.owner = NULL;
+	g->perfbuf.offset = 0;
+	return err;
+}
+
+static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_perfbuf_unmap_args *args)
+{
+	struct gk20a *g = dbg_s->g;
+	int err;
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+	if ((g->perfbuf.owner != dbg_s) ||
+			(g->perfbuf.offset != args->offset)) {
+		nvgpu_mutex_release(&g->dbg_sessions_lock);
+		return -EINVAL;
+	}
+
+	err = gk20a_perfbuf_release_locked(g, args->offset);
+
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+
+	return err;
+}
+
 void gk20a_init_dbg_session_ops(struct gpu_ops *gops)
 {
 	gops->dbg_session_ops.exec_reg_ops = exec_regops_gk20a;
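
[Editor's note] Taken together, the ownership changes amount to a single-owner protocol over the perf buffer: the first session to map it becomes the owner, a second mapper gets -EBUSY, only the owner (and only with the matching offset) may unmap, and session teardown releases the buffer if that session still owns it. The following is a minimal user-space model of that protocol, with pthread stand-ins for nvgpu_mutex and placeholder types; it is not the nvgpu API.

	#include <pthread.h>
	#include <errno.h>
	#include <stdint.h>
	#include <stddef.h>

	struct session { int id; };          /* stands in for dbg_session_gk20a */

	static pthread_mutex_t sessions_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct session *perfbuf_owner;   /* models g->perfbuf.owner */
	static uint64_t perfbuf_offset;          /* models g->perfbuf.offset */

	/* PERFBUF_MAP: the first session to map claims the buffer. */
	static int perfbuf_map(struct session *s, uint64_t offset)
	{
		pthread_mutex_lock(&sessions_lock);
		if (perfbuf_owner) {
			pthread_mutex_unlock(&sessions_lock);
			return -EBUSY;
		}
		/* ... allocate the VM, map the buffer, program the hardware ... */
		perfbuf_owner = s;
		perfbuf_offset = offset;
		pthread_mutex_unlock(&sessions_lock);
		return 0;
	}

	/* PERFBUF_UNMAP: only the owner, with the matching offset, may
	 * release; session close performs the same release implicitly. */
	static int perfbuf_unmap(struct session *s, uint64_t offset)
	{
		pthread_mutex_lock(&sessions_lock);
		if (perfbuf_owner != s || perfbuf_offset != offset) {
			pthread_mutex_unlock(&sessions_lock);
			return -EINVAL;
		}
		/* ... disable the hardware, unmap the buffer, remove the VM ... */
		perfbuf_owner = NULL;
		perfbuf_offset = 0;
		pthread_mutex_unlock(&sessions_lock);
		return 0;
	}

	int main(void)
	{
		struct session a = { 1 }, b = { 2 };

		perfbuf_map(&a, 0x100010000ULL);    /* 0: a becomes the owner */
		perfbuf_map(&b, 0x200000000ULL);    /* -EBUSY: already owned */
		perfbuf_unmap(&b, 0x100010000ULL);  /* -EINVAL: b is not the owner */
		perfbuf_unmap(&a, 0x100010000ULL);  /* 0: the owner releases */
		return 0;
	}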