summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
diff options
context:
space:
mode:
authorKonsta Holtta <kholtta@nvidia.com>2015-01-14 07:04:08 -0500
committerDan Willemsen <dwillemsen@nvidia.com>2015-04-04 21:08:02 -0400
commit3877adcd656e8e4329e2c4250119de2256f30730 (patch)
treee1228aea1b348e6c2fcd2bcab30e4ea9ef2f5bea /drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
parentf93a8cc36b68500c1d1ae235f929c35c4a039497 (diff)
gpu: nvgpu: add hw perfmon buffer mapping ioctls
Map/unmap buffers for HWPM and deal with its instance block, the minimum work required to run the HWPM via regops from userspace. Bug 1517458 Bug 1573150 Change-Id: If14086a88b54bf434843d7c2fee8a9113023a3b0 Signed-off-by: Konsta Holtta <kholtta@nvidia.com> Reviewed-on: http://git-master/r/673689 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c95
1 files changed, 94 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index daed2967..5bee34fc 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Tegra GK20A GPU Debugger/Profiler Driver 2 * Tegra GK20A GPU Debugger/Profiler Driver
3 * 3 *
4 * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2013-2015, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -29,6 +29,7 @@
29#include "regops_gk20a.h" 29#include "regops_gk20a.h"
30#include "hw_therm_gk20a.h" 30#include "hw_therm_gk20a.h"
31#include "hw_gr_gk20a.h" 31#include "hw_gr_gk20a.h"
32#include "hw_perf_gk20a.h"
32 33
33struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = { 34struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = {
34 .exec_reg_ops = exec_regops_gk20a, 35 .exec_reg_ops = exec_regops_gk20a,
@@ -370,6 +371,11 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
370 struct dbg_session_gk20a *dbg_s, 371 struct dbg_session_gk20a *dbg_s,
371 struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); 372 struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args);
372 373
374static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
375 struct nvgpu_dbg_gpu_perfbuf_map_args *args);
376
377static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
378 struct nvgpu_dbg_gpu_perfbuf_unmap_args *args);
373 379
374long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, 380long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
375 unsigned long arg) 381 unsigned long arg)
@@ -436,6 +442,16 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
436 (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); 442 (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf);
437 break; 443 break;
438 444
445 case NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP:
446 err = gk20a_perfbuf_map(dbg_s,
447 (struct nvgpu_dbg_gpu_perfbuf_map_args *)buf);
448 break;
449
450 case NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP:
451 err = gk20a_perfbuf_unmap(dbg_s,
452 (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf);
453 break;
454
439 default: 455 default:
440 gk20a_err(dev_from_gk20a(g), 456 gk20a_err(dev_from_gk20a(g),
441 "unrecognized dbg gpu ioctl cmd: 0x%x", 457 "unrecognized dbg gpu ioctl cmd: 0x%x",
@@ -775,3 +791,80 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
775 mutex_unlock(&g->dbg_sessions_lock); 791 mutex_unlock(&g->dbg_sessions_lock);
776 return err; 792 return err;
777} 793}
794
795static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
796 struct nvgpu_dbg_gpu_perfbuf_map_args *args)
797{
798 struct gk20a *g = dbg_s->g;
799 int err;
800 u32 virt_size;
801 u32 virt_addr_lo;
802 u32 virt_addr_hi;
803 u32 inst_pa_page;
804
805 if (!g->allow_all)
806 return -EACCES;
807
808 err = gk20a_vm_map_buffer(&g->mm.pmu.vm,
809 args->dmabuf_fd,
810 &args->offset,
811 0,
812 0,
813 0,
814 args->mapping_size);
815 if (err)
816 return err;
817
818 /* perf output buffer may not cross a 4GB boundary - with a separate va
819 * smaller than that, it won't */
820 virt_size = u64_lo32(args->mapping_size);
821 virt_addr_lo = u64_lo32(args->offset);
822 virt_addr_hi = u64_hi32(args->offset);
823 /* but check anyway */
824 if (args->offset + virt_size > SZ_4G) {
825 gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset);
826 return -EINVAL;
827 }
828
829 /* address and size are aligned to 32 bytes, the lowest bits read back
830 * as zeros */
831 gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
832 gk20a_writel(g, perf_pmasys_outbaseupper_r(),
833 perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
834 gk20a_writel(g, perf_pmasys_outsize_r(), virt_size);
835
836 /* this field is aligned to 4K */
837 inst_pa_page = g->mm.hwpm.inst_block.cpu_pa >> 12;
838
839 /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
840 * should be written last */
841 gk20a_writel(g, perf_pmasys_mem_block_r(),
842 perf_pmasys_mem_block_base_f(inst_pa_page) |
843 perf_pmasys_mem_block_valid_true_f() |
844 perf_pmasys_mem_block_target_lfb_f());
845
846 return 0;
847}
848
849static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
850 struct nvgpu_dbg_gpu_perfbuf_unmap_args *args)
851{
852 struct gk20a *g = dbg_s->g;
853
854 if (!g->allow_all)
855 return -EACCES;
856
857 gk20a_writel(g, perf_pmasys_outbase_r(), 0);
858 gk20a_writel(g, perf_pmasys_outbaseupper_r(),
859 perf_pmasys_outbaseupper_ptr_f(0));
860 gk20a_writel(g, perf_pmasys_outsize_r(), 0);
861
862 gk20a_writel(g, perf_pmasys_mem_block_r(),
863 perf_pmasys_mem_block_base_f(0) |
864 perf_pmasys_mem_block_valid_false_f() |
865 perf_pmasys_mem_block_target_f(0));
866
867 gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset);
868
869 return 0;
870}