diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2015-01-14 07:04:08 -0500 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-04-04 21:08:02 -0400 |
commit | 3877adcd656e8e4329e2c4250119de2256f30730 (patch) | |
tree | e1228aea1b348e6c2fcd2bcab30e4ea9ef2f5bea /drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | |
parent | f93a8cc36b68500c1d1ae235f929c35c4a039497 (diff) |
gpu: nvgpu: add hw perfmon buffer mapping ioctls
Map/unmap buffers for HWPM and deal with its instance block, the minimum
work required to run the HWPM via regops from userspace.
Bug 1517458
Bug 1573150
Change-Id: If14086a88b54bf434843d7c2fee8a9113023a3b0
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/673689
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 95 |
1 file changed, 94 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index daed2967..5bee34fc 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Tegra GK20A GPU Debugger/Profiler Driver | 2 | * Tegra GK20A GPU Debugger/Profiler Driver |
3 | * | 3 | * |
4 | * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2013-2015, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -29,6 +29,7 @@ | |||
29 | #include "regops_gk20a.h" | 29 | #include "regops_gk20a.h" |
30 | #include "hw_therm_gk20a.h" | 30 | #include "hw_therm_gk20a.h" |
31 | #include "hw_gr_gk20a.h" | 31 | #include "hw_gr_gk20a.h" |
32 | #include "hw_perf_gk20a.h" | ||
32 | 33 | ||
33 | struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = { | 34 | struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = { |
34 | .exec_reg_ops = exec_regops_gk20a, | 35 | .exec_reg_ops = exec_regops_gk20a, |
@@ -370,6 +371,11 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( | |||
370 | struct dbg_session_gk20a *dbg_s, | 371 | struct dbg_session_gk20a *dbg_s, |
371 | struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); | 372 | struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); |
372 | 373 | ||
374 | static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | ||
375 | struct nvgpu_dbg_gpu_perfbuf_map_args *args); | ||
376 | |||
377 | static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, | ||
378 | struct nvgpu_dbg_gpu_perfbuf_unmap_args *args); | ||
373 | 379 | ||
374 | long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, | 380 | long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, |
375 | unsigned long arg) | 381 | unsigned long arg) |
@@ -436,6 +442,16 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, | |||
436 | (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); | 442 | (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); |
437 | break; | 443 | break; |
438 | 444 | ||
445 | case NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP: | ||
446 | err = gk20a_perfbuf_map(dbg_s, | ||
447 | (struct nvgpu_dbg_gpu_perfbuf_map_args *)buf); | ||
448 | break; | ||
449 | |||
450 | case NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP: | ||
451 | err = gk20a_perfbuf_unmap(dbg_s, | ||
452 | (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf); | ||
453 | break; | ||
454 | |||
439 | default: | 455 | default: |
440 | gk20a_err(dev_from_gk20a(g), | 456 | gk20a_err(dev_from_gk20a(g), |
441 | "unrecognized dbg gpu ioctl cmd: 0x%x", | 457 | "unrecognized dbg gpu ioctl cmd: 0x%x", |
@@ -775,3 +791,80 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( | |||
775 | mutex_unlock(&g->dbg_sessions_lock); | 791 | mutex_unlock(&g->dbg_sessions_lock); |
776 | return err; | 792 | return err; |
777 | } | 793 | } |
794 | |||
795 | static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | ||
796 | struct nvgpu_dbg_gpu_perfbuf_map_args *args) | ||
797 | { | ||
798 | struct gk20a *g = dbg_s->g; | ||
799 | int err; | ||
800 | u32 virt_size; | ||
801 | u32 virt_addr_lo; | ||
802 | u32 virt_addr_hi; | ||
803 | u32 inst_pa_page; | ||
804 | |||
805 | if (!g->allow_all) | ||
806 | return -EACCES; | ||
807 | |||
808 | err = gk20a_vm_map_buffer(&g->mm.pmu.vm, | ||
809 | args->dmabuf_fd, | ||
810 | &args->offset, | ||
811 | 0, | ||
812 | 0, | ||
813 | 0, | ||
814 | args->mapping_size); | ||
815 | if (err) | ||
816 | return err; | ||
817 | |||
818 | /* perf output buffer may not cross a 4GB boundary - with a separate va | ||
819 | * smaller than that, it won't */ | ||
820 | virt_size = u64_lo32(args->mapping_size); | ||
821 | virt_addr_lo = u64_lo32(args->offset); | ||
822 | virt_addr_hi = u64_hi32(args->offset); | ||
823 | /* but check anyway */ | ||
824 | if (args->offset + virt_size > SZ_4G) { | ||
825 | gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset); | ||
826 | return -EINVAL; | ||
827 | } | ||
828 | |||
829 | /* address and size are aligned to 32 bytes, the lowest bits read back | ||
830 | * as zeros */ | ||
831 | gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo); | ||
832 | gk20a_writel(g, perf_pmasys_outbaseupper_r(), | ||
833 | perf_pmasys_outbaseupper_ptr_f(virt_addr_hi)); | ||
834 | gk20a_writel(g, perf_pmasys_outsize_r(), virt_size); | ||
835 | |||
836 | /* this field is aligned to 4K */ | ||
837 | inst_pa_page = g->mm.hwpm.inst_block.cpu_pa >> 12; | ||
838 | |||
839 | /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK | ||
840 | * should be written last */ | ||
841 | gk20a_writel(g, perf_pmasys_mem_block_r(), | ||
842 | perf_pmasys_mem_block_base_f(inst_pa_page) | | ||
843 | perf_pmasys_mem_block_valid_true_f() | | ||
844 | perf_pmasys_mem_block_target_lfb_f()); | ||
845 | |||
846 | return 0; | ||
847 | } | ||
848 | |||
849 | static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, | ||
850 | struct nvgpu_dbg_gpu_perfbuf_unmap_args *args) | ||
851 | { | ||
852 | struct gk20a *g = dbg_s->g; | ||
853 | |||
854 | if (!g->allow_all) | ||
855 | return -EACCES; | ||
856 | |||
857 | gk20a_writel(g, perf_pmasys_outbase_r(), 0); | ||
858 | gk20a_writel(g, perf_pmasys_outbaseupper_r(), | ||
859 | perf_pmasys_outbaseupper_ptr_f(0)); | ||
860 | gk20a_writel(g, perf_pmasys_outsize_r(), 0); | ||
861 | |||
862 | gk20a_writel(g, perf_pmasys_mem_block_r(), | ||
863 | perf_pmasys_mem_block_base_f(0) | | ||
864 | perf_pmasys_mem_block_valid_false_f() | | ||
865 | perf_pmasys_mem_block_target_f(0)); | ||
866 | |||
867 | gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset); | ||
868 | |||
869 | return 0; | ||
870 | } | ||