summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
authorKonsta Holtta <kholtta@nvidia.com>2015-01-14 07:04:08 -0500
committerDan Willemsen <dwillemsen@nvidia.com>2015-04-04 21:08:02 -0400
commit3877adcd656e8e4329e2c4250119de2256f30730 (patch)
treee1228aea1b348e6c2fcd2bcab30e4ea9ef2f5bea /drivers/gpu
parentf93a8cc36b68500c1d1ae235f929c35c4a039497 (diff)
gpu: nvgpu: add hw perfmon buffer mapping ioctls
Map/unmap buffers for HWPM and manage its instance block: the minimum work required to run the HWPM via regops from userspace.

Bug 1517458
Bug 1573150

Change-Id: If14086a88b54bf434843d7c2fee8a9113023a3b0
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/673689
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c95
-rw-r--r--drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h117
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.c21
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.h6
4 files changed, 238 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index daed2967..5bee34fc 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Tegra GK20A GPU Debugger/Profiler Driver 2 * Tegra GK20A GPU Debugger/Profiler Driver
3 * 3 *
4 * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2013-2015, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -29,6 +29,7 @@
29#include "regops_gk20a.h" 29#include "regops_gk20a.h"
30#include "hw_therm_gk20a.h" 30#include "hw_therm_gk20a.h"
31#include "hw_gr_gk20a.h" 31#include "hw_gr_gk20a.h"
32#include "hw_perf_gk20a.h"
32 33
33struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = { 34struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = {
34 .exec_reg_ops = exec_regops_gk20a, 35 .exec_reg_ops = exec_regops_gk20a,
@@ -370,6 +371,11 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
370 struct dbg_session_gk20a *dbg_s, 371 struct dbg_session_gk20a *dbg_s,
371 struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); 372 struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args);
372 373
374static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
375 struct nvgpu_dbg_gpu_perfbuf_map_args *args);
376
377static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
378 struct nvgpu_dbg_gpu_perfbuf_unmap_args *args);
373 379
374long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, 380long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
375 unsigned long arg) 381 unsigned long arg)
@@ -436,6 +442,16 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
436 (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); 442 (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf);
437 break; 443 break;
438 444
445 case NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP:
446 err = gk20a_perfbuf_map(dbg_s,
447 (struct nvgpu_dbg_gpu_perfbuf_map_args *)buf);
448 break;
449
450 case NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP:
451 err = gk20a_perfbuf_unmap(dbg_s,
452 (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf);
453 break;
454
439 default: 455 default:
440 gk20a_err(dev_from_gk20a(g), 456 gk20a_err(dev_from_gk20a(g),
441 "unrecognized dbg gpu ioctl cmd: 0x%x", 457 "unrecognized dbg gpu ioctl cmd: 0x%x",
@@ -775,3 +791,80 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
775 mutex_unlock(&g->dbg_sessions_lock); 791 mutex_unlock(&g->dbg_sessions_lock);
776 return err; 792 return err;
777} 793}
794
795static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
796 struct nvgpu_dbg_gpu_perfbuf_map_args *args)
797{
798 struct gk20a *g = dbg_s->g;
799 int err;
800 u32 virt_size;
801 u32 virt_addr_lo;
802 u32 virt_addr_hi;
803 u32 inst_pa_page;
804
805 if (!g->allow_all)
806 return -EACCES;
807
808 err = gk20a_vm_map_buffer(&g->mm.pmu.vm,
809 args->dmabuf_fd,
810 &args->offset,
811 0,
812 0,
813 0,
814 args->mapping_size);
815 if (err)
816 return err;
817
818 /* perf output buffer may not cross a 4GB boundary - with a separate va
819 * smaller than that, it won't */
820 virt_size = u64_lo32(args->mapping_size);
821 virt_addr_lo = u64_lo32(args->offset);
822 virt_addr_hi = u64_hi32(args->offset);
823 /* but check anyway */
824 if (args->offset + virt_size > SZ_4G) {
825 gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset);
826 return -EINVAL;
827 }
828
829 /* address and size are aligned to 32 bytes, the lowest bits read back
830 * as zeros */
831 gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
832 gk20a_writel(g, perf_pmasys_outbaseupper_r(),
833 perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
834 gk20a_writel(g, perf_pmasys_outsize_r(), virt_size);
835
836 /* this field is aligned to 4K */
837 inst_pa_page = g->mm.hwpm.inst_block.cpu_pa >> 12;
838
839 /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
840 * should be written last */
841 gk20a_writel(g, perf_pmasys_mem_block_r(),
842 perf_pmasys_mem_block_base_f(inst_pa_page) |
843 perf_pmasys_mem_block_valid_true_f() |
844 perf_pmasys_mem_block_target_lfb_f());
845
846 return 0;
847}
848
849static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
850 struct nvgpu_dbg_gpu_perfbuf_unmap_args *args)
851{
852 struct gk20a *g = dbg_s->g;
853
854 if (!g->allow_all)
855 return -EACCES;
856
857 gk20a_writel(g, perf_pmasys_outbase_r(), 0);
858 gk20a_writel(g, perf_pmasys_outbaseupper_r(),
859 perf_pmasys_outbaseupper_ptr_f(0));
860 gk20a_writel(g, perf_pmasys_outsize_r(), 0);
861
862 gk20a_writel(g, perf_pmasys_mem_block_r(),
863 perf_pmasys_mem_block_base_f(0) |
864 perf_pmasys_mem_block_valid_false_f() |
865 perf_pmasys_mem_block_target_f(0));
866
867 gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset);
868
869 return 0;
870}
diff --git a/drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h
new file mode 100644
index 00000000..65d91de6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h
@@ -0,0 +1,117 @@
1/*
2 * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
50#ifndef _hw_perf_gk20a_h_
51#define _hw_perf_gk20a_h_
52
53static inline u32 perf_pmasys_mem_block_r(void)
54{
55 return 0x001b4070;
56}
57static inline u32 perf_pmasys_mem_block_base_f(u32 v)
58{
59 return (v & 0xfffffff) << 0;
60}
61static inline u32 perf_pmasys_mem_block_target_f(u32 v)
62{
63 return (v & 0x3) << 28;
64}
65static inline u32 perf_pmasys_mem_block_target_v(u32 r)
66{
67 return (r >> 28) & 0x3;
68}
69static inline u32 perf_pmasys_mem_block_target_lfb_v(void)
70{
71 return 0x00000000;
72}
73static inline u32 perf_pmasys_mem_block_target_lfb_f(void)
74{
75 return 0x0;
76}
77static inline u32 perf_pmasys_mem_block_valid_f(u32 v)
78{
79 return (v & 0x1) << 31;
80}
81static inline u32 perf_pmasys_mem_block_valid_v(u32 r)
82{
83 return (r >> 31) & 0x1;
84}
85static inline u32 perf_pmasys_mem_block_valid_true_v(void)
86{
87 return 0x00000001;
88}
89static inline u32 perf_pmasys_mem_block_valid_true_f(void)
90{
91 return 0x80000000;
92}
93static inline u32 perf_pmasys_mem_block_valid_false_v(void)
94{
95 return 0x00000000;
96}
97static inline u32 perf_pmasys_mem_block_valid_false_f(void)
98{
99 return 0x0;
100}
101static inline u32 perf_pmasys_outbase_r(void)
102{
103 return 0x001b4074;
104}
105static inline u32 perf_pmasys_outbaseupper_r(void)
106{
107 return 0x001b4078;
108}
109static inline u32 perf_pmasys_outbaseupper_ptr_f(u32 v)
110{
111 return (v & 0xff) << 0;
112}
113static inline u32 perf_pmasys_outsize_r(void)
114{
115 return 0x001b407c;
116}
117#endif
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 8d9488fd..80c766b6 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -101,6 +101,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
101 int rw_flag); 101 int rw_flag);
102static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm); 102static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
103static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm); 103static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
104static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
104 105
105 106
106struct gk20a_dmabuf_priv { 107struct gk20a_dmabuf_priv {
@@ -280,6 +281,7 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm)
280{ 281{
281 gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block); 282 gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block);
282 gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block); 283 gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block);
284 gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block);
283} 285}
284 286
285int gk20a_init_mm_setup_sw(struct gk20a *g) 287int gk20a_init_mm_setup_sw(struct gk20a *g)
@@ -315,6 +317,10 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
315 if (err) 317 if (err)
316 return err; 318 return err;
317 319
320 err = gk20a_init_hwpm(mm);
321 if (err)
322 return err;
323
318 /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */ 324 /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */
319 g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share; 325 g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share;
320 mm->remove_support = gk20a_remove_mm_support; 326 mm->remove_support = gk20a_remove_mm_support;
@@ -2720,6 +2726,21 @@ clean_up_va:
2720 return err; 2726 return err;
2721} 2727}
2722 2728
2729static int gk20a_init_hwpm(struct mm_gk20a *mm)
2730{
2731 int err;
2732 struct vm_gk20a *vm = &mm->pmu.vm;
2733 struct gk20a *g = gk20a_from_mm(mm);
2734 struct inst_desc *inst_block = &mm->hwpm.inst_block;
2735
2736 err = gk20a_alloc_inst_block(g, inst_block);
2737 if (err)
2738 return err;
2739 gk20a_init_inst_block(inst_block, vm, 0);
2740
2741 return 0;
2742}
2743
2723void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm, 2744void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm,
2724 u32 big_page_size) 2745 u32 big_page_size)
2725{ 2746{
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 40e9488d..7b355436 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -342,6 +342,12 @@ struct mm_gk20a {
342 struct inst_desc inst_block; 342 struct inst_desc inst_block;
343 } pmu; 343 } pmu;
344 344
345 struct {
346 /* using pmu vm currently */
347 struct inst_desc inst_block;
348 } hwpm;
349
350
345 struct mutex l2_op_lock; 351 struct mutex l2_op_lock;
346 352
347 void (*remove_support)(struct mm_gk20a *mm); 353 void (*remove_support)(struct mm_gk20a *mm);