diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 95 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h | 117 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 21 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 6 | ||||
-rw-r--r-- | include/uapi/linux/nvgpu.h | 20 |
5 files changed, 256 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index daed2967..5bee34fc 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Tegra GK20A GPU Debugger/Profiler Driver | 2 | * Tegra GK20A GPU Debugger/Profiler Driver |
3 | * | 3 | * |
4 | * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2013-2015, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -29,6 +29,7 @@ | |||
29 | #include "regops_gk20a.h" | 29 | #include "regops_gk20a.h" |
30 | #include "hw_therm_gk20a.h" | 30 | #include "hw_therm_gk20a.h" |
31 | #include "hw_gr_gk20a.h" | 31 | #include "hw_gr_gk20a.h" |
32 | #include "hw_perf_gk20a.h" | ||
32 | 33 | ||
33 | struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = { | 34 | struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = { |
34 | .exec_reg_ops = exec_regops_gk20a, | 35 | .exec_reg_ops = exec_regops_gk20a, |
@@ -370,6 +371,11 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( | |||
370 | struct dbg_session_gk20a *dbg_s, | 371 | struct dbg_session_gk20a *dbg_s, |
371 | struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); | 372 | struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); |
372 | 373 | ||
374 | static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | ||
375 | struct nvgpu_dbg_gpu_perfbuf_map_args *args); | ||
376 | |||
377 | static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, | ||
378 | struct nvgpu_dbg_gpu_perfbuf_unmap_args *args); | ||
373 | 379 | ||
374 | long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, | 380 | long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, |
375 | unsigned long arg) | 381 | unsigned long arg) |
@@ -436,6 +442,16 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, | |||
436 | (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); | 442 | (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); |
437 | break; | 443 | break; |
438 | 444 | ||
445 | case NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP: | ||
446 | err = gk20a_perfbuf_map(dbg_s, | ||
447 | (struct nvgpu_dbg_gpu_perfbuf_map_args *)buf); | ||
448 | break; | ||
449 | |||
450 | case NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP: | ||
451 | err = gk20a_perfbuf_unmap(dbg_s, | ||
452 | (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf); | ||
453 | break; | ||
454 | |||
439 | default: | 455 | default: |
440 | gk20a_err(dev_from_gk20a(g), | 456 | gk20a_err(dev_from_gk20a(g), |
441 | "unrecognized dbg gpu ioctl cmd: 0x%x", | 457 | "unrecognized dbg gpu ioctl cmd: 0x%x", |
@@ -775,3 +791,80 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( | |||
775 | mutex_unlock(&g->dbg_sessions_lock); | 791 | mutex_unlock(&g->dbg_sessions_lock); |
776 | return err; | 792 | return err; |
777 | } | 793 | } |
794 | |||
795 | static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | ||
796 | struct nvgpu_dbg_gpu_perfbuf_map_args *args) | ||
797 | { | ||
798 | struct gk20a *g = dbg_s->g; | ||
799 | int err; | ||
800 | u32 virt_size; | ||
801 | u32 virt_addr_lo; | ||
802 | u32 virt_addr_hi; | ||
803 | u32 inst_pa_page; | ||
804 | |||
805 | if (!g->allow_all) | ||
806 | return -EACCES; | ||
807 | |||
808 | err = gk20a_vm_map_buffer(&g->mm.pmu.vm, | ||
809 | args->dmabuf_fd, | ||
810 | &args->offset, | ||
811 | 0, | ||
812 | 0, | ||
813 | 0, | ||
814 | args->mapping_size); | ||
815 | if (err) | ||
816 | return err; | ||
817 | |||
818 | /* perf output buffer may not cross a 4GB boundary - with a separate va | ||
819 | * smaller than that, it won't */ | ||
820 | virt_size = u64_lo32(args->mapping_size); | ||
821 | virt_addr_lo = u64_lo32(args->offset); | ||
822 | virt_addr_hi = u64_hi32(args->offset); | ||
823 | /* but check anyway */ | ||
824 | if (args->offset + virt_size > SZ_4G) { | ||
825 | gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset); | ||
826 | return -EINVAL; | ||
827 | } | ||
828 | |||
829 | /* address and size are aligned to 32 bytes, the lowest bits read back | ||
830 | * as zeros */ | ||
831 | gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo); | ||
832 | gk20a_writel(g, perf_pmasys_outbaseupper_r(), | ||
833 | perf_pmasys_outbaseupper_ptr_f(virt_addr_hi)); | ||
834 | gk20a_writel(g, perf_pmasys_outsize_r(), virt_size); | ||
835 | |||
836 | /* this field is aligned to 4K */ | ||
837 | inst_pa_page = g->mm.hwpm.inst_block.cpu_pa >> 12; | ||
838 | |||
839 | /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK | ||
840 | * should be written last */ | ||
841 | gk20a_writel(g, perf_pmasys_mem_block_r(), | ||
842 | perf_pmasys_mem_block_base_f(inst_pa_page) | | ||
843 | perf_pmasys_mem_block_valid_true_f() | | ||
844 | perf_pmasys_mem_block_target_lfb_f()); | ||
845 | |||
846 | return 0; | ||
847 | } | ||
848 | |||
849 | static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, | ||
850 | struct nvgpu_dbg_gpu_perfbuf_unmap_args *args) | ||
851 | { | ||
852 | struct gk20a *g = dbg_s->g; | ||
853 | |||
854 | if (!g->allow_all) | ||
855 | return -EACCES; | ||
856 | |||
857 | gk20a_writel(g, perf_pmasys_outbase_r(), 0); | ||
858 | gk20a_writel(g, perf_pmasys_outbaseupper_r(), | ||
859 | perf_pmasys_outbaseupper_ptr_f(0)); | ||
860 | gk20a_writel(g, perf_pmasys_outsize_r(), 0); | ||
861 | |||
862 | gk20a_writel(g, perf_pmasys_mem_block_r(), | ||
863 | perf_pmasys_mem_block_base_f(0) | | ||
864 | perf_pmasys_mem_block_valid_false_f() | | ||
865 | perf_pmasys_mem_block_target_f(0)); | ||
866 | |||
867 | gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset); | ||
868 | |||
869 | return 0; | ||
870 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h new file mode 100644 index 00000000..65d91de6 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h | |||
@@ -0,0 +1,117 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_perf_gk20a_h_ | ||
51 | #define _hw_perf_gk20a_h_ | ||
52 | |||
53 | static inline u32 perf_pmasys_mem_block_r(void) | ||
54 | { | ||
55 | return 0x001b4070; | ||
56 | } | ||
57 | static inline u32 perf_pmasys_mem_block_base_f(u32 v) | ||
58 | { | ||
59 | return (v & 0xfffffff) << 0; | ||
60 | } | ||
61 | static inline u32 perf_pmasys_mem_block_target_f(u32 v) | ||
62 | { | ||
63 | return (v & 0x3) << 28; | ||
64 | } | ||
65 | static inline u32 perf_pmasys_mem_block_target_v(u32 r) | ||
66 | { | ||
67 | return (r >> 28) & 0x3; | ||
68 | } | ||
69 | static inline u32 perf_pmasys_mem_block_target_lfb_v(void) | ||
70 | { | ||
71 | return 0x00000000; | ||
72 | } | ||
73 | static inline u32 perf_pmasys_mem_block_target_lfb_f(void) | ||
74 | { | ||
75 | return 0x0; | ||
76 | } | ||
77 | static inline u32 perf_pmasys_mem_block_valid_f(u32 v) | ||
78 | { | ||
79 | return (v & 0x1) << 31; | ||
80 | } | ||
81 | static inline u32 perf_pmasys_mem_block_valid_v(u32 r) | ||
82 | { | ||
83 | return (r >> 31) & 0x1; | ||
84 | } | ||
85 | static inline u32 perf_pmasys_mem_block_valid_true_v(void) | ||
86 | { | ||
87 | return 0x00000001; | ||
88 | } | ||
89 | static inline u32 perf_pmasys_mem_block_valid_true_f(void) | ||
90 | { | ||
91 | return 0x80000000; | ||
92 | } | ||
93 | static inline u32 perf_pmasys_mem_block_valid_false_v(void) | ||
94 | { | ||
95 | return 0x00000000; | ||
96 | } | ||
97 | static inline u32 perf_pmasys_mem_block_valid_false_f(void) | ||
98 | { | ||
99 | return 0x0; | ||
100 | } | ||
101 | static inline u32 perf_pmasys_outbase_r(void) | ||
102 | { | ||
103 | return 0x001b4074; | ||
104 | } | ||
105 | static inline u32 perf_pmasys_outbaseupper_r(void) | ||
106 | { | ||
107 | return 0x001b4078; | ||
108 | } | ||
109 | static inline u32 perf_pmasys_outbaseupper_ptr_f(u32 v) | ||
110 | { | ||
111 | return (v & 0xff) << 0; | ||
112 | } | ||
113 | static inline u32 perf_pmasys_outsize_r(void) | ||
114 | { | ||
115 | return 0x001b407c; | ||
116 | } | ||
117 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 8d9488fd..80c766b6 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -101,6 +101,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
101 | int rw_flag); | 101 | int rw_flag); |
102 | static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm); | 102 | static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm); |
103 | static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm); | 103 | static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm); |
104 | static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm); | ||
104 | 105 | ||
105 | 106 | ||
106 | struct gk20a_dmabuf_priv { | 107 | struct gk20a_dmabuf_priv { |
@@ -280,6 +281,7 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm) | |||
280 | { | 281 | { |
281 | gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block); | 282 | gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block); |
282 | gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block); | 283 | gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block); |
284 | gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block); | ||
283 | } | 285 | } |
284 | 286 | ||
285 | int gk20a_init_mm_setup_sw(struct gk20a *g) | 287 | int gk20a_init_mm_setup_sw(struct gk20a *g) |
@@ -315,6 +317,10 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
315 | if (err) | 317 | if (err) |
316 | return err; | 318 | return err; |
317 | 319 | ||
320 | err = gk20a_init_hwpm(mm); | ||
321 | if (err) | ||
322 | return err; | ||
323 | |||
318 | /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */ | 324 | /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */ |
319 | g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share; | 325 | g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share; |
320 | mm->remove_support = gk20a_remove_mm_support; | 326 | mm->remove_support = gk20a_remove_mm_support; |
@@ -2720,6 +2726,21 @@ clean_up_va: | |||
2720 | return err; | 2726 | return err; |
2721 | } | 2727 | } |
2722 | 2728 | ||
2729 | static int gk20a_init_hwpm(struct mm_gk20a *mm) | ||
2730 | { | ||
2731 | int err; | ||
2732 | struct vm_gk20a *vm = &mm->pmu.vm; | ||
2733 | struct gk20a *g = gk20a_from_mm(mm); | ||
2734 | struct inst_desc *inst_block = &mm->hwpm.inst_block; | ||
2735 | |||
2736 | err = gk20a_alloc_inst_block(g, inst_block); | ||
2737 | if (err) | ||
2738 | return err; | ||
2739 | gk20a_init_inst_block(inst_block, vm, 0); | ||
2740 | |||
2741 | return 0; | ||
2742 | } | ||
2743 | |||
2723 | void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm, | 2744 | void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm, |
2724 | u32 big_page_size) | 2745 | u32 big_page_size) |
2725 | { | 2746 | { |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 40e9488d..7b355436 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -342,6 +342,12 @@ struct mm_gk20a { | |||
342 | struct inst_desc inst_block; | 342 | struct inst_desc inst_block; |
343 | } pmu; | 343 | } pmu; |
344 | 344 | ||
345 | struct { | ||
346 | /* using pmu vm currently */ | ||
347 | struct inst_desc inst_block; | ||
348 | } hwpm; | ||
349 | |||
350 | |||
345 | struct mutex l2_op_lock; | 351 | struct mutex l2_op_lock; |
346 | 352 | ||
347 | void (*remove_support)(struct mm_gk20a *mm); | 353 | void (*remove_support)(struct mm_gk20a *mm); |
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 1e438775..ebeacf9b 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h | |||
@@ -432,10 +432,26 @@ struct nvgpu_dbg_gpu_suspend_resume_all_sms_args { | |||
432 | #define NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS \ | 432 | #define NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS \ |
433 | _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 6, struct nvgpu_dbg_gpu_suspend_resume_all_sms_args) | 433 | _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 6, struct nvgpu_dbg_gpu_suspend_resume_all_sms_args) |
434 | 434 | ||
435 | struct nvgpu_dbg_gpu_perfbuf_map_args { | ||
436 | __u32 dmabuf_fd; /* in */ | ||
437 | __u32 reserved; | ||
438 | __u64 mapping_size; /* in, size of mapped buffer region */ | ||
439 | __u64 offset; /* out, virtual address of the mapping */ | ||
440 | }; | ||
441 | |||
442 | struct nvgpu_dbg_gpu_perfbuf_unmap_args { | ||
443 | __u64 offset; | ||
444 | }; | ||
445 | |||
446 | #define NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP \ | ||
447 | _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 7, struct nvgpu_dbg_gpu_perfbuf_map_args) | ||
448 | #define NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP \ | ||
449 | _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 8, struct nvgpu_dbg_gpu_perfbuf_unmap_args) | ||
450 | |||
435 | #define NVGPU_DBG_GPU_IOCTL_LAST \ | 451 | #define NVGPU_DBG_GPU_IOCTL_LAST \ |
436 | _IOC_NR(NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS) | 452 | _IOC_NR(NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP) |
437 | #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \ | 453 | #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \ |
438 | sizeof(struct nvgpu_dbg_gpu_exec_reg_ops_args) | 454 | sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args) |
439 | 455 | ||
440 | /* | 456 | /* |
441 | * /dev/nvhost-gpu device | 457 | * /dev/nvhost-gpu device |