From 3877adcd656e8e4329e2c4250119de2256f30730 Mon Sep 17 00:00:00 2001 From: Konsta Holtta Date: Wed, 14 Jan 2015 14:04:08 +0200 Subject: gpu: nvgpu: add hw perfmon buffer mapping ioctls Map/unmap buffers for HWPM and deal with its instance block, the minimum work required to run the HWPM via regops from userspace. Bug 1517458 Bug 1573150 Change-Id: If14086a88b54bf434843d7c2fee8a9113023a3b0 Signed-off-by: Konsta Holtta Reviewed-on: http://git-master/r/673689 Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 95 +++++++++++++++++++++++++- drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h | 117 ++++++++++++++++++++++++++++++++ drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 21 ++++++ drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 6 ++ 4 files changed, 238 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index daed2967..5bee34fc 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -1,7 +1,7 @@ /* * Tegra GK20A GPU Debugger/Profiler Driver * - * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2013-2015, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -29,6 +29,7 @@ #include "regops_gk20a.h" #include "hw_therm_gk20a.h" #include "hw_gr_gk20a.h" +#include "hw_perf_gk20a.h" struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = { .exec_reg_ops = exec_regops_gk20a, @@ -370,6 +371,11 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); +static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_perfbuf_map_args *args); + +static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_perfbuf_unmap_args *args); long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) @@ -436,6 +442,16 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); break; + case NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP: + err = gk20a_perfbuf_map(dbg_s, + (struct nvgpu_dbg_gpu_perfbuf_map_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP: + err = gk20a_perfbuf_unmap(dbg_s, + (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf); + break; + default: gk20a_err(dev_from_gk20a(g), "unrecognized dbg gpu ioctl cmd: 0x%x", @@ -775,3 +791,80 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( mutex_unlock(&g->dbg_sessions_lock); return err; } + /* gk20a_perfbuf_map: handler for NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP. Maps the dmabuf named by args->dmabuf_fd into the PMU VM (g->mm.pmu.vm), programs the PERF_PMASYS OUTBASE, OUTBASEUPPER and OUTSIZE registers from the mapping, and finally writes MEM_BLOCK with the HWPM instance block address. args->offset is handed to gk20a_vm_map_buffer() by pointer and is afterwards used as the GPU virtual address of the mapping (presumably filled in by that call; confirm). Returns -EACCES when g->allow_all is not set, the error from gk20a_vm_map_buffer() when mapping fails, -EINVAL (after unmapping again) when args->offset + size exceeds SZ_4G, and 0 on success. */ +static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_perfbuf_map_args *args) +{ + struct gk20a *g = dbg_s->g; + int err; + u32 virt_size; + u32 virt_addr_lo; + u32 virt_addr_hi; + u32 inst_pa_page; + + if (!g->allow_all) + return -EACCES; + + err = gk20a_vm_map_buffer(&g->mm.pmu.vm, + args->dmabuf_fd, + &args->offset, + 0, + 0, + 0, + args->mapping_size); + if (err) + return err; + + /* perf output buffer may not cross a 4GB boundary - with a separate va + * smaller than that, it won't */ + virt_size 
= u64_lo32(args->mapping_size); + virt_addr_lo = u64_lo32(args->offset); + virt_addr_hi = u64_hi32(args->offset); + /* but check anyway */ /* NOTE(review): virt_size is a u32 taken from args->mapping_size via u64_lo32(), so both this range check and the OUTSIZE write below see only that 32-bit value, while the mapping above was requested with the full args->mapping_size. Confirm that mapping_size cannot exceed 32 bits, and what a mapping_size of 0 means to gk20a_vm_map_buffer(). */ + if (args->offset + virt_size > SZ_4G) { + gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset); + return -EINVAL; + } + + /* address and size are aligned to 32 bytes, the lowest bits read back + * as zeros */ + gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo); + gk20a_writel(g, perf_pmasys_outbaseupper_r(), + perf_pmasys_outbaseupper_ptr_f(virt_addr_hi)); + gk20a_writel(g, perf_pmasys_outsize_r(), virt_size); + + /* this field is aligned to 4K */ + inst_pa_page = g->mm.hwpm.inst_block.cpu_pa >> 12; + + /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK + * should be written last */ + gk20a_writel(g, perf_pmasys_mem_block_r(), + perf_pmasys_mem_block_base_f(inst_pa_page) | + perf_pmasys_mem_block_valid_true_f() | + perf_pmasys_mem_block_target_lfb_f()); + + return 0; +} + /* gk20a_perfbuf_unmap: handler for NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP. Writes 0 to the PERF_PMASYS OUTBASE, OUTBASEUPPER and OUTSIZE registers, writes MEM_BLOCK with base 0, valid=false and target 0, and then unmaps args->offset from the PMU VM (g->mm.pmu.vm). Returns -EACCES when g->allow_all is not set, otherwise 0. NOTE(review): args->offset is used exactly as supplied by the caller; nothing in this function checks that it is the address produced by an earlier perfbuf map call. Confirm whether gk20a_vm_unmap_buffer() tolerates arbitrary offsets in this VM. */ +static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_perfbuf_unmap_args *args) +{ + struct gk20a *g = dbg_s->g; + + if (!g->allow_all) + return -EACCES; + + gk20a_writel(g, perf_pmasys_outbase_r(), 0); + gk20a_writel(g, perf_pmasys_outbaseupper_r(), + perf_pmasys_outbaseupper_ptr_f(0)); + gk20a_writel(g, perf_pmasys_outsize_r(), 0); + + gk20a_writel(g, perf_pmasys_mem_block_r(), + perf_pmasys_mem_block_base_f(0) | + perf_pmasys_mem_block_valid_false_f() | + perf_pmasys_mem_block_target_f(0)); + + gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset); + + return 0; +} diff --git a/drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h new file mode 100644 index 00000000..65d91de6 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +/* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_o(void) : Returns the offset for element <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. 
+ */ +#ifndef _hw_perf_gk20a_h_ +#define _hw_perf_gk20a_h_ + /* PERF_PMASYS_MEM_BLOCK register, offset 0x001b4070. Field layout as encoded by the accessors below: base occupies bits 27:0, target bits 29:28 (lfb = 0), valid bit 31 (true = 1, false = 0). */ +static inline u32 perf_pmasys_mem_block_r(void) +{ + return 0x001b4070; +} +static inline u32 perf_pmasys_mem_block_base_f(u32 v) +{ + return (v & 0xfffffff) << 0; +} +static inline u32 perf_pmasys_mem_block_target_f(u32 v) +{ + return (v & 0x3) << 28; +} +static inline u32 perf_pmasys_mem_block_target_v(u32 r) +{ + return (r >> 28) & 0x3; +} +static inline u32 perf_pmasys_mem_block_target_lfb_v(void) +{ + return 0x00000000; +} +static inline u32 perf_pmasys_mem_block_target_lfb_f(void) +{ + return 0x0; +} +static inline u32 perf_pmasys_mem_block_valid_f(u32 v) +{ + return (v & 0x1) << 31; +} +static inline u32 perf_pmasys_mem_block_valid_v(u32 r) +{ + return (r >> 31) & 0x1; +} +static inline u32 perf_pmasys_mem_block_valid_true_v(void) +{ + return 0x00000001; +} +static inline u32 perf_pmasys_mem_block_valid_true_f(void) +{ + return 0x80000000; +} +static inline u32 perf_pmasys_mem_block_valid_false_v(void) +{ + return 0x00000000; +} +static inline u32 perf_pmasys_mem_block_valid_false_f(void) +{ + return 0x0; +} /* PERF_PMASYS output buffer registers: OUTBASE at offset 0x001b4074, OUTBASEUPPER at 0x001b4078 (its ptr field is masked to bits 7:0), OUTSIZE at 0x001b407c. */ +static inline u32 perf_pmasys_outbase_r(void) +{ + return 0x001b4074; +} +static inline u32 perf_pmasys_outbaseupper_r(void) +{ + return 0x001b4078; +} +static inline u32 perf_pmasys_outbaseupper_ptr_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 perf_pmasys_outsize_r(void) +{ + return 0x001b407c; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 8d9488fd..80c766b6 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -101,6 +101,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, int rw_flag); static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm); static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm); +static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm); struct gk20a_dmabuf_priv { @@ -280,6 +281,7 @@ static void gk20a_remove_mm_support(struct mm_gk20a 
*mm) { gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block); gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block); + gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block); } int gk20a_init_mm_setup_sw(struct gk20a *g) @@ -315,6 +317,10 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) if (err) return err; + err = gk20a_init_hwpm(mm); + if (err) + return err; + /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */ g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share; mm->remove_support = gk20a_remove_mm_support; @@ -2720,6 +2726,21 @@ clean_up_va: return err; } /* gk20a_init_hwpm: allocate the HWPM instance block (mm->hwpm.inst_block) with gk20a_alloc_inst_block() and initialise it through gk20a_init_inst_block() against the PMU VM (mm->pmu.vm), passing 0 as the big_page_size argument. HWPM has no VM of its own here; it reuses the PMU one. Returns the error from gk20a_alloc_inst_block() on failure, 0 otherwise. */ +static int gk20a_init_hwpm(struct mm_gk20a *mm) +{ + int err; + struct vm_gk20a *vm = &mm->pmu.vm; + struct gk20a *g = gk20a_from_mm(mm); + struct inst_desc *inst_block = &mm->hwpm.inst_block; + + err = gk20a_alloc_inst_block(g, inst_block); + if (err) + return err; + gk20a_init_inst_block(inst_block, vm, 0); + + return 0; +} + void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm, u32 big_page_size) { diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 40e9488d..7b355436 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -342,6 +342,12 @@ struct mm_gk20a { struct inst_desc inst_block; } pmu; + struct { + /* using pmu vm currently */ + struct inst_desc inst_block; + } hwpm; + + struct mutex l2_op_lock; void (*remove_support)(struct mm_gk20a *mm); -- cgit v1.2.2