From 503d489dba278136ea0e4241d000018682989da5 Mon Sep 17 00:00:00 2001
From: Vaibhav Kachore
Date: Tue, 3 Jul 2018 17:21:13 +0530
Subject: gpu: nvgpu: Initialize hwpm perfmons (engine_sel)

- For Mode-E ctxsw it is required that engine_sel is set to 0xFFFFFFFF.
- Default 0 is a valid signal and causes problems.

Bug 2106999

Change-Id: I5cdb4441a8e6d7e8133c31a9e361b54611dd2995
Signed-off-by: Vaibhav Kachore
Reviewed-on: https://git-master.nvidia.com/r/1770755
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gv100/gr_gv100.c | 123 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)

(limited to 'drivers/gpu/nvgpu/gv100/gr_gv100.c')

diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c
index b1b1cee6..5f891ce5 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c
@@ -39,6 +39,7 @@
 #include
 #include
 #include
+#include
 
 
 /*
@@ -459,3 +460,125 @@ u32 gr_gv100_get_hw_accessor_stream_out_mode()
 {
 	return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f();
 }
+
+/*
+ * Write val to the same PMM register of every perfmon in every chiplet.
+ *
+ * offset:       address of the register for perfmon 0 of chiplet 0
+ * val:          value written to each register instance
+ * num_chiplets: number of chiplets to walk
+ * num_perfmons: number of perfmons to walk within each chiplet
+ *
+ * Each instance is at
+ *   offset + perfmon * perf_pmmgpc_perdomain_offset_v() + chiplet * stride
+ * with stride taken from g->ops.gr.get_pmm_per_chiplet_offset().
+ *
+ * NOTE(review): the GPC per-domain stride is applied whatever register base
+ * the caller passes; confirm SYS and FBP perfmons use the same spacing.
+ */
+static void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val,
+				u32 num_chiplets, u32 num_perfmons)
+{
+	u32 perfmon_index = 0;
+	u32 chiplet_index = 0;
+	u32 reg_offset = 0;
+	u32 chiplet_stride = g->ops.gr.get_pmm_per_chiplet_offset();
+
+	for (chiplet_index = 0; chiplet_index < num_chiplets; chiplet_index++) {
+		for (perfmon_index = 0; perfmon_index < num_perfmons;
+						perfmon_index++) {
+			reg_offset = offset + perfmon_index *
+				perf_pmmgpc_perdomain_offset_v() +
+				chiplet_index * chiplet_stride;
+			nvgpu_writel(g, reg_offset, val);
+		}
+	}
+}
+
+/*
+ * Count the SYS, FBP and GPC perfmons whose engine_sel register can be
+ * looked up in the PM context buffer.
+ *
+ * For each unit the engine_sel registers are probed in index order with
+ * gr_gk20a_get_pm_ctx_buffer_offsets(). The reported count is the index of
+ * the first register whose lookup returns an error, or the value of
+ * perf_pmm<unit>_engine_sel__size_1_v() when no lookup fails.
+ *
+ * num_sys_perfmon, num_fbp_perfmon, num_gpc_perfmon: outputs, always written.
+ */
+static void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, int *num_sys_perfmon,
+				int *num_fbp_perfmon, int *num_gpc_perfmon)
+{
+	int err;
+	/* Only the return code of the lookup is used; these are scratch. */
+	u32 buf_offset_lo, buf_offset_addr, num_offsets;
+	u32 perfmon_index = 0;
+
+	for (perfmon_index = 0; perfmon_index < perf_pmmsys_engine_sel__size_1_v();
+					perfmon_index++) {
+		err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
+				perf_pmmsys_engine_sel_r(perfmon_index),
+				1,
+				&buf_offset_lo,
+				&buf_offset_addr,
+				&num_offsets);
+		if (err) {
+			break;
+		}
+	}
+	*num_sys_perfmon = perfmon_index;
+
+	for (perfmon_index = 0; perfmon_index < perf_pmmfbp_engine_sel__size_1_v();
+					perfmon_index++) {
+		err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
+				perf_pmmfbp_engine_sel_r(perfmon_index),
+				1,
+				&buf_offset_lo,
+				&buf_offset_addr,
+				&num_offsets);
+		if (err) {
+			break;
+		}
+	}
+	*num_fbp_perfmon = perfmon_index;
+
+	for (perfmon_index = 0; perfmon_index < perf_pmmgpc_engine_sel__size_1_v();
+					perfmon_index++) {
+		err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
+				perf_pmmgpc_engine_sel_r(perfmon_index),
+				1,
+				&buf_offset_lo,
+				&buf_offset_addr,
+				&num_offsets);
+		if (err) {
+			break;
+		}
+	}
+	*num_gpc_perfmon = perfmon_index;
+}
+
+/*
+ * Set engine_sel of every available SYS, FBP and GPC perfmon to 0xFFFFFFFF.
+ *
+ * Mode-E ctxsw requires engine_sel to be 0xFFFFFFFF: the default value 0 is
+ * a valid signal and causes problems.
+ */
+void gr_gv100_init_hwpm_pmm_register(struct gk20a *g)
+{
+	int num_sys_perfmon = 0;
+	int num_fbp_perfmon = 0;
+	int num_gpc_perfmon = 0;
+
+	gr_gv100_get_num_hwpm_perfmon(g, &num_sys_perfmon,
+				&num_fbp_perfmon, &num_gpc_perfmon);
+
+	/* Arguments are (g, offset, val, num_chiplets, num_perfmons). */
+	gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0),
+		0xFFFFFFFF, 1, num_sys_perfmon);
+	gr_gv100_set_pmm_register(g, perf_pmmfbp_engine_sel_r(0),
+		0xFFFFFFFF, nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPS),
+		num_fbp_perfmon);
+	gr_gv100_set_pmm_register(g, perf_pmmgpc_engine_sel_r(0),
+		0xFFFFFFFF, nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS),
+		num_gpc_perfmon);
+}
-- 
cgit v1.2.2