From 503d489dba278136ea0e4241d000018682989da5 Mon Sep 17 00:00:00 2001
From: Vaibhav Kachore
Date: Tue, 3 Jul 2018 17:21:13 +0530
Subject: gpu: nvgpu: Initialize hwpm perfmons (engine_sel)

- For Mode-E ctxsw it is required that engine_sel is set
  to 0xFFFFFFFF.
- Default 0 is a valid signal and causes problems.

Bug 2106999

Change-Id: I5cdb4441a8e6d7e8133c31a9e361b54611dd2995
Signed-off-by: Vaibhav Kachore
Reviewed-on: https://git-master.nvidia.com/r/1770755
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/gk20a.h                    |   1 +
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c                 |   5 ++
 drivers/gpu/nvgpu/gv100/gr_gv100.c                 | 105 ++++++++++++++++++++++
 drivers/gpu/nvgpu/gv100/gr_gv100.h                 |   1 +
 drivers/gpu/nvgpu/gv100/hal_gv100.c                |   1 +
 .../nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h   |  24 +++++
 6 files changed, 137 insertions(+)

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index ce0a6563..b677419c 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -312,6 +312,7 @@ struct gpu_ops {
 				struct channel_gk20a *c,
 				u64 gpu_va,
 				u32 mode);
+		void (*init_hwpm_pmm_register)(struct gk20a *g);
 		int (*dump_gr_regs)(struct gk20a *g,
 				struct gk20a_debug_output *o);
 		int (*update_pc_sampling)(struct channel_gk20a *ch,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 3f49fbf7..c70c1cd4 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1782,6 +1782,11 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 				return -ENOMEM;
 			}
 		}
+
+		if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW &&
+			g->ops.gr.init_hwpm_pmm_register) {
+			g->ops.gr.init_hwpm_pmm_register(g);
+		}
 	}
 
 	data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c
index b1b1cee6..5f891ce5 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c
@@ -39,6 +39,7 @@
 #include
 #include
 #include
+#include <nvgpu/hw/gv100/hw_perf_gv100.h>
 
 
 /*
@@ -459,3 +460,107 @@ u32 gr_gv100_get_hw_accessor_stream_out_mode()
 {
 	return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f();
 }
+
+/*
+ * Write val to one register of every perfmon in every chiplet of a unit type.
+ *
+ * offset is the register address for perfmon 0 of chiplet 0. Consecutive
+ * perfmons are perf_pmmgpc_perdomain_offset_v() apart and consecutive chiplets
+ * are g->ops.gr.get_pmm_per_chiplet_offset() apart.
+ *
+ * NOTE(review): the GPC per-domain stride is used for every unit type this
+ * helper is called with; confirm SYS and FBP perfmons share that stride.
+ */
+static void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val,
+				u32 num_chiplets, u32 num_perfmons)
+{
+	u32 perfmon_index = 0;
+	u32 chiplet_index = 0;
+	u32 reg_offset = 0;
+	u32 perfmon_stride = perf_pmmgpc_perdomain_offset_v();
+	u32 chiplet_stride = g->ops.gr.get_pmm_per_chiplet_offset();
+
+	for (chiplet_index = 0; chiplet_index < num_chiplets; chiplet_index++) {
+		for (perfmon_index = 0; perfmon_index < num_perfmons;
+						perfmon_index++) {
+			reg_offset = offset + perfmon_index * perfmon_stride +
+					chiplet_index * chiplet_stride;
+			nvgpu_writel(g, reg_offset, val);
+		}
+	}
+}
+
+/*
+ * Count the leading perfmons of one unit type for which
+ * gr_gk20a_get_pm_ctx_buffer_offsets() succeeds on the engine_sel register.
+ *
+ * engine_sel_r maps a perfmon index to its engine_sel register address and
+ * max_perfmons bounds the search. Probing stops at the first failure, so the
+ * result is the index of the first perfmon that failed, or max_perfmons.
+ */
+static u32 gr_gv100_count_hwpm_perfmon(struct gk20a *g,
+				u32 (*engine_sel_r)(u32 i), u32 max_perfmons)
+{
+	u32 buf_offset_lo, buf_offset_addr, num_offsets;
+	u32 perfmon_index;
+	int err;
+
+	for (perfmon_index = 0; perfmon_index < max_perfmons; perfmon_index++) {
+		err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
+				engine_sel_r(perfmon_index),
+				1,
+				&buf_offset_lo,
+				&buf_offset_addr,
+				&num_offsets);
+		if (err) {
+			break;
+		}
+	}
+
+	return perfmon_index;
+}
+
+/*
+ * Report how many SYS, FBP and GPC perfmons pass the engine_sel probe done by
+ * gr_gv100_count_hwpm_perfmon(). Each count is at most the corresponding
+ * perf_pmm*_engine_sel__size_1_v().
+ */
+static void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, u32 *num_sys_perfmon,
+				u32 *num_fbp_perfmon, u32 *num_gpc_perfmon)
+{
+	*num_sys_perfmon = gr_gv100_count_hwpm_perfmon(g,
+				perf_pmmsys_engine_sel_r,
+				perf_pmmsys_engine_sel__size_1_v());
+	*num_fbp_perfmon = gr_gv100_count_hwpm_perfmon(g,
+				perf_pmmfbp_engine_sel_r,
+				perf_pmmfbp_engine_sel__size_1_v());
+	*num_gpc_perfmon = gr_gv100_count_hwpm_perfmon(g,
+				perf_pmmgpc_engine_sel_r,
+				perf_pmmgpc_engine_sel__size_1_v());
+}
+
+/*
+ * Set engine_sel of every detected SYS, FBP and GPC perfmon to 0xFFFFFFFF.
+ *
+ * Mode-E ctxsw requires engine_sel to be 0xFFFFFFFF; the default of 0 is a
+ * valid signal and causes problems.
+ */
+void gr_gv100_init_hwpm_pmm_register(struct gk20a *g)
+{
+	u32 num_sys_perfmon = 0;
+	u32 num_fbp_perfmon = 0;
+	u32 num_gpc_perfmon = 0;
+
+	gr_gv100_get_num_hwpm_perfmon(g, &num_sys_perfmon,
+				&num_fbp_perfmon, &num_gpc_perfmon);
+
+	/* Arguments: base register, value, chiplet count, perfmon count. */
+	gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0),
+		0xFFFFFFFFU, 1U, num_sys_perfmon);
+	gr_gv100_set_pmm_register(g, perf_pmmfbp_engine_sel_r(0),
+		0xFFFFFFFFU, nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPS),
+		num_fbp_perfmon);
+	gr_gv100_set_pmm_register(g, perf_pmmgpc_engine_sel_r(0),
+		0xFFFFFFFFU, nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS),
+		num_gpc_perfmon);
+}
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h
index 457bd701..81bf7e38 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.h
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h
@@ -48,4 +48,5 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
 	u32 *priv_addr_table, u32 *t);
 void gr_gv100_init_gpc_mmu(struct gk20a *g);
 u32 gr_gv100_get_hw_accessor_stream_out_mode(void);
+void gr_gv100_init_hwpm_pmm_register(struct gk20a *g);
 #endif
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index c84778dd..3cfda7ca 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -361,6 +361,7 @@ static const struct gpu_ops gv100_ops = {
 		.get_hw_accessor_stream_out_mode =
 			gr_gv100_get_hw_accessor_stream_out_mode,
 		.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
+		.init_hwpm_pmm_register = gr_gv100_init_hwpm_pmm_register,
 		.record_sm_error_state = gv11b_gr_record_sm_error_state,
 		.update_sm_error_state = gv11b_gr_update_sm_error_state,
 		.clear_sm_error_state = gm20b_gr_clear_sm_error_state,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h
index 268efc52..a7ba460e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h
@@ -232,4 +232,28 @@ static inline u32 perf_pmasys_enginestatus_rbufempty_empty_f(void)
 {
 	return 0x10U;
 }
+static inline u32 perf_pmmsys_engine_sel_r(u32 i)
+{
+	return 0x0024006cU + i*512U;
+}
+static inline u32 perf_pmmsys_engine_sel__size_1_v(void)
+{
+	return 0x00000020U;
+}
+static inline u32 perf_pmmfbp_engine_sel_r(u32 i)
+{
+	return 0x0020006cU + i*512U;
+}
+static inline u32 perf_pmmfbp_engine_sel__size_1_v(void)
+{
+	return 0x00000020U;
+}
+static inline u32 perf_pmmgpc_engine_sel_r(u32 i)
+{
+	return 0x0018006cU + i*512U;
+}
+static inline u32 perf_pmmgpc_engine_sel__size_1_v(void)
+{
+	return 0x00000020U;
+}
 #endif
-- 
cgit v1.2.2