From 3f83528d769169fdaf25912f425226eaa07427f0 Mon Sep 17 00:00:00 2001 From: Vaibhav Kachore Date: Wed, 25 Jul 2018 17:12:38 +0530 Subject: gpu: nvgpu: correct parameters in set_pmm_register - This patch corrects parameters in set_pmm_registers - As FBP 6 and 7 are floorswept for GV100, GPU_LIT_NUM_FBPS should not be used - halify get_num_hwpm_perfmon and set_pmm_register Bug 2106999 Change-Id: Ib285b25d0c836c93b529dfe4e26c078159a3e6dd Signed-off-by: Vaibhav Kachore Reviewed-on: https://git-master.nvidia.com/r/1785620 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv100/gr_gv100.c | 87 +++++++++++----------- drivers/gpu/nvgpu/gv100/gr_gv100.h | 4 + drivers/gpu/nvgpu/gv100/hal_gv100.c | 2 + drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 4 + .../nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h | 4 + 5 files changed, 59 insertions(+), 42 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index 0d5692e1..5e8a99df 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c @@ -420,12 +420,12 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, } } -u32 gr_gv100_get_hw_accessor_stream_out_mode() +u32 gr_gv100_get_hw_accessor_stream_out_mode(void) { return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(); } -static void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val, +void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val, u32 num_chiplets, u32 num_perfmons) { u32 perfmon_index = 0; @@ -434,60 +434,63 @@ static void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val, u32 chiplet_stride = g->ops.gr.get_pmm_per_chiplet_offset(); for (chiplet_index = 0; chiplet_index < num_chiplets; chiplet_index++) { - for (perfmon_index = 0; perfmon_index < num_perfmons; + for (perfmon_index = 0; perfmon_index < num_perfmons; perfmon_index++) { - reg_offset = offset + perfmon_index * perf_pmmgpc_perdomain_offset_v() + - chiplet_index * chiplet_stride; + reg_offset = offset + perfmon_index * + perf_pmmsys_perdomain_offset_v() + + chiplet_index * chiplet_stride; nvgpu_writel(g, reg_offset, val); } } - } -static void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, int *num_sys_perfmon, - int *num_fbp_perfmon, int *num_gpc_perfmon) +void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, u32 *num_sys_perfmon, + u32 *num_fbp_perfmon, u32 *num_gpc_perfmon) { int err; u32 buf_offset_lo, buf_offset_addr, num_offsets; u32 perfmon_index = 0; - for (perfmon_index = 0; perfmon_index < perf_pmmsys_engine_sel__size_1_v(); - perfmon_index++) { + for (perfmon_index = 0; perfmon_index < + perf_pmmsys_engine_sel__size_1_v(); + perfmon_index++) { err = gr_gk20a_get_pm_ctx_buffer_offsets(g, - perf_pmmsys_engine_sel_r(perfmon_index), - 1, - &buf_offset_lo, - &buf_offset_addr, - &num_offsets); - if (err) { + perf_pmmsys_engine_sel_r(perfmon_index), + 1, + &buf_offset_lo, + &buf_offset_addr, + &num_offsets); + if (err != 0) { break; } } *num_sys_perfmon = perfmon_index; - for (perfmon_index = 0; perfmon_index < perf_pmmfbp_engine_sel__size_1_v(); - perfmon_index++) { + for (perfmon_index = 0; perfmon_index < + perf_pmmfbp_engine_sel__size_1_v(); + perfmon_index++) { err = gr_gk20a_get_pm_ctx_buffer_offsets(g, - perf_pmmfbp_engine_sel_r(perfmon_index), - 1, - &buf_offset_lo, - &buf_offset_addr, - &num_offsets); - if (err) { + perf_pmmfbp_engine_sel_r(perfmon_index), + 1, + &buf_offset_lo, + &buf_offset_addr, + &num_offsets); + if (err != 0) { break; } } *num_fbp_perfmon = perfmon_index; - for (perfmon_index = 0; perfmon_index < perf_pmmgpc_engine_sel__size_1_v(); - perfmon_index++) { + for (perfmon_index = 0; perfmon_index < + perf_pmmgpc_engine_sel__size_1_v(); + perfmon_index++) { err = gr_gk20a_get_pm_ctx_buffer_offsets(g, - perf_pmmgpc_engine_sel_r(perfmon_index), - 1, - &buf_offset_lo, - &buf_offset_addr, - &num_offsets); - if (err) { + perf_pmmgpc_engine_sel_r(perfmon_index), + 1, + &buf_offset_lo, + &buf_offset_addr, + &num_offsets); + if (err != 0) { break; } } @@ -496,17 +499,17 @@ static void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, int *num_sys_perfmon, void gr_gv100_init_hwpm_pmm_register(struct gk20a *g) { - int num_sys_perfmon = 0; - int num_fbp_perfmon = 0; - int num_gpc_perfmon = 0; + u32 num_sys_perfmon = 0; + u32 num_fbp_perfmon = 0; + u32 num_gpc_perfmon = 0; - gr_gv100_get_num_hwpm_perfmon(g, &num_sys_perfmon, + g->ops.gr.get_num_hwpm_perfmon(g, &num_sys_perfmon, &num_fbp_perfmon, &num_gpc_perfmon); - gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0), - 1, 0xFFFFFFFF, num_sys_perfmon); - gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0), - nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPS), 0xFFFFFFFF, num_fbp_perfmon); - gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0), - nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS), 0xFFFFFFFF, num_gpc_perfmon); + g->ops.gr.set_pmm_register(g, perf_pmmsys_engine_sel_r(0), + 0xFFFFFFFFU, 1U, num_sys_perfmon); + g->ops.gr.set_pmm_register(g, perf_pmmfbp_engine_sel_r(0), + 0xFFFFFFFFU, g->gr.num_fbps, num_fbp_perfmon); + g->ops.gr.set_pmm_register(g, perf_pmmgpc_engine_sel_r(0), + 0xFFFFFFFFU, g->gr.gpc_count, num_gpc_perfmon); } diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h index aae87f09..d0d76737 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.h +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h @@ -48,4 +48,8 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, u32 *priv_addr_table, u32 *t); u32 gr_gv100_get_hw_accessor_stream_out_mode(void); void gr_gv100_init_hwpm_pmm_register(struct gk20a *g); +void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val, + u32 num_chiplets, u32 num_perfmons); +void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, u32 *num_sys_perfmon, + u32 *num_fbp_perfmon, u32 *num_gpc_perfmon); #endif /* NVGPU_GR_GV100_H */ diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 26584bb4..3c3e5742 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -376,6 +376,8 @@ static const struct gpu_ops gv100_ops = { .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode, .get_hw_accessor_stream_out_mode = gr_gv100_get_hw_accessor_stream_out_mode, + .get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon, + .set_pmm_register = gr_gv100_set_pmm_register, .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, .init_hwpm_pmm_register = gr_gv100_init_hwpm_pmm_register, .record_sm_error_state = gv11b_gr_record_sm_error_state, diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index fa31d0e1..a7fe1c2f 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -330,6 +330,10 @@ struct gpu_ops { u64 gpu_va, u32 mode); void (*init_hwpm_pmm_register)(struct gk20a *g); + void (*get_num_hwpm_perfmon)(struct gk20a *g, u32 *num_sys_perfmon, + u32 *num_fbp_perfmon, u32 *num_gpc_perfmon); + void (*set_pmm_register)(struct gk20a *g, u32 offset, u32 val, + u32 num_chiplets, u32 num_perfmons); int (*dump_gr_regs)(struct gk20a *g, struct gk20a_debug_output *o); int (*update_pc_sampling)(struct channel_gk20a *ch, diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h index a7ba460e..40107ee8 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h @@ -60,6 +60,10 @@ static inline u32 perf_pmmgpc_perdomain_offset_v(void) { return 0x00000200U; } +static inline u32 perf_pmmsys_perdomain_offset_v(void) +{ + return 0x00000200U; +} static inline u32 perf_pmmgpc_base_v(void) { return 0x00180000U; -- cgit v1.2.2