From 994a60385166725cc007731c0ff353d4d643eeed Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Tue, 6 Jun 2017 22:50:12 -0700 Subject: gpu: nvgpu: add perf gr ops to support t19x Add init_ovr_sm_dsm_perf & get_ovr_perf_regs gr ops JIRA GPUT19X-49 Bug 200311674 Change-Id: If02dd9dc0e2e0eb1f68fdbaa86a37c6768eddcef Signed-off-by: Seema Khowala Reviewed-on: https://git-master.nvidia.com/r/1497403 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/gk20a.h | 4 ++++ drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 19 +++++++++++++++---- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 3 +++ drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 2 ++ 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index ac195fea..6fe29abe 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -195,6 +195,9 @@ struct gpu_ops { u32 *num_sm_dsm_perf_regs, u32 **sm_dsm_perf_regs, u32 *perf_register_stride); + void (*get_ovr_perf_regs)(struct gk20a *g, + u32 *num_ovr_perf_regs, + u32 **ovr_perf_regs); void (*set_hww_esr_report_mask)(struct gk20a *g); int (*setup_alpha_beta_tables)(struct gk20a *g, struct gr_gk20a *gr); @@ -273,6 +276,7 @@ struct gpu_ops { u32 (*get_max_lts_per_ltc)(struct gk20a *g); u32* (*get_rop_l2_en_mask)(struct gk20a *g); void (*init_sm_dsm_reg_info)(void); + void (*init_ovr_sm_dsm_perf)(void); int (*wait_empty)(struct gk20a *g, unsigned long duration_ms, u32 expect_delay); void (*init_cyclestats)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 80dca77a..618c276b 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -6488,7 +6488,7 @@ static u32 _ovr_perf_regs[17] = { 0, }; /* Following are the blocks of registers that the ucode stores in the extended region.*/ -static void init_ovr_perf_reg_info(void) +void gk20a_gr_init_ovr_sm_dsm_perf(void) { if (_ovr_perf_regs[0] 
!= 0) return; @@ -6510,6 +6510,7 @@ static void init_ovr_perf_reg_info(void) _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r(); _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r(); _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(); + } /* TBD: would like to handle this elsewhere, at a higher level. @@ -6529,21 +6530,24 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, u32 vaddr_lo; u32 vaddr_hi; u32 tmp; + u32 num_ovr_perf_regs = 0; + u32 *ovr_perf_regs = NULL; u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); - init_ovr_perf_reg_info(); + g->ops.gr.init_ovr_sm_dsm_perf(); g->ops.gr.init_sm_dsm_reg_info(); + g->ops.gr.get_ovr_perf_regs(g, &num_ovr_perf_regs, &ovr_perf_regs); gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); - for (reg = 0; reg < _num_ovr_perf_regs; reg++) { + for (reg = 0; reg < num_ovr_perf_regs; reg++) { for (gpc = 0; gpc < num_gpc; gpc++) { num_tpc = g->gr.gpc_tpc_count[gpc]; for (tpc = 0; tpc < num_tpc; tpc++) { chk_addr = ((gpc_stride * gpc) + (tpc_in_gpc_stride * tpc) + - _ovr_perf_regs[reg]); + ovr_perf_regs[reg]); if (chk_addr != addr) continue; /* reset the patch count from previous @@ -6603,6 +6607,13 @@ static inline int ctxsw_prog_ucode_header_size_in_bytes(void) return 256; } +void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs, + u32 **ovr_perf_regs) +{ + *num_ovr_perf_regs = _num_ovr_perf_regs; + *ovr_perf_regs = _ovr_perf_regs; +} + static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, u32 addr, bool is_quad, u32 quad, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index d32a719e..400b7feb 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -733,6 +733,9 @@ u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc); u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc); void 
gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc, u32 *esr_sm_sel); +void gk20a_gr_init_ovr_sm_dsm_perf(void); +void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs, + u32 **ovr_perf_regs); static inline const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode) { diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 1ff295c2..bd9b627f 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -1653,4 +1653,6 @@ void gm20b_init_gr(struct gpu_ops *gops) gops->gr.lock_down_sm = gk20a_gr_lock_down_sm; gops->gr.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down; gops->gr.clear_sm_hww = gm20b_gr_clear_sm_hww; + gops->gr.init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf; + gops->gr.get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs; } -- cgit v1.2.2