summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Seema Khowala <seemaj@nvidia.com>, 2017-06-07 01:50:12 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com>, 2017-07-19 02:42:01 -0400
commit: 994a60385166725cc007731c0ff353d4d643eeed (patch)
tree: f16e7ec3eb26eabde5c459e016b8688d4f7cf7e4
parent: 71cd4a486f54eee67972a81f59c4185f828102e5 (diff)
gpu: nvgpu: add perf gr ops to support t19x
Add init_ovr_sm_dsm_perf & get_ovr_perf_regs gr ops

JIRA GPUT19X-49
Bug 200311674

Change-Id: If02dd9dc0e2e0eb1f68fdbaa86a37c6768eddcef
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1497403
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.h | 4
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 19
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 3
-rw-r--r-- drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 2
4 files changed, 24 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index ac195fea..6fe29abe 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -195,6 +195,9 @@ struct gpu_ops {
195 u32 *num_sm_dsm_perf_regs, 195 u32 *num_sm_dsm_perf_regs,
196 u32 **sm_dsm_perf_regs, 196 u32 **sm_dsm_perf_regs,
197 u32 *perf_register_stride); 197 u32 *perf_register_stride);
198 void (*get_ovr_perf_regs)(struct gk20a *g,
199 u32 *num_ovr_perf_regs,
200 u32 **ovr_perf_regsr);
198 void (*set_hww_esr_report_mask)(struct gk20a *g); 201 void (*set_hww_esr_report_mask)(struct gk20a *g);
199 int (*setup_alpha_beta_tables)(struct gk20a *g, 202 int (*setup_alpha_beta_tables)(struct gk20a *g,
200 struct gr_gk20a *gr); 203 struct gr_gk20a *gr);
@@ -273,6 +276,7 @@ struct gpu_ops {
273 u32 (*get_max_lts_per_ltc)(struct gk20a *g); 276 u32 (*get_max_lts_per_ltc)(struct gk20a *g);
274 u32* (*get_rop_l2_en_mask)(struct gk20a *g); 277 u32* (*get_rop_l2_en_mask)(struct gk20a *g);
275 void (*init_sm_dsm_reg_info)(void); 278 void (*init_sm_dsm_reg_info)(void);
279 void (*init_ovr_sm_dsm_perf)(void);
276 int (*wait_empty)(struct gk20a *g, unsigned long duration_ms, 280 int (*wait_empty)(struct gk20a *g, unsigned long duration_ms,
277 u32 expect_delay); 281 u32 expect_delay);
278 void (*init_cyclestats)(struct gk20a *g); 282 void (*init_cyclestats)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 80dca77a..618c276b 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -6488,7 +6488,7 @@ static u32 _ovr_perf_regs[17] = { 0, };
6488/* Following are the blocks of registers that the ucode 6488/* Following are the blocks of registers that the ucode
6489 stores in the extended region.*/ 6489 stores in the extended region.*/
6490 6490
6491static void init_ovr_perf_reg_info(void) 6491void gk20a_gr_init_ovr_sm_dsm_perf(void)
6492{ 6492{
6493 if (_ovr_perf_regs[0] != 0) 6493 if (_ovr_perf_regs[0] != 0)
6494 return; 6494 return;
@@ -6510,6 +6510,7 @@ static void init_ovr_perf_reg_info(void)
6510 _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r(); 6510 _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r();
6511 _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r(); 6511 _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r();
6512 _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(); 6512 _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r();
6513
6513} 6514}
6514 6515
6515/* TBD: would like to handle this elsewhere, at a higher level. 6516/* TBD: would like to handle this elsewhere, at a higher level.
@@ -6529,21 +6530,24 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6529 u32 vaddr_lo; 6530 u32 vaddr_lo;
6530 u32 vaddr_hi; 6531 u32 vaddr_hi;
6531 u32 tmp; 6532 u32 tmp;
6533 u32 num_ovr_perf_regs = 0;
6534 u32 *ovr_perf_regs = NULL;
6532 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); 6535 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
6533 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); 6536 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
6534 6537
6535 init_ovr_perf_reg_info(); 6538 g->ops.gr.init_ovr_sm_dsm_perf();
6536 g->ops.gr.init_sm_dsm_reg_info(); 6539 g->ops.gr.init_sm_dsm_reg_info();
6540 g->ops.gr.get_ovr_perf_regs(g, &num_ovr_perf_regs, &ovr_perf_regs);
6537 6541
6538 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); 6542 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6539 6543
6540 for (reg = 0; reg < _num_ovr_perf_regs; reg++) { 6544 for (reg = 0; reg < num_ovr_perf_regs; reg++) {
6541 for (gpc = 0; gpc < num_gpc; gpc++) { 6545 for (gpc = 0; gpc < num_gpc; gpc++) {
6542 num_tpc = g->gr.gpc_tpc_count[gpc]; 6546 num_tpc = g->gr.gpc_tpc_count[gpc];
6543 for (tpc = 0; tpc < num_tpc; tpc++) { 6547 for (tpc = 0; tpc < num_tpc; tpc++) {
6544 chk_addr = ((gpc_stride * gpc) + 6548 chk_addr = ((gpc_stride * gpc) +
6545 (tpc_in_gpc_stride * tpc) + 6549 (tpc_in_gpc_stride * tpc) +
6546 _ovr_perf_regs[reg]); 6550 ovr_perf_regs[reg]);
6547 if (chk_addr != addr) 6551 if (chk_addr != addr)
6548 continue; 6552 continue;
6549 /* reset the patch count from previous 6553 /* reset the patch count from previous
@@ -6603,6 +6607,13 @@ static inline int ctxsw_prog_ucode_header_size_in_bytes(void)
6603 return 256; 6607 return 256;
6604} 6608}
6605 6609
6610void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
6611 u32 **ovr_perf_regs)
6612{
6613 *num_ovr_perf_regs = _num_ovr_perf_regs;
6614 *ovr_perf_regs = _ovr_perf_regs;
6615}
6616
6606static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, 6617static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
6607 u32 addr, 6618 u32 addr,
6608 bool is_quad, u32 quad, 6619 bool is_quad, u32 quad,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index d32a719e..400b7feb 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -733,6 +733,9 @@ u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc);
733u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc); 733u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc);
734void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc, 734void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
735 u32 *esr_sm_sel); 735 u32 *esr_sm_sel);
736void gk20a_gr_init_ovr_sm_dsm_perf(void);
737void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
738 u32 **ovr_perf_regs);
736 739
737static inline const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode) 740static inline const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode)
738{ 741{
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 1ff295c2..bd9b627f 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1653,4 +1653,6 @@ void gm20b_init_gr(struct gpu_ops *gops)
1653 gops->gr.lock_down_sm = gk20a_gr_lock_down_sm; 1653 gops->gr.lock_down_sm = gk20a_gr_lock_down_sm;
1654 gops->gr.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down; 1654 gops->gr.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down;
1655 gops->gr.clear_sm_hww = gm20b_gr_clear_sm_hww; 1655 gops->gr.clear_sm_hww = gm20b_gr_clear_sm_hww;
1656 gops->gr.init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf;
1657 gops->gr.get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs;
1656} 1658}