diff options
author | Seema Khowala <seemaj@nvidia.com> | 2017-06-07 01:50:12 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-07-19 02:42:01 -0400 |
commit | 994a60385166725cc007731c0ff353d4d643eeed (patch) | |
tree | f16e7ec3eb26eabde5c459e016b8688d4f7cf7e4 /drivers | |
parent | 71cd4a486f54eee67972a81f59c4185f828102e5 (diff) |
gpu: nvgpu: add perf gr ops to support t19x
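Add init_ovr_sm_dsm_perf and get_ovr_perf_regs gr ops. gr_gk20a_ctx_patch_smpc now initializes and fetches the override perf register list through these ops instead of accessing the gk20a static table directly, and gm20b points both ops at the gk20a implementations.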
JIRA GPUT19X-49
Bug 200311674
Change-Id: If02dd9dc0e2e0eb1f68fdbaa86a37c6768eddcef
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1497403
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 19 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 2 |
4 files changed, 24 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index ac195fea..6fe29abe 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -195,6 +195,9 @@ struct gpu_ops { | |||
195 | u32 *num_sm_dsm_perf_regs, | 195 | u32 *num_sm_dsm_perf_regs, |
196 | u32 **sm_dsm_perf_regs, | 196 | u32 **sm_dsm_perf_regs, |
197 | u32 *perf_register_stride); | 197 | u32 *perf_register_stride); |
198 | void (*get_ovr_perf_regs)(struct gk20a *g, | ||
199 | u32 *num_ovr_perf_regs, | ||
200 | u32 **ovr_perf_regsr); | ||
198 | void (*set_hww_esr_report_mask)(struct gk20a *g); | 201 | void (*set_hww_esr_report_mask)(struct gk20a *g); |
199 | int (*setup_alpha_beta_tables)(struct gk20a *g, | 202 | int (*setup_alpha_beta_tables)(struct gk20a *g, |
200 | struct gr_gk20a *gr); | 203 | struct gr_gk20a *gr); |
@@ -273,6 +276,7 @@ struct gpu_ops { | |||
273 | u32 (*get_max_lts_per_ltc)(struct gk20a *g); | 276 | u32 (*get_max_lts_per_ltc)(struct gk20a *g); |
274 | u32* (*get_rop_l2_en_mask)(struct gk20a *g); | 277 | u32* (*get_rop_l2_en_mask)(struct gk20a *g); |
275 | void (*init_sm_dsm_reg_info)(void); | 278 | void (*init_sm_dsm_reg_info)(void); |
279 | void (*init_ovr_sm_dsm_perf)(void); | ||
276 | int (*wait_empty)(struct gk20a *g, unsigned long duration_ms, | 280 | int (*wait_empty)(struct gk20a *g, unsigned long duration_ms, |
277 | u32 expect_delay); | 281 | u32 expect_delay); |
278 | void (*init_cyclestats)(struct gk20a *g); | 282 | void (*init_cyclestats)(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 80dca77a..618c276b 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -6488,7 +6488,7 @@ static u32 _ovr_perf_regs[17] = { 0, }; | |||
6488 | /* Following are the blocks of registers that the ucode | 6488 | /* Following are the blocks of registers that the ucode |
6489 | stores in the extended region.*/ | 6489 | stores in the extended region.*/ |
6490 | 6490 | ||
6491 | static void init_ovr_perf_reg_info(void) | 6491 | void gk20a_gr_init_ovr_sm_dsm_perf(void) |
6492 | { | 6492 | { |
6493 | if (_ovr_perf_regs[0] != 0) | 6493 | if (_ovr_perf_regs[0] != 0) |
6494 | return; | 6494 | return; |
@@ -6510,6 +6510,7 @@ static void init_ovr_perf_reg_info(void) | |||
6510 | _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r(); | 6510 | _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r(); |
6511 | _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r(); | 6511 | _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r(); |
6512 | _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(); | 6512 | _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(); |
6513 | |||
6513 | } | 6514 | } |
6514 | 6515 | ||
6515 | /* TBD: would like to handle this elsewhere, at a higher level. | 6516 | /* TBD: would like to handle this elsewhere, at a higher level. |
@@ -6529,21 +6530,24 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6529 | u32 vaddr_lo; | 6530 | u32 vaddr_lo; |
6530 | u32 vaddr_hi; | 6531 | u32 vaddr_hi; |
6531 | u32 tmp; | 6532 | u32 tmp; |
6533 | u32 num_ovr_perf_regs = 0; | ||
6534 | u32 *ovr_perf_regs = NULL; | ||
6532 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | 6535 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
6533 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | 6536 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
6534 | 6537 | ||
6535 | init_ovr_perf_reg_info(); | 6538 | g->ops.gr.init_ovr_sm_dsm_perf(); |
6536 | g->ops.gr.init_sm_dsm_reg_info(); | 6539 | g->ops.gr.init_sm_dsm_reg_info(); |
6540 | g->ops.gr.get_ovr_perf_regs(g, &num_ovr_perf_regs, &ovr_perf_regs); | ||
6537 | 6541 | ||
6538 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); | 6542 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); |
6539 | 6543 | ||
6540 | for (reg = 0; reg < _num_ovr_perf_regs; reg++) { | 6544 | for (reg = 0; reg < num_ovr_perf_regs; reg++) { |
6541 | for (gpc = 0; gpc < num_gpc; gpc++) { | 6545 | for (gpc = 0; gpc < num_gpc; gpc++) { |
6542 | num_tpc = g->gr.gpc_tpc_count[gpc]; | 6546 | num_tpc = g->gr.gpc_tpc_count[gpc]; |
6543 | for (tpc = 0; tpc < num_tpc; tpc++) { | 6547 | for (tpc = 0; tpc < num_tpc; tpc++) { |
6544 | chk_addr = ((gpc_stride * gpc) + | 6548 | chk_addr = ((gpc_stride * gpc) + |
6545 | (tpc_in_gpc_stride * tpc) + | 6549 | (tpc_in_gpc_stride * tpc) + |
6546 | _ovr_perf_regs[reg]); | 6550 | ovr_perf_regs[reg]); |
6547 | if (chk_addr != addr) | 6551 | if (chk_addr != addr) |
6548 | continue; | 6552 | continue; |
6549 | /* reset the patch count from previous | 6553 | /* reset the patch count from previous |
@@ -6603,6 +6607,13 @@ static inline int ctxsw_prog_ucode_header_size_in_bytes(void) | |||
6603 | return 256; | 6607 | return 256; |
6604 | } | 6608 | } |
6605 | 6609 | ||
6610 | void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs, | ||
6611 | u32 **ovr_perf_regs) | ||
6612 | { | ||
6613 | *num_ovr_perf_regs = _num_ovr_perf_regs; | ||
6614 | *ovr_perf_regs = _ovr_perf_regs; | ||
6615 | } | ||
6616 | |||
6606 | static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | 6617 | static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, |
6607 | u32 addr, | 6618 | u32 addr, |
6608 | bool is_quad, u32 quad, | 6619 | bool is_quad, u32 quad, |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index d32a719e..400b7feb 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -733,6 +733,9 @@ u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc); | |||
733 | u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc); | 733 | u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc); |
734 | void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc, | 734 | void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc, |
735 | u32 *esr_sm_sel); | 735 | u32 *esr_sm_sel); |
736 | void gk20a_gr_init_ovr_sm_dsm_perf(void); | ||
737 | void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs, | ||
738 | u32 **ovr_perf_regs); | ||
736 | 739 | ||
737 | static inline const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode) | 740 | static inline const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode) |
738 | { | 741 | { |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 1ff295c2..bd9b627f 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -1653,4 +1653,6 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1653 | gops->gr.lock_down_sm = gk20a_gr_lock_down_sm; | 1653 | gops->gr.lock_down_sm = gk20a_gr_lock_down_sm; |
1654 | gops->gr.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down; | 1654 | gops->gr.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down; |
1655 | gops->gr.clear_sm_hww = gm20b_gr_clear_sm_hww; | 1655 | gops->gr.clear_sm_hww = gm20b_gr_clear_sm_hww; |
1656 | gops->gr.init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf; | ||
1657 | gops->gr.get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs; | ||
1656 | } | 1658 | } |