summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h1
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c13
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.h8
-rw-r--r--drivers/gpu/nvgpu/gm20b/gr_gm20b.c53
4 files changed, 48 insertions, 27 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 695e3f69..0436c466 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -172,6 +172,7 @@ struct gpu_ops {
172 u32 (*get_max_ltc_per_fbp)(struct gk20a *g); 172 u32 (*get_max_ltc_per_fbp)(struct gk20a *g);
173 u32 (*get_max_lts_per_ltc)(struct gk20a *g); 173 u32 (*get_max_lts_per_ltc)(struct gk20a *g);
174 u32* (*get_rop_l2_en_mask)(struct gk20a *g); 174 u32* (*get_rop_l2_en_mask)(struct gk20a *g);
175 void (*init_sm_dsm_reg_info)(void);
175 } gr; 176 } gr;
176 const char *name; 177 const char *name;
177 struct { 178 struct {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 4933d442..e4e0d163 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5995,7 +5995,7 @@ static const u32 _num_sm_dsm_perf_ctrl_regs = 4;
5995static u32 _sm_dsm_perf_regs[5]; 5995static u32 _sm_dsm_perf_regs[5];
5996static u32 _sm_dsm_perf_ctrl_regs[4]; 5996static u32 _sm_dsm_perf_ctrl_regs[4];
5997 5997
5998static void init_sm_dsm_reg_info(void) 5998static void init_ovr_perf_reg_info(void)
5999{ 5999{
6000 if (_ovr_perf_regs[0] != 0) 6000 if (_ovr_perf_regs[0] != 0)
6001 return; 6001 return;
@@ -6017,7 +6017,12 @@ static void init_sm_dsm_reg_info(void)
6017 _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r(); 6017 _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r();
6018 _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r(); 6018 _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r();
6019 _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(); 6019 _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r();
6020}
6020 6021
6022void gr_gk20a_init_sm_dsm_reg_info(void)
6023{
6024 if (_sm_dsm_perf_regs[0] != 0)
6025 return;
6021 6026
6022 _sm_dsm_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r(); 6027 _sm_dsm_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r();
6023 _sm_dsm_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r(); 6028 _sm_dsm_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r();
@@ -6050,7 +6055,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6050 u32 vaddr_hi; 6055 u32 vaddr_hi;
6051 u32 tmp; 6056 u32 tmp;
6052 6057
6053 init_sm_dsm_reg_info(); 6058 init_ovr_perf_reg_info();
6059 g->ops.gr.init_sm_dsm_reg_info();
6054 6060
6055 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); 6061 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6056 6062
@@ -6274,7 +6280,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
6274 &sm_dsm_perf_regs, 6280 &sm_dsm_perf_regs,
6275 &perf_register_stride); 6281 &perf_register_stride);
6276 6282
6277 init_sm_dsm_reg_info(); 6283 g->ops.gr.init_sm_dsm_reg_info();
6278 6284
6279 for (i = 0; i < num_sm_dsm_perf_regs; i++) { 6285 for (i = 0; i < num_sm_dsm_perf_regs; i++) {
6280 if ((addr & tpc_gpc_mask) == (sm_dsm_perf_regs[i] & tpc_gpc_mask)) { 6286 if ((addr & tpc_gpc_mask) == (sm_dsm_perf_regs[i] & tpc_gpc_mask)) {
@@ -7375,4 +7381,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
7375 gops->gr.get_max_ltc_per_fbp = gr_gk20a_get_max_ltc_per_fbp; 7381 gops->gr.get_max_ltc_per_fbp = gr_gk20a_get_max_ltc_per_fbp;
7376 gops->gr.get_max_lts_per_ltc = gr_gk20a_get_max_lts_per_ltc; 7382 gops->gr.get_max_lts_per_ltc = gr_gk20a_get_max_lts_per_ltc;
7377 gops->gr.get_rop_l2_en_mask = gr_gk20a_rop_l2_en_mask; 7383 gops->gr.get_rop_l2_en_mask = gr_gk20a_rop_l2_en_mask;
7384 gops->gr.init_sm_dsm_reg_info = gr_gk20a_init_sm_dsm_reg_info;
7378} 7385}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 59176af8..1a55e064 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -461,14 +461,6 @@ void gr_gk20a_commit_global_pagepool(struct gk20a *g,
461 u64 addr, u32 size, bool patch); 461 u64 addr, u32 size, bool patch);
462void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); 462void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
463void gr_gk20a_enable_hww_exceptions(struct gk20a *g); 463void gr_gk20a_enable_hww_exceptions(struct gk20a *g);
464void gr_gk20a_get_sm_dsm_perf_regs(struct gk20a *g,
465 u32 *num_sm_dsm_perf_regs,
466 u32 **sm_dsm_perf_regs,
467 u32 *perf_register_stride);
468void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
469 u32 *num_sm_dsm_perf_regs,
470 u32 **sm_dsm_perf_regs,
471 u32 *perf_register_stride);
472int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr); 464int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
473int gr_gk20a_init_ctxsw_ucode(struct gk20a *g); 465int gr_gk20a_init_ctxsw_ucode(struct gk20a *g);
474int gr_gk20a_load_ctxsw_ucode(struct gk20a *g); 466int gr_gk20a_load_ctxsw_ucode(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index e5af96d2..cffc56d1 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -453,26 +453,46 @@ static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num)
453 return valid; 453 return valid;
454} 454}
455 455
456static void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, 456/* Following are the blocks of registers that the ucode
457 u32 *num_sm_dsm_perf_regs, 457 stores in the extended region.*/
458 u32 **sm_dsm_perf_regs, 458/* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */
459 u32 *perf_register_stride) 459static const u32 _num_sm_dsm_perf_regs;
460/* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ?*/
461static const u32 _num_sm_dsm_perf_ctrl_regs = 2;
462static u32 *_sm_dsm_perf_regs;
463static u32 _sm_dsm_perf_ctrl_regs[2];
464
465void gr_gm20b_init_sm_dsm_reg_info(void)
460{ 466{
461 gr_gk20a_get_sm_dsm_perf_regs(g, num_sm_dsm_perf_regs, 467 if (_sm_dsm_perf_ctrl_regs[0] != 0)
462 sm_dsm_perf_regs, 468 return;
463 perf_register_stride); 469
464 *perf_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(); 470 _sm_dsm_perf_ctrl_regs[0] =
471 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r();
472 _sm_dsm_perf_ctrl_regs[1] =
473 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r();
465} 474}
466 475
467static void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, 476void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g,
468 u32 *num_sm_dsm_perf_regs, 477 u32 *num_sm_dsm_perf_regs,
469 u32 **sm_dsm_perf_regs, 478 u32 **sm_dsm_perf_regs,
470 u32 *ctrl_register_stride) 479 u32 *perf_register_stride)
471{ 480{
472 gr_gk20a_get_sm_dsm_perf_ctrl_regs(g, num_sm_dsm_perf_regs, 481 *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs;
473 sm_dsm_perf_regs, 482 *sm_dsm_perf_regs = _sm_dsm_perf_regs;
474 ctrl_register_stride); 483 *perf_register_stride = 0;
475 *ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); 484}
485
486void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
487 u32 *num_sm_dsm_perf_ctrl_regs,
488 u32 **sm_dsm_perf_ctrl_regs,
489 u32 *ctrl_register_stride)
490{
491 *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs;
492 *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs;
493
494 *ctrl_register_stride =
495 ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
476} 496}
477 497
478static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) 498static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
@@ -1072,4 +1092,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
1072 gops->gr.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc; 1092 gops->gr.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc;
1073 gops->gr.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask; 1093 gops->gr.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask;
1074 gops->gr.get_max_fbps_count = gr_gm20b_get_max_fbps_count; 1094 gops->gr.get_max_fbps_count = gr_gm20b_get_max_fbps_count;
1095 gops->gr.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info;
1075} 1096}