summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
authorSandarbh Jain <sanjain@nvidia.com>2015-03-13 15:41:51 -0400
committerDan Willemsen <dwillemsen@nvidia.com>2015-04-04 22:01:25 -0400
commit95548fa880f3a31d900cfb9c4b2e30e7dfacadac (patch)
tree4b35b21ce56e9953fe05b0ca6374240c743fccc9 /drivers/gpu
parent42e6b2f4512ce4481f2e5fd82e375e256173528e (diff)
gpu: nvgpu: GM20B extended buffer definition
Update the extended buffer definition for Maxwell. On GM20B, only the PERF_CONTROL0 and PERF_CONTROL5 registers are restored in the extended buffer. They are needed to stop the counters as late as possible during context save and to start them as early as possible during context restore. On Maxwell, these registers contain the enable/disable bit. Bug 200086767 Change-Id: I59125a2f04bd0975be8a1ccecf993c9370f20337 Signed-off-by: Sandarbh Jain <sanjain@nvidia.com> Reviewed-on: http://git-master/r/717421 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h1
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c13
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.h8
-rw-r--r--drivers/gpu/nvgpu/gm20b/gr_gm20b.c53
4 files changed, 48 insertions, 27 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 695e3f69..0436c466 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -172,6 +172,7 @@ struct gpu_ops {
172 u32 (*get_max_ltc_per_fbp)(struct gk20a *g); 172 u32 (*get_max_ltc_per_fbp)(struct gk20a *g);
173 u32 (*get_max_lts_per_ltc)(struct gk20a *g); 173 u32 (*get_max_lts_per_ltc)(struct gk20a *g);
174 u32* (*get_rop_l2_en_mask)(struct gk20a *g); 174 u32* (*get_rop_l2_en_mask)(struct gk20a *g);
175 void (*init_sm_dsm_reg_info)(void);
175 } gr; 176 } gr;
176 const char *name; 177 const char *name;
177 struct { 178 struct {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 4933d442..e4e0d163 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5995,7 +5995,7 @@ static const u32 _num_sm_dsm_perf_ctrl_regs = 4;
5995static u32 _sm_dsm_perf_regs[5]; 5995static u32 _sm_dsm_perf_regs[5];
5996static u32 _sm_dsm_perf_ctrl_regs[4]; 5996static u32 _sm_dsm_perf_ctrl_regs[4];
5997 5997
5998static void init_sm_dsm_reg_info(void) 5998static void init_ovr_perf_reg_info(void)
5999{ 5999{
6000 if (_ovr_perf_regs[0] != 0) 6000 if (_ovr_perf_regs[0] != 0)
6001 return; 6001 return;
@@ -6017,7 +6017,12 @@ static void init_sm_dsm_reg_info(void)
6017 _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r(); 6017 _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r();
6018 _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r(); 6018 _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r();
6019 _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(); 6019 _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r();
6020}
6020 6021
6022void gr_gk20a_init_sm_dsm_reg_info(void)
6023{
6024 if (_sm_dsm_perf_regs[0] != 0)
6025 return;
6021 6026
6022 _sm_dsm_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r(); 6027 _sm_dsm_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r();
6023 _sm_dsm_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r(); 6028 _sm_dsm_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r();
@@ -6050,7 +6055,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6050 u32 vaddr_hi; 6055 u32 vaddr_hi;
6051 u32 tmp; 6056 u32 tmp;
6052 6057
6053 init_sm_dsm_reg_info(); 6058 init_ovr_perf_reg_info();
6059 g->ops.gr.init_sm_dsm_reg_info();
6054 6060
6055 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); 6061 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6056 6062
@@ -6274,7 +6280,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
6274 &sm_dsm_perf_regs, 6280 &sm_dsm_perf_regs,
6275 &perf_register_stride); 6281 &perf_register_stride);
6276 6282
6277 init_sm_dsm_reg_info(); 6283 g->ops.gr.init_sm_dsm_reg_info();
6278 6284
6279 for (i = 0; i < num_sm_dsm_perf_regs; i++) { 6285 for (i = 0; i < num_sm_dsm_perf_regs; i++) {
6280 if ((addr & tpc_gpc_mask) == (sm_dsm_perf_regs[i] & tpc_gpc_mask)) { 6286 if ((addr & tpc_gpc_mask) == (sm_dsm_perf_regs[i] & tpc_gpc_mask)) {
@@ -7375,4 +7381,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
7375 gops->gr.get_max_ltc_per_fbp = gr_gk20a_get_max_ltc_per_fbp; 7381 gops->gr.get_max_ltc_per_fbp = gr_gk20a_get_max_ltc_per_fbp;
7376 gops->gr.get_max_lts_per_ltc = gr_gk20a_get_max_lts_per_ltc; 7382 gops->gr.get_max_lts_per_ltc = gr_gk20a_get_max_lts_per_ltc;
7377 gops->gr.get_rop_l2_en_mask = gr_gk20a_rop_l2_en_mask; 7383 gops->gr.get_rop_l2_en_mask = gr_gk20a_rop_l2_en_mask;
7384 gops->gr.init_sm_dsm_reg_info = gr_gk20a_init_sm_dsm_reg_info;
7378} 7385}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 59176af8..1a55e064 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -461,14 +461,6 @@ void gr_gk20a_commit_global_pagepool(struct gk20a *g,
461 u64 addr, u32 size, bool patch); 461 u64 addr, u32 size, bool patch);
462void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); 462void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
463void gr_gk20a_enable_hww_exceptions(struct gk20a *g); 463void gr_gk20a_enable_hww_exceptions(struct gk20a *g);
464void gr_gk20a_get_sm_dsm_perf_regs(struct gk20a *g,
465 u32 *num_sm_dsm_perf_regs,
466 u32 **sm_dsm_perf_regs,
467 u32 *perf_register_stride);
468void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
469 u32 *num_sm_dsm_perf_regs,
470 u32 **sm_dsm_perf_regs,
471 u32 *perf_register_stride);
472int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr); 464int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
473int gr_gk20a_init_ctxsw_ucode(struct gk20a *g); 465int gr_gk20a_init_ctxsw_ucode(struct gk20a *g);
474int gr_gk20a_load_ctxsw_ucode(struct gk20a *g); 466int gr_gk20a_load_ctxsw_ucode(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index e5af96d2..cffc56d1 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -453,26 +453,46 @@ static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num)
453 return valid; 453 return valid;
454} 454}
455 455
456static void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, 456/* Following are the blocks of registers that the ucode
457 u32 *num_sm_dsm_perf_regs, 457 stores in the extended region.*/
458 u32 **sm_dsm_perf_regs, 458/* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */
459 u32 *perf_register_stride) 459static const u32 _num_sm_dsm_perf_regs;
460/* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ?*/
461static const u32 _num_sm_dsm_perf_ctrl_regs = 2;
462static u32 *_sm_dsm_perf_regs;
463static u32 _sm_dsm_perf_ctrl_regs[2];
464
465void gr_gm20b_init_sm_dsm_reg_info(void)
460{ 466{
461 gr_gk20a_get_sm_dsm_perf_regs(g, num_sm_dsm_perf_regs, 467 if (_sm_dsm_perf_ctrl_regs[0] != 0)
462 sm_dsm_perf_regs, 468 return;
463 perf_register_stride); 469
464 *perf_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(); 470 _sm_dsm_perf_ctrl_regs[0] =
471 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r();
472 _sm_dsm_perf_ctrl_regs[1] =
473 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r();
465} 474}
466 475
467static void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, 476void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g,
468 u32 *num_sm_dsm_perf_regs, 477 u32 *num_sm_dsm_perf_regs,
469 u32 **sm_dsm_perf_regs, 478 u32 **sm_dsm_perf_regs,
470 u32 *ctrl_register_stride) 479 u32 *perf_register_stride)
471{ 480{
472 gr_gk20a_get_sm_dsm_perf_ctrl_regs(g, num_sm_dsm_perf_regs, 481 *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs;
473 sm_dsm_perf_regs, 482 *sm_dsm_perf_regs = _sm_dsm_perf_regs;
474 ctrl_register_stride); 483 *perf_register_stride = 0;
475 *ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); 484}
485
486void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
487 u32 *num_sm_dsm_perf_ctrl_regs,
488 u32 **sm_dsm_perf_ctrl_regs,
489 u32 *ctrl_register_stride)
490{
491 *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs;
492 *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs;
493
494 *ctrl_register_stride =
495 ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
476} 496}
477 497
478static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) 498static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
@@ -1072,4 +1092,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
1072 gops->gr.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc; 1092 gops->gr.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc;
1073 gops->gr.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask; 1093 gops->gr.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask;
1074 gops->gr.get_max_fbps_count = gr_gm20b_get_max_fbps_count; 1094 gops->gr.get_max_fbps_count = gr_gm20b_get_max_fbps_count;
1095 gops->gr.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info;
1075} 1096}