diff options
author | Sandarbh Jain <sanjain@nvidia.com> | 2015-03-13 15:41:51 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-04-04 22:01:25 -0400 |
commit | 95548fa880f3a31d900cfb9c4b2e30e7dfacadac (patch) | |
tree | 4b35b21ce56e9953fe05b0ca6374240c743fccc9 | |
parent | 42e6b2f4512ce4481f2e5fd82e375e256173528e (diff) |
gpu: nvgpu: GM20B extended buffer definition
Update extended buffer definition for Maxwell. On GM20B only the PERF_CONTROL0 and
PERF_CONTROL5 registers are restored in the extended buffer. They are needed for
stopping the counters as late as possible during context save and starting them as
early as possible during context restore. On Maxwell, these registers contain
the enable/disable bit.
Bug 200086767
Change-Id: I59125a2f04bd0975be8a1ccecf993c9370f20337
Signed-off-by: Sandarbh Jain <sanjain@nvidia.com>
Reviewed-on: http://git-master/r/717421
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 13 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 53 |
4 files changed, 48 insertions, 27 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 695e3f69..0436c466 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -172,6 +172,7 @@ struct gpu_ops { | |||
172 | u32 (*get_max_ltc_per_fbp)(struct gk20a *g); | 172 | u32 (*get_max_ltc_per_fbp)(struct gk20a *g); |
173 | u32 (*get_max_lts_per_ltc)(struct gk20a *g); | 173 | u32 (*get_max_lts_per_ltc)(struct gk20a *g); |
174 | u32* (*get_rop_l2_en_mask)(struct gk20a *g); | 174 | u32* (*get_rop_l2_en_mask)(struct gk20a *g); |
175 | void (*init_sm_dsm_reg_info)(void); | ||
175 | } gr; | 176 | } gr; |
176 | const char *name; | 177 | const char *name; |
177 | struct { | 178 | struct { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 4933d442..e4e0d163 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -5995,7 +5995,7 @@ static const u32 _num_sm_dsm_perf_ctrl_regs = 4; | |||
5995 | static u32 _sm_dsm_perf_regs[5]; | 5995 | static u32 _sm_dsm_perf_regs[5]; |
5996 | static u32 _sm_dsm_perf_ctrl_regs[4]; | 5996 | static u32 _sm_dsm_perf_ctrl_regs[4]; |
5997 | 5997 | ||
5998 | static void init_sm_dsm_reg_info(void) | 5998 | static void init_ovr_perf_reg_info(void) |
5999 | { | 5999 | { |
6000 | if (_ovr_perf_regs[0] != 0) | 6000 | if (_ovr_perf_regs[0] != 0) |
6001 | return; | 6001 | return; |
@@ -6017,7 +6017,12 @@ static void init_sm_dsm_reg_info(void) | |||
6017 | _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r(); | 6017 | _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r(); |
6018 | _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r(); | 6018 | _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r(); |
6019 | _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(); | 6019 | _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(); |
6020 | } | ||
6020 | 6021 | ||
6022 | void gr_gk20a_init_sm_dsm_reg_info(void) | ||
6023 | { | ||
6024 | if (_sm_dsm_perf_regs[0] != 0) | ||
6025 | return; | ||
6021 | 6026 | ||
6022 | _sm_dsm_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r(); | 6027 | _sm_dsm_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r(); |
6023 | _sm_dsm_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r(); | 6028 | _sm_dsm_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r(); |
@@ -6050,7 +6055,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6050 | u32 vaddr_hi; | 6055 | u32 vaddr_hi; |
6051 | u32 tmp; | 6056 | u32 tmp; |
6052 | 6057 | ||
6053 | init_sm_dsm_reg_info(); | 6058 | init_ovr_perf_reg_info(); |
6059 | g->ops.gr.init_sm_dsm_reg_info(); | ||
6054 | 6060 | ||
6055 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); | 6061 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); |
6056 | 6062 | ||
@@ -6274,7 +6280,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6274 | &sm_dsm_perf_regs, | 6280 | &sm_dsm_perf_regs, |
6275 | &perf_register_stride); | 6281 | &perf_register_stride); |
6276 | 6282 | ||
6277 | init_sm_dsm_reg_info(); | 6283 | g->ops.gr.init_sm_dsm_reg_info(); |
6278 | 6284 | ||
6279 | for (i = 0; i < num_sm_dsm_perf_regs; i++) { | 6285 | for (i = 0; i < num_sm_dsm_perf_regs; i++) { |
6280 | if ((addr & tpc_gpc_mask) == (sm_dsm_perf_regs[i] & tpc_gpc_mask)) { | 6286 | if ((addr & tpc_gpc_mask) == (sm_dsm_perf_regs[i] & tpc_gpc_mask)) { |
@@ -7375,4 +7381,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
7375 | gops->gr.get_max_ltc_per_fbp = gr_gk20a_get_max_ltc_per_fbp; | 7381 | gops->gr.get_max_ltc_per_fbp = gr_gk20a_get_max_ltc_per_fbp; |
7376 | gops->gr.get_max_lts_per_ltc = gr_gk20a_get_max_lts_per_ltc; | 7382 | gops->gr.get_max_lts_per_ltc = gr_gk20a_get_max_lts_per_ltc; |
7377 | gops->gr.get_rop_l2_en_mask = gr_gk20a_rop_l2_en_mask; | 7383 | gops->gr.get_rop_l2_en_mask = gr_gk20a_rop_l2_en_mask; |
7384 | gops->gr.init_sm_dsm_reg_info = gr_gk20a_init_sm_dsm_reg_info; | ||
7378 | } | 7385 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 59176af8..1a55e064 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -461,14 +461,6 @@ void gr_gk20a_commit_global_pagepool(struct gk20a *g, | |||
461 | u64 addr, u32 size, bool patch); | 461 | u64 addr, u32 size, bool patch); |
462 | void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); | 462 | void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); |
463 | void gr_gk20a_enable_hww_exceptions(struct gk20a *g); | 463 | void gr_gk20a_enable_hww_exceptions(struct gk20a *g); |
464 | void gr_gk20a_get_sm_dsm_perf_regs(struct gk20a *g, | ||
465 | u32 *num_sm_dsm_perf_regs, | ||
466 | u32 **sm_dsm_perf_regs, | ||
467 | u32 *perf_register_stride); | ||
468 | void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, | ||
469 | u32 *num_sm_dsm_perf_regs, | ||
470 | u32 **sm_dsm_perf_regs, | ||
471 | u32 *perf_register_stride); | ||
472 | int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr); | 464 | int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr); |
473 | int gr_gk20a_init_ctxsw_ucode(struct gk20a *g); | 465 | int gr_gk20a_init_ctxsw_ucode(struct gk20a *g); |
474 | int gr_gk20a_load_ctxsw_ucode(struct gk20a *g); | 466 | int gr_gk20a_load_ctxsw_ucode(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index e5af96d2..cffc56d1 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -453,26 +453,46 @@ static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num) | |||
453 | return valid; | 453 | return valid; |
454 | } | 454 | } |
455 | 455 | ||
456 | static void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, | 456 | /* Following are the blocks of registers that the ucode |
457 | u32 *num_sm_dsm_perf_regs, | 457 | stores in the extended region.*/ |
458 | u32 **sm_dsm_perf_regs, | 458 | /* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */ |
459 | u32 *perf_register_stride) | 459 | static const u32 _num_sm_dsm_perf_regs; |
460 | /* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ?*/ | ||
461 | static const u32 _num_sm_dsm_perf_ctrl_regs = 2; | ||
462 | static u32 *_sm_dsm_perf_regs; | ||
463 | static u32 _sm_dsm_perf_ctrl_regs[2]; | ||
464 | |||
465 | void gr_gm20b_init_sm_dsm_reg_info(void) | ||
460 | { | 466 | { |
461 | gr_gk20a_get_sm_dsm_perf_regs(g, num_sm_dsm_perf_regs, | 467 | if (_sm_dsm_perf_ctrl_regs[0] != 0) |
462 | sm_dsm_perf_regs, | 468 | return; |
463 | perf_register_stride); | 469 | |
464 | *perf_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(); | 470 | _sm_dsm_perf_ctrl_regs[0] = |
471 | gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r(); | ||
472 | _sm_dsm_perf_ctrl_regs[1] = | ||
473 | gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r(); | ||
465 | } | 474 | } |
466 | 475 | ||
467 | static void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, | 476 | void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, |
468 | u32 *num_sm_dsm_perf_regs, | 477 | u32 *num_sm_dsm_perf_regs, |
469 | u32 **sm_dsm_perf_regs, | 478 | u32 **sm_dsm_perf_regs, |
470 | u32 *ctrl_register_stride) | 479 | u32 *perf_register_stride) |
471 | { | 480 | { |
472 | gr_gk20a_get_sm_dsm_perf_ctrl_regs(g, num_sm_dsm_perf_regs, | 481 | *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs; |
473 | sm_dsm_perf_regs, | 482 | *sm_dsm_perf_regs = _sm_dsm_perf_regs; |
474 | ctrl_register_stride); | 483 | *perf_register_stride = 0; |
475 | *ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); | 484 | } |
485 | |||
486 | void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, | ||
487 | u32 *num_sm_dsm_perf_ctrl_regs, | ||
488 | u32 **sm_dsm_perf_ctrl_regs, | ||
489 | u32 *ctrl_register_stride) | ||
490 | { | ||
491 | *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs; | ||
492 | *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs; | ||
493 | |||
494 | *ctrl_register_stride = | ||
495 | ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); | ||
476 | } | 496 | } |
477 | 497 | ||
478 | static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) | 498 | static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) |
@@ -1072,4 +1092,5 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1072 | gops->gr.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc; | 1092 | gops->gr.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc; |
1073 | gops->gr.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask; | 1093 | gops->gr.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask; |
1074 | gops->gr.get_max_fbps_count = gr_gm20b_get_max_fbps_count; | 1094 | gops->gr.get_max_fbps_count = gr_gm20b_get_max_fbps_count; |
1095 | gops->gr.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info; | ||
1075 | } | 1096 | } |