diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 13 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 53 |
4 files changed, 48 insertions, 27 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 695e3f69..0436c466 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -172,6 +172,7 @@ struct gpu_ops { | |||
172 | u32 (*get_max_ltc_per_fbp)(struct gk20a *g); | 172 | u32 (*get_max_ltc_per_fbp)(struct gk20a *g); |
173 | u32 (*get_max_lts_per_ltc)(struct gk20a *g); | 173 | u32 (*get_max_lts_per_ltc)(struct gk20a *g); |
174 | u32* (*get_rop_l2_en_mask)(struct gk20a *g); | 174 | u32* (*get_rop_l2_en_mask)(struct gk20a *g); |
175 | void (*init_sm_dsm_reg_info)(void); | ||
175 | } gr; | 176 | } gr; |
176 | const char *name; | 177 | const char *name; |
177 | struct { | 178 | struct { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 4933d442..e4e0d163 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -5995,7 +5995,7 @@ static const u32 _num_sm_dsm_perf_ctrl_regs = 4; | |||
5995 | static u32 _sm_dsm_perf_regs[5]; | 5995 | static u32 _sm_dsm_perf_regs[5]; |
5996 | static u32 _sm_dsm_perf_ctrl_regs[4]; | 5996 | static u32 _sm_dsm_perf_ctrl_regs[4]; |
5997 | 5997 | ||
5998 | static void init_sm_dsm_reg_info(void) | 5998 | static void init_ovr_perf_reg_info(void) |
5999 | { | 5999 | { |
6000 | if (_ovr_perf_regs[0] != 0) | 6000 | if (_ovr_perf_regs[0] != 0) |
6001 | return; | 6001 | return; |
@@ -6017,7 +6017,12 @@ static void init_sm_dsm_reg_info(void) | |||
6017 | _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r(); | 6017 | _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r(); |
6018 | _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r(); | 6018 | _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r(); |
6019 | _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(); | 6019 | _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(); |
6020 | } | ||
6020 | 6021 | ||
6022 | void gr_gk20a_init_sm_dsm_reg_info(void) | ||
6023 | { | ||
6024 | if (_sm_dsm_perf_regs[0] != 0) | ||
6025 | return; | ||
6021 | 6026 | ||
6022 | _sm_dsm_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r(); | 6027 | _sm_dsm_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r(); |
6023 | _sm_dsm_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r(); | 6028 | _sm_dsm_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r(); |
@@ -6050,7 +6055,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6050 | u32 vaddr_hi; | 6055 | u32 vaddr_hi; |
6051 | u32 tmp; | 6056 | u32 tmp; |
6052 | 6057 | ||
6053 | init_sm_dsm_reg_info(); | 6058 | init_ovr_perf_reg_info(); |
6059 | g->ops.gr.init_sm_dsm_reg_info(); | ||
6054 | 6060 | ||
6055 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); | 6061 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); |
6056 | 6062 | ||
@@ -6274,7 +6280,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6274 | &sm_dsm_perf_regs, | 6280 | &sm_dsm_perf_regs, |
6275 | &perf_register_stride); | 6281 | &perf_register_stride); |
6276 | 6282 | ||
6277 | init_sm_dsm_reg_info(); | 6283 | g->ops.gr.init_sm_dsm_reg_info(); |
6278 | 6284 | ||
6279 | for (i = 0; i < num_sm_dsm_perf_regs; i++) { | 6285 | for (i = 0; i < num_sm_dsm_perf_regs; i++) { |
6280 | if ((addr & tpc_gpc_mask) == (sm_dsm_perf_regs[i] & tpc_gpc_mask)) { | 6286 | if ((addr & tpc_gpc_mask) == (sm_dsm_perf_regs[i] & tpc_gpc_mask)) { |
@@ -7375,4 +7381,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
7375 | gops->gr.get_max_ltc_per_fbp = gr_gk20a_get_max_ltc_per_fbp; | 7381 | gops->gr.get_max_ltc_per_fbp = gr_gk20a_get_max_ltc_per_fbp; |
7376 | gops->gr.get_max_lts_per_ltc = gr_gk20a_get_max_lts_per_ltc; | 7382 | gops->gr.get_max_lts_per_ltc = gr_gk20a_get_max_lts_per_ltc; |
7377 | gops->gr.get_rop_l2_en_mask = gr_gk20a_rop_l2_en_mask; | 7383 | gops->gr.get_rop_l2_en_mask = gr_gk20a_rop_l2_en_mask; |
7384 | gops->gr.init_sm_dsm_reg_info = gr_gk20a_init_sm_dsm_reg_info; | ||
7378 | } | 7385 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 59176af8..1a55e064 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -461,14 +461,6 @@ void gr_gk20a_commit_global_pagepool(struct gk20a *g, | |||
461 | u64 addr, u32 size, bool patch); | 461 | u64 addr, u32 size, bool patch); |
462 | void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); | 462 | void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); |
463 | void gr_gk20a_enable_hww_exceptions(struct gk20a *g); | 463 | void gr_gk20a_enable_hww_exceptions(struct gk20a *g); |
464 | void gr_gk20a_get_sm_dsm_perf_regs(struct gk20a *g, | ||
465 | u32 *num_sm_dsm_perf_regs, | ||
466 | u32 **sm_dsm_perf_regs, | ||
467 | u32 *perf_register_stride); | ||
468 | void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, | ||
469 | u32 *num_sm_dsm_perf_regs, | ||
470 | u32 **sm_dsm_perf_regs, | ||
471 | u32 *perf_register_stride); | ||
472 | int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr); | 464 | int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr); |
473 | int gr_gk20a_init_ctxsw_ucode(struct gk20a *g); | 465 | int gr_gk20a_init_ctxsw_ucode(struct gk20a *g); |
474 | int gr_gk20a_load_ctxsw_ucode(struct gk20a *g); | 466 | int gr_gk20a_load_ctxsw_ucode(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index e5af96d2..cffc56d1 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -453,26 +453,46 @@ static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num) | |||
453 | return valid; | 453 | return valid; |
454 | } | 454 | } |
455 | 455 | ||
456 | static void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, | 456 | /* Following are the blocks of registers that the ucode |
457 | u32 *num_sm_dsm_perf_regs, | 457 | stores in the extended region.*/ |
458 | u32 **sm_dsm_perf_regs, | 458 | /* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */ |
459 | u32 *perf_register_stride) | 459 | static const u32 _num_sm_dsm_perf_regs; |
460 | /* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ?*/ | ||
461 | static const u32 _num_sm_dsm_perf_ctrl_regs = 2; | ||
462 | static u32 *_sm_dsm_perf_regs; | ||
463 | static u32 _sm_dsm_perf_ctrl_regs[2]; | ||
464 | |||
465 | void gr_gm20b_init_sm_dsm_reg_info(void) | ||
460 | { | 466 | { |
461 | gr_gk20a_get_sm_dsm_perf_regs(g, num_sm_dsm_perf_regs, | 467 | if (_sm_dsm_perf_ctrl_regs[0] != 0) |
462 | sm_dsm_perf_regs, | 468 | return; |
463 | perf_register_stride); | 469 | |
464 | *perf_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(); | 470 | _sm_dsm_perf_ctrl_regs[0] = |
471 | gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r(); | ||
472 | _sm_dsm_perf_ctrl_regs[1] = | ||
473 | gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r(); | ||
465 | } | 474 | } |
466 | 475 | ||
467 | static void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, | 476 | void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, |
468 | u32 *num_sm_dsm_perf_regs, | 477 | u32 *num_sm_dsm_perf_regs, |
469 | u32 **sm_dsm_perf_regs, | 478 | u32 **sm_dsm_perf_regs, |
470 | u32 *ctrl_register_stride) | 479 | u32 *perf_register_stride) |
471 | { | 480 | { |
472 | gr_gk20a_get_sm_dsm_perf_ctrl_regs(g, num_sm_dsm_perf_regs, | 481 | *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs; |
473 | sm_dsm_perf_regs, | 482 | *sm_dsm_perf_regs = _sm_dsm_perf_regs; |
474 | ctrl_register_stride); | 483 | *perf_register_stride = 0; |
475 | *ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); | 484 | } |
485 | |||
486 | void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, | ||
487 | u32 *num_sm_dsm_perf_ctrl_regs, | ||
488 | u32 **sm_dsm_perf_ctrl_regs, | ||
489 | u32 *ctrl_register_stride) | ||
490 | { | ||
491 | *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs; | ||
492 | *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs; | ||
493 | |||
494 | *ctrl_register_stride = | ||
495 | ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); | ||
476 | } | 496 | } |
477 | 497 | ||
478 | static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) | 498 | static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) |
@@ -1072,4 +1092,5 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1072 | gops->gr.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc; | 1092 | gops->gr.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc; |
1073 | gops->gr.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask; | 1093 | gops->gr.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask; |
1074 | gops->gr.get_max_fbps_count = gr_gm20b_get_max_fbps_count; | 1094 | gops->gr.get_max_fbps_count = gr_gm20b_get_max_fbps_count; |
1095 | gops->gr.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info; | ||
1075 | } | 1096 | } |