diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 52 |
1 files changed, 32 insertions, 20 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 5b00078f..35bbe70c 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -175,10 +175,11 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | |||
175 | u32 alpha_offset_in_chunk = 0; | 175 | u32 alpha_offset_in_chunk = 0; |
176 | u32 pd_ab_max_output; | 176 | u32 pd_ab_max_output; |
177 | u32 gpc_index, ppc_index; | 177 | u32 gpc_index, ppc_index; |
178 | u32 temp; | ||
179 | u32 cbm_cfg_size1, cbm_cfg_size2; | 178 | u32 cbm_cfg_size1, cbm_cfg_size2; |
180 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | 179 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
181 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | 180 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); |
181 | u32 num_pes_per_gpc = nvgpu_get_litter_value(g, | ||
182 | GPU_LIT_NUM_PES_PER_GPC); | ||
182 | 183 | ||
183 | gk20a_dbg_fn(""); | 184 | gk20a_dbg_fn(""); |
184 | 185 | ||
@@ -199,7 +200,8 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | |||
199 | gr->tpc_count * gr->attrib_cb_size; | 200 | gr->tpc_count * gr->attrib_cb_size; |
200 | 201 | ||
201 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 202 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
202 | temp = gpc_stride * gpc_index; | 203 | u32 temp = gpc_stride * gpc_index; |
204 | u32 temp2 = num_pes_per_gpc * gpc_index; | ||
203 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | 205 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; |
204 | ppc_index++) { | 206 | ppc_index++) { |
205 | cbm_cfg_size1 = gr->attrib_cb_default_size * | 207 | cbm_cfg_size1 = gr->attrib_cb_default_size * |
@@ -234,7 +236,7 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | |||
234 | gr->pes_tpc_count[ppc_index][gpc_index]; | 236 | gr->pes_tpc_count[ppc_index][gpc_index]; |
235 | 237 | ||
236 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 238 | gr_gk20a_ctx_patch_write(g, ch_ctx, |
237 | gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + gpc_index), | 239 | gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2), |
238 | gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size1) | | 240 | gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size1) | |
239 | gr_gpcs_swdx_tc_beta_cb_size_div3_f(cbm_cfg_size1/3), | 241 | gr_gpcs_swdx_tc_beta_cb_size_div3_f(cbm_cfg_size1/3), |
240 | patch); | 242 | patch); |
@@ -523,6 +525,28 @@ static void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) | |||
523 | } | 525 | } |
524 | } | 526 | } |
525 | 527 | ||
528 | static void gr_gm20b_load_tpc_mask(struct gk20a *g) | ||
529 | { | ||
530 | u32 pes_tpc_mask = 0; | ||
531 | u32 gpc, pes; | ||
532 | u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); | ||
533 | |||
534 | for (gpc = 0; gpc < g->gr.gpc_count; gpc++) | ||
535 | for (pes = 0; pes < g->gr.pe_count_per_gpc; pes++) { | ||
536 | pes_tpc_mask |= g->gr.pes_tpc_mask[pes][gpc] << | ||
537 | num_tpc_per_gpc * gpc; | ||
538 | } | ||
539 | |||
540 | if (g->tpc_fs_mask_user && g->ops.gr.get_gpc_tpc_mask(g, 0) == | ||
541 | (0x1 << g->gr.max_tpc_count) - 1) { | ||
542 | u32 val = g->tpc_fs_mask_user; | ||
543 | val &= (0x1 << g->gr.max_tpc_count) - 1; | ||
544 | gk20a_writel(g, gr_fe_tpc_fs_r(), val); | ||
545 | } else { | ||
546 | gk20a_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask); | ||
547 | } | ||
548 | } | ||
549 | |||
526 | int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | 550 | int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) |
527 | { | 551 | { |
528 | struct gr_gk20a *gr = &g->gr; | 552 | struct gr_gk20a *gr = &g->gr; |
@@ -531,7 +555,6 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | |||
531 | u32 sm_id = 0; | 555 | u32 sm_id = 0; |
532 | u32 tpc_per_gpc = 0; | 556 | u32 tpc_per_gpc = 0; |
533 | u32 tpc_sm_id = 0, gpc_tpc_id = 0; | 557 | u32 tpc_sm_id = 0, gpc_tpc_id = 0; |
534 | u32 pes_tpc_mask = 0, pes_index; | ||
535 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | 558 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
536 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | 559 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
537 | 560 | ||
@@ -576,9 +599,9 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | |||
576 | gpc_index += 4) { | 599 | gpc_index += 4) { |
577 | 600 | ||
578 | gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4), | 601 | gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4), |
579 | gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) || | 602 | gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) | |
580 | gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) || | 603 | gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) | |
581 | gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) || | 604 | gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) | |
582 | gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3])); | 605 | gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3])); |
583 | } | 606 | } |
584 | 607 | ||
@@ -586,6 +609,8 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | |||
586 | gr_cwd_fs_num_gpcs_f(gr->gpc_count) | | 609 | gr_cwd_fs_num_gpcs_f(gr->gpc_count) | |
587 | gr_cwd_fs_num_tpcs_f(gr->tpc_count)); | 610 | gr_cwd_fs_num_tpcs_f(gr->tpc_count)); |
588 | 611 | ||
612 | gr_gm20b_load_tpc_mask(g); | ||
613 | |||
589 | gk20a_writel(g, gr_bes_zrop_settings_r(), | 614 | gk20a_writel(g, gr_bes_zrop_settings_r(), |
590 | gr_bes_zrop_settings_num_active_ltcs_f(gr->num_fbps)); | 615 | gr_bes_zrop_settings_num_active_ltcs_f(gr->num_fbps)); |
591 | gk20a_writel(g, gr_bes_crop_settings_r(), | 616 | gk20a_writel(g, gr_bes_crop_settings_r(), |
@@ -595,19 +620,6 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | |||
595 | gk20a_readl(g, gr_be0_crop_debug3_r()) | | 620 | gk20a_readl(g, gr_be0_crop_debug3_r()) | |
596 | gr_bes_crop_debug3_comp_vdc_4to2_disable_m()); | 621 | gr_bes_crop_debug3_comp_vdc_4to2_disable_m()); |
597 | 622 | ||
598 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) | ||
599 | for (pes_index = 0; pes_index < gr->pe_count_per_gpc; | ||
600 | pes_index++) | ||
601 | pes_tpc_mask |= gr->pes_tpc_mask[pes_index][gpc_index]; | ||
602 | if (g->tpc_fs_mask_user && g->ops.gr.get_gpc_tpc_mask(g, 0) == | ||
603 | (0x1 << gr->max_tpc_count) - 1) { | ||
604 | u32 val = g->tpc_fs_mask_user; | ||
605 | val &= (0x1 << gr->max_tpc_count) - 1; | ||
606 | gk20a_writel(g, gr_fe_tpc_fs_r(), val); | ||
607 | } else { | ||
608 | gk20a_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask); | ||
609 | } | ||
610 | |||
611 | for (tpc_index = 0; tpc_index < gr->tpc_count; tpc_index++) { | 623 | for (tpc_index = 0; tpc_index < gr->tpc_count; tpc_index++) { |
612 | if (tpc_index == 0) { | 624 | if (tpc_index == 0) { |
613 | gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc0_f(tpc_index); | 625 | gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc0_f(tpc_index); |