author      Terje Bergstrom <tbergstrom@nvidia.com>  2016-04-25 17:10:40 -0400
committer   Terje Bergstrom <tbergstrom@nvidia.com>  2016-04-27 11:12:23 -0400
commit      2db5e4794e37952bdbd2882c22ba810a45e9ea84 (patch)
tree        1f075eaf5daf71879e87fa4ef1bee4c922279fd8 /drivers/gpu/nvgpu/gm20b/gr_gm20b.c
parent      6d888006aa7ed87b1589198369180e7e69f9f1d2 (diff)
gpu: nvgpu: Fix floorsweeping for multi-GPC GPU
There were multiple bugs in dealing with a GPU with more than one GPC:

* Beta CB size was set for the wrong PPC
* TPC mask did not shift fields correctly
* PD skip table used || instead of the | operator

Change-Id: I849e2331a943586df16996fe573da2a0ac4cce19
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1132109
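The || bug is easy to miss in review because both spellings compile cleanly. Below is a minimal sketch of the failure mode; the *_MASK_F macros are hypothetical stand-ins for the generated gr_pd_dist_skip_table_gpc_4n*_mask_f() helpers, not the real nvgpu definitions:

    #include <stdio.h>
    #include <stdint.h>

    /* Hypothetical field helpers: pack a 4-bit skip mask into its own lane. */
    #define GPC_4N0_MASK_F(m) (((uint32_t)(m) & 0xf) << 0)
    #define GPC_4N1_MASK_F(m) (((uint32_t)(m) & 0xf) << 8)
    #define GPC_4N2_MASK_F(m) (((uint32_t)(m) & 0xf) << 16)
    #define GPC_4N3_MASK_F(m) (((uint32_t)(m) & 0xf) << 24)

    int main(void)
    {
            uint8_t skip[4] = { 0x1, 0x2, 0x4, 0x8 };

            /* Buggy: || reduces the whole chain to 0 or 1, so the register
             * write would be 0x00000001 regardless of the masks. */
            uint32_t with_logical_or = GPC_4N0_MASK_F(skip[0]) ||
                                       GPC_4N1_MASK_F(skip[1]) ||
                                       GPC_4N2_MASK_F(skip[2]) ||
                                       GPC_4N3_MASK_F(skip[3]);

            /* Fixed: | merges the shifted fields into 0x08040201. */
            uint32_t with_bitwise_or = GPC_4N0_MASK_F(skip[0]) |
                                       GPC_4N1_MASK_F(skip[1]) |
                                       GPC_4N2_MASK_F(skip[2]) |
                                       GPC_4N3_MASK_F(skip[3]);

            printf("||: 0x%08x  |: 0x%08x\n", with_logical_or, with_bitwise_or);
            return 0;
    }

On a board where the skip masks are all zero, both spellings happen to write the same value, which is presumably how the || version survived single-GPC testing; once any mask is non-zero, the per-GPC fields collapse to a single bit.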
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/gr_gm20b.c')
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 52
1 file changed, 32 insertions(+), 20 deletions(-)
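The other two fixes are indexing errors that only surface with gpc_count > 1. A toy sketch of the TPC-mask change follows; the topology values are made up for illustration (2 GPCs, one PES each, 2 TPCs per GPC, nothing floorswept):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            const unsigned num_tpc_per_gpc = 2, gpc_count = 2;
            uint32_t pes_tpc_mask[2] = { 0x3, 0x3 };  /* per-GPC TPC masks */
            uint32_t unshifted = 0, shifted = 0;
            unsigned gpc;

            for (gpc = 0; gpc < gpc_count; gpc++) {
                    /* Old code: per-GPC masks alias on top of each other. */
                    unshifted |= pes_tpc_mask[gpc];
                    /* New code: each GPC gets its own field in the register. */
                    shifted |= pes_tpc_mask[gpc] << (num_tpc_per_gpc * gpc);
            }

            /* unshifted = 0x3 (looks like a single 2-TPC GPC);
             * shifted = 0xf (all four TPCs across both GPCs). */
            printf("unshifted: 0x%x  shifted: 0x%x\n", unshifted, shifted);
            return 0;
    }

The beta CB fix is the same class of bug: the global PPC slot is ppc_index + num_pes_per_gpc * gpc_index (temp2 in the patch), and the old ppc_index + gpc_index only coincides with it when num_pes_per_gpc == 1 or gpc_index == 0.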
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 5b00078f..35bbe70c 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -175,10 +175,11 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
 	u32 alpha_offset_in_chunk = 0;
 	u32 pd_ab_max_output;
 	u32 gpc_index, ppc_index;
-	u32 temp;
 	u32 cbm_cfg_size1, cbm_cfg_size2;
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
+	u32 num_pes_per_gpc = nvgpu_get_litter_value(g,
+			GPU_LIT_NUM_PES_PER_GPC);
 
 	gk20a_dbg_fn("");
 
@@ -199,7 +200,8 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
 		gr->tpc_count * gr->attrib_cb_size;
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		temp = gpc_stride * gpc_index;
+		u32 temp = gpc_stride * gpc_index;
+		u32 temp2 = num_pes_per_gpc * gpc_index;
 		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
 			ppc_index++) {
 			cbm_cfg_size1 = gr->attrib_cb_default_size *
@@ -234,7 +236,7 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
 				gr->pes_tpc_count[ppc_index][gpc_index];
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
-				gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + gpc_index),
+				gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
 				gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size1) |
 				gr_gpcs_swdx_tc_beta_cb_size_div3_f(cbm_cfg_size1/3),
 				patch);
@@ -523,6 +525,28 @@ static void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 	}
 }
 
+static void gr_gm20b_load_tpc_mask(struct gk20a *g)
+{
+	u32 pes_tpc_mask = 0;
+	u32 gpc, pes;
+	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
+
+	for (gpc = 0; gpc < g->gr.gpc_count; gpc++)
+		for (pes = 0; pes < g->gr.pe_count_per_gpc; pes++) {
+			pes_tpc_mask |= g->gr.pes_tpc_mask[pes][gpc] <<
+				num_tpc_per_gpc * gpc;
+		}
+
+	if (g->tpc_fs_mask_user && g->ops.gr.get_gpc_tpc_mask(g, 0) ==
+			(0x1 << g->gr.max_tpc_count) - 1) {
+		u32 val = g->tpc_fs_mask_user;
+		val &= (0x1 << g->gr.max_tpc_count) - 1;
+		gk20a_writel(g, gr_fe_tpc_fs_r(), val);
+	} else {
+		gk20a_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask);
+	}
+}
+
 int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
 {
 	struct gr_gk20a *gr = &g->gr;
@@ -531,7 +555,6 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
 	u32 sm_id = 0;
 	u32 tpc_per_gpc = 0;
 	u32 tpc_sm_id = 0, gpc_tpc_id = 0;
-	u32 pes_tpc_mask = 0, pes_index;
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
 
@@ -576,9 +599,9 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
 			gpc_index += 4) {
 
 		gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4),
-			gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) ||
-			gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) ||
-			gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) ||
+			gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) |
+			gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) |
+			gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) |
 			gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3]));
 	}
 
@@ -586,6 +609,8 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
 		gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
 		gr_cwd_fs_num_tpcs_f(gr->tpc_count));
 
+	gr_gm20b_load_tpc_mask(g);
+
 	gk20a_writel(g, gr_bes_zrop_settings_r(),
 		gr_bes_zrop_settings_num_active_ltcs_f(gr->num_fbps));
 	gk20a_writel(g, gr_bes_crop_settings_r(),
@@ -595,19 +620,6 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
 		gk20a_readl(g, gr_be0_crop_debug3_r()) |
 		gr_bes_crop_debug3_comp_vdc_4to2_disable_m());
 
-	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
-		for (pes_index = 0; pes_index < gr->pe_count_per_gpc;
-				pes_index++)
-			pes_tpc_mask |= gr->pes_tpc_mask[pes_index][gpc_index];
-	if (g->tpc_fs_mask_user && g->ops.gr.get_gpc_tpc_mask(g, 0) ==
-			(0x1 << gr->max_tpc_count) - 1) {
-		u32 val = g->tpc_fs_mask_user;
-		val &= (0x1 << gr->max_tpc_count) - 1;
-		gk20a_writel(g, gr_fe_tpc_fs_r(), val);
-	} else {
-		gk20a_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask);
-	}
-
 	for (tpc_index = 0; tpc_index < gr->tpc_count; tpc_index++) {
 		if (tpc_index == 0) {
 			gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);