From ff1b2fc1e84fbac147e144d3c8d80104b7eca5e9 Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Fri, 7 Nov 2014 18:25:48 +0530 Subject: gpu: nvgpu: fix gm20b floorsweep API Rewrite gr_gm20b_ctx_state_floorsweep() to include necessary register writes for gm20b tpc floorsweeping. This includes : - update the loop to write gr_gpc0_tpc0_sm_cfg_r() and gr_gpc0_gpm_pd_sm_id_r() - for gr_pd_num_tpc_per_gpc_r(i), we just need to write register with i = 0 and the value being written is tpc count in that gpc - gr_fe_tpc_fs_r() needs to have logical list of TPCs after floorsweeping. Get this value from pes_tpc_mask. - gr_cwd_gpc_tpc_id_tpc0_f() and gr_cwd_sm_id_tpc0_f() also refer to logical ids and hence no need to check tpc_fs_mask to configure these registers Bug 1513685 Change-Id: I82dc36a223fbd21e814e58e4d67738d7c63f04a7 Signed-off-by: Deepak Nibade Reviewed-on: http://git-master/r/601117 Reviewed-by: Sachin Nikam --- drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 92 +++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 52 deletions(-) (limited to 'drivers/gpu/nvgpu') diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index a6b54ea5..93ad5e8e 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -519,52 +519,38 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) struct gr_gk20a *gr = &g->gr; u32 tpc_index, gpc_index; u32 tpc_offset, gpc_offset; - u32 sm_id = 0, gpc_id = 0; - u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()]; - u32 tpc_per_gpc; - u32 tpc_fs_mask = 0, tpc_sm_id = 0, gpc_tpc_id = 0; + u32 sm_id = 0; + u32 tpc_per_gpc = 0; + u32 tpc_sm_id = 0, gpc_tpc_id = 0; + u32 pes_tpc_mask = 0, pes_index; gk20a_dbg_fn(""); - for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) { - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - gpc_offset = proj_gpc_stride_v() * gpc_index; - if (tpc_index < gr->gpc_tpc_count[gpc_index]) { - tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index; - - gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, - gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); - gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) + gpc_offset, - gr_gpc0_gpm_pd_sm_id_id_f(sm_id)); - gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, - gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id)); - - sm_id_to_gpc_id[sm_id] = gpc_index; - sm_id++; - } + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + gpc_offset = proj_gpc_stride_v() * gpc_index; + for (tpc_index = 0; tpc_index < gr->gpc_tpc_count[gpc_index]; + tpc_index++) { + tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index; + + gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + + gpc_offset + tpc_offset, + gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); + gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) + + gpc_offset, + gr_gpc0_gpm_pd_sm_id_id_f(sm_id)); + gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + + gpc_offset + tpc_offset, + gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id)); + + sm_id++; } } - for (tpc_index = 0, gpc_id = 0; - tpc_index < gr_pd_num_tpc_per_gpc__size_1_v(); - tpc_index++, gpc_id += 8) { - - if (gpc_id >= gr->gpc_count) - gpc_id = 0; - - tpc_per_gpc = - gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) | - gr_pd_num_tpc_per_gpc_count1_f(gr->gpc_tpc_count[gpc_id + 1]) | - gr_pd_num_tpc_per_gpc_count2_f(gr->gpc_tpc_count[gpc_id + 2]) | - gr_pd_num_tpc_per_gpc_count3_f(gr->gpc_tpc_count[gpc_id + 3]) | - gr_pd_num_tpc_per_gpc_count4_f(gr->gpc_tpc_count[gpc_id + 4]) | - gr_pd_num_tpc_per_gpc_count5_f(gr->gpc_tpc_count[gpc_id + 5]) | - gr_pd_num_tpc_per_gpc_count6_f(gr->gpc_tpc_count[gpc_id + 6]) | - gr_pd_num_tpc_per_gpc_count7_f(gr->gpc_tpc_count[gpc_id + 7]); - - gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc); - gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc); - } + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) + tpc_per_gpc |= gr->gpc_tpc_count[gpc_index] + << (gr_pd_num_tpc_per_gpc__size_1_v() * gpc_index); + gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(0), tpc_per_gpc); + gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(0), tpc_per_gpc); /* gr__setup_pd_mapping stubbed for gk20a */ gr_gk20a_setup_rop_mapping(g, gr); @@ -593,20 +579,22 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) gk20a_readl(g, gr_be0_crop_debug3_r()) | gr_bes_crop_debug3_comp_vdc_4to2_disable_m()); - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - tpc_fs_mask |= gr->gpc_tpc_mask[gpc_index] << - (gr->max_tpc_per_gpc_count * gpc_index); + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) + for (pes_index = 0; pes_index < gr->pe_count_per_gpc; + pes_index++) + pes_tpc_mask |= gr->pes_tpc_mask[pes_index][gpc_index]; + gk20a_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask); + + for (tpc_index = 0; tpc_index < gr->tpc_count; tpc_index++) { + if (tpc_index == 0) { + gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc0_f(tpc_index); + tpc_sm_id |= gr_cwd_sm_id_tpc0_f(tpc_index); + } else if (tpc_index == 1) { + gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc1_f(tpc_index); + tpc_sm_id |= gr_cwd_sm_id_tpc1_f(tpc_index); + } } - gk20a_writel(g, gr_fe_tpc_fs_r(), tpc_fs_mask); - if (tpc_fs_mask & (0x1 << 0)) { - tpc_sm_id |= gr_cwd_sm_id_tpc0_f(0); - gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc0_f(0); - } - if (tpc_fs_mask & (0x1 << 1)) { - gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc1_f(1); - tpc_sm_id |= gr_cwd_sm_id_tpc1_f(1); - } /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs. * Since we know TPC number is less than 5. We select * index 0 directly. */ -- cgit v1.2.2