From 2c23fd19ad62a58fc35ac4c08760915abc74dc63 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Mon, 26 Sep 2016 08:49:39 -0700 Subject: gpu: nvgpu: gv11b: smid programming gv11b specific smid table init, smid numbering and smid programing. JIRA GV11B-21 Change-Id: I3a0f8355f2cd90ab1518cd8a5642a0e84202bdf8 Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1227096 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 92 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index cc4bbb21..e6050359 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1674,7 +1674,95 @@ static void gr_gv11b_detect_sm_arch(struct gk20a *g) gr_gpc0_tpc0_sm_arch_sm_version_v(v); g->gpu_characteristics.sm_arch_warp_count = gr_gpc0_tpc0_sm_arch_warp_count_v(v); +} + +static void gr_gv11b_init_sm_id_table(struct gk20a *g) +{ + u32 gpc, tpc; + u32 sm_id = 0; + + /* TODO populate smids based on power efficiency */ + for (tpc = 0; tpc < g->gr.max_tpc_per_gpc_count; tpc++) { + for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { + + if (tpc < g->gr.gpc_tpc_count[gpc]) { + g->gr.sm_to_cluster[sm_id].tpc_index = tpc; + g->gr.sm_to_cluster[sm_id].gpc_index = gpc; + g->gr.sm_to_cluster[sm_id].sm_index = sm_id % 2; + g->gr.sm_to_cluster[sm_id].global_tpc_index = + sm_id; + sm_id++; + } + } + } + g->gr.no_of_sm = sm_id; +} + +static void gr_gv11b_program_sm_id_numbering(struct gk20a *g, + u32 gpc, u32 tpc, u32 smid) +{ + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, + GPU_LIT_TPC_IN_GPC_STRIDE); + u32 gpc_offset = gpc_stride * gpc; + u32 tpc_offset = tpc_in_gpc_stride * tpc; + u32 global_tpc_index = g->gr.sm_to_cluster[smid].global_tpc_index; + + gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, + gr_gpc0_tpc0_sm_cfg_tpc_id_f(global_tpc_index)); + gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset, + gr_gpc0_gpm_pd_sm_id_id_f(global_tpc_index)); + gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, + gr_gpc0_tpc0_pe_cfg_smid_value_f(global_tpc_index)); +} + +static int gr_gv11b_load_smid_config(struct gk20a *g) +{ + u32 *tpc_sm_id; + u32 i, j; + u32 tpc_index, gpc_index, tpc_id; + u32 sms_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); + int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); + + tpc_sm_id = kcalloc(gr_cwd_sm_id__size_1_v(), sizeof(u32), GFP_KERNEL); + if (!tpc_sm_id) + return -ENOMEM; + /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/ + for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) { + u32 reg = 0; + u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() + + gr_cwd_gpc_tpc_id_tpc0_s(); + + for (j = 0; j < 4; j++) { + u32 sm_id; + u32 bits; + + tpc_id = (i << 2) + j; + sm_id = tpc_id * sms_per_tpc; + + if (sm_id >= g->gr.no_of_sm) + break; + + gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index; + tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index; + + bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) | + gr_cwd_gpc_tpc_id_tpc0_f(tpc_index); + reg |= bits << (j * bit_stride); + + tpc_sm_id[gpc_index + (num_gpcs * ((tpc_index & 4) + >> 2))] |= tpc_id << tpc_index * bit_stride; + } + gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg); + } + + for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) + gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]); + + kfree(tpc_sm_id); + + return 0; } static int gr_gv11b_commit_global_timeslice(struct gk20a *g, @@ -1773,4 +1861,8 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.init_sw_veid_bundle = gr_gv11b_init_sw_veid_bundle; gops->gr.program_zcull_mapping = gr_gv11b_program_zcull_mapping; gops->gr.commit_global_timeslice = gr_gv11b_commit_global_timeslice; + gops->gr.init_sm_id_table = gr_gv11b_init_sm_id_table; + gops->gr.load_smid_config = gr_gv11b_load_smid_config; + gops->gr.program_sm_id_numbering = + gr_gv11b_program_sm_id_numbering; } -- cgit v1.2.2