summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c')
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.c22
1 files changed, 22 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 8d8f56f6..2b242978 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -2123,6 +2123,26 @@ void gr_gv11b_detect_sm_arch(struct gk20a *g)
2123 gr_gpc0_tpc0_sm_arch_warp_count_v(v); 2123 gr_gpc0_tpc0_sm_arch_warp_count_v(v);
2124} 2124}
2125 2125
2126static u32 gr_gv11b_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc)
2127{
2128 u32 tpc_new = 0;
2129 u32 temp;
2130 u32 pes;
2131 struct gr_gk20a *gr = &g->gr;
2132
2133 for (pes = 0; pes < gr->gpc_ppc_count[gpc]; pes++) {
2134 if (gr->pes_tpc_mask[pes][gpc] & BIT(tpc))
2135 break;
2136 tpc_new += gr->pes_tpc_count[pes][gpc];
2137 }
2138 temp = (BIT(tpc) - 1) & gr->pes_tpc_mask[pes][gpc];
2139 temp = hweight32(temp);
2140 tpc_new += temp;
2141
2142 nvgpu_log_info(g, "tpc: %d -> new tpc: %d", tpc, tpc_new);
2143 return tpc_new;
2144}
2145
2126void gr_gv11b_program_sm_id_numbering(struct gk20a *g, 2146void gr_gv11b_program_sm_id_numbering(struct gk20a *g,
2127 u32 gpc, u32 tpc, u32 smid) 2147 u32 gpc, u32 tpc, u32 smid)
2128{ 2148{
@@ -2133,6 +2153,8 @@ void gr_gv11b_program_sm_id_numbering(struct gk20a *g,
2133 u32 tpc_offset = tpc_in_gpc_stride * tpc; 2153 u32 tpc_offset = tpc_in_gpc_stride * tpc;
2134 u32 global_tpc_index = g->gr.sm_to_cluster[smid].global_tpc_index; 2154 u32 global_tpc_index = g->gr.sm_to_cluster[smid].global_tpc_index;
2135 2155
2156 tpc = gr_gv11b_get_nonpes_aware_tpc(g, gpc, tpc);
2157
2136 gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, 2158 gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
2137 gr_gpc0_tpc0_sm_cfg_tpc_id_f(global_tpc_index)); 2159 gr_gpc0_tpc0_sm_cfg_tpc_id_f(global_tpc_index));
2138 gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset, 2160 gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset,