gpu: nvgpu: gv11b: smid programming

gv11b-specific smid table init, smid numbering and smid programming.

JIRA GV11B-21

Change-Id: I3a0f8355f2cd90ab1518cd8a5642a0e84202bdf8
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1227096
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
author: seshendra Gadagottu <sgadagottu@nvidia.com> 2016-09-26 11:49:39 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2016-11-03 12:14:57 -0400
commit: 2c23fd19ad62a58fc35ac4c08760915abc74dc63 (patch)
tree: 9f32f723f54c5cc222dd9a36c17d576cab62ad35 /drivers
parent: e1bcaa33abd46fd7e2b66ad09a02f7f30d6a0dfe (diff)
1 files changed, 92 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index cc4bbb21..e6050359 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1674,7 +1674,95 @@ static void gr_gv11b_detect_sm_arch(struct gk20a *g)
                gr_gpc0_tpc0_sm_arch_sm_version_v(v);
        g->gpu_characteristics.sm_arch_warp_count =
                gr_gpc0_tpc0_sm_arch_warp_count_v(v);
+}
+static void gr_gv11b_init_sm_id_table(struct gk20a *g)
+{
+        u32 gpc, tpc;
+        u32 sm_id = 0;
+        /* TODO populate smids based on power efficiency */
+        for (tpc = 0; tpc < g->gr.max_tpc_per_gpc_count; tpc++) {
+                for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
+                        if (tpc < g->gr.gpc_tpc_count[gpc]) {
+                                g->gr.sm_to_cluster[sm_id].tpc_index = tpc;
+                                g->gr.sm_to_cluster[sm_id].gpc_index = gpc;
+                                g->gr.sm_to_cluster[sm_id].sm_index = sm_id % 2;
+                                g->gr.sm_to_cluster[sm_id].global_tpc_index =
+                                                                        sm_id;
+                                sm_id++;
+                        }
+                }
+        }
+        g->gr.no_of_sm = sm_id;
+}
+static void gr_gv11b_program_sm_id_numbering(struct gk20a *g,
+                                        u32 gpc, u32 tpc, u32 smid)
+{
+        u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+        u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
+                                        GPU_LIT_TPC_IN_GPC_STRIDE);
+        u32 gpc_offset = gpc_stride * gpc;
+        u32 tpc_offset = tpc_in_gpc_stride * tpc;
+        u32 global_tpc_index = g->gr.sm_to_cluster[smid].global_tpc_index;
+        gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
+                gr_gpc0_tpc0_sm_cfg_tpc_id_f(global_tpc_index));
+        gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset,
+                        gr_gpc0_gpm_pd_sm_id_id_f(global_tpc_index));
+        gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
+                        gr_gpc0_tpc0_pe_cfg_smid_value_f(global_tpc_index));
+}
+static int gr_gv11b_load_smid_config(struct gk20a *g)
+{
+        u32 *tpc_sm_id;
+        u32 i, j;
+        u32 tpc_index, gpc_index, tpc_id;
+        u32 sms_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
+        int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
+        tpc_sm_id = kcalloc(gr_cwd_sm_id__size_1_v(), sizeof(u32), GFP_KERNEL);
+        if (!tpc_sm_id)
+                return -ENOMEM;
+        /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
+        for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) {
+                u32 reg = 0;
+                u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
+                                 gr_cwd_gpc_tpc_id_tpc0_s();
+                for (j = 0; j < 4; j++) {
+                        u32 sm_id;
+                        u32 bits;
+                        tpc_id = (i << 2) + j;
+                        sm_id = tpc_id * sms_per_tpc;
+                        if (sm_id >= g->gr.no_of_sm)
+                                break;
+                        gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
+                        tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
+                        bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
+                                gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
+                        reg |= bits << (j * bit_stride);
+                        tpc_sm_id[gpc_index + (num_gpcs * ((tpc_index & 4)
+                                 >> 2))] |= tpc_id << tpc_index * bit_stride;
+                }
+                gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
+        }
+        for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
+                gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
+        kfree(tpc_sm_id);
+        return 0;
 }
 static int gr_gv11b_commit_global_timeslice(struct gk20a *g,
@@ -1773,4 +1861,8 @@ void gv11b_init_gr(struct gpu_ops *gops)
        gops->gr.init_sw_veid_bundle = gr_gv11b_init_sw_veid_bundle;
        gops->gr.program_zcull_mapping = gr_gv11b_program_zcull_mapping;
        gops->gr.commit_global_timeslice = gr_gv11b_commit_global_timeslice;
+        gops->gr.init_sm_id_table = gr_gv11b_init_sm_id_table;
+        gops->gr.load_smid_config = gr_gv11b_load_smid_config;
+        gops->gr.program_sm_id_numbering =
+                        gr_gv11b_program_sm_id_numbering;
 }
author	seshendra Gadagottu <sgadagottu@nvidia.com>	2016-09-26 11:49:39 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2016-11-03 12:14:57 -0400
commit	2c23fd19ad62a58fc35ac4c08760915abc74dc63 (patch)
tree	9f32f723f54c5cc222dd9a36c17d576cab62ad35 /drivers
parent	e1bcaa33abd46fd7e2b66ad09a02f7f30d6a0dfe (diff)

diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index cc4bbb21..e6050359 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1674,7 +1674,95 @@ static void gr_gv11b_detect_sm_arch(struct gk20a *g)
1674	gr_gpc0_tpc0_sm_arch_sm_version_v(v);	1674	gr_gpc0_tpc0_sm_arch_sm_version_v(v);
1675	g->gpu_characteristics.sm_arch_warp_count =	1675	g->gpu_characteristics.sm_arch_warp_count =
1676	gr_gpc0_tpc0_sm_arch_warp_count_v(v);	1676	gr_gpc0_tpc0_sm_arch_warp_count_v(v);
		1677	}
		1678
		1679	static void gr_gv11b_init_sm_id_table(struct gk20a *g)
		1680	{
		1681	u32 gpc, tpc;
		1682	u32 sm_id = 0;
		1683
		1684	/* TODO populate smids based on power efficiency */
		1685	for (tpc = 0; tpc < g->gr.max_tpc_per_gpc_count; tpc++) {
		1686	for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
		1687
		1688	if (tpc < g->gr.gpc_tpc_count[gpc]) {
		1689	g->gr.sm_to_cluster[sm_id].tpc_index = tpc;
		1690	g->gr.sm_to_cluster[sm_id].gpc_index = gpc;
		1691	g->gr.sm_to_cluster[sm_id].sm_index = sm_id % 2;
		1692	g->gr.sm_to_cluster[sm_id].global_tpc_index =
		1693	sm_id;
		1694	sm_id++;
		1695	}
		1696	}
		1697	}
		1698	g->gr.no_of_sm = sm_id;
		1699	}
		1700
		1701	static void gr_gv11b_program_sm_id_numbering(struct gk20a *g,
		1702	u32 gpc, u32 tpc, u32 smid)
		1703	{
		1704	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
		1705	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
		1706	GPU_LIT_TPC_IN_GPC_STRIDE);
		1707	u32 gpc_offset = gpc_stride * gpc;
		1708	u32 tpc_offset = tpc_in_gpc_stride * tpc;
		1709	u32 global_tpc_index = g->gr.sm_to_cluster[smid].global_tpc_index;
		1710
		1711	gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
		1712	gr_gpc0_tpc0_sm_cfg_tpc_id_f(global_tpc_index));
		1713	gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset,
		1714	gr_gpc0_gpm_pd_sm_id_id_f(global_tpc_index));
		1715	gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
		1716	gr_gpc0_tpc0_pe_cfg_smid_value_f(global_tpc_index));
		1717	}
		1718
		1719	static int gr_gv11b_load_smid_config(struct gk20a *g)
		1720	{
		1721	u32 *tpc_sm_id;
		1722	u32 i, j;
		1723	u32 tpc_index, gpc_index, tpc_id;
		1724	u32 sms_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
		1725	int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
		1726
		1727	tpc_sm_id = kcalloc(gr_cwd_sm_id__size_1_v(), sizeof(u32), GFP_KERNEL);
		1728	if (!tpc_sm_id)
		1729	return -ENOMEM;
1677		1730
		1731	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
		1732	for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) {
		1733	u32 reg = 0;
		1734	u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
		1735	gr_cwd_gpc_tpc_id_tpc0_s();
		1736
		1737	for (j = 0; j < 4; j++) {
		1738	u32 sm_id;
		1739	u32 bits;
		1740
		1741	tpc_id = (i << 2) + j;
		1742	sm_id = tpc_id * sms_per_tpc;
		1743
		1744	if (sm_id >= g->gr.no_of_sm)
		1745	break;
		1746
		1747	gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
		1748	tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
		1749
		1750	bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
		1751	gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
		1752	reg |= bits << (j * bit_stride);
		1753
		1754	tpc_sm_id[gpc_index + (num_gpcs * ((tpc_index & 4)
		1755	>> 2))] |= tpc_id << tpc_index * bit_stride;
		1756	}
		1757	gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
		1758	}
		1759
		1760	for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
		1761	gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
		1762
		1763	kfree(tpc_sm_id);
		1764
		1765	return 0;
1678	}	1766	}
1679		1767
1680	static int gr_gv11b_commit_global_timeslice(struct gk20a *g,	1768	static int gr_gv11b_commit_global_timeslice(struct gk20a *g,
@@ -1773,4 +1861,8 @@ void gv11b_init_gr(struct gpu_ops *gops)
1773	gops->gr.init_sw_veid_bundle = gr_gv11b_init_sw_veid_bundle;	1861	gops->gr.init_sw_veid_bundle = gr_gv11b_init_sw_veid_bundle;
1774	gops->gr.program_zcull_mapping = gr_gv11b_program_zcull_mapping;	1862	gops->gr.program_zcull_mapping = gr_gv11b_program_zcull_mapping;
1775	gops->gr.commit_global_timeslice = gr_gv11b_commit_global_timeslice;	1863	gops->gr.commit_global_timeslice = gr_gv11b_commit_global_timeslice;
		1864	gops->gr.init_sm_id_table = gr_gv11b_init_sm_id_table;
		1865	gops->gr.load_smid_config = gr_gv11b_load_smid_config;
		1866	gops->gr.program_sm_id_numbering =
		1867	gr_gv11b_program_sm_id_numbering;
1776	}	1868	}