summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorseshendra Gadagottu <sgadagottu@nvidia.com>2016-09-26 11:49:39 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2016-11-03 12:14:57 -0400
commit2c23fd19ad62a58fc35ac4c08760915abc74dc63 (patch)
tree9f32f723f54c5cc222dd9a36c17d576cab62ad35 /drivers
parente1bcaa33abd46fd7e2b66ad09a02f7f30d6a0dfe (diff)
gpu: nvgpu: gv11b: smid programming
gv11b-specific smid table init, smid numbering and smid programming. JIRA GV11B-21 Change-Id: I3a0f8355f2cd90ab1518cd8a5642a0e84202bdf8 Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com> Reviewed-on: http://git-master/r/1227096 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.c92
1 files changed, 92 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index cc4bbb21..e6050359 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1674,7 +1674,95 @@ static void gr_gv11b_detect_sm_arch(struct gk20a *g)
1674 gr_gpc0_tpc0_sm_arch_sm_version_v(v); 1674 gr_gpc0_tpc0_sm_arch_sm_version_v(v);
1675 g->gpu_characteristics.sm_arch_warp_count = 1675 g->gpu_characteristics.sm_arch_warp_count =
1676 gr_gpc0_tpc0_sm_arch_warp_count_v(v); 1676 gr_gpc0_tpc0_sm_arch_warp_count_v(v);
1677}
1678
1679static void gr_gv11b_init_sm_id_table(struct gk20a *g)
1680{
1681 u32 gpc, tpc;
1682 u32 sm_id = 0;
1683
1684 /* TODO populate smids based on power efficiency */
1685 for (tpc = 0; tpc < g->gr.max_tpc_per_gpc_count; tpc++) {
1686 for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
1687
1688 if (tpc < g->gr.gpc_tpc_count[gpc]) {
1689 g->gr.sm_to_cluster[sm_id].tpc_index = tpc;
1690 g->gr.sm_to_cluster[sm_id].gpc_index = gpc;
1691 g->gr.sm_to_cluster[sm_id].sm_index = sm_id % 2;
1692 g->gr.sm_to_cluster[sm_id].global_tpc_index =
1693 sm_id;
1694 sm_id++;
1695 }
1696 }
1697 }
1698 g->gr.no_of_sm = sm_id;
1699}
1700
1701static void gr_gv11b_program_sm_id_numbering(struct gk20a *g,
1702 u32 gpc, u32 tpc, u32 smid)
1703{
1704 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
1705 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
1706 GPU_LIT_TPC_IN_GPC_STRIDE);
1707 u32 gpc_offset = gpc_stride * gpc;
1708 u32 tpc_offset = tpc_in_gpc_stride * tpc;
1709 u32 global_tpc_index = g->gr.sm_to_cluster[smid].global_tpc_index;
1710
1711 gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
1712 gr_gpc0_tpc0_sm_cfg_tpc_id_f(global_tpc_index));
1713 gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset,
1714 gr_gpc0_gpm_pd_sm_id_id_f(global_tpc_index));
1715 gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
1716 gr_gpc0_tpc0_pe_cfg_smid_value_f(global_tpc_index));
1717}
1718
1719static int gr_gv11b_load_smid_config(struct gk20a *g)
1720{
1721 u32 *tpc_sm_id;
1722 u32 i, j;
1723 u32 tpc_index, gpc_index, tpc_id;
1724 u32 sms_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
1725 int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
1726
1727 tpc_sm_id = kcalloc(gr_cwd_sm_id__size_1_v(), sizeof(u32), GFP_KERNEL);
1728 if (!tpc_sm_id)
1729 return -ENOMEM;
1677 1730
1731 /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
1732 for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) {
1733 u32 reg = 0;
1734 u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
1735 gr_cwd_gpc_tpc_id_tpc0_s();
1736
1737 for (j = 0; j < 4; j++) {
1738 u32 sm_id;
1739 u32 bits;
1740
1741 tpc_id = (i << 2) + j;
1742 sm_id = tpc_id * sms_per_tpc;
1743
1744 if (sm_id >= g->gr.no_of_sm)
1745 break;
1746
1747 gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
1748 tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
1749
1750 bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
1751 gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
1752 reg |= bits << (j * bit_stride);
1753
1754 tpc_sm_id[gpc_index + (num_gpcs * ((tpc_index & 4)
1755 >> 2))] |= tpc_id << tpc_index * bit_stride;
1756 }
1757 gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
1758 }
1759
1760 for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
1761 gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
1762
1763 kfree(tpc_sm_id);
1764
1765 return 0;
1678} 1766}
1679 1767
1680static int gr_gv11b_commit_global_timeslice(struct gk20a *g, 1768static int gr_gv11b_commit_global_timeslice(struct gk20a *g,
@@ -1773,4 +1861,8 @@ void gv11b_init_gr(struct gpu_ops *gops)
1773 gops->gr.init_sw_veid_bundle = gr_gv11b_init_sw_veid_bundle; 1861 gops->gr.init_sw_veid_bundle = gr_gv11b_init_sw_veid_bundle;
1774 gops->gr.program_zcull_mapping = gr_gv11b_program_zcull_mapping; 1862 gops->gr.program_zcull_mapping = gr_gv11b_program_zcull_mapping;
1775 gops->gr.commit_global_timeslice = gr_gv11b_commit_global_timeslice; 1863 gops->gr.commit_global_timeslice = gr_gv11b_commit_global_timeslice;
1864 gops->gr.init_sm_id_table = gr_gv11b_init_sm_id_table;
1865 gops->gr.load_smid_config = gr_gv11b_load_smid_config;
1866 gops->gr.program_sm_id_numbering =
1867 gr_gv11b_program_sm_id_numbering;
1776} 1868}