diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 92 |
1 files changed, 92 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index cc4bbb21..e6050359 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -1674,7 +1674,95 @@ static void gr_gv11b_detect_sm_arch(struct gk20a *g) | |||
1674 | gr_gpc0_tpc0_sm_arch_sm_version_v(v); | 1674 | gr_gpc0_tpc0_sm_arch_sm_version_v(v); |
1675 | g->gpu_characteristics.sm_arch_warp_count = | 1675 | g->gpu_characteristics.sm_arch_warp_count = |
1676 | gr_gpc0_tpc0_sm_arch_warp_count_v(v); | 1676 | gr_gpc0_tpc0_sm_arch_warp_count_v(v); |
1677 | } | ||
1678 | |||
1679 | static void gr_gv11b_init_sm_id_table(struct gk20a *g) | ||
1680 | { | ||
1681 | u32 gpc, tpc; | ||
1682 | u32 sm_id = 0; | ||
1683 | |||
1684 | /* TODO populate smids based on power efficiency */ | ||
1685 | for (tpc = 0; tpc < g->gr.max_tpc_per_gpc_count; tpc++) { | ||
1686 | for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { | ||
1687 | |||
1688 | if (tpc < g->gr.gpc_tpc_count[gpc]) { | ||
1689 | g->gr.sm_to_cluster[sm_id].tpc_index = tpc; | ||
1690 | g->gr.sm_to_cluster[sm_id].gpc_index = gpc; | ||
1691 | g->gr.sm_to_cluster[sm_id].sm_index = sm_id % 2; | ||
1692 | g->gr.sm_to_cluster[sm_id].global_tpc_index = | ||
1693 | sm_id; | ||
1694 | sm_id++; | ||
1695 | } | ||
1696 | } | ||
1697 | } | ||
1698 | g->gr.no_of_sm = sm_id; | ||
1699 | } | ||
1700 | |||
1701 | static void gr_gv11b_program_sm_id_numbering(struct gk20a *g, | ||
1702 | u32 gpc, u32 tpc, u32 smid) | ||
1703 | { | ||
1704 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
1705 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, | ||
1706 | GPU_LIT_TPC_IN_GPC_STRIDE); | ||
1707 | u32 gpc_offset = gpc_stride * gpc; | ||
1708 | u32 tpc_offset = tpc_in_gpc_stride * tpc; | ||
1709 | u32 global_tpc_index = g->gr.sm_to_cluster[smid].global_tpc_index; | ||
1710 | |||
1711 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, | ||
1712 | gr_gpc0_tpc0_sm_cfg_tpc_id_f(global_tpc_index)); | ||
1713 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset, | ||
1714 | gr_gpc0_gpm_pd_sm_id_id_f(global_tpc_index)); | ||
1715 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, | ||
1716 | gr_gpc0_tpc0_pe_cfg_smid_value_f(global_tpc_index)); | ||
1717 | } | ||
1718 | |||
1719 | static int gr_gv11b_load_smid_config(struct gk20a *g) | ||
1720 | { | ||
1721 | u32 *tpc_sm_id; | ||
1722 | u32 i, j; | ||
1723 | u32 tpc_index, gpc_index, tpc_id; | ||
1724 | u32 sms_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); | ||
1725 | int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); | ||
1726 | |||
1727 | tpc_sm_id = kcalloc(gr_cwd_sm_id__size_1_v(), sizeof(u32), GFP_KERNEL); | ||
1728 | if (!tpc_sm_id) | ||
1729 | return -ENOMEM; | ||
1677 | 1730 | ||
1731 | /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/ | ||
1732 | for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) { | ||
1733 | u32 reg = 0; | ||
1734 | u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() + | ||
1735 | gr_cwd_gpc_tpc_id_tpc0_s(); | ||
1736 | |||
1737 | for (j = 0; j < 4; j++) { | ||
1738 | u32 sm_id; | ||
1739 | u32 bits; | ||
1740 | |||
1741 | tpc_id = (i << 2) + j; | ||
1742 | sm_id = tpc_id * sms_per_tpc; | ||
1743 | |||
1744 | if (sm_id >= g->gr.no_of_sm) | ||
1745 | break; | ||
1746 | |||
1747 | gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
1748 | tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
1749 | |||
1750 | bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) | | ||
1751 | gr_cwd_gpc_tpc_id_tpc0_f(tpc_index); | ||
1752 | reg |= bits << (j * bit_stride); | ||
1753 | |||
1754 | tpc_sm_id[gpc_index + (num_gpcs * ((tpc_index & 4) | ||
1755 | >> 2))] |= tpc_id << tpc_index * bit_stride; | ||
1756 | } | ||
1757 | gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg); | ||
1758 | } | ||
1759 | |||
1760 | for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) | ||
1761 | gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]); | ||
1762 | |||
1763 | kfree(tpc_sm_id); | ||
1764 | |||
1765 | return 0; | ||
1678 | } | 1766 | } |
1679 | 1767 | ||
1680 | static int gr_gv11b_commit_global_timeslice(struct gk20a *g, | 1768 | static int gr_gv11b_commit_global_timeslice(struct gk20a *g, |
@@ -1773,4 +1861,8 @@ void gv11b_init_gr(struct gpu_ops *gops) | |||
1773 | gops->gr.init_sw_veid_bundle = gr_gv11b_init_sw_veid_bundle; | 1861 | gops->gr.init_sw_veid_bundle = gr_gv11b_init_sw_veid_bundle; |
1774 | gops->gr.program_zcull_mapping = gr_gv11b_program_zcull_mapping; | 1862 | gops->gr.program_zcull_mapping = gr_gv11b_program_zcull_mapping; |
1775 | gops->gr.commit_global_timeslice = gr_gv11b_commit_global_timeslice; | 1863 | gops->gr.commit_global_timeslice = gr_gv11b_commit_global_timeslice; |
1864 | gops->gr.init_sm_id_table = gr_gv11b_init_sm_id_table; | ||
1865 | gops->gr.load_smid_config = gr_gv11b_load_smid_config; | ||
1866 | gops->gr.program_sm_id_numbering = | ||
1867 | gr_gv11b_program_sm_id_numbering; | ||
1776 | } | 1868 | } |