path: root/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c  166
1 file changed, 41 insertions, 125 deletions
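The recurring pattern in this change: open-coded dma_alloc_coherent()/dma_alloc_attrs() calls, gk20a_get_sgtable(), gk20a_gmmu_map() and their multi-step error paths collapse into single gk20a_gmmu_alloc*/gk20a_gmmu_free helper calls operating on one descriptor, with field renames to match (cpuva to cpu_va, pmu_va/ucode_gpuva to gpu_va, raw cpu_pa reads to gk20a_mem_phys()). Below is a minimal sketch of the resulting call pattern, using only the helper signatures visible in the hunks that follow; the descriptor type name (mem_desc) and the example function itself are assumptions for illustration, not part of this commit.

/* Hypothetical illustration of the consolidated pattern; only the
 * gk20a_gmmu_* calls and the descriptor fields are taken from this diff. */
static int example_debug_buf_init(struct gk20a *g)
{
	struct mem_desc desc = {};	/* assumed descriptor type name */
	int err;

	/* One helper call now covers the old dma_zalloc_coherent() plus
	 * gk20a_get_sgtable() sequence: on success it fills desc.cpu_va,
	 * desc.sgt and desc.size. */
	err = gk20a_gmmu_alloc(g, 0x1000, &desc);
	if (err)
		return err;

	/* CPU accesses go through desc.cpu_va; the GPU-visible address is
	 * derived from desc.sgt (see gk20a_mm_iova_addr() below) or, for
	 * GMMU-mapped buffers, from desc.gpu_va as filled in by
	 * gk20a_gmmu_alloc_map(). */
	memset(desc.cpu_va, 0, desc.size);

	/* A single free replaces the old unmap/free-sgtable/dma_free chain
	 * and the manual NULL/0 re-initialization of the fields. */
	gk20a_gmmu_free(g, &desc);
	return 0;
}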
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index e9b209c4..a160942f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -567,7 +567,7 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 
 	gk20a_dbg_fn("");
 
-	inst_ptr = c->inst_block.cpuva;
+	inst_ptr = c->inst_block.cpu_va;
 	if (!inst_ptr)
 		return -ENOMEM;
 
@@ -674,7 +674,7 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g,
 static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
 					struct channel_gk20a *c)
 {
-	u32 inst_base_ptr = u64_lo32(c->inst_block.cpu_pa
+	u32 inst_base_ptr = u64_lo32(gk20a_mem_phys(&c->inst_block)
 				     >> ram_in_base_shift_v());
 	u32 ret;
 
@@ -1375,7 +1375,7 @@ static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
 	int ret;
 
 	u32 inst_base_ptr =
-		u64_lo32(c->inst_block.cpu_pa
+		u64_lo32(gk20a_mem_phys(&c->inst_block)
 		>> ram_in_base_shift_v());
 
 
@@ -1671,7 +1671,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 
 	if (tegra_platform_is_linsim()) {
 		u32 inst_base_ptr =
-			u64_lo32(c->inst_block.cpu_pa
+			u64_lo32(gk20a_mem_phys(&c->inst_block)
 			>> ram_in_base_shift_v());
 
 		ret = gr_gk20a_submit_fecs_method_op(g,
@@ -1729,12 +1729,12 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
 	gk20a_init_inst_block(&ucode_info->inst_blk_desc, vm, 0);
 
 	/* Map ucode surface to GMMU */
-	ucode_info->ucode_gpuva = gk20a_gmmu_map(vm,
+	ucode_info->surface_desc.gpu_va = gk20a_gmmu_map(vm,
 					&ucode_info->surface_desc.sgt,
 					ucode_info->surface_desc.size,
 					0, /* flags */
 					gk20a_mem_flag_read_only);
-	if (!ucode_info->ucode_gpuva) {
+	if (!ucode_info->surface_desc.gpu_va) {
 		gk20a_err(d, "failed to update gmmu ptes\n");
 		return -ENOMEM;
 	}
@@ -1798,8 +1798,6 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
 	u8 *buf;
 	u32 ucode_size;
 	int err = 0;
-	dma_addr_t iova;
-	DEFINE_DMA_ATTRS(attrs);
 
 	fecs_fw = gk20a_request_firmware(g, GK20A_FECS_UCODE_IMAGE);
 	if (!fecs_fw) {
@@ -1832,30 +1830,12 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
 			g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32),
 			g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32));
 
-	ucode_info->surface_desc.size = ucode_size;
-	dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
-	ucode_info->surface_desc.cpuva = dma_alloc_attrs(d,
-					ucode_info->surface_desc.size,
-					&iova,
-					GFP_KERNEL,
-					&attrs);
-	if (!ucode_info->surface_desc.cpuva) {
-		gk20a_err(d, "memory allocation failed\n");
-		err = -ENOMEM;
-		goto clean_up;
-	}
-
-	ucode_info->surface_desc.iova = iova;
-	err = gk20a_get_sgtable(d, &ucode_info->surface_desc.sgt,
-				ucode_info->surface_desc.cpuva,
-				ucode_info->surface_desc.iova,
-				ucode_info->surface_desc.size);
-	if (err) {
-		gk20a_err(d, "failed to create sg table\n");
+	err = gk20a_gmmu_alloc_attr(g, DMA_ATTR_READ_ONLY, ucode_size,
+				    &ucode_info->surface_desc);
+	if (err)
 		goto clean_up;
-	}
 
-	buf = (u8 *)ucode_info->surface_desc.cpuva;
+	buf = (u8 *)ucode_info->surface_desc.cpu_va;
 	if (!buf) {
 		gk20a_err(d, "failed to map surface desc buffer");
 		err = -ENOMEM;
@@ -1882,23 +1862,13 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
 	if (err)
 		goto clean_up;
 
-	gk20a_free_sgtable(&ucode_info->surface_desc.sgt);
-
 	return 0;
 
 clean_up:
-	if (ucode_info->ucode_gpuva)
-		gk20a_gmmu_unmap(vm, ucode_info->ucode_gpuva,
+	if (ucode_info->surface_desc.gpu_va)
+		gk20a_gmmu_unmap(vm, ucode_info->surface_desc.gpu_va,
 			ucode_info->surface_desc.size, gk20a_mem_flag_none);
-	if (ucode_info->surface_desc.sgt)
-		gk20a_free_sgtable(&ucode_info->surface_desc.sgt);
-	if (ucode_info->surface_desc.cpuva)
-		dma_free_attrs(d, ucode_info->surface_desc.size,
-				ucode_info->surface_desc.cpuva,
-				ucode_info->surface_desc.iova,
-				&attrs);
-	ucode_info->surface_desc.cpuva = NULL;
-	ucode_info->surface_desc.iova = 0;
+	gk20a_gmmu_free(g, &ucode_info->surface_desc);
 
 	release_firmware(gpccs_fw);
 	gpccs_fw = NULL;
@@ -1928,7 +1898,7 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
 
 	gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0);
 
-	inst_ptr = ucode_info->inst_blk_desc.cpu_pa;
+	inst_ptr = gk20a_mem_phys(&ucode_info->inst_blk_desc);
 	gk20a_writel(g, gr_fecs_new_ctx_r(),
 		     gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
 		     gr_fecs_new_ctx_target_m() |
@@ -2111,7 +2081,7 @@ static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
 static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g)
 {
 	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
-	u64 addr_base = ucode_info->ucode_gpuva;
+	u64 addr_base = ucode_info->surface_desc.gpu_va;
 
 	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0);
 
@@ -2128,6 +2098,7 @@ static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g)
 
 int gr_gk20a_load_ctxsw_ucode(struct gk20a *g)
 {
+	int err;
 
 	gk20a_dbg_fn("");
 
@@ -2147,8 +2118,12 @@ int gr_gk20a_load_ctxsw_ucode(struct gk20a *g)
 		gr_gk20a_load_falcon_imem(g);
 		gr_gk20a_start_falcon_ucode(g);
 	} else {
-		if (!g->gr.skip_ucode_init)
-			gr_gk20a_init_ctxsw_ucode(g);
+		if (!g->gr.skip_ucode_init) {
+			err = gr_gk20a_init_ctxsw_ucode(g);
+
+			if (err)
+				return err;
+		}
 		gr_gk20a_load_falcon_with_bootloader(g);
 		g->gr.skip_ucode_init = true;
 	}
@@ -2976,21 +2951,13 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
 
 	gr_gk20a_free_global_ctx_buffers(g);
 
-	dma_free_coherent(d, gr->mmu_wr_mem.size,
-		gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova);
-	gr->mmu_wr_mem.cpuva = NULL;
-	gr->mmu_wr_mem.iova = 0;
-	dma_free_coherent(d, gr->mmu_rd_mem.size,
-		gr->mmu_rd_mem.cpuva, gr->mmu_rd_mem.iova);
-	gr->mmu_rd_mem.cpuva = NULL;
-	gr->mmu_rd_mem.iova = 0;
+	gk20a_gmmu_free(g, &gr->mmu_wr_mem);
+	gk20a_gmmu_free(g, &gr->mmu_rd_mem);
 
 	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
 	dma_free_attrs(d, gr->compbit_store.size, gr->compbit_store.pages,
 		gr->compbit_store.base_iova, &attrs);
 
-	memset(&gr->mmu_wr_mem, 0, sizeof(struct mmu_desc));
-	memset(&gr->mmu_rd_mem, 0, sizeof(struct mmu_desc));
 	memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));
 
 	kfree(gr->gpc_tpc_count);
@@ -3234,33 +3201,19 @@ clean_up:
 
 static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr)
 {
-	struct device *d = dev_from_gk20a(g);
-	dma_addr_t iova;
-
-	gr->mmu_wr_mem_size = gr->mmu_rd_mem_size = 0x1000;
+	int err;
 
-	gr->mmu_wr_mem.size = gr->mmu_wr_mem_size;
-	gr->mmu_wr_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_wr_mem_size,
-					&iova, GFP_KERNEL);
-	if (!gr->mmu_wr_mem.cpuva)
+	err = gk20a_gmmu_alloc(g, 0x1000, &gr->mmu_wr_mem);
+	if (err)
 		goto err;
 
-	gr->mmu_wr_mem.iova = iova;
-
-	gr->mmu_rd_mem.size = gr->mmu_rd_mem_size;
-	gr->mmu_rd_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_rd_mem_size,
-					&iova, GFP_KERNEL);
-	if (!gr->mmu_rd_mem.cpuva)
+	err = gk20a_gmmu_alloc(g, 0x1000, &gr->mmu_rd_mem);
+	if (err)
 		goto err_free_wr_mem;
-
-	gr->mmu_rd_mem.iova = iova;
 	return 0;
 
 err_free_wr_mem:
-	dma_free_coherent(d, gr->mmu_wr_mem.size,
-		gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova);
-	gr->mmu_wr_mem.cpuva = NULL;
-	gr->mmu_wr_mem.iova = 0;
+	gk20a_gmmu_free(g, &gr->mmu_wr_mem);
 err:
 	return -ENOMEM;
 }
@@ -4241,7 +4194,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
 	gk20a_dbg_fn("");
 
 	/* init mmu debug buffer */
-	addr = gk20a_mm_smmu_vaddr_translate(g, gr->mmu_wr_mem.iova);
+	addr = gk20a_mm_iova_addr(g, gr->mmu_wr_mem.sgt->sgl);
 	addr >>= fb_mmu_debug_wr_addr_alignment_v();
 
 	gk20a_writel(g, fb_mmu_debug_wr_r(),
@@ -4249,7 +4202,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
 		fb_mmu_debug_wr_vol_false_f() |
 		fb_mmu_debug_wr_addr_f(addr));
 
-	addr = gk20a_mm_smmu_vaddr_translate(g, gr->mmu_rd_mem.iova);
+	addr = gk20a_mm_iova_addr(g, gr->mmu_rd_mem.sgt->sgl);
 	addr >>= fb_mmu_debug_rd_addr_alignment_v();
 
 	gk20a_writel(g, fb_mmu_debug_rd_r(),
@@ -4651,8 +4604,6 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g)
 	int err = 0;
 
 	u32 size;
-	struct sg_table *sgt_pg_buf;
-	dma_addr_t iova;
 
 	gk20a_dbg_fn("");
 
@@ -4665,50 +4616,24 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g)
 		return err;
 	}
 
-	if (!pmu->pg_buf.cpuva) {
-		pmu->pg_buf.cpuva = dma_alloc_coherent(d, size,
-						&iova,
-						GFP_KERNEL);
-		if (!pmu->pg_buf.cpuva) {
+	if (!pmu->pg_buf.cpu_va) {
+		err = gk20a_gmmu_alloc_map(vm, size, &pmu->pg_buf);
+		if (err) {
 			gk20a_err(d, "failed to allocate memory\n");
 			return -ENOMEM;
 		}
-
-		pmu->pg_buf.iova = iova;
-		pmu->pg_buf.size = size;
-
-		err = gk20a_get_sgtable(d, &sgt_pg_buf,
-					pmu->pg_buf.cpuva,
-					pmu->pg_buf.iova,
-					size);
-		if (err) {
-			gk20a_err(d, "failed to create sg table\n");
-			goto err_free_pg_buf;
-		}
-
-		pmu->pg_buf.pmu_va = gk20a_gmmu_map(vm,
-					&sgt_pg_buf,
-					size,
-					0, /* flags */
-					gk20a_mem_flag_none);
-		if (!pmu->pg_buf.pmu_va) {
-			gk20a_err(d, "failed to map fecs pg buffer");
-			err = -ENOMEM;
-			goto err_free_sgtable;
-		}
-
-		gk20a_free_sgtable(&sgt_pg_buf);
 	}
 
 
-	err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa);
+	err = gr_gk20a_fecs_set_reglist_bind_inst(g,
+			gk20a_mem_phys(&mm->pmu.inst_block));
 	if (err) {
 		gk20a_err(dev_from_gk20a(g),
 			"fail to bind pmu inst to gr");
 		return err;
 	}
 
-	err = gr_gk20a_fecs_set_reglist_virtual_addr(g, pmu->pg_buf.pmu_va);
+	err = gr_gk20a_fecs_set_reglist_virtual_addr(g, pmu->pg_buf.gpu_va);
 	if (err) {
 		gk20a_err(dev_from_gk20a(g),
 			"fail to set pg buffer pmu va");
@@ -4716,15 +4641,6 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g)
 	}
 
 	return err;
-
-err_free_sgtable:
-	gk20a_free_sgtable(&sgt_pg_buf);
-err_free_pg_buf:
-	dma_free_coherent(d, size,
-		pmu->pg_buf.cpuva, pmu->pg_buf.iova);
-	pmu->pg_buf.cpuva = NULL;
-	pmu->pg_buf.iova = 0;
-	return err;
 }
 
 int gk20a_init_gr_support(struct gk20a *g)
@@ -4983,14 +4899,14 @@ int gk20a_gr_reset(struct gk20a *g)
 	}
 
 	err = gr_gk20a_fecs_set_reglist_bind_inst(g,
-		g->mm.pmu.inst_block.cpu_pa);
+		gk20a_mem_phys(&g->mm.pmu.inst_block));
 	if (err) {
 		gk20a_err(dev_from_gk20a(g),
 			"fail to bind pmu inst to gr");
 		return err;
 	}
 
-	err = gr_gk20a_fecs_set_reglist_virtual_addr(g, g->pmu.pg_buf.pmu_va);
+	err = gr_gk20a_fecs_set_reglist_virtual_addr(g, g->pmu.pg_buf.gpu_va);
 	if (err) {
 		gk20a_err(dev_from_gk20a(g),
 			"fail to set pg buffer pmu va");
@@ -5357,7 +5273,7 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx,
 	/* slow path */
 	for (chid = 0; chid < f->num_channels; chid++)
 		if (f->channel[chid].in_use) {
-			if ((u32)(f->channel[chid].inst_block.cpu_pa >>
+			if ((u32)(gk20a_mem_phys(&f->channel[chid].inst_block) >>
 				ram_in_base_shift_v()) ==
 				gr_fecs_current_ctx_ptr_v(curr_ctx)) {
 				tsgid = f->channel[chid].tsgid;