diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2014-11-12 07:22:35 -0500 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:12:19 -0400 |
commit | 2d71d633cf754e15c5667215c44086080c7c328d (patch) | |
tree | 62e64ee0c4aa8128abc66fa83a66c1dd678965b3 /drivers/gpu | |
parent | 1deb73b9c6512c6f0a296e35145c49233ea47f74 (diff) |
gpu: nvgpu: Physical page bits to be per chip
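Retrieve the number of physical address bits per chip through a new gpu_ops hook, mm.get_physical_addr_bits(), instead of the hard-coded NV_MC_SMMU_VADDR_TRANSLATION_BIT (34) macro.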
Bug 1567274
Change-Id: I5a0f6a66be37f2cf720d66b5bdb2b704cd992234
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/601700
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ltc_common.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 45 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 15 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/acr_gm20b.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 5 |
11 files changed, 47 insertions, 37 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index cffac380..de51e83e 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -98,7 +98,7 @@ int channel_gk20a_commit_va(struct channel_gk20a *c) | |||
98 | if (!inst_ptr) | 98 | if (!inst_ptr) |
99 | return -ENOMEM; | 99 | return -ENOMEM; |
100 | 100 | ||
101 | addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl); | 101 | addr = gk20a_mm_iova_addr(c->g, c->vm->pdes.sgt->sgl); |
102 | addr_lo = u64_lo32(addr >> 12); | 102 | addr_lo = u64_lo32(addr >> 12); |
103 | addr_hi = u64_hi32(addr); | 103 | addr_hi = u64_hi32(addr); |
104 | 104 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 64203027..a872e304 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -581,7 +581,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) | |||
581 | f->channel[chid].userd_cpu_va = | 581 | f->channel[chid].userd_cpu_va = |
582 | f->userd.cpuva + chid * f->userd_entry_size; | 582 | f->userd.cpuva + chid * f->userd_entry_size; |
583 | f->channel[chid].userd_iova = | 583 | f->channel[chid].userd_iova = |
584 | NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova) | 584 | gk20a_mm_smmu_vaddr_translate(g, f->userd.iova) |
585 | + chid * f->userd_entry_size; | 585 | + chid * f->userd_entry_size; |
586 | f->channel[chid].userd_gpu_va = | 586 | f->channel[chid].userd_gpu_va = |
587 | f->userd.gpu_va + chid * f->userd_entry_size; | 587 | f->userd.gpu_va + chid * f->userd_entry_size; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 0bbc66cf..5ca07d7f 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -312,6 +312,7 @@ struct gpu_ops { | |||
312 | void (*set_big_page_size)(struct gk20a *g, | 312 | void (*set_big_page_size)(struct gk20a *g, |
313 | void *inst_ptr, int size); | 313 | void *inst_ptr, int size); |
314 | u32 (*get_big_page_sizes)(void); | 314 | u32 (*get_big_page_sizes)(void); |
315 | u32 (*get_physical_addr_bits)(struct gk20a *g); | ||
315 | } mm; | 316 | } mm; |
316 | struct { | 317 | struct { |
317 | int (*prepare_ucode)(struct gk20a *g); | 318 | int (*prepare_ucode)(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 36636d4f..482b3c5f 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -1734,7 +1734,7 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) | |||
1734 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), | 1734 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), |
1735 | ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); | 1735 | ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); |
1736 | 1736 | ||
1737 | pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl); | 1737 | pde_addr = gk20a_mm_iova_addr(g, vm->pdes.sgt->sgl); |
1738 | pde_addr_lo = u64_lo32(pde_addr >> 12); | 1738 | pde_addr_lo = u64_lo32(pde_addr >> 12); |
1739 | pde_addr_hi = u64_hi32(pde_addr); | 1739 | pde_addr_hi = u64_hi32(pde_addr); |
1740 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), | 1740 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), |
@@ -4255,7 +4255,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4255 | gk20a_dbg_fn(""); | 4255 | gk20a_dbg_fn(""); |
4256 | 4256 | ||
4257 | /* init mmu debug buffer */ | 4257 | /* init mmu debug buffer */ |
4258 | addr = NV_MC_SMMU_VADDR_TRANSLATE(gr->mmu_wr_mem.iova); | 4258 | addr = gk20a_mm_smmu_vaddr_translate(g, gr->mmu_wr_mem.iova); |
4259 | addr >>= fb_mmu_debug_wr_addr_alignment_v(); | 4259 | addr >>= fb_mmu_debug_wr_addr_alignment_v(); |
4260 | 4260 | ||
4261 | gk20a_writel(g, fb_mmu_debug_wr_r(), | 4261 | gk20a_writel(g, fb_mmu_debug_wr_r(), |
@@ -4263,7 +4263,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4263 | fb_mmu_debug_wr_vol_false_f() | | 4263 | fb_mmu_debug_wr_vol_false_f() | |
4264 | fb_mmu_debug_wr_addr_f(addr)); | 4264 | fb_mmu_debug_wr_addr_f(addr)); |
4265 | 4265 | ||
4266 | addr = NV_MC_SMMU_VADDR_TRANSLATE(gr->mmu_rd_mem.iova); | 4266 | addr = gk20a_mm_smmu_vaddr_translate(g, gr->mmu_rd_mem.iova); |
4267 | addr >>= fb_mmu_debug_rd_addr_alignment_v(); | 4267 | addr >>= fb_mmu_debug_rd_addr_alignment_v(); |
4268 | 4268 | ||
4269 | gk20a_writel(g, fb_mmu_debug_rd_r(), | 4269 | gk20a_writel(g, fb_mmu_debug_rd_r(), |
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c index e0ab3f9b..e230c4cd 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_common.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c | |||
@@ -173,7 +173,7 @@ static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr) | |||
173 | if (tegra_platform_is_linsim()) | 173 | if (tegra_platform_is_linsim()) |
174 | compbit_store_base_iova = gr->compbit_store.base_iova; | 174 | compbit_store_base_iova = gr->compbit_store.base_iova; |
175 | else | 175 | else |
176 | compbit_store_base_iova = NV_MC_SMMU_VADDR_TRANSLATE( | 176 | compbit_store_base_iova = gk20a_mm_smmu_vaddr_translate(g, |
177 | gr->compbit_store.base_iova); | 177 | gr->compbit_store.base_iova); |
178 | 178 | ||
179 | compbit_base_post_divide64 = compbit_store_base_iova >> | 179 | compbit_base_post_divide64 = compbit_store_base_iova >> |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 0d68464d..03a5dabb 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -597,7 +597,7 @@ int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm, | |||
597 | return err; | 597 | return err; |
598 | 598 | ||
599 | gk20a_dbg(gpu_dbg_pte, "pte = 0x%p, addr=%08llx, size %d", | 599 | gk20a_dbg(gpu_dbg_pte, "pte = 0x%p, addr=%08llx, size %d", |
600 | pte, gk20a_mm_iova_addr(sgt->sgl), pte_order); | 600 | pte, gk20a_mm_iova_addr(vm->mm->g, sgt->sgl), pte_order); |
601 | 601 | ||
602 | pte->ref = handle; | 602 | pte->ref = handle; |
603 | pte->sgt = sgt; | 603 | pte->sgt = sgt; |
@@ -1554,7 +1554,7 @@ dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) | |||
1554 | mutex_lock(&vm->update_gmmu_lock); | 1554 | mutex_lock(&vm->update_gmmu_lock); |
1555 | buffer = find_mapped_buffer_locked(&vm->mapped_buffers, gpu_vaddr); | 1555 | buffer = find_mapped_buffer_locked(&vm->mapped_buffers, gpu_vaddr); |
1556 | if (buffer) | 1556 | if (buffer) |
1557 | addr = gk20a_mm_iova_addr(buffer->sgt->sgl); | 1557 | addr = gk20a_mm_iova_addr(vm->mm->g, buffer->sgt->sgl); |
1558 | mutex_unlock(&vm->update_gmmu_lock); | 1558 | mutex_unlock(&vm->update_gmmu_lock); |
1559 | 1559 | ||
1560 | return addr; | 1560 | return addr; |
@@ -1657,16 +1657,19 @@ void gk20a_free_sgtable(struct sg_table **sgt) | |||
1657 | *sgt = NULL; | 1657 | *sgt = NULL; |
1658 | } | 1658 | } |
1659 | 1659 | ||
1660 | u64 gk20a_mm_iova_addr(struct scatterlist *sgl) | 1660 | u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova) |
1661 | { | ||
1662 | return iova | 1ULL << g->ops.mm.get_physical_addr_bits(g); | ||
1663 | } | ||
1664 | |||
1665 | u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl) | ||
1661 | { | 1666 | { |
1662 | u64 result = sg_phys(sgl); | 1667 | u64 result = sg_phys(sgl); |
1663 | #ifdef CONFIG_TEGRA_IOMMU_SMMU | 1668 | #ifdef CONFIG_TEGRA_IOMMU_SMMU |
1664 | if (sg_dma_address(sgl) == DMA_ERROR_CODE) | 1669 | if (sg_dma_address(sgl) == DMA_ERROR_CODE) |
1665 | result = 0; | 1670 | result = 0; |
1666 | else if (sg_dma_address(sgl)) { | 1671 | else if (sg_dma_address(sgl)) |
1667 | result = sg_dma_address(sgl) | | 1672 | result = gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl)); |
1668 | 1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT; | ||
1669 | } | ||
1670 | #endif | 1673 | #endif |
1671 | return result; | 1674 | return result; |
1672 | } | 1675 | } |
@@ -1709,7 +1712,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
1709 | BUG_ON(space_to_skip & (page_size - 1)); | 1712 | BUG_ON(space_to_skip & (page_size - 1)); |
1710 | 1713 | ||
1711 | while (space_to_skip > 0 && cur_chunk) { | 1714 | while (space_to_skip > 0 && cur_chunk) { |
1712 | u64 new_addr = gk20a_mm_iova_addr(cur_chunk); | 1715 | u64 new_addr = gk20a_mm_iova_addr(vm->mm->g, cur_chunk); |
1713 | if (new_addr) { | 1716 | if (new_addr) { |
1714 | addr = new_addr; | 1717 | addr = new_addr; |
1715 | addr += cur_offset; | 1718 | addr += cur_offset; |
@@ -1759,7 +1762,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
1759 | gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); | 1762 | gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); |
1760 | for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { | 1763 | for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { |
1761 | if (likely(sgt)) { | 1764 | if (likely(sgt)) { |
1762 | u64 new_addr = gk20a_mm_iova_addr(cur_chunk); | 1765 | u64 new_addr = gk20a_mm_iova_addr(vm->mm->g, |
1766 | cur_chunk); | ||
1763 | if (new_addr) { | 1767 | if (new_addr) { |
1764 | addr = new_addr; | 1768 | addr = new_addr; |
1765 | addr += cur_offset; | 1769 | addr += cur_offset; |
@@ -1886,11 +1890,11 @@ void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i) | |||
1886 | 1890 | ||
1887 | if (small_valid) | 1891 | if (small_valid) |
1888 | pte_addr[gmmu_page_size_small] = | 1892 | pte_addr[gmmu_page_size_small] = |
1889 | gk20a_mm_iova_addr(small_pte->sgt->sgl); | 1893 | gk20a_mm_iova_addr(vm->mm->g, small_pte->sgt->sgl); |
1890 | 1894 | ||
1891 | if (big_valid) | 1895 | if (big_valid) |
1892 | pte_addr[gmmu_page_size_big] = | 1896 | pte_addr[gmmu_page_size_big] = |
1893 | gk20a_mm_iova_addr(big_pte->sgt->sgl); | 1897 | gk20a_mm_iova_addr(vm->mm->g, big_pte->sgt->sgl); |
1894 | 1898 | ||
1895 | pde_v[0] = gmmu_pde_size_full_f(); | 1899 | pde_v[0] = gmmu_pde_size_full_f(); |
1896 | pde_v[0] |= big_valid ? | 1900 | pde_v[0] |= big_valid ? |
@@ -2270,7 +2274,7 @@ static int gk20a_init_vm(struct mm_gk20a *mm, | |||
2270 | goto clean_up_ptes; | 2274 | goto clean_up_ptes; |
2271 | } | 2275 | } |
2272 | gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx", | 2276 | gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx", |
2273 | vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); | 2277 | vm->pdes.kv, gk20a_mm_iova_addr(vm->mm->g, vm->pdes.sgt->sgl)); |
2274 | /* we could release vm->pdes.kv but it's only one page... */ | 2278 | /* we could release vm->pdes.kv but it's only one page... */ |
2275 | 2279 | ||
2276 | /* low-half: alloc small pages */ | 2280 | /* low-half: alloc small pages */ |
@@ -2728,9 +2732,9 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm) | |||
2728 | mm->bar1.aperture_size, false, "bar1"); | 2732 | mm->bar1.aperture_size, false, "bar1"); |
2729 | 2733 | ||
2730 | gk20a_dbg_info("pde pa=0x%llx", | 2734 | gk20a_dbg_info("pde pa=0x%llx", |
2731 | (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); | 2735 | (u64)gk20a_mm_iova_addr(g, vm->pdes.sgt->sgl)); |
2732 | 2736 | ||
2733 | pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl); | 2737 | pde_addr = gk20a_mm_iova_addr(g, vm->pdes.sgt->sgl); |
2734 | pde_addr_lo = u64_lo32(pde_addr >> ram_in_base_shift_v()); | 2738 | pde_addr_lo = u64_lo32(pde_addr >> ram_in_base_shift_v()); |
2735 | pde_addr_hi = u64_hi32(pde_addr); | 2739 | pde_addr_hi = u64_hi32(pde_addr); |
2736 | 2740 | ||
@@ -2814,9 +2818,9 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm) | |||
2814 | SZ_128K << 10, GK20A_PMU_VA_SIZE, false, "system"); | 2818 | SZ_128K << 10, GK20A_PMU_VA_SIZE, false, "system"); |
2815 | 2819 | ||
2816 | gk20a_dbg_info("pde pa=0x%llx", | 2820 | gk20a_dbg_info("pde pa=0x%llx", |
2817 | (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); | 2821 | (u64)gk20a_mm_iova_addr(g, vm->pdes.sgt->sgl)); |
2818 | 2822 | ||
2819 | pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl); | 2823 | pde_addr = gk20a_mm_iova_addr(g, vm->pdes.sgt->sgl); |
2820 | pde_addr_lo = u64_lo32(pde_addr >> ram_in_base_shift_v()); | 2824 | pde_addr_lo = u64_lo32(pde_addr >> ram_in_base_shift_v()); |
2821 | pde_addr_hi = u64_hi32(pde_addr); | 2825 | pde_addr_hi = u64_hi32(pde_addr); |
2822 | 2826 | ||
@@ -3034,7 +3038,8 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, | |||
3034 | void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) | 3038 | void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) |
3035 | { | 3039 | { |
3036 | struct gk20a *g = gk20a_from_vm(vm); | 3040 | struct gk20a *g = gk20a_from_vm(vm); |
3037 | u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12); | 3041 | u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->mm->g, |
3042 | vm->pdes.sgt->sgl) >> 12); | ||
3038 | u32 data; | 3043 | u32 data; |
3039 | s32 retry = 200; | 3044 | s32 retry = 200; |
3040 | static DEFINE_MUTEX(tlb_lock); | 3045 | static DEFINE_MUTEX(tlb_lock); |
@@ -3116,6 +3121,11 @@ bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g) | |||
3116 | fb_mmu_debug_ctrl_debug_enabled_v(); | 3121 | fb_mmu_debug_ctrl_debug_enabled_v(); |
3117 | } | 3122 | } |
3118 | 3123 | ||
3124 | u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g) | ||
3125 | { | ||
3126 | return 34; | ||
3127 | } | ||
3128 | |||
3119 | void gk20a_init_mm(struct gpu_ops *gops) | 3129 | void gk20a_init_mm(struct gpu_ops *gops) |
3120 | { | 3130 | { |
3121 | /* remember to remove NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS in | 3131 | /* remember to remove NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS in |
@@ -3134,5 +3144,6 @@ void gk20a_init_mm(struct gpu_ops *gops) | |||
3134 | gops->mm.l2_invalidate = gk20a_mm_l2_invalidate; | 3144 | gops->mm.l2_invalidate = gk20a_mm_l2_invalidate; |
3135 | gops->mm.l2_flush = gk20a_mm_l2_flush; | 3145 | gops->mm.l2_flush = gk20a_mm_l2_flush; |
3136 | gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate; | 3146 | gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate; |
3147 | gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits; | ||
3137 | } | 3148 | } |
3138 | 3149 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index efed79f8..d6cb74de 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -25,15 +25,6 @@ | |||
25 | #include <asm/cacheflush.h> | 25 | #include <asm/cacheflush.h> |
26 | #include "gk20a_allocator.h" | 26 | #include "gk20a_allocator.h" |
27 | 27 | ||
28 | /* This "address bit" in the gmmu ptes (and other gk20a accesses) | ||
29 | * signals the address as presented should be translated by the SMMU. | ||
30 | * Without this bit present gk20a accesses are *not* translated. | ||
31 | */ | ||
32 | /* Hack, get this from manuals somehow... */ | ||
33 | #define NV_MC_SMMU_VADDR_TRANSLATION_BIT 34 | ||
34 | #define NV_MC_SMMU_VADDR_TRANSLATE(x) (x | \ | ||
35 | (1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT)) | ||
36 | |||
37 | /* For now keep the size relatively small-ish compared to the full | 28 | /* For now keep the size relatively small-ish compared to the full |
38 | * 40b va. 32GB for now. It consists of two 16GB spaces. */ | 29 | * 40b va. 32GB for now. It consists of two 16GB spaces. */ |
39 | #define NV_GMMU_VA_RANGE 35ULL | 30 | #define NV_GMMU_VA_RANGE 35ULL |
@@ -360,6 +351,7 @@ struct mm_gk20a { | |||
360 | 351 | ||
361 | void (*remove_support)(struct mm_gk20a *mm); | 352 | void (*remove_support)(struct mm_gk20a *mm); |
362 | bool sw_ready; | 353 | bool sw_ready; |
354 | int physical_bits; | ||
363 | #ifdef CONFIG_DEBUG_FS | 355 | #ifdef CONFIG_DEBUG_FS |
364 | u32 ltc_enabled; | 356 | u32 ltc_enabled; |
365 | u32 ltc_enabled_debug; | 357 | u32 ltc_enabled_debug; |
@@ -420,7 +412,8 @@ int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt, | |||
420 | 412 | ||
421 | void gk20a_free_sgtable(struct sg_table **sgt); | 413 | void gk20a_free_sgtable(struct sg_table **sgt); |
422 | 414 | ||
423 | u64 gk20a_mm_iova_addr(struct scatterlist *sgl); | 415 | u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl); |
416 | u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova); | ||
424 | 417 | ||
425 | void gk20a_mm_ltc_isr(struct gk20a *g); | 418 | void gk20a_mm_ltc_isr(struct gk20a *g); |
426 | 419 | ||
@@ -557,6 +550,8 @@ void free_gmmu_pages(struct vm_gk20a *vm, void *handle, | |||
557 | size_t size); | 550 | size_t size); |
558 | void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i); | 551 | void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i); |
559 | 552 | ||
553 | u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g); | ||
554 | |||
560 | struct gpu_ops; | 555 | struct gpu_ops; |
561 | void gk20a_init_mm(struct gpu_ops *gops); | 556 | void gk20a_init_mm(struct gpu_ops *gops); |
562 | #endif /* MM_GK20A_H */ | 557 | #endif /* MM_GK20A_H */ |
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c index 470a93bc..cb874a48 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c | |||
@@ -258,7 +258,8 @@ int prepare_ucode_blob(struct gk20a *g) | |||
258 | gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n", | 258 | gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n", |
259 | plsfm->managed_flcn_cnt, plsfm->wpr_size); | 259 | plsfm->managed_flcn_cnt, plsfm->wpr_size); |
260 | lsfm_init_wpr_contents(g, plsfm, nonwpr_addr); | 260 | lsfm_init_wpr_contents(g, plsfm, nonwpr_addr); |
261 | g->acr.ucode_blob_start = NV_MC_SMMU_VADDR_TRANSLATE(iova); | 261 | g->acr.ucode_blob_start = |
262 | gk20a_mm_smmu_vaddr_translate(g, iova); | ||
262 | g->acr.ucode_blob_size = plsfm->wpr_size; | 263 | g->acr.ucode_blob_size = plsfm->wpr_size; |
263 | gm20b_dbg_pmu("base reg carveout 2:%x\n", | 264 | gm20b_dbg_pmu("base reg carveout 2:%x\n", |
264 | readl(mc + MC_SECURITY_CARVEOUT2_BOM_0)); | 265 | readl(mc + MC_SECURITY_CARVEOUT2_BOM_0)); |
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index 678ef4fd..0113e227 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c | |||
@@ -301,4 +301,5 @@ void gm20b_init_mm(struct gpu_ops *gops) | |||
301 | gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate; | 301 | gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate; |
302 | gops->mm.set_big_page_size = gm20b_mm_set_big_page_size; | 302 | gops->mm.set_big_page_size = gm20b_mm_set_big_page_size; |
303 | gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes; | 303 | gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes; |
304 | gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits; | ||
304 | } | 305 | } |
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index 80a89e1e..24b9f4be 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | |||
@@ -317,7 +317,7 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) | |||
317 | f->channel[chid].userd_cpu_va = | 317 | f->channel[chid].userd_cpu_va = |
318 | f->userd.cpuva + chid * f->userd_entry_size; | 318 | f->userd.cpuva + chid * f->userd_entry_size; |
319 | f->channel[chid].userd_iova = | 319 | f->channel[chid].userd_iova = |
320 | NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova) | 320 | gk20a_mm_smmu_vaddr_translate(g, f->userd.iova) |
321 | + chid * f->userd_entry_size; | 321 | + chid * f->userd_entry_size; |
322 | f->channel[chid].userd_gpu_va = | 322 | f->channel[chid].userd_gpu_va = |
323 | f->userd.gpu_va + chid * f->userd_entry_size; | 323 | f->userd.gpu_va + chid * f->userd_entry_size; |
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index 7f1a5856..20f2b5ee 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c | |||
@@ -74,7 +74,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, | |||
74 | struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); | 74 | struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); |
75 | struct tegra_vgpu_cmd_msg msg; | 75 | struct tegra_vgpu_cmd_msg msg; |
76 | struct tegra_vgpu_as_map_params *p = &msg.params.as_map; | 76 | struct tegra_vgpu_as_map_params *p = &msg.params.as_map; |
77 | u64 addr = gk20a_mm_iova_addr(sgt->sgl); | 77 | u64 addr = gk20a_mm_iova_addr(g, sgt->sgl); |
78 | u8 prot; | 78 | u8 prot; |
79 | 79 | ||
80 | gk20a_dbg_fn(""); | 80 | gk20a_dbg_fn(""); |
@@ -210,7 +210,7 @@ u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size) | |||
210 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | 210 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); |
211 | struct dma_iommu_mapping *mapping = | 211 | struct dma_iommu_mapping *mapping = |
212 | to_dma_iommu_mapping(dev_from_gk20a(g)); | 212 | to_dma_iommu_mapping(dev_from_gk20a(g)); |
213 | u64 addr = gk20a_mm_iova_addr((*sgt)->sgl); | 213 | u64 addr = gk20a_mm_iova_addr(g, (*sgt)->sgl); |
214 | struct tegra_vgpu_cmd_msg msg; | 214 | struct tegra_vgpu_cmd_msg msg; |
215 | struct tegra_vgpu_as_map_params *p = &msg.params.as_map; | 215 | struct tegra_vgpu_as_map_params *p = &msg.params.as_map; |
216 | int err; | 216 | int err; |
@@ -429,4 +429,5 @@ void vgpu_init_mm_ops(struct gpu_ops *gops) | |||
429 | gops->mm.l2_invalidate = vgpu_mm_l2_invalidate; | 429 | gops->mm.l2_invalidate = vgpu_mm_l2_invalidate; |
430 | gops->mm.l2_flush = vgpu_mm_l2_flush; | 430 | gops->mm.l2_flush = vgpu_mm_l2_flush; |
431 | gops->mm.tlb_invalidate = vgpu_mm_tlb_invalidate; | 431 | gops->mm.tlb_invalidate = vgpu_mm_tlb_invalidate; |
432 | gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits; | ||
432 | } | 433 | } |