From 2d71d633cf754e15c5667215c44086080c7c328d Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Wed, 12 Nov 2014 14:22:35 +0200 Subject: gpu: nvgpu: Physical page bits to be per chip Retrieve number of physical page bits based on chip. Bug 1567274 Change-Id: I5a0f6a66be37f2cf720d66b5bdb2b704cd992234 Signed-off-by: Terje Bergstrom Reviewed-on: http://git-master/r/601700 --- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 2 +- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 2 +- drivers/gpu/nvgpu/gk20a/gk20a.h | 1 + drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 6 ++--- drivers/gpu/nvgpu/gk20a/ltc_common.c | 2 +- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 45 ++++++++++++++++++++------------- drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 15 ++++------- drivers/gpu/nvgpu/gm20b/acr_gm20b.c | 3 ++- drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 1 + drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 2 +- drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 5 ++-- 11 files changed, 47 insertions(+), 37 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index cffac380..de51e83e 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -98,7 +98,7 @@ int channel_gk20a_commit_va(struct channel_gk20a *c) if (!inst_ptr) return -ENOMEM; - addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl); + addr = gk20a_mm_iova_addr(c->g, c->vm->pdes.sgt->sgl); addr_lo = u64_lo32(addr >> 12); addr_hi = u64_hi32(addr); diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 64203027..a872e304 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -581,7 +581,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) f->channel[chid].userd_cpu_va = f->userd.cpuva + chid * f->userd_entry_size; f->channel[chid].userd_iova = - NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova) + gk20a_mm_smmu_vaddr_translate(g, f->userd.iova) + chid * f->userd_entry_size; f->channel[chid].userd_gpu_va = f->userd.gpu_va + chid * f->userd_entry_size; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 0bbc66cf..5ca07d7f 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -312,6 +312,7 @@ struct gpu_ops { void (*set_big_page_size)(struct gk20a *g, void *inst_ptr, int size); u32 (*get_big_page_sizes)(void); + u32 (*get_physical_addr_bits)(struct gk20a *g); } mm; struct { int (*prepare_ucode)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 36636d4f..482b3c5f 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -1734,7 +1734,7 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); - pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl); + pde_addr = gk20a_mm_iova_addr(g, vm->pdes.sgt->sgl); pde_addr_lo = u64_lo32(pde_addr >> 12); pde_addr_hi = u64_hi32(pde_addr); gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), @@ -4255,7 +4255,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) gk20a_dbg_fn(""); /* init mmu debug buffer */ - addr = NV_MC_SMMU_VADDR_TRANSLATE(gr->mmu_wr_mem.iova); + addr = gk20a_mm_smmu_vaddr_translate(g, gr->mmu_wr_mem.iova); addr >>= fb_mmu_debug_wr_addr_alignment_v(); gk20a_writel(g, fb_mmu_debug_wr_r(), @@ -4263,7 +4263,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) fb_mmu_debug_wr_vol_false_f() | fb_mmu_debug_wr_addr_f(addr)); - addr = NV_MC_SMMU_VADDR_TRANSLATE(gr->mmu_rd_mem.iova); + addr = gk20a_mm_smmu_vaddr_translate(g, gr->mmu_rd_mem.iova); addr >>= fb_mmu_debug_rd_addr_alignment_v(); gk20a_writel(g, fb_mmu_debug_rd_r(), diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c index e0ab3f9b..e230c4cd 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_common.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c @@ -173,7 +173,7 @@ static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr) if (tegra_platform_is_linsim()) compbit_store_base_iova = gr->compbit_store.base_iova; else - compbit_store_base_iova = NV_MC_SMMU_VADDR_TRANSLATE( + compbit_store_base_iova = gk20a_mm_smmu_vaddr_translate(g, gr->compbit_store.base_iova); compbit_base_post_divide64 = compbit_store_base_iova >> diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 0d68464d..03a5dabb 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -597,7 +597,7 @@ int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm, return err; gk20a_dbg(gpu_dbg_pte, "pte = 0x%p, addr=%08llx, size %d", - pte, gk20a_mm_iova_addr(sgt->sgl), pte_order); + pte, gk20a_mm_iova_addr(vm->mm->g, sgt->sgl), pte_order); pte->ref = handle; pte->sgt = sgt; @@ -1554,7 +1554,7 @@ dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) mutex_lock(&vm->update_gmmu_lock); buffer = find_mapped_buffer_locked(&vm->mapped_buffers, gpu_vaddr); if (buffer) - addr = gk20a_mm_iova_addr(buffer->sgt->sgl); + addr = gk20a_mm_iova_addr(vm->mm->g, buffer->sgt->sgl); mutex_unlock(&vm->update_gmmu_lock); return addr; @@ -1657,16 +1657,19 @@ void gk20a_free_sgtable(struct sg_table **sgt) *sgt = NULL; } -u64 gk20a_mm_iova_addr(struct scatterlist *sgl) +u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova) +{ + return iova | 1ULL << g->ops.mm.get_physical_addr_bits(g); +} + +u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl) { u64 result = sg_phys(sgl); #ifdef CONFIG_TEGRA_IOMMU_SMMU if (sg_dma_address(sgl) == DMA_ERROR_CODE) result = 0; - else if (sg_dma_address(sgl)) { - result = sg_dma_address(sgl) | - 1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT; - } + else if (sg_dma_address(sgl)) + result = gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl)); #endif return result; } @@ -1709,7 +1712,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, BUG_ON(space_to_skip & (page_size - 1)); while (space_to_skip > 0 && cur_chunk) { - u64 new_addr = gk20a_mm_iova_addr(cur_chunk); + u64 new_addr = gk20a_mm_iova_addr(vm->mm->g, cur_chunk); if (new_addr) { addr = new_addr; addr += cur_offset; @@ -1759,7 +1762,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { if (likely(sgt)) { - u64 new_addr = gk20a_mm_iova_addr(cur_chunk); + u64 new_addr = gk20a_mm_iova_addr(vm->mm->g, + cur_chunk); if (new_addr) { addr = new_addr; addr += cur_offset; @@ -1886,11 +1890,11 @@ void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i) if (small_valid) pte_addr[gmmu_page_size_small] = - gk20a_mm_iova_addr(small_pte->sgt->sgl); + gk20a_mm_iova_addr(vm->mm->g, small_pte->sgt->sgl); if (big_valid) pte_addr[gmmu_page_size_big] = - gk20a_mm_iova_addr(big_pte->sgt->sgl); + gk20a_mm_iova_addr(vm->mm->g, big_pte->sgt->sgl); pde_v[0] = gmmu_pde_size_full_f(); pde_v[0] |= big_valid ? @@ -2270,7 +2274,7 @@ static int gk20a_init_vm(struct mm_gk20a *mm, goto clean_up_ptes; } gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx", - vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); + vm->pdes.kv, gk20a_mm_iova_addr(vm->mm->g, vm->pdes.sgt->sgl)); /* we could release vm->pdes.kv but it's only one page... */ /* low-half: alloc small pages */ @@ -2728,9 +2732,9 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm) mm->bar1.aperture_size, false, "bar1"); gk20a_dbg_info("pde pa=0x%llx", - (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); + (u64)gk20a_mm_iova_addr(g, vm->pdes.sgt->sgl)); - pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl); + pde_addr = gk20a_mm_iova_addr(g, vm->pdes.sgt->sgl); pde_addr_lo = u64_lo32(pde_addr >> ram_in_base_shift_v()); pde_addr_hi = u64_hi32(pde_addr); @@ -2814,9 +2818,9 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm) SZ_128K << 10, GK20A_PMU_VA_SIZE, false, "system"); gk20a_dbg_info("pde pa=0x%llx", - (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); + (u64)gk20a_mm_iova_addr(g, vm->pdes.sgt->sgl)); - pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl); + pde_addr = gk20a_mm_iova_addr(g, vm->pdes.sgt->sgl); pde_addr_lo = u64_lo32(pde_addr >> ram_in_base_shift_v()); pde_addr_hi = u64_hi32(pde_addr); @@ -3034,7 +3038,8 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) { struct gk20a *g = gk20a_from_vm(vm); - u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12); + u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->mm->g, + vm->pdes.sgt->sgl) >> 12); u32 data; s32 retry = 200; static DEFINE_MUTEX(tlb_lock); @@ -3116,6 +3121,11 @@ bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g) fb_mmu_debug_ctrl_debug_enabled_v(); } +u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g) +{ + return 34; +} + void gk20a_init_mm(struct gpu_ops *gops) { /* remember to remove NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS in @@ -3134,5 +3144,6 @@ void gk20a_init_mm(struct gpu_ops *gops) gops->mm.l2_invalidate = gk20a_mm_l2_invalidate; gops->mm.l2_flush = gk20a_mm_l2_flush; gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate; + gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits; } diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index efed79f8..d6cb74de 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -25,15 +25,6 @@ #include #include "gk20a_allocator.h" -/* This "address bit" in the gmmu ptes (and other gk20a accesses) - * signals the address as presented should be translated by the SMMU. - * Without this bit present gk20a accesses are *not* translated. - */ -/* Hack, get this from manuals somehow... */ -#define NV_MC_SMMU_VADDR_TRANSLATION_BIT 34 -#define NV_MC_SMMU_VADDR_TRANSLATE(x) (x | \ - (1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT)) - /* For now keep the size relatively small-ish compared to the full * 40b va. 32GB for now. It consists of two 16GB spaces. */ #define NV_GMMU_VA_RANGE 35ULL @@ -360,6 +351,7 @@ struct mm_gk20a { void (*remove_support)(struct mm_gk20a *mm); bool sw_ready; + int physical_bits; #ifdef CONFIG_DEBUG_FS u32 ltc_enabled; u32 ltc_enabled_debug; @@ -420,7 +412,8 @@ int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt, void gk20a_free_sgtable(struct sg_table **sgt); -u64 gk20a_mm_iova_addr(struct scatterlist *sgl); +u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl); +u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova); void gk20a_mm_ltc_isr(struct gk20a *g); @@ -557,6 +550,8 @@ void free_gmmu_pages(struct vm_gk20a *vm, void *handle, size_t size); void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i); +u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g); + struct gpu_ops; void gk20a_init_mm(struct gpu_ops *gops); #endif /* MM_GK20A_H */ diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c index 470a93bc..cb874a48 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c @@ -258,7 +258,8 @@ int prepare_ucode_blob(struct gk20a *g) gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n", plsfm->managed_flcn_cnt, plsfm->wpr_size); lsfm_init_wpr_contents(g, plsfm, nonwpr_addr); - g->acr.ucode_blob_start = NV_MC_SMMU_VADDR_TRANSLATE(iova); + g->acr.ucode_blob_start = + gk20a_mm_smmu_vaddr_translate(g, iova); g->acr.ucode_blob_size = plsfm->wpr_size; gm20b_dbg_pmu("base reg carveout 2:%x\n", readl(mc + MC_SECURITY_CARVEOUT2_BOM_0)); diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index 678ef4fd..0113e227 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c @@ -301,4 +301,5 @@ void gm20b_init_mm(struct gpu_ops *gops) gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate; gops->mm.set_big_page_size = gm20b_mm_set_big_page_size; gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes; + gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits; } diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index 80a89e1e..24b9f4be 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c @@ -317,7 +317,7 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) f->channel[chid].userd_cpu_va = f->userd.cpuva + chid * f->userd_entry_size; f->channel[chid].userd_iova = - NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova) + gk20a_mm_smmu_vaddr_translate(g, f->userd.iova) + chid * f->userd_entry_size; f->channel[chid].userd_gpu_va = f->userd.gpu_va + chid * f->userd_entry_size; diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index 7f1a5856..20f2b5ee 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c @@ -74,7 +74,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_as_map_params *p = &msg.params.as_map; - u64 addr = gk20a_mm_iova_addr(sgt->sgl); + u64 addr = gk20a_mm_iova_addr(g, sgt->sgl); u8 prot; gk20a_dbg_fn(""); @@ -210,7 +210,7 @@ u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size) struct gk20a_platform *platform = gk20a_get_platform(g->dev); struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev_from_gk20a(g)); - u64 addr = gk20a_mm_iova_addr((*sgt)->sgl); + u64 addr = gk20a_mm_iova_addr(g, (*sgt)->sgl); struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_as_map_params *p = &msg.params.as_map; int err; @@ -429,4 +429,5 @@ void vgpu_init_mm_ops(struct gpu_ops *gops) gops->mm.l2_invalidate = vgpu_mm_l2_invalidate; gops->mm.l2_flush = vgpu_mm_l2_flush; gops->mm.tlb_invalidate = vgpu_mm_tlb_invalidate; + gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits; } -- cgit v1.2.2