Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 521
1 file changed, 0 insertions(+), 521 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index a1873a30..e7bcf6f0 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -124,15 +124,6 @@ struct nvgpu_page_alloc *get_vidmem_page_alloc(struct scatterlist *sgl)
  *
  */
 
-static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
-                                   enum gmmu_pgsz_gk20a pgsz_idx,
-                                   struct sg_table *sgt, u64 buffer_offset,
-                                   u64 first_vaddr, u64 last_vaddr,
-                                   u8 kind_v, u32 ctag_offset, bool cacheable,
-                                   bool umapped_pte, int rw_flag,
-                                   bool sparse,
-                                   bool priv,
-                                   enum nvgpu_aperture aperture);
 static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
@@ -781,104 +772,6 @@ void gk20a_init_mm_ce_context(struct gk20a *g)
 #endif
 }
 
-static void free_gmmu_phys_pages(struct vm_gk20a *vm,
-                                 struct gk20a_mm_entry *entry)
-{
-        gk20a_dbg_fn("");
-
-        /* note: mem_desc slightly abused (wrt. free_gmmu_pages) */
-
-        free_pages((unsigned long)entry->mem.cpu_va, get_order(entry->mem.size));
-        entry->mem.cpu_va = NULL;
-
-        sg_free_table(entry->mem.priv.sgt);
-        nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt);
-        entry->mem.priv.sgt = NULL;
-        entry->mem.size = 0;
-        entry->mem.aperture = APERTURE_INVALID;
-}
-
-static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry)
-{
-        FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-                         sg_phys(entry->mem.priv.sgt->sgl),
-                         entry->mem.priv.sgt->sgl->length);
-        return 0;
-}
-
-static void unmap_gmmu_phys_pages(struct gk20a_mm_entry *entry)
-{
-        FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-                         sg_phys(entry->mem.priv.sgt->sgl),
-                         entry->mem.priv.sgt->sgl->length);
-}
-
-void free_gmmu_pages(struct vm_gk20a *vm,
-                     struct gk20a_mm_entry *entry)
-{
-        struct gk20a *g = gk20a_from_vm(vm);
-
-        gk20a_dbg_fn("");
-
-        if (!entry->mem.size)
-                return;
-
-        if (entry->woffset) /* fake shadow mem */
-                return;
-
-        if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
-                free_gmmu_phys_pages(vm, entry);
-                return;
-        }
-
-        nvgpu_dma_free(g, &entry->mem);
-}
-
-int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
-{
-        gk20a_dbg_fn("");
-
-        if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
-                return map_gmmu_phys_pages(entry);
-
-        if (IS_ENABLED(CONFIG_ARM64)) {
-                if (entry->mem.aperture == APERTURE_VIDMEM)
-                        return 0;
-
-                FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-                                 sg_phys(entry->mem.priv.sgt->sgl),
-                                 entry->mem.size);
-        } else {
-                int err = nvgpu_mem_begin(g, &entry->mem);
-
-                if (err)
-                        return err;
-        }
-
-        return 0;
-}
-
-void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
-{
-        gk20a_dbg_fn("");
-
-        if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
-                unmap_gmmu_phys_pages(entry);
-                return;
-        }
-
-        if (IS_ENABLED(CONFIG_ARM64)) {
-                if (entry->mem.aperture == APERTURE_VIDMEM)
-                        return;
-
-                FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-                                 sg_phys(entry->mem.priv.sgt->sgl),
-                                 entry->mem.size);
-        } else {
-                nvgpu_mem_end(g, &entry->mem);
-        }
-}
-
 int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
 {
         return vm->mmu_levels[0].lo_bit[0];
@@ -909,21 +802,6 @@ static u32 pte_from_index(u32 i)
         return i * gmmu_pte__size_v() / sizeof(u32);
 }
 
-u32 pte_index_from_vaddr(struct vm_gk20a *vm,
-                         u64 addr, enum gmmu_pgsz_gk20a pgsz_idx)
-{
-        u32 ret;
-        /* mask off pde part */
-        addr = addr & ((1ULL << gk20a_mm_pde_coverage_bit_count(vm)) - 1ULL);
-
-        /* shift over to get pte index. note assumption that pte index
-         * doesn't leak over into the high 32b */
-        ret = (u32)(addr >> ilog2(vm->gmmu_page_sizes[pgsz_idx]));
-
-        gk20a_dbg(gpu_dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret);
-        return ret;
-}
-
 int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
                          struct nvgpu_mapped_buf ***mapped_buffers,
                          int *num_buffers)
@@ -1096,141 +974,6 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
         return 0;
 }
 
-u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
-                          u64 map_offset,
-                          struct sg_table *sgt,
-                          u64 buffer_offset,
-                          u64 size,
-                          int pgsz_idx,
-                          u8 kind_v,
-                          u32 ctag_offset,
-                          u32 flags,
-                          int rw_flag,
-                          bool clear_ctags,
-                          bool sparse,
-                          bool priv,
-                          struct vm_gk20a_mapping_batch *batch,
-                          enum nvgpu_aperture aperture)
-{
-        int err = 0;
-        bool allocated = false;
-        struct gk20a *g = gk20a_from_vm(vm);
-        int ctag_granularity = g->ops.fb.compression_page_size(g);
-        u32 ctag_lines = DIV_ROUND_UP_ULL(size, ctag_granularity);
-
-        /* Allocate (or validate when map_offset != 0) the virtual address. */
-        if (!map_offset) {
-                map_offset = __nvgpu_vm_alloc_va(vm, size,
-                                                 pgsz_idx);
-                if (!map_offset) {
-                        nvgpu_err(g, "failed to allocate va space");
-                        err = -ENOMEM;
-                        goto fail_alloc;
-                }
-                allocated = true;
-        }
-
-        gk20a_dbg(gpu_dbg_map,
-                  "gv: 0x%04x_%08x + 0x%-7llx "
-                  "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
-                  "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
-                  "kind=0x%x flags=0x%x apt=%s",
-                  u64_hi32(map_offset), u64_lo32(map_offset), size,
-                  sgt ? u64_hi32((u64)sg_dma_address(sgt->sgl)) : 0,
-                  sgt ? u64_lo32((u64)sg_dma_address(sgt->sgl)) : 0,
-                  sgt ? u64_hi32((u64)sg_phys(sgt->sgl)) : 0,
-                  sgt ? u64_lo32((u64)sg_phys(sgt->sgl)) : 0,
-                  vm->gmmu_page_sizes[pgsz_idx] >> 10, vm_aspace_id(vm),
-                  ctag_lines, ctag_offset,
-                  kind_v, flags, nvgpu_aperture_str(aperture));
-
-        err = update_gmmu_ptes_locked(vm, pgsz_idx,
-                                      sgt,
-                                      buffer_offset,
-                                      map_offset, map_offset + size,
-                                      kind_v,
-                                      ctag_offset,
-                                      flags &
-                                      NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-                                      flags &
-                                      NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE,
-                                      rw_flag,
-                                      sparse,
-                                      priv,
-                                      aperture);
-        if (err) {
-                nvgpu_err(g, "failed to update ptes on map");
-                goto fail_validate;
-        }
-
-        if (!batch)
-                g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
-        else
-                batch->need_tlb_invalidate = true;
-
-        return map_offset;
-fail_validate:
-        if (allocated)
-                __nvgpu_vm_free_va(vm, map_offset, pgsz_idx);
-fail_alloc:
-        nvgpu_err(g, "%s: failed with err=%d", __func__, err);
-        return 0;
-}
-
-void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
-                             u64 vaddr,
-                             u64 size,
-                             int pgsz_idx,
-                             bool va_allocated,
-                             int rw_flag,
-                             bool sparse,
-                             struct vm_gk20a_mapping_batch *batch)
-{
-        int err = 0;
-        struct gk20a *g = gk20a_from_vm(vm);
-
-        if (va_allocated) {
-                err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
-                if (err) {
-                        nvgpu_err(g, "failed to free va");
-                        return;
-                }
-        }
-
-        /* unmap here needs to know the page size we assigned at mapping */
-        err = update_gmmu_ptes_locked(vm,
-                                      pgsz_idx,
-                                      NULL, /* n/a for unmap */
-                                      0,
-                                      vaddr,
-                                      vaddr + size,
-                                      0, 0, false /* n/a for unmap */,
-                                      false, rw_flag,
-                                      sparse, 0,
-                                      APERTURE_INVALID); /* don't care for unmap */
-        if (err)
-                nvgpu_err(g, "failed to update gmmu ptes on unmap");
-
-        /* flush l2 so any dirty lines are written out *now*.
-         * also as we could potentially be switching this buffer
-         * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at
-         * some point in the future we need to invalidate l2. e.g. switching
-         * from a render buffer unmap (here) to later using the same memory
-         * for gmmu ptes. note the positioning of this relative to any smmu
-         * unmapping (below). */
-
-        if (!batch) {
-                gk20a_mm_l2_flush(g, true);
-                g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
-        } else {
-                if (!batch->gpu_l2_flushed) {
-                        gk20a_mm_l2_flush(g, true);
-                        batch->gpu_l2_flushed = true;
-                }
-                batch->need_tlb_invalidate = true;
-        }
-}
-
 enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
                                           struct dma_buf *dmabuf)
 {
@@ -2036,254 +1779,6 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
         return 0;
 }
 
-static int update_gmmu_level_locked(struct vm_gk20a *vm,
-                                    struct gk20a_mm_entry *pte,
-                                    enum gmmu_pgsz_gk20a pgsz_idx,
-                                    struct scatterlist **sgl,
-                                    u64 *offset,
-                                    u64 *iova,
-                                    u64 gpu_va, u64 gpu_end,
-                                    u8 kind_v, u64 *ctag,
-                                    bool cacheable, bool unmapped_pte,
-                                    int rw_flag,
-                                    bool sparse,
-                                    int lvl,
-                                    bool priv,
-                                    enum nvgpu_aperture aperture)
-{
-        struct gk20a *g = gk20a_from_vm(vm);
-        const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl];
-        const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1];
-        int err = 0;
-        u32 pde_i;
-        u64 pde_size = 1ULL << (u64)l->lo_bit[pgsz_idx];
-        struct gk20a_mm_entry *next_pte = NULL, *prev_pte = NULL;
-
-        gk20a_dbg_fn("");
-
-        pde_i = (gpu_va & ((1ULL << ((u64)l->hi_bit[pgsz_idx]+1)) - 1ULL))
-                >> (u64)l->lo_bit[pgsz_idx];
-
-        gk20a_dbg(gpu_dbg_pte, "size_idx=%d, l: %d, [%llx,%llx], iova=%llx",
-                  pgsz_idx, lvl, gpu_va, gpu_end-1, *iova);
-
-        while (gpu_va < gpu_end) {
-                u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end);
-
-                /* Allocate next level */
-                if (next_l->update_entry) {
-                        if (!pte->entries) {
-                                int num_entries =
-                                        1 <<
-                                        (l->hi_bit[pgsz_idx]
-                                         - l->lo_bit[pgsz_idx] + 1);
-                                pte->entries =
-                                        nvgpu_vzalloc(g,
-                                                sizeof(struct gk20a_mm_entry) *
-                                                num_entries);
-                                if (!pte->entries)
-                                        return -ENOMEM;
-                                pte->pgsz = pgsz_idx;
-                                pte->num_entries = num_entries;
-                        }
-                        prev_pte = next_pte;
-                        next_pte = pte->entries + pde_i;
-
-                        if (!next_pte->mem.size) {
-                                err = nvgpu_zalloc_gmmu_page_table(vm,
-                                        pgsz_idx, next_l, next_pte, prev_pte);
-                                if (err)
-                                        return err;
-                        }
-                }
-
-                err = l->update_entry(vm, pte, pde_i, pgsz_idx,
-                                      sgl, offset, iova,
-                                      kind_v, ctag, cacheable, unmapped_pte,
-                                      rw_flag, sparse, priv, aperture);
-                if (err)
-                        return err;
-
-                if (next_l->update_entry) {
-                        /* get cpu access to the ptes */
-                        err = map_gmmu_pages(g, next_pte);
-                        if (err) {
-                                nvgpu_err(g,
-                                          "couldn't map ptes for update as=%d",
-                                          vm_aspace_id(vm));
-                                return err;
-                        }
-                        err = update_gmmu_level_locked(vm, next_pte,
-                                                       pgsz_idx,
-                                                       sgl,
-                                                       offset,
-                                                       iova,
-                                                       gpu_va,
-                                                       next,
-                                                       kind_v, ctag, cacheable, unmapped_pte,
-                                                       rw_flag, sparse, lvl+1, priv, aperture);
-                        unmap_gmmu_pages(g, next_pte);
-
-                        if (err)
-                                return err;
-                }
-
-                pde_i++;
-                gpu_va = next;
-        }
-
-        gk20a_dbg_fn("done");
-
-        return 0;
-}
-
-static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
-                                   enum gmmu_pgsz_gk20a pgsz_idx,
-                                   struct sg_table *sgt,
-                                   u64 buffer_offset,
-                                   u64 gpu_va, u64 gpu_end,
-                                   u8 kind_v, u32 ctag_offset,
-                                   bool cacheable, bool unmapped_pte,
-                                   int rw_flag,
-                                   bool sparse,
-                                   bool priv,
-                                   enum nvgpu_aperture aperture)
-{
-        struct gk20a *g = gk20a_from_vm(vm);
-        int ctag_granularity = g->ops.fb.compression_page_size(g);
-        u64 ctag = (u64)ctag_offset * (u64)ctag_granularity;
-        u64 iova = 0;
-        u64 space_to_skip = buffer_offset;
-        u64 map_size = gpu_end - gpu_va;
-        u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
-        int err;
-        struct scatterlist *sgl = NULL;
-        struct nvgpu_page_alloc *alloc = NULL;
-        struct page_alloc_chunk *chunk = NULL;
-        u64 length;
-
-        /* note: here we need to map kernel to small, since the
-         * low-level mmu code assumes 0 is small and 1 is big pages */
-        if (pgsz_idx == gmmu_page_size_kernel)
-                pgsz_idx = gmmu_page_size_small;
-
-        if (space_to_skip & (page_size - 1))
-                return -EINVAL;
-
-        err = map_gmmu_pages(g, &vm->pdb);
-        if (err) {
-                nvgpu_err(g,
-                          "couldn't map ptes for update as=%d",
-                          vm_aspace_id(vm));
-                return err;
-        }
-
-        if (aperture == APERTURE_VIDMEM) {
-                gk20a_dbg(gpu_dbg_map_v, "vidmem map size_idx=%d, gpu_va=[%llx,%llx], alloc=%llx",
-                          pgsz_idx, gpu_va, gpu_end-1, iova);
-
-                if (sgt) {
-                        alloc = get_vidmem_page_alloc(sgt->sgl);
-
-                        nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
-                                                  page_alloc_chunk, list_entry) {
-                                if (space_to_skip &&
-                                    space_to_skip > chunk->length) {
-                                        space_to_skip -= chunk->length;
-                                } else {
-                                        iova = chunk->base + space_to_skip;
-                                        length = chunk->length - space_to_skip;
-                                        length = min(length, map_size);
-                                        space_to_skip = 0;
-
-                                        err = update_gmmu_level_locked(vm,
-                                                &vm->pdb, pgsz_idx,
-                                                &sgl,
-                                                &space_to_skip,
-                                                &iova,
-                                                gpu_va, gpu_va + length,
-                                                kind_v, &ctag,
-                                                cacheable, unmapped_pte,
-                                                rw_flag, sparse, 0, priv,
-                                                aperture);
-                                        if (err)
-                                                break;
-
-                                        /* need to set explicit zero here */
-                                        space_to_skip = 0;
-                                        gpu_va += length;
-                                        map_size -= length;
-
-                                        if (!map_size)
-                                                break;
-                                }
-                        }
-                } else {
-                        err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
-                                        &sgl,
-                                        &space_to_skip,
-                                        &iova,
-                                        gpu_va, gpu_end,
-                                        kind_v, &ctag,
-                                        cacheable, unmapped_pte, rw_flag,
-                                        sparse, 0, priv,
-                                        aperture);
-                }
-        } else {
-                gk20a_dbg(gpu_dbg_pte, "size_idx=%d, iova=%llx, buffer offset %lld, nents %d",
-                          pgsz_idx,
-                          sgt ? g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0)
-                              : 0ULL,
-                          buffer_offset,
-                          sgt ? sgt->nents : 0);
-
-                gk20a_dbg(gpu_dbg_map_v, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx",
-                          pgsz_idx, gpu_va, gpu_end-1, iova);
-
-                if (sgt) {
-                        iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0);
-                        if (!vm->mm->bypass_smmu && iova) {
-                                iova += space_to_skip;
-                        } else {
-                                sgl = sgt->sgl;
-
-                                gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
-                                          (u64)sg_phys(sgl),
-                                          sgl->length);
-
-                                while (space_to_skip && sgl &&
-                                       space_to_skip + page_size > sgl->length) {
-                                        space_to_skip -= sgl->length;
-                                        sgl = sg_next(sgl);
-                                        gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
-                                                  (u64)sg_phys(sgl),
-                                                  sgl->length);
-                                }
-
-                                iova = sg_phys(sgl) + space_to_skip;
-                        }
-                }
-
-                err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
-                                &sgl,
-                                &space_to_skip,
-                                &iova,
-                                gpu_va, gpu_end,
-                                kind_v, &ctag,
-                                cacheable, unmapped_pte, rw_flag,
-                                sparse, 0, priv,
-                                aperture);
-        }
-
-        unmap_gmmu_pages(g, &vm->pdb);
-
-        smp_mb();
-
-        gk20a_dbg_fn("done");
-
-        return err;
-}
-
 /* NOTE! mapped_buffers lock must be held */
 void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
                            struct vm_gk20a_mapping_batch *batch)
@@ -2341,22 +1836,6 @@ void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
         return;
 }
 
-void gk20a_vm_free_entries(struct vm_gk20a *vm,
-                           struct gk20a_mm_entry *parent,
-                           int level)
-{
-        int i;
-
-        if (parent->entries)
-                for (i = 0; i < parent->num_entries; i++)
-                        gk20a_vm_free_entries(vm, &parent->entries[i], level+1);
-
-        if (parent->mem.size)
-                free_gmmu_pages(vm, parent);
-        nvgpu_vfree(vm->mm->g, parent->entries);
-        parent->entries = NULL;
-}
-
 const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
         {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
          .lo_bit = {26, 26},