diff options
author | Alex Waterman <alexw@nvidia.com> | 2017-05-11 13:25:47 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-06-06 20:09:22 -0400 |
commit | 048c6b062ae381a329dccbc7ca0599113dbd7417 (patch) | |
tree | 24712fcaf967e22bd91bcb6a81195cf79ac08cc1 /drivers/gpu/nvgpu/common/mm | |
parent | c21f5bca9ae81804130e30ea3e6f7a18d51203dc (diff) |
gpu: nvgpu: Separate GMMU mapping impl from mm_gk20a.c
Separate the non-chip specific GMMU mapping implementation code
out of mm_gk20a.c. This puts all of the chip-agnostic code into
common/mm/gmmu.c in preparation for rewriting it.
JIRA NVGPU-12
JIRA NVGPU-30
Change-Id: I6f7fdac3422703f5e80bb22ad304dc27bba4814d
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1480228
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm')
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/gmmu.c | 517 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/vm.c | 21 |
2 files changed, 536 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index dc91cc2f..e63155f2 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c | |||
@@ -15,14 +15,81 @@ | |||
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <nvgpu/log.h> | 17 | #include <nvgpu/log.h> |
18 | #include <nvgpu/list.h> | ||
18 | #include <nvgpu/dma.h> | 19 | #include <nvgpu/dma.h> |
19 | #include <nvgpu/gmmu.h> | 20 | #include <nvgpu/gmmu.h> |
20 | #include <nvgpu/nvgpu_mem.h> | 21 | #include <nvgpu/nvgpu_mem.h> |
21 | #include <nvgpu/enabled.h> | 22 | #include <nvgpu/enabled.h> |
23 | #include <nvgpu/page_allocator.h> | ||
22 | 24 | ||
23 | #include "gk20a/gk20a.h" | 25 | #include "gk20a/gk20a.h" |
24 | #include "gk20a/mm_gk20a.h" | 26 | #include "gk20a/mm_gk20a.h" |
25 | 27 | ||
28 | #define gmmu_dbg(g, fmt, args...) \ | ||
29 | nvgpu_log(g, gpu_dbg_map, fmt, ##args) | ||
30 | #define gmmu_dbg_v(g, fmt, args...) \ | ||
31 | nvgpu_log(g, gpu_dbg_map_v, fmt, ##args) | ||
32 | |||
33 | static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry) | ||
34 | { | ||
35 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, | ||
36 | sg_phys(entry->mem.priv.sgt->sgl), | ||
37 | entry->mem.priv.sgt->sgl->length); | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | static void unmap_gmmu_phys_pages(struct gk20a_mm_entry *entry) | ||
42 | { | ||
43 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, | ||
44 | sg_phys(entry->mem.priv.sgt->sgl), | ||
45 | entry->mem.priv.sgt->sgl->length); | ||
46 | } | ||
47 | |||
48 | static int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry) | ||
49 | { | ||
50 | gk20a_dbg_fn(""); | ||
51 | |||
52 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | ||
53 | return map_gmmu_phys_pages(entry); | ||
54 | |||
55 | if (IS_ENABLED(CONFIG_ARM64)) { | ||
56 | if (entry->mem.aperture == APERTURE_VIDMEM) | ||
57 | return 0; | ||
58 | |||
59 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, | ||
60 | sg_phys(entry->mem.priv.sgt->sgl), | ||
61 | entry->mem.size); | ||
62 | } else { | ||
63 | int err = nvgpu_mem_begin(g, &entry->mem); | ||
64 | |||
65 | if (err) | ||
66 | return err; | ||
67 | } | ||
68 | |||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | static void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry) | ||
73 | { | ||
74 | gk20a_dbg_fn(""); | ||
75 | |||
76 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { | ||
77 | unmap_gmmu_phys_pages(entry); | ||
78 | return; | ||
79 | } | ||
80 | |||
81 | if (IS_ENABLED(CONFIG_ARM64)) { | ||
82 | if (entry->mem.aperture == APERTURE_VIDMEM) | ||
83 | return; | ||
84 | |||
85 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, | ||
86 | sg_phys(entry->mem.priv.sgt->sgl), | ||
87 | entry->mem.size); | ||
88 | } else { | ||
89 | nvgpu_mem_end(g, &entry->mem); | ||
90 | } | ||
91 | } | ||
92 | |||
26 | static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order, | 93 | static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order, |
27 | struct gk20a_mm_entry *entry) | 94 | struct gk20a_mm_entry *entry) |
28 | { | 95 | { |
@@ -97,6 +164,44 @@ static int nvgpu_alloc_gmmu_pages(struct vm_gk20a *vm, u32 order, | |||
97 | return 0; | 164 | return 0; |
98 | } | 165 | } |
99 | 166 | ||
167 | static void free_gmmu_phys_pages(struct vm_gk20a *vm, | ||
168 | struct gk20a_mm_entry *entry) | ||
169 | { | ||
170 | gk20a_dbg_fn(""); | ||
171 | |||
172 | /* note: mem_desc slightly abused (wrt. nvgpu_free_gmmu_pages) */ | ||
173 | |||
174 | free_pages((unsigned long)entry->mem.cpu_va, get_order(entry->mem.size)); | ||
175 | entry->mem.cpu_va = NULL; | ||
176 | |||
177 | sg_free_table(entry->mem.priv.sgt); | ||
178 | nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt); | ||
179 | entry->mem.priv.sgt = NULL; | ||
180 | entry->mem.size = 0; | ||
181 | entry->mem.aperture = APERTURE_INVALID; | ||
182 | } | ||
183 | |||
184 | void nvgpu_free_gmmu_pages(struct vm_gk20a *vm, | ||
185 | struct gk20a_mm_entry *entry) | ||
186 | { | ||
187 | struct gk20a *g = gk20a_from_vm(vm); | ||
188 | |||
189 | gk20a_dbg_fn(""); | ||
190 | |||
191 | if (!entry->mem.size) | ||
192 | return; | ||
193 | |||
194 | if (entry->woffset) /* fake shadow mem */ | ||
195 | return; | ||
196 | |||
197 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { | ||
198 | free_gmmu_phys_pages(vm, entry); | ||
199 | return; | ||
200 | } | ||
201 | |||
202 | nvgpu_dma_free(g, &entry->mem); | ||
203 | } | ||
204 | |||
100 | /* | 205 | /* |
101 | * Allocate a phys contig region big enough for a full | 206 | * Allocate a phys contig region big enough for a full |
102 | * sized gmmu page table for the given gmmu_page_size. | 207 | * sized gmmu page table for the given gmmu_page_size. |
@@ -202,6 +307,9 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
202 | return vaddr; | 307 | return vaddr; |
203 | } | 308 | } |
204 | 309 | ||
310 | /* | ||
311 | * Convenience wrapper over __nvgpu_gmmu_map() for non-fixed mappings. | ||
312 | */ | ||
205 | u64 nvgpu_gmmu_map(struct vm_gk20a *vm, | 313 | u64 nvgpu_gmmu_map(struct vm_gk20a *vm, |
206 | struct nvgpu_mem *mem, | 314 | struct nvgpu_mem *mem, |
207 | u64 size, | 315 | u64 size, |
@@ -246,3 +354,412 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm, struct nvgpu_mem *mem, u64 gpu_va) | |||
246 | 354 | ||
247 | nvgpu_mutex_release(&vm->update_gmmu_lock); | 355 | nvgpu_mutex_release(&vm->update_gmmu_lock); |
248 | } | 356 | } |
357 | |||
358 | static int update_gmmu_level_locked(struct vm_gk20a *vm, | ||
359 | struct gk20a_mm_entry *pte, | ||
360 | enum gmmu_pgsz_gk20a pgsz_idx, | ||
361 | struct scatterlist **sgl, | ||
362 | u64 *offset, | ||
363 | u64 *iova, | ||
364 | u64 gpu_va, u64 gpu_end, | ||
365 | u8 kind_v, u64 *ctag, | ||
366 | bool cacheable, bool unmapped_pte, | ||
367 | int rw_flag, | ||
368 | bool sparse, | ||
369 | int lvl, | ||
370 | bool priv, | ||
371 | enum nvgpu_aperture aperture) | ||
372 | { | ||
373 | struct gk20a *g = gk20a_from_vm(vm); | ||
374 | const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl]; | ||
375 | const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1]; | ||
376 | int err = 0; | ||
377 | u32 pde_i; | ||
378 | u64 pde_size = 1ULL << (u64)l->lo_bit[pgsz_idx]; | ||
379 | struct gk20a_mm_entry *next_pte = NULL, *prev_pte = NULL; | ||
380 | |||
381 | gk20a_dbg_fn(""); | ||
382 | |||
383 | pde_i = (gpu_va & ((1ULL << ((u64)l->hi_bit[pgsz_idx]+1)) - 1ULL)) | ||
384 | >> (u64)l->lo_bit[pgsz_idx]; | ||
385 | |||
386 | gk20a_dbg(gpu_dbg_pte, "size_idx=%d, l: %d, [%llx,%llx], iova=%llx", | ||
387 | pgsz_idx, lvl, gpu_va, gpu_end-1, *iova); | ||
388 | |||
389 | while (gpu_va < gpu_end) { | ||
390 | u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end); | ||
391 | |||
392 | /* Allocate next level */ | ||
393 | if (next_l->update_entry) { | ||
394 | if (!pte->entries) { | ||
395 | int num_entries = | ||
396 | 1 << | ||
397 | (l->hi_bit[pgsz_idx] | ||
398 | - l->lo_bit[pgsz_idx] + 1); | ||
399 | pte->entries = | ||
400 | nvgpu_vzalloc(g, | ||
401 | sizeof(struct gk20a_mm_entry) * | ||
402 | num_entries); | ||
403 | if (!pte->entries) | ||
404 | return -ENOMEM; | ||
405 | pte->pgsz = pgsz_idx; | ||
406 | pte->num_entries = num_entries; | ||
407 | } | ||
408 | prev_pte = next_pte; | ||
409 | next_pte = pte->entries + pde_i; | ||
410 | |||
411 | if (!next_pte->mem.size) { | ||
412 | err = nvgpu_zalloc_gmmu_page_table(vm, | ||
413 | pgsz_idx, next_l, next_pte, prev_pte); | ||
414 | if (err) | ||
415 | return err; | ||
416 | } | ||
417 | } | ||
418 | |||
419 | err = l->update_entry(vm, pte, pde_i, pgsz_idx, | ||
420 | sgl, offset, iova, | ||
421 | kind_v, ctag, cacheable, unmapped_pte, | ||
422 | rw_flag, sparse, priv, aperture); | ||
423 | if (err) | ||
424 | return err; | ||
425 | |||
426 | if (next_l->update_entry) { | ||
427 | /* get cpu access to the ptes */ | ||
428 | err = map_gmmu_pages(g, next_pte); | ||
429 | if (err) { | ||
430 | nvgpu_err(g, | ||
431 | "couldn't map ptes for update as=%d", | ||
432 | vm_aspace_id(vm)); | ||
433 | return err; | ||
434 | } | ||
435 | err = update_gmmu_level_locked(vm, next_pte, | ||
436 | pgsz_idx, | ||
437 | sgl, | ||
438 | offset, | ||
439 | iova, | ||
440 | gpu_va, | ||
441 | next, | ||
442 | kind_v, ctag, cacheable, unmapped_pte, | ||
443 | rw_flag, sparse, lvl+1, priv, aperture); | ||
444 | unmap_gmmu_pages(g, next_pte); | ||
445 | |||
446 | if (err) | ||
447 | return err; | ||
448 | } | ||
449 | |||
450 | pde_i++; | ||
451 | gpu_va = next; | ||
452 | } | ||
453 | |||
454 | gk20a_dbg_fn("done"); | ||
455 | |||
456 | return 0; | ||
457 | } | ||
458 | |||
459 | /* | ||
460 | * This is the true top level GMMU mapping logic. This breaks down the incoming | ||
461 | * scatter gather table and does actual programming of GPU virtual address to | ||
462 | * physical* address. | ||
463 | * | ||
464 | * The update of each level of the page tables is farmed out to chip specific | ||
465 | * implementations. But the logic around that is generic to all chips. Every chip | ||
466 | * has some number of PDE levels and then a PTE level. | ||
467 | * | ||
468 | * Each chunk of the incoming SGT is sent to the chip specific implementation | ||
469 | * of page table update. | ||
470 | * | ||
471 | * [*] Note: the "physical" address may actually be an IO virtual address in the | ||
472 | * case of SMMU usage. | ||
473 | */ | ||
474 | static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | ||
475 | enum gmmu_pgsz_gk20a pgsz_idx, | ||
476 | struct sg_table *sgt, | ||
477 | u64 buffer_offset, | ||
478 | u64 gpu_va, u64 gpu_end, | ||
479 | u8 kind_v, u32 ctag_offset, | ||
480 | bool cacheable, bool unmapped_pte, | ||
481 | int rw_flag, | ||
482 | bool sparse, | ||
483 | bool priv, | ||
484 | enum nvgpu_aperture aperture) | ||
485 | { | ||
486 | struct gk20a *g = gk20a_from_vm(vm); | ||
487 | int ctag_granularity = g->ops.fb.compression_page_size(g); | ||
488 | u64 ctag = (u64)ctag_offset * (u64)ctag_granularity; | ||
489 | u64 iova = 0; | ||
490 | u64 space_to_skip = buffer_offset; | ||
491 | u64 map_size = gpu_end - gpu_va; | ||
492 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; | ||
493 | int err; | ||
494 | struct scatterlist *sgl = NULL; | ||
495 | struct nvgpu_page_alloc *alloc = NULL; | ||
496 | struct page_alloc_chunk *chunk = NULL; | ||
497 | u64 length; | ||
498 | |||
499 | /* note: here we need to map kernel to small, since the | ||
500 | * low-level mmu code assumes 0 is small and 1 is big pages */ | ||
501 | if (pgsz_idx == gmmu_page_size_kernel) | ||
502 | pgsz_idx = gmmu_page_size_small; | ||
503 | |||
504 | if (space_to_skip & (page_size - 1)) | ||
505 | return -EINVAL; | ||
506 | |||
507 | err = map_gmmu_pages(g, &vm->pdb); | ||
508 | if (err) { | ||
509 | nvgpu_err(g, | ||
510 | "couldn't map ptes for update as=%d", | ||
511 | vm_aspace_id(vm)); | ||
512 | return err; | ||
513 | } | ||
514 | |||
515 | if (aperture == APERTURE_VIDMEM) { | ||
516 | gmmu_dbg_v(g, "vidmem map size_idx=%d, gpu_va=[%llx,%llx]", | ||
517 | pgsz_idx, gpu_va, gpu_end-1); | ||
518 | |||
519 | if (sgt) { | ||
520 | alloc = get_vidmem_page_alloc(sgt->sgl); | ||
521 | |||
522 | nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, | ||
523 | page_alloc_chunk, list_entry) { | ||
524 | if (space_to_skip && | ||
525 | space_to_skip > chunk->length) { | ||
526 | space_to_skip -= chunk->length; | ||
527 | } else { | ||
528 | iova = chunk->base + space_to_skip; | ||
529 | length = chunk->length - space_to_skip; | ||
530 | length = min(length, map_size); | ||
531 | space_to_skip = 0; | ||
532 | |||
533 | err = update_gmmu_level_locked(vm, | ||
534 | &vm->pdb, pgsz_idx, | ||
535 | &sgl, | ||
536 | &space_to_skip, | ||
537 | &iova, | ||
538 | gpu_va, gpu_va + length, | ||
539 | kind_v, &ctag, | ||
540 | cacheable, unmapped_pte, | ||
541 | rw_flag, sparse, 0, priv, | ||
542 | aperture); | ||
543 | if (err) | ||
544 | break; | ||
545 | |||
546 | /* need to set explicit zero here */ | ||
547 | space_to_skip = 0; | ||
548 | gpu_va += length; | ||
549 | map_size -= length; | ||
550 | |||
551 | if (!map_size) | ||
552 | break; | ||
553 | } | ||
554 | } | ||
555 | } else { | ||
556 | err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx, | ||
557 | &sgl, | ||
558 | &space_to_skip, | ||
559 | &iova, | ||
560 | gpu_va, gpu_end, | ||
561 | kind_v, &ctag, | ||
562 | cacheable, unmapped_pte, rw_flag, | ||
563 | sparse, 0, priv, | ||
564 | aperture); | ||
565 | } | ||
566 | } else { | ||
567 | gmmu_dbg_v(g, | ||
568 | "pgsz=%-6d, gpu_va: %#-12llx +%#-6llx phys: %#-12llx " | ||
569 | "buffer offset: %-4lld, nents: %d", | ||
570 | page_size, | ||
571 | gpu_va, gpu_end - gpu_va, | ||
572 | sgt ? g->ops.mm.get_iova_addr(g, sgt->sgl, 0) : 0ULL, | ||
573 | buffer_offset, | ||
574 | sgt ? sgt->nents : 0); | ||
575 | |||
576 | if (sgt) { | ||
577 | iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0); | ||
578 | if (!vm->mm->bypass_smmu && iova) { | ||
579 | iova += space_to_skip; | ||
580 | } else { | ||
581 | sgl = sgt->sgl; | ||
582 | |||
583 | gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d", | ||
584 | (u64)sg_phys(sgl), | ||
585 | sgl->length); | ||
586 | |||
587 | while (space_to_skip && sgl && | ||
588 | space_to_skip + page_size > sgl->length) { | ||
589 | space_to_skip -= sgl->length; | ||
590 | sgl = sg_next(sgl); | ||
591 | gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d", | ||
592 | (u64)sg_phys(sgl), | ||
593 | sgl->length); | ||
594 | } | ||
595 | |||
596 | iova = sg_phys(sgl) + space_to_skip; | ||
597 | } | ||
598 | } | ||
599 | |||
600 | err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx, | ||
601 | &sgl, | ||
602 | &space_to_skip, | ||
603 | &iova, | ||
604 | gpu_va, gpu_end, | ||
605 | kind_v, &ctag, | ||
606 | cacheable, unmapped_pte, rw_flag, | ||
607 | sparse, 0, priv, | ||
608 | aperture); | ||
609 | } | ||
610 | |||
611 | unmap_gmmu_pages(g, &vm->pdb); | ||
612 | |||
613 | mb(); | ||
614 | |||
615 | gk20a_dbg_fn("done"); | ||
616 | |||
617 | return err; | ||
618 | } | ||
619 | |||
620 | /** | ||
621 | * gk20a_locked_gmmu_map - Map a buffer into the GMMU | ||
622 | * | ||
623 | * This is for non-vGPU chips. It's part of the HAL at the moment but really | ||
624 | * should not be. Chip specific stuff is handled at the PTE/PDE programming | ||
625 | * layer. The rest of the logic is essentially generic for all chips. | ||
626 | * | ||
627 | * To call this function you must have locked the VM lock: vm->update_gmmu_lock. | ||
628 | * However, note: this function is not called directly. It's used through the | ||
629 | * mm.gmmu_lock() HAL. So before calling the mm.gmmu_lock() HAL make sure you | ||
630 | * have the update_gmmu_lock acquired. | ||
631 | */ | ||
632 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | ||
633 | u64 map_offset, | ||
634 | struct sg_table *sgt, | ||
635 | u64 buffer_offset, | ||
636 | u64 size, | ||
637 | int pgsz_idx, | ||
638 | u8 kind_v, | ||
639 | u32 ctag_offset, | ||
640 | u32 flags, | ||
641 | int rw_flag, | ||
642 | bool clear_ctags, | ||
643 | bool sparse, | ||
644 | bool priv, | ||
645 | struct vm_gk20a_mapping_batch *batch, | ||
646 | enum nvgpu_aperture aperture) | ||
647 | { | ||
648 | int err = 0; | ||
649 | bool allocated = false; | ||
650 | struct gk20a *g = gk20a_from_vm(vm); | ||
651 | int ctag_granularity = g->ops.fb.compression_page_size(g); | ||
652 | u32 ctag_lines = DIV_ROUND_UP_ULL(size, ctag_granularity); | ||
653 | |||
654 | /* Allocate (or validate when map_offset != 0) the virtual address. */ | ||
655 | if (!map_offset) { | ||
656 | map_offset = __nvgpu_vm_alloc_va(vm, size, | ||
657 | pgsz_idx); | ||
658 | if (!map_offset) { | ||
659 | nvgpu_err(g, "failed to allocate va space"); | ||
660 | err = -ENOMEM; | ||
661 | goto fail_alloc; | ||
662 | } | ||
663 | allocated = true; | ||
664 | } | ||
665 | |||
666 | gmmu_dbg(g, | ||
667 | "gv: 0x%04x_%08x + 0x%-7llx " | ||
668 | "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] " | ||
669 | "pgsz=%-3dKb as=%-2d ctags=%d start=%d " | ||
670 | "kind=0x%x flags=0x%x apt=%s", | ||
671 | u64_hi32(map_offset), u64_lo32(map_offset), size, | ||
672 | sgt ? u64_hi32((u64)sg_dma_address(sgt->sgl)) : 0, | ||
673 | sgt ? u64_lo32((u64)sg_dma_address(sgt->sgl)) : 0, | ||
674 | sgt ? u64_hi32((u64)sg_phys(sgt->sgl)) : 0, | ||
675 | sgt ? u64_lo32((u64)sg_phys(sgt->sgl)) : 0, | ||
676 | vm->gmmu_page_sizes[pgsz_idx] >> 10, vm_aspace_id(vm), | ||
677 | ctag_lines, ctag_offset, | ||
678 | kind_v, flags, nvgpu_aperture_str(aperture)); | ||
679 | |||
680 | err = update_gmmu_ptes_locked(vm, pgsz_idx, | ||
681 | sgt, | ||
682 | buffer_offset, | ||
683 | map_offset, map_offset + size, | ||
684 | kind_v, | ||
685 | ctag_offset, | ||
686 | flags & | ||
687 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
688 | flags & | ||
689 | NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE, | ||
690 | rw_flag, | ||
691 | sparse, | ||
692 | priv, | ||
693 | aperture); | ||
694 | if (err) { | ||
695 | nvgpu_err(g, "failed to update ptes on map"); | ||
696 | goto fail_validate; | ||
697 | } | ||
698 | |||
699 | if (!batch) | ||
700 | g->ops.fb.tlb_invalidate(g, &vm->pdb.mem); | ||
701 | else | ||
702 | batch->need_tlb_invalidate = true; | ||
703 | |||
704 | return map_offset; | ||
705 | fail_validate: | ||
706 | if (allocated) | ||
707 | __nvgpu_vm_free_va(vm, map_offset, pgsz_idx); | ||
708 | fail_alloc: | ||
709 | nvgpu_err(g, "%s: failed with err=%d", __func__, err); | ||
710 | return 0; | ||
711 | } | ||
712 | |||
713 | void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, | ||
714 | u64 vaddr, | ||
715 | u64 size, | ||
716 | int pgsz_idx, | ||
717 | bool va_allocated, | ||
718 | int rw_flag, | ||
719 | bool sparse, | ||
720 | struct vm_gk20a_mapping_batch *batch) | ||
721 | { | ||
722 | int err = 0; | ||
723 | struct gk20a *g = gk20a_from_vm(vm); | ||
724 | |||
725 | if (va_allocated) { | ||
726 | err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx); | ||
727 | if (err) { | ||
728 | nvgpu_err(g, "failed to free va"); | ||
729 | return; | ||
730 | } | ||
731 | } | ||
732 | |||
733 | /* unmap here needs to know the page size we assigned at mapping */ | ||
734 | err = update_gmmu_ptes_locked(vm, | ||
735 | pgsz_idx, | ||
736 | NULL, /* n/a for unmap */ | ||
737 | 0, | ||
738 | vaddr, | ||
739 | vaddr + size, | ||
740 | 0, 0, false /* n/a for unmap */, | ||
741 | false, rw_flag, | ||
742 | sparse, 0, | ||
743 | APERTURE_INVALID); /* don't care for unmap */ | ||
744 | if (err) | ||
745 | nvgpu_err(g, "failed to update gmmu ptes on unmap"); | ||
746 | |||
747 | /* flush l2 so any dirty lines are written out *now*. | ||
748 | * also as we could potentially be switching this buffer | ||
749 | * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at | ||
750 | * some point in the future we need to invalidate l2. e.g. switching | ||
751 | * from a render buffer unmap (here) to later using the same memory | ||
752 | * for gmmu ptes. note the positioning of this relative to any smmu | ||
753 | * unmapping (below). */ | ||
754 | |||
755 | if (!batch) { | ||
756 | gk20a_mm_l2_flush(g, true); | ||
757 | g->ops.fb.tlb_invalidate(g, &vm->pdb.mem); | ||
758 | } else { | ||
759 | if (!batch->gpu_l2_flushed) { | ||
760 | gk20a_mm_l2_flush(g, true); | ||
761 | batch->gpu_l2_flushed = true; | ||
762 | } | ||
763 | batch->need_tlb_invalidate = true; | ||
764 | } | ||
765 | } | ||
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c index e24d40bf..5ba386c9 100644 --- a/drivers/gpu/nvgpu/common/mm/vm.c +++ b/drivers/gpu/nvgpu/common/mm/vm.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <nvgpu/dma.h> | 18 | #include <nvgpu/dma.h> |
19 | #include <nvgpu/vm.h> | 19 | #include <nvgpu/vm.h> |
20 | #include <nvgpu/vm_area.h> | 20 | #include <nvgpu/vm_area.h> |
21 | #include <nvgpu/gmmu.h> | ||
21 | #include <nvgpu/lock.h> | 22 | #include <nvgpu/lock.h> |
22 | #include <nvgpu/list.h> | 23 | #include <nvgpu/list.h> |
23 | #include <nvgpu/rbtree.h> | 24 | #include <nvgpu/rbtree.h> |
@@ -34,6 +35,22 @@ int vm_aspace_id(struct vm_gk20a *vm) | |||
34 | return vm->as_share ? vm->as_share->id : -1; | 35 | return vm->as_share ? vm->as_share->id : -1; |
35 | } | 36 | } |
36 | 37 | ||
38 | static void nvgpu_vm_free_entries(struct vm_gk20a *vm, | ||
39 | struct gk20a_mm_entry *parent, | ||
40 | int level) | ||
41 | { | ||
42 | int i; | ||
43 | |||
44 | if (parent->entries) | ||
45 | for (i = 0; i < parent->num_entries; i++) | ||
46 | nvgpu_vm_free_entries(vm, &parent->entries[i], level+1); | ||
47 | |||
48 | if (parent->mem.size) | ||
49 | nvgpu_free_gmmu_pages(vm, parent); | ||
50 | nvgpu_vfree(vm->mm->g, parent->entries); | ||
51 | parent->entries = NULL; | ||
52 | } | ||
53 | |||
37 | u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size, | 54 | u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size, |
38 | enum gmmu_pgsz_gk20a pgsz_idx) | 55 | enum gmmu_pgsz_gk20a pgsz_idx) |
39 | 56 | ||
@@ -421,7 +438,7 @@ clean_up_allocators: | |||
421 | clean_up_page_tables: | 438 | clean_up_page_tables: |
422 | /* Cleans up nvgpu_vm_init_page_tables() */ | 439 | /* Cleans up nvgpu_vm_init_page_tables() */ |
423 | nvgpu_vfree(g, vm->pdb.entries); | 440 | nvgpu_vfree(g, vm->pdb.entries); |
424 | free_gmmu_pages(vm, &vm->pdb); | 441 | nvgpu_free_gmmu_pages(vm, &vm->pdb); |
425 | clean_up_vgpu_vm: | 442 | clean_up_vgpu_vm: |
426 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | 443 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION |
427 | if (g->is_virtual) | 444 | if (g->is_virtual) |
@@ -537,7 +554,7 @@ static void __nvgpu_vm_remove(struct vm_gk20a *vm) | |||
537 | if (nvgpu_alloc_initialized(&vm->user_lp)) | 554 | if (nvgpu_alloc_initialized(&vm->user_lp)) |
538 | nvgpu_alloc_destroy(&vm->user_lp); | 555 | nvgpu_alloc_destroy(&vm->user_lp); |
539 | 556 | ||
540 | gk20a_vm_free_entries(vm, &vm->pdb, 0); | 557 | nvgpu_vm_free_entries(vm, &vm->pdb, 0); |
541 | 558 | ||
542 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | 559 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION |
543 | if (g->is_virtual) | 560 | if (g->is_virtual) |