Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/cde_gk20a.c     |  16
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c |  17
-rw-r--r--  drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c |  17
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c      |   4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c      | 183
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h      | 174
-rw-r--r--  drivers/gpu/nvgpu/gk20a/tsg_gk20a.c     |   2
7 files changed, 81 insertions(+), 332 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index d5d75be5..cf95019b 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -46,6 +46,12 @@
 #include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
 
+/*
+ * Currently this code uses nvgpu_vm_map() since it takes dmabuf FDs from the
+ * CDE ioctls. That has to change - instead this needs to take an nvgpu_mem.
+ */
+#include "common/linux/vm_priv.h"
+
 static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
 static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g);
 
@@ -1016,8 +1022,8 @@ __releases(&cde_app->mutex)
 
 
 	/* map the destination buffer */
-	get_dma_buf(compbits_scatter_buf); /* a ref for gk20a_vm_map */
-	map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_scatter_buf, 0,
+	get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */
+	map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0,
 				 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
 				 compbits_kind, NULL, true,
 				 gk20a_mem_flag_none,
@@ -1136,7 +1142,7 @@ __releases(&cde_app->mutex)
 	cde_ctx->init_cmd_executed = true;
 
 	/* unmap the buffers - channel holds references to them now */
-	gk20a_vm_unmap(cde_ctx->vm, map_vaddr);
+	nvgpu_vm_unmap(cde_ctx->vm, map_vaddr);
 
 	return err;
 
@@ -1144,7 +1150,7 @@ exit_unmap_surface:
 	if (surface)
 		dma_buf_vunmap(compbits_scatter_buf, surface);
 exit_unmap_vaddr:
-	gk20a_vm_unmap(cde_ctx->vm, map_vaddr);
+	nvgpu_vm_unmap(cde_ctx->vm, map_vaddr);
 exit_idle:
 	gk20a_idle(g);
 	return err;
@@ -1277,7 +1283,7 @@ err_init_cde_img:
 	nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
 err_map_backingstore:
 err_alloc_gpfifo:
-	gk20a_vm_put(ch->vm);
+	nvgpu_vm_put(ch->vm);
 err_commit_va:
 err_get_gk20a_channel:
 	nvgpu_release_firmware(g, img);
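Side note on the CDE call sites above: the pattern is take a dma_buf reference, map it into the CDE context's VM, queue work, then unmap. A minimal sketch of that flow using the nvgpu_vm_map()/nvgpu_vm_unmap() signatures visible in this patch; the function and its locals are illustrative, and the zero-VA-on-failure convention is assumed from the driver's usual "if (!map_vaddr)" check:

	/*
	 * Illustrative sketch, not driver code. Assumes nvgpu_vm_map()
	 * returns 0 on failure and keeps the dma_buf reference taken
	 * below only on success.
	 */
	static int cde_map_sketch(struct gk20a_cde_ctx *cde_ctx,
				  struct dma_buf *buf, int kind)
	{
		u64 vaddr;

		get_dma_buf(buf);	/* a ref for nvgpu_vm_map() */
		vaddr = nvgpu_vm_map(cde_ctx->vm, buf, 0,
				     NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
				     kind, NULL, true,
				     gk20a_mem_flag_none,
				     0, 0, NULL);
		if (!vaddr) {
			dma_buf_put(buf);	/* map failed: drop our ref */
			return -ENOMEM;
		}

		/* ... submit work that touches the buffer ... */

		nvgpu_vm_unmap(cde_ctx->vm, vaddr);	/* releases the map's ref */
		return 0;
	}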
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 31358468..b7fb363e 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -39,6 +39,13 @@
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
 
 /*
+ * This is required for nvgpu_vm_find_buffer() which is used in the tracing
+ * code. Once we can get and access userspace buffers without requiring
+ * direct dma_buf usage this can be removed.
+ */
+#include "common/linux/vm_priv.h"
+
+/*
  * Although channels do have pointers back to the gk20a struct that they were
  * created under in cases where the driver is killed that pointer can be bad.
  * The channel memory can be freed before the release() function for a given
@@ -550,7 +557,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 	/*
 	 * When releasing the channel we unbind the VM - so release the ref.
 	 */
-	gk20a_vm_put(ch_vm);
+	nvgpu_vm_put(ch_vm);
 
 	nvgpu_spinlock_acquire(&ch->update_fn_lock);
 	ch->update_fn = NULL;
@@ -1399,7 +1406,7 @@ static void trace_write_pushbuffer(struct channel_gk20a *c,
 		int err;
 
 		words = pbdma_gp_entry1_length_v(g->entry1);
-		err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
+		err = nvgpu_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
 		if (!err)
 			mem = dma_buf_vmap(dmabuf);
 	}
@@ -1901,7 +1908,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
 
 	if (!skip_buffer_refcounting) {
-		err = gk20a_vm_get_buffers(vm, &mapped_buffers,
+		err = nvgpu_vm_get_buffers(vm, &mapped_buffers,
 					   &num_mapped_buffers);
 		if (err)
 			return err;
@@ -1940,7 +1947,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 	return 0;
 
 err_put_buffers:
-	gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
+	nvgpu_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
 
 	return err;
 }
@@ -2039,7 +2046,7 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 	}
 
 	if (job->num_mapped_buffers)
-		gk20a_vm_put_buffers(vm, job->mapped_buffers,
+		nvgpu_vm_put_buffers(vm, job->mapped_buffers,
 				     job->num_mapped_buffers);
 
 	/* Remove job from channel's job list before we close the
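The add_job/clean_up_jobs hunks bracket a job's lifetime with a reference on every mapped buffer, so userspace cannot tear a mapping down while the GPU may still be using it. The shape of that bracket, reduced to the calls renamed here (a sketch assembled from the hunks, not a verbatim excerpt):

	struct mapped_buffer_node **mapped_buffers;
	int num_mapped_buffers;
	int err;

	/* at submit: take a ref on each buffer currently mapped in the VM */
	err = nvgpu_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
	if (err)
		return err;

	/* ... job executes; the mappings are pinned meanwhile ... */

	/* at job completion (or on a failed submit): drop the refs again */
	nvgpu_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);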
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index a08eb047..5351750a 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -26,6 +26,7 @@
 
 #include <nvgpu/kmem.h>
 #include <nvgpu/log.h>
+#include <nvgpu/vm.h>
 
 #include "gk20a.h"
 #include "gk20a/platform_gk20a.h"
@@ -38,6 +39,14 @@
 #include <nvgpu/hw/gk20a/hw_perf_gk20a.h>
 
 /*
+ * Currently this code uses nvgpu_vm_map_buffer() since it takes dmabuf FDs from
+ * the dbg ioctls. That has to change; this needs to hide the usage of dmabufs
+ * in Linux specific code. All core driver usage of mapping must be done through
+ * nvgpu_gmmu_map().
+ */
+#include "common/linux/vm_priv.h"
+
+/*
  * API to get first channel from the list of all channels
  * bound to the debug session
  */
@@ -1844,7 +1853,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		return -EBUSY;
 	}
 
-	err = gk20a_init_vm(mm, vm, big_page_size,
+	err = nvgpu_init_vm(mm, vm, big_page_size,
 			big_page_size << 10,
 			NV_MM_DEFAULT_KERNEL_SIZE,
 			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
@@ -1860,7 +1869,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 
 	g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, vm, 0);
 
-	err = gk20a_vm_map_buffer(vm,
+	err = nvgpu_vm_map_buffer(vm,
 			args->dmabuf_fd,
 			&args->offset,
 			0,
@@ -1913,7 +1922,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 	return 0;
 
 err_unmap:
-	gk20a_vm_unmap_buffer(vm, args->offset, NULL);
+	nvgpu_vm_unmap_buffer(vm, args->offset, NULL);
err_remove_vm:
 	gk20a_remove_vm(vm, &mm->perfbuf.inst_block);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
@@ -1952,7 +1961,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
 
 	err = gk20a_perfbuf_disable_locked(g);
 
-	gk20a_vm_unmap_buffer(vm, offset, NULL);
+	nvgpu_vm_unmap_buffer(vm, offset, NULL);
 	gk20a_remove_vm(vm, &mm->perfbuf.inst_block);
 
 	g->perfbuf.owner = NULL;
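The perfbuf path shows a complete lifecycle of a kernel-managed VM: create it, map a userspace dmabuf FD into it, and unmap/tear down on release or on error. Condensed from the hunks above into one sequence; the trailing nvgpu_vm_map_buffer() arguments and the VM name string are assumptions, since the rendering truncates them:

	err = nvgpu_init_vm(mm, vm, big_page_size,
			big_page_size << 10,
			NV_MM_DEFAULT_KERNEL_SIZE,
			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
			false, false, "perfbuf");
	if (err)
		return err;

	g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, vm, 0);

	/* flags, kind, offset, size and batch assumed 0/NULL here */
	err = nvgpu_vm_map_buffer(vm, args->dmabuf_fd, &args->offset,
				  0, 0, 0, 0, NULL);
	if (err)
		goto err_remove_vm;

	return 0;

err_remove_vm:
	gk20a_remove_vm(vm, &mm->perfbuf.inst_block);
	return err;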
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 4d41f9ff..9bd07894 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3182,14 +3182,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 	} else {
 		if (!tsg->tsg_gr_ctx) {
 			tsg->vm = c->vm;
-			gk20a_vm_get(tsg->vm);
+			nvgpu_vm_get(tsg->vm);
 			err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg,
 							args->class_num,
 							args->flags);
 			if (err) {
 				nvgpu_err(g,
 					"fail to allocate TSG gr ctx buffer");
-				gk20a_vm_put(tsg->vm);
+				nvgpu_vm_put(tsg->vm);
 				tsg->vm = NULL;
 				goto out;
 			}
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 201c2090..72a3ee13 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -55,6 +55,12 @@
 #include <nvgpu/hw/gk20a/hw_flush_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_ltc_gk20a.h>
 
+/*
+ * Necessary while transitioning to less coupled code. Will be removed once
+ * all the common APIs no longer have Linux stuff in them.
+ */
+#include "common/linux/vm_priv.h"
+
 #if defined(CONFIG_GK20A_VIDMEM)
 static void gk20a_vidmem_clear_mem_worker(struct work_struct *work);
 #endif
@@ -177,8 +183,6 @@ struct gk20a_vidmem_buf {
 	void (*dmabuf_priv_delete)(void *);
 };
 
-static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm);
-
 static int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator,
 		u32 *offset, u32 len)
 {
@@ -460,16 +464,6 @@ static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
 	return 0;
 }
 
-void gk20a_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block)
-{
-	struct gk20a *g = vm->mm->g;
-
-	gk20a_dbg_fn("");
-
-	gk20a_free_inst_block(g, inst_block);
-	gk20a_vm_remove_support_nofree(vm);
-}
-
 static void gk20a_vidmem_destroy(struct gk20a *g)
 {
 #if defined(CONFIG_GK20A_VIDMEM)
@@ -487,7 +481,7 @@ static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm)
 
 	mm->vidmem.ce_ctx_id = (u32)~0;
 
-	gk20a_vm_remove_support_nofree(&mm->ce.vm);
+	nvgpu_vm_remove_support_nofree(&mm->ce.vm);
 
 }
 
@@ -503,7 +497,7 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm)
 
 	gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block);
 	gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block);
-	gk20a_vm_remove_support_nofree(&mm->cde.vm);
+	nvgpu_vm_remove_support_nofree(&mm->cde.vm);
 
 	gk20a_semaphore_sea_destroy(g);
 	gk20a_vidmem_destroy(g);
@@ -1102,7 +1096,7 @@ static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm,
 	return NULL;
 }
 
-int gk20a_vm_get_buffers(struct vm_gk20a *vm,
+int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
 			 struct mapped_buffer_node ***mapped_buffers,
 			 int *num_buffers)
 {
@@ -1151,37 +1145,10 @@ static void gk20a_vm_unmap_locked_kref(struct kref *ref)
 {
 	struct mapped_buffer_node *mapped_buffer =
 		container_of(ref, struct mapped_buffer_node, ref);
-	gk20a_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch);
-}
-
-void gk20a_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch)
-{
-	memset(mapping_batch, 0, sizeof(*mapping_batch));
-	mapping_batch->gpu_l2_flushed = false;
-	mapping_batch->need_tlb_invalidate = false;
+	nvgpu_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch);
 }
 
-void gk20a_vm_mapping_batch_finish_locked(
-	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *mapping_batch)
-{
-	/* hanging kref_put batch pointer? */
-	WARN_ON(vm->kref_put_batch == mapping_batch);
-
-	if (mapping_batch->need_tlb_invalidate) {
-		struct gk20a *g = gk20a_from_vm(vm);
-		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
-	}
-}
-
-void gk20a_vm_mapping_batch_finish(struct vm_gk20a *vm,
-	struct vm_gk20a_mapping_batch *mapping_batch)
-{
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	gk20a_vm_mapping_batch_finish_locked(vm, mapping_batch);
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-}
-
-void gk20a_vm_put_buffers(struct vm_gk20a *vm,
+void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
 			  struct mapped_buffer_node **mapped_buffers,
 			  int num_buffers)
 {
@@ -1192,7 +1159,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm,
 		return;
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	gk20a_vm_mapping_batch_start(&batch);
+	nvgpu_vm_mapping_batch_start(&batch);
 	vm->kref_put_batch = &batch;
 
 	for (i = 0; i < num_buffers; ++i)
@@ -1200,13 +1167,13 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm,
 			 gk20a_vm_unmap_locked_kref);
 
 	vm->kref_put_batch = NULL;
-	gk20a_vm_mapping_batch_finish_locked(vm, &batch);
+	nvgpu_vm_mapping_batch_finish_locked(vm, &batch);
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
 
 	nvgpu_big_free(vm->mm->g, mapped_buffers);
 }
 
-static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
+static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
 				struct vm_gk20a_mapping_batch *batch)
 {
 	struct gk20a *g = vm->mm->g;
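A note on the batching machinery renamed in the previous two hunks: its whole point is to coalesce cache maintenance. While a batch is active each unmap only sets need_tlb_invalidate, and the finish call issues a single TLB invalidate for the entire batch instead of one per buffer. The caller-side shape, condensed from the nvgpu_vm_put_buffers() body above:

	struct vm_gk20a_mapping_batch batch;
	int i;

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	nvgpu_vm_mapping_batch_start(&batch);
	vm->kref_put_batch = &batch;	/* unmaps from kref_put() join the batch */

	for (i = 0; i < num_buffers; ++i)
		kref_put(&mapped_buffers[i]->ref, gk20a_vm_unmap_locked_kref);

	vm->kref_put_batch = NULL;
	nvgpu_vm_mapping_batch_finish_locked(vm, &batch);	/* one invalidate */
	nvgpu_mutex_release(&vm->update_gmmu_lock);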
@@ -1650,7 +1617,7 @@ static enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
 	}
 }
 
-static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
+static u64 nvgpu_vm_map_duplicate_locked(struct vm_gk20a *vm,
 					 struct dma_buf *dmabuf,
 					 u64 offset_align,
 					 u32 flags,
@@ -1997,7 +1964,7 @@ static u64 gk20a_mm_get_align(struct gk20a *g, struct scatterlist *sgl,
 	return align;
 }
 
-u64 gk20a_vm_map(struct vm_gk20a *vm,
+u64 nvgpu_vm_map(struct vm_gk20a *vm,
 		 struct dma_buf *dmabuf,
 		 u64 offset_align,
 		 u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/,
@@ -2038,7 +2005,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 	/* check if this buffer is already mapped */
 	if (!vm->userspace_managed) {
-		map_offset = gk20a_vm_map_duplicate_locked(
+		map_offset = nvgpu_vm_map_duplicate_locked(
 			vm, dmabuf, offset_align,
 			flags, kind, sgt,
 			user_mapped, rw_flag);
@@ -2256,7 +2223,7 @@ clean_up:
 	return 0;
 }
 
-int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
+int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm,
 			       u64 mapping_gva,
 			       u64 *compbits_win_size,
 			       u32 *compbits_win_ctagline,
@@ -2298,7 +2265,7 @@ int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
 }
 
 
-int gk20a_vm_map_compbits(struct vm_gk20a *vm,
+int nvgpu_vm_map_compbits(struct vm_gk20a *vm,
 			  u64 mapping_gva,
 			  u64 *compbits_win_gva,
 			  u64 *mapping_iova,
@@ -3059,7 +3026,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 }
 
 /* NOTE! mapped_buffers lock must be held */
-void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
+void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
 			   struct vm_gk20a_mapping_batch *batch)
 {
 	struct vm_gk20a *vm = mapped_buffer->vm;
@@ -3115,7 +3082,7 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
 	return;
 }
 
-void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
+void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
 {
 	struct gk20a *g = vm->mm->g;
 	struct mapped_buffer_node *mapped_buffer;
@@ -3148,76 +3115,6 @@ static void gk20a_vm_free_entries(struct vm_gk20a *vm,
 	parent->entries = NULL;
 }
 
-static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
-{
-	struct mapped_buffer_node *mapped_buffer;
-	struct vm_reserved_va_node *va_node, *va_node_tmp;
-	struct nvgpu_rbtree_node *node = NULL;
-	struct gk20a *g = vm->mm->g;
-
-	gk20a_dbg_fn("");
-
-	/*
-	 * Do this outside of the update_gmmu_lock since unmapping the semaphore
-	 * pool involves unmapping a GMMU mapping which means acquiring the
-	 * update_gmmu_lock.
-	 */
-	if (!(g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS)) {
-		if (vm->sema_pool) {
-			nvgpu_semaphore_pool_unmap(vm->sema_pool, vm);
-			nvgpu_semaphore_pool_put(vm->sema_pool);
-		}
-	}
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-	/* TBD: add a flag here for the unmap code to recognize teardown
-	 * and short-circuit any otherwise expensive operations. */
-
-	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
-	while (node) {
-		mapped_buffer = mapped_buffer_from_rbtree_node(node);
-		gk20a_vm_unmap_locked(mapped_buffer, NULL);
-		nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
-	}
-
-	/* destroy remaining reserved memory areas */
-	nvgpu_list_for_each_entry_safe(va_node, va_node_tmp,
-		&vm->reserved_va_list,
-		vm_reserved_va_node, reserved_va_list) {
-		nvgpu_list_del(&va_node->reserved_va_list);
-		nvgpu_kfree(vm->mm->g, va_node);
-	}
-
-	gk20a_deinit_vm(vm);
-
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-}
-
-void gk20a_vm_remove_support(struct vm_gk20a *vm)
-{
-	gk20a_vm_remove_support_nofree(vm);
-	/* vm is not used anymore. release it. */
-	nvgpu_kfree(vm->mm->g, vm);
-}
-
-static void gk20a_vm_remove_support_kref(struct kref *ref)
-{
-	struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
-	struct gk20a *g = gk20a_from_vm(vm);
-	g->ops.mm.vm_remove(vm);
-}
-
-void gk20a_vm_get(struct vm_gk20a *vm)
-{
-	kref_get(&vm->ref);
-}
-
-void gk20a_vm_put(struct vm_gk20a *vm)
-{
-	kref_put(&vm->ref, gk20a_vm_remove_support_kref);
-}
-
 const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
 	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
 	 .lo_bit = {26, 26},
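The gk20a_vm_get()/gk20a_vm_put() pair removed above is the standard kref idiom: the creator holds one reference, each binder (channel, TSG, CDE context) takes another, and the last put runs a release callback, here routed through g->ops.mm.vm_remove. A self-contained userspace model of that lifetime rule; it uses a plain int where the kernel uses an atomic kref, and all names are illustrative:

	#include <stdio.h>
	#include <stdlib.h>

	struct ref {
		int count;			/* kernel: atomic kref */
		void (*release)(struct ref *r);
	};

	static void ref_get(struct ref *r) { r->count++; }

	static void ref_put(struct ref *r)
	{
		if (--r->count == 0)
			r->release(r);		/* last ref: destroy the object */
	}

	struct vm {
		struct ref ref;			/* first member, so the cast works */
	};

	static void vm_release(struct ref *r)
	{
		struct vm *vm = (struct vm *)r;	/* kernel: container_of() */
		printf("last reference dropped, freeing VM\n");
		free(vm);
	}

	int main(void)
	{
		struct vm *vm = calloc(1, sizeof(*vm));

		if (!vm)
			return 1;
		vm->ref.count = 1;		/* creator's reference */
		vm->ref.release = vm_release;

		ref_get(&vm->ref);		/* e.g. a channel binds the VM */
		ref_put(&vm->ref);		/* the channel closes */
		ref_put(&vm->ref);		/* creator's put: vm_release() runs */
		return 0;
	}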
@@ -3284,7 +3181,7 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
 			SZ_4K);
 	if (!sema_sea->gpu_va) {
 		nvgpu_free(&vm->kernel, sema_sea->gpu_va);
-		gk20a_vm_put(vm);
+		nvgpu_vm_put(vm);
 		return -ENOMEM;
 	}
 
@@ -3408,7 +3305,7 @@ static int init_vm_page_tables(struct vm_gk20a *vm)
 }
 
 /**
- * gk20a_init_vm() - Initialize an address space.
+ * nvgpu_init_vm() - Initialize an address space.
  *
  * @mm - Parent MM.
 * @vm - The VM to init.
@@ -3443,7 +3340,7 @@ static int init_vm_page_tables(struct vm_gk20a *vm)
 * such cases the @kernel_reserved and @low_hole should sum to exactly
 * @aperture_size.
 */
-int gk20a_init_vm(struct mm_gk20a *mm,
+int nvgpu_init_vm(struct mm_gk20a *mm,
 		  struct vm_gk20a *vm,
 		  u32 big_page_size,
 		  u64 low_hole,
@@ -3683,7 +3580,7 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
 
 	snprintf(name, sizeof(name), "as_%d", as_share->id);
 
-	err = gk20a_init_vm(mm, vm, big_page_size,
+	err = nvgpu_init_vm(mm, vm, big_page_size,
 			big_page_size << 10,
 			mm->channel.kernel_size,
 			mm->channel.user_size + mm->channel.kernel_size,
@@ -3701,7 +3598,7 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share)
 	vm->as_share = NULL;
 	as_share->vm = NULL;
 
-	gk20a_vm_put(vm);
+	nvgpu_vm_put(vm);
 
 	return 0;
 }
@@ -3864,7 +3761,7 @@ int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
 
 	gk20a_dbg_fn("");
 
-	gk20a_vm_get(vm);
+	nvgpu_vm_get(vm);
 	ch->vm = vm;
 	err = channel_gk20a_commit_va(ch);
 	if (err)
@@ -3960,7 +3857,7 @@ out:
 
 }
 
-int gk20a_vm_map_buffer(struct vm_gk20a *vm,
+int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 			int dmabuf_fd,
 			u64 *offset_align,
 			u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
@@ -3989,7 +3886,7 @@ int gk20a_vm_map_buffer(struct vm_gk20a *vm,
 		return err;
 	}
 
-	ret_va = gk20a_vm_map(vm, dmabuf, *offset_align,
+	ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align,
 			flags, kind, NULL, true,
 			gk20a_mem_flag_none,
 			buffer_offset,
@@ -4005,16 +3902,16 @@ int gk20a_vm_map_buffer(struct vm_gk20a *vm,
 	return err;
 }
 
-int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
+int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
 			  struct vm_gk20a_mapping_batch *batch)
 {
 	gk20a_dbg_fn("");
 
-	gk20a_vm_unmap_user(vm, offset, batch);
+	nvgpu_vm_unmap_user(vm, offset, batch);
 	return 0;
 }
 
-void gk20a_deinit_vm(struct vm_gk20a *vm)
+void nvgpu_deinit_vm(struct vm_gk20a *vm)
 {
 	if (nvgpu_alloc_initialized(&vm->kernel))
 		nvgpu_alloc_destroy(&vm->kernel);
@@ -4069,7 +3966,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 
 	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
 	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
-	gk20a_init_vm(mm, vm,
+	nvgpu_init_vm(mm, vm,
 			big_page_size,
 			SZ_4K,				/* Low hole */
 			mm->bar1.aperture_size - SZ_4K,	/* Kernel reserved. */
@@ -4085,7 +3982,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 	return 0;
 
 clean_up_va:
-	gk20a_deinit_vm(vm);
+	nvgpu_deinit_vm(vm);
 	return err;
 }
 
@@ -4108,7 +4005,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
 	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
 
-	gk20a_init_vm(mm, vm, big_page_size,
+	nvgpu_init_vm(mm, vm, big_page_size,
 			low_hole,
 			aperture_size - low_hole,
 			aperture_size,
@@ -4124,7 +4021,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 	return 0;
 
 clean_up_va:
-	gk20a_deinit_vm(vm);
+	nvgpu_deinit_vm(vm);
 	return err;
 }
 
@@ -4149,7 +4046,7 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm)
 	struct gk20a *g = gk20a_from_mm(mm);
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
-	return gk20a_init_vm(mm, vm, big_page_size,
+	return nvgpu_init_vm(mm, vm, big_page_size,
 			big_page_size << 10,
 			NV_MM_DEFAULT_KERNEL_SIZE,
 			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
@@ -4162,7 +4059,7 @@ static int gk20a_init_ce_vm(struct mm_gk20a *mm)
 	struct gk20a *g = gk20a_from_mm(mm);
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
-	return gk20a_init_vm(mm, vm, big_page_size,
+	return nvgpu_init_vm(mm, vm, big_page_size,
 			big_page_size << 10,
 			NV_MM_DEFAULT_KERNEL_SIZE,
 			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
@@ -4399,7 +4296,7 @@ hw_was_off:
 	gk20a_idle_nosuspend(g->dev);
 }
 
-int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
+int nvgpu_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
 			 struct dma_buf **dmabuf,
 			 u64 *offset)
 {
@@ -4503,7 +4400,7 @@ void gk20a_init_mm(struct gpu_ops *gops)
 {
 	gops->mm.gmmu_map = gk20a_locked_gmmu_map;
 	gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;
-	gops->mm.vm_remove = gk20a_vm_remove_support;
+	gops->mm.vm_remove = nvgpu_vm_remove_support;
 	gops->mm.vm_alloc_share = gk20a_vm_alloc_share;
 	gops->mm.vm_bind_channel = gk20a_vm_bind_channel;
 	gops->mm.fb_flush = gk20a_mm_fb_flush;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 0a102cb2..331843cc 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -26,6 +26,7 @@
 
 #include <nvgpu/nvgpu_mem.h>
 #include <nvgpu/allocator.h>
+#include <nvgpu/vm.h>
 #include <nvgpu/list.h>
 #include <nvgpu/rbtree.h>
 #include <nvgpu/kref.h>
@@ -116,13 +117,6 @@ gk20a_buffer_state_from_list(struct nvgpu_list_node *node)
 	((uintptr_t)node - offsetof(struct gk20a_buffer_state, list));
 };
 
-enum gmmu_pgsz_gk20a {
-	gmmu_page_size_small = 0,
-	gmmu_page_size_big = 1,
-	gmmu_page_size_kernel = 2,
-	gmmu_nr_page_sizes = 3,
-};
-
 struct gk20a_comptags {
 	u32 offset;
 	u32 lines;
@@ -130,15 +124,6 @@ struct gk20a_comptags {
 	bool user_mappable;
 };
 
-struct gk20a_mm_entry {
-	/* backing for */
-	struct nvgpu_mem mem;
-	u32 woffset; /* if >0, mem is a shadow copy, owned by another entry */
-	int pgsz;
-	struct gk20a_mm_entry *entries;
-	int num_entries;
-};
-
 struct priv_cmd_queue {
 	struct nvgpu_mem mem;
 	u32 size; /* num of entries in words */
@@ -214,84 +199,6 @@ vm_reserved_va_node_from_reserved_va_list(struct nvgpu_list_node *node)
 	((uintptr_t)node - offsetof(struct vm_reserved_va_node, reserved_va_list));
 };
 
-struct gk20a_mmu_level {
-	int hi_bit[2];
-	int lo_bit[2];
-	int (*update_entry)(struct vm_gk20a *vm,
-			    struct gk20a_mm_entry *pte,
-			    u32 i, u32 gmmu_pgsz_idx,
-			    struct scatterlist **sgl,
-			    u64 *offset,
-			    u64 *iova,
-			    u32 kind_v, u64 *ctag,
-			    bool cacheable, bool unmapped_pte,
-			    int rw_flag, bool sparse, bool priv,
-			    enum nvgpu_aperture aperture);
-	size_t entry_size;
-};
-
-/* map/unmap batch state */
-struct vm_gk20a_mapping_batch
-{
-	bool gpu_l2_flushed;
-	bool need_tlb_invalidate;
-};
-
-struct vm_gk20a {
-	struct mm_gk20a *mm;
-	struct gk20a_as_share *as_share; /* as_share this represents */
-
-	u64 va_start;
-	u64 va_limit;
-
-	int num_user_mapped_buffers;
-
-	bool big_pages; /* enable large page support */
-	bool enable_ctag;
-	bool mapped;
-
-	u32 big_page_size;
-
-	bool userspace_managed;
-
-	const struct gk20a_mmu_level *mmu_levels;
-
-	struct kref ref;
-
-	struct nvgpu_mutex update_gmmu_lock;
-
-	struct gk20a_mm_entry pdb;
-
-	/*
-	 * These structs define the address spaces. In some cases it's possible
-	 * to merge address spaces (user and user_lp) and in other cases it's
-	 * not. vma[] allows the code to be agnostic to this by always using
-	 * address spaces through this pointer array.
-	 */
-	struct nvgpu_allocator *vma[gmmu_nr_page_sizes];
-	struct nvgpu_allocator kernel;
-	struct nvgpu_allocator user;
-	struct nvgpu_allocator user_lp;
-
-	struct nvgpu_rbtree_node *mapped_buffers;
-
-	struct nvgpu_list_node reserved_va_list;
-
-#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
-	u64 handle;
-#endif
-	u32 gmmu_page_sizes[gmmu_nr_page_sizes];
-
-	/* if non-NULL, kref_put will use this batch when
-	   unmapping. Must hold vm->update_gmmu_lock. */
-	struct vm_gk20a_mapping_batch *kref_put_batch;
-
-	/*
-	 * Each address space needs to have a semaphore pool.
-	 */
-	struct nvgpu_semaphore_pool *sema_pool;
-};
-
 struct gk20a;
 struct channel_gk20a;
 
@@ -562,57 +469,13 @@ struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf);
 void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
 		    struct sg_table *sgt);
 
-u64 gk20a_vm_map(struct vm_gk20a *vm,
-		 struct dma_buf *dmabuf,
-		 u64 offset_align,
-		 u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/,
-		 int kind,
-		 struct sg_table **sgt,
-		 bool user_mapped,
-		 int rw_flag,
-		 u64 buffer_offset,
-		 u64 mapping_size,
-		 struct vm_gk20a_mapping_batch *mapping_batch);
-
-int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
+int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm,
 			       u64 mapping_gva,
 			       u64 *compbits_win_size,
 			       u32 *compbits_win_ctagline,
 			       u32 *mapping_ctagline,
 			       u32 *flags);
 
-int gk20a_vm_map_compbits(struct vm_gk20a *vm,
-			  u64 mapping_gva,
-			  u64 *compbits_win_gva,
-			  u64 *mapping_iova,
-			  u32 flags);
-
-/* unmap handle from kernel */
-void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
-
-void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
-			   struct vm_gk20a_mapping_batch *batch);
-
-/* get reference to all currently mapped buffers */
-int gk20a_vm_get_buffers(struct vm_gk20a *vm,
-			 struct mapped_buffer_node ***mapped_buffers,
-			 int *num_buffers);
-
-/* put references on the given buffers */
-void gk20a_vm_put_buffers(struct vm_gk20a *vm,
-			  struct mapped_buffer_node **mapped_buffers,
-			  int num_buffers);
-
-/* find buffer corresponding to va */
-int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
-			 struct dma_buf **dmabuf,
-			 u64 *offset);
-
-void gk20a_vm_get(struct vm_gk20a *vm);
-void gk20a_vm_put(struct vm_gk20a *vm);
-
-void gk20a_vm_remove_support(struct vm_gk20a *vm);
-
 u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 		      u64 size,
 		      enum gmmu_pgsz_gk20a gmmu_pgsz_idx);
@@ -635,44 +498,11 @@ int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
 			  struct channel_gk20a *ch);
 int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch);
 
-/* batching eliminates redundant cache flushes and invalidates */
-void gk20a_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch);
-void gk20a_vm_mapping_batch_finish(
-	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
-/* called when holding vm->update_gmmu_lock */
-void gk20a_vm_mapping_batch_finish_locked(
-	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
-
-
 int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes);
 int gk20a_vidmem_get_space(struct gk20a *g, u64 *space);
 int gk20a_vidbuf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
 			       void *buffer, u64 offset, u64 size, u32 cmd);
 
-/* Note: batch may be NULL if map op is not part of a batch */
-int gk20a_vm_map_buffer(struct vm_gk20a *vm,
-			int dmabuf_fd,
-			u64 *offset_align,
-			u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */
-			int kind,
-			u64 buffer_offset,
-			u64 mapping_size,
-			struct vm_gk20a_mapping_batch *batch);
-
-int gk20a_init_vm(struct mm_gk20a *mm,
-		  struct vm_gk20a *vm,
-		  u32 big_page_size,
-		  u64 low_hole,
-		  u64 kernel_reserved,
-		  u64 aperture_size,
-		  bool big_pages,
-		  bool userspace_managed,
-		  char *name);
-void gk20a_deinit_vm(struct vm_gk20a *vm);
-
-/* Note: batch may be NULL if unmap op is not part of a batch */
-int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
-			  struct vm_gk20a_mapping_batch *batch);
 void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
 			struct gk20a_comptags *comptags);
 dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr);
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
index d1e667b6..f9884cfb 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
@@ -289,7 +289,7 @@ void gk20a_tsg_release(struct kref *ref)
 		tsg->tsg_gr_ctx = NULL;
 	}
 	if (tsg->vm) {
-		gk20a_vm_put(tsg->vm);
+		nvgpu_vm_put(tsg->vm);
 		tsg->vm = NULL;
 	}
 