From d37e8f7dcf190f31f9c0c12583db2bb0c0d313c0 Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Mon, 17 Apr 2017 16:26:28 -0700
Subject: gpu: nvgpu: Split VM interface out

This patch begins the major rework of the GPU's virtual memory manager
(VMM). The VMM is the piece of code that handles the userspace interface
to buffers and their mappings into the GMMU.

The core data structure is the VM - for now still known as
'struct vm_gk20a'. Each one of these structs represents one address
space to which channels or TSGs may bind themselves.

The VMM splits the interface up into two broad categories: first, the
common, OS-independent interfaces; and second, the OS-specific
interfaces.

OS independent
--------------

This is the code that manages the lifetime of VMs and of the buffers
inside them (search, batch mapping): creation, destruction, etc.

OS Specific
-----------

This handles the mapping of buffers as they are represented by the OS
(dma_bufs on Linux, for example).

This patch is by no means complete. There are still Linux-specific
functions scattered through ostensibly OS-independent code. This is the
first step. A patch that rewrites everything in one go would simply be
too big to review effectively. Instead, the goal of this change is
simply to separate out the basic OS-specific and OS-agnostic interfaces
into their own header files. The next series of patches will start to
pull the relevant implementations into OS-specific C files and common C
files.

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: I242c7206047b6c769296226d855b7e44d5c4bfa8
Signed-off-by: Alex Waterman
Reviewed-on: http://git-master/r/1464939
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/Makefile.nvgpu          |   1 +
 drivers/gpu/nvgpu/common/linux/ioctl_as.c |  21 +--
 drivers/gpu/nvgpu/common/linux/vm_priv.h  |  62 +++++++++
 drivers/gpu/nvgpu/common/mm/vm.c          | 129 +++++++++++++++++
 drivers/gpu/nvgpu/common/mm/vm_area.c     | 223 ++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/cde_gk20a.c       |  16 ++-
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c   |  17 ++-
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c   |  17 ++-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c        |   4 +-
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c        | 183 ++++++------------------
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h        | 174 +----------------------
 drivers/gpu/nvgpu/gk20a/tsg_gk20a.c       |   2 +-
 drivers/gpu/nvgpu/gm20b/mm_gm20b.c        |   2 +-
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c        |   4 +-
 drivers/gpu/nvgpu/include/nvgpu/gmmu.h    |  35 ++++-
 drivers/gpu/nvgpu/include/nvgpu/vm.h      | 144 +++++++++++++++++++
 drivers/gpu/nvgpu/vgpu/gr_vgpu.c          |   4 +-
 drivers/gpu/nvgpu/vgpu/mm_vgpu.c          |   6 +-
 18 files changed, 694 insertions(+), 350 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/common/linux/vm_priv.h
 create mode 100644 drivers/gpu/nvgpu/common/mm/vm.c
 create mode 100644 drivers/gpu/nvgpu/common/mm/vm_area.c
 create mode 100644 drivers/gpu/nvgpu/include/nvgpu/vm.h

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index 04107dbc..0a60eece 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -45,6 +45,7 @@ nvgpu-y := \
 	common/mm/page_allocator.o \
 	common/mm/lockless_allocator.o \
 	common/mm/gmmu.o \
+	common/mm/vm.o \
 	common/pramin.o \
 	common/semaphore.o \
 	common/as.o \
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_as.c b/drivers/gpu/nvgpu/common/linux/ioctl_as.c
index 4bbcedda..7a24a14f 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_as.c
@@ -28,6 +28,7 @@ #include
"gk20a/gk20a.h" #include "gk20a/platform_gk20a.h" #include "ioctl_as.h" +#include "vm_priv.h" static int gk20a_as_ioctl_bind_channel( struct gk20a_as_share *as_share, @@ -72,7 +73,7 @@ static int gk20a_as_ioctl_map_buffer_ex( { gk20a_dbg_fn(""); - return gk20a_vm_map_buffer(as_share->vm, args->dmabuf_fd, + return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, &args->offset, args->flags, args->kind, args->buffer_offset, @@ -85,7 +86,7 @@ static int gk20a_as_ioctl_map_buffer( struct nvgpu_as_map_buffer_args *args) { gk20a_dbg_fn(""); - return gk20a_vm_map_buffer(as_share->vm, args->dmabuf_fd, + return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, &args->o_a.offset, args->flags, NV_KIND_DEFAULT, 0, 0, NULL); @@ -97,7 +98,7 @@ static int gk20a_as_ioctl_unmap_buffer( struct nvgpu_as_unmap_buffer_args *args) { gk20a_dbg_fn(""); - return gk20a_vm_unmap_buffer(as_share->vm, args->offset, NULL); + return nvgpu_vm_unmap_buffer(as_share->vm, args->offset, NULL); } static int gk20a_as_ioctl_map_buffer_batch( @@ -123,7 +124,7 @@ static int gk20a_as_ioctl_map_buffer_batch( args->num_maps > g->gpu_characteristics.map_buffer_batch_limit) return -EINVAL; - gk20a_vm_mapping_batch_start(&batch); + nvgpu_vm_mapping_batch_start(&batch); for (i = 0; i < args->num_unmaps; ++i) { struct nvgpu_as_unmap_buffer_args unmap_args; @@ -134,14 +135,14 @@ static int gk20a_as_ioctl_map_buffer_batch( break; } - err = gk20a_vm_unmap_buffer(as_share->vm, unmap_args.offset, + err = nvgpu_vm_unmap_buffer(as_share->vm, unmap_args.offset, &batch); if (err) break; } if (err) { - gk20a_vm_mapping_batch_finish(as_share->vm, &batch); + nvgpu_vm_mapping_batch_finish(as_share->vm, &batch); args->num_unmaps = i; args->num_maps = 0; @@ -158,7 +159,7 @@ static int gk20a_as_ioctl_map_buffer_batch( break; } - err = gk20a_vm_map_buffer( + err = nvgpu_vm_map_buffer( as_share->vm, map_args.dmabuf_fd, &map_args.offset, map_args.flags, map_args.kind, @@ -169,7 +170,7 @@ static int gk20a_as_ioctl_map_buffer_batch( break; } - gk20a_vm_mapping_batch_finish(as_share->vm, &batch); + nvgpu_vm_mapping_batch_finish(as_share->vm, &batch); if (err) args->num_maps = i; @@ -228,7 +229,7 @@ static int gk20a_as_ioctl_get_buffer_compbits_info( struct nvgpu_as_get_buffer_compbits_info_args *args) { gk20a_dbg_fn(""); - return gk20a_vm_get_compbits_info(as_share->vm, + return nvgpu_vm_get_compbits_info(as_share->vm, args->mapping_gva, &args->compbits_win_size, &args->compbits_win_ctagline, @@ -241,7 +242,7 @@ static int gk20a_as_ioctl_map_buffer_compbits( struct nvgpu_as_map_buffer_compbits_args *args) { gk20a_dbg_fn(""); - return gk20a_vm_map_compbits(as_share->vm, + return nvgpu_vm_map_compbits(as_share->vm, args->mapping_gva, &args->compbits_win_gva, &args->mapping_iova, diff --git a/drivers/gpu/nvgpu/common/linux/vm_priv.h b/drivers/gpu/nvgpu/common/linux/vm_priv.h new file mode 100644 index 00000000..c0fb0ffe --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vm_priv.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __COMMON_LINUX_VM_PRIV_H__ +#define __COMMON_LINUX_VM_PRIV_H__ + +#include + +struct sg_table; +struct dma_buf; + +struct vm_gk20a; +struct vm_gk20a_mapping_batch; + +u64 nvgpu_vm_map(struct vm_gk20a *vm, + struct dma_buf *dmabuf, + u64 offset_align, + u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/, + int kind, + struct sg_table **sgt, + bool user_mapped, + int rw_flag, + u64 buffer_offset, + u64 mapping_size, + struct vm_gk20a_mapping_batch *mapping_batch); + +int nvgpu_vm_map_compbits(struct vm_gk20a *vm, + u64 mapping_gva, + u64 *compbits_win_gva, + u64 *mapping_iova, + u32 flags); + +/* Note: batch may be NULL if map op is not part of a batch */ +int nvgpu_vm_map_buffer(struct vm_gk20a *vm, + int dmabuf_fd, + u64 *offset_align, + u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */ + int kind, + u64 buffer_offset, + u64 mapping_size, + struct vm_gk20a_mapping_batch *batch); + +void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset); + +/* find buffer corresponding to va */ +int nvgpu_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, + struct dma_buf **dmabuf, + u64 *offset); +#endif diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c new file mode 100644 index 00000000..eaf30fd0 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/vm.c @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/mm_gk20a.h" + +void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch) +{ + memset(mapping_batch, 0, sizeof(*mapping_batch)); + mapping_batch->gpu_l2_flushed = false; + mapping_batch->need_tlb_invalidate = false; +} + +void nvgpu_vm_mapping_batch_finish_locked( + struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *mapping_batch) +{ + /* hanging kref_put batch pointer? */ + WARN_ON(vm->kref_put_batch == mapping_batch); + + if (mapping_batch->need_tlb_invalidate) { + struct gk20a *g = gk20a_from_vm(vm); + g->ops.fb.tlb_invalidate(g, &vm->pdb.mem); + } +} + +void nvgpu_vm_mapping_batch_finish(struct vm_gk20a *vm, + struct vm_gk20a_mapping_batch *mapping_batch) +{ + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + nvgpu_vm_mapping_batch_finish_locked(vm, mapping_batch); + nvgpu_mutex_release(&vm->update_gmmu_lock); +} + +void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm) +{ + struct mapped_buffer_node *mapped_buffer; + struct vm_reserved_va_node *va_node, *va_node_tmp; + struct nvgpu_rbtree_node *node = NULL; + struct gk20a *g = vm->mm->g; + + gk20a_dbg_fn(""); + + /* + * Do this outside of the update_gmmu_lock since unmapping the semaphore + * pool involves unmapping a GMMU mapping which means acquiring the + * update_gmmu_lock.
+ */ + if (!(g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS)) { + if (vm->sema_pool) { + nvgpu_semaphore_pool_unmap(vm->sema_pool, vm); + nvgpu_semaphore_pool_put(vm->sema_pool); + } + } + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + + nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers); + while (node) { + mapped_buffer = mapped_buffer_from_rbtree_node(node); + nvgpu_vm_unmap_locked(mapped_buffer, NULL); + nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers); + } + + /* destroy remaining reserved memory areas */ + nvgpu_list_for_each_entry_safe(va_node, va_node_tmp, + &vm->reserved_va_list, + vm_reserved_va_node, reserved_va_list) { + nvgpu_list_del(&va_node->reserved_va_list); + nvgpu_kfree(vm->mm->g, va_node); + } + + nvgpu_deinit_vm(vm); + + nvgpu_mutex_release(&vm->update_gmmu_lock); +} + +void nvgpu_vm_remove_support(struct vm_gk20a *vm) +{ + nvgpu_vm_remove_support_nofree(vm); + /* vm is not used anymore. release it. */ + nvgpu_kfree(vm->mm->g, vm); +} + +static void nvgpu_vm_remove_support_kref(struct kref *ref) +{ + struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref); + struct gk20a *g = gk20a_from_vm(vm); + + g->ops.mm.vm_remove(vm); +} + +void nvgpu_vm_get(struct vm_gk20a *vm) +{ + kref_get(&vm->ref); +} + +void nvgpu_vm_put(struct vm_gk20a *vm) +{ + kref_put(&vm->ref, nvgpu_vm_remove_support_kref); +} + +void gk20a_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block) +{ + struct gk20a *g = vm->mm->g; + + gk20a_dbg_fn(""); + + gk20a_free_inst_block(g, inst_block); + nvgpu_vm_remove_support_nofree(vm); +} diff --git a/drivers/gpu/nvgpu/common/mm/vm_area.c b/drivers/gpu/nvgpu/common/mm/vm_area.c new file mode 100644 index 00000000..7b831947 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/vm_area.c @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/mm_gk20a.h" + +struct nvgpu_vm_area *nvgpu_vm_area_find(struct vm_gk20a *vm, u64 addr) +{ + struct nvgpu_vm_area *vm_area; + + nvgpu_list_for_each_entry(vm_area, &vm->vm_area_list, + nvgpu_vm_area, vm_area_list) { + if (addr >= vm_area->addr && + addr < (u64)vm_area->addr + (u64)vm_area->size) + return vm_area; + } + + return NULL; +} + +int nvgpu_vm_area_validate_buffer(struct vm_gk20a *vm, + u64 map_addr, u64 map_size, int pgsz_idx, + struct nvgpu_vm_area **pvm_area) +{ + struct gk20a *g = vm->mm->g; + struct nvgpu_vm_area *vm_area; + struct nvgpu_mapped_buf *buffer; + u64 map_end = map_addr + map_size; + + /* can wrap around with insane map_size; zero is disallowed too */ + if (map_end <= map_addr) { + nvgpu_warn(g, "fixed offset mapping with invalid map_size"); + return -EINVAL; + } + + if (map_addr & (vm->gmmu_page_sizes[pgsz_idx] - 1)) { + nvgpu_err(g, "map offset must be buffer page size aligned 0x%llx", + map_addr); + return -EINVAL; + } + + /* Find the space reservation, but it's ok to have none for + * userspace-managed address spaces */ + vm_area = nvgpu_vm_area_find(vm, map_addr); + if (!vm_area && !vm->userspace_managed) { + nvgpu_warn(g, "fixed offset mapping without space allocation"); + return -EINVAL; + } + + /* Mapped area should fit inside va, if there's one */ + if (vm_area && map_end > vm_area->addr + vm_area->size) { + nvgpu_warn(g, "fixed offset mapping size overflows va node"); + return -EINVAL; + } + + /* check that this mapping does not collide with existing + * mappings by checking the buffer with the highest GPU VA + * that is less than our buffer end */ + buffer = __nvgpu_vm_find_mapped_buf_less_than( + vm, map_addr + map_size); + if (buffer && buffer->addr + buffer->size > map_addr) { + nvgpu_warn(g, "overlapping buffer map requested"); + return -EINVAL; + } + + *pvm_area = vm_area; + + return 0; +} + +int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u32 pages, u32 page_size, + u64 *addr, u32 flags) +{ + struct gk20a *g = vm->mm->g; + struct nvgpu_allocator *vma; + struct nvgpu_vm_area *vm_area; + u64 vaddr_start = 0; + int pgsz_idx = gmmu_page_size_small; + + nvgpu_log(g, gpu_dbg_map, + "ADD vm_area: pgsz=%#-8x pages=%-9u addr=%#-14llx flags=0x%x", + page_size, pages, *addr, flags); + + for (; pgsz_idx < gmmu_nr_page_sizes; pgsz_idx++) { + if (vm->gmmu_page_sizes[pgsz_idx] == page_size) + break; + } + + if (pgsz_idx > gmmu_page_size_big) + return -EINVAL; + + if (!vm->big_pages && pgsz_idx == gmmu_page_size_big) + return -EINVAL; + + vm_area = nvgpu_kzalloc(g, sizeof(*vm_area)); + if (!vm_area) + goto clean_up_err; + + vma = vm->vma[pgsz_idx]; + if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) + vaddr_start = nvgpu_alloc_fixed(vma, *addr, + (u64)pages * + (u64)page_size, + page_size); + else + vaddr_start = nvgpu_alloc(vma, + (u64)pages * + (u64)page_size); + + if (!vaddr_start) + goto clean_up_err; + + vm_area->flags = flags; + vm_area->addr = vaddr_start; + vm_area->size = (u64)page_size * (u64)pages; + vm_area->pgsz_idx = pgsz_idx; + nvgpu_init_list_node(&vm_area->buffer_list_head); + nvgpu_init_list_node(&vm_area->vm_area_list); + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + + if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) { + u64 map_addr = g->ops.mm.gmmu_map(vm, vaddr_start, + NULL, + 0, + vm_area->size, + pgsz_idx, + 0, + 0, + flags, + gk20a_mem_flag_none, + false, + true, + false, + NULL, + APERTURE_INVALID); + if (!map_addr) { + nvgpu_mutex_release(&vm->update_gmmu_lock); + 
goto clean_up_err; + } + + vm_area->sparse = true; + } + nvgpu_list_add_tail(&vm_area->vm_area_list, &vm->vm_area_list); + + nvgpu_mutex_release(&vm->update_gmmu_lock); + + *addr = vaddr_start; + return 0; + +clean_up_err: + if (vaddr_start) + nvgpu_free(vma, vaddr_start); + if (vm_area) + nvgpu_kfree(g, vm_area); + return -ENOMEM; +} + +int nvgpu_vm_area_free(struct vm_gk20a *vm, u64 addr) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct nvgpu_mapped_buf *buffer, *n; + struct nvgpu_vm_area *vm_area; + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + vm_area = nvgpu_vm_area_find(vm, addr); + if (!vm_area) { + nvgpu_mutex_release(&vm->update_gmmu_lock); + return 0; + } + nvgpu_list_del(&vm_area->vm_area_list); + nvgpu_mutex_release(&vm->update_gmmu_lock); + + nvgpu_log(g, gpu_dbg_map, + "DEL vm_area: pgsz=%#-8x pages=%-9llu " + "addr=%#-14llx flags=0x%x", + vm->gmmu_page_sizes[vm_area->pgsz_idx], + vm_area->size / vm->gmmu_page_sizes[vm_area->pgsz_idx], + vm_area->addr, + vm_area->flags); + + /* Decrement the ref count on all buffers in this vm_area. This + * allows userspace to let the kernel free mappings that are + * only used by this vm_area. */ + nvgpu_list_for_each_entry_safe(buffer, n, + &vm_area->buffer_list_head, + nvgpu_mapped_buf, buffer_list) { + nvgpu_list_del(&buffer->buffer_list); + kref_put(&buffer->ref, gk20a_vm_unmap_locked_kref); + } + + /* if this was a sparse mapping, free the va */ + if (vm_area->sparse) + g->ops.mm.gmmu_unmap(vm, + vm_area->addr, + vm_area->size, + vm_area->pgsz_idx, + true, + gk20a_mem_flag_none, + true, + NULL); + + nvgpu_free(vm->vma[vm_area->pgsz_idx], vm_area->addr); + nvgpu_kfree(g, vm_area); + + return 0; +} diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index d5d75be5..cf95019b 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c @@ -46,6 +46,12 @@ #include #include +/* + * Currently this code uses nvgpu_vm_map() since it takes dmabuf FDs from the + * CDE ioctls. That has to change - instead this needs to take an nvgpu_mem. 
+ */ +#include "common/linux/vm_priv.h" + static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx); static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g); @@ -1016,8 +1022,8 @@ __releases(&cde_app->mutex) /* map the destination buffer */ - get_dma_buf(compbits_scatter_buf); /* a ref for gk20a_vm_map */ - map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_scatter_buf, 0, + get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */ + map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0, NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, compbits_kind, NULL, true, gk20a_mem_flag_none, @@ -1136,7 +1142,7 @@ __releases(&cde_app->mutex) cde_ctx->init_cmd_executed = true; /* unmap the buffers - channel holds references to them now */ - gk20a_vm_unmap(cde_ctx->vm, map_vaddr); + nvgpu_vm_unmap(cde_ctx->vm, map_vaddr); return err; @@ -1144,7 +1150,7 @@ exit_unmap_surface: if (surface) dma_buf_vunmap(compbits_scatter_buf, surface); exit_unmap_vaddr: - gk20a_vm_unmap(cde_ctx->vm, map_vaddr); + nvgpu_vm_unmap(cde_ctx->vm, map_vaddr); exit_idle: gk20a_idle(g); return err; @@ -1277,7 +1283,7 @@ err_init_cde_img: nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr); err_map_backingstore: err_alloc_gpfifo: - gk20a_vm_put(ch->vm); + nvgpu_vm_put(ch->vm); err_commit_va: err_get_gk20a_channel: nvgpu_release_firmware(g, img); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 31358468..b7fb363e 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -38,6 +38,13 @@ #include +/* + * This is required for nvgpu_vm_find_buffer() which is used in the tracing + * code. Once we can get and access userspace buffers without requiring + * direct dma_buf usage this can be removed. + */ +#include "common/linux/vm_priv.h" + /* * Although channels do have pointers back to the gk20a struct that they were * created under in cases where the driver is killed that pointer can be bad. @@ -550,7 +557,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) /* * When releasing the channel we unbind the VM - so release the ref. 
*/ - gk20a_vm_put(ch_vm); + nvgpu_vm_put(ch_vm); nvgpu_spinlock_acquire(&ch->update_fn_lock); ch->update_fn = NULL; @@ -1399,7 +1406,7 @@ static void trace_write_pushbuffer(struct channel_gk20a *c, int err; words = pbdma_gp_entry1_length_v(g->entry1); - err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset); + err = nvgpu_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset); if (!err) mem = dma_buf_vmap(dmabuf); } @@ -1901,7 +1908,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); if (!skip_buffer_refcounting) { - err = gk20a_vm_get_buffers(vm, &mapped_buffers, + err = nvgpu_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers); if (err) return err; @@ -1940,7 +1947,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, return 0; err_put_buffers: - gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); + nvgpu_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); return err; } @@ -2039,7 +2046,7 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, } if (job->num_mapped_buffers) - gk20a_vm_put_buffers(vm, job->mapped_buffers, + nvgpu_vm_put_buffers(vm, job->mapped_buffers, job->num_mapped_buffers); /* Remove job from channel's job list before we close the diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index a08eb047..5351750a 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -26,6 +26,7 @@ #include #include +#include #include "gk20a.h" #include "gk20a/platform_gk20a.h" @@ -37,6 +38,14 @@ #include #include +/* + * Currently this code uses nvgpu_vm_map_buffer() since it takes dmabuf FDs from + * the dbg ioctls. That has to change; this needs to hide the usage of dmabufs + * in Linux specific code. All core driver usage of mapping must be done through + * nvgpu_gmmu_map(). 
+ */ +#include "common/linux/vm_priv.h" + /* * API to get first channel from the list of all channels * bound to the debug session @@ -1844,7 +1853,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, return -EBUSY; } - err = gk20a_init_vm(mm, vm, big_page_size, + err = nvgpu_init_vm(mm, vm, big_page_size, big_page_size << 10, NV_MM_DEFAULT_KERNEL_SIZE, NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, @@ -1860,7 +1869,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, vm, 0); - err = gk20a_vm_map_buffer(vm, + err = nvgpu_vm_map_buffer(vm, args->dmabuf_fd, &args->offset, 0, @@ -1913,7 +1922,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, return 0; err_unmap: - gk20a_vm_unmap_buffer(vm, args->offset, NULL); + nvgpu_vm_unmap_buffer(vm, args->offset, NULL); err_remove_vm: gk20a_remove_vm(vm, &mm->perfbuf.inst_block); nvgpu_mutex_release(&g->dbg_sessions_lock); @@ -1952,7 +1961,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset) err = gk20a_perfbuf_disable_locked(g); - gk20a_vm_unmap_buffer(vm, offset, NULL); + nvgpu_vm_unmap_buffer(vm, offset, NULL); gk20a_remove_vm(vm, &mm->perfbuf.inst_block); g->perfbuf.owner = NULL; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 4d41f9ff..9bd07894 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -3182,14 +3182,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, } else { if (!tsg->tsg_gr_ctx) { tsg->vm = c->vm; - gk20a_vm_get(tsg->vm); + nvgpu_vm_get(tsg->vm); err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg, args->class_num, args->flags); if (err) { nvgpu_err(g, "fail to allocate TSG gr ctx buffer"); - gk20a_vm_put(tsg->vm); + nvgpu_vm_put(tsg->vm); tsg->vm = NULL; goto out; } diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 201c2090..72a3ee13 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -55,6 +55,12 @@ #include #include +/* + * Necessary while transitioning to less coupled code. Will be removed once + * all the common APIs no longer have Linux stuff in them.
+ */ +#include "common/linux/vm_priv.h" + #if defined(CONFIG_GK20A_VIDMEM) static void gk20a_vidmem_clear_mem_worker(struct work_struct *work); #endif @@ -177,8 +183,6 @@ struct gk20a_vidmem_buf { void (*dmabuf_priv_delete)(void *); }; -static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm); - static int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator, u32 *offset, u32 len) { @@ -460,16 +464,6 @@ static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) return 0; } -void gk20a_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block) -{ - struct gk20a *g = vm->mm->g; - - gk20a_dbg_fn(""); - - gk20a_free_inst_block(g, inst_block); - gk20a_vm_remove_support_nofree(vm); -} - static void gk20a_vidmem_destroy(struct gk20a *g) { #if defined(CONFIG_GK20A_VIDMEM) @@ -487,7 +481,7 @@ static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm) mm->vidmem.ce_ctx_id = (u32)~0; - gk20a_vm_remove_support_nofree(&mm->ce.vm); + nvgpu_vm_remove_support_nofree(&mm->ce.vm); } @@ -503,7 +497,7 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm) gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block); gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block); - gk20a_vm_remove_support_nofree(&mm->cde.vm); + nvgpu_vm_remove_support_nofree(&mm->cde.vm); gk20a_semaphore_sea_destroy(g); gk20a_vidmem_destroy(g); @@ -1102,7 +1096,7 @@ static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm, return NULL; } -int gk20a_vm_get_buffers(struct vm_gk20a *vm, +int nvgpu_vm_get_buffers(struct vm_gk20a *vm, struct mapped_buffer_node ***mapped_buffers, int *num_buffers) { @@ -1151,37 +1145,10 @@ static void gk20a_vm_unmap_locked_kref(struct kref *ref) { struct mapped_buffer_node *mapped_buffer = container_of(ref, struct mapped_buffer_node, ref); - gk20a_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch); -} - -void gk20a_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch) -{ - memset(mapping_batch, 0, sizeof(*mapping_batch)); - mapping_batch->gpu_l2_flushed = false; - mapping_batch->need_tlb_invalidate = false; + nvgpu_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch); } -void gk20a_vm_mapping_batch_finish_locked( - struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *mapping_batch) -{ - /* hanging kref_put batch pointer? 
*/ - WARN_ON(vm->kref_put_batch == mapping_batch); - - if (mapping_batch->need_tlb_invalidate) { - struct gk20a *g = gk20a_from_vm(vm); - g->ops.fb.tlb_invalidate(g, &vm->pdb.mem); - } -} - -void gk20a_vm_mapping_batch_finish(struct vm_gk20a *vm, - struct vm_gk20a_mapping_batch *mapping_batch) -{ - nvgpu_mutex_acquire(&vm->update_gmmu_lock); - gk20a_vm_mapping_batch_finish_locked(vm, mapping_batch); - nvgpu_mutex_release(&vm->update_gmmu_lock); -} - -void gk20a_vm_put_buffers(struct vm_gk20a *vm, +void nvgpu_vm_put_buffers(struct vm_gk20a *vm, struct mapped_buffer_node **mapped_buffers, int num_buffers) { @@ -1192,7 +1159,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm, return; nvgpu_mutex_acquire(&vm->update_gmmu_lock); - gk20a_vm_mapping_batch_start(&batch); + nvgpu_vm_mapping_batch_start(&batch); vm->kref_put_batch = &batch; for (i = 0; i < num_buffers; ++i) @@ -1200,13 +1167,13 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm, gk20a_vm_unmap_locked_kref); vm->kref_put_batch = NULL; - gk20a_vm_mapping_batch_finish_locked(vm, &batch); + nvgpu_vm_mapping_batch_finish_locked(vm, &batch); nvgpu_mutex_release(&vm->update_gmmu_lock); nvgpu_big_free(vm->mm->g, mapped_buffers); } -static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset, +static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset, struct vm_gk20a_mapping_batch *batch) { struct gk20a *g = vm->mm->g; @@ -1650,7 +1617,7 @@ static enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g, } } -static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm, +static u64 nvgpu_vm_map_duplicate_locked(struct vm_gk20a *vm, struct dma_buf *dmabuf, u64 offset_align, u32 flags, @@ -1997,7 +1964,7 @@ static u64 gk20a_mm_get_align(struct gk20a *g, struct scatterlist *sgl, return align; } -u64 gk20a_vm_map(struct vm_gk20a *vm, +u64 nvgpu_vm_map(struct vm_gk20a *vm, struct dma_buf *dmabuf, u64 offset_align, u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/, @@ -2038,7 +2005,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, /* check if this buffer is already mapped */ if (!vm->userspace_managed) { - map_offset = gk20a_vm_map_duplicate_locked( + map_offset = nvgpu_vm_map_duplicate_locked( vm, dmabuf, offset_align, flags, kind, sgt, user_mapped, rw_flag); @@ -2256,7 +2223,7 @@ clean_up: return 0; } -int gk20a_vm_get_compbits_info(struct vm_gk20a *vm, +int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm, u64 mapping_gva, u64 *compbits_win_size, u32 *compbits_win_ctagline, @@ -2298,7 +2265,7 @@ int gk20a_vm_get_compbits_info(struct vm_gk20a *vm, } -int gk20a_vm_map_compbits(struct vm_gk20a *vm, +int nvgpu_vm_map_compbits(struct vm_gk20a *vm, u64 mapping_gva, u64 *compbits_win_gva, u64 *mapping_iova, @@ -3059,7 +3026,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, } /* NOTE! 
mapped_buffers lock must be held */ -void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, +void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, struct vm_gk20a_mapping_batch *batch) { struct vm_gk20a *vm = mapped_buffer->vm; @@ -3115,7 +3082,7 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, return; } -void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset) +void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset) { struct gk20a *g = vm->mm->g; struct mapped_buffer_node *mapped_buffer; @@ -3148,76 +3115,6 @@ static void gk20a_vm_free_entries(struct vm_gk20a *vm, parent->entries = NULL; } -static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm) -{ - struct mapped_buffer_node *mapped_buffer; - struct vm_reserved_va_node *va_node, *va_node_tmp; - struct nvgpu_rbtree_node *node = NULL; - struct gk20a *g = vm->mm->g; - - gk20a_dbg_fn(""); - - /* - * Do this outside of the update_gmmu_lock since unmapping the semaphore - * pool involves unmapping a GMMU mapping which means aquiring the - * update_gmmu_lock. - */ - if (!(g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS)) { - if (vm->sema_pool) { - nvgpu_semaphore_pool_unmap(vm->sema_pool, vm); - nvgpu_semaphore_pool_put(vm->sema_pool); - } - } - - nvgpu_mutex_acquire(&vm->update_gmmu_lock); - - /* TBD: add a flag here for the unmap code to recognize teardown - * and short-circuit any otherwise expensive operations. */ - - nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers); - while (node) { - mapped_buffer = mapped_buffer_from_rbtree_node(node); - gk20a_vm_unmap_locked(mapped_buffer, NULL); - nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers); - } - - /* destroy remaining reserved memory areas */ - nvgpu_list_for_each_entry_safe(va_node, va_node_tmp, - &vm->reserved_va_list, - vm_reserved_va_node, reserved_va_list) { - nvgpu_list_del(&va_node->reserved_va_list); - nvgpu_kfree(vm->mm->g, va_node); - } - - gk20a_deinit_vm(vm); - - nvgpu_mutex_release(&vm->update_gmmu_lock); -} - -void gk20a_vm_remove_support(struct vm_gk20a *vm) -{ - gk20a_vm_remove_support_nofree(vm); - /* vm is not used anymore. release it. */ - nvgpu_kfree(vm->mm->g, vm); -} - -static void gk20a_vm_remove_support_kref(struct kref *ref) -{ - struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref); - struct gk20a *g = gk20a_from_vm(vm); - g->ops.mm.vm_remove(vm); -} - -void gk20a_vm_get(struct vm_gk20a *vm) -{ - kref_get(&vm->ref); -} - -void gk20a_vm_put(struct vm_gk20a *vm) -{ - kref_put(&vm->ref, gk20a_vm_remove_support_kref); -} - const struct gk20a_mmu_level gk20a_mm_levels_64k[] = { {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, .lo_bit = {26, 26}, @@ -3284,7 +3181,7 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm) SZ_4K); if (!sema_sea->gpu_va) { nvgpu_free(&vm->kernel, sema_sea->gpu_va); - gk20a_vm_put(vm); + nvgpu_vm_put(vm); return -ENOMEM; } @@ -3408,7 +3305,7 @@ static int init_vm_page_tables(struct vm_gk20a *vm) } /** - * gk20a_init_vm() - Initialize an address space. + * nvgpu_init_vm() - Initialize an address space. * * @mm - Parent MM. * @vm - The VM to init. @@ -3443,7 +3340,7 @@ static int init_vm_page_tables(struct vm_gk20a *vm) * such cases the @kernel_reserved and @low_hole should sum to exactly * @aperture_size. 
*/ -int gk20a_init_vm(struct mm_gk20a *mm, +int nvgpu_init_vm(struct mm_gk20a *mm, struct vm_gk20a *vm, u32 big_page_size, u64 low_hole, @@ -3683,7 +3580,7 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size, snprintf(name, sizeof(name), "as_%d", as_share->id); - err = gk20a_init_vm(mm, vm, big_page_size, + err = nvgpu_init_vm(mm, vm, big_page_size, big_page_size << 10, mm->channel.kernel_size, mm->channel.user_size + mm->channel.kernel_size, @@ -3701,7 +3598,7 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share) vm->as_share = NULL; as_share->vm = NULL; - gk20a_vm_put(vm); + nvgpu_vm_put(vm); return 0; } @@ -3864,7 +3761,7 @@ int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch) gk20a_dbg_fn(""); - gk20a_vm_get(vm); + nvgpu_vm_get(vm); ch->vm = vm; err = channel_gk20a_commit_va(ch); if (err) @@ -3960,7 +3857,7 @@ out: } -int gk20a_vm_map_buffer(struct vm_gk20a *vm, +int nvgpu_vm_map_buffer(struct vm_gk20a *vm, int dmabuf_fd, u64 *offset_align, u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/ @@ -3989,7 +3886,7 @@ int gk20a_vm_map_buffer(struct vm_gk20a *vm, return err; } - ret_va = gk20a_vm_map(vm, dmabuf, *offset_align, + ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align, flags, kind, NULL, true, gk20a_mem_flag_none, buffer_offset, @@ -4005,16 +3902,16 @@ int gk20a_vm_map_buffer(struct vm_gk20a *vm, return err; } -int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, +int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, struct vm_gk20a_mapping_batch *batch) { gk20a_dbg_fn(""); - gk20a_vm_unmap_user(vm, offset, batch); + nvgpu_vm_unmap_user(vm, offset, batch); return 0; } -void gk20a_deinit_vm(struct vm_gk20a *vm) +void nvgpu_deinit_vm(struct vm_gk20a *vm) { if (nvgpu_alloc_initialized(&vm->kernel)) nvgpu_alloc_destroy(&vm->kernel); @@ -4069,7 +3966,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm) mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); - gk20a_init_vm(mm, vm, + nvgpu_init_vm(mm, vm, big_page_size, SZ_4K, /* Low hole */ mm->bar1.aperture_size - SZ_4K, /* Kernel reserved. 
*/ @@ -4085,7 +3982,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm) return 0; clean_up_va: - gk20a_deinit_vm(vm); + nvgpu_deinit_vm(vm); return err; } @@ -4108,7 +4005,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm) mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); - gk20a_init_vm(mm, vm, big_page_size, + nvgpu_init_vm(mm, vm, big_page_size, low_hole, aperture_size - low_hole, aperture_size, @@ -4124,7 +4021,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm) return 0; clean_up_va: - gk20a_deinit_vm(vm); + nvgpu_deinit_vm(vm); return err; } @@ -4149,7 +4046,7 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm) struct gk20a *g = gk20a_from_mm(mm); u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; - return gk20a_init_vm(mm, vm, big_page_size, + return nvgpu_init_vm(mm, vm, big_page_size, big_page_size << 10, NV_MM_DEFAULT_KERNEL_SIZE, NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, @@ -4162,7 +4059,7 @@ static int gk20a_init_ce_vm(struct mm_gk20a *mm) struct gk20a *g = gk20a_from_mm(mm); u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; - return gk20a_init_vm(mm, vm, big_page_size, + return nvgpu_init_vm(mm, vm, big_page_size, big_page_size << 10, NV_MM_DEFAULT_KERNEL_SIZE, NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, @@ -4399,7 +4296,7 @@ hw_was_off: gk20a_idle_nosuspend(g->dev); } -int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, +int nvgpu_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, struct dma_buf **dmabuf, u64 *offset) { @@ -4503,7 +4400,7 @@ void gk20a_init_mm(struct gpu_ops *gops) { gops->mm.gmmu_map = gk20a_locked_gmmu_map; gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap; - gops->mm.vm_remove = gk20a_vm_remove_support; + gops->mm.vm_remove = nvgpu_vm_remove_support; gops->mm.vm_alloc_share = gk20a_vm_alloc_share; gops->mm.vm_bind_channel = gk20a_vm_bind_channel; gops->mm.fb_flush = gk20a_mm_fb_flush; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 0a102cb2..331843cc 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -116,13 +117,6 @@ gk20a_buffer_state_from_list(struct nvgpu_list_node *node) ((uintptr_t)node - offsetof(struct gk20a_buffer_state, list)); }; -enum gmmu_pgsz_gk20a { - gmmu_page_size_small = 0, - gmmu_page_size_big = 1, - gmmu_page_size_kernel = 2, - gmmu_nr_page_sizes = 3, -}; - struct gk20a_comptags { u32 offset; u32 lines; @@ -130,15 +124,6 @@ struct gk20a_comptags { bool user_mappable; }; -struct gk20a_mm_entry { - /* backing for */ - struct nvgpu_mem mem; - u32 woffset; /* if >0, mem is a shadow copy, owned by another entry */ - int pgsz; - struct gk20a_mm_entry *entries; - int num_entries; -}; - struct priv_cmd_queue { struct nvgpu_mem mem; u32 size; /* num of entries in words */ @@ -214,84 +199,6 @@ vm_reserved_va_node_from_reserved_va_list(struct nvgpu_list_node *node) ((uintptr_t)node - offsetof(struct vm_reserved_va_node, reserved_va_list)); }; -struct gk20a_mmu_level { - int hi_bit[2]; - int lo_bit[2]; - int (*update_entry)(struct vm_gk20a *vm, - struct gk20a_mm_entry *pte, - u32 i, u32 gmmu_pgsz_idx, - struct scatterlist **sgl, - u64 *offset, - u64 *iova, - u32 kind_v, u64 *ctag, - bool cacheable, bool unmapped_pte, - int rw_flag, bool sparse, bool priv, - enum nvgpu_aperture aperture); - size_t entry_size; -}; - -/* map/unmap batch state */ -struct 
vm_gk20a_mapping_batch -{ - bool gpu_l2_flushed; - bool need_tlb_invalidate; -}; - -struct vm_gk20a { - struct mm_gk20a *mm; - struct gk20a_as_share *as_share; /* as_share this represents */ - - u64 va_start; - u64 va_limit; - - int num_user_mapped_buffers; - - bool big_pages; /* enable large page support */ - bool enable_ctag; - bool mapped; - - u32 big_page_size; - - bool userspace_managed; - - const struct gk20a_mmu_level *mmu_levels; - - struct kref ref; - - struct nvgpu_mutex update_gmmu_lock; - - struct gk20a_mm_entry pdb; - - /* - * These structs define the address spaces. In some cases it's possible - * to merge address spaces (user and user_lp) and in other cases it's - * not. vma[] allows the code to be agnostic to this by always using - * address spaces through this pointer array. - */ - struct nvgpu_allocator *vma[gmmu_nr_page_sizes]; - struct nvgpu_allocator kernel; - struct nvgpu_allocator user; - struct nvgpu_allocator user_lp; - - struct nvgpu_rbtree_node *mapped_buffers; - - struct nvgpu_list_node reserved_va_list; - -#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION - u64 handle; -#endif - u32 gmmu_page_sizes[gmmu_nr_page_sizes]; - - /* if non-NULL, kref_put will use this batch when - unmapping. Must hold vm->update_gmmu_lock. */ - struct vm_gk20a_mapping_batch *kref_put_batch; - - /* - * Each address space needs to have a semaphore pool. - */ - struct nvgpu_semaphore_pool *sema_pool; -}; - struct gk20a; struct channel_gk20a; @@ -562,57 +469,13 @@ struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf); void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, struct sg_table *sgt); -u64 gk20a_vm_map(struct vm_gk20a *vm, - struct dma_buf *dmabuf, - u64 offset_align, - u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/, - int kind, - struct sg_table **sgt, - bool user_mapped, - int rw_flag, - u64 buffer_offset, - u64 mapping_size, - struct vm_gk20a_mapping_batch *mapping_batch); - -int gk20a_vm_get_compbits_info(struct vm_gk20a *vm, +int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm, u64 mapping_gva, u64 *compbits_win_size, u32 *compbits_win_ctagline, u32 *mapping_ctagline, u32 *flags); -int gk20a_vm_map_compbits(struct vm_gk20a *vm, - u64 mapping_gva, - u64 *compbits_win_gva, - u64 *mapping_iova, - u32 flags); - -/* unmap handle from kernel */ -void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset); - -void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, - struct vm_gk20a_mapping_batch *batch); - -/* get reference to all currently mapped buffers */ -int gk20a_vm_get_buffers(struct vm_gk20a *vm, - struct mapped_buffer_node ***mapped_buffers, - int *num_buffers); - -/* put references on the given buffers */ -void gk20a_vm_put_buffers(struct vm_gk20a *vm, - struct mapped_buffer_node **mapped_buffers, - int num_buffers); - -/* find buffer corresponding to va */ -int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, - struct dma_buf **dmabuf, - u64 *offset); - -void gk20a_vm_get(struct vm_gk20a *vm); -void gk20a_vm_put(struct vm_gk20a *vm); - -void gk20a_vm_remove_support(struct vm_gk20a *vm); - u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, u64 size, enum gmmu_pgsz_gk20a gmmu_pgsz_idx); @@ -635,44 +498,11 @@ int gk20a_vm_bind_channel(struct gk20a_as_share *as_share, struct channel_gk20a *ch); int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch); -/* batching eliminates redundant cache flushes and invalidates */ -void gk20a_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch); -void gk20a_vm_mapping_batch_finish( - struct vm_gk20a 
*vm, struct vm_gk20a_mapping_batch *batch); -/* called when holding vm->update_gmmu_lock */ -void gk20a_vm_mapping_batch_finish_locked( - struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch); - - int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes); int gk20a_vidmem_get_space(struct gk20a *g, u64 *space); int gk20a_vidbuf_access_memory(struct gk20a *g, struct dma_buf *dmabuf, void *buffer, u64 offset, u64 size, u32 cmd); -/* Note: batch may be NULL if map op is not part of a batch */ -int gk20a_vm_map_buffer(struct vm_gk20a *vm, - int dmabuf_fd, - u64 *offset_align, - u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */ - int kind, - u64 buffer_offset, - u64 mapping_size, - struct vm_gk20a_mapping_batch *batch); - -int gk20a_init_vm(struct mm_gk20a *mm, - struct vm_gk20a *vm, - u32 big_page_size, - u64 low_hole, - u64 kernel_reserved, - u64 aperture_size, - bool big_pages, - bool userspace_managed, - char *name); -void gk20a_deinit_vm(struct vm_gk20a *vm); - -/* Note: batch may be NULL if unmap op is not part of a batch */ -int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, - struct vm_gk20a_mapping_batch *batch); void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf, struct gk20a_comptags *comptags); dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr); diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c index d1e667b6..f9884cfb 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c @@ -289,7 +289,7 @@ void gk20a_tsg_release(struct kref *ref) tsg->tsg_gr_ctx = NULL; } if (tsg->vm) { - gk20a_vm_put(tsg->vm); + nvgpu_vm_put(tsg->vm); tsg->vm = NULL; } diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index bf3dd240..1405ef30 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c @@ -60,7 +60,7 @@ void gm20b_init_mm(struct gpu_ops *gops) gops->mm.support_sparse = gm20b_mm_support_sparse; gops->mm.gmmu_map = gk20a_locked_gmmu_map; gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap; - gops->mm.vm_remove = gk20a_vm_remove_support; + gops->mm.vm_remove = nvgpu_vm_remove_support; gops->mm.vm_alloc_share = gk20a_vm_alloc_share; gops->mm.vm_bind_channel = gk20a_vm_bind_channel; gops->mm.fb_flush = gk20a_mm_fb_flush; diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 6b7f63b3..eab51175 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c @@ -74,7 +74,7 @@ static int gb10b_init_bar2_vm(struct gk20a *g) /* BAR2 aperture size is 32MB */ mm->bar2.aperture_size = 32 << 20; gk20a_dbg_info("bar2 vm size = 0x%x", mm->bar2.aperture_size); - gk20a_init_vm(mm, vm, big_page_size, SZ_4K, + nvgpu_init_vm(mm, vm, big_page_size, SZ_4K, mm->bar2.aperture_size - SZ_4K, mm->bar2.aperture_size, false, false, "bar2"); @@ -88,7 +88,7 @@ static int gb10b_init_bar2_vm(struct gk20a *g) return 0; clean_up_va: - gk20a_deinit_vm(vm); + nvgpu_deinit_vm(vm); return err; } diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h index 7fb0147e..6d8aa025 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h @@ -18,6 +18,9 @@ #define __NVGPU_GMMU_H__ #include +#include + +struct scatterlist; /* * This is the GMMU API visible to blocks outside of the GMMU. 
Basically this @@ -28,7 +31,37 @@ struct vm_gk20a; struct nvgpu_mem; -enum nvgpu_aperture; +enum gmmu_pgsz_gk20a { + gmmu_page_size_small = 0, + gmmu_page_size_big = 1, + gmmu_page_size_kernel = 2, + gmmu_nr_page_sizes = 3, +}; + +struct gk20a_mm_entry { + /* backing for */ + struct nvgpu_mem mem; + u32 woffset; /* if >0, mem is a shadow copy, owned by another entry */ + int pgsz; + struct gk20a_mm_entry *entries; + int num_entries; +}; + +struct gk20a_mmu_level { + int hi_bit[2]; + int lo_bit[2]; + int (*update_entry)(struct vm_gk20a *vm, + struct gk20a_mm_entry *pte, + u32 i, u32 gmmu_pgsz_idx, + struct scatterlist **sgl, + u64 *offset, + u64 *iova, + u32 kind_v, u64 *ctag, + bool cacheable, bool unmapped_pte, + int rw_flag, bool sparse, bool priv, + enum nvgpu_aperture aperture); + size_t entry_size; +}; /** * nvgpu_gmmu_map - Map memory into the GMMU. diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h new file mode 100644 index 00000000..1fb772d5 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __NVGPU_VM_H__ +#define __NVGPU_VM_H__ + +#include +#include +#include +#include +#include +#include +#include + +struct vm_gk20a; +struct mapped_buffer_node; + +/** + * This header contains the OS agnostic APIs for dealing with VMs. Most of the + * VM implementation is system specific - it must translate from a platform's + * representation of DMA'able memory to our nvgpu_mem notion. + * + * However, some stuff is platform agnostic. VM ref-counting and the VM struct + * itself are platform agnostic. Also, the initialization and destruction of + * VMs is the same across all platforms (for now). + */ + +/* map/unmap batch state */ +struct vm_gk20a_mapping_batch { + bool gpu_l2_flushed; + bool need_tlb_invalidate; +}; + +struct vm_gk20a { + struct mm_gk20a *mm; + struct gk20a_as_share *as_share; /* as_share this represents */ + + u64 va_start; + u64 va_limit; + + int num_user_mapped_buffers; + + bool big_pages; /* enable large page support */ + bool enable_ctag; + bool mapped; + + u32 big_page_size; + + bool userspace_managed; + + const struct gk20a_mmu_level *mmu_levels; + + struct kref ref; + + struct nvgpu_mutex update_gmmu_lock; + + struct gk20a_mm_entry pdb; + + /* + * These structs define the address spaces. In some cases it's possible + * to merge address spaces (user and user_lp) and in other cases it's + * not. vma[] allows the code to be agnostic to this by always using + * address spaces through this pointer array. 
+ */ + struct nvgpu_allocator *vma[gmmu_nr_page_sizes]; + struct nvgpu_allocator kernel; + struct nvgpu_allocator user; + struct nvgpu_allocator user_lp; + + struct nvgpu_rbtree_node *mapped_buffers; + + struct nvgpu_list_node reserved_va_list; + +#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION + u64 handle; +#endif + u32 gmmu_page_sizes[gmmu_nr_page_sizes]; + + /* if non-NULL, kref_put will use this batch when + unmapping. Must hold vm->update_gmmu_lock. */ + struct vm_gk20a_mapping_batch *kref_put_batch; + + /* + * Each address space needs to have a semaphore pool. + */ + struct nvgpu_semaphore_pool *sema_pool; +}; + +void nvgpu_vm_get(struct vm_gk20a *vm); +void nvgpu_vm_put(struct vm_gk20a *vm); + +/* batching eliminates redundant cache flushes and invalidates */ +void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch); +void nvgpu_vm_mapping_batch_finish( + struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch); +/* called when holding vm->update_gmmu_lock */ +void nvgpu_vm_mapping_batch_finish_locked( + struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch); + +/* get reference to all currently mapped buffers */ +int nvgpu_vm_get_buffers(struct vm_gk20a *vm, + struct mapped_buffer_node ***mapped_buffers, + int *num_buffers); + +/* put references on the given buffers */ +void nvgpu_vm_put_buffers(struct vm_gk20a *vm, + struct mapped_buffer_node **mapped_buffers, + int num_buffers); + +/* Note: batch may be NULL if unmap op is not part of a batch */ +int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, + struct vm_gk20a_mapping_batch *batch); + +void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, + struct vm_gk20a_mapping_batch *batch); + +void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm); +void nvgpu_vm_remove_support(struct vm_gk20a *vm); + +int nvgpu_init_vm(struct mm_gk20a *mm, + struct vm_gk20a *vm, + u32 big_page_size, + u64 low_hole, + u64 kernel_reserved, + u64 aperture_size, + bool big_pages, + bool userspace_managed, + char *name); +void nvgpu_deinit_vm(struct vm_gk20a *vm); + +#endif diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c index c7960c1f..dd515f41 100644 --- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c @@ -526,7 +526,7 @@ static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, } else { if (!tsg->tsg_gr_ctx) { tsg->vm = c->vm; - gk20a_vm_get(tsg->vm); + nvgpu_vm_get(tsg->vm); err = g->ops.gr.alloc_gr_ctx(g, &tsg->tsg_gr_ctx, c->vm, args->class_num, @@ -536,7 +536,7 @@ static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, if (err) { nvgpu_err(g, "fail to allocate TSG gr ctx buffer, err=%d", err); - gk20a_vm_put(tsg->vm); + nvgpu_vm_put(tsg->vm); tsg->vm = NULL; goto out; } diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index 7e42c198..cfa9e428 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c @@ -22,6 +22,8 @@ #include "vgpu/vgpu.h" #include "gk20a/mm_gk20a.h" +#include "common/linux/vm_priv.h" + static int vgpu_init_mm_setup_sw(struct gk20a *g) { struct mm_gk20a *mm = &g->mm; @@ -216,7 +218,7 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm) nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers); while (node) { mapped_buffer = mapped_buffer_from_rbtree_node(node); - gk20a_vm_unmap_locked(mapped_buffer, NULL); + nvgpu_vm_unmap_locked(mapped_buffer, NULL); nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers); } @@ -454,7 +456,7 @@ static int vgpu_vm_bind_channel(struct gk20a_as_share 
*as_share, } if (ch->vm) - gk20a_vm_get(ch->vm); + nvgpu_vm_get(ch->vm); return err; } --
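Usage sketches
--------------

The new interface reduces to a few small calling patterns. First, VM
lifetime: code that caches a VM pointer takes a reference, exactly as
__gk20a_vm_bind_channel() and the TSG context path do in this patch. A
minimal sketch against the new include/nvgpu/vm.h; 'struct my_ctx' and
its functions are hypothetical consumers, not something this patch adds:

    #include <nvgpu/vm.h>

    /* Hypothetical consumer that keeps a VM alive while bound. */
    struct my_ctx {
            struct vm_gk20a *vm;
    };

    static void my_ctx_bind_vm(struct my_ctx *ctx, struct vm_gk20a *vm)
    {
            /* Take a reference so VM teardown cannot race with us. */
            nvgpu_vm_get(vm);
            ctx->vm = vm;
    }

    static void my_ctx_unbind_vm(struct my_ctx *ctx)
    {
            /* The final put runs g->ops.mm.vm_remove() and frees the VM. */
            nvgpu_vm_put(ctx->vm);
            ctx->vm = NULL;
    }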
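Second, batching. A vm_gk20a_mapping_batch exists so that a run of map
and unmap calls pays for cache maintenance once rather than per buffer;
gk20a_as_ioctl_map_buffer_batch() above is the in-tree user. A condensed
sketch of the same pattern ('offsets' is a hypothetical input array):

    #include <nvgpu/vm.h>

    static int unmap_many(struct vm_gk20a *vm, u64 *offsets, int n)
    {
            struct vm_gk20a_mapping_batch batch;
            int i, err = 0;

            nvgpu_vm_mapping_batch_start(&batch);

            for (i = 0; i < n; i++) {
                    err = nvgpu_vm_unmap_buffer(vm, offsets[i], &batch);
                    if (err)
                            break;
            }

            /* At most one fb.tlb_invalidate() for the whole run. */
            nvgpu_vm_mapping_batch_finish(vm, &batch);

            return err;
    }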
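Third, VA reservations. common/mm/vm_area.c gives fixed-offset mappings
something to validate against: reserve a window, map into it, then free
the window. The header declaring nvgpu_vm_area_alloc()/free() is not
shown in this patch, so the include below is an assumption:

    #include <nvgpu/vm.h>

    static int reserve_and_release(struct vm_gk20a *vm, u32 page_size,
                                   u32 pages)
    {
            u64 addr = 0;
            int err;

            /* NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE here would also GMMU-map
             * the window as sparse. */
            err = nvgpu_vm_area_alloc(vm, pages, page_size, &addr, 0);
            if (err)
                    return err;

            /* Fixed-offset nvgpu_vm_map_buffer() calls landing inside
             * [addr, addr + pages * page_size) are checked against this
             * reservation by nvgpu_vm_area_validate_buffer(). */

            return nvgpu_vm_area_free(vm, addr);
    }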