From 2a285d0607a20694476399f5719e74dbc26fcd58 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Fri, 6 Oct 2017 11:30:29 -0700 Subject: gpu: nvgpu: Cleanup generic MM code in gk20a/mm_gk20a.c Move much of the remaining generic MM code to a new common location: common/mm/mm.c. Also add a corresponding header. This mostly consists of init and cleanup code that handles the common MM data structures, such as the VIDMEM code, the address spaces for the various engines, and so on. A few more in-depth changes were made as well. 1. alloc_inst_block() has been added to the MM HAL. It used to be defined directly in the gk20a code, but it reads a register, so if that register were ever to change it would have to become a HAL operation anyway. This patch preempts that and, for now, points every chip's HAL at the gk20a version. 2. Rename as much as possible: global functions are, for the most part, prefixed with nvgpu (there are a few exceptions that I have yet to decide what to do with). Static functions are renamed to match their functionality as closely as possible, since in some cases a function's name and its effect had diverged. JIRA NVGPU-30 Change-Id: Ic948f1ecc2f7976eba4bb7169a44b7226bb7c0b5 Signed-off-by: Alex Waterman Reviewed-on: https://git-master.nvidia.com/r/1574499 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile.nvgpu | 1 + drivers/gpu/nvgpu/common/linux/ioctl_dbg.c | 2 +- drivers/gpu/nvgpu/common/mm/buddy_allocator.c | 4 +- drivers/gpu/nvgpu/common/mm/mm.c | 426 +++++++++++++++++++++++++ drivers/gpu/nvgpu/gk20a/bus_gk20a.c | 5 +- drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | 3 +- drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 6 +- drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 7 +- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 13 +- drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 1 + drivers/gpu/nvgpu/gk20a/gk20a.c | 7 +- drivers/gpu/nvgpu/gk20a/gk20a.h | 3 + drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 11 +- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 441 ++------------------------ drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 208 +----------- drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 3 +- drivers/gpu/nvgpu/gm20b/acr_gm20b.c | 4 +- drivers/gpu/nvgpu/gm20b/bus_gm20b.c | 5 +- drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 3 +- drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 1 + drivers/gpu/nvgpu/gp106/hal_gp106.c | 1 + drivers/gpu/nvgpu/gp106/sec2_gp106.c | 5 +- drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 1 + drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 7 +- drivers/gpu/nvgpu/include/nvgpu/mm.h | 220 +++++++++++++ 25 files changed, 724 insertions(+), 664 deletions(-) create mode 100644 drivers/gpu/nvgpu/common/mm/mm.c create mode 100644 drivers/gpu/nvgpu/include/nvgpu/mm.h diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index ce4f67b0..e689aa7f 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu @@ -65,6 +65,7 @@ nvgpu-y := \ common/mm/vm_area.o \ common/mm/nvgpu_mem.o \ common/mm/comptags.o \ + common/mm/mm.o \ common/bus.o \ common/enabled.o \ common/pramin.o \ diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c index 56edc11b..c8831a97 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c @@ -1372,7 +1372,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset) err = g->ops.dbg_session_ops.perfbuffer_disable(g); nvgpu_vm_unmap_buffer(vm, offset, NULL); - gk20a_free_inst_block(g, &mm->perfbuf.inst_block); + nvgpu_free_inst_block(g, 
&mm->perfbuf.inst_block); nvgpu_vm_put(vm); g->perfbuf.owner = NULL; diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c index c6f10a69..a2546e9d 100644 --- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c @@ -25,8 +25,8 @@ #include #include #include - -#include "gk20a/mm_gk20a.h" +#include +#include #include "buddy_allocator_priv.h" diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c new file mode 100644 index 00000000..1027ed28 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/mm.c @@ -0,0 +1,426 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" + +/* + * Attempt to find a reserved memory area to determine PTE size for the passed + * mapping. If no reserved area can be found use small pages. + */ +enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm, + u64 base, u64 size) +{ + struct nvgpu_vm_area *vm_area; + + vm_area = nvgpu_vm_area_find(vm, base); + if (!vm_area) + return gmmu_page_size_small; + + return vm_area->pgsz_idx; +} + +/* + * This is for when the address space does not support unified address spaces. + */ +static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm, + u64 base, u64 size) +{ + if (!base) { + if (size >= vm->gmmu_page_sizes[gmmu_page_size_big]) + return gmmu_page_size_big; + return gmmu_page_size_small; + } else { + if (base < __nv_gmmu_va_small_page_limit()) + return gmmu_page_size_small; + else + return gmmu_page_size_big; + } +} + +/* + * This determines the PTE size for a given alloc. Used by both the GVA space + * allocator and the mm core code so that agreement can be reached on how to + * map allocations. + * + * The page size of a buffer is this: + * + * o If the VM doesn't support large pages then obviously small pages + * must be used. + * o If the base address is non-zero (fixed address map): + * - Attempt to find a reserved memory area and use the page size + * based on that. + * - If no reserved page size is available, default to small pages. + * o If the base is zero: + * - If the size is larger than or equal to the big page size, use big + * pages. + * - Otherwise use small pages. 
+ */ +enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size) +{ + struct gk20a *g = gk20a_from_vm(vm); + + if (!vm->big_pages) + return gmmu_page_size_small; + + if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES)) + return __get_pte_size_split_addr(vm, base, size); + + if (base) + return __get_pte_size_fixed_map(vm, base, size); + + if (size >= vm->gmmu_page_sizes[gmmu_page_size_big]) + return gmmu_page_size_big; + return gmmu_page_size_small; +} + +int nvgpu_mm_suspend(struct gk20a *g) +{ + nvgpu_info(g, "MM suspend running..."); + + nvgpu_vidmem_thread_pause_sync(&g->mm); + + g->ops.mm.cbc_clean(g); + g->ops.mm.l2_flush(g, false); + + nvgpu_info(g, "MM suspend done!"); + + return 0; +} + +u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block) +{ + if (g->mm.has_physical_mode) + return nvgpu_mem_get_phys_addr(g, inst_block); + else + return nvgpu_mem_get_addr(g, inst_block); +} + +void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) +{ + if (nvgpu_mem_is_valid(inst_block)) + nvgpu_dma_free(g, inst_block); +} + +static int nvgpu_alloc_sysmem_flush(struct gk20a *g) +{ + return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush); +} + +static void nvgpu_remove_mm_ce_support(struct mm_gk20a *mm) +{ + struct gk20a *g = gk20a_from_mm(mm); + + if (mm->vidmem.ce_ctx_id != (u32)~0) + gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id); + + mm->vidmem.ce_ctx_id = (u32)~0; + + nvgpu_vm_put(mm->ce.vm); +} + +static void nvgpu_remove_mm_support(struct mm_gk20a *mm) +{ + struct gk20a *g = gk20a_from_mm(mm); + + if (g->ops.mm.fault_info_mem_destroy) + g->ops.mm.fault_info_mem_destroy(g); + + if (g->ops.mm.remove_bar2_vm) + g->ops.mm.remove_bar2_vm(g); + + if (g->ops.mm.is_bar1_supported(g)) { + nvgpu_free_inst_block(g, &mm->bar1.inst_block); + nvgpu_vm_put(mm->bar1.vm); + } + + nvgpu_free_inst_block(g, &mm->pmu.inst_block); + nvgpu_free_inst_block(g, &mm->hwpm.inst_block); + nvgpu_vm_put(mm->pmu.vm); + nvgpu_vm_put(mm->cde.vm); + + nvgpu_semaphore_sea_destroy(g); + nvgpu_vidmem_destroy(g); + nvgpu_pd_cache_fini(g); +} + +/* pmu vm, share channel_vm interfaces */ +static int nvgpu_init_system_vm(struct mm_gk20a *mm) +{ + int err; + struct gk20a *g = gk20a_from_mm(mm); + struct nvgpu_mem *inst_block = &mm->pmu.inst_block; + u32 big_page_size = g->ops.mm.get_default_big_page_size(); + u32 low_hole, aperture_size; + + /* + * No user region - so we will pass that as zero sized. 
+ */ + low_hole = SZ_4K * 16; + aperture_size = GK20A_PMU_VA_SIZE * 2; + + mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; + nvgpu_info(g, "pmu vm size = 0x%x", mm->pmu.aperture_size); + + mm->pmu.vm = nvgpu_vm_init(g, big_page_size, + low_hole, + aperture_size - low_hole, + aperture_size, + true, + false, + "system"); + if (!mm->pmu.vm) + return -ENOMEM; + + err = g->ops.mm.alloc_inst_block(g, inst_block); + if (err) + goto clean_up_vm; + g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size); + + return 0; + +clean_up_vm: + nvgpu_vm_put(mm->pmu.vm); + return err; +} + +static int nvgpu_init_hwpm(struct mm_gk20a *mm) +{ + int err; + struct gk20a *g = gk20a_from_mm(mm); + struct nvgpu_mem *inst_block = &mm->hwpm.inst_block; + + err = g->ops.mm.alloc_inst_block(g, inst_block); + if (err) + return err; + g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0); + + return 0; +} + +static int nvgpu_init_cde_vm(struct mm_gk20a *mm) +{ + struct gk20a *g = gk20a_from_mm(mm); + u32 big_page_size = g->ops.mm.get_default_big_page_size(); + + mm->cde.vm = nvgpu_vm_init(g, big_page_size, + big_page_size << 10, + NV_MM_DEFAULT_KERNEL_SIZE, + NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, + false, false, "cde"); + if (!mm->cde.vm) + return -ENOMEM; + return 0; +} + +static int nvgpu_init_ce_vm(struct mm_gk20a *mm) +{ + struct gk20a *g = gk20a_from_mm(mm); + u32 big_page_size = g->ops.mm.get_default_big_page_size(); + + mm->ce.vm = nvgpu_vm_init(g, big_page_size, + big_page_size << 10, + NV_MM_DEFAULT_KERNEL_SIZE, + NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, + false, false, "ce"); + if (!mm->ce.vm) + return -ENOMEM; + return 0; +} + +void nvgpu_init_mm_ce_context(struct gk20a *g) +{ +#if defined(CONFIG_GK20A_VIDMEM) + if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == (u32)~0)) { + g->mm.vidmem.ce_ctx_id = + gk20a_ce_create_context_with_cb(g, + gk20a_fifo_get_fast_ce_runlist_id(g), + -1, + -1, + -1, + NULL); + + if (g->mm.vidmem.ce_ctx_id == (u32)~0) + nvgpu_err(g, + "Failed to allocate CE context for vidmem page clearing support"); + } +#endif +} + +static int nvgpu_init_mm_reset_enable_hw(struct gk20a *g) +{ + if (g->ops.fb.reset) + g->ops.fb.reset(g); + + if (g->ops.clock_gating.slcg_fb_load_gating_prod) + g->ops.clock_gating.slcg_fb_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_ltc_load_gating_prod) + g->ops.clock_gating.slcg_ltc_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.blcg_fb_load_gating_prod) + g->ops.clock_gating.blcg_fb_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_ltc_load_gating_prod) + g->ops.clock_gating.blcg_ltc_load_gating_prod(g, + g->blcg_enabled); + + if (g->ops.fb.init_fs_state) + g->ops.fb.init_fs_state(g); + + return 0; +} + +static int nvgpu_init_bar1_vm(struct mm_gk20a *mm) +{ + int err; + struct gk20a *g = gk20a_from_mm(mm); + struct nvgpu_mem *inst_block = &mm->bar1.inst_block; + u32 big_page_size = g->ops.mm.get_default_big_page_size(); + + mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; + nvgpu_info(g, "bar1 vm size = 0x%x", mm->bar1.aperture_size); + mm->bar1.vm = nvgpu_vm_init(g, + big_page_size, + SZ_4K, + mm->bar1.aperture_size - SZ_4K, + mm->bar1.aperture_size, + true, false, + "bar1"); + if (!mm->bar1.vm) + return -ENOMEM; + + err = g->ops.mm.alloc_inst_block(g, inst_block); + if (err) + goto clean_up_vm; + g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size); + + return 0; + +clean_up_vm: + nvgpu_vm_put(mm->bar1.vm); + return err; +} + +static int 
nvgpu_init_mm_setup_sw(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + int err; + + if (mm->sw_ready) { + nvgpu_info(g, "skip init"); + return 0; + } + + mm->g = g; + nvgpu_mutex_init(&mm->l2_op_lock); + + /*TBD: make channel vm size configurable */ + mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE - + NV_MM_DEFAULT_KERNEL_SIZE; + mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE; + + nvgpu_info(g, "channel vm size: user %dMB kernel %dMB", + (int)(mm->channel.user_size >> 20), + (int)(mm->channel.kernel_size >> 20)); + + nvgpu_init_pramin(mm); + + mm->vidmem.ce_ctx_id = (u32)~0; + + err = nvgpu_vidmem_init(mm); + if (err) + return err; + + /* + * this requires fixed allocations in vidmem which must be + * allocated before all other buffers + */ + if (g->ops.pmu.alloc_blob_space + && !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) { + err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob); + if (err) + return err; + } + + err = nvgpu_alloc_sysmem_flush(g); + if (err) + return err; + + if (g->ops.mm.is_bar1_supported(g)) { + err = nvgpu_init_bar1_vm(mm); + if (err) + return err; + } + if (g->ops.mm.init_bar2_vm) { + err = g->ops.mm.init_bar2_vm(g); + if (err) + return err; + } + err = nvgpu_init_system_vm(mm); + if (err) + return err; + + err = nvgpu_init_hwpm(mm); + if (err) + return err; + + err = nvgpu_init_cde_vm(mm); + if (err) + return err; + + err = nvgpu_init_ce_vm(mm); + if (err) + return err; + + mm->remove_support = nvgpu_remove_mm_support; + mm->remove_ce_support = nvgpu_remove_mm_ce_support; + + mm->sw_ready = true; + + return 0; +} + +int nvgpu_init_mm_support(struct gk20a *g) +{ + u32 err; + + err = nvgpu_init_mm_reset_enable_hw(g); + if (err) + return err; + + err = nvgpu_init_mm_setup_sw(g); + if (err) + return err; + + if (g->ops.mm.init_mm_setup_hw) + err = g->ops.mm.init_mm_setup_hw(g); + + return err; +} diff --git a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c index 938c4b00..9b031bbf 100644 --- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "gk20a.h" #include "bus_gk20a.h" @@ -137,8 +138,8 @@ int gk20a_read_ptimer(struct gk20a *g, u64 *value) int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst) { - u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst); - u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a()); + u64 iova = nvgpu_inst_block_addr(g, bar1_inst); + u32 ptr_v = (u32)(iova >> bus_bar1_block_ptr_shift_v()); gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v); diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c index 725ae278..e3896981 100644 --- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "gk20a.h" #include "css_gr_gk20a.h" @@ -183,7 +184,7 @@ int css_hw_enable_snapshot(struct channel_gk20a *ch, gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size); /* this field is aligned to 4K */ - inst_pa_page = gk20a_mm_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12; + inst_pa_page = nvgpu_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12; /* A write to MEM_BLOCK triggers the block bind operation. 
MEM_BLOCK * should be written last */ diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 8c39ecb7..802ccd76 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "gk20a.h" #include "gk20a/platform_gk20a.h" @@ -305,7 +306,7 @@ int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size) return err; } - err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block); + err = g->ops.mm.alloc_inst_block(g, &mm->perfbuf.inst_block); if (err) return err; @@ -322,8 +323,7 @@ int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size) gk20a_writel(g, perf_pmasys_outsize_r(), size); /* this field is aligned to 4K */ - inst_pa_page = gk20a_mm_inst_block_addr(g, - &mm->perfbuf.inst_block) >> 12; + inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12; /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK * should be written last */ diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 7fd1793c..12d7dcb9 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "ctxsw_trace_gk20a.h" #include "fecs_trace_gk20a.h" @@ -93,7 +94,7 @@ static inline u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts) static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch) { - return (u32) (gk20a_mm_inst_block_addr(g, &ch->inst_block) >> 12LL); + return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL); } static inline int gk20a_fecs_trace_num_ts(void) @@ -633,12 +634,12 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g, gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "chid=%d context_ptr=%x inst_block=%llx", ch->chid, context_ptr, - gk20a_mm_inst_block_addr(g, &ch->inst_block)); + nvgpu_inst_block_addr(g, &ch->inst_block)); if (!trace) return -ENOMEM; - pa = gk20a_mm_inst_block_addr(g, &trace->trace_buf); + pa = nvgpu_inst_block_addr(g, &trace->trace_buf); if (!pa) return -ENOMEM; aperture = nvgpu_aperture_mask(g, &trace->trace_buf, diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 03ca6984..fc71c358 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -24,6 +24,7 @@ #include +#include #include #include #include @@ -1058,7 +1059,7 @@ gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr) if (!ch) continue; - ch_inst_ptr = gk20a_mm_inst_block_addr(g, &ch->inst_block); + ch_inst_ptr = nvgpu_inst_block_addr(g, &ch->inst_block); if (inst_ptr == ch_inst_ptr) return ch; @@ -1659,10 +1660,10 @@ static bool gk20a_fifo_handle_mmu_fault( ch->chid); } } else if (mmfault_info.inst_ptr == - gk20a_mm_inst_block_addr(g, &g->mm.bar1.inst_block)) { + nvgpu_inst_block_addr(g, &g->mm.bar1.inst_block)) { nvgpu_err(g, "mmu fault from bar1"); } else if (mmfault_info.inst_ptr == - gk20a_mm_inst_block_addr(g, &g->mm.pmu.inst_block)) { + nvgpu_inst_block_addr(g, &g->mm.pmu.inst_block)) { nvgpu_err(g, "mmu fault from pmu"); } else nvgpu_err(g, "couldn't locate channel for mmu fault"); @@ -3973,12 +3974,12 @@ int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch) gk20a_dbg_fn(""); - err = gk20a_alloc_inst_block(g, &ch->inst_block); + err = g->ops.mm.alloc_inst_block(g, &ch->inst_block); if (err) return err; gk20a_dbg_info("channel %d inst block physical 
addr: 0x%16llx", - ch->chid, gk20a_mm_inst_block_addr(g, &ch->inst_block)); + ch->chid, nvgpu_inst_block_addr(g, &ch->inst_block)); gk20a_dbg_fn("done"); return 0; @@ -3986,7 +3987,7 @@ int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch) void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch) { - gk20a_free_inst_block(g, &ch->inst_block); + nvgpu_free_inst_block(g, &ch->inst_block); } u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c) diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 2bc7d9a8..ea5d55a4 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -36,6 +36,7 @@ #include struct gk20a_debug_output; +struct mmu_fault_info; #define MAX_RUNLIST_BUFFERS 2 diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 2d09c0bb..e3c2397c 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -34,6 +34,7 @@ #include #include #include +#include #include @@ -107,7 +108,7 @@ int gk20a_prepare_poweroff(struct gk20a *g) ret |= nvgpu_pmu_destroy(g); ret |= gk20a_gr_suspend(g); - ret |= gk20a_mm_suspend(g); + ret |= nvgpu_mm_suspend(g); ret |= gk20a_fifo_suspend(g); gk20a_ce_suspend(g); @@ -213,7 +214,7 @@ int gk20a_finalize_poweron(struct gk20a *g) goto done; } - err = gk20a_init_mm_support(g); + err = nvgpu_init_mm_support(g); if (err) { nvgpu_err(g, "failed to init gk20a mm"); goto done; @@ -314,7 +315,7 @@ int gk20a_finalize_poweron(struct gk20a *g) gk20a_init_ce_support(g); - gk20a_init_mm_ce_context(g); + nvgpu_init_mm_ce_context(g); if (g->ops.xve.available_speeds) { u32 speed; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 92bcb618..9c09e85f 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -47,6 +47,7 @@ struct nvgpu_warpstate; #include #include +#include #include #include #include @@ -756,6 +757,8 @@ struct gpu_ops { u64 (*gpu_phys_addr)(struct gk20a *g, struct nvgpu_gmmu_attrs *attrs, u64 phys); size_t (*get_vidmem_size)(struct gk20a *g); + int (*alloc_inst_block)(struct gk20a *g, + struct nvgpu_mem *inst_block); void (*init_inst_block)(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, u32 big_page_size); bool (*mmu_fault_pending)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index d6732453..6d370250 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "gk20a.h" #include "kind_gk20a.h" @@ -731,7 +732,7 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g, static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block) { - u32 ptr = u64_lo32(gk20a_mm_inst_block_addr(g, inst_block) + u32 ptr = u64_lo32(nvgpu_inst_block_addr(g, inst_block) >> ram_in_base_shift_v()); u32 aperture = nvgpu_aperture_mask(g, inst_block, gr_fecs_current_ctx_target_sys_mem_ncoh_f(), @@ -744,7 +745,7 @@ static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block) static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, struct channel_gk20a *c) { - u32 inst_base_ptr = u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block) + u32 inst_base_ptr = u64_lo32(nvgpu_inst_block_addr(g, &c->inst_block) >> ram_in_base_shift_v()); u32 data = fecs_current_ctx_data(g, &c->inst_block); u32 ret; @@ -1980,7 +1981,7 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) struct 
gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; int err; - err = gk20a_alloc_inst_block(g, &ucode_info->inst_blk_desc); + err = g->ops.mm.alloc_inst_block(g, &ucode_info->inst_blk_desc); if (err) return err; @@ -2154,7 +2155,7 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g) gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0); - inst_ptr = gk20a_mm_inst_block_addr(g, &ucode_info->inst_blk_desc); + inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc); gk20a_writel(g, gr_fecs_new_ctx_r(), gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, @@ -5455,7 +5456,7 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx( if (!gk20a_channel_get(ch)) continue; - if ((u32)(gk20a_mm_inst_block_addr(g, &ch->inst_block) >> + if ((u32)(nvgpu_inst_block_addr(g, &ch->inst_block) >> ram_in_base_shift_v()) == gr_fecs_current_ctx_ptr_v(curr_ctx)) { tsgid = ch->tsgid; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index d96fa4e1..a17d6bb6 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -1,6 +1,4 @@ /* - * GK20A memory management - * * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -24,6 +22,7 @@ #include +#include #include #include #include @@ -88,161 +87,6 @@ * */ -static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm); -static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm); -static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm); -static int __must_check gk20a_init_cde_vm(struct mm_gk20a *mm); -static int __must_check gk20a_init_ce_vm(struct mm_gk20a *mm); - -static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) -{ - gk20a_dbg_fn(""); - if (g->ops.fb.reset) - g->ops.fb.reset(g); - - if (g->ops.clock_gating.slcg_fb_load_gating_prod) - g->ops.clock_gating.slcg_fb_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_ltc_load_gating_prod) - g->ops.clock_gating.slcg_ltc_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.blcg_fb_load_gating_prod) - g->ops.clock_gating.blcg_fb_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_ltc_load_gating_prod) - g->ops.clock_gating.blcg_ltc_load_gating_prod(g, - g->blcg_enabled); - - if (g->ops.fb.init_fs_state) - g->ops.fb.init_fs_state(g); - - return 0; -} - -static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm) -{ - struct gk20a *g = gk20a_from_mm(mm); - - if (mm->vidmem.ce_ctx_id != (u32)~0) - gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id); - - mm->vidmem.ce_ctx_id = (u32)~0; - - nvgpu_vm_put(mm->ce.vm); -} - -static void gk20a_remove_mm_support(struct mm_gk20a *mm) -{ - struct gk20a *g = gk20a_from_mm(mm); - - if (g->ops.mm.fault_info_mem_destroy) - g->ops.mm.fault_info_mem_destroy(g); - - if (g->ops.mm.remove_bar2_vm) - g->ops.mm.remove_bar2_vm(g); - - if (g->ops.mm.is_bar1_supported(g)) { - gk20a_free_inst_block(g, &mm->bar1.inst_block); - nvgpu_vm_put(mm->bar1.vm); - } - - gk20a_free_inst_block(g, &mm->pmu.inst_block); - gk20a_free_inst_block(g, &mm->hwpm.inst_block); - nvgpu_vm_put(mm->pmu.vm); - nvgpu_vm_put(mm->cde.vm); - - nvgpu_semaphore_sea_destroy(g); - nvgpu_vidmem_destroy(g); - nvgpu_pd_cache_fini(g); -} - -static int gk20a_alloc_sysmem_flush(struct gk20a *g) -{ - return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush); -} - -int gk20a_init_mm_setup_sw(struct gk20a *g) -{ - struct mm_gk20a *mm = &g->mm; - int err; - - 
gk20a_dbg_fn(""); - - if (mm->sw_ready) { - gk20a_dbg_fn("skip init"); - return 0; - } - - mm->g = g; - nvgpu_mutex_init(&mm->l2_op_lock); - - /*TBD: make channel vm size configurable */ - mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE - - NV_MM_DEFAULT_KERNEL_SIZE; - mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE; - - gk20a_dbg_info("channel vm size: user %dMB kernel %dMB", - (int)(mm->channel.user_size >> 20), - (int)(mm->channel.kernel_size >> 20)); - - nvgpu_init_pramin(mm); - - mm->vidmem.ce_ctx_id = (u32)~0; - - err = nvgpu_vidmem_init(mm); - if (err) - return err; - - /* - * this requires fixed allocations in vidmem which must be - * allocated before all other buffers - */ - if (g->ops.pmu.alloc_blob_space - && !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) { - err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob); - if (err) - return err; - } - - err = gk20a_alloc_sysmem_flush(g); - if (err) - return err; - - if (g->ops.mm.is_bar1_supported(g)) { - err = gk20a_init_bar1_vm(mm); - if (err) - return err; - } - if (g->ops.mm.init_bar2_vm) { - err = g->ops.mm.init_bar2_vm(g); - if (err) - return err; - } - err = gk20a_init_system_vm(mm); - if (err) - return err; - - err = gk20a_init_hwpm(mm); - if (err) - return err; - - err = gk20a_init_cde_vm(mm); - if (err) - return err; - - err = gk20a_init_ce_vm(mm); - if (err) - return err; - - mm->remove_support = gk20a_remove_mm_support; - mm->remove_ce_support = gk20a_remove_mm_ce_support; - - mm->sw_ready = true; - - gk20a_dbg_fn("done"); - return 0; -} - /* make sure gk20a_init_mm_support is called before */ int gk20a_init_mm_setup_hw(struct gk20a *g) { @@ -274,43 +118,6 @@ int gk20a_init_mm_setup_hw(struct gk20a *g) return 0; } -int gk20a_init_mm_support(struct gk20a *g) -{ - u32 err; - - err = gk20a_init_mm_reset_enable_hw(g); - if (err) - return err; - - err = gk20a_init_mm_setup_sw(g); - if (err) - return err; - - if (g->ops.mm.init_mm_setup_hw) - err = g->ops.mm.init_mm_setup_hw(g); - - return err; -} - -void gk20a_init_mm_ce_context(struct gk20a *g) -{ -#if defined(CONFIG_GK20A_VIDMEM) - if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == (u32)~0)) { - g->mm.vidmem.ce_ctx_id = - gk20a_ce_create_context_with_cb(g, - gk20a_fifo_get_fast_ce_runlist_id(g), - -1, - -1, - -1, - NULL); - - if (g->mm.vidmem.ce_ctx_id == (u32)~0) - nvgpu_err(g, - "Failed to allocate CE context for vidmem page clearing support"); - } -#endif -} - int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm) { return vm->mmu_levels[0].lo_bit[0]; @@ -505,76 +312,6 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = { {.update_entry = NULL} }; -/* - * Attempt to find a reserved memory area to determine PTE size for the passed - * mapping. If no reserved area can be found use small pages. - */ -enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm, - u64 base, u64 size) -{ - struct nvgpu_vm_area *vm_area; - - vm_area = nvgpu_vm_area_find(vm, base); - if (!vm_area) - return gmmu_page_size_small; - - return vm_area->pgsz_idx; -} - -/* - * This is for when the address space does not support unified address spaces. - */ -static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm, - u64 base, u64 size) -{ - if (!base) { - if (size >= vm->gmmu_page_sizes[gmmu_page_size_big]) - return gmmu_page_size_big; - return gmmu_page_size_small; - } else { - if (base < __nv_gmmu_va_small_page_limit()) - return gmmu_page_size_small; - else - return gmmu_page_size_big; - } -} - -/* - * This determines the PTE size for a given alloc. 
Used by both the GVA space - * allocator and the mm core code so that agreement can be reached on how to - * map allocations. - * - * The page size of a buffer is this: - * - * o If the VM doesn't support large pages then obviously small pages - * must be used. - * o If the base address is non-zero (fixed address map): - * - Attempt to find a reserved memory area and use the page size - * based on that. - * - If no reserved page size is available, default to small pages. - * o If the base is zero: - * - If the size is larger than or equal to the big page size, use big - * pages. - * - Otherwise use small pages. - */ -enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size) -{ - struct gk20a *g = gk20a_from_vm(vm); - - if (!vm->big_pages) - return gmmu_page_size_small; - - if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES)) - return __get_pte_size_split_addr(vm, base, size); - - if (base) - return __get_pte_size_fixed_map(vm, base, size); - - if (size >= vm->gmmu_page_sizes[gmmu_page_size_big]) - return gmmu_page_size_big; - return gmmu_page_size_small; -} - int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch) { int err = 0; @@ -599,151 +336,6 @@ int gk20a_vm_bind_channel(struct gk20a_as_share *as_share, return __gk20a_vm_bind_channel(as_share->vm, ch); } -int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) -{ - int err; - - gk20a_dbg_fn(""); - - err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block); - if (err) { - nvgpu_err(g, "%s: memory allocation failed", __func__); - return err; - } - - gk20a_dbg_fn("done"); - return 0; -} - -void gk20a_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) -{ - if (inst_block->size) - nvgpu_dma_free(g, inst_block); -} - -u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block) -{ - if (g->mm.has_physical_mode) - return nvgpu_mem_get_phys_addr(g, inst_block); - else - return nvgpu_mem_get_addr(g, inst_block); -} - -static int gk20a_init_bar1_vm(struct mm_gk20a *mm) -{ - int err; - struct gk20a *g = gk20a_from_mm(mm); - struct nvgpu_mem *inst_block = &mm->bar1.inst_block; - u32 big_page_size = g->ops.mm.get_default_big_page_size(); - - mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; - gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); - mm->bar1.vm = nvgpu_vm_init(g, - big_page_size, - SZ_4K, - mm->bar1.aperture_size - SZ_4K, - mm->bar1.aperture_size, - true, false, - "bar1"); - if (!mm->bar1.vm) - return -ENOMEM; - - err = gk20a_alloc_inst_block(g, inst_block); - if (err) - goto clean_up_vm; - g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size); - - return 0; - -clean_up_vm: - nvgpu_vm_put(mm->bar1.vm); - return err; -} - -/* pmu vm, share channel_vm interfaces */ -static int gk20a_init_system_vm(struct mm_gk20a *mm) -{ - int err; - struct gk20a *g = gk20a_from_mm(mm); - struct nvgpu_mem *inst_block = &mm->pmu.inst_block; - u32 big_page_size = g->ops.mm.get_default_big_page_size(); - u32 low_hole, aperture_size; - - /* - * No user region - so we will pass that as zero sized. 
- */ - low_hole = SZ_4K * 16; - aperture_size = GK20A_PMU_VA_SIZE * 2; - - mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; - gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); - - mm->pmu.vm = nvgpu_vm_init(g, big_page_size, - low_hole, - aperture_size - low_hole, - aperture_size, - true, - false, - "system"); - if (!mm->pmu.vm) - return -ENOMEM; - - err = gk20a_alloc_inst_block(g, inst_block); - if (err) - goto clean_up_vm; - g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size); - - return 0; - -clean_up_vm: - nvgpu_vm_put(mm->pmu.vm); - return err; -} - -static int gk20a_init_hwpm(struct mm_gk20a *mm) -{ - int err; - struct gk20a *g = gk20a_from_mm(mm); - struct nvgpu_mem *inst_block = &mm->hwpm.inst_block; - - err = gk20a_alloc_inst_block(g, inst_block); - if (err) - return err; - g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0); - - return 0; -} - -static int gk20a_init_cde_vm(struct mm_gk20a *mm) -{ - struct gk20a *g = gk20a_from_mm(mm); - u32 big_page_size = g->ops.mm.get_default_big_page_size(); - - mm->cde.vm = nvgpu_vm_init(g, big_page_size, - big_page_size << 10, - NV_MM_DEFAULT_KERNEL_SIZE, - NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, - false, false, "cde"); - if (!mm->cde.vm) - return -ENOMEM; - return 0; -} - -static int gk20a_init_ce_vm(struct mm_gk20a *mm) -{ - struct gk20a *g = gk20a_from_mm(mm); - u32 big_page_size = g->ops.mm.get_default_big_page_size(); - - mm->ce.vm = nvgpu_vm_init(g, big_page_size, - big_page_size << 10, - NV_MM_DEFAULT_KERNEL_SIZE, - NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, - false, false, "ce"); - if (!mm->ce.vm) - return -ENOMEM; - return 0; -} - void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, struct vm_gk20a *vm) { @@ -770,7 +362,7 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, struct gk20a *g = gk20a_from_vm(vm); gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", - gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va); + nvgpu_inst_block_addr(g, inst_block), inst_block->cpu_va); g->ops.mm.init_pdb(g, inst_block, vm); @@ -784,6 +376,22 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, g->ops.mm.set_big_page_size(g, inst_block, big_page_size); } +int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) +{ + int err; + + gk20a_dbg_fn(""); + + err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block); + if (err) { + nvgpu_err(g, "%s: memory allocation failed", __func__); + return err; + } + + gk20a_dbg_fn("done"); + return 0; +} + int gk20a_mm_fb_flush(struct gk20a *g) { struct mm_gk20a *mm = &g->mm; @@ -992,19 +600,6 @@ hw_was_off: gk20a_idle_nosuspend(g); } -int gk20a_mm_suspend(struct gk20a *g) -{ - gk20a_dbg_fn(""); - - nvgpu_vidmem_thread_pause_sync(&g->mm); - - g->ops.mm.cbc_clean(g); - g->ops.mm.l2_flush(g, false); - - gk20a_dbg_fn("done"); - return 0; -} - u32 gk20a_mm_get_iommu_bit(struct gk20a *g) { return 34; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 15876b10..434fc422 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -35,11 +35,6 @@ #include #include #include -#include -#include -#include - -struct nvgpu_pd_cache; #ifdef CONFIG_ARM64 #define outer_flush_range(a, b) @@ -138,218 +133,23 @@ struct priv_cmd_entry { struct gk20a; struct channel_gk20a; -int gk20a_init_mm_support(struct gk20a *g); -int gk20a_init_mm_setup_sw(struct gk20a *g); -int gk20a_init_mm_setup_hw(struct gk20a *g); -void 
gk20a_init_mm_ce_context(struct gk20a *g); - int gk20a_mm_fb_flush(struct gk20a *g); void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); void gk20a_mm_cbc_clean(struct gk20a *g); void gk20a_mm_l2_invalidate(struct gk20a *g); -#define FAULT_TYPE_NUM 2 /* replay and nonreplay faults */ - -struct mmu_fault_info { - u64 inst_ptr; - u32 inst_aperture; - u64 fault_addr; - u32 fault_addr_aperture; - u32 timestamp_lo; - u32 timestamp_hi; - u32 mmu_engine_id; - u32 gpc_id; - u32 client_type; - u32 client_id; - u32 fault_type; - u32 access_type; - u32 protected_mode; - u32 replayable_fault; - u32 replay_fault_en; - u32 valid; - u32 faulted_pbdma; - u32 faulted_engine; - u32 faulted_subid; - u32 chid; - struct channel_gk20a *refch; - const char *client_type_desc; - const char *fault_type_desc; - const char *client_id_desc; -}; - -struct mm_gk20a { - struct gk20a *g; - - /* GPU VA default sizes address spaces for channels */ - struct { - u64 user_size; /* userspace-visible GPU VA region */ - u64 kernel_size; /* kernel-only GPU VA region */ - } channel; - - struct { - u32 aperture_size; - struct vm_gk20a *vm; - struct nvgpu_mem inst_block; - } bar1; - - struct { - u32 aperture_size; - struct vm_gk20a *vm; - struct nvgpu_mem inst_block; - } bar2; - - struct { - u32 aperture_size; - struct vm_gk20a *vm; - struct nvgpu_mem inst_block; - } pmu; - - struct { - /* using pmu vm currently */ - struct nvgpu_mem inst_block; - } hwpm; - - struct { - struct vm_gk20a *vm; - struct nvgpu_mem inst_block; - } perfbuf; - - struct { - struct vm_gk20a *vm; - } cde; - - struct { - struct vm_gk20a *vm; - } ce; - - struct nvgpu_pd_cache *pd_cache; - - struct nvgpu_mutex l2_op_lock; - struct nvgpu_mutex tlb_lock; - struct nvgpu_mutex priv_lock; - - struct nvgpu_mem bar2_desc; - -#ifdef CONFIG_TEGRA_19x_GPU - struct nvgpu_mem hw_fault_buf[FAULT_TYPE_NUM]; - unsigned int hw_fault_buf_status[FAULT_TYPE_NUM]; - struct mmu_fault_info *fault_info[FAULT_TYPE_NUM]; - struct nvgpu_mutex hub_isr_mutex; - u32 hub_intr_types; -#endif - /* - * Separate function to cleanup the CE since it requires a channel to - * be closed which must happen before fifo cleanup. - */ - void (*remove_ce_support)(struct mm_gk20a *mm); - void (*remove_support)(struct mm_gk20a *mm); - bool sw_ready; - int physical_bits; - bool use_full_comp_tag_line; - bool ltc_enabled_current; - bool ltc_enabled_target; - bool bypass_smmu; - bool disable_bigpage; - bool has_physical_mode; - - struct nvgpu_mem sysmem_flush; - - u32 pramin_window; - struct nvgpu_spinlock pramin_window_lock; - bool force_pramin; /* via debugfs */ - - struct { - size_t size; - u64 base; - size_t bootstrap_size; - u64 bootstrap_base; - - struct nvgpu_allocator allocator; - struct nvgpu_allocator bootstrap_allocator; - - u32 ce_ctx_id; - volatile bool cleared; - struct nvgpu_mutex first_clear_mutex; - - struct nvgpu_list_node clear_list_head; - struct nvgpu_mutex clear_list_mutex; - - struct nvgpu_cond clearing_thread_cond; - struct nvgpu_thread clearing_thread; - struct nvgpu_mutex clearing_thread_lock; - nvgpu_atomic_t pause_count; - - nvgpu_atomic64_t bytes_pending; - } vidmem; -}; - -int gk20a_mm_init(struct mm_gk20a *mm); - -#define gk20a_from_mm(mm) ((mm)->g) -#define gk20a_from_vm(vm) ((vm)->mm->g) - #define dev_from_vm(vm) dev_from_gk20a(vm->mm->g) -#define DEFAULT_ALLOC_ALIGNMENT (4*1024) - -static inline int bar1_aperture_size_mb_gk20a(void) -{ - return 16; /* 16MB is more than enough atm. 
*/ -} - -/* The maximum GPU VA range supported */ -#define NV_GMMU_VA_RANGE 38 - -/* The default userspace-visible GPU VA size */ -#define NV_MM_DEFAULT_USER_SIZE (1ULL << 37) - -/* The default kernel-reserved GPU VA size */ -#define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32) - -/* - * When not using unified address spaces, the bottom 56GB of the space are used - * for small pages, and the remaining high memory is used for large pages. - */ -static inline u64 __nv_gmmu_va_small_page_limit(void) -{ - return ((u64)SZ_1G * 56); -} - -enum nvgpu_flush_op { - NVGPU_FLUSH_DEFAULT, - NVGPU_FLUSH_FB, - NVGPU_FLUSH_L2_INV, - NVGPU_FLUSH_L2_FLUSH, - NVGPU_FLUSH_CBC_CLEAN, -}; +void gk20a_mm_ltc_isr(struct gk20a *g); -enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm, - u64 base, u64 size); -enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size); +bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g); -#if 0 /*related to addr bits above, concern below TBD on which is accurate */ -#define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ - bus_bar1_block_ptr_s()) -#else -#define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v() -#endif +int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g); int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block); -void gk20a_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block); void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, u32 big_page_size); -u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *mem); - -void gk20a_mm_dump_vm(struct vm_gk20a *vm, - u64 va_begin, u64 va_end, char *label); - -int gk20a_mm_suspend(struct gk20a *g); - -void gk20a_mm_ltc_isr(struct gk20a *g); - -bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g); - -int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g); +int gk20a_init_mm_setup_hw(struct gk20a *g); u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, u64 map_offset, diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index e4dd6a59..2b954e1a 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "gk20a.h" #include "gr_gk20a.h" @@ -181,7 +182,7 @@ int pmu_bootstrap(struct nvgpu_pmu *pmu) pwr_falcon_itfen_ctxen_enable_f()); gk20a_writel(g, pwr_pmu_new_instblk_r(), pwr_pmu_new_instblk_ptr_f( - gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | + nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | pwr_pmu_new_instblk_valid_f(1) | pwr_pmu_new_instblk_target_sys_coh_f()); diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c index 7029b477..557948e1 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c @@ -21,7 +21,6 @@ */ #include - #include #include #include @@ -33,6 +32,7 @@ #include #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/pmu_gk20a.h" @@ -1170,7 +1170,7 @@ static int bl_bootstrap(struct nvgpu_pmu *pmu, pwr_falcon_itfen_ctxen_enable_f()); gk20a_writel(g, pwr_pmu_new_instblk_r(), pwr_pmu_new_instblk_ptr_f( - gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | + nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | pwr_pmu_new_instblk_valid_f(1) | pwr_pmu_new_instblk_target_sys_coh_f()); diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c index b8d42f7a..34c8d4b7 100644 --- a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c +++ 
b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c @@ -24,6 +24,7 @@ #include #include +#include #include "bus_gm20b.h" #include "gk20a/gk20a.h" @@ -35,8 +36,8 @@ int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst) { struct nvgpu_timeout timeout; int err = 0; - u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst); - u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a()); + u64 iova = nvgpu_inst_block_addr(g, bar1_inst); + u32 ptr_v = (u32)(iova >> bus_bar1_block_ptr_shift_v()); gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v); diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index f4ddd92f..0762e8bd 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -42,7 +43,7 @@ void channel_gm20b_bind(struct channel_gk20a *c) { struct gk20a *g = c->g; - u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block) + u32 inst_ptr = nvgpu_inst_block_addr(g, &c->inst_block) >> ram_in_base_shift_v(); gk20a_dbg_info("bind channel %d inst ptr 0x%08x", diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 269fd7f1..d081fb24 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -442,6 +442,7 @@ static const struct gpu_ops gm20b_ops = { .init_pdb = gk20a_mm_init_pdb, .init_mm_setup_hw = gk20a_init_mm_setup_hw, .is_bar1_supported = gm20b_mm_is_bar1_supported, + .alloc_inst_block = gk20a_alloc_inst_block, .init_inst_block = gk20a_init_inst_block, .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, .get_kind_invalid = gm20b_get_kind_invalid, diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 1246ee7f..59f72e13 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -524,6 +524,7 @@ static const struct gpu_ops gp106_ops = { .init_pdb = gp10b_mm_init_pdb, .init_mm_setup_hw = gp10b_init_mm_setup_hw, .is_bar1_supported = gm20b_mm_is_bar1_supported, + .alloc_inst_block = gk20a_alloc_inst_block, .init_inst_block = gk20a_init_inst_block, .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, .init_bar2_vm = gb10b_init_bar2_vm, diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c index 9f0fe375..26ded39e 100644 --- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c +++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c @@ -22,6 +22,7 @@ #include #include +#include #include "gk20a/gk20a.h" #include "sec2_gp106.h" @@ -88,7 +89,7 @@ int bl_bootstrap_sec2(struct nvgpu_pmu *pmu, gk20a_writel(g, psec_falcon_nxtctx_r(), pwr_pmu_new_instblk_ptr_f( - gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | + nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | pwr_pmu_new_instblk_valid_f(1) | nvgpu_aperture_mask(g, &mm->pmu.inst_block, pwr_pmu_new_instblk_target_sys_coh_f(), @@ -154,7 +155,7 @@ void init_pmu_setup_hw1(struct gk20a *g) pwr_falcon_itfen_ctxen_enable_f()); gk20a_writel(g, pwr_pmu_new_instblk_r(), pwr_pmu_new_instblk_ptr_f( - gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | + nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | pwr_pmu_new_instblk_valid_f(1) | nvgpu_aperture_mask(g, &mm->pmu.inst_block, pwr_pmu_new_instblk_target_sys_coh_f(), diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index b80722b8..a10df740 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -484,6 +484,7 @@ static const 
struct gpu_ops gp10b_ops = { .init_pdb = gp10b_mm_init_pdb, .init_mm_setup_hw = gp10b_init_mm_setup_hw, .is_bar1_supported = gm20b_mm_is_bar1_supported, + .alloc_inst_block = gk20a_alloc_inst_block, .init_inst_block = gk20a_init_inst_block, .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, .init_bar2_vm = gb10b_init_bar2_vm, diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 06a9b929..dc746153 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c @@ -22,6 +22,7 @@ * DEALINGS IN THE SOFTWARE. */ +#include #include #include @@ -95,7 +96,7 @@ int gb10b_init_bar2_vm(struct gk20a *g) return -ENOMEM; /* allocate instance mem for bar2 */ - err = gk20a_alloc_inst_block(g, inst_block); + err = g->ops.mm.alloc_inst_block(g, inst_block); if (err) goto clean_up_va; @@ -112,7 +113,7 @@ int gb10b_init_bar2_mm_hw_setup(struct gk20a *g) { struct mm_gk20a *mm = &g->mm; struct nvgpu_mem *inst_block = &mm->bar2.inst_block; - u64 inst_pa = gk20a_mm_inst_block_addr(g, inst_block); + u64 inst_pa = nvgpu_inst_block_addr(g, inst_block); gk20a_dbg_fn(""); @@ -374,6 +375,6 @@ void gp10b_remove_bar2_vm(struct gk20a *g) struct mm_gk20a *mm = &g->mm; gp10b_replayable_pagefault_buffer_deinit(g); - gk20a_free_inst_block(g, &mm->bar2.inst_block); + nvgpu_free_inst_block(g, &mm->bar2.inst_block); nvgpu_vm_put(mm->bar2.vm); } diff --git a/drivers/gpu/nvgpu/include/nvgpu/mm.h b/drivers/gpu/nvgpu/include/nvgpu/mm.h new file mode 100644 index 00000000..13b33d9f --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/mm.h @@ -0,0 +1,220 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __NVGPU_MM_H__ +#define __NVGPU_MM_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct gk20a; +struct vm_gk20a; +struct nvgpu_mem; +struct nvgpu_pd_cache; + +#define FAULT_TYPE_NUM 2 /* replay and nonreplay faults */ + +struct mmu_fault_info { + u64 inst_ptr; + u32 inst_aperture; + u64 fault_addr; + u32 fault_addr_aperture; + u32 timestamp_lo; + u32 timestamp_hi; + u32 mmu_engine_id; + u32 gpc_id; + u32 client_type; + u32 client_id; + u32 fault_type; + u32 access_type; + u32 protected_mode; + u32 replayable_fault; + u32 replay_fault_en; + u32 valid; + u32 faulted_pbdma; + u32 faulted_engine; + u32 faulted_subid; + u32 chid; + struct channel_gk20a *refch; + const char *client_type_desc; + const char *fault_type_desc; + const char *client_id_desc; +}; + +enum nvgpu_flush_op { + NVGPU_FLUSH_DEFAULT, + NVGPU_FLUSH_FB, + NVGPU_FLUSH_L2_INV, + NVGPU_FLUSH_L2_FLUSH, + NVGPU_FLUSH_CBC_CLEAN, +}; + +struct mm_gk20a { + struct gk20a *g; + + /* GPU VA default sizes address spaces for channels */ + struct { + u64 user_size; /* userspace-visible GPU VA region */ + u64 kernel_size; /* kernel-only GPU VA region */ + } channel; + + struct { + u32 aperture_size; + struct vm_gk20a *vm; + struct nvgpu_mem inst_block; + } bar1; + + struct { + u32 aperture_size; + struct vm_gk20a *vm; + struct nvgpu_mem inst_block; + } bar2; + + struct { + u32 aperture_size; + struct vm_gk20a *vm; + struct nvgpu_mem inst_block; + } pmu; + + struct { + /* using pmu vm currently */ + struct nvgpu_mem inst_block; + } hwpm; + + struct { + struct vm_gk20a *vm; + struct nvgpu_mem inst_block; + } perfbuf; + + struct { + struct vm_gk20a *vm; + } cde; + + struct { + struct vm_gk20a *vm; + } ce; + + struct nvgpu_pd_cache *pd_cache; + + struct nvgpu_mutex l2_op_lock; + struct nvgpu_mutex tlb_lock; + struct nvgpu_mutex priv_lock; + + struct nvgpu_mem bar2_desc; + +#ifdef CONFIG_TEGRA_19x_GPU + struct nvgpu_mem hw_fault_buf[FAULT_TYPE_NUM]; + unsigned int hw_fault_buf_status[FAULT_TYPE_NUM]; + struct mmu_fault_info *fault_info[FAULT_TYPE_NUM]; + struct nvgpu_mutex hub_isr_mutex; + u32 hub_intr_types; +#endif + /* + * Separate function to cleanup the CE since it requires a channel to + * be closed which must happen before fifo cleanup. + */ + void (*remove_ce_support)(struct mm_gk20a *mm); + void (*remove_support)(struct mm_gk20a *mm); + bool sw_ready; + int physical_bits; + bool use_full_comp_tag_line; + bool ltc_enabled_current; + bool ltc_enabled_target; + bool bypass_smmu; + bool disable_bigpage; + bool has_physical_mode; + + struct nvgpu_mem sysmem_flush; + + u32 pramin_window; + struct nvgpu_spinlock pramin_window_lock; + bool force_pramin; /* via debugfs */ + + struct { + size_t size; + u64 base; + size_t bootstrap_size; + u64 bootstrap_base; + + struct nvgpu_allocator allocator; + struct nvgpu_allocator bootstrap_allocator; + + u32 ce_ctx_id; + volatile bool cleared; + struct nvgpu_mutex first_clear_mutex; + + struct nvgpu_list_node clear_list_head; + struct nvgpu_mutex clear_list_mutex; + + struct nvgpu_cond clearing_thread_cond; + struct nvgpu_thread clearing_thread; + struct nvgpu_mutex clearing_thread_lock; + nvgpu_atomic_t pause_count; + + nvgpu_atomic64_t bytes_pending; + } vidmem; +}; + +#define gk20a_from_mm(mm) ((mm)->g) +#define gk20a_from_vm(vm) ((vm)->mm->g) + +static inline int bar1_aperture_size_mb_gk20a(void) +{ + return 16; /* 16MB is more than enough atm. 
*/ +} + +/* The maximum GPU VA range supported */ +#define NV_GMMU_VA_RANGE 38 + +/* The default userspace-visible GPU VA size */ +#define NV_MM_DEFAULT_USER_SIZE (1ULL << 37) + +/* The default kernel-reserved GPU VA size */ +#define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32) + +/* + * When not using unified address spaces, the bottom 56GB of the space are used + * for small pages, and the remaining high memory is used for large pages. + */ +static inline u64 __nv_gmmu_va_small_page_limit(void) +{ + return ((u64)SZ_1G * 56); +} + +enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm, + u64 base, u64 size); +enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size); + +void nvgpu_init_mm_ce_context(struct gk20a *g); +int nvgpu_init_mm_support(struct gk20a *g); +int nvgpu_init_mm_setup_hw(struct gk20a *g); + +u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *mem); +void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block); + +int nvgpu_mm_suspend(struct gk20a *g); + +#endif -- cgit v1.2.2
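For reference, the HAL indirection that point 1 of the commit message describes can be illustrated with a reduced, standalone C sketch: the per-chip ops table carries the alloc_inst_block function pointer, and the common MM code only ever dispatches through g->ops.mm.*. The struct layouts and bodies below are simplified placeholders for illustration, not the real nvgpu definitions.

#include <stdio.h>
#include <stdlib.h>

struct nvgpu_mem {
	void *cpu_va;
	size_t size;
};

struct gk20a;

struct gpu_ops {
	struct {
		int (*alloc_inst_block)(struct gk20a *g,
					struct nvgpu_mem *inst_block);
	} mm;
};

struct gk20a {
	struct gpu_ops ops;
};

/* Chip-level implementation; for now every chip's HAL table points here. */
static int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
{
	(void)g;
	inst_block->size = 4096;	/* stand-in for ram_in_alloc_size_v() */
	inst_block->cpu_va = calloc(1, inst_block->size);
	return inst_block->cpu_va ? 0 : -1;
}

/* Common code (as in common/mm/mm.c) dispatches through the ops table only. */
static int nvgpu_init_system_vm_sketch(struct gk20a *g,
				       struct nvgpu_mem *inst_block)
{
	return g->ops.mm.alloc_inst_block(g, inst_block);
}

int main(void)
{
	/* HAL wiring, as the hal_gm20b/gp106/gp10b hunks above do. */
	struct gk20a g = { .ops.mm.alloc_inst_block = gk20a_alloc_inst_block };
	struct nvgpu_mem inst = { 0 };

	if (nvgpu_init_system_vm_sketch(&g, &inst) == 0)
		printf("inst block allocated: %zu bytes\n", inst.size);
	free(inst.cpu_va);
	return 0;
}

Because the common code sees only the function pointer, a future chip whose instance-block allocation register differs can swap in its own implementation without touching common/mm/mm.c.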