Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/bus_gk20a.c          5
-rw-r--r--  drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c       3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c      6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c   7
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c        13
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.h         1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c              7
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h              3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c          11
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c         441
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h         208
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pmu_gk20a.c          3

12 files changed, 57 insertions(+), 651 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
index 938c4b00..9b031bbf 100644
--- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
@@ -24,6 +24,7 @@
 #include <nvgpu/log.h>
 #include <nvgpu/soc.h>
 #include <nvgpu/bus.h>
+#include <nvgpu/mm.h>
 
 #include "gk20a.h"
 #include "bus_gk20a.h"
@@ -137,8 +138,8 @@ int gk20a_read_ptimer(struct gk20a *g, u64 *value)
 
 int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
 {
-	u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst);
-	u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a());
+	u64 iova = nvgpu_inst_block_addr(g, bar1_inst);
+	u32 ptr_v = (u32)(iova >> bus_bar1_block_ptr_shift_v());
 
 	gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v);
 
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
index 725ae278..e3896981 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -29,6 +29,7 @@
 #include <nvgpu/kmem.h>
 #include <nvgpu/lock.h>
 #include <nvgpu/dma.h>
+#include <nvgpu/mm.h>
 
 #include "gk20a.h"
 #include "css_gr_gk20a.h"
@@ -183,7 +184,7 @@ int css_hw_enable_snapshot(struct channel_gk20a *ch,
 	gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size);
 
 	/* this field is aligned to 4K */
-	inst_pa_page = gk20a_mm_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12;
+	inst_pa_page = nvgpu_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12;
 
 	/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
 	 * should be written last */
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 8c39ecb7..802ccd76 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -26,6 +26,7 @@
 #include <nvgpu/log.h>
 #include <nvgpu/vm.h>
 #include <nvgpu/atomic.h>
+#include <nvgpu/mm.h>
 
 #include "gk20a.h"
 #include "gk20a/platform_gk20a.h"
@@ -305,7 +306,7 @@ int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
 		return err;
 	}
 
-	err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block);
+	err = g->ops.mm.alloc_inst_block(g, &mm->perfbuf.inst_block);
 	if (err)
 		return err;
 
@@ -322,8 +323,7 @@ int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
 	gk20a_writel(g, perf_pmasys_outsize_r(), size);
 
 	/* this field is aligned to 4K */
-	inst_pa_page = gk20a_mm_inst_block_addr(g,
-			&mm->perfbuf.inst_block) >> 12;
+	inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12;
 
 	/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
 	 * should be written last */
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 7fd1793c..12d7dcb9 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -32,6 +32,7 @@
 #include <nvgpu/circ_buf.h>
 #include <nvgpu/thread.h>
 #include <nvgpu/barrier.h>
+#include <nvgpu/mm.h>
 
 #include "ctxsw_trace_gk20a.h"
 #include "fecs_trace_gk20a.h"
@@ -93,7 +94,7 @@ static inline u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts)
 
 static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch)
 {
-	return (u32) (gk20a_mm_inst_block_addr(g, &ch->inst_block) >> 12LL);
+	return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL);
 }
 
 static inline int gk20a_fecs_trace_num_ts(void)
@@ -633,12 +634,12 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
 		"chid=%d context_ptr=%x inst_block=%llx",
 		ch->chid, context_ptr,
-		gk20a_mm_inst_block_addr(g, &ch->inst_block));
+		nvgpu_inst_block_addr(g, &ch->inst_block));
 
 	if (!trace)
 		return -ENOMEM;
 
-	pa = gk20a_mm_inst_block_addr(g, &trace->trace_buf);
+	pa = nvgpu_inst_block_addr(g, &trace->trace_buf);
 	if (!pa)
 		return -ENOMEM;
 	aperture = nvgpu_aperture_mask(g, &trace->trace_buf,
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 03ca6984..fc71c358 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -24,6 +24,7 @@
 
 #include <trace/events/gk20a.h>
 
+#include <nvgpu/mm.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/semaphore.h>
@@ -1058,7 +1059,7 @@ gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr)
 		if (!ch)
 			continue;
 
-		ch_inst_ptr = gk20a_mm_inst_block_addr(g, &ch->inst_block);
+		ch_inst_ptr = nvgpu_inst_block_addr(g, &ch->inst_block);
 		if (inst_ptr == ch_inst_ptr)
 			return ch;
 
@@ -1659,10 +1660,10 @@ static bool gk20a_fifo_handle_mmu_fault(
 					ch->chid);
 			}
 		} else if (mmfault_info.inst_ptr ==
-				gk20a_mm_inst_block_addr(g, &g->mm.bar1.inst_block)) {
+				nvgpu_inst_block_addr(g, &g->mm.bar1.inst_block)) {
 			nvgpu_err(g, "mmu fault from bar1");
 		} else if (mmfault_info.inst_ptr ==
-				gk20a_mm_inst_block_addr(g, &g->mm.pmu.inst_block)) {
+				nvgpu_inst_block_addr(g, &g->mm.pmu.inst_block)) {
 			nvgpu_err(g, "mmu fault from pmu");
 		} else
 			nvgpu_err(g, "couldn't locate channel for mmu fault");
@@ -3973,12 +3974,12 @@ int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
 
 	gk20a_dbg_fn("");
 
-	err = gk20a_alloc_inst_block(g, &ch->inst_block);
+	err = g->ops.mm.alloc_inst_block(g, &ch->inst_block);
 	if (err)
 		return err;
 
 	gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
-		ch->chid, gk20a_mm_inst_block_addr(g, &ch->inst_block));
+		ch->chid, nvgpu_inst_block_addr(g, &ch->inst_block));
 
 	gk20a_dbg_fn("done");
 	return 0;
@@ -3986,7 +3987,7 @@ int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
 
 void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch)
 {
-	gk20a_free_inst_block(g, &ch->inst_block);
+	nvgpu_free_inst_block(g, &ch->inst_block);
 }
 
 u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 2bc7d9a8..ea5d55a4 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -36,6 +36,7 @@
 #include <nvgpu/kref.h>
 
 struct gk20a_debug_output;
+struct mmu_fault_info;
 
 #define MAX_RUNLIST_BUFFERS 2
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 2d09c0bb..e3c2397c 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -34,6 +34,7 @@
 #include <nvgpu/gmmu.h>
 #include <nvgpu/ltc.h>
 #include <nvgpu/vidmem.h>
+#include <nvgpu/mm.h>
 
 #include <trace/events/gk20a.h>
 
@@ -107,7 +108,7 @@ int gk20a_prepare_poweroff(struct gk20a *g)
 	ret |= nvgpu_pmu_destroy(g);
 
 	ret |= gk20a_gr_suspend(g);
-	ret |= gk20a_mm_suspend(g);
+	ret |= nvgpu_mm_suspend(g);
 	ret |= gk20a_fifo_suspend(g);
 
 	gk20a_ce_suspend(g);
@@ -213,7 +214,7 @@ int gk20a_finalize_poweron(struct gk20a *g)
 		goto done;
 	}
 
-	err = gk20a_init_mm_support(g);
+	err = nvgpu_init_mm_support(g);
 	if (err) {
 		nvgpu_err(g, "failed to init gk20a mm");
 		goto done;
@@ -314,7 +315,7 @@ int gk20a_finalize_poweron(struct gk20a *g)
 
 	gk20a_init_ce_support(g);
 
-	gk20a_init_mm_ce_context(g);
+	nvgpu_init_mm_ce_context(g);
 
 	if (g->ops.xve.available_speeds) {
 		u32 speed;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 92bcb618..9c09e85f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -47,6 +47,7 @@ struct nvgpu_warpstate;
 #include <nvgpu/thread.h>
 #include <nvgpu/io.h>
 
+#include <nvgpu/mm.h>
 #include <nvgpu/as.h>
 #include <nvgpu/log.h>
 #include <nvgpu/pramin.h>
@@ -756,6 +757,8 @@ struct gpu_ops {
 		u64 (*gpu_phys_addr)(struct gk20a *g,
 				     struct nvgpu_gmmu_attrs *attrs, u64 phys);
 		size_t (*get_vidmem_size)(struct gk20a *g);
+		int (*alloc_inst_block)(struct gk20a *g,
+					struct nvgpu_mem *inst_block);
 		void (*init_inst_block)(struct nvgpu_mem *inst_block,
 				struct vm_gk20a *vm, u32 big_page_size);
 		bool (*mmu_fault_pending)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index d6732453..6d370250 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -37,6 +37,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/debug.h>
 #include <nvgpu/barrier.h>
+#include <nvgpu/mm.h>
 
 #include "gk20a.h"
 #include "kind_gk20a.h"
@@ -731,7 +732,7 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g,
 
 static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
 {
-	u32 ptr = u64_lo32(gk20a_mm_inst_block_addr(g, inst_block)
+	u32 ptr = u64_lo32(nvgpu_inst_block_addr(g, inst_block)
 			>> ram_in_base_shift_v());
 	u32 aperture = nvgpu_aperture_mask(g, inst_block,
 			gr_fecs_current_ctx_target_sys_mem_ncoh_f(),
@@ -744,7 +745,7 @@ static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
 static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
 					struct channel_gk20a *c)
 {
-	u32 inst_base_ptr = u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block)
+	u32 inst_base_ptr = u64_lo32(nvgpu_inst_block_addr(g, &c->inst_block)
 				     >> ram_in_base_shift_v());
 	u32 data = fecs_current_ctx_data(g, &c->inst_block);
 	u32 ret;
@@ -1980,7 +1981,7 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
 	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
 	int err;
 
-	err = gk20a_alloc_inst_block(g, &ucode_info->inst_blk_desc);
+	err = g->ops.mm.alloc_inst_block(g, &ucode_info->inst_blk_desc);
 	if (err)
 		return err;
 
@@ -2154,7 +2155,7 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
 
 	gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0);
 
-	inst_ptr = gk20a_mm_inst_block_addr(g, &ucode_info->inst_blk_desc);
+	inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
 	gk20a_writel(g, gr_fecs_new_ctx_r(),
 		     gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
 		     nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
@@ -5455,7 +5456,7 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
 		if (!gk20a_channel_get(ch))
 			continue;
 
-		if ((u32)(gk20a_mm_inst_block_addr(g, &ch->inst_block) >>
+		if ((u32)(nvgpu_inst_block_addr(g, &ch->inst_block) >>
 					ram_in_base_shift_v()) ==
 				gr_fecs_current_ctx_ptr_v(curr_ctx)) {
 			tsgid = ch->tsgid;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index d96fa4e1..a17d6bb6 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1,6 +1,4 @@
 /*
- * GK20A memory management
- *
  * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -24,6 +22,7 @@
 
 #include <trace/events/gk20a.h>
 
+#include <nvgpu/mm.h>
 #include <nvgpu/vm.h>
 #include <nvgpu/vm_area.h>
 #include <nvgpu/dma.h>
@@ -88,161 +87,6 @@
  *
  */
 
-static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_cde_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_ce_vm(struct mm_gk20a *mm);
-
-static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
-{
-	gk20a_dbg_fn("");
-	if (g->ops.fb.reset)
-		g->ops.fb.reset(g);
-
-	if (g->ops.clock_gating.slcg_fb_load_gating_prod)
-		g->ops.clock_gating.slcg_fb_load_gating_prod(g,
-				g->slcg_enabled);
-	if (g->ops.clock_gating.slcg_ltc_load_gating_prod)
-		g->ops.clock_gating.slcg_ltc_load_gating_prod(g,
-				g->slcg_enabled);
-	if (g->ops.clock_gating.blcg_fb_load_gating_prod)
-		g->ops.clock_gating.blcg_fb_load_gating_prod(g,
-				g->blcg_enabled);
-	if (g->ops.clock_gating.blcg_ltc_load_gating_prod)
-		g->ops.clock_gating.blcg_ltc_load_gating_prod(g,
-				g->blcg_enabled);
-
-	if (g->ops.fb.init_fs_state)
-		g->ops.fb.init_fs_state(g);
-
-	return 0;
-}
-
-static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-
-	if (mm->vidmem.ce_ctx_id != (u32)~0)
-		gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id);
-
-	mm->vidmem.ce_ctx_id = (u32)~0;
-
-	nvgpu_vm_put(mm->ce.vm);
-}
-
-static void gk20a_remove_mm_support(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-
-	if (g->ops.mm.fault_info_mem_destroy)
-		g->ops.mm.fault_info_mem_destroy(g);
-
-	if (g->ops.mm.remove_bar2_vm)
-		g->ops.mm.remove_bar2_vm(g);
-
-	if (g->ops.mm.is_bar1_supported(g)) {
-		gk20a_free_inst_block(g, &mm->bar1.inst_block);
-		nvgpu_vm_put(mm->bar1.vm);
-	}
-
-	gk20a_free_inst_block(g, &mm->pmu.inst_block);
-	gk20a_free_inst_block(g, &mm->hwpm.inst_block);
-	nvgpu_vm_put(mm->pmu.vm);
-	nvgpu_vm_put(mm->cde.vm);
-
-	nvgpu_semaphore_sea_destroy(g);
-	nvgpu_vidmem_destroy(g);
-	nvgpu_pd_cache_fini(g);
-}
-
-static int gk20a_alloc_sysmem_flush(struct gk20a *g)
-{
-	return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush);
-}
-
-int gk20a_init_mm_setup_sw(struct gk20a *g)
-{
-	struct mm_gk20a *mm = &g->mm;
-	int err;
-
-	gk20a_dbg_fn("");
-
-	if (mm->sw_ready) {
-		gk20a_dbg_fn("skip init");
-		return 0;
-	}
-
-	mm->g = g;
-	nvgpu_mutex_init(&mm->l2_op_lock);
-
-	/*TBD: make channel vm size configurable */
-	mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
-		NV_MM_DEFAULT_KERNEL_SIZE;
-	mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
-
-	gk20a_dbg_info("channel vm size: user %dMB kernel %dMB",
-		       (int)(mm->channel.user_size >> 20),
-		       (int)(mm->channel.kernel_size >> 20));
-
-	nvgpu_init_pramin(mm);
-
-	mm->vidmem.ce_ctx_id = (u32)~0;
-
-	err = nvgpu_vidmem_init(mm);
-	if (err)
-		return err;
-
-	/*
-	 * this requires fixed allocations in vidmem which must be
-	 * allocated before all other buffers
-	 */
-	if (g->ops.pmu.alloc_blob_space
-			&& !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
-		err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob);
-		if (err)
-			return err;
-	}
-
-	err = gk20a_alloc_sysmem_flush(g);
-	if (err)
-		return err;
-
-	if (g->ops.mm.is_bar1_supported(g)) {
-		err = gk20a_init_bar1_vm(mm);
-		if (err)
-			return err;
-	}
-	if (g->ops.mm.init_bar2_vm) {
-		err = g->ops.mm.init_bar2_vm(g);
-		if (err)
-			return err;
-	}
-	err = gk20a_init_system_vm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_hwpm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_cde_vm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_ce_vm(mm);
-	if (err)
-		return err;
-
-	mm->remove_support = gk20a_remove_mm_support;
-	mm->remove_ce_support = gk20a_remove_mm_ce_support;
-
-	mm->sw_ready = true;
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
 /* make sure gk20a_init_mm_support is called before */
 int gk20a_init_mm_setup_hw(struct gk20a *g)
 {
@@ -274,43 +118,6 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
 	return 0;
 }
 
-int gk20a_init_mm_support(struct gk20a *g)
-{
-	u32 err;
-
-	err = gk20a_init_mm_reset_enable_hw(g);
-	if (err)
-		return err;
-
-	err = gk20a_init_mm_setup_sw(g);
-	if (err)
-		return err;
-
-	if (g->ops.mm.init_mm_setup_hw)
-		err = g->ops.mm.init_mm_setup_hw(g);
-
-	return err;
-}
-
-void gk20a_init_mm_ce_context(struct gk20a *g)
-{
-#if defined(CONFIG_GK20A_VIDMEM)
-	if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == (u32)~0)) {
-		g->mm.vidmem.ce_ctx_id =
-			gk20a_ce_create_context_with_cb(g,
-				gk20a_fifo_get_fast_ce_runlist_id(g),
-				-1,
-				-1,
-				-1,
-				NULL);
-
-		if (g->mm.vidmem.ce_ctx_id == (u32)~0)
-			nvgpu_err(g,
-				"Failed to allocate CE context for vidmem page clearing support");
-	}
-#endif
-}
-
 int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
 {
 	return vm->mmu_levels[0].lo_bit[0];
@@ -505,76 +312,6 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
 	{.update_entry = NULL}
 };
 
-/*
- * Attempt to find a reserved memory area to determine PTE size for the passed
- * mapping. If no reserved area can be found use small pages.
- */
-enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
-					      u64 base, u64 size)
-{
-	struct nvgpu_vm_area *vm_area;
-
-	vm_area = nvgpu_vm_area_find(vm, base);
-	if (!vm_area)
-		return gmmu_page_size_small;
-
-	return vm_area->pgsz_idx;
-}
-
-/*
- * This is for when the address space does not support unified address spaces.
- */
-static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm,
-						      u64 base, u64 size)
-{
-	if (!base) {
-		if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
-			return gmmu_page_size_big;
-		return gmmu_page_size_small;
-	} else {
-		if (base < __nv_gmmu_va_small_page_limit())
-			return gmmu_page_size_small;
-		else
-			return gmmu_page_size_big;
-	}
-}
-
-/*
- * This determines the PTE size for a given alloc. Used by both the GVA space
- * allocator and the mm core code so that agreement can be reached on how to
- * map allocations.
- *
- * The page size of a buffer is this:
- *
- *   o  If the VM doesn't support large pages then obviously small pages
- *      must be used.
- *   o  If the base address is non-zero (fixed address map):
- *      - Attempt to find a reserved memory area and use the page size
- *        based on that.
- *      - If no reserved page size is available, default to small pages.
- *   o  If the base is zero:
- *      - If the size is larger than or equal to the big page size, use big
- *        pages.
- *      - Otherwise use small pages.
- */
-enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-
-	if (!vm->big_pages)
-		return gmmu_page_size_small;
-
-	if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
-		return __get_pte_size_split_addr(vm, base, size);
-
-	if (base)
-		return __get_pte_size_fixed_map(vm, base, size);
-
-	if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
-		return gmmu_page_size_big;
-	return gmmu_page_size_small;
-}
-
 int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
 {
 	int err = 0;
@@ -599,151 +336,6 @@ int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
 	return __gk20a_vm_bind_channel(as_share->vm, ch);
 }
 
-int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	int err;
-
-	gk20a_dbg_fn("");
-
-	err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
-	if (err) {
-		nvgpu_err(g, "%s: memory allocation failed", __func__);
-		return err;
-	}
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
-void gk20a_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	if (inst_block->size)
-		nvgpu_dma_free(g, inst_block);
-}
-
-u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	if (g->mm.has_physical_mode)
-		return nvgpu_mem_get_phys_addr(g, inst_block);
-	else
-		return nvgpu_mem_get_addr(g, inst_block);
-}
-
-static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
-	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
-	mm->bar1.vm = nvgpu_vm_init(g,
-				    big_page_size,
-				    SZ_4K,
-				    mm->bar1.aperture_size - SZ_4K,
-				    mm->bar1.aperture_size,
-				    true, false,
-				    "bar1");
-	if (!mm->bar1.vm)
-		return -ENOMEM;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		goto clean_up_vm;
-	g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size);
-
-	return 0;
-
-clean_up_vm:
-	nvgpu_vm_put(mm->bar1.vm);
-	return err;
-}
-
-/* pmu vm, share channel_vm interfaces */
-static int gk20a_init_system_vm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->pmu.inst_block;
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-	u32 low_hole, aperture_size;
-
-	/*
-	 * No user region - so we will pass that as zero sized.
-	 */
-	low_hole = SZ_4K * 16;
-	aperture_size = GK20A_PMU_VA_SIZE * 2;
-
-	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
-	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
-
-	mm->pmu.vm = nvgpu_vm_init(g, big_page_size,
-				   low_hole,
-				   aperture_size - low_hole,
-				   aperture_size,
-				   true,
-				   false,
-				   "system");
-	if (!mm->pmu.vm)
-		return -ENOMEM;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		goto clean_up_vm;
-	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size);
-
-	return 0;
-
-clean_up_vm:
-	nvgpu_vm_put(mm->pmu.vm);
-	return err;
-}
-
-static int gk20a_init_hwpm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->hwpm.inst_block;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		return err;
-	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0);
-
-	return 0;
-}
-
-static int gk20a_init_cde_vm(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->cde.vm = nvgpu_vm_init(g, big_page_size,
-				   big_page_size << 10,
-				   NV_MM_DEFAULT_KERNEL_SIZE,
-				   NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-				   false, false, "cde");
-	if (!mm->cde.vm)
-		return -ENOMEM;
-	return 0;
-}
-
-static int gk20a_init_ce_vm(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->ce.vm = nvgpu_vm_init(g, big_page_size,
-				  big_page_size << 10,
-				  NV_MM_DEFAULT_KERNEL_SIZE,
-				  NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-				  false, false, "ce");
-	if (!mm->ce.vm)
-		return -ENOMEM;
-	return 0;
-}
-
 void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
 		struct vm_gk20a *vm)
 {
@@ -770,7 +362,7 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
 	struct gk20a *g = gk20a_from_vm(vm);
 
 	gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p",
-		gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va);
+		nvgpu_inst_block_addr(g, inst_block), inst_block->cpu_va);
 
 	g->ops.mm.init_pdb(g, inst_block, vm);
 
@@ -784,6 +376,22 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
 	g->ops.mm.set_big_page_size(g, inst_block, big_page_size);
 }
 
+int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
+{
+	int err;
+
+	gk20a_dbg_fn("");
+
+	err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
+	if (err) {
+		nvgpu_err(g, "%s: memory allocation failed", __func__);
+		return err;
+	}
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
 int gk20a_mm_fb_flush(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
@@ -992,19 +600,6 @@ hw_was_off:
 	gk20a_idle_nosuspend(g);
 }
 
-int gk20a_mm_suspend(struct gk20a *g)
-{
-	gk20a_dbg_fn("");
-
-	nvgpu_vidmem_thread_pause_sync(&g->mm);
-
-	g->ops.mm.cbc_clean(g);
-	g->ops.mm.l2_flush(g, false);
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
 u32 gk20a_mm_get_iommu_bit(struct gk20a *g)
 {
 	return 34;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 15876b10..434fc422 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -35,11 +35,6 @@
 #include <nvgpu/list.h>
 #include <nvgpu/rbtree.h>
 #include <nvgpu/kref.h>
-#include <nvgpu/atomic.h>
-#include <nvgpu/cond.h>
-#include <nvgpu/thread.h>
-
-struct nvgpu_pd_cache;
 
 #ifdef CONFIG_ARM64
 #define outer_flush_range(a, b)
@@ -138,218 +133,23 @@ struct priv_cmd_entry {
 struct gk20a;
 struct channel_gk20a;
 
-int gk20a_init_mm_support(struct gk20a *g);
-int gk20a_init_mm_setup_sw(struct gk20a *g);
-int gk20a_init_mm_setup_hw(struct gk20a *g);
-void gk20a_init_mm_ce_context(struct gk20a *g);
-
 int gk20a_mm_fb_flush(struct gk20a *g);
 void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate);
 void gk20a_mm_cbc_clean(struct gk20a *g);
 void gk20a_mm_l2_invalidate(struct gk20a *g);
 
-#define FAULT_TYPE_NUM 2 /* replay and nonreplay faults */
-
-struct mmu_fault_info {
-	u64 inst_ptr;
-	u32 inst_aperture;
-	u64 fault_addr;
-	u32 fault_addr_aperture;
-	u32 timestamp_lo;
-	u32 timestamp_hi;
-	u32 mmu_engine_id;
-	u32 gpc_id;
-	u32 client_type;
-	u32 client_id;
-	u32 fault_type;
-	u32 access_type;
-	u32 protected_mode;
-	u32 replayable_fault;
-	u32 replay_fault_en;
-	u32 valid;
-	u32 faulted_pbdma;
-	u32 faulted_engine;
-	u32 faulted_subid;
-	u32 chid;
-	struct channel_gk20a *refch;
-	const char *client_type_desc;
-	const char *fault_type_desc;
-	const char *client_id_desc;
-};
-
-struct mm_gk20a {
-	struct gk20a *g;
-
-	/* GPU VA default sizes address spaces for channels */
-	struct {
-		u64 user_size; /* userspace-visible GPU VA region */
-		u64 kernel_size; /* kernel-only GPU VA region */
-	} channel;
-
-	struct {
-		u32 aperture_size;
-		struct vm_gk20a *vm;
-		struct nvgpu_mem inst_block;
-	} bar1;
-
-	struct {
-		u32 aperture_size;
-		struct vm_gk20a *vm;
-		struct nvgpu_mem inst_block;
-	} bar2;
-
-	struct {
-		u32 aperture_size;
-		struct vm_gk20a *vm;
-		struct nvgpu_mem inst_block;
-	} pmu;
-
-	struct {
-		/* using pmu vm currently */
-		struct nvgpu_mem inst_block;
-	} hwpm;
-
-	struct {
-		struct vm_gk20a *vm;
-		struct nvgpu_mem inst_block;
-	} perfbuf;
-
-	struct {
-		struct vm_gk20a *vm;
-	} cde;
-
-	struct {
-		struct vm_gk20a *vm;
-	} ce;
-
-	struct nvgpu_pd_cache *pd_cache;
-
-	struct nvgpu_mutex l2_op_lock;
-	struct nvgpu_mutex tlb_lock;
-	struct nvgpu_mutex priv_lock;
-
-	struct nvgpu_mem bar2_desc;
-
-#ifdef CONFIG_TEGRA_19x_GPU
-	struct nvgpu_mem hw_fault_buf[FAULT_TYPE_NUM];
-	unsigned int hw_fault_buf_status[FAULT_TYPE_NUM];
-	struct mmu_fault_info *fault_info[FAULT_TYPE_NUM];
-	struct nvgpu_mutex hub_isr_mutex;
-	u32 hub_intr_types;
-#endif
-	/*
-	 * Separate function to cleanup the CE since it requires a channel to
-	 * be closed which must happen before fifo cleanup.
-	 */
-	void (*remove_ce_support)(struct mm_gk20a *mm);
-	void (*remove_support)(struct mm_gk20a *mm);
-	bool sw_ready;
-	int physical_bits;
-	bool use_full_comp_tag_line;
-	bool ltc_enabled_current;
-	bool ltc_enabled_target;
-	bool bypass_smmu;
-	bool disable_bigpage;
-	bool has_physical_mode;
-
-	struct nvgpu_mem sysmem_flush;
-
-	u32 pramin_window;
-	struct nvgpu_spinlock pramin_window_lock;
-	bool force_pramin; /* via debugfs */
-
-	struct {
-		size_t size;
-		u64 base;
-		size_t bootstrap_size;
-		u64 bootstrap_base;
-
-		struct nvgpu_allocator allocator;
-		struct nvgpu_allocator bootstrap_allocator;
-
-		u32 ce_ctx_id;
-		volatile bool cleared;
-		struct nvgpu_mutex first_clear_mutex;
-
-		struct nvgpu_list_node clear_list_head;
-		struct nvgpu_mutex clear_list_mutex;
-
-		struct nvgpu_cond clearing_thread_cond;
-		struct nvgpu_thread clearing_thread;
-		struct nvgpu_mutex clearing_thread_lock;
-		nvgpu_atomic_t pause_count;
-
-		nvgpu_atomic64_t bytes_pending;
-	} vidmem;
-};
-
-int gk20a_mm_init(struct mm_gk20a *mm);
-
-#define gk20a_from_mm(mm) ((mm)->g)
-#define gk20a_from_vm(vm) ((vm)->mm->g)
-
 #define dev_from_vm(vm) dev_from_gk20a(vm->mm->g)
 
-#define DEFAULT_ALLOC_ALIGNMENT (4*1024)
-
-static inline int bar1_aperture_size_mb_gk20a(void)
-{
-	return 16; /* 16MB is more than enough atm. */
-}
-
-/* The maximum GPU VA range supported */
-#define NV_GMMU_VA_RANGE 38
-
-/* The default userspace-visible GPU VA size */
-#define NV_MM_DEFAULT_USER_SIZE (1ULL << 37)
-
-/* The default kernel-reserved GPU VA size */
-#define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32)
-
-/*
- * When not using unified address spaces, the bottom 56GB of the space are used
- * for small pages, and the remaining high memory is used for large pages.
- */
-static inline u64 __nv_gmmu_va_small_page_limit(void)
-{
-	return ((u64)SZ_1G * 56);
-}
-
-enum nvgpu_flush_op {
-	NVGPU_FLUSH_DEFAULT,
-	NVGPU_FLUSH_FB,
-	NVGPU_FLUSH_L2_INV,
-	NVGPU_FLUSH_L2_FLUSH,
-	NVGPU_FLUSH_CBC_CLEAN,
-};
+void gk20a_mm_ltc_isr(struct gk20a *g);
 
-enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
-		u64 base, u64 size);
-enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size);
+bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g);
 
-#if 0 /*related to addr bits above, concern below TBD on which is accurate */
-#define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\
-					bus_bar1_block_ptr_s())
-#else
-#define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v()
-#endif
+int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g);
 
 int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
-void gk20a_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
 void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
 		u32 big_page_size);
-u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *mem);
-
-void gk20a_mm_dump_vm(struct vm_gk20a *vm,
-		u64 va_begin, u64 va_end, char *label);
-
-int gk20a_mm_suspend(struct gk20a *g);
-
-void gk20a_mm_ltc_isr(struct gk20a *g);
-
-bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g);
-
-int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g);
+int gk20a_init_mm_setup_hw(struct gk20a *g);
 
 u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 		u64 map_offset,
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index e4dd6a59..2b954e1a 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -30,6 +30,7 @@
 #include <nvgpu/bug.h>
 #include <nvgpu/firmware.h>
 #include <nvgpu/falcon.h>
+#include <nvgpu/mm.h>
 
 #include "gk20a.h"
 #include "gr_gk20a.h"
@@ -181,7 +182,7 @@ int pmu_bootstrap(struct nvgpu_pmu *pmu)
 			pwr_falcon_itfen_ctxen_enable_f());
 	gk20a_writel(g, pwr_pmu_new_instblk_r(),
 		pwr_pmu_new_instblk_ptr_f(
-		gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
+		nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
 		pwr_pmu_new_instblk_valid_f(1) |
 		pwr_pmu_new_instblk_target_sys_coh_f());
 