summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/nvgpu/Makefile.nvgpu1
-rw-r--r--drivers/gpu/nvgpu/common/linux/dma.c10
-rw-r--r--drivers/gpu/nvgpu/common/linux/ioctl_as.c2
-rw-r--r--drivers/gpu/nvgpu/common/mm/gmmu.c109
-rw-r--r--drivers/gpu/nvgpu/common/semaphore.c20
-rw-r--r--drivers/gpu/nvgpu/gk20a/cde_gk20a.c9
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c55
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.c68
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.h21
-rw-r--r--drivers/gpu/nvgpu/gm20b/acr_gm20b.c13
-rw-r--r--drivers/gpu/nvgpu/gp10b/gr_gp10b.c5
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/dma.h5
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/gmmu.h69
13 files changed, 237 insertions, 150 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index 08d939f0..59c81826 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -43,6 +43,7 @@ nvgpu-y := \
43 common/mm/buddy_allocator.o \ 43 common/mm/buddy_allocator.o \
44 common/mm/page_allocator.o \ 44 common/mm/page_allocator.o \
45 common/mm/lockless_allocator.o \ 45 common/mm/lockless_allocator.o \
46 common/mm/gmmu.o \
46 common/pramin.o \ 47 common/pramin.o \
47 common/semaphore.o \ 48 common/semaphore.o \
48 common/as.o \ 49 common/as.o \
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
index eb2d0ac4..d3d51f18 100644
--- a/drivers/gpu/nvgpu/common/linux/dma.c
+++ b/drivers/gpu/nvgpu/common/linux/dma.c
@@ -20,6 +20,7 @@
20#include <nvgpu/dma.h> 20#include <nvgpu/dma.h>
21#include <nvgpu/lock.h> 21#include <nvgpu/lock.h>
22#include <nvgpu/bug.h> 22#include <nvgpu/bug.h>
23#include <nvgpu/gmmu.h>
23 24
24#include <nvgpu/linux/dma.h> 25#include <nvgpu/linux/dma.h>
25 26
@@ -71,7 +72,7 @@ int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
71 /* 72 /*
72 * Force the no-kernel-mapping flag on because we don't support 73 * Force the no-kernel-mapping flag on because we don't support
73 * the lack of it for vidmem - the user should not care when 74 * the lack of it for vidmem - the user should not care when
74 * using gk20a_gmmu_alloc_map and it's vidmem, or if there's a 75 * using nvgpu_gmmu_alloc_map and it's vidmem, or if there's a
75 * difference, the user should use the flag explicitly anyway. 76 * difference, the user should use the flag explicitly anyway.
76 */ 77 */
77 int err = nvgpu_dma_alloc_flags_vid(g, 78 int err = nvgpu_dma_alloc_flags_vid(g,
@@ -285,7 +286,7 @@ int nvgpu_dma_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
285 if (err) 286 if (err)
286 return err; 287 return err;
287 288
288 mem->gpu_va = gk20a_gmmu_map(vm, &mem->priv.sgt, size, 0, 289 mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0,
289 gk20a_mem_flag_none, false, 290 gk20a_mem_flag_none, false,
290 mem->aperture); 291 mem->aperture);
291 if (!mem->gpu_va) { 292 if (!mem->gpu_va) {
@@ -315,7 +316,7 @@ int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
315 if (err) 316 if (err)
316 return err; 317 return err;
317 318
318 mem->gpu_va = gk20a_gmmu_map(vm, &mem->priv.sgt, size, 0, 319 mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0,
319 gk20a_mem_flag_none, false, 320 gk20a_mem_flag_none, false,
320 mem->aperture); 321 mem->aperture);
321 if (!mem->gpu_va) { 322 if (!mem->gpu_va) {
@@ -420,8 +421,7 @@ void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem)
420void nvgpu_dma_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem) 421void nvgpu_dma_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem)
421{ 422{
422 if (mem->gpu_va) 423 if (mem->gpu_va)
423 gk20a_gmmu_unmap(vm, mem->gpu_va, 424 nvgpu_gmmu_unmap(vm, mem, mem->gpu_va);
424 mem->size, gk20a_mem_flag_none);
425 mem->gpu_va = 0; 425 mem->gpu_va = 0;
426 426
427 nvgpu_dma_free(vm->mm->g, mem); 427 nvgpu_dma_free(vm->mm->g, mem);
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_as.c b/drivers/gpu/nvgpu/common/linux/ioctl_as.c
index 6a9d3811..31d99d26 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_as.c
@@ -23,6 +23,8 @@
23 23
24#include <uapi/linux/nvgpu.h> 24#include <uapi/linux/nvgpu.h>
25 25
26#include <nvgpu/gmmu.h>
27
26#include "gk20a/gk20a.h" 28#include "gk20a/gk20a.h"
27#include "ioctl_as.h" 29#include "ioctl_as.h"
28 30
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
new file mode 100644
index 00000000..a2ed3f3a
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -0,0 +1,109 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/log.h>
18#include <nvgpu/gmmu.h>
19#include <nvgpu/nvgpu_mem.h>
20
21#include "gk20a/gk20a.h"
22#include "gk20a/mm_gk20a.h"
23
/*
 * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU
 * VA will be allocated for you. If addr is non-zero then the buffer will be
 * mapped at @addr.
 *
 * The mapping is always made at gmmu_page_size_kernel granularity with no
 * compression tags, no sparse handling, and no mapping batch. The returned
 * value is the GPU VA of the mapping, or 0 on failure.
 */
static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
		struct nvgpu_mem *mem,
		u64 addr,
		u64 size,
		u32 flags,
		int rw_flag,
		bool priv,
		enum nvgpu_aperture aperture)
{
	struct gk20a *g = gk20a_from_vm(vm);
	u64 vaddr;

	/* Kernel mappings always go through the buffer's backing sg_table. */
	struct sg_table *sgt = mem->priv.sgt;

	/* Serialize against all other page-table updates on this VM. */
	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	vaddr = g->ops.mm.gmmu_map(vm, addr,
				   sgt,    /* sg table */
				   0,      /* sg offset */
				   size,
				   gmmu_page_size_kernel,
				   0,      /* kind */
				   0,      /* ctag_offset */
				   flags, rw_flag,
				   false,  /* clear_ctags */
				   false,  /* sparse */
				   priv,   /* priv */
				   NULL,   /* mapping_batch handle */
				   aperture);
	nvgpu_mutex_release(&vm->update_gmmu_lock);
	if (!vaddr) {
		nvgpu_err(g, "failed to allocate va space");
		return 0;
	}

	return vaddr;
}
65
66u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
67 struct nvgpu_mem *mem,
68 u64 size,
69 u32 flags,
70 int rw_flag,
71 bool priv,
72 enum nvgpu_aperture aperture)
73{
74 return __nvgpu_gmmu_map(vm, mem, 0, size, flags, rw_flag, priv,
75 aperture);
76}
77
78/*
79 * Like nvgpu_gmmu_map() except it can work on a fixed address instead.
80 */
81u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
82 struct nvgpu_mem *mem,
83 u64 addr,
84 u64 size,
85 u32 flags,
86 int rw_flag,
87 bool priv,
88 enum nvgpu_aperture aperture)
89{
90 return __nvgpu_gmmu_map(vm, mem, addr, size, flags, rw_flag, priv,
91 aperture);
92}
93
/*
 * Unmap a kernel mapping previously created with nvgpu_gmmu_map() or
 * nvgpu_gmmu_map_fixed(). @gpu_va is the VA returned by the map call; the
 * unmap length is taken from @mem->size, so the entire buffer is unmapped.
 */
void nvgpu_gmmu_unmap(struct vm_gk20a *vm, struct nvgpu_mem *mem, u64 gpu_va)
{
	struct gk20a *g = gk20a_from_vm(vm);

	/* Serialize against all other page-table updates on this VM. */
	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	g->ops.mm.gmmu_unmap(vm,
			     gpu_va,
			     mem->size,
			     gmmu_page_size_kernel,
			     true, /* va_allocated */
			     gk20a_mem_flag_none,
			     false, /* NOTE(review): presumably 'sparse' — confirm
				     * against the gmmu_unmap op's signature */
			     NULL); /* mapping_batch handle */

	nvgpu_mutex_release(&vm->update_gmmu_lock);
}
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index fa86985b..a54ce831 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -14,6 +14,7 @@
14 */ 14 */
15 15
16#include <nvgpu/dma.h> 16#include <nvgpu/dma.h>
17#include <nvgpu/gmmu.h>
17#include <nvgpu/semaphore.h> 18#include <nvgpu/semaphore.h>
18#include <nvgpu/kmem.h> 19#include <nvgpu/kmem.h>
19#include <nvgpu/bug.h> 20#include <nvgpu/bug.h>
@@ -197,7 +198,7 @@ int nvgpu_semaphore_pool_map(struct nvgpu_semaphore_pool *p,
197 */ 198 */
198 __lock_sema_sea(p->sema_sea); 199 __lock_sema_sea(p->sema_sea);
199 200
200 addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->sea_mem.priv.sgt, 201 addr = nvgpu_gmmu_map_fixed(vm, &p->sema_sea->sea_mem,
201 p->sema_sea->gpu_va, 202 p->sema_sea->gpu_va,
202 p->sema_sea->map_size, 203 p->sema_sea->map_size,
203 0, gk20a_mem_flag_read_only, 0, 204 0, gk20a_mem_flag_read_only, 0,
@@ -225,7 +226,7 @@ int nvgpu_semaphore_pool_map(struct nvgpu_semaphore_pool *p,
225 if (err) 226 if (err)
226 goto fail_unmap; 227 goto fail_unmap;
227 228
228 addr = gk20a_gmmu_map(vm, &p->rw_mem.priv.sgt, SZ_4K, 0, 229 addr = nvgpu_gmmu_map(vm, &p->rw_mem, SZ_4K, 0,
229 gk20a_mem_flag_none, 0, 230 gk20a_mem_flag_none, 0,
230 p->rw_mem.aperture); 231 p->rw_mem.aperture);
231 232
@@ -250,10 +251,7 @@ int nvgpu_semaphore_pool_map(struct nvgpu_semaphore_pool *p,
250fail_free_submem: 251fail_free_submem:
251 nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem); 252 nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem);
252fail_unmap: 253fail_unmap:
253 gk20a_gmmu_unmap(vm, 254 nvgpu_gmmu_unmap(vm, &p->sema_sea->sea_mem, p->gpu_va_ro);
254 p->sema_sea->sea_mem.gpu_va,
255 p->sema_sea->map_size,
256 gk20a_mem_flag_none);
257 gpu_sema_dbg(pool_to_gk20a(p), 255 gpu_sema_dbg(pool_to_gk20a(p),
258 " %d: Failed to map semaphore pool!", p->page_idx); 256 " %d: Failed to map semaphore pool!", p->page_idx);
259fail_unlock: 257fail_unlock:
@@ -269,14 +267,8 @@ void nvgpu_semaphore_pool_unmap(struct nvgpu_semaphore_pool *p,
269{ 267{
270 __lock_sema_sea(p->sema_sea); 268 __lock_sema_sea(p->sema_sea);
271 269
272 gk20a_gmmu_unmap(vm, 270 nvgpu_gmmu_unmap(vm, &p->sema_sea->sea_mem, p->gpu_va_ro);
273 p->sema_sea->sea_mem.gpu_va, 271 nvgpu_gmmu_unmap(vm, &p->rw_mem, p->gpu_va);
274 p->sema_sea->sea_mem.size,
275 gk20a_mem_flag_none);
276 gk20a_gmmu_unmap(vm,
277 p->rw_mem.gpu_va,
278 p->rw_mem.size,
279 gk20a_mem_flag_none);
280 nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem); 272 nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem);
281 273
282 p->gpu_va = 0; 274 p->gpu_va = 0;
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index f0927692..74de7e8e 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -26,6 +26,7 @@
26#include <trace/events/gk20a.h> 26#include <trace/events/gk20a.h>
27 27
28#include <nvgpu/dma.h> 28#include <nvgpu/dma.h>
29#include <nvgpu/gmmu.h>
29#include <nvgpu/timers.h> 30#include <nvgpu/timers.h>
30#include <nvgpu/nvgpu_common.h> 31#include <nvgpu/nvgpu_common.h>
31#include <nvgpu/kmem.h> 32#include <nvgpu/kmem.h>
@@ -83,8 +84,8 @@ __must_hold(&cde_app->mutex)
83 84
84 /* release mapped memory */ 85 /* release mapped memory */
85 gk20a_deinit_cde_img(cde_ctx); 86 gk20a_deinit_cde_img(cde_ctx);
86 gk20a_gmmu_unmap(vm, cde_ctx->backing_store_vaddr, 87 nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
87 g->gr.compbit_store.mem.size, 1); 88 cde_ctx->backing_store_vaddr);
88 89
89 /* free the channel */ 90 /* free the channel */
90 gk20a_channel_close(ch); 91 gk20a_channel_close(ch);
@@ -1241,7 +1242,7 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
1241 } 1242 }
1242 1243
1243 /* map backing store to gpu virtual space */ 1244 /* map backing store to gpu virtual space */
1244 vaddr = gk20a_gmmu_map(ch->vm, &gr->compbit_store.mem.priv.sgt, 1245 vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem,
1245 g->gr.compbit_store.mem.size, 1246 g->gr.compbit_store.mem.size,
1246 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 1247 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
1247 gk20a_mem_flag_read_only, 1248 gk20a_mem_flag_read_only,
@@ -1272,7 +1273,7 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
1272 return 0; 1273 return 0;
1273 1274
1274err_init_cde_img: 1275err_init_cde_img:
1275 gk20a_gmmu_unmap(ch->vm, vaddr, g->gr.compbit_store.mem.size, 1); 1276 nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
1276err_map_backingstore: 1277err_map_backingstore:
1277err_alloc_gpfifo: 1278err_alloc_gpfifo:
1278 gk20a_vm_put(ch->vm); 1279 gk20a_vm_put(ch->vm);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 77a947de..2f52fdcf 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -21,6 +21,7 @@
21 21
22#include <nvgpu/dma.h> 22#include <nvgpu/dma.h>
23#include <nvgpu/kmem.h> 23#include <nvgpu/kmem.h>
24#include <nvgpu/gmmu.h>
24#include <nvgpu/timers.h> 25#include <nvgpu/timers.h>
25#include <nvgpu/nvgpu_common.h> 26#include <nvgpu/nvgpu_common.h>
26#include <nvgpu/log.h> 27#include <nvgpu/log.h>
@@ -1946,8 +1947,8 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1946 return ret; 1947 return ret;
1947 } 1948 }
1948 1949
1949 pm_ctx->mem.gpu_va = gk20a_gmmu_map(c->vm, 1950 pm_ctx->mem.gpu_va = nvgpu_gmmu_map(c->vm,
1950 &pm_ctx->mem.priv.sgt, 1951 &pm_ctx->mem,
1951 pm_ctx->mem.size, 1952 pm_ctx->mem.size,
1952 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 1953 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
1953 gk20a_mem_flag_none, true, 1954 gk20a_mem_flag_none, true,
@@ -2013,8 +2014,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
2013clean_up_mem: 2014clean_up_mem:
2014 nvgpu_mem_end(g, gr_mem); 2015 nvgpu_mem_end(g, gr_mem);
2015cleanup_pm_buf: 2016cleanup_pm_buf:
2016 gk20a_gmmu_unmap(c->vm, pm_ctx->mem.gpu_va, pm_ctx->mem.size, 2017 nvgpu_gmmu_unmap(c->vm, &pm_ctx->mem, pm_ctx->mem.gpu_va);
2017 gk20a_mem_flag_none);
2018 nvgpu_dma_free(g, &pm_ctx->mem); 2018 nvgpu_dma_free(g, &pm_ctx->mem);
2019 memset(&pm_ctx->mem, 0, sizeof(struct nvgpu_mem)); 2019 memset(&pm_ctx->mem, 0, sizeof(struct nvgpu_mem));
2020 2020
@@ -2198,8 +2198,8 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
2198 g->ops.mm.init_inst_block(&ucode_info->inst_blk_desc, vm, 0); 2198 g->ops.mm.init_inst_block(&ucode_info->inst_blk_desc, vm, 0);
2199 2199
2200 /* Map ucode surface to GMMU */ 2200 /* Map ucode surface to GMMU */
2201 ucode_info->surface_desc.gpu_va = gk20a_gmmu_map(vm, 2201 ucode_info->surface_desc.gpu_va = nvgpu_gmmu_map(vm,
2202 &ucode_info->surface_desc.priv.sgt, 2202 &ucode_info->surface_desc,
2203 ucode_info->surface_desc.size, 2203 ucode_info->surface_desc.size,
2204 0, /* flags */ 2204 0, /* flags */
2205 gk20a_mem_flag_read_only, 2205 gk20a_mem_flag_read_only,
@@ -2331,10 +2331,10 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
2331 2331
2332 return 0; 2332 return 0;
2333 2333
2334 clean_up: 2334clean_up:
2335 if (ucode_info->surface_desc.gpu_va) 2335 if (ucode_info->surface_desc.gpu_va)
2336 gk20a_gmmu_unmap(vm, ucode_info->surface_desc.gpu_va, 2336 nvgpu_gmmu_unmap(vm, &ucode_info->surface_desc,
2337 ucode_info->surface_desc.size, gk20a_mem_flag_none); 2337 ucode_info->surface_desc.gpu_va);
2338 nvgpu_dma_free(g, &ucode_info->surface_desc); 2338 nvgpu_dma_free(g, &ucode_info->surface_desc);
2339 2339
2340 nvgpu_release_firmware(g, gpccs_fw); 2340 nvgpu_release_firmware(g, gpccs_fw);
@@ -2824,7 +2824,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2824 mem = &gr->global_ctx_buffer[CIRCULAR_VPR].mem; 2824 mem = &gr->global_ctx_buffer[CIRCULAR_VPR].mem;
2825 } 2825 }
2826 2826
2827 gpu_va = gk20a_gmmu_map(ch_vm, &mem->priv.sgt, mem->size, 2827 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
2828 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 2828 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
2829 gk20a_mem_flag_none, true, mem->aperture); 2829 gk20a_mem_flag_none, true, mem->aperture);
2830 if (!gpu_va) 2830 if (!gpu_va)
@@ -2840,7 +2840,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2840 mem = &gr->global_ctx_buffer[ATTRIBUTE_VPR].mem; 2840 mem = &gr->global_ctx_buffer[ATTRIBUTE_VPR].mem;
2841 } 2841 }
2842 2842
2843 gpu_va = gk20a_gmmu_map(ch_vm, &mem->priv.sgt, mem->size, 2843 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
2844 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 2844 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
2845 gk20a_mem_flag_none, false, mem->aperture); 2845 gk20a_mem_flag_none, false, mem->aperture);
2846 if (!gpu_va) 2846 if (!gpu_va)
@@ -2856,7 +2856,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2856 mem = &gr->global_ctx_buffer[PAGEPOOL_VPR].mem; 2856 mem = &gr->global_ctx_buffer[PAGEPOOL_VPR].mem;
2857 } 2857 }
2858 2858
2859 gpu_va = gk20a_gmmu_map(ch_vm, &mem->priv.sgt, mem->size, 2859 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
2860 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 2860 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
2861 gk20a_mem_flag_none, true, mem->aperture); 2861 gk20a_mem_flag_none, true, mem->aperture);
2862 if (!gpu_va) 2862 if (!gpu_va)
@@ -2866,7 +2866,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2866 2866
2867 /* Golden Image */ 2867 /* Golden Image */
2868 mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem; 2868 mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem;
2869 gpu_va = gk20a_gmmu_map(ch_vm, &mem->priv.sgt, mem->size, 0, 2869 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0,
2870 gk20a_mem_flag_none, true, mem->aperture); 2870 gk20a_mem_flag_none, true, mem->aperture);
2871 if (!gpu_va) 2871 if (!gpu_va)
2872 goto clean_up; 2872 goto clean_up;
@@ -2875,7 +2875,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2875 2875
2876 /* Priv register Access Map */ 2876 /* Priv register Access Map */
2877 mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem; 2877 mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem;
2878 gpu_va = gk20a_gmmu_map(ch_vm, &mem->priv.sgt, mem->size, 0, 2878 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0,
2879 gk20a_mem_flag_none, true, mem->aperture); 2879 gk20a_mem_flag_none, true, mem->aperture);
2880 if (!gpu_va) 2880 if (!gpu_va)
2881 goto clean_up; 2881 goto clean_up;
@@ -2885,12 +2885,11 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2885 c->ch_ctx.global_ctx_buffer_mapped = true; 2885 c->ch_ctx.global_ctx_buffer_mapped = true;
2886 return 0; 2886 return 0;
2887 2887
2888 clean_up: 2888clean_up:
2889 for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { 2889 for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
2890 if (g_bfr_va[i]) { 2890 if (g_bfr_va[i]) {
2891 gk20a_gmmu_unmap(ch_vm, g_bfr_va[i], 2891 nvgpu_gmmu_unmap(ch_vm, &gr->global_ctx_buffer[i].mem,
2892 gr->global_ctx_buffer[i].mem.size, 2892 g_bfr_va[i]);
2893 gk20a_mem_flag_none);
2894 g_bfr_va[i] = 0; 2893 g_bfr_va[i] = 0;
2895 } 2894 }
2896 } 2895 }
@@ -2900,6 +2899,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2900static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c) 2899static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
2901{ 2900{
2902 struct vm_gk20a *ch_vm = c->vm; 2901 struct vm_gk20a *ch_vm = c->vm;
2902 struct gr_gk20a *gr = &c->g->gr;
2903 u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; 2903 u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
2904 u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; 2904 u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
2905 u32 i; 2905 u32 i;
@@ -2908,9 +2908,8 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
2908 2908
2909 for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { 2909 for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
2910 if (g_bfr_va[i]) { 2910 if (g_bfr_va[i]) {
2911 gk20a_gmmu_unmap(ch_vm, g_bfr_va[i], 2911 nvgpu_gmmu_unmap(ch_vm, &gr->global_ctx_buffer[i].mem,
2912 g_bfr_size[i], 2912 g_bfr_va[i]);
2913 gk20a_mem_flag_none);
2914 g_bfr_va[i] = 0; 2913 g_bfr_va[i] = 0;
2915 g_bfr_size[i] = 0; 2914 g_bfr_size[i] = 0;
2916 } 2915 }
@@ -2946,8 +2945,8 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
2946 if (err) 2945 if (err)
2947 goto err_free_ctx; 2946 goto err_free_ctx;
2948 2947
2949 gr_ctx->mem.gpu_va = gk20a_gmmu_map(vm, 2948 gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm,
2950 &gr_ctx->mem.priv.sgt, 2949 &gr_ctx->mem,
2951 gr_ctx->mem.size, 2950 gr_ctx->mem.size,
2952 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_FALSE, 2951 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_FALSE,
2953 gk20a_mem_flag_none, true, 2952 gk20a_mem_flag_none, true,
@@ -3007,8 +3006,7 @@ void gr_gk20a_free_gr_ctx(struct gk20a *g,
3007 if (!gr_ctx || !gr_ctx->mem.gpu_va) 3006 if (!gr_ctx || !gr_ctx->mem.gpu_va)
3008 return; 3007 return;
3009 3008
3010 gk20a_gmmu_unmap(vm, gr_ctx->mem.gpu_va, 3009 nvgpu_gmmu_unmap(vm, &gr_ctx->mem, gr_ctx->mem.gpu_va);
3011 gr_ctx->mem.size, gk20a_mem_flag_none);
3012 nvgpu_dma_free(g, &gr_ctx->mem); 3010 nvgpu_dma_free(g, &gr_ctx->mem);
3013 nvgpu_kfree(g, gr_ctx); 3011 nvgpu_kfree(g, gr_ctx);
3014} 3012}
@@ -3055,8 +3053,8 @@ static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c)
3055 gk20a_dbg_fn(""); 3053 gk20a_dbg_fn("");
3056 3054
3057 if (patch_ctx->mem.gpu_va) 3055 if (patch_ctx->mem.gpu_va)
3058 gk20a_gmmu_unmap(c->vm, patch_ctx->mem.gpu_va, 3056 nvgpu_gmmu_unmap(c->vm, &patch_ctx->mem,
3059 patch_ctx->mem.size, gk20a_mem_flag_none); 3057 patch_ctx->mem.gpu_va);
3060 3058
3061 nvgpu_dma_free(g, &patch_ctx->mem); 3059 nvgpu_dma_free(g, &patch_ctx->mem);
3062 patch_ctx->data_count = 0; 3060 patch_ctx->data_count = 0;
@@ -3070,8 +3068,7 @@ static void gr_gk20a_free_channel_pm_ctx(struct channel_gk20a *c)
3070 gk20a_dbg_fn(""); 3068 gk20a_dbg_fn("");
3071 3069
3072 if (pm_ctx->mem.gpu_va) { 3070 if (pm_ctx->mem.gpu_va) {
3073 gk20a_gmmu_unmap(c->vm, pm_ctx->mem.gpu_va, 3071 nvgpu_gmmu_unmap(c->vm, &pm_ctx->mem, pm_ctx->mem.gpu_va);
3074 pm_ctx->mem.size, gk20a_mem_flag_none);
3075 3072
3076 nvgpu_dma_free(g, &pm_ctx->mem); 3073 nvgpu_dma_free(g, &pm_ctx->mem);
3077 } 3074 }
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index d95a2cde..08c99895 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -2441,74 +2441,6 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
2441 return 0; 2441 return 0;
2442} 2442}
2443 2443
2444/*
2445 * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU
2446 * VA will be allocated for you. If addr is non-zero then the buffer will be
2447 * mapped at @addr.
2448 */
2449static u64 __gk20a_gmmu_map(struct vm_gk20a *vm,
2450 struct sg_table **sgt,
2451 u64 addr,
2452 u64 size,
2453 u32 flags,
2454 int rw_flag,
2455 bool priv,
2456 enum nvgpu_aperture aperture)
2457{
2458 struct gk20a *g = gk20a_from_vm(vm);
2459 u64 vaddr;
2460
2461 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
2462 vaddr = g->ops.mm.gmmu_map(vm, addr,
2463 *sgt, /* sg table */
2464 0, /* sg offset */
2465 size,
2466 gmmu_page_size_kernel,
2467 0, /* kind */
2468 0, /* ctag_offset */
2469 flags, rw_flag,
2470 false, /* clear_ctags */
2471 false, /* sparse */
2472 priv, /* priv */
2473 NULL, /* mapping_batch handle */
2474 aperture);
2475 nvgpu_mutex_release(&vm->update_gmmu_lock);
2476 if (!vaddr) {
2477 nvgpu_err(g, "failed to allocate va space");
2478 return 0;
2479 }
2480
2481 return vaddr;
2482}
2483
2484u64 gk20a_gmmu_map(struct vm_gk20a *vm,
2485 struct sg_table **sgt,
2486 u64 size,
2487 u32 flags,
2488 int rw_flag,
2489 bool priv,
2490 enum nvgpu_aperture aperture)
2491{
2492 return __gk20a_gmmu_map(vm, sgt, 0, size, flags, rw_flag, priv,
2493 aperture);
2494}
2495
2496/*
2497 * Like gk20a_gmmu_map() except it works on a fixed address instead.
2498 */
2499u64 gk20a_gmmu_fixed_map(struct vm_gk20a *vm,
2500 struct sg_table **sgt,
2501 u64 addr,
2502 u64 size,
2503 u32 flags,
2504 int rw_flag,
2505 bool priv,
2506 enum nvgpu_aperture aperture)
2507{
2508 return __gk20a_gmmu_map(vm, sgt, addr, size, flags, rw_flag, priv,
2509 aperture);
2510}
2511
2512#if defined(CONFIG_GK20A_VIDMEM) 2444#if defined(CONFIG_GK20A_VIDMEM)
2513static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem) 2445static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
2514{ 2446{
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 9717efff..0a102cb2 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -509,22 +509,6 @@ bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g);
509 509
510int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g); 510int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g);
511 511
512u64 gk20a_gmmu_map(struct vm_gk20a *vm,
513 struct sg_table **sgt,
514 u64 size,
515 u32 flags,
516 int rw_flag,
517 bool priv,
518 enum nvgpu_aperture aperture);
519u64 gk20a_gmmu_fixed_map(struct vm_gk20a *vm,
520 struct sg_table **sgt,
521 u64 addr,
522 u64 size,
523 u32 flags,
524 int rw_flag,
525 bool priv,
526 enum nvgpu_aperture aperture);
527
528static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem) 512static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem)
529{ 513{
530 /* FIXME: the sgt/sgl may get null if this is accessed e.g. in an isr 514 /* FIXME: the sgt/sgl may get null if this is accessed e.g. in an isr
@@ -565,11 +549,6 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
565 struct vm_gk20a_mapping_batch *batch, 549 struct vm_gk20a_mapping_batch *batch,
566 enum nvgpu_aperture aperture); 550 enum nvgpu_aperture aperture);
567 551
568void gk20a_gmmu_unmap(struct vm_gk20a *vm,
569 u64 vaddr,
570 u64 size,
571 int rw_flag);
572
573void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, 552void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
574 u64 vaddr, 553 u64 vaddr,
575 u64 size, 554 u64 size,
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index 6679d905..7c56c4cc 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -19,6 +19,7 @@
19#include <linux/platform/tegra/mc.h> 19#include <linux/platform/tegra/mc.h>
20 20
21#include <nvgpu/dma.h> 21#include <nvgpu/dma.h>
22#include <nvgpu/gmmu.h>
22#include <nvgpu/timers.h> 23#include <nvgpu/timers.h>
23#include <nvgpu/nvgpu_common.h> 24#include <nvgpu/nvgpu_common.h>
24#include <nvgpu/kmem.h> 25#include <nvgpu/kmem.h>
@@ -413,7 +414,7 @@ int prepare_ucode_blob(struct gk20a *g)
413 414
414 page = phys_to_page(wpr_addr); 415 page = phys_to_page(wpr_addr);
415 __nvgpu_mem_create_from_pages(g, &g->pmu.wpr_buf, &page, 1); 416 __nvgpu_mem_create_from_pages(g, &g->pmu.wpr_buf, &page, 1);
416 g->pmu.wpr_buf.gpu_va = gk20a_gmmu_map(vm, &g->pmu.wpr_buf.priv.sgt, 417 g->pmu.wpr_buf.gpu_va = nvgpu_gmmu_map(vm, &g->pmu.wpr_buf,
417 wprsize, 0, gk20a_mem_flag_none, 418 wprsize, 0, gk20a_mem_flag_none,
418 false, APERTURE_SYSMEM); 419 false, APERTURE_SYSMEM);
419 gm20b_dbg_pmu("wpr mapped gpu va :%llx\n", g->pmu.wpr_buf.gpu_va); 420 gm20b_dbg_pmu("wpr mapped gpu va :%llx\n", g->pmu.wpr_buf.gpu_va);
@@ -445,8 +446,7 @@ int prepare_ucode_blob(struct gk20a *g)
445 gm20b_dbg_pmu("prepare ucode blob return 0\n"); 446 gm20b_dbg_pmu("prepare ucode blob return 0\n");
446 free_acr_resources(g, plsfm); 447 free_acr_resources(g, plsfm);
447free_sgt: 448free_sgt:
448 gk20a_gmmu_unmap(vm, g->pmu.wpr_buf.gpu_va, 449 nvgpu_gmmu_unmap(vm, &g->pmu.wpr_buf, g->pmu.wpr_buf.gpu_va);
449 g->pmu.wpr_buf.size, gk20a_mem_flag_none);
450 return err; 450 return err;
451} 451}
452 452
@@ -1412,8 +1412,8 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
1412 goto err_done; 1412 goto err_done;
1413 } 1413 }
1414 1414
1415 acr->hsbl_ucode.gpu_va = gk20a_gmmu_map(vm, 1415 acr->hsbl_ucode.gpu_va = nvgpu_gmmu_map(vm,
1416 &acr->hsbl_ucode.priv.sgt, 1416 &acr->hsbl_ucode,
1417 bl_sz, 1417 bl_sz,
1418 0, /* flags */ 1418 0, /* flags */
1419 gk20a_mem_flag_read_only, false, 1419 gk20a_mem_flag_read_only, false,
@@ -1461,8 +1461,7 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
1461 start_gm20b_pmu(g); 1461 start_gm20b_pmu(g);
1462 return 0; 1462 return 0;
1463err_unmap_bl: 1463err_unmap_bl:
1464 gk20a_gmmu_unmap(vm, acr->hsbl_ucode.gpu_va, 1464 nvgpu_gmmu_unmap(vm, &acr->hsbl_ucode, acr->hsbl_ucode.gpu_va);
1465 acr->hsbl_ucode.size, gk20a_mem_flag_none);
1466err_free_ucode: 1465err_free_ucode:
1467 nvgpu_dma_free(g, &acr->hsbl_ucode); 1466 nvgpu_dma_free(g, &acr->hsbl_ucode);
1468err_done: 1467err_done:
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 855c2b14..a43252de 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -20,6 +20,7 @@
20 20
21#include <nvgpu/timers.h> 21#include <nvgpu/timers.h>
22#include <nvgpu/kmem.h> 22#include <nvgpu/kmem.h>
23#include <nvgpu/gmmu.h>
23#include <nvgpu/dma.h> 24#include <nvgpu/dma.h>
24#include <nvgpu/bug.h> 25#include <nvgpu/bug.h>
25 26
@@ -858,8 +859,8 @@ int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size,
858 if (err) 859 if (err)
859 return err; 860 return err;
860 861
861 mem->gpu_va = gk20a_gmmu_map(vm, 862 mem->gpu_va = nvgpu_gmmu_map(vm,
862 &mem->priv.sgt, 863 mem,
863 size, 864 size,
864 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 865 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
865 gk20a_mem_flag_none, 866 gk20a_mem_flag_none,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/dma.h b/drivers/gpu/nvgpu/include/nvgpu/dma.h
index 43cff215..1c6474e7 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/dma.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/dma.h
@@ -197,6 +197,11 @@ void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem);
197 * Note this is different than mapping it into the CPU. This memory can be 197 * Note this is different than mapping it into the CPU. This memory can be
198 * either placed in VIDMEM or SYSMEM, which ever is more convenient for the 198 * either placed in VIDMEM or SYSMEM, which ever is more convenient for the
199 * driver. 199 * driver.
200 *
201 * Note: currently a bug exists in the nvgpu_dma_alloc_map*() routines: you
202 * cannot use nvgpu_gmmu_map() on said buffer - it will overwrite the necessary
203 * information for the DMA unmap routines to actually unmap the buffer. You
204 * will either leak mappings or see GMMU faults.
200 */ 205 */
201int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size, 206int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size,
202 struct nvgpu_mem *mem); 207 struct nvgpu_mem *mem);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
new file mode 100644
index 00000000..7fb0147e
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -0,0 +1,69 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
#ifndef __NVGPU_GMMU_H__
#define __NVGPU_GMMU_H__

#include <nvgpu/types.h>

/*
 * This is the GMMU API visible to blocks outside of the GMMU. Basically this
 * API supports all the different types of mappings that might be done in the
 * GMMU.
 */

struct vm_gk20a;
struct nvgpu_mem;

enum nvgpu_aperture;

/**
 * nvgpu_gmmu_map - Map memory into the GMMU.
 *
 * Kernel space. Maps @mem into @vm at a GPU VA chosen by the allocator and
 * returns that VA, or 0 on failure.
 */
u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
		   struct nvgpu_mem *mem,
		   u64 size,
		   u32 flags,
		   int rw_flag,
		   bool priv,
		   enum nvgpu_aperture aperture);

/**
 * nvgpu_gmmu_map_fixed - Map memory into the GMMU.
 *
 * Kernel space. Like nvgpu_gmmu_map() but maps @mem at the caller-supplied
 * GPU VA @addr instead of an allocator-chosen one. Returns @addr's mapping
 * VA, or 0 on failure.
 */
u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
			 struct nvgpu_mem *mem,
			 u64 addr,
			 u64 size,
			 u32 flags,
			 int rw_flag,
			 bool priv,
			 enum nvgpu_aperture aperture);

/**
 * nvgpu_gmmu_unmap - Unmap a buffer.
 *
 * Kernel space. Unmaps the mapping of @mem at @gpu_va; the unmap length is
 * taken from @mem->size.
 */
void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
		      struct nvgpu_mem *mem,
		      u64 gpu_va);

#endif /* __NVGPU_GMMU_H__ */
69#endif