From c3fa78b1d9cba28547ca59154207d434931ae746 Mon Sep 17 00:00:00 2001
From: Alex Waterman <alexw@nvidia.com>
Date: Mon, 10 Apr 2017 14:04:15 -0700
Subject: gpu: nvgpu: Separate GMMU out of mm_gk20a.c

Begin moving (and renaming) the GMMU code into common/mm/gmmu.c. This
block of code will be responsible for handling the platform/OS-independent
GMMU operations.

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: Ide761bab75e5d84be3dcb977c4842ae4b3a7c1b3
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1464083
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile.nvgpu          |   1 +
 drivers/gpu/nvgpu/common/linux/dma.c      |  10 +--
 drivers/gpu/nvgpu/common/linux/ioctl_as.c |   2 +
 drivers/gpu/nvgpu/common/mm/gmmu.c        | 109 ++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/common/semaphore.c      |  20 ++----
 drivers/gpu/nvgpu/gk20a/cde_gk20a.c       |   9 +--
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c        |  55 +++++++--------
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c        |  68 -------------------
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h        |  21 ------
 drivers/gpu/nvgpu/gm20b/acr_gm20b.c       |  13 ++--
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c        |   5 +-
 drivers/gpu/nvgpu/include/nvgpu/dma.h     |   5 ++
 drivers/gpu/nvgpu/include/nvgpu/gmmu.h    |  69 +++++++++++++++++++
 13 files changed, 237 insertions(+), 150 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/common/mm/gmmu.c
 create mode 100644 drivers/gpu/nvgpu/include/nvgpu/gmmu.h

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index 08d939f0..59c81826 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -43,6 +43,7 @@ nvgpu-y := \
 	common/mm/buddy_allocator.o \
 	common/mm/page_allocator.o \
 	common/mm/lockless_allocator.o \
+	common/mm/gmmu.o \
 	common/pramin.o \
 	common/semaphore.o \
 	common/as.o \
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
index eb2d0ac4..d3d51f18 100644
--- a/drivers/gpu/nvgpu/common/linux/dma.c
+++ b/drivers/gpu/nvgpu/common/linux/dma.c
@@ -20,6 +20,7 @@
 #include <nvgpu/dma.h>
 #include <nvgpu/lock.h>
 #include <nvgpu/bug.h>
+#include <nvgpu/gmmu.h>
 
 #include <nvgpu/linux/dma.h>
 
@@ -71,7 +72,7 @@ int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
 		/*
 		 * Force the no-kernel-mapping flag on because we don't support
 		 * the lack of it for vidmem - the user should not care when
-		 * using gk20a_gmmu_alloc_map and it's vidmem, or if there's a
+		 * using nvgpu_gmmu_alloc_map and it's vidmem, or if there's a
 		 * difference, the user should use the flag explicitly anyway.
 		 */
 		int err = nvgpu_dma_alloc_flags_vid(g,
@@ -285,7 +286,7 @@ int nvgpu_dma_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
 	if (err)
 		return err;
 
-	mem->gpu_va = gk20a_gmmu_map(vm, &mem->priv.sgt, size, 0,
+	mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0,
 				     gk20a_mem_flag_none, false,
 				     mem->aperture);
 	if (!mem->gpu_va) {
@@ -315,7 +316,7 @@ int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
 	if (err)
 		return err;
 
-	mem->gpu_va = gk20a_gmmu_map(vm, &mem->priv.sgt, size, 0,
+	mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0,
 				     gk20a_mem_flag_none, false,
 				     mem->aperture);
 	if (!mem->gpu_va) {
@@ -420,8 +421,7 @@ void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem)
 void nvgpu_dma_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem)
 {
 	if (mem->gpu_va)
-		gk20a_gmmu_unmap(vm, mem->gpu_va,
-				 mem->size, gk20a_mem_flag_none);
+		nvgpu_gmmu_unmap(vm, mem, mem->gpu_va);
 	mem->gpu_va = 0;
 
 	nvgpu_dma_free(vm->mm->g, mem);
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_as.c b/drivers/gpu/nvgpu/common/linux/ioctl_as.c
index 6a9d3811..31d99d26 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_as.c
@@ -23,6 +23,8 @@
 
 #include <uapi/linux/nvgpu.h>
 
+#include <nvgpu/gmmu.h>
+
 #include "gk20a/gk20a.h"
 #include "ioctl_as.h"
 
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
new file mode 100644
index 00000000..a2ed3f3a
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <nvgpu/log.h>
+#include <nvgpu/gmmu.h>
+#include <nvgpu/nvgpu_mem.h>
+
+#include "gk20a/gk20a.h"
+#include "gk20a/mm_gk20a.h"
+
+/*
+ * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU
+ * VA will be allocated for you. If @addr is non-zero then the buffer will be
+ * mapped at @addr.
+ */
+static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
+			    struct nvgpu_mem *mem,
+			    u64 addr,
+			    u64 size,
+			    u32 flags,
+			    int rw_flag,
+			    bool priv,
+			    enum nvgpu_aperture aperture)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	u64 vaddr;
+
+	struct sg_table *sgt = mem->priv.sgt;
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+	vaddr = g->ops.mm.gmmu_map(vm, addr,
+				   sgt,    /* sg table */
+				   0,      /* sg offset */
+				   size,
+				   gmmu_page_size_kernel,
+				   0,      /* kind */
+				   0,      /* ctag_offset */
+				   flags, rw_flag,
+				   false,  /* clear_ctags */
+				   false,  /* sparse */
+				   priv,   /* priv */
+				   NULL,   /* mapping_batch handle */
+				   aperture);
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+	if (!vaddr) {
+		nvgpu_err(g, "failed to allocate va space");
+		return 0;
+	}
+
+	return vaddr;
+}
+
+u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
+		   struct nvgpu_mem *mem,
+		   u64 size,
+		   u32 flags,
+		   int rw_flag,
+		   bool priv,
+		   enum nvgpu_aperture aperture)
+{
+	return __nvgpu_gmmu_map(vm, mem, 0, size, flags, rw_flag, priv,
+			aperture);
+}
+
+/*
+ * Like nvgpu_gmmu_map() except it can work on a fixed address instead.
+ */
+u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
+			 struct nvgpu_mem *mem,
+			 u64 addr,
+			 u64 size,
+			 u32 flags,
+			 int rw_flag,
+			 bool priv,
+			 enum nvgpu_aperture aperture)
+{
+	return __nvgpu_gmmu_map(vm, mem, addr, size, flags, rw_flag, priv,
+			aperture);
+}
+
+void nvgpu_gmmu_unmap(struct vm_gk20a *vm, struct nvgpu_mem *mem, u64 gpu_va)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+	g->ops.mm.gmmu_unmap(vm,
+			     gpu_va,
+			     mem->size,
+			     gmmu_page_size_kernel,
+			     true, /*va_allocated */
+			     gk20a_mem_flag_none,
+			     false,
+			     NULL);
+
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+}
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index fa86985b..a54ce831 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -14,6 +14,7 @@
  */
 
 #include <nvgpu/dma.h>
+#include <nvgpu/gmmu.h>
 #include <nvgpu/semaphore.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/bug.h>
@@ -197,7 +198,7 @@ int nvgpu_semaphore_pool_map(struct nvgpu_semaphore_pool *p,
 	 */
 	__lock_sema_sea(p->sema_sea);
 
-	addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->sea_mem.priv.sgt,
+	addr = nvgpu_gmmu_map_fixed(vm, &p->sema_sea->sea_mem,
 				    p->sema_sea->gpu_va,
 				    p->sema_sea->map_size,
 				    0, gk20a_mem_flag_read_only, 0,
@@ -225,7 +226,7 @@ int nvgpu_semaphore_pool_map(struct nvgpu_semaphore_pool *p,
 	if (err)
 		goto fail_unmap;
 
-	addr = gk20a_gmmu_map(vm, &p->rw_mem.priv.sgt, SZ_4K, 0,
+	addr = nvgpu_gmmu_map(vm, &p->rw_mem, SZ_4K, 0,
 			      gk20a_mem_flag_none, 0,
 			      p->rw_mem.aperture);
 
@@ -250,10 +251,7 @@ int nvgpu_semaphore_pool_map(struct nvgpu_semaphore_pool *p,
 fail_free_submem:
 	nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem);
 fail_unmap:
-	gk20a_gmmu_unmap(vm,
-			 p->sema_sea->sea_mem.gpu_va,
-			 p->sema_sea->map_size,
-			 gk20a_mem_flag_none);
+	nvgpu_gmmu_unmap(vm, &p->sema_sea->sea_mem, p->gpu_va_ro);
 	gpu_sema_dbg(pool_to_gk20a(p),
 		     "  %d: Failed to map semaphore pool!", p->page_idx);
 fail_unlock:
@@ -269,14 +267,8 @@ void nvgpu_semaphore_pool_unmap(struct nvgpu_semaphore_pool *p,
 {
 	__lock_sema_sea(p->sema_sea);
 
-	gk20a_gmmu_unmap(vm,
-			 p->sema_sea->sea_mem.gpu_va,
-			 p->sema_sea->sea_mem.size,
-			 gk20a_mem_flag_none);
-	gk20a_gmmu_unmap(vm,
-			 p->rw_mem.gpu_va,
-			 p->rw_mem.size,
-			 gk20a_mem_flag_none);
+	nvgpu_gmmu_unmap(vm, &p->sema_sea->sea_mem, p->gpu_va_ro);
+	nvgpu_gmmu_unmap(vm, &p->rw_mem, p->gpu_va);
 	nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem);
 
 	p->gpu_va = 0;
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index f0927692..74de7e8e 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -26,6 +26,7 @@
 #include <trace/events/gk20a.h>
 
 #include <nvgpu/dma.h>
+#include <nvgpu/gmmu.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/kmem.h>
@@ -83,8 +84,8 @@ __must_hold(&cde_app->mutex)
 
 	/* release mapped memory */
 	gk20a_deinit_cde_img(cde_ctx);
-	gk20a_gmmu_unmap(vm, cde_ctx->backing_store_vaddr,
-			 g->gr.compbit_store.mem.size, 1);
+	nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
+			 cde_ctx->backing_store_vaddr);
 
 	/* free the channel */
 	gk20a_channel_close(ch);
@@ -1241,7 +1242,7 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
 	}
 
 	/* map backing store to gpu virtual space */
-	vaddr = gk20a_gmmu_map(ch->vm, &gr->compbit_store.mem.priv.sgt,
+	vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem,
 			       g->gr.compbit_store.mem.size,
 			       NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
 			       gk20a_mem_flag_read_only,
@@ -1272,7 +1273,7 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
 	return 0;
 
 err_init_cde_img:
-	gk20a_gmmu_unmap(ch->vm, vaddr, g->gr.compbit_store.mem.size, 1);
+	nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
 err_map_backingstore:
 err_alloc_gpfifo:
 	gk20a_vm_put(ch->vm);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 77a947de..2f52fdcf 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -21,6 +21,7 @@
 
 #include <nvgpu/dma.h>
 #include <nvgpu/kmem.h>
+#include <nvgpu/gmmu.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/log.h>
@@ -1946,8 +1947,8 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 				return ret;
 			}
 
-			pm_ctx->mem.gpu_va = gk20a_gmmu_map(c->vm,
-							&pm_ctx->mem.priv.sgt,
+			pm_ctx->mem.gpu_va = nvgpu_gmmu_map(c->vm,
+							&pm_ctx->mem,
 							pm_ctx->mem.size,
 							NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
 							gk20a_mem_flag_none, true,
@@ -2013,8 +2014,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 clean_up_mem:
 	nvgpu_mem_end(g, gr_mem);
 cleanup_pm_buf:
-	gk20a_gmmu_unmap(c->vm, pm_ctx->mem.gpu_va, pm_ctx->mem.size,
-			gk20a_mem_flag_none);
+	nvgpu_gmmu_unmap(c->vm, &pm_ctx->mem, pm_ctx->mem.gpu_va);
 	nvgpu_dma_free(g, &pm_ctx->mem);
 	memset(&pm_ctx->mem, 0, sizeof(struct nvgpu_mem));
 
@@ -2198,8 +2198,8 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
 	g->ops.mm.init_inst_block(&ucode_info->inst_blk_desc, vm, 0);
 
 	/* Map ucode surface to GMMU */
-	ucode_info->surface_desc.gpu_va = gk20a_gmmu_map(vm,
-					&ucode_info->surface_desc.priv.sgt,
+	ucode_info->surface_desc.gpu_va = nvgpu_gmmu_map(vm,
+					&ucode_info->surface_desc,
 					ucode_info->surface_desc.size,
 					0, /* flags */
 					gk20a_mem_flag_read_only,
@@ -2331,10 +2331,10 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
 
 	return 0;
 
- clean_up:
+clean_up:
 	if (ucode_info->surface_desc.gpu_va)
-		gk20a_gmmu_unmap(vm, ucode_info->surface_desc.gpu_va,
-			ucode_info->surface_desc.size, gk20a_mem_flag_none);
+		nvgpu_gmmu_unmap(vm, &ucode_info->surface_desc,
+				 ucode_info->surface_desc.gpu_va);
 	nvgpu_dma_free(g, &ucode_info->surface_desc);
 
 	nvgpu_release_firmware(g, gpccs_fw);
@@ -2824,7 +2824,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 		mem = &gr->global_ctx_buffer[CIRCULAR_VPR].mem;
 	}
 
-	gpu_va = gk20a_gmmu_map(ch_vm, &mem->priv.sgt, mem->size,
+	gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
 				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
 				gk20a_mem_flag_none, true, mem->aperture);
 	if (!gpu_va)
@@ -2840,7 +2840,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 		mem = &gr->global_ctx_buffer[ATTRIBUTE_VPR].mem;
 	}
 
-	gpu_va = gk20a_gmmu_map(ch_vm, &mem->priv.sgt, mem->size,
+	gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
 				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
 				gk20a_mem_flag_none, false, mem->aperture);
 	if (!gpu_va)
@@ -2856,7 +2856,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 		mem = &gr->global_ctx_buffer[PAGEPOOL_VPR].mem;
 	}
 
-	gpu_va = gk20a_gmmu_map(ch_vm, &mem->priv.sgt, mem->size,
+	gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
 				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
 				gk20a_mem_flag_none, true, mem->aperture);
 	if (!gpu_va)
@@ -2866,7 +2866,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 
 	/* Golden Image */
 	mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem;
-	gpu_va = gk20a_gmmu_map(ch_vm, &mem->priv.sgt, mem->size, 0,
+	gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0,
 				gk20a_mem_flag_none, true, mem->aperture);
 	if (!gpu_va)
 		goto clean_up;
@@ -2875,7 +2875,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 
 	/* Priv register Access Map */
 	mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem;
-	gpu_va = gk20a_gmmu_map(ch_vm, &mem->priv.sgt, mem->size, 0,
+	gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0,
 				gk20a_mem_flag_none, true, mem->aperture);
 	if (!gpu_va)
 		goto clean_up;
@@ -2885,12 +2885,11 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 	c->ch_ctx.global_ctx_buffer_mapped = true;
 	return 0;
 
- clean_up:
+clean_up:
 	for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
 		if (g_bfr_va[i]) {
-			gk20a_gmmu_unmap(ch_vm, g_bfr_va[i],
-					 gr->global_ctx_buffer[i].mem.size,
-					 gk20a_mem_flag_none);
+			nvgpu_gmmu_unmap(ch_vm, &gr->global_ctx_buffer[i].mem,
+					 g_bfr_va[i]);
 			g_bfr_va[i] = 0;
 		}
 	}
@@ -2900,6 +2899,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
 {
 	struct vm_gk20a *ch_vm = c->vm;
+	struct gr_gk20a *gr = &c->g->gr;
 	u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
 	u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
 	u32 i;
@@ -2908,9 +2908,8 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
 
 	for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
 		if (g_bfr_va[i]) {
-			gk20a_gmmu_unmap(ch_vm, g_bfr_va[i],
-					 g_bfr_size[i],
-					 gk20a_mem_flag_none);
+			nvgpu_gmmu_unmap(ch_vm, &gr->global_ctx_buffer[i].mem,
+					 g_bfr_va[i]);
 			g_bfr_va[i] = 0;
 			g_bfr_size[i] = 0;
 		}
@@ -2946,8 +2945,8 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
 	if (err)
 		goto err_free_ctx;
 
-	gr_ctx->mem.gpu_va = gk20a_gmmu_map(vm,
-					&gr_ctx->mem.priv.sgt,
+	gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm,
+					&gr_ctx->mem,
 					gr_ctx->mem.size,
 					NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_FALSE,
 					gk20a_mem_flag_none, true,
@@ -3007,8 +3006,7 @@ void gr_gk20a_free_gr_ctx(struct gk20a *g,
 	if (!gr_ctx || !gr_ctx->mem.gpu_va)
 		return;
 
-	gk20a_gmmu_unmap(vm, gr_ctx->mem.gpu_va,
-		gr_ctx->mem.size, gk20a_mem_flag_none);
+	nvgpu_gmmu_unmap(vm, &gr_ctx->mem, gr_ctx->mem.gpu_va);
 	nvgpu_dma_free(g, &gr_ctx->mem);
 	nvgpu_kfree(g, gr_ctx);
 }
@@ -3055,8 +3053,8 @@ static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c)
 	gk20a_dbg_fn("");
 
 	if (patch_ctx->mem.gpu_va)
-		gk20a_gmmu_unmap(c->vm, patch_ctx->mem.gpu_va,
-				 patch_ctx->mem.size, gk20a_mem_flag_none);
+		nvgpu_gmmu_unmap(c->vm, &patch_ctx->mem,
+				 patch_ctx->mem.gpu_va);
 
 	nvgpu_dma_free(g, &patch_ctx->mem);
 	patch_ctx->data_count = 0;
@@ -3070,8 +3068,7 @@ static void gr_gk20a_free_channel_pm_ctx(struct channel_gk20a *c)
 	gk20a_dbg_fn("");
 
 	if (pm_ctx->mem.gpu_va) {
-		gk20a_gmmu_unmap(c->vm, pm_ctx->mem.gpu_va,
-				 pm_ctx->mem.size, gk20a_mem_flag_none);
+		nvgpu_gmmu_unmap(c->vm, &pm_ctx->mem, pm_ctx->mem.gpu_va);
 
 		nvgpu_dma_free(g, &pm_ctx->mem);
 	}
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index d95a2cde..08c99895 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -2441,74 +2441,6 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 	return 0;
 }
 
-/*
- * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU
- * VA will be allocated for you. If addr is non-zero then the buffer will be
- * mapped at @addr.
- */
-static u64 __gk20a_gmmu_map(struct vm_gk20a *vm,
-			    struct sg_table **sgt,
-			    u64 addr,
-			    u64 size,
-			    u32 flags,
-			    int rw_flag,
-			    bool priv,
-			    enum nvgpu_aperture aperture)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	u64 vaddr;
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	vaddr = g->ops.mm.gmmu_map(vm, addr,
-				*sgt, /* sg table */
-				0, /* sg offset */
-				size,
-				gmmu_page_size_kernel,
-				0, /* kind */
-				0, /* ctag_offset */
-				flags, rw_flag,
-				false, /* clear_ctags */
-				false, /* sparse */
-				priv, /* priv */
-				NULL, /* mapping_batch handle */
-				aperture);
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-	if (!vaddr) {
-		nvgpu_err(g, "failed to allocate va space");
-		return 0;
-	}
-
-	return vaddr;
-}
-
-u64 gk20a_gmmu_map(struct vm_gk20a *vm,
-		   struct sg_table **sgt,
-		   u64 size,
-		   u32 flags,
-		   int rw_flag,
-		   bool priv,
-		   enum nvgpu_aperture aperture)
-{
-	return __gk20a_gmmu_map(vm, sgt, 0, size, flags, rw_flag, priv,
-			aperture);
-}
-
-/*
- * Like gk20a_gmmu_map() except it works on a fixed address instead.
- */
-u64 gk20a_gmmu_fixed_map(struct vm_gk20a *vm,
-			 struct sg_table **sgt,
-			 u64 addr,
-			 u64 size,
-			 u32 flags,
-			 int rw_flag,
-			 bool priv,
-			 enum nvgpu_aperture aperture)
-{
-	return __gk20a_gmmu_map(vm, sgt, addr, size, flags, rw_flag, priv,
-			aperture);
-}
-
 #if defined(CONFIG_GK20A_VIDMEM)
 static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
 {
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 9717efff..0a102cb2 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -509,22 +509,6 @@ bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g);
 
 int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g);
 
-u64 gk20a_gmmu_map(struct vm_gk20a *vm,
-		struct sg_table **sgt,
-		u64 size,
-		u32 flags,
-		int rw_flag,
-		bool priv,
-		enum nvgpu_aperture aperture);
-u64 gk20a_gmmu_fixed_map(struct vm_gk20a *vm,
-		struct sg_table **sgt,
-		u64 addr,
-		u64 size,
-		u32 flags,
-		int rw_flag,
-		bool priv,
-		enum nvgpu_aperture aperture);
-
 static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem)
 {
 	/* FIXME: the sgt/sgl may get null if this is accessed e.g. in an isr
@@ -565,11 +549,6 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			struct vm_gk20a_mapping_batch *batch,
 			enum nvgpu_aperture aperture);
 
-void gk20a_gmmu_unmap(struct vm_gk20a *vm,
-		u64 vaddr,
-		u64 size,
-		int rw_flag);
-
 void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 			u64 vaddr,
 			u64 size,
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index 6679d905..7c56c4cc 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -19,6 +19,7 @@
 #include <linux/platform/tegra/mc.h>
 
 #include <nvgpu/dma.h>
+#include <nvgpu/gmmu.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/kmem.h>
@@ -413,7 +414,7 @@ int prepare_ucode_blob(struct gk20a *g)
 
 	page = phys_to_page(wpr_addr);
 	__nvgpu_mem_create_from_pages(g, &g->pmu.wpr_buf, &page, 1);
-	g->pmu.wpr_buf.gpu_va = gk20a_gmmu_map(vm, &g->pmu.wpr_buf.priv.sgt,
+	g->pmu.wpr_buf.gpu_va = nvgpu_gmmu_map(vm, &g->pmu.wpr_buf,
 					       wprsize, 0, gk20a_mem_flag_none,
 					       false, APERTURE_SYSMEM);
 	gm20b_dbg_pmu("wpr mapped gpu va :%llx\n", g->pmu.wpr_buf.gpu_va);
@@ -445,8 +446,7 @@ int prepare_ucode_blob(struct gk20a *g)
 	gm20b_dbg_pmu("prepare ucode blob return 0\n");
 	free_acr_resources(g, plsfm);
 free_sgt:
-	gk20a_gmmu_unmap(vm, g->pmu.wpr_buf.gpu_va,
-			 g->pmu.wpr_buf.size, gk20a_mem_flag_none);
+	nvgpu_gmmu_unmap(vm, &g->pmu.wpr_buf, g->pmu.wpr_buf.gpu_va);
 	return err;
 }
 
@@ -1412,8 +1412,8 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
 			goto err_done;
 		}
 
-		acr->hsbl_ucode.gpu_va = gk20a_gmmu_map(vm,
-				&acr->hsbl_ucode.priv.sgt,
+		acr->hsbl_ucode.gpu_va = nvgpu_gmmu_map(vm,
+				&acr->hsbl_ucode,
 				bl_sz,
 				0, /* flags */
 				gk20a_mem_flag_read_only, false,
@@ -1461,8 +1461,7 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
 	start_gm20b_pmu(g);
 	return 0;
 err_unmap_bl:
-	gk20a_gmmu_unmap(vm, acr->hsbl_ucode.gpu_va,
-			acr->hsbl_ucode.size, gk20a_mem_flag_none);
+	nvgpu_gmmu_unmap(vm, &acr->hsbl_ucode, acr->hsbl_ucode.gpu_va);
 err_free_ucode:
 	nvgpu_dma_free(g, &acr->hsbl_ucode);
 err_done:
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 855c2b14..a43252de 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -20,6 +20,7 @@
 
 #include <nvgpu/timers.h>
 #include <nvgpu/kmem.h>
+#include <nvgpu/gmmu.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/bug.h>
 
@@ -858,8 +859,8 @@ int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size,
 	if (err)
 		return err;
 
-	mem->gpu_va = gk20a_gmmu_map(vm,
-				&mem->priv.sgt,
+	mem->gpu_va = nvgpu_gmmu_map(vm,
+				mem,
 				size,
 				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
 				gk20a_mem_flag_none,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/dma.h b/drivers/gpu/nvgpu/include/nvgpu/dma.h
index 43cff215..1c6474e7 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/dma.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/dma.h
@@ -197,6 +197,11 @@ void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem);
  * Note this is different than mapping it into the CPU. This memory can be
  * either placed in VIDMEM or SYSMEM, which ever is more convenient for the
  * driver.
+ *
+ * Note: currently a bug exists in the nvgpu_dma_alloc_map*() routines: you
+ * cannot call nvgpu_gmmu_map() on a buffer they return - doing so overwrites
+ * the information the DMA unmap routines need to actually unmap the buffer.
+ * You will either leak mappings or see GMMU faults.
  */
 int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size,
 		struct nvgpu_mem *mem);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
new file mode 100644
index 00000000..7fb0147e
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __NVGPU_GMMU_H__
+#define __NVGPU_GMMU_H__
+
+#include <nvgpu/types.h>
+
+/*
+ * This is the GMMU API visible to blocks outside of the GMMU. Basically this
+ * API supports all the different types of mappings that might be done in the
+ * GMMU.
+ */
+
+struct vm_gk20a;
+struct nvgpu_mem;
+
+enum nvgpu_aperture;
+
+/**
+ * nvgpu_gmmu_map - Map memory into the GMMU.
+ *
+ * Kernel space.
+ */
+u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
+		   struct nvgpu_mem *mem,
+		   u64 size,
+		   u32 flags,
+		   int rw_flag,
+		   bool priv,
+		   enum nvgpu_aperture aperture);
+
+/**
+ * nvgpu_gmmu_map_fixed - Map memory into the GMMU at a fixed GPU VA.
+ *
+ * Kernel space.
+ */
+u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
+			 struct nvgpu_mem *mem,
+			 u64 addr,
+			 u64 size,
+			 u32 flags,
+			 int rw_flag,
+			 bool priv,
+			 enum nvgpu_aperture aperture);
+
+/**
+ * nvgpu_gmmu_unmap - Unmap a buffer.
+ *
+ * Kernel space.
+ */
+void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
+		      struct nvgpu_mem *mem,
+		      u64 gpu_va);
+
+#endif
-- 
cgit v1.2.2