From 8f2d4a3f4a0acc81bae6725d30506e92651a42b5 Mon Sep 17 00:00:00 2001
From: Alex Waterman <alexw@nvidia.com>
Date: Tue, 21 Mar 2017 14:32:13 -0700
Subject: gpu: nvgpu: Move DMA API to dma.h

Make an nvgpu DMA API include file so that the intricacies of the
Linux DMA API can be hidden from the calling code. The implementation
moves from gk20a/mm_gk20a.c to the new common/linux/dma.c.

Also document the nvgpu DMA API.

JIRA NVGPU-12

Change-Id: I7578e4c726ad46344b7921179d95861858e9a27e
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1323326
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile.nvgpu             |   1 +
 drivers/gpu/nvgpu/common/linux/dma.c         | 415 +++++++++++++++++++++++++++
 drivers/gpu/nvgpu/common/semaphore.c         |   1 +
 drivers/gpu/nvgpu/gk20a/cde_gk20a.c          |   1 +
 drivers/gpu/nvgpu/gk20a/ce2_gk20a.c          |   1 +
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c      |   1 +
 drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c       |   1 +
 drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c   |   1 +
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c         |   1 +
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c           |   1 +
 drivers/gpu/nvgpu/gk20a/ltc_common.c         |   2 +
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c           | 393 +------------------------
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h           |  50 ----
 drivers/gpu/nvgpu/gk20a/pmu_gk20a.c          |   1 +
 drivers/gpu/nvgpu/gm20b/acr_gm20b.c          |   1 +
 drivers/gpu/nvgpu/gp106/acr_gp106.c          |   1 +
 drivers/gpu/nvgpu/gp106/gr_gp106.c           |   2 +
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c         |   2 +
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c           |   1 +
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c           |   2 +
 drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c         |   2 +
 drivers/gpu/nvgpu/include/nvgpu/dma.h        | 312 ++++++++++++++++++++
 drivers/gpu/nvgpu/vgpu/fifo_vgpu.c           |   1 +
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c |   1 +
 drivers/gpu/nvgpu/vgpu/mm_vgpu.c             |   1 +
 25 files changed, 754 insertions(+), 442 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/common/linux/dma.c
 create mode 100644 drivers/gpu/nvgpu/include/nvgpu/dma.h

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index 34d8d19f..d994ac1d 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -32,6 +32,7 @@ nvgpu-y := \
 	common/linux/ioctl_tsg.o \
 	common/linux/log.o \
 	common/linux/nvgpu_mem.o \
+	common/linux/dma.o \
 	common/mm/nvgpu_allocator.o \
 	common/mm/bitmap_allocator.o \
 	common/mm/buddy_allocator.o \
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
new file mode 100644
index 00000000..755848ea
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/dma.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/dma-attrs.h>
+#include <linux/dma-mapping.h>
+
+#include <nvgpu/dma.h>
+#include <nvgpu/lock.h>
+
+#include "gk20a/gk20a.h"
+
+#if defined(CONFIG_GK20A_VIDMEM)
+static u64 __gk20a_gmmu_alloc(struct nvgpu_allocator *allocator, dma_addr_t at,
+				size_t size)
+{
+	u64 addr = 0;
+
+	if (at)
+		addr = nvgpu_alloc_fixed(allocator, at, size, 0);
+	else
+		addr = nvgpu_alloc(allocator, size);
+
+	return addr;
+}
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
+static void gk20a_dma_flags_to_attrs(unsigned long *attrs,
+		unsigned long flags)
+#define ATTR_ARG(x) *x
+#else
+static void gk20a_dma_flags_to_attrs(struct dma_attrs *attrs,
+		unsigned long flags)
+#define ATTR_ARG(x) x
+#endif
+{
+	if (flags & NVGPU_DMA_NO_KERNEL_MAPPING)
+		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs));
+	if (flags & NVGPU_DMA_FORCE_CONTIGUOUS)
+		dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs));
+	if (flags & NVGPU_DMA_READ_ONLY)
+		dma_set_attr(DMA_ATTR_READ_ONLY, ATTR_ARG(attrs));
+#undef ATTR_ARG
+}
+
+int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
+{
+	return gk20a_gmmu_alloc_flags(g, 0, size, mem);
+}
+
+int gk20a_gmmu_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
+		struct nvgpu_mem *mem)
+{
+	if (g->mm.vidmem_is_vidmem) {
+		/*
+		 * Force the no-kernel-mapping flag on because we don't support
+		 * the lack of it for vidmem - the user should not care when
+		 * using gk20a_gmmu_alloc_map and it's vidmem, or if there's a
+		 * difference, the user should use the flag explicitly anyway.
+		 */
+		int err = gk20a_gmmu_alloc_flags_vid(g,
+				flags | NVGPU_DMA_NO_KERNEL_MAPPING,
+				size, mem);
+
+		if (!err)
+			return 0;
+		/*
+		 * Fall back to sysmem (which may then also fail) in case
+		 * vidmem is exhausted.
+		 */
+	}
+
+	return gk20a_gmmu_alloc_flags_sys(g, flags, size, mem);
+}
+
+int gk20a_gmmu_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
+{
+	return gk20a_gmmu_alloc_flags_sys(g, 0, size, mem);
+}
+
+/* Allocate @size bytes of sysmem for @mem; 0 on success, else an error. */
+int gk20a_gmmu_alloc_flags_sys(struct gk20a *g, unsigned long flags,
+		size_t size, struct nvgpu_mem *mem)
+{
+	struct device *d = dev_from_gk20a(g);
+	/* Function scope: the error path must free with these same attrs. */
+	DEFINE_DMA_ATTRS(dma_attrs);
+	dma_addr_t iova;
+	void *alloc;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	/*
+	 * With no flags the attrs stay empty, which makes dma_alloc_attrs()
+	 * equivalent to dma_alloc_coherent().
+	 */
+	gk20a_dma_flags_to_attrs(&dma_attrs, flags);
+
+	alloc = dma_alloc_attrs(d, size, &iova, GFP_KERNEL,
+			__DMA_ATTR(dma_attrs));
+	if (!alloc)
+		return -ENOMEM;
+
+	if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
+		/* No CPU mapping: the returned cookie is kept in mem->pages. */
+		mem->pages = alloc;
+		err = gk20a_get_sgtable_from_pages(d, &mem->sgt, mem->pages,
+						   iova, size);
+	} else {
+		mem->cpu_va = alloc;
+		err = gk20a_get_sgtable(d, &mem->sgt, mem->cpu_va, iova, size);
+		memset(mem->cpu_va, 0, size);
+	}
+	if (err)
+		goto fail_free;
+
+	mem->size = size;
+	mem->aperture = APERTURE_SYSMEM;
+	mem->flags = flags;
+
+	gk20a_dbg_fn("done");
+
+	return 0;
+
+fail_free:
+	/*
+	 * Free exactly what was allocated above. A plain
+	 * dma_free_coherent(cpu_va) would leak NO_KERNEL_MAPPING buffers,
+	 * whose cookie lives in mem->pages rather than mem->cpu_va.
+	 */
+	dma_free_attrs(d, size, alloc, iova, __DMA_ATTR(dma_attrs));
+	mem->cpu_va = NULL;
+	mem->pages = NULL;
+	mem->sgt = NULL;
+	return err;
+}
+
+int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
+{
+	return gk20a_gmmu_alloc_flags_vid(g,
+			NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
+}
+
+int gk20a_gmmu_alloc_flags_vid(struct gk20a *g, unsigned long flags,
+		size_t size, struct nvgpu_mem *mem)
+{
+	return gk20a_gmmu_alloc_flags_vid_at(g, flags, size, mem, 0);
+}
+
+/* Allocate vidmem, optionally at the fixed address @at (0: anywhere). */
+int gk20a_gmmu_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
+		size_t size, struct nvgpu_mem *mem, dma_addr_t at)
+{
+#if defined(CONFIG_GK20A_VIDMEM)
+	u64 addr;
+	int err;
+	struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ?
+		&g->mm.vidmem.allocator :
+		&g->mm.vidmem.bootstrap_allocator;
+	s64 before_pending; /* int would truncate the 64-bit counter */
+
+	gk20a_dbg_fn("");
+
+	if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
+		return -ENOSYS;
+
+	/*
+	 * Our own allocator doesn't have any flags yet, and we can't
+	 * kernel-map these, so require explicit flags.
+	 */
+	WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING);
+
+	nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
+	before_pending = atomic64_read(&g->mm.vidmem.bytes_pending);
+	addr = __gk20a_gmmu_alloc(vidmem_alloc, at, size);
+	nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
+	if (!addr) {
+		/*
+		 * If memory is known to be freed soon, let the user know that
+		 * it may be available after a while.
+		 */
+		if (before_pending)
+			return -EAGAIN;
+		else
+			return -ENOMEM;
+	}
+
+	mem->fixed = (at != 0);
+
+	mem->sgt = nvgpu_kzalloc(g, sizeof(struct sg_table));
+	if (!mem->sgt) {
+		err = -ENOMEM;
+		goto fail_physfree;
+	}
+
+	err = sg_alloc_table(mem->sgt, 1, GFP_KERNEL);
+	if (err)
+		goto fail_kfree;
+
+	set_vidmem_page_alloc(mem->sgt->sgl, addr);
+	sg_set_page(mem->sgt->sgl, NULL, size, 0);
+
+	mem->size = size;
+	mem->aperture = APERTURE_VIDMEM;
+	mem->allocator = vidmem_alloc;
+	mem->flags = flags;
+
+	nvgpu_init_list_node(&mem->clear_list_entry);
+
+	gk20a_dbg_fn("done at 0x%llx size %zu", addr, size);
+
+	return 0;
+
+fail_kfree:
+	nvgpu_kfree(g, mem->sgt);
+	mem->sgt = NULL;
+fail_physfree:
+	/* Free from the allocator that was used, which may be bootstrap. */
+	nvgpu_free(vidmem_alloc, addr);
+	return err;
+#else
+	return -ENOSYS;
+#endif
+}
+
+int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size,
+		struct nvgpu_mem *mem)
+{
+	return gk20a_gmmu_alloc_map_flags(vm, 0, size, mem);
+}
+
+int gk20a_gmmu_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
+		size_t size, struct nvgpu_mem *mem)
+{
+	if (vm->mm->vidmem_is_vidmem) {
+		/*
+		 * Force the no-kernel-mapping flag on because we don't support
+		 * the lack of it for vidmem - the user should not care when
+		 * using gk20a_gmmu_alloc_map and it's vidmem, or if there's a
+		 * difference, the user should use the flag explicitly anyway.
+		 */
+		int err = gk20a_gmmu_alloc_map_flags_vid(vm,
+				flags | NVGPU_DMA_NO_KERNEL_MAPPING,
+				size, mem);
+
+		if (!err)
+			return 0;
+		/*
+		 * Fall back to sysmem (which may then also fail) in case
+		 * vidmem is exhausted.
+		 */
+	}
+
+	return gk20a_gmmu_alloc_map_flags_sys(vm, flags, size, mem);
+}
+
+int gk20a_gmmu_alloc_map_sys(struct vm_gk20a *vm, size_t size,
+		struct nvgpu_mem *mem)
+{
+	return gk20a_gmmu_alloc_map_flags_sys(vm, 0, size, mem);
+}
+
+int gk20a_gmmu_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
+		size_t size, struct nvgpu_mem *mem)
+{
+	int err = gk20a_gmmu_alloc_flags_sys(vm->mm->g, flags, size, mem);
+
+	if (err)
+		return err;
+
+	mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0,
+				     gk20a_mem_flag_none, false,
+				     mem->aperture);
+	if (!mem->gpu_va) {
+		err = -ENOMEM;
+		goto fail_free;
+	}
+
+	return 0;
+
+fail_free:
+	gk20a_gmmu_free(vm->mm->g, mem);
+	return err;
+}
+
+int gk20a_gmmu_alloc_map_vid(struct vm_gk20a *vm, size_t size,
+		struct nvgpu_mem *mem)
+{
+	return gk20a_gmmu_alloc_map_flags_vid(vm,
+			NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
+}
+
+int gk20a_gmmu_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
+		size_t size, struct nvgpu_mem *mem)
+{
+	int err = gk20a_gmmu_alloc_flags_vid(vm->mm->g, flags, size, mem);
+
+	if (err)
+		return err;
+
+	mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0,
+				     gk20a_mem_flag_none, false,
+				     mem->aperture);
+	if (!mem->gpu_va) {
+		err = -ENOMEM;
+		goto fail_free;
+	}
+
+	return 0;
+
+fail_free:
+	gk20a_gmmu_free(vm->mm->g, mem);
+	return err;
+}
+
+static void gk20a_gmmu_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
+{
+	struct device *d = dev_from_gk20a(g);
+
+	if (mem->cpu_va || mem->pages) {
+		if (mem->flags) {
+			DEFINE_DMA_ATTRS(dma_attrs);
+
+			gk20a_dma_flags_to_attrs(&dma_attrs, mem->flags);
+
+			if (mem->flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
+				dma_free_attrs(d, mem->size, mem->pages,
+					sg_dma_address(mem->sgt->sgl),
+					__DMA_ATTR(dma_attrs));
+			} else {
+				dma_free_attrs(d, mem->size, mem->cpu_va,
+					sg_dma_address(mem->sgt->sgl),
+					__DMA_ATTR(dma_attrs));
+			}
+		} else {
+			dma_free_coherent(d, mem->size, mem->cpu_va,
+					sg_dma_address(mem->sgt->sgl));
+		}
+		mem->cpu_va = NULL;
+		mem->pages = NULL;
+	}
+
+	if (mem->sgt)
+		gk20a_free_sgtable(g, &mem->sgt);
+
+	mem->size = 0;
+	mem->aperture = APERTURE_INVALID;
+}
+
+static void gk20a_gmmu_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
+{
+#if defined(CONFIG_GK20A_VIDMEM)
+	bool was_empty;
+
+	/* Sanity check - only this supported when allocating. */
+	WARN_ON(mem->flags != NVGPU_DMA_NO_KERNEL_MAPPING);
+
+	if (mem->user_mem) {
+		nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
+		was_empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
+		nvgpu_list_add_tail(&mem->clear_list_entry,
+			      &g->mm.vidmem.clear_list_head);
+		atomic64_add(mem->size, &g->mm.vidmem.bytes_pending);
+		nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
+
+		if (was_empty) {
+			cancel_work_sync(&g->mm.vidmem.clear_mem_worker);
+			schedule_work(&g->mm.vidmem.clear_mem_worker);
+		}
+	} else {
+		nvgpu_memset(g, mem, 0, 0, mem->size);
+		nvgpu_free(mem->allocator,
+			   (u64)get_vidmem_page_alloc(mem->sgt->sgl));
+		gk20a_free_sgtable(g, &mem->sgt);
+
+		mem->size = 0;
+		mem->aperture = APERTURE_INVALID;
+	}
+#endif
+}
+
+void gk20a_gmmu_free(struct gk20a *g, struct nvgpu_mem *mem)
+{
+	switch (mem->aperture) {
+	case APERTURE_SYSMEM:
+		return gk20a_gmmu_free_sys(g, mem);
+	case APERTURE_VIDMEM:
+		return gk20a_gmmu_free_vid(g, mem);
+	default:
+		break; /* like free() on "null" memory */
+	}
+}
+
+void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem)
+{
+	if (mem->gpu_va)
+		gk20a_gmmu_unmap(vm, mem->gpu_va, mem->size, gk20a_mem_flag_none);
+	mem->gpu_va = 0;
+
+	gk20a_gmmu_free(vm->mm->g, mem);
+}
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index 6fb6c27e..cfe1149f 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -18,6 +18,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/highmem.h>
 
+#include <nvgpu/dma.h>
 #include <nvgpu/semaphore.h>
 #include <nvgpu/kmem.h>
 
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 0db6c21a..e70ee4a6 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -24,6 +24,7 @@
 
 #include <trace/events/gk20a.h>
 
+#include <nvgpu/dma.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/kmem.h>
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index ce76bfc3..9cc4b678 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -26,6 +26,7 @@
 #include <linux/debugfs.h>
 
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 
 #include "gk20a.h"
 #include "debug_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index d0e2be79..6be616b3 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -26,6 +26,7 @@
 #include <nvgpu/semaphore.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 
 #include "gk20a.h"
 #include "debug_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
index 98fa53ab..738e8c1c 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -22,6 +22,7 @@
 
 #include <nvgpu/kmem.h>
 #include <nvgpu/lock.h>
+#include <nvgpu/dma.h>
 
 #include "gk20a.h"
 #include "css_gr_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 3ed28718..d8fa7505 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -24,6 +24,7 @@
 #include <uapi/linux/nvgpu.h>
 
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 
 #include "ctxsw_trace_gk20a.h"
 #include "fecs_trace_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 1e9a8e15..c1f94eb3 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -26,6 +26,7 @@
 #endif
 #include <linux/sort.h>
 
+#include <nvgpu/dma.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/semaphore.h>
 #include <nvgpu/kmem.h>
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 971e2320..a9b6a546 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -29,6 +29,7 @@
 #include <linux/bsearch.h>
 #include <trace/events/gk20a.h>
 
+#include <nvgpu/dma.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/nvgpu_common.h>
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c
index 7c4db84e..7c73be77 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_common.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c
@@ -21,6 +21,8 @@
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
 
+#include <nvgpu/dma.h>
+
 #include "gk20a.h"
 #include "gr_gk20a.h"
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index cdd0e541..79654af3 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -30,6 +30,7 @@
 #include <uapi/linux/nvgpu.h>
 #include <trace/events/gk20a.h>
 
+#include <nvgpu/dma.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/pramin.h>
@@ -2519,152 +2520,6 @@ u64 gk20a_gmmu_fixed_map(struct vm_gk20a *vm,
 			aperture);
 }
 
-int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
-{
-	return gk20a_gmmu_alloc_flags(g, 0, size, mem);
-}
-
-int gk20a_gmmu_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
-		struct nvgpu_mem *mem)
-{
-	if (g->mm.vidmem_is_vidmem) {
-		/*
-		 * Force the no-kernel-mapping flag on because we don't support
-		 * the lack of it for vidmem - the user should not care when
-		 * using gk20a_gmmu_alloc_map and it's vidmem, or if there's a
-		 * difference, the user should use the flag explicitly anyway.
-		 */
-		int err = gk20a_gmmu_alloc_flags_vid(g,
-				flags | NVGPU_DMA_NO_KERNEL_MAPPING,
-				size, mem);
-
-		if (!err)
-			return 0;
-		/*
-		 * Fall back to sysmem (which may then also fail) in case
-		 * vidmem is exhausted.
-		 */
-	}
-
-	return gk20a_gmmu_alloc_flags_sys(g, flags, size, mem);
-}
-
-int gk20a_gmmu_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
-{
-	return gk20a_gmmu_alloc_flags_sys(g, 0, size, mem);
-}
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
-static void gk20a_dma_flags_to_attrs(unsigned long *attrs,
-		unsigned long flags)
-#define ATTR_ARG(x) *x
-#else
-static void gk20a_dma_flags_to_attrs(struct dma_attrs *attrs,
-		unsigned long flags)
-#define ATTR_ARG(x) x
-#endif
-{
-	if (flags & NVGPU_DMA_NO_KERNEL_MAPPING)
-		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs));
-	if (flags & NVGPU_DMA_FORCE_CONTIGUOUS)
-		dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs));
-	if (flags & NVGPU_DMA_READ_ONLY)
-		dma_set_attr(DMA_ATTR_READ_ONLY, ATTR_ARG(attrs));
-#undef ATTR_ARG
-}
-
-int gk20a_gmmu_alloc_flags_sys(struct gk20a *g, unsigned long flags,
-		size_t size, struct nvgpu_mem *mem)
-{
-	struct device *d = dev_from_gk20a(g);
-	int err;
-	dma_addr_t iova;
-
-	gk20a_dbg_fn("");
-
-	if (flags) {
-		DEFINE_DMA_ATTRS(dma_attrs);
-
-		gk20a_dma_flags_to_attrs(&dma_attrs, flags);
-
-		if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
-			mem->pages = dma_alloc_attrs(d,
-					size, &iova, GFP_KERNEL,
-					__DMA_ATTR(dma_attrs));
-			if (!mem->pages)
-				return -ENOMEM;
-		} else {
-			mem->cpu_va = dma_alloc_attrs(d,
-					size, &iova, GFP_KERNEL,
-					__DMA_ATTR(dma_attrs));
-			if (!mem->cpu_va)
-				return -ENOMEM;
-		}
-	} else {
-		mem->cpu_va = dma_alloc_coherent(d, size, &iova, GFP_KERNEL);
-		if (!mem->cpu_va)
-			return -ENOMEM;
-	}
-
-	if (flags & NVGPU_DMA_NO_KERNEL_MAPPING)
-		err = gk20a_get_sgtable_from_pages(d, &mem->sgt, mem->pages,
-						   iova, size);
-	else {
-		err = gk20a_get_sgtable(d, &mem->sgt, mem->cpu_va, iova, size);
-		memset(mem->cpu_va, 0, size);
-	}
-	if (err)
-		goto fail_free;
-
-	mem->size = size;
-	mem->aperture = APERTURE_SYSMEM;
-	mem->flags = flags;
-
-	gk20a_dbg_fn("done");
-
-	return 0;
-
-fail_free:
-	dma_free_coherent(d, size, mem->cpu_va, iova);
-	mem->cpu_va = NULL;
-	mem->sgt = NULL;
-	return err;
-}
-
-static void gk20a_gmmu_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
-{
-	struct device *d = dev_from_gk20a(g);
-
-	if (mem->cpu_va || mem->pages) {
-		if (mem->flags) {
-			DEFINE_DMA_ATTRS(dma_attrs);
-
-			gk20a_dma_flags_to_attrs(&dma_attrs, mem->flags);
-
-			if (mem->flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
-				dma_free_attrs(d, mem->size, mem->pages,
-					sg_dma_address(mem->sgt->sgl),
-					__DMA_ATTR(dma_attrs));
-			} else {
-				dma_free_attrs(d, mem->size, mem->cpu_va,
-					sg_dma_address(mem->sgt->sgl),
-					__DMA_ATTR(dma_attrs));
-			}
-		} else {
-			dma_free_coherent(d, mem->size, mem->cpu_va,
-					sg_dma_address(mem->sgt->sgl));
-		}
-		mem->cpu_va = NULL;
-		mem->pages = NULL;
-	}
-
-	if (mem->sgt)
-		gk20a_free_sgtable(g, &mem->sgt);
-
-	mem->size = 0;
-	mem->aperture = APERTURE_INVALID;
-}
-
 #if defined(CONFIG_GK20A_VIDMEM)
 static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
 {
@@ -2728,153 +2583,6 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
 }
 #endif
 
-int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
-{
-	return gk20a_gmmu_alloc_flags_vid(g,
-			NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
-}
-
-int gk20a_gmmu_alloc_flags_vid(struct gk20a *g, unsigned long flags,
-		size_t size, struct nvgpu_mem *mem)
-{
-	return gk20a_gmmu_alloc_flags_vid_at(g, flags, size, mem, 0);
-}
-
-#if defined(CONFIG_GK20A_VIDMEM)
-static u64 __gk20a_gmmu_alloc(struct nvgpu_allocator *allocator, dma_addr_t at,
-				size_t size)
-{
-	u64 addr = 0;
-
-	if (at)
-		addr = nvgpu_alloc_fixed(allocator, at, size, 0);
-	else
-		addr = nvgpu_alloc(allocator, size);
-
-	return addr;
-}
-#endif
-
-int gk20a_gmmu_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
-		size_t size, struct nvgpu_mem *mem, dma_addr_t at)
-{
-#if defined(CONFIG_GK20A_VIDMEM)
-	u64 addr;
-	int err;
-	struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ?
-		&g->mm.vidmem.allocator :
-		&g->mm.vidmem.bootstrap_allocator;
-	int before_pending;
-
-	gk20a_dbg_fn("");
-
-	if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
-		return -ENOSYS;
-
-	/*
-	 * Our own allocator doesn't have any flags yet, and we can't
-	 * kernel-map these, so require explicit flags.
-	 */
-	WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING);
-
-	nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
-	before_pending = atomic64_read(&g->mm.vidmem.bytes_pending);
-	addr = __gk20a_gmmu_alloc(vidmem_alloc, at, size);
-	nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
-	if (!addr) {
-		/*
-		 * If memory is known to be freed soon, let the user know that
-		 * it may be available after a while.
-		 */
-		if (before_pending)
-			return -EAGAIN;
-		else
-			return -ENOMEM;
-	}
-
-	if (at)
-		mem->fixed = true;
-	else
-		mem->fixed = false;
-
-	mem->sgt = nvgpu_kzalloc(g, sizeof(struct sg_table));
-	if (!mem->sgt) {
-		err = -ENOMEM;
-		goto fail_physfree;
-	}
-
-	err = sg_alloc_table(mem->sgt, 1, GFP_KERNEL);
-	if (err)
-		goto fail_kfree;
-
-	set_vidmem_page_alloc(mem->sgt->sgl, addr);
-	sg_set_page(mem->sgt->sgl, NULL, size, 0);
-
-	mem->size = size;
-	mem->aperture = APERTURE_VIDMEM;
-	mem->allocator = vidmem_alloc;
-	mem->flags = flags;
-
-	nvgpu_init_list_node(&mem->clear_list_entry);
-
-	gk20a_dbg_fn("done at 0x%llx size %zu", addr, size);
-
-	return 0;
-
-fail_kfree:
-	nvgpu_kfree(g, mem->sgt);
-fail_physfree:
-	nvgpu_free(&g->mm.vidmem.allocator, addr);
-	return err;
-#else
-	return -ENOSYS;
-#endif
-}
-
-static void gk20a_gmmu_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
-{
-#if defined(CONFIG_GK20A_VIDMEM)
-	bool was_empty;
-
-	/* Sanity check - only this supported when allocating. */
-	WARN_ON(mem->flags != NVGPU_DMA_NO_KERNEL_MAPPING);
-
-	if (mem->user_mem) {
-		nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
-		was_empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
-		nvgpu_list_add_tail(&mem->clear_list_entry,
-			      &g->mm.vidmem.clear_list_head);
-		atomic64_add(mem->size, &g->mm.vidmem.bytes_pending);
-		nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
-
-		if (was_empty) {
-			cancel_work_sync(&g->mm.vidmem.clear_mem_worker);
-			schedule_work(&g->mm.vidmem.clear_mem_worker);
-		}
-	} else {
-		nvgpu_memset(g, mem, 0, 0, mem->size);
-		nvgpu_free(mem->allocator,
-			   (u64)get_vidmem_page_alloc(mem->sgt->sgl));
-		gk20a_free_sgtable(g, &mem->sgt);
-
-		mem->size = 0;
-		mem->aperture = APERTURE_INVALID;
-	}
-#endif
-}
-
-void gk20a_gmmu_free(struct gk20a *g, struct nvgpu_mem *mem)
-{
-	switch (mem->aperture) {
-	case APERTURE_SYSMEM:
-		return gk20a_gmmu_free_sys(g, mem);
-	case APERTURE_VIDMEM:
-		return gk20a_gmmu_free_vid(g, mem);
-	default:
-		break; /* like free() on "null" memory */
-	}
-}
-
 /*
  * If mem is in VIDMEM, return base address in vidmem
  * else return IOVA address for SYSMEM
@@ -2938,105 +2646,6 @@ static void gk20a_vidmem_clear_mem_worker(struct work_struct *work)
 }
 #endif
 
-int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size,
-		struct nvgpu_mem *mem)
-{
-	return gk20a_gmmu_alloc_map_flags(vm, 0, size, mem);
-}
-
-int gk20a_gmmu_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
-		size_t size, struct nvgpu_mem *mem)
-{
-	if (vm->mm->vidmem_is_vidmem) {
-		/*
-		 * Force the no-kernel-mapping flag on because we don't support
-		 * the lack of it for vidmem - the user should not care when
-		 * using gk20a_gmmu_alloc_map and it's vidmem, or if there's a
-		 * difference, the user should use the flag explicitly anyway.
-		 */
-		int err = gk20a_gmmu_alloc_map_flags_vid(vm,
-				flags | NVGPU_DMA_NO_KERNEL_MAPPING,
-				size, mem);
-
-		if (!err)
-			return 0;
-		/*
-		 * Fall back to sysmem (which may then also fail) in case
-		 * vidmem is exhausted.
-		 */
-	}
-
-	return gk20a_gmmu_alloc_map_flags_sys(vm, flags, size, mem);
-}
-
-int gk20a_gmmu_alloc_map_sys(struct vm_gk20a *vm, size_t size,
-		struct nvgpu_mem *mem)
-{
-	return gk20a_gmmu_alloc_map_flags_sys(vm, 0, size, mem);
-}
-
-int gk20a_gmmu_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
-		size_t size, struct nvgpu_mem *mem)
-{
-	int err = gk20a_gmmu_alloc_flags_sys(vm->mm->g, flags, size, mem);
-
-	if (err)
-		return err;
-
-	mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0,
-				     gk20a_mem_flag_none, false,
-				     mem->aperture);
-	if (!mem->gpu_va) {
-		err = -ENOMEM;
-		goto fail_free;
-	}
-
-	return 0;
-
-fail_free:
-	gk20a_gmmu_free(vm->mm->g, mem);
-	return err;
-}
-
-int gk20a_gmmu_alloc_map_vid(struct vm_gk20a *vm, size_t size,
-		struct nvgpu_mem *mem)
-{
-	return gk20a_gmmu_alloc_map_flags_vid(vm,
-			NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
-}
-
-int gk20a_gmmu_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
-		size_t size, struct nvgpu_mem *mem)
-{
-	int err = gk20a_gmmu_alloc_flags_vid(vm->mm->g, flags, size, mem);
-
-	if (err)
-		return err;
-
-	mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0,
-				     gk20a_mem_flag_none, false,
-				     mem->aperture);
-	if (!mem->gpu_va) {
-		err = -ENOMEM;
-		goto fail_free;
-	}
-
-	return 0;
-
-fail_free:
-	gk20a_gmmu_free(vm->mm->g, mem);
-	return err;
-}
-
-void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem)
-{
-	if (mem->gpu_va)
-		gk20a_gmmu_unmap(vm, mem->gpu_va, mem->size, gk20a_mem_flag_none);
-	mem->gpu_va = 0;
-
-	gk20a_gmmu_free(vm->mm->g, mem);
-}
-
 dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
 {
 	struct mapped_buffer_node *buffer;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index db72ca79..53366caf 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -524,56 +524,6 @@ u64 gk20a_gmmu_fixed_map(struct vm_gk20a *vm,
 		bool priv,
 		enum nvgpu_aperture aperture);
 
-/* Flags for the below gk20a_gmmu_{alloc,alloc_map}_flags* */
-
-/*
- * Don't create a virtual kernel mapping for the buffer but only allocate it;
- * this may save some resources. The buffer can be mapped later explicitly.
- */
-#define NVGPU_DMA_NO_KERNEL_MAPPING	(1 << 0)
-/*
- * Don't allow building the buffer from individual pages but require a
- * physically contiguous block.
- */
-#define NVGPU_DMA_FORCE_CONTIGUOUS	(1 << 1)
-/*
- * Make the mapping read-only.
- */
-#define NVGPU_DMA_READ_ONLY		(1 << 2)
-
-int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size,
-		struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
-		size_t size, struct nvgpu_mem *mem);
-
-int gk20a_gmmu_alloc_map_sys(struct vm_gk20a *vm, size_t size,
-		struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
-		size_t size, struct nvgpu_mem *mem);
-
-int gk20a_gmmu_alloc_map_vid(struct vm_gk20a *vm, size_t size,
-		struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
-		size_t size, struct nvgpu_mem *mem);
-
-void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem);
-
-int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
-		struct nvgpu_mem *mem);
-
-int gk20a_gmmu_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_flags_sys(struct gk20a *g, unsigned long flags,
-		size_t size, struct nvgpu_mem *mem);
-
-int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_flags_vid(struct gk20a *g, unsigned long flags,
-		size_t size, struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
-		size_t size, struct nvgpu_mem *mem, dma_addr_t at);
-
-void gk20a_gmmu_free(struct gk20a *g, struct nvgpu_mem *mem);
-
 static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem)
 {
 	/* FIXME: the sgt/sgl may get null if this is accessed e.g. in an isr
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 591b7163..7a6bfe22 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -26,6 +26,7 @@
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 
 #include "gk20a.h"
 #include "gr_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index 013ce43a..3cfcbb19 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -20,6 +20,7 @@
 
 #include <linux/platform/tegra/mc.h>
 
+#include <nvgpu/dma.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/kmem.h>
diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index 41c4981d..9acc8eda 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -20,6 +20,7 @@
 
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 #include <nvgpu/acr/nvgpu_acr.h>
 
 #include "gk20a/gk20a.h"
diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c
index dae23374..78859f88 100644
--- a/drivers/gpu/nvgpu/gp106/gr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c
@@ -13,6 +13,8 @@
  * more details.
  */
 
+#include <nvgpu/dma.h>
+
 #include "gk20a/gk20a.h"
 #include "gk20a/gr_gk20a.h"
 #include "gm20b/gr_gm20b.h"
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index a7e77232..b305b895 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -15,6 +15,8 @@
 
 #include <linux/delay.h>
 
+#include <nvgpu/dma.h>
+
 #include "fifo_gp10b.h"
 
 #include "gk20a/gk20a.h"
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 8e1517f6..b9367120 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -22,6 +22,7 @@
 
 #include <nvgpu/timers.h>
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/gr_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index a0dc8c55..2f894435 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -16,6 +16,8 @@
 #include <linux/pm_runtime.h>
 #include <linux/dma-mapping.h>
 
+#include <nvgpu/dma.h>
+
 #include "gk20a/gk20a.h"
 #include "gm20b/mm_gm20b.h"
 #include "mm_gp10b.h"
diff --git a/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c b/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
index e73bcd8f..bf52b5c9 100644
--- a/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
@@ -16,6 +16,8 @@
 #include <linux/pm_runtime.h>
 #include <linux/dma-mapping.h>
 
+#include <nvgpu/dma.h>
+
 #include "gk20a/gk20a.h"
 
 #include "rpfb_gp10b.h"
diff --git a/drivers/gpu/nvgpu/include/nvgpu/dma.h b/drivers/gpu/nvgpu/include/nvgpu/dma.h
new file mode 100644
index 00000000..d4fad584
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/dma.h
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __NVGPU_DMA_H__
+#define __NVGPU_DMA_H__
+
+#include <nvgpu/types.h>
+
+struct gk20a;
+struct vm_gk20a;
+struct nvgpu_mem;
+
+/*
+ * Flags for the gk20a_gmmu_{alloc,alloc_map}_flags*() functions below.
+ */
+
+/*
+ * Don't create a virtual kernel mapping for the buffer but only allocate it;
+ * this may save some resources. The buffer can be mapped later explicitly.
+ */
+#define NVGPU_DMA_NO_KERNEL_MAPPING	(1 << 0)
+
+/*
+ * Don't allow building the buffer from individual pages but require a
+ * physically contiguous block.
+ */
+#define NVGPU_DMA_FORCE_CONTIGUOUS	(1 << 1)
+
+/*
+ * Make the mapping read-only.
+ */
+#define NVGPU_DMA_READ_ONLY		(1 << 2)
+
+/**
+ * gk20a_gmmu_alloc - Allocate DMA memory
+ *
+ * @g    - The GPU.
+ * @size - Size of the allocation in bytes.
+ * @mem  - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA. Store the allocation info in @mem.
+ * Returns 0 on success and a suitable error code when there's an error. This
+ * memory can be either placed in VIDMEM or SYSMEM, whichever is more
+ * convenient for the driver.
+ */
+int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
+
+/**
+ * gk20a_gmmu_alloc_flags - Allocate DMA memory
+ *
+ * @g     - The GPU.
+ * @flags - Flags modifying the operation of the DMA allocation.
+ * @size  - Size of the allocation in bytes.
+ * @mem   - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA. Store the allocation info in @mem.
+ * Returns 0 on success and a suitable error code when there's an error. This
+ * memory can be either placed in VIDMEM or SYSMEM, whichever is more
+ * convenient for the driver.
+ *
+ * The following flags are accepted:
+ *
+ *   %NVGPU_DMA_NO_KERNEL_MAPPING
+ *   %NVGPU_DMA_FORCE_CONTIGUOUS
+ *   %NVGPU_DMA_READ_ONLY
+ */
+int gk20a_gmmu_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
+		struct nvgpu_mem *mem);
+
+/**
+ * gk20a_gmmu_alloc_sys - Allocate DMA memory
+ *
+ * @g    - The GPU.
+ * @size - Size of the allocation in bytes.
+ * @mem  - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA. Store the allocation info in @mem.
+ * Returns 0 on success and a suitable error code when there's an error. This
+ * allocates memory specifically in SYSMEM.
+ */
+int gk20a_gmmu_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
+
+/**
+ * gk20a_gmmu_alloc_flags_sys - Allocate DMA memory
+ *
+ * @g     - The GPU.
+ * @flags - Flags modifying the operation of the DMA allocation.
+ * @size  - Size of the allocation in bytes.
+ * @mem   - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA. Store the allocation info in @mem.
+ * Returns 0 on success and a suitable error code when there's an error. This
+ * allocates memory specifically in SYSMEM.
+ *
+ * The following flags are accepted:
+ *
+ *   %NVGPU_DMA_NO_KERNEL_MAPPING
+ *   %NVGPU_DMA_FORCE_CONTIGUOUS
+ *   %NVGPU_DMA_READ_ONLY
+ */
+int gk20a_gmmu_alloc_flags_sys(struct gk20a *g, unsigned long flags,
+		size_t size, struct nvgpu_mem *mem);
+
+/**
+ * gk20a_gmmu_alloc_vid - Allocate DMA memory
+ *
+ * @g    - The GPU.
+ * @size - Size of the allocation in bytes.
+ * @mem  - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA. Store the allocation info in @mem.
+ * Returns 0 on success and a suitable error code when there's an error. This
+ * allocates memory specifically in VIDMEM.
+ */
+int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
+
+/**
+ * gk20a_gmmu_alloc_flags_vid - Allocate DMA memory
+ *
+ * @g     - The GPU.
+ * @flags - Flags modifying the operation of the DMA allocation.
+ * @size  - Size of the allocation in bytes.
+ * @mem   - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA. Store the allocation info in @mem.
+ * Returns 0 on success and a suitable error code when there's an error. This
+ * allocates memory specifically in VIDMEM.
+ *
+ * Only the following flags are accepted:
+ *
+ *   %NVGPU_DMA_NO_KERNEL_MAPPING
+ *
+ */
+int gk20a_gmmu_alloc_flags_vid(struct gk20a *g, unsigned long flags,
+		size_t size, struct nvgpu_mem *mem);
+
+/**
+ * gk20a_gmmu_alloc_flags_vid_at - Allocate DMA memory
+ *
+ * @g     - The GPU.
+ * @flags - Flags modifying the operation of the DMA allocation.
+ * @size  - Size of the allocation in bytes.
+ * @mem   - Struct for storing the allocation information.
+ * @at    - A specific location to attempt to allocate memory from or 0 if the
+ *          caller does not care what the address is.
+ *
+ * Allocate memory suitable for doing DMA. Store the allocation info in @mem.
+ * Returns 0 on success and a suitable error code when there's an error. This
+ * allocates memory specifically in VIDMEM.
+ *
+ * Only the following flags are accepted:
+ *
+ *   %NVGPU_DMA_NO_KERNEL_MAPPING
+ */
+int gk20a_gmmu_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
+		size_t size, struct nvgpu_mem *mem, dma_addr_t at);
+
+/**
+ * gk20a_gmmu_free - Free a DMA allocation
+ *
+ * @g   - The GPU.
+ * @mem - An allocation to free.
+ *
+ * Free memory created with any of:
+ *
+ *   gk20a_gmmu_alloc()
+ *   gk20a_gmmu_alloc_flags()
+ *   gk20a_gmmu_alloc_sys()
+ *   gk20a_gmmu_alloc_flags_sys()
+ *   gk20a_gmmu_alloc_vid()
+ *   gk20a_gmmu_alloc_flags_vid()
+ *   gk20a_gmmu_alloc_flags_vid_at()
+ */
+void gk20a_gmmu_free(struct gk20a *g, struct nvgpu_mem *mem);
+
+/**
+ * gk20a_gmmu_alloc_map - Allocate DMA memory and map into GMMU.
+ *
+ * @vm   - VM context for GMMU mapping.
+ * @size - Size of the allocation in bytes.
+ * @mem  - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA and map that memory into the GMMU.
+ * Note this is different than mapping it into the CPU. This memory can be
+ * either placed in VIDMEM or SYSMEM, whichever is more convenient for the
+ * driver.
+ */
+int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size,
+		struct nvgpu_mem *mem);
+
+/**
+ * gk20a_gmmu_alloc_map_flags - Allocate DMA memory and map into GMMU.
+ *
+ * @vm    - VM context for GMMU mapping.
+ * @flags - Flags modifying the operation of the DMA allocation.
+ * @size  - Size of the allocation in bytes.
+ * @mem   - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA and map that memory into the GMMU.
+ * Note this is different than mapping it into the CPU. This memory can be
+ * either placed in VIDMEM or SYSMEM, whichever is more convenient for the
+ * driver.
+ *
+ * This version passes @flags on to the underlying DMA allocation. The accepted
+ * flags are:
+ *
+ *   %NVGPU_DMA_NO_KERNEL_MAPPING
+ *   %NVGPU_DMA_FORCE_CONTIGUOUS
+ *   %NVGPU_DMA_READ_ONLY
+ */
+int gk20a_gmmu_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
+		size_t size, struct nvgpu_mem *mem);
+
+/**
+ * gk20a_gmmu_alloc_map_sys - Allocate DMA memory and map into GMMU.
+ *
+ * @vm   - VM context for GMMU mapping.
+ * @size - Size of the allocation in bytes.
+ * @mem  - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA and map that memory into the GMMU.
+ * This memory will be placed in SYSMEM.
+ */
+int gk20a_gmmu_alloc_map_sys(struct vm_gk20a *vm, size_t size,
+		struct nvgpu_mem *mem);
+
+/**
+ * gk20a_gmmu_alloc_map_flags_sys - Allocate DMA memory and map into GMMU.
+ *
+ * @vm    - VM context for GMMU mapping.
+ * @flags - Flags modifying the operation of the DMA allocation.
+ * @size  - Size of the allocation in bytes.
+ * @mem   - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA and map that memory into the GMMU.
+ * This memory will be placed in SYSMEM.
+ *
+ * This version passes @flags on to the underlying DMA allocation. The accepted
+ * flags are:
+ *
+ *   %NVGPU_DMA_NO_KERNEL_MAPPING
+ *   %NVGPU_DMA_FORCE_CONTIGUOUS
+ *   %NVGPU_DMA_READ_ONLY
+ */
+int gk20a_gmmu_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
+		size_t size, struct nvgpu_mem *mem);
+
+/**
+ * gk20a_gmmu_alloc_map_vid - Allocate DMA memory and map into GMMU.
+ *
+ * @vm   - VM context for GMMU mapping.
+ * @size - Size of the allocation in bytes.
+ * @mem  - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA and map that memory into the GMMU.
+ * This memory will be placed in VIDMEM.
+ */
+int gk20a_gmmu_alloc_map_vid(struct vm_gk20a *vm, size_t size,
+		struct nvgpu_mem *mem);
+
+/**
+ * gk20a_gmmu_alloc_map_flags_vid - Allocate DMA memory and map into GMMU.
+ *
+ * @vm    - VM context for GMMU mapping.
+ * @flags - Flags modifying the operation of the DMA allocation.
+ * @size  - Size of the allocation in bytes.
+ * @mem   - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA and map that memory into the GMMU.
+ * This memory will be placed in VIDMEM.
+ *
+ * This version passes @flags on to the underlying DMA allocation. As that
+ * allocation is made in VIDMEM, only the flag documented as accepted by
+ * gk20a_gmmu_alloc_flags_vid() applies:
+ *
+ *   %NVGPU_DMA_NO_KERNEL_MAPPING
+ *
+ */
+int gk20a_gmmu_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
+		size_t size, struct nvgpu_mem *mem);
+
+/**
+ * gk20a_gmmu_unmap_free - Free a GMMU-mapped DMA allocation
+ *
+ * @vm  - VM context for GMMU mapping.
+ * @mem - An allocation to free.
+ *
+ * Free memory created with any of:
+ *
+ *   gk20a_gmmu_alloc_map()
+ *   gk20a_gmmu_alloc_map_flags()
+ *   gk20a_gmmu_alloc_map_sys()
+ *   gk20a_gmmu_alloc_map_flags_sys()
+ *   gk20a_gmmu_alloc_map_vid()
+ *   gk20a_gmmu_alloc_map_flags_vid()
+ */
+void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem);
+
+#endif
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index cfe9322e..59fb0c4a 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -17,6 +17,7 @@
 #include <trace/events/gk20a.h>
 
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 
 #include "vgpu/vgpu.h"
 #include "gk20a/ctxsw_trace_gk20a.h"
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
index 8cb5b029..527e12e4 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
@@ -12,6 +12,7 @@
  */
 
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 
 #include "vgpu/vgpu.h"
 #include "vgpu/gm20b/vgpu_gr_gm20b.h"
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index ea81cefe..b12f8a53 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -16,6 +16,7 @@
 #include <linux/dma-mapping.h>
 
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 
 #include "vgpu/vgpu.h"
 #include "gk20a/mm_gk20a.h"
-- 
cgit v1.2.2