25 files changed, 754 insertions, 442 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index 34d8d19f..d994ac1d 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -32,6 +32,7 @@ nvgpu-y := \
        common/linux/ioctl_tsg.o \
        common/linux/log.o \
        common/linux/nvgpu_mem.o \
+        common/linux/dma.o \
        common/mm/nvgpu_allocator.o \
        common/mm/bitmap_allocator.o \
        common/mm/buddy_allocator.o \
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
new file mode 100644
index 00000000..755848ea
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/dma.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/dma-attrs.h>
+#include <linux/dma-mapping.h>
+#include <nvgpu/dma.h>
+#include <nvgpu/lock.h>
+#include "gk20a/gk20a.h"
+#if defined(CONFIG_GK20A_VIDMEM)
+/*
+ * Carve @size bytes out of @allocator. A non-zero @at requests that exact
+ * address through nvgpu_alloc_fixed(); a zero @at lets nvgpu_alloc() choose.
+ * The allocator's return value is passed back unchanged.
+ */
+static u64 __gk20a_gmmu_alloc(struct nvgpu_allocator *allocator, dma_addr_t at,
+                                size_t size)
+{
+        if (!at)
+                return nvgpu_alloc(allocator, size);
+        return nvgpu_alloc_fixed(allocator, at, size, 0);
+}
+#endif
+/*
+ * Translate NVGPU_DMA_* allocation flags into the corresponding kernel
+ * DMA_ATTR_* attributes, setting each requested one in @attrs with
+ * dma_set_attr().
+ *
+ * Two signatures share the single body below: for kernels >= 4.9 @attrs
+ * points to an unsigned long, otherwise to a struct dma_attrs. ATTR_ARG()
+ * adapts the pointer parameter to the form handed to dma_set_attr() in each
+ * case (dereferenced vs. passed through) and is undefined again at the end
+ * of the body so it does not leak into the rest of the file.
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
+static void gk20a_dma_flags_to_attrs(unsigned long *attrs,
+                unsigned long flags)
+#define ATTR_ARG(x) *x
+#else
+static void gk20a_dma_flags_to_attrs(struct dma_attrs *attrs,
+                unsigned long flags)
+#define ATTR_ARG(x) x
+#endif
+{
+        /* Each NVGPU flag maps one-to-one onto a kernel DMA attribute. */
+        if (flags & NVGPU_DMA_NO_KERNEL_MAPPING)
+                dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs));
+        if (flags & NVGPU_DMA_FORCE_CONTIGUOUS)
+                dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs));
+        if (flags & NVGPU_DMA_READ_ONLY)
+                dma_set_attr(DMA_ATTR_READ_ONLY, ATTR_ARG(attrs));
+#undef ATTR_ARG
+}
+/*
+ * Allocate @size bytes for @mem without any NVGPU_DMA_* flags; shorthand for
+ * gk20a_gmmu_alloc_flags() with flags == 0. Returns that function's result.
+ */
+int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
+{
+        const unsigned long no_flags = 0;
+        return gk20a_gmmu_alloc_flags(g, no_flags, size, mem);
+}
+/*
+ * Allocate @size bytes for @mem, trying video memory first when
+ * g->mm.vidmem_is_vidmem is set and using system memory otherwise or as a
+ * fallback. Returns 0 on success or the sysmem allocator's error code.
+ */
+int gk20a_gmmu_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
+                struct nvgpu_mem *mem)
+{
+        int vid_err;
+        if (!g->mm.vidmem_is_vidmem)
+                return gk20a_gmmu_alloc_flags_sys(g, flags, size, mem);
+        /*
+         * Vidmem buffers are never kernel-mapped, so the no-kernel-mapping
+         * flag is always added for the vidmem attempt. Callers that depend
+         * on the distinction are expected to pass the flag themselves.
+         */
+        vid_err = gk20a_gmmu_alloc_flags_vid(g,
+                        flags | NVGPU_DMA_NO_KERNEL_MAPPING,
+                        size, mem);
+        if (vid_err == 0)
+                return 0;
+        /*
+         * The vidmem attempt failed (e.g. vidmem exhausted): retry from
+         * sysmem with the caller's original flags. This may fail as well.
+         */
+        return gk20a_gmmu_alloc_flags_sys(g, flags, size, mem);
+}
+/*
+ * Allocate @size bytes of system memory for @mem without any NVGPU_DMA_*
+ * flags; shorthand for gk20a_gmmu_alloc_flags_sys() with flags == 0.
+ */
+int gk20a_gmmu_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
+{
+        const unsigned long no_flags = 0;
+        return gk20a_gmmu_alloc_flags_sys(g, no_flags, size, mem);
+}
+/*
+ * Allocate @size bytes of system memory for @mem through the kernel DMA API
+ * and build the sg_table describing it.
+ *
+ * With flags == 0 the buffer comes from dma_alloc_coherent(); otherwise the
+ * NVGPU_DMA_* flags are converted to DMA attributes and dma_alloc_attrs() is
+ * used. When NVGPU_DMA_NO_KERNEL_MAPPING is set the allocation handle is
+ * stored in mem->pages and the buffer is not zeroed here; in every other
+ * case it is stored in mem->cpu_va and the buffer is zero-filled.
+ *
+ * On success mem->size, mem->aperture (APERTURE_SYSMEM) and mem->flags are
+ * filled in and 0 is returned. Returns -ENOMEM when the DMA allocation fails,
+ * or the sg_table helper's error code, in which case the DMA buffer is
+ * released again and mem->cpu_va, mem->pages and mem->sgt are reset to NULL.
+ */
+int gk20a_gmmu_alloc_flags_sys(struct gk20a *g, unsigned long flags,
+                size_t size, struct nvgpu_mem *mem)
+{
+        struct device *d = dev_from_gk20a(g);
+        const bool no_kernel_mapping = !!(flags & NVGPU_DMA_NO_KERNEL_MAPPING);
+        /* Function scope so the error path can free with the same attrs. */
+        DEFINE_DMA_ATTRS(dma_attrs);
+        dma_addr_t iova;
+        int err;
+        gk20a_dbg_fn("");
+        if (flags) {
+                gk20a_dma_flags_to_attrs(&dma_attrs, flags);
+                if (no_kernel_mapping) {
+                        mem->pages = dma_alloc_attrs(d,
+                                        size, &iova, GFP_KERNEL,
+                                        __DMA_ATTR(dma_attrs));
+                        if (!mem->pages)
+                                return -ENOMEM;
+                } else {
+                        mem->cpu_va = dma_alloc_attrs(d,
+                                        size, &iova, GFP_KERNEL,
+                                        __DMA_ATTR(dma_attrs));
+                        if (!mem->cpu_va)
+                                return -ENOMEM;
+                }
+        } else {
+                mem->cpu_va = dma_alloc_coherent(d, size, &iova, GFP_KERNEL);
+                if (!mem->cpu_va)
+                        return -ENOMEM;
+        }
+        if (no_kernel_mapping)
+                err = gk20a_get_sgtable_from_pages(d, &mem->sgt, mem->pages,
+                                                   iova, size);
+        else
+                err = gk20a_get_sgtable(d, &mem->sgt, mem->cpu_va, iova, size);
+        if (err)
+                goto fail_free;
+        /* Only kernel-mapped buffers can be zeroed through the CPU here. */
+        if (!no_kernel_mapping)
+                memset(mem->cpu_va, 0, size);
+        mem->size = size;
+        mem->aperture = APERTURE_SYSMEM;
+        mem->flags = flags;
+        gk20a_dbg_fn("done");
+        return 0;
+fail_free:
+        /*
+         * Release the buffer through the same interface, handle and attrs it
+         * was allocated with; freeing an attrs allocation via
+         * dma_free_coherent(cpu_va) would leak the no-kernel-mapping pages.
+         */
+        if (flags) {
+                if (no_kernel_mapping)
+                        dma_free_attrs(d, size, mem->pages, iova,
+                                        __DMA_ATTR(dma_attrs));
+                else
+                        dma_free_attrs(d, size, mem->cpu_va, iova,
+                                        __DMA_ATTR(dma_attrs));
+        } else {
+                dma_free_coherent(d, size, mem->cpu_va, iova);
+        }
+        mem->cpu_va = NULL;
+        mem->pages = NULL;
+        mem->sgt = NULL;
+        return err;
+}
+/*
+ * Allocate @size bytes of video memory for @mem. Always passes
+ * NVGPU_DMA_NO_KERNEL_MAPPING to gk20a_gmmu_alloc_flags_vid() and returns
+ * its result.
+ */
+int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
+{
+        const unsigned long vid_flags = NVGPU_DMA_NO_KERNEL_MAPPING;
+        return gk20a_gmmu_alloc_flags_vid(g, vid_flags, size, mem);
+}
+/*
+ * Allocate @size bytes of video memory for @mem with no fixed address;
+ * shorthand for gk20a_gmmu_alloc_flags_vid_at() with at == 0.
+ */
+int gk20a_gmmu_alloc_flags_vid(struct gk20a *g, unsigned long flags,
+                size_t size, struct nvgpu_mem *mem)
+{
+        const dma_addr_t any_address = 0;
+        return gk20a_gmmu_alloc_flags_vid_at(g, flags, size, mem, any_address);
+}
+/*
+ * Allocate @size bytes of video memory for @mem, at the fixed address @at
+ * when @at is non-zero and wherever the allocator chooses otherwise.
+ *
+ * The allocation is taken from the main vidmem allocator once
+ * g->mm.vidmem.cleared is set and from the bootstrap allocator before that.
+ * On success @mem receives a single-entry sg_table recording the allocation,
+ * APERTURE_VIDMEM as its aperture and a pointer to the allocator used.
+ *
+ * Returns 0 on success; -ENOSYS when vidmem support is compiled out or the
+ * vidmem allocator is not initialized; -EAGAIN when the allocation failed
+ * while bytes were still counted in vidmem.bytes_pending; -ENOMEM when it
+ * failed otherwise or the sg_table could not be allocated; or the error
+ * returned by sg_alloc_table().
+ */
+int gk20a_gmmu_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
+                size_t size, struct nvgpu_mem *mem, dma_addr_t at)
+{
+#if defined(CONFIG_GK20A_VIDMEM)
+        u64 addr;
+        int err;
+        struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ?
+                &g->mm.vidmem.allocator :
+                &g->mm.vidmem.bootstrap_allocator;
+        /* Full 64-bit width: an int would truncate the atomic64 counter. */
+        s64 before_pending;
+        gk20a_dbg_fn("");
+        if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
+                return -ENOSYS;
+        /*
+         * Our own allocator doesn't have any flags yet, and we can't
+         * kernel-map these, so require explicit flags.
+         */
+        WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING);
+        /* Sample the pending count and allocate under the clear-list lock. */
+        nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
+        before_pending = atomic64_read(&g->mm.vidmem.bytes_pending);
+        addr = __gk20a_gmmu_alloc(vidmem_alloc, at, size);
+        nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
+        if (!addr) {
+                /*
+                 * If memory is known to be freed soon, let the user know that
+                 * it may be available after a while.
+                 */
+                if (before_pending)
+                        return -EAGAIN;
+                else
+                        return -ENOMEM;
+        }
+        mem->fixed = (at != 0);
+        mem->sgt = nvgpu_kzalloc(g, sizeof(struct sg_table));
+        if (!mem->sgt) {
+                err = -ENOMEM;
+                goto fail_physfree;
+        }
+        err = sg_alloc_table(mem->sgt, 1, GFP_KERNEL);
+        if (err)
+                goto fail_kfree;
+        /* The single sg entry carries the vidmem allocation, not a page. */
+        set_vidmem_page_alloc(mem->sgt->sgl, addr);
+        sg_set_page(mem->sgt->sgl, NULL, size, 0);
+        mem->size = size;
+        mem->aperture = APERTURE_VIDMEM;
+        mem->allocator = vidmem_alloc;
+        mem->flags = flags;
+        nvgpu_init_list_node(&mem->clear_list_entry);
+        gk20a_dbg_fn("done at 0x%llx size %zu", addr, size);
+        return 0;
+fail_kfree:
+        nvgpu_kfree(g, mem->sgt);
+        /* Do not leave a dangling pointer behind for the caller. */
+        mem->sgt = NULL;
+fail_physfree:
+        /* Give the range back to the allocator it was actually taken from. */
+        nvgpu_free(vidmem_alloc, addr);
+        return err;
+#else
+        return -ENOSYS;
+#endif
+}
+/*
+ * Allocate @size bytes for @mem and map them into @vm without any
+ * NVGPU_DMA_* flags; shorthand for gk20a_gmmu_alloc_map_flags() with
+ * flags == 0.
+ */
+int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size,
+                struct nvgpu_mem *mem)
+{
+        const unsigned long no_flags = 0;
+        return gk20a_gmmu_alloc_map_flags(vm, no_flags, size, mem);
+}
+/*
+ * Allocate @size bytes for @mem and map them into @vm, trying video memory
+ * first when vm->mm->vidmem_is_vidmem is set and using system memory
+ * otherwise or as a fallback. Returns 0 on success or the sysmem path's
+ * error code.
+ */
+int gk20a_gmmu_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
+                size_t size, struct nvgpu_mem *mem)
+{
+        int vid_err;
+        if (!vm->mm->vidmem_is_vidmem)
+                return gk20a_gmmu_alloc_map_flags_sys(vm, flags, size, mem);
+        /*
+         * Vidmem buffers are never kernel-mapped, so the no-kernel-mapping
+         * flag is always added for the vidmem attempt. Callers that depend
+         * on the distinction are expected to pass the flag themselves.
+         */
+        vid_err = gk20a_gmmu_alloc_map_flags_vid(vm,
+                        flags | NVGPU_DMA_NO_KERNEL_MAPPING,
+                        size, mem);
+        if (vid_err == 0)
+                return 0;
+        /*
+         * The vidmem attempt failed (e.g. vidmem exhausted): retry from
+         * sysmem with the caller's original flags. This may fail as well.
+         */
+        return gk20a_gmmu_alloc_map_flags_sys(vm, flags, size, mem);
+}
+/*
+ * Allocate @size bytes of system memory for @mem and map them into @vm
+ * without any NVGPU_DMA_* flags; shorthand for
+ * gk20a_gmmu_alloc_map_flags_sys() with flags == 0.
+ */
+int gk20a_gmmu_alloc_map_sys(struct vm_gk20a *vm, size_t size,
+                struct nvgpu_mem *mem)
+{
+        const unsigned long no_flags = 0;
+        return gk20a_gmmu_alloc_map_flags_sys(vm, no_flags, size, mem);
+}
+/*
+ * Allocate @size bytes of system memory for @mem with @flags and map the
+ * buffer into @vm, storing the resulting GPU address in mem->gpu_va.
+ *
+ * Returns 0 on success, the allocator's error code when allocation fails,
+ * or -ENOMEM when gk20a_gmmu_map() yields a zero GPU address; in the latter
+ * case the freshly allocated buffer is released again.
+ */
+int gk20a_gmmu_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
+                size_t size, struct nvgpu_mem *mem)
+{
+        int alloc_err;
+        alloc_err = gk20a_gmmu_alloc_flags_sys(vm->mm->g, flags, size, mem);
+        if (alloc_err)
+                return alloc_err;
+        mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0,
+                                     gk20a_mem_flag_none, false,
+                                     mem->aperture);
+        if (mem->gpu_va)
+                return 0;
+        /* Mapping failed: undo the allocation before reporting the error. */
+        gk20a_gmmu_free(vm->mm->g, mem);
+        return -ENOMEM;
+}
+/*
+ * Allocate @size bytes of video memory for @mem and map them into @vm.
+ * Always passes NVGPU_DMA_NO_KERNEL_MAPPING to
+ * gk20a_gmmu_alloc_map_flags_vid() and returns its result.
+ */
+int gk20a_gmmu_alloc_map_vid(struct vm_gk20a *vm, size_t size,
+                struct nvgpu_mem *mem)
+{
+        const unsigned long vid_flags = NVGPU_DMA_NO_KERNEL_MAPPING;
+        return gk20a_gmmu_alloc_map_flags_vid(vm, vid_flags, size, mem);
+}
+/*
+ * Allocate @size bytes of video memory for @mem with @flags and map the
+ * buffer into @vm, storing the resulting GPU address in mem->gpu_va.
+ *
+ * Returns 0 on success, the allocator's error code when allocation fails,
+ * or -ENOMEM when gk20a_gmmu_map() yields a zero GPU address; in the latter
+ * case gk20a_gmmu_free() is called on @mem before returning.
+ */
+int gk20a_gmmu_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
+                size_t size, struct nvgpu_mem *mem)
+{
+        int alloc_err;
+        alloc_err = gk20a_gmmu_alloc_flags_vid(vm->mm->g, flags, size, mem);
+        if (alloc_err)
+                return alloc_err;
+        mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0,
+                                     gk20a_mem_flag_none, false,
+                                     mem->aperture);
+        if (mem->gpu_va)
+                return 0;
+        /* Mapping failed: undo the allocation before reporting the error. */
+        gk20a_gmmu_free(vm->mm->g, mem);
+        return -ENOMEM;
+}
+/*
+ * Release a system-memory buffer described by @mem.
+ *
+ * If a DMA handle is present (mem->cpu_va or mem->pages), the buffer is freed
+ * through the interface matching mem->flags: dma_free_coherent() for
+ * flags == 0, otherwise dma_free_attrs() with the converted attributes, using
+ * mem->pages as the handle when NVGPU_DMA_NO_KERNEL_MAPPING is set. The
+ * sg_table is then freed if present, and mem->size / mem->aperture are reset.
+ */
+static void gk20a_gmmu_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
+{
+        struct device *d = dev_from_gk20a(g);
+        if (mem->cpu_va || mem->pages) {
+                /* The DMA address is recorded in the first sg entry. */
+                dma_addr_t iova = sg_dma_address(mem->sgt->sgl);
+                if (!mem->flags) {
+                        dma_free_coherent(d, mem->size, mem->cpu_va, iova);
+                } else {
+                        DEFINE_DMA_ATTRS(dma_attrs);
+                        gk20a_dma_flags_to_attrs(&dma_attrs, mem->flags);
+                        if (mem->flags & NVGPU_DMA_NO_KERNEL_MAPPING)
+                                dma_free_attrs(d, mem->size, mem->pages,
+                                        iova, __DMA_ATTR(dma_attrs));
+                        else
+                                dma_free_attrs(d, mem->size, mem->cpu_va,
+                                        iova, __DMA_ATTR(dma_attrs));
+                }
+                mem->pages = NULL;
+                mem->cpu_va = NULL;
+        }
+        if (mem->sgt)
+                gk20a_free_sgtable(g, &mem->sgt);
+        mem->aperture = APERTURE_INVALID;
+        mem->size = 0;
+}
+/*
+ * Release a video-memory buffer described by @mem. Compiles to an empty
+ * function when CONFIG_GK20A_VIDMEM is not defined.
+ *
+ * Buffers flagged mem->user_mem are not freed here: they are appended to the
+ * vidmem clear list, their size is added to vidmem.bytes_pending, and the
+ * clear_mem_worker work item is (re)scheduled if the list was empty before.
+ * NOTE(review): presumably that worker clears and frees the queued buffers;
+ * its body is not visible here - confirm.
+ *
+ * All other buffers are zero-filled with nvgpu_memset(), returned to the
+ * allocator recorded in mem->allocator, and have their sg_table freed and
+ * size/aperture reset.
+ */
+static void gk20a_gmmu_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
+{
+#if defined(CONFIG_GK20A_VIDMEM)
+        bool was_empty;
+        /* Sanity check - only this supported when allocating. */
+        WARN_ON(mem->flags != NVGPU_DMA_NO_KERNEL_MAPPING);
+        if (mem->user_mem) {
+                /* List update and pending-byte accounting share one lock. */
+                nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
+                was_empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
+                nvgpu_list_add_tail(&mem->clear_list_entry,
+                              &g->mm.vidmem.clear_list_head);
+                atomic64_add(mem->size, &g->mm.vidmem.bytes_pending);
+                nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
+                /* Only the first entry on an empty list kicks the worker. */
+                if (was_empty) {
+                        cancel_work_sync(&g->mm.vidmem.clear_mem_worker);
+                        schedule_work(&g->mm.vidmem.clear_mem_worker);
+                }
+        } else {
+                /* Zero the buffer before handing the range back. */
+                nvgpu_memset(g, mem, 0, 0, mem->size);
+                nvgpu_free(mem->allocator,
+                           (u64)get_vidmem_page_alloc(mem->sgt->sgl));
+                gk20a_free_sgtable(g, &mem->sgt);
+                mem->size = 0;
+                mem->aperture = APERTURE_INVALID;
+        }
+#endif
+}
+/*
+ * Free the buffer described by @mem, dispatching on mem->aperture to the
+ * sysmem or vidmem release routine. Any other aperture value is ignored,
+ * so calling this on a never-allocated @mem behaves like free() on "null"
+ * memory.
+ */
+void gk20a_gmmu_free(struct gk20a *g, struct nvgpu_mem *mem)
+{
+        switch (mem->aperture) {
+        case APERTURE_SYSMEM:
+                gk20a_gmmu_free_sys(g, mem);
+                return;
+        case APERTURE_VIDMEM:
+                gk20a_gmmu_free_vid(g, mem);
+                return;
+        default:
+                /* Nothing allocated: nothing to release. */
+                return;
+        }
+}
+/*
+ * Tear down a buffer created by one of the gk20a_gmmu_alloc_map*() helpers:
+ * unmap it from @vm if it has a GPU address, clear mem->gpu_va, and then
+ * free the backing memory with gk20a_gmmu_free().
+ */
+void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem)
+{
+        struct gk20a *g = vm->mm->g;
+        if (mem->gpu_va != 0)
+                gk20a_gmmu_unmap(vm, mem->gpu_va, mem->size,
+                                 gk20a_mem_flag_none);
+        mem->gpu_va = 0;
+        gk20a_gmmu_free(g, mem);
+}
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index 6fb6c27e..cfe1149f 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -18,6 +18,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/highmem.h>
+#include <nvgpu/dma.h>
 #include <nvgpu/semaphore.h>
 #include <nvgpu/kmem.h>
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 0db6c21a..e70ee4a6 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -24,6 +24,7 @@
 #include <trace/events/gk20a.h>
+#include <nvgpu/dma.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/kmem.h>
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index ce76bfc3..9cc4b678 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -26,6 +26,7 @@
 #include <linux/debugfs.h>
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 #include "gk20a.h"
 #include "debug_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index d0e2be79..6be616b3 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -26,6 +26,7 @@
 #include <nvgpu/semaphore.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 #include "gk20a.h"
 #include "debug_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
index 98fa53ab..738e8c1c 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -22,6 +22,7 @@
 #include <nvgpu/kmem.h>
 #include <nvgpu/lock.h>
+#include <nvgpu/dma.h>
 #include "gk20a.h"
 #include "css_gr_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 3ed28718..d8fa7505 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -24,6 +24,7 @@
 #include <uapi/linux/nvgpu.h>
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 #include "ctxsw_trace_gk20a.h"
 #include "fecs_trace_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 1e9a8e15..c1f94eb3 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -26,6 +26,7 @@
 #endif
 #include <linux/sort.h>
+#include <nvgpu/dma.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/semaphore.h>
 #include <nvgpu/kmem.h>
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 971e2320..a9b6a546 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -29,6 +29,7 @@
 #include <linux/bsearch.h>
 #include <trace/events/gk20a.h>
+#include <nvgpu/dma.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/nvgpu_common.h>
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c
index 7c4db84e..7c73be77 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_common.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c
@@ -21,6 +21,8 @@
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
+#include <nvgpu/dma.h>
 #include "gk20a.h"
 #include "gr_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index cdd0e541..79654af3 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -30,6 +30,7 @@
 #include <uapi/linux/nvgpu.h>
 #include <trace/events/gk20a.h>
+#include <nvgpu/dma.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/pramin.h>
@@ -2519,152 +2520,6 @@ u64 gk20a_gmmu_fixed_map(struct vm_gk20a *vm,
                        aperture);
 }
-int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
-{
-        return gk20a_gmmu_alloc_flags(g, 0, size, mem);
-}
-int gk20a_gmmu_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
-                struct nvgpu_mem *mem)
-{
-        if (g->mm.vidmem_is_vidmem) {
-                /*
-                 * Force the no-kernel-mapping flag on because we don't support
-                 * the lack of it for vidmem - the user should not care when
-                 * using gk20a_gmmu_alloc_map and it's vidmem, or if there's a
-                 * difference, the user should use the flag explicitly anyway.
-                 */
-                int err = gk20a_gmmu_alloc_flags_vid(g,
-                                flags | NVGPU_DMA_NO_KERNEL_MAPPING,
-                                size, mem);
-                if (!err)
-                        return 0;
-                /*
-                 * Fall back to sysmem (which may then also fail) in case
-                 * vidmem is exhausted.
-                 */
-        }
-        return gk20a_gmmu_alloc_flags_sys(g, flags, size, mem);
-}
-int gk20a_gmmu_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
-{
-        return gk20a_gmmu_alloc_flags_sys(g, 0, size, mem);
-}
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
-static void gk20a_dma_flags_to_attrs(unsigned long *attrs,
-                unsigned long flags)
-#define ATTR_ARG(x) *x
-#else
-static void gk20a_dma_flags_to_attrs(struct dma_attrs *attrs,
-                unsigned long flags)
-#define ATTR_ARG(x) x
-#endif
-{
-        if (flags & NVGPU_DMA_NO_KERNEL_MAPPING)
-                dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs));
-        if (flags & NVGPU_DMA_FORCE_CONTIGUOUS)
-                dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs));
-        if (flags & NVGPU_DMA_READ_ONLY)
-                dma_set_attr(DMA_ATTR_READ_ONLY, ATTR_ARG(attrs));
-#undef ATTR_ARG
-}
-int gk20a_gmmu_alloc_flags_sys(struct gk20a *g, unsigned long flags,
-                size_t size, struct nvgpu_mem *mem)
-{
-        struct device *d = dev_from_gk20a(g);
-        int err;
-        dma_addr_t iova;
-        gk20a_dbg_fn("");
-        if (flags) {
-                DEFINE_DMA_ATTRS(dma_attrs);
-                gk20a_dma_flags_to_attrs(&dma_attrs, flags);
-                if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
-                        mem->pages = dma_alloc_attrs(d,
-                                        size, &iova, GFP_KERNEL,
-                                        __DMA_ATTR(dma_attrs));
-                        if (!mem->pages)
-                                return -ENOMEM;
-                } else {
-                        mem->cpu_va = dma_alloc_attrs(d,
-                                        size, &iova, GFP_KERNEL,
-                                        __DMA_ATTR(dma_attrs));
-                        if (!mem->cpu_va)
-                                return -ENOMEM;
-                }
-        } else {
-                mem->cpu_va = dma_alloc_coherent(d, size, &iova, GFP_KERNEL);
-                if (!mem->cpu_va)
-                        return -ENOMEM;
-        }
-        if (flags & NVGPU_DMA_NO_KERNEL_MAPPING)
-                err = gk20a_get_sgtable_from_pages(d, &mem->sgt, mem->pages,
-                                                   iova, size);
-        else {
-                err = gk20a_get_sgtable(d, &mem->sgt, mem->cpu_va, iova, size);
-                memset(mem->cpu_va, 0, size);
-        }
-        if (err)
-                goto fail_free;
-        mem->size = size;
-        mem->aperture = APERTURE_SYSMEM;
-        mem->flags = flags;
-        gk20a_dbg_fn("done");
-        return 0;
-fail_free:
-        dma_free_coherent(d, size, mem->cpu_va, iova);
-        mem->cpu_va = NULL;
-        mem->sgt = NULL;
-        return err;
-}
-static void gk20a_gmmu_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
-{
-        struct device *d = dev_from_gk20a(g);
-        if (mem->cpu_va || mem->pages) {
-                if (mem->flags) {
-                        DEFINE_DMA_ATTRS(dma_attrs);
-                        gk20a_dma_flags_to_attrs(&dma_attrs, mem->flags);
-                        if (mem->flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
-                                dma_free_attrs(d, mem->size, mem->pages,
-                                        sg_dma_address(mem->sgt->sgl),
-                                        __DMA_ATTR(dma_attrs));
-                        } else {
-                                dma_free_attrs(d, mem->size, mem->cpu_va,
-                                        sg_dma_address(mem->sgt->sgl),
-                                        __DMA_ATTR(dma_attrs));
-                        }
-                } else {
-                        dma_free_coherent(d, mem->size, mem->cpu_va,
-                                        sg_dma_address(mem->sgt->sgl));
-                }
-                mem->cpu_va = NULL;
-                mem->pages = NULL;
-        }
-        if (mem->sgt)
-                gk20a_free_sgtable(g, &mem->sgt);
-        mem->size = 0;
-        mem->aperture = APERTURE_INVALID;
-}
 #if defined(CONFIG_GK20A_VIDMEM)
 static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
 {
@@ -2728,153 +2583,6 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
 }
 #endif
-int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
-{
-        return gk20a_gmmu_alloc_flags_vid(g,
-                        NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
-}
-int gk20a_gmmu_alloc_flags_vid(struct gk20a *g, unsigned long flags,
-                size_t size, struct nvgpu_mem *mem)
-{
-        return gk20a_gmmu_alloc_flags_vid_at(g, flags, size, mem, 0);
-}
-#if defined(CONFIG_GK20A_VIDMEM)
-static u64 __gk20a_gmmu_alloc(struct nvgpu_allocator *allocator, dma_addr_t at,
-                                size_t size)
-{
-        u64 addr = 0;
-        if (at)
-                addr = nvgpu_alloc_fixed(allocator, at, size, 0);
-        else
-                addr = nvgpu_alloc(allocator, size);
-        return addr;
-}
-#endif
-int gk20a_gmmu_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
-                size_t size, struct nvgpu_mem *mem, dma_addr_t at)
-{
-#if defined(CONFIG_GK20A_VIDMEM)
-        u64 addr;
-        int err;
-        struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ?
-                &g->mm.vidmem.allocator :
-                &g->mm.vidmem.bootstrap_allocator;
-        int before_pending;
-        gk20a_dbg_fn("");
-        if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
-                return -ENOSYS;
-        /*
-         * Our own allocator doesn't have any flags yet, and we can't
-         * kernel-map these, so require explicit flags.
-         */
-        WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING);
-        nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
-        before_pending = atomic64_read(&g->mm.vidmem.bytes_pending);
-        addr = __gk20a_gmmu_alloc(vidmem_alloc, at, size);
-        nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
-        if (!addr) {
-                /*
-                 * If memory is known to be freed soon, let the user know that
-                 * it may be available after a while.
-                 */
-                if (before_pending)
-                        return -EAGAIN;
-                else
-                        return -ENOMEM;
-        }
-        if (at)
-                mem->fixed = true;
-        else
-                mem->fixed = false;
-        mem->sgt = nvgpu_kzalloc(g, sizeof(struct sg_table));
-        if (!mem->sgt) {
-                err = -ENOMEM;
-                goto fail_physfree;
-        }
-        err = sg_alloc_table(mem->sgt, 1, GFP_KERNEL);
-        if (err)
-                goto fail_kfree;
-        set_vidmem_page_alloc(mem->sgt->sgl, addr);
-        sg_set_page(mem->sgt->sgl, NULL, size, 0);
-        mem->size = size;
-        mem->aperture = APERTURE_VIDMEM;
-        mem->allocator = vidmem_alloc;
-        mem->flags = flags;
-        nvgpu_init_list_node(&mem->clear_list_entry);
-        gk20a_dbg_fn("done at 0x%llx size %zu", addr, size);
-        return 0;
-fail_kfree:
-        nvgpu_kfree(g, mem->sgt);
-fail_physfree:
-        nvgpu_free(&g->mm.vidmem.allocator, addr);
-        return err;
-#else
-        return -ENOSYS;
-#endif
-}
-static void gk20a_gmmu_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
-{
-#if defined(CONFIG_GK20A_VIDMEM)
-        bool was_empty;
-        /* Sanity check - only this supported when allocating. */
-        WARN_ON(mem->flags != NVGPU_DMA_NO_KERNEL_MAPPING);
-        if (mem->user_mem) {
-                nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
-                was_empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
-                nvgpu_list_add_tail(&mem->clear_list_entry,
-                              &g->mm.vidmem.clear_list_head);
-                atomic64_add(mem->size, &g->mm.vidmem.bytes_pending);
-                nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
-                if (was_empty) {
-                        cancel_work_sync(&g->mm.vidmem.clear_mem_worker);
-                        schedule_work(&g->mm.vidmem.clear_mem_worker);
-                }
-        } else {
-                nvgpu_memset(g, mem, 0, 0, mem->size);
-                nvgpu_free(mem->allocator,
-                           (u64)get_vidmem_page_alloc(mem->sgt->sgl));
-                gk20a_free_sgtable(g, &mem->sgt);
-                mem->size = 0;
-                mem->aperture = APERTURE_INVALID;
-        }
-#endif
-}
-void gk20a_gmmu_free(struct gk20a *g, struct nvgpu_mem *mem)
-{
-        switch (mem->aperture) {
-        case APERTURE_SYSMEM:
-                return gk20a_gmmu_free_sys(g, mem);
-        case APERTURE_VIDMEM:
-                return gk20a_gmmu_free_vid(g, mem);
-        default:
-                break; /* like free() on "null" memory */
-        }
-}
 /*
 * If mem is in VIDMEM, return base address in vidmem
 * else return IOVA address for SYSMEM
@@ -2938,105 +2646,6 @@ static void gk20a_vidmem_clear_mem_worker(struct work_struct *work)
 }
 #endif
-int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size,
-                struct nvgpu_mem *mem)
-{
-        return gk20a_gmmu_alloc_map_flags(vm, 0, size, mem);
-}
-int gk20a_gmmu_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
-                size_t size, struct nvgpu_mem *mem)
-{
-        if (vm->mm->vidmem_is_vidmem) {
-                /*
-                 * Force the no-kernel-mapping flag on because we don't support
-                 * the lack of it for vidmem - the user should not care when
-                 * using gk20a_gmmu_alloc_map and it's vidmem, or if there's a
-                 * difference, the user should use the flag explicitly anyway.
-                 */
-                int err = gk20a_gmmu_alloc_map_flags_vid(vm,
-                                flags | NVGPU_DMA_NO_KERNEL_MAPPING,
-                                size, mem);
-                if (!err)
-                        return 0;
-                /*
-                 * Fall back to sysmem (which may then also fail) in case
-                 * vidmem is exhausted.
-                 */
-        }
-        return gk20a_gmmu_alloc_map_flags_sys(vm, flags, size, mem);
-}
-int gk20a_gmmu_alloc_map_sys(struct vm_gk20a *vm, size_t size,
-                struct nvgpu_mem *mem)
-{
-        return gk20a_gmmu_alloc_map_flags_sys(vm, 0, size, mem);
-}
-int gk20a_gmmu_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
-                size_t size, struct nvgpu_mem *mem)
-{
-        int err = gk20a_gmmu_alloc_flags_sys(vm->mm->g, flags, size, mem);
-        if (err)
-                return err;
-        mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0,
-                                     gk20a_mem_flag_none, false,
-                                     mem->aperture);
-        if (!mem->gpu_va) {
-                err = -ENOMEM;
-                goto fail_free;
-        }
-        return 0;
-fail_free:
-        gk20a_gmmu_free(vm->mm->g, mem);
-        return err;
-}
-int gk20a_gmmu_alloc_map_vid(struct vm_gk20a *vm, size_t size,
-                struct nvgpu_mem *mem)
-{
-        return gk20a_gmmu_alloc_map_flags_vid(vm,
-                        NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
-}
-int gk20a_gmmu_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
-                size_t size, struct nvgpu_mem *mem)
-{
-        int err = gk20a_gmmu_alloc_flags_vid(vm->mm->g, flags, size, mem);
-        if (err)
-                return err;
-        mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0,
-                                     gk20a_mem_flag_none, false,
-                                     mem->aperture);
-        if (!mem->gpu_va) {
-                err = -ENOMEM;
-                goto fail_free;
-        }
-        return 0;
-fail_free:
-        gk20a_gmmu_free(vm->mm->g, mem);
-        return err;
-}
-void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem)
-{
-        if (mem->gpu_va)
-                gk20a_gmmu_unmap(vm, mem->gpu_va, mem->size, gk20a_mem_flag_none);
-        mem->gpu_va = 0;
-        gk20a_gmmu_free(vm->mm->g, mem);
-}
 dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
 {
        struct mapped_buffer_node *buffer;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index db72ca79..53366caf 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -524,56 +524,6 @@ u64 gk20a_gmmu_fixed_map(struct vm_gk20a *vm,
                bool priv,
                enum nvgpu_aperture aperture);
-/* Flags for the below gk20a_gmmu_{alloc,alloc_map}_flags* */
-/*
- * Don't create a virtual kernel mapping for the buffer but only allocate it;
- * this may save some resources. The buffer can be mapped later explicitly.
- */
-#define NVGPU_DMA_NO_KERNEL_MAPPING     (1 << 0)
-/*
- * Don't allow building the buffer from individual pages but require a
- * physically contiguous block.
- */
-#define NVGPU_DMA_FORCE_CONTIGUOUS      (1 << 1)
-/*
- * Make the mapping read-only.
- */
-#define NVGPU_DMA_READ_ONLY             (1 << 2)
-int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size,
-                struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
-                size_t size, struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_map_sys(struct vm_gk20a *vm, size_t size,
-                struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
-                size_t size, struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_map_vid(struct vm_gk20a *vm, size_t size,
-                struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
-                size_t size, struct nvgpu_mem *mem);
-void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
-                struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_flags_sys(struct gk20a *g, unsigned long flags,
-                size_t size, struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_flags_vid(struct gk20a *g, unsigned long flags,
-                size_t size, struct nvgpu_mem *mem);
-int gk20a_gmmu_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
-                size_t size, struct nvgpu_mem *mem, dma_addr_t at);
-void gk20a_gmmu_free(struct gk20a *g, struct nvgpu_mem *mem);
 static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem)
 {
        /* FIXME: the sgt/sgl may get null if this is accessed e.g. in an isr
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 591b7163..7a6bfe22 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -26,6 +26,7 @@
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 #include "gk20a.h"
 #include "gr_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index 013ce43a..3cfcbb19 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -20,6 +20,7 @@
 #include <linux/platform/tegra/mc.h>
+#include <nvgpu/dma.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/kmem.h>
diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index 41c4981d..9acc8eda 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -20,6 +20,7 @@
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 #include <nvgpu/acr/nvgpu_acr.h>
 #include "gk20a/gk20a.h"
diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c
index dae23374..78859f88 100644
--- a/drivers/gpu/nvgpu/gp106/gr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c
@@ -13,6 +13,8 @@
 * more details.
 */
+#include <nvgpu/dma.h>
 #include "gk20a/gk20a.h"
 #include "gk20a/gr_gk20a.h"
 #include "gm20b/gr_gm20b.h"
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index a7e77232..b305b895 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -15,6 +15,8 @@
 #include <linux/delay.h>
+#include <nvgpu/dma.h>
 #include "fifo_gp10b.h"
 #include "gk20a/gk20a.h"
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 8e1517f6..b9367120 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -22,6 +22,7 @@
 #include <nvgpu/timers.h>
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 #include "gk20a/gk20a.h"
 #include "gk20a/gr_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index a0dc8c55..2f894435 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -16,6 +16,8 @@
 #include <linux/pm_runtime.h>
 #include <linux/dma-mapping.h>
+#include <nvgpu/dma.h>
 #include "gk20a/gk20a.h"
 #include "gm20b/mm_gm20b.h"
 #include "mm_gp10b.h"
diff --git a/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c b/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
index e73bcd8f..bf52b5c9 100644
--- a/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
@@ -16,6 +16,8 @@
 #include <linux/pm_runtime.h>
 #include <linux/dma-mapping.h>
+#include <nvgpu/dma.h>
 #include "gk20a/gk20a.h"
 #include "rpfb_gp10b.h"
diff --git a/drivers/gpu/nvgpu/include/nvgpu/dma.h b/drivers/gpu/nvgpu/include/nvgpu/dma.h
new file mode 100644
index 00000000..d4fad584
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/dma.h
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __NVGPU_DMA_H__
+#define __NVGPU_DMA_H__
+#include <nvgpu/types.h>
+struct gk20a;
+struct vm_gk20a;
+struct nvgpu_mem;
+/*
+ * Flags for the below gk20a_gmmu_{alloc,alloc_map}_flags*
+ */
+/*
+ * Don't create a virtual kernel mapping for the buffer but only allocate it;
+ * this may save some resources. The buffer can be mapped later explicitly.
+ */
+#define NVGPU_DMA_NO_KERNEL_MAPPING     (1 << 0)
+/*
+ * Don't allow building the buffer from individual pages but require a
+ * physically contiguous block.
+ */
+#define NVGPU_DMA_FORCE_CONTIGUOUS      (1 << 1)
+/*
+ * Make the mapping read-only.
+ */
+#define NVGPU_DMA_READ_ONLY             (1 << 2)
+/**
+ * gk20a_gmmu_alloc - Allocate DMA memory
+ *
+ * @g    - The GPU.
+ * @size - Size of the allocation in bytes.
+ * @mem  - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA. Store the allocation info in @mem.
+ * Returns 0 on success and a suitable error code when there's an error. This
+ * memory can be either placed in VIDMEM or SYSMEM, whichever is more
+ * convenient for the driver.
+ */
+int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
+/**
+ * gk20a_gmmu_alloc_flags - Allocate DMA memory
+ *
+ * @g     - The GPU.
+ * @flags - Flags modifying the operation of the DMA allocation.
+ * @size  - Size of the allocation in bytes.
+ * @mem   - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA. Store the allocation info in @mem.
+ * Returns 0 on success and a suitable error code when there's an error. This
+ * memory can be either placed in VIDMEM or SYSMEM, whichever is more
+ * convenient for the driver.
+ *
+ * The following flags are accepted:
+ *
+ *   %NVGPU_DMA_NO_KERNEL_MAPPING
+ *   %NVGPU_DMA_FORCE_CONTIGUOUS
+ *   %NVGPU_DMA_READ_ONLY
+ */
+int gk20a_gmmu_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
+                struct nvgpu_mem *mem);
+/**
+ * gk20a_gmmu_alloc_sys - Allocate DMA memory
+ *
+ * @g    - The GPU.
+ * @size - Size of the allocation in bytes.
+ * @mem  - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA. Store the allocation info in @mem.
+ * Returns 0 on success and a suitable error code when there's an error. This
+ * allocates memory specifically in SYSMEM.
+ */
+int gk20a_gmmu_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
+/**
+ * gk20a_gmmu_alloc_flags_sys - Allocate DMA memory
+ *
+ * @g     - The GPU.
+ * @flags - Flags modifying the operation of the DMA allocation.
+ * @size  - Size of the allocation in bytes.
+ * @mem   - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA. Store the allocation info in @mem.
+ * Returns 0 on success and a suitable error code when there's an error. This
+ * allocates memory specifically in SYSMEM.
+ *
+ * The following flags are accepted:
+ *
+ *   %NVGPU_DMA_NO_KERNEL_MAPPING
+ *   %NVGPU_DMA_FORCE_CONTIGUOUS
+ *   %NVGPU_DMA_READ_ONLY
+ */
+int gk20a_gmmu_alloc_flags_sys(struct gk20a *g, unsigned long flags,
+                size_t size, struct nvgpu_mem *mem);
+/**
+ * gk20a_gmmu_alloc_vid - Allocate DMA memory
+ *
+ * @g    - The GPU.
+ * @size - Size of the allocation in bytes.
+ * @mem  - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA. Store the allocation info in @mem.
+ * Returns 0 on success and a suitable error code when there's an error. This
+ * allocates memory specifically in VIDMEM.
+ */
+int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
+/**
+ * gk20a_gmmu_alloc_flags_vid - Allocate DMA memory
+ *
+ * @g     - The GPU.
+ * @flags - Flags modifying the operation of the DMA allocation.
+ * @size  - Size of the allocation in bytes.
+ * @mem   - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA. Store the allocation info in @mem.
+ * Returns 0 on success and a suitable error code when there's an error. This
+ * allocates memory specifically in VIDMEM.
+ *
+ * Only the following flags are accepted:
+ *
+ *   %NVGPU_DMA_NO_KERNEL_MAPPING
+ *
+ */
+int gk20a_gmmu_alloc_flags_vid(struct gk20a *g, unsigned long flags,
+                size_t size, struct nvgpu_mem *mem);
+/**
+ * gk20a_gmmu_alloc_flags_vid_at - Allocate DMA memory
+ *
+ * @g     - The GPU.
+ * @flags - Flags modifying the operation of the DMA allocation.
+ * @size  - Size of the allocation in bytes.
+ * @mem   - Struct for storing the allocation information.
+ * @at    - A specific location to attempt to allocate memory from or 0 if the
+ *          caller does not care what the address is.
+ *
+ * Allocate memory suitable for doing DMA. Store the allocation info in @mem.
+ * Returns 0 on success and a suitable error code when there's an error. This
+ * allocates memory specifically in VIDMEM.
+ *
+ * Only the following flags are accepted:
+ *
+ *   %NVGPU_DMA_NO_KERNEL_MAPPING
+ */
+int gk20a_gmmu_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
+                size_t size, struct nvgpu_mem *mem, dma_addr_t at);
+/**
+ * gk20a_gmmu_free - Free a DMA allocation
+ *
+ * @g   - The GPU.
+ * @mem - An allocation to free.
+ *
+ * Free memory created with any of:
+ *
+ *   gk20a_gmmu_alloc()
+ *   gk20a_gmmu_alloc_flags()
+ *   gk20a_gmmu_alloc_sys()
+ *   gk20a_gmmu_alloc_flags_sys()
+ *   gk20a_gmmu_alloc_vid()
+ *   gk20a_gmmu_alloc_flags_vid()
+ *   gk20a_gmmu_alloc_flags_vid_at()
+ */
+void gk20a_gmmu_free(struct gk20a *g, struct nvgpu_mem *mem);
+/**
+ * gk20a_gmmu_alloc_map - Allocate DMA memory and map into GMMU.
+ *
+ * @vm   - VM context for GMMU mapping.
+ * @size - Size of the allocation in bytes.
+ * @mem  - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA and map that memory into the GMMU.
+ * Note this is different than mapping it into the CPU. This memory can be
+ * either placed in VIDMEM or SYSMEM, whichever is more convenient for the
+ * driver.
+ */
+int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size,
+                struct nvgpu_mem *mem);
+/**
+ * gk20a_gmmu_alloc_map_flags - Allocate DMA memory and map into GMMU.
+ *
+ * @vm    - VM context for GMMU mapping.
+ * @flags - Flags modifying the operation of the DMA allocation.
+ * @size  - Size of the allocation in bytes.
+ * @mem   - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA and map that memory into the GMMU.
+ * Note this is different than mapping it into the CPU. This memory can be
+ * either placed in VIDMEM or SYSMEM, whichever is more convenient for the
+ * driver.
+ *
+ * This version passes @flags on to the underlying DMA allocation. The accepted
+ * flags are:
+ *
+ *   %NVGPU_DMA_NO_KERNEL_MAPPING
+ *   %NVGPU_DMA_FORCE_CONTIGUOUS
+ *   %NVGPU_DMA_READ_ONLY
+ */
+int gk20a_gmmu_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
+                size_t size, struct nvgpu_mem *mem);
+/**
+ * gk20a_gmmu_alloc_map_sys - Allocate DMA memory and map into GMMU.
+ *
+ * @vm   - VM context for GMMU mapping.
+ * @size - Size of the allocation in bytes.
+ * @mem  - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA and map that memory into the GMMU.
+ * This memory will be placed in SYSMEM.
+ */
+int gk20a_gmmu_alloc_map_sys(struct vm_gk20a *vm, size_t size,
+                struct nvgpu_mem *mem);
+/**
+ * gk20a_gmmu_alloc_map_flags_sys - Allocate DMA memory and map into GMMU.
+ *
+ * @vm    - VM context for GMMU mapping.
+ * @flags - Flags modifying the operation of the DMA allocation.
+ * @size  - Size of the allocation in bytes.
+ * @mem   - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA and map that memory into the GMMU.
+ * This memory will be placed in SYSMEM.
+ *
+ * This version passes @flags on to the underlying DMA allocation. The accepted
+ * flags are:
+ *
+ *   %NVGPU_DMA_NO_KERNEL_MAPPING
+ *   %NVGPU_DMA_FORCE_CONTIGUOUS
+ *   %NVGPU_DMA_READ_ONLY
+ */
+int gk20a_gmmu_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
+                size_t size, struct nvgpu_mem *mem);
+/**
+ * gk20a_gmmu_alloc_map_vid - Allocate DMA memory and map into GMMU.
+ *
+ * @vm   - VM context for GMMU mapping.
+ * @size - Size of the allocation in bytes.
+ * @mem  - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA and map that memory into the GMMU.
+ * This memory will be placed in VIDMEM.
+ */
+int gk20a_gmmu_alloc_map_vid(struct vm_gk20a *vm, size_t size,
+                struct nvgpu_mem *mem);
+/**
+ * gk20a_gmmu_alloc_map_flags_vid - Allocate DMA memory and map into GMMU.
+ *
+ * @vm    - VM context for GMMU mapping.
+ * @flags - Flags modifying the operation of the DMA allocation.
+ * @size  - Size of the allocation in bytes.
+ * @mem   - Struct for storing the allocation information.
+ *
+ * Allocate memory suitable for doing DMA and map that memory into the GMMU.
+ * This memory will be placed in VIDMEM.
+ *
+ * This version passes @flags on to the underlying DMA allocation, which is
+ * gk20a_gmmu_alloc_flags_vid(). As documented for that function, only the
+ * following flag is accepted:
+ *
+ *   %NVGPU_DMA_NO_KERNEL_MAPPING
+ *
+ */
+int gk20a_gmmu_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
+                size_t size, struct nvgpu_mem *mem);
+/**
+ * gk20a_gmmu_unmap_free - Unmap a DMA allocation from the GMMU and free it
+ *
+ * @vm  - VM context for GMMU mapping.
+ * @mem - An allocation to free.
+ *
+ * Free memory created with any of:
+ *
+ *   gk20a_gmmu_alloc_map()
+ *   gk20a_gmmu_alloc_map_flags()
+ *   gk20a_gmmu_alloc_map_sys()
+ *   gk20a_gmmu_alloc_map_flags_sys()
+ *   gk20a_gmmu_alloc_map_vid()
+ *   gk20a_gmmu_alloc_map_flags_vid()
+ */
+void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem);
+#endif
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index cfe9322e..59fb0c4a 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -17,6 +17,7 @@
 #include <trace/events/gk20a.h>
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 #include "vgpu/vgpu.h"
 #include "gk20a/ctxsw_trace_gk20a.h"
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
index 8cb5b029..527e12e4 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
@@ -12,6 +12,7 @@
 */
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 #include "vgpu/vgpu.h"
 #include "vgpu/gm20b/vgpu_gr_gm20b.h"
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index ea81cefe..b12f8a53 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -16,6 +16,7 @@
 #include <linux/dma-mapping.h>
 #include <nvgpu/kmem.h>
+#include <nvgpu/dma.h>
 #include "vgpu/vgpu.h"
 #include "gk20a/mm_gk20a.h"