1 files changed, 534 insertions, 0 deletions
diff --git a/include/os/linux/linux-dma.c b/include/os/linux/linux-dma.c
new file mode 100644
index 0000000..d704b2a
--- /dev/null
+++ b/include/os/linux/linux-dma.c
@@ -0,0 +1,534 @@
+/*
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/dma-mapping.h>
+#include <linux/version.h>
+#include <nvgpu/log.h>
+#include <nvgpu/dma.h>
+#include <nvgpu/lock.h>
+#include <nvgpu/bug.h>
+#include <nvgpu/gmmu.h>
+#include <nvgpu/kmem.h>
+#include <nvgpu/enabled.h>
+#include <nvgpu/vidmem.h>
+#include <nvgpu/gk20a.h>
+#include <nvgpu/linux/dma.h>
+#include "platform_gk20a.h"
+#include "os_linux.h"
+#include "dmabuf_vidmem.h"
+/*
+ * DMA attribute compatibility helpers.
+ *
+ * When __DMA_ATTRS_LONGS is defined the attributes are declared as a
+ * zero-initialized struct dma_attrs and passed on by address; otherwise they
+ * are a plain unsigned long bitmask that is passed on by value.
+ *
+ * NVGPU_DEFINE_DMA_ATTRS(x) declares and zero-initializes the local variable
+ * x. NVGPU_DMA_ATTR(x) yields x in the form used as the attrs argument of the
+ * dma_*_attrs() calls in this file. Both expansions are fully parenthesized
+ * so the macros stay safe inside larger expressions.
+ */
+#ifdef __DMA_ATTRS_LONGS
+#define NVGPU_DEFINE_DMA_ATTRS(x)                                     \
+        struct dma_attrs x = {                                  \
+                .flags = { [0 ... __DMA_ATTRS_LONGS-1] = 0 },   \
+        }
+#define NVGPU_DMA_ATTR(attrs) (&(attrs))
+#else
+#define NVGPU_DEFINE_DMA_ATTRS(attrs) unsigned long attrs = 0
+#define NVGPU_DMA_ATTR(attrs) (attrs)
+#endif
+/*
+ * Enough to hold all the possible flags in string form: every flag name is
+ * followed by a separating space (including the last one) and the string is
+ * NUL terminated. When a new flag is added it must be added here as well!!
+ */
+#define NVGPU_DMA_STR_SIZE                                      \
+        sizeof("NO_KERNEL_MAPPING FORCE_CONTIGUOUS ")
+/*
+ * Build a space separated list with the names of the DMA flags set in @flags.
+ *
+ * The returned string is allocated here with nvgpu_kzalloc() and must be
+ * freed by the caller. An empty string is returned when @flags is 0 and NULL
+ * is returned when the allocation fails.
+ */
+static char *nvgpu_dma_flags_to_str(struct gk20a *g, unsigned long flags)
+{
+        char *buf = nvgpu_kzalloc(g, NVGPU_DMA_STR_SIZE);
+        /*
+         * Return early if the allocation failed (NULL) or if there's no flags
+         * (empty buffer). The latter makes it easier on the calling code to
+         * just print the string.
+         */
+        if (!buf || !flags)
+                return buf;
+        /*
+         * strncat() writes up to n characters plus a terminating NUL, so the
+         * bound passed in is the space left in buf minus one for that NUL.
+         */
+#define APPEND_FLAG(flag, str_flag)                                     \
+        do {                                                            \
+                if (flags & (flag)) {                                   \
+                        strncat(buf, str_flag,                          \
+                                NVGPU_DMA_STR_SIZE - 1 - strlen(buf));  \
+                }                                                       \
+        } while (0)
+        APPEND_FLAG(NVGPU_DMA_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING ");
+        APPEND_FLAG(NVGPU_DMA_FORCE_CONTIGUOUS,  "FORCE_CONTIGUOUS ");
+#undef APPEND_FLAG
+        return buf;
+}
+/**
+ * __dma_dbg - Emit one debug line describing a DMA alloc or free.
+ *
+ * @g     - The GPU.
+ * @size  - The requested size of the alloc (size_t).
+ * @flags - The DMA flags (unsigned long); pass 0 if there are none or if
+ *          printing them makes no sense.
+ * @type  - A string describing the type (i.e: sysmem or vidmem).
+ * @what  - A string with 'alloc' or 'free'.
+ * @func  - Function name handed to the log call.
+ * @line  - Line number handed to the log call.
+ *
+ * The line reports the requested size, the page aligned size, the current
+ * g->dma_memory_used total in kB and the flags in string form.
+ *
+ * Please use dma_dbg_alloc() and dma_dbg_free() instead of this function.
+ */
+static void __dma_dbg(struct gk20a *g, size_t size, unsigned long flags,
+                      const char *type, const char *what,
+                      const char *func, int line)
+{
+        char *flag_names = NULL;
+        /*
+         * The flag string is only built when DMA debugging is enabled, which
+         * avoids an allocation and a free in the common case.
+         */
+        if (nvgpu_log_mask_enabled(g, gpu_dbg_dma)) {
+                flag_names = nvgpu_dma_flags_to_str(g, flags);
+                __nvgpu_log_dbg(g, gpu_dbg_dma, func, line,
+                                "DMA %s: [%s] size=%-7zu "
+                                "aligned=%-7zu total=%-10llukB %s",
+                                what, type, size, PAGE_ALIGN(size),
+                                g->dma_memory_used >> 10, flag_names);
+                if (flag_names != NULL)
+                        nvgpu_kfree(g, flag_names);
+        }
+}
+/*
+ * Call-site wrappers for __dma_dbg(): they fill in the 'alloc'/'free' string
+ * and the calling function name and line number.
+ */
+#define dma_dbg_alloc(g, size, flags, type)                             \
+        __dma_dbg(g, size, flags, type, "alloc", __func__, __LINE__)
+#define dma_dbg_free(g, size, flags, type)                              \
+        __dma_dbg(g, size, flags, type, "free", __func__, __LINE__)
+/*
+ * For after the DMA alloc or free is done.
+ *
+ * The expansion deliberately carries no trailing semicolon and no trailing
+ * line continuation: the caller supplies the semicolon, and the macro body
+ * must end on its own line so it cannot swallow the definition that follows.
+ */
+#define __dma_dbg_done(g, size, type, what)                             \
+        nvgpu_log(g, gpu_dbg_dma,                                       \
+                  "DMA %s: [%s] size=%-7zu Done!",                      \
+                  what, type, size)
+#define dma_dbg_alloc_done(g, size, type)                               \
+        __dma_dbg_done(g, size, type, "alloc")
+#define dma_dbg_free_done(g, size, type)                                \
+        __dma_dbg_done(g, size, type, "free")
+#if defined(CONFIG_GK20A_VIDMEM)
+/*
+ * Allocate @size bytes from @allocator and return the allocator's result.
+ *
+ * A non-zero @at is forwarded to nvgpu_alloc_fixed(); with @at == 0 the
+ * request goes through nvgpu_alloc() instead.
+ */
+static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, u64 at,
+                                size_t size)
+{
+        if (at == 0)
+                return nvgpu_alloc(allocator, size);
+        return nvgpu_alloc_fixed(allocator, at, size, 0);
+}
+#endif
+/*
+ * Translate NVGPU_DMA_* flags into the corresponding DMA_ATTR_* attributes
+ * and set them in @attrs.
+ *
+ * The type of @attrs depends on the kernel version: an unsigned long bitmask
+ * from 4.9 on, a struct dma_attrs before that. DMA_ATTRS_ARG() adapts the
+ * pointer to what dma_set_attr() is handed in each case and only exists for
+ * the duration of this function.
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
+#define DMA_ATTRS_ARG(x) *x
+static void nvgpu_dma_flags_to_attrs(unsigned long *attrs,
+                unsigned long flags)
+#else
+#define DMA_ATTRS_ARG(x) x
+static void nvgpu_dma_flags_to_attrs(struct dma_attrs *attrs,
+                unsigned long flags)
+#endif
+{
+        if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
+                dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING,
+                             DMA_ATTRS_ARG(attrs));
+        }
+        if (flags & NVGPU_DMA_FORCE_CONTIGUOUS) {
+                dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS,
+                             DMA_ATTRS_ARG(attrs));
+        }
+#undef DMA_ATTRS_ARG
+}
+/**
+ * nvgpu_dma_alloc_flags_sys - Allocate sysmem DMA memory for an nvgpu_mem.
+ *
+ * @g     - The GPU.
+ * @flags - NVGPU_DMA_* flags for the allocation.
+ * @size  - Requested size in bytes; the allocation itself is page aligned.
+ * @mem   - The nvgpu_mem to fill in.
+ *
+ * Returns 0 on success. On failure a negative errno is returned, everything
+ * acquired so far is released, the pointers in @mem are cleared and the
+ * g->dma_memory_used accounting is rolled back.
+ */
+int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
+                size_t size, struct nvgpu_mem *mem)
+{
+        struct device *d = dev_from_gk20a(g);
+        int err;
+        dma_addr_t iova;
+        NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
+        void *alloc_ret;
+        if (nvgpu_mem_is_valid(mem)) {
+                nvgpu_warn(g, "memory leak !!");
+                WARN_ON(1);
+        }
+        /*
+         * WAR for IO coherent chips: the DMA API does not seem to generate
+         * mappings that work correctly. Unclear why - Bug ID: 2040115.
+         *
+         * Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING
+         * and then make a vmap() ourselves.
+         */
+        if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
+                flags |= NVGPU_DMA_NO_KERNEL_MAPPING;
+        /*
+         * Before the debug print so we see this in the total. But during
+         * cleanup in the fail path this has to be subtracted.
+         */
+        g->dma_memory_used += PAGE_ALIGN(size);
+        dma_dbg_alloc(g, size, flags, "sysmem");
+        /*
+         * Save the old size but for actual allocation purposes the size is
+         * going to be page aligned.
+         */
+        mem->size = size;
+        size = PAGE_ALIGN(size);
+        nvgpu_dma_flags_to_attrs(&dma_attrs, flags);
+        alloc_ret = dma_alloc_attrs(d, size, &iova,
+                                    GFP_KERNEL|__GFP_ZERO,
+                                    NVGPU_DMA_ATTR(dma_attrs));
+        if (!alloc_ret) {
+                /* Nothing to free yet, but the accounting must be undone. */
+                err = -ENOMEM;
+                goto fail_accounting;
+        }
+        if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
+                mem->priv.pages = alloc_ret;
+                err = nvgpu_get_sgtable_from_pages(g, &mem->priv.sgt,
+                                                   mem->priv.pages,
+                                                   iova, size);
+        } else {
+                mem->cpu_va = alloc_ret;
+                err = nvgpu_get_sgtable_attrs(g, &mem->priv.sgt, mem->cpu_va,
+                                        iova, size, flags);
+        }
+        if (err)
+                goto fail_free_dma;
+        if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) {
+                mem->cpu_va = vmap(mem->priv.pages,
+                                   size >> PAGE_SHIFT,
+                                   0, PAGE_KERNEL);
+                if (!mem->cpu_va) {
+                        err = -ENOMEM;
+                        goto fail_free_sgt;
+                }
+        }
+        mem->aligned_size = size;
+        mem->aperture = APERTURE_SYSMEM;
+        mem->priv.flags = flags;
+        dma_dbg_alloc_done(g, mem->size, "sysmem");
+        return 0;
+fail_free_sgt:
+        nvgpu_free_sgtable(g, &mem->priv.sgt);
+fail_free_dma:
+        dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs));
+        /* Don't leave pointers to the just-freed allocation behind. */
+        mem->cpu_va = NULL;
+        mem->priv.pages = NULL;
+        mem->priv.sgt = NULL;
+fail_accounting:
+        mem->size = 0;
+        /*
+         * mem->aligned_size is only written on success, so undo the
+         * accounting with the page aligned size that was added above.
+         */
+        g->dma_memory_used -= size;
+        return err;
+}
+/**
+ * nvgpu_dma_alloc_flags_vid_at - Allocate vidmem for an nvgpu_mem.
+ *
+ * @g     - The GPU.
+ * @flags - NVGPU_DMA_* flags; anything other than
+ *          NVGPU_DMA_NO_KERNEL_MAPPING triggers a WARN_ON().
+ * @size  - Requested size in bytes; the allocation itself is page aligned.
+ * @mem   - The nvgpu_mem to fill in.
+ * @at    - Non-zero to request a fixed address allocation, 0 otherwise.
+ *
+ * Returns 0 on success, -ENOSYS when vidmem support is compiled out or the
+ * vidmem allocator is not initialized, -EAGAIN when the allocation failed
+ * while bytes were still pending on the clear list, and another negative
+ * errno otherwise. On failure @mem->size is reset to 0.
+ */
+int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
+                size_t size, struct nvgpu_mem *mem, u64 at)
+{
+#if defined(CONFIG_GK20A_VIDMEM)
+        u64 addr;
+        int err;
+        struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ?
+                &g->mm.vidmem.allocator :
+                &g->mm.vidmem.bootstrap_allocator;
+        u64 before_pending;
+        if (nvgpu_mem_is_valid(mem)) {
+                nvgpu_warn(g, "memory leak !!");
+                WARN_ON(1);
+        }
+        dma_dbg_alloc(g, size, flags, "vidmem");
+        mem->size = size;
+        size = PAGE_ALIGN(size);
+        if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) {
+                err = -ENOSYS;
+                goto fail;
+        }
+        /*
+         * Our own allocator doesn't have any flags yet, and we can't
+         * kernel-map these, so require explicit flags.
+         */
+        WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING);
+        nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
+        before_pending = atomic64_read(&g->mm.vidmem.bytes_pending.atomic_var);
+        addr = __nvgpu_dma_alloc(vidmem_alloc, at, size);
+        nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
+        if (!addr) {
+                /*
+                 * If memory is known to be freed soon, let the user know that
+                 * it may be available after a while.
+                 */
+                err = before_pending ? -EAGAIN : -ENOMEM;
+                goto fail;
+        }
+        mem->priv.sgt = nvgpu_kzalloc(g, sizeof(struct sg_table));
+        if (!mem->priv.sgt) {
+                err = -ENOMEM;
+                goto fail_physfree;
+        }
+        err = sg_alloc_table(mem->priv.sgt, 1, GFP_KERNEL);
+        if (err)
+                goto fail_kfree;
+        nvgpu_vidmem_set_page_alloc(mem->priv.sgt->sgl, addr);
+        sg_set_page(mem->priv.sgt->sgl, NULL, size, 0);
+        /* Only flag the buffer as fixed once nothing can fail anymore. */
+        if (at)
+                mem->mem_flags |= NVGPU_MEM_FLAG_FIXED;
+        mem->aligned_size = size;
+        mem->aperture = APERTURE_VIDMEM;
+        mem->vidmem_alloc = (struct nvgpu_page_alloc *)(uintptr_t)addr;
+        mem->allocator = vidmem_alloc;
+        mem->priv.flags = flags;
+        nvgpu_init_list_node(&mem->clear_list_entry);
+        dma_dbg_alloc_done(g, mem->size, "vidmem");
+        return 0;
+fail_kfree:
+        nvgpu_kfree(g, mem->priv.sgt);
+        mem->priv.sgt = NULL;
+fail_physfree:
+        /* Hand the memory back to the allocator it actually came from. */
+        nvgpu_free(vidmem_alloc, addr);
+fail:
+        mem->size = 0;
+        return err;
+#else
+        return -ENOSYS;
+#endif
+}
+/**
+ * nvgpu_dma_free_sys - Free the sysmem backing of an nvgpu_mem.
+ *
+ * @g   - The GPU.
+ * @mem - The nvgpu_mem to release.
+ *
+ * Subtracts @mem->aligned_size from g->dma_memory_used, returns the backing
+ * memory to the DMA API unless @mem is a shadow copy or is flagged as not
+ * allocated through the DMA API, frees the scatter-gather table unless it is
+ * flagged as foreign, and finally marks @mem as invalid.
+ */
+void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
+{
+        struct device *d = dev_from_gk20a(g);
+        g->dma_memory_used -= mem->aligned_size;
+        dma_dbg_free(g, mem->size, mem->priv.flags, "sysmem");
+        if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) &&
+            !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) &&
+            (mem->cpu_va || mem->priv.pages)) {
+                /*
+                 * Free side of WAR for bug 2040115.
+                 */
+                if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
+                        vunmap(mem->cpu_va);
+                if (mem->priv.flags) {
+                        NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
+                        /*
+                         * With NO_KERNEL_MAPPING the value returned by the
+                         * allocation was stored in priv.pages, otherwise in
+                         * cpu_va; hand back whichever one applies.
+                         */
+                        void *cookie =
+                                (mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) ?
+                                (void *)mem->priv.pages : mem->cpu_va;
+                        nvgpu_dma_flags_to_attrs(&dma_attrs, mem->priv.flags);
+                        dma_free_attrs(d, mem->aligned_size, cookie,
+                                        sg_dma_address(mem->priv.sgt->sgl),
+                                        NVGPU_DMA_ATTR(dma_attrs));
+                } else {
+                        dma_free_coherent(d, mem->aligned_size, mem->cpu_va,
+                                        sg_dma_address(mem->priv.sgt->sgl));
+                }
+                mem->cpu_va = NULL;
+                mem->priv.pages = NULL;
+        }
+        /*
+         * When this flag is set we expect that pages is still populated but not
+         * by the DMA API.
+         */
+        if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) {
+                nvgpu_kfree(g, mem->priv.pages);
+                /* Don't leave a pointer to the freed page array behind. */
+                mem->priv.pages = NULL;
+        }
+        if ((mem->mem_flags & NVGPU_MEM_FLAG_FOREIGN_SGT) == 0 &&
+                        mem->priv.sgt != NULL) {
+                nvgpu_free_sgtable(g, &mem->priv.sgt);
+        }
+        dma_dbg_free_done(g, mem->size, "sysmem");
+        mem->size = 0;
+        mem->aligned_size = 0;
+        mem->aperture = APERTURE_INVALID;
+}
+/**
+ * nvgpu_dma_free_vid - Free the vidmem backing of an nvgpu_mem.
+ *
+ * @g   - The GPU.
+ * @mem - The nvgpu_mem to release.
+ *
+ * Buffers flagged NVGPU_MEM_FLAG_USER_MEM are queued on the vidmem clear
+ * list. All other buffers are zeroed, returned to their allocator and marked
+ * invalid right here. Does nothing when vidmem support is compiled out.
+ */
+void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
+{
+#if defined(CONFIG_GK20A_VIDMEM)
+        /* Read up front: @mem may be freed or have its size reset below. */
+        size_t requested_size = mem->size;
+        dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem");
+        /* Sanity check - only this supported when allocating. */
+        WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING);
+        if (!(mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM)) {
+                u64 page_alloc;
+                nvgpu_memset(g, mem, 0, 0, mem->aligned_size);
+                page_alloc =
+                        (u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl);
+                nvgpu_free(mem->allocator, page_alloc);
+                nvgpu_free_sgtable(g, &mem->priv.sgt);
+                mem->size = 0;
+                mem->aligned_size = 0;
+                mem->aperture = APERTURE_INVALID;
+        } else if (nvgpu_vidmem_clear_list_enqueue(g, mem)) {
+                /*
+                 * A failed enqueue means we can't clear the vidmem. That's
+                 * too bad; however, we still own the nvgpu_mem buf so we have
+                 * to free that.
+                 *
+                 * We don't need to worry about the vidmem allocator itself
+                 * since when that gets cleaned up in the driver shutdown path
+                 * all the outstanding allocs are force freed.
+                 */
+                nvgpu_kfree(g, mem);
+        }
+        dma_dbg_free_done(g, requested_size, "vidmem");
+#endif
+}
+/**
+ * nvgpu_get_sgtable_attrs - Build a scatter-gather table for a DMA buffer.
+ *
+ * @g     - The GPU.
+ * @sgt   - Receives the newly allocated table on success; untouched on error.
+ * @cpuva - CPU address handed to dma_get_sgtable_attrs().
+ * @iova  - DMA address; also stored as the DMA address of the first entry.
+ * @size  - Size of the buffer in bytes.
+ * @flags - NVGPU_DMA_* flags, converted to DMA attributes for the call.
+ *
+ * Returns 0 on success, -ENOMEM if the table cannot be allocated, or the
+ * error from dma_get_sgtable_attrs(). Release the table with
+ * nvgpu_free_sgtable().
+ */
+int nvgpu_get_sgtable_attrs(struct gk20a *g, struct sg_table **sgt,
+                      void *cpuva, u64 iova, size_t size, unsigned long flags)
+{
+        struct sg_table *table;
+        int ret;
+        NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
+        table = nvgpu_kzalloc(g, sizeof(*table));
+        if (table == NULL)
+                return -ENOMEM;
+        nvgpu_dma_flags_to_attrs(&dma_attrs, flags);
+        ret = dma_get_sgtable_attrs(dev_from_gk20a(g), table, cpuva, iova,
+                                    size, NVGPU_DMA_ATTR(dma_attrs));
+        if (ret != 0) {
+                nvgpu_kfree(g, table);
+                return ret;
+        }
+        sg_dma_address(table->sgl) = iova;
+        *sgt = table;
+        return 0;
+}
+/*
+ * Same as nvgpu_get_sgtable_attrs() with no DMA flags set.
+ */
+int nvgpu_get_sgtable(struct gk20a *g, struct sg_table **sgt,
+                      void *cpuva, u64 iova, size_t size)
+{
+        const unsigned long no_flags = 0;
+        return nvgpu_get_sgtable_attrs(g, sgt, cpuva, iova, size, no_flags);
+}
+/**
+ * nvgpu_get_sgtable_from_pages - Build a scatter-gather table from pages.
+ *
+ * @g     - The GPU.
+ * @sgt   - Receives the newly allocated table on success; untouched on error.
+ * @pages - Page array handed to sg_alloc_table_from_pages().
+ * @iova  - DMA address stored as the DMA address of the first entry.
+ * @size  - Size in bytes; the page count is @size rounded up to whole pages.
+ *
+ * Returns 0 on success, -ENOMEM if the table cannot be allocated, or the
+ * error from sg_alloc_table_from_pages(). Release the table with
+ * nvgpu_free_sgtable().
+ */
+int nvgpu_get_sgtable_from_pages(struct gk20a *g, struct sg_table **sgt,
+                                 struct page **pages, u64 iova, size_t size)
+{
+        struct sg_table *table;
+        int ret;
+        table = nvgpu_kzalloc(g, sizeof(*table));
+        if (table == NULL)
+                return -ENOMEM;
+        ret = sg_alloc_table_from_pages(table, pages,
+                                        DIV_ROUND_UP(size, PAGE_SIZE),
+                                        0, size, GFP_KERNEL);
+        if (ret != 0) {
+                nvgpu_kfree(g, table);
+                return ret;
+        }
+        sg_dma_address(table->sgl) = iova;
+        *sgt = table;
+        return 0;
+}
+/**
+ * nvgpu_free_sgtable - Free a scatter-gather table and clear the pointer.
+ *
+ * @g   - The GPU.
+ * @sgt - Address of the table pointer; *sgt is set to NULL afterwards.
+ *
+ * Does nothing when @sgt or *@sgt is NULL, so it is safe to call from
+ * cleanup paths where the table may never have been created.
+ */
+void nvgpu_free_sgtable(struct gk20a *g, struct sg_table **sgt)
+{
+        if (sgt == NULL || *sgt == NULL)
+                return;
+        sg_free_table(*sgt);
+        nvgpu_kfree(g, *sgt);
+        *sgt = NULL;
+}
+/*
+ * Report whether the GPU is IOMMU'able.
+ *
+ * With CONFIG_TEGRA_GK20A the answer comes from device_is_iommuable() on the
+ * nvgpu Linux device; without it this always returns true.
+ */
+bool nvgpu_iommuable(struct gk20a *g)
+{
+        bool iommuable = true;
+#ifdef CONFIG_TEGRA_GK20A
+        struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+        /*
+         * Check against the nvgpu device to see if it's been marked as
+         * IOMMU'able.
+         */
+        if (!device_is_iommuable(l->dev))
+                iommuable = false;
+#endif
+        return iommuable;
+}

diff --git a/include/os/linux/linux-dma.c b/include/os/linux/linux-dma.c new file mode 100644 index 0000000..d704b2a --- /dev/null +++ b/include/os/linux/linux-dma.c
@@ -0,0 +1,534 @@
	1	/*
	2	* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
	3	*
	4	* This program is free software; you can redistribute it and/or modify it
	5	* under the terms and conditions of the GNU General Public License,
	6	* version 2, as published by the Free Software Foundation.
	7	*
	8	* This program is distributed in the hope it will be useful, but WITHOUT
	9	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	10	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
	11	* more details.
	12	*
	13	* You should have received a copy of the GNU General Public License
	14	* along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17	#include <linux/dma-mapping.h>
	18	#include <linux/version.h>
	19
	20	#include <nvgpu/log.h>
	21	#include <nvgpu/dma.h>
	22	#include <nvgpu/lock.h>
	23	#include <nvgpu/bug.h>
	24	#include <nvgpu/gmmu.h>
	25	#include <nvgpu/kmem.h>
	26	#include <nvgpu/enabled.h>
	27	#include <nvgpu/vidmem.h>
	28	#include <nvgpu/gk20a.h>
	29
	30	#include <nvgpu/linux/dma.h>
	31
	32	#include "platform_gk20a.h"
	33	#include "os_linux.h"
	34	#include "dmabuf_vidmem.h"
	35
	36	#ifdef __DMA_ATTRS_LONGS
	37	#define NVGPU_DEFINE_DMA_ATTRS(x) \
	38	struct dma_attrs x = { \
	39	.flags = { [0 ... __DMA_ATTRS_LONGS-1] = 0 }, \
	40	}
	41	#define NVGPU_DMA_ATTR(attrs) &attrs
	42	#else
	43	#define NVGPU_DEFINE_DMA_ATTRS(attrs) unsigned long attrs = 0
	44	#define NVGPU_DMA_ATTR(attrs) attrs
	45	#endif
	46
	47	/*
	48	* Enough to hold all the possible flags in string form. When a new flag is
	49	* added it must be added here as well!!
	50	*/
	51	#define NVGPU_DMA_STR_SIZE \
	52	sizeof("NO_KERNEL_MAPPING FORCE_CONTIGUOUS")
	53
	54	/*
	55	* The returned string is kmalloc()ed here but must be freed by the caller.
	56	*/
	57	static char *nvgpu_dma_flags_to_str(struct gk20a *g, unsigned long flags)
	58	{
	59	char *buf = nvgpu_kzalloc(g, NVGPU_DMA_STR_SIZE);
	60	int bytes_available = NVGPU_DMA_STR_SIZE;
	61
	62	/*
	63	* Return the empty buffer if there's no flags. Makes it easier on the
	64	* calling code to just print it instead of any if (NULL) type logic.
	65	*/
	66	if (!flags)
	67	return buf;
	68
	69	#define APPEND_FLAG(flag, str_flag) \
	70	do { \
	71	if (flags & flag) { \
	72	strncat(buf, str_flag, bytes_available); \
	73	bytes_available -= strlen(str_flag); \
	74	} \
	75	} while (0)
	76
	77	APPEND_FLAG(NVGPU_DMA_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING ");
	78	APPEND_FLAG(NVGPU_DMA_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS ");
	79	#undef APPEND_FLAG
	80
	81	return buf;
	82	}
	83
	84	/**
	85	* __dma_dbg - Debug print for DMA allocs and frees.
	86	*
	87	* @g - The GPU.
	88	* @size - The requested size of the alloc (size_t).
	89	* @flags - The flags (unsigned long).
	90	* @type - A string describing the type (i.e: sysmem or vidmem).
	91	* @what - A string with 'alloc' or 'free'.
	92	*
	93	* @flags is the DMA flags. If there are none or it doesn't make sense to print
	94	* flags just pass 0.
	95	*
	96	* Please use dma_dbg_alloc() and dma_dbg_free() instead of this function.
	97	*/
	98	static void __dma_dbg(struct gk20a *g, size_t size, unsigned long flags,
	99	const char *type, const char *what,
	100	const char *func, int line)
	101	{
	102	char *flags_str = NULL;
	103
	104	/*
	105	* Don't bother making the flags_str if debugging is
	106	* not enabled. This saves a malloc and a free.
	107	*/
	108	if (!nvgpu_log_mask_enabled(g, gpu_dbg_dma))
	109	return;
	110
	111	flags_str = nvgpu_dma_flags_to_str(g, flags);
	112
	113	__nvgpu_log_dbg(g, gpu_dbg_dma,
	114	func, line,
	115	"DMA %s: [%s] size=%-7zu "
	116	"aligned=%-7zu total=%-10llukB %s",
	117	what, type,
	118	size, PAGE_ALIGN(size),
	119	g->dma_memory_used >> 10,
	120	flags_str);
	121
	122	if (flags_str)
	123	nvgpu_kfree(g, flags_str);
	124	}
	125
	126	#define dma_dbg_alloc(g, size, flags, type) \
	127	__dma_dbg(g, size, flags, type, "alloc", __func__, __LINE__)
	128	#define dma_dbg_free(g, size, flags, type) \
	129	__dma_dbg(g, size, flags, type, "free", __func__, __LINE__)
	130
	131	/*
	132	* For after the DMA alloc is done.
	133	*/
	134	#define __dma_dbg_done(g, size, type, what) \
	135	nvgpu_log(g, gpu_dbg_dma, \
	136	"DMA %s: [%s] size=%-7zu Done!", \
	137	what, type, size); \
	138
	139	#define dma_dbg_alloc_done(g, size, type) \
	140	__dma_dbg_done(g, size, type, "alloc")
	141	#define dma_dbg_free_done(g, size, type) \
	142	__dma_dbg_done(g, size, type, "free")
	143
	144	#if defined(CONFIG_GK20A_VIDMEM)
	145	static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, u64 at,
	146	size_t size)
	147	{
	148	u64 addr = 0;
	149
	150	if (at)
	151	addr = nvgpu_alloc_fixed(allocator, at, size, 0);
	152	else
	153	addr = nvgpu_alloc(allocator, size);
	154
	155	return addr;
	156	}
	157	#endif
	158
	159	#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
	160	static void nvgpu_dma_flags_to_attrs(unsigned long *attrs,
	161	unsigned long flags)
	162	#define ATTR_ARG(x) *x
	163	#else
	164	static void nvgpu_dma_flags_to_attrs(struct dma_attrs *attrs,
	165	unsigned long flags)
	166	#define ATTR_ARG(x) x
	167	#endif
	168	{
	169	if (flags & NVGPU_DMA_NO_KERNEL_MAPPING)
	170	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs));
	171	if (flags & NVGPU_DMA_FORCE_CONTIGUOUS)
	172	dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs));
	173	#undef ATTR_ARG
	174	}
	175
	176	int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
	177	size_t size, struct nvgpu_mem *mem)
	178	{
	179	struct device *d = dev_from_gk20a(g);
	180	int err;
	181	dma_addr_t iova;
	182	NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
	183	void *alloc_ret;
	184
	185	if (nvgpu_mem_is_valid(mem)) {
	186	nvgpu_warn(g, "memory leak !!");
	187	WARN_ON(1);
	188	}
	189
	190	/*
	191	* WAR for IO coherent chips: the DMA API does not seem to generate
	192	* mappings that work correctly. Unclear why - Bug ID: 2040115.
	193	*
	194	* Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING
	195	* and then make a vmap() ourselves.
	196	*/
	197	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
	198	flags |= NVGPU_DMA_NO_KERNEL_MAPPING;
	199
	200	/*
	201	* Before the debug print so we see this in the total. But during
	202	* cleanup in the fail path this has to be subtracted.
	203	*/
	204	g->dma_memory_used += PAGE_ALIGN(size);
	205
	206	dma_dbg_alloc(g, size, flags, "sysmem");
	207
	208	/*
	209	* Save the old size but for actual allocation purposes the size is
	210	* going to be page aligned.
	211	*/
	212	mem->size = size;
	213	size = PAGE_ALIGN(size);
	214
	215	nvgpu_dma_flags_to_attrs(&dma_attrs, flags);
	216
	217	alloc_ret = dma_alloc_attrs(d, size, &iova,
	218	GFP_KERNEL|__GFP_ZERO,
	219	NVGPU_DMA_ATTR(dma_attrs));
	220	if (!alloc_ret)
	221	return -ENOMEM;
	222
	223	if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
	224	mem->priv.pages = alloc_ret;
	225	err = nvgpu_get_sgtable_from_pages(g, &mem->priv.sgt,
	226	mem->priv.pages,
	227	iova, size);
	228	} else {
	229	mem->cpu_va = alloc_ret;
	230	err = nvgpu_get_sgtable_attrs(g, &mem->priv.sgt, mem->cpu_va,
	231	iova, size, flags);
	232	}
	233	if (err)
	234	goto fail_free_dma;
	235
	236	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) {
	237	mem->cpu_va = vmap(mem->priv.pages,
	238	size >> PAGE_SHIFT,
	239	0, PAGE_KERNEL);
	240	if (!mem->cpu_va) {
	241	err = -ENOMEM;
	242	goto fail_free_sgt;
	243	}
	244	}
	245
	246	mem->aligned_size = size;
	247	mem->aperture = APERTURE_SYSMEM;
	248	mem->priv.flags = flags;
	249
	250	dma_dbg_alloc_done(g, mem->size, "sysmem");
	251
	252	return 0;
	253
	254	fail_free_sgt:
	255	nvgpu_free_sgtable(g, &mem->priv.sgt);
	256	fail_free_dma:
	257	dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs));
	258	mem->cpu_va = NULL;
	259	mem->priv.sgt = NULL;
	260	mem->size = 0;
	261	g->dma_memory_used -= mem->aligned_size;
	262	return err;
	263	}
	264
	265	int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
	266	size_t size, struct nvgpu_mem *mem, u64 at)
	267	{
	268	#if defined(CONFIG_GK20A_VIDMEM)
	269	u64 addr;
	270	int err;
	271	struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ?
	272	&g->mm.vidmem.allocator :
	273	&g->mm.vidmem.bootstrap_allocator;
	274	u64 before_pending;
	275
	276	if (nvgpu_mem_is_valid(mem)) {
	277	nvgpu_warn(g, "memory leak !!");
	278	WARN_ON(1);
	279	}
	280
	281	dma_dbg_alloc(g, size, flags, "vidmem");
	282
	283	mem->size = size;
	284	size = PAGE_ALIGN(size);
	285
	286	if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
	287	return -ENOSYS;
	288
	289	/*
	290	* Our own allocator doesn't have any flags yet, and we can't
	291	* kernel-map these, so require explicit flags.
	292	*/
	293	WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING);
	294
	295	nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
	296	before_pending = atomic64_read(&g->mm.vidmem.bytes_pending.atomic_var);
	297	addr = __nvgpu_dma_alloc(vidmem_alloc, at, size);
	298	nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
	299	if (!addr) {
	300	/*
	301	* If memory is known to be freed soon, let the user know that
	302	* it may be available after a while.
	303	*/
	304	if (before_pending)
	305	return -EAGAIN;
	306	else
	307	return -ENOMEM;
	308	}
	309
	310	if (at)
	311	mem->mem_flags |= NVGPU_MEM_FLAG_FIXED;
	312
	313	mem->priv.sgt = nvgpu_kzalloc(g, sizeof(struct sg_table));
	314	if (!mem->priv.sgt) {
	315	err = -ENOMEM;
	316	goto fail_physfree;
	317	}
	318
	319	err = sg_alloc_table(mem->priv.sgt, 1, GFP_KERNEL);
	320	if (err)
	321	goto fail_kfree;
	322
	323	nvgpu_vidmem_set_page_alloc(mem->priv.sgt->sgl, addr);
	324	sg_set_page(mem->priv.sgt->sgl, NULL, size, 0);
	325
	326	mem->aligned_size = size;
	327	mem->aperture = APERTURE_VIDMEM;
	328	mem->vidmem_alloc = (struct nvgpu_page_alloc *)(uintptr_t)addr;
	329	mem->allocator = vidmem_alloc;
	330	mem->priv.flags = flags;
	331
	332	nvgpu_init_list_node(&mem->clear_list_entry);
	333
	334	dma_dbg_alloc_done(g, mem->size, "vidmem");
	335
	336	return 0;
	337
	338	fail_kfree:
	339	nvgpu_kfree(g, mem->priv.sgt);
	340	fail_physfree:
	341	nvgpu_free(&g->mm.vidmem.allocator, addr);
	342	mem->size = 0;
	343	return err;
	344	#else
	345	return -ENOSYS;
	346	#endif
	347	}
	348
	349	void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
	350	{
	351	struct device *d = dev_from_gk20a(g);
	352
	353	g->dma_memory_used -= mem->aligned_size;
	354
	355	dma_dbg_free(g, mem->size, mem->priv.flags, "sysmem");
	356
	357	if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) &&
	358	!(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) &&
	359	(mem->cpu_va || mem->priv.pages)) {
	360	/*
	361	* Free side of WAR for bug 2040115.
	362	*/
	363	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
	364	vunmap(mem->cpu_va);
	365
	366	if (mem->priv.flags) {
	367	NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
	368
	369	nvgpu_dma_flags_to_attrs(&dma_attrs, mem->priv.flags);
	370
	371	if (mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
	372	dma_free_attrs(d, mem->aligned_size, mem->priv.pages,
	373	sg_dma_address(mem->priv.sgt->sgl),
	374	NVGPU_DMA_ATTR(dma_attrs));
	375	} else {
	376	dma_free_attrs(d, mem->aligned_size, mem->cpu_va,
	377	sg_dma_address(mem->priv.sgt->sgl),
	378	NVGPU_DMA_ATTR(dma_attrs));
	379	}
	380	} else {
	381	dma_free_coherent(d, mem->aligned_size, mem->cpu_va,
	382	sg_dma_address(mem->priv.sgt->sgl));
	383	}
	384	mem->cpu_va = NULL;
	385	mem->priv.pages = NULL;
	386	}
	387
	388	/*
	389	* When this flag is set we expect that pages is still populated but not
	390	* by the DMA API.
	391	*/
	392	if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA)
	393	nvgpu_kfree(g, mem->priv.pages);
	394
	395	if ((mem->mem_flags & NVGPU_MEM_FLAG_FOREIGN_SGT) == 0 &&
	396	mem->priv.sgt != NULL) {
	397	nvgpu_free_sgtable(g, &mem->priv.sgt);
	398	}
	399
	400	dma_dbg_free_done(g, mem->size, "sysmem");
	401
	402	mem->size = 0;
	403	mem->aligned_size = 0;
	404	mem->aperture = APERTURE_INVALID;
	405	}
	406
	407	void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
	408	{
	409	#if defined(CONFIG_GK20A_VIDMEM)
	410	size_t mem_size = mem->size;
	411
	412	dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem");
	413
	414	/* Sanity check - only this supported when allocating. */
	415	WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING);
	416
	417	if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) {
	418	int err = nvgpu_vidmem_clear_list_enqueue(g, mem);
	419
	420	/*
	421	* If there's an error here then that means we can't clear the
	422	* vidmem. That's too bad; however, we still own the nvgpu_mem
	423	* buf so we have to free that.
	424	*
	425	* We don't need to worry about the vidmem allocator itself
	426	* since when that gets cleaned up in the driver shutdown path
	427	* all the outstanding allocs are force freed.
	428	*/
	429	if (err)
	430	nvgpu_kfree(g, mem);
	431	} else {
	432	nvgpu_memset(g, mem, 0, 0, mem->aligned_size);
	433	nvgpu_free(mem->allocator,
	434	(u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl));
	435	nvgpu_free_sgtable(g, &mem->priv.sgt);
	436
	437	mem->size = 0;
	438	mem->aligned_size = 0;
	439	mem->aperture = APERTURE_INVALID;
	440	}
	441
	442	dma_dbg_free_done(g, mem_size, "vidmem");
	443	#endif
	444	}
	445
	446	int nvgpu_get_sgtable_attrs(struct gk20a *g, struct sg_table **sgt,
	447	void *cpuva, u64 iova, size_t size, unsigned long flags)
	448	{
	449	int err = 0;
	450	struct sg_table *tbl;
	451	NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
	452
	453	tbl = nvgpu_kzalloc(g, sizeof(struct sg_table));
	454	if (!tbl) {
	455	err = -ENOMEM;
	456	goto fail;
	457	}
	458
	459	nvgpu_dma_flags_to_attrs(&dma_attrs, flags);
	460	err = dma_get_sgtable_attrs(dev_from_gk20a(g), tbl, cpuva, iova,
	461	size, NVGPU_DMA_ATTR(dma_attrs));
	462	if (err)
	463	goto fail;
	464
	465	sg_dma_address(tbl->sgl) = iova;
	466	*sgt = tbl;
	467
	468	return 0;
	469
	470	fail:
	471	if (tbl)
	472	nvgpu_kfree(g, tbl);
	473
	474	return err;
	475	}
	476
	477	int nvgpu_get_sgtable(struct gk20a *g, struct sg_table **sgt,
	478	void *cpuva, u64 iova, size_t size)
	479	{
	480	return nvgpu_get_sgtable_attrs(g, sgt, cpuva, iova, size, 0);
	481	}
	482
	483	int nvgpu_get_sgtable_from_pages(struct gk20a *g, struct sg_table **sgt,
	484	struct page **pages, u64 iova, size_t size)
	485	{
	486	int err = 0;
	487	struct sg_table *tbl;
	488
	489	tbl = nvgpu_kzalloc(g, sizeof(struct sg_table));
	490	if (!tbl) {
	491	err = -ENOMEM;
	492	goto fail;
	493	}
	494
	495	err = sg_alloc_table_from_pages(tbl, pages,
	496	DIV_ROUND_UP(size, PAGE_SIZE),
	497	0, size, GFP_KERNEL);
	498	if (err)
	499	goto fail;
	500
	501	sg_dma_address(tbl->sgl) = iova;
	502	*sgt = tbl;
	503
	504	return 0;
	505
	506	fail:
	507	if (tbl)
	508	nvgpu_kfree(g, tbl);
	509
	510	return err;
	511	}
	512
	513	void nvgpu_free_sgtable(struct gk20a *g, struct sg_table **sgt)
	514	{
	515	sg_free_table(*sgt);
	516	nvgpu_kfree(g, *sgt);
	517	*sgt = NULL;
	518	}
	519
	520	bool nvgpu_iommuable(struct gk20a *g)
	521	{
	522	#ifdef CONFIG_TEGRA_GK20A
	523	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	524
	525	/*
	526	* Check against the nvgpu device to see if it's been marked as
	527	* IOMMU'able.
	528	*/
	529	if (!device_is_iommuable(l->dev))
	530	return false;
	531	#endif
	532
	533	return true;
	534	}