From 8f2d4a3f4a0acc81bae6725d30506e92651a42b5 Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Tue, 21 Mar 2017 14:32:13 -0700
Subject: gpu: nvgpu: Move DMA API to dma.h

Make an nvgpu DMA API include file so that the intricacies of the
Linux DMA API can be hidden from the calling code. Also document the
nvgpu DMA API.

JIRA NVGPU-12

Change-Id: I7578e4c726ad46344b7921179d95861858e9a27e
Signed-off-by: Alex Waterman
Reviewed-on: http://git-master/r/1323326
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/linux/dma.c | 415 +++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/common/semaphore.c |   1 +
 2 files changed, 416 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/common/linux/dma.c

(limited to 'drivers/gpu/nvgpu/common')

diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
new file mode 100644
index 00000000..755848ea
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/dma.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include
+#include
+
+#include
+#include
+
+#include "gk20a/gk20a.h"
+
+#if defined(CONFIG_GK20A_VIDMEM)
+static u64 __gk20a_gmmu_alloc(struct nvgpu_allocator *allocator, dma_addr_t at,
+				size_t size)
+{
+	u64 addr = 0;
+
+	if (at)
+		addr = nvgpu_alloc_fixed(allocator, at, size, 0);
+	else
+		addr = nvgpu_alloc(allocator, size);
+
+	return addr;
+}
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
+static void gk20a_dma_flags_to_attrs(unsigned long *attrs,
+		unsigned long flags)
+#define ATTR_ARG(x) *x
+#else
+static void gk20a_dma_flags_to_attrs(struct dma_attrs *attrs,
+		unsigned long flags)
+#define ATTR_ARG(x) x
+#endif
+{
+	if (flags & NVGPU_DMA_NO_KERNEL_MAPPING)
+		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs));
+	if (flags & NVGPU_DMA_FORCE_CONTIGUOUS)
+		dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs));
+	if (flags & NVGPU_DMA_READ_ONLY)
+		dma_set_attr(DMA_ATTR_READ_ONLY, ATTR_ARG(attrs));
+#undef ATTR_ARG
+}
+
+int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
+{
+	return gk20a_gmmu_alloc_flags(g, 0, size, mem);
+}
+
+int gk20a_gmmu_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
+		struct nvgpu_mem *mem)
+{
+	if (g->mm.vidmem_is_vidmem) {
+		/*
+		 * Force the no-kernel-mapping flag on because we don't support
+		 * the lack of it for vidmem - the user should not care when
+		 * using gk20a_gmmu_alloc_map and it's vidmem, or if there's a
+		 * difference, the user should use the flag explicitly anyway.
+		 */
+		int err = gk20a_gmmu_alloc_flags_vid(g,
+				flags | NVGPU_DMA_NO_KERNEL_MAPPING,
+				size, mem);
+
+		if (!err)
+			return 0;
+		/*
+		 * Fall back to sysmem (which may then also fail) in case
+		 * vidmem is exhausted.
+ */ + } + + return gk20a_gmmu_alloc_flags_sys(g, flags, size, mem); +} + +int gk20a_gmmu_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem) +{ + return gk20a_gmmu_alloc_flags_sys(g, 0, size, mem); +} + +int gk20a_gmmu_alloc_flags_sys(struct gk20a *g, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + struct device *d = dev_from_gk20a(g); + int err; + dma_addr_t iova; + + gk20a_dbg_fn(""); + + if (flags) { + DEFINE_DMA_ATTRS(dma_attrs); + + gk20a_dma_flags_to_attrs(&dma_attrs, flags); + + if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) { + mem->pages = dma_alloc_attrs(d, + size, &iova, GFP_KERNEL, + __DMA_ATTR(dma_attrs)); + if (!mem->pages) + return -ENOMEM; + } else { + mem->cpu_va = dma_alloc_attrs(d, + size, &iova, GFP_KERNEL, + __DMA_ATTR(dma_attrs)); + if (!mem->cpu_va) + return -ENOMEM; + } + } else { + mem->cpu_va = dma_alloc_coherent(d, size, &iova, GFP_KERNEL); + if (!mem->cpu_va) + return -ENOMEM; + } + + if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) + err = gk20a_get_sgtable_from_pages(d, &mem->sgt, mem->pages, + iova, size); + else { + err = gk20a_get_sgtable(d, &mem->sgt, mem->cpu_va, iova, size); + memset(mem->cpu_va, 0, size); + } + if (err) + goto fail_free; + + mem->size = size; + mem->aperture = APERTURE_SYSMEM; + mem->flags = flags; + + gk20a_dbg_fn("done"); + + return 0; + +fail_free: + dma_free_coherent(d, size, mem->cpu_va, iova); + mem->cpu_va = NULL; + mem->sgt = NULL; + return err; +} + +int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem) +{ + return gk20a_gmmu_alloc_flags_vid(g, + NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); +} + +int gk20a_gmmu_alloc_flags_vid(struct gk20a *g, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + return gk20a_gmmu_alloc_flags_vid_at(g, flags, size, mem, 0); +} + +int gk20a_gmmu_alloc_flags_vid_at(struct gk20a *g, unsigned long flags, + size_t size, struct nvgpu_mem *mem, dma_addr_t at) +{ +#if defined(CONFIG_GK20A_VIDMEM) + u64 addr; + int err; + struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ? + &g->mm.vidmem.allocator : + &g->mm.vidmem.bootstrap_allocator; + int before_pending; + + gk20a_dbg_fn(""); + + if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) + return -ENOSYS; + + /* + * Our own allocator doesn't have any flags yet, and we can't + * kernel-map these, so require explicit flags. + */ + WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING); + + nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); + before_pending = atomic64_read(&g->mm.vidmem.bytes_pending); + addr = __gk20a_gmmu_alloc(vidmem_alloc, at, size); + nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); + if (!addr) { + /* + * If memory is known to be freed soon, let the user know that + * it may be available after a while. 
+ */ + if (before_pending) + return -EAGAIN; + else + return -ENOMEM; + } + + if (at) + mem->fixed = true; + else + mem->fixed = false; + + mem->sgt = nvgpu_kzalloc(g, sizeof(struct sg_table)); + if (!mem->sgt) { + err = -ENOMEM; + goto fail_physfree; + } + + err = sg_alloc_table(mem->sgt, 1, GFP_KERNEL); + if (err) + goto fail_kfree; + + set_vidmem_page_alloc(mem->sgt->sgl, addr); + sg_set_page(mem->sgt->sgl, NULL, size, 0); + + mem->size = size; + mem->aperture = APERTURE_VIDMEM; + mem->allocator = vidmem_alloc; + mem->flags = flags; + + nvgpu_init_list_node(&mem->clear_list_entry); + + gk20a_dbg_fn("done at 0x%llx size %zu", addr, size); + + return 0; + +fail_kfree: + nvgpu_kfree(g, mem->sgt); +fail_physfree: + nvgpu_free(&g->mm.vidmem.allocator, addr); + return err; +#else + return -ENOSYS; +#endif +} + +int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem) +{ + return gk20a_gmmu_alloc_map_flags(vm, 0, size, mem); +} + +int gk20a_gmmu_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + if (vm->mm->vidmem_is_vidmem) { + /* + * Force the no-kernel-mapping flag on because we don't support + * the lack of it for vidmem - the user should not care when + * using gk20a_gmmu_alloc_map and it's vidmem, or if there's a + * difference, the user should use the flag explicitly anyway. + */ + int err = gk20a_gmmu_alloc_map_flags_vid(vm, + flags | NVGPU_DMA_NO_KERNEL_MAPPING, + size, mem); + + if (!err) + return 0; + /* + * Fall back to sysmem (which may then also fail) in case + * vidmem is exhausted. + */ + } + + return gk20a_gmmu_alloc_map_flags_sys(vm, flags, size, mem); +} + +int gk20a_gmmu_alloc_map_sys(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem) +{ + return gk20a_gmmu_alloc_map_flags_sys(vm, 0, size, mem); +} + +int gk20a_gmmu_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + int err = gk20a_gmmu_alloc_flags_sys(vm->mm->g, flags, size, mem); + + if (err) + return err; + + mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0, + gk20a_mem_flag_none, false, + mem->aperture); + if (!mem->gpu_va) { + err = -ENOMEM; + goto fail_free; + } + + return 0; + +fail_free: + gk20a_gmmu_free(vm->mm->g, mem); + return err; +} + +int gk20a_gmmu_alloc_map_vid(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem) +{ + return gk20a_gmmu_alloc_map_flags_vid(vm, + NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); +} + +int gk20a_gmmu_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + int err = gk20a_gmmu_alloc_flags_vid(vm->mm->g, flags, size, mem); + + if (err) + return err; + + mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0, + gk20a_mem_flag_none, false, + mem->aperture); + if (!mem->gpu_va) { + err = -ENOMEM; + goto fail_free; + } + + return 0; + +fail_free: + gk20a_gmmu_free(vm->mm->g, mem); + return err; +} + +static void gk20a_gmmu_free_sys(struct gk20a *g, struct nvgpu_mem *mem) +{ + struct device *d = dev_from_gk20a(g); + + if (mem->cpu_va || mem->pages) { + if (mem->flags) { + DEFINE_DMA_ATTRS(dma_attrs); + + gk20a_dma_flags_to_attrs(&dma_attrs, mem->flags); + + if (mem->flags & NVGPU_DMA_NO_KERNEL_MAPPING) { + dma_free_attrs(d, mem->size, mem->pages, + sg_dma_address(mem->sgt->sgl), + __DMA_ATTR(dma_attrs)); + } else { + dma_free_attrs(d, mem->size, mem->cpu_va, + sg_dma_address(mem->sgt->sgl), + __DMA_ATTR(dma_attrs)); + } + } else { + dma_free_coherent(d, mem->size, mem->cpu_va, + 
sg_dma_address(mem->sgt->sgl)); + } + mem->cpu_va = NULL; + mem->pages = NULL; + } + + if (mem->sgt) + gk20a_free_sgtable(g, &mem->sgt); + + mem->size = 0; + mem->aperture = APERTURE_INVALID; +} + +static void gk20a_gmmu_free_vid(struct gk20a *g, struct nvgpu_mem *mem) +{ +#if defined(CONFIG_GK20A_VIDMEM) + bool was_empty; + + /* Sanity check - only this supported when allocating. */ + WARN_ON(mem->flags != NVGPU_DMA_NO_KERNEL_MAPPING); + + if (mem->user_mem) { + nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); + was_empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head); + nvgpu_list_add_tail(&mem->clear_list_entry, + &g->mm.vidmem.clear_list_head); + atomic64_add(mem->size, &g->mm.vidmem.bytes_pending); + nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); + + if (was_empty) { + cancel_work_sync(&g->mm.vidmem.clear_mem_worker); + schedule_work(&g->mm.vidmem.clear_mem_worker); + } + } else { + nvgpu_memset(g, mem, 0, 0, mem->size); + nvgpu_free(mem->allocator, + (u64)get_vidmem_page_alloc(mem->sgt->sgl)); + gk20a_free_sgtable(g, &mem->sgt); + + mem->size = 0; + mem->aperture = APERTURE_INVALID; + } +#endif +} + +void gk20a_gmmu_free(struct gk20a *g, struct nvgpu_mem *mem) +{ + switch (mem->aperture) { + case APERTURE_SYSMEM: + return gk20a_gmmu_free_sys(g, mem); + case APERTURE_VIDMEM: + return gk20a_gmmu_free_vid(g, mem); + default: + break; /* like free() on "null" memory */ + } +} + +void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem) +{ + if (mem->gpu_va) + gk20a_gmmu_unmap(vm, mem->gpu_va, mem->size, gk20a_mem_flag_none); + mem->gpu_va = 0; + + gk20a_gmmu_free(vm->mm->g, mem); +} diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c index 6fb6c27e..cfe1149f 100644 --- a/drivers/gpu/nvgpu/common/semaphore.c +++ b/drivers/gpu/nvgpu/common/semaphore.c @@ -18,6 +18,7 @@ #include #include +#include #include #include -- cgit v1.2.2
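
For reference, below is a minimal sketch of how calling code might use the API added by this patch. It is illustrative only and not part of the change: the example function name, the SZ_4K size constant, and the error-handling layout are assumptions, while the nvgpu calls themselves are the ones defined in dma.c above.

	static int example_alloc_and_map(struct gk20a *g, struct vm_gk20a *vm)
	{
		struct nvgpu_mem sys_buf;	/* CPU-visible sysmem buffer */
		struct nvgpu_mem gpu_buf;	/* buffer mapped into the GPU VA space */
		int err;

		/* Sysmem allocation with a kernel CPU mapping (cpu_va is valid). */
		err = gk20a_gmmu_alloc_sys(g, SZ_4K, &sys_buf);
		if (err)
			return err;
		memset(sys_buf.cpu_va, 0, sys_buf.size);

		/*
		 * Allocation that is also mapped into the given VM; when
		 * vidmem_is_vidmem is set this tries vidmem first and falls
		 * back to sysmem if vidmem is exhausted.
		 */
		err = gk20a_gmmu_alloc_map(vm, SZ_4K, &gpu_buf);
		if (err)
			goto fail_free_sys;

		/* ... use sys_buf.cpu_va and gpu_buf.gpu_va ... */

		gk20a_gmmu_unmap_free(vm, &gpu_buf);
		gk20a_gmmu_free(g, &sys_buf);
		return 0;

	fail_free_sys:
		gk20a_gmmu_free(g, &sys_buf);
		return err;
	}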