From 014ace5a85f274de7debb4c6168d69c803445e19 Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Mon, 24 Apr 2017 15:26:00 -0700
Subject: gpu: nvgpu: Split VM implementation out

This patch begins splitting out the VM implementation from mm_gk20a.c
and moves it to common/linux/vm.c and common/mm/vm.c. This split is
necessary because the VM code has two portions: first, an interface
for OS-specific code to use (i.e. userspace mappings), and second, a
set of APIs for the driver to use (init, cleanup, etc.) which are not
OS specific.

This is only the beginning of the split - there are still a lot of
things that need to be carefully moved around.

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: I3b57cba245d7daf9e4326a143b9c6217e0f28c96
Signed-off-by: Alex Waterman
Reviewed-on: http://git-master/r/1477743
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/linux/vm.c | 421 ++++++++++++++++++++++++++++++++++++
 1 file changed, 421 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/common/linux/vm.c

diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
new file mode 100644
index 00000000..8b9d6f96
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include "gk20a/gk20a.h"
+#include "gk20a/mm_gk20a.h"
+
+#include "vm_priv.h"
+
+static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
+	struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind)
+{
+	struct nvgpu_rbtree_node *node = NULL;
+	struct nvgpu_rbtree_node *root = vm->mapped_buffers;
+
+	nvgpu_rbtree_enum_start(0, &node, root);
+
+	while (node) {
+		struct nvgpu_mapped_buf *mapped_buffer =
+			mapped_buffer_from_rbtree_node(node);
+
+		if (mapped_buffer->dmabuf == dmabuf &&
+		    kind == mapped_buffer->kind)
+			return mapped_buffer;
+
+		nvgpu_rbtree_enum_next(&node, node);
+	}
+
+	return NULL;
+}
+
+/*
+ * Determine alignment for a passed buffer. Necessary since the buffer may
+ * appear big enough to map with large pages but the SGL may have chunks that
+ * are not aligned on a 64/128kB large page boundary.
+ */
+static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
+				       enum nvgpu_aperture aperture)
+{
+	u64 align = 0, chunk_align = 0;
+	u64 buf_addr;
+
+	if (aperture == APERTURE_VIDMEM) {
+		struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
+		struct page_alloc_chunk *chunk = NULL;
+
+		nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
+					  page_alloc_chunk, list_entry) {
+			chunk_align = 1ULL << __ffs(chunk->base |
+						    chunk->length);
+
+			if (align)
+				align = min(align, chunk_align);
+			else
+				align = chunk_align;
+		}
+
+		return align;
+	}
+
+	buf_addr = (u64)sg_dma_address(sgl);
+
+	if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) {
+		while (sgl) {
+			buf_addr = (u64)sg_phys(sgl);
+			chunk_align = 1ULL << __ffs(buf_addr |
+						    (u64)sgl->length);
+
+			if (align)
+				align = min(align, chunk_align);
+			else
+				align = chunk_align;
+			sgl = sg_next(sgl);
+		}
+
+		return align;
+	}
+
+	align = 1ULL << __ffs(buf_addr);
+
+	return align;
+}
+
+/*
+ * vm->update_gmmu_lock must be held. This checks to see if we already have
+ * mapped the passed buffer into this VM. If so, just return the existing
+ * mapping address.
+ */
+static u64 __nvgpu_vm_find_mapping(struct vm_gk20a *vm,
+				   struct dma_buf *dmabuf,
+				   u64 offset_align,
+				   u32 flags,
+				   int kind,
+				   bool user_mapped,
+				   int rw_flag)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct nvgpu_mapped_buf *mapped_buffer = NULL;
+
+	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
+		mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset_align);
+		if (!mapped_buffer)
+			return 0;
+
+		if (mapped_buffer->dmabuf != dmabuf ||
+		    mapped_buffer->kind != (u32)kind)
+			return 0;
+	} else {
+		mapped_buffer =
+			__nvgpu_vm_find_mapped_buf_reverse(vm, dmabuf, kind);
+		if (!mapped_buffer)
+			return 0;
+	}
+
+	if (mapped_buffer->flags != flags)
+		return 0;
+
+	/* mark the buffer as used */
+	if (user_mapped) {
+		if (mapped_buffer->user_mapped == 0)
+			vm->num_user_mapped_buffers++;
+		mapped_buffer->user_mapped++;
+
+		/* If the mapping comes from user space, we own
+		 * the handle ref. Since we reuse an
+		 * existing mapping here, we need to give back those
+		 * refs once in order not to leak.
+		 */
+		if (mapped_buffer->own_mem_ref)
+			dma_buf_put(mapped_buffer->dmabuf);
+		else
+			mapped_buffer->own_mem_ref = true;
+	}
+	kref_get(&mapped_buffer->ref);
+
+	nvgpu_log(g, gpu_dbg_map,
+		  "gv: 0x%04x_%08x + 0x%-7zu "
+		  "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
+		  "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
+		  "flags=0x%x apt=%s (reused)",
+		  u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
+		  dmabuf->size,
+		  u64_hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
+		  u64_lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
+		  u64_hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
+		  u64_lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
+		  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
+		  vm_aspace_id(vm),
+		  mapped_buffer->ctag_lines, mapped_buffer->ctag_offset,
+		  mapped_buffer->flags,
+		  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, dmabuf)));
+
+	return mapped_buffer->addr;
+}
+
+u64 nvgpu_vm_map(struct vm_gk20a *vm,
+		 struct dma_buf *dmabuf,
+		 u64 offset_align,
+		 u32 flags,
+		 int kind,
+		 bool user_mapped,
+		 int rw_flag,
+		 u64 buffer_offset,
+		 u64 mapping_size,
+		 struct vm_gk20a_mapping_batch *batch)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct gk20a_comptag_allocator *ctag_allocator = &g->gr.comp_tags;
+	struct nvgpu_mapped_buf *mapped_buffer = NULL;
+	bool inserted = false, va_allocated = false;
+	u64 map_offset = 0;
+	int err = 0;
+	struct buffer_attrs bfr = {NULL};
+	struct gk20a_comptags comptags;
+	bool clear_ctags = false;
+	struct scatterlist *sgl;
+	u64 ctag_map_win_size = 0;
+	u32 ctag_map_win_ctagline = 0;
+	struct vm_reserved_va_node *va_node = NULL;
+	u32 ctag_offset;
+	enum nvgpu_aperture aperture;
+
+	if (user_mapped && vm->userspace_managed &&
+	    !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
+		nvgpu_err(g, "non-fixed-offset mapping not available on "
+			  "userspace managed address spaces");
+		return -EFAULT;
+	}
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+	/* check if this buffer is already mapped */
+	if (!vm->userspace_managed) {
+		map_offset = __nvgpu_vm_find_mapping(
+			vm, dmabuf, offset_align,
+			flags, kind,
+			user_mapped, rw_flag);
+		if (map_offset) {
+			nvgpu_mutex_release(&vm->update_gmmu_lock);
+			return map_offset;
+		}
+	}
+
+	/* pin buffer to get phys/iovmm addr */
+	bfr.sgt = gk20a_mm_pin(g->dev, dmabuf);
+	if (IS_ERR(bfr.sgt)) {
+		/* Falling back to physical is actually possible
+		 * here in many cases if we use 4K phys pages in the
+		 * gmmu. However we have some regions which require
+		 * contig regions to work properly (either phys-contig
+		 * or contig through smmu io_vaspace). Until we can
+		 * track the difference between those two cases we have
+		 * to fail the mapping when we run out of SMMU space.
+		 */
+		nvgpu_warn(g, "oom allocating tracking buffer");
+		goto clean_up;
+	}
+
+	bfr.kind_v = kind;
+	bfr.size = dmabuf->size;
+	sgl = bfr.sgt->sgl;
+
+	aperture = gk20a_dmabuf_aperture(g, dmabuf);
+	if (aperture == APERTURE_INVALID) {
+		err = -EINVAL;
+		goto clean_up;
+	}
+
+	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
+		map_offset = offset_align;
+
+	bfr.align = nvgpu_get_buffer_alignment(g, sgl, aperture);
+	bfr.pgsz_idx = __get_pte_size(vm, map_offset,
+				      min_t(u64, bfr.size, bfr.align));
+	mapping_size = mapping_size ?
+			mapping_size : bfr.size;
+
+	/* Check if we should use a fixed offset for mapping this buffer */
+	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
+		err = validate_fixed_buffer(vm, &bfr,
+					    offset_align, mapping_size,
+					    &va_node);
+		if (err)
+			goto clean_up;
+
+		map_offset = offset_align;
+		va_allocated = false;
+	} else
+		va_allocated = true;
+
+	err = setup_buffer_kind_and_compression(vm, flags, &bfr, bfr.pgsz_idx);
+	if (unlikely(err)) {
+		nvgpu_err(g, "failure setting up kind and compression");
+		goto clean_up;
+	}
+
+	/* bar1 and pmu vm don't need ctag */
+	if (!vm->enable_ctag)
+		bfr.ctag_lines = 0;
+
+	gk20a_get_comptags(g->dev, dmabuf, &comptags);
+
+	/* ensure alignment to compression page size if compression enabled */
+	if (bfr.ctag_offset)
+		mapping_size = ALIGN(mapping_size,
+				     g->ops.fb.compression_page_size(g));
+
+	if (bfr.ctag_lines && !comptags.lines) {
+		const bool user_mappable =
+			!!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS);
+
+		/* allocate compression resources if needed */
+		err = gk20a_alloc_comptags(g, g->dev, dmabuf, ctag_allocator,
+					   bfr.ctag_lines, user_mappable,
+					   &ctag_map_win_size,
+					   &ctag_map_win_ctagline);
+		if (err) {
+			/* ok to fall back here if we ran out */
+			/* TBD: we can partially alloc ctags as well... */
+			bfr.kind_v = bfr.uc_kind_v;
+		} else {
+			gk20a_get_comptags(g->dev, dmabuf, &comptags);
+
+			if (g->ops.ltc.cbc_ctrl)
+				g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
+						    comptags.offset,
+						    comptags.offset +
+						    comptags.allocated_lines - 1);
+			else
+				clear_ctags = true;
+		}
+	}
+
+	/* store the comptag info */
+	bfr.ctag_offset = comptags.offset;
+	bfr.ctag_lines = comptags.lines;
+	bfr.ctag_allocated_lines = comptags.allocated_lines;
+	bfr.ctag_user_mappable = comptags.user_mappable;
+
+	/*
+	 * Calculate comptag index for this mapping. Differs in
+	 * case of partial mapping.
+	 */
+	ctag_offset = comptags.offset;
+	if (ctag_offset)
+		ctag_offset += buffer_offset >>
+			       ilog2(g->ops.fb.compression_page_size(g));
+
+	/* update gmmu ptes */
+	map_offset = g->ops.mm.gmmu_map(vm, map_offset,
+					bfr.sgt,
+					buffer_offset, /* sg offset */
+					mapping_size,
+					bfr.pgsz_idx,
+					bfr.kind_v,
+					ctag_offset,
+					flags, rw_flag,
+					clear_ctags,
+					false,
+					false,
+					batch,
+					aperture);
+	if (!map_offset)
+		goto clean_up;
+
+	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
+	if (!mapped_buffer) {
+		nvgpu_warn(g, "oom allocating tracking buffer");
+		goto clean_up;
+	}
+	mapped_buffer->dmabuf = dmabuf;
+	mapped_buffer->sgt = bfr.sgt;
+	mapped_buffer->addr = map_offset;
+	mapped_buffer->size = mapping_size;
+	mapped_buffer->pgsz_idx = bfr.pgsz_idx;
+	mapped_buffer->ctag_offset = bfr.ctag_offset;
+	mapped_buffer->ctag_lines = bfr.ctag_lines;
+	mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
+	mapped_buffer->ctags_mappable = bfr.ctag_user_mappable;
+	mapped_buffer->ctag_map_win_size = ctag_map_win_size;
+	mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline;
+	mapped_buffer->vm = vm;
+	mapped_buffer->flags = flags;
+	mapped_buffer->kind = kind;
+	mapped_buffer->va_allocated = va_allocated;
+	mapped_buffer->user_mapped = user_mapped ?
+			1 : 0;
+	mapped_buffer->own_mem_ref = user_mapped;
+	nvgpu_init_list_node(&mapped_buffer->buffer_list);
+	kref_init(&mapped_buffer->ref);
+
+	err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
+	if (err) {
+		nvgpu_err(g, "failed to insert into mapped buffer tree");
+		goto clean_up;
+	}
+	inserted = true;
+	if (user_mapped)
+		vm->num_user_mapped_buffers++;
+
+	if (va_node) {
+		nvgpu_list_add_tail(&mapped_buffer->buffer_list,
+				    &va_node->buffer_list_head);
+		mapped_buffer->va_node = va_node;
+	}
+
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+	return map_offset;
+
+clean_up:
+	if (inserted) {
+		nvgpu_remove_mapped_buf(vm, mapped_buffer);
+		if (user_mapped)
+			vm->num_user_mapped_buffers--;
+	}
+	nvgpu_kfree(g, mapped_buffer);
+	if (va_allocated)
+		gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
+	if (!IS_ERR(bfr.sgt))
+		gk20a_mm_unpin(g->dev, dmabuf, bfr.sgt);
+
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+	nvgpu_log_info(g, "err=%d\n", err);
+	return 0;
+}
+
+void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
+{
+	struct gk20a *g = vm->mm->g;
+	struct nvgpu_mapped_buf *mapped_buffer;
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
+	if (!mapped_buffer) {
+		nvgpu_mutex_release(&vm->update_gmmu_lock);
+		nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
+		return;
+	}
+
+	kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+}
--
cgit v1.2.2
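
The alignment logic in nvgpu_get_buffer_alignment() above reduces to one trick: for each SGL chunk, 1ULL << __ffs(base | length) is the largest power of two that divides both the chunk's base address and its length, and the buffer's overall alignment is the minimum of that value over all chunks. Below is a minimal userspace sketch of the same computation; it uses __builtin_ctzll as a stand-in for the kernel's __ffs, and struct chunk, buffer_alignment() and the sample values are illustrative only, not part of the driver.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's __ffs(): index of the lowest set bit (v != 0). */
static unsigned int lowest_set_bit(uint64_t v)
{
	return (unsigned int)__builtin_ctzll(v);
}

/* Illustrative chunk descriptor; not a real nvgpu structure. */
struct chunk {
	uint64_t base;   /* chunk base address */
	uint64_t length; /* chunk length in bytes */
};

/* Largest power-of-two alignment that every chunk satisfies. */
static uint64_t buffer_alignment(const struct chunk *chunks, int n)
{
	uint64_t align = 0;
	int i;

	for (i = 0; i < n; i++) {
		uint64_t chunk_align =
			1ULL << lowest_set_bit(chunks[i].base |
					       chunks[i].length);

		if (align)
			align = align < chunk_align ? align : chunk_align;
		else
			align = chunk_align;
	}

	return align;
}

int main(void)
{
	/* Hypothetical SGL: a 128kB-aligned chunk plus a 4kB-aligned one. */
	struct chunk chunks[] = {
		{ 0x20000, 0x40000 },	/* 128kB base, 256kB length */
		{ 0x11000, 0x1000 },	/* 4kB base, 4kB length */
	};

	/* Prints align = 0x1000: the 4kB chunk limits the whole buffer. */
	printf("align = 0x%llx\n",
	       (unsigned long long)buffer_alignment(chunks, 2));
	return 0;
}

This is why a buffer that looks large enough for 64/128kB pages can still end up mapped with small pages: a single poorly aligned chunk in the SGL drags the whole mapping's alignment, and with it the page size chosen by __get_pte_size(), down.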