Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/vm.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c  421
1 file changed, 421 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
new file mode 100644
index 00000000..8b9d6f96
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -0,0 +1,421 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/dma-buf.h>
#include <linux/scatterlist.h>

#include <nvgpu/log.h>
#include <nvgpu/lock.h>
#include <nvgpu/rbtree.h>
#include <nvgpu/page_allocator.h>

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"

#include "vm_priv.h"

static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
        struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind)
{
        struct nvgpu_rbtree_node *node = NULL;
        struct nvgpu_rbtree_node *root = vm->mapped_buffers;

        nvgpu_rbtree_enum_start(0, &node, root);

        while (node) {
                struct nvgpu_mapped_buf *mapped_buffer =
                        mapped_buffer_from_rbtree_node(node);

                if (mapped_buffer->dmabuf == dmabuf &&
                    kind == mapped_buffer->kind)
                        return mapped_buffer;

                nvgpu_rbtree_enum_next(&node, node);
        }

        return NULL;
}

/*
 * Determine alignment for a passed buffer. Necessary since the buffer may
 * appear big enough to map with large pages, but the SGL may have chunks
 * that are not aligned on a 64/128kB large page boundary.
 */
static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
                                      enum nvgpu_aperture aperture)
{
        u64 align = 0, chunk_align = 0;
        u64 buf_addr;

        if (aperture == APERTURE_VIDMEM) {
                struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
                struct page_alloc_chunk *chunk = NULL;

                nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
                                          page_alloc_chunk, list_entry) {
                        chunk_align = 1ULL << __ffs(chunk->base |
                                                    chunk->length);

                        if (align)
                                align = min(align, chunk_align);
                        else
                                align = chunk_align;
                }

                return align;
        }

        buf_addr = (u64)sg_dma_address(sgl);

        if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) {
                while (sgl) {
                        buf_addr = (u64)sg_phys(sgl);
                        chunk_align = 1ULL << __ffs(buf_addr |
                                                    (u64)sgl->length);

                        if (align)
                                align = min(align, chunk_align);
                        else
                                align = chunk_align;
                        sgl = sg_next(sgl);
                }

                return align;
        }

        align = 1ULL << __ffs(buf_addr);

        return align;
}
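
/*
 * Worked example for the alignment computation above (illustrative values,
 * not taken from real hardware): a vidmem allocation with two chunks,
 *
 *   chunk 0: base = 0x0030_0000, length = 0x0010_0000 -> chunk_align = 0x10_0000
 *   chunk 1: base = 0x0042_0000, length = 0x0002_0000 -> chunk_align = 0x02_0000
 *
 * gives align = min(0x10_0000, 0x02_0000) = 0x02_0000, i.e. the buffer can
 * still be mapped with 128kB large pages even though chunk 0 alone would
 * have allowed a larger alignment.
 */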

/*
 * vm->update_gmmu_lock must be held. This checks to see if we already have
 * mapped the passed buffer into this VM. If so, just return the existing
 * mapping address.
 */
static u64 __nvgpu_vm_find_mapping(struct vm_gk20a *vm,
                                   struct dma_buf *dmabuf,
                                   u64 offset_align,
                                   u32 flags,
                                   int kind,
                                   bool user_mapped,
                                   int rw_flag)
{
        struct gk20a *g = gk20a_from_vm(vm);
        struct nvgpu_mapped_buf *mapped_buffer = NULL;

        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
                mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset_align);
                if (!mapped_buffer)
                        return 0;

                if (mapped_buffer->dmabuf != dmabuf ||
                    mapped_buffer->kind != (u32)kind)
                        return 0;
        } else {
                mapped_buffer =
                        __nvgpu_vm_find_mapped_buf_reverse(vm, dmabuf, kind);
                if (!mapped_buffer)
                        return 0;
        }

        if (mapped_buffer->flags != flags)
                return 0;

        /* mark the buffer as used */
        if (user_mapped) {
                if (mapped_buffer->user_mapped == 0)
                        vm->num_user_mapped_buffers++;
                mapped_buffer->user_mapped++;

                /*
                 * If the mapping comes from user space, we own the dma_buf
                 * handle ref. Since we are reusing an existing mapping here,
                 * we need to give back one of those refs so that we do not
                 * leak.
                 */
                if (mapped_buffer->own_mem_ref)
                        dma_buf_put(mapped_buffer->dmabuf);
                else
                        mapped_buffer->own_mem_ref = true;
        }
        kref_get(&mapped_buffer->ref);

        nvgpu_log(g, gpu_dbg_map,
                  "gv: 0x%04x_%08x + 0x%-7zu "
                  "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
                  "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
                  "flags=0x%x apt=%s (reused)",
                  u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
                  dmabuf->size,
                  u64_hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
                  u64_lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
                  u64_hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
                  u64_lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
                  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
                  vm_aspace_id(vm),
                  mapped_buffer->ctag_lines, mapped_buffer->ctag_offset,
                  mapped_buffer->flags,
                  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, dmabuf)));

        return mapped_buffer->addr;
}
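
/*
 * Note on the reuse path above: when the same dma_buf/kind pair is mapped
 * again with identical flags, the caller gets back the original GPU VA and
 * only the mapped_buffer kref grows. For user mappings, the dma_buf
 * reference that arrived with the new request is dropped right away
 * (dma_buf_put() above), so the tracking structure holds at most one
 * dma_buf reference no matter how many times user space maps the buffer.
 */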

u64 nvgpu_vm_map(struct vm_gk20a *vm,
                 struct dma_buf *dmabuf,
                 u64 offset_align,
                 u32 flags,
                 int kind,
                 bool user_mapped,
                 int rw_flag,
                 u64 buffer_offset,
                 u64 mapping_size,
                 struct vm_gk20a_mapping_batch *batch)
{
        struct gk20a *g = gk20a_from_vm(vm);
        struct gk20a_comptag_allocator *ctag_allocator = &g->gr.comp_tags;
        struct nvgpu_mapped_buf *mapped_buffer = NULL;
        bool inserted = false, va_allocated = false;
        u64 map_offset = 0;
        int err = 0;
        struct buffer_attrs bfr = {NULL};
        struct gk20a_comptags comptags;
        bool clear_ctags = false;
        struct scatterlist *sgl;
        u64 ctag_map_win_size = 0;
        u32 ctag_map_win_ctagline = 0;
        struct vm_reserved_va_node *va_node = NULL;
        u32 ctag_offset;
        enum nvgpu_aperture aperture;

        if (user_mapped && vm->userspace_managed &&
            !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
                nvgpu_err(g, "non-fixed-offset mapping not available on "
                          "userspace managed address spaces");
                return -EFAULT;
        }

        nvgpu_mutex_acquire(&vm->update_gmmu_lock);

        /* check if this buffer is already mapped */
        if (!vm->userspace_managed) {
                map_offset = __nvgpu_vm_find_mapping(
                        vm, dmabuf, offset_align,
                        flags, kind,
                        user_mapped, rw_flag);
                if (map_offset) {
                        nvgpu_mutex_release(&vm->update_gmmu_lock);
                        return map_offset;
                }
        }

        /* pin buffer to get phys/iovmm addr */
        bfr.sgt = gk20a_mm_pin(g->dev, dmabuf);
        if (IS_ERR(bfr.sgt)) {
                /* Falling back to physical is actually possible
                 * here in many cases if we use 4K phys pages in the
                 * gmmu. However we have some regions which require
                 * contig regions to work properly (either phys-contig
                 * or contig through smmu io_vaspace). Until we can
                 * track the difference between those two cases we have
                 * to fail the mapping when we run out of SMMU space.
                 */
                nvgpu_warn(g, "oom allocating tracking buffer");
                goto clean_up;
        }

        bfr.kind_v = kind;
        bfr.size = dmabuf->size;
        sgl = bfr.sgt->sgl;

        aperture = gk20a_dmabuf_aperture(g, dmabuf);
        if (aperture == APERTURE_INVALID) {
                err = -EINVAL;
                goto clean_up;
        }

        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
                map_offset = offset_align;

        bfr.align = nvgpu_get_buffer_alignment(g, sgl, aperture);
        bfr.pgsz_idx = __get_pte_size(vm, map_offset,
                                      min_t(u64, bfr.size, bfr.align));
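
        /*
         * Note on the page size choice above: __get_pte_size() (defined
         * outside this file) is handed the smaller of the buffer size and
         * its SGL alignment, so a large buffer whose chunks are only
         * 4kB-aligned still ends up mapped with small pages.
         */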
        mapping_size = mapping_size ? mapping_size : bfr.size;

        /* Check if we should use a fixed offset for mapping this buffer */
        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
                err = validate_fixed_buffer(vm, &bfr,
                                            offset_align, mapping_size,
                                            &va_node);
                if (err)
                        goto clean_up;

                map_offset = offset_align;
                va_allocated = false;
        } else
                va_allocated = true;

        err = setup_buffer_kind_and_compression(vm, flags, &bfr, bfr.pgsz_idx);
        if (unlikely(err)) {
                nvgpu_err(g, "failure setting up kind and compression");
                goto clean_up;
        }

        /* bar1 and pmu vm don't need ctag */
        if (!vm->enable_ctag)
                bfr.ctag_lines = 0;

        gk20a_get_comptags(g->dev, dmabuf, &comptags);

        /* ensure alignment to compression page size if compression enabled */
        if (bfr.ctag_offset)
                mapping_size = ALIGN(mapping_size,
                                     g->ops.fb.compression_page_size(g));

        if (bfr.ctag_lines && !comptags.lines) {
                const bool user_mappable =
                        !!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS);

                /* allocate compression resources if needed */
                err = gk20a_alloc_comptags(g, g->dev, dmabuf, ctag_allocator,
                                           bfr.ctag_lines, user_mappable,
                                           &ctag_map_win_size,
                                           &ctag_map_win_ctagline);
                if (err) {
                        /* ok to fall back here if we ran out */
                        /* TBD: we can partially alloc ctags as well... */
                        bfr.kind_v = bfr.uc_kind_v;
                } else {
                        gk20a_get_comptags(g->dev, dmabuf, &comptags);

                        if (g->ops.ltc.cbc_ctrl)
                                g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
                                                    comptags.offset,
                                                    comptags.offset +
                                                    comptags.allocated_lines - 1);
                        else
                                clear_ctags = true;
                }
        }

        /* store the comptag info */
        bfr.ctag_offset = comptags.offset;
        bfr.ctag_lines = comptags.lines;
        bfr.ctag_allocated_lines = comptags.allocated_lines;
        bfr.ctag_user_mappable = comptags.user_mappable;

        /*
         * Calculate comptag index for this mapping. Differs in
         * case of partial mapping.
         */
        ctag_offset = comptags.offset;
        if (ctag_offset)
                ctag_offset += buffer_offset >>
                               ilog2(g->ops.fb.compression_page_size(g));
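
        /*
         * Illustrative numbers only: if the compression page size were 128kB
         * (ilog2 == 17) and the caller mapped a window starting at
         * buffer_offset = 0x80000 into the buffer, the window would begin
         * four compression pages in, so 4 is added to the buffer's base
         * comptag line.
         */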

        /* update gmmu ptes */
        map_offset = g->ops.mm.gmmu_map(vm, map_offset,
                                        bfr.sgt,
                                        buffer_offset, /* sg offset */
                                        mapping_size,
                                        bfr.pgsz_idx,
                                        bfr.kind_v,
                                        ctag_offset,
                                        flags, rw_flag,
                                        clear_ctags,
                                        false,
                                        false,
                                        batch,
                                        aperture);
        if (!map_offset)
                goto clean_up;

        mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
        if (!mapped_buffer) {
                nvgpu_warn(g, "oom allocating tracking buffer");
                goto clean_up;
        }
        mapped_buffer->dmabuf = dmabuf;
        mapped_buffer->sgt = bfr.sgt;
        mapped_buffer->addr = map_offset;
        mapped_buffer->size = mapping_size;
        mapped_buffer->pgsz_idx = bfr.pgsz_idx;
        mapped_buffer->ctag_offset = bfr.ctag_offset;
        mapped_buffer->ctag_lines = bfr.ctag_lines;
        mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
        mapped_buffer->ctags_mappable = bfr.ctag_user_mappable;
        mapped_buffer->ctag_map_win_size = ctag_map_win_size;
        mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline;
        mapped_buffer->vm = vm;
        mapped_buffer->flags = flags;
        mapped_buffer->kind = kind;
        mapped_buffer->va_allocated = va_allocated;
        mapped_buffer->user_mapped = user_mapped ? 1 : 0;
        mapped_buffer->own_mem_ref = user_mapped;
        nvgpu_init_list_node(&mapped_buffer->buffer_list);
        kref_init(&mapped_buffer->ref);

        err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
        if (err) {
                nvgpu_err(g, "failed to insert into mapped buffer tree");
                goto clean_up;
        }
        inserted = true;
        if (user_mapped)
                vm->num_user_mapped_buffers++;

        if (va_node) {
                nvgpu_list_add_tail(&mapped_buffer->buffer_list,
                                    &va_node->buffer_list_head);
                mapped_buffer->va_node = va_node;
        }

        nvgpu_mutex_release(&vm->update_gmmu_lock);

        return map_offset;

clean_up:
        if (inserted) {
                nvgpu_remove_mapped_buf(vm, mapped_buffer);
                if (user_mapped)
                        vm->num_user_mapped_buffers--;
        }
        nvgpu_kfree(g, mapped_buffer);
        if (va_allocated)
                gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
        if (!IS_ERR(bfr.sgt))
                gk20a_mm_unpin(g->dev, dmabuf, bfr.sgt);

        nvgpu_mutex_release(&vm->update_gmmu_lock);
        nvgpu_log_info(g, "err=%d\n", err);
        return 0;
}
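
/*
 * A minimal sketch of how a caller might drive nvgpu_vm_map(). The ioctl
 * plumbing lives outside this file; the helper name, the choice of kind 0,
 * the rw_flag value and the error codes below are illustrative only, not
 * the driver's actual entry point:
 *
 *      static int example_map_whole_buffer(struct vm_gk20a *vm, int fd)
 *      {
 *              struct dma_buf *dmabuf = dma_buf_get(fd);
 *              u64 gpu_va;
 *
 *              if (IS_ERR(dmabuf))
 *                      return PTR_ERR(dmabuf);
 *
 *              // Non-fixed mapping of the whole buffer: no FIXED_OFFSET
 *              // flag, offset_align/buffer_offset/mapping_size all 0,
 *              // user_mapped so the mapping takes over the dma_buf ref
 *              // on success.
 *              gpu_va = nvgpu_vm_map(vm, dmabuf, 0, 0, 0, true,
 *                                    gk20a_mem_flag_none, 0, 0, NULL);
 *              if (!gpu_va) {
 *                      dma_buf_put(dmabuf);
 *                      return -ENOMEM;
 *              }
 *
 *              return 0;
 *      }
 */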

void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
{
        struct gk20a *g = vm->mm->g;
        struct nvgpu_mapped_buf *mapped_buffer;

        nvgpu_mutex_acquire(&vm->update_gmmu_lock);
        mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
        if (!mapped_buffer) {
                nvgpu_mutex_release(&vm->update_gmmu_lock);
                nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
                return;
        }

        kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
        nvgpu_mutex_release(&vm->update_gmmu_lock);
}
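
/*
 * Note on unmap semantics: nvgpu_vm_unmap() drops one reference per call and
 * the real teardown (gk20a_vm_unmap_locked_kref) only runs once the kref
 * reaches zero, so a buffer whose mapping was handed out several times via
 * __nvgpu_vm_find_mapping() must be unmapped the same number of times before
 * its GMMU entries actually go away.
 */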