From 014ace5a85f274de7debb4c6168d69c803445e19 Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Mon, 24 Apr 2017 15:26:00 -0700
Subject: gpu: nvgpu: Split VM implementation out

This patch begins splitting out the VM implementation from mm_gk20a.c
and moves it to common/linux/vm.c and common/mm/vm.c. This split is
necessary because the VM code has two portions: first, an interface
for OS-specific code to use (i.e. userspace mappings), and second, a
set of APIs for the driver to use (init, cleanup, etc.) which are not
OS specific.

This is only the beginning of the split - there are still a lot of
things that need to be carefully moved around.

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: I3b57cba245d7daf9e4326a143b9c6217e0f28c96
Signed-off-by: Alex Waterman
Reviewed-on: http://git-master/r/1477743
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/linux/vm.c | 421 ++++++++++++++++++++++++++++++++++++
 1 file changed, 421 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/common/linux/vm.c

diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
new file mode 100644
index 00000000..8b9d6f96
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include "gk20a/gk20a.h"
+#include "gk20a/mm_gk20a.h"
+
+#include "vm_priv.h"
+
+static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
+	struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind)
+{
+	struct nvgpu_rbtree_node *node = NULL;
+	struct nvgpu_rbtree_node *root = vm->mapped_buffers;
+
+	nvgpu_rbtree_enum_start(0, &node, root);
+
+	while (node) {
+		struct nvgpu_mapped_buf *mapped_buffer =
+			mapped_buffer_from_rbtree_node(node);
+
+		if (mapped_buffer->dmabuf == dmabuf &&
+		    kind == mapped_buffer->kind)
+			return mapped_buffer;
+
+		nvgpu_rbtree_enum_next(&node, node);
+	}
+
+	return NULL;
+}
+
+/*
+ * Determine alignment for a passed buffer. Necessary since the buffer may
+ * appear big enough to map with large pages but the SGL may have chunks that
+ * are not aligned on a 64/128kB large page boundary.
+ */
+static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
+				       enum nvgpu_aperture aperture)
+{
+	u64 align = 0, chunk_align = 0;
+	u64 buf_addr;
+
+	if (aperture == APERTURE_VIDMEM) {
+		struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
+		struct page_alloc_chunk *chunk = NULL;
+
+		nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
+					  page_alloc_chunk, list_entry) {
+			chunk_align = 1ULL << __ffs(chunk->base |
+						    chunk->length);
+
+			if (align)
+				align = min(align, chunk_align);
+			else
+				align = chunk_align;
+		}
+
+		return align;
+	}
+
+	buf_addr = (u64)sg_dma_address(sgl);
+
+	if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) {
+		while (sgl) {
+			buf_addr = (u64)sg_phys(sgl);
+			chunk_align = 1ULL << __ffs(buf_addr |
+						    (u64)sgl->length);
+
+			if (align)
+				align = min(align, chunk_align);
+			else
+				align = chunk_align;
+			sgl = sg_next(sgl);
+		}
+
+		return align;
+	}
+
+	align = 1ULL << __ffs(buf_addr);
+
+	return align;
+}
+
+/*
+ * vm->update_gmmu_lock must be held. This checks to see if we already have
+ * mapped the passed buffer into this VM. If so, just return the existing
+ * mapping address.
+ */
+static u64 __nvgpu_vm_find_mapping(struct vm_gk20a *vm,
+				   struct dma_buf *dmabuf,
+				   u64 offset_align,
+				   u32 flags,
+				   int kind,
+				   bool user_mapped,
+				   int rw_flag)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct nvgpu_mapped_buf *mapped_buffer = NULL;
+
+	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
+		mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset_align);
+		if (!mapped_buffer)
+			return 0;
+
+		if (mapped_buffer->dmabuf != dmabuf ||
+		    mapped_buffer->kind != (u32)kind)
+			return 0;
+	} else {
+		mapped_buffer =
+			__nvgpu_vm_find_mapped_buf_reverse(vm, dmabuf, kind);
+		if (!mapped_buffer)
+			return 0;
+	}
+
+	if (mapped_buffer->flags != flags)
+		return 0;
+
+	/* mark the buffer as used */
+	if (user_mapped) {
+		if (mapped_buffer->user_mapped == 0)
+			vm->num_user_mapped_buffers++;
+		mapped_buffer->user_mapped++;
+
+		/* If the mapping comes from user space, we own
+		 * the handle ref. Since we reuse an
+		 * existing mapping here, we need to give back those
+		 * refs once in order not to leak.
+		 */
+		if (mapped_buffer->own_mem_ref)
+			dma_buf_put(mapped_buffer->dmabuf);
+		else
+			mapped_buffer->own_mem_ref = true;
+	}
+	kref_get(&mapped_buffer->ref);
+
+	nvgpu_log(g, gpu_dbg_map,
+		  "gv: 0x%04x_%08x + 0x%-7zu "
+		  "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
+		  "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
+		  "flags=0x%x apt=%s (reused)",
+		  u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
+		  dmabuf->size,
+		  u64_hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
+		  u64_lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
+		  u64_hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
+		  u64_lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
+		  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
+		  vm_aspace_id(vm),
+		  mapped_buffer->ctag_lines, mapped_buffer->ctag_offset,
+		  mapped_buffer->flags,
+		  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, dmabuf)));
+
+	return mapped_buffer->addr;
+}
+
+u64 nvgpu_vm_map(struct vm_gk20a *vm,
+		 struct dma_buf *dmabuf,
+		 u64 offset_align,
+		 u32 flags,
+		 int kind,
+		 bool user_mapped,
+		 int rw_flag,
+		 u64 buffer_offset,
+		 u64 mapping_size,
+		 struct vm_gk20a_mapping_batch *batch)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct gk20a_comptag_allocator *ctag_allocator = &g->gr.comp_tags;
+	struct nvgpu_mapped_buf *mapped_buffer = NULL;
+	bool inserted = false, va_allocated = false;
+	u64 map_offset = 0;
+	int err = 0;
+	struct buffer_attrs bfr = {NULL};
+	struct gk20a_comptags comptags;
+	bool clear_ctags = false;
+	struct scatterlist *sgl;
+	u64 ctag_map_win_size = 0;
+	u32 ctag_map_win_ctagline = 0;
+	struct vm_reserved_va_node *va_node = NULL;
+	u32 ctag_offset;
+	enum nvgpu_aperture aperture;
+
+	if (user_mapped && vm->userspace_managed &&
+	    !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
+		nvgpu_err(g, "non-fixed-offset mapping not available on "
+			  "userspace managed address spaces");
+		return -EFAULT;
+	}
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+	/* check if this buffer is already mapped */
+	if (!vm->userspace_managed) {
+		map_offset = __nvgpu_vm_find_mapping(
+			vm, dmabuf, offset_align,
+			flags, kind,
+			user_mapped, rw_flag);
+		if (map_offset) {
+			nvgpu_mutex_release(&vm->update_gmmu_lock);
+			return map_offset;
+		}
+	}
+
+	/* pin buffer to get phys/iovmm addr */
+	bfr.sgt = gk20a_mm_pin(g->dev, dmabuf);
+	if (IS_ERR(bfr.sgt)) {
+		/* Falling back to physical is actually possible
+		 * here in many cases if we use 4K phys pages in the
+		 * gmmu. However we have some regions which require
+		 * contig regions to work properly (either phys-contig
+		 * or contig through smmu io_vaspace). Until we can
+		 * track the difference between those two cases we have
+		 * to fail the mapping when we run out of SMMU space.
+		 */
+		nvgpu_warn(g, "oom allocating tracking buffer");
+		goto clean_up;
+	}
+
+	bfr.kind_v = kind;
+	bfr.size = dmabuf->size;
+	sgl = bfr.sgt->sgl;
+
+	aperture = gk20a_dmabuf_aperture(g, dmabuf);
+	if (aperture == APERTURE_INVALID) {
+		err = -EINVAL;
+		goto clean_up;
+	}
+
+	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
+		map_offset = offset_align;
+
+	bfr.align = nvgpu_get_buffer_alignment(g, sgl, aperture);
+	bfr.pgsz_idx = __get_pte_size(vm, map_offset,
+				      min_t(u64, bfr.size, bfr.align));
+	mapping_size = mapping_size ?
+			mapping_size : bfr.size;
+
+	/* Check if we should use a fixed offset for mapping this buffer */
+	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
+		err = validate_fixed_buffer(vm, &bfr,
+					    offset_align, mapping_size,
+					    &va_node);
+		if (err)
+			goto clean_up;
+
+		map_offset = offset_align;
+		va_allocated = false;
+	} else
+		va_allocated = true;
+
+	err = setup_buffer_kind_and_compression(vm, flags, &bfr, bfr.pgsz_idx);
+	if (unlikely(err)) {
+		nvgpu_err(g, "failure setting up kind and compression");
+		goto clean_up;
+	}
+
+	/* bar1 and pmu vm don't need ctag */
+	if (!vm->enable_ctag)
+		bfr.ctag_lines = 0;
+
+	gk20a_get_comptags(g->dev, dmabuf, &comptags);
+
+	/* ensure alignment to compression page size if compression enabled */
+	if (bfr.ctag_offset)
+		mapping_size = ALIGN(mapping_size,
+				     g->ops.fb.compression_page_size(g));
+
+	if (bfr.ctag_lines && !comptags.lines) {
+		const bool user_mappable =
+			!!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS);
+
+		/* allocate compression resources if needed */
+		err = gk20a_alloc_comptags(g, g->dev, dmabuf, ctag_allocator,
+					   bfr.ctag_lines, user_mappable,
+					   &ctag_map_win_size,
+					   &ctag_map_win_ctagline);
+		if (err) {
+			/* ok to fall back here if we ran out */
+			/* TBD: we can partially alloc ctags as well... */
+			bfr.kind_v = bfr.uc_kind_v;
+		} else {
+			gk20a_get_comptags(g->dev, dmabuf, &comptags);
+
+			if (g->ops.ltc.cbc_ctrl)
+				g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
+						    comptags.offset,
+						    comptags.offset +
+						    comptags.allocated_lines - 1);
+			else
+				clear_ctags = true;
+		}
+	}
+
+	/* store the comptag info */
+	bfr.ctag_offset = comptags.offset;
+	bfr.ctag_lines = comptags.lines;
+	bfr.ctag_allocated_lines = comptags.allocated_lines;
+	bfr.ctag_user_mappable = comptags.user_mappable;
+
+	/*
+	 * Calculate comptag index for this mapping. Differs in
+	 * case of partial mapping.
+	 */
+	ctag_offset = comptags.offset;
+	if (ctag_offset)
+		ctag_offset += buffer_offset >>
+			       ilog2(g->ops.fb.compression_page_size(g));
+
+	/* update gmmu ptes */
+	map_offset = g->ops.mm.gmmu_map(vm, map_offset,
+					bfr.sgt,
+					buffer_offset, /* sg offset */
+					mapping_size,
+					bfr.pgsz_idx,
+					bfr.kind_v,
+					ctag_offset,
+					flags, rw_flag,
+					clear_ctags,
+					false,
+					false,
+					batch,
+					aperture);
+	if (!map_offset)
+		goto clean_up;
+
+	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
+	if (!mapped_buffer) {
+		nvgpu_warn(g, "oom allocating tracking buffer");
+		goto clean_up;
+	}
+	mapped_buffer->dmabuf = dmabuf;
+	mapped_buffer->sgt = bfr.sgt;
+	mapped_buffer->addr = map_offset;
+	mapped_buffer->size = mapping_size;
+	mapped_buffer->pgsz_idx = bfr.pgsz_idx;
+	mapped_buffer->ctag_offset = bfr.ctag_offset;
+	mapped_buffer->ctag_lines = bfr.ctag_lines;
+	mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
+	mapped_buffer->ctags_mappable = bfr.ctag_user_mappable;
+	mapped_buffer->ctag_map_win_size = ctag_map_win_size;
+	mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline;
+	mapped_buffer->vm = vm;
+	mapped_buffer->flags = flags;
+	mapped_buffer->kind = kind;
+	mapped_buffer->va_allocated = va_allocated;
+	mapped_buffer->user_mapped = user_mapped ?
+			1 : 0;
+	mapped_buffer->own_mem_ref = user_mapped;
+	nvgpu_init_list_node(&mapped_buffer->buffer_list);
+	kref_init(&mapped_buffer->ref);
+
+	err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
+	if (err) {
+		nvgpu_err(g, "failed to insert into mapped buffer tree");
+		goto clean_up;
+	}
+	inserted = true;
+	if (user_mapped)
+		vm->num_user_mapped_buffers++;
+
+	if (va_node) {
+		nvgpu_list_add_tail(&mapped_buffer->buffer_list,
+				    &va_node->buffer_list_head);
+		mapped_buffer->va_node = va_node;
+	}
+
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+	return map_offset;
+
+clean_up:
+	if (inserted) {
+		nvgpu_remove_mapped_buf(vm, mapped_buffer);
+		if (user_mapped)
+			vm->num_user_mapped_buffers--;
+	}
+	nvgpu_kfree(g, mapped_buffer);
+	if (va_allocated)
+		gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
+	if (!IS_ERR(bfr.sgt))
+		gk20a_mm_unpin(g->dev, dmabuf, bfr.sgt);
+
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+	nvgpu_log_info(g, "err=%d\n", err);
+	return 0;
+}
+
+void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
+{
+	struct gk20a *g = vm->mm->g;
+	struct nvgpu_mapped_buf *mapped_buffer;
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
+	if (!mapped_buffer) {
+		nvgpu_mutex_release(&vm->update_gmmu_lock);
+		nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
+		return;
+	}
+
+	kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+}
--
cgit v1.2.2
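
The alignment logic in nvgpu_get_buffer_alignment() above reduces to one trick: for each SGL chunk, 1ULL << __ffs(base | length) is the largest power of two that divides both the chunk's base address and its length, and the buffer's overall alignment is the minimum of that value over all chunks. Below is a minimal userspace sketch of the same computation; it uses __builtin_ctzll as a stand-in for the kernel's __ffs, and struct chunk, buffer_alignment() and the sample values are illustrative only, not part of the driver.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's __ffs(): index of the lowest set bit (v != 0). */
static unsigned int lowest_set_bit(uint64_t v)
{
	return (unsigned int)__builtin_ctzll(v);
}

/* Illustrative chunk descriptor; not a real nvgpu structure. */
struct chunk {
	uint64_t base;   /* chunk base address */
	uint64_t length; /* chunk length in bytes */
};

/* Largest power-of-two alignment that every chunk satisfies. */
static uint64_t buffer_alignment(const struct chunk *chunks, int n)
{
	uint64_t align = 0;
	int i;

	for (i = 0; i < n; i++) {
		uint64_t chunk_align =
			1ULL << lowest_set_bit(chunks[i].base |
					       chunks[i].length);

		if (align)
			align = align < chunk_align ? align : chunk_align;
		else
			align = chunk_align;
	}

	return align;
}

int main(void)
{
	/* Hypothetical SGL: a 128kB-aligned chunk plus a 4kB-aligned one. */
	struct chunk chunks[] = {
		{ 0x20000, 0x40000 },	/* 128kB base, 256kB length */
		{ 0x11000, 0x1000 },	/* 4kB base, 4kB length */
	};

	/* Prints align = 0x1000: the 4kB chunk limits the whole buffer. */
	printf("align = 0x%llx\n",
	       (unsigned long long)buffer_alignment(chunks, 2));
	return 0;
}

This is why a buffer that looks large enough for 64/128kB pages can still end up mapped with small pages: a single poorly aligned chunk in the SGL drags the whole mapping's alignment, and with it the page size chosen by __get_pte_size(), down.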