From 014ace5a85f274de7debb4c6168d69c803445e19 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Mon, 24 Apr 2017 15:26:00 -0700 Subject: gpu: nvgpu: Split VM implementation out This patch begins splitting out the VM implementation from mm_gk20a.c and moves it to common/linux/vm.c and common/mm/vm.c. This split is necessary because the VM code has two portions: first, an interface for the OS-specific code to use (i.e. userspace mappings), and second, a set of APIs for the driver to use (init, cleanup, etc.) which are not OS-specific. This is only the beginning of the split - there are still a lot of things that need to be carefully moved around. JIRA NVGPU-12 JIRA NVGPU-30 Change-Id: I3b57cba245d7daf9e4326a143b9c6217e0f28c96 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1477743 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile.nvgpu | 1 + drivers/gpu/nvgpu/common/linux/vm.c | 421 +++++++++++++++++++++ drivers/gpu/nvgpu/common/linux/vm_priv.h | 36 +- drivers/gpu/nvgpu/common/mm/vm.c | 65 +++- drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 2 +- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 4 +- drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 2 +- drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 4 +- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 622 +++---------------------------- drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 52 +-- drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 2 +- drivers/gpu/nvgpu/include/nvgpu/vm.h | 78 +++- drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 2 +- 13 files changed, 661 insertions(+), 630 deletions(-) create mode 100644 drivers/gpu/nvgpu/common/linux/vm.c diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index 0a60eece..2f9d1b36 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu @@ -39,6 +39,7 @@ nvgpu-y := \ common/linux/driver_common.o \ common/linux/firmware.o \ common/linux/thread.o \ + common/linux/vm.o \ common/mm/nvgpu_allocator.o \ common/mm/bitmap_allocator.o \ 
common/mm/buddy_allocator.o \ diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c new file mode 100644 index 00000000..8b9d6f96 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vm.c @@ -0,0 +1,421 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include +#include + +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/mm_gk20a.h" + +#include "vm_priv.h" + +static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse( + struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind) +{ + struct nvgpu_rbtree_node *node = NULL; + struct nvgpu_rbtree_node *root = vm->mapped_buffers; + + nvgpu_rbtree_enum_start(0, &node, root); + + while (node) { + struct nvgpu_mapped_buf *mapped_buffer = + mapped_buffer_from_rbtree_node(node); + + if (mapped_buffer->dmabuf == dmabuf && + kind == mapped_buffer->kind) + return mapped_buffer; + + nvgpu_rbtree_enum_next(&node, node); + } + + return NULL; +} + +/* + * Determine alignment for a passed buffer. Necessary since the buffer may + * appear big to map with large pages but the SGL may have chunks that are not + * aligned on a 64/128kB large page boundary. 
+ */ +static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl, + enum nvgpu_aperture aperture) +{ + u64 align = 0, chunk_align = 0; + u64 buf_addr; + + if (aperture == APERTURE_VIDMEM) { + struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl); + struct page_alloc_chunk *chunk = NULL; + + nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, + page_alloc_chunk, list_entry) { + chunk_align = 1ULL << __ffs(chunk->base | + chunk->length); + + if (align) + align = min(align, chunk_align); + else + align = chunk_align; + } + + return align; + } + + buf_addr = (u64)sg_dma_address(sgl); + + if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) { + while (sgl) { + buf_addr = (u64)sg_phys(sgl); + chunk_align = 1ULL << __ffs(buf_addr | + (u64)sgl->length); + + if (align) + align = min(align, chunk_align); + else + align = chunk_align; + sgl = sg_next(sgl); + } + + return align; + } + + align = 1ULL << __ffs(buf_addr); + + return align; +} + +/* + * vm->update_gmmu_lock must be held. This checks to see if we already have + * mapped the passed buffer into this VM. If so, just return the existing + * mapping address. 
+ */ +static u64 __nvgpu_vm_find_mapping(struct vm_gk20a *vm, + struct dma_buf *dmabuf, + u64 offset_align, + u32 flags, + int kind, + bool user_mapped, + int rw_flag) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct nvgpu_mapped_buf *mapped_buffer = NULL; + + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { + mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset_align); + if (!mapped_buffer) + return 0; + + if (mapped_buffer->dmabuf != dmabuf || + mapped_buffer->kind != (u32)kind) + return 0; + } else { + mapped_buffer = + __nvgpu_vm_find_mapped_buf_reverse(vm, dmabuf, kind); + if (!mapped_buffer) + return 0; + } + + if (mapped_buffer->flags != flags) + return 0; + + /* mark the buffer as used */ + if (user_mapped) { + if (mapped_buffer->user_mapped == 0) + vm->num_user_mapped_buffers++; + mapped_buffer->user_mapped++; + + /* If the mapping comes from user space, we own + * the handle ref. Since we reuse an + * existing mapping here, we need to give back those + * refs once in order not to leak. 
+ */ + if (mapped_buffer->own_mem_ref) + dma_buf_put(mapped_buffer->dmabuf); + else + mapped_buffer->own_mem_ref = true; + } + kref_get(&mapped_buffer->ref); + + nvgpu_log(g, gpu_dbg_map, + "gv: 0x%04x_%08x + 0x%-7zu " + "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] " + "pgsz=%-3dKb as=%-2d ctags=%d start=%d " + "flags=0x%x apt=%s (reused)", + u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr), + dmabuf->size, + u64_hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)), + u64_lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)), + u64_hi32((u64)sg_phys(mapped_buffer->sgt->sgl)), + u64_lo32((u64)sg_phys(mapped_buffer->sgt->sgl)), + vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, + vm_aspace_id(vm), + mapped_buffer->ctag_lines, mapped_buffer->ctag_offset, + mapped_buffer->flags, + nvgpu_aperture_str(gk20a_dmabuf_aperture(g, dmabuf))); + + return mapped_buffer->addr; +} + +u64 nvgpu_vm_map(struct vm_gk20a *vm, + struct dma_buf *dmabuf, + u64 offset_align, + u32 flags, + int kind, + bool user_mapped, + int rw_flag, + u64 buffer_offset, + u64 mapping_size, + struct vm_gk20a_mapping_batch *batch) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct gk20a_comptag_allocator *ctag_allocator = &g->gr.comp_tags; + struct nvgpu_mapped_buf *mapped_buffer = NULL; + bool inserted = false, va_allocated = false; + u64 map_offset = 0; + int err = 0; + struct buffer_attrs bfr = {NULL}; + struct gk20a_comptags comptags; + bool clear_ctags = false; + struct scatterlist *sgl; + u64 ctag_map_win_size = 0; + u32 ctag_map_win_ctagline = 0; + struct vm_reserved_va_node *va_node = NULL; + u32 ctag_offset; + enum nvgpu_aperture aperture; + + if (user_mapped && vm->userspace_managed && + !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) { + nvgpu_err(g, "non-fixed-offset mapping not available on " + "userspace managed address spaces"); + return 0; /* 0 is the failure value here; -EFAULT as u64 would pass for a GPU VA */ + } + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + + /* check if this buffer is already mapped */ + if (!vm->userspace_managed) { + 
map_offset = __nvgpu_vm_find_mapping( + vm, dmabuf, offset_align, + flags, kind, + user_mapped, rw_flag); + if (map_offset) { + nvgpu_mutex_release(&vm->update_gmmu_lock); + return map_offset; + } + } + + /* pin buffer to get phys/iovmm addr */ + bfr.sgt = gk20a_mm_pin(g->dev, dmabuf); + if (IS_ERR(bfr.sgt)) { + /* Falling back to physical is actually possible + * here in many cases if we use 4K phys pages in the + * gmmu. However we have some regions which require + * contig regions to work properly (either phys-contig + * or contig through smmu io_vaspace). Until we can + * track the difference between those two cases we have + * to fail the mapping when we run out of SMMU space. + */ + nvgpu_warn(g, "failed to pin dma_buf for mapping"); + goto clean_up; + } + + bfr.kind_v = kind; + bfr.size = dmabuf->size; + sgl = bfr.sgt->sgl; + + aperture = gk20a_dmabuf_aperture(g, dmabuf); + if (aperture == APERTURE_INVALID) { + err = -EINVAL; + goto clean_up; + } + + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) + map_offset = offset_align; + + bfr.align = nvgpu_get_buffer_alignment(g, sgl, aperture); + bfr.pgsz_idx = __get_pte_size(vm, map_offset, + min_t(u64, bfr.size, bfr.align)); + mapping_size = mapping_size ? 
mapping_size : bfr.size; + + /* Check if we should use a fixed offset for mapping this buffer */ + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { + err = validate_fixed_buffer(vm, &bfr, + offset_align, mapping_size, + &va_node); + if (err) + goto clean_up; + + map_offset = offset_align; + va_allocated = false; + } else + va_allocated = true; + + err = setup_buffer_kind_and_compression(vm, flags, &bfr, bfr.pgsz_idx); + if (unlikely(err)) { + nvgpu_err(g, "failure setting up kind and compression"); + goto clean_up; + } + + /* bar1 and pmu vm don't need ctag */ + if (!vm->enable_ctag) + bfr.ctag_lines = 0; + + gk20a_get_comptags(g->dev, dmabuf, &comptags); + + /* ensure alignment to compression page size if compression enabled */ + if (bfr.ctag_offset) + mapping_size = ALIGN(mapping_size, + g->ops.fb.compression_page_size(g)); + + if (bfr.ctag_lines && !comptags.lines) { + const bool user_mappable = + !!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS); + + /* allocate compression resources if needed */ + err = gk20a_alloc_comptags(g, g->dev, dmabuf, ctag_allocator, + bfr.ctag_lines, user_mappable, + &ctag_map_win_size, + &ctag_map_win_ctagline); + if (err) { + /* ok to fall back here if we ran out */ + /* TBD: we can partially alloc ctags as well... */ + bfr.kind_v = bfr.uc_kind_v; + } else { + gk20a_get_comptags(g->dev, dmabuf, &comptags); + + if (g->ops.ltc.cbc_ctrl) + g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, + comptags.offset, + comptags.offset + + comptags.allocated_lines - 1); + else + clear_ctags = true; + } + } + + /* store the comptag info */ + bfr.ctag_offset = comptags.offset; + bfr.ctag_lines = comptags.lines; + bfr.ctag_allocated_lines = comptags.allocated_lines; + bfr.ctag_user_mappable = comptags.user_mappable; + + /* + * Calculate comptag index for this mapping. Differs in + * case of partial mapping. 
+ */ + ctag_offset = comptags.offset; + if (ctag_offset) + ctag_offset += buffer_offset >> + ilog2(g->ops.fb.compression_page_size(g)); + + /* update gmmu ptes */ + map_offset = g->ops.mm.gmmu_map(vm, map_offset, + bfr.sgt, + buffer_offset, /* sg offset */ + mapping_size, + bfr.pgsz_idx, + bfr.kind_v, + ctag_offset, + flags, rw_flag, + clear_ctags, + false, + false, + batch, + aperture); + if (!map_offset) + goto clean_up; + + mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); + if (!mapped_buffer) { + nvgpu_warn(g, "oom allocating tracking buffer"); + goto clean_up; + } + mapped_buffer->dmabuf = dmabuf; + mapped_buffer->sgt = bfr.sgt; + mapped_buffer->addr = map_offset; + mapped_buffer->size = mapping_size; + mapped_buffer->pgsz_idx = bfr.pgsz_idx; + mapped_buffer->ctag_offset = bfr.ctag_offset; + mapped_buffer->ctag_lines = bfr.ctag_lines; + mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines; + mapped_buffer->ctags_mappable = bfr.ctag_user_mappable; + mapped_buffer->ctag_map_win_size = ctag_map_win_size; + mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline; + mapped_buffer->vm = vm; + mapped_buffer->flags = flags; + mapped_buffer->kind = kind; + mapped_buffer->va_allocated = va_allocated; + mapped_buffer->user_mapped = user_mapped ? 
1 : 0; + mapped_buffer->own_mem_ref = user_mapped; + nvgpu_init_list_node(&mapped_buffer->buffer_list); + kref_init(&mapped_buffer->ref); + + err = nvgpu_insert_mapped_buf(vm, mapped_buffer); + if (err) { + nvgpu_err(g, "failed to insert into mapped buffer tree"); + goto clean_up; + } + inserted = true; + if (user_mapped) + vm->num_user_mapped_buffers++; + + if (va_node) { + nvgpu_list_add_tail(&mapped_buffer->buffer_list, + &va_node->buffer_list_head); + mapped_buffer->va_node = va_node; + } + + nvgpu_mutex_release(&vm->update_gmmu_lock); + + return map_offset; + +clean_up: + if (inserted) { + nvgpu_remove_mapped_buf(vm, mapped_buffer); + if (user_mapped) + vm->num_user_mapped_buffers--; + } + nvgpu_kfree(g, mapped_buffer); + if (va_allocated) + gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx); + if (!IS_ERR(bfr.sgt)) + gk20a_mm_unpin(g->dev, dmabuf, bfr.sgt); + + nvgpu_mutex_release(&vm->update_gmmu_lock); + nvgpu_log_info(g, "err=%d\n", err); + return 0; +} + +void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset) +{ + struct gk20a *g = vm->mm->g; + struct nvgpu_mapped_buf *mapped_buffer; + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset); + if (!mapped_buffer) { + nvgpu_mutex_release(&vm->update_gmmu_lock); + nvgpu_err(g, "invalid addr to unmap 0x%llx", offset); + return; + } + + kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref); + nvgpu_mutex_release(&vm->update_gmmu_lock); +} diff --git a/drivers/gpu/nvgpu/common/linux/vm_priv.h b/drivers/gpu/nvgpu/common/linux/vm_priv.h index c0fb0ffe..9e064d76 100644 --- a/drivers/gpu/nvgpu/common/linux/vm_priv.h +++ b/drivers/gpu/nvgpu/common/linux/vm_priv.h @@ -25,12 +25,24 @@ struct dma_buf; struct vm_gk20a; struct vm_gk20a_mapping_batch; +struct buffer_attrs { + struct sg_table *sgt; + u64 size; + u64 align; + u32 ctag_offset; + u32 ctag_lines; + u32 ctag_allocated_lines; + int pgsz_idx; + u8 kind_v; + u8 uc_kind_v; + bool ctag_user_mappable; +}; + u64 
nvgpu_vm_map(struct vm_gk20a *vm, struct dma_buf *dmabuf, u64 offset_align, - u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/, + u32 flags, int kind, - struct sg_table **sgt, bool user_mapped, int rw_flag, u64 buffer_offset, @@ -59,4 +71,24 @@ void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset); int nvgpu_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, struct dma_buf **dmabuf, u64 *offset); + +enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g, + struct dma_buf *dmabuf); +int validate_fixed_buffer(struct vm_gk20a *vm, + struct buffer_attrs *bfr, + u64 map_offset, u64 map_size, + struct vm_reserved_va_node **pva_node); +int setup_buffer_kind_and_compression(struct vm_gk20a *vm, + u32 flags, + struct buffer_attrs *bfr, + enum gmmu_pgsz_gk20a pgsz_idx); +int gk20a_alloc_comptags(struct gk20a *g, + struct device *dev, + struct dma_buf *dmabuf, + struct gk20a_comptag_allocator *allocator, + u32 lines, bool user_mappable, + u64 *ctag_map_win_size, + u32 *ctag_map_win_ctagline); +void gk20a_vm_unmap_locked_kref(struct kref *ref); + #endif diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c index eaf30fd0..635ac0fb 100644 --- a/drivers/gpu/nvgpu/common/mm/vm.c +++ b/drivers/gpu/nvgpu/common/mm/vm.c @@ -23,6 +23,11 @@ #include "gk20a/gk20a.h" #include "gk20a/mm_gk20a.h" +int vm_aspace_id(struct vm_gk20a *vm) +{ + return vm->as_share ? 
vm->as_share->id : -1; +} + void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch) { memset(mapping_batch, 0, sizeof(*mapping_batch)); @@ -52,7 +57,7 @@ void nvgpu_vm_mapping_batch_finish(struct vm_gk20a *vm, void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm) { - struct mapped_buffer_node *mapped_buffer; + struct nvgpu_mapped_buf *mapped_buffer; struct vm_reserved_va_node *va_node, *va_node_tmp; struct nvgpu_rbtree_node *node = NULL; struct gk20a *g = vm->mm->g; @@ -118,7 +123,7 @@ void nvgpu_vm_put(struct vm_gk20a *vm) kref_put(&vm->ref, nvgpu_vm_remove_support_kref); } -void gk20a_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block) +void nvgpu_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block) { struct gk20a *g = vm->mm->g; @@ -127,3 +132,59 @@ void gk20a_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block) gk20a_free_inst_block(g, inst_block); nvgpu_vm_remove_support_nofree(vm); } + +int nvgpu_insert_mapped_buf(struct vm_gk20a *vm, + struct nvgpu_mapped_buf *mapped_buffer) +{ + mapped_buffer->node.key_start = mapped_buffer->addr; + mapped_buffer->node.key_end = mapped_buffer->addr + mapped_buffer->size; + + nvgpu_rbtree_insert(&mapped_buffer->node, &vm->mapped_buffers); + + return 0; +} + +void nvgpu_remove_mapped_buf(struct vm_gk20a *vm, + struct nvgpu_mapped_buf *mapped_buffer) +{ + nvgpu_rbtree_unlink(&mapped_buffer->node, &vm->mapped_buffers); +} + +struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf( + struct vm_gk20a *vm, u64 addr) +{ + struct nvgpu_rbtree_node *node = NULL; + struct nvgpu_rbtree_node *root = vm->mapped_buffers; + + nvgpu_rbtree_search(addr, &node, root); + if (!node) + return NULL; + + return mapped_buffer_from_rbtree_node(node); +} + +struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_range( + struct vm_gk20a *vm, u64 addr) +{ + struct nvgpu_rbtree_node *node = NULL; + struct nvgpu_rbtree_node *root = vm->mapped_buffers; + + nvgpu_rbtree_range_search(addr, &node, root); + if 
(!node) + return NULL; + + return mapped_buffer_from_rbtree_node(node); +} + +struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_less_than( + struct vm_gk20a *vm, u64 addr) +{ + struct nvgpu_rbtree_node *node = NULL; + struct nvgpu_rbtree_node *root = vm->mapped_buffers; + + nvgpu_rbtree_less_than_search(addr, &node, root); + if (!node) + return NULL; + + return mapped_buffer_from_rbtree_node(node); +} diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index cf95019b..d2bb3ee9 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c @@ -1025,7 +1025,7 @@ __releases(&cde_app->mutex) get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */ map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0, NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, - compbits_kind, NULL, true, + compbits_kind, true, gk20a_mem_flag_none, map_offset, map_size, NULL); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index b7fb363e..9e3bc05e 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -1406,7 +1406,7 @@ static void trace_write_pushbuffer(struct channel_gk20a *c, int err; words = pbdma_gp_entry1_length_v(g->entry1); - err = nvgpu_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset); + err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset); if (!err) mem = dma_buf_vmap(dmabuf); } @@ -1903,7 +1903,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, bool skip_buffer_refcounting) { struct vm_gk20a *vm = c->vm; - struct mapped_buffer_node **mapped_buffers = NULL; + struct nvgpu_mapped_buf **mapped_buffers = NULL; int err = 0, num_mapped_buffers = 0; bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 1cbf7689..bbc1a72a 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ 
b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -60,7 +60,7 @@ struct channel_ctx_gk20a { }; struct channel_gk20a_job { - struct mapped_buffer_node **mapped_buffers; + struct nvgpu_mapped_buf **mapped_buffers; int num_mapped_buffers; struct gk20a_fence *pre_fence; struct gk20a_fence *post_fence; diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 5351750a..54317195 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -1924,7 +1924,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, err_unmap: nvgpu_vm_unmap_buffer(vm, args->offset, NULL); err_remove_vm: - gk20a_remove_vm(vm, &mm->perfbuf.inst_block); + nvgpu_remove_vm(vm, &mm->perfbuf.inst_block); nvgpu_mutex_release(&g->dbg_sessions_lock); return err; } @@ -1962,7 +1962,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset) err = gk20a_perfbuf_disable_locked(g); nvgpu_vm_unmap_buffer(vm, offset, NULL); - gk20a_remove_vm(vm, &mm->perfbuf.inst_block); + nvgpu_remove_vm(vm, &mm->perfbuf.inst_block); g->perfbuf.owner = NULL; g->perfbuf.offset = 0; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 72a3ee13..84919d50 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -121,25 +122,6 @@ struct nvgpu_page_alloc *get_vidmem_page_alloc(struct scatterlist *sgl) * */ -static inline int vm_aspace_id(struct vm_gk20a *vm) -{ - /* -1 is bar1 or pmu, etc. */ - return vm->as_share ? 
vm->as_share->id : -1; -} -static inline u32 hi32(u64 f) -{ - return (u32)(f >> 32); -} -static inline u32 lo32(u64 f) -{ - return (u32)(f & 0xffffffff); -} - -static struct mapped_buffer_node *find_mapped_buffer_locked( - struct nvgpu_rbtree_node *root, u64 addr); -static struct mapped_buffer_node *find_mapped_buffer_reverse_locked( - struct nvgpu_rbtree_node *root, struct dma_buf *dmabuf, - u32 kind); static int update_gmmu_ptes_locked(struct vm_gk20a *vm, enum gmmu_pgsz_gk20a pgsz_idx, struct sg_table *sgt, u64 buffer_offset, @@ -316,13 +298,13 @@ void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf, *comptags = priv->comptags; } -static int gk20a_alloc_comptags(struct gk20a *g, - struct device *dev, - struct dma_buf *dmabuf, - struct gk20a_comptag_allocator *allocator, - u32 lines, bool user_mappable, - u64 *ctag_map_win_size, - u32 *ctag_map_win_ctagline) +int gk20a_alloc_comptags(struct gk20a *g, + struct device *dev, + struct dma_buf *dmabuf, + struct gk20a_comptag_allocator *allocator, + u32 lines, bool user_mappable, + u64 *ctag_map_win_size, + u32 *ctag_map_win_ctagline) { struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); u32 ctaglines_allocsize; @@ -493,9 +475,9 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm) g->ops.mm.remove_bar2_vm(g); if (g->ops.mm.is_bar1_supported(g)) - gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block); + nvgpu_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block); - gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block); + nvgpu_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block); gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block); nvgpu_vm_remove_support_nofree(&mm->cde.vm); @@ -1097,11 +1079,11 @@ static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm, } int nvgpu_vm_get_buffers(struct vm_gk20a *vm, - struct mapped_buffer_node ***mapped_buffers, + struct nvgpu_mapped_buf ***mapped_buffers, int *num_buffers) { - struct mapped_buffer_node *mapped_buffer; - struct 
mapped_buffer_node **buffer_list; + struct nvgpu_mapped_buf *mapped_buffer; + struct nvgpu_mapped_buf **buffer_list; struct nvgpu_rbtree_node *node = NULL; int i = 0; @@ -1141,15 +1123,15 @@ int nvgpu_vm_get_buffers(struct vm_gk20a *vm, return 0; } -static void gk20a_vm_unmap_locked_kref(struct kref *ref) +void gk20a_vm_unmap_locked_kref(struct kref *ref) { - struct mapped_buffer_node *mapped_buffer = - container_of(ref, struct mapped_buffer_node, ref); + struct nvgpu_mapped_buf *mapped_buffer = + container_of(ref, struct nvgpu_mapped_buf, ref); nvgpu_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch); } void nvgpu_vm_put_buffers(struct vm_gk20a *vm, - struct mapped_buffer_node **mapped_buffers, + struct nvgpu_mapped_buf **mapped_buffers, int num_buffers) { int i; @@ -1177,11 +1159,11 @@ static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset, struct vm_gk20a_mapping_batch *batch) { struct gk20a *g = vm->mm->g; - struct mapped_buffer_node *mapped_buffer; + struct nvgpu_mapped_buf *mapped_buffer; nvgpu_mutex_acquire(&vm->update_gmmu_lock); - mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, offset); + mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset); if (!mapped_buffer) { nvgpu_mutex_release(&vm->update_gmmu_lock); nvgpu_err(g, "invalid addr to unmap 0x%llx", offset); @@ -1273,100 +1255,10 @@ int gk20a_vm_free_va(struct vm_gk20a *vm, return 0; } - -static void remove_mapped_buffer(struct vm_gk20a *vm, - struct mapped_buffer_node *mapped_buffer) -{ - nvgpu_rbtree_unlink(&mapped_buffer->node, &vm->mapped_buffers); -} - -static int insert_mapped_buffer(struct vm_gk20a *vm, - struct mapped_buffer_node *mapped_buffer) -{ - mapped_buffer->node.key_start = mapped_buffer->addr; - mapped_buffer->node.key_end = mapped_buffer->addr + mapped_buffer->size; - - nvgpu_rbtree_insert(&mapped_buffer->node, &vm->mapped_buffers); - - return 0; -} - -static struct mapped_buffer_node *find_mapped_buffer_reverse_locked( - struct nvgpu_rbtree_node 
*root, struct dma_buf *dmabuf, - u32 kind) -{ - struct nvgpu_rbtree_node *node = NULL; - - nvgpu_rbtree_enum_start(0, &node, root); - - while (node) { - struct mapped_buffer_node *mapped_buffer = - mapped_buffer_from_rbtree_node(node); - - if (mapped_buffer->dmabuf == dmabuf && - kind == mapped_buffer->kind) - return mapped_buffer; - - nvgpu_rbtree_enum_next(&node, node); - } - - return NULL; -} - -static struct mapped_buffer_node *find_mapped_buffer_locked( - struct nvgpu_rbtree_node *root, u64 addr) -{ - struct nvgpu_rbtree_node *node = NULL; - - nvgpu_rbtree_search(addr, &node, root); - if (!node) - return NULL; - - return mapped_buffer_from_rbtree_node(node); -} - -static struct mapped_buffer_node *find_mapped_buffer_range_locked( - struct nvgpu_rbtree_node *root, u64 addr) -{ - struct nvgpu_rbtree_node *node = NULL; - - nvgpu_rbtree_range_search(addr, &node, root); - if (!node) - return NULL; - - return mapped_buffer_from_rbtree_node(node); -} - -/* find the first mapped buffer with GPU VA less than addr */ -static struct mapped_buffer_node *find_mapped_buffer_less_than_locked( - struct nvgpu_rbtree_node *root, u64 addr) -{ - struct nvgpu_rbtree_node *node = NULL; - - nvgpu_rbtree_less_than_search(addr, &node, root); - if (!node) - return NULL; - - return mapped_buffer_from_rbtree_node(node); -} - -struct buffer_attrs { - struct sg_table *sgt; - u64 size; - u64 align; - u32 ctag_offset; - u32 ctag_lines; - u32 ctag_allocated_lines; - int pgsz_idx; - u8 kind_v; - u8 uc_kind_v; - bool ctag_user_mappable; -}; - -static int setup_buffer_kind_and_compression(struct vm_gk20a *vm, - u32 flags, - struct buffer_attrs *bfr, - enum gmmu_pgsz_gk20a pgsz_idx) +int setup_buffer_kind_and_compression(struct vm_gk20a *vm, + u32 flags, + struct buffer_attrs *bfr, + enum gmmu_pgsz_gk20a pgsz_idx) { bool kind_compressible; struct gk20a *g = gk20a_from_vm(vm); @@ -1409,14 +1301,14 @@ static int setup_buffer_kind_and_compression(struct vm_gk20a *vm, return 0; } -static int 
validate_fixed_buffer(struct vm_gk20a *vm, - struct buffer_attrs *bfr, - u64 map_offset, u64 map_size, - struct vm_reserved_va_node **pva_node) +int validate_fixed_buffer(struct vm_gk20a *vm, + struct buffer_attrs *bfr, + u64 map_offset, u64 map_size, + struct vm_reserved_va_node **pva_node) { struct gk20a *g = vm->mm->g; struct vm_reserved_va_node *va_node; - struct mapped_buffer_node *buffer; + struct nvgpu_mapped_buf *buffer; u64 map_end = map_offset + map_size; /* can wrap around with insane map_size; zero is disallowed too */ @@ -1448,8 +1340,8 @@ static int validate_fixed_buffer(struct vm_gk20a *vm, /* check that this mapping does not collide with existing * mappings by checking the buffer with the highest GPU VA * that is less than our buffer end */ - buffer = find_mapped_buffer_less_than_locked( - vm->mapped_buffers, map_offset + map_size); + buffer = __nvgpu_vm_find_mapped_buf_less_than( + vm, map_offset + map_size); if (buffer && buffer->addr + buffer->size > map_offset) { nvgpu_warn(g, "overlapping buffer map requested"); return -EINVAL; @@ -1499,11 +1391,11 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] " "pgsz=%-3dKb as=%-2d ctags=%d start=%d " "kind=0x%x flags=0x%x apt=%s", - hi32(map_offset), lo32(map_offset), size, - sgt ? hi32((u64)sg_dma_address(sgt->sgl)) : 0, - sgt ? lo32((u64)sg_dma_address(sgt->sgl)) : 0, - sgt ? hi32((u64)sg_phys(sgt->sgl)) : 0, - sgt ? lo32((u64)sg_phys(sgt->sgl)) : 0, + u64_hi32(map_offset), u64_lo32(map_offset), size, + sgt ? u64_hi32((u64)sg_dma_address(sgt->sgl)) : 0, + sgt ? u64_lo32((u64)sg_dma_address(sgt->sgl)) : 0, + sgt ? u64_hi32((u64)sg_phys(sgt->sgl)) : 0, + sgt ? 
u64_lo32((u64)sg_phys(sgt->sgl)) : 0, vm->gmmu_page_sizes[pgsz_idx] >> 10, vm_aspace_id(vm), ctag_lines, ctag_offset, kind_v, flags, nvgpu_aperture_str(aperture)); @@ -1595,8 +1487,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, } } -static enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g, - struct dma_buf *dmabuf) +enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g, + struct dma_buf *dmabuf) { struct gk20a *buf_owner = gk20a_vidmem_buf_owner(dmabuf); if (buf_owner == NULL) { @@ -1617,80 +1509,6 @@ static enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g, } } -static u64 nvgpu_vm_map_duplicate_locked(struct vm_gk20a *vm, - struct dma_buf *dmabuf, - u64 offset_align, - u32 flags, - int kind, - struct sg_table **sgt, - bool user_mapped, - int rw_flag) -{ - struct gk20a *g = gk20a_from_vm(vm); - struct mapped_buffer_node *mapped_buffer = NULL; - - if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { - mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, - offset_align); - if (!mapped_buffer) - return 0; - - if (mapped_buffer->dmabuf != dmabuf || - mapped_buffer->kind != (u32)kind) - return 0; - } else { - mapped_buffer = - find_mapped_buffer_reverse_locked(vm->mapped_buffers, - dmabuf, kind); - if (!mapped_buffer) - return 0; - } - - if (mapped_buffer->flags != flags) - return 0; - - BUG_ON(mapped_buffer->vm != vm); - - /* mark the buffer as used */ - if (user_mapped) { - if (mapped_buffer->user_mapped == 0) - vm->num_user_mapped_buffers++; - mapped_buffer->user_mapped++; - - /* If the mapping comes from user space, we own - * the handle ref. Since we reuse an - * existing mapping here, we need to give back those - * refs once in order not to leak. 
- */ - if (mapped_buffer->own_mem_ref) - dma_buf_put(mapped_buffer->dmabuf); - else - mapped_buffer->own_mem_ref = true; - } - kref_get(&mapped_buffer->ref); - - gk20a_dbg(gpu_dbg_map, - "gv: 0x%04x_%08x + 0x%-7zu " - "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] " - "pgsz=%-3dKb as=%-2d ctags=%d start=%d " - "flags=0x%x apt=%s (reused)", - hi32(mapped_buffer->addr), lo32(mapped_buffer->addr), - dmabuf->size, - hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)), - lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)), - hi32((u64)sg_phys(mapped_buffer->sgt->sgl)), - lo32((u64)sg_phys(mapped_buffer->sgt->sgl)), - vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, - vm_aspace_id(vm), - mapped_buffer->ctag_lines, mapped_buffer->ctag_offset, - mapped_buffer->flags, - nvgpu_aperture_str(gk20a_dmabuf_aperture(g, dmabuf))); - - if (sgt) - *sgt = mapped_buffer->sgt; - return mapped_buffer->addr; -} - #if defined(CONFIG_GK20A_VIDMEM) static struct sg_table *gk20a_vidbuf_map_dma_buf( struct dma_buf_attachment *attach, enum dma_data_direction dir) @@ -1919,310 +1737,6 @@ int gk20a_vidbuf_access_memory(struct gk20a *g, struct dma_buf *dmabuf, #endif } -static u64 gk20a_mm_get_align(struct gk20a *g, struct scatterlist *sgl, - enum nvgpu_aperture aperture) -{ - u64 align = 0, chunk_align = 0; - u64 buf_addr; - - if (aperture == APERTURE_VIDMEM) { - struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl); - struct page_alloc_chunk *chunk = NULL; - - nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, - page_alloc_chunk, list_entry) { - chunk_align = 1ULL << __ffs(chunk->base | chunk->length); - - if (align) - align = min(align, chunk_align); - else - align = chunk_align; - } - - return align; - } - - buf_addr = (u64)sg_dma_address(sgl); - - if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) { - while (sgl) { - buf_addr = (u64)sg_phys(sgl); - chunk_align = 1ULL << __ffs(buf_addr | (u64)sgl->length); - - if (align) - align = min(align, chunk_align); - else - align = 
chunk_align; - sgl = sg_next(sgl); - } - - return align; - } - - align = 1ULL << __ffs(buf_addr); - - return align; -} - -u64 nvgpu_vm_map(struct vm_gk20a *vm, - struct dma_buf *dmabuf, - u64 offset_align, - u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/, - int kind, - struct sg_table **sgt, - bool user_mapped, - int rw_flag, - u64 buffer_offset, - u64 mapping_size, - struct vm_gk20a_mapping_batch *batch) -{ - struct gk20a *g = gk20a_from_vm(vm); - struct gk20a_comptag_allocator *ctag_allocator = &g->gr.comp_tags; - struct device *d = dev_from_vm(vm); - struct mapped_buffer_node *mapped_buffer = NULL; - bool inserted = false, va_allocated = false; - u64 map_offset = 0; - int err = 0; - struct buffer_attrs bfr = {NULL}; - struct gk20a_comptags comptags; - bool clear_ctags = false; - struct scatterlist *sgl; - u64 ctag_map_win_size = 0; - u32 ctag_map_win_ctagline = 0; - struct vm_reserved_va_node *va_node = NULL; - u32 ctag_offset; - enum nvgpu_aperture aperture; - - if (user_mapped && vm->userspace_managed && - !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) { - nvgpu_err(g, - "%s: non-fixed-offset mapping not available on userspace managed address spaces", - __func__); - return -EFAULT; - } - - nvgpu_mutex_acquire(&vm->update_gmmu_lock); - - /* check if this buffer is already mapped */ - if (!vm->userspace_managed) { - map_offset = nvgpu_vm_map_duplicate_locked( - vm, dmabuf, offset_align, - flags, kind, sgt, - user_mapped, rw_flag); - if (map_offset) { - nvgpu_mutex_release(&vm->update_gmmu_lock); - return map_offset; - } - } - - /* pin buffer to get phys/iovmm addr */ - bfr.sgt = gk20a_mm_pin(d, dmabuf); - if (IS_ERR(bfr.sgt)) { - /* Falling back to physical is actually possible - * here in many cases if we use 4K phys pages in the - * gmmu. However we have some regions which require - * contig regions to work properly (either phys-contig - * or contig through smmu io_vaspace). 
Until we can - * track the difference between those two cases we have - * to fail the mapping when we run out of SMMU space. - */ - nvgpu_warn(g, "oom allocating tracking buffer"); - goto clean_up; - } - - if (sgt) - *sgt = bfr.sgt; - - bfr.kind_v = kind; - bfr.size = dmabuf->size; - sgl = bfr.sgt->sgl; - - aperture = gk20a_dmabuf_aperture(g, dmabuf); - if (aperture == APERTURE_INVALID) { - err = -EINVAL; - goto clean_up; - } - - if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) - map_offset = offset_align; - - bfr.align = gk20a_mm_get_align(g, sgl, aperture); - bfr.pgsz_idx = __get_pte_size(vm, map_offset, - min_t(u64, bfr.size, bfr.align)); - mapping_size = mapping_size ? mapping_size : bfr.size; - - /* Check if we should use a fixed offset for mapping this buffer */ - if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { - err = validate_fixed_buffer(vm, &bfr, - offset_align, mapping_size, - &va_node); - if (err) - goto clean_up; - - map_offset = offset_align; - va_allocated = false; - } else - va_allocated = true; - - if (sgt) - *sgt = bfr.sgt; - - err = setup_buffer_kind_and_compression(vm, flags, &bfr, bfr.pgsz_idx); - if (unlikely(err)) { - nvgpu_err(g, "failure setting up kind and compression"); - goto clean_up; - } - - /* bar1 and pmu vm don't need ctag */ - if (!vm->enable_ctag) - bfr.ctag_lines = 0; - - gk20a_get_comptags(d, dmabuf, &comptags); - - /* ensure alignment to compression page size if compression enabled */ - if (bfr.ctag_offset) - mapping_size = ALIGN(mapping_size, - g->ops.fb.compression_page_size(g)); - - if (bfr.ctag_lines && !comptags.lines) { - const bool user_mappable = - !!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS); - - /* allocate compression resources if needed */ - err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator, - bfr.ctag_lines, user_mappable, - &ctag_map_win_size, - &ctag_map_win_ctagline); - if (err) { - /* ok to fall back here if we ran out */ - /* TBD: we can partially alloc ctags as well... 
*/ - bfr.kind_v = bfr.uc_kind_v; - } else { - gk20a_get_comptags(d, dmabuf, &comptags); - - if (g->ops.ltc.cbc_ctrl) - g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, - comptags.offset, - comptags.offset + - comptags.allocated_lines - 1); - else - clear_ctags = true; - } - } - - /* store the comptag info */ - bfr.ctag_offset = comptags.offset; - bfr.ctag_lines = comptags.lines; - bfr.ctag_allocated_lines = comptags.allocated_lines; - bfr.ctag_user_mappable = comptags.user_mappable; - - /* - * Calculate comptag index for this mapping. Differs in - * case of partial mapping. - */ - ctag_offset = comptags.offset; - if (ctag_offset) - ctag_offset += buffer_offset >> - ilog2(g->ops.fb.compression_page_size(g)); - - /* update gmmu ptes */ - map_offset = g->ops.mm.gmmu_map(vm, map_offset, - bfr.sgt, - buffer_offset, /* sg offset */ - mapping_size, - bfr.pgsz_idx, - bfr.kind_v, - ctag_offset, - flags, rw_flag, - clear_ctags, - false, - false, - batch, - aperture); - if (!map_offset) - goto clean_up; - -#if defined(NVHOST_DEBUG) - { - int i; - struct scatterlist *sg = NULL; - gk20a_dbg(gpu_dbg_pte, "for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i)"); - for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i ) { - u64 da = sg_dma_address(sg); - u64 pa = sg_phys(sg); - u64 len = sg->length; - gk20a_dbg(gpu_dbg_pte, "i=%d pa=0x%x,%08x da=0x%x,%08x len=0x%x,%08x", - i, hi32(pa), lo32(pa), hi32(da), lo32(da), - hi32(len), lo32(len)); - } - } -#endif - - /* keep track of the buffer for unmapping */ - /* TBD: check for multiple mapping of same buffer */ - mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); - if (!mapped_buffer) { - nvgpu_warn(g, "oom allocating tracking buffer"); - goto clean_up; - } - mapped_buffer->dmabuf = dmabuf; - mapped_buffer->sgt = bfr.sgt; - mapped_buffer->addr = map_offset; - mapped_buffer->size = mapping_size; - mapped_buffer->pgsz_idx = bfr.pgsz_idx; - mapped_buffer->ctag_offset = bfr.ctag_offset; - mapped_buffer->ctag_lines = bfr.ctag_lines; - 
mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines; - mapped_buffer->ctags_mappable = bfr.ctag_user_mappable; - mapped_buffer->ctag_map_win_size = ctag_map_win_size; - mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline; - mapped_buffer->vm = vm; - mapped_buffer->flags = flags; - mapped_buffer->kind = kind; - mapped_buffer->va_allocated = va_allocated; - mapped_buffer->user_mapped = user_mapped ? 1 : 0; - mapped_buffer->own_mem_ref = user_mapped; - nvgpu_init_list_node(&mapped_buffer->va_buffers_list); - kref_init(&mapped_buffer->ref); - - err = insert_mapped_buffer(vm, mapped_buffer); - if (err) { - nvgpu_err(g, "failed to insert into mapped buffer tree"); - goto clean_up; - } - inserted = true; - if (user_mapped) - vm->num_user_mapped_buffers++; - - gk20a_dbg_info("allocated va @ 0x%llx", map_offset); - - if (va_node) { - nvgpu_list_add_tail(&mapped_buffer->va_buffers_list, - &va_node->va_buffers_list); - mapped_buffer->va_node = va_node; - } - - nvgpu_mutex_release(&vm->update_gmmu_lock); - - return map_offset; - -clean_up: - if (inserted) { - remove_mapped_buffer(vm, mapped_buffer); - if (user_mapped) - vm->num_user_mapped_buffers--; - } - nvgpu_kfree(g, mapped_buffer); - if (va_allocated) - gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx); - if (!IS_ERR(bfr.sgt)) - gk20a_mm_unpin(d, dmabuf, bfr.sgt); - - nvgpu_mutex_release(&vm->update_gmmu_lock); - gk20a_dbg_info("err=%d\n", err); - return 0; -} - int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm, u64 mapping_gva, u64 *compbits_win_size, @@ -2230,12 +1744,12 @@ int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm, u32 *mapping_ctagline, u32 *flags) { - struct mapped_buffer_node *mapped_buffer; + struct nvgpu_mapped_buf *mapped_buffer; struct gk20a *g = vm->mm->g; nvgpu_mutex_acquire(&vm->update_gmmu_lock); - mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, mapping_gva); + mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, mapping_gva); if (!mapped_buffer || 
!mapped_buffer->user_mapped) { @@ -2271,7 +1785,7 @@ int nvgpu_vm_map_compbits(struct vm_gk20a *vm, u64 *mapping_iova, u32 flags) { - struct mapped_buffer_node *mapped_buffer; + struct nvgpu_mapped_buf *mapped_buffer; struct gk20a *g = gk20a_from_vm(vm); const bool fixed_mapping = (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) != 0; @@ -2292,8 +1806,7 @@ int nvgpu_vm_map_compbits(struct vm_gk20a *vm, nvgpu_mutex_acquire(&vm->update_gmmu_lock); - mapped_buffer = - find_mapped_buffer_locked(vm->mapped_buffers, mapping_gva); + mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, mapping_gva); if (!mapped_buffer || !mapped_buffer->user_mapped) { nvgpu_mutex_release(&vm->update_gmmu_lock); @@ -2537,12 +2050,12 @@ static void gk20a_vidmem_clear_mem_worker(struct work_struct *work) dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) { - struct mapped_buffer_node *buffer; + struct nvgpu_mapped_buf *buffer; dma_addr_t addr = 0; struct gk20a *g = gk20a_from_vm(vm); nvgpu_mutex_acquire(&vm->update_gmmu_lock); - buffer = find_mapped_buffer_locked(vm->mapped_buffers, gpu_vaddr); + buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr); if (buffer) addr = g->ops.mm.get_iova_addr(g, buffer->sgt->sgl, buffer->flags); @@ -3026,7 +2539,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, } /* NOTE! 
mapped_buffers lock must be held */ -void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, +void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer, struct vm_gk20a_mapping_batch *batch) { struct vm_gk20a *vm = mapped_buffer->vm; @@ -3057,7 +2570,7 @@ void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, gk20a_dbg(gpu_dbg_map, "gv: 0x%04x_%08x pgsz=%-3dKb as=%-2d own_mem_ref=%d", - hi32(mapped_buffer->addr), lo32(mapped_buffer->addr), + u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr), vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, vm_aspace_id(vm), mapped_buffer->own_mem_ref); @@ -3066,9 +2579,9 @@ void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, mapped_buffer->sgt); /* remove from mapped buffer tree and remove list, free */ - remove_mapped_buffer(vm, mapped_buffer); - if (!nvgpu_list_empty(&mapped_buffer->va_buffers_list)) - nvgpu_list_del(&mapped_buffer->va_buffers_list); + nvgpu_remove_mapped_buf(vm, mapped_buffer); + if (!nvgpu_list_empty(&mapped_buffer->buffer_list)) + nvgpu_list_del(&mapped_buffer->buffer_list); /* keep track of mapped buffers */ if (mapped_buffer->user_mapped) @@ -3082,22 +2595,6 @@ void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, return; } -void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset) -{ - struct gk20a *g = vm->mm->g; - struct mapped_buffer_node *mapped_buffer; - - nvgpu_mutex_acquire(&vm->update_gmmu_lock); - mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, offset); - if (!mapped_buffer) { - nvgpu_mutex_release(&vm->update_gmmu_lock); - nvgpu_err(g, "invalid addr to unmap 0x%llx", offset); - return; - } - - kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref); - nvgpu_mutex_release(&vm->update_gmmu_lock); -} static void gk20a_vm_free_entries(struct vm_gk20a *vm, struct gk20a_mm_entry *parent, @@ -3659,7 +3156,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, va_node->vaddr_start = vaddr_start; 
va_node->size = (u64)args->page_size * (u64)args->pages; va_node->pgsz_idx = pgsz_idx; - nvgpu_init_list_node(&va_node->va_buffers_list); + nvgpu_init_list_node(&va_node->buffer_list_head); nvgpu_init_list_node(&va_node->reserved_va_list); nvgpu_mutex_acquire(&vm->update_gmmu_lock); @@ -3723,15 +3220,15 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, nvgpu_mutex_acquire(&vm->update_gmmu_lock); va_node = addr_to_reservation(vm, args->offset); if (va_node) { - struct mapped_buffer_node *buffer, *n; + struct nvgpu_mapped_buf *buffer, *n; /* Decrement the ref count on all buffers in this va_node. This * allows userspace to let the kernel free mappings that are * only used by this va_node. */ nvgpu_list_for_each_entry_safe(buffer, n, - &va_node->va_buffers_list, - mapped_buffer_node, va_buffers_list) { - nvgpu_list_del(&buffer->va_buffers_list); + &va_node->buffer_list_head, + nvgpu_mapped_buf, buffer_list) { + nvgpu_list_del(&buffer->buffer_list); kref_put(&buffer->ref, gk20a_vm_unmap_locked_kref); } @@ -3887,7 +3384,7 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm, } ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align, - flags, kind, NULL, true, + flags, kind, true, gk20a_mem_flag_none, buffer_offset, mapping_size, @@ -4296,18 +3793,17 @@ hw_was_off: gk20a_idle_nosuspend(g->dev); } -int nvgpu_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, - struct dma_buf **dmabuf, - u64 *offset) +int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va, + struct dma_buf **dmabuf, + u64 *offset) { - struct mapped_buffer_node *mapped_buffer; + struct nvgpu_mapped_buf *mapped_buffer; gk20a_dbg_fn("gpu_va=0x%llx", gpu_va); nvgpu_mutex_acquire(&vm->update_gmmu_lock); - mapped_buffer = find_mapped_buffer_range_locked(vm->mapped_buffers, - gpu_va); + mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va); if (!mapped_buffer) { nvgpu_mutex_release(&vm->update_gmmu_lock); return -EINVAL; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 
331843cc..357962c7 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -140,52 +140,9 @@ struct priv_cmd_entry { u32 size; /* in words */ }; -struct mapped_buffer_node { - struct vm_gk20a *vm; - struct nvgpu_rbtree_node node; - struct nvgpu_list_node va_buffers_list; - struct vm_reserved_va_node *va_node; - u64 addr; - u64 size; - struct dma_buf *dmabuf; - struct sg_table *sgt; - struct kref ref; - u32 user_mapped; - bool own_mem_ref; - u32 pgsz_idx; - u32 ctag_offset; - u32 ctag_lines; - u32 ctag_allocated_lines; - - /* For comptag mapping, these are the mapping window parameters */ - bool ctags_mappable; - u64 ctag_map_win_addr; /* non-zero if mapped */ - u64 ctag_map_win_size; /* non-zero if ctags_mappable */ - u32 ctag_map_win_ctagline; /* ctagline at win start, set if - * ctags_mappable */ - - u32 flags; - u32 kind; - bool va_allocated; -}; - -static inline struct mapped_buffer_node * -mapped_buffer_node_from_va_buffers_list(struct nvgpu_list_node *node) -{ - return (struct mapped_buffer_node *) - ((uintptr_t)node - offsetof(struct mapped_buffer_node, va_buffers_list)); -}; - -static inline struct mapped_buffer_node * -mapped_buffer_from_rbtree_node(struct nvgpu_rbtree_node *node) -{ - return (struct mapped_buffer_node *) - ((uintptr_t)node - offsetof(struct mapped_buffer_node, node)); -}; - struct vm_reserved_va_node { struct nvgpu_list_node reserved_va_list; - struct nvgpu_list_node va_buffers_list; + struct nvgpu_list_node buffer_list_head; u32 pgsz_idx; u64 vaddr_start; u64 size; @@ -431,11 +388,6 @@ static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem) return 0; } -u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, - u32 sysmem_mask, u32 vidmem_mask); -u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, - u32 sysmem_mask, u32 vidmem_mask); - void gk20a_pde_wr32(struct gk20a *g, struct gk20a_mm_entry *entry, size_t w, size_t data); u64 gk20a_pde_addr(struct gk20a *g, struct 
gk20a_mm_entry *entry); @@ -532,8 +484,6 @@ const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem, struct vm_gk20a *vm); -void gk20a_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block); - int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size); extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index eab51175..75dfcc86 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c @@ -401,7 +401,7 @@ static void gp10b_remove_bar2_vm(struct gk20a *g) struct mm_gk20a *mm = &g->mm; gp10b_replayable_pagefault_buffer_deinit(g); - gk20a_remove_vm(&mm->bar2.vm, &mm->bar2.inst_block); + nvgpu_remove_vm(&mm->bar2.vm, &mm->bar2.inst_block); } diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h index 1fb772d5..e1ceffd4 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h @@ -26,7 +26,10 @@ #include struct vm_gk20a; -struct mapped_buffer_node; +struct vm_reserved_va_node; +struct buffer_attrs; +struct gk20a_comptag_allocator; + /** * This header contains the OS agnostic APIs for dealing with VMs. 
Most of the @@ -44,6 +47,50 @@ struct vm_gk20a_mapping_batch { bool need_tlb_invalidate; }; +struct nvgpu_mapped_buf { + struct vm_gk20a *vm; + struct nvgpu_rbtree_node node; + struct nvgpu_list_node buffer_list; + struct vm_reserved_va_node *va_node; + u64 addr; + u64 size; + struct dma_buf *dmabuf; + struct sg_table *sgt; + struct kref ref; + u32 user_mapped; + bool own_mem_ref; + u32 pgsz_idx; + u32 ctag_offset; + u32 ctag_lines; + u32 ctag_allocated_lines; + + /* For comptag mapping, these are the mapping window parameters */ + bool ctags_mappable; + u64 ctag_map_win_addr; /* non-zero if mapped */ + u64 ctag_map_win_size; /* non-zero if ctags_mappable */ + u32 ctag_map_win_ctagline; /* ctagline at win start, set if + * ctags_mappable */ + + u32 flags; + u32 kind; + bool va_allocated; +}; + +static inline struct nvgpu_mapped_buf * +nvgpu_mapped_buf_from_buffer_list(struct nvgpu_list_node *node) +{ + return (struct nvgpu_mapped_buf *) + ((uintptr_t)node - offsetof(struct nvgpu_mapped_buf, + buffer_list)); +} + +static inline struct nvgpu_mapped_buf * +mapped_buffer_from_rbtree_node(struct nvgpu_rbtree_node *node) +{ + return (struct nvgpu_mapped_buf *) + ((uintptr_t)node - offsetof(struct nvgpu_mapped_buf, node)); +} + struct vm_gk20a { struct mm_gk20a *mm; struct gk20a_as_share *as_share; /* as_share this represents */ @@ -102,6 +149,8 @@ struct vm_gk20a { void nvgpu_vm_get(struct vm_gk20a *vm); void nvgpu_vm_put(struct vm_gk20a *vm); +int vm_aspace_id(struct vm_gk20a *vm); + /* batching eliminates redundant cache flushes and invalidates */ void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch); void nvgpu_vm_mapping_batch_finish( @@ -112,24 +161,45 @@ void nvgpu_vm_mapping_batch_finish_locked( /* get reference to all currently mapped buffers */ int nvgpu_vm_get_buffers(struct vm_gk20a *vm, - struct mapped_buffer_node ***mapped_buffers, + struct nvgpu_mapped_buf ***mapped_buffers, int *num_buffers); /* put references on the given buffers */ void 
nvgpu_vm_put_buffers(struct vm_gk20a *vm, - struct mapped_buffer_node **mapped_buffers, + struct nvgpu_mapped_buf **mapped_buffers, int num_buffers); /* Note: batch may be NULL if unmap op is not part of a batch */ int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, struct vm_gk20a_mapping_batch *batch); -void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, +void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer, struct vm_gk20a_mapping_batch *batch); +/* + * These all require the VM update lock to be held. + */ +struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf( + struct vm_gk20a *vm, u64 addr); +struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_range( + struct vm_gk20a *vm, u64 addr); +struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_less_than( + struct vm_gk20a *vm, u64 addr); + +int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va, + struct dma_buf **dmabuf, + u64 *offset); + +int nvgpu_insert_mapped_buf(struct vm_gk20a *vm, + struct nvgpu_mapped_buf *mapped_buffer); +void nvgpu_remove_mapped_buf(struct vm_gk20a *vm, + struct nvgpu_mapped_buf *mapped_buffer); + void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm); void nvgpu_vm_remove_support(struct vm_gk20a *vm); +void nvgpu_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block); + int nvgpu_init_vm(struct mm_gk20a *mm, struct vm_gk20a *vm, u32 big_page_size, diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index cfa9e428..f4004f42 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c @@ -202,7 +202,7 @@ static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm, static void vgpu_vm_remove_support(struct vm_gk20a *vm) { struct gk20a *g = vm->mm->g; - struct mapped_buffer_node *mapped_buffer; + struct nvgpu_mapped_buf *mapped_buffer; struct vm_reserved_va_node *va_node, *va_node_tmp; struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_as_share_params *p = &msg.params.as_share; -- cgit v1.2.2