From 0853109c9994d1375a10d13ae254690a4a4c9610 Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Thu, 7 Sep 2017 16:45:07 -0700
Subject: gpu: nvgpu: Refactoring nvgpu_vm functions

Refactor the last nvgpu_vm functions out of the mm_gk20a.c code. This
also removes some usages of dma_buf from mm_gk20a.c, which helps make
mm_gk20a.c less Linux specific.

Also delete some Linux specific header includes that are no longer
necessary in gk20a/mm_gk20a.c.

The mm_gk20a.c code is now quite close to being Linux free.

JIRA NVGPU-30
JIRA NVGPU-138

Change-Id: I72b370bd85a7b029768b0fb4827d6abba42007c3
Signed-off-by: Alex Waterman
Reviewed-on: https://git-master.nvidia.com/r/1566629
Reviewed-by: Konsta Holtta
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/common/linux/vm.c           | 202 ++++++++++++++-
 drivers/gpu/nvgpu/common/linux/vm_priv.h      |  10 +-
 drivers/gpu/nvgpu/common/mm/vm.c              | 134 ++++++++++
 drivers/gpu/nvgpu/common/mm/vm_area.c         |   2 +-
 drivers/gpu/nvgpu/gk20a/gk20a.h               |   2 +
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c            | 348 --------------------------
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h            |   1 -
 drivers/gpu/nvgpu/gm20b/hal_gm20b.c           |   2 +
 drivers/gpu/nvgpu/gm20b/mm_gm20b.c            |  10 +
 drivers/gpu/nvgpu/gm20b/mm_gm20b.h            |   2 +
 drivers/gpu/nvgpu/gp106/hal_gp106.c           |   2 +
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c           |   2 +
 drivers/gpu/nvgpu/include/nvgpu/vm.h          |   7 +-
 drivers/gpu/nvgpu/vgpu/gm20b/vgpu_hal_gm20b.c |   2 +
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c |   2 +
 15 files changed, 366 insertions(+), 362 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index b686d616..638d3e51 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -115,6 +115,108 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
 	return align;
 }
 
+static int setup_kind_legacy(struct vm_gk20a *vm, struct buffer_attrs *bfr,
+			     bool *pkind_compressible)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	bool kind_compressible;
+
+	if (unlikely(bfr->kind_v == g->ops.mm.get_kind_invalid()))
+		bfr->kind_v = g->ops.mm.get_kind_pitch();
+
+	if (unlikely(!gk20a_kind_is_supported(bfr->kind_v))) {
+		nvgpu_err(g, "kind 0x%x not supported", bfr->kind_v);
+		return -EINVAL;
+	}
+
+	bfr->uc_kind_v = g->ops.mm.get_kind_invalid();
+	/* find a suitable incompressible kind if it becomes necessary later */
+	kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);
+	if (kind_compressible) {
+		bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);
+		if (unlikely(bfr->uc_kind_v == g->ops.mm.get_kind_invalid())) {
+			/* shouldn't happen, but it is worth cross-checking */
+			nvgpu_err(g, "comptag kind 0x%x can't be"
+				  " downgraded to uncompressed kind",
+				  bfr->kind_v);
+			return -EINVAL;
+		}
+	}
+
+	*pkind_compressible = kind_compressible;
+	return 0;
+}
+
+static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
+					     u32 flags,
+					     struct buffer_attrs *bfr,
+					     enum gmmu_pgsz_gk20a pgsz_idx)
+{
+	bool kind_compressible;
+	struct gk20a *g = gk20a_from_vm(vm);
+	int ctag_granularity = g->ops.fb.compression_page_size(g);
+
+	if (!bfr->use_kind_v)
+		bfr->kind_v = g->ops.mm.get_kind_invalid();
+	if (!bfr->use_uc_kind_v)
+		bfr->uc_kind_v = g->ops.mm.get_kind_invalid();
+
+	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
+		kind_compressible = (bfr->kind_v !=
+				     g->ops.mm.get_kind_invalid());
+		if (!kind_compressible)
+			bfr->kind_v = bfr->uc_kind_v;
+	} else {
+		int err = setup_kind_legacy(vm, bfr, &kind_compressible);
+
+		if (err)
+			return err;
+	}
+
+	/* comptags only supported for suitable kinds, 128KB pagesize */
+	if (kind_compressible &&
+	    vm->gmmu_page_sizes[pgsz_idx] <
+	    g->ops.fb.compressible_page_size(g)) {
+		/* it is safe to fall back to uncompressed as
+		   functionality is not harmed */
+		bfr->kind_v = bfr->uc_kind_v;
+		kind_compressible = false;
+	}
+	if (kind_compressible)
+		bfr->ctag_lines = DIV_ROUND_UP_ULL(bfr->size, ctag_granularity);
+	else
+		bfr->ctag_lines = 0;
+
+	bfr->use_kind_v = (bfr->kind_v != g->ops.mm.get_kind_invalid());
+	bfr->use_uc_kind_v = (bfr->uc_kind_v != g->ops.mm.get_kind_invalid());
+
+	return 0;
+}
+
+int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
+		      struct dma_buf **dmabuf,
+		      u64 *offset)
+{
+	struct nvgpu_mapped_buf *mapped_buffer;
+
+	gk20a_dbg_fn("gpu_va=0x%llx", gpu_va);
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+	mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va);
+	if (!mapped_buffer) {
+		nvgpu_mutex_release(&vm->update_gmmu_lock);
+		return -EINVAL;
+	}
+
+	*dmabuf = mapped_buffer->dmabuf;
+	*offset = gpu_va - mapped_buffer->addr;
+
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+	return 0;
+}
+
 /*
  * vm->update_gmmu_lock must be held. This checks to see if we already have
  * mapped the passed buffer into this VM. If so, just return the existing
@@ -478,6 +580,67 @@ clean_up:
 	return 0;
 }
 
+int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
+			int dmabuf_fd,
+			u64 *offset_align,
+			u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
+			s16 compr_kind,
+			s16 incompr_kind,
+			u64 buffer_offset,
+			u64 mapping_size,
+			struct vm_gk20a_mapping_batch *batch)
+{
+	int err = 0;
+	struct dma_buf *dmabuf;
+	u64 ret_va;
+
+	gk20a_dbg_fn("");
+
+	/* get ref to the mem handle (released on unmap_locked) */
+	dmabuf = dma_buf_get(dmabuf_fd);
+	if (IS_ERR(dmabuf)) {
+		nvgpu_warn(gk20a_from_vm(vm), "%s: fd %d is not a dmabuf",
+			   __func__, dmabuf_fd);
+		return PTR_ERR(dmabuf);
+	}
+
+	/* verify that we're not overflowing the buffer, i.e.
+	 * (buffer_offset + mapping_size)> dmabuf->size.
+	 *
+	 * Since buffer_offset + mapping_size could overflow, first check
+	 * that mapping size < dmabuf_size, at which point we can subtract
+	 * mapping_size from both sides for the final comparison.
+	 */
+	if ((mapping_size > dmabuf->size) ||
+	    (buffer_offset > (dmabuf->size - mapping_size))) {
+		nvgpu_err(gk20a_from_vm(vm),
+			  "buf size %llx < (offset(%llx) + map_size(%llx))\n",
+			  (u64)dmabuf->size, buffer_offset, mapping_size);
+		return -EINVAL;
+	}
+
+	err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm));
+	if (err) {
+		dma_buf_put(dmabuf);
+		return err;
+	}
+
+	ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align,
+			      flags, compr_kind, incompr_kind, true,
+			      gk20a_mem_flag_none,
+			      buffer_offset,
+			      mapping_size,
+			      batch);
+
+	*offset_align = ret_va;
+	if (!ret_va) {
+		dma_buf_put(dmabuf);
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
 void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
 {
 	struct gk20a *g = vm->mm->g;
@@ -491,6 +654,43 @@ void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
 		return;
 	}
 
-	nvgpu_ref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_ref);
+	nvgpu_ref_put(&mapped_buffer->ref, nvgpu_vm_unmap_locked_ref);
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
 }
+
+/* NOTE! mapped_buffers lock must be held */
+void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
+			   struct vm_gk20a_mapping_batch *batch)
+{
+	struct vm_gk20a *vm = mapped_buffer->vm;
+	struct gk20a *g = vm->mm->g;
+
+	g->ops.mm.gmmu_unmap(vm,
+			     mapped_buffer->addr,
+			     mapped_buffer->size,
+			     mapped_buffer->pgsz_idx,
+			     mapped_buffer->va_allocated,
+			     gk20a_mem_flag_none,
+			     mapped_buffer->vm_area ?
+			     mapped_buffer->vm_area->sparse : false,
+			     batch);
+
+	gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf,
+		       mapped_buffer->sgt);
+
+	/* remove from mapped buffer tree and remove list, free */
+	nvgpu_remove_mapped_buf(vm, mapped_buffer);
+	if (!nvgpu_list_empty(&mapped_buffer->buffer_list))
+		nvgpu_list_del(&mapped_buffer->buffer_list);
+
+	/* keep track of mapped buffers */
+	if (mapped_buffer->user_mapped)
+		vm->num_user_mapped_buffers--;
+
+	if (mapped_buffer->own_mem_ref)
+		dma_buf_put(mapped_buffer->dmabuf);
+
+	nvgpu_kfree(g, mapped_buffer);
+
+	return;
+}
diff --git a/drivers/gpu/nvgpu/common/linux/vm_priv.h b/drivers/gpu/nvgpu/common/linux/vm_priv.h
index fa173d59..be7efa8b 100644
--- a/drivers/gpu/nvgpu/common/linux/vm_priv.h
+++ b/drivers/gpu/nvgpu/common/linux/vm_priv.h
@@ -88,9 +88,9 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset);
 
 /* find buffer corresponding to va */
-int nvgpu_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
-			 struct dma_buf **dmabuf,
-			 u64 *offset);
+int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
+		      struct dma_buf **dmabuf,
+		      u64 *offset);
 
 enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
 					  struct dma_buf *dmabuf);
@@ -98,9 +98,5 @@ int validate_fixed_buffer(struct vm_gk20a *vm,
 			  struct buffer_attrs *bfr,
 			  u64 map_offset, u64 map_size,
 			  struct nvgpu_vm_area **pva_node);
-int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
-				      u32 flags,
-				      struct buffer_attrs *bfr,
-				      enum gmmu_pgsz_gk20a pgsz_idx);
 
 #endif
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index f8d58349..9f04ee01 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -641,3 +641,137 @@ struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_less_than(
 
 	return mapped_buffer_from_rbtree_node(node);
 }
+
+int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
+			 struct nvgpu_mapped_buf ***mapped_buffers,
+			 int *num_buffers)
+{
+	struct nvgpu_mapped_buf *mapped_buffer;
+	struct nvgpu_mapped_buf **buffer_list;
+	struct nvgpu_rbtree_node *node = NULL;
+	int i = 0;
+
+	if (vm->userspace_managed) {
+		*mapped_buffers = NULL;
+		*num_buffers = 0;
+		return 0;
+	}
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+	buffer_list = nvgpu_big_zalloc(vm->mm->g, sizeof(*buffer_list) *
+				       vm->num_user_mapped_buffers);
+	if (!buffer_list) {
+		nvgpu_mutex_release(&vm->update_gmmu_lock);
+		return -ENOMEM;
+	}
+
+	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
+	while (node) {
+		mapped_buffer = mapped_buffer_from_rbtree_node(node);
+		if (mapped_buffer->user_mapped) {
+			buffer_list[i] = mapped_buffer;
+			nvgpu_ref_get(&mapped_buffer->ref);
+			i++;
+		}
+		nvgpu_rbtree_enum_next(&node, node);
+	}
+
+	BUG_ON(i != vm->num_user_mapped_buffers);
+
+	*num_buffers = vm->num_user_mapped_buffers;
+	*mapped_buffers = buffer_list;
+
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+	return 0;
+}
+
+void nvgpu_vm_unmap_locked_ref(struct nvgpu_ref *ref)
+{
+	struct nvgpu_mapped_buf *mapped_buffer =
+		container_of(ref, struct nvgpu_mapped_buf, ref);
+	nvgpu_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch);
+}
+
+void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
+			  struct nvgpu_mapped_buf **mapped_buffers,
+			  int num_buffers)
+{
+	int i;
+	struct vm_gk20a_mapping_batch batch;
+
+	if (num_buffers == 0)
+		return;
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+	nvgpu_vm_mapping_batch_start(&batch);
+	vm->kref_put_batch = &batch;
+
+	for (i = 0; i < num_buffers; ++i)
+		nvgpu_ref_put(&mapped_buffers[i]->ref,
+			      nvgpu_vm_unmap_locked_ref);
+
+	vm->kref_put_batch = NULL;
+	nvgpu_vm_mapping_batch_finish_locked(vm, &batch);
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+	nvgpu_big_free(vm->mm->g, mapped_buffers);
+}
+
+static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
+				struct vm_gk20a_mapping_batch *batch)
+{
+	struct gk20a *g = vm->mm->g;
+	struct nvgpu_mapped_buf *mapped_buffer;
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
+	if (!mapped_buffer) {
+		nvgpu_mutex_release(&vm->update_gmmu_lock);
+		nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
+		return;
+	}
+
+	if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
+		struct nvgpu_timeout timeout;
+
+		nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+		nvgpu_timeout_init(vm->mm->g, &timeout, 10000,
+				   NVGPU_TIMER_RETRY_TIMER);
+		do {
+			if (nvgpu_atomic_read(
+				&mapped_buffer->ref.refcount) == 1)
+				break;
+			nvgpu_udelay(5);
+		} while (!nvgpu_timeout_expired_msg(&timeout,
+					    "sync-unmap failed on 0x%llx"));
+
+		nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+	}
+
+	if (mapped_buffer->user_mapped == 0) {
+		nvgpu_mutex_release(&vm->update_gmmu_lock);
+		nvgpu_err(g, "addr already unmapped from user 0x%llx", offset);
+		return;
+	}
+
+	mapped_buffer->user_mapped--;
+	if (mapped_buffer->user_mapped == 0)
+		vm->num_user_mapped_buffers--;
+
+	vm->kref_put_batch = batch;
+	nvgpu_ref_put(&mapped_buffer->ref, nvgpu_vm_unmap_locked_ref);
+	vm->kref_put_batch = NULL;
+
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+}
+
+int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
+			  struct vm_gk20a_mapping_batch *batch)
+{
+	nvgpu_vm_unmap_user(vm, offset, batch);
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/common/mm/vm_area.c b/drivers/gpu/nvgpu/common/mm/vm_area.c
index de1623bc..88758b85 100644
--- a/drivers/gpu/nvgpu/common/mm/vm_area.c
+++ b/drivers/gpu/nvgpu/common/mm/vm_area.c
@@ -208,7 +208,7 @@ int nvgpu_vm_area_free(struct vm_gk20a *vm, u64 addr)
 			      &vm_area->buffer_list_head,
 			      nvgpu_mapped_buf, buffer_list) {
 		nvgpu_list_del(&buffer->buffer_list);
-		nvgpu_ref_put(&buffer->ref, gk20a_vm_unmap_locked_ref);
+		nvgpu_ref_put(&buffer->ref, nvgpu_vm_unmap_locked_ref);
 	}
 
 	/* if this was a sparse mapping, free the va */
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 9d272646..b55f4517 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -759,6 +759,8 @@ struct gpu_ops {
 				struct vm_gk20a *vm, u32 big_page_size);
 		bool (*mmu_fault_pending)(struct gk20a *g);
 		void (*fault_info_mem_destroy)(struct gk20a *g);
+		u32 (*get_kind_invalid)(void);
+		u32 (*get_kind_pitch)(void);
 	} mm;
 	/*
 	 * This function is called to allocate secure memory (memory
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 69d9e983..687951a9 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -22,11 +22,6 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-#include
-#include
-#include
-#include
-#include
 #include
 #include
 
@@ -46,8 +41,6 @@
 #include
 #include
 
-#include
-
 #include "gk20a.h"
 #include "platform_gk20a.h"
 #include "mm_gk20a.h"
@@ -64,13 +57,6 @@
 #include
 #include
 
-/*
- * Necessary while transitioning to less coupled code. Will be removed once
- * all the common APIs no longers have Linux stuff in them.
- */
-#include "common/linux/vm_priv.h"
-#include "common/linux/dmabuf.h"
-
 /*
  * GPU mapping life cycle
  * ======================
@@ -330,209 +316,6 @@ int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
 	return vm->mmu_levels[0].lo_bit[0];
 }
 
-int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
-			 struct nvgpu_mapped_buf ***mapped_buffers,
-			 int *num_buffers)
-{
-	struct nvgpu_mapped_buf *mapped_buffer;
-	struct nvgpu_mapped_buf **buffer_list;
-	struct nvgpu_rbtree_node *node = NULL;
-	int i = 0;
-
-	if (vm->userspace_managed) {
-		*mapped_buffers = NULL;
-		*num_buffers = 0;
-		return 0;
-	}
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-	buffer_list = nvgpu_big_zalloc(vm->mm->g, sizeof(*buffer_list) *
-				       vm->num_user_mapped_buffers);
-	if (!buffer_list) {
-		nvgpu_mutex_release(&vm->update_gmmu_lock);
-		return -ENOMEM;
-	}
-
-	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
-	while (node) {
-		mapped_buffer = mapped_buffer_from_rbtree_node(node);
-		if (mapped_buffer->user_mapped) {
-			buffer_list[i] = mapped_buffer;
-			nvgpu_ref_get(&mapped_buffer->ref);
-			i++;
-		}
-		nvgpu_rbtree_enum_next(&node, node);
-	}
-
-	BUG_ON(i != vm->num_user_mapped_buffers);
-
-	*num_buffers = vm->num_user_mapped_buffers;
-	*mapped_buffers = buffer_list;
-
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-
-	return 0;
-}
-
-void gk20a_vm_unmap_locked_ref(struct nvgpu_ref *ref)
-{
-	struct nvgpu_mapped_buf *mapped_buffer =
-		container_of(ref, struct nvgpu_mapped_buf, ref);
-	nvgpu_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch);
-}
-
-void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
-			  struct nvgpu_mapped_buf **mapped_buffers,
-			  int num_buffers)
-{
-	int i;
-	struct vm_gk20a_mapping_batch batch;
-
-	if (num_buffers == 0)
-		return;
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	nvgpu_vm_mapping_batch_start(&batch);
-	vm->kref_put_batch = &batch;
-
-	for (i = 0; i < num_buffers; ++i)
-		nvgpu_ref_put(&mapped_buffers[i]->ref,
-			      gk20a_vm_unmap_locked_ref);
-
-	vm->kref_put_batch = NULL;
-	nvgpu_vm_mapping_batch_finish_locked(vm, &batch);
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-
-	nvgpu_big_free(vm->mm->g, mapped_buffers);
-}
-
-static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
-				struct vm_gk20a_mapping_batch *batch)
-{
-	struct gk20a *g = vm->mm->g;
-	struct nvgpu_mapped_buf *mapped_buffer;
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
-	if (!mapped_buffer) {
-		nvgpu_mutex_release(&vm->update_gmmu_lock);
-		nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
-		return;
-	}
-
-	if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-		struct nvgpu_timeout timeout;
-
-		nvgpu_mutex_release(&vm->update_gmmu_lock);
-
-		nvgpu_timeout_init(vm->mm->g, &timeout, 10000,
-				   NVGPU_TIMER_RETRY_TIMER);
-		do {
-			if (nvgpu_atomic_read(
-				&mapped_buffer->ref.refcount) == 1)
-				break;
-			nvgpu_udelay(5);
-		} while (!nvgpu_timeout_expired_msg(&timeout,
-					    "sync-unmap failed on 0x%llx"));
-
-		nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	}
-
-	if (mapped_buffer->user_mapped == 0) {
-		nvgpu_mutex_release(&vm->update_gmmu_lock);
-		nvgpu_err(g, "addr already unmapped from user 0x%llx", offset);
-		return;
-	}
-
-	mapped_buffer->user_mapped--;
-	if (mapped_buffer->user_mapped == 0)
-		vm->num_user_mapped_buffers--;
-
-	vm->kref_put_batch = batch;
-	nvgpu_ref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_ref);
-	vm->kref_put_batch = NULL;
-
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-}
-
-static int setup_kind_legacy(struct vm_gk20a *vm, struct buffer_attrs *bfr,
-			     bool *pkind_compressible)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	bool kind_compressible;
-
-	if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
-		bfr->kind_v = gmmu_pte_kind_pitch_v();
-
-	if (unlikely(!gk20a_kind_is_supported(bfr->kind_v))) {
-		nvgpu_err(g, "kind 0x%x not supported", bfr->kind_v);
-		return -EINVAL;
-	}
-
-	bfr->uc_kind_v = gmmu_pte_kind_invalid_v();
-	/* find a suitable incompressible kind if it becomes necessary later */
-	kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);
-	if (kind_compressible) {
-		bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);
-		if (unlikely(bfr->uc_kind_v == gmmu_pte_kind_invalid_v())) {
-			/* shouldn't happen, but it is worth cross-checking */
-			nvgpu_err(g, "comptag kind 0x%x can't be"
-				  " downgraded to uncompressed kind",
-				  bfr->kind_v);
-			return -EINVAL;
-		}
-	}
-
-	*pkind_compressible = kind_compressible;
-	return 0;
-}
-
-int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
-				      u32 flags,
-				      struct buffer_attrs *bfr,
-				      enum gmmu_pgsz_gk20a pgsz_idx)
-{
-	bool kind_compressible;
-	struct gk20a *g = gk20a_from_vm(vm);
-	int ctag_granularity = g->ops.fb.compression_page_size(g);
-
-	if (!bfr->use_kind_v)
-		bfr->kind_v = gmmu_pte_kind_invalid_v();
-	if (!bfr->use_uc_kind_v)
-		bfr->uc_kind_v = gmmu_pte_kind_invalid_v();
-
-	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
-		kind_compressible = (bfr->kind_v != gmmu_pte_kind_invalid_v());
-		if (!kind_compressible)
-			bfr->kind_v = bfr->uc_kind_v;
-	} else {
-		int err = setup_kind_legacy(vm, bfr, &kind_compressible);
-
-		if (err)
-			return err;
-	}
-
-	/* comptags only supported for suitable kinds, 128KB pagesize */
-	if (kind_compressible &&
-	    vm->gmmu_page_sizes[pgsz_idx] < g->ops.fb.compressible_page_size(g)) {
-		/* it is safe to fall back to uncompressed as
-		   functionality is not harmed */
-		bfr->kind_v = bfr->uc_kind_v;
-		kind_compressible = false;
-	}
-	if (kind_compressible)
-		bfr->ctag_lines = DIV_ROUND_UP_ULL(bfr->size, ctag_granularity);
-	else
-		bfr->ctag_lines = 0;
-
-	bfr->use_kind_v = (bfr->kind_v != gmmu_pte_kind_invalid_v());
-	bfr->use_uc_kind_v = (bfr->uc_kind_v != gmmu_pte_kind_invalid_v());
-
-	return 0;
-}
-
 /* for gk20a the "video memory" apertures here are misnomers. */
 static inline u32 big_valid_pde0_bits(struct gk20a *g,
 				      struct nvgpu_gmmu_pd *pd, u64 addr)
@@ -698,43 +481,6 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 	pd_write(g, pd, pd_offset + 1, pte_w[1]);
 }
 
-/* NOTE! mapped_buffers lock must be held */
-void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
-			   struct vm_gk20a_mapping_batch *batch)
-{
-	struct vm_gk20a *vm = mapped_buffer->vm;
-	struct gk20a *g = vm->mm->g;
-
-	g->ops.mm.gmmu_unmap(vm,
-			     mapped_buffer->addr,
-			     mapped_buffer->size,
-			     mapped_buffer->pgsz_idx,
-			     mapped_buffer->va_allocated,
-			     gk20a_mem_flag_none,
-			     mapped_buffer->vm_area ?
-			     mapped_buffer->vm_area->sparse : false,
-			     batch);
-
-	gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf,
-		       mapped_buffer->sgt);
-
-	/* remove from mapped buffer tree and remove list, free */
-	nvgpu_remove_mapped_buf(vm, mapped_buffer);
-	if (!nvgpu_list_empty(&mapped_buffer->buffer_list))
-		nvgpu_list_del(&mapped_buffer->buffer_list);
-
-	/* keep track of mapped buffers */
-	if (mapped_buffer->user_mapped)
-		vm->num_user_mapped_buffers--;
-
-	if (mapped_buffer->own_mem_ref)
-		dma_buf_put(mapped_buffer->dmabuf);
-
-	nvgpu_kfree(g, mapped_buffer);
-
-	return;
-}
-
 const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
 	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
 	 .lo_bit = {26, 26},
@@ -853,76 +599,6 @@ int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
 	return __gk20a_vm_bind_channel(as_share->vm, ch);
 }
 
-int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
-			int dmabuf_fd,
-			u64 *offset_align,
-			u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
-			s16 compr_kind,
-			s16 incompr_kind,
-			u64 buffer_offset,
-			u64 mapping_size,
-			struct vm_gk20a_mapping_batch *batch)
-{
-	int err = 0;
-	struct dma_buf *dmabuf;
-	u64 ret_va;
-
-	gk20a_dbg_fn("");
-
-	/* get ref to the mem handle (released on unmap_locked) */
-	dmabuf = dma_buf_get(dmabuf_fd);
-	if (IS_ERR(dmabuf)) {
-		nvgpu_warn(gk20a_from_vm(vm), "%s: fd %d is not a dmabuf",
-			   __func__, dmabuf_fd);
-		return PTR_ERR(dmabuf);
-	}
-
-	/* verify that we're not overflowing the buffer, i.e.
-	 * (buffer_offset + mapping_size)> dmabuf->size.
-	 *
-	 * Since buffer_offset + mapping_size could overflow, first check
-	 * that mapping size < dmabuf_size, at which point we can subtract
-	 * mapping_size from both sides for the final comparison.
-	 */
-	if ((mapping_size > dmabuf->size) ||
-	    (buffer_offset > (dmabuf->size - mapping_size))) {
-		nvgpu_err(gk20a_from_vm(vm),
-			  "buf size %llx < (offset(%llx) + map_size(%llx))\n",
-			  (u64)dmabuf->size, buffer_offset, mapping_size);
-		return -EINVAL;
-	}
-
-	err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm));
-	if (err) {
-		dma_buf_put(dmabuf);
-		return err;
-	}
-
-	ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align,
-			      flags, compr_kind, incompr_kind, true,
-			      gk20a_mem_flag_none,
-			      buffer_offset,
-			      mapping_size,
-			      batch);
-
-	*offset_align = ret_va;
-	if (!ret_va) {
-		dma_buf_put(dmabuf);
-		err = -EINVAL;
-	}
-
-	return err;
-}
-
-int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
-			  struct vm_gk20a_mapping_batch *batch)
-{
-	gk20a_dbg_fn("");
-
-	nvgpu_vm_unmap_user(vm, offset, batch);
-	return 0;
-}
-
 int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
 {
 	int err;
@@ -1298,30 +974,6 @@ hw_was_off:
 	gk20a_idle_nosuspend(g);
 }
 
-int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
-		      struct dma_buf **dmabuf,
-		      u64 *offset)
-{
-	struct nvgpu_mapped_buf *mapped_buffer;
-
-	gk20a_dbg_fn("gpu_va=0x%llx", gpu_va);
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-	mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va);
-	if (!mapped_buffer) {
-		nvgpu_mutex_release(&vm->update_gmmu_lock);
-		return -EINVAL;
-	}
-
-	*dmabuf = mapped_buffer->dmabuf;
-	*offset = gpu_va - mapped_buffer->addr;
-
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-
-	return 0;
-}
-
 int gk20a_mm_suspend(struct gk20a *g)
 {
 	gk20a_dbg_fn("");
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 04034d84..556cb234 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -386,6 +386,5 @@ extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
 int gk20a_mm_get_buffer_info(struct device *dev, int dmabuf_fd, u64 *buffer_id,
 			     u64 *buffer_len);
 
-void gk20a_vm_unmap_locked_ref(struct nvgpu_ref *ref);
 
 #endif /* MM_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 50cc759a..e146d3ca 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -443,6 +443,8 @@ static const struct gpu_ops gm20b_ops = {
 		.is_bar1_supported = gm20b_mm_is_bar1_supported,
 		.init_inst_block = gk20a_init_inst_block,
 		.mmu_fault_pending = gk20a_fifo_mmu_fault_pending,
+		.get_kind_invalid = gm20b_get_kind_invalid,
+		.get_kind_pitch = gm20b_get_kind_pitch,
 	},
 	.therm = {
 		.init_therm_setup_hw = gm20b_init_therm_setup_hw,
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index dc332075..5cd7706d 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -74,3 +74,13 @@ u64 gm20b_gpu_phys_addr(struct gk20a *g,
 {
 	return phys;
 }
+
+u32 gm20b_get_kind_invalid(void)
+{
+	return gmmu_pte_kind_invalid_v();
+}
+
+u32 gm20b_get_kind_pitch(void)
+{
+	return gmmu_pte_kind_pitch_v();
+}
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.h b/drivers/gpu/nvgpu/gm20b/mm_gm20b.h
index 602e18dc..af67845a 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.h
@@ -38,4 +38,6 @@ bool gm20b_mm_is_bar1_supported(struct gk20a *g);
 int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g);
 u64 gm20b_gpu_phys_addr(struct gk20a *g,
 			struct nvgpu_gmmu_attrs *attrs, u64 phys);
+u32 gm20b_get_kind_invalid(void);
+u32 gm20b_get_kind_pitch(void);
 #endif
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 204c376b..54960b96 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -529,6 +529,8 @@ static const struct gpu_ops gp106_ops = {
 		.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup,
 		.remove_bar2_vm = gp10b_remove_bar2_vm,
 		.get_vidmem_size = gp106_mm_get_vidmem_size,
+		.get_kind_invalid = gm20b_get_kind_invalid,
+		.get_kind_pitch = gm20b_get_kind_pitch,
 	},
 	.pramin = {
 		.enter = gk20a_pramin_enter,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 90e8137b..b40ce4a1 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -488,6 +488,8 @@ static const struct gpu_ops gp10b_ops = {
 		.init_bar2_vm = gb10b_init_bar2_vm,
 		.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup,
 		.remove_bar2_vm = gp10b_remove_bar2_vm,
+		.get_kind_invalid = gm20b_get_kind_invalid,
+		.get_kind_pitch = gm20b_get_kind_pitch,
 	},
 	.pramin = {
 		.enter = gk20a_pramin_enter,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h
index f8868065..8c56461c 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -31,9 +31,10 @@
 #include
 #include
 
+struct dma_buf;
+
 struct vm_gk20a;
 struct nvgpu_vm_area;
-struct buffer_attrs;
 struct gk20a_comptag_allocator;
 
 /**
@@ -212,6 +213,7 @@ int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
 
 void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
 			   struct vm_gk20a_mapping_batch *batch);
+void nvgpu_vm_unmap_locked_ref(struct nvgpu_ref *ref);
 
 /*
  * These all require the VM update lock to be held.
@@ -223,9 +225,6 @@ struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_range(
 struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_less_than(
 	struct vm_gk20a *vm, u64 addr);
 
-int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
-		      struct dma_buf **dmabuf,
-		      u64 *offset);
 int nvgpu_insert_mapped_buf(struct vm_gk20a *vm,
 			    struct nvgpu_mapped_buf *mapped_buffer);
 void nvgpu_remove_mapped_buf(struct vm_gk20a *vm,
diff --git a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_hal_gm20b.c b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_hal_gm20b.c
index fc9b0e83..38d07ee2 100644
--- a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_hal_gm20b.c
+++ b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_hal_gm20b.c
@@ -342,6 +342,8 @@ static const struct gpu_ops vgpu_gm20b_ops = {
 		.is_bar1_supported = gm20b_mm_is_bar1_supported,
 		.init_inst_block = gk20a_init_inst_block,
 		.mmu_fault_pending = gk20a_fifo_mmu_fault_pending,
+		.get_kind_invalid = gm20b_get_kind_invalid,
+		.get_kind_pitch = gm20b_get_kind_pitch,
 	},
 	.therm = {
 		.init_therm_setup_hw = gm20b_init_therm_setup_hw,
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
index 2c59d847..7f18bfe4 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -392,6 +392,8 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.init_bar2_vm = gb10b_init_bar2_vm,
 		.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup,
 		.remove_bar2_vm = gp10b_remove_bar2_vm,
+		.get_kind_invalid = gm20b_get_kind_invalid,
+		.get_kind_pitch = gm20b_get_kind_pitch,
 	},
 	.pramin = {
 		.enter = gk20a_pramin_enter,
-- 
cgit v1.2.2