From d13c256d5ee11da1664377481543005142d9bd30 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Tue, 17 Oct 2017 10:55:00 -0700 Subject: gpu: nvgpu: VM unmap refactoring Re-organize the unmap code to be better split between OS specific requirements and common core requirements. The new code flow works as follows: nvgpu_vm_unmap() Is the primary entrance to the unmap path. It takes a VM and a GPU virtual address to unmap. There's also an optional batch mapping struct. This function is responsible for making sure there is a real buffer and that if it's being called on a fixed mapping then the mapping will definitely be freed (since buffers are ref-counted). Then this function decrements the ref-count and returns. If the ref-count hits zero then __nvgpu_vm_unmap_ref() is called which just calls __nvgpu_vm_unmap() with the relevant batch struct if present. This is where the real work is done. __nvgpu_vm_unmap() clears the GMMU mapping, removes the mapped buffer from the various lists and trees it may be in and then calls the nvgpu_vm_unmap_system() function. This function handles any OS specific stuff and must be defined by all VM OS implementations. There's a short cut used by some other core VM code to free mappings without going through nvgpu_vm_unmap(). Mostly they just directly decrement the mapping ref-count which can then call __nvgpu_vm_unmap_ref() if the ref-count hits zero. 
JIRA NVGPU-30 JIRA NVGPU-71 Change-Id: Ic626d37ab936819841bab45214f027b40ffa4e5a Signed-off-by: Alex Waterman Reviewed-on: https://git-master.nvidia.com/r/1583982 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/linux/cde.c | 4 +- drivers/gpu/nvgpu/common/linux/ioctl_as.c | 10 +-- drivers/gpu/nvgpu/common/linux/ioctl_dbg.c | 4 +- drivers/gpu/nvgpu/common/linux/vm.c | 84 ++------------------ drivers/gpu/nvgpu/common/mm/vm.c | 118 ++++++++++++++++++++++++++--- drivers/gpu/nvgpu/common/mm/vm_area.c | 2 +- drivers/gpu/nvgpu/include/nvgpu/linux/vm.h | 4 - drivers/gpu/nvgpu/include/nvgpu/vm.h | 16 +++- 8 files changed, 136 insertions(+), 106 deletions(-) (limited to 'drivers/gpu/nvgpu') diff --git a/drivers/gpu/nvgpu/common/linux/cde.c b/drivers/gpu/nvgpu/common/linux/cde.c index 65c50726..2f2e886f 100644 --- a/drivers/gpu/nvgpu/common/linux/cde.c +++ b/drivers/gpu/nvgpu/common/linux/cde.c @@ -1167,7 +1167,7 @@ __releases(&l->cde_app->mutex) cde_ctx->init_cmd_executed = true; /* unmap the buffers - channel holds references to them now */ - nvgpu_vm_unmap(cde_ctx->vm, map_vaddr); + nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL); return err; @@ -1175,7 +1175,7 @@ exit_unmap_surface: if (surface) dma_buf_vunmap(compbits_scatter_buf, surface); exit_unmap_vaddr: - nvgpu_vm_unmap(cde_ctx->vm, map_vaddr); + nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL); exit_idle: gk20a_idle(g); return err; diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_as.c b/drivers/gpu/nvgpu/common/linux/ioctl_as.c index 08064370..18d0dd07 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_as.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_as.c @@ -118,7 +118,10 @@ static int gk20a_as_ioctl_unmap_buffer( struct nvgpu_as_unmap_buffer_args *args) { gk20a_dbg_fn(""); - return nvgpu_vm_unmap_buffer(as_share->vm, args->offset, NULL); + + nvgpu_vm_unmap(as_share->vm, 
args->offset, NULL); + + return 0; } static int gk20a_as_ioctl_map_buffer_batch( @@ -155,10 +158,7 @@ static int gk20a_as_ioctl_map_buffer_batch( break; } - err = nvgpu_vm_unmap_buffer(as_share->vm, unmap_args.offset, - &batch); - if (err) - break; + nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch); } if (err) { diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c index 403d9261..24bf813a 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c @@ -1142,7 +1142,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, return 0; err_unmap: - nvgpu_vm_unmap_buffer(mm->perfbuf.vm, args->offset, NULL); + nvgpu_vm_unmap(mm->perfbuf.vm, args->offset, NULL); err_remove_vm: nvgpu_vm_put(mm->perfbuf.vm); nvgpu_mutex_release(&g->dbg_sessions_lock); @@ -1386,7 +1386,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset) err = g->ops.dbg_session_ops.perfbuffer_disable(g); - nvgpu_vm_unmap_buffer(vm, offset, NULL); + nvgpu_vm_unmap(vm, offset, NULL); nvgpu_free_inst_block(g, &mm->perfbuf.inst_block); nvgpu_vm_put(vm); diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index 984c2015..feb124f8 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c @@ -641,88 +641,20 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm, return err; } -int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, - struct vm_gk20a_mapping_batch *batch) -{ - struct gk20a *g = vm->mm->g; - struct nvgpu_mapped_buf *mapped_buffer; - - nvgpu_mutex_acquire(&vm->update_gmmu_lock); - - mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset); - if (!mapped_buffer) { - nvgpu_mutex_release(&vm->update_gmmu_lock); - nvgpu_err(g, "invalid addr to unmap 0x%llx", offset); - return 0; - } - - if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { - struct nvgpu_timeout timeout; - - 
nvgpu_mutex_release(&vm->update_gmmu_lock); - - nvgpu_timeout_init(vm->mm->g, &timeout, 10000, - NVGPU_TIMER_RETRY_TIMER); - do { - if (nvgpu_atomic_read( - &mapped_buffer->ref.refcount) == 1) - break; - nvgpu_udelay(5); - } while (!nvgpu_timeout_expired_msg(&timeout, - "sync-unmap failed on 0x%llx")); - - nvgpu_mutex_acquire(&vm->update_gmmu_lock); - } - - if (mapped_buffer->user_mapped == 0) { - nvgpu_mutex_release(&vm->update_gmmu_lock); - nvgpu_err(g, "addr already unmapped from user 0x%llx", offset); - return 0; - } - - mapped_buffer->user_mapped--; - if (mapped_buffer->user_mapped == 0) - vm->num_user_mapped_buffers--; - - vm->kref_put_batch = batch; - nvgpu_ref_put(&mapped_buffer->ref, nvgpu_vm_unmap_locked_ref); - vm->kref_put_batch = NULL; - - nvgpu_mutex_release(&vm->update_gmmu_lock); - return 0; -} - -/* NOTE! mapped_buffers lock must be held */ -void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer, - struct vm_gk20a_mapping_batch *batch) +/* + * This is the function call-back for freeing OS specific components of an + * nvgpu_mapped_buf. This should most likely never be called outside of the + * core MM framework! + * + * Note: the VM lock will be held. + */ +void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer) { struct vm_gk20a *vm = mapped_buffer->vm; - struct gk20a *g = vm->mm->g; - - g->ops.mm.gmmu_unmap(vm, - mapped_buffer->addr, - mapped_buffer->size, - mapped_buffer->pgsz_idx, - mapped_buffer->va_allocated, - gk20a_mem_flag_none, - mapped_buffer->vm_area ? 
- mapped_buffer->vm_area->sparse : false, - batch); gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf, mapped_buffer->sgt); - /* remove from mapped buffer tree and remove list, free */ - nvgpu_remove_mapped_buf(vm, mapped_buffer); - if (!nvgpu_list_empty(&mapped_buffer->buffer_list)) - nvgpu_list_del(&mapped_buffer->buffer_list); - - /* keep track of mapped buffers */ - if (mapped_buffer->user_mapped) - vm->num_user_mapped_buffers--; - if (mapped_buffer->own_mem_ref) dma_buf_put(mapped_buffer->dmabuf); - - nvgpu_kfree(g, mapped_buffer); } diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c index 88af6456..3d10ff48 100644 --- a/drivers/gpu/nvgpu/common/mm/vm.c +++ b/drivers/gpu/nvgpu/common/mm/vm.c @@ -36,6 +36,9 @@ #include "gk20a/gk20a.h" #include "gk20a/mm_gk20a.h" +static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer, + struct vm_gk20a_mapping_batch *batch); + int vm_aspace_id(struct vm_gk20a *vm) { return vm->as_share ? vm->as_share->id : -1; @@ -538,7 +541,7 @@ static void __nvgpu_vm_remove(struct vm_gk20a *vm) nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers); while (node) { mapped_buffer = mapped_buffer_from_rbtree_node(node); - nvgpu_vm_unmap_locked(mapped_buffer, NULL); + __nvgpu_vm_unmap(mapped_buffer, NULL); nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers); } @@ -702,8 +705,7 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm, vm->kref_put_batch = &batch; for (i = 0; i < num_buffers; ++i) - nvgpu_ref_put(&mapped_buffers[i]->ref, - nvgpu_vm_unmap_locked_ref); + nvgpu_ref_put(&mapped_buffers[i]->ref, __nvgpu_vm_unmap_ref); vm->kref_put_batch = NULL; nvgpu_vm_mapping_batch_finish_locked(vm, &batch); @@ -712,26 +714,118 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm, nvgpu_big_free(vm->mm->g, mapped_buffers); } -void nvgpu_vm_unmap_locked_ref(struct nvgpu_ref *ref) +/* + * Really unmap. 
This does the real GMMU unmap and removes the mapping from the + * VM map tracking tree (and vm_area list if necessary). + */ +static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer, + struct vm_gk20a_mapping_batch *batch) +{ + struct vm_gk20a *vm = mapped_buffer->vm; + struct gk20a *g = vm->mm->g; + + vm->num_user_mapped_buffers--; + + g->ops.mm.gmmu_unmap(vm, + mapped_buffer->addr, + mapped_buffer->size, + mapped_buffer->pgsz_idx, + mapped_buffer->va_allocated, + gk20a_mem_flag_none, + mapped_buffer->vm_area ? + mapped_buffer->vm_area->sparse : false, + batch); + + /* + * Remove from mapped buffer tree. Then delete the buffer from the + * linked list of mapped buffers; though note: not all mapped buffers + * are part of a vm_area. + */ + nvgpu_remove_mapped_buf(vm, mapped_buffer); + nvgpu_list_del(&mapped_buffer->buffer_list); + + /* + * OS specific freeing. This is after the generic freeing in case the + * generic freeing relies on some component of the OS specific + * nvgpu_mapped_buf in some abstraction or the like. + */ + nvgpu_vm_unmap_system(mapped_buffer); + + nvgpu_kfree(g, mapped_buffer); +} + +void __nvgpu_vm_unmap_ref(struct nvgpu_ref *ref) { struct nvgpu_mapped_buf *mapped_buffer = container_of(ref, struct nvgpu_mapped_buf, ref); - nvgpu_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch); + + __nvgpu_vm_unmap(mapped_buffer, mapped_buffer->vm->kref_put_batch); } -void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset) +/* + * For fixed-offset buffers we must sync the buffer. That means we wait for the + * buffer to hit a ref-count of 1 before proceeding. + * + * Note: this requires the update_gmmu_lock to be held since we release it and + * re-acquire it in this function. + */ +static int nvgpu_vm_unmap_sync_buffer(struct vm_gk20a *vm, + struct nvgpu_mapped_buf *mapped_buffer) +{ + struct nvgpu_timeout timeout; + int ret = 0; + + nvgpu_mutex_release(&vm->update_gmmu_lock); + + /* + * 500ms timer. 
+ */ + nvgpu_timeout_init(vm->mm->g, &timeout, 50, NVGPU_TIMER_CPU_TIMER); + + do { + if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) == 1) + break; + nvgpu_msleep(10); + } while (!nvgpu_timeout_expired_msg(&timeout, + "sync-unmap failed on 0x%llx")); + + if (nvgpu_timeout_expired(&timeout)) + ret = -ETIMEDOUT; + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + + return ret; +} + +void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset, + struct vm_gk20a_mapping_batch *batch) { - struct gk20a *g = vm->mm->g; struct nvgpu_mapped_buf *mapped_buffer; nvgpu_mutex_acquire(&vm->update_gmmu_lock); + mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset); - if (!mapped_buffer) { - nvgpu_mutex_release(&vm->update_gmmu_lock); - nvgpu_err(g, "invalid addr to unmap 0x%llx", offset); - return; + if (!mapped_buffer) + goto done; + + if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { + if (nvgpu_vm_unmap_sync_buffer(vm, mapped_buffer)) + /* + * Looks like we have failed... Better not continue in + * case the buffer is in use. + */ + goto done; } - nvgpu_ref_put(&mapped_buffer->ref, nvgpu_vm_unmap_locked_ref); + /* + * Make sure we have access to the batch if we end up calling through to + * the unmap_ref function. 
+ */ + vm->kref_put_batch = batch; + nvgpu_ref_put(&mapped_buffer->ref, __nvgpu_vm_unmap_ref); + vm->kref_put_batch = NULL; + +done: nvgpu_mutex_release(&vm->update_gmmu_lock); + return; } diff --git a/drivers/gpu/nvgpu/common/mm/vm_area.c b/drivers/gpu/nvgpu/common/mm/vm_area.c index 88758b85..fddec357 100644 --- a/drivers/gpu/nvgpu/common/mm/vm_area.c +++ b/drivers/gpu/nvgpu/common/mm/vm_area.c @@ -208,7 +208,7 @@ int nvgpu_vm_area_free(struct vm_gk20a *vm, u64 addr) &vm_area->buffer_list_head, nvgpu_mapped_buf, buffer_list) { nvgpu_list_del(&buffer->buffer_list); - nvgpu_ref_put(&buffer->ref, nvgpu_vm_unmap_locked_ref); + nvgpu_ref_put(&buffer->ref, __nvgpu_vm_unmap_ref); } /* if this was a sparse mapping, free the va */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h index 7aacf496..3d9f9ea6 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h @@ -98,10 +98,6 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm, u64 mapping_size, struct vm_gk20a_mapping_batch *batch); -/* Note: batch may be NULL if unmap op is not part of a batch */ -int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, - struct vm_gk20a_mapping_batch *batch); - /* find buffer corresponding to va */ int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va, struct dma_buf **dmabuf, diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h index e529512b..84c7e0c7 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h @@ -207,11 +207,19 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm, struct nvgpu_mapped_buf **mapped_buffers, int num_buffers); -void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer, - struct vm_gk20a_mapping_batch *batch); -void nvgpu_vm_unmap_locked_ref(struct nvgpu_ref *ref); +void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset, + struct vm_gk20a_mapping_batch *batch); -void nvgpu_vm_unmap(struct 
vm_gk20a *vm, u64 offset); +/* + * Implemented by each OS. Called from within the core VM code to handle OS + * specific components of an nvgpu_mapped_buf. + */ +void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer); + +/* + * Don't use this outside of the core VM code! + */ +void __nvgpu_vm_unmap_ref(struct nvgpu_ref *ref); /* * These all require the VM update lock to be held. -- cgit v1.2.2