From bb2c8ef511763d9f61f7e3b4bbfa7a27b9d5c2b6 Mon Sep 17 00:00:00 2001 From: ddutta Date: Tue, 3 Mar 2020 14:49:36 +0530 Subject: gpu: nvgpu: decrease refcount when sync-unmap fails When nvgpu_vm_unmap_sync_buffer fails, nvgpu_vm_unmap currently bails out without decreasing the buffer refcount. This prevents the buffer from being released if a deferred job completes after the timeout (which was observed twice during overnight stress tests). This also means that the fixed address cannot be reused. Emit a warning when nvgpu_vm_unmap_sync_buffer fails, but proceed with decreasing the refcount. Bug 200578193 Change-Id: Ie0cc7caa7d12ca0a3b42123a5f7a28bda72dabbc Signed-off-by: ddutta (cherry picked from commit a433f26d5bb1ec3253fc2655998b1ef7fb2847cb in dev-main) Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2291352 Tested-by: Naveen Kumar S Tested-by: mobile promotions Reviewed-by: automaticguardword Reviewed-by: Bibek Basu Reviewed-by: mobile promotions --- drivers/gpu/nvgpu/common/mm/vm.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c index dc928e18..4a7cc828 100644 --- a/drivers/gpu/nvgpu/common/mm/vm.c +++ b/drivers/gpu/nvgpu/common/mm/vm.c @@ -1180,6 +1180,7 @@ static int nvgpu_vm_unmap_sync_buffer(struct vm_gk20a *vm, { struct nvgpu_timeout timeout; int ret = 0; + bool done = false; nvgpu_mutex_release(&vm->update_gmmu_lock); @@ -1189,16 +1190,18 @@ static int nvgpu_vm_unmap_sync_buffer(struct vm_gk20a *vm, nvgpu_timeout_init(vm->mm->g, &timeout, 100, NVGPU_TIMER_CPU_TIMER); do { - if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) == 1) { - break; - } - nvgpu_msleep(10); - } while (nvgpu_timeout_expired_msg(&timeout, + if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) <= 1) { + done = true; + } else if (nvgpu_timeout_expired_msg(&timeout, "sync-unmap failed on 0x%llx", - mapped_buffer->addr) == 0); + 
mapped_buffer->addr) != 0) { + done = true; + } else { + nvgpu_msleep(10); + } + } while (!done); - if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) != 1 && - nvgpu_timeout_expired(&timeout)) { + if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) > 1) { ret = -ETIMEDOUT; } @@ -1221,11 +1224,9 @@ void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset, if (mapped_buffer->flags & NVGPU_VM_MAP_FIXED_OFFSET) { if (nvgpu_vm_unmap_sync_buffer(vm, mapped_buffer)) { - /* - * Looks like we have failed... Better not continue in - * case the buffer is in use. - */ - goto done; + nvgpu_warn(vm->mm->g, "%d references remaining on 0x%llx", + nvgpu_atomic_read(&mapped_buffer->ref.refcount), + mapped_buffer->addr); } } -- cgit v1.2.2