From e26ce10cc6b59314ccf5931a8c5b46a9e57b085a Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Thu, 5 Oct 2017 17:22:41 -0700 Subject: gpu: nvgpu: Convert VIDMEM work_struct to thread Convert the work_struct used by the vidmem background clearing to a thread to make it more cross platform. The thread waits on a condition variable to determine when work needs to be done. The signal comes from the DMA API when it enqueues a new nvgpu_mem that needs clearing. Add logic for handling suspend: the CE cannot be accessed while the GPU is suspended. As such the background thread must be paused while the GPU is suspended and the CE is not available. Several other changes were also made: o Move the code that enqueues a nvgpu_mem from the DMA API code to a function in the VIDMEM code. o Move nvgpu_vidmem_get_pending_alloc() to the Linux specific code as this function is only used there. It's a trivial function that QNX can easily implement as well. o Remove the was_empty logic from the enqueue. Now just always signal the condition variable when anew nvgpu_mem comes in. o Move CE suspend to after MM suspend. JIRA NVGPU-30 JIRA NVGPU-138 Change-Id: Ie9286ae5a127c3fced86dfb9794e7d81eab0491c Signed-off-by: Alex Waterman Reviewed-on: https://git-master.nvidia.com/r/1574498 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/common/linux/dma.c | 26 ++-- drivers/gpu/nvgpu/common/linux/vidmem.c | 43 +++--- drivers/gpu/nvgpu/common/mm/vidmem.c | 215 +++++++++++++++++++++++++--- drivers/gpu/nvgpu/gk20a/gk20a.c | 7 +- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 4 +- drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 8 +- drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | 10 ++ drivers/gpu/nvgpu/include/nvgpu/vidmem.h | 21 +-- 8 files changed, 267 insertions(+), 67 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c index b62c4593..9e9d1007 100644 --- a/drivers/gpu/nvgpu/common/linux/dma.c +++ b/drivers/gpu/nvgpu/common/linux/dma.c @@ -514,7 +514,6 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem) static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem) { #if defined(CONFIG_GK20A_VIDMEM) - bool was_empty; size_t mem_size = mem->size; dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem"); @@ -523,18 +522,19 @@ static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem) WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING); if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) { - nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); - was_empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head); - nvgpu_list_add_tail(&mem->clear_list_entry, - &g->mm.vidmem.clear_list_head); - atomic64_add(mem->aligned_size, - &g->mm.vidmem.bytes_pending.atomic_var); - nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); - - if (was_empty) { - cancel_work_sync(&g->mm.vidmem.clear_mem_worker); - schedule_work(&g->mm.vidmem.clear_mem_worker); - } + int err = nvgpu_vidmem_clear_list_enqueue(g, mem); + + /* + * If there's an error here then that means we can't clear the + * vidmem. That's too bad; however, we still own the nvgpu_mem + * buf so we have to free that. + * + * We don't need to worry about the vidmem allocator itself + * since when that gets cleaned up in the driver shutdown path + * all the outstanding allocs are force freed. + */ + if (err) + nvgpu_kfree(g, mem); } else { nvgpu_memset(g, mem, 0, 0, mem->aligned_size); nvgpu_free(mem->allocator, diff --git a/drivers/gpu/nvgpu/common/linux/vidmem.c b/drivers/gpu/nvgpu/common/linux/vidmem.c index ea8e552f..92e7e504 100644 --- a/drivers/gpu/nvgpu/common/linux/vidmem.c +++ b/drivers/gpu/nvgpu/common/linux/vidmem.c @@ -84,6 +84,8 @@ static void gk20a_vidbuf_release(struct dma_buf *dmabuf) nvgpu_kfree(g, linux_buf); nvgpu_vidmem_buf_free(g, buf); + + gk20a_put(g); } static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num) @@ -160,13 +162,21 @@ struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf) int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes) { - struct nvgpu_vidmem_buf *buf; + struct nvgpu_vidmem_buf *buf = NULL; struct nvgpu_vidmem_linux *priv; int err, fd; + /* + * This ref is released when the dma_buf is closed. + */ + if (!gk20a_get(g)) + return -ENODEV; + priv = nvgpu_kzalloc(g, sizeof(*priv)); - if (!priv) - return -ENOMEM; + if (!priv) { + err = -ENOMEM; + goto fail; + } buf = nvgpu_vidmem_user_alloc(g, bytes); if (!buf) { @@ -195,8 +205,10 @@ int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes) return fd; fail: - nvgpu_kfree(g, priv); nvgpu_vidmem_buf_free(g, buf); + nvgpu_kfree(g, priv); + gk20a_put(g); + return err; } @@ -229,24 +241,9 @@ int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf, return err; } -void nvgpu_vidmem_clear_mem_worker(struct work_struct *work) +void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem) { - struct mm_gk20a *mm = container_of(work, struct mm_gk20a, - vidmem.clear_mem_worker); - struct gk20a *g = mm->g; - struct nvgpu_mem *mem; - - while ((mem = nvgpu_vidmem_get_pending_alloc(mm)) != NULL) { - nvgpu_vidmem_clear(g, mem); - nvgpu_free(mem->allocator, - (u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl)); - nvgpu_free_sgtable(g, &mem->priv.sgt); - - WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size, - &g->mm.vidmem.bytes_pending) < 0); - mem->size = 0; - mem->aperture = APERTURE_INVALID; - - nvgpu_kfree(g, mem); - } + nvgpu_free(vidmem->allocator, + (u64)nvgpu_vidmem_get_page_alloc(vidmem->priv.sgt->sgl)); + nvgpu_free_sgtable(g, &vidmem->priv.sgt); } diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c index d1c5a2e8..60b819d7 100644 --- a/drivers/gpu/nvgpu/common/mm/vidmem.c +++ b/drivers/gpu/nvgpu/common/mm/vidmem.c @@ -22,15 +22,55 @@ #include +#include #include #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/mm_gk20a.h" +/* + * This is expected to be called from the shutdown path (or the error path in + * the vidmem init code). As such we do not expect new vidmem frees to be + * enqueued. + */ void nvgpu_vidmem_destroy(struct gk20a *g) { + struct nvgpu_timeout timeout; + + nvgpu_timeout_init(g, &timeout, 100, NVGPU_TIMER_RETRY_TIMER); + + /* + * Ensure that the thread runs one last time to flush anything in the + * queue. + */ + nvgpu_cond_signal_interruptible(&g->mm.vidmem.clearing_thread_cond); + + /* + * Wait for at most 1 second before just continuing on. It doesn't make + * sense to hang the system over some potential memory leaks. + */ + do { + bool empty; + + nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); + empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head); + nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); + + if (empty) + break; + + nvgpu_msleep(10); + } while (!nvgpu_timeout_expired(&timeout)); + + /* + * Kill the vidmem clearing thread now. This will wake the thread up + * automatically and cause the wait_interruptible condition trigger. + */ + nvgpu_thread_stop(&g->mm.vidmem.clearing_thread); + if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) nvgpu_alloc_destroy(&g->mm.vidmem.allocator); } @@ -107,6 +147,139 @@ static int __nvgpu_vidmem_do_clear_all(struct gk20a *g) return 0; } +void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm) +{ + /* + * On the first increment of the pause_count (0 -> 1) take the pause + * lock and prevent the vidmem clearing thread from processing work + * items. + * + * Otherwise the increment is all that's needed - it's essentially a + * ref-count for the number of pause() calls. + * + * The sync component is implemented by waiting for the lock to be + * released by the clearing thread in case the thread is currently + * processing work items. + */ + if (nvgpu_atomic_inc_return(&mm->vidmem.pause_count) == 1) + nvgpu_mutex_acquire(&mm->vidmem.clearing_thread_lock); +} + +void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm) +{ + /* + * And on the last decrement (1 -> 0) release the pause lock and let + * the vidmem clearing thread continue. + */ + if (nvgpu_atomic_dec_return(&mm->vidmem.pause_count) == 0) + nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock); +} + +int nvgpu_vidmem_clear_list_enqueue(struct gk20a *g, struct nvgpu_mem *mem) +{ + struct mm_gk20a *mm = &g->mm; + + /* + * Crap. Can't enqueue new vidmem bufs! CE may be gone! + * + * However, an errant app can hold a vidmem dma_buf FD open past when + * the nvgpu driver has exited. Thus when the FD does get closed + * eventually the dma_buf release function will try to call the vidmem + * free function which will attempt to enqueue the vidmem into the + * vidmem clearing thread. + */ + if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) + return -ENOSYS; + + nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex); + nvgpu_list_add_tail(&mem->clear_list_entry, + &mm->vidmem.clear_list_head); + nvgpu_atomic64_add(mem->aligned_size, &mm->vidmem.bytes_pending); + nvgpu_mutex_release(&mm->vidmem.clear_list_mutex); + + nvgpu_cond_signal_interruptible(&mm->vidmem.clearing_thread_cond); + + return 0; +} + +static struct nvgpu_mem *nvgpu_vidmem_clear_list_dequeue(struct mm_gk20a *mm) +{ + struct nvgpu_mem *mem = NULL; + + nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex); + if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) { + mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head, + nvgpu_mem, clear_list_entry); + nvgpu_list_del(&mem->clear_list_entry); + } + nvgpu_mutex_release(&mm->vidmem.clear_list_mutex); + + return mem; +} + +static void nvgpu_vidmem_clear_pending_allocs(struct mm_gk20a *mm) +{ + struct gk20a *g = mm->g; + struct nvgpu_mem *mem; + + while ((mem = nvgpu_vidmem_clear_list_dequeue(mm)) != NULL) { + nvgpu_vidmem_clear(g, mem); + + WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size, + &g->mm.vidmem.bytes_pending) < 0); + mem->size = 0; + mem->aperture = APERTURE_INVALID; + + __nvgpu_mem_free_vidmem_alloc(g, mem); + nvgpu_kfree(g, mem); + } +} + +static int nvgpu_vidmem_clear_pending_allocs_thr(void *mm_ptr) +{ + struct mm_gk20a *mm = mm_ptr; + + /* + * Simple thread who's sole job is to periodically clear userspace + * vidmem allocations that have been recently freed. + * + * Since it doesn't make sense to run unless there's pending work a + * condition field is used to wait for work. When the DMA API frees a + * userspace vidmem buf it enqueues it into the clear list and alerts us + * that we have some work to do. + */ + + while (!nvgpu_thread_should_stop(&mm->vidmem.clearing_thread)) { + int ret; + + /* + * Wait for work but also make sure we should not be paused. + */ + ret = NVGPU_COND_WAIT_INTERRUPTIBLE( + &mm->vidmem.clearing_thread_cond, + nvgpu_thread_should_stop( + &mm->vidmem.clearing_thread) || + !nvgpu_list_empty(&mm->vidmem.clear_list_head), + 0); + if (ret == -ERESTARTSYS) + continue; + + /* + * Use this lock to implement a pause mechanism. By taking this + * lock some other code can prevent this thread from processing + * work items. + */ + if (!nvgpu_mutex_tryacquire(&mm->vidmem.clearing_thread_lock)) + continue; + + nvgpu_vidmem_clear_pending_allocs(mm); + + nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock); + } + + return 0; +} + int nvgpu_vidmem_init(struct mm_gk20a *mm) { struct gk20a *g = mm->g; @@ -156,16 +329,39 @@ int nvgpu_vidmem_init(struct mm_gk20a *mm) mm->vidmem.bootstrap_base = bootstrap_base; mm->vidmem.bootstrap_size = bootstrap_size; - nvgpu_mutex_init(&mm->vidmem.first_clear_mutex); + err = nvgpu_cond_init(&mm->vidmem.clearing_thread_cond); + if (err) + goto fail; - INIT_WORK(&mm->vidmem.clear_mem_worker, nvgpu_vidmem_clear_mem_worker); nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0); nvgpu_init_list_node(&mm->vidmem.clear_list_head); nvgpu_mutex_init(&mm->vidmem.clear_list_mutex); + nvgpu_mutex_init(&mm->vidmem.clearing_thread_lock); + nvgpu_atomic_set(&mm->vidmem.pause_count, 0); + + /* + * Start the thread off in the paused state. The thread doesn't have to + * be running for this to work. It will be woken up later on in + * finalize_poweron(). We won't necessarily have a CE context yet + * either, so hypothetically one could cause a race where we try to + * clear a vidmem struct before we have a CE context to do so. + */ + nvgpu_vidmem_thread_pause_sync(mm); + + err = nvgpu_thread_create(&mm->vidmem.clearing_thread, mm, + nvgpu_vidmem_clear_pending_allocs_thr, + "vidmem-clear"); + if (err) + goto fail; gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); return 0; + +fail: + nvgpu_cond_destroy(&mm->vidmem.clearing_thread_cond); + nvgpu_vidmem_destroy(g); + return err; } int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space) @@ -244,21 +440,6 @@ int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem) return err; } -struct nvgpu_mem *nvgpu_vidmem_get_pending_alloc(struct mm_gk20a *mm) -{ - struct nvgpu_mem *mem = NULL; - - nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex); - if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) { - mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head, - nvgpu_mem, clear_list_entry); - nvgpu_list_del(&mem->clear_list_entry); - } - nvgpu_mutex_release(&mm->vidmem.clear_list_mutex); - - return mem; -} - static int nvgpu_vidmem_clear_all(struct gk20a *g) { int err; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index e1bf2b4b..02baf683 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -97,8 +98,6 @@ int gk20a_prepare_poweroff(struct gk20a *g) if (gk20a_fifo_is_engine_busy(g)) return -EBUSY; - gk20a_ce_suspend(g); - ret = gk20a_channel_suspend(g); if (ret) return ret; @@ -111,6 +110,8 @@ int gk20a_prepare_poweroff(struct gk20a *g) ret |= gk20a_mm_suspend(g); ret |= gk20a_fifo_suspend(g); + gk20a_ce_suspend(g); + /* Disable GPCPLL */ if (g->ops.clk.suspend_clk_support) ret |= g->ops.clk.suspend_clk_support(g); @@ -323,6 +324,8 @@ int gk20a_finalize_poweron(struct gk20a *g) } } + nvgpu_vidmem_thread_unpause(&g->mm); + #if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_TEGRA_19x_GPU) if (gk20a_platform_has_syncpoints(g) && g->syncpt_unit_size) { if (!nvgpu_mem_is_valid(&g->syncpt_mem)) { diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 687951a9..67ab307f 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -978,9 +978,7 @@ int gk20a_mm_suspend(struct gk20a *g) { gk20a_dbg_fn(""); -#if defined(CONFIG_GK20A_VIDMEM) - cancel_work_sync(&g->mm.vidmem.clear_mem_worker); -#endif + nvgpu_vidmem_thread_pause_sync(&g->mm); g->ops.mm.cbc_clean(g); g->ops.mm.l2_flush(g, false); diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 556cb234..13698cd7 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -36,6 +36,8 @@ #include #include #include +#include +#include struct nvgpu_pd_cache; @@ -272,7 +274,11 @@ struct mm_gk20a { struct nvgpu_list_node clear_list_head; struct nvgpu_mutex clear_list_mutex; - struct work_struct clear_mem_worker; + struct nvgpu_cond clearing_thread_cond; + struct nvgpu_thread clearing_thread; + struct nvgpu_mutex clearing_thread_lock; + nvgpu_atomic_t pause_count; + nvgpu_atomic64_t bytes_pending; } vidmem; }; diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h index 537409a8..6feacff7 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h @@ -272,6 +272,16 @@ int nvgpu_mem_create_from_mem(struct gk20a *g, struct nvgpu_mem *dest, struct nvgpu_mem *src, int start_page, int nr_pages); +/* + * Really free a vidmem buffer. There's a fair amount of work involved in + * freeing vidmem buffers in the DMA API. This handles none of that - it only + * frees the underlying vidmem specific structures used in vidmem buffers. + * + * This is implemented in the OS specific code. If it's not necessary it can + * be a noop. But the symbol must at least be present. + */ +void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem); + /* * Buffer accessors - wrap between begin() and end() if there is no permanent * kernel mapping for this buffer. diff --git a/drivers/gpu/nvgpu/include/nvgpu/vidmem.h b/drivers/gpu/nvgpu/include/nvgpu/vidmem.h index 9e9f8301..690f8164 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vidmem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vidmem.h @@ -73,17 +73,19 @@ struct nvgpu_vidmem_buf *nvgpu_vidmem_user_alloc(struct gk20a *g, size_t bytes); void nvgpu_vidmem_buf_free(struct gk20a *g, struct nvgpu_vidmem_buf *buf); +int nvgpu_vidmem_clear_list_enqueue(struct gk20a *g, struct nvgpu_mem *mem); + bool nvgpu_addr_is_vidmem_page_alloc(u64 addr); int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space); -struct nvgpu_mem *nvgpu_vidmem_get_pending_alloc(struct mm_gk20a *mm); - void nvgpu_vidmem_destroy(struct gk20a *g); int nvgpu_vidmem_init(struct mm_gk20a *mm); -void nvgpu_vidmem_clear_mem_worker(struct work_struct *work); int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem); +void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm); +void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm); + #else /* !defined(CONFIG_GK20A_VIDMEM) */ /* @@ -110,11 +112,6 @@ static inline int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space) return -ENOSYS; } -static inline struct nvgpu_mem *nvgpu_vidmem_get_pending_alloc(struct mm_gk20a *mm) -{ - return NULL; -} - static inline void nvgpu_vidmem_destroy(struct gk20a *g) { } @@ -135,6 +132,14 @@ static inline int nvgpu_vidmem_clear(struct gk20a *g, return -ENOSYS; } +static inline void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm) +{ +} + +static inline void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm) +{ +} + #endif /* !defined(CONFIG_GK20A_VIDMEM) */ #endif /* __NVGPU_VIDMEM_H__ */ -- cgit v1.2.2