Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/dma.c     |  26
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vidmem.c  |  43
-rw-r--r--  drivers/gpu/nvgpu/common/mm/vidmem.c     | 215
3 files changed, 231 insertions(+), 53 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
index b62c4593..9e9d1007 100644
--- a/drivers/gpu/nvgpu/common/linux/dma.c
+++ b/drivers/gpu/nvgpu/common/linux/dma.c
@@ -514,7 +514,6 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
 static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
 {
 #if defined(CONFIG_GK20A_VIDMEM)
-	bool was_empty;
 	size_t mem_size = mem->size;
 
 	dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem");
@@ -523,18 +522,19 @@ static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
 	WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING);
 
 	if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) {
-		nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
-		was_empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
-		nvgpu_list_add_tail(&mem->clear_list_entry,
-				&g->mm.vidmem.clear_list_head);
-		atomic64_add(mem->aligned_size,
-			     &g->mm.vidmem.bytes_pending.atomic_var);
-		nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
-
-		if (was_empty) {
-			cancel_work_sync(&g->mm.vidmem.clear_mem_worker);
-			schedule_work(&g->mm.vidmem.clear_mem_worker);
-		}
+		int err = nvgpu_vidmem_clear_list_enqueue(g, mem);
+
+		/*
+		 * If there's an error here then that means we can't clear the
+		 * vidmem. That's too bad; however, we still own the nvgpu_mem
+		 * buf so we have to free that.
+		 *
+		 * We don't need to worry about the vidmem allocator itself
+		 * since when that gets cleaned up in the driver shutdown path
+		 * all the outstanding allocs are force freed.
+		 */
+		if (err)
+			nvgpu_kfree(g, mem);
 	} else {
 		nvgpu_memset(g, mem, 0, 0, mem->aligned_size);
 		nvgpu_free(mem->allocator,
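
Note on the new contract above: if nvgpu_vidmem_clear_list_enqueue() fails,
ownership of the nvgpu_mem stays with the caller. A minimal caller-side
sketch, mirroring nvgpu_dma_free_vid() and using only functions visible in
this patch:

	if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) {
		if (nvgpu_vidmem_clear_list_enqueue(g, mem))
			nvgpu_kfree(g, mem); /* enqueue failed; we still own mem */
		/* On success the clearing thread clears and frees mem. */
	}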
diff --git a/drivers/gpu/nvgpu/common/linux/vidmem.c b/drivers/gpu/nvgpu/common/linux/vidmem.c
index ea8e552f..92e7e504 100644
--- a/drivers/gpu/nvgpu/common/linux/vidmem.c
+++ b/drivers/gpu/nvgpu/common/linux/vidmem.c
@@ -84,6 +84,8 @@ static void gk20a_vidbuf_release(struct dma_buf *dmabuf)
 
 	nvgpu_kfree(g, linux_buf);
 	nvgpu_vidmem_buf_free(g, buf);
+
+	gk20a_put(g);
 }
 
 static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num)
@@ -160,13 +162,21 @@ struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf)
 
 int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes)
 {
-	struct nvgpu_vidmem_buf *buf;
+	struct nvgpu_vidmem_buf *buf = NULL;
 	struct nvgpu_vidmem_linux *priv;
 	int err, fd;
 
+	/*
+	 * This ref is released when the dma_buf is closed.
+	 */
+	if (!gk20a_get(g))
+		return -ENODEV;
+
 	priv = nvgpu_kzalloc(g, sizeof(*priv));
-	if (!priv)
-		return -ENOMEM;
+	if (!priv) {
+		err = -ENOMEM;
+		goto fail;
+	}
 
 	buf = nvgpu_vidmem_user_alloc(g, bytes);
 	if (!buf) {
@@ -195,8 +205,10 @@ int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes)
 	return fd;
 
 fail:
-	nvgpu_kfree(g, priv);
 	nvgpu_vidmem_buf_free(g, buf);
+	nvgpu_kfree(g, priv);
+	gk20a_put(g);
+
 	return err;
 }
 
@@ -229,24 +241,9 @@ int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
 	return err;
 }
 
-void nvgpu_vidmem_clear_mem_worker(struct work_struct *work)
+void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem)
 {
-	struct mm_gk20a *mm = container_of(work, struct mm_gk20a,
-					vidmem.clear_mem_worker);
-	struct gk20a *g = mm->g;
-	struct nvgpu_mem *mem;
-
-	while ((mem = nvgpu_vidmem_get_pending_alloc(mm)) != NULL) {
-		nvgpu_vidmem_clear(g, mem);
-		nvgpu_free(mem->allocator,
-			   (u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl));
-		nvgpu_free_sgtable(g, &mem->priv.sgt);
-
-		WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size,
-					&g->mm.vidmem.bytes_pending) < 0);
-		mem->size = 0;
-		mem->aperture = APERTURE_INVALID;
-
-		nvgpu_kfree(g, mem);
-	}
+	nvgpu_free(vidmem->allocator,
+		   (u64)nvgpu_vidmem_get_page_alloc(vidmem->priv.sgt->sgl));
+	nvgpu_free_sgtable(g, &vidmem->priv.sgt);
 }
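
For context, the gk20a_get()/gk20a_put() pairing added above ties the driver's
lifetime to any exported vidmem dma_buf. A hedged usage sketch (SZ_64K is an
arbitrary example size, not taken from this patch):

	int fd = nvgpu_vidmem_export_linux(g, SZ_64K); /* takes a gk20a ref */
	if (fd < 0)
		return fd; /* the export error path already dropped the ref */
	/*
	 * The ref is held until userspace closes the fd and the dma_buf is
	 * released; gk20a_vidbuf_release() then drops it via gk20a_put().
	 */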
diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c
index d1c5a2e8..60b819d7 100644
--- a/drivers/gpu/nvgpu/common/mm/vidmem.c
+++ b/drivers/gpu/nvgpu/common/mm/vidmem.c
@@ -22,15 +22,55 @@
 
 #include <linux/scatterlist.h>
 
+#include <nvgpu/timers.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/vidmem.h>
 #include <nvgpu/page_allocator.h>
+#include <nvgpu/enabled.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+/*
+ * This is expected to be called from the shutdown path (or the error path in
+ * the vidmem init code). As such we do not expect new vidmem frees to be
+ * enqueued.
+ */
 void nvgpu_vidmem_destroy(struct gk20a *g)
 {
+	struct nvgpu_timeout timeout;
+
+	nvgpu_timeout_init(g, &timeout, 100, NVGPU_TIMER_RETRY_TIMER);
+
+	/*
+	 * Ensure that the thread runs one last time to flush anything in the
+	 * queue.
+	 */
+	nvgpu_cond_signal_interruptible(&g->mm.vidmem.clearing_thread_cond);
+
+	/*
+	 * Wait for at most 1 second before just continuing on. It doesn't make
+	 * sense to hang the system over some potential memory leaks.
+	 */
+	do {
+		bool empty;
+
+		nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
+		empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
+		nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
+
+		if (empty)
+			break;
+
+		nvgpu_msleep(10);
+	} while (!nvgpu_timeout_expired(&timeout));
+
+	/*
+	 * Kill the vidmem clearing thread now. This wakes the thread up
+	 * automatically and causes the interruptible wait condition to trigger.
+	 */
+	nvgpu_thread_stop(&g->mm.vidmem.clearing_thread);
+
 	if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
 		nvgpu_alloc_destroy(&g->mm.vidmem.allocator);
 }
@@ -107,6 +147,139 @@ static int __nvgpu_vidmem_do_clear_all(struct gk20a *g)
 	return 0;
 }
 
+void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm)
+{
+	/*
+	 * On the first increment of the pause_count (0 -> 1) take the pause
+	 * lock and prevent the vidmem clearing thread from processing work
+	 * items.
+	 *
+	 * Otherwise the increment is all that's needed - it's essentially a
+	 * ref-count for the number of pause() calls.
+	 *
+	 * The sync component is implemented by waiting for the lock to be
+	 * released by the clearing thread in case the thread is currently
+	 * processing work items.
+	 */
+	if (nvgpu_atomic_inc_return(&mm->vidmem.pause_count) == 1)
+		nvgpu_mutex_acquire(&mm->vidmem.clearing_thread_lock);
+}
+
+void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm)
+{
+	/*
+	 * And on the last decrement (1 -> 0) release the pause lock and let
+	 * the vidmem clearing thread continue.
+	 */
+	if (nvgpu_atomic_dec_return(&mm->vidmem.pause_count) == 0)
+		nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
+}
+
+int nvgpu_vidmem_clear_list_enqueue(struct gk20a *g, struct nvgpu_mem *mem)
+{
+	struct mm_gk20a *mm = &g->mm;
+
+	/*
+	 * Crap. Can't enqueue new vidmem bufs! CE may be gone!
+	 *
+	 * However, an errant app can hold a vidmem dma_buf FD open past when
+	 * the nvgpu driver has exited. Thus when the FD does get closed
+	 * eventually the dma_buf release function will try to call the vidmem
+	 * free function which will attempt to enqueue the vidmem into the
+	 * vidmem clearing thread.
+	 */
+	if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
+		return -ENOSYS;
+
+	nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
+	nvgpu_list_add_tail(&mem->clear_list_entry,
+			    &mm->vidmem.clear_list_head);
+	nvgpu_atomic64_add(mem->aligned_size, &mm->vidmem.bytes_pending);
+	nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
+
+	nvgpu_cond_signal_interruptible(&mm->vidmem.clearing_thread_cond);
+
+	return 0;
+}
+
+static struct nvgpu_mem *nvgpu_vidmem_clear_list_dequeue(struct mm_gk20a *mm)
+{
+	struct nvgpu_mem *mem = NULL;
+
+	nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
+	if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) {
+		mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head,
+				nvgpu_mem, clear_list_entry);
+		nvgpu_list_del(&mem->clear_list_entry);
+	}
+	nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
+
+	return mem;
+}
+
+static void nvgpu_vidmem_clear_pending_allocs(struct mm_gk20a *mm)
+{
+	struct gk20a *g = mm->g;
+	struct nvgpu_mem *mem;
+
+	while ((mem = nvgpu_vidmem_clear_list_dequeue(mm)) != NULL) {
+		nvgpu_vidmem_clear(g, mem);
+
+		WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size,
+						  &g->mm.vidmem.bytes_pending) < 0);
+		mem->size = 0;
+		mem->aperture = APERTURE_INVALID;
+
+		__nvgpu_mem_free_vidmem_alloc(g, mem);
+		nvgpu_kfree(g, mem);
+	}
+}
+
+static int nvgpu_vidmem_clear_pending_allocs_thr(void *mm_ptr)
+{
+	struct mm_gk20a *mm = mm_ptr;
+
+	/*
+	 * Simple thread whose sole job is to periodically clear userspace
+	 * vidmem allocations that have been recently freed.
+	 *
+	 * Since it doesn't make sense to run unless there's pending work a
+	 * condition variable is used to wait for work. When the DMA API frees
+	 * a userspace vidmem buf it enqueues it into the clear list and alerts
+	 * us that we have some work to do.
+	 */
+
+	while (!nvgpu_thread_should_stop(&mm->vidmem.clearing_thread)) {
+		int ret;
+
+		/*
+		 * Wait for work but also make sure we should not be paused.
+		 */
+		ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
+			&mm->vidmem.clearing_thread_cond,
+			nvgpu_thread_should_stop(
+				&mm->vidmem.clearing_thread) ||
+			!nvgpu_list_empty(&mm->vidmem.clear_list_head),
+			0);
+		if (ret == -ERESTARTSYS)
+			continue;
+
+		/*
+		 * Use this lock to implement a pause mechanism. By taking this
+		 * lock some other code can prevent this thread from processing
+		 * work items.
+		 */
+		if (!nvgpu_mutex_tryacquire(&mm->vidmem.clearing_thread_lock))
+			continue;
+
+		nvgpu_vidmem_clear_pending_allocs(mm);
+
+		nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
+	}
+
+	return 0;
+}
+
 int nvgpu_vidmem_init(struct mm_gk20a *mm)
 {
 	struct gk20a *g = mm->g;
@@ -156,16 +329,39 @@ int nvgpu_vidmem_init(struct mm_gk20a *mm)
 	mm->vidmem.bootstrap_base = bootstrap_base;
 	mm->vidmem.bootstrap_size = bootstrap_size;
 
-	nvgpu_mutex_init(&mm->vidmem.first_clear_mutex);
+	err = nvgpu_cond_init(&mm->vidmem.clearing_thread_cond);
+	if (err)
+		goto fail;
 
-	INIT_WORK(&mm->vidmem.clear_mem_worker, nvgpu_vidmem_clear_mem_worker);
 	nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0);
 	nvgpu_init_list_node(&mm->vidmem.clear_list_head);
 	nvgpu_mutex_init(&mm->vidmem.clear_list_mutex);
+	nvgpu_mutex_init(&mm->vidmem.clearing_thread_lock);
+	nvgpu_atomic_set(&mm->vidmem.pause_count, 0);
+
+	/*
+	 * Start the thread off in the paused state. The thread doesn't have to
+	 * be running for this to work. It will be woken up later on in
+	 * finalize_poweron(). We won't necessarily have a CE context yet
+	 * either, so hypothetically one could cause a race where we try to
+	 * clear a vidmem struct before we have a CE context to do so.
+	 */
+	nvgpu_vidmem_thread_pause_sync(mm);
+
+	err = nvgpu_thread_create(&mm->vidmem.clearing_thread, mm,
+				  nvgpu_vidmem_clear_pending_allocs_thr,
+				  "vidmem-clear");
+	if (err)
+		goto fail;
 
 	gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M);
 
 	return 0;
+
+fail:
+	nvgpu_cond_destroy(&mm->vidmem.clearing_thread_cond);
+	nvgpu_vidmem_destroy(g);
+	return err;
 }
 
 int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space)
@@ -244,21 +440,6 @@ int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem)
 	return err;
 }
 
-struct nvgpu_mem *nvgpu_vidmem_get_pending_alloc(struct mm_gk20a *mm)
-{
-	struct nvgpu_mem *mem = NULL;
-
-	nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
-	if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) {
-		mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head,
-				nvgpu_mem, clear_list_entry);
-		nvgpu_list_del(&mem->clear_list_entry);
-	}
-	nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
-
-	return mem;
-}
-
 static int nvgpu_vidmem_clear_all(struct gk20a *g)
 {
 	int err;
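
For reference, the pause/unpause pair added in mm/vidmem.c above forms a
ref-counted bracket. A minimal sketch of a hypothetical caller that must not
race with the clearing thread (not part of this patch):

	nvgpu_vidmem_thread_pause_sync(mm); /* returns once the thread is idle */
	/* ... work that must not race with vidmem clears, e.g. CE teardown ... */
	nvgpu_vidmem_thread_unpause(mm);

Nested pause calls are safe: only the 0 -> 1 transition of pause_count takes
the thread lock and only the 1 -> 0 transition releases it.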