1 files changed, 198 insertions, 17 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c
index d1c5a2e8..60b819d7 100644
--- a/drivers/gpu/nvgpu/common/mm/vidmem.c
+++ b/drivers/gpu/nvgpu/common/mm/vidmem.c
@@ -22,15 +22,55 @@
 #include <linux/scatterlist.h>
+#include <nvgpu/timers.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/vidmem.h>
 #include <nvgpu/page_allocator.h>
+#include <nvgpu/enabled.h>
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
+/*
+ * Shut down the vidmem clearing machinery and release the vidmem allocator.
+ *
+ * Expected callers are the shutdown path and the error path of the vidmem
+ * init code, so no new vidmem frees should be getting enqueued while this
+ * runs.
+ */
 void nvgpu_vidmem_destroy(struct gk20a *g)
 {
+        struct nvgpu_timeout timeout;
+        bool drained;
+        /* Retry budget for the drain poll below: 100 tries, 10 ms apart. */
+        nvgpu_timeout_init(g, &timeout, 100, NVGPU_TIMER_RETRY_TIMER);
+        /*
+         * Kick the clearing thread so that it gets one last chance to flush
+         * whatever is still sitting in the queue.
+         */
+        nvgpu_cond_signal_interruptible(&g->mm.vidmem.clearing_thread_cond);
+        /*
+         * Poll until the clear list drains, but give up after roughly 1 second
+         * and carry on regardless: hanging the system over some potential
+         * memory leaks is not worth it.
+         */
+        for (;;) {
+                nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
+                drained = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
+                nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
+                if (drained)
+                        break;
+                nvgpu_msleep(10);
+                if (nvgpu_timeout_expired(&timeout))
+                        break;
+        }
+        /*
+         * Now stop the clearing thread. Stopping it also wakes it up, which
+         * makes its interruptible wait condition trigger.
+         */
+        nvgpu_thread_stop(&g->mm.vidmem.clearing_thread);
         if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
                 nvgpu_alloc_destroy(&g->mm.vidmem.allocator);
 }
@@ -107,6 +147,139 @@ static int __nvgpu_vidmem_do_clear_all(struct gk20a *g)
        return 0;
 }
+void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm)
+{
+        int pause_refs;
+        /*
+         * pause_count acts as a ref-count of outstanding pause() calls. Only
+         * the caller that moves it from 0 to 1 takes the pause lock, which is
+         * what keeps the vidmem clearing thread from processing work items;
+         * for every later caller the increment alone is enough.
+         *
+         * The "sync" part comes from the lock acquire itself: if the clearing
+         * thread is in the middle of processing work items it holds the lock,
+         * so we wait here until it releases it.
+         *
+         * NOTE(review): if pause/unpause pairs from different threads can
+         * interleave, the lock may end up released by a thread other than the
+         * one that acquired it - confirm against the callers.
+         */
+        pause_refs = nvgpu_atomic_inc_return(&mm->vidmem.pause_count);
+        if (pause_refs == 1)
+                nvgpu_mutex_acquire(&mm->vidmem.clearing_thread_lock);
+}
+void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm)
+{
+        int pause_refs = nvgpu_atomic_dec_return(&mm->vidmem.pause_count);
+        /*
+         * Counterpart of the pause: only the final decrement (1 -> 0) drops
+         * the pause lock, which lets the vidmem clearing thread continue.
+         */
+        if (pause_refs != 0)
+                return;
+        nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
+}
+int nvgpu_vidmem_clear_list_enqueue(struct gk20a *g, struct nvgpu_mem *mem)
+{
+        /*
+         * Once the driver is dying no new vidmem bufs may be enqueued: the CE
+         * may already be gone.
+         *
+         * This can legitimately happen. An errant app may keep a vidmem
+         * dma_buf FD open past the point where the nvgpu driver has exited;
+         * when that FD is eventually closed, the dma_buf release function
+         * calls the vidmem free function, which in turn tries to enqueue the
+         * vidmem for the clearing thread. Reject the request with -ENOSYS in
+         * that case.
+         */
+        if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
+                return -ENOSYS;
+        /* Queue the buffer and account for its size under the list lock. */
+        nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
+        nvgpu_list_add_tail(&mem->clear_list_entry,
+                            &g->mm.vidmem.clear_list_head);
+        nvgpu_atomic64_add(mem->aligned_size, &g->mm.vidmem.bytes_pending);
+        nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
+        /* Let the clearing thread know there is work to do. */
+        nvgpu_cond_signal_interruptible(&g->mm.vidmem.clearing_thread_cond);
+        return 0;
+}
+/*
+ * Pop the first entry off the vidmem clear list while holding the list lock.
+ * Returns the unlinked nvgpu_mem, or NULL when the list is empty.
+ */
+static struct nvgpu_mem *nvgpu_vidmem_clear_list_dequeue(struct mm_gk20a *mm)
+{
+        struct nvgpu_mem *next;
+        nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
+        if (nvgpu_list_empty(&mm->vidmem.clear_list_head)) {
+                next = NULL;
+        } else {
+                next = nvgpu_list_first_entry(&mm->vidmem.clear_list_head,
+                                nvgpu_mem, clear_list_entry);
+                nvgpu_list_del(&next->clear_list_entry);
+        }
+        nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
+        return next;
+}
+/*
+ * Drain the vidmem clear list: every queued nvgpu_mem is cleared, removed
+ * from the bytes_pending accounting, released back to the vidmem allocator
+ * and finally freed.
+ */
+static void nvgpu_vidmem_clear_pending_allocs(struct mm_gk20a *mm)
+{
+        struct gk20a *g = mm->g;
+        struct nvgpu_mem *mem;
+        int err;
+        while ((mem = nvgpu_vidmem_clear_list_dequeue(mm)) != NULL) {
+                /*
+                 * There is nothing to fall back to if the clear fails, so the
+                 * buffer is still released below - but make the failure
+                 * visible instead of silently dropping the return value.
+                 */
+                err = nvgpu_vidmem_clear(g, mem);
+                WARN_ON(err != 0);
+                /* The pending byte count must never go negative. */
+                WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size,
+                                        &g->mm.vidmem.bytes_pending) < 0);
+                mem->size = 0;
+                mem->aperture = APERTURE_INVALID;
+                __nvgpu_mem_free_vidmem_alloc(g, mem);
+                nvgpu_kfree(g, mem);
+        }
+}
+/*
+ * Body of the vidmem clearing thread. @mm_ptr is the struct mm_gk20a whose
+ * clear list is serviced. Always returns 0 once the thread is told to stop.
+ */
+static int nvgpu_vidmem_clear_pending_allocs_thr(void *mm_ptr)
+{
+        struct mm_gk20a *mm = mm_ptr;
+        int ret;
+        /*
+         * Simple thread whose sole job is to clear userspace vidmem
+         * allocations that have recently been freed.
+         *
+         * Running without pending work makes no sense, so a condition field
+         * is used to sleep until there is some: when the DMA API frees a
+         * userspace vidmem buf it enqueues the buf on the clear list and
+         * alerts us that there is work to do.
+         */
+        while (!nvgpu_thread_should_stop(&mm->vidmem.clearing_thread)) {
+                /*
+                 * Sleep until we are either asked to stop or the clear list
+                 * has something on it.
+                 */
+                ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
+                                &mm->vidmem.clearing_thread_cond,
+                                nvgpu_thread_should_stop(
+                                        &mm->vidmem.clearing_thread) ||
+                                !nvgpu_list_empty(&mm->vidmem.clear_list_head),
+                                0);
+                /*
+                 * An interrupted wait just goes around again. Otherwise the
+                 * clearing_thread_lock implements the pause mechanism: other
+                 * code that holds it prevents this thread from processing
+                 * work items, so only drain the list if the lock can be taken
+                 * right now.
+                 *
+                 * NOTE(review): while paused with a non-empty list the wait
+                 * condition presumably stays true and the tryacquire keeps
+                 * failing, so this loop would appear to spin - confirm.
+                 */
+                if (ret != -ERESTARTSYS &&
+                    nvgpu_mutex_tryacquire(&mm->vidmem.clearing_thread_lock)) {
+                        nvgpu_vidmem_clear_pending_allocs(mm);
+                        nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
+                }
+        }
+        return 0;
+}
 int nvgpu_vidmem_init(struct mm_gk20a *mm)
 {
        struct gk20a *g = mm->g;
@@ -156,16 +329,39 @@ int nvgpu_vidmem_init(struct mm_gk20a *mm)
        mm->vidmem.bootstrap_base = bootstrap_base;
        mm->vidmem.bootstrap_size = bootstrap_size;
-        nvgpu_mutex_init(&mm->vidmem.first_clear_mutex);
+        err = nvgpu_cond_init(&mm->vidmem.clearing_thread_cond);
+        if (err)
+                goto fail;
-        INIT_WORK(&mm->vidmem.clear_mem_worker, nvgpu_vidmem_clear_mem_worker);
        nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0);
        nvgpu_init_list_node(&mm->vidmem.clear_list_head);
        nvgpu_mutex_init(&mm->vidmem.clear_list_mutex);
+        nvgpu_mutex_init(&mm->vidmem.clearing_thread_lock);
+        nvgpu_atomic_set(&mm->vidmem.pause_count, 0);
+        /*
+         * Start the thread off in the paused state. The thread doesn't have to
+         * be running for this to work. It will be woken up later on in
+         * finalize_poweron(). We won't necessarily have a CE context yet
+         * either, so hypothetically one could cause a race where we try to
+         * clear a vidmem struct before we have a CE context to do so.
+         */
+        nvgpu_vidmem_thread_pause_sync(mm);
+        err = nvgpu_thread_create(&mm->vidmem.clearing_thread, mm,
+                                  nvgpu_vidmem_clear_pending_allocs_thr,
+                                  "vidmem-clear");
+        if (err)
+                goto fail;
        gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M);
        return 0;
+fail:
+        nvgpu_cond_destroy(&mm->vidmem.clearing_thread_cond);
+        nvgpu_vidmem_destroy(g);
+        return err;
 }
 int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space)
@@ -244,21 +440,6 @@ int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem)
        return err;
 }
-struct nvgpu_mem *nvgpu_vidmem_get_pending_alloc(struct mm_gk20a *mm)
-{
-        struct nvgpu_mem *mem = NULL;
-        nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
-        if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) {
-                mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head,
-                                nvgpu_mem, clear_list_entry);
-                nvgpu_list_del(&mem->clear_list_entry);
-        }
-        nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
-        return mem;
-}
 static int nvgpu_vidmem_clear_all(struct gk20a *g)
 {
        int err;

diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c index d1c5a2e8..60b819d7 100644 --- a/drivers/gpu/nvgpu/common/mm/vidmem.c +++ b/drivers/gpu/nvgpu/common/mm/vidmem.c
@@ -22,15 +22,55 @@
22		22
23	#include <linux/scatterlist.h>	23	#include <linux/scatterlist.h>
24		24
		25	#include <nvgpu/timers.h>
25	#include <nvgpu/dma.h>	26	#include <nvgpu/dma.h>
26	#include <nvgpu/vidmem.h>	27	#include <nvgpu/vidmem.h>
27	#include <nvgpu/page_allocator.h>	28	#include <nvgpu/page_allocator.h>
		29	#include <nvgpu/enabled.h>
28		30
29	#include "gk20a/gk20a.h"	31	#include "gk20a/gk20a.h"
30	#include "gk20a/mm_gk20a.h"	32	#include "gk20a/mm_gk20a.h"
31		33
		34	/*
		35	* This is expected to be called from the shutdown path (or the error path in
		36	* the vidmem init code). As such we do not expect new vidmem frees to be
		37	* enqueued.
		38	*/
32	void nvgpu_vidmem_destroy(struct gk20a *g)	39	void nvgpu_vidmem_destroy(struct gk20a *g)
33	{	40	{
		41	struct nvgpu_timeout timeout;
		42
		43	nvgpu_timeout_init(g, &timeout, 100, NVGPU_TIMER_RETRY_TIMER);
		44
		45	/*
		46	* Ensure that the thread runs one last time to flush anything in the
		47	* queue.
		48	*/
		49	nvgpu_cond_signal_interruptible(&g->mm.vidmem.clearing_thread_cond);
		50
		51	/*
		52	* Wait for at most 1 second before just continuing on. It doesn't make
		53	* sense to hang the system over some potential memory leaks.
		54	*/
		55	do {
		56	bool empty;
		57
		58	nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
		59	empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
		60	nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
		61
		62	if (empty)
		63	break;
		64
		65	nvgpu_msleep(10);
		66	} while (!nvgpu_timeout_expired(&timeout));
		67
		68	/*
		69	* Kill the vidmem clearing thread now. This will wake the thread up
		70	* automatically and cause the wait_interruptible condition to trigger.
		71	*/
		72	nvgpu_thread_stop(&g->mm.vidmem.clearing_thread);
		73
34	if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator))	74	if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
35	nvgpu_alloc_destroy(&g->mm.vidmem.allocator);	75	nvgpu_alloc_destroy(&g->mm.vidmem.allocator);
36	}	76	}
@@ -107,6 +147,139 @@ static int __nvgpu_vidmem_do_clear_all(struct gk20a *g)
107	return 0;	147	return 0;
108	}	148	}
109		149
		150	void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm)
		151	{
		152	/*
		153	* On the first increment of the pause_count (0 -> 1) take the pause
		154	* lock and prevent the vidmem clearing thread from processing work
		155	* items.
		156	*
		157	* Otherwise the increment is all that's needed - it's essentially a
		158	* ref-count for the number of pause() calls.
		159	*
		160	* The sync component is implemented by waiting for the lock to be
		161	* released by the clearing thread in case the thread is currently
		162	* processing work items.
		163	*/
		164	if (nvgpu_atomic_inc_return(&mm->vidmem.pause_count) == 1)
		165	nvgpu_mutex_acquire(&mm->vidmem.clearing_thread_lock);
		166	}
		167
		168	void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm)
		169	{
		170	/*
		171	* And on the last decrement (1 -> 0) release the pause lock and let
		172	* the vidmem clearing thread continue.
		173	*/
		174	if (nvgpu_atomic_dec_return(&mm->vidmem.pause_count) == 0)
		175	nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
		176	}
		177
		178	int nvgpu_vidmem_clear_list_enqueue(struct gk20a *g, struct nvgpu_mem *mem)
		179	{
		180	struct mm_gk20a *mm = &g->mm;
		181
		182	/*
		183	* Crap. Can't enqueue new vidmem bufs! CE may be gone!
		184	*
		185	* However, an errant app can hold a vidmem dma_buf FD open past when
		186	* the nvgpu driver has exited. Thus when the FD does get closed
		187	* eventually the dma_buf release function will try to call the vidmem
		188	* free function which will attempt to enqueue the vidmem into the
		189	* vidmem clearing thread.
		190	*/
		191	if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
		192	return -ENOSYS;
		193
		194	nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
		195	nvgpu_list_add_tail(&mem->clear_list_entry,
		196	&mm->vidmem.clear_list_head);
		197	nvgpu_atomic64_add(mem->aligned_size, &mm->vidmem.bytes_pending);
		198	nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
		199
		200	nvgpu_cond_signal_interruptible(&mm->vidmem.clearing_thread_cond);
		201
		202	return 0;
		203	}
		204
		205	static struct nvgpu_mem *nvgpu_vidmem_clear_list_dequeue(struct mm_gk20a *mm)
		206	{
		207	struct nvgpu_mem *mem = NULL;
		208
		209	nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
		210	if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) {
		211	mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head,
		212	nvgpu_mem, clear_list_entry);
		213	nvgpu_list_del(&mem->clear_list_entry);
		214	}
		215	nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
		216
		217	return mem;
		218	}
		219
		220	static void nvgpu_vidmem_clear_pending_allocs(struct mm_gk20a *mm)
		221	{
		222	struct gk20a *g = mm->g;
		223	struct nvgpu_mem *mem;
		224
		225	while ((mem = nvgpu_vidmem_clear_list_dequeue(mm)) != NULL) {
		226	nvgpu_vidmem_clear(g, mem);
		227
		228	WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size,
		229	&g->mm.vidmem.bytes_pending) < 0);
		230	mem->size = 0;
		231	mem->aperture = APERTURE_INVALID;
		232
		233	__nvgpu_mem_free_vidmem_alloc(g, mem);
		234	nvgpu_kfree(g, mem);
		235	}
		236	}
		237
		238	static int nvgpu_vidmem_clear_pending_allocs_thr(void *mm_ptr)
		239	{
		240	struct mm_gk20a *mm = mm_ptr;
		241
		242	/*
		243	* Simple thread whose sole job is to periodically clear userspace
		244	* vidmem allocations that have been recently freed.
		245	*
		246	* Since it doesn't make sense to run unless there's pending work a
		247	* condition field is used to wait for work. When the DMA API frees a
		248	* userspace vidmem buf it enqueues it into the clear list and alerts us
		249	* that we have some work to do.
		250	*/
		251
		252	while (!nvgpu_thread_should_stop(&mm->vidmem.clearing_thread)) {
		253	int ret;
		254
		255	/*
		256	* Wait for work but also make sure we should not be paused.
		257	*/
		258	ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
		259	&mm->vidmem.clearing_thread_cond,
		260	nvgpu_thread_should_stop(
		261	&mm->vidmem.clearing_thread) ||
		262	!nvgpu_list_empty(&mm->vidmem.clear_list_head),
		263	0);
		264	if (ret == -ERESTARTSYS)
		265	continue;
		266
		267	/*
		268	* Use this lock to implement a pause mechanism. By taking this
		269	* lock some other code can prevent this thread from processing
		270	* work items.
		271	*/
		272	if (!nvgpu_mutex_tryacquire(&mm->vidmem.clearing_thread_lock))
		273	continue;
		274
		275	nvgpu_vidmem_clear_pending_allocs(mm);
		276
		277	nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
		278	}
		279
		280	return 0;
		281	}
		282
110	int nvgpu_vidmem_init(struct mm_gk20a *mm)	283	int nvgpu_vidmem_init(struct mm_gk20a *mm)
111	{	284	{
112	struct gk20a *g = mm->g;	285	struct gk20a *g = mm->g;
@@ -156,16 +329,39 @@ int nvgpu_vidmem_init(struct mm_gk20a *mm)
156	mm->vidmem.bootstrap_base = bootstrap_base;	329	mm->vidmem.bootstrap_base = bootstrap_base;
157	mm->vidmem.bootstrap_size = bootstrap_size;	330	mm->vidmem.bootstrap_size = bootstrap_size;
158		331
159	nvgpu_mutex_init(&mm->vidmem.first_clear_mutex);	332	err = nvgpu_cond_init(&mm->vidmem.clearing_thread_cond);
		333	if (err)
		334	goto fail;
160		335
161	INIT_WORK(&mm->vidmem.clear_mem_worker, nvgpu_vidmem_clear_mem_worker);
162	nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0);	336	nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0);
163	nvgpu_init_list_node(&mm->vidmem.clear_list_head);	337	nvgpu_init_list_node(&mm->vidmem.clear_list_head);
164	nvgpu_mutex_init(&mm->vidmem.clear_list_mutex);	338	nvgpu_mutex_init(&mm->vidmem.clear_list_mutex);
		339	nvgpu_mutex_init(&mm->vidmem.clearing_thread_lock);
		340	nvgpu_atomic_set(&mm->vidmem.pause_count, 0);
		341
		342	/*
		343	* Start the thread off in the paused state. The thread doesn't have to
		344	* be running for this to work. It will be woken up later on in
		345	* finalize_poweron(). We won't necessarily have a CE context yet
		346	* either, so hypothetically one could cause a race where we try to
		347	* clear a vidmem struct before we have a CE context to do so.
		348	*/
		349	nvgpu_vidmem_thread_pause_sync(mm);
		350
		351	err = nvgpu_thread_create(&mm->vidmem.clearing_thread, mm,
		352	nvgpu_vidmem_clear_pending_allocs_thr,
		353	"vidmem-clear");
		354	if (err)
		355	goto fail;
165		356
166	gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M);	357	gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M);
167		358
168	return 0;	359	return 0;
		360
		361	fail:
		362	nvgpu_cond_destroy(&mm->vidmem.clearing_thread_cond);
		363	nvgpu_vidmem_destroy(g);
		364	return err;
169	}	365	}
170		366
171	int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space)	367	int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space)
@@ -244,21 +440,6 @@ int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem)
244	return err;	440	return err;
245	}	441	}
246		442
247	struct nvgpu_mem *nvgpu_vidmem_get_pending_alloc(struct mm_gk20a *mm)
248	{
249	struct nvgpu_mem *mem = NULL;
250
251	nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
252	if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) {
253	mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head,
254	nvgpu_mem, clear_list_entry);
255	nvgpu_list_del(&mem->clear_list_entry);
256	}
257	nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
258
259	return mem;
260	}
261
262	static int nvgpu_vidmem_clear_all(struct gk20a *g)	443	static int nvgpu_vidmem_clear_all(struct gk20a *g)
263	{	444	{
264	int err;	445	int err;