summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/common/mm/vidmem.c
diff options
context:
space:
mode:
authorAlex Waterman <alexw@nvidia.com>2017-10-05 20:22:41 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2017-10-20 22:03:57 -0400
commite26ce10cc6b59314ccf5931a8c5b46a9e57b085a (patch)
tree2bcafc57fcb8679c09195ba08ccf7a6fdfc91fe1 /drivers/gpu/nvgpu/common/mm/vidmem.c
parent8c5ea40ccaad022401e45e61d5b6ff3354ffa413 (diff)
gpu: nvgpu: Convert VIDMEM work_struct to thread
Convert the work_struct used by the vidmem background clearing to a thread to make it more cross platform. The thread waits on a condition variable to determine when work needs to be done. The signal comes from the DMA API when it enqueues a new nvgpu_mem that needs clearing. Add logic for handling suspend: the CE cannot be accessed while the GPU is suspended. As such the background thread must be paused while the GPU is suspended and the CE is not available. Several other changes were also made: o Move the code that enqueues a nvgpu_mem from the DMA API code to a function in the VIDMEM code. o Move nvgpu_vidmem_get_pending_alloc() to the Linux specific code as this function is only used there. It's a trivial function that QNX can easily implement as well. o Remove the was_empty logic from the enqueue. Now just always signal the condition variable when a new nvgpu_mem comes in. o Move CE suspend to after MM suspend. JIRA NVGPU-30 JIRA NVGPU-138 Change-Id: Ie9286ae5a127c3fced86dfb9794e7d81eab0491c Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1574498 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/vidmem.c')
-rw-r--r--drivers/gpu/nvgpu/common/mm/vidmem.c215
1 files changed, 198 insertions, 17 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c
index d1c5a2e8..60b819d7 100644
--- a/drivers/gpu/nvgpu/common/mm/vidmem.c
+++ b/drivers/gpu/nvgpu/common/mm/vidmem.c
@@ -22,15 +22,55 @@
22 22
23#include <linux/scatterlist.h> 23#include <linux/scatterlist.h>
24 24
25#include <nvgpu/timers.h>
25#include <nvgpu/dma.h> 26#include <nvgpu/dma.h>
26#include <nvgpu/vidmem.h> 27#include <nvgpu/vidmem.h>
27#include <nvgpu/page_allocator.h> 28#include <nvgpu/page_allocator.h>
29#include <nvgpu/enabled.h>
28 30
29#include "gk20a/gk20a.h" 31#include "gk20a/gk20a.h"
30#include "gk20a/mm_gk20a.h" 32#include "gk20a/mm_gk20a.h"
31 33
34/*
35 * This is expected to be called from the shutdown path (or the error path in
36 * the vidmem init code). As such we do not expect new vidmem frees to be
37 * enqueued.
38 */
32void nvgpu_vidmem_destroy(struct gk20a *g) 39void nvgpu_vidmem_destroy(struct gk20a *g)
33{ 40{
41 struct nvgpu_timeout timeout;
42
43 nvgpu_timeout_init(g, &timeout, 100, NVGPU_TIMER_RETRY_TIMER);
44
45 /*
46 * Ensure that the thread runs one last time to flush anything in the
47 * queue.
48 */
49 nvgpu_cond_signal_interruptible(&g->mm.vidmem.clearing_thread_cond);
50
51 /*
52 * Wait for at most 1 second before just continuing on. It doesn't make
53 * sense to hang the system over some potential memory leaks.
54 */
55 do {
56 bool empty;
57
58 nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
59 empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
60 nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
61
62 if (empty)
63 break;
64
65 nvgpu_msleep(10);
66 } while (!nvgpu_timeout_expired(&timeout));
67
68 /*
69 * Kill the vidmem clearing thread now. This will wake the thread up
70 * automatically and cause the wait_interruptible condition to trigger.
71 */
72 nvgpu_thread_stop(&g->mm.vidmem.clearing_thread);
73
34 if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) 74 if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
35 nvgpu_alloc_destroy(&g->mm.vidmem.allocator); 75 nvgpu_alloc_destroy(&g->mm.vidmem.allocator);
36} 76}
@@ -107,6 +147,139 @@ static int __nvgpu_vidmem_do_clear_all(struct gk20a *g)
107 return 0; 147 return 0;
108} 148}
109 149
150void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm)
151{
152 /*
153 * On the first increment of the pause_count (0 -> 1) take the pause
154 * lock and prevent the vidmem clearing thread from processing work
155 * items.
156 *
157 * Otherwise the increment is all that's needed - it's essentially a
158 * ref-count for the number of pause() calls.
159 *
160 * The sync component is implemented by waiting for the lock to be
161 * released by the clearing thread in case the thread is currently
162 * processing work items.
163 */
164 if (nvgpu_atomic_inc_return(&mm->vidmem.pause_count) == 1)
165 nvgpu_mutex_acquire(&mm->vidmem.clearing_thread_lock);
166}
167
168void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm)
169{
170 /*
171 * And on the last decrement (1 -> 0) release the pause lock and let
172 * the vidmem clearing thread continue.
173 */
174 if (nvgpu_atomic_dec_return(&mm->vidmem.pause_count) == 0)
175 nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
176}
177
178int nvgpu_vidmem_clear_list_enqueue(struct gk20a *g, struct nvgpu_mem *mem)
179{
180 struct mm_gk20a *mm = &g->mm;
181
182 /*
183 * Crap. Can't enqueue new vidmem bufs! CE may be gone!
184 *
185 * However, an errant app can hold a vidmem dma_buf FD open past when
186 * the nvgpu driver has exited. Thus when the FD does get closed
187 * eventually the dma_buf release function will try to call the vidmem
188 * free function which will attempt to enqueue the vidmem into the
189 * vidmem clearing thread.
190 */
191 if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
192 return -ENOSYS;
193
194 nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
195 nvgpu_list_add_tail(&mem->clear_list_entry,
196 &mm->vidmem.clear_list_head);
197 nvgpu_atomic64_add(mem->aligned_size, &mm->vidmem.bytes_pending);
198 nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
199
200 nvgpu_cond_signal_interruptible(&mm->vidmem.clearing_thread_cond);
201
202 return 0;
203}
204
205static struct nvgpu_mem *nvgpu_vidmem_clear_list_dequeue(struct mm_gk20a *mm)
206{
207 struct nvgpu_mem *mem = NULL;
208
209 nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
210 if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) {
211 mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head,
212 nvgpu_mem, clear_list_entry);
213 nvgpu_list_del(&mem->clear_list_entry);
214 }
215 nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
216
217 return mem;
218}
219
220static void nvgpu_vidmem_clear_pending_allocs(struct mm_gk20a *mm)
221{
222 struct gk20a *g = mm->g;
223 struct nvgpu_mem *mem;
224
225 while ((mem = nvgpu_vidmem_clear_list_dequeue(mm)) != NULL) {
226 nvgpu_vidmem_clear(g, mem);
227
228 WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size,
229 &g->mm.vidmem.bytes_pending) < 0);
230 mem->size = 0;
231 mem->aperture = APERTURE_INVALID;
232
233 __nvgpu_mem_free_vidmem_alloc(g, mem);
234 nvgpu_kfree(g, mem);
235 }
236}
237
238static int nvgpu_vidmem_clear_pending_allocs_thr(void *mm_ptr)
239{
240 struct mm_gk20a *mm = mm_ptr;
241
242 /*
243 * Simple thread whose sole job is to periodically clear userspace
244 * vidmem allocations that have been recently freed.
245 *
246 * Since it doesn't make sense to run unless there's pending work a
247 * condition field is used to wait for work. When the DMA API frees a
248 * userspace vidmem buf it enqueues it into the clear list and alerts us
249 * that we have some work to do.
250 */
251
252 while (!nvgpu_thread_should_stop(&mm->vidmem.clearing_thread)) {
253 int ret;
254
255 /*
256 * Wait for work but also make sure we should not be paused.
257 */
258 ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
259 &mm->vidmem.clearing_thread_cond,
260 nvgpu_thread_should_stop(
261 &mm->vidmem.clearing_thread) ||
262 !nvgpu_list_empty(&mm->vidmem.clear_list_head),
263 0);
264 if (ret == -ERESTARTSYS)
265 continue;
266
267 /*
268 * Use this lock to implement a pause mechanism. By taking this
269 * lock some other code can prevent this thread from processing
270 * work items.
271 */
272 if (!nvgpu_mutex_tryacquire(&mm->vidmem.clearing_thread_lock))
273 continue;
274
275 nvgpu_vidmem_clear_pending_allocs(mm);
276
277 nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
278 }
279
280 return 0;
281}
282
110int nvgpu_vidmem_init(struct mm_gk20a *mm) 283int nvgpu_vidmem_init(struct mm_gk20a *mm)
111{ 284{
112 struct gk20a *g = mm->g; 285 struct gk20a *g = mm->g;
@@ -156,16 +329,39 @@ int nvgpu_vidmem_init(struct mm_gk20a *mm)
156 mm->vidmem.bootstrap_base = bootstrap_base; 329 mm->vidmem.bootstrap_base = bootstrap_base;
157 mm->vidmem.bootstrap_size = bootstrap_size; 330 mm->vidmem.bootstrap_size = bootstrap_size;
158 331
159 nvgpu_mutex_init(&mm->vidmem.first_clear_mutex); 332 err = nvgpu_cond_init(&mm->vidmem.clearing_thread_cond);
333 if (err)
334 goto fail;
160 335
161 INIT_WORK(&mm->vidmem.clear_mem_worker, nvgpu_vidmem_clear_mem_worker);
162 nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0); 336 nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0);
163 nvgpu_init_list_node(&mm->vidmem.clear_list_head); 337 nvgpu_init_list_node(&mm->vidmem.clear_list_head);
164 nvgpu_mutex_init(&mm->vidmem.clear_list_mutex); 338 nvgpu_mutex_init(&mm->vidmem.clear_list_mutex);
339 nvgpu_mutex_init(&mm->vidmem.clearing_thread_lock);
340 nvgpu_atomic_set(&mm->vidmem.pause_count, 0);
341
342 /*
343 * Start the thread off in the paused state. The thread doesn't have to
344 * be running for this to work. It will be woken up later on in
345 * finalize_poweron(). We won't necessarily have a CE context yet
346 * either, so hypothetically one could cause a race where we try to
347 * clear a vidmem struct before we have a CE context to do so.
348 */
349 nvgpu_vidmem_thread_pause_sync(mm);
350
351 err = nvgpu_thread_create(&mm->vidmem.clearing_thread, mm,
352 nvgpu_vidmem_clear_pending_allocs_thr,
353 "vidmem-clear");
354 if (err)
355 goto fail;
165 356
166 gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); 357 gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M);
167 358
168 return 0; 359 return 0;
360
361fail:
362 nvgpu_cond_destroy(&mm->vidmem.clearing_thread_cond);
363 nvgpu_vidmem_destroy(g);
364 return err;
169} 365}
170 366
171int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space) 367int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space)
@@ -244,21 +440,6 @@ int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem)
244 return err; 440 return err;
245} 441}
246 442
247struct nvgpu_mem *nvgpu_vidmem_get_pending_alloc(struct mm_gk20a *mm)
248{
249 struct nvgpu_mem *mem = NULL;
250
251 nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
252 if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) {
253 mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head,
254 nvgpu_mem, clear_list_entry);
255 nvgpu_list_del(&mem->clear_list_entry);
256 }
257 nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
258
259 return mem;
260}
261
262static int nvgpu_vidmem_clear_all(struct gk20a *g) 443static int nvgpu_vidmem_clear_all(struct gk20a *g)
263{ 444{
264 int err; 445 int err;