Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/vidmem.c')
-rw-r--r--  drivers/gpu/nvgpu/common/mm/vidmem.c | 554
1 file changed, 554 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c
new file mode 100644
index 00000000..3526fce5
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/vidmem.c
@@ -0,0 +1,554 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <linux/scatterlist.h>
24
25#include <nvgpu/timers.h>
26#include <nvgpu/dma.h>
27#include <nvgpu/vidmem.h>
28#include <nvgpu/page_allocator.h>
29#include <nvgpu/enabled.h>
30
31#include "gk20a/gk20a.h"
32#include "gk20a/mm_gk20a.h"
33
34/*
35 * This is expected to be called from the shutdown path (or the error path in
36 * the vidmem init code). As such we do not expect new vidmem frees to be
37 * enqueued.
38 */
39void nvgpu_vidmem_destroy(struct gk20a *g)
40{
41 struct nvgpu_timeout timeout;
42
43 nvgpu_timeout_init(g, &timeout, 100, NVGPU_TIMER_RETRY_TIMER);
44
45 /*
46 * Ensure that the thread runs one last time to flush anything in the
47 * queue.
48 */
49 nvgpu_cond_signal_interruptible(&g->mm.vidmem.clearing_thread_cond);
50
51 /*
52 * Wait for at most 1 second before just continuing on. It doesn't make
53 * sense to hang the system over some potential memory leaks.
54 */
55 do {
56 bool empty;
57
58 nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
59 empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
60 nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
61
62 if (empty)
63 break;
64
65 nvgpu_msleep(10);
66 } while (!nvgpu_timeout_expired(&timeout));
67
68 /*
69 * Kill the vidmem clearing thread now. This will wake the thread up
70 * automatically and cause the wait_interruptible condition to trigger.
71 */
72 nvgpu_thread_stop(&g->mm.vidmem.clearing_thread);
73
74 if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
75 nvgpu_alloc_destroy(&g->mm.vidmem.allocator);
76}
77
78static int __nvgpu_vidmem_do_clear_all(struct gk20a *g)
79{
80 struct mm_gk20a *mm = &g->mm;
81 struct gk20a_fence *gk20a_fence_out = NULL;
82 u64 region2_base = 0;
83 int err = 0;
84
85 if (mm->vidmem.ce_ctx_id == (u32)~0)
86 return -EINVAL;
87
88 vidmem_dbg(g, "Clearing all VIDMEM:");
89
90 err = gk20a_ce_execute_ops(g,
91 mm->vidmem.ce_ctx_id,
92 0,
93 mm->vidmem.base,
94 mm->vidmem.bootstrap_base - mm->vidmem.base,
95 0x00000000,
96 NVGPU_CE_DST_LOCATION_LOCAL_FB,
97 NVGPU_CE_MEMSET,
98 NULL,
99 0,
100 NULL);
101 if (err) {
102 nvgpu_err(g,
103 "Failed to clear vidmem region 1 : %d", err);
104 return err;
105 }
106
107 region2_base = mm->vidmem.bootstrap_base + mm->vidmem.bootstrap_size;
108
109 err = gk20a_ce_execute_ops(g,
110 mm->vidmem.ce_ctx_id,
111 0,
112 region2_base,
113 mm->vidmem.size - region2_base,
114 0x00000000,
115 NVGPU_CE_DST_LOCATION_LOCAL_FB,
116 NVGPU_CE_MEMSET,
117 NULL,
118 0,
119 &gk20a_fence_out);
120 if (err) {
121 nvgpu_err(g,
122 "Failed to clear vidmem region 2 : %d", err);
123 return err;
124 }
125
126 if (gk20a_fence_out) {
127 struct nvgpu_timeout timeout;
128
129 nvgpu_timeout_init(g, &timeout,
130 gk20a_get_gr_idle_timeout(g),
131 NVGPU_TIMER_CPU_TIMER);
132
133 do {
134 err = gk20a_fence_wait(g, gk20a_fence_out,
135 gk20a_get_gr_idle_timeout(g));
136 } while (err == -ERESTARTSYS &&
137 !nvgpu_timeout_expired(&timeout));
138
139 gk20a_fence_put(gk20a_fence_out);
140 if (err) {
141 nvgpu_err(g,
142 "fence wait failed for CE execute ops");
143 return err;
144 }
145 }
146
147 mm->vidmem.cleared = true;
148
149 vidmem_dbg(g, "Done!");
150
151 return 0;
152}
153
154void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm)
155{
156 /*
157 * On the first increment of the pause_count (0 -> 1) take the pause
158 * lock and prevent the vidmem clearing thread from processing work
159 * items.
160 *
161 * Otherwise the increment is all that's needed - it's essentially a
162 * ref-count for the number of pause() calls.
163 *
164 * The sync component is implemented by waiting for the lock to be
165 * released by the clearing thread in case the thread is currently
166 * processing work items.
167 */
168 if (nvgpu_atomic_inc_return(&mm->vidmem.pause_count) == 1)
169 nvgpu_mutex_acquire(&mm->vidmem.clearing_thread_lock);
170
171 vidmem_dbg(mm->g, "Clearing thread paused; new count=%d",
172 nvgpu_atomic_read(&mm->vidmem.pause_count));
173}
174
175void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm)
176{
177 vidmem_dbg(mm->g, "Unpausing clearing thread; current count=%d",
178 nvgpu_atomic_read(&mm->vidmem.pause_count));
179
180 /*
181 * And on the last decrement (1 -> 0) release the pause lock and let
182 * the vidmem clearing thread continue.
183 */
184 if (nvgpu_atomic_dec_return(&mm->vidmem.pause_count) == 0) {
185 nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
186 vidmem_dbg(mm->g, " > Clearing thread really unpaused!");
187 }
188}
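The pause/unpause pair above acts as a ref-counted gate on the clearing thread: the first pause blocks it, the last unpause releases it. Below is a minimal sketch of how a caller might wrap work that must not race with background clears; the wrapper function is illustrative only and not part of this change.

/*
 * Illustrative sketch: hold the clearing thread paused across an
 * operation that needs a stable view of vidmem. pause_sync() does not
 * return while the thread is still inside its work loop.
 */
static void example_with_clearing_paused(struct gk20a *g)
{
	nvgpu_vidmem_thread_pause_sync(&g->mm);

	/* ... touch vidmem state that the clearing thread also touches ... */

	nvgpu_vidmem_thread_unpause(&g->mm);
}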
189
190int nvgpu_vidmem_clear_list_enqueue(struct gk20a *g, struct nvgpu_mem *mem)
191{
192 struct mm_gk20a *mm = &g->mm;
193
194 /*
195 * Crap. Can't enqueue new vidmem bufs! CE may be gone!
196 *
197 * However, an errant app can hold a vidmem dma_buf FD open past when
198 * the nvgpu driver has exited. Thus when the FD does get closed
199 * eventually the dma_buf release function will try to call the vidmem
200 * free function which will attempt to enqueue the vidmem into the
201 * vidmem clearing thread.
202 */
203 if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
204 return -ENOSYS;
205
206 nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
207 nvgpu_list_add_tail(&mem->clear_list_entry,
208 &mm->vidmem.clear_list_head);
209 nvgpu_atomic64_add(mem->aligned_size, &mm->vidmem.bytes_pending);
210 nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
211
212 nvgpu_cond_signal_interruptible(&mm->vidmem.clearing_thread_cond);
213
214 return 0;
215}
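Because the enqueue refuses work once the driver is dying, a free path needs a fallback. A hedged sketch of one possible shape for such a caller follows; the synchronous-fallback policy shown here is an assumption for illustration, not necessarily what the DMA layer actually does.

/*
 * Illustrative sketch: prefer deferring the clear to the worker thread,
 * and fall back to clearing inline when the worker can no longer accept
 * work (the -ENOSYS case above).
 */
static void example_free_user_vidmem(struct gk20a *g, struct nvgpu_mem *mem)
{
	if (nvgpu_vidmem_clear_list_enqueue(g, mem) == 0)
		return;

	/* Driver is dying: scrub and release the allocation directly. */
	nvgpu_vidmem_clear(g, mem);
	__nvgpu_mem_free_vidmem_alloc(g, mem);
	nvgpu_kfree(g, mem);
}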
216
217static struct nvgpu_mem *nvgpu_vidmem_clear_list_dequeue(struct mm_gk20a *mm)
218{
219 struct nvgpu_mem *mem = NULL;
220
221 nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
222 if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) {
223 mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head,
224 nvgpu_mem, clear_list_entry);
225 nvgpu_list_del(&mem->clear_list_entry);
226 }
227 nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
228
229 return mem;
230}
231
232static void nvgpu_vidmem_clear_pending_allocs(struct mm_gk20a *mm)
233{
234 struct gk20a *g = mm->g;
235 struct nvgpu_mem *mem;
236
237 vidmem_dbg(g, "Running VIDMEM clearing thread:");
238
239 while ((mem = nvgpu_vidmem_clear_list_dequeue(mm)) != NULL) {
240 nvgpu_vidmem_clear(g, mem);
241
242 WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size,
243 &g->mm.vidmem.bytes_pending) < 0);
244 mem->size = 0;
245 mem->aperture = APERTURE_INVALID;
246
247 __nvgpu_mem_free_vidmem_alloc(g, mem);
248 nvgpu_kfree(g, mem);
249 }
250
251 vidmem_dbg(g, "Done!");
252}
253
254static int nvgpu_vidmem_clear_pending_allocs_thr(void *mm_ptr)
255{
256 struct mm_gk20a *mm = mm_ptr;
257
258 /*
259 * Simple thread whose sole job is to periodically clear userspace
260 * vidmem allocations that have been recently freed.
261 *
262 * Since it doesn't make sense to run unless there's pending work, a
263 * condition variable is used to wait for work. When the DMA API frees a
264 * userspace vidmem buf it enqueues it into the clear list and alerts us
265 * that we have some work to do.
266 */
267
268 while (!nvgpu_thread_should_stop(&mm->vidmem.clearing_thread)) {
269 int ret;
270
271 /*
272 * Wait for work but also make sure we should not be paused.
273 */
274 ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
275 &mm->vidmem.clearing_thread_cond,
276 nvgpu_thread_should_stop(
277 &mm->vidmem.clearing_thread) ||
278 !nvgpu_list_empty(&mm->vidmem.clear_list_head),
279 0);
280 if (ret == -ERESTARTSYS)
281 continue;
282
283 /*
284 * Use this lock to implement a pause mechanism. By taking this
285 * lock some other code can prevent this thread from processing
286 * work items.
287 */
288 if (!nvgpu_mutex_tryacquire(&mm->vidmem.clearing_thread_lock))
289 continue;
290
291 nvgpu_vidmem_clear_pending_allocs(mm);
292
293 nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
294 }
295
296 return 0;
297}
298
299int nvgpu_vidmem_init(struct mm_gk20a *mm)
300{
301 struct gk20a *g = mm->g;
302 size_t size = g->ops.mm.get_vidmem_size ?
303 g->ops.mm.get_vidmem_size(g) : 0;
304 u64 bootstrap_base, bootstrap_size, base;
305 u64 default_page_size = SZ_64K;
306 int err;
307
308 static struct nvgpu_alloc_carveout wpr_co =
309 NVGPU_CARVEOUT("wpr-region", 0, SZ_16M);
310
311 if (!size)
312 return 0;
313
314 vidmem_dbg(g, "init begin");
315
316 wpr_co.base = size - SZ_256M;
317 bootstrap_base = wpr_co.base;
318 bootstrap_size = SZ_16M;
319 base = default_page_size;
320
321 /*
322 * Bootstrap allocator for use before the CE is initialized (CE
323 * initialization requires vidmem but we want to use the CE to zero
324 * out vidmem before allocating it).
325 */
326 err = nvgpu_page_allocator_init(g, &g->mm.vidmem.bootstrap_allocator,
327 "vidmem-bootstrap",
328 bootstrap_base, bootstrap_size,
329 SZ_4K, 0);
330
331 err = nvgpu_page_allocator_init(g, &g->mm.vidmem.allocator,
332 "vidmem",
333 base, size - base,
334 default_page_size,
335 GPU_ALLOC_4K_VIDMEM_PAGES);
336 if (err) {
337 nvgpu_err(g, "Failed to register vidmem for size %zu: %d",
338 size, err);
339 return err;
340 }
341
342 /* Reserve bootstrap region in vidmem allocator */
343 nvgpu_alloc_reserve_carveout(&g->mm.vidmem.allocator, &wpr_co);
344
345 mm->vidmem.base = base;
346 mm->vidmem.size = size - base;
347 mm->vidmem.bootstrap_base = bootstrap_base;
348 mm->vidmem.bootstrap_size = bootstrap_size;
349
350 err = nvgpu_cond_init(&mm->vidmem.clearing_thread_cond);
351 if (err)
352 goto fail;
353
354 nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0);
355 nvgpu_init_list_node(&mm->vidmem.clear_list_head);
356 nvgpu_mutex_init(&mm->vidmem.clear_list_mutex);
357 nvgpu_mutex_init(&mm->vidmem.clearing_thread_lock);
358 nvgpu_mutex_init(&mm->vidmem.first_clear_mutex);
359 nvgpu_atomic_set(&mm->vidmem.pause_count, 0);
360
361 /*
362 * Start the thread off in the paused state. The thread doesn't have to
363 * be running for this to work. It will be woken up later on in
364 * finalize_poweron(). We won't necessarily have a CE context yet
365 * either, so without this pause we could race and try to clear a
366 * vidmem buffer before a CE context exists to do so.
367 */
368 nvgpu_vidmem_thread_pause_sync(mm);
369
370 err = nvgpu_thread_create(&mm->vidmem.clearing_thread, mm,
371 nvgpu_vidmem_clear_pending_allocs_thr,
372 "vidmem-clear");
373 if (err)
374 goto fail;
375
376 vidmem_dbg(g, "VIDMEM Total: %zu MB", size >> 20);
377 vidmem_dbg(g, "VIDMEM Ranges:");
378 vidmem_dbg(g, " 0x%-10llx -> 0x%-10llx Primary",
379 mm->vidmem.base, mm->vidmem.base + mm->vidmem.size);
380 vidmem_dbg(g, " 0x%-10llx -> 0x%-10llx Bootstrap",
381 mm->vidmem.bootstrap_base,
382 mm->vidmem.bootstrap_base + mm->vidmem.bootstrap_size);
383 vidmem_dbg(g, "VIDMEM carveouts:");
384 vidmem_dbg(g, " 0x%-10llx -> 0x%-10llx %s",
385 wpr_co.base, wpr_co.base + wpr_co.length, wpr_co.name);
386
387 return 0;
388
389fail:
390 nvgpu_cond_destroy(&mm->vidmem.clearing_thread_cond);
391 nvgpu_vidmem_destroy(g);
392 return err;
393}
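To make the layout arithmetic above concrete, the comment below works through the numbers for an assumed 4 GiB vidmem aperture; the size itself is hypothetical, since only g->ops.mm.get_vidmem_size() knows the real value.

/*
 * Worked example, assuming size = 4 GiB = 0x100000000:
 *
 *   base            = SZ_64K          = 0x10000
 *   wpr_co.base     = size - SZ_256M  = 0xf0000000
 *   bootstrap_base  = wpr_co.base     = 0xf0000000
 *   bootstrap_size  = SZ_16M          = 0x1000000
 *
 *   "vidmem" allocator:           [0x10000, 0x100000000), with the 16 MiB
 *                                 WPR carveout at 0xf0000000 reserved out
 *                                 of it via nvgpu_alloc_reserve_carveout()
 *   "vidmem-bootstrap" allocator: [0xf0000000, 0xf1000000)
 *
 * The first 64 KiB page is never handed out, and normal vidmem
 * allocations cannot land on top of the bootstrap/WPR region.
 */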
394
395int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space)
396{
397 struct nvgpu_allocator *allocator = &g->mm.vidmem.allocator;
398
399 gk20a_dbg_fn("");
400
401 if (!nvgpu_alloc_initialized(allocator))
402 return -ENOSYS;
403
404 nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
405 *space = nvgpu_alloc_space(allocator) +
406 nvgpu_atomic64_read(&g->mm.vidmem.bytes_pending);
407 nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
408 return 0;
409}
410
411int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem)
412{
413 struct gk20a_fence *gk20a_fence_out = NULL;
414 struct gk20a_fence *gk20a_last_fence = NULL;
415 struct nvgpu_page_alloc *alloc = NULL;
416 void *sgl = NULL;
417 int err = 0;
418
419 if (g->mm.vidmem.ce_ctx_id == (u32)~0)
420 return -EINVAL;
421
422 alloc = mem->vidmem_alloc;
423
424 vidmem_dbg(g, "Clearing VIDMEM buf:");
425
426 nvgpu_sgt_for_each_sgl(sgl, &alloc->sgt) {
427 if (gk20a_last_fence)
428 gk20a_fence_put(gk20a_last_fence);
429
430 err = gk20a_ce_execute_ops(g,
431 g->mm.vidmem.ce_ctx_id,
432 0,
433 nvgpu_sgt_get_phys(&alloc->sgt, sgl),
434 nvgpu_sgt_get_length(&alloc->sgt, sgl),
435 0x00000000,
436 NVGPU_CE_DST_LOCATION_LOCAL_FB,
437 NVGPU_CE_MEMSET,
438 NULL,
439 0,
440 &gk20a_fence_out);
441
442 if (err) {
443 nvgpu_err(g,
444 "Failed gk20a_ce_execute_ops[%d]", err);
445 return err;
446 }
447
448 vidmem_dbg(g, " > [0x%llx +0x%llx]",
449 nvgpu_sgt_get_phys(&alloc->sgt, sgl),
450 nvgpu_sgt_get_length(&alloc->sgt, sgl));
451
452 gk20a_last_fence = gk20a_fence_out;
453 }
454
455 if (gk20a_last_fence) {
456 struct nvgpu_timeout timeout;
457
458 nvgpu_timeout_init(g, &timeout,
459 gk20a_get_gr_idle_timeout(g),
460 NVGPU_TIMER_CPU_TIMER);
461
462 do {
463 err = gk20a_fence_wait(g, gk20a_last_fence,
464 gk20a_get_gr_idle_timeout(g));
465 } while (err == -ERESTARTSYS &&
466 !nvgpu_timeout_expired(&timeout));
467
468 gk20a_fence_put(gk20a_last_fence);
469 if (err)
470 nvgpu_err(g,
471 "fence wait failed for CE execute ops");
472 }
473
474 vidmem_dbg(g, " Done");
475
476 return err;
477}
478
479static int nvgpu_vidmem_clear_all(struct gk20a *g)
480{
481 int err;
482
483 if (g->mm.vidmem.cleared)
484 return 0;
485
486 nvgpu_mutex_acquire(&g->mm.vidmem.first_clear_mutex);
487 if (!g->mm.vidmem.cleared) {
488 err = __nvgpu_vidmem_do_clear_all(g);
489 if (err) {
490 nvgpu_mutex_release(&g->mm.vidmem.first_clear_mutex);
491 nvgpu_err(g, "failed to clear whole vidmem");
492 return err;
493 }
494 }
495 nvgpu_mutex_release(&g->mm.vidmem.first_clear_mutex);
496
497 return 0;
498}
499
500struct nvgpu_vidmem_buf *nvgpu_vidmem_user_alloc(struct gk20a *g, size_t bytes)
501{
502 struct nvgpu_vidmem_buf *buf;
503 int err;
504
505 err = nvgpu_vidmem_clear_all(g);
506 if (err)
507 return NULL;
508
509 buf = nvgpu_kzalloc(g, sizeof(*buf));
510 if (!buf)
511 return NULL;
512
513 buf->g = g;
514 buf->mem = nvgpu_kzalloc(g, sizeof(*buf->mem));
515 if (!buf->mem)
516 goto fail;
517
518 err = nvgpu_dma_alloc_vid(g, bytes, buf->mem);
519 if (err)
520 goto fail;
521
522 /*
523 * Alerts the DMA API that when we free this vidmem buf we have to
524 * clear it to avoid leaking data to userspace.
525 */
526 buf->mem->mem_flags |= NVGPU_MEM_FLAG_USER_MEM;
527
528 return buf;
529
530fail:
531 /* buf will never be NULL here. */
532 nvgpu_kfree(g, buf->mem);
533 nvgpu_kfree(g, buf);
534 return NULL;
535}
536
537void nvgpu_vidmem_buf_free(struct gk20a *g, struct nvgpu_vidmem_buf *buf)
538{
539 /*
540 * In some error paths it's convenient to be able to "free" a NULL buf.
541 */
542 if (!buf)
543 return;
544
545 nvgpu_dma_free(g, buf->mem);
546
547 /*
548 * We don't free buf->mem here. This is handled by nvgpu_dma_free()!
549 * Since these buffers are cleared in the background the nvgpu_mem
550 * struct must live on through that. We transfer ownership here to the
551 * DMA API and let the DMA API free the buffer.
552 */
553 nvgpu_kfree(g, buf);
554}
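Taken together, the two functions above give a user-visible vidmem buffer a simple lifecycle: allocate (with a lazy one-time full clear), use, free, then scrub in the background. The round-trip sketch below is illustrative only; how the buffer actually reaches userspace (dma_buf export and so on) is outside this file.

/*
 * Illustrative sketch of the allocation/free round trip:
 *  - nvgpu_vidmem_user_alloc() performs the lazy full-vidmem clear, then
 *    allocates buf->mem and tags it NVGPU_MEM_FLAG_USER_MEM.
 *  - nvgpu_vidmem_buf_free() hands buf->mem to nvgpu_dma_free(); the
 *    USER_MEM flag causes the memory to be queued on clear_list_head and
 *    scrubbed by the clearing thread before it returns to the allocator.
 */
static int example_user_buf_roundtrip(struct gk20a *g, size_t bytes)
{
	struct nvgpu_vidmem_buf *buf;

	buf = nvgpu_vidmem_user_alloc(g, bytes);
	if (!buf)
		return -ENOMEM;

	/* ... export buf->mem to userspace and let it be used ... */

	nvgpu_vidmem_buf_free(g, buf);
	return 0;
}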