Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/dma.c     |  26
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vidmem.c  |  43
-rw-r--r--  drivers/gpu/nvgpu/common/mm/vidmem.c     | 215
3 files changed, 231 insertions(+), 53 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
index b62c4593..9e9d1007 100644
--- a/drivers/gpu/nvgpu/common/linux/dma.c
+++ b/drivers/gpu/nvgpu/common/linux/dma.c
@@ -514,7 +514,6 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
 static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
 {
 #if defined(CONFIG_GK20A_VIDMEM)
-	bool was_empty;
 	size_t mem_size = mem->size;
 
 	dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem");
@@ -523,18 +522,19 @@ static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
 	WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING);
 
 	if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) {
-		nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
-		was_empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
-		nvgpu_list_add_tail(&mem->clear_list_entry,
-				&g->mm.vidmem.clear_list_head);
-		atomic64_add(mem->aligned_size,
-			     &g->mm.vidmem.bytes_pending.atomic_var);
-		nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
-
-		if (was_empty) {
-			cancel_work_sync(&g->mm.vidmem.clear_mem_worker);
-			schedule_work(&g->mm.vidmem.clear_mem_worker);
-		}
+		int err = nvgpu_vidmem_clear_list_enqueue(g, mem);
+
+		/*
+		 * If there's an error here then that means we can't clear the
+		 * vidmem. That's too bad; however, we still own the nvgpu_mem
+		 * buf so we have to free that.
+		 *
+		 * We don't need to worry about the vidmem allocator itself
+		 * since when that gets cleaned up in the driver shutdown path
+		 * all the outstanding allocs are force freed.
+		 */
+		if (err)
+			nvgpu_kfree(g, mem);
 	} else {
 		nvgpu_memset(g, mem, 0, 0, mem->aligned_size);
 		nvgpu_free(mem->allocator,
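
Note on the new contract above: if nvgpu_vidmem_clear_list_enqueue() fails,
ownership of the nvgpu_mem stays with the caller. A minimal caller-side
sketch, mirroring nvgpu_dma_free_vid() and using only functions visible in
this patch:

	if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) {
		if (nvgpu_vidmem_clear_list_enqueue(g, mem))
			nvgpu_kfree(g, mem); /* enqueue failed; we still own mem */
		/* On success the clearing thread clears and frees mem. */
	}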
diff --git a/drivers/gpu/nvgpu/common/linux/vidmem.c b/drivers/gpu/nvgpu/common/linux/vidmem.c
index ea8e552f..92e7e504 100644
--- a/drivers/gpu/nvgpu/common/linux/vidmem.c
+++ b/drivers/gpu/nvgpu/common/linux/vidmem.c
@@ -84,6 +84,8 @@ static void gk20a_vidbuf_release(struct dma_buf *dmabuf)
 
 	nvgpu_kfree(g, linux_buf);
 	nvgpu_vidmem_buf_free(g, buf);
+
+	gk20a_put(g);
 }
 
 static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num)
@@ -160,13 +162,21 @@ struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf)
 
 int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes)
 {
-	struct nvgpu_vidmem_buf *buf;
+	struct nvgpu_vidmem_buf *buf = NULL;
 	struct nvgpu_vidmem_linux *priv;
 	int err, fd;
 
+	/*
+	 * This ref is released when the dma_buf is closed.
+	 */
+	if (!gk20a_get(g))
+		return -ENODEV;
+
 	priv = nvgpu_kzalloc(g, sizeof(*priv));
-	if (!priv)
-		return -ENOMEM;
+	if (!priv) {
+		err = -ENOMEM;
+		goto fail;
+	}
 
 	buf = nvgpu_vidmem_user_alloc(g, bytes);
 	if (!buf) {
@@ -195,8 +205,10 @@ int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes)
 	return fd;
 
 fail:
-	nvgpu_kfree(g, priv);
 	nvgpu_vidmem_buf_free(g, buf);
+	nvgpu_kfree(g, priv);
+	gk20a_put(g);
+
 	return err;
 }
 
@@ -229,24 +241,9 @@ int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
 	return err;
 }
 
-void nvgpu_vidmem_clear_mem_worker(struct work_struct *work)
+void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem)
 {
-	struct mm_gk20a *mm = container_of(work, struct mm_gk20a,
-					vidmem.clear_mem_worker);
-	struct gk20a *g = mm->g;
-	struct nvgpu_mem *mem;
-
-	while ((mem = nvgpu_vidmem_get_pending_alloc(mm)) != NULL) {
-		nvgpu_vidmem_clear(g, mem);
-		nvgpu_free(mem->allocator,
-			   (u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl));
-		nvgpu_free_sgtable(g, &mem->priv.sgt);
-
-		WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size,
-					&g->mm.vidmem.bytes_pending) < 0);
-		mem->size = 0;
-		mem->aperture = APERTURE_INVALID;
-
-		nvgpu_kfree(g, mem);
-	}
+	nvgpu_free(vidmem->allocator,
+		   (u64)nvgpu_vidmem_get_page_alloc(vidmem->priv.sgt->sgl));
+	nvgpu_free_sgtable(g, &vidmem->priv.sgt);
 }
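
For context, the gk20a_get()/gk20a_put() pairing added above ties the driver's
lifetime to any exported vidmem dma_buf. A hedged usage sketch (SZ_64K is an
arbitrary example size, not taken from this patch):

	int fd = nvgpu_vidmem_export_linux(g, SZ_64K); /* takes a gk20a ref */
	if (fd < 0)
		return fd; /* the export error path already dropped the ref */
	/*
	 * The ref is held until userspace closes the fd and the dma_buf is
	 * released; gk20a_vidbuf_release() then drops it via gk20a_put().
	 */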
diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c
index d1c5a2e8..60b819d7 100644
--- a/drivers/gpu/nvgpu/common/mm/vidmem.c
+++ b/drivers/gpu/nvgpu/common/mm/vidmem.c
@@ -22,15 +22,55 @@
 
 #include <linux/scatterlist.h>
 
+#include <nvgpu/timers.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/vidmem.h>
 #include <nvgpu/page_allocator.h>
+#include <nvgpu/enabled.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+/*
+ * This is expected to be called from the shutdown path (or the error path in
+ * the vidmem init code). As such we do not expect new vidmem frees to be
+ * enqueued.
+ */
 void nvgpu_vidmem_destroy(struct gk20a *g)
 {
+	struct nvgpu_timeout timeout;
+
+	nvgpu_timeout_init(g, &timeout, 100, NVGPU_TIMER_RETRY_TIMER);
+
+	/*
+	 * Ensure that the thread runs one last time to flush anything in the
+	 * queue.
+	 */
+	nvgpu_cond_signal_interruptible(&g->mm.vidmem.clearing_thread_cond);
+
+	/*
+	 * Wait for at most 1 second before just continuing on. It doesn't make
+	 * sense to hang the system over some potential memory leaks.
+	 */
+	do {
+		bool empty;
+
+		nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
+		empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
+		nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
+
+		if (empty)
+			break;
+
+		nvgpu_msleep(10);
+	} while (!nvgpu_timeout_expired(&timeout));
+
+	/*
+	 * Kill the vidmem clearing thread now. This wakes the thread up
+	 * automatically and causes the interruptible wait condition to trigger.
+	 */
+	nvgpu_thread_stop(&g->mm.vidmem.clearing_thread);
+
 	if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
 		nvgpu_alloc_destroy(&g->mm.vidmem.allocator);
 }
@@ -107,6 +147,139 @@ static int __nvgpu_vidmem_do_clear_all(struct gk20a *g)
 	return 0;
 }
 
+void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm)
+{
+	/*
+	 * On the first increment of the pause_count (0 -> 1) take the pause
+	 * lock and prevent the vidmem clearing thread from processing work
+	 * items.
+	 *
+	 * Otherwise the increment is all that's needed - it's essentially a
+	 * ref-count for the number of pause() calls.
+	 *
+	 * The sync component is implemented by waiting for the lock to be
+	 * released by the clearing thread in case the thread is currently
+	 * processing work items.
+	 */
+	if (nvgpu_atomic_inc_return(&mm->vidmem.pause_count) == 1)
+		nvgpu_mutex_acquire(&mm->vidmem.clearing_thread_lock);
+}
+
+void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm)
+{
+	/*
+	 * And on the last decrement (1 -> 0) release the pause lock and let
+	 * the vidmem clearing thread continue.
+	 */
+	if (nvgpu_atomic_dec_return(&mm->vidmem.pause_count) == 0)
+		nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
+}
+
+int nvgpu_vidmem_clear_list_enqueue(struct gk20a *g, struct nvgpu_mem *mem)
+{
+	struct mm_gk20a *mm = &g->mm;
+
+	/*
+	 * Crap. Can't enqueue new vidmem bufs! CE may be gone!
+	 *
+	 * However, an errant app can hold a vidmem dma_buf FD open past when
+	 * the nvgpu driver has exited. Thus when the FD does get closed
+	 * eventually the dma_buf release function will try to call the vidmem
+	 * free function which will attempt to enqueue the vidmem into the
+	 * vidmem clearing thread.
+	 */
+	if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
+		return -ENOSYS;
+
+	nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
+	nvgpu_list_add_tail(&mem->clear_list_entry,
+			    &mm->vidmem.clear_list_head);
+	nvgpu_atomic64_add(mem->aligned_size, &mm->vidmem.bytes_pending);
+	nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
+
+	nvgpu_cond_signal_interruptible(&mm->vidmem.clearing_thread_cond);
+
+	return 0;
+}
+
+static struct nvgpu_mem *nvgpu_vidmem_clear_list_dequeue(struct mm_gk20a *mm)
+{
+	struct nvgpu_mem *mem = NULL;
+
+	nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
+	if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) {
+		mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head,
+				nvgpu_mem, clear_list_entry);
+		nvgpu_list_del(&mem->clear_list_entry);
+	}
+	nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
+
+	return mem;
+}
+
+static void nvgpu_vidmem_clear_pending_allocs(struct mm_gk20a *mm)
+{
+	struct gk20a *g = mm->g;
+	struct nvgpu_mem *mem;
+
+	while ((mem = nvgpu_vidmem_clear_list_dequeue(mm)) != NULL) {
+		nvgpu_vidmem_clear(g, mem);
+
+		WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size,
+						  &g->mm.vidmem.bytes_pending) < 0);
+		mem->size = 0;
+		mem->aperture = APERTURE_INVALID;
+
+		__nvgpu_mem_free_vidmem_alloc(g, mem);
+		nvgpu_kfree(g, mem);
+	}
+}
+
+static int nvgpu_vidmem_clear_pending_allocs_thr(void *mm_ptr)
+{
+	struct mm_gk20a *mm = mm_ptr;
+
+	/*
+	 * Simple thread whose sole job is to periodically clear userspace
+	 * vidmem allocations that have been recently freed.
+	 *
+	 * Since it doesn't make sense to run unless there's pending work a
+	 * condition variable is used to wait for work. When the DMA API frees
+	 * a userspace vidmem buf it enqueues it into the clear list and alerts
+	 * us that we have some work to do.
+	 */
+
+	while (!nvgpu_thread_should_stop(&mm->vidmem.clearing_thread)) {
+		int ret;
+
+		/*
+		 * Wait for work but also make sure we should not be paused.
+		 */
+		ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
+			&mm->vidmem.clearing_thread_cond,
+			nvgpu_thread_should_stop(
+				&mm->vidmem.clearing_thread) ||
+			!nvgpu_list_empty(&mm->vidmem.clear_list_head),
+			0);
+		if (ret == -ERESTARTSYS)
+			continue;
+
+		/*
+		 * Use this lock to implement a pause mechanism. By taking this
+		 * lock some other code can prevent this thread from processing
+		 * work items.
+		 */
+		if (!nvgpu_mutex_tryacquire(&mm->vidmem.clearing_thread_lock))
+			continue;
+
+		nvgpu_vidmem_clear_pending_allocs(mm);
+
+		nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
+	}
+
+	return 0;
+}
+
 int nvgpu_vidmem_init(struct mm_gk20a *mm)
 {
 	struct gk20a *g = mm->g;
@@ -156,16 +329,39 @@ int nvgpu_vidmem_init(struct mm_gk20a *mm)
 	mm->vidmem.bootstrap_base = bootstrap_base;
 	mm->vidmem.bootstrap_size = bootstrap_size;
 
-	nvgpu_mutex_init(&mm->vidmem.first_clear_mutex);
+	err = nvgpu_cond_init(&mm->vidmem.clearing_thread_cond);
+	if (err)
+		goto fail;
 
-	INIT_WORK(&mm->vidmem.clear_mem_worker, nvgpu_vidmem_clear_mem_worker);
 	nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0);
 	nvgpu_init_list_node(&mm->vidmem.clear_list_head);
 	nvgpu_mutex_init(&mm->vidmem.clear_list_mutex);
+	nvgpu_mutex_init(&mm->vidmem.clearing_thread_lock);
+	nvgpu_atomic_set(&mm->vidmem.pause_count, 0);
+
+	/*
+	 * Start the thread off in the paused state. The thread doesn't have to
+	 * be running for this to work. It will be woken up later on in
+	 * finalize_poweron(). We won't necessarily have a CE context yet
+	 * either, so hypothetically one could cause a race where we try to
+	 * clear a vidmem struct before we have a CE context to do so.
+	 */
+	nvgpu_vidmem_thread_pause_sync(mm);
+
+	err = nvgpu_thread_create(&mm->vidmem.clearing_thread, mm,
+				  nvgpu_vidmem_clear_pending_allocs_thr,
+				  "vidmem-clear");
+	if (err)
+		goto fail;
 
 	gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M);
 
 	return 0;
+
+fail:
+	nvgpu_cond_destroy(&mm->vidmem.clearing_thread_cond);
+	nvgpu_vidmem_destroy(g);
+	return err;
 }
 
 int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space)
@@ -244,21 +440,6 @@ int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem)
 	return err;
 }
 
-struct nvgpu_mem *nvgpu_vidmem_get_pending_alloc(struct mm_gk20a *mm)
-{
-	struct nvgpu_mem *mem = NULL;
-
-	nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
-	if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) {
-		mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head,
-				nvgpu_mem, clear_list_entry);
-		nvgpu_list_del(&mem->clear_list_entry);
-	}
-	nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
-
-	return mem;
-}
-
 static int nvgpu_vidmem_clear_all(struct gk20a *g)
 {
 	int err;
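
For reference, the pause/unpause pair added in mm/vidmem.c above forms a
ref-counted bracket. A minimal sketch of a hypothetical caller that must not
race with the clearing thread (not part of this patch):

	nvgpu_vidmem_thread_pause_sync(mm); /* returns once the thread is idle */
	/* ... work that must not race with vidmem clears, e.g. CE teardown ... */
	nvgpu_vidmem_thread_unpause(mm);

Nested pause calls are safe: only the 0 -> 1 transition of pause_count takes
the thread lock and only the 1 -> 0 transition releases it.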