diff options
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/dma.c | 26 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/vidmem.c | 43 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/vidmem.c | 215 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | 10 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/vidmem.h | 21 |
8 files changed, 267 insertions, 67 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c index b62c4593..9e9d1007 100644 --- a/drivers/gpu/nvgpu/common/linux/dma.c +++ b/drivers/gpu/nvgpu/common/linux/dma.c | |||
@@ -514,7 +514,6 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem) | |||
514 | static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem) | 514 | static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem) |
515 | { | 515 | { |
516 | #if defined(CONFIG_GK20A_VIDMEM) | 516 | #if defined(CONFIG_GK20A_VIDMEM) |
517 | bool was_empty; | ||
518 | size_t mem_size = mem->size; | 517 | size_t mem_size = mem->size; |
519 | 518 | ||
520 | dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem"); | 519 | dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem"); |
@@ -523,18 +522,19 @@ static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem) | |||
523 | WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING); | 522 | WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING); |
524 | 523 | ||
525 | if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) { | 524 | if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) { |
526 | nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); | 525 | int err = nvgpu_vidmem_clear_list_enqueue(g, mem); |
527 | was_empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head); | 526 | |
528 | nvgpu_list_add_tail(&mem->clear_list_entry, | 527 | /* |
529 | &g->mm.vidmem.clear_list_head); | 528 | * If there's an error here then that means we can't clear the |
530 | atomic64_add(mem->aligned_size, | 529 | * vidmem. That's too bad; however, we still own the nvgpu_mem |
531 | &g->mm.vidmem.bytes_pending.atomic_var); | 530 | * buf so we have to free that. |
532 | nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); | 531 | * |
533 | 532 | * We don't need to worry about the vidmem allocator itself | |
534 | if (was_empty) { | 533 | * since when that gets cleaned up in the driver shutdown path |
535 | cancel_work_sync(&g->mm.vidmem.clear_mem_worker); | 534 | * all the outstanding allocs are force freed. |
536 | schedule_work(&g->mm.vidmem.clear_mem_worker); | 535 | */ |
537 | } | 536 | if (err) |
537 | nvgpu_kfree(g, mem); | ||
538 | } else { | 538 | } else { |
539 | nvgpu_memset(g, mem, 0, 0, mem->aligned_size); | 539 | nvgpu_memset(g, mem, 0, 0, mem->aligned_size); |
540 | nvgpu_free(mem->allocator, | 540 | nvgpu_free(mem->allocator, |
diff --git a/drivers/gpu/nvgpu/common/linux/vidmem.c b/drivers/gpu/nvgpu/common/linux/vidmem.c index ea8e552f..92e7e504 100644 --- a/drivers/gpu/nvgpu/common/linux/vidmem.c +++ b/drivers/gpu/nvgpu/common/linux/vidmem.c | |||
@@ -84,6 +84,8 @@ static void gk20a_vidbuf_release(struct dma_buf *dmabuf) | |||
84 | 84 | ||
85 | nvgpu_kfree(g, linux_buf); | 85 | nvgpu_kfree(g, linux_buf); |
86 | nvgpu_vidmem_buf_free(g, buf); | 86 | nvgpu_vidmem_buf_free(g, buf); |
87 | |||
88 | gk20a_put(g); | ||
87 | } | 89 | } |
88 | 90 | ||
89 | static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num) | 91 | static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num) |
@@ -160,13 +162,21 @@ struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf) | |||
160 | 162 | ||
161 | int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes) | 163 | int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes) |
162 | { | 164 | { |
163 | struct nvgpu_vidmem_buf *buf; | 165 | struct nvgpu_vidmem_buf *buf = NULL; |
164 | struct nvgpu_vidmem_linux *priv; | 166 | struct nvgpu_vidmem_linux *priv; |
165 | int err, fd; | 167 | int err, fd; |
166 | 168 | ||
169 | /* | ||
170 | * This ref is released when the dma_buf is closed. | ||
171 | */ | ||
172 | if (!gk20a_get(g)) | ||
173 | return -ENODEV; | ||
174 | |||
167 | priv = nvgpu_kzalloc(g, sizeof(*priv)); | 175 | priv = nvgpu_kzalloc(g, sizeof(*priv)); |
168 | if (!priv) | 176 | if (!priv) { |
169 | return -ENOMEM; | 177 | err = -ENOMEM; |
178 | goto fail; | ||
179 | } | ||
170 | 180 | ||
171 | buf = nvgpu_vidmem_user_alloc(g, bytes); | 181 | buf = nvgpu_vidmem_user_alloc(g, bytes); |
172 | if (!buf) { | 182 | if (!buf) { |
@@ -195,8 +205,10 @@ int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes) | |||
195 | return fd; | 205 | return fd; |
196 | 206 | ||
197 | fail: | 207 | fail: |
198 | nvgpu_kfree(g, priv); | ||
199 | nvgpu_vidmem_buf_free(g, buf); | 208 | nvgpu_vidmem_buf_free(g, buf); |
209 | nvgpu_kfree(g, priv); | ||
210 | gk20a_put(g); | ||
211 | |||
200 | return err; | 212 | return err; |
201 | } | 213 | } |
202 | 214 | ||
@@ -229,24 +241,9 @@ int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf, | |||
229 | return err; | 241 | return err; |
230 | } | 242 | } |
231 | 243 | ||
232 | void nvgpu_vidmem_clear_mem_worker(struct work_struct *work) | 244 | void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem) |
233 | { | 245 | { |
234 | struct mm_gk20a *mm = container_of(work, struct mm_gk20a, | 246 | nvgpu_free(vidmem->allocator, |
235 | vidmem.clear_mem_worker); | 247 | (u64)nvgpu_vidmem_get_page_alloc(vidmem->priv.sgt->sgl)); |
236 | struct gk20a *g = mm->g; | 248 | nvgpu_free_sgtable(g, &vidmem->priv.sgt); |
237 | struct nvgpu_mem *mem; | ||
238 | |||
239 | while ((mem = nvgpu_vidmem_get_pending_alloc(mm)) != NULL) { | ||
240 | nvgpu_vidmem_clear(g, mem); | ||
241 | nvgpu_free(mem->allocator, | ||
242 | (u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl)); | ||
243 | nvgpu_free_sgtable(g, &mem->priv.sgt); | ||
244 | |||
245 | WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size, | ||
246 | &g->mm.vidmem.bytes_pending) < 0); | ||
247 | mem->size = 0; | ||
248 | mem->aperture = APERTURE_INVALID; | ||
249 | |||
250 | nvgpu_kfree(g, mem); | ||
251 | } | ||
252 | } | 249 | } |
diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c index d1c5a2e8..60b819d7 100644 --- a/drivers/gpu/nvgpu/common/mm/vidmem.c +++ b/drivers/gpu/nvgpu/common/mm/vidmem.c | |||
@@ -22,15 +22,55 @@ | |||
22 | 22 | ||
23 | #include <linux/scatterlist.h> | 23 | #include <linux/scatterlist.h> |
24 | 24 | ||
25 | #include <nvgpu/timers.h> | ||
25 | #include <nvgpu/dma.h> | 26 | #include <nvgpu/dma.h> |
26 | #include <nvgpu/vidmem.h> | 27 | #include <nvgpu/vidmem.h> |
27 | #include <nvgpu/page_allocator.h> | 28 | #include <nvgpu/page_allocator.h> |
29 | #include <nvgpu/enabled.h> | ||
28 | 30 | ||
29 | #include "gk20a/gk20a.h" | 31 | #include "gk20a/gk20a.h" |
30 | #include "gk20a/mm_gk20a.h" | 32 | #include "gk20a/mm_gk20a.h" |
31 | 33 | ||
34 | /* | ||
35 | * This is expected to be called from the shutdown path (or the error path in | ||
36 | * the vidmem init code). As such we do not expect new vidmem frees to be | ||
37 | * enqueued. | ||
38 | */ | ||
32 | void nvgpu_vidmem_destroy(struct gk20a *g) | 39 | void nvgpu_vidmem_destroy(struct gk20a *g) |
33 | { | 40 | { |
41 | struct nvgpu_timeout timeout; | ||
42 | |||
43 | nvgpu_timeout_init(g, &timeout, 100, NVGPU_TIMER_RETRY_TIMER); | ||
44 | |||
45 | /* | ||
46 | * Ensure that the thread runs one last time to flush anything in the | ||
47 | * queue. | ||
48 | */ | ||
49 | nvgpu_cond_signal_interruptible(&g->mm.vidmem.clearing_thread_cond); | ||
50 | |||
51 | /* | ||
52 | * Wait for at most 1 second before just continuing on. It doesn't make | ||
53 | * sense to hang the system over some potential memory leaks. | ||
54 | */ | ||
55 | do { | ||
56 | bool empty; | ||
57 | |||
58 | nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); | ||
59 | empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head); | ||
60 | nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); | ||
61 | |||
62 | if (empty) | ||
63 | break; | ||
64 | |||
65 | nvgpu_msleep(10); | ||
66 | } while (!nvgpu_timeout_expired(&timeout)); | ||
67 | |||
68 | /* | ||
69 | * Kill the vidmem clearing thread now. This will wake the thread up | ||
70 | * automatically and cause the wait_interruptible condition to trigger. | ||
71 | */ | ||
72 | nvgpu_thread_stop(&g->mm.vidmem.clearing_thread); | ||
73 | |||
34 | if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) | 74 | if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) |
35 | nvgpu_alloc_destroy(&g->mm.vidmem.allocator); | 75 | nvgpu_alloc_destroy(&g->mm.vidmem.allocator); |
36 | } | 76 | } |
@@ -107,6 +147,139 @@ static int __nvgpu_vidmem_do_clear_all(struct gk20a *g) | |||
107 | return 0; | 147 | return 0; |
108 | } | 148 | } |
109 | 149 | ||
150 | void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm) | ||
151 | { | ||
152 | /* | ||
153 | * On the first increment of the pause_count (0 -> 1) take the pause | ||
154 | * lock and prevent the vidmem clearing thread from processing work | ||
155 | * items. | ||
156 | * | ||
157 | * Otherwise the increment is all that's needed - it's essentially a | ||
158 | * ref-count for the number of pause() calls. | ||
159 | * | ||
160 | * The sync component is implemented by waiting for the lock to be | ||
161 | * released by the clearing thread in case the thread is currently | ||
162 | * processing work items. | ||
163 | */ | ||
164 | if (nvgpu_atomic_inc_return(&mm->vidmem.pause_count) == 1) | ||
165 | nvgpu_mutex_acquire(&mm->vidmem.clearing_thread_lock); | ||
166 | } | ||
167 | |||
168 | void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm) | ||
169 | { | ||
170 | /* | ||
171 | * And on the last decrement (1 -> 0) release the pause lock and let | ||
172 | * the vidmem clearing thread continue. | ||
173 | */ | ||
174 | if (nvgpu_atomic_dec_return(&mm->vidmem.pause_count) == 0) | ||
175 | nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock); | ||
176 | } | ||
177 | |||
178 | int nvgpu_vidmem_clear_list_enqueue(struct gk20a *g, struct nvgpu_mem *mem) | ||
179 | { | ||
180 | struct mm_gk20a *mm = &g->mm; | ||
181 | |||
182 | /* | ||
183 | * Crap. Can't enqueue new vidmem bufs! CE may be gone! | ||
184 | * | ||
185 | * However, an errant app can hold a vidmem dma_buf FD open past when | ||
186 | * the nvgpu driver has exited. Thus when the FD does get closed | ||
187 | * eventually the dma_buf release function will try to call the vidmem | ||
188 | * free function which will attempt to enqueue the vidmem into the | ||
189 | * vidmem clearing thread. | ||
190 | */ | ||
191 | if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) | ||
192 | return -ENOSYS; | ||
193 | |||
194 | nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex); | ||
195 | nvgpu_list_add_tail(&mem->clear_list_entry, | ||
196 | &mm->vidmem.clear_list_head); | ||
197 | nvgpu_atomic64_add(mem->aligned_size, &mm->vidmem.bytes_pending); | ||
198 | nvgpu_mutex_release(&mm->vidmem.clear_list_mutex); | ||
199 | |||
200 | nvgpu_cond_signal_interruptible(&mm->vidmem.clearing_thread_cond); | ||
201 | |||
202 | return 0; | ||
203 | } | ||
204 | |||
205 | static struct nvgpu_mem *nvgpu_vidmem_clear_list_dequeue(struct mm_gk20a *mm) | ||
206 | { | ||
207 | struct nvgpu_mem *mem = NULL; | ||
208 | |||
209 | nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex); | ||
210 | if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) { | ||
211 | mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head, | ||
212 | nvgpu_mem, clear_list_entry); | ||
213 | nvgpu_list_del(&mem->clear_list_entry); | ||
214 | } | ||
215 | nvgpu_mutex_release(&mm->vidmem.clear_list_mutex); | ||
216 | |||
217 | return mem; | ||
218 | } | ||
219 | |||
220 | static void nvgpu_vidmem_clear_pending_allocs(struct mm_gk20a *mm) | ||
221 | { | ||
222 | struct gk20a *g = mm->g; | ||
223 | struct nvgpu_mem *mem; | ||
224 | |||
225 | while ((mem = nvgpu_vidmem_clear_list_dequeue(mm)) != NULL) { | ||
226 | nvgpu_vidmem_clear(g, mem); | ||
227 | |||
228 | WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size, | ||
229 | &g->mm.vidmem.bytes_pending) < 0); | ||
230 | mem->size = 0; | ||
231 | mem->aperture = APERTURE_INVALID; | ||
232 | |||
233 | __nvgpu_mem_free_vidmem_alloc(g, mem); | ||
234 | nvgpu_kfree(g, mem); | ||
235 | } | ||
236 | } | ||
237 | |||
238 | static int nvgpu_vidmem_clear_pending_allocs_thr(void *mm_ptr) | ||
239 | { | ||
240 | struct mm_gk20a *mm = mm_ptr; | ||
241 | |||
242 | /* | ||
243 | * Simple thread whose sole job is to periodically clear userspace | ||
244 | * vidmem allocations that have been recently freed. | ||
245 | * | ||
246 | * Since it doesn't make sense to run unless there's pending work a | ||
247 | * condition field is used to wait for work. When the DMA API frees a | ||
248 | * userspace vidmem buf it enqueues it into the clear list and alerts us | ||
249 | * that we have some work to do. | ||
250 | */ | ||
251 | |||
252 | while (!nvgpu_thread_should_stop(&mm->vidmem.clearing_thread)) { | ||
253 | int ret; | ||
254 | |||
255 | /* | ||
256 | * Wait for work but also make sure we should not be paused. | ||
257 | */ | ||
258 | ret = NVGPU_COND_WAIT_INTERRUPTIBLE( | ||
259 | &mm->vidmem.clearing_thread_cond, | ||
260 | nvgpu_thread_should_stop( | ||
261 | &mm->vidmem.clearing_thread) || | ||
262 | !nvgpu_list_empty(&mm->vidmem.clear_list_head), | ||
263 | 0); | ||
264 | if (ret == -ERESTARTSYS) | ||
265 | continue; | ||
266 | |||
267 | /* | ||
268 | * Use this lock to implement a pause mechanism. By taking this | ||
269 | * lock some other code can prevent this thread from processing | ||
270 | * work items. | ||
271 | */ | ||
272 | if (!nvgpu_mutex_tryacquire(&mm->vidmem.clearing_thread_lock)) | ||
273 | continue; | ||
274 | |||
275 | nvgpu_vidmem_clear_pending_allocs(mm); | ||
276 | |||
277 | nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock); | ||
278 | } | ||
279 | |||
280 | return 0; | ||
281 | } | ||
282 | |||
110 | int nvgpu_vidmem_init(struct mm_gk20a *mm) | 283 | int nvgpu_vidmem_init(struct mm_gk20a *mm) |
111 | { | 284 | { |
112 | struct gk20a *g = mm->g; | 285 | struct gk20a *g = mm->g; |
@@ -156,16 +329,39 @@ int nvgpu_vidmem_init(struct mm_gk20a *mm) | |||
156 | mm->vidmem.bootstrap_base = bootstrap_base; | 329 | mm->vidmem.bootstrap_base = bootstrap_base; |
157 | mm->vidmem.bootstrap_size = bootstrap_size; | 330 | mm->vidmem.bootstrap_size = bootstrap_size; |
158 | 331 | ||
159 | nvgpu_mutex_init(&mm->vidmem.first_clear_mutex); | 332 | err = nvgpu_cond_init(&mm->vidmem.clearing_thread_cond); |
333 | if (err) | ||
334 | goto fail; | ||
160 | 335 | ||
161 | INIT_WORK(&mm->vidmem.clear_mem_worker, nvgpu_vidmem_clear_mem_worker); | ||
162 | nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0); | 336 | nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0); |
163 | nvgpu_init_list_node(&mm->vidmem.clear_list_head); | 337 | nvgpu_init_list_node(&mm->vidmem.clear_list_head); |
164 | nvgpu_mutex_init(&mm->vidmem.clear_list_mutex); | 338 | nvgpu_mutex_init(&mm->vidmem.clear_list_mutex); |
339 | nvgpu_mutex_init(&mm->vidmem.clearing_thread_lock); | ||
340 | nvgpu_atomic_set(&mm->vidmem.pause_count, 0); | ||
341 | |||
342 | /* | ||
343 | * Start the thread off in the paused state. The thread doesn't have to | ||
344 | * be running for this to work. It will be woken up later on in | ||
345 | * finalize_poweron(). We won't necessarily have a CE context yet | ||
346 | * either, so hypothetically one could cause a race where we try to | ||
347 | * clear a vidmem struct before we have a CE context to do so. | ||
348 | */ | ||
349 | nvgpu_vidmem_thread_pause_sync(mm); | ||
350 | |||
351 | err = nvgpu_thread_create(&mm->vidmem.clearing_thread, mm, | ||
352 | nvgpu_vidmem_clear_pending_allocs_thr, | ||
353 | "vidmem-clear"); | ||
354 | if (err) | ||
355 | goto fail; | ||
165 | 356 | ||
166 | gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); | 357 | gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); |
167 | 358 | ||
168 | return 0; | 359 | return 0; |
360 | |||
361 | fail: | ||
362 | nvgpu_cond_destroy(&mm->vidmem.clearing_thread_cond); | ||
363 | nvgpu_vidmem_destroy(g); | ||
364 | return err; | ||
169 | } | 365 | } |
170 | 366 | ||
171 | int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space) | 367 | int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space) |
@@ -244,21 +440,6 @@ int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem) | |||
244 | return err; | 440 | return err; |
245 | } | 441 | } |
246 | 442 | ||
247 | struct nvgpu_mem *nvgpu_vidmem_get_pending_alloc(struct mm_gk20a *mm) | ||
248 | { | ||
249 | struct nvgpu_mem *mem = NULL; | ||
250 | |||
251 | nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex); | ||
252 | if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) { | ||
253 | mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head, | ||
254 | nvgpu_mem, clear_list_entry); | ||
255 | nvgpu_list_del(&mem->clear_list_entry); | ||
256 | } | ||
257 | nvgpu_mutex_release(&mm->vidmem.clear_list_mutex); | ||
258 | |||
259 | return mem; | ||
260 | } | ||
261 | |||
262 | static int nvgpu_vidmem_clear_all(struct gk20a *g) | 443 | static int nvgpu_vidmem_clear_all(struct gk20a *g) |
263 | { | 444 | { |
264 | int err; | 445 | int err; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index e1bf2b4b..02baf683 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <nvgpu/pmu.h> | 33 | #include <nvgpu/pmu.h> |
34 | #include <nvgpu/gmmu.h> | 34 | #include <nvgpu/gmmu.h> |
35 | #include <nvgpu/ltc.h> | 35 | #include <nvgpu/ltc.h> |
36 | #include <nvgpu/vidmem.h> | ||
36 | 37 | ||
37 | #include <trace/events/gk20a.h> | 38 | #include <trace/events/gk20a.h> |
38 | 39 | ||
@@ -97,8 +98,6 @@ int gk20a_prepare_poweroff(struct gk20a *g) | |||
97 | if (gk20a_fifo_is_engine_busy(g)) | 98 | if (gk20a_fifo_is_engine_busy(g)) |
98 | return -EBUSY; | 99 | return -EBUSY; |
99 | 100 | ||
100 | gk20a_ce_suspend(g); | ||
101 | |||
102 | ret = gk20a_channel_suspend(g); | 101 | ret = gk20a_channel_suspend(g); |
103 | if (ret) | 102 | if (ret) |
104 | return ret; | 103 | return ret; |
@@ -111,6 +110,8 @@ int gk20a_prepare_poweroff(struct gk20a *g) | |||
111 | ret |= gk20a_mm_suspend(g); | 110 | ret |= gk20a_mm_suspend(g); |
112 | ret |= gk20a_fifo_suspend(g); | 111 | ret |= gk20a_fifo_suspend(g); |
113 | 112 | ||
113 | gk20a_ce_suspend(g); | ||
114 | |||
114 | /* Disable GPCPLL */ | 115 | /* Disable GPCPLL */ |
115 | if (g->ops.clk.suspend_clk_support) | 116 | if (g->ops.clk.suspend_clk_support) |
116 | ret |= g->ops.clk.suspend_clk_support(g); | 117 | ret |= g->ops.clk.suspend_clk_support(g); |
@@ -323,6 +324,8 @@ int gk20a_finalize_poweron(struct gk20a *g) | |||
323 | } | 324 | } |
324 | } | 325 | } |
325 | 326 | ||
327 | nvgpu_vidmem_thread_unpause(&g->mm); | ||
328 | |||
326 | #if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_TEGRA_19x_GPU) | 329 | #if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_TEGRA_19x_GPU) |
327 | if (gk20a_platform_has_syncpoints(g) && g->syncpt_unit_size) { | 330 | if (gk20a_platform_has_syncpoints(g) && g->syncpt_unit_size) { |
328 | if (!nvgpu_mem_is_valid(&g->syncpt_mem)) { | 331 | if (!nvgpu_mem_is_valid(&g->syncpt_mem)) { |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 687951a9..67ab307f 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -978,9 +978,7 @@ int gk20a_mm_suspend(struct gk20a *g) | |||
978 | { | 978 | { |
979 | gk20a_dbg_fn(""); | 979 | gk20a_dbg_fn(""); |
980 | 980 | ||
981 | #if defined(CONFIG_GK20A_VIDMEM) | 981 | nvgpu_vidmem_thread_pause_sync(&g->mm); |
982 | cancel_work_sync(&g->mm.vidmem.clear_mem_worker); | ||
983 | #endif | ||
984 | 982 | ||
985 | g->ops.mm.cbc_clean(g); | 983 | g->ops.mm.cbc_clean(g); |
986 | g->ops.mm.l2_flush(g, false); | 984 | g->ops.mm.l2_flush(g, false); |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 556cb234..13698cd7 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -36,6 +36,8 @@ | |||
36 | #include <nvgpu/rbtree.h> | 36 | #include <nvgpu/rbtree.h> |
37 | #include <nvgpu/kref.h> | 37 | #include <nvgpu/kref.h> |
38 | #include <nvgpu/atomic.h> | 38 | #include <nvgpu/atomic.h> |
39 | #include <nvgpu/cond.h> | ||
40 | #include <nvgpu/thread.h> | ||
39 | 41 | ||
40 | struct nvgpu_pd_cache; | 42 | struct nvgpu_pd_cache; |
41 | 43 | ||
@@ -272,7 +274,11 @@ struct mm_gk20a { | |||
272 | struct nvgpu_list_node clear_list_head; | 274 | struct nvgpu_list_node clear_list_head; |
273 | struct nvgpu_mutex clear_list_mutex; | 275 | struct nvgpu_mutex clear_list_mutex; |
274 | 276 | ||
275 | struct work_struct clear_mem_worker; | 277 | struct nvgpu_cond clearing_thread_cond; |
278 | struct nvgpu_thread clearing_thread; | ||
279 | struct nvgpu_mutex clearing_thread_lock; | ||
280 | nvgpu_atomic_t pause_count; | ||
281 | |||
276 | nvgpu_atomic64_t bytes_pending; | 282 | nvgpu_atomic64_t bytes_pending; |
277 | } vidmem; | 283 | } vidmem; |
278 | }; | 284 | }; |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h index 537409a8..6feacff7 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | |||
@@ -273,6 +273,16 @@ int nvgpu_mem_create_from_mem(struct gk20a *g, | |||
273 | int start_page, int nr_pages); | 273 | int start_page, int nr_pages); |
274 | 274 | ||
275 | /* | 275 | /* |
276 | * Really free a vidmem buffer. There's a fair amount of work involved in | ||
277 | * freeing vidmem buffers in the DMA API. This handles none of that - it only | ||
278 | * frees the underlying vidmem specific structures used in vidmem buffers. | ||
279 | * | ||
280 | * This is implemented in the OS specific code. If it's not necessary it can | ||
281 | * be a noop. But the symbol must at least be present. | ||
282 | */ | ||
283 | void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem); | ||
284 | |||
285 | /* | ||
276 | * Buffer accessors - wrap between begin() and end() if there is no permanent | 286 | * Buffer accessors - wrap between begin() and end() if there is no permanent |
277 | * kernel mapping for this buffer. | 287 | * kernel mapping for this buffer. |
278 | */ | 288 | */ |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vidmem.h b/drivers/gpu/nvgpu/include/nvgpu/vidmem.h index 9e9f8301..690f8164 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vidmem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vidmem.h | |||
@@ -73,17 +73,19 @@ struct nvgpu_vidmem_buf *nvgpu_vidmem_user_alloc(struct gk20a *g, size_t bytes); | |||
73 | 73 | ||
74 | void nvgpu_vidmem_buf_free(struct gk20a *g, struct nvgpu_vidmem_buf *buf); | 74 | void nvgpu_vidmem_buf_free(struct gk20a *g, struct nvgpu_vidmem_buf *buf); |
75 | 75 | ||
76 | int nvgpu_vidmem_clear_list_enqueue(struct gk20a *g, struct nvgpu_mem *mem); | ||
77 | |||
76 | bool nvgpu_addr_is_vidmem_page_alloc(u64 addr); | 78 | bool nvgpu_addr_is_vidmem_page_alloc(u64 addr); |
77 | int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space); | 79 | int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space); |
78 | 80 | ||
79 | struct nvgpu_mem *nvgpu_vidmem_get_pending_alloc(struct mm_gk20a *mm); | ||
80 | |||
81 | void nvgpu_vidmem_destroy(struct gk20a *g); | 81 | void nvgpu_vidmem_destroy(struct gk20a *g); |
82 | int nvgpu_vidmem_init(struct mm_gk20a *mm); | 82 | int nvgpu_vidmem_init(struct mm_gk20a *mm); |
83 | 83 | ||
84 | void nvgpu_vidmem_clear_mem_worker(struct work_struct *work); | ||
85 | int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem); | 84 | int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem); |
86 | 85 | ||
86 | void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm); | ||
87 | void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm); | ||
88 | |||
87 | #else /* !defined(CONFIG_GK20A_VIDMEM) */ | 89 | #else /* !defined(CONFIG_GK20A_VIDMEM) */ |
88 | 90 | ||
89 | /* | 91 | /* |
@@ -110,11 +112,6 @@ static inline int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space) | |||
110 | return -ENOSYS; | 112 | return -ENOSYS; |
111 | } | 113 | } |
112 | 114 | ||
113 | static inline struct nvgpu_mem *nvgpu_vidmem_get_pending_alloc(struct mm_gk20a *mm) | ||
114 | { | ||
115 | return NULL; | ||
116 | } | ||
117 | |||
118 | static inline void nvgpu_vidmem_destroy(struct gk20a *g) | 115 | static inline void nvgpu_vidmem_destroy(struct gk20a *g) |
119 | { | 116 | { |
120 | } | 117 | } |
@@ -135,6 +132,14 @@ static inline int nvgpu_vidmem_clear(struct gk20a *g, | |||
135 | return -ENOSYS; | 132 | return -ENOSYS; |
136 | } | 133 | } |
137 | 134 | ||
135 | static inline void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm) | ||
136 | { | ||
137 | } | ||
138 | |||
139 | static inline void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm) | ||
140 | { | ||
141 | } | ||
142 | |||
138 | #endif /* !defined(CONFIG_GK20A_VIDMEM) */ | 143 | #endif /* !defined(CONFIG_GK20A_VIDMEM) */ |
139 | 144 | ||
140 | #endif /* __NVGPU_VIDMEM_H__ */ | 145 | #endif /* __NVGPU_VIDMEM_H__ */ |