diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/vidmem.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/vidmem.c | 215 |
1 files changed, 198 insertions, 17 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c index d1c5a2e8..60b819d7 100644 --- a/drivers/gpu/nvgpu/common/mm/vidmem.c +++ b/drivers/gpu/nvgpu/common/mm/vidmem.c | |||
@@ -22,15 +22,55 @@ | |||
22 | 22 | ||
23 | #include <linux/scatterlist.h> | 23 | #include <linux/scatterlist.h> |
24 | 24 | ||
25 | #include <nvgpu/timers.h> | ||
25 | #include <nvgpu/dma.h> | 26 | #include <nvgpu/dma.h> |
26 | #include <nvgpu/vidmem.h> | 27 | #include <nvgpu/vidmem.h> |
27 | #include <nvgpu/page_allocator.h> | 28 | #include <nvgpu/page_allocator.h> |
29 | #include <nvgpu/enabled.h> | ||
28 | 30 | ||
29 | #include "gk20a/gk20a.h" | 31 | #include "gk20a/gk20a.h" |
30 | #include "gk20a/mm_gk20a.h" | 32 | #include "gk20a/mm_gk20a.h" |
31 | 33 | ||
34 | /* | ||
35 | * This is expected to be called from the shutdown path (or the error path in | ||
36 | * the vidmem init code). As such we do not expect new vidmem frees to be | ||
37 | * enqueued. | ||
38 | */ | ||
32 | void nvgpu_vidmem_destroy(struct gk20a *g) | 39 | void nvgpu_vidmem_destroy(struct gk20a *g) |
33 | { | 40 | { |
41 | struct nvgpu_timeout timeout; | ||
42 | |||
43 | nvgpu_timeout_init(g, &timeout, 100, NVGPU_TIMER_RETRY_TIMER); | ||
44 | |||
45 | /* | ||
46 | * Ensure that the thread runs one last time to flush anything in the | ||
47 | * queue. | ||
48 | */ | ||
49 | nvgpu_cond_signal_interruptible(&g->mm.vidmem.clearing_thread_cond); | ||
50 | |||
51 | /* | ||
52 | * Wait for at most 1 second before just continuing on. It doesn't make | ||
53 | * sense to hang the system over some potential memory leaks. | ||
54 | */ | ||
55 | do { | ||
56 | bool empty; | ||
57 | |||
58 | nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); | ||
59 | empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head); | ||
60 | nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); | ||
61 | |||
62 | if (empty) | ||
63 | break; | ||
64 | |||
65 | nvgpu_msleep(10); | ||
66 | } while (!nvgpu_timeout_expired(&timeout)); | ||
67 | |||
68 | /* | ||
69 | * Kill the vidmem clearing thread now. This will wake the thread up | ||
70 | * automatically and cause the wait_interruptible condition trigger. | ||
71 | */ | ||
72 | nvgpu_thread_stop(&g->mm.vidmem.clearing_thread); | ||
73 | |||
34 | if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) | 74 | if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) |
35 | nvgpu_alloc_destroy(&g->mm.vidmem.allocator); | 75 | nvgpu_alloc_destroy(&g->mm.vidmem.allocator); |
36 | } | 76 | } |
@@ -107,6 +147,139 @@ static int __nvgpu_vidmem_do_clear_all(struct gk20a *g) | |||
107 | return 0; | 147 | return 0; |
108 | } | 148 | } |
109 | 149 | ||
150 | void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm) | ||
151 | { | ||
152 | /* | ||
153 | * On the first increment of the pause_count (0 -> 1) take the pause | ||
154 | * lock and prevent the vidmem clearing thread from processing work | ||
155 | * items. | ||
156 | * | ||
157 | * Otherwise the increment is all that's needed - it's essentially a | ||
158 | * ref-count for the number of pause() calls. | ||
159 | * | ||
160 | * The sync component is implemented by waiting for the lock to be | ||
161 | * released by the clearing thread in case the thread is currently | ||
162 | * processing work items. | ||
163 | */ | ||
164 | if (nvgpu_atomic_inc_return(&mm->vidmem.pause_count) == 1) | ||
165 | nvgpu_mutex_acquire(&mm->vidmem.clearing_thread_lock); | ||
166 | } | ||
167 | |||
168 | void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm) | ||
169 | { | ||
170 | /* | ||
171 | * And on the last decrement (1 -> 0) release the pause lock and let | ||
172 | * the vidmem clearing thread continue. | ||
173 | */ | ||
174 | if (nvgpu_atomic_dec_return(&mm->vidmem.pause_count) == 0) | ||
175 | nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock); | ||
176 | } | ||
177 | |||
178 | int nvgpu_vidmem_clear_list_enqueue(struct gk20a *g, struct nvgpu_mem *mem) | ||
179 | { | ||
180 | struct mm_gk20a *mm = &g->mm; | ||
181 | |||
182 | /* | ||
183 | * Crap. Can't enqueue new vidmem bufs! CE may be gone! | ||
184 | * | ||
185 | * However, an errant app can hold a vidmem dma_buf FD open past when | ||
186 | * the nvgpu driver has exited. Thus when the FD does get closed | ||
187 | * eventually the dma_buf release function will try to call the vidmem | ||
188 | * free function which will attempt to enqueue the vidmem into the | ||
189 | * vidmem clearing thread. | ||
190 | */ | ||
191 | if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) | ||
192 | return -ENOSYS; | ||
193 | |||
194 | nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex); | ||
195 | nvgpu_list_add_tail(&mem->clear_list_entry, | ||
196 | &mm->vidmem.clear_list_head); | ||
197 | nvgpu_atomic64_add(mem->aligned_size, &mm->vidmem.bytes_pending); | ||
198 | nvgpu_mutex_release(&mm->vidmem.clear_list_mutex); | ||
199 | |||
200 | nvgpu_cond_signal_interruptible(&mm->vidmem.clearing_thread_cond); | ||
201 | |||
202 | return 0; | ||
203 | } | ||
204 | |||
205 | static struct nvgpu_mem *nvgpu_vidmem_clear_list_dequeue(struct mm_gk20a *mm) | ||
206 | { | ||
207 | struct nvgpu_mem *mem = NULL; | ||
208 | |||
209 | nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex); | ||
210 | if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) { | ||
211 | mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head, | ||
212 | nvgpu_mem, clear_list_entry); | ||
213 | nvgpu_list_del(&mem->clear_list_entry); | ||
214 | } | ||
215 | nvgpu_mutex_release(&mm->vidmem.clear_list_mutex); | ||
216 | |||
217 | return mem; | ||
218 | } | ||
219 | |||
220 | static void nvgpu_vidmem_clear_pending_allocs(struct mm_gk20a *mm) | ||
221 | { | ||
222 | struct gk20a *g = mm->g; | ||
223 | struct nvgpu_mem *mem; | ||
224 | |||
225 | while ((mem = nvgpu_vidmem_clear_list_dequeue(mm)) != NULL) { | ||
226 | nvgpu_vidmem_clear(g, mem); | ||
227 | |||
228 | WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size, | ||
229 | &g->mm.vidmem.bytes_pending) < 0); | ||
230 | mem->size = 0; | ||
231 | mem->aperture = APERTURE_INVALID; | ||
232 | |||
233 | __nvgpu_mem_free_vidmem_alloc(g, mem); | ||
234 | nvgpu_kfree(g, mem); | ||
235 | } | ||
236 | } | ||
237 | |||
238 | static int nvgpu_vidmem_clear_pending_allocs_thr(void *mm_ptr) | ||
239 | { | ||
240 | struct mm_gk20a *mm = mm_ptr; | ||
241 | |||
242 | /* | ||
243 | * Simple thread who's sole job is to periodically clear userspace | ||
244 | * vidmem allocations that have been recently freed. | ||
245 | * | ||
246 | * Since it doesn't make sense to run unless there's pending work a | ||
247 | * condition field is used to wait for work. When the DMA API frees a | ||
248 | * userspace vidmem buf it enqueues it into the clear list and alerts us | ||
249 | * that we have some work to do. | ||
250 | */ | ||
251 | |||
252 | while (!nvgpu_thread_should_stop(&mm->vidmem.clearing_thread)) { | ||
253 | int ret; | ||
254 | |||
255 | /* | ||
256 | * Wait for work but also make sure we should not be paused. | ||
257 | */ | ||
258 | ret = NVGPU_COND_WAIT_INTERRUPTIBLE( | ||
259 | &mm->vidmem.clearing_thread_cond, | ||
260 | nvgpu_thread_should_stop( | ||
261 | &mm->vidmem.clearing_thread) || | ||
262 | !nvgpu_list_empty(&mm->vidmem.clear_list_head), | ||
263 | 0); | ||
264 | if (ret == -ERESTARTSYS) | ||
265 | continue; | ||
266 | |||
267 | /* | ||
268 | * Use this lock to implement a pause mechanism. By taking this | ||
269 | * lock some other code can prevent this thread from processing | ||
270 | * work items. | ||
271 | */ | ||
272 | if (!nvgpu_mutex_tryacquire(&mm->vidmem.clearing_thread_lock)) | ||
273 | continue; | ||
274 | |||
275 | nvgpu_vidmem_clear_pending_allocs(mm); | ||
276 | |||
277 | nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock); | ||
278 | } | ||
279 | |||
280 | return 0; | ||
281 | } | ||
282 | |||
110 | int nvgpu_vidmem_init(struct mm_gk20a *mm) | 283 | int nvgpu_vidmem_init(struct mm_gk20a *mm) |
111 | { | 284 | { |
112 | struct gk20a *g = mm->g; | 285 | struct gk20a *g = mm->g; |
@@ -156,16 +329,39 @@ int nvgpu_vidmem_init(struct mm_gk20a *mm) | |||
156 | mm->vidmem.bootstrap_base = bootstrap_base; | 329 | mm->vidmem.bootstrap_base = bootstrap_base; |
157 | mm->vidmem.bootstrap_size = bootstrap_size; | 330 | mm->vidmem.bootstrap_size = bootstrap_size; |
158 | 331 | ||
159 | nvgpu_mutex_init(&mm->vidmem.first_clear_mutex); | 332 | err = nvgpu_cond_init(&mm->vidmem.clearing_thread_cond); |
333 | if (err) | ||
334 | goto fail; | ||
160 | 335 | ||
161 | INIT_WORK(&mm->vidmem.clear_mem_worker, nvgpu_vidmem_clear_mem_worker); | ||
162 | nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0); | 336 | nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0); |
163 | nvgpu_init_list_node(&mm->vidmem.clear_list_head); | 337 | nvgpu_init_list_node(&mm->vidmem.clear_list_head); |
164 | nvgpu_mutex_init(&mm->vidmem.clear_list_mutex); | 338 | nvgpu_mutex_init(&mm->vidmem.clear_list_mutex); |
339 | nvgpu_mutex_init(&mm->vidmem.clearing_thread_lock); | ||
340 | nvgpu_atomic_set(&mm->vidmem.pause_count, 0); | ||
341 | |||
342 | /* | ||
343 | * Start the thread off in the paused state. The thread doesn't have to | ||
344 | * be running for this to work. It will be woken up later on in | ||
345 | * finalize_poweron(). We won't necessarily have a CE context yet | ||
346 | * either, so hypothetically one could cause a race where we try to | ||
347 | * clear a vidmem struct before we have a CE context to do so. | ||
348 | */ | ||
349 | nvgpu_vidmem_thread_pause_sync(mm); | ||
350 | |||
351 | err = nvgpu_thread_create(&mm->vidmem.clearing_thread, mm, | ||
352 | nvgpu_vidmem_clear_pending_allocs_thr, | ||
353 | "vidmem-clear"); | ||
354 | if (err) | ||
355 | goto fail; | ||
165 | 356 | ||
166 | gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); | 357 | gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); |
167 | 358 | ||
168 | return 0; | 359 | return 0; |
360 | |||
361 | fail: | ||
362 | nvgpu_cond_destroy(&mm->vidmem.clearing_thread_cond); | ||
363 | nvgpu_vidmem_destroy(g); | ||
364 | return err; | ||
169 | } | 365 | } |
170 | 366 | ||
171 | int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space) | 367 | int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space) |
@@ -244,21 +440,6 @@ int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem) | |||
244 | return err; | 440 | return err; |
245 | } | 441 | } |
246 | 442 | ||
247 | struct nvgpu_mem *nvgpu_vidmem_get_pending_alloc(struct mm_gk20a *mm) | ||
248 | { | ||
249 | struct nvgpu_mem *mem = NULL; | ||
250 | |||
251 | nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex); | ||
252 | if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) { | ||
253 | mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head, | ||
254 | nvgpu_mem, clear_list_entry); | ||
255 | nvgpu_list_del(&mem->clear_list_entry); | ||
256 | } | ||
257 | nvgpu_mutex_release(&mm->vidmem.clear_list_mutex); | ||
258 | |||
259 | return mem; | ||
260 | } | ||
261 | |||
262 | static int nvgpu_vidmem_clear_all(struct gk20a *g) | 443 | static int nvgpu_vidmem_clear_all(struct gk20a *g) |
263 | { | 444 | { |
264 | int err; | 445 | int err; |