Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/vm.c')
-rw-r--r--	drivers/gpu/nvgpu/common/mm/vm.c	1145
1 file changed, 1145 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
new file mode 100644
index 00000000..ebe8e381
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -0,0 +1,1145 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/bug.h>
#include <uapi/linux/nvgpu.h>

#include <nvgpu/log.h>
#include <nvgpu/dma.h>
#include <nvgpu/vm.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/lock.h>
#include <nvgpu/list.h>
#include <nvgpu/rbtree.h>
#include <nvgpu/semaphore.h>
#include <nvgpu/enabled.h>

#include <nvgpu/vgpu/vm.h>

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"

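/*
 * Per-mapping compression tag (ctag) bookkeeping. The kind fields are s16
 * rather than u8 so that NV_KIND_INVALID (-1) can be carried alongside the
 * valid hardware kinds.
 */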
struct nvgpu_ctag_buffer_info {
	u64 size;
	enum gmmu_pgsz_gk20a pgsz_idx;
	u32 flags;

	s16 compr_kind;
	s16 incompr_kind;

	u32 ctag_lines;
};

static int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
					struct nvgpu_ctag_buffer_info *binfo);

static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer,
			     struct vm_gk20a_mapping_batch *batch);

int vm_aspace_id(struct vm_gk20a *vm)
{
	return vm->as_share ? vm->as_share->id : -1;
}

static void __nvgpu_vm_free_entries(struct vm_gk20a *vm,
				    struct nvgpu_gmmu_pd *pd,
				    int level)
{
	int i;

	if (pd->mem) {
		__nvgpu_pd_free(vm, pd);
		pd->mem = NULL;
	}

	if (pd->entries) {
		for (i = 0; i < pd->num_entries; i++)
			__nvgpu_vm_free_entries(vm, &pd->entries[i],
						level + 1);
		nvgpu_vfree(vm->mm->g, pd->entries);
		pd->entries = NULL;
	}
}

static void nvgpu_vm_free_entries(struct vm_gk20a *vm,
				  struct nvgpu_gmmu_pd *pdb)
{
	struct gk20a *g = vm->mm->g;
	int i;

	__nvgpu_pd_cache_free_direct(g, pdb);

	if (!pdb->entries)
		return;

	for (i = 0; i < pdb->num_entries; i++)
		__nvgpu_vm_free_entries(vm, &pdb->entries[i], 1);

	nvgpu_vfree(g, pdb->entries);
	pdb->entries = NULL;
}

u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size,
			enum gmmu_pgsz_gk20a pgsz_idx)
{
	struct gk20a *g = vm->mm->g;
	struct nvgpu_allocator *vma = NULL;
	u64 addr;
	u64 page_size;

	/* Validate the index before using it to look up the VMA. */
	if (pgsz_idx >= gmmu_nr_page_sizes) {
		nvgpu_err(g, "(%s) invalid page size requested", vm->name);
		return 0;
	}

	vma = vm->vma[pgsz_idx];
	page_size = vm->gmmu_page_sizes[pgsz_idx];

	if ((pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
		nvgpu_err(g, "(%s) unsupported page size requested", vma->name);
		return 0;
	}

	/* Be certain we round up to page_size if needed. */
	size = (size + ((u64)page_size - 1)) & ~((u64)page_size - 1);

	addr = nvgpu_alloc(vma, size);
	if (!addr) {
		nvgpu_err(g, "(%s) oom: sz=0x%llx", vma->name, size);
		return 0;
	}

	return addr;
}
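
/*
 * Worked example of the round-up above (illustrative numbers): with a 64K
 * page (page_size = 0x10000), a request of size = 0x11000 becomes
 *
 *	(0x11000 + 0xffff) & ~0xffff = 0x20000
 *
 * i.e. the size is padded out to two whole pages before reaching the
 * allocator.
 */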

int __nvgpu_vm_free_va(struct vm_gk20a *vm, u64 addr,
		       enum gmmu_pgsz_gk20a pgsz_idx)
{
	struct nvgpu_allocator *vma = vm->vma[pgsz_idx];

	nvgpu_free(vma, addr);

	return 0;
}

void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch)
{
	memset(mapping_batch, 0, sizeof(*mapping_batch));
	mapping_batch->gpu_l2_flushed = false;
	mapping_batch->need_tlb_invalidate = false;
}

void nvgpu_vm_mapping_batch_finish_locked(
	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *mapping_batch)
{
	/* hanging kref_put batch pointer? */
	WARN_ON(vm->kref_put_batch == mapping_batch);

	if (mapping_batch->need_tlb_invalidate) {
		struct gk20a *g = gk20a_from_vm(vm);
		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
	}
}

void nvgpu_vm_mapping_batch_finish(struct vm_gk20a *vm,
				   struct vm_gk20a_mapping_batch *mapping_batch)
{
	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	nvgpu_vm_mapping_batch_finish_locked(vm, mapping_batch);
	nvgpu_mutex_release(&vm->update_gmmu_lock);
}
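
/*
 * Sketch of the intended batching pattern (illustrative; addr1/addr2 are
 * hypothetical GPU VAs). Funnelling several unmaps through one batch defers
 * the TLB invalidate to batch_finish instead of issuing one per unmap:
 *
 *	struct vm_gk20a_mapping_batch batch;
 *
 *	nvgpu_vm_mapping_batch_start(&batch);
 *	nvgpu_vm_unmap(vm, addr1, &batch);
 *	nvgpu_vm_unmap(vm, addr2, &batch);
 *	nvgpu_vm_mapping_batch_finish(vm, &batch);
 */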

/*
 * Determine if the passed address space can support big pages or not.
 */
int nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
{
	u64 mask = ((u64)vm->big_page_size << 10) - 1;

	if (base & mask || size & mask)
		return 0;
	return 1;
}
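
/*
 * Illustrative check: with a 64K big page size the mask above covers
 * (64K << 10) - 1 = 64MB - 1, so both base and size must be 64MB-aligned
 * for the VM to report big-page support. The << 10 presumably reflects a
 * PDE spanning 1024 big pages.
 */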

/*
 * Initialize a semaphore pool. Just return successfully if we do not need
 * semaphores (i.e. when sync-pts are active).
 */
static int nvgpu_init_sema_pool(struct vm_gk20a *vm)
{
	struct nvgpu_semaphore_sea *sema_sea;
	struct mm_gk20a *mm = vm->mm;
	struct gk20a *g = mm->g;
	int err;

	/*
	 * Don't waste the memory on semaphores if we don't need them.
	 */
	if (nvgpu_is_enabled(g, NVGPU_HAS_SYNCPOINTS))
		return 0;

	if (vm->sema_pool)
		return 0;

	sema_sea = nvgpu_semaphore_sea_create(g);
	if (!sema_sea)
		return -ENOMEM;

	vm->sema_pool = nvgpu_semaphore_pool_alloc(sema_sea);
	if (!vm->sema_pool)
		return -ENOMEM;

	/*
	 * Allocate a chunk of GPU VA space for mapping the semaphores. We will
	 * do a fixed alloc in the kernel VM so that all channels have the same
	 * RO address range for the semaphores.
	 *
	 * !!! TODO: cleanup.
	 */
	sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel,
					     vm->va_limit -
					     mm->channel.kernel_size,
					     512 * PAGE_SIZE,
					     SZ_4K);
	if (!sema_sea->gpu_va)
		return -ENOMEM;

	err = nvgpu_semaphore_pool_map(vm->sema_pool, vm);
	if (err) {
		nvgpu_semaphore_pool_unmap(vm->sema_pool, vm);
		nvgpu_free(vm->vma[gmmu_page_size_small],
			   vm->sema_pool->gpu_va);
		return err;
	}

	return 0;
}

static int __nvgpu_vm_init(struct mm_gk20a *mm,
			   struct vm_gk20a *vm,
			   u32 big_page_size,
			   u64 low_hole,
			   u64 kernel_reserved,
			   u64 aperture_size,
			   bool big_pages,
			   bool userspace_managed,
			   char *name)
{
	int err;
	char alloc_name[32];
	u64 kernel_vma_flags;
	u64 user_vma_start, user_vma_limit;
	u64 user_lp_vma_start, user_lp_vma_limit;
	u64 kernel_vma_start, kernel_vma_limit;
	struct gk20a *g = gk20a_from_mm(mm);

	if (WARN_ON(kernel_reserved + low_hole > aperture_size))
		return -ENOMEM;

	nvgpu_log_info(g, "Init space for %s: valimit=0x%llx, "
		       "LP size=0x%x lowhole=0x%llx",
		       name, aperture_size,
		       (unsigned int)big_page_size, low_hole);

	vm->mm = mm;

	vm->gmmu_page_sizes[gmmu_page_size_small] = SZ_4K;
	vm->gmmu_page_sizes[gmmu_page_size_big] = big_page_size;
	vm->gmmu_page_sizes[gmmu_page_size_kernel] = SZ_4K;

	/* Set up vma pointers. */
	vm->vma[gmmu_page_size_small] = &vm->user;
	vm->vma[gmmu_page_size_big] = &vm->user;
	vm->vma[gmmu_page_size_kernel] = &vm->kernel;
	if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
		vm->vma[gmmu_page_size_big] = &vm->user_lp;

	vm->va_start = low_hole;
	vm->va_limit = aperture_size;

	vm->big_page_size = vm->gmmu_page_sizes[gmmu_page_size_big];
	vm->userspace_managed = userspace_managed;
	vm->mmu_levels = g->ops.mm.get_mmu_levels(g, vm->big_page_size);

#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
	if (g->is_virtual && userspace_managed) {
		nvgpu_err(g, "vGPU: no userspace managed addr space support");
		return -ENOSYS;
	}
	if (g->is_virtual && vgpu_vm_init(g, vm)) {
		nvgpu_err(g, "Failed to init vGPU VM!");
		return -ENOMEM;
	}
#endif

	/* Initialize the page table data structures. */
	strncpy(vm->name, name, sizeof(vm->name) - 1);
	vm->name[sizeof(vm->name) - 1] = '\0';
	err = nvgpu_gmmu_init_page_table(vm);
	if (err)
		goto clean_up_vgpu_vm;

	/* Setup vma limits. */
	if (kernel_reserved + low_hole < aperture_size) {
		/*
		 * If big_pages are disabled for this VM then it only makes
		 * sense to make one VM, same as if the unified address flag
		 * is set.
		 */
		if (!big_pages ||
		    nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES)) {
			user_vma_start = low_hole;
			user_vma_limit = vm->va_limit - kernel_reserved;
			user_lp_vma_start = user_vma_limit;
			user_lp_vma_limit = user_vma_limit;
		} else {
			user_vma_start = low_hole;
			user_vma_limit = __nv_gmmu_va_small_page_limit();
			user_lp_vma_start = __nv_gmmu_va_small_page_limit();
			user_lp_vma_limit = vm->va_limit - kernel_reserved;
		}
	} else {
		user_vma_start = 0;
		user_vma_limit = 0;
		user_lp_vma_start = 0;
		user_lp_vma_limit = 0;
	}
	kernel_vma_start = vm->va_limit - kernel_reserved;
	kernel_vma_limit = vm->va_limit;

	nvgpu_log_info(g, "user_vma [0x%llx,0x%llx)",
		       user_vma_start, user_vma_limit);
	nvgpu_log_info(g, "user_lp_vma [0x%llx,0x%llx)",
		       user_lp_vma_start, user_lp_vma_limit);
	nvgpu_log_info(g, "kernel_vma [0x%llx,0x%llx)",
		       kernel_vma_start, kernel_vma_limit);

	if (WARN_ON(user_vma_start > user_vma_limit) ||
	    WARN_ON(user_lp_vma_start > user_lp_vma_limit) ||
	    WARN_ON(kernel_vma_start >= kernel_vma_limit)) {
		err = -EINVAL;
		goto clean_up_page_tables;
	}

	kernel_vma_flags = (kernel_reserved + low_hole) == aperture_size ?
		0 : GPU_ALLOC_GVA_SPACE;

	/*
	 * A "user" area only makes sense for the GVA spaces. For VMs where
	 * there is no "user" area user_vma_start will be equal to
	 * user_vma_limit (i.e. a zero-sized space). In such a situation the
	 * kernel area must be non-zero in length.
	 */
	if (user_vma_start >= user_vma_limit &&
	    kernel_vma_start >= kernel_vma_limit) {
		err = -EINVAL;
		goto clean_up_page_tables;
	}

	/*
	 * Determine if big pages are possible in this VM. If a split address
	 * space is used then check the user_lp vma instead of the user vma.
	 */
	if (nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
		vm->big_pages = big_pages &&
			nvgpu_big_pages_possible(vm, user_vma_start,
					user_vma_limit - user_vma_start);
	else
		vm->big_pages = big_pages &&
			nvgpu_big_pages_possible(vm, user_lp_vma_start,
					user_lp_vma_limit - user_lp_vma_start);

	/*
	 * User VMA.
	 */
	if (user_vma_start < user_vma_limit) {
		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name);
		err = __nvgpu_buddy_allocator_init(g, &vm->user,
						   vm, alloc_name,
						   user_vma_start,
						   user_vma_limit -
						   user_vma_start,
						   SZ_4K,
						   GPU_BALLOC_MAX_ORDER,
						   GPU_ALLOC_GVA_SPACE);
		if (err)
			goto clean_up_page_tables;
	} else {
		/*
		 * Make these allocator pointers point to the kernel allocator
		 * since we still use the legacy notion of page size to choose
		 * the allocator.
		 */
		vm->vma[0] = &vm->kernel;
		vm->vma[1] = &vm->kernel;
	}

	/*
	 * User VMA for large pages when a split address range is used.
	 */
	if (user_lp_vma_start < user_lp_vma_limit) {
		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s_lp", name);
		err = __nvgpu_buddy_allocator_init(g, &vm->user_lp,
						   vm, alloc_name,
						   user_lp_vma_start,
						   user_lp_vma_limit -
						   user_lp_vma_start,
						   vm->big_page_size,
						   GPU_BALLOC_MAX_ORDER,
						   GPU_ALLOC_GVA_SPACE);
		if (err)
			goto clean_up_allocators;
	}

	/*
	 * Kernel VMA. Must always exist for an address space.
	 */
	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name);
	err = __nvgpu_buddy_allocator_init(g, &vm->kernel,
					   vm, alloc_name,
					   kernel_vma_start,
					   kernel_vma_limit - kernel_vma_start,
					   SZ_4K,
					   GPU_BALLOC_MAX_ORDER,
					   kernel_vma_flags);
	if (err)
		goto clean_up_allocators;

	vm->mapped_buffers = NULL;

	nvgpu_mutex_init(&vm->update_gmmu_lock);
	nvgpu_ref_init(&vm->ref);
	nvgpu_init_list_node(&vm->vm_area_list);

	/*
	 * This is only necessary for channel address spaces. The best way to
	 * distinguish channel address spaces from other address spaces is by
	 * size - if the address space is 4GB or less, it's not a channel.
	 */
	if (vm->va_limit > SZ_4G) {
		err = nvgpu_init_sema_pool(vm);
		if (err)
			goto clean_up_allocators;
	}

	return 0;

clean_up_allocators:
	if (nvgpu_alloc_initialized(&vm->kernel))
		nvgpu_alloc_destroy(&vm->kernel);
	if (nvgpu_alloc_initialized(&vm->user))
		nvgpu_alloc_destroy(&vm->user);
	if (nvgpu_alloc_initialized(&vm->user_lp))
		nvgpu_alloc_destroy(&vm->user_lp);
clean_up_page_tables:
	/* Cleans up nvgpu_gmmu_init_page_table() */
	__nvgpu_pd_cache_free_direct(g, &vm->pdb);
clean_up_vgpu_vm:
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
	if (g->is_virtual)
		vgpu_vm_remove(vm);
#endif
	return err;
}

/**
 * nvgpu_vm_init() - Initialize an address space.
 *
 * @mm - Parent MM.
 * @vm - The VM to init.
 * @big_page_size - Size of big pages associated with this VM.
 * @low_hole - The size of the low hole (unaddressable memory at the bottom of
 *	       the address space).
 * @kernel_reserved - Space reserved for kernel only allocations.
 * @aperture_size - Total size of the aperture.
 * @big_pages - If true then big pages are possible in the VM. Note this does
 *		not guarantee that big pages will be possible.
 * @userspace_managed - If true then userspace, not the kernel, manages the
 *			mappings in this VM.
 * @name - Name of the address space.
 *
 * This function initializes an address space according to the following map:
 *
 *     +--+ 0x0
 *     |  |
 *     +--+ @low_hole
 *     |  |
 *     ~  ~   This is the "user" section.
 *     |  |
 *     +--+ @aperture_size - @kernel_reserved
 *     |  |
 *     ~  ~   This is the "kernel" section.
 *     |  |
 *     +--+ @aperture_size
 *
 * The user section is therefore whatever is left over after the @low_hole and
 * @kernel_reserved memory have been portioned out. The @kernel_reserved
 * section is always present at the top of the memory space and the @low_hole
 * is always at the bottom.
 *
 * For certain address spaces a "user" section makes no sense (bar1, etc) so in
 * such cases the @kernel_reserved and @low_hole should sum to exactly
 * @aperture_size.
 */
struct vm_gk20a *nvgpu_vm_init(struct gk20a *g,
			       u32 big_page_size,
			       u64 low_hole,
			       u64 kernel_reserved,
			       u64 aperture_size,
			       bool big_pages,
			       bool userspace_managed,
			       char *name)
{
	struct vm_gk20a *vm = nvgpu_kzalloc(g, sizeof(*vm));

	if (!vm)
		return NULL;

	if (__nvgpu_vm_init(&g->mm, vm, big_page_size, low_hole,
			    kernel_reserved, aperture_size, big_pages,
			    userspace_managed, name)) {
		nvgpu_kfree(g, vm);
		return NULL;
	}

	return vm;
}
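
/*
 * Illustrative call (hypothetical sizes; a real caller should take its sizes
 * from the relevant mm_gk20a channel parameters): a 128GB VM with 64K big
 * pages, a 64MB low hole and 4GB reserved for the kernel:
 *
 *	struct vm_gk20a *vm;
 *
 *	vm = nvgpu_vm_init(g, SZ_64K, SZ_64M, SZ_4G,
 *			   128ULL * SZ_1G, true, false, "as_42");
 *	if (!vm)
 *		return -ENOMEM;
 *
 * The returned VM holds one reference; it is torn down when the last
 * nvgpu_vm_put() drops that reference.
 */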

/*
 * Cleanup the VM!
 */
static void __nvgpu_vm_remove(struct vm_gk20a *vm)
{
	struct nvgpu_mapped_buf *mapped_buffer;
	struct nvgpu_vm_area *vm_area, *vm_area_tmp;
	struct nvgpu_rbtree_node *node = NULL;
	struct gk20a *g = vm->mm->g;

	/*
	 * Do this outside of the update_gmmu_lock since unmapping the
	 * semaphore pool involves unmapping a GMMU mapping which means
	 * acquiring the update_gmmu_lock.
	 */
	if (!nvgpu_is_enabled(g, NVGPU_HAS_SYNCPOINTS)) {
		if (vm->sema_pool) {
			nvgpu_semaphore_pool_unmap(vm->sema_pool, vm);
			nvgpu_semaphore_pool_put(vm->sema_pool);
		}
	}

#if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_TEGRA_19x_GPU)
	if (nvgpu_mem_is_valid(&g->syncpt_mem) && vm->syncpt_ro_map_gpu_va)
		nvgpu_gmmu_unmap(vm, &g->syncpt_mem,
				 vm->syncpt_ro_map_gpu_va);
#endif

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);

	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
	while (node) {
		mapped_buffer = mapped_buffer_from_rbtree_node(node);
		__nvgpu_vm_unmap(mapped_buffer, NULL);
		nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
	}

	/* destroy remaining reserved memory areas */
	nvgpu_list_for_each_entry_safe(vm_area, vm_area_tmp,
				       &vm->vm_area_list,
				       nvgpu_vm_area, vm_area_list) {
		nvgpu_list_del(&vm_area->vm_area_list);
		nvgpu_kfree(vm->mm->g, vm_area);
	}

	if (nvgpu_alloc_initialized(&vm->kernel))
		nvgpu_alloc_destroy(&vm->kernel);
	if (nvgpu_alloc_initialized(&vm->user))
		nvgpu_alloc_destroy(&vm->user);
	if (nvgpu_alloc_initialized(&vm->user_lp))
		nvgpu_alloc_destroy(&vm->user_lp);

	nvgpu_vm_free_entries(vm, &vm->pdb);

#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
	if (g->is_virtual)
		vgpu_vm_remove(vm);
#endif

	nvgpu_mutex_release(&vm->update_gmmu_lock);

	nvgpu_kfree(g, vm);
}

static void __nvgpu_vm_remove_ref(struct nvgpu_ref *ref)
{
	struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);

	__nvgpu_vm_remove(vm);
}

void nvgpu_vm_get(struct vm_gk20a *vm)
{
	nvgpu_ref_get(&vm->ref);
}

void nvgpu_vm_put(struct vm_gk20a *vm)
{
	nvgpu_ref_put(&vm->ref, __nvgpu_vm_remove_ref);
}

int nvgpu_insert_mapped_buf(struct vm_gk20a *vm,
			    struct nvgpu_mapped_buf *mapped_buffer)
{
	mapped_buffer->node.key_start = mapped_buffer->addr;
	mapped_buffer->node.key_end = mapped_buffer->addr + mapped_buffer->size;

	nvgpu_rbtree_insert(&mapped_buffer->node, &vm->mapped_buffers);

	return 0;
}

void nvgpu_remove_mapped_buf(struct vm_gk20a *vm,
			     struct nvgpu_mapped_buf *mapped_buffer)
{
	nvgpu_rbtree_unlink(&mapped_buffer->node, &vm->mapped_buffers);
}

struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf(
	struct vm_gk20a *vm, u64 addr)
{
	struct nvgpu_rbtree_node *node = NULL;
	struct nvgpu_rbtree_node *root = vm->mapped_buffers;

	nvgpu_rbtree_search(addr, &node, root);
	if (!node)
		return NULL;

	return mapped_buffer_from_rbtree_node(node);
}

struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_range(
	struct vm_gk20a *vm, u64 addr)
{
	struct nvgpu_rbtree_node *node = NULL;
	struct nvgpu_rbtree_node *root = vm->mapped_buffers;

	nvgpu_rbtree_range_search(addr, &node, root);
	if (!node)
		return NULL;

	return mapped_buffer_from_rbtree_node(node);
}

struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_less_than(
	struct vm_gk20a *vm, u64 addr)
{
	struct nvgpu_rbtree_node *node = NULL;
	struct nvgpu_rbtree_node *root = vm->mapped_buffers;

	nvgpu_rbtree_less_than_search(addr, &node, root);
	if (!node)
		return NULL;

	return mapped_buffer_from_rbtree_node(node);
}
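
/*
 * The three lookups above differ only in how they probe the rbtree, which is
 * keyed by [addr, addr + size). Assuming the usual nvgpu rbtree semantics:
 * _find_mapped_buf() matches a mapping's exact start address,
 * _find_mapped_buf_range() matches any address falling inside a mapping, and
 * _find_mapped_buf_less_than() returns the nearest mapping below the given
 * address.
 */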

int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
			 struct nvgpu_mapped_buf ***mapped_buffers,
			 int *num_buffers)
{
	struct nvgpu_mapped_buf *mapped_buffer;
	struct nvgpu_mapped_buf **buffer_list;
	struct nvgpu_rbtree_node *node = NULL;
	int i = 0;

	if (vm->userspace_managed) {
		*mapped_buffers = NULL;
		*num_buffers = 0;
		return 0;
	}

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);

	buffer_list = nvgpu_big_zalloc(vm->mm->g, sizeof(*buffer_list) *
				       vm->num_user_mapped_buffers);
	if (!buffer_list) {
		nvgpu_mutex_release(&vm->update_gmmu_lock);
		return -ENOMEM;
	}

	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
	while (node) {
		mapped_buffer = mapped_buffer_from_rbtree_node(node);
		buffer_list[i] = mapped_buffer;
		nvgpu_ref_get(&mapped_buffer->ref);
		i++;
		nvgpu_rbtree_enum_next(&node, node);
	}

	BUG_ON(i != vm->num_user_mapped_buffers);

	*num_buffers = vm->num_user_mapped_buffers;
	*mapped_buffers = buffer_list;

	nvgpu_mutex_release(&vm->update_gmmu_lock);

	return 0;
}

void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
			  struct nvgpu_mapped_buf **mapped_buffers,
			  int num_buffers)
{
	int i;
	struct vm_gk20a_mapping_batch batch;

	if (num_buffers == 0)
		return;

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	nvgpu_vm_mapping_batch_start(&batch);
	vm->kref_put_batch = &batch;

	for (i = 0; i < num_buffers; ++i)
		nvgpu_ref_put(&mapped_buffers[i]->ref, __nvgpu_vm_unmap_ref);

	vm->kref_put_batch = NULL;
	nvgpu_vm_mapping_batch_finish_locked(vm, &batch);
	nvgpu_mutex_release(&vm->update_gmmu_lock);

	nvgpu_big_free(vm->mm->g, mapped_buffers);
}
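
/*
 * Sketch of how the pair above is intended to be used (illustrative; the
 * inspect() consumer is hypothetical and error handling is elided):
 *
 *	struct nvgpu_mapped_buf **bufs;
 *	int num, i;
 *
 *	if (nvgpu_vm_get_buffers(vm, &bufs, &num))
 *		return;
 *	for (i = 0; i < num; i++)
 *		inspect(bufs[i]);
 *	nvgpu_vm_put_buffers(vm, bufs, num);
 *
 * Each buffer's refcount is raised while the snapshot is held, so the
 * mappings cannot disappear between the get and the put.
 */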

struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
				      struct nvgpu_os_buffer *os_buf,
				      struct nvgpu_sgt *sgt,
				      u64 map_addr,
				      u64 map_size,
				      u64 phys_offset,
				      int rw,
				      u32 flags,
				      s16 compr_kind,
				      s16 incompr_kind,
				      struct vm_gk20a_mapping_batch *batch,
				      enum nvgpu_aperture aperture)
{
	struct gk20a *g = gk20a_from_vm(vm);
	struct nvgpu_mapped_buf *mapped_buffer = NULL;
	struct nvgpu_ctag_buffer_info binfo = { 0 };
	struct nvgpu_vm_area *vm_area = NULL;
	int err = 0;
	u64 align;
	u32 ctag_offset = 0;
	bool clear_ctags = false;
	bool va_allocated = true;

	/*
	 * The kind used as part of the key for map caching. HW may
	 * actually be programmed with the fallback kind in case the
	 * key kind is compressible but we're out of comptags.
	 */
	s16 map_key_kind;

	/*
	 * The actual GMMU PTE kind.
	 */
	u8 pte_kind;

	if (vm->userspace_managed &&
	    !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
		nvgpu_err(g,
			  "non-fixed-offset mapping not available on "
			  "userspace managed address spaces");
		return ERR_PTR(-EINVAL);
	}

	binfo.flags = flags;
	binfo.size = nvgpu_os_buf_get_size(os_buf);
	binfo.compr_kind = (vm->enable_ctag && compr_kind != NV_KIND_INVALID ?
			    compr_kind : NV_KIND_INVALID);
	binfo.incompr_kind = incompr_kind;

	if (compr_kind != NV_KIND_INVALID)
		map_key_kind = compr_kind;
	else
		map_key_kind = incompr_kind;

	/*
	 * Check if this buffer is already mapped.
	 */
	if (!vm->userspace_managed) {
		nvgpu_mutex_acquire(&vm->update_gmmu_lock);
		mapped_buffer = nvgpu_vm_find_mapping(vm,
						      os_buf,
						      map_addr,
						      flags,
						      map_key_kind);
		nvgpu_mutex_release(&vm->update_gmmu_lock);

		if (mapped_buffer) {
			nvgpu_ref_get(&mapped_buffer->ref);
			return mapped_buffer;
		}
	}

	/*
	 * Generate a new mapping!
	 */
	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
	if (!mapped_buffer) {
		nvgpu_warn(g, "oom allocating tracking buffer");
		return ERR_PTR(-ENOMEM);
	}

	align = nvgpu_sgt_alignment(g, sgt);
	if (g->mm.disable_bigpage)
		binfo.pgsz_idx = gmmu_page_size_small;
	else
		binfo.pgsz_idx = __get_pte_size(vm, map_addr,
						min_t(u64, binfo.size, align));
	map_size = map_size ? map_size : binfo.size;
	map_size = ALIGN(map_size, SZ_4K);

	if ((map_size > binfo.size) ||
	    (phys_offset > (binfo.size - map_size))) {
		err = -EINVAL;
		goto clean_up_nolock;
	}

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);

	/*
	 * Check if we should use a fixed offset for mapping this buffer.
	 */
	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
		err = nvgpu_vm_area_validate_buffer(vm,
						    map_addr,
						    map_size,
						    binfo.pgsz_idx,
						    &vm_area);
		if (err)
			goto clean_up;

		va_allocated = false;
	}

	err = nvgpu_vm_compute_compression(vm, &binfo);
	if (err) {
		nvgpu_err(g, "failure setting up compression");
		goto clean_up;
	}

	if (binfo.compr_kind != NV_KIND_INVALID) {
		struct gk20a_comptags comptags = { 0 };

		/*
		 * Get the comptags state, alloc if necessary.
		 */
		err = gk20a_alloc_or_get_comptags(g, os_buf,
						  &g->gr.comp_tags,
						  &comptags);
		if (err) {
			/*
			 * This is an irrecoverable failure and we need to
			 * abort. In particular, it is not safe to proceed
			 * with the incompressible fallback, since we cannot
			 * mark our alloc failure anywhere. Later we would
			 * retry allocation and break compressible map
			 * aliasing.
			 */
			nvgpu_err(g, "Error %d setting up comptags", err);
			goto clean_up;
		}

		/*
		 * Newly allocated comptags need to be cleared.
		 */
		if (comptags.needs_clear) {
			if (g->ops.ltc.cbc_ctrl) {
				if (gk20a_comptags_start_clear(os_buf)) {
					err = g->ops.ltc.cbc_ctrl(
						g, gk20a_cbc_op_clear,
						comptags.offset,
						(comptags.offset +
						 comptags.lines - 1));
					gk20a_comptags_finish_clear(
						os_buf, err == 0);
					if (err)
						goto clean_up;
				}
			} else {
				/*
				 * Cleared as part of gmmu map.
				 */
				clear_ctags = true;
			}
		}

		/*
		 * Store the ctag offset for later use if we got the comptags.
		 */
		if (comptags.lines)
			ctag_offset = comptags.offset;
	}

	/*
	 * Figure out the kind and ctag offset for the GMMU page tables.
	 */
	if (binfo.compr_kind != NV_KIND_INVALID && ctag_offset) {
		/*
		 * Adjust the ctag_offset as per the buffer map offset.
		 */
		ctag_offset += phys_offset >>
			       ilog2(g->ops.fb.compression_page_size(g));
		pte_kind = binfo.compr_kind;
	} else if (binfo.incompr_kind != NV_KIND_INVALID) {
		/*
		 * Incompressible kind, ctag offset will not be programmed.
		 */
		ctag_offset = 0;
		pte_kind = binfo.incompr_kind;
	} else {
		/*
		 * Caller required compression, but we cannot provide it.
		 */
		nvgpu_err(g, "No comptags and no incompressible fallback kind");
		err = -ENOMEM;
		goto clean_up;
	}

	if (clear_ctags)
		clear_ctags = gk20a_comptags_start_clear(os_buf);

	map_addr = g->ops.mm.gmmu_map(vm,
				      map_addr,
				      sgt,
				      phys_offset,
				      map_size,
				      binfo.pgsz_idx,
				      pte_kind,
				      ctag_offset,
				      flags,
				      rw,
				      clear_ctags,
				      false,
				      false,
				      batch,
				      aperture);

	if (clear_ctags)
		gk20a_comptags_finish_clear(os_buf, map_addr != 0);

	if (!map_addr) {
		err = -ENOMEM;
		goto clean_up;
	}

	nvgpu_init_list_node(&mapped_buffer->buffer_list);
	nvgpu_ref_init(&mapped_buffer->ref);
	mapped_buffer->addr = map_addr;
	mapped_buffer->size = map_size;
	mapped_buffer->pgsz_idx = binfo.pgsz_idx;
	mapped_buffer->vm = vm;
	mapped_buffer->flags = flags;
	mapped_buffer->kind = map_key_kind;
	mapped_buffer->va_allocated = va_allocated;
	mapped_buffer->vm_area = vm_area;

	err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
	if (err) {
		nvgpu_err(g, "failed to insert into mapped buffer tree");
		goto clean_up;
	}

	vm->num_user_mapped_buffers++;

	if (vm_area) {
		nvgpu_list_add_tail(&mapped_buffer->buffer_list,
				    &vm_area->buffer_list_head);
		mapped_buffer->vm_area = vm_area;
	}

	nvgpu_mutex_release(&vm->update_gmmu_lock);

	return mapped_buffer;

clean_up:
	if (mapped_buffer->addr)
		g->ops.mm.gmmu_unmap(vm,
				     mapped_buffer->addr,
				     mapped_buffer->size,
				     mapped_buffer->pgsz_idx,
				     mapped_buffer->va_allocated,
				     gk20a_mem_flag_none,
				     mapped_buffer->vm_area ?
				     mapped_buffer->vm_area->sparse : false,
				     NULL);
	nvgpu_mutex_release(&vm->update_gmmu_lock);
clean_up_nolock:
	nvgpu_kfree(g, mapped_buffer);

	return ERR_PTR(err);
}

/*
 * Really unmap. This does the real GMMU unmap and removes the mapping from the
 * VM map tracking tree (and vm_area list if necessary).
 */
static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer,
			     struct vm_gk20a_mapping_batch *batch)
{
	struct vm_gk20a *vm = mapped_buffer->vm;
	struct gk20a *g = vm->mm->g;

	vm->num_user_mapped_buffers--;

	g->ops.mm.gmmu_unmap(vm,
			     mapped_buffer->addr,
			     mapped_buffer->size,
			     mapped_buffer->pgsz_idx,
			     mapped_buffer->va_allocated,
			     gk20a_mem_flag_none,
			     mapped_buffer->vm_area ?
			     mapped_buffer->vm_area->sparse : false,
			     batch);

	/*
	 * Remove from mapped buffer tree. Then delete the buffer from the
	 * linked list of mapped buffers; though note: not all mapped buffers
	 * are part of a vm_area.
	 */
	nvgpu_remove_mapped_buf(vm, mapped_buffer);
	nvgpu_list_del(&mapped_buffer->buffer_list);

	/*
	 * OS specific freeing. This is done after the generic freeing in case
	 * the generic freeing relies on some component of the OS specific
	 * nvgpu_mapped_buf in some abstraction or the like.
	 */
	nvgpu_vm_unmap_system(mapped_buffer);

	nvgpu_kfree(g, mapped_buffer);
}

void __nvgpu_vm_unmap_ref(struct nvgpu_ref *ref)
{
	struct nvgpu_mapped_buf *mapped_buffer =
		container_of(ref, struct nvgpu_mapped_buf, ref);

	__nvgpu_vm_unmap(mapped_buffer, mapped_buffer->vm->kref_put_batch);
}

/*
 * For fixed-offset buffers we must sync the buffer. That means we wait for
 * the buffer to hit a ref-count of 1 before proceeding.
 *
 * Note: this requires the update_gmmu_lock to be held since we release it
 * and re-acquire it in this function.
 */
static int nvgpu_vm_unmap_sync_buffer(struct vm_gk20a *vm,
				      struct nvgpu_mapped_buf *mapped_buffer)
{
	struct nvgpu_timeout timeout;
	int ret = 0;

	nvgpu_mutex_release(&vm->update_gmmu_lock);

	/*
	 * 500ms timer, polling the refcount every 10ms.
	 */
	nvgpu_timeout_init(vm->mm->g, &timeout, 50, NVGPU_TIMER_CPU_TIMER);

	do {
		if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) == 1)
			break;
		nvgpu_msleep(10);
	} while (!nvgpu_timeout_expired_msg(&timeout,
					    "sync-unmap failed on 0x%llx",
					    mapped_buffer->addr));

	if (nvgpu_timeout_expired(&timeout))
		ret = -ETIMEDOUT;

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);

	return ret;
}

void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset,
		    struct vm_gk20a_mapping_batch *batch)
{
	struct nvgpu_mapped_buf *mapped_buffer;

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);

	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
	if (!mapped_buffer)
		goto done;

	if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
		if (nvgpu_vm_unmap_sync_buffer(vm, mapped_buffer))
			/*
			 * Looks like we have failed... Better not continue in
			 * case the buffer is in use.
			 */
			goto done;
	}

	/*
	 * Make sure we have access to the batch if we end up calling through
	 * to the unmap_ref function.
	 */
	vm->kref_put_batch = batch;
	nvgpu_ref_put(&mapped_buffer->ref, __nvgpu_vm_unmap_ref);
	vm->kref_put_batch = NULL;

done:
	nvgpu_mutex_release(&vm->update_gmmu_lock);
	return;
}

static int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
					struct nvgpu_ctag_buffer_info *binfo)
{
	bool kind_compressible = (binfo->compr_kind != NV_KIND_INVALID);
	struct gk20a *g = gk20a_from_vm(vm);

	if (kind_compressible &&
	    vm->gmmu_page_sizes[binfo->pgsz_idx] <
	    g->ops.fb.compressible_page_size(g)) {
		/*
		 * Let's double check that there is a fallback kind
		 */
		if (binfo->incompr_kind == NV_KIND_INVALID) {
			nvgpu_err(g,
				  "Unsupported page size for compressible "
				  "kind, but no fallback kind");
			return -EINVAL;
		} else {
			nvgpu_log(g, gpu_dbg_map,
				  "Unsupported page size for compressible "
				  "kind, demoting to incompressible");
			binfo->compr_kind = NV_KIND_INVALID;
			kind_compressible = false;
		}
	}

	return 0;
}
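
/*
 * Example of the demotion above (illustrative sizes): if a buffer ends up
 * mapped with 4K pages while g->ops.fb.compressible_page_size() reports 64K,
 * a compressible kind is demoted to the caller's incompressible fallback
 * kind; if no fallback was supplied, the map attempt fails with -EINVAL.
 */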