path: root/drivers/gpu/nvgpu/common/mm/vm.c
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/vm.c')
-rw-r--r--  drivers/gpu/nvgpu/common/mm/vm.c  1145
1 file changed, 1145 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
new file mode 100644
index 00000000..ebe8e381
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -0,0 +1,1145 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/bug.h>
24#include <uapi/linux/nvgpu.h>
25
26#include <nvgpu/log.h>
27#include <nvgpu/dma.h>
28#include <nvgpu/vm.h>
29#include <nvgpu/vm_area.h>
30#include <nvgpu/gmmu.h>
31#include <nvgpu/lock.h>
32#include <nvgpu/list.h>
33#include <nvgpu/rbtree.h>
34#include <nvgpu/semaphore.h>
35#include <nvgpu/enabled.h>
36
37#include <nvgpu/vgpu/vm.h>
38
39#include "gk20a/gk20a.h"
40#include "gk20a/mm_gk20a.h"
41
42struct nvgpu_ctag_buffer_info {
43 u64 size;
44 enum gmmu_pgsz_gk20a pgsz_idx;
45 u32 flags;
46
47 s16 compr_kind;
48 s16 incompr_kind;
49
50 u32 ctag_lines;
51};
52
53static int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
54 struct nvgpu_ctag_buffer_info *binfo);
55
56static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer,
57 struct vm_gk20a_mapping_batch *batch);
58
59int vm_aspace_id(struct vm_gk20a *vm)
60{
61 return vm->as_share ? vm->as_share->id : -1;
62}
63
64static void __nvgpu_vm_free_entries(struct vm_gk20a *vm,
65 struct nvgpu_gmmu_pd *pd,
66 int level)
67{
68 int i;
69
70 if (pd->mem) {
71 __nvgpu_pd_free(vm, pd);
72 pd->mem = NULL;
73 }
74
75 if (pd->entries) {
76 for (i = 0; i < pd->num_entries; i++)
77 __nvgpu_vm_free_entries(vm, &pd->entries[i],
78 level + 1);
79 nvgpu_vfree(vm->mm->g, pd->entries);
80 pd->entries = NULL;
81 }
82}
83
84static void nvgpu_vm_free_entries(struct vm_gk20a *vm,
85 struct nvgpu_gmmu_pd *pdb)
86{
87 struct gk20a *g = vm->mm->g;
88 int i;
89
90 __nvgpu_pd_cache_free_direct(g, pdb);
91
92 if (!pdb->entries)
93 return;
94
95 for (i = 0; i < pdb->num_entries; i++)
96 __nvgpu_vm_free_entries(vm, &pdb->entries[i], 1);
97
98 nvgpu_vfree(g, pdb->entries);
99 pdb->entries = NULL;
100}
101
102u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size,
103 enum gmmu_pgsz_gk20a pgsz_idx)
104
105{
106 struct gk20a *g = vm->mm->g;
107 struct nvgpu_allocator *vma = NULL;
108 u64 addr;
109 u64 page_size = vm->gmmu_page_sizes[pgsz_idx];
110
111 vma = vm->vma[pgsz_idx];
112
113 if (pgsz_idx >= gmmu_nr_page_sizes) {
114 nvgpu_err(g, "(%s) invalid page size requested", vma->name);
115 return 0;
116 }
117
118 if ((pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
 119 nvgpu_err(g, "(%s) unsupported page size requested", vma->name);
120 return 0;
121 }
122
123 /* Be certain we round up to page_size if needed */
124 size = (size + ((u64)page_size - 1)) & ~((u64)page_size - 1);
125
126 addr = nvgpu_alloc(vma, size);
127 if (!addr) {
128 nvgpu_err(g, "(%s) oom: sz=0x%llx", vma->name, size);
129 return 0;
130 }
131
132 return addr;
133}
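
/*
 * A minimal sketch of the round-up arithmetic used above, assuming page_size
 * is a power of two; round_up_pow2() is an illustrative helper, not part of
 * this driver.
 */
static inline u64 round_up_pow2(u64 size, u64 page_size)
{
	/* Add page_size - 1, then clear the low-order bits. */
	return (size + (page_size - 1)) & ~(page_size - 1);
}

/*
 * Example: round_up_pow2(0x1800, SZ_4K) == 0x2000, so a 6 KiB request
 * consumes two 4 KiB pages of GPU VA; an already aligned size is returned
 * unchanged.
 */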
134
135int __nvgpu_vm_free_va(struct vm_gk20a *vm, u64 addr,
136 enum gmmu_pgsz_gk20a pgsz_idx)
137{
138 struct nvgpu_allocator *vma = vm->vma[pgsz_idx];
139
140 nvgpu_free(vma, addr);
141
142 return 0;
143}
144
145void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch)
146{
147 memset(mapping_batch, 0, sizeof(*mapping_batch));
148 mapping_batch->gpu_l2_flushed = false;
149 mapping_batch->need_tlb_invalidate = false;
150}
151
152void nvgpu_vm_mapping_batch_finish_locked(
153 struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *mapping_batch)
154{
155 /* hanging kref_put batch pointer? */
156 WARN_ON(vm->kref_put_batch == mapping_batch);
157
158 if (mapping_batch->need_tlb_invalidate) {
159 struct gk20a *g = gk20a_from_vm(vm);
160 g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
161 }
162}
163
164void nvgpu_vm_mapping_batch_finish(struct vm_gk20a *vm,
165 struct vm_gk20a_mapping_batch *mapping_batch)
166{
167 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
168 nvgpu_vm_mapping_batch_finish_locked(vm, mapping_batch);
169 nvgpu_mutex_release(&vm->update_gmmu_lock);
170}
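
/*
 * A minimal sketch of how a caller might batch several unmaps so that at
 * most one TLB invalidate is issued at the end; example_unmap_many() and the
 * gpu_va[] array are illustrative, not part of the driver API.
 */
static void example_unmap_many(struct vm_gk20a *vm, u64 *gpu_va, int count)
{
	int i;
	struct vm_gk20a_mapping_batch batch;

	nvgpu_vm_mapping_batch_start(&batch);

	for (i = 0; i < count; i++)
		nvgpu_vm_unmap(vm, gpu_va[i], &batch);

	/* Issues the deferred TLB invalidate if any unmap requested one. */
	nvgpu_vm_mapping_batch_finish(vm, &batch);
}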
171
172/*
173 * Determine if the passed address space can support big pages or not.
174 */
175int nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
176{
177 u64 mask = ((u64)vm->big_page_size << 10) - 1;
178
179 if (base & mask || size & mask)
180 return 0;
181 return 1;
182}
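
/*
 * Worked example, assuming a 64 KiB big page size: the mask above is
 * ((u64)SZ_64K << 10) - 1 = 64 MiB - 1, so big pages are only allowed when
 * both base and size are multiples of 64 MiB (an aligned run of 1024 big
 * pages). For instance:
 *
 *   nvgpu_big_pages_possible(vm, 0x04000000, 0x08000000) == 1
 *   nvgpu_big_pages_possible(vm, 0x04010000, 0x08000000) == 0  (base misaligned)
 */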
183
184/*
185 * Initialize a semaphore pool. Just return successfully if we do not need
 186 * semaphores (i.e., when syncpoints are active).
187 */
188static int nvgpu_init_sema_pool(struct vm_gk20a *vm)
189{
190 struct nvgpu_semaphore_sea *sema_sea;
191 struct mm_gk20a *mm = vm->mm;
192 struct gk20a *g = mm->g;
193 int err;
194
195 /*
196 * Don't waste the memory on semaphores if we don't need them.
197 */
198 if (nvgpu_is_enabled(g, NVGPU_HAS_SYNCPOINTS))
199 return 0;
200
201 if (vm->sema_pool)
202 return 0;
203
204 sema_sea = nvgpu_semaphore_sea_create(g);
205 if (!sema_sea)
206 return -ENOMEM;
207
208 vm->sema_pool = nvgpu_semaphore_pool_alloc(sema_sea);
209 if (!vm->sema_pool)
210 return -ENOMEM;
211
212 /*
213 * Allocate a chunk of GPU VA space for mapping the semaphores. We will
214 * do a fixed alloc in the kernel VM so that all channels have the same
215 * RO address range for the semaphores.
216 *
217 * !!! TODO: cleanup.
218 */
219 sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel,
220 vm->va_limit -
221 mm->channel.kernel_size,
222 512 * PAGE_SIZE,
223 SZ_4K);
224 if (!sema_sea->gpu_va) {
225 nvgpu_free(&vm->kernel, sema_sea->gpu_va);
226 nvgpu_vm_put(vm);
227 return -ENOMEM;
228 }
229
230 err = nvgpu_semaphore_pool_map(vm->sema_pool, vm);
231 if (err) {
232 nvgpu_semaphore_pool_unmap(vm->sema_pool, vm);
233 nvgpu_free(vm->vma[gmmu_page_size_small],
234 vm->sema_pool->gpu_va);
235 return err;
236 }
237
238 return 0;
239}
240
241static int __nvgpu_vm_init(struct mm_gk20a *mm,
242 struct vm_gk20a *vm,
243 u32 big_page_size,
244 u64 low_hole,
245 u64 kernel_reserved,
246 u64 aperture_size,
247 bool big_pages,
248 bool userspace_managed,
249 char *name)
250{
251 int err;
252 char alloc_name[32];
253 u64 kernel_vma_flags;
254 u64 user_vma_start, user_vma_limit;
255 u64 user_lp_vma_start, user_lp_vma_limit;
256 u64 kernel_vma_start, kernel_vma_limit;
257 struct gk20a *g = gk20a_from_mm(mm);
258
259 if (WARN_ON(kernel_reserved + low_hole > aperture_size))
260 return -ENOMEM;
261
262 nvgpu_log_info(g, "Init space for %s: valimit=0x%llx, "
263 "LP size=0x%x lowhole=0x%llx",
264 name, aperture_size,
265 (unsigned int)big_page_size, low_hole);
266
267 vm->mm = mm;
268
269 vm->gmmu_page_sizes[gmmu_page_size_small] = SZ_4K;
270 vm->gmmu_page_sizes[gmmu_page_size_big] = big_page_size;
271 vm->gmmu_page_sizes[gmmu_page_size_kernel] = SZ_4K;
272
273 /* Set up vma pointers. */
274 vm->vma[gmmu_page_size_small] = &vm->user;
275 vm->vma[gmmu_page_size_big] = &vm->user;
276 vm->vma[gmmu_page_size_kernel] = &vm->kernel;
277 if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
278 vm->vma[gmmu_page_size_big] = &vm->user_lp;
279
280 vm->va_start = low_hole;
281 vm->va_limit = aperture_size;
282
283 vm->big_page_size = vm->gmmu_page_sizes[gmmu_page_size_big];
284 vm->userspace_managed = userspace_managed;
285 vm->mmu_levels = g->ops.mm.get_mmu_levels(g, vm->big_page_size);
286
287#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
288 if (g->is_virtual && userspace_managed) {
289 nvgpu_err(g, "vGPU: no userspace managed addr space support");
290 return -ENOSYS;
291 }
292 if (g->is_virtual && vgpu_vm_init(g, vm)) {
293 nvgpu_err(g, "Failed to init vGPU VM!");
294 return -ENOMEM;
295 }
296#endif
297
298 /* Initialize the page table data structures. */
299 strncpy(vm->name, name, min(strlen(name), sizeof(vm->name)));
300 err = nvgpu_gmmu_init_page_table(vm);
301 if (err)
302 goto clean_up_vgpu_vm;
303
304 /* Setup vma limits. */
305 if (kernel_reserved + low_hole < aperture_size) {
306 /*
 307 * If big pages are disabled for this VM then it only makes
 308 * sense to create a single user VMA, just as when the unified
 309 * address space flag is set.
310 */
311 if (!big_pages ||
312 nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES)) {
313 user_vma_start = low_hole;
314 user_vma_limit = vm->va_limit - kernel_reserved;
315 user_lp_vma_start = user_vma_limit;
316 user_lp_vma_limit = user_vma_limit;
317 } else {
318 user_vma_start = low_hole;
319 user_vma_limit = __nv_gmmu_va_small_page_limit();
320 user_lp_vma_start = __nv_gmmu_va_small_page_limit();
321 user_lp_vma_limit = vm->va_limit - kernel_reserved;
322 }
323 } else {
324 user_vma_start = 0;
325 user_vma_limit = 0;
326 user_lp_vma_start = 0;
327 user_lp_vma_limit = 0;
328 }
329 kernel_vma_start = vm->va_limit - kernel_reserved;
330 kernel_vma_limit = vm->va_limit;
331
332 nvgpu_log_info(g, "user_vma [0x%llx,0x%llx)",
333 user_vma_start, user_vma_limit);
334 nvgpu_log_info(g, "user_lp_vma [0x%llx,0x%llx)",
335 user_lp_vma_start, user_lp_vma_limit);
336 nvgpu_log_info(g, "kernel_vma [0x%llx,0x%llx)",
337 kernel_vma_start, kernel_vma_limit);
338
339 if (WARN_ON(user_vma_start > user_vma_limit) ||
340 WARN_ON(user_lp_vma_start > user_lp_vma_limit) ||
341 WARN_ON(kernel_vma_start >= kernel_vma_limit)) {
342 err = -EINVAL;
343 goto clean_up_page_tables;
344 }
345
346 kernel_vma_flags = (kernel_reserved + low_hole) == aperture_size ?
347 0 : GPU_ALLOC_GVA_SPACE;
348
349 /*
 350 * A "user" area only makes sense for GVA spaces. For VMs where there
 351 * is no "user" area, user_vma_start will be equal to user_vma_limit
 352 * (i.e., a zero-sized space). In such a situation the kernel
353 * area must be non-zero in length.
354 */
355 if (user_vma_start >= user_vma_limit &&
356 kernel_vma_start >= kernel_vma_limit) {
357 err = -EINVAL;
358 goto clean_up_page_tables;
359 }
360
361 /*
362 * Determine if big pages are possible in this VM. If a split address
363 * space is used then check the user_lp vma instead of the user vma.
364 */
365 if (nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
366 vm->big_pages = big_pages &&
367 nvgpu_big_pages_possible(vm, user_vma_start,
368 user_vma_limit - user_vma_start);
369 else
370 vm->big_pages = big_pages &&
371 nvgpu_big_pages_possible(vm, user_lp_vma_start,
372 user_lp_vma_limit - user_lp_vma_start);
373
374 /*
375 * User VMA.
376 */
377 if (user_vma_start < user_vma_limit) {
378 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name);
379 err = __nvgpu_buddy_allocator_init(g, &vm->user,
380 vm, alloc_name,
381 user_vma_start,
382 user_vma_limit -
383 user_vma_start,
384 SZ_4K,
385 GPU_BALLOC_MAX_ORDER,
386 GPU_ALLOC_GVA_SPACE);
387 if (err)
388 goto clean_up_page_tables;
389 } else {
390 /*
391 * Make these allocator pointers point to the kernel allocator
392 * since we still use the legacy notion of page size to choose
393 * the allocator.
394 */
395 vm->vma[0] = &vm->kernel;
396 vm->vma[1] = &vm->kernel;
397 }
398
399 /*
400 * User VMA for large pages when a split address range is used.
401 */
402 if (user_lp_vma_start < user_lp_vma_limit) {
403 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s_lp", name);
404 err = __nvgpu_buddy_allocator_init(g, &vm->user_lp,
405 vm, alloc_name,
406 user_lp_vma_start,
407 user_lp_vma_limit -
408 user_lp_vma_start,
409 vm->big_page_size,
410 GPU_BALLOC_MAX_ORDER,
411 GPU_ALLOC_GVA_SPACE);
412 if (err)
413 goto clean_up_allocators;
414 }
415
416 /*
417 * Kernel VMA. Must always exist for an address space.
418 */
419 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name);
420 err = __nvgpu_buddy_allocator_init(g, &vm->kernel,
421 vm, alloc_name,
422 kernel_vma_start,
423 kernel_vma_limit - kernel_vma_start,
424 SZ_4K,
425 GPU_BALLOC_MAX_ORDER,
426 kernel_vma_flags);
427 if (err)
428 goto clean_up_allocators;
429
430 vm->mapped_buffers = NULL;
431
432 nvgpu_mutex_init(&vm->update_gmmu_lock);
433 nvgpu_ref_init(&vm->ref);
434 nvgpu_init_list_node(&vm->vm_area_list);
435
436 /*
437 * This is only necessary for channel address spaces. The best way to
438 * distinguish channel address spaces from other address spaces is by
439 * size - if the address space is 4GB or less, it's not a channel.
440 */
441 if (vm->va_limit > SZ_4G) {
442 err = nvgpu_init_sema_pool(vm);
443 if (err)
444 goto clean_up_allocators;
445 }
446
447 return 0;
448
449clean_up_allocators:
450 if (nvgpu_alloc_initialized(&vm->kernel))
451 nvgpu_alloc_destroy(&vm->kernel);
452 if (nvgpu_alloc_initialized(&vm->user))
453 nvgpu_alloc_destroy(&vm->user);
454 if (nvgpu_alloc_initialized(&vm->user_lp))
455 nvgpu_alloc_destroy(&vm->user_lp);
456clean_up_page_tables:
457 /* Cleans up nvgpu_gmmu_init_page_table() */
458 __nvgpu_pd_cache_free_direct(g, &vm->pdb);
459clean_up_vgpu_vm:
460#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
461 if (g->is_virtual)
462 vgpu_vm_remove(vm);
463#endif
464 return err;
465}
466
467/**
 468 * nvgpu_vm_init() - Initialize an address space.
469 *
 470 * @g - The GPU.
 471 * @userspace_managed - Whether userspace manages the user GVA area.
472 * @big_page_size - Size of big pages associated with this VM.
473 * @low_hole - The size of the low hole (unaddressable memory at the bottom of
474 * the address space).
475 * @kernel_reserved - Space reserved for kernel only allocations.
476 * @aperture_size - Total size of the aperture.
477 * @big_pages - If true then big pages are possible in the VM. Note this does
478 * not guarantee that big pages will be possible.
479 * @name - Name of the address space.
480 *
481 * This function initializes an address space according to the following map:
482 *
483 * +--+ 0x0
484 * | |
485 * +--+ @low_hole
486 * | |
487 * ~ ~ This is the "user" section.
488 * | |
489 * +--+ @aperture_size - @kernel_reserved
490 * | |
491 * ~ ~ This is the "kernel" section.
492 * | |
493 * +--+ @aperture_size
494 *
 495 * The user section is therefore whatever is left over after the @low_hole and
 496 * @kernel_reserved memory have been portioned out. The @kernel_reserved is
 497 * always present at the top of the memory space and the @low_hole is always at
498 * the bottom.
499 *
 500 * For certain address spaces a "user" section makes no sense (bar1, etc.), so in
501 * such cases the @kernel_reserved and @low_hole should sum to exactly
502 * @aperture_size.
503 */
504struct vm_gk20a *nvgpu_vm_init(struct gk20a *g,
505 u32 big_page_size,
506 u64 low_hole,
507 u64 kernel_reserved,
508 u64 aperture_size,
509 bool big_pages,
510 bool userspace_managed,
511 char *name)
512{
513 struct vm_gk20a *vm = nvgpu_kzalloc(g, sizeof(*vm));
514
515 if (!vm)
516 return NULL;
517
518 if (__nvgpu_vm_init(&g->mm, vm, big_page_size, low_hole,
519 kernel_reserved, aperture_size, big_pages,
520 userspace_managed, name)) {
521 nvgpu_kfree(g, vm);
522 return NULL;
523 }
524
525 return vm;
526}
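
/*
 * A minimal usage sketch of the layout documented above; example_make_as()
 * and the sizes below are purely illustrative and not taken from any real
 * chip configuration.
 */
static struct vm_gk20a *example_make_as(struct gk20a *g)
{
	/*
	 * 128 GiB aperture with a 64 MiB low hole and 4 GiB reserved for the
	 * kernel at the top; everything in between becomes the "user"
	 * section.
	 */
	return nvgpu_vm_init(g,
			     SZ_64K,			/* big_page_size */
			     SZ_64M,			/* low_hole */
			     4ULL * SZ_1G,		/* kernel_reserved */
			     128ULL * SZ_1G,		/* aperture_size */
			     true,			/* big_pages */
			     false,			/* userspace_managed */
			     "example");
}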
527
528/*
529 * Cleanup the VM!
530 */
531static void __nvgpu_vm_remove(struct vm_gk20a *vm)
532{
533 struct nvgpu_mapped_buf *mapped_buffer;
534 struct nvgpu_vm_area *vm_area, *vm_area_tmp;
535 struct nvgpu_rbtree_node *node = NULL;
536 struct gk20a *g = vm->mm->g;
537
538 /*
539 * Do this outside of the update_gmmu_lock since unmapping the semaphore
 540 * pool involves unmapping a GMMU mapping, which means acquiring the
541 * update_gmmu_lock.
542 */
543 if (!nvgpu_is_enabled(g, NVGPU_HAS_SYNCPOINTS)) {
544 if (vm->sema_pool) {
545 nvgpu_semaphore_pool_unmap(vm->sema_pool, vm);
546 nvgpu_semaphore_pool_put(vm->sema_pool);
547 }
548 }
549
550#if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_TEGRA_19x_GPU)
551 if (nvgpu_mem_is_valid(&g->syncpt_mem) && vm->syncpt_ro_map_gpu_va)
552 nvgpu_gmmu_unmap(vm, &g->syncpt_mem,
553 vm->syncpt_ro_map_gpu_va);
554#endif
555
556 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
557
558 nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
559 while (node) {
560 mapped_buffer = mapped_buffer_from_rbtree_node(node);
561 __nvgpu_vm_unmap(mapped_buffer, NULL);
562 nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
563 }
564
565 /* destroy remaining reserved memory areas */
566 nvgpu_list_for_each_entry_safe(vm_area, vm_area_tmp,
567 &vm->vm_area_list,
568 nvgpu_vm_area, vm_area_list) {
569 nvgpu_list_del(&vm_area->vm_area_list);
570 nvgpu_kfree(vm->mm->g, vm_area);
571 }
572
573 if (nvgpu_alloc_initialized(&vm->kernel))
574 nvgpu_alloc_destroy(&vm->kernel);
575 if (nvgpu_alloc_initialized(&vm->user))
576 nvgpu_alloc_destroy(&vm->user);
577 if (nvgpu_alloc_initialized(&vm->user_lp))
578 nvgpu_alloc_destroy(&vm->user_lp);
579
580 nvgpu_vm_free_entries(vm, &vm->pdb);
581
582#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
583 if (g->is_virtual)
584 vgpu_vm_remove(vm);
585#endif
586
587 nvgpu_mutex_release(&vm->update_gmmu_lock);
588
589 nvgpu_kfree(g, vm);
590}
591
592static void __nvgpu_vm_remove_ref(struct nvgpu_ref *ref)
593{
594 struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
595
596 __nvgpu_vm_remove(vm);
597}
598
599void nvgpu_vm_get(struct vm_gk20a *vm)
600{
601 nvgpu_ref_get(&vm->ref);
602}
603
604void nvgpu_vm_put(struct vm_gk20a *vm)
605{
606 nvgpu_ref_put(&vm->ref, __nvgpu_vm_remove_ref);
607}
608
609int nvgpu_insert_mapped_buf(struct vm_gk20a *vm,
610 struct nvgpu_mapped_buf *mapped_buffer)
611{
612 mapped_buffer->node.key_start = mapped_buffer->addr;
613 mapped_buffer->node.key_end = mapped_buffer->addr + mapped_buffer->size;
614
615 nvgpu_rbtree_insert(&mapped_buffer->node, &vm->mapped_buffers);
616
617 return 0;
618}
619
620void nvgpu_remove_mapped_buf(struct vm_gk20a *vm,
621 struct nvgpu_mapped_buf *mapped_buffer)
622{
623 nvgpu_rbtree_unlink(&mapped_buffer->node, &vm->mapped_buffers);
624}
625
626struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf(
627 struct vm_gk20a *vm, u64 addr)
628{
629 struct nvgpu_rbtree_node *node = NULL;
630 struct nvgpu_rbtree_node *root = vm->mapped_buffers;
631
632 nvgpu_rbtree_search(addr, &node, root);
633 if (!node)
634 return NULL;
635
636 return mapped_buffer_from_rbtree_node(node);
637}
638
639struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_range(
640 struct vm_gk20a *vm, u64 addr)
641{
642 struct nvgpu_rbtree_node *node = NULL;
643 struct nvgpu_rbtree_node *root = vm->mapped_buffers;
644
645 nvgpu_rbtree_range_search(addr, &node, root);
646 if (!node)
647 return NULL;
648
649 return mapped_buffer_from_rbtree_node(node);
650}
651
652struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_less_than(
653 struct vm_gk20a *vm, u64 addr)
654{
655 struct nvgpu_rbtree_node *node = NULL;
656 struct nvgpu_rbtree_node *root = vm->mapped_buffers;
657
658 nvgpu_rbtree_less_than_search(addr, &node, root);
659 if (!node)
660 return NULL;
661
662 return mapped_buffer_from_rbtree_node(node);
663}
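
/*
 * A minimal sketch of resolving an arbitrary GPU VA back to the base address
 * of the mapping that contains it; the range search relies on the
 * [addr, addr + size] key set up in nvgpu_insert_mapped_buf().
 * example_addr_to_base() is an illustrative helper, not part of the driver
 * API.
 */
static u64 example_addr_to_base(struct vm_gk20a *vm, u64 gpu_va)
{
	struct nvgpu_mapped_buf *mapped_buffer;
	u64 base = 0;

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va);
	if (mapped_buffer)
		base = mapped_buffer->addr;
	nvgpu_mutex_release(&vm->update_gmmu_lock);

	return base;
}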
664
665int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
666 struct nvgpu_mapped_buf ***mapped_buffers,
667 int *num_buffers)
668{
669 struct nvgpu_mapped_buf *mapped_buffer;
670 struct nvgpu_mapped_buf **buffer_list;
671 struct nvgpu_rbtree_node *node = NULL;
672 int i = 0;
673
674 if (vm->userspace_managed) {
675 *mapped_buffers = NULL;
676 *num_buffers = 0;
677 return 0;
678 }
679
680 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
681
682 buffer_list = nvgpu_big_zalloc(vm->mm->g, sizeof(*buffer_list) *
683 vm->num_user_mapped_buffers);
684 if (!buffer_list) {
685 nvgpu_mutex_release(&vm->update_gmmu_lock);
686 return -ENOMEM;
687 }
688
689 nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
690 while (node) {
691 mapped_buffer = mapped_buffer_from_rbtree_node(node);
692 buffer_list[i] = mapped_buffer;
693 nvgpu_ref_get(&mapped_buffer->ref);
694 i++;
695 nvgpu_rbtree_enum_next(&node, node);
696 }
697
698 BUG_ON(i != vm->num_user_mapped_buffers);
699
700 *num_buffers = vm->num_user_mapped_buffers;
701 *mapped_buffers = buffer_list;
702
703 nvgpu_mutex_release(&vm->update_gmmu_lock);
704
705 return 0;
706}
707
708void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
709 struct nvgpu_mapped_buf **mapped_buffers,
710 int num_buffers)
711{
712 int i;
713 struct vm_gk20a_mapping_batch batch;
714
715 if (num_buffers == 0)
716 return;
717
718 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
719 nvgpu_vm_mapping_batch_start(&batch);
720 vm->kref_put_batch = &batch;
721
722 for (i = 0; i < num_buffers; ++i)
723 nvgpu_ref_put(&mapped_buffers[i]->ref, __nvgpu_vm_unmap_ref);
724
725 vm->kref_put_batch = NULL;
726 nvgpu_vm_mapping_batch_finish_locked(vm, &batch);
727 nvgpu_mutex_release(&vm->update_gmmu_lock);
728
729 nvgpu_big_free(vm->mm->g, mapped_buffers);
730}
731
732struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
733 struct nvgpu_os_buffer *os_buf,
734 struct nvgpu_sgt *sgt,
735 u64 map_addr,
736 u64 map_size,
737 u64 phys_offset,
738 int rw,
739 u32 flags,
740 s16 compr_kind,
741 s16 incompr_kind,
742 struct vm_gk20a_mapping_batch *batch,
743 enum nvgpu_aperture aperture)
744{
745 struct gk20a *g = gk20a_from_vm(vm);
746 struct nvgpu_mapped_buf *mapped_buffer = NULL;
747 struct nvgpu_ctag_buffer_info binfo = { 0 };
748 struct nvgpu_vm_area *vm_area = NULL;
749 int err = 0;
750 u64 align;
751 u32 ctag_offset = 0;
752 bool clear_ctags = false;
753 bool va_allocated = true;
754
755 /*
756 * The kind used as part of the key for map caching. HW may
757 * actually be programmed with the fallback kind in case the
758 * key kind is compressible but we're out of comptags.
759 */
760 s16 map_key_kind;
761
762 /*
763 * The actual GMMU PTE kind
764 */
765 u8 pte_kind;
766
767 if (vm->userspace_managed &&
768 !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
769 nvgpu_err(g,
770 "non-fixed-offset mapping not available on "
771 "userspace managed address spaces");
772 return ERR_PTR(-EINVAL);
773 }
774
775 binfo.flags = flags;
776 binfo.size = nvgpu_os_buf_get_size(os_buf);
777 binfo.compr_kind = (vm->enable_ctag && compr_kind != NV_KIND_INVALID ?
778 compr_kind : NV_KIND_INVALID);
779 binfo.incompr_kind = incompr_kind;
780
781 if (compr_kind != NV_KIND_INVALID)
782 map_key_kind = compr_kind;
783 else
784 map_key_kind = incompr_kind;
785
786 /*
787 * Check if this buffer is already mapped.
788 */
789 if (!vm->userspace_managed) {
790 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
791 mapped_buffer = nvgpu_vm_find_mapping(vm,
792 os_buf,
793 map_addr,
794 flags,
795 map_key_kind);
796 nvgpu_mutex_release(&vm->update_gmmu_lock);
797
798 if (mapped_buffer) {
799 nvgpu_ref_get(&mapped_buffer->ref);
800 return mapped_buffer;
801 }
802 }
803
804 /*
805 * Generate a new mapping!
806 */
807 mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
808 if (!mapped_buffer) {
809 nvgpu_warn(g, "oom allocating tracking buffer");
810 return ERR_PTR(-ENOMEM);
811 }
812
813 align = nvgpu_sgt_alignment(g, sgt);
814 if (g->mm.disable_bigpage)
815 binfo.pgsz_idx = gmmu_page_size_small;
816 else
817 binfo.pgsz_idx = __get_pte_size(vm, map_addr,
818 min_t(u64, binfo.size, align));
819 map_size = map_size ? map_size : binfo.size;
820 map_size = ALIGN(map_size, SZ_4K);
821
822 if ((map_size > binfo.size) ||
823 (phys_offset > (binfo.size - map_size))) {
824 err = -EINVAL;
825 goto clean_up_nolock;
826 }
827
828 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
829
830 /*
831 * Check if we should use a fixed offset for mapping this buffer.
832 */
833 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
834 err = nvgpu_vm_area_validate_buffer(vm,
835 map_addr,
836 map_size,
837 binfo.pgsz_idx,
838 &vm_area);
839 if (err)
840 goto clean_up;
841
842 va_allocated = false;
843 }
844
845 err = nvgpu_vm_compute_compression(vm, &binfo);
846 if (err) {
847 nvgpu_err(g, "failure setting up compression");
848 goto clean_up;
849 }
850
851 if (binfo.compr_kind != NV_KIND_INVALID) {
852 struct gk20a_comptags comptags = { 0 };
853
854 /*
855 * Get the comptags state, alloc if necessary
856 */
857 err = gk20a_alloc_or_get_comptags(g, os_buf,
858 &g->gr.comp_tags,
859 &comptags);
860 if (err) {
861 /*
862 * This is an irrecoverable failure and we need to
863 * abort. In particular, it is not safe to proceed with
 864 * the incompressible fallback, since we cannot mark
 865 * our alloc failure anywhere. Later we would retry
866 * allocation and break compressible map aliasing.
867 */
868 nvgpu_err(g, "Error %d setting up comptags", err);
869 goto clean_up;
870 }
871
872 /*
 873 * Newly allocated comptags need to be cleared
874 */
875 if (comptags.needs_clear) {
876 if (g->ops.ltc.cbc_ctrl) {
877 if (gk20a_comptags_start_clear(os_buf)) {
878 err = g->ops.ltc.cbc_ctrl(
879 g, gk20a_cbc_op_clear,
880 comptags.offset,
881 (comptags.offset +
882 comptags.lines - 1));
883 gk20a_comptags_finish_clear(
884 os_buf, err == 0);
885 if (err)
886 goto clean_up;
887 }
888 } else {
889 /*
890 * Cleared as part of gmmu map
891 */
892 clear_ctags = true;
893 }
894 }
895
896 /*
897 * Store the ctag offset for later use if we got the comptags
898 */
899 if (comptags.lines)
900 ctag_offset = comptags.offset;
901 }
902
903 /*
904 * Figure out the kind and ctag offset for the GMMU page tables
905 */
906 if (binfo.compr_kind != NV_KIND_INVALID && ctag_offset) {
907 /*
908 * Adjust the ctag_offset as per the buffer map offset
909 */
910 ctag_offset += phys_offset >>
911 ilog2(g->ops.fb.compression_page_size(g));
912 pte_kind = binfo.compr_kind;
913 } else if (binfo.incompr_kind != NV_KIND_INVALID) {
914 /*
915 * Incompressible kind, ctag offset will not be programmed
916 */
917 ctag_offset = 0;
918 pte_kind = binfo.incompr_kind;
919 } else {
920 /*
921 * Caller required compression, but we cannot provide it
922 */
923 nvgpu_err(g, "No comptags and no incompressible fallback kind");
924 err = -ENOMEM;
925 goto clean_up;
926 }
927
928 if (clear_ctags)
929 clear_ctags = gk20a_comptags_start_clear(os_buf);
930
931 map_addr = g->ops.mm.gmmu_map(vm,
932 map_addr,
933 sgt,
934 phys_offset,
935 map_size,
936 binfo.pgsz_idx,
937 pte_kind,
938 ctag_offset,
939 flags,
940 rw,
941 clear_ctags,
942 false,
943 false,
944 batch,
945 aperture);
946
947 if (clear_ctags)
948 gk20a_comptags_finish_clear(os_buf, map_addr != 0);
949
950 if (!map_addr) {
951 err = -ENOMEM;
952 goto clean_up;
953 }
954
955 nvgpu_init_list_node(&mapped_buffer->buffer_list);
956 nvgpu_ref_init(&mapped_buffer->ref);
957 mapped_buffer->addr = map_addr;
958 mapped_buffer->size = map_size;
959 mapped_buffer->pgsz_idx = binfo.pgsz_idx;
960 mapped_buffer->vm = vm;
961 mapped_buffer->flags = flags;
962 mapped_buffer->kind = map_key_kind;
963 mapped_buffer->va_allocated = va_allocated;
964 mapped_buffer->vm_area = vm_area;
965
966 err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
967 if (err) {
968 nvgpu_err(g, "failed to insert into mapped buffer tree");
969 goto clean_up;
970 }
971
972 vm->num_user_mapped_buffers++;
973
974 if (vm_area) {
975 nvgpu_list_add_tail(&mapped_buffer->buffer_list,
976 &vm_area->buffer_list_head);
977 mapped_buffer->vm_area = vm_area;
978 }
979
980 nvgpu_mutex_release(&vm->update_gmmu_lock);
981
982 return mapped_buffer;
983
984clean_up:
985 if (mapped_buffer->addr)
986 g->ops.mm.gmmu_unmap(vm,
987 mapped_buffer->addr,
988 mapped_buffer->size,
989 mapped_buffer->pgsz_idx,
990 mapped_buffer->va_allocated,
991 gk20a_mem_flag_none,
992 mapped_buffer->vm_area ?
993 mapped_buffer->vm_area->sparse : false,
994 NULL);
995 nvgpu_mutex_release(&vm->update_gmmu_lock);
996clean_up_nolock:
997 nvgpu_kfree(g, mapped_buffer);
998
999 return ERR_PTR(err);
1000}
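
/*
 * A minimal caller-side sketch of nvgpu_vm_map(); example_map_buffer(), the
 * kind values and the aperture choice are placeholders, and the error
 * handling only illustrates the ERR_PTR() convention of the return value
 * above.
 */
static int example_map_buffer(struct vm_gk20a *vm,
			      struct nvgpu_os_buffer *os_buf,
			      struct nvgpu_sgt *sgt,
			      u64 *gpu_va)
{
	struct nvgpu_mapped_buf *mapped_buffer;

	mapped_buffer = nvgpu_vm_map(vm, os_buf, sgt,
				     0,			/* map_addr: VM picks the VA */
				     0,			/* map_size: whole buffer */
				     0,			/* phys_offset */
				     gk20a_mem_flag_none,	/* rw */
				     0,			/* flags */
				     NV_KIND_INVALID,	/* compr_kind */
				     0,			/* incompr_kind (placeholder) */
				     NULL,		/* batch */
				     APERTURE_SYSMEM);
	if (IS_ERR(mapped_buffer))
		return PTR_ERR(mapped_buffer);

	*gpu_va = mapped_buffer->addr;
	return 0;
}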
1001
1002/*
1003 * Really unmap. This does the real GMMU unmap and removes the mapping from the
1004 * VM map tracking tree (and vm_area list if necessary).
1005 */
1006static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer,
1007 struct vm_gk20a_mapping_batch *batch)
1008{
1009 struct vm_gk20a *vm = mapped_buffer->vm;
1010 struct gk20a *g = vm->mm->g;
1011
1012 vm->num_user_mapped_buffers--;
1013
1014 g->ops.mm.gmmu_unmap(vm,
1015 mapped_buffer->addr,
1016 mapped_buffer->size,
1017 mapped_buffer->pgsz_idx,
1018 mapped_buffer->va_allocated,
1019 gk20a_mem_flag_none,
1020 mapped_buffer->vm_area ?
1021 mapped_buffer->vm_area->sparse : false,
1022 batch);
1023
1024 /*
1025 * Remove from mapped buffer tree. Then delete the buffer from the
1026 * linked list of mapped buffers; though note: not all mapped buffers
1027 * are part of a vm_area.
1028 */
1029 nvgpu_remove_mapped_buf(vm, mapped_buffer);
1030 nvgpu_list_del(&mapped_buffer->buffer_list);
1031
1032 /*
 1033 * OS-specific freeing. This is done after the generic freeing in case
 1034 * the generic freeing relies on some component of the OS-specific
 1035 * nvgpu_mapped_buf in some abstraction or the like.
1036 */
1037 nvgpu_vm_unmap_system(mapped_buffer);
1038
1039 nvgpu_kfree(g, mapped_buffer);
1040}
1041
1042void __nvgpu_vm_unmap_ref(struct nvgpu_ref *ref)
1043{
1044 struct nvgpu_mapped_buf *mapped_buffer =
1045 container_of(ref, struct nvgpu_mapped_buf, ref);
1046
1047 __nvgpu_vm_unmap(mapped_buffer, mapped_buffer->vm->kref_put_batch);
1048}
1049
1050/*
1051 * For fixed-offset buffers we must sync the buffer. That means we wait for the
1052 * buffer to hit a ref-count of 1 before proceeding.
1053 *
1054 * Note: this requires the update_gmmu_lock to be held since we release it and
 1055 * re-acquire it in this function.
1056 */
1057static int nvgpu_vm_unmap_sync_buffer(struct vm_gk20a *vm,
1058 struct nvgpu_mapped_buf *mapped_buffer)
1059{
1060 struct nvgpu_timeout timeout;
1061 int ret = 0;
1062
1063 nvgpu_mutex_release(&vm->update_gmmu_lock);
1064
1065 /*
 1066 * 500 ms timer.
1067 */
1068 nvgpu_timeout_init(vm->mm->g, &timeout, 50, NVGPU_TIMER_CPU_TIMER);
1069
1070 do {
1071 if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) == 1)
1072 break;
1073 nvgpu_msleep(10);
1074 } while (!nvgpu_timeout_expired_msg(&timeout,
1075 "sync-unmap failed on 0x%llx"));
1076
1077 if (nvgpu_timeout_expired(&timeout))
1078 ret = -ETIMEDOUT;
1079
1080 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
1081
1082 return ret;
1083}
1084
1085void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset,
1086 struct vm_gk20a_mapping_batch *batch)
1087{
1088 struct nvgpu_mapped_buf *mapped_buffer;
1089
1090 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
1091
1092 mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
1093 if (!mapped_buffer)
1094 goto done;
1095
1096 if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
1097 if (nvgpu_vm_unmap_sync_buffer(vm, mapped_buffer))
1098 /*
1099 * Looks like we have failed... Better not continue in
1100 * case the buffer is in use.
1101 */
1102 goto done;
1103 }
1104
1105 /*
1106 * Make sure we have access to the batch if we end up calling through to
1107 * the unmap_ref function.
1108 */
1109 vm->kref_put_batch = batch;
1110 nvgpu_ref_put(&mapped_buffer->ref, __nvgpu_vm_unmap_ref);
1111 vm->kref_put_batch = NULL;
1112
1113done:
1114 nvgpu_mutex_release(&vm->update_gmmu_lock);
1115 return;
1116}
1117
1118static int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
1119 struct nvgpu_ctag_buffer_info *binfo)
1120{
1121 bool kind_compressible = (binfo->compr_kind != NV_KIND_INVALID);
1122 struct gk20a *g = gk20a_from_vm(vm);
1123
1124 if (kind_compressible &&
1125 vm->gmmu_page_sizes[binfo->pgsz_idx] <
1126 g->ops.fb.compressible_page_size(g)) {
1127 /*
1128 * Let's double check that there is a fallback kind
1129 */
1130 if (binfo->incompr_kind == NV_KIND_INVALID) {
1131 nvgpu_err(g,
1132 "Unsupported page size for compressible "
1133 "kind, but no fallback kind");
1134 return -EINVAL;
1135 } else {
1136 nvgpu_log(g, gpu_dbg_map,
1137 "Unsupported page size for compressible "
1138 "kind, demoting to incompressible");
1139 binfo->compr_kind = NV_KIND_INVALID;
1140 kind_compressible = false;
1141 }
1142 }
1143
1144 return 0;
1145}