diff options
author | Alex Waterman <alexw@nvidia.com> | 2017-05-11 13:25:47 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-06-06 20:09:22 -0400 |
commit | 048c6b062ae381a329dccbc7ca0599113dbd7417 (patch) | |
tree | 24712fcaf967e22bd91bcb6a81195cf79ac08cc1 /drivers/gpu/nvgpu/common/mm | |
parent | c21f5bca9ae81804130e30ea3e6f7a18d51203dc (diff) |
gpu: nvgpu: Separate GMMU mapping impl from mm_gk20a.c
Separate the non-chip specific GMMU mapping implementation code
out of mm_gk20a.c. This puts all of the chip-agnostic code into
common/mm/gmmu.c in preparation for rewriting it.
JIRA NVGPU-12
JIRA NVGPU-30
Change-Id: I6f7fdac3422703f5e80bb22ad304dc27bba4814d
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1480228
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm')
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/gmmu.c | 517 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/vm.c | 21 |
2 files changed, 536 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index dc91cc2f..e63155f2 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c | |||
@@ -15,14 +15,81 @@ | |||
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <nvgpu/log.h> | 17 | #include <nvgpu/log.h> |
18 | #include <nvgpu/list.h> | ||
18 | #include <nvgpu/dma.h> | 19 | #include <nvgpu/dma.h> |
19 | #include <nvgpu/gmmu.h> | 20 | #include <nvgpu/gmmu.h> |
20 | #include <nvgpu/nvgpu_mem.h> | 21 | #include <nvgpu/nvgpu_mem.h> |
21 | #include <nvgpu/enabled.h> | 22 | #include <nvgpu/enabled.h> |
23 | #include <nvgpu/page_allocator.h> | ||
22 | 24 | ||
23 | #include "gk20a/gk20a.h" | 25 | #include "gk20a/gk20a.h" |
24 | #include "gk20a/mm_gk20a.h" | 26 | #include "gk20a/mm_gk20a.h" |
25 | 27 | ||
28 | #define gmmu_dbg(g, fmt, args...) \ | ||
29 | nvgpu_log(g, gpu_dbg_map, fmt, ##args) | ||
30 | #define gmmu_dbg_v(g, fmt, args...) \ | ||
31 | nvgpu_log(g, gpu_dbg_map_v, fmt, ##args) | ||
32 | |||
33 | static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry) | ||
34 | { | ||
35 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, | ||
36 | sg_phys(entry->mem.priv.sgt->sgl), | ||
37 | entry->mem.priv.sgt->sgl->length); | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | static void unmap_gmmu_phys_pages(struct gk20a_mm_entry *entry) | ||
42 | { | ||
43 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, | ||
44 | sg_phys(entry->mem.priv.sgt->sgl), | ||
45 | entry->mem.priv.sgt->sgl->length); | ||
46 | } | ||
47 | |||
48 | static int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry) | ||
49 | { | ||
50 | gk20a_dbg_fn(""); | ||
51 | |||
52 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | ||
53 | return map_gmmu_phys_pages(entry); | ||
54 | |||
55 | if (IS_ENABLED(CONFIG_ARM64)) { | ||
56 | if (entry->mem.aperture == APERTURE_VIDMEM) | ||
57 | return 0; | ||
58 | |||
59 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, | ||
60 | sg_phys(entry->mem.priv.sgt->sgl), | ||
61 | entry->mem.size); | ||
62 | } else { | ||
63 | int err = nvgpu_mem_begin(g, &entry->mem); | ||
64 | |||
65 | if (err) | ||
66 | return err; | ||
67 | } | ||
68 | |||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | static void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry) | ||
73 | { | ||
74 | gk20a_dbg_fn(""); | ||
75 | |||
76 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { | ||
77 | unmap_gmmu_phys_pages(entry); | ||
78 | return; | ||
79 | } | ||
80 | |||
81 | if (IS_ENABLED(CONFIG_ARM64)) { | ||
82 | if (entry->mem.aperture == APERTURE_VIDMEM) | ||
83 | return; | ||
84 | |||
85 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, | ||
86 | sg_phys(entry->mem.priv.sgt->sgl), | ||
87 | entry->mem.size); | ||
88 | } else { | ||
89 | nvgpu_mem_end(g, &entry->mem); | ||
90 | } | ||
91 | } | ||
92 | |||
26 | static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order, | 93 | static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order, |
27 | struct gk20a_mm_entry *entry) | 94 | struct gk20a_mm_entry *entry) |
28 | { | 95 | { |
@@ -97,6 +164,44 @@ static int nvgpu_alloc_gmmu_pages(struct vm_gk20a *vm, u32 order, | |||
97 | return 0; | 164 | return 0; |
98 | } | 165 | } |
99 | 166 | ||
167 | static void free_gmmu_phys_pages(struct vm_gk20a *vm, | ||
168 | struct gk20a_mm_entry *entry) | ||
169 | { | ||
170 | gk20a_dbg_fn(""); | ||
171 | |||
172 | /* note: mem_desc slightly abused (wrt. nvgpu_free_gmmu_pages) */ | ||
173 | |||
174 | free_pages((unsigned long)entry->mem.cpu_va, get_order(entry->mem.size)); | ||
175 | entry->mem.cpu_va = NULL; | ||
176 | |||
177 | sg_free_table(entry->mem.priv.sgt); | ||
178 | nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt); | ||
179 | entry->mem.priv.sgt = NULL; | ||
180 | entry->mem.size = 0; | ||
181 | entry->mem.aperture = APERTURE_INVALID; | ||
182 | } | ||
183 | |||
184 | void nvgpu_free_gmmu_pages(struct vm_gk20a *vm, | ||
185 | struct gk20a_mm_entry *entry) | ||
186 | { | ||
187 | struct gk20a *g = gk20a_from_vm(vm); | ||
188 | |||
189 | gk20a_dbg_fn(""); | ||
190 | |||
191 | if (!entry->mem.size) | ||
192 | return; | ||
193 | |||
194 | if (entry->woffset) /* fake shadow mem */ | ||
195 | return; | ||
196 | |||
197 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { | ||
198 | free_gmmu_phys_pages(vm, entry); | ||
199 | return; | ||
200 | } | ||
201 | |||
202 | nvgpu_dma_free(g, &entry->mem); | ||
203 | } | ||
204 | |||
100 | /* | 205 | /* |
101 | * Allocate a phys contig region big enough for a full | 206 | * Allocate a phys contig region big enough for a full |
102 | * sized gmmu page table for the given gmmu_page_size. | 207 | * sized gmmu page table for the given gmmu_page_size. |
@@ -202,6 +307,9 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
202 | return vaddr; | 307 | return vaddr; |
203 | } | 308 | } |
204 | 309 | ||
310 | /* | ||
311 | * Convenience wrapper over __nvgpu_gmmu_map() for non-fixed mappings. | ||
312 | */ | ||
205 | u64 nvgpu_gmmu_map(struct vm_gk20a *vm, | 313 | u64 nvgpu_gmmu_map(struct vm_gk20a *vm, |
206 | struct nvgpu_mem *mem, | 314 | struct nvgpu_mem *mem, |
207 | u64 size, | 315 | u64 size, |
@@ -246,3 +354,412 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm, struct nvgpu_mem *mem, u64 gpu_va) | |||
246 | 354 | ||
247 | nvgpu_mutex_release(&vm->update_gmmu_lock); | 355 | nvgpu_mutex_release(&vm->update_gmmu_lock); |
248 | } | 356 | } |
357 | |||
358 | static int update_gmmu_level_locked(struct vm_gk20a *vm, | ||
359 | struct gk20a_mm_entry *pte, | ||
360 | enum gmmu_pgsz_gk20a pgsz_idx, | ||
361 | struct scatterlist **sgl, | ||
362 | u64 *offset, | ||
363 | u64 *iova, | ||
364 | u64 gpu_va, u64 gpu_end, | ||
365 | u8 kind_v, u64 *ctag, | ||
366 | bool cacheable, bool unmapped_pte, | ||
367 | int rw_flag, | ||
368 | bool sparse, | ||
369 | int lvl, | ||
370 | bool priv, | ||
371 | enum nvgpu_aperture aperture) | ||
372 | { | ||
373 | struct gk20a *g = gk20a_from_vm(vm); | ||
374 | const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl]; | ||
375 | const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1]; | ||
376 | int err = 0; | ||
377 | u32 pde_i; | ||
378 | u64 pde_size = 1ULL << (u64)l->lo_bit[pgsz_idx]; | ||
379 | struct gk20a_mm_entry *next_pte = NULL, *prev_pte = NULL; | ||
380 | |||
381 | gk20a_dbg_fn(""); | ||
382 | |||
383 | pde_i = (gpu_va & ((1ULL << ((u64)l->hi_bit[pgsz_idx]+1)) - 1ULL)) | ||
384 | >> (u64)l->lo_bit[pgsz_idx]; | ||
385 | |||
386 | gk20a_dbg(gpu_dbg_pte, "size_idx=%d, l: %d, [%llx,%llx], iova=%llx", | ||
387 | pgsz_idx, lvl, gpu_va, gpu_end-1, *iova); | ||
388 | |||
389 | while (gpu_va < gpu_end) { | ||
390 | u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end); | ||
391 | |||
392 | /* Allocate next level */ | ||
393 | if (next_l->update_entry) { | ||
394 | if (!pte->entries) { | ||
395 | int num_entries = | ||
396 | 1 << | ||
397 | (l->hi_bit[pgsz_idx] | ||
398 | - l->lo_bit[pgsz_idx] + 1); | ||
399 | pte->entries = | ||
400 | nvgpu_vzalloc(g, | ||
401 | sizeof(struct gk20a_mm_entry) * | ||
402 | num_entries); | ||
403 | if (!pte->entries) | ||
404 | return -ENOMEM; | ||
405 | pte->pgsz = pgsz_idx; | ||
406 | pte->num_entries = num_entries; | ||
407 | } | ||
408 | prev_pte = next_pte; | ||
409 | next_pte = pte->entries + pde_i; | ||
410 | |||
411 | if (!next_pte->mem.size) { | ||
412 | err = nvgpu_zalloc_gmmu_page_table(vm, | ||
413 | pgsz_idx, next_l, next_pte, prev_pte); | ||
414 | if (err) | ||
415 | return err; | ||
416 | } | ||
417 | } | ||
418 | |||
419 | err = l->update_entry(vm, pte, pde_i, pgsz_idx, | ||
420 | sgl, offset, iova, | ||
421 | kind_v, ctag, cacheable, unmapped_pte, | ||
422 | rw_flag, sparse, priv, aperture); | ||
423 | if (err) | ||
424 | return err; | ||
425 | |||
426 | if (next_l->update_entry) { | ||
427 | /* get cpu access to the ptes */ | ||
428 | err = map_gmmu_pages(g, next_pte); | ||
429 | if (err) { | ||
430 | nvgpu_err(g, | ||
431 | "couldn't map ptes for update as=%d", | ||
432 | vm_aspace_id(vm)); | ||
433 | return err; | ||
434 | } | ||
435 | err = update_gmmu_level_locked(vm, next_pte, | ||
436 | pgsz_idx, | ||
437 | sgl, | ||
438 | offset, | ||
439 | iova, | ||
440 | gpu_va, | ||
441 | next, | ||
442 | kind_v, ctag, cacheable, unmapped_pte, | ||
443 | rw_flag, sparse, lvl+1, priv, aperture); | ||
444 | unmap_gmmu_pages(g, next_pte); | ||
445 | |||
446 | if (err) | ||
447 | return err; | ||
448 | } | ||
449 | |||
450 | pde_i++; | ||
451 | gpu_va = next; | ||
452 | } | ||
453 | |||
454 | gk20a_dbg_fn("done"); | ||
455 | |||
456 | return 0; | ||
457 | } | ||
458 | |||
459 | /* | ||
460 | * This is the true top level GMMU mapping logic. This breaks down the incoming | ||
461 | * scatter gather table and does actual programming of GPU virtual address to | ||
462 | * physical* address. | ||
463 | * | ||
464 | * The update of each level of the page tables is farmed out to chip specific | ||
465 | * implementations. But the logic around that is generic to all chips. Every chip | ||
466 | * has some number of PDE levels and then a PTE level. | ||
467 | * | ||
468 | * Each chunk of the incoming SGT is sent to the chip specific implementation | ||
469 | * of page table update. | ||
470 | * | ||
471 | * [*] Note: the "physical" address may actually be an IO virtual address in the | ||
472 | * case of SMMU usage. | ||
473 | */ | ||
474 | static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | ||
475 | enum gmmu_pgsz_gk20a pgsz_idx, | ||
476 | struct sg_table *sgt, | ||
477 | u64 buffer_offset, | ||
478 | u64 gpu_va, u64 gpu_end, | ||
479 | u8 kind_v, u32 ctag_offset, | ||
480 | bool cacheable, bool unmapped_pte, | ||
481 | int rw_flag, | ||
482 | bool sparse, | ||
483 | bool priv, | ||
484 | enum nvgpu_aperture aperture) | ||
485 | { | ||
486 | struct gk20a *g = gk20a_from_vm(vm); | ||
487 | int ctag_granularity = g->ops.fb.compression_page_size(g); | ||
488 | u64 ctag = (u64)ctag_offset * (u64)ctag_granularity; | ||
489 | u64 iova = 0; | ||
490 | u64 space_to_skip = buffer_offset; | ||
491 | u64 map_size = gpu_end - gpu_va; | ||
492 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; | ||
493 | int err; | ||
494 | struct scatterlist *sgl = NULL; | ||
495 | struct nvgpu_page_alloc *alloc = NULL; | ||
496 | struct page_alloc_chunk *chunk = NULL; | ||
497 | u64 length; | ||
498 | |||
499 | /* note: here we need to map kernel to small, since the | ||
500 | * low-level mmu code assumes 0 is small and 1 is big pages */ | ||
501 | if (pgsz_idx == gmmu_page_size_kernel) | ||
502 | pgsz_idx = gmmu_page_size_small; | ||
503 | |||
504 | if (space_to_skip & (page_size - 1)) | ||
505 | return -EINVAL; | ||
506 | |||
507 | err = map_gmmu_pages(g, &vm->pdb); | ||
508 | if (err) { | ||
509 | nvgpu_err(g, | ||
510 | "couldn't map ptes for update as=%d", | ||
511 | vm_aspace_id(vm)); | ||
512 | return err; | ||
513 | } | ||
514 | |||
515 | if (aperture == APERTURE_VIDMEM) { | ||
516 | gmmu_dbg_v(g, "vidmem map size_idx=%d, gpu_va=[%llx,%llx]", | ||
517 | pgsz_idx, gpu_va, gpu_end-1); | ||
518 | |||
519 | if (sgt) { | ||
520 | alloc = get_vidmem_page_alloc(sgt->sgl); | ||
521 | |||
522 | nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, | ||
523 | page_alloc_chunk, list_entry) { | ||
524 | if (space_to_skip && | ||
525 | space_to_skip > chunk->length) { | ||
526 | space_to_skip -= chunk->length; | ||
527 | } else { | ||
528 | iova = chunk->base + space_to_skip; | ||
529 | length = chunk->length - space_to_skip; | ||
530 | length = min(length, map_size); | ||
531 | space_to_skip = 0; | ||
532 | |||
533 | err = update_gmmu_level_locked(vm, | ||
534 | &vm->pdb, pgsz_idx, | ||
535 | &sgl, | ||
536 | &space_to_skip, | ||
537 | &iova, | ||
538 | gpu_va, gpu_va + length, | ||
539 | kind_v, &ctag, | ||
540 | cacheable, unmapped_pte, | ||
541 | rw_flag, sparse, 0, priv, | ||
542 | aperture); | ||
543 | if (err) | ||
544 | break; | ||
545 | |||
546 | /* need to set explicit zero here */ | ||
547 | space_to_skip = 0; | ||
548 | gpu_va += length; | ||
549 | map_size -= length; | ||
550 | |||
551 | if (!map_size) | ||
552 | break; | ||
553 | } | ||
554 | } | ||
555 | } else { | ||
556 | err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx, | ||
557 | &sgl, | ||
558 | &space_to_skip, | ||
559 | &iova, | ||
560 | gpu_va, gpu_end, | ||
561 | kind_v, &ctag, | ||
562 | cacheable, unmapped_pte, rw_flag, | ||
563 | sparse, 0, priv, | ||
564 | aperture); | ||
565 | } | ||
566 | } else { | ||
567 | gmmu_dbg_v(g, | ||
568 | "pgsz=%-6d, gpu_va: %#-12llx +%#-6llx phys: %#-12llx " | ||
569 | "buffer offset: %-4lld, nents: %d", | ||
570 | page_size, | ||
571 | gpu_va, gpu_end - gpu_va, | ||
572 | sgt ? g->ops.mm.get_iova_addr(g, sgt->sgl, 0) : 0ULL, | ||
573 | buffer_offset, | ||
574 | sgt ? sgt->nents : 0); | ||
575 | |||
576 | if (sgt) { | ||
577 | iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0); | ||
578 | if (!vm->mm->bypass_smmu && iova) { | ||
579 | iova += space_to_skip; | ||
580 | } else { | ||
581 | sgl = sgt->sgl; | ||
582 | |||
583 | gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d", | ||
584 | (u64)sg_phys(sgl), | ||
585 | sgl->length); | ||
586 | |||
587 | while (space_to_skip && sgl && | ||
588 | space_to_skip + page_size > sgl->length) { | ||
589 | space_to_skip -= sgl->length; | ||
590 | sgl = sg_next(sgl); | ||
591 | gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d", | ||
592 | (u64)sg_phys(sgl), | ||
593 | sgl->length); | ||
594 | } | ||
595 | |||
596 | iova = sg_phys(sgl) + space_to_skip; | ||
597 | } | ||
598 | } | ||
599 | |||
600 | err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx, | ||
601 | &sgl, | ||
602 | &space_to_skip, | ||
603 | &iova, | ||
604 | gpu_va, gpu_end, | ||
605 | kind_v, &ctag, | ||
606 | cacheable, unmapped_pte, rw_flag, | ||
607 | sparse, 0, priv, | ||
608 | aperture); | ||
609 | } | ||
610 | |||
611 | unmap_gmmu_pages(g, &vm->pdb); | ||
612 | |||
613 | mb(); | ||
614 | |||
615 | gk20a_dbg_fn("done"); | ||
616 | |||
617 | return err; | ||
618 | } | ||
619 | |||
620 | /** | ||
621 | * gk20a_locked_gmmu_map - Map a buffer into the GMMU | ||
622 | * | ||
623 | * This is for non-vGPU chips. It's part of the HAL at the moment but really | ||
624 | * should not be. Chip specific stuff is handled at the PTE/PDE programming | ||
625 | * layer. The rest of the logic is essentially generic for all chips. | ||
626 | * | ||
627 | * To call this function you must have locked the VM lock: vm->update_gmmu_lock. | ||
628 | * However, note: this function is not called directly. It's used through the | ||
629 | * mm.gmmu_lock() HAL. So before calling the mm.gmmu_lock() HAL make sure you | ||
630 | * have the update_gmmu_lock acquired. | ||
631 | */ | ||
632 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | ||
633 | u64 map_offset, | ||
634 | struct sg_table *sgt, | ||
635 | u64 buffer_offset, | ||
636 | u64 size, | ||
637 | int pgsz_idx, | ||
638 | u8 kind_v, | ||
639 | u32 ctag_offset, | ||
640 | u32 flags, | ||
641 | int rw_flag, | ||
642 | bool clear_ctags, | ||
643 | bool sparse, | ||
644 | bool priv, | ||
645 | struct vm_gk20a_mapping_batch *batch, | ||
646 | enum nvgpu_aperture aperture) | ||
647 | { | ||
648 | int err = 0; | ||
649 | bool allocated = false; | ||
650 | struct gk20a *g = gk20a_from_vm(vm); | ||
651 | int ctag_granularity = g->ops.fb.compression_page_size(g); | ||
652 | u32 ctag_lines = DIV_ROUND_UP_ULL(size, ctag_granularity); | ||
653 | |||
654 | /* Allocate (or validate when map_offset != 0) the virtual address. */ | ||
655 | if (!map_offset) { | ||
656 | map_offset = __nvgpu_vm_alloc_va(vm, size, | ||
657 | pgsz_idx); | ||
658 | if (!map_offset) { | ||
659 | nvgpu_err(g, "failed to allocate va space"); | ||
660 | err = -ENOMEM; | ||
661 | goto fail_alloc; | ||
662 | } | ||
663 | allocated = true; | ||
664 | } | ||
665 | |||
666 | gmmu_dbg(g, | ||
667 | "gv: 0x%04x_%08x + 0x%-7llx " | ||
668 | "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] " | ||
669 | "pgsz=%-3dKb as=%-2d ctags=%d start=%d " | ||
670 | "kind=0x%x flags=0x%x apt=%s", | ||
671 | u64_hi32(map_offset), u64_lo32(map_offset), size, | ||
672 | sgt ? u64_hi32((u64)sg_dma_address(sgt->sgl)) : 0, | ||
673 | sgt ? u64_lo32((u64)sg_dma_address(sgt->sgl)) : 0, | ||
674 | sgt ? u64_hi32((u64)sg_phys(sgt->sgl)) : 0, | ||
675 | sgt ? u64_lo32((u64)sg_phys(sgt->sgl)) : 0, | ||
676 | vm->gmmu_page_sizes[pgsz_idx] >> 10, vm_aspace_id(vm), | ||
677 | ctag_lines, ctag_offset, | ||
678 | kind_v, flags, nvgpu_aperture_str(aperture)); | ||
679 | |||
680 | err = update_gmmu_ptes_locked(vm, pgsz_idx, | ||
681 | sgt, | ||
682 | buffer_offset, | ||
683 | map_offset, map_offset + size, | ||
684 | kind_v, | ||
685 | ctag_offset, | ||
686 | flags & | ||
687 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
688 | flags & | ||
689 | NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE, | ||
690 | rw_flag, | ||
691 | sparse, | ||
692 | priv, | ||
693 | aperture); | ||
694 | if (err) { | ||
695 | nvgpu_err(g, "failed to update ptes on map"); | ||
696 | goto fail_validate; | ||
697 | } | ||
698 | |||
699 | if (!batch) | ||
700 | g->ops.fb.tlb_invalidate(g, &vm->pdb.mem); | ||
701 | else | ||
702 | batch->need_tlb_invalidate = true; | ||
703 | |||
704 | return map_offset; | ||
705 | fail_validate: | ||
706 | if (allocated) | ||
707 | __nvgpu_vm_free_va(vm, map_offset, pgsz_idx); | ||
708 | fail_alloc: | ||
709 | nvgpu_err(g, "%s: failed with err=%d", __func__, err); | ||
710 | return 0; | ||
711 | } | ||
712 | |||
713 | void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, | ||
714 | u64 vaddr, | ||
715 | u64 size, | ||
716 | int pgsz_idx, | ||
717 | bool va_allocated, | ||
718 | int rw_flag, | ||
719 | bool sparse, | ||
720 | struct vm_gk20a_mapping_batch *batch) | ||
721 | { | ||
722 | int err = 0; | ||
723 | struct gk20a *g = gk20a_from_vm(vm); | ||
724 | |||
725 | if (va_allocated) { | ||
726 | err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx); | ||
727 | if (err) { | ||
728 | nvgpu_err(g, "failed to free va"); | ||
729 | return; | ||
730 | } | ||
731 | } | ||
732 | |||
733 | /* unmap here needs to know the page size we assigned at mapping */ | ||
734 | err = update_gmmu_ptes_locked(vm, | ||
735 | pgsz_idx, | ||
736 | NULL, /* n/a for unmap */ | ||
737 | 0, | ||
738 | vaddr, | ||
739 | vaddr + size, | ||
740 | 0, 0, false /* n/a for unmap */, | ||
741 | false, rw_flag, | ||
742 | sparse, 0, | ||
743 | APERTURE_INVALID); /* don't care for unmap */ | ||
744 | if (err) | ||
745 | nvgpu_err(g, "failed to update gmmu ptes on unmap"); | ||
746 | |||
747 | /* flush l2 so any dirty lines are written out *now*. | ||
748 | * also as we could potentially be switching this buffer | ||
749 | * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at | ||
750 | * some point in the future we need to invalidate l2. e.g. switching | ||
751 | * from a render buffer unmap (here) to later using the same memory | ||
752 | * for gmmu ptes. note the positioning of this relative to any smmu | ||
753 | * unmapping (below). */ | ||
754 | |||
755 | if (!batch) { | ||
756 | gk20a_mm_l2_flush(g, true); | ||
757 | g->ops.fb.tlb_invalidate(g, &vm->pdb.mem); | ||
758 | } else { | ||
759 | if (!batch->gpu_l2_flushed) { | ||
760 | gk20a_mm_l2_flush(g, true); | ||
761 | batch->gpu_l2_flushed = true; | ||
762 | } | ||
763 | batch->need_tlb_invalidate = true; | ||
764 | } | ||
765 | } | ||
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c index e24d40bf..5ba386c9 100644 --- a/drivers/gpu/nvgpu/common/mm/vm.c +++ b/drivers/gpu/nvgpu/common/mm/vm.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <nvgpu/dma.h> | 18 | #include <nvgpu/dma.h> |
19 | #include <nvgpu/vm.h> | 19 | #include <nvgpu/vm.h> |
20 | #include <nvgpu/vm_area.h> | 20 | #include <nvgpu/vm_area.h> |
21 | #include <nvgpu/gmmu.h> | ||
21 | #include <nvgpu/lock.h> | 22 | #include <nvgpu/lock.h> |
22 | #include <nvgpu/list.h> | 23 | #include <nvgpu/list.h> |
23 | #include <nvgpu/rbtree.h> | 24 | #include <nvgpu/rbtree.h> |
@@ -34,6 +35,22 @@ int vm_aspace_id(struct vm_gk20a *vm) | |||
34 | return vm->as_share ? vm->as_share->id : -1; | 35 | return vm->as_share ? vm->as_share->id : -1; |
35 | } | 36 | } |
36 | 37 | ||
38 | static void nvgpu_vm_free_entries(struct vm_gk20a *vm, | ||
39 | struct gk20a_mm_entry *parent, | ||
40 | int level) | ||
41 | { | ||
42 | int i; | ||
43 | |||
44 | if (parent->entries) | ||
45 | for (i = 0; i < parent->num_entries; i++) | ||
46 | nvgpu_vm_free_entries(vm, &parent->entries[i], level+1); | ||
47 | |||
48 | if (parent->mem.size) | ||
49 | nvgpu_free_gmmu_pages(vm, parent); | ||
50 | nvgpu_vfree(vm->mm->g, parent->entries); | ||
51 | parent->entries = NULL; | ||
52 | } | ||
53 | |||
37 | u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size, | 54 | u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size, |
38 | enum gmmu_pgsz_gk20a pgsz_idx) | 55 | enum gmmu_pgsz_gk20a pgsz_idx) |
39 | 56 | ||
@@ -421,7 +438,7 @@ clean_up_allocators: | |||
421 | clean_up_page_tables: | 438 | clean_up_page_tables: |
422 | /* Cleans up nvgpu_vm_init_page_tables() */ | 439 | /* Cleans up nvgpu_vm_init_page_tables() */ |
423 | nvgpu_vfree(g, vm->pdb.entries); | 440 | nvgpu_vfree(g, vm->pdb.entries); |
424 | free_gmmu_pages(vm, &vm->pdb); | 441 | nvgpu_free_gmmu_pages(vm, &vm->pdb); |
425 | clean_up_vgpu_vm: | 442 | clean_up_vgpu_vm: |
426 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | 443 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION |
427 | if (g->is_virtual) | 444 | if (g->is_virtual) |
@@ -537,7 +554,7 @@ static void __nvgpu_vm_remove(struct vm_gk20a *vm) | |||
537 | if (nvgpu_alloc_initialized(&vm->user_lp)) | 554 | if (nvgpu_alloc_initialized(&vm->user_lp)) |
538 | nvgpu_alloc_destroy(&vm->user_lp); | 555 | nvgpu_alloc_destroy(&vm->user_lp); |
539 | 556 | ||
540 | gk20a_vm_free_entries(vm, &vm->pdb, 0); | 557 | nvgpu_vm_free_entries(vm, &vm->pdb, 0); |
541 | 558 | ||
542 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | 559 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION |
543 | if (g->is_virtual) | 560 | if (g->is_virtual) |