Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/nvgpu/common/mm/gmmu.c     | 517
-rw-r--r--  drivers/gpu/nvgpu/common/mm/vm.c       |  21
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c     | 521
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h     |   7
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/gmmu.h |   3
5 files changed, 539 insertions, 530 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index dc91cc2f..e63155f2 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -15,14 +15,81 @@
  */
 
 #include <nvgpu/log.h>
+#include <nvgpu/list.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/gmmu.h>
 #include <nvgpu/nvgpu_mem.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/page_allocator.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+#define gmmu_dbg(g, fmt, args...) \
+	nvgpu_log(g, gpu_dbg_map, fmt, ##args)
+#define gmmu_dbg_v(g, fmt, args...) \
+	nvgpu_log(g, gpu_dbg_map_v, fmt, ##args)
+
+static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry)
+{
+	FLUSH_CPU_DCACHE(entry->mem.cpu_va,
+			 sg_phys(entry->mem.priv.sgt->sgl),
+			 entry->mem.priv.sgt->sgl->length);
+	return 0;
+}
+
+static void unmap_gmmu_phys_pages(struct gk20a_mm_entry *entry)
+{
+	FLUSH_CPU_DCACHE(entry->mem.cpu_va,
+			 sg_phys(entry->mem.priv.sgt->sgl),
+			 entry->mem.priv.sgt->sgl->length);
+}
+
+static int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
+{
+	gk20a_dbg_fn("");
+
+	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
+		return map_gmmu_phys_pages(entry);
+
+	if (IS_ENABLED(CONFIG_ARM64)) {
+		if (entry->mem.aperture == APERTURE_VIDMEM)
+			return 0;
+
+		FLUSH_CPU_DCACHE(entry->mem.cpu_va,
+				 sg_phys(entry->mem.priv.sgt->sgl),
+				 entry->mem.size);
+	} else {
+		int err = nvgpu_mem_begin(g, &entry->mem);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
+{
+	gk20a_dbg_fn("");
+
+	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
+		unmap_gmmu_phys_pages(entry);
+		return;
+	}
+
+	if (IS_ENABLED(CONFIG_ARM64)) {
+		if (entry->mem.aperture == APERTURE_VIDMEM)
+			return;
+
+		FLUSH_CPU_DCACHE(entry->mem.cpu_va,
+				 sg_phys(entry->mem.priv.sgt->sgl),
+				 entry->mem.size);
+	} else {
+		nvgpu_mem_end(g, &entry->mem);
+	}
+}
+
 static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order,
 				 struct gk20a_mm_entry *entry)
 {
@@ -97,6 +164,44 @@ static int nvgpu_alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
 	return 0;
 }
 
+static void free_gmmu_phys_pages(struct vm_gk20a *vm,
+				 struct gk20a_mm_entry *entry)
+{
+	gk20a_dbg_fn("");
+
+	/* note: mem_desc slightly abused (wrt. nvgpu_free_gmmu_pages) */
+
+	free_pages((unsigned long)entry->mem.cpu_va, get_order(entry->mem.size));
+	entry->mem.cpu_va = NULL;
+
+	sg_free_table(entry->mem.priv.sgt);
+	nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt);
+	entry->mem.priv.sgt = NULL;
+	entry->mem.size = 0;
+	entry->mem.aperture = APERTURE_INVALID;
+}
+
+void nvgpu_free_gmmu_pages(struct vm_gk20a *vm,
+			   struct gk20a_mm_entry *entry)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+
+	gk20a_dbg_fn("");
+
+	if (!entry->mem.size)
+		return;
+
+	if (entry->woffset) /* fake shadow mem */
+		return;
+
+	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
+		free_gmmu_phys_pages(vm, entry);
+		return;
+	}
+
+	nvgpu_dma_free(g, &entry->mem);
+}
+
 /*
  * Allocate a phys contig region big enough for a full
  * sized gmmu page table for the given gmmu_page_size.
@@ -202,6 +307,9 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 	return vaddr;
 }
 
+/*
+ * Convenience wrapper over __nvgpu_gmmu_map() for non-fixed mappings.
+ */
 u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
 		   struct nvgpu_mem *mem,
 		   u64 size,
@@ -246,3 +354,412 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm, struct nvgpu_mem *mem, u64 gpu_va)
 
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
 }
+
+static int update_gmmu_level_locked(struct vm_gk20a *vm,
+				    struct gk20a_mm_entry *pte,
+				    enum gmmu_pgsz_gk20a pgsz_idx,
+				    struct scatterlist **sgl,
+				    u64 *offset,
+				    u64 *iova,
+				    u64 gpu_va, u64 gpu_end,
+				    u8 kind_v, u64 *ctag,
+				    bool cacheable, bool unmapped_pte,
+				    int rw_flag,
+				    bool sparse,
+				    int lvl,
+				    bool priv,
+				    enum nvgpu_aperture aperture)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl];
+	const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1];
+	int err = 0;
+	u32 pde_i;
+	u64 pde_size = 1ULL << (u64)l->lo_bit[pgsz_idx];
+	struct gk20a_mm_entry *next_pte = NULL, *prev_pte = NULL;
+
+	gk20a_dbg_fn("");
+
+	pde_i = (gpu_va & ((1ULL << ((u64)l->hi_bit[pgsz_idx]+1)) - 1ULL))
+		>> (u64)l->lo_bit[pgsz_idx];
+
+	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, l: %d, [%llx,%llx], iova=%llx",
+		  pgsz_idx, lvl, gpu_va, gpu_end-1, *iova);
+
+	while (gpu_va < gpu_end) {
+		u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end);
+
+		/* Allocate next level */
+		if (next_l->update_entry) {
+			if (!pte->entries) {
+				int num_entries =
+					1 <<
+					(l->hi_bit[pgsz_idx]
+					 - l->lo_bit[pgsz_idx] + 1);
+				pte->entries =
+					nvgpu_vzalloc(g,
+						sizeof(struct gk20a_mm_entry) *
+						num_entries);
+				if (!pte->entries)
+					return -ENOMEM;
+				pte->pgsz = pgsz_idx;
+				pte->num_entries = num_entries;
+			}
+			prev_pte = next_pte;
+			next_pte = pte->entries + pde_i;
+
+			if (!next_pte->mem.size) {
+				err = nvgpu_zalloc_gmmu_page_table(vm,
+					pgsz_idx, next_l, next_pte, prev_pte);
+				if (err)
+					return err;
+			}
+		}
+
+		err = l->update_entry(vm, pte, pde_i, pgsz_idx,
+				      sgl, offset, iova,
+				      kind_v, ctag, cacheable, unmapped_pte,
+				      rw_flag, sparse, priv, aperture);
+		if (err)
+			return err;
+
+		if (next_l->update_entry) {
+			/* get cpu access to the ptes */
+			err = map_gmmu_pages(g, next_pte);
+			if (err) {
+				nvgpu_err(g,
+					  "couldn't map ptes for update as=%d",
+					  vm_aspace_id(vm));
+				return err;
+			}
+			err = update_gmmu_level_locked(vm, next_pte,
+						       pgsz_idx,
+						       sgl,
+						       offset,
+						       iova,
+						       gpu_va,
+						       next,
+						       kind_v, ctag, cacheable, unmapped_pte,
+						       rw_flag, sparse, lvl+1, priv, aperture);
+			unmap_gmmu_pages(g, next_pte);
+
+			if (err)
+				return err;
+		}
+
+		pde_i++;
+		gpu_va = next;
+	}
+
+	gk20a_dbg_fn("done");
+
+	return 0;
+}
+
+/*
+ * This is the true top level GMMU mapping logic. This breaks down the incoming
+ * scatter gather table and does actual programming of GPU virtual address to
+ * physical* address.
+ *
+ * The update of each level of the page tables is farmed out to chip specific
+ * implementations. But the logic around that is generic to all chips. Every
+ * chip has some number of PDE levels and then a PTE level.
+ *
+ * Each chunk of the incoming SGT is sent to the chip specific implementation
+ * of page table update.
+ *
+ * [*] Note: the "physical" address may actually be an IO virtual address in
+ *     the case of SMMU usage.
+ */
+static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
+				   enum gmmu_pgsz_gk20a pgsz_idx,
+				   struct sg_table *sgt,
+				   u64 buffer_offset,
+				   u64 gpu_va, u64 gpu_end,
+				   u8 kind_v, u32 ctag_offset,
+				   bool cacheable, bool unmapped_pte,
+				   int rw_flag,
+				   bool sparse,
+				   bool priv,
+				   enum nvgpu_aperture aperture)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	int ctag_granularity = g->ops.fb.compression_page_size(g);
+	u64 ctag = (u64)ctag_offset * (u64)ctag_granularity;
+	u64 iova = 0;
+	u64 space_to_skip = buffer_offset;
+	u64 map_size = gpu_end - gpu_va;
+	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
+	int err;
+	struct scatterlist *sgl = NULL;
+	struct nvgpu_page_alloc *alloc = NULL;
+	struct page_alloc_chunk *chunk = NULL;
+	u64 length;
+
+	/* note: here we need to map kernel to small, since the
+	 * low-level mmu code assumes 0 is small and 1 is big pages */
+	if (pgsz_idx == gmmu_page_size_kernel)
+		pgsz_idx = gmmu_page_size_small;
+
+	if (space_to_skip & (page_size - 1))
+		return -EINVAL;
+
+	err = map_gmmu_pages(g, &vm->pdb);
+	if (err) {
+		nvgpu_err(g,
+			  "couldn't map ptes for update as=%d",
+			  vm_aspace_id(vm));
+		return err;
+	}
+
+	if (aperture == APERTURE_VIDMEM) {
+		gmmu_dbg_v(g, "vidmem map size_idx=%d, gpu_va=[%llx,%llx]",
+			   pgsz_idx, gpu_va, gpu_end-1);
+
+		if (sgt) {
+			alloc = get_vidmem_page_alloc(sgt->sgl);
+
+			nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
+						  page_alloc_chunk, list_entry) {
+				if (space_to_skip &&
+				    space_to_skip > chunk->length) {
+					space_to_skip -= chunk->length;
+				} else {
+					iova = chunk->base + space_to_skip;
+					length = chunk->length - space_to_skip;
+					length = min(length, map_size);
+					space_to_skip = 0;
+
+					err = update_gmmu_level_locked(vm,
+						&vm->pdb, pgsz_idx,
+						&sgl,
+						&space_to_skip,
+						&iova,
+						gpu_va, gpu_va + length,
+						kind_v, &ctag,
+						cacheable, unmapped_pte,
+						rw_flag, sparse, 0, priv,
+						aperture);
+					if (err)
+						break;
+
+					/* need to set explicit zero here */
+					space_to_skip = 0;
+					gpu_va += length;
+					map_size -= length;
+
+					if (!map_size)
+						break;
+				}
+			}
+		} else {
+			err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
+					&sgl,
+					&space_to_skip,
+					&iova,
+					gpu_va, gpu_end,
+					kind_v, &ctag,
+					cacheable, unmapped_pte, rw_flag,
+					sparse, 0, priv,
+					aperture);
+		}
+	} else {
+		gmmu_dbg_v(g,
+			   "pgsz=%-6d, gpu_va: %#-12llx +%#-6llx phys: %#-12llx "
+			   "buffer offset: %-4lld, nents: %d",
+			   page_size,
+			   gpu_va, gpu_end - gpu_va,
+			   sgt ? g->ops.mm.get_iova_addr(g, sgt->sgl, 0) : 0ULL,
+			   buffer_offset,
+			   sgt ? sgt->nents : 0);
+
+		if (sgt) {
+			iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0);
+			if (!vm->mm->bypass_smmu && iova) {
+				iova += space_to_skip;
+			} else {
+				sgl = sgt->sgl;
+
+				gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
+					  (u64)sg_phys(sgl),
+					  sgl->length);
+
+				while (space_to_skip && sgl &&
+				       space_to_skip + page_size > sgl->length) {
+					space_to_skip -= sgl->length;
+					sgl = sg_next(sgl);
+					gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
+						  (u64)sg_phys(sgl),
+						  sgl->length);
+				}
+
+				iova = sg_phys(sgl) + space_to_skip;
+			}
+		}
+
+		err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
+				&sgl,
+				&space_to_skip,
+				&iova,
+				gpu_va, gpu_end,
+				kind_v, &ctag,
+				cacheable, unmapped_pte, rw_flag,
+				sparse, 0, priv,
+				aperture);
+	}
+
+	unmap_gmmu_pages(g, &vm->pdb);
+
+	mb();
+
+	gk20a_dbg_fn("done");
+
+	return err;
+}
+
+/**
+ * gk20a_locked_gmmu_map - Map a buffer into the GMMU
+ *
+ * This is for non-vGPU chips. It's part of the HAL at the moment but really
+ * should not be. Chip specific stuff is handled at the PTE/PDE programming
+ * layer. The rest of the logic is essentially generic for all chips.
+ *
+ * To call this function you must have locked the VM lock: vm->update_gmmu_lock.
+ * However, note: this function is not called directly. It's used through the
+ * mm.gmmu_map() HAL. So before calling the mm.gmmu_map() HAL make sure you
+ * have the update_gmmu_lock acquired.
+ */
+u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
+			  u64 map_offset,
+			  struct sg_table *sgt,
+			  u64 buffer_offset,
+			  u64 size,
+			  int pgsz_idx,
+			  u8 kind_v,
+			  u32 ctag_offset,
+			  u32 flags,
+			  int rw_flag,
+			  bool clear_ctags,
+			  bool sparse,
+			  bool priv,
+			  struct vm_gk20a_mapping_batch *batch,
+			  enum nvgpu_aperture aperture)
+{
+	int err = 0;
+	bool allocated = false;
+	struct gk20a *g = gk20a_from_vm(vm);
+	int ctag_granularity = g->ops.fb.compression_page_size(g);
+	u32 ctag_lines = DIV_ROUND_UP_ULL(size, ctag_granularity);
+
+	/* Allocate (or validate when map_offset != 0) the virtual address. */
+	if (!map_offset) {
+		map_offset = __nvgpu_vm_alloc_va(vm, size,
+						 pgsz_idx);
+		if (!map_offset) {
+			nvgpu_err(g, "failed to allocate va space");
+			err = -ENOMEM;
+			goto fail_alloc;
+		}
+		allocated = true;
+	}
+
+	gmmu_dbg(g,
+		 "gv: 0x%04x_%08x + 0x%-7llx "
+		 "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
+		 "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
+		 "kind=0x%x flags=0x%x apt=%s",
+		 u64_hi32(map_offset), u64_lo32(map_offset), size,
+		 sgt ? u64_hi32((u64)sg_dma_address(sgt->sgl)) : 0,
+		 sgt ? u64_lo32((u64)sg_dma_address(sgt->sgl)) : 0,
+		 sgt ? u64_hi32((u64)sg_phys(sgt->sgl)) : 0,
+		 sgt ? u64_lo32((u64)sg_phys(sgt->sgl)) : 0,
+		 vm->gmmu_page_sizes[pgsz_idx] >> 10, vm_aspace_id(vm),
+		 ctag_lines, ctag_offset,
+		 kind_v, flags, nvgpu_aperture_str(aperture));
+
+	err = update_gmmu_ptes_locked(vm, pgsz_idx,
+				      sgt,
+				      buffer_offset,
+				      map_offset, map_offset + size,
+				      kind_v,
+				      ctag_offset,
+				      flags &
+				      NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+				      flags &
+				      NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE,
+				      rw_flag,
+				      sparse,
+				      priv,
+				      aperture);
+	if (err) {
+		nvgpu_err(g, "failed to update ptes on map");
+		goto fail_validate;
+	}
+
+	if (!batch)
+		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
+	else
+		batch->need_tlb_invalidate = true;
+
+	return map_offset;
+fail_validate:
+	if (allocated)
+		__nvgpu_vm_free_va(vm, map_offset, pgsz_idx);
+fail_alloc:
+	nvgpu_err(g, "%s: failed with err=%d", __func__, err);
+	return 0;
+}
+
+void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
+			     u64 vaddr,
+			     u64 size,
+			     int pgsz_idx,
+			     bool va_allocated,
+			     int rw_flag,
+			     bool sparse,
+			     struct vm_gk20a_mapping_batch *batch)
+{
+	int err = 0;
+	struct gk20a *g = gk20a_from_vm(vm);
+
+	if (va_allocated) {
+		err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
+		if (err) {
+			nvgpu_err(g, "failed to free va");
+			return;
+		}
+	}
+
+	/* unmap here needs to know the page size we assigned at mapping */
+	err = update_gmmu_ptes_locked(vm,
+				      pgsz_idx,
+				      NULL, /* n/a for unmap */
+				      0,
+				      vaddr,
+				      vaddr + size,
+				      0, 0, false /* n/a for unmap */,
+				      false, rw_flag,
+				      sparse, 0,
+				      APERTURE_INVALID); /* don't care for unmap */
+	if (err)
+		nvgpu_err(g, "failed to update gmmu ptes on unmap");
+
+	/* Flush the L2 so any dirty lines are written out *now*. Also, as
+	 * we could potentially be switching this buffer from nonvolatile
+	 * (L2 cacheable) to volatile (L2 non-cacheable) at some point in the
+	 * future, we need to invalidate the L2: e.g. switching from a render
+	 * buffer unmap (here) to later using the same memory for GMMU PTEs.
+	 * Note the positioning of this relative to any SMMU unmapping
+	 * (below). */
+
+	if (!batch) {
+		gk20a_mm_l2_flush(g, true);
+		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
+	} else {
+		if (!batch->gpu_l2_flushed) {
+			gk20a_mm_l2_flush(g, true);
+			batch->gpu_l2_flushed = true;
+		}
+		batch->need_tlb_invalidate = true;
+	}
+}
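
The recursion in update_gmmu_level_locked() above is the heart of the map path: each level covers pde_size-aligned spans of the virtual range, and the walk descends whenever the next level has an update_entry callback, programming PTEs only at the leaf. A minimal, self-contained sketch of that walk, using a toy struct level and hypothetical names (walk, entry_size) rather than the nvgpu types:

    #include <stdint.h>
    #include <stdio.h>

    /* Toy page-table walk; lo_bit mirrors the gk20a_mmu_level field,
     * everything else is illustrative. */
    struct level {
    	int lo_bit; /* lowest VA bit covered by one entry at this level */
    };

    static void walk(const struct level *levels, int nlevels, int lvl,
    		 uint64_t va, uint64_t end)
    {
    	uint64_t entry_size = 1ULL << levels[lvl].lo_bit;

    	while (va < end) {
    		/* End of this entry's coverage, clamped to the map end. */
    		uint64_t next = (va + entry_size) & ~(entry_size - 1);

    		if (next > end)
    			next = end;

    		if (lvl + 1 < nlevels)
    			walk(levels, nlevels, lvl + 1, va, next);
    		else
    			printf("PTE for [%#llx, %#llx)\n",
    			       (unsigned long long)va,
    			       (unsigned long long)next);

    		va = next;
    	}
    }

    int main(void)
    {
    	/* One PDE level covering 64 MB spans, one 4 KB PTE level. */
    	struct level lvls[] = { { 26 }, { 12 } };

    	/* A range that straddles a PDE boundary at 0x4000000. */
    	walk(lvls, 2, 0, 0x4000000 - 0x2000, 0x4000000 + 0x3000);
    	return 0;
    }

The real function additionally allocates each child table on first touch (nvgpu_zalloc_gmmu_page_table()) and wraps the recursion in map_gmmu_pages()/unmap_gmmu_pages() so the CPU can write the entries.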
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index e24d40bf..5ba386c9 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -18,6 +18,7 @@
 #include <nvgpu/dma.h>
 #include <nvgpu/vm.h>
 #include <nvgpu/vm_area.h>
+#include <nvgpu/gmmu.h>
 #include <nvgpu/lock.h>
 #include <nvgpu/list.h>
 #include <nvgpu/rbtree.h>
@@ -34,6 +35,22 @@ int vm_aspace_id(struct vm_gk20a *vm)
 	return vm->as_share ? vm->as_share->id : -1;
 }
 
+static void nvgpu_vm_free_entries(struct vm_gk20a *vm,
+				  struct gk20a_mm_entry *parent,
+				  int level)
+{
+	int i;
+
+	if (parent->entries)
+		for (i = 0; i < parent->num_entries; i++)
+			nvgpu_vm_free_entries(vm, &parent->entries[i], level+1);
+
+	if (parent->mem.size)
+		nvgpu_free_gmmu_pages(vm, parent);
+	nvgpu_vfree(vm->mm->g, parent->entries);
+	parent->entries = NULL;
+}
+
 u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size,
 			enum gmmu_pgsz_gk20a pgsz_idx)
 {
@@ -421,7 +438,7 @@ clean_up_allocators:
 clean_up_page_tables:
 	/* Cleans up nvgpu_vm_init_page_tables() */
 	nvgpu_vfree(g, vm->pdb.entries);
-	free_gmmu_pages(vm, &vm->pdb);
+	nvgpu_free_gmmu_pages(vm, &vm->pdb);
 clean_up_vgpu_vm:
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	if (g->is_virtual)
@@ -537,7 +554,7 @@ static void __nvgpu_vm_remove(struct vm_gk20a *vm)
 	if (nvgpu_alloc_initialized(&vm->user_lp))
 		nvgpu_alloc_destroy(&vm->user_lp);
 
-	gk20a_vm_free_entries(vm, &vm->pdb, 0);
+	nvgpu_vm_free_entries(vm, &vm->pdb, 0);
 
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	if (g->is_virtual)
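
nvgpu_vm_free_entries(), now private to vm.c, tears the page-table tree down in post-order: all children of a level are freed before that level's own backing, so no PDE ever points at freed page-table memory. A stripped-down illustration of the same pattern, with a hypothetical struct node and plain malloc()/free() standing in for the nvgpu allocators:

    #include <stdlib.h>

    struct node {
    	struct node *entries; /* child table array; NULL at the PTE level */
    	int num_entries;
    	void *backing;        /* page-table backing memory, may be NULL */
    };

    static void free_levels(struct node *parent, int level)
    {
    	int i;

    	/* Recurse into children first (post-order). */
    	if (parent->entries)
    		for (i = 0; i < parent->num_entries; i++)
    			free_levels(&parent->entries[i], level + 1);

    	if (parent->backing)
    		free(parent->backing); /* analogous to nvgpu_free_gmmu_pages() */

    	free(parent->entries);
    	parent->entries = NULL;
    }

As in the patch, the level argument only tracks recursion depth.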
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index a1873a30..e7bcf6f0 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -124,15 +124,6 @@ struct nvgpu_page_alloc *get_vidmem_page_alloc(struct scatterlist *sgl)
  *
  */
 
-static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
-				   enum gmmu_pgsz_gk20a pgsz_idx,
-				   struct sg_table *sgt, u64 buffer_offset,
-				   u64 first_vaddr, u64 last_vaddr,
-				   u8 kind_v, u32 ctag_offset, bool cacheable,
-				   bool umapped_pte, int rw_flag,
-				   bool sparse,
-				   bool priv,
-				   enum nvgpu_aperture aperture);
 static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
@@ -781,104 +772,6 @@ void gk20a_init_mm_ce_context(struct gk20a *g)
 #endif
 }
 
-static void free_gmmu_phys_pages(struct vm_gk20a *vm,
-				 struct gk20a_mm_entry *entry)
-{
-	gk20a_dbg_fn("");
-
-	/* note: mem_desc slightly abused (wrt. free_gmmu_pages) */
-
-	free_pages((unsigned long)entry->mem.cpu_va, get_order(entry->mem.size));
-	entry->mem.cpu_va = NULL;
-
-	sg_free_table(entry->mem.priv.sgt);
-	nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt);
-	entry->mem.priv.sgt = NULL;
-	entry->mem.size = 0;
-	entry->mem.aperture = APERTURE_INVALID;
-}
-
-static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry)
-{
-	FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-			 sg_phys(entry->mem.priv.sgt->sgl),
-			 entry->mem.priv.sgt->sgl->length);
-	return 0;
-}
-
-static void unmap_gmmu_phys_pages(struct gk20a_mm_entry *entry)
-{
-	FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-			 sg_phys(entry->mem.priv.sgt->sgl),
-			 entry->mem.priv.sgt->sgl->length);
-}
-
-void free_gmmu_pages(struct vm_gk20a *vm,
-		     struct gk20a_mm_entry *entry)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-
-	gk20a_dbg_fn("");
-
-	if (!entry->mem.size)
-		return;
-
-	if (entry->woffset) /* fake shadow mem */
-		return;
-
-	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
-		free_gmmu_phys_pages(vm, entry);
-		return;
-	}
-
-	nvgpu_dma_free(g, &entry->mem);
-}
-
-int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
-{
-	gk20a_dbg_fn("");
-
-	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
-		return map_gmmu_phys_pages(entry);
-
-	if (IS_ENABLED(CONFIG_ARM64)) {
-		if (entry->mem.aperture == APERTURE_VIDMEM)
-			return 0;
-
-		FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-				 sg_phys(entry->mem.priv.sgt->sgl),
-				 entry->mem.size);
-	} else {
-		int err = nvgpu_mem_begin(g, &entry->mem);
-
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
-{
-	gk20a_dbg_fn("");
-
-	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
-		unmap_gmmu_phys_pages(entry);
-		return;
-	}
-
-	if (IS_ENABLED(CONFIG_ARM64)) {
-		if (entry->mem.aperture == APERTURE_VIDMEM)
-			return;
-
-		FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-				 sg_phys(entry->mem.priv.sgt->sgl),
-				 entry->mem.size);
-	} else {
-		nvgpu_mem_end(g, &entry->mem);
-	}
-}
-
 int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
 {
 	return vm->mmu_levels[0].lo_bit[0];
@@ -909,21 +802,6 @@ static u32 pte_from_index(u32 i)
 	return i * gmmu_pte__size_v() / sizeof(u32);
 }
 
-u32 pte_index_from_vaddr(struct vm_gk20a *vm,
-			 u64 addr, enum gmmu_pgsz_gk20a pgsz_idx)
-{
-	u32 ret;
-	/* mask off pde part */
-	addr = addr & ((1ULL << gk20a_mm_pde_coverage_bit_count(vm)) - 1ULL);
-
-	/* shift over to get pte index. note assumption that pte index
-	 * doesn't leak over into the high 32b */
-	ret = (u32)(addr >> ilog2(vm->gmmu_page_sizes[pgsz_idx]));
-
-	gk20a_dbg(gpu_dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret);
-	return ret;
-}
-
 int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
 			 struct nvgpu_mapped_buf ***mapped_buffers,
 			 int *num_buffers)
@@ -1096,141 +974,6 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
 	return 0;
 }
 
-u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
-			  u64 map_offset,
-			  struct sg_table *sgt,
-			  u64 buffer_offset,
-			  u64 size,
-			  int pgsz_idx,
-			  u8 kind_v,
-			  u32 ctag_offset,
-			  u32 flags,
-			  int rw_flag,
-			  bool clear_ctags,
-			  bool sparse,
-			  bool priv,
-			  struct vm_gk20a_mapping_batch *batch,
-			  enum nvgpu_aperture aperture)
-{
-	int err = 0;
-	bool allocated = false;
-	struct gk20a *g = gk20a_from_vm(vm);
-	int ctag_granularity = g->ops.fb.compression_page_size(g);
-	u32 ctag_lines = DIV_ROUND_UP_ULL(size, ctag_granularity);
-
-	/* Allocate (or validate when map_offset != 0) the virtual address. */
-	if (!map_offset) {
-		map_offset = __nvgpu_vm_alloc_va(vm, size,
-						 pgsz_idx);
-		if (!map_offset) {
-			nvgpu_err(g, "failed to allocate va space");
-			err = -ENOMEM;
-			goto fail_alloc;
-		}
-		allocated = true;
-	}
-
-	gk20a_dbg(gpu_dbg_map,
-		  "gv: 0x%04x_%08x + 0x%-7llx "
-		  "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
-		  "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
-		  "kind=0x%x flags=0x%x apt=%s",
-		  u64_hi32(map_offset), u64_lo32(map_offset), size,
-		  sgt ? u64_hi32((u64)sg_dma_address(sgt->sgl)) : 0,
-		  sgt ? u64_lo32((u64)sg_dma_address(sgt->sgl)) : 0,
-		  sgt ? u64_hi32((u64)sg_phys(sgt->sgl)) : 0,
-		  sgt ? u64_lo32((u64)sg_phys(sgt->sgl)) : 0,
-		  vm->gmmu_page_sizes[pgsz_idx] >> 10, vm_aspace_id(vm),
-		  ctag_lines, ctag_offset,
-		  kind_v, flags, nvgpu_aperture_str(aperture));
-
-	err = update_gmmu_ptes_locked(vm, pgsz_idx,
-				      sgt,
-				      buffer_offset,
-				      map_offset, map_offset + size,
-				      kind_v,
-				      ctag_offset,
-				      flags &
-				      NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-				      flags &
-				      NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE,
-				      rw_flag,
-				      sparse,
-				      priv,
-				      aperture);
-	if (err) {
-		nvgpu_err(g, "failed to update ptes on map");
-		goto fail_validate;
-	}
-
-	if (!batch)
-		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
-	else
-		batch->need_tlb_invalidate = true;
-
-	return map_offset;
-fail_validate:
-	if (allocated)
-		__nvgpu_vm_free_va(vm, map_offset, pgsz_idx);
-fail_alloc:
-	nvgpu_err(g, "%s: failed with err=%d", __func__, err);
-	return 0;
-}
-
-void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
-			     u64 vaddr,
-			     u64 size,
-			     int pgsz_idx,
-			     bool va_allocated,
-			     int rw_flag,
-			     bool sparse,
-			     struct vm_gk20a_mapping_batch *batch)
-{
-	int err = 0;
-	struct gk20a *g = gk20a_from_vm(vm);
-
-	if (va_allocated) {
-		err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
-		if (err) {
-			nvgpu_err(g, "failed to free va");
-			return;
-		}
-	}
-
-	/* unmap here needs to know the page size we assigned at mapping */
-	err = update_gmmu_ptes_locked(vm,
-				      pgsz_idx,
-				      NULL, /* n/a for unmap */
-				      0,
-				      vaddr,
-				      vaddr + size,
-				      0, 0, false /* n/a for unmap */,
-				      false, rw_flag,
-				      sparse, 0,
-				      APERTURE_INVALID); /* don't care for unmap */
-	if (err)
-		nvgpu_err(g, "failed to update gmmu ptes on unmap");
-
-	/* flush l2 so any dirty lines are written out *now*.
-	 * also as we could potentially be switching this buffer
-	 * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at
-	 * some point in the future we need to invalidate l2. e.g. switching
-	 * from a render buffer unmap (here) to later using the same memory
-	 * for gmmu ptes. note the positioning of this relative to any smmu
-	 * unmapping (below). */
-
-	if (!batch) {
-		gk20a_mm_l2_flush(g, true);
-		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
-	} else {
-		if (!batch->gpu_l2_flushed) {
-			gk20a_mm_l2_flush(g, true);
-			batch->gpu_l2_flushed = true;
-		}
-		batch->need_tlb_invalidate = true;
-	}
-}
-
 enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
 					  struct dma_buf *dmabuf)
 {
@@ -2036,254 +1779,6 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 	return 0;
 }
 
-static int update_gmmu_level_locked(struct vm_gk20a *vm,
-				    struct gk20a_mm_entry *pte,
-				    enum gmmu_pgsz_gk20a pgsz_idx,
-				    struct scatterlist **sgl,
-				    u64 *offset,
-				    u64 *iova,
-				    u64 gpu_va, u64 gpu_end,
-				    u8 kind_v, u64 *ctag,
-				    bool cacheable, bool unmapped_pte,
-				    int rw_flag,
-				    bool sparse,
-				    int lvl,
-				    bool priv,
-				    enum nvgpu_aperture aperture)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl];
-	const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1];
-	int err = 0;
-	u32 pde_i;
-	u64 pde_size = 1ULL << (u64)l->lo_bit[pgsz_idx];
-	struct gk20a_mm_entry *next_pte = NULL, *prev_pte = NULL;
-
-	gk20a_dbg_fn("");
-
-	pde_i = (gpu_va & ((1ULL << ((u64)l->hi_bit[pgsz_idx]+1)) - 1ULL))
-		>> (u64)l->lo_bit[pgsz_idx];
-
-	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, l: %d, [%llx,%llx], iova=%llx",
-		  pgsz_idx, lvl, gpu_va, gpu_end-1, *iova);
-
-	while (gpu_va < gpu_end) {
-		u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end);
-
-		/* Allocate next level */
-		if (next_l->update_entry) {
-			if (!pte->entries) {
-				int num_entries =
-					1 <<
-					(l->hi_bit[pgsz_idx]
-					 - l->lo_bit[pgsz_idx] + 1);
-				pte->entries =
-					nvgpu_vzalloc(g,
-						sizeof(struct gk20a_mm_entry) *
-						num_entries);
-				if (!pte->entries)
-					return -ENOMEM;
-				pte->pgsz = pgsz_idx;
-				pte->num_entries = num_entries;
-			}
-			prev_pte = next_pte;
-			next_pte = pte->entries + pde_i;
-
-			if (!next_pte->mem.size) {
-				err = nvgpu_zalloc_gmmu_page_table(vm,
-					pgsz_idx, next_l, next_pte, prev_pte);
-				if (err)
-					return err;
-			}
-		}
-
-		err = l->update_entry(vm, pte, pde_i, pgsz_idx,
-				      sgl, offset, iova,
-				      kind_v, ctag, cacheable, unmapped_pte,
-				      rw_flag, sparse, priv, aperture);
-		if (err)
-			return err;
-
-		if (next_l->update_entry) {
-			/* get cpu access to the ptes */
-			err = map_gmmu_pages(g, next_pte);
-			if (err) {
-				nvgpu_err(g,
-					  "couldn't map ptes for update as=%d",
-					  vm_aspace_id(vm));
-				return err;
-			}
-			err = update_gmmu_level_locked(vm, next_pte,
-						       pgsz_idx,
-						       sgl,
-						       offset,
-						       iova,
-						       gpu_va,
-						       next,
-						       kind_v, ctag, cacheable, unmapped_pte,
-						       rw_flag, sparse, lvl+1, priv, aperture);
-			unmap_gmmu_pages(g, next_pte);
-
-			if (err)
-				return err;
-		}
-
-		pde_i++;
-		gpu_va = next;
-	}
-
-	gk20a_dbg_fn("done");
-
-	return 0;
-}
-
-static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
-				   enum gmmu_pgsz_gk20a pgsz_idx,
-				   struct sg_table *sgt,
-				   u64 buffer_offset,
-				   u64 gpu_va, u64 gpu_end,
-				   u8 kind_v, u32 ctag_offset,
-				   bool cacheable, bool unmapped_pte,
-				   int rw_flag,
-				   bool sparse,
-				   bool priv,
-				   enum nvgpu_aperture aperture)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	int ctag_granularity = g->ops.fb.compression_page_size(g);
-	u64 ctag = (u64)ctag_offset * (u64)ctag_granularity;
-	u64 iova = 0;
-	u64 space_to_skip = buffer_offset;
-	u64 map_size = gpu_end - gpu_va;
-	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
-	int err;
-	struct scatterlist *sgl = NULL;
-	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *chunk = NULL;
-	u64 length;
-
-	/* note: here we need to map kernel to small, since the
-	 * low-level mmu code assumes 0 is small and 1 is big pages */
-	if (pgsz_idx == gmmu_page_size_kernel)
-		pgsz_idx = gmmu_page_size_small;
-
-	if (space_to_skip & (page_size - 1))
-		return -EINVAL;
-
-	err = map_gmmu_pages(g, &vm->pdb);
-	if (err) {
-		nvgpu_err(g,
-			  "couldn't map ptes for update as=%d",
-			  vm_aspace_id(vm));
-		return err;
-	}
-
-	if (aperture == APERTURE_VIDMEM) {
-		gk20a_dbg(gpu_dbg_map_v, "vidmem map size_idx=%d, gpu_va=[%llx,%llx], alloc=%llx",
-			  pgsz_idx, gpu_va, gpu_end-1, iova);
-
-		if (sgt) {
-			alloc = get_vidmem_page_alloc(sgt->sgl);
-
-			nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
-						  page_alloc_chunk, list_entry) {
-				if (space_to_skip &&
-				    space_to_skip > chunk->length) {
-					space_to_skip -= chunk->length;
-				} else {
-					iova = chunk->base + space_to_skip;
-					length = chunk->length - space_to_skip;
-					length = min(length, map_size);
-					space_to_skip = 0;
-
-					err = update_gmmu_level_locked(vm,
-						&vm->pdb, pgsz_idx,
-						&sgl,
-						&space_to_skip,
-						&iova,
-						gpu_va, gpu_va + length,
-						kind_v, &ctag,
-						cacheable, unmapped_pte,
-						rw_flag, sparse, 0, priv,
-						aperture);
-					if (err)
-						break;
-
-					/* need to set explicit zero here */
-					space_to_skip = 0;
-					gpu_va += length;
-					map_size -= length;
-
-					if (!map_size)
-						break;
-				}
-			}
-		} else {
-			err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
-					&sgl,
-					&space_to_skip,
-					&iova,
-					gpu_va, gpu_end,
-					kind_v, &ctag,
-					cacheable, unmapped_pte, rw_flag,
-					sparse, 0, priv,
-					aperture);
-		}
-	} else {
-		gk20a_dbg(gpu_dbg_pte, "size_idx=%d, iova=%llx, buffer offset %lld, nents %d",
-			  pgsz_idx,
-			  sgt ? g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0)
-			      : 0ULL,
-			  buffer_offset,
-			  sgt ? sgt->nents : 0);
-
-		gk20a_dbg(gpu_dbg_map_v, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx",
-			  pgsz_idx, gpu_va, gpu_end-1, iova);
-
-		if (sgt) {
-			iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0);
-			if (!vm->mm->bypass_smmu && iova) {
-				iova += space_to_skip;
-			} else {
-				sgl = sgt->sgl;
-
-				gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
-					  (u64)sg_phys(sgl),
-					  sgl->length);
-
-				while (space_to_skip && sgl &&
-				       space_to_skip + page_size > sgl->length) {
-					space_to_skip -= sgl->length;
-					sgl = sg_next(sgl);
-					gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
-						  (u64)sg_phys(sgl),
-						  sgl->length);
-				}
-
-				iova = sg_phys(sgl) + space_to_skip;
-			}
-		}
-
-		err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
-				&sgl,
-				&space_to_skip,
-				&iova,
-				gpu_va, gpu_end,
-				kind_v, &ctag,
-				cacheable, unmapped_pte, rw_flag,
-				sparse, 0, priv,
-				aperture);
-	}
-
-	unmap_gmmu_pages(g, &vm->pdb);
-
-	smp_mb();
-
-	gk20a_dbg_fn("done");
-
-	return err;
-}
-
 /* NOTE! mapped_buffers lock must be held */
 void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
 			   struct vm_gk20a_mapping_batch *batch)
@@ -2341,22 +1836,6 @@ void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
 	return;
 }
 
-void gk20a_vm_free_entries(struct vm_gk20a *vm,
-			   struct gk20a_mm_entry *parent,
-			   int level)
-{
-	int i;
-
-	if (parent->entries)
-		for (i = 0; i < parent->num_entries; i++)
-			gk20a_vm_free_entries(vm, &parent->entries[i], level+1);
-
-	if (parent->mem.size)
-		free_gmmu_pages(vm, parent);
-	nvgpu_vfree(vm->mm->g, parent->entries);
-	parent->entries = NULL;
-}
-
 const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
 	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
 	 .lo_bit = {26, 26},
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 7e2ba051..2581bc0d 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -433,17 +433,10 @@ int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
 int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
 			   u64 offset, struct gk20a_buffer_state **state);
 
-int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry);
-void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry);
 void pde_range_from_vaddr_range(struct vm_gk20a *vm,
 				u64 addr_lo, u64 addr_hi,
 				u32 *pde_lo, u32 *pde_hi);
 int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm);
-u32 pte_index_from_vaddr(struct vm_gk20a *vm,
-			 u64 addr, enum gmmu_pgsz_gk20a pgsz_idx);
-void free_gmmu_pages(struct vm_gk20a *vm,
-		     struct gk20a_mm_entry *entry);
-
 u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g);
 
 struct gpu_ops;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 45c5def4..ed152cd8 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -105,4 +105,7 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
 		      struct nvgpu_mem *mem,
 		      u64 gpu_va);
 
+void nvgpu_free_gmmu_pages(struct vm_gk20a *vm,
+			   struct gk20a_mm_entry *entry);
+
 #endif
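
One detail shared by both copies of update_gmmu_ptes_locked() in this patch: a nonzero buffer_offset is consumed by skipping whole scatter-gather chunks before any PTE is written, leaving iova pointing offset bytes into the buffer. A toy version of that skip, with a hypothetical struct chunk standing in for the scatterlist/page_alloc_chunk types:

    #include <stdint.h>

    struct chunk {
    	uint64_t base;      /* physical (or IOVA) start of this chunk */
    	uint64_t length;    /* bytes in this chunk */
    	struct chunk *next;
    };

    /* Walk the chunk list until `offset` falls inside a chunk, then
     * return the address `offset` bytes into the buffer; 0 if the
     * offset is past the end of the list. */
    static uint64_t addr_at_offset(struct chunk *c, uint64_t offset)
    {
    	while (c && offset >= c->length) {
    		offset -= c->length;
    		c = c->next;
    	}
    	return c ? c->base + offset : 0;
    }

The kernel versions add guards on top of this: the vidmem loop clamps each mapping to the remaining chunk length, and the SMMU path skips the walk entirely by adding the offset to one contiguous IOVA. The underlying skip logic is the same.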