about summary refs log tree commit diff stats
path: root/drivers/gpu/drm/radeon/radeon_vm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/radeon/radeon_vm.c')
-rw-r--r--  drivers/gpu/drm/radeon/radeon_vm.c  91
1 file changed, 84 insertions, 7 deletions
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 2aae6ce49d32..f8d5b65932e5 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -658,6 +658,84 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev,
658} 658}
659 659
/**
 * radeon_vm_frag_ptes - add fragment information to PTEs
 *
 * @rdev: radeon_device pointer
 * @ib: IB for the update
 * @pe_start: first PTE to handle
 * @pe_end: last PTE to handle
 * @addr: addr those PTEs should point to
 * @flags: hw mapping flags
 *
 * Splits the [pe_start, pe_end) PTE range into an unaligned head, a
 * fragment-aligned middle (written with the fragment flag set), and an
 * unaligned tail, issuing one radeon_asic_vm_set_page() call per part.
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_frag_ptes(struct radeon_device *rdev,
				struct radeon_ib *ib,
				uint64_t pe_start, uint64_t pe_end,
				uint64_t addr, uint32_t flags)
{
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
	 */

	/* NI is optimized for 256KB fragments, SI and newer for 64KB */
	uint64_t frag_flags = rdev->family == CHIP_CAYMAN ?
			R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB;
	/*
	 * Alignment is expressed in bytes of PTE space; the /8 divisions
	 * below imply 8-byte PTE entries, so 0x200 bytes = 64 PTEs (256KB
	 * of 4KB pages) and 0x80 bytes = 16 PTEs (64KB).
	 */
	uint64_t frag_align = rdev->family == CHIP_CAYMAN ? 0x200 : 0x80;

	/* first and one-past-last PTE address of the fragment-aligned middle */
	uint64_t frag_start = ALIGN(pe_start, frag_align);
	uint64_t frag_end = pe_end & ~(frag_align - 1);

	unsigned count;

	/*
	 * System pages are not physically contiguous, invalid PTEs carry no
	 * mapping to merge, and a range too small to contain a whole aligned
	 * fragment (frag_start >= frag_end) cannot use the fragment flag:
	 * in all three cases fall back to plain 4KB PTEs for the whole range.
	 */
	if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) ||
	    (frag_start >= frag_end)) {

		count = (pe_end - pe_start) / 8;
		radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count,
					RADEON_GPU_PAGE_SIZE, flags);
		return;
	}

	/* handle the 4K area at the beginning (before the aligned middle) */
	if (pe_start != frag_start) {
		count = (frag_start - pe_start) / 8;
		radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count,
					RADEON_GPU_PAGE_SIZE, flags);
		/* advance the target address past the pages just written */
		addr += RADEON_GPU_PAGE_SIZE * count;
	}

	/* handle the area in the middle: full fragments, flag set */
	count = (frag_end - frag_start) / 8;
	radeon_asic_vm_set_page(rdev, ib, frag_start, addr, count,
				RADEON_GPU_PAGE_SIZE, flags | frag_flags);

	/* handle the 4K area at the end (after the aligned middle) */
	if (frag_end != pe_end) {
		addr += RADEON_GPU_PAGE_SIZE * count;
		count = (pe_end - frag_end) / 8;
		radeon_asic_vm_set_page(rdev, ib, frag_end, addr, count,
					RADEON_GPU_PAGE_SIZE, flags);
	}
}
737
738/**
661 * radeon_vm_update_ptes - make sure that page tables are valid 739 * radeon_vm_update_ptes - make sure that page tables are valid
662 * 740 *
663 * @rdev: radeon_device pointer 741 * @rdev: radeon_device pointer
@@ -703,10 +781,9 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
703 if ((last_pte + 8 * count) != pte) { 781 if ((last_pte + 8 * count) != pte) {
704 782
705 if (count) { 783 if (count) {
706 radeon_asic_vm_set_page(rdev, ib, last_pte, 784 radeon_vm_frag_ptes(rdev, ib, last_pte,
707 last_dst, count, 785 last_pte + 8 * count,
708 RADEON_GPU_PAGE_SIZE, 786 last_dst, flags);
709 flags);
710 } 787 }
711 788
712 count = nptes; 789 count = nptes;
@@ -721,9 +798,9 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
721 } 798 }
722 799
723 if (count) { 800 if (count) {
724 radeon_asic_vm_set_page(rdev, ib, last_pte, 801 radeon_vm_frag_ptes(rdev, ib, last_pte,
725 last_dst, count, 802 last_pte + 8 * count,
726 RADEON_GPU_PAGE_SIZE, flags); 803 last_dst, flags);
727 } 804 }
728} 805}
729 806