diff options
Diffstat (limited to 'drivers/gpu/drm/radeon/radeon_vm.c')
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_vm.c | 91 |
1 file changed, 84 insertions, 7 deletions
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 2aae6ce49d32..f8d5b65932e5 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c | |||
@@ -658,6 +658,84 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, | |||
658 | } | 658 | } |
659 | 659 | ||
660 | /** | 660 | /** |
661 | * radeon_vm_frag_ptes - add fragment information to PTEs | ||
662 | * | ||
663 | * @rdev: radeon_device pointer | ||
664 | * @ib: IB for the update | ||
665 | * @pe_start: first PTE to handle | ||
666 | * @pe_end: last PTE to handle | ||
667 | * @addr: addr those PTEs should point to | ||
668 | * @flags: hw mapping flags | ||
669 | * | ||
670 | * Global and local mutex must be locked! | ||
671 | */ | ||
672 | static void radeon_vm_frag_ptes(struct radeon_device *rdev, | ||
673 | struct radeon_ib *ib, | ||
674 | uint64_t pe_start, uint64_t pe_end, | ||
675 | uint64_t addr, uint32_t flags) | ||
676 | { | ||
677 | /** | ||
678 | * The MC L1 TLB supports variable sized pages, based on a fragment | ||
679 | * field in the PTE. When this field is set to a non-zero value, page | ||
680 | * granularity is increased from 4KB to (1 << (12 + frag)). The PTE | ||
681 | * flags are considered valid for all PTEs within the fragment range | ||
682 | * and corresponding mappings are assumed to be physically contiguous. | ||
683 | * | ||
684 | * The L1 TLB can store a single PTE for the whole fragment, | ||
685 | * significantly increasing the space available for translation | ||
686 | * caching. This leads to large improvements in throughput when the | ||
687 | * TLB is under pressure. | ||
688 | * | ||
689 | * The L2 TLB distributes small and large fragments into two | ||
690 | * asymmetric partitions. The large fragment cache is significantly | ||
691 | * larger. Thus, we try to use large fragments wherever possible. | ||
692 | * Userspace can support this by aligning virtual base address and | ||
693 | * allocation size to the fragment size. | ||
694 | */ | ||
695 | |||
696 | /* NI is optimized for 256KB fragments, SI and newer for 64KB */ | ||
697 | uint64_t frag_flags = rdev->family == CHIP_CAYMAN ? | ||
698 | R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB; | ||
699 | uint64_t frag_align = rdev->family == CHIP_CAYMAN ? 0x200 : 0x80; | ||
700 | |||
701 | uint64_t frag_start = ALIGN(pe_start, frag_align); | ||
702 | uint64_t frag_end = pe_end & ~(frag_align - 1); | ||
703 | |||
704 | unsigned count; | ||
705 | |||
706 | /* system pages are non continuously */ | ||
707 | if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) || | ||
708 | (frag_start >= frag_end)) { | ||
709 | |||
710 | count = (pe_end - pe_start) / 8; | ||
711 | radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count, | ||
712 | RADEON_GPU_PAGE_SIZE, flags); | ||
713 | return; | ||
714 | } | ||
715 | |||
716 | /* handle the 4K area at the beginning */ | ||
717 | if (pe_start != frag_start) { | ||
718 | count = (frag_start - pe_start) / 8; | ||
719 | radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count, | ||
720 | RADEON_GPU_PAGE_SIZE, flags); | ||
721 | addr += RADEON_GPU_PAGE_SIZE * count; | ||
722 | } | ||
723 | |||
724 | /* handle the area in the middle */ | ||
725 | count = (frag_end - frag_start) / 8; | ||
726 | radeon_asic_vm_set_page(rdev, ib, frag_start, addr, count, | ||
727 | RADEON_GPU_PAGE_SIZE, flags | frag_flags); | ||
728 | |||
729 | /* handle the 4K area at the end */ | ||
730 | if (frag_end != pe_end) { | ||
731 | addr += RADEON_GPU_PAGE_SIZE * count; | ||
732 | count = (pe_end - frag_end) / 8; | ||
733 | radeon_asic_vm_set_page(rdev, ib, frag_end, addr, count, | ||
734 | RADEON_GPU_PAGE_SIZE, flags); | ||
735 | } | ||
736 | } | ||
737 | |||
738 | /** | ||
661 | * radeon_vm_update_ptes - make sure that page tables are valid | 739 | * radeon_vm_update_ptes - make sure that page tables are valid |
662 | * | 740 | * |
663 | * @rdev: radeon_device pointer | 741 | * @rdev: radeon_device pointer |
@@ -703,10 +781,9 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, | |||
703 | if ((last_pte + 8 * count) != pte) { | 781 | if ((last_pte + 8 * count) != pte) { |
704 | 782 | ||
705 | if (count) { | 783 | if (count) { |
706 | radeon_asic_vm_set_page(rdev, ib, last_pte, | 784 | radeon_vm_frag_ptes(rdev, ib, last_pte, |
707 | last_dst, count, | 785 | last_pte + 8 * count, |
708 | RADEON_GPU_PAGE_SIZE, | 786 | last_dst, flags); |
709 | flags); | ||
710 | } | 787 | } |
711 | 788 | ||
712 | count = nptes; | 789 | count = nptes; |
@@ -721,9 +798,9 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, | |||
721 | } | 798 | } |
722 | 799 | ||
723 | if (count) { | 800 | if (count) { |
724 | radeon_asic_vm_set_page(rdev, ib, last_pte, | 801 | radeon_vm_frag_ptes(rdev, ib, last_pte, |
725 | last_dst, count, | 802 | last_pte + 8 * count, |
726 | RADEON_GPU_PAGE_SIZE, flags); | 803 | last_dst, flags); |
727 | } | 804 | } |
728 | } | 805 | } |
729 | 806 | ||