diff options
Diffstat (limited to 'drivers/gpu/drm/radeon')
-rw-r--r-- | drivers/gpu/drm/radeon/cik.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/ni.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon.h | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_vm.c | 91 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/si.c | 5 |
5 files changed, 98 insertions, 9 deletions
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 199eb194716f..dbb5b2e17c7c 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c | |||
@@ -5328,6 +5328,7 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev) | |||
5328 | WREG32(MC_VM_MX_L1_TLB_CNTL, | 5328 | WREG32(MC_VM_MX_L1_TLB_CNTL, |
5329 | (0xA << 7) | | 5329 | (0xA << 7) | |
5330 | ENABLE_L1_TLB | | 5330 | ENABLE_L1_TLB | |
5331 | ENABLE_L1_FRAGMENT_PROCESSING | | ||
5331 | SYSTEM_ACCESS_MODE_NOT_IN_SYS | | 5332 | SYSTEM_ACCESS_MODE_NOT_IN_SYS | |
5332 | ENABLE_ADVANCED_DRIVER_MODEL | | 5333 | ENABLE_ADVANCED_DRIVER_MODEL | |
5333 | SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); | 5334 | SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); |
@@ -5340,7 +5341,8 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev) | |||
5340 | CONTEXT1_IDENTITY_ACCESS_MODE(1)); | 5341 | CONTEXT1_IDENTITY_ACCESS_MODE(1)); |
5341 | WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); | 5342 | WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); |
5342 | WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | | 5343 | WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | |
5343 | L2_CACHE_BIGK_FRAGMENT_SIZE(6)); | 5344 | BANK_SELECT(4) | |
5345 | L2_CACHE_BIGK_FRAGMENT_SIZE(4)); | ||
5344 | /* setup context0 */ | 5346 | /* setup context0 */ |
5345 | WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); | 5347 | WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); |
5346 | WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); | 5348 | WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); |
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index d246e043421a..5e8db9bccba1 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c | |||
@@ -1228,12 +1228,14 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev) | |||
1228 | SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); | 1228 | SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); |
1229 | /* Setup L2 cache */ | 1229 | /* Setup L2 cache */ |
1230 | WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | | 1230 | WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | |
1231 | ENABLE_L2_FRAGMENT_PROCESSING | | ||
1231 | ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | | 1232 | ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | |
1232 | ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | | 1233 | ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | |
1233 | EFFECTIVE_L2_QUEUE_SIZE(7) | | 1234 | EFFECTIVE_L2_QUEUE_SIZE(7) | |
1234 | CONTEXT1_IDENTITY_ACCESS_MODE(1)); | 1235 | CONTEXT1_IDENTITY_ACCESS_MODE(1)); |
1235 | WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); | 1236 | WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); |
1236 | WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | | 1237 | WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | |
1238 | BANK_SELECT(6) | | ||
1237 | L2_CACHE_BIGK_FRAGMENT_SIZE(6)); | 1239 | L2_CACHE_BIGK_FRAGMENT_SIZE(6)); |
1238 | /* setup context0 */ | 1240 | /* setup context0 */ |
1239 | WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); | 1241 | WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); |
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b58e1afdda76..325f3a586cb7 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h | |||
@@ -848,6 +848,11 @@ struct radeon_mec { | |||
848 | #define R600_PTE_READABLE (1 << 5) | 848 | #define R600_PTE_READABLE (1 << 5) |
849 | #define R600_PTE_WRITEABLE (1 << 6) | 849 | #define R600_PTE_WRITEABLE (1 << 6) |
850 | 850 | ||
851 | /* PTE (Page Table Entry) fragment field for different page sizes */ | ||
852 | #define R600_PTE_FRAG_4KB (0 << 7) | ||
853 | #define R600_PTE_FRAG_64KB (4 << 7) | ||
854 | #define R600_PTE_FRAG_256KB (6 << 7) | ||
855 | |||
851 | struct radeon_vm_pt { | 856 | struct radeon_vm_pt { |
852 | struct radeon_bo *bo; | 857 | struct radeon_bo *bo; |
853 | uint64_t addr; | 858 | uint64_t addr; |
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 2aae6ce49d32..f8d5b65932e5 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c | |||
@@ -658,6 +658,84 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, | |||
658 | } | 658 | } |
659 | 659 | ||
660 | /** | 660 | /** |
661 | * radeon_vm_frag_ptes - add fragment information to PTEs | ||
662 | * | ||
663 | * @rdev: radeon_device pointer | ||
664 | * @ib: IB for the update | ||
665 | * @pe_start: first PTE to handle | ||
666 | * @pe_end: end of the PTE range to handle (exclusive) | ||
667 | * @addr: addr those PTEs should point to | ||
668 | * @flags: hw mapping flags | ||
669 | * | ||
670 | * Global and local mutex must be locked! | ||
671 | */ | ||
672 | static void radeon_vm_frag_ptes(struct radeon_device *rdev, | ||
673 | struct radeon_ib *ib, | ||
674 | uint64_t pe_start, uint64_t pe_end, | ||
675 | uint64_t addr, uint32_t flags) | ||
676 | { | ||
677 | /** | ||
678 | * The MC L1 TLB supports variable sized pages, based on a fragment | ||
679 | * field in the PTE. When this field is set to a non-zero value, page | ||
680 | * granularity is increased from 4KB to (1 << (12 + frag)). The PTE | ||
681 | * flags are considered valid for all PTEs within the fragment range | ||
682 | * and corresponding mappings are assumed to be physically contiguous. | ||
683 | * | ||
684 | * The L1 TLB can store a single PTE for the whole fragment, | ||
685 | * significantly increasing the space available for translation | ||
686 | * caching. This leads to large improvements in throughput when the | ||
687 | * TLB is under pressure. | ||
688 | * | ||
689 | * The L2 TLB distributes small and large fragments into two | ||
690 | * asymmetric partitions. The large fragment cache is significantly | ||
691 | * larger. Thus, we try to use large fragments wherever possible. | ||
692 | * Userspace can support this by aligning virtual base address and | ||
693 | * allocation size to the fragment size. | ||
694 | */ | ||
695 | |||
696 | /* NI is optimized for 256KB fragments, SI and newer for 64KB */ | ||
697 | uint64_t frag_flags = rdev->family == CHIP_CAYMAN ? | ||
698 | R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB; | ||
699 | uint64_t frag_align = rdev->family == CHIP_CAYMAN ? 0x200 : 0x80; | ||
700 | |||
701 | uint64_t frag_start = ALIGN(pe_start, frag_align); | ||
702 | uint64_t frag_end = pe_end & ~(frag_align - 1); | ||
703 | |||
704 | unsigned count; | ||
705 | |||
706 | /* system pages are not contiguous */ | ||
707 | if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) || | ||
708 | (frag_start >= frag_end)) { | ||
709 | |||
710 | count = (pe_end - pe_start) / 8; | ||
711 | radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count, | ||
712 | RADEON_GPU_PAGE_SIZE, flags); | ||
713 | return; | ||
714 | } | ||
715 | |||
716 | /* handle the 4K area at the beginning */ | ||
717 | if (pe_start != frag_start) { | ||
718 | count = (frag_start - pe_start) / 8; | ||
719 | radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count, | ||
720 | RADEON_GPU_PAGE_SIZE, flags); | ||
721 | addr += RADEON_GPU_PAGE_SIZE * count; | ||
722 | } | ||
723 | |||
724 | /* handle the area in the middle */ | ||
725 | count = (frag_end - frag_start) / 8; | ||
726 | radeon_asic_vm_set_page(rdev, ib, frag_start, addr, count, | ||
727 | RADEON_GPU_PAGE_SIZE, flags | frag_flags); | ||
728 | |||
729 | /* handle the 4K area at the end */ | ||
730 | if (frag_end != pe_end) { | ||
731 | addr += RADEON_GPU_PAGE_SIZE * count; | ||
732 | count = (pe_end - frag_end) / 8; | ||
733 | radeon_asic_vm_set_page(rdev, ib, frag_end, addr, count, | ||
734 | RADEON_GPU_PAGE_SIZE, flags); | ||
735 | } | ||
736 | } | ||
737 | |||
738 | /** | ||
661 | * radeon_vm_update_ptes - make sure that page tables are valid | 739 | * radeon_vm_update_ptes - make sure that page tables are valid |
662 | * | 740 | * |
663 | * @rdev: radeon_device pointer | 741 | * @rdev: radeon_device pointer |
@@ -703,10 +781,9 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, | |||
703 | if ((last_pte + 8 * count) != pte) { | 781 | if ((last_pte + 8 * count) != pte) { |
704 | 782 | ||
705 | if (count) { | 783 | if (count) { |
706 | radeon_asic_vm_set_page(rdev, ib, last_pte, | 784 | radeon_vm_frag_ptes(rdev, ib, last_pte, |
707 | last_dst, count, | 785 | last_pte + 8 * count, |
708 | RADEON_GPU_PAGE_SIZE, | 786 | last_dst, flags); |
709 | flags); | ||
710 | } | 787 | } |
711 | 788 | ||
712 | count = nptes; | 789 | count = nptes; |
@@ -721,9 +798,9 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, | |||
721 | } | 798 | } |
722 | 799 | ||
723 | if (count) { | 800 | if (count) { |
724 | radeon_asic_vm_set_page(rdev, ib, last_pte, | 801 | radeon_vm_frag_ptes(rdev, ib, last_pte, |
725 | last_dst, count, | 802 | last_pte + 8 * count, |
726 | RADEON_GPU_PAGE_SIZE, flags); | 803 | last_dst, flags); |
727 | } | 804 | } |
728 | } | 805 | } |
729 | 806 | ||
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index ac708e006180..22ecbc07e9a6 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c | |||
@@ -4044,18 +4044,21 @@ static int si_pcie_gart_enable(struct radeon_device *rdev) | |||
4044 | WREG32(MC_VM_MX_L1_TLB_CNTL, | 4044 | WREG32(MC_VM_MX_L1_TLB_CNTL, |
4045 | (0xA << 7) | | 4045 | (0xA << 7) | |
4046 | ENABLE_L1_TLB | | 4046 | ENABLE_L1_TLB | |
4047 | ENABLE_L1_FRAGMENT_PROCESSING | | ||
4047 | SYSTEM_ACCESS_MODE_NOT_IN_SYS | | 4048 | SYSTEM_ACCESS_MODE_NOT_IN_SYS | |
4048 | ENABLE_ADVANCED_DRIVER_MODEL | | 4049 | ENABLE_ADVANCED_DRIVER_MODEL | |
4049 | SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); | 4050 | SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); |
4050 | /* Setup L2 cache */ | 4051 | /* Setup L2 cache */ |
4051 | WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | | 4052 | WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | |
4053 | ENABLE_L2_FRAGMENT_PROCESSING | | ||
4052 | ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | | 4054 | ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | |
4053 | ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | | 4055 | ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | |
4054 | EFFECTIVE_L2_QUEUE_SIZE(7) | | 4056 | EFFECTIVE_L2_QUEUE_SIZE(7) | |
4055 | CONTEXT1_IDENTITY_ACCESS_MODE(1)); | 4057 | CONTEXT1_IDENTITY_ACCESS_MODE(1)); |
4056 | WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); | 4058 | WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); |
4057 | WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | | 4059 | WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | |
4058 | L2_CACHE_BIGK_FRAGMENT_SIZE(0)); | 4060 | BANK_SELECT(4) | |
4061 | L2_CACHE_BIGK_FRAGMENT_SIZE(4)); | ||
4059 | /* setup context0 */ | 4062 | /* setup context0 */ |
4060 | WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); | 4063 | WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); |
4061 | WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); | 4064 | WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); |