aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/radeon
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/radeon')
-rw-r--r--drivers/gpu/drm/radeon/cik.c4
-rw-r--r--drivers/gpu/drm/radeon/ni.c2
-rw-r--r--drivers/gpu/drm/radeon/radeon.h5
-rw-r--r--drivers/gpu/drm/radeon/radeon_vm.c91
-rw-r--r--drivers/gpu/drm/radeon/si.c5
5 files changed, 98 insertions, 9 deletions
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 199eb194716f..dbb5b2e17c7c 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -5328,6 +5328,7 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev)
5328 WREG32(MC_VM_MX_L1_TLB_CNTL, 5328 WREG32(MC_VM_MX_L1_TLB_CNTL,
5329 (0xA << 7) | 5329 (0xA << 7) |
5330 ENABLE_L1_TLB | 5330 ENABLE_L1_TLB |
5331 ENABLE_L1_FRAGMENT_PROCESSING |
5331 SYSTEM_ACCESS_MODE_NOT_IN_SYS | 5332 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5332 ENABLE_ADVANCED_DRIVER_MODEL | 5333 ENABLE_ADVANCED_DRIVER_MODEL |
5333 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); 5334 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
@@ -5340,7 +5341,8 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev)
5340 CONTEXT1_IDENTITY_ACCESS_MODE(1)); 5341 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5341 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); 5342 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5342 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | 5343 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5343 L2_CACHE_BIGK_FRAGMENT_SIZE(6)); 5344 BANK_SELECT(4) |
5345 L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5344 /* setup context0 */ 5346 /* setup context0 */
5345 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); 5347 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5346 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); 5348 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index d246e043421a..5e8db9bccba1 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1228,12 +1228,14 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev)
1228 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); 1228 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
1229 /* Setup L2 cache */ 1229 /* Setup L2 cache */
1230 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | 1230 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
1231 ENABLE_L2_FRAGMENT_PROCESSING |
1231 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | 1232 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
1232 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | 1233 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
1233 EFFECTIVE_L2_QUEUE_SIZE(7) | 1234 EFFECTIVE_L2_QUEUE_SIZE(7) |
1234 CONTEXT1_IDENTITY_ACCESS_MODE(1)); 1235 CONTEXT1_IDENTITY_ACCESS_MODE(1));
1235 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); 1236 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
1236 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | 1237 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
1238 BANK_SELECT(6) |
1237 L2_CACHE_BIGK_FRAGMENT_SIZE(6)); 1239 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
1238 /* setup context0 */ 1240 /* setup context0 */
1239 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); 1241 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index b58e1afdda76..325f3a586cb7 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -848,6 +848,11 @@ struct radeon_mec {
848#define R600_PTE_READABLE (1 << 5) 848#define R600_PTE_READABLE (1 << 5)
849#define R600_PTE_WRITEABLE (1 << 6) 849#define R600_PTE_WRITEABLE (1 << 6)
850 850
851/* PTE (Page Table Entry) fragment field for different page sizes */
852#define R600_PTE_FRAG_4KB (0 << 7)
853#define R600_PTE_FRAG_64KB (4 << 7)
854#define R600_PTE_FRAG_256KB (6 << 7)
855
851struct radeon_vm_pt { 856struct radeon_vm_pt {
852 struct radeon_bo *bo; 857 struct radeon_bo *bo;
853 uint64_t addr; 858 uint64_t addr;
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 2aae6ce49d32..f8d5b65932e5 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -658,6 +658,84 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev,
658} 658}
659 659
660/** 660/**
661 * radeon_vm_frag_ptes - add fragment information to PTEs
662 *
663 * @rdev: radeon_device pointer
664 * @ib: IB for the update
665 * @pe_start: address of the first PTE to handle
666 * @pe_end: address just past the last PTE to handle (exclusive)
667 * @addr: address the first of those PTEs should point to
668 * @flags: hw mapping flags
669 *
670 * Global and local mutex must be locked!
671 */
672static void radeon_vm_frag_ptes(struct radeon_device *rdev,
673 struct radeon_ib *ib,
674 uint64_t pe_start, uint64_t pe_end,
675 uint64_t addr, uint32_t flags)
676{
677 /*
678 * The MC L1 TLB supports variable sized pages, based on a fragment
679 * field in the PTE. When this field is set to a non-zero value, page
680 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
681 * flags are considered valid for all PTEs within the fragment range
682 * and corresponding mappings are assumed to be physically contiguous.
683 *
684 * The L1 TLB can store a single PTE for the whole fragment,
685 * significantly increasing the space available for translation
686 * caching. This leads to large improvements in throughput when the
687 * TLB is under pressure.
688 *
689 * The L2 TLB distributes small and large fragments into two
690 * asymmetric partitions. The large fragment cache is significantly
691 * larger. Thus, we try to use large fragments wherever possible.
692 * Userspace can support this by aligning virtual base address and
693 * allocation size to the fragment size.
694 */
695
696 /* NI is optimized for 256KB fragments, SI and newer for 64KB; frag_align is that size in PTE bytes (8 bytes per 4KB page) */
697 uint64_t frag_flags = rdev->family == CHIP_CAYMAN ?
698 R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB;
699 uint64_t frag_align = rdev->family == CHIP_CAYMAN ? 0x200 : 0x80;
700
701 uint64_t frag_start = ALIGN(pe_start, frag_align);
702 uint64_t frag_end = pe_end & ~(frag_align - 1);
703
704 unsigned count;
705
706 /* no fragments: system pages are not contiguous, the PTEs are invalid, or the range holds no complete aligned fragment */
707 if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) ||
708 (frag_start >= frag_end)) {
709
710 count = (pe_end - pe_start) / 8;
711 radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count,
712 RADEON_GPU_PAGE_SIZE, flags);
713 return;
714 }
715
716 /* handle the unaligned 4K pages before the first fragment */
717 if (pe_start != frag_start) {
718 count = (frag_start - pe_start) / 8;
719 radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count,
720 RADEON_GPU_PAGE_SIZE, flags);
721 addr += RADEON_GPU_PAGE_SIZE * count;
722 }
723
724 /* handle the fragment-aligned area in the middle, tagged with frag_flags */
725 count = (frag_end - frag_start) / 8;
726 radeon_asic_vm_set_page(rdev, ib, frag_start, addr, count,
727 RADEON_GPU_PAGE_SIZE, flags | frag_flags);
728
729 /* handle the unaligned 4K pages after the last fragment */
730 if (frag_end != pe_end) {
731 addr += RADEON_GPU_PAGE_SIZE * count;
732 count = (pe_end - frag_end) / 8;
733 radeon_asic_vm_set_page(rdev, ib, frag_end, addr, count,
734 RADEON_GPU_PAGE_SIZE, flags);
735 }
736}
737
738/**
661 * radeon_vm_update_ptes - make sure that page tables are valid 739 * radeon_vm_update_ptes - make sure that page tables are valid
662 * 740 *
663 * @rdev: radeon_device pointer 741 * @rdev: radeon_device pointer
@@ -703,10 +781,9 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
703 if ((last_pte + 8 * count) != pte) { 781 if ((last_pte + 8 * count) != pte) {
704 782
705 if (count) { 783 if (count) {
706 radeon_asic_vm_set_page(rdev, ib, last_pte, 784 radeon_vm_frag_ptes(rdev, ib, last_pte,
707 last_dst, count, 785 last_pte + 8 * count,
708 RADEON_GPU_PAGE_SIZE, 786 last_dst, flags);
709 flags);
710 } 787 }
711 788
712 count = nptes; 789 count = nptes;
@@ -721,9 +798,9 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
721 } 798 }
722 799
723 if (count) { 800 if (count) {
724 radeon_asic_vm_set_page(rdev, ib, last_pte, 801 radeon_vm_frag_ptes(rdev, ib, last_pte,
725 last_dst, count, 802 last_pte + 8 * count,
726 RADEON_GPU_PAGE_SIZE, flags); 803 last_dst, flags);
727 } 804 }
728} 805}
729 806
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index ac708e006180..22ecbc07e9a6 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -4044,18 +4044,21 @@ static int si_pcie_gart_enable(struct radeon_device *rdev)
4044 WREG32(MC_VM_MX_L1_TLB_CNTL, 4044 WREG32(MC_VM_MX_L1_TLB_CNTL,
4045 (0xA << 7) | 4045 (0xA << 7) |
4046 ENABLE_L1_TLB | 4046 ENABLE_L1_TLB |
4047 ENABLE_L1_FRAGMENT_PROCESSING |
4047 SYSTEM_ACCESS_MODE_NOT_IN_SYS | 4048 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4048 ENABLE_ADVANCED_DRIVER_MODEL | 4049 ENABLE_ADVANCED_DRIVER_MODEL |
4049 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); 4050 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4050 /* Setup L2 cache */ 4051 /* Setup L2 cache */
4051 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | 4052 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4053 ENABLE_L2_FRAGMENT_PROCESSING |
4052 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | 4054 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4053 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | 4055 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4054 EFFECTIVE_L2_QUEUE_SIZE(7) | 4056 EFFECTIVE_L2_QUEUE_SIZE(7) |
4055 CONTEXT1_IDENTITY_ACCESS_MODE(1)); 4057 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4056 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); 4058 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4057 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | 4059 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4058 L2_CACHE_BIGK_FRAGMENT_SIZE(0)); 4060 BANK_SELECT(4) |
4061 L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4059 /* setup context0 */ 4062 /* setup context0 */
4060 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); 4063 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4061 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); 4064 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);