about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
author: Christian König <christian.koenig@amd.com> 2016-08-12 06:59:59 -0400
committer: Alex Deucher <alexander.deucher@amd.com> 2016-08-16 10:44:34 -0400
commit: 96105e5375892f63cc56fa707a1db0d74abc764d (patch)
tree: a217f3deefda61d3f50194afc41c9ccc1ab43e0a /drivers/gpu/drm/amd/amdgpu
parent: dc157c6daa317c75058def8f9753909e5224cbec (diff)
drm/amdgpu: stop splitting PTE commands into smaller ones
It doesn't make much sense to create bigger commands first which we then need to split into smaller ones again. Just make sure the commands we create aren't too big in the first place. Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu.h    | 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 71
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 71
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 71
5 files changed, 73 insertions, 149 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 387b4979f45a..1a7e05da470e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -833,6 +833,9 @@ struct amdgpu_ring {
833/* maximum number of VMIDs */ 833/* maximum number of VMIDs */
834#define AMDGPU_NUM_VM 16 834#define AMDGPU_NUM_VM 16
835 835
836/* Maximum number of PTEs the hardware can write with one command */
837#define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF
838
836/* number of entries in page table */ 839/* number of entries in page table */
837#define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size) 840#define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size)
838 841
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 12925016370b..673c258e49db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -639,7 +639,8 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
639 639
640 pde = pd_addr + pt_idx * 8; 640 pde = pd_addr + pt_idx * 8;
641 if (((last_pde + 8 * count) != pde) || 641 if (((last_pde + 8 * count) != pde) ||
642 ((last_pt + incr * count) != pt)) { 642 ((last_pt + incr * count) != pt) ||
643 (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
643 644
644 if (count) { 645 if (count) {
645 amdgpu_vm_update_pages(&params, last_pde, 646 amdgpu_vm_update_pages(&params, last_pde,
@@ -743,7 +744,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
743 next_pe_start = amdgpu_bo_gpu_offset(pt); 744 next_pe_start = amdgpu_bo_gpu_offset(pt);
744 next_pe_start += (addr & mask) * 8; 745 next_pe_start += (addr & mask) * 8;
745 746
746 if ((cur_pe_start + 8 * cur_nptes) == next_pe_start) { 747 if ((cur_pe_start + 8 * cur_nptes) == next_pe_start &&
748 ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) {
747 /* The next ptb is consecutive to current ptb. 749 /* The next ptb is consecutive to current ptb.
748 * Don't call amdgpu_vm_update_pages now. 750 * Don't call amdgpu_vm_update_pages now.
749 * Will update two ptbs together in future. 751 * Will update two ptbs together in future.
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index e5e44f42e20e..e71cd12104b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -694,24 +694,16 @@ static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib,
694 uint64_t pe, uint64_t src, 694 uint64_t pe, uint64_t src,
695 unsigned count) 695 unsigned count)
696{ 696{
697 while (count) { 697 unsigned bytes = count * 8;
698 unsigned bytes = count * 8; 698
699 if (bytes > 0x1FFFF8) 699 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
700 bytes = 0x1FFFF8; 700 SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
701 701 ib->ptr[ib->length_dw++] = bytes;
702 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, 702 ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
703 SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 703 ib->ptr[ib->length_dw++] = lower_32_bits(src);
704 ib->ptr[ib->length_dw++] = bytes; 704 ib->ptr[ib->length_dw++] = upper_32_bits(src);
705 ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ 705 ib->ptr[ib->length_dw++] = lower_32_bits(pe);
706 ib->ptr[ib->length_dw++] = lower_32_bits(src); 706 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
707 ib->ptr[ib->length_dw++] = upper_32_bits(src);
708 ib->ptr[ib->length_dw++] = lower_32_bits(pe);
709 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
710
711 pe += bytes;
712 src += bytes;
713 count -= bytes / 8;
714 }
715} 707}
716 708
717/** 709/**
@@ -755,40 +747,21 @@ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
755 * 747 *
756 * Update the page tables using sDMA (CIK). 748 * Update the page tables using sDMA (CIK).
757 */ 749 */
758static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, 750static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
759 uint64_t pe,
760 uint64_t addr, unsigned count, 751 uint64_t addr, unsigned count,
761 uint32_t incr, uint32_t flags) 752 uint32_t incr, uint32_t flags)
762{ 753{
763 uint64_t value; 754 /* for physically contiguous pages (vram) */
764 unsigned ndw; 755 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
765 756 ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
766 while (count) { 757 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
767 ndw = count; 758 ib->ptr[ib->length_dw++] = flags; /* mask */
768 if (ndw > 0x7FFFF) 759 ib->ptr[ib->length_dw++] = 0;
769 ndw = 0x7FFFF; 760 ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
770 761 ib->ptr[ib->length_dw++] = upper_32_bits(addr);
771 if (flags & AMDGPU_PTE_VALID) 762 ib->ptr[ib->length_dw++] = incr; /* increment size */
772 value = addr; 763 ib->ptr[ib->length_dw++] = 0;
773 else 764 ib->ptr[ib->length_dw++] = count; /* number of entries */
774 value = 0;
775
776 /* for physically contiguous pages (vram) */
777 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
778 ib->ptr[ib->length_dw++] = pe; /* dst addr */
779 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
780 ib->ptr[ib->length_dw++] = flags; /* mask */
781 ib->ptr[ib->length_dw++] = 0;
782 ib->ptr[ib->length_dw++] = value; /* value */
783 ib->ptr[ib->length_dw++] = upper_32_bits(value);
784 ib->ptr[ib->length_dw++] = incr; /* increment size */
785 ib->ptr[ib->length_dw++] = 0;
786 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
787
788 pe += ndw * 8;
789 addr += ndw * incr;
790 count -= ndw;
791 }
792} 765}
793 766
794/** 767/**
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index af0f0d283472..e82229686783 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -749,24 +749,16 @@ static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib,
749 uint64_t pe, uint64_t src, 749 uint64_t pe, uint64_t src,
750 unsigned count) 750 unsigned count)
751{ 751{
752 while (count) { 752 unsigned bytes = count * 8;
753 unsigned bytes = count * 8; 753
754 if (bytes > 0x1FFFF8) 754 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
755 bytes = 0x1FFFF8; 755 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
756 756 ib->ptr[ib->length_dw++] = bytes;
757 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | 757 ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
758 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); 758 ib->ptr[ib->length_dw++] = lower_32_bits(src);
759 ib->ptr[ib->length_dw++] = bytes; 759 ib->ptr[ib->length_dw++] = upper_32_bits(src);
760 ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ 760 ib->ptr[ib->length_dw++] = lower_32_bits(pe);
761 ib->ptr[ib->length_dw++] = lower_32_bits(src); 761 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
762 ib->ptr[ib->length_dw++] = upper_32_bits(src);
763 ib->ptr[ib->length_dw++] = lower_32_bits(pe);
764 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
765
766 pe += bytes;
767 src += bytes;
768 count -= bytes / 8;
769 }
770} 762}
771 763
772/** 764/**
@@ -810,40 +802,21 @@ static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
810 * 802 *
811 * Update the page tables using sDMA (CIK). 803 * Update the page tables using sDMA (CIK).
812 */ 804 */
813static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, 805static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
814 uint64_t pe,
815 uint64_t addr, unsigned count, 806 uint64_t addr, unsigned count,
816 uint32_t incr, uint32_t flags) 807 uint32_t incr, uint32_t flags)
817{ 808{
818 uint64_t value; 809 /* for physically contiguous pages (vram) */
819 unsigned ndw; 810 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
820 811 ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
821 while (count) { 812 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
822 ndw = count; 813 ib->ptr[ib->length_dw++] = flags; /* mask */
823 if (ndw > 0x7FFFF) 814 ib->ptr[ib->length_dw++] = 0;
824 ndw = 0x7FFFF; 815 ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
825 816 ib->ptr[ib->length_dw++] = upper_32_bits(addr);
826 if (flags & AMDGPU_PTE_VALID) 817 ib->ptr[ib->length_dw++] = incr; /* increment size */
827 value = addr; 818 ib->ptr[ib->length_dw++] = 0;
828 else 819 ib->ptr[ib->length_dw++] = count; /* number of entries */
829 value = 0;
830
831 /* for physically contiguous pages (vram) */
832 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
833 ib->ptr[ib->length_dw++] = pe; /* dst addr */
834 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
835 ib->ptr[ib->length_dw++] = flags; /* mask */
836 ib->ptr[ib->length_dw++] = 0;
837 ib->ptr[ib->length_dw++] = value; /* value */
838 ib->ptr[ib->length_dw++] = upper_32_bits(value);
839 ib->ptr[ib->length_dw++] = incr; /* increment size */
840 ib->ptr[ib->length_dw++] = 0;
841 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
842
843 pe += ndw * 8;
844 addr += ndw * incr;
845 count -= ndw;
846 }
847} 820}
848 821
849/** 822/**
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 88faaee37258..bee4978bec73 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -976,24 +976,16 @@ static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib,
976 uint64_t pe, uint64_t src, 976 uint64_t pe, uint64_t src,
977 unsigned count) 977 unsigned count)
978{ 978{
979 while (count) { 979 unsigned bytes = count * 8;
980 unsigned bytes = count * 8; 980
981 if (bytes > 0x1FFFF8) 981 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
982 bytes = 0x1FFFF8; 982 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
983 983 ib->ptr[ib->length_dw++] = bytes;
984 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | 984 ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
985 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); 985 ib->ptr[ib->length_dw++] = lower_32_bits(src);
986 ib->ptr[ib->length_dw++] = bytes; 986 ib->ptr[ib->length_dw++] = upper_32_bits(src);
987 ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ 987 ib->ptr[ib->length_dw++] = lower_32_bits(pe);
988 ib->ptr[ib->length_dw++] = lower_32_bits(src); 988 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
989 ib->ptr[ib->length_dw++] = upper_32_bits(src);
990 ib->ptr[ib->length_dw++] = lower_32_bits(pe);
991 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
992
993 pe += bytes;
994 src += bytes;
995 count -= bytes / 8;
996 }
997} 989}
998 990
999/** 991/**
@@ -1037,40 +1029,21 @@ static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
1037 * 1029 *
1038 * Update the page tables using sDMA (CIK). 1030 * Update the page tables using sDMA (CIK).
1039 */ 1031 */
1040static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, 1032static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
1041 uint64_t pe,
1042 uint64_t addr, unsigned count, 1033 uint64_t addr, unsigned count,
1043 uint32_t incr, uint32_t flags) 1034 uint32_t incr, uint32_t flags)
1044{ 1035{
1045 uint64_t value; 1036 /* for physically contiguous pages (vram) */
1046 unsigned ndw; 1037 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
1047 1038 ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
1048 while (count) { 1039 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1049 ndw = count; 1040 ib->ptr[ib->length_dw++] = flags; /* mask */
1050 if (ndw > 0x7FFFF) 1041 ib->ptr[ib->length_dw++] = 0;
1051 ndw = 0x7FFFF; 1042 ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
1052 1043 ib->ptr[ib->length_dw++] = upper_32_bits(addr);
1053 if (flags & AMDGPU_PTE_VALID) 1044 ib->ptr[ib->length_dw++] = incr; /* increment size */
1054 value = addr; 1045 ib->ptr[ib->length_dw++] = 0;
1055 else 1046 ib->ptr[ib->length_dw++] = count; /* number of entries */
1056 value = 0;
1057
1058 /* for physically contiguous pages (vram) */
1059 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
1060 ib->ptr[ib->length_dw++] = pe; /* dst addr */
1061 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1062 ib->ptr[ib->length_dw++] = flags; /* mask */
1063 ib->ptr[ib->length_dw++] = 0;
1064 ib->ptr[ib->length_dw++] = value; /* value */
1065 ib->ptr[ib->length_dw++] = upper_32_bits(value);
1066 ib->ptr[ib->length_dw++] = incr; /* increment size */
1067 ib->ptr[ib->length_dw++] = 0;
1068 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
1069
1070 pe += ndw * 8;
1071 addr += ndw * incr;
1072 count -= ndw;
1073 }
1074} 1047}
1075 1048
1076/** 1049/**