diff options
Diffstat (limited to 'drivers/gpu/drm/radeon/evergreen_blit_kms.c')
-rw-r--r-- | drivers/gpu/drm/radeon/evergreen_blit_kms.c | 348 |
1 files changed, 39 insertions, 309 deletions
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c index 2eb251858e72..dcf11bbc06d9 100644 --- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c | |||
@@ -56,7 +56,9 @@ set_render_target(struct radeon_device *rdev, int format, | |||
56 | if (h < 8) | 56 | if (h < 8) |
57 | h = 8; | 57 | h = 8; |
58 | 58 | ||
59 | cb_color_info = ((format << 2) | (1 << 24) | (1 << 8)); | 59 | cb_color_info = CB_FORMAT(format) | |
60 | CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) | | ||
61 | CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1); | ||
60 | pitch = (w / 8) - 1; | 62 | pitch = (w / 8) - 1; |
61 | slice = ((w * h) / 64) - 1; | 63 | slice = ((w * h) / 64) - 1; |
62 | 64 | ||
@@ -67,7 +69,7 @@ set_render_target(struct radeon_device *rdev, int format, | |||
67 | radeon_ring_write(rdev, slice); | 69 | radeon_ring_write(rdev, slice); |
68 | radeon_ring_write(rdev, 0); | 70 | radeon_ring_write(rdev, 0); |
69 | radeon_ring_write(rdev, cb_color_info); | 71 | radeon_ring_write(rdev, cb_color_info); |
70 | radeon_ring_write(rdev, (1 << 4)); | 72 | radeon_ring_write(rdev, 0); |
71 | radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16)); | 73 | radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16)); |
72 | radeon_ring_write(rdev, 0); | 74 | radeon_ring_write(rdev, 0); |
73 | radeon_ring_write(rdev, 0); | 75 | radeon_ring_write(rdev, 0); |
@@ -133,12 +135,16 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) | |||
133 | u32 sq_vtx_constant_word2, sq_vtx_constant_word3; | 135 | u32 sq_vtx_constant_word2, sq_vtx_constant_word3; |
134 | 136 | ||
135 | /* high addr, stride */ | 137 | /* high addr, stride */ |
136 | sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8)); | 138 | sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) | |
139 | SQ_VTXC_STRIDE(16); | ||
137 | #ifdef __BIG_ENDIAN | 140 | #ifdef __BIG_ENDIAN |
138 | sq_vtx_constant_word2 |= (2 << 30); | 141 | sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32); |
139 | #endif | 142 | #endif |
140 | /* xyzw swizzles */ | 143 | /* xyzw swizzles */ |
141 | sq_vtx_constant_word3 = (0 << 3) | (1 << 6) | (2 << 9) | (3 << 12); | 144 | sq_vtx_constant_word3 = SQ_VTCX_SEL_X(SQ_SEL_X) | |
145 | SQ_VTCX_SEL_Y(SQ_SEL_Y) | | ||
146 | SQ_VTCX_SEL_Z(SQ_SEL_Z) | | ||
147 | SQ_VTCX_SEL_W(SQ_SEL_W); | ||
142 | 148 | ||
143 | radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8)); | 149 | radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8)); |
144 | radeon_ring_write(rdev, 0x580); | 150 | radeon_ring_write(rdev, 0x580); |
@@ -149,7 +155,7 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) | |||
149 | radeon_ring_write(rdev, 0); | 155 | radeon_ring_write(rdev, 0); |
150 | radeon_ring_write(rdev, 0); | 156 | radeon_ring_write(rdev, 0); |
151 | radeon_ring_write(rdev, 0); | 157 | radeon_ring_write(rdev, 0); |
152 | radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30); | 158 | radeon_ring_write(rdev, S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_BUFFER)); |
153 | 159 | ||
154 | if ((rdev->family == CHIP_CEDAR) || | 160 | if ((rdev->family == CHIP_CEDAR) || |
155 | (rdev->family == CHIP_PALM) || | 161 | (rdev->family == CHIP_PALM) || |
@@ -176,14 +182,19 @@ set_tex_resource(struct radeon_device *rdev, | |||
176 | if (h < 1) | 182 | if (h < 1) |
177 | h = 1; | 183 | h = 1; |
178 | 184 | ||
179 | sq_tex_resource_word0 = (1 << 0); /* 2D */ | 185 | sq_tex_resource_word0 = TEX_DIM(SQ_TEX_DIM_2D); |
180 | sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) | | 186 | sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) | |
181 | ((w - 1) << 18)); | 187 | ((w - 1) << 18)); |
182 | sq_tex_resource_word1 = ((h - 1) << 0) | (1 << 28); | 188 | sq_tex_resource_word1 = ((h - 1) << 0) | |
189 | TEX_ARRAY_MODE(ARRAY_1D_TILED_THIN1); | ||
183 | /* xyzw swizzles */ | 190 | /* xyzw swizzles */ |
184 | sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25); | 191 | sq_tex_resource_word4 = TEX_DST_SEL_X(SQ_SEL_X) | |
192 | TEX_DST_SEL_Y(SQ_SEL_Y) | | ||
193 | TEX_DST_SEL_Z(SQ_SEL_Z) | | ||
194 | TEX_DST_SEL_W(SQ_SEL_W); | ||
185 | 195 | ||
186 | sq_tex_resource_word7 = format | (SQ_TEX_VTX_VALID_TEXTURE << 30); | 196 | sq_tex_resource_word7 = format | |
197 | S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_TEXTURE); | ||
187 | 198 | ||
188 | radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8)); | 199 | radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8)); |
189 | radeon_ring_write(rdev, 0); | 200 | radeon_ring_write(rdev, 0); |
@@ -584,31 +595,6 @@ set_default_state(struct radeon_device *rdev) | |||
584 | 595 | ||
585 | } | 596 | } |
586 | 597 | ||
587 | static inline uint32_t i2f(uint32_t input) | ||
588 | { | ||
589 | u32 result, i, exponent, fraction; | ||
590 | |||
591 | if ((input & 0x3fff) == 0) | ||
592 | result = 0; /* 0 is a special case */ | ||
593 | else { | ||
594 | exponent = 140; /* exponent biased by 127; */ | ||
595 | fraction = (input & 0x3fff) << 10; /* cheat and only | ||
596 | handle numbers below 2^^15 */ | ||
597 | for (i = 0; i < 14; i++) { | ||
598 | if (fraction & 0x800000) | ||
599 | break; | ||
600 | else { | ||
601 | fraction = fraction << 1; /* keep | ||
602 | shifting left until top bit = 1 */ | ||
603 | exponent = exponent - 1; | ||
604 | } | ||
605 | } | ||
606 | result = exponent << 23 | (fraction & 0x7fffff); /* mask | ||
607 | off top bit; assumed 1 */ | ||
608 | } | ||
609 | return result; | ||
610 | } | ||
611 | |||
612 | int evergreen_blit_init(struct radeon_device *rdev) | 598 | int evergreen_blit_init(struct radeon_device *rdev) |
613 | { | 599 | { |
614 | u32 obj_size; | 600 | u32 obj_size; |
@@ -617,6 +603,24 @@ int evergreen_blit_init(struct radeon_device *rdev) | |||
617 | u32 packet2s[16]; | 603 | u32 packet2s[16]; |
618 | int num_packet2s = 0; | 604 | int num_packet2s = 0; |
619 | 605 | ||
606 | rdev->r600_blit.primitives.set_render_target = set_render_target; | ||
607 | rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync; | ||
608 | rdev->r600_blit.primitives.set_shaders = set_shaders; | ||
609 | rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource; | ||
610 | rdev->r600_blit.primitives.set_tex_resource = set_tex_resource; | ||
611 | rdev->r600_blit.primitives.set_scissors = set_scissors; | ||
612 | rdev->r600_blit.primitives.draw_auto = draw_auto; | ||
613 | rdev->r600_blit.primitives.set_default_state = set_default_state; | ||
614 | |||
615 | rdev->r600_blit.ring_size_common = 55; /* shaders + def state */ | ||
616 | rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */ | ||
617 | rdev->r600_blit.ring_size_common += 5; /* done copy */ | ||
618 | rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */ | ||
619 | |||
620 | rdev->r600_blit.ring_size_per_loop = 74; | ||
621 | |||
622 | rdev->r600_blit.max_dim = 16384; | ||
623 | |||
620 | /* pin copy shader into vram if already initialized */ | 624 | /* pin copy shader into vram if already initialized */ |
621 | if (rdev->r600_blit.shader_obj) | 625 | if (rdev->r600_blit.shader_obj) |
622 | goto done; | 626 | goto done; |
@@ -712,277 +716,3 @@ done: | |||
712 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); | 716 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); |
713 | return 0; | 717 | return 0; |
714 | } | 718 | } |
715 | |||
716 | void evergreen_blit_fini(struct radeon_device *rdev) | ||
717 | { | ||
718 | int r; | ||
719 | |||
720 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); | ||
721 | if (rdev->r600_blit.shader_obj == NULL) | ||
722 | return; | ||
723 | /* If we can't reserve the bo, unref should be enough to destroy | ||
724 | * it when it becomes idle. | ||
725 | */ | ||
726 | r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); | ||
727 | if (!r) { | ||
728 | radeon_bo_unpin(rdev->r600_blit.shader_obj); | ||
729 | radeon_bo_unreserve(rdev->r600_blit.shader_obj); | ||
730 | } | ||
731 | radeon_bo_unref(&rdev->r600_blit.shader_obj); | ||
732 | } | ||
733 | |||
734 | static int evergreen_vb_ib_get(struct radeon_device *rdev) | ||
735 | { | ||
736 | int r; | ||
737 | r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib); | ||
738 | if (r) { | ||
739 | DRM_ERROR("failed to get IB for vertex buffer\n"); | ||
740 | return r; | ||
741 | } | ||
742 | |||
743 | rdev->r600_blit.vb_total = 64*1024; | ||
744 | rdev->r600_blit.vb_used = 0; | ||
745 | return 0; | ||
746 | } | ||
747 | |||
748 | static void evergreen_vb_ib_put(struct radeon_device *rdev) | ||
749 | { | ||
750 | radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence); | ||
751 | radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); | ||
752 | } | ||
753 | |||
754 | int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) | ||
755 | { | ||
756 | int r; | ||
757 | int ring_size, line_size; | ||
758 | int max_size; | ||
759 | /* loops of emits + fence emit possible */ | ||
760 | int dwords_per_loop = 74, num_loops; | ||
761 | |||
762 | r = evergreen_vb_ib_get(rdev); | ||
763 | if (r) | ||
764 | return r; | ||
765 | |||
766 | /* 8 bpp vs 32 bpp for xfer unit */ | ||
767 | if (size_bytes & 3) | ||
768 | line_size = 8192; | ||
769 | else | ||
770 | line_size = 8192 * 4; | ||
771 | |||
772 | max_size = 8192 * line_size; | ||
773 | |||
774 | /* major loops cover the max size transfer */ | ||
775 | num_loops = ((size_bytes + max_size) / max_size); | ||
776 | /* minor loops cover the extra non aligned bits */ | ||
777 | num_loops += ((size_bytes % line_size) ? 1 : 0); | ||
778 | /* calculate number of loops correctly */ | ||
779 | ring_size = num_loops * dwords_per_loop; | ||
780 | /* set default + shaders */ | ||
781 | ring_size += 55; /* shaders + def state */ | ||
782 | ring_size += 10; /* fence emit for VB IB */ | ||
783 | ring_size += 5; /* done copy */ | ||
784 | ring_size += 10; /* fence emit for done copy */ | ||
785 | r = radeon_ring_lock(rdev, ring_size); | ||
786 | if (r) | ||
787 | return r; | ||
788 | |||
789 | set_default_state(rdev); /* 36 */ | ||
790 | set_shaders(rdev); /* 16 */ | ||
791 | return 0; | ||
792 | } | ||
793 | |||
794 | void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence) | ||
795 | { | ||
796 | int r; | ||
797 | |||
798 | if (rdev->r600_blit.vb_ib) | ||
799 | evergreen_vb_ib_put(rdev); | ||
800 | |||
801 | if (fence) | ||
802 | r = radeon_fence_emit(rdev, fence); | ||
803 | |||
804 | radeon_ring_unlock_commit(rdev); | ||
805 | } | ||
806 | |||
807 | void evergreen_kms_blit_copy(struct radeon_device *rdev, | ||
808 | u64 src_gpu_addr, u64 dst_gpu_addr, | ||
809 | int size_bytes) | ||
810 | { | ||
811 | int max_bytes; | ||
812 | u64 vb_gpu_addr; | ||
813 | u32 *vb; | ||
814 | |||
815 | DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr, | ||
816 | size_bytes, rdev->r600_blit.vb_used); | ||
817 | vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used); | ||
818 | if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { | ||
819 | max_bytes = 8192; | ||
820 | |||
821 | while (size_bytes) { | ||
822 | int cur_size = size_bytes; | ||
823 | int src_x = src_gpu_addr & 255; | ||
824 | int dst_x = dst_gpu_addr & 255; | ||
825 | int h = 1; | ||
826 | src_gpu_addr = src_gpu_addr & ~255ULL; | ||
827 | dst_gpu_addr = dst_gpu_addr & ~255ULL; | ||
828 | |||
829 | if (!src_x && !dst_x) { | ||
830 | h = (cur_size / max_bytes); | ||
831 | if (h > 8192) | ||
832 | h = 8192; | ||
833 | if (h == 0) | ||
834 | h = 1; | ||
835 | else | ||
836 | cur_size = max_bytes; | ||
837 | } else { | ||
838 | if (cur_size > max_bytes) | ||
839 | cur_size = max_bytes; | ||
840 | if (cur_size > (max_bytes - dst_x)) | ||
841 | cur_size = (max_bytes - dst_x); | ||
842 | if (cur_size > (max_bytes - src_x)) | ||
843 | cur_size = (max_bytes - src_x); | ||
844 | } | ||
845 | |||
846 | if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { | ||
847 | WARN_ON(1); | ||
848 | } | ||
849 | |||
850 | vb[0] = i2f(dst_x); | ||
851 | vb[1] = 0; | ||
852 | vb[2] = i2f(src_x); | ||
853 | vb[3] = 0; | ||
854 | |||
855 | vb[4] = i2f(dst_x); | ||
856 | vb[5] = i2f(h); | ||
857 | vb[6] = i2f(src_x); | ||
858 | vb[7] = i2f(h); | ||
859 | |||
860 | vb[8] = i2f(dst_x + cur_size); | ||
861 | vb[9] = i2f(h); | ||
862 | vb[10] = i2f(src_x + cur_size); | ||
863 | vb[11] = i2f(h); | ||
864 | |||
865 | /* src 10 */ | ||
866 | set_tex_resource(rdev, FMT_8, | ||
867 | src_x + cur_size, h, src_x + cur_size, | ||
868 | src_gpu_addr); | ||
869 | |||
870 | /* 5 */ | ||
871 | cp_set_surface_sync(rdev, | ||
872 | PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); | ||
873 | |||
874 | |||
875 | /* dst 17 */ | ||
876 | set_render_target(rdev, COLOR_8, | ||
877 | dst_x + cur_size, h, | ||
878 | dst_gpu_addr); | ||
879 | |||
880 | /* scissors 12 */ | ||
881 | set_scissors(rdev, dst_x, 0, dst_x + cur_size, h); | ||
882 | |||
883 | /* 15 */ | ||
884 | vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; | ||
885 | set_vtx_resource(rdev, vb_gpu_addr); | ||
886 | |||
887 | /* draw 10 */ | ||
888 | draw_auto(rdev); | ||
889 | |||
890 | /* 5 */ | ||
891 | cp_set_surface_sync(rdev, | ||
892 | PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, | ||
893 | cur_size * h, dst_gpu_addr); | ||
894 | |||
895 | vb += 12; | ||
896 | rdev->r600_blit.vb_used += 12 * 4; | ||
897 | |||
898 | src_gpu_addr += cur_size * h; | ||
899 | dst_gpu_addr += cur_size * h; | ||
900 | size_bytes -= cur_size * h; | ||
901 | } | ||
902 | } else { | ||
903 | max_bytes = 8192 * 4; | ||
904 | |||
905 | while (size_bytes) { | ||
906 | int cur_size = size_bytes; | ||
907 | int src_x = (src_gpu_addr & 255); | ||
908 | int dst_x = (dst_gpu_addr & 255); | ||
909 | int h = 1; | ||
910 | src_gpu_addr = src_gpu_addr & ~255ULL; | ||
911 | dst_gpu_addr = dst_gpu_addr & ~255ULL; | ||
912 | |||
913 | if (!src_x && !dst_x) { | ||
914 | h = (cur_size / max_bytes); | ||
915 | if (h > 8192) | ||
916 | h = 8192; | ||
917 | if (h == 0) | ||
918 | h = 1; | ||
919 | else | ||
920 | cur_size = max_bytes; | ||
921 | } else { | ||
922 | if (cur_size > max_bytes) | ||
923 | cur_size = max_bytes; | ||
924 | if (cur_size > (max_bytes - dst_x)) | ||
925 | cur_size = (max_bytes - dst_x); | ||
926 | if (cur_size > (max_bytes - src_x)) | ||
927 | cur_size = (max_bytes - src_x); | ||
928 | } | ||
929 | |||
930 | if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { | ||
931 | WARN_ON(1); | ||
932 | } | ||
933 | |||
934 | vb[0] = i2f(dst_x / 4); | ||
935 | vb[1] = 0; | ||
936 | vb[2] = i2f(src_x / 4); | ||
937 | vb[3] = 0; | ||
938 | |||
939 | vb[4] = i2f(dst_x / 4); | ||
940 | vb[5] = i2f(h); | ||
941 | vb[6] = i2f(src_x / 4); | ||
942 | vb[7] = i2f(h); | ||
943 | |||
944 | vb[8] = i2f((dst_x + cur_size) / 4); | ||
945 | vb[9] = i2f(h); | ||
946 | vb[10] = i2f((src_x + cur_size) / 4); | ||
947 | vb[11] = i2f(h); | ||
948 | |||
949 | /* src 10 */ | ||
950 | set_tex_resource(rdev, FMT_8_8_8_8, | ||
951 | (src_x + cur_size) / 4, | ||
952 | h, (src_x + cur_size) / 4, | ||
953 | src_gpu_addr); | ||
954 | /* 5 */ | ||
955 | cp_set_surface_sync(rdev, | ||
956 | PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); | ||
957 | |||
958 | /* dst 17 */ | ||
959 | set_render_target(rdev, COLOR_8_8_8_8, | ||
960 | (dst_x + cur_size) / 4, h, | ||
961 | dst_gpu_addr); | ||
962 | |||
963 | /* scissors 12 */ | ||
964 | set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h); | ||
965 | |||
966 | /* Vertex buffer setup 15 */ | ||
967 | vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; | ||
968 | set_vtx_resource(rdev, vb_gpu_addr); | ||
969 | |||
970 | /* draw 10 */ | ||
971 | draw_auto(rdev); | ||
972 | |||
973 | /* 5 */ | ||
974 | cp_set_surface_sync(rdev, | ||
975 | PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, | ||
976 | cur_size * h, dst_gpu_addr); | ||
977 | |||
978 | /* 74 ring dwords per loop */ | ||
979 | vb += 12; | ||
980 | rdev->r600_blit.vb_used += 12 * 4; | ||
981 | |||
982 | src_gpu_addr += cur_size * h; | ||
983 | dst_gpu_addr += cur_size * h; | ||
984 | size_bytes -= cur_size * h; | ||
985 | } | ||
986 | } | ||
987 | } | ||
988 | |||