aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/radeon/evergreen_blit_kms.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/radeon/evergreen_blit_kms.c')
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_kms.c348
1 files changed, 39 insertions, 309 deletions
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 2eb251858e72..dcf11bbc06d9 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -56,7 +56,9 @@ set_render_target(struct radeon_device *rdev, int format,
56 if (h < 8) 56 if (h < 8)
57 h = 8; 57 h = 8;
58 58
59 cb_color_info = ((format << 2) | (1 << 24) | (1 << 8)); 59 cb_color_info = CB_FORMAT(format) |
60 CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) |
61 CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
60 pitch = (w / 8) - 1; 62 pitch = (w / 8) - 1;
61 slice = ((w * h) / 64) - 1; 63 slice = ((w * h) / 64) - 1;
62 64
@@ -67,7 +69,7 @@ set_render_target(struct radeon_device *rdev, int format,
67 radeon_ring_write(rdev, slice); 69 radeon_ring_write(rdev, slice);
68 radeon_ring_write(rdev, 0); 70 radeon_ring_write(rdev, 0);
69 radeon_ring_write(rdev, cb_color_info); 71 radeon_ring_write(rdev, cb_color_info);
70 radeon_ring_write(rdev, (1 << 4)); 72 radeon_ring_write(rdev, 0);
71 radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16)); 73 radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16));
72 radeon_ring_write(rdev, 0); 74 radeon_ring_write(rdev, 0);
73 radeon_ring_write(rdev, 0); 75 radeon_ring_write(rdev, 0);
@@ -133,12 +135,16 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
133 u32 sq_vtx_constant_word2, sq_vtx_constant_word3; 135 u32 sq_vtx_constant_word2, sq_vtx_constant_word3;
134 136
135 /* high addr, stride */ 137 /* high addr, stride */
136 sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8)); 138 sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) |
139 SQ_VTXC_STRIDE(16);
137#ifdef __BIG_ENDIAN 140#ifdef __BIG_ENDIAN
138 sq_vtx_constant_word2 |= (2 << 30); 141 sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32);
139#endif 142#endif
140 /* xyzw swizzles */ 143 /* xyzw swizzles */
141 sq_vtx_constant_word3 = (0 << 3) | (1 << 6) | (2 << 9) | (3 << 12); 144 sq_vtx_constant_word3 = SQ_VTCX_SEL_X(SQ_SEL_X) |
145 SQ_VTCX_SEL_Y(SQ_SEL_Y) |
146 SQ_VTCX_SEL_Z(SQ_SEL_Z) |
147 SQ_VTCX_SEL_W(SQ_SEL_W);
142 148
143 radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8)); 149 radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
144 radeon_ring_write(rdev, 0x580); 150 radeon_ring_write(rdev, 0x580);
@@ -149,7 +155,7 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
149 radeon_ring_write(rdev, 0); 155 radeon_ring_write(rdev, 0);
150 radeon_ring_write(rdev, 0); 156 radeon_ring_write(rdev, 0);
151 radeon_ring_write(rdev, 0); 157 radeon_ring_write(rdev, 0);
152 radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30); 158 radeon_ring_write(rdev, S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_BUFFER));
153 159
154 if ((rdev->family == CHIP_CEDAR) || 160 if ((rdev->family == CHIP_CEDAR) ||
155 (rdev->family == CHIP_PALM) || 161 (rdev->family == CHIP_PALM) ||
@@ -176,14 +182,19 @@ set_tex_resource(struct radeon_device *rdev,
176 if (h < 1) 182 if (h < 1)
177 h = 1; 183 h = 1;
178 184
179 sq_tex_resource_word0 = (1 << 0); /* 2D */ 185 sq_tex_resource_word0 = TEX_DIM(SQ_TEX_DIM_2D);
180 sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) | 186 sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) |
181 ((w - 1) << 18)); 187 ((w - 1) << 18));
182 sq_tex_resource_word1 = ((h - 1) << 0) | (1 << 28); 188 sq_tex_resource_word1 = ((h - 1) << 0) |
189 TEX_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
183 /* xyzw swizzles */ 190 /* xyzw swizzles */
184 sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25); 191 sq_tex_resource_word4 = TEX_DST_SEL_X(SQ_SEL_X) |
192 TEX_DST_SEL_Y(SQ_SEL_Y) |
193 TEX_DST_SEL_Z(SQ_SEL_Z) |
194 TEX_DST_SEL_W(SQ_SEL_W);
185 195
186 sq_tex_resource_word7 = format | (SQ_TEX_VTX_VALID_TEXTURE << 30); 196 sq_tex_resource_word7 = format |
197 S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_TEXTURE);
187 198
188 radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8)); 199 radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
189 radeon_ring_write(rdev, 0); 200 radeon_ring_write(rdev, 0);
@@ -584,31 +595,6 @@ set_default_state(struct radeon_device *rdev)
584 595
585} 596}
586 597
/*
 * i2f - build the 32-bit float bit pattern for a small unsigned integer.
 *
 * Only the low 14 bits of @input (mask 0x3fff) take part in the conversion;
 * any higher bits are silently ignored.  Returns 0 when those 14 bits are
 * all zero, otherwise (exponent << 23) | 23-bit fraction.
 *
 * The value is placed so that input bit 13 lands on bit 23 (0x800000) and
 * the exponent starts at 140 (127 bias + 13).  The loop then normalises:
 * every left shift needed to bring the leading 1 up to bit 23 lowers the
 * exponent by one.
 */
587static inline uint32_t i2f(uint32_t input)
588{
589 u32 result, i, exponent, fraction;
590
591 if ((input & 0x3fff) == 0)
592 result = 0; /* 0 is a special case */
593 else {
594 exponent = 140; /* exponent biased by 127; */
595 fraction = (input & 0x3fff) << 10; /* cheat and only
596 handle the low 14 bits, i.e. numbers below 2^14 */
597 for (i = 0; i < 14; i++) {
598 if (fraction & 0x800000)
599 break;
600 else {
601 fraction = fraction << 1; /* keep
602 shifting left until top bit = 1 */
603 exponent = exponent - 1;
604 }
605 }
606 result = exponent << 23 | (fraction & 0x7fffff); /* mask
607 off top bit; assumed 1 */
608 }
609 return result;
610}
611
612int evergreen_blit_init(struct radeon_device *rdev) 598int evergreen_blit_init(struct radeon_device *rdev)
613{ 599{
614 u32 obj_size; 600 u32 obj_size;
@@ -617,6 +603,24 @@ int evergreen_blit_init(struct radeon_device *rdev)
617 u32 packet2s[16]; 603 u32 packet2s[16];
618 int num_packet2s = 0; 604 int num_packet2s = 0;
619 605
606 rdev->r600_blit.primitives.set_render_target = set_render_target;
607 rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync;
608 rdev->r600_blit.primitives.set_shaders = set_shaders;
609 rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource;
610 rdev->r600_blit.primitives.set_tex_resource = set_tex_resource;
611 rdev->r600_blit.primitives.set_scissors = set_scissors;
612 rdev->r600_blit.primitives.draw_auto = draw_auto;
613 rdev->r600_blit.primitives.set_default_state = set_default_state;
614
615 rdev->r600_blit.ring_size_common = 55; /* shaders + def state */
616 rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */
617 rdev->r600_blit.ring_size_common += 5; /* done copy */
618 rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */
619
620 rdev->r600_blit.ring_size_per_loop = 74;
621
622 rdev->r600_blit.max_dim = 16384;
623
620 /* pin copy shader into vram if already initialized */ 624 /* pin copy shader into vram if already initialized */
621 if (rdev->r600_blit.shader_obj) 625 if (rdev->r600_blit.shader_obj)
622 goto done; 626 goto done;
@@ -712,277 +716,3 @@ done:
712 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); 716 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
713 return 0; 717 return 0;
714} 718}
715
/*
 * evergreen_blit_fini - tear down the blit state held in rdev->r600_blit.
 *
 * Always sets the active VRAM size to rdev->mc.visible_vram_size first.
 * If a shader object exists it is unpinned (only when the reservation
 * succeeds) and then unreferenced.  Nothing is returned; a failed
 * reservation is tolerated on purpose, see the comment in the body.
 */
716void evergreen_blit_fini(struct radeon_device *rdev)
717{
718 int r;
719
720 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
/* No shader object: nothing to unpin or unref. */
721 if (rdev->r600_blit.shader_obj == NULL)
722 return;
723 /* If we can't reserve the bo, unref should be enough to destroy
724 * it when it becomes idle.
725 */
726 r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
727 if (!r) {
728 radeon_bo_unpin(rdev->r600_blit.shader_obj);
729 radeon_bo_unreserve(rdev->r600_blit.shader_obj);
730 }
/* The unref runs whether or not the reservation succeeded. */
731 radeon_bo_unref(&rdev->r600_blit.shader_obj);
732}
733
/*
 * evergreen_vb_ib_get - obtain the IB used as vertex buffer for blits.
 *
 * Asks radeon_ib_get() to fill rdev->r600_blit.vb_ib.  On success the
 * bookkeeping is reset: vb_total = 64 KiB, vb_used = 0.
 *
 * Returns 0 on success, or the error from radeon_ib_get() after logging it.
 */
734static int evergreen_vb_ib_get(struct radeon_device *rdev)
735{
736 int r;
737 r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);
738 if (r) {
739 DRM_ERROR("failed to get IB for vertex buffer\n");
740 return r;
741 }
742
/* NOTE(review): 64 KiB is hard-coded here; confirm it matches the real IB size. */
743 rdev->r600_blit.vb_total = 64*1024;
744 rdev->r600_blit.vb_used = 0;
745 return 0;
746}
747
/*
 * evergreen_vb_ib_put - release the vertex-buffer IB.
 *
 * Emits the IB's fence and then frees the IB.  The caller must make sure
 * rdev->r600_blit.vb_ib is non-NULL: it is dereferenced unconditionally.
 * The return value of radeon_fence_emit() is not checked.
 */
748static void evergreen_vb_ib_put(struct radeon_device *rdev)
749{
750 radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);
751 radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
752}
753
/*
 * evergreen_blit_prepare_copy - reserve resources for a blit copy.
 *
 * @rdev:       device
 * @size_bytes: number of bytes the following copy will move
 *
 * Gets the vertex-buffer IB, estimates how many ring dwords the copy will
 * need, locks the ring for that many dwords and emits the default state
 * and the shaders.
 *
 * Ring estimate: num_loops * 74 dwords per copy loop, plus a fixed
 * 55 + 10 + 5 + 10 dwords (see the per-line comments below).
 *
 * Returns 0 on success, or the error from evergreen_vb_ib_get() or
 * radeon_ring_lock().
 */
754int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
755{
756 int r;
757 int ring_size, line_size;
758 int max_size;
759 /* loops of emits + fence emit possible */
760 int dwords_per_loop = 74, num_loops;
761
762 r = evergreen_vb_ib_get(rdev);
763 if (r)
764 return r;
765
766 /* 8 bpp vs 32 bpp for xfer unit */
/*
 * NOTE(review): only the size is tested here, while
 * evergreen_kms_blit_copy() also picks the 8 bpp path when either address
 * is not 4-byte aligned; confirm the estimate is still large enough then.
 */
767 if (size_bytes & 3)
768 line_size = 8192;
769 else
770 line_size = 8192 * 4;
771
/* Largest transfer one loop can cover: 8192 rows of line_size bytes. */
772 max_size = 8192 * line_size;
773
774 /* major loops cover the max size transfer */
/* Integer division of (size + max), so this is always at least 1. */
775 num_loops = ((size_bytes + max_size) / max_size);
776 /* minor loops cover the extra non aligned bits */
777 num_loops += ((size_bytes % line_size) ? 1 : 0);
778 /* calculate number of loops correctly */
779 ring_size = num_loops * dwords_per_loop;
780 /* set default + shaders */
781 ring_size += 55; /* shaders + def state */
782 ring_size += 10; /* fence emit for VB IB */
783 ring_size += 5; /* done copy */
784 ring_size += 10; /* fence emit for done copy */
785 r = radeon_ring_lock(rdev, ring_size);
/*
 * NOTE(review): on this error path the IB obtained above is not released
 * here; verify that the caller cleans it up.
 */
786 if (r)
787 return r;
788
789 set_default_state(rdev); /* 36 */
790 set_shaders(rdev); /* 16 */
791 return 0;
792}
793
/*
 * evergreen_blit_done_copy - finish a blit copy sequence.
 *
 * @rdev:  device
 * @fence: optional fence to emit after the copy; may be NULL
 *
 * Releases the vertex-buffer IB if one is held, emits @fence when given,
 * then unlocks and commits the ring.
 */
794void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
795{
796 int r;
797
798 if (rdev->r600_blit.vb_ib)
799 evergreen_vb_ib_put(rdev);
800
/* NOTE(review): r is assigned but never read, so a fence-emit failure goes unnoticed. */
801 if (fence)
802 r = radeon_fence_emit(rdev, fence);
803
804 radeon_ring_unlock_commit(rdev);
805}
806
/*
 * evergreen_kms_blit_copy - emit the ring commands that copy a byte range.
 *
 * @rdev:         device
 * @src_gpu_addr: GPU address to read from
 * @dst_gpu_addr: GPU address to write to
 * @size_bytes:   number of bytes to copy
 *
 * The copy is drawn as a series of rectangles, one per loop iteration.
 * Two near-identical loops exist:
 *   - byte path (size or either address not a multiple of 4): FMT_8 /
 *     COLOR_8 surfaces, at most 8192 bytes per row;
 *   - dword path (everything 4-byte aligned): FMT_8_8_8_8 / COLOR_8_8_8_8
 *     surfaces, at most 8192 * 4 bytes per row, x coordinates divided by 4.
 *
 * Each iteration:
 *   1. splits both addresses into a 256-byte-aligned base and an x offset;
 *   2. picks the rectangle: with both offsets zero, up to 8192 full rows
 *      (or one partial row if less than a row remains); otherwise a single
 *      row clipped so that offset + size fits in max_bytes;
 *   3. writes 12 words (three vertices of dst x, dst y, src x, src y) into
 *      the vertex buffer via i2f();
 *   4. emits texture, sync, render target, scissors, vertex resource, draw
 *      and sync; the numbers in the short comments add up to the
 *      "74 ring dwords per loop" noted at the end of the second loop;
 *   5. advances the vertex buffer by 48 bytes and the addresses and
 *      remaining size by cur_size * h.
 *
 * Dereferences rdev->r600_blit.vb_ib without a NULL check.
 */
807void evergreen_kms_blit_copy(struct radeon_device *rdev,
808 u64 src_gpu_addr, u64 dst_gpu_addr,
809 int size_bytes)
810{
811 int max_bytes;
812 u64 vb_gpu_addr;
813 u32 *vb;
814
815 DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
816 size_bytes, rdev->r600_blit.vb_used);
/*
 * NOTE(review): vb_used is advanced in bytes (12 * 4 per loop); confirm
 * the element type of vb_ib->ptr makes this addition byte-based.
 */
817 vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
/* Byte path: something is not 4-byte aligned. */
818 if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
819 max_bytes = 8192;
820
821 while (size_bytes) {
822 int cur_size = size_bytes;
823 int src_x = src_gpu_addr & 255;
824 int dst_x = dst_gpu_addr & 255;
825 int h = 1;
826 src_gpu_addr = src_gpu_addr & ~255ULL;
827 dst_gpu_addr = dst_gpu_addr & ~255ULL;
828
/* Both bases aligned: copy whole rows, up to 8192 of them. */
829 if (!src_x && !dst_x) {
830 h = (cur_size / max_bytes);
831 if (h > 8192)
832 h = 8192;
833 if (h == 0)
834 h = 1;
835 else
836 cur_size = max_bytes;
837 } else {
/* Unaligned: one row, clipped so x offset + size stays within max_bytes. */
838 if (cur_size > max_bytes)
839 cur_size = max_bytes;
840 if (cur_size > (max_bytes - dst_x))
841 cur_size = (max_bytes - dst_x);
842 if (cur_size > (max_bytes - src_x))
843 cur_size = (max_bytes - src_x);
844 }
845
/* NOTE(review): this only warns; the vb[] writes below still happen on overflow. */
846 if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
847 WARN_ON(1);
848 }
849
/* Vertex 0: (dst_x, 0) <- (src_x, 0) */
850 vb[0] = i2f(dst_x);
851 vb[1] = 0;
852 vb[2] = i2f(src_x);
853 vb[3] = 0;
854
/* Vertex 1: (dst_x, h) <- (src_x, h) */
855 vb[4] = i2f(dst_x);
856 vb[5] = i2f(h);
857 vb[6] = i2f(src_x);
858 vb[7] = i2f(h);
859
/* Vertex 2: (dst_x + cur_size, h) <- (src_x + cur_size, h) */
860 vb[8] = i2f(dst_x + cur_size);
861 vb[9] = i2f(h);
862 vb[10] = i2f(src_x + cur_size);
863 vb[11] = i2f(h);
864
865 /* src 10 */
866 set_tex_resource(rdev, FMT_8,
867 src_x + cur_size, h, src_x + cur_size,
868 src_gpu_addr);
869
870 /* 5 */
871 cp_set_surface_sync(rdev,
872 PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
873
874
875 /* dst 17 */
876 set_render_target(rdev, COLOR_8,
877 dst_x + cur_size, h,
878 dst_gpu_addr);
879
880 /* scissors 12 */
881 set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);
882
883 /* 15 */
884 vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
885 set_vtx_resource(rdev, vb_gpu_addr);
886
887 /* draw 10 */
888 draw_auto(rdev);
889
890 /* 5 */
891 cp_set_surface_sync(rdev,
892 PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
893 cur_size * h, dst_gpu_addr);
894
/* Next vertex slot: 12 words = 48 bytes. */
895 vb += 12;
896 rdev->r600_blit.vb_used += 12 * 4;
897
/*
 * NOTE(review): the addresses were masked to 256-byte alignment above and
 * src_x / dst_x are not added back here; confirm callers only pass
 * aligned addresses.
 */
898 src_gpu_addr += cur_size * h;
899 dst_gpu_addr += cur_size * h;
900 size_bytes -= cur_size * h;
901 }
902 } else {
/* Dword path: size and both addresses are multiples of 4. */
903 max_bytes = 8192 * 4;
904
905 while (size_bytes) {
906 int cur_size = size_bytes;
907 int src_x = (src_gpu_addr & 255);
908 int dst_x = (dst_gpu_addr & 255);
909 int h = 1;
910 src_gpu_addr = src_gpu_addr & ~255ULL;
911 dst_gpu_addr = dst_gpu_addr & ~255ULL;
912
/* Same rectangle selection as the byte path, with the larger max_bytes. */
913 if (!src_x && !dst_x) {
914 h = (cur_size / max_bytes);
915 if (h > 8192)
916 h = 8192;
917 if (h == 0)
918 h = 1;
919 else
920 cur_size = max_bytes;
921 } else {
922 if (cur_size > max_bytes)
923 cur_size = max_bytes;
924 if (cur_size > (max_bytes - dst_x))
925 cur_size = (max_bytes - dst_x);
926 if (cur_size > (max_bytes - src_x))
927 cur_size = (max_bytes - src_x);
928 }
929
/* NOTE(review): this only warns; the vb[] writes below still happen on overflow. */
930 if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
931 WARN_ON(1);
932 }
933
/* x coordinates are in 4-byte units on this path, hence the / 4. */
934 vb[0] = i2f(dst_x / 4);
935 vb[1] = 0;
936 vb[2] = i2f(src_x / 4);
937 vb[3] = 0;
938
939 vb[4] = i2f(dst_x / 4);
940 vb[5] = i2f(h);
941 vb[6] = i2f(src_x / 4);
942 vb[7] = i2f(h);
943
944 vb[8] = i2f((dst_x + cur_size) / 4);
945 vb[9] = i2f(h);
946 vb[10] = i2f((src_x + cur_size) / 4);
947 vb[11] = i2f(h);
948
949 /* src 10 */
950 set_tex_resource(rdev, FMT_8_8_8_8,
951 (src_x + cur_size) / 4,
952 h, (src_x + cur_size) / 4,
953 src_gpu_addr);
954 /* 5 */
955 cp_set_surface_sync(rdev,
956 PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
957
958 /* dst 17 */
959 set_render_target(rdev, COLOR_8_8_8_8,
960 (dst_x + cur_size) / 4, h,
961 dst_gpu_addr);
962
963 /* scissors 12 */
/*
 * NOTE(review): the right edge is written (dst_x + cur_size / 4), while
 * the render target and vertices use (dst_x + cur_size) / 4; the two
 * differ whenever dst_x is non-zero. Looks like a misplaced parenthesis;
 * confirm.
 */
964 set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
965
966 /* Vertex buffer setup 15 */
967 vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
968 set_vtx_resource(rdev, vb_gpu_addr);
969
970 /* draw 10 */
971 draw_auto(rdev);
972
973 /* 5 */
974 cp_set_surface_sync(rdev,
975 PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
976 cur_size * h, dst_gpu_addr);
977
978 /* 74 ring dwords per loop */
979 vb += 12;
980 rdev->r600_blit.vb_used += 12 * 4;
981
/*
 * NOTE(review): as in the byte path, the masked-off x offsets are not
 * added back when the addresses advance.
 */
982 src_gpu_addr += cur_size * h;
983 dst_gpu_addr += cur_size * h;
984 size_bytes -= cur_size * h;
985 }
986 }
987}
988