aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/radeon/evergreen_blit_kms.c
diff options
context:
space:
mode:
authorIlija Hadzic <ihadzic@research.bell-labs.com>2011-10-12 23:29:34 -0400
committerDave Airlie <airlied@redhat.com>2011-10-18 05:06:04 -0400
commiteb32d0c34e64666b5f9d9c040ac85d96ecd1e6ee (patch)
tree5071f5173acb19e857ef68100125982525eb8750 /drivers/gpu/drm/radeon/evergreen_blit_kms.c
parent43e5f612578e80b0b0f0122c0a42d2c71faea580 (diff)
drm/radeon/kms: simplify evergreen blit code
Convert 4k pages to multiples of 64x64x4 tiles. This is also more efficient than a scanline based approach from the MC's perspective. Signed-off-by: Alex Deucher <alexdeucher@gmail.com> Signed-off-by: Ilija Hadzic <ihadzic@research.bell-labs.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/evergreen_blit_kms.c')
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_kms.c299
1 files changed, 121 insertions, 178 deletions
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 7eb78b3b30b7..f073dc9af116 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -44,6 +44,10 @@
44#define COLOR_5_6_5 0x8 44#define COLOR_5_6_5 0x8
45#define COLOR_8_8_8_8 0x1a 45#define COLOR_8_8_8_8 0x1a
46 46
47#define RECT_UNIT_H 32
48#define RECT_UNIT_W (RADEON_GPU_PAGE_SIZE / 4 / RECT_UNIT_H)
49#define MAX_RECT_DIM 16384
50
47/* emits 17 */ 51/* emits 17 */
48static void 52static void
49set_render_target(struct radeon_device *rdev, int format, 53set_render_target(struct radeon_device *rdev, int format,
@@ -56,7 +60,7 @@ set_render_target(struct radeon_device *rdev, int format,
56 if (h < 8) 60 if (h < 8)
57 h = 8; 61 h = 8;
58 62
59 cb_color_info = ((format << 2) | (1 << 24) | (1 << 8)); 63 cb_color_info = ((format << 2) | (1 << 24) | (2 << 8));
60 pitch = (w / 8) - 1; 64 pitch = (w / 8) - 1;
61 slice = ((w * h) / 64) - 1; 65 slice = ((w * h) / 64) - 1;
62 66
@@ -67,7 +71,7 @@ set_render_target(struct radeon_device *rdev, int format,
67 radeon_ring_write(rdev, slice); 71 radeon_ring_write(rdev, slice);
68 radeon_ring_write(rdev, 0); 72 radeon_ring_write(rdev, 0);
69 radeon_ring_write(rdev, cb_color_info); 73 radeon_ring_write(rdev, cb_color_info);
70 radeon_ring_write(rdev, (1 << 4)); 74 radeon_ring_write(rdev, 0);
71 radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16)); 75 radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16));
72 radeon_ring_write(rdev, 0); 76 radeon_ring_write(rdev, 0);
73 radeon_ring_write(rdev, 0); 77 radeon_ring_write(rdev, 0);
@@ -179,7 +183,7 @@ set_tex_resource(struct radeon_device *rdev,
179 sq_tex_resource_word0 = (1 << 0); /* 2D */ 183 sq_tex_resource_word0 = (1 << 0); /* 2D */
180 sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) | 184 sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) |
181 ((w - 1) << 18)); 185 ((w - 1) << 18));
182 sq_tex_resource_word1 = ((h - 1) << 0) | (1 << 28); 186 sq_tex_resource_word1 = ((h - 1) << 0) | (2 << 28);
183 /* xyzw swizzles */ 187 /* xyzw swizzles */
184 sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25); 188 sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25);
185 189
@@ -751,30 +755,80 @@ static void evergreen_vb_ib_put(struct radeon_device *rdev)
751 radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); 755 radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
752} 756}
753 757
754int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) 758
759/* maps the rectangle to the buffer so that it satisfies the following properties:
760 * - dimensions are less than or equal to the hardware limit (MAX_RECT_DIM)
761 * - rectangle consists of integer number of pages
762 * - height is an integer multiple of RECT_UNIT_H
763 * - width is an integer multiple of RECT_UNIT_W
764 * - (the above three conditions also guarantee tile-aligned size)
765 * - it is as square as possible (sides ratio never greater than 2:1)
766 * - uses maximum number of pages that fit the above constraints
767 *
768 * input: buffer size in pages (num_pages), pointers to width/height variables
769 * return: number of pages that were successfully mapped to the rectangle;
770 * width/height of the rectangle are stored through the pointers when non-NULL
771 */
772static unsigned evergreen_blit_create_rect(unsigned num_pages, int *width, int *height)
773{
774 unsigned max_pages;
775 unsigned pages = num_pages;
776 int w, h;
777
778 if (num_pages == 0) {
779 /* not supposed to be called with no pages, but just in case */
780 h = 0;
781 w = 0;
782 pages = 0;
783 WARN_ON(1);
784 } else {
785 int rect_order = 2;
786 h = RECT_UNIT_H;
787 while (num_pages / rect_order) {
788 h *= 2;
789 rect_order *= 4;
790 if (h >= MAX_RECT_DIM) {
791 h = MAX_RECT_DIM;
792 break;
793 }
794 }
795 max_pages = (MAX_RECT_DIM * h) / (RECT_UNIT_W * RECT_UNIT_H);
796 if (pages > max_pages)
797 pages = max_pages;
798 w = (pages * RECT_UNIT_W * RECT_UNIT_H) / h;
799 w = (w / RECT_UNIT_W) * RECT_UNIT_W;
800 pages = (w * h) / (RECT_UNIT_W * RECT_UNIT_H);
801 BUG_ON(pages == 0);
802 }
803
804
805 DRM_DEBUG("blit_rectangle: h=%d, w=%d, pages=%d\n", h, w, pages);
806
807 /* return width and height only if the caller wants them */
808 if (height)
809 *height = h;
810 if (width)
811 *width = w;
812
813 return pages;
814}
815
816int evergreen_blit_prepare_copy(struct radeon_device *rdev, unsigned num_pages)
755{ 817{
756 int r; 818 int r;
757 int ring_size, line_size; 819 int ring_size;
758 int max_size;
759 /* loops of emits + fence emit possible */ 820 /* loops of emits + fence emit possible */
760 int dwords_per_loop = 74, num_loops; 821 int dwords_per_loop = 74, num_loops = 0;
761 822
762 r = evergreen_vb_ib_get(rdev); 823 r = evergreen_vb_ib_get(rdev);
763 if (r) 824 if (r)
764 return r; 825 return r;
765 826
766 /* 8 bpp vs 32 bpp for xfer unit */ 827 /* num loops */
767 if (size_bytes & 3) 828 while (num_pages) {
768 line_size = 8192; 829 num_pages -= evergreen_blit_create_rect(num_pages, NULL, NULL);
769 else 830 num_loops++;
770 line_size = 8192 * 4; 831 }
771
772 max_size = 8192 * line_size;
773
774 /* major loops cover the max size transfer */
775 num_loops = ((size_bytes + max_size) / max_size);
776 /* minor loops cover the extra non aligned bits */
777 num_loops += ((size_bytes % line_size) ? 1 : 0);
778 /* calculate number of loops correctly */ 832 /* calculate number of loops correctly */
779 ring_size = num_loops * dwords_per_loop; 833 ring_size = num_loops * dwords_per_loop;
780 /* set default + shaders */ 834 /* set default + shaders */
@@ -806,183 +860,72 @@ void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *f
806 860
807void evergreen_kms_blit_copy(struct radeon_device *rdev, 861void evergreen_kms_blit_copy(struct radeon_device *rdev,
808 u64 src_gpu_addr, u64 dst_gpu_addr, 862 u64 src_gpu_addr, u64 dst_gpu_addr,
809 int size_bytes) 863 unsigned num_pages)
810{ 864{
811 int max_bytes;
812 u64 vb_gpu_addr; 865 u64 vb_gpu_addr;
813 u32 *vb; 866 u32 *vb;
814 867
815 DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr, 868 DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
816 size_bytes, rdev->r600_blit.vb_used); 869 num_pages, rdev->r600_blit.vb_used);
817 vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used); 870 vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
818 if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
819 max_bytes = 8192;
820
821 while (size_bytes) {
822 int cur_size = size_bytes;
823 int src_x = src_gpu_addr & 255;
824 int dst_x = dst_gpu_addr & 255;
825 int h = 1;
826 src_gpu_addr = src_gpu_addr & ~255ULL;
827 dst_gpu_addr = dst_gpu_addr & ~255ULL;
828
829 if (!src_x && !dst_x) {
830 h = (cur_size / max_bytes);
831 if (h > 8192)
832 h = 8192;
833 if (h == 0)
834 h = 1;
835 else
836 cur_size = max_bytes;
837 } else {
838 if (cur_size > max_bytes)
839 cur_size = max_bytes;
840 if (cur_size > (max_bytes - dst_x))
841 cur_size = (max_bytes - dst_x);
842 if (cur_size > (max_bytes - src_x))
843 cur_size = (max_bytes - src_x);
844 }
845 871
846 if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { 872 while (num_pages) {
847 WARN_ON(1); 873 int w, h;
848 } 874 unsigned size_in_bytes;
875 unsigned pages_per_loop = evergreen_blit_create_rect(num_pages, &w, &h);
849 876
850 vb[0] = i2f(dst_x); 877 size_in_bytes = pages_per_loop * RADEON_GPU_PAGE_SIZE;
851 vb[1] = 0; 878 DRM_DEBUG("rectangle w=%d h=%d\n", w, h);
852 vb[2] = i2f(src_x);
853 vb[3] = 0;
854 879
855 vb[4] = i2f(dst_x); 880 if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
856 vb[5] = i2f(h); 881 WARN_ON(1);
857 vb[6] = i2f(src_x); 882 }
858 vb[7] = i2f(h);
859
860 vb[8] = i2f(dst_x + cur_size);
861 vb[9] = i2f(h);
862 vb[10] = i2f(src_x + cur_size);
863 vb[11] = i2f(h);
864
865 /* src 10 */
866 set_tex_resource(rdev, FMT_8,
867 src_x + cur_size, h, src_x + cur_size,
868 src_gpu_addr);
869
870 /* 5 */
871 cp_set_surface_sync(rdev,
872 PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
873 883
884 vb[0] = 0;
885 vb[1] = 0;
886 vb[2] = 0;
887 vb[3] = 0;
874 888
875 /* dst 17 */ 889 vb[4] = 0;
876 set_render_target(rdev, COLOR_8, 890 vb[5] = i2f(h);
877 dst_x + cur_size, h, 891 vb[6] = 0;
878 dst_gpu_addr); 892 vb[7] = i2f(h);
879 893
880 /* scissors 12 */ 894 vb[8] = i2f(w);
881 set_scissors(rdev, dst_x, 0, dst_x + cur_size, h); 895 vb[9] = i2f(h);
896 vb[10] = i2f(w);
897 vb[11] = i2f(h);
882 898
883 /* 15 */ 899 /* src 10 */
884 vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; 900 set_tex_resource(rdev, FMT_8_8_8_8, w, h, w, src_gpu_addr);
885 set_vtx_resource(rdev, vb_gpu_addr);
886 901
887 /* draw 10 */ 902 /* 5 */
888 draw_auto(rdev); 903 cp_set_surface_sync(rdev,
904 PACKET3_TC_ACTION_ENA, size_in_bytes, src_gpu_addr);
889 905
890 /* 5 */ 906 /* dst 17 */
891 cp_set_surface_sync(rdev, 907 set_render_target(rdev, COLOR_8_8_8_8, w, h, dst_gpu_addr);
892 PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
893 cur_size * h, dst_gpu_addr);
894 908
895 vb += 12; 909 /* scissors 12 */
896 rdev->r600_blit.vb_used += 12 * 4; 910 set_scissors(rdev, 0, 0, w, h);
897 911
898 src_gpu_addr += cur_size * h; 912 /* Vertex buffer setup 15 */
899 dst_gpu_addr += cur_size * h; 913 vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
900 size_bytes -= cur_size * h; 914 set_vtx_resource(rdev, vb_gpu_addr);
901 }
902 } else {
903 max_bytes = 8192 * 4;
904
905 while (size_bytes) {
906 int cur_size = size_bytes;
907 int src_x = (src_gpu_addr & 255);
908 int dst_x = (dst_gpu_addr & 255);
909 int h = 1;
910 src_gpu_addr = src_gpu_addr & ~255ULL;
911 dst_gpu_addr = dst_gpu_addr & ~255ULL;
912
913 if (!src_x && !dst_x) {
914 h = (cur_size / max_bytes);
915 if (h > 8192)
916 h = 8192;
917 if (h == 0)
918 h = 1;
919 else
920 cur_size = max_bytes;
921 } else {
922 if (cur_size > max_bytes)
923 cur_size = max_bytes;
924 if (cur_size > (max_bytes - dst_x))
925 cur_size = (max_bytes - dst_x);
926 if (cur_size > (max_bytes - src_x))
927 cur_size = (max_bytes - src_x);
928 }
929 915
930 if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { 916 /* draw 10 */
931 WARN_ON(1); 917 draw_auto(rdev);
932 }
933 918
934 vb[0] = i2f(dst_x / 4); 919 /* 5 */
935 vb[1] = 0; 920 cp_set_surface_sync(rdev,
936 vb[2] = i2f(src_x / 4); 921 PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
937 vb[3] = 0; 922 size_in_bytes, dst_gpu_addr);
938 923
939 vb[4] = i2f(dst_x / 4); 924 /* 74 ring dwords per loop */
940 vb[5] = i2f(h); 925 vb += 12;
941 vb[6] = i2f(src_x / 4); 926 rdev->r600_blit.vb_used += 4*12;
942 vb[7] = i2f(h); 927 src_gpu_addr += size_in_bytes;
943 928 dst_gpu_addr += size_in_bytes;
944 vb[8] = i2f((dst_x + cur_size) / 4); 929 num_pages -= pages_per_loop;
945 vb[9] = i2f(h);
946 vb[10] = i2f((src_x + cur_size) / 4);
947 vb[11] = i2f(h);
948
949 /* src 10 */
950 set_tex_resource(rdev, FMT_8_8_8_8,
951 (src_x + cur_size) / 4,
952 h, (src_x + cur_size) / 4,
953 src_gpu_addr);
954 /* 5 */
955 cp_set_surface_sync(rdev,
956 PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
957
958 /* dst 17 */
959 set_render_target(rdev, COLOR_8_8_8_8,
960 (dst_x + cur_size) / 4, h,
961 dst_gpu_addr);
962
963 /* scissors 12 */
964 set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
965
966 /* Vertex buffer setup 15 */
967 vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
968 set_vtx_resource(rdev, vb_gpu_addr);
969
970 /* draw 10 */
971 draw_auto(rdev);
972
973 /* 5 */
974 cp_set_surface_sync(rdev,
975 PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
976 cur_size * h, dst_gpu_addr);
977
978 /* 74 ring dwords per loop */
979 vb += 12;
980 rdev->r600_blit.vb_used += 12 * 4;
981
982 src_gpu_addr += cur_size * h;
983 dst_gpu_addr += cur_size * h;
984 size_bytes -= cur_size * h;
985 }
986 } 930 }
987} 931}
988