diff options
author | Ilija Hadzic <ihadzic@research.bell-labs.com> | 2011-10-12 23:29:34 -0400 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2011-10-18 05:06:04 -0400 |
commit | eb32d0c34e64666b5f9d9c040ac85d96ecd1e6ee (patch) | |
tree | 5071f5173acb19e857ef68100125982525eb8750 /drivers/gpu/drm/radeon/evergreen_blit_kms.c | |
parent | 43e5f612578e80b0b0f0122c0a42d2c71faea580 (diff) |
drm/radeon/kms: simplify evergreen blit code
Convert 4k pages to multiples of 64x64x4 tiles.
This is also more efficient than a scanline based
approach from the MC's perspective.
Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Ilija Hadzic <ihadzic@research.bell-labs.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/evergreen_blit_kms.c')
-rw-r--r-- | drivers/gpu/drm/radeon/evergreen_blit_kms.c | 299 |
1 files changed, 121 insertions, 178 deletions
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c index 7eb78b3b30b7..f073dc9af116 100644 --- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c | |||
@@ -44,6 +44,10 @@ | |||
44 | #define COLOR_5_6_5 0x8 | 44 | #define COLOR_5_6_5 0x8 |
45 | #define COLOR_8_8_8_8 0x1a | 45 | #define COLOR_8_8_8_8 0x1a |
46 | 46 | ||
47 | #define RECT_UNIT_H 32 | ||
48 | #define RECT_UNIT_W (RADEON_GPU_PAGE_SIZE / 4 / RECT_UNIT_H) | ||
49 | #define MAX_RECT_DIM 16384 | ||
50 | |||
47 | /* emits 17 */ | 51 | /* emits 17 */ |
48 | static void | 52 | static void |
49 | set_render_target(struct radeon_device *rdev, int format, | 53 | set_render_target(struct radeon_device *rdev, int format, |
@@ -56,7 +60,7 @@ set_render_target(struct radeon_device *rdev, int format, | |||
56 | if (h < 8) | 60 | if (h < 8) |
57 | h = 8; | 61 | h = 8; |
58 | 62 | ||
59 | cb_color_info = ((format << 2) | (1 << 24) | (1 << 8)); | 63 | cb_color_info = ((format << 2) | (1 << 24) | (2 << 8)); |
60 | pitch = (w / 8) - 1; | 64 | pitch = (w / 8) - 1; |
61 | slice = ((w * h) / 64) - 1; | 65 | slice = ((w * h) / 64) - 1; |
62 | 66 | ||
@@ -67,7 +71,7 @@ set_render_target(struct radeon_device *rdev, int format, | |||
67 | radeon_ring_write(rdev, slice); | 71 | radeon_ring_write(rdev, slice); |
68 | radeon_ring_write(rdev, 0); | 72 | radeon_ring_write(rdev, 0); |
69 | radeon_ring_write(rdev, cb_color_info); | 73 | radeon_ring_write(rdev, cb_color_info); |
70 | radeon_ring_write(rdev, (1 << 4)); | 74 | radeon_ring_write(rdev, 0); |
71 | radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16)); | 75 | radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16)); |
72 | radeon_ring_write(rdev, 0); | 76 | radeon_ring_write(rdev, 0); |
73 | radeon_ring_write(rdev, 0); | 77 | radeon_ring_write(rdev, 0); |
@@ -179,7 +183,7 @@ set_tex_resource(struct radeon_device *rdev, | |||
179 | sq_tex_resource_word0 = (1 << 0); /* 2D */ | 183 | sq_tex_resource_word0 = (1 << 0); /* 2D */ |
180 | sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) | | 184 | sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) | |
181 | ((w - 1) << 18)); | 185 | ((w - 1) << 18)); |
182 | sq_tex_resource_word1 = ((h - 1) << 0) | (1 << 28); | 186 | sq_tex_resource_word1 = ((h - 1) << 0) | (2 << 28); |
183 | /* xyzw swizzles */ | 187 | /* xyzw swizzles */ |
184 | sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25); | 188 | sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25); |
185 | 189 | ||
@@ -751,30 +755,80 @@ static void evergreen_vb_ib_put(struct radeon_device *rdev) | |||
751 | radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); | 755 | radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); |
752 | } | 756 | } |
753 | 757 | ||
754 | int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) | 758 | |
759 | /* maps the buffer to a rectangle that satisfies the following properties: | ||
760 | * - dimensions are less or equal to the hardware limit (MAX_RECT_DIM) | ||
761 | * - rectangle consists of integer number of pages | ||
762 | * - height is an integer multiple of RECT_UNIT_H | ||
763 | * - width is an integer multiple of RECT_UNIT_W | ||
764 | * - (the above three conditions also guarantee tile-aligned size) | ||
765 | * - it is as square as possible (sides ratio never greater than 2:1) | ||
766 | * - uses maximum number of pages that fit the above constraints | ||
767 | * | ||
768 | * input: buffer size, pointers to width/height variables | ||
769 | * return: number of pages that were successfully mapped to the rectangle | ||
770 | * width/height of the rectangle | ||
771 | */ | ||
772 | static unsigned evergreen_blit_create_rect(unsigned num_pages, int *width, int *height) | ||
773 | { | ||
774 | unsigned max_pages; | ||
775 | unsigned pages = num_pages; | ||
776 | int w, h; | ||
777 | |||
778 | if (num_pages == 0) { | ||
779 | /* not supposed to be called with no pages, but just in case */ | ||
780 | h = 0; | ||
781 | w = 0; | ||
782 | pages = 0; | ||
783 | WARN_ON(1); | ||
784 | } else { | ||
785 | int rect_order = 2; | ||
786 | h = RECT_UNIT_H; | ||
787 | while (num_pages / rect_order) { | ||
788 | h *= 2; | ||
789 | rect_order *= 4; | ||
790 | if (h >= MAX_RECT_DIM) { | ||
791 | h = MAX_RECT_DIM; | ||
792 | break; | ||
793 | } | ||
794 | } | ||
795 | max_pages = (MAX_RECT_DIM * h) / (RECT_UNIT_W * RECT_UNIT_H); | ||
796 | if (pages > max_pages) | ||
797 | pages = max_pages; | ||
798 | w = (pages * RECT_UNIT_W * RECT_UNIT_H) / h; | ||
799 | w = (w / RECT_UNIT_W) * RECT_UNIT_W; | ||
800 | pages = (w * h) / (RECT_UNIT_W * RECT_UNIT_H); | ||
801 | BUG_ON(pages == 0); | ||
802 | } | ||
803 | |||
804 | |||
805 | DRM_DEBUG("blit_rectangle: h=%d, w=%d, pages=%d\n", h, w, pages); | ||
806 | |||
807 | /* return width and height only if the caller wants it */ | ||
808 | if (height) | ||
809 | *height = h; | ||
810 | if (width) | ||
811 | *width = w; | ||
812 | |||
813 | return pages; | ||
814 | } | ||
815 | |||
816 | int evergreen_blit_prepare_copy(struct radeon_device *rdev, unsigned num_pages) | ||
755 | { | 817 | { |
756 | int r; | 818 | int r; |
757 | int ring_size, line_size; | 819 | int ring_size; |
758 | int max_size; | ||
759 | /* loops of emits + fence emit possible */ | 820 | /* loops of emits + fence emit possible */ |
760 | int dwords_per_loop = 74, num_loops; | 821 | int dwords_per_loop = 74, num_loops = 0; |
761 | 822 | ||
762 | r = evergreen_vb_ib_get(rdev); | 823 | r = evergreen_vb_ib_get(rdev); |
763 | if (r) | 824 | if (r) |
764 | return r; | 825 | return r; |
765 | 826 | ||
766 | /* 8 bpp vs 32 bpp for xfer unit */ | 827 | /* num loops */ |
767 | if (size_bytes & 3) | 828 | while (num_pages) { |
768 | line_size = 8192; | 829 | num_pages -= evergreen_blit_create_rect(num_pages, NULL, NULL); |
769 | else | 830 | num_loops++; |
770 | line_size = 8192 * 4; | 831 | } |
771 | |||
772 | max_size = 8192 * line_size; | ||
773 | |||
774 | /* major loops cover the max size transfer */ | ||
775 | num_loops = ((size_bytes + max_size) / max_size); | ||
776 | /* minor loops cover the extra non aligned bits */ | ||
777 | num_loops += ((size_bytes % line_size) ? 1 : 0); | ||
778 | /* calculate number of loops correctly */ | 832 | /* calculate number of loops correctly */ |
779 | ring_size = num_loops * dwords_per_loop; | 833 | ring_size = num_loops * dwords_per_loop; |
780 | /* set default + shaders */ | 834 | /* set default + shaders */ |
@@ -806,183 +860,72 @@ void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *f | |||
806 | 860 | ||
807 | void evergreen_kms_blit_copy(struct radeon_device *rdev, | 861 | void evergreen_kms_blit_copy(struct radeon_device *rdev, |
808 | u64 src_gpu_addr, u64 dst_gpu_addr, | 862 | u64 src_gpu_addr, u64 dst_gpu_addr, |
809 | int size_bytes) | 863 | unsigned num_pages) |
810 | { | 864 | { |
811 | int max_bytes; | ||
812 | u64 vb_gpu_addr; | 865 | u64 vb_gpu_addr; |
813 | u32 *vb; | 866 | u32 *vb; |
814 | 867 | ||
815 | DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr, | 868 | DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr, |
816 | size_bytes, rdev->r600_blit.vb_used); | 869 | num_pages, rdev->r600_blit.vb_used); |
817 | vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used); | 870 | vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used); |
818 | if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { | ||
819 | max_bytes = 8192; | ||
820 | |||
821 | while (size_bytes) { | ||
822 | int cur_size = size_bytes; | ||
823 | int src_x = src_gpu_addr & 255; | ||
824 | int dst_x = dst_gpu_addr & 255; | ||
825 | int h = 1; | ||
826 | src_gpu_addr = src_gpu_addr & ~255ULL; | ||
827 | dst_gpu_addr = dst_gpu_addr & ~255ULL; | ||
828 | |||
829 | if (!src_x && !dst_x) { | ||
830 | h = (cur_size / max_bytes); | ||
831 | if (h > 8192) | ||
832 | h = 8192; | ||
833 | if (h == 0) | ||
834 | h = 1; | ||
835 | else | ||
836 | cur_size = max_bytes; | ||
837 | } else { | ||
838 | if (cur_size > max_bytes) | ||
839 | cur_size = max_bytes; | ||
840 | if (cur_size > (max_bytes - dst_x)) | ||
841 | cur_size = (max_bytes - dst_x); | ||
842 | if (cur_size > (max_bytes - src_x)) | ||
843 | cur_size = (max_bytes - src_x); | ||
844 | } | ||
845 | 871 | ||
846 | if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { | 872 | while (num_pages) { |
847 | WARN_ON(1); | 873 | int w, h; |
848 | } | 874 | unsigned size_in_bytes; |
875 | unsigned pages_per_loop = evergreen_blit_create_rect(num_pages, &w, &h); | ||
849 | 876 | ||
850 | vb[0] = i2f(dst_x); | 877 | size_in_bytes = pages_per_loop * RADEON_GPU_PAGE_SIZE; |
851 | vb[1] = 0; | 878 | DRM_DEBUG("rectangle w=%d h=%d\n", w, h); |
852 | vb[2] = i2f(src_x); | ||
853 | vb[3] = 0; | ||
854 | 879 | ||
855 | vb[4] = i2f(dst_x); | 880 | if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { |
856 | vb[5] = i2f(h); | 881 | WARN_ON(1); |
857 | vb[6] = i2f(src_x); | 882 | } |
858 | vb[7] = i2f(h); | ||
859 | |||
860 | vb[8] = i2f(dst_x + cur_size); | ||
861 | vb[9] = i2f(h); | ||
862 | vb[10] = i2f(src_x + cur_size); | ||
863 | vb[11] = i2f(h); | ||
864 | |||
865 | /* src 10 */ | ||
866 | set_tex_resource(rdev, FMT_8, | ||
867 | src_x + cur_size, h, src_x + cur_size, | ||
868 | src_gpu_addr); | ||
869 | |||
870 | /* 5 */ | ||
871 | cp_set_surface_sync(rdev, | ||
872 | PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); | ||
873 | 883 | ||
884 | vb[0] = 0; | ||
885 | vb[1] = 0; | ||
886 | vb[2] = 0; | ||
887 | vb[3] = 0; | ||
874 | 888 | ||
875 | /* dst 17 */ | 889 | vb[4] = 0; |
876 | set_render_target(rdev, COLOR_8, | 890 | vb[5] = i2f(h); |
877 | dst_x + cur_size, h, | 891 | vb[6] = 0; |
878 | dst_gpu_addr); | 892 | vb[7] = i2f(h); |
879 | 893 | ||
880 | /* scissors 12 */ | 894 | vb[8] = i2f(w); |
881 | set_scissors(rdev, dst_x, 0, dst_x + cur_size, h); | 895 | vb[9] = i2f(h); |
896 | vb[10] = i2f(w); | ||
897 | vb[11] = i2f(h); | ||
882 | 898 | ||
883 | /* 15 */ | 899 | /* src 10 */ |
884 | vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; | 900 | set_tex_resource(rdev, FMT_8_8_8_8, w, h, w, src_gpu_addr); |
885 | set_vtx_resource(rdev, vb_gpu_addr); | ||
886 | 901 | ||
887 | /* draw 10 */ | 902 | /* 5 */ |
888 | draw_auto(rdev); | 903 | cp_set_surface_sync(rdev, |
904 | PACKET3_TC_ACTION_ENA, size_in_bytes, src_gpu_addr); | ||
889 | 905 | ||
890 | /* 5 */ | 906 | /* dst 17 */ |
891 | cp_set_surface_sync(rdev, | 907 | set_render_target(rdev, COLOR_8_8_8_8, w, h, dst_gpu_addr); |
892 | PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, | ||
893 | cur_size * h, dst_gpu_addr); | ||
894 | 908 | ||
895 | vb += 12; | 909 | /* scissors 12 */ |
896 | rdev->r600_blit.vb_used += 12 * 4; | 910 | set_scissors(rdev, 0, 0, w, h); |
897 | 911 | ||
898 | src_gpu_addr += cur_size * h; | 912 | /* Vertex buffer setup 15 */ |
899 | dst_gpu_addr += cur_size * h; | 913 | vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; |
900 | size_bytes -= cur_size * h; | 914 | set_vtx_resource(rdev, vb_gpu_addr); |
901 | } | ||
902 | } else { | ||
903 | max_bytes = 8192 * 4; | ||
904 | |||
905 | while (size_bytes) { | ||
906 | int cur_size = size_bytes; | ||
907 | int src_x = (src_gpu_addr & 255); | ||
908 | int dst_x = (dst_gpu_addr & 255); | ||
909 | int h = 1; | ||
910 | src_gpu_addr = src_gpu_addr & ~255ULL; | ||
911 | dst_gpu_addr = dst_gpu_addr & ~255ULL; | ||
912 | |||
913 | if (!src_x && !dst_x) { | ||
914 | h = (cur_size / max_bytes); | ||
915 | if (h > 8192) | ||
916 | h = 8192; | ||
917 | if (h == 0) | ||
918 | h = 1; | ||
919 | else | ||
920 | cur_size = max_bytes; | ||
921 | } else { | ||
922 | if (cur_size > max_bytes) | ||
923 | cur_size = max_bytes; | ||
924 | if (cur_size > (max_bytes - dst_x)) | ||
925 | cur_size = (max_bytes - dst_x); | ||
926 | if (cur_size > (max_bytes - src_x)) | ||
927 | cur_size = (max_bytes - src_x); | ||
928 | } | ||
929 | 915 | ||
930 | if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { | 916 | /* draw 10 */ |
931 | WARN_ON(1); | 917 | draw_auto(rdev); |
932 | } | ||
933 | 918 | ||
934 | vb[0] = i2f(dst_x / 4); | 919 | /* 5 */ |
935 | vb[1] = 0; | 920 | cp_set_surface_sync(rdev, |
936 | vb[2] = i2f(src_x / 4); | 921 | PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, |
937 | vb[3] = 0; | 922 | size_in_bytes, dst_gpu_addr); |
938 | 923 | ||
939 | vb[4] = i2f(dst_x / 4); | 924 | /* 74 ring dwords per loop */ |
940 | vb[5] = i2f(h); | 925 | vb += 12; |
941 | vb[6] = i2f(src_x / 4); | 926 | rdev->r600_blit.vb_used += 4*12; |
942 | vb[7] = i2f(h); | 927 | src_gpu_addr += size_in_bytes; |
943 | 928 | dst_gpu_addr += size_in_bytes; | |
944 | vb[8] = i2f((dst_x + cur_size) / 4); | 929 | num_pages -= pages_per_loop; |
945 | vb[9] = i2f(h); | ||
946 | vb[10] = i2f((src_x + cur_size) / 4); | ||
947 | vb[11] = i2f(h); | ||
948 | |||
949 | /* src 10 */ | ||
950 | set_tex_resource(rdev, FMT_8_8_8_8, | ||
951 | (src_x + cur_size) / 4, | ||
952 | h, (src_x + cur_size) / 4, | ||
953 | src_gpu_addr); | ||
954 | /* 5 */ | ||
955 | cp_set_surface_sync(rdev, | ||
956 | PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); | ||
957 | |||
958 | /* dst 17 */ | ||
959 | set_render_target(rdev, COLOR_8_8_8_8, | ||
960 | (dst_x + cur_size) / 4, h, | ||
961 | dst_gpu_addr); | ||
962 | |||
963 | /* scissors 12 */ | ||
964 | set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h); | ||
965 | |||
966 | /* Vertex buffer setup 15 */ | ||
967 | vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; | ||
968 | set_vtx_resource(rdev, vb_gpu_addr); | ||
969 | |||
970 | /* draw 10 */ | ||
971 | draw_auto(rdev); | ||
972 | |||
973 | /* 5 */ | ||
974 | cp_set_surface_sync(rdev, | ||
975 | PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, | ||
976 | cur_size * h, dst_gpu_addr); | ||
977 | |||
978 | /* 74 ring dwords per loop */ | ||
979 | vb += 12; | ||
980 | rdev->r600_blit.vb_used += 12 * 4; | ||
981 | |||
982 | src_gpu_addr += cur_size * h; | ||
983 | dst_gpu_addr += cur_size * h; | ||
984 | size_bytes -= cur_size * h; | ||
985 | } | ||
986 | } | 930 | } |
987 | } | 931 | } |
988 | |||