aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/radeon/r600_blit_kms.c
diff options
context:
space:
mode:
authorAlex Deucher <alexdeucher@gmail.com>2011-05-17 05:09:43 -0400
committerDave Airlie <airlied@redhat.com>2011-10-18 05:06:22 -0400
commit7dbf41db327ebcbc0d28a2b39afcbd60664094e6 (patch)
tree67490b6a21710f5363c3e4051f60297ea2ff585b /drivers/gpu/drm/radeon/r600_blit_kms.c
parenteb32d0c34e64666b5f9d9c040ac85d96ecd1e6ee (diff)
drm/radeon/kms: simplify r6xx blit code
Convert 4k pages to multiples of 64x64x4 tiles. This is also more efficient than a scanline based approach from the MC's perspective. Signed-off-by: Alex Deucher <alexdeucher@gmail.com> Signed-off-by: Ilija Hadzic <ihadzic@research.bell-labs.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/r600_blit_kms.c')
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_kms.c280
1 files changed, 107 insertions, 173 deletions
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index bbbafe685543..3940be619af7 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -42,6 +42,10 @@
42#define COLOR_5_6_5 0x8 42#define COLOR_5_6_5 0x8
43#define COLOR_8_8_8_8 0x1a 43#define COLOR_8_8_8_8 0x1a
44 44
45#define RECT_UNIT_H 32
46#define RECT_UNIT_W (RADEON_GPU_PAGE_SIZE / 4 / RECT_UNIT_H)
47#define MAX_RECT_DIM 8192
48
45/* emits 21 on rv770+, 23 on r600 */ 49/* emits 21 on rv770+, 23 on r600 */
46static void 50static void
47set_render_target(struct radeon_device *rdev, int format, 51set_render_target(struct radeon_device *rdev, int format,
@@ -600,13 +604,59 @@ static void r600_vb_ib_put(struct radeon_device *rdev)
600 radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); 604 radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
601} 605}
602 606
603int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) 607/* FIXME: the function is very similar to evergreen_blit_create_rect, except
608 that it uses different predefined constants; consider commonizing */
609static unsigned r600_blit_create_rect(unsigned num_pages, int *width, int *height)
610{
611 unsigned max_pages;
612 unsigned pages = num_pages;
613 int w, h;
614
615 if (num_pages == 0) {
616 /* not supposed to be called with no pages, but just in case */
617 h = 0;
618 w = 0;
619 pages = 0;
620 WARN_ON(1);
621 } else {
622 int rect_order = 2;
623 h = RECT_UNIT_H;
624 while (num_pages / rect_order) {
625 h *= 2;
626 rect_order *= 4;
627 if (h >= MAX_RECT_DIM) {
628 h = MAX_RECT_DIM;
629 break;
630 }
631 }
632 max_pages = (MAX_RECT_DIM * h) / (RECT_UNIT_W * RECT_UNIT_H);
633 if (pages > max_pages)
634 pages = max_pages;
635 w = (pages * RECT_UNIT_W * RECT_UNIT_H) / h;
636 w = (w / RECT_UNIT_W) * RECT_UNIT_W;
637 pages = (w * h) / (RECT_UNIT_W * RECT_UNIT_H);
638 BUG_ON(pages == 0);
639 }
640
641
642 DRM_DEBUG("blit_rectangle: h=%d, w=%d, pages=%d\n", h, w, pages);
643
644 /* return width and height only if the caller wants them */
645 if (height)
646 *height = h;
647 if (width)
648 *width = w;
649
650 return pages;
651}
652
653
654int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_pages)
604{ 655{
605 int r; 656 int r;
606 int ring_size, line_size; 657 int ring_size;
607 int max_size;
608 /* loops of emits 64 + fence emit possible */ 658 /* loops of emits 64 + fence emit possible */
609 int dwords_per_loop = 76, num_loops; 659 int dwords_per_loop = 76, num_loops = 0;
610 660
611 r = r600_vb_ib_get(rdev); 661 r = r600_vb_ib_get(rdev);
612 if (r) 662 if (r)
@@ -616,18 +666,12 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
616 if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) 666 if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770)
617 dwords_per_loop += 2; 667 dwords_per_loop += 2;
618 668
619 /* 8 bpp vs 32 bpp for xfer unit */ 669 /* num loops */
620 if (size_bytes & 3) 670 while (num_pages) {
621 line_size = 8192; 671 num_pages -= r600_blit_create_rect(num_pages, NULL, NULL);
622 else 672 num_loops++;
623 line_size = 8192*4; 673 }
624
625 max_size = 8192 * line_size;
626 674
627 /* major loops cover the max size transfer */
628 num_loops = ((size_bytes + max_size) / max_size);
629 /* minor loops cover the extra non aligned bits */
630 num_loops += ((size_bytes % line_size) ? 1 : 0);
631 /* calculate number of loops correctly */ 675 /* calculate number of loops correctly */
632 ring_size = num_loops * dwords_per_loop; 676 ring_size = num_loops * dwords_per_loop;
633 /* set default + shaders */ 677 /* set default + shaders */
@@ -659,182 +703,72 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
659 703
660void r600_kms_blit_copy(struct radeon_device *rdev, 704void r600_kms_blit_copy(struct radeon_device *rdev,
661 u64 src_gpu_addr, u64 dst_gpu_addr, 705 u64 src_gpu_addr, u64 dst_gpu_addr,
662 int size_bytes) 706 unsigned num_pages)
663{ 707{
664 int max_bytes;
665 u64 vb_gpu_addr; 708 u64 vb_gpu_addr;
666 u32 *vb; 709 u32 *vb;
667 710
668 DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr, 711 DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
669 size_bytes, rdev->r600_blit.vb_used); 712 num_pages, rdev->r600_blit.vb_used);
670 vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used); 713 vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
671 if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
672 max_bytes = 8192;
673
674 while (size_bytes) {
675 int cur_size = size_bytes;
676 int src_x = src_gpu_addr & 255;
677 int dst_x = dst_gpu_addr & 255;
678 int h = 1;
679 src_gpu_addr = src_gpu_addr & ~255ULL;
680 dst_gpu_addr = dst_gpu_addr & ~255ULL;
681
682 if (!src_x && !dst_x) {
683 h = (cur_size / max_bytes);
684 if (h > 8192)
685 h = 8192;
686 if (h == 0)
687 h = 1;
688 else
689 cur_size = max_bytes;
690 } else {
691 if (cur_size > max_bytes)
692 cur_size = max_bytes;
693 if (cur_size > (max_bytes - dst_x))
694 cur_size = (max_bytes - dst_x);
695 if (cur_size > (max_bytes - src_x))
696 cur_size = (max_bytes - src_x);
697 }
698 714
699 if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { 715 while (num_pages) {
700 WARN_ON(1); 716 int w, h;
701 } 717 unsigned size_in_bytes;
718 unsigned pages_per_loop = r600_blit_create_rect(num_pages, &w, &h);
702 719
703 vb[0] = i2f(dst_x); 720 size_in_bytes = pages_per_loop * RADEON_GPU_PAGE_SIZE;
704 vb[1] = 0; 721 DRM_DEBUG("rectangle w=%d h=%d\n", w, h);
705 vb[2] = i2f(src_x);
706 vb[3] = 0;
707 722
708 vb[4] = i2f(dst_x); 723 if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
709 vb[5] = i2f(h); 724 WARN_ON(1);
710 vb[6] = i2f(src_x); 725 }
711 vb[7] = i2f(h);
712 726
713 vb[8] = i2f(dst_x + cur_size); 727 vb[0] = 0;
714 vb[9] = i2f(h); 728 vb[1] = 0;
715 vb[10] = i2f(src_x + cur_size); 729 vb[2] = 0;
716 vb[11] = i2f(h); 730 vb[3] = 0;
717 731
718 /* src 9 */ 732 vb[4] = 0;
719 set_tex_resource(rdev, FMT_8, 733 vb[5] = i2f(h);
720 src_x + cur_size, h, src_x + cur_size, 734 vb[6] = 0;
721 src_gpu_addr); 735 vb[7] = i2f(h);
722 736
723 /* 5 */ 737 vb[8] = i2f(w);
724 cp_set_surface_sync(rdev, 738 vb[9] = i2f(h);
725 PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); 739 vb[10] = i2f(w);
740 vb[11] = i2f(h);
726 741
727 /* dst 23 */ 742 /* src 9 */
728 set_render_target(rdev, COLOR_8, 743 set_tex_resource(rdev, FMT_8_8_8_8, w, h, w, src_gpu_addr);
729 dst_x + cur_size, h,
730 dst_gpu_addr);
731 744
732 /* scissors 12 */ 745 /* 5 */
733 set_scissors(rdev, dst_x, 0, dst_x + cur_size, h); 746 cp_set_surface_sync(rdev,
747 PACKET3_TC_ACTION_ENA, size_in_bytes, src_gpu_addr);
734 748
735 /* 14 */ 749 /* dst 23 */
736 vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; 750 set_render_target(rdev, COLOR_8_8_8_8, w, h, dst_gpu_addr);
737 set_vtx_resource(rdev, vb_gpu_addr);
738 751
739 /* draw 10 */ 752 /* scissors 12 */
740 draw_auto(rdev); 753 set_scissors(rdev, 0, 0, w, h);
741 754
742 /* 5 */ 755 /* Vertex buffer setup 14 */
743 cp_set_surface_sync(rdev, 756 vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
744 PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, 757 set_vtx_resource(rdev, vb_gpu_addr);
745 cur_size * h, dst_gpu_addr);
746 758
747 vb += 12; 759 /* draw 10 */
748 rdev->r600_blit.vb_used += 12 * 4; 760 draw_auto(rdev);
749 761
750 src_gpu_addr += cur_size * h; 762 /* 5 */
751 dst_gpu_addr += cur_size * h; 763 cp_set_surface_sync(rdev,
752 size_bytes -= cur_size * h; 764 PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
753 } 765 size_in_bytes, dst_gpu_addr);
754 } else { 766
755 max_bytes = 8192 * 4; 767 /* 78 ring dwords per loop */
756 768 vb += 12;
757 while (size_bytes) { 769 rdev->r600_blit.vb_used += 4*12;
758 int cur_size = size_bytes; 770 src_gpu_addr += size_in_bytes;
759 int src_x = (src_gpu_addr & 255); 771 dst_gpu_addr += size_in_bytes;
760 int dst_x = (dst_gpu_addr & 255); 772 num_pages -= pages_per_loop;
761 int h = 1;
762 src_gpu_addr = src_gpu_addr & ~255ULL;
763 dst_gpu_addr = dst_gpu_addr & ~255ULL;
764
765 if (!src_x && !dst_x) {
766 h = (cur_size / max_bytes);
767 if (h > 8192)
768 h = 8192;
769 if (h == 0)
770 h = 1;
771 else
772 cur_size = max_bytes;
773 } else {
774 if (cur_size > max_bytes)
775 cur_size = max_bytes;
776 if (cur_size > (max_bytes - dst_x))
777 cur_size = (max_bytes - dst_x);
778 if (cur_size > (max_bytes - src_x))
779 cur_size = (max_bytes - src_x);
780 }
781
782 if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
783 WARN_ON(1);
784 }
785
786 vb[0] = i2f(dst_x / 4);
787 vb[1] = 0;
788 vb[2] = i2f(src_x / 4);
789 vb[3] = 0;
790
791 vb[4] = i2f(dst_x / 4);
792 vb[5] = i2f(h);
793 vb[6] = i2f(src_x / 4);
794 vb[7] = i2f(h);
795
796 vb[8] = i2f((dst_x + cur_size) / 4);
797 vb[9] = i2f(h);
798 vb[10] = i2f((src_x + cur_size) / 4);
799 vb[11] = i2f(h);
800
801 /* src 9 */
802 set_tex_resource(rdev, FMT_8_8_8_8,
803 (src_x + cur_size) / 4,
804 h, (src_x + cur_size) / 4,
805 src_gpu_addr);
806 /* 5 */
807 cp_set_surface_sync(rdev,
808 PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
809
810 /* dst 23 */
811 set_render_target(rdev, COLOR_8_8_8_8,
812 (dst_x + cur_size) / 4, h,
813 dst_gpu_addr);
814
815 /* scissors 12 */
816 set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
817
818 /* Vertex buffer setup 14 */
819 vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
820 set_vtx_resource(rdev, vb_gpu_addr);
821
822 /* draw 10 */
823 draw_auto(rdev);
824
825 /* 5 */
826 cp_set_surface_sync(rdev,
827 PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
828 cur_size * h, dst_gpu_addr);
829
830 /* 78 ring dwords per loop */
831 vb += 12;
832 rdev->r600_blit.vb_used += 12 * 4;
833
834 src_gpu_addr += cur_size * h;
835 dst_gpu_addr += cur_size * h;
836 size_bytes -= cur_size * h;
837 }
838 } 773 }
839} 774}
840