diff options
-rw-r--r-- | drivers/gpu/drm/radeon/r600.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/r600_blit_kms.c | 280 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_asic.h | 4 |
3 files changed, 111 insertions, 177 deletions
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index c4de1610088c..e106f30787fd 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c | |||
@@ -2363,14 +2363,14 @@ int r600_copy_blit(struct radeon_device *rdev, | |||
2363 | 2363 | ||
2364 | mutex_lock(&rdev->r600_blit.mutex); | 2364 | mutex_lock(&rdev->r600_blit.mutex); |
2365 | rdev->r600_blit.vb_ib = NULL; | 2365 | rdev->r600_blit.vb_ib = NULL; |
2366 | r = r600_blit_prepare_copy(rdev, num_pages * RADEON_GPU_PAGE_SIZE); | 2366 | r = r600_blit_prepare_copy(rdev, num_pages); |
2367 | if (r) { | 2367 | if (r) { |
2368 | if (rdev->r600_blit.vb_ib) | 2368 | if (rdev->r600_blit.vb_ib) |
2369 | radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); | 2369 | radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); |
2370 | mutex_unlock(&rdev->r600_blit.mutex); | 2370 | mutex_unlock(&rdev->r600_blit.mutex); |
2371 | return r; | 2371 | return r; |
2372 | } | 2372 | } |
2373 | r600_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * RADEON_GPU_PAGE_SIZE); | 2373 | r600_kms_blit_copy(rdev, src_offset, dst_offset, num_pages); |
2374 | r600_blit_done_copy(rdev, fence); | 2374 | r600_blit_done_copy(rdev, fence); |
2375 | mutex_unlock(&rdev->r600_blit.mutex); | 2375 | mutex_unlock(&rdev->r600_blit.mutex); |
2376 | return 0; | 2376 | return 0; |
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index bbbafe685543..3940be619af7 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c | |||
@@ -42,6 +42,10 @@ | |||
42 | #define COLOR_5_6_5 0x8 | 42 | #define COLOR_5_6_5 0x8 |
43 | #define COLOR_8_8_8_8 0x1a | 43 | #define COLOR_8_8_8_8 0x1a |
44 | 44 | ||
45 | #define RECT_UNIT_H 32 | ||
46 | #define RECT_UNIT_W (RADEON_GPU_PAGE_SIZE / 4 / RECT_UNIT_H) | ||
47 | #define MAX_RECT_DIM 8192 | ||
48 | |||
45 | /* emits 21 on rv770+, 23 on r600 */ | 49 | /* emits 21 on rv770+, 23 on r600 */ |
46 | static void | 50 | static void |
47 | set_render_target(struct radeon_device *rdev, int format, | 51 | set_render_target(struct radeon_device *rdev, int format, |
@@ -600,13 +604,59 @@ static void r600_vb_ib_put(struct radeon_device *rdev) | |||
600 | radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); | 604 | radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); |
601 | } | 605 | } |
602 | 606 | ||
603 | int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) | 607 | /* FIXME: the function is very similar to evergreen_blit_create_rect, except |
608 | that it uses different predefined constants; consider commonizing */ | |
609 | static unsigned r600_blit_create_rect(unsigned num_pages, int *width, int *height) | ||
610 | { | ||
611 | unsigned max_pages; | ||
612 | unsigned pages = num_pages; | ||
613 | int w, h; | ||
614 | |||
615 | if (num_pages == 0) { | ||
616 | /* not supposed to be called with no pages, but just in case */ | ||
617 | h = 0; | ||
618 | w = 0; | ||
619 | pages = 0; | ||
620 | WARN_ON(1); | ||
621 | } else { | ||
622 | int rect_order = 2; | ||
623 | h = RECT_UNIT_H; | ||
624 | while (num_pages / rect_order) { | ||
625 | h *= 2; | ||
626 | rect_order *= 4; | ||
627 | if (h >= MAX_RECT_DIM) { | ||
628 | h = MAX_RECT_DIM; | ||
629 | break; | ||
630 | } | ||
631 | } | ||
632 | max_pages = (MAX_RECT_DIM * h) / (RECT_UNIT_W * RECT_UNIT_H); | ||
633 | if (pages > max_pages) | ||
634 | pages = max_pages; | ||
635 | w = (pages * RECT_UNIT_W * RECT_UNIT_H) / h; | ||
636 | w = (w / RECT_UNIT_W) * RECT_UNIT_W; | ||
637 | pages = (w * h) / (RECT_UNIT_W * RECT_UNIT_H); | ||
638 | BUG_ON(pages == 0); | ||
639 | } | ||
640 | |||
641 | |||
642 | DRM_DEBUG("blit_rectangle: h=%d, w=%d, pages=%d\n", h, w, pages); | ||
643 | |||
644 | /* return width and height only if the caller wants them */ | |
645 | if (height) | ||
646 | *height = h; | ||
647 | if (width) | ||
648 | *width = w; | ||
649 | |||
650 | return pages; | ||
651 | } | ||
652 | |||
653 | |||
654 | int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_pages) | ||
604 | { | 655 | { |
605 | int r; | 656 | int r; |
606 | int ring_size, line_size; | 657 | int ring_size; |
607 | int max_size; | ||
608 | /* loops of emits 64 + fence emit possible */ | 658 | /* loops of emits 64 + fence emit possible */ |
609 | int dwords_per_loop = 76, num_loops; | 659 | int dwords_per_loop = 76, num_loops = 0; |
610 | 660 | ||
611 | r = r600_vb_ib_get(rdev); | 661 | r = r600_vb_ib_get(rdev); |
612 | if (r) | 662 | if (r) |
@@ -616,18 +666,12 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) | |||
616 | if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) | 666 | if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) |
617 | dwords_per_loop += 2; | 667 | dwords_per_loop += 2; |
618 | 668 | ||
619 | /* 8 bpp vs 32 bpp for xfer unit */ | 669 | /* num loops */ |
620 | if (size_bytes & 3) | 670 | while (num_pages) { |
621 | line_size = 8192; | 671 | num_pages -= r600_blit_create_rect(num_pages, NULL, NULL); |
622 | else | 672 | num_loops++; |
623 | line_size = 8192*4; | 673 | } |
624 | |||
625 | max_size = 8192 * line_size; | ||
626 | 674 | ||
627 | /* major loops cover the max size transfer */ | ||
628 | num_loops = ((size_bytes + max_size) / max_size); | ||
629 | /* minor loops cover the extra non aligned bits */ | ||
630 | num_loops += ((size_bytes % line_size) ? 1 : 0); | ||
631 | /* calculate number of loops correctly */ | 675 | /* calculate number of loops correctly */ |
632 | ring_size = num_loops * dwords_per_loop; | 676 | ring_size = num_loops * dwords_per_loop; |
633 | /* set default + shaders */ | 677 | /* set default + shaders */ |
@@ -659,182 +703,72 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence) | |||
659 | 703 | ||
660 | void r600_kms_blit_copy(struct radeon_device *rdev, | 704 | void r600_kms_blit_copy(struct radeon_device *rdev, |
661 | u64 src_gpu_addr, u64 dst_gpu_addr, | 705 | u64 src_gpu_addr, u64 dst_gpu_addr, |
662 | int size_bytes) | 706 | unsigned num_pages) |
663 | { | 707 | { |
664 | int max_bytes; | ||
665 | u64 vb_gpu_addr; | 708 | u64 vb_gpu_addr; |
666 | u32 *vb; | 709 | u32 *vb; |
667 | 710 | ||
668 | DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr, | 711 | DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr, |
669 | size_bytes, rdev->r600_blit.vb_used); | 712 | num_pages, rdev->r600_blit.vb_used); |
670 | vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used); | 713 | vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used); |
671 | if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { | ||
672 | max_bytes = 8192; | ||
673 | |||
674 | while (size_bytes) { | ||
675 | int cur_size = size_bytes; | ||
676 | int src_x = src_gpu_addr & 255; | ||
677 | int dst_x = dst_gpu_addr & 255; | ||
678 | int h = 1; | ||
679 | src_gpu_addr = src_gpu_addr & ~255ULL; | ||
680 | dst_gpu_addr = dst_gpu_addr & ~255ULL; | ||
681 | |||
682 | if (!src_x && !dst_x) { | ||
683 | h = (cur_size / max_bytes); | ||
684 | if (h > 8192) | ||
685 | h = 8192; | ||
686 | if (h == 0) | ||
687 | h = 1; | ||
688 | else | ||
689 | cur_size = max_bytes; | ||
690 | } else { | ||
691 | if (cur_size > max_bytes) | ||
692 | cur_size = max_bytes; | ||
693 | if (cur_size > (max_bytes - dst_x)) | ||
694 | cur_size = (max_bytes - dst_x); | ||
695 | if (cur_size > (max_bytes - src_x)) | ||
696 | cur_size = (max_bytes - src_x); | ||
697 | } | ||
698 | 714 | ||
699 | if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { | 715 | while (num_pages) { |
700 | WARN_ON(1); | 716 | int w, h; |
701 | } | 717 | unsigned size_in_bytes; |
718 | unsigned pages_per_loop = r600_blit_create_rect(num_pages, &w, &h); | ||
702 | 719 | ||
703 | vb[0] = i2f(dst_x); | 720 | size_in_bytes = pages_per_loop * RADEON_GPU_PAGE_SIZE; |
704 | vb[1] = 0; | 721 | DRM_DEBUG("rectangle w=%d h=%d\n", w, h); |
705 | vb[2] = i2f(src_x); | ||
706 | vb[3] = 0; | ||
707 | 722 | ||
708 | vb[4] = i2f(dst_x); | 723 | if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { |
709 | vb[5] = i2f(h); | 724 | WARN_ON(1); |
710 | vb[6] = i2f(src_x); | 725 | } |
711 | vb[7] = i2f(h); | ||
712 | 726 | ||
713 | vb[8] = i2f(dst_x + cur_size); | 727 | vb[0] = 0; |
714 | vb[9] = i2f(h); | 728 | vb[1] = 0; |
715 | vb[10] = i2f(src_x + cur_size); | 729 | vb[2] = 0; |
716 | vb[11] = i2f(h); | 730 | vb[3] = 0; |
717 | 731 | ||
718 | /* src 9 */ | 732 | vb[4] = 0; |
719 | set_tex_resource(rdev, FMT_8, | 733 | vb[5] = i2f(h); |
720 | src_x + cur_size, h, src_x + cur_size, | 734 | vb[6] = 0; |
721 | src_gpu_addr); | 735 | vb[7] = i2f(h); |
722 | 736 | ||
723 | /* 5 */ | 737 | vb[8] = i2f(w); |
724 | cp_set_surface_sync(rdev, | 738 | vb[9] = i2f(h); |
725 | PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); | 739 | vb[10] = i2f(w); |
740 | vb[11] = i2f(h); | ||
726 | 741 | ||
727 | /* dst 23 */ | 742 | /* src 9 */ |
728 | set_render_target(rdev, COLOR_8, | 743 | set_tex_resource(rdev, FMT_8_8_8_8, w, h, w, src_gpu_addr); |
729 | dst_x + cur_size, h, | ||
730 | dst_gpu_addr); | ||
731 | 744 | ||
732 | /* scissors 12 */ | 745 | /* 5 */ |
733 | set_scissors(rdev, dst_x, 0, dst_x + cur_size, h); | 746 | cp_set_surface_sync(rdev, |
747 | PACKET3_TC_ACTION_ENA, size_in_bytes, src_gpu_addr); | ||
734 | 748 | ||
735 | /* 14 */ | 749 | /* dst 23 */ |
736 | vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; | 750 | set_render_target(rdev, COLOR_8_8_8_8, w, h, dst_gpu_addr); |
737 | set_vtx_resource(rdev, vb_gpu_addr); | ||
738 | 751 | ||
739 | /* draw 10 */ | 752 | /* scissors 12 */ |
740 | draw_auto(rdev); | 753 | set_scissors(rdev, 0, 0, w, h); |
741 | 754 | ||
742 | /* 5 */ | 755 | /* Vertex buffer setup 14 */ |
743 | cp_set_surface_sync(rdev, | 756 | vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; |
744 | PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, | 757 | set_vtx_resource(rdev, vb_gpu_addr); |
745 | cur_size * h, dst_gpu_addr); | ||
746 | 758 | ||
747 | vb += 12; | 759 | /* draw 10 */ |
748 | rdev->r600_blit.vb_used += 12 * 4; | 760 | draw_auto(rdev); |
749 | 761 | ||
750 | src_gpu_addr += cur_size * h; | 762 | /* 5 */ |
751 | dst_gpu_addr += cur_size * h; | 763 | cp_set_surface_sync(rdev, |
752 | size_bytes -= cur_size * h; | 764 | PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, |
753 | } | 765 | size_in_bytes, dst_gpu_addr); |
754 | } else { | 766 | |
755 | max_bytes = 8192 * 4; | 767 | /* 78 ring dwords per loop */ |
756 | 768 | vb += 12; | |
757 | while (size_bytes) { | 769 | rdev->r600_blit.vb_used += 4*12; |
758 | int cur_size = size_bytes; | 770 | src_gpu_addr += size_in_bytes; |
759 | int src_x = (src_gpu_addr & 255); | 771 | dst_gpu_addr += size_in_bytes; |
760 | int dst_x = (dst_gpu_addr & 255); | 772 | num_pages -= pages_per_loop; |
761 | int h = 1; | ||
762 | src_gpu_addr = src_gpu_addr & ~255ULL; | ||
763 | dst_gpu_addr = dst_gpu_addr & ~255ULL; | ||
764 | |||
765 | if (!src_x && !dst_x) { | ||
766 | h = (cur_size / max_bytes); | ||
767 | if (h > 8192) | ||
768 | h = 8192; | ||
769 | if (h == 0) | ||
770 | h = 1; | ||
771 | else | ||
772 | cur_size = max_bytes; | ||
773 | } else { | ||
774 | if (cur_size > max_bytes) | ||
775 | cur_size = max_bytes; | ||
776 | if (cur_size > (max_bytes - dst_x)) | ||
777 | cur_size = (max_bytes - dst_x); | ||
778 | if (cur_size > (max_bytes - src_x)) | ||
779 | cur_size = (max_bytes - src_x); | ||
780 | } | ||
781 | |||
782 | if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { | ||
783 | WARN_ON(1); | ||
784 | } | ||
785 | |||
786 | vb[0] = i2f(dst_x / 4); | ||
787 | vb[1] = 0; | ||
788 | vb[2] = i2f(src_x / 4); | ||
789 | vb[3] = 0; | ||
790 | |||
791 | vb[4] = i2f(dst_x / 4); | ||
792 | vb[5] = i2f(h); | ||
793 | vb[6] = i2f(src_x / 4); | ||
794 | vb[7] = i2f(h); | ||
795 | |||
796 | vb[8] = i2f((dst_x + cur_size) / 4); | ||
797 | vb[9] = i2f(h); | ||
798 | vb[10] = i2f((src_x + cur_size) / 4); | ||
799 | vb[11] = i2f(h); | ||
800 | |||
801 | /* src 9 */ | ||
802 | set_tex_resource(rdev, FMT_8_8_8_8, | ||
803 | (src_x + cur_size) / 4, | ||
804 | h, (src_x + cur_size) / 4, | ||
805 | src_gpu_addr); | ||
806 | /* 5 */ | ||
807 | cp_set_surface_sync(rdev, | ||
808 | PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); | ||
809 | |||
810 | /* dst 23 */ | ||
811 | set_render_target(rdev, COLOR_8_8_8_8, | ||
812 | (dst_x + cur_size) / 4, h, | ||
813 | dst_gpu_addr); | ||
814 | |||
815 | /* scissors 12 */ | ||
816 | set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h); | ||
817 | |||
818 | /* Vertex buffer setup 14 */ | ||
819 | vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; | ||
820 | set_vtx_resource(rdev, vb_gpu_addr); | ||
821 | |||
822 | /* draw 10 */ | ||
823 | draw_auto(rdev); | ||
824 | |||
825 | /* 5 */ | ||
826 | cp_set_surface_sync(rdev, | ||
827 | PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, | ||
828 | cur_size * h, dst_gpu_addr); | ||
829 | |||
830 | /* 78 ring dwords per loop */ | ||
831 | vb += 12; | ||
832 | rdev->r600_blit.vb_used += 12 * 4; | ||
833 | |||
834 | src_gpu_addr += cur_size * h; | ||
835 | dst_gpu_addr += cur_size * h; | ||
836 | size_bytes -= cur_size * h; | ||
837 | } | ||
838 | } | 773 | } |
839 | } | 774 | } |
840 | |||
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 62877ef7e0c6..24402e94d815 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h | |||
@@ -364,11 +364,11 @@ void r600_hdmi_init(struct drm_encoder *encoder); | |||
364 | int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder); | 364 | int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder); |
365 | void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); | 365 | void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); |
366 | /* r600 blit */ | 366 | /* r600 blit */ |
367 | int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes); | 367 | int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_pages); |
368 | void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence); | 368 | void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence); |
369 | void r600_kms_blit_copy(struct radeon_device *rdev, | 369 | void r600_kms_blit_copy(struct radeon_device *rdev, |
370 | u64 src_gpu_addr, u64 dst_gpu_addr, | 370 | u64 src_gpu_addr, u64 dst_gpu_addr, |
371 | int size_bytes); | 371 | unsigned num_pages); |
372 | 372 | ||
373 | /* | 373 | /* |
374 | * rv770,rv730,rv710,rv740 | 374 | * rv770,rv730,rv710,rv740 |