Diffstat (limited to 'drivers/gpu/drm/radeon/cik_sdma.c')

 drivers/gpu/drm/radeon/cik_sdma.c | 253 ++++++++++++------
 1 file changed, 169 insertions(+), 84 deletions(-)
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index 8e9d0f1d858e..192278bc993c 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -24,6 +24,7 @@
 #include <linux/firmware.h>
 #include <drm/drmP.h>
 #include "radeon.h"
+#include "radeon_ucode.h"
 #include "radeon_asic.h"
 #include "radeon_trace.h"
 #include "cikd.h"
@@ -118,6 +119,7 @@ void cik_sdma_set_wptr(struct radeon_device *rdev,
 		reg = SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET;
 
 	WREG32(reg, (ring->wptr << 2) & 0x3fffc);
+	(void)RREG32(reg);
 }
 
 /**
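The read-back added above is the usual MMIO posted-write flush: a PCIe write can sit in a buffer after WREG32 returns, and reading any register on the same path forces it to reach the device before the CPU moves on. A minimal sketch of the idiom, with a hypothetical function name; in the driver, WREG32/RREG32 boil down to writel/readl on the device's MMIO BAR roughly like this:

#include <linux/io.h>

/* Hypothetical helper illustrating the write-then-read-back idiom. */
static void sdma_set_wptr_flushed(void __iomem *mmio, u32 reg, u32 wptr)
{
	writel((wptr << 2) & 0x3fffc, mmio + reg); /* write may be posted */
	(void)readl(mmio + reg); /* read-back forces the write to land */
}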
@@ -419,7 +421,6 @@ static int cik_sdma_rlc_resume(struct radeon_device *rdev)
  */
 static int cik_sdma_load_microcode(struct radeon_device *rdev)
 {
-	const __be32 *fw_data;
 	int i;
 
 	if (!rdev->sdma_fw)
@@ -428,19 +429,48 @@ static int cik_sdma_load_microcode(struct radeon_device *rdev)
 	/* halt the MEs */
 	cik_sdma_enable(rdev, false);
 
-	/* sdma0 */
-	fw_data = (const __be32 *)rdev->sdma_fw->data;
-	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
-	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
-		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
-	WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
-
-	/* sdma1 */
-	fw_data = (const __be32 *)rdev->sdma_fw->data;
-	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
-	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
-		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
-	WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
+	if (rdev->new_fw) {
+		const struct sdma_firmware_header_v1_0 *hdr =
+			(const struct sdma_firmware_header_v1_0 *)rdev->sdma_fw->data;
+		const __le32 *fw_data;
+		u32 fw_size;
+
+		radeon_ucode_print_sdma_hdr(&hdr->header);
+
+		/* sdma0 */
+		fw_data = (const __le32 *)
+			(rdev->sdma_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+		WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
+		for (i = 0; i < fw_size; i++)
+			WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, le32_to_cpup(fw_data++));
+		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
+
+		/* sdma1 */
+		fw_data = (const __le32 *)
+			(rdev->sdma_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+		WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
+		for (i = 0; i < fw_size; i++)
+			WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, le32_to_cpup(fw_data++));
+		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
+	} else {
+		const __be32 *fw_data;
+
+		/* sdma0 */
+		fw_data = (const __be32 *)rdev->sdma_fw->data;
+		WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
+		for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
+			WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
+		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
+
+		/* sdma1 */
+		fw_data = (const __be32 *)rdev->sdma_fw->data;
+		WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
+		for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
+			WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
+		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
+	}
 
 	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
 	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
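The new_fw path above stops trusting the hard-coded CIK_SDMA_UCODE_SIZE: new-format images carry a little-endian header that gives the payload's offset and length, while legacy images are raw big-endian word streams. A minimal sketch of that lookup, assuming the common header layout from radeon_ucode.h (the field list here is an assumption; verify it against your tree), with a hypothetical helper name:

#include <linux/kernel.h>

/* Assumed layout of the common header at the start of every
 * new-format radeon firmware image; all fields little-endian. */
struct common_firmware_header {
	__le32 size_bytes;               /* header + payload */
	__le32 header_size_bytes;
	__le16 header_version_major;
	__le16 header_version_minor;
	__le16 ip_version_major;
	__le16 ip_version_minor;
	__le32 ucode_version;
	__le32 ucode_size_bytes;         /* payload length in bytes */
	__le32 ucode_array_offset_bytes; /* payload offset in bytes */
	__le32 crc32;
};

/* Locate the ucode payload the way cik_sdma_load_microcode() consumes
 * it: offset and length come from the header, and the loader feeds the
 * device one 32-bit LE word at a time (hence the / 4). */
static const __le32 *sdma_ucode_payload(const u8 *fw, u32 *nwords)
{
	const struct common_firmware_header *hdr =
		(const struct common_firmware_header *)fw;

	*nwords = le32_to_cpu(hdr->ucode_size_bytes) / 4;
	return (const __le32 *)(fw + le32_to_cpu(hdr->ucode_array_offset_bytes));
}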
@@ -566,7 +596,7 @@ int cik_copy_dma(struct radeon_device *rdev,
 		return r;
 	}
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 	radeon_semaphore_free(rdev, &sem, *fence);
 
 	return r;
@@ -608,7 +638,7 @@ int cik_sdma_ring_test(struct radeon_device *rdev,
 	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr));
 	radeon_ring_write(ring, 1); /* number of DWs to follow */
 	radeon_ring_write(ring, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = readl(ptr);
@@ -665,7 +695,7 @@ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	ib.ptr[4] = 0xDEADBEEF;
 	ib.length_dw = 5;
 
-	r = radeon_ib_schedule(rdev, &ib, NULL);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
 		radeon_ib_free(rdev, &ib);
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
@@ -719,7 +749,43 @@ bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 }
 
 /**
- * cik_sdma_vm_set_page - update the page tables using sDMA
+ * cik_sdma_vm_copy_pages - update PTEs by copying them from the GART
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA (CIK).
+ */
+void cik_sdma_vm_copy_pages(struct radeon_device *rdev,
+			    struct radeon_ib *ib,
+			    uint64_t pe, uint64_t src,
+			    unsigned count)
+{
+	while (count) {
+		unsigned bytes = count * 8;
+		if (bytes > 0x1FFFF8)
+			bytes = 0x1FFFF8;
+
+		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
+			SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+		ib->ptr[ib->length_dw++] = bytes;
+		ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+		ib->ptr[ib->length_dw++] = lower_32_bits(src);
+		ib->ptr[ib->length_dw++] = upper_32_bits(src);
+		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+
+		pe += bytes;
+		src += bytes;
+		count -= bytes / 8;
+	}
+}
+
+/**
+ * cik_sdma_vm_write_pages - update PTEs by writing them manually
  *
  * @rdev: radeon_device pointer
  * @ib: indirect buffer to fill with commands
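The 0x1FFFF8 cap in cik_sdma_vm_copy_pages above exists, presumably, because the SDMA COPY packet's byte count tops out just under 2 MiB; 0x1FFFF8 is the largest multiple of 8 in that range, so every chunk is a whole number of 8-byte PTEs. A self-contained check of the chunking arithmetic, with a hypothetical helper name:

#include <assert.h>

/* Each PTE is 8 bytes; one COPY packet moves at most 0x1FFFF8 bytes,
 * i.e. 0x3FFFF whole PTEs, so larger updates split into several
 * packets exactly like the loop in cik_sdma_vm_copy_pages. */
static unsigned copy_pages_num_packets(unsigned count)
{
	unsigned packets = 0;

	while (count) {
		unsigned bytes = count * 8;

		if (bytes > 0x1FFFF8)
			bytes = 0x1FFFF8;
		count -= bytes / 8;
		packets++;
	}
	return packets;
}

int main(void)
{
	assert(copy_pages_num_packets(0x3FFFF) == 1); /* exactly one full packet */
	assert(copy_pages_num_packets(0x3FFFF + 1) == 2);
	return 0;
}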
@@ -729,84 +795,103 @@ bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
  * @incr: increase next addr by incr bytes
  * @flags: access flags
  *
- * Update the page tables using sDMA (CIK).
+ * Update PTEs by writing them manually using sDMA (CIK).
  */
-void cik_sdma_vm_set_page(struct radeon_device *rdev,
-			  struct radeon_ib *ib,
-			  uint64_t pe,
-			  uint64_t addr, unsigned count,
-			  uint32_t incr, uint32_t flags)
+void cik_sdma_vm_write_pages(struct radeon_device *rdev,
+			     struct radeon_ib *ib,
+			     uint64_t pe,
+			     uint64_t addr, unsigned count,
+			     uint32_t incr, uint32_t flags)
 {
 	uint64_t value;
 	unsigned ndw;
 
-	trace_radeon_vm_set_page(pe, addr, count, incr, flags);
-
-	if (flags == R600_PTE_GART) {
-		uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
-		while (count) {
-			unsigned bytes = count * 8;
-			if (bytes > 0x1FFFF8)
-				bytes = 0x1FFFF8;
-
-			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
-			ib->ptr[ib->length_dw++] = bytes;
-			ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
-			ib->ptr[ib->length_dw++] = lower_32_bits(src);
-			ib->ptr[ib->length_dw++] = upper_32_bits(src);
-			ib->ptr[ib->length_dw++] = lower_32_bits(pe);
-			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
-			pe += bytes;
-			src += bytes;
-			count -= bytes / 8;
-		}
-	} else if (flags & R600_PTE_SYSTEM) {
-		while (count) {
-			ndw = count * 2;
-			if (ndw > 0xFFFFE)
-				ndw = 0xFFFFE;
-
-			/* for non-physically contiguous pages (system) */
-			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
-			ib->ptr[ib->length_dw++] = pe;
-			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-			ib->ptr[ib->length_dw++] = ndw;
-			for (; ndw > 0; ndw -= 2, --count, pe += 8) {
-				value = radeon_vm_map_gart(rdev, addr);
-				value &= 0xFFFFFFFFFFFFF000ULL;
-				addr += incr;
-				value |= flags;
-				ib->ptr[ib->length_dw++] = value;
-				ib->ptr[ib->length_dw++] = upper_32_bits(value);
-			}
-		}
-	} else {
-		while (count) {
-			ndw = count;
-			if (ndw > 0x7FFFF)
-				ndw = 0x7FFFF;
-
-			if (flags & R600_PTE_VALID)
-				value = addr;
-			else
-				value = 0;
-			/* for physically contiguous pages (vram) */
-			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
-			ib->ptr[ib->length_dw++] = pe; /* dst addr */
-			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-			ib->ptr[ib->length_dw++] = flags; /* mask */
-			ib->ptr[ib->length_dw++] = 0;
-			ib->ptr[ib->length_dw++] = value; /* value */
-			ib->ptr[ib->length_dw++] = upper_32_bits(value);
-			ib->ptr[ib->length_dw++] = incr; /* increment size */
-			ib->ptr[ib->length_dw++] = 0;
-			ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-			pe += ndw * 8;
-			addr += ndw * incr;
-			count -= ndw;
-		}
-	}
+	while (count) {
+		ndw = count * 2;
+		if (ndw > 0xFFFFE)
+			ndw = 0xFFFFE;
+
+		/* for non-physically contiguous pages (system) */
+		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
+			SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+		ib->ptr[ib->length_dw++] = pe;
+		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+		ib->ptr[ib->length_dw++] = ndw;
+		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+			if (flags & R600_PTE_SYSTEM) {
+				value = radeon_vm_map_gart(rdev, addr);
+				value &= 0xFFFFFFFFFFFFF000ULL;
+			} else if (flags & R600_PTE_VALID) {
+				value = addr;
+			} else {
+				value = 0;
+			}
+			addr += incr;
+			value |= flags;
+			ib->ptr[ib->length_dw++] = value;
+			ib->ptr[ib->length_dw++] = upper_32_bits(value);
+		}
+	}
+}
+
+/**
+ * cik_sdma_vm_set_pages - update the page tables using sDMA
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA (CIK).
+ */
+void cik_sdma_vm_set_pages(struct radeon_device *rdev,
+			   struct radeon_ib *ib,
+			   uint64_t pe,
+			   uint64_t addr, unsigned count,
+			   uint32_t incr, uint32_t flags)
+{
+	uint64_t value;
+	unsigned ndw;
+
+	while (count) {
+		ndw = count;
+		if (ndw > 0x7FFFF)
+			ndw = 0x7FFFF;
+
+		if (flags & R600_PTE_VALID)
+			value = addr;
+		else
+			value = 0;
+
+		/* for physically contiguous pages (vram) */
+		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
+		ib->ptr[ib->length_dw++] = pe; /* dst addr */
+		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+		ib->ptr[ib->length_dw++] = flags; /* mask */
+		ib->ptr[ib->length_dw++] = 0;
+		ib->ptr[ib->length_dw++] = value; /* value */
+		ib->ptr[ib->length_dw++] = upper_32_bits(value);
+		ib->ptr[ib->length_dw++] = incr; /* increment size */
+		ib->ptr[ib->length_dw++] = 0;
+		ib->ptr[ib->length_dw++] = ndw; /* number of entries */
+
+		pe += ndw * 8;
+		addr += ndw * incr;
+		count -= ndw;
+	}
+}
+
+/**
+ * cik_sdma_vm_pad_ib - pad the IB to the required number of dw
+ *
+ * @ib: indirect buffer to fill with padding
+ *
+ */
+void cik_sdma_vm_pad_ib(struct radeon_ib *ib)
+{
 	while (ib->length_dw & 0x7)
 		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
 }
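With the split complete, the flag-sniffing that cik_sdma_vm_set_page used to do becomes the caller's job: copy ready-made PTEs out of the GART table, write scattered system PTEs by hand, or let the engine generate PTEs for contiguous VRAM, then pad the IB. A hedged sketch of such a caller, using a hypothetical pt_update descriptor; the real driver routes these helpers through radeon_asic function pointers rather than anything like this:

#include "radeon.h"
#include "radeon_asic.h"

/* Hypothetical descriptor for one page-table update; the real driver
 * derives these values inside the VM code, not from a struct. */
struct pt_update {
	uint64_t pe;        /* GPU address of the first PTE to touch */
	uint64_t addr;      /* destination address (or GART src addr) */
	unsigned count;     /* number of PTEs */
	uint32_t incr;      /* address increment per PTE */
	uint32_t flags;     /* R600_PTE_* access flags */
	bool use_gart_copy; /* PTEs can be copied straight from the GART */
};

static void pt_update_emit(struct radeon_device *rdev, struct radeon_ib *ib,
			   const struct pt_update *u)
{
	if (u->use_gart_copy)
		/* ready-made PTEs in the GART table: copy them */
		cik_sdma_vm_copy_pages(rdev, ib, u->pe, u->addr, u->count);
	else if (u->flags & R600_PTE_SYSTEM)
		/* scattered system pages: write each PTE by hand */
		cik_sdma_vm_write_pages(rdev, ib, u->pe, u->addr,
					u->count, u->incr, u->flags);
	else
		/* contiguous VRAM: let the engine generate the PTEs */
		cik_sdma_vm_set_pages(rdev, ib, u->pe, u->addr,
				      u->count, u->incr, u->flags);

	/* SDMA IBs must be padded to a multiple of 8 dwords */
	cik_sdma_vm_pad_ib(ib);
}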