Diffstat (limited to 'drivers/gpu/drm/radeon/cik_sdma.c')
 drivers/gpu/drm/radeon/cik_sdma.c | 253 ++++++++++++++++++++++-----------
 1 file changed, 169 insertions(+), 84 deletions(-)
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index 8e9d0f1d858e..192278bc993c 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -24,6 +24,7 @@
 #include <linux/firmware.h>
 #include <drm/drmP.h>
 #include "radeon.h"
+#include "radeon_ucode.h"
 #include "radeon_asic.h"
 #include "radeon_trace.h"
 #include "cikd.h"
@@ -118,6 +119,7 @@ void cik_sdma_set_wptr(struct radeon_device *rdev,
 		reg = SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET;
 
 	WREG32(reg, (ring->wptr << 2) & 0x3fffc);
+	(void)RREG32(reg);
 }
 
 /**
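
The read-back added above is the standard MMIO posted-write flush idiom: PCIe memory writes are posted, so pairing the write-pointer update with a read from the same register forces the write out to the SDMA engine before the CPU moves on. A minimal sketch of the pattern using the generic accessors (the helper name is hypothetical, not something this driver defines):

	/* writel() may sit in bridge/posting buffers; a readl() from the
	 * same device cannot complete until prior writes have landed. */
	static inline void mmio_write32_flushed(void __iomem *reg, u32 val)
	{
		writel(val, reg);	/* posted write */
		(void)readl(reg);	/* read-back drains it */
	}
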
@@ -419,7 +421,6 @@ static int cik_sdma_rlc_resume(struct radeon_device *rdev)
  */
 static int cik_sdma_load_microcode(struct radeon_device *rdev)
 {
-	const __be32 *fw_data;
 	int i;
 
 	if (!rdev->sdma_fw)
@@ -428,19 +429,48 @@ static int cik_sdma_load_microcode(struct radeon_device *rdev)
 	/* halt the MEs */
 	cik_sdma_enable(rdev, false);
 
-	/* sdma0 */
-	fw_data = (const __be32 *)rdev->sdma_fw->data;
-	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
-	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
-		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
-	WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
-
-	/* sdma1 */
-	fw_data = (const __be32 *)rdev->sdma_fw->data;
-	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
-	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
-		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
-	WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
+	if (rdev->new_fw) {
+		const struct sdma_firmware_header_v1_0 *hdr =
+			(const struct sdma_firmware_header_v1_0 *)rdev->sdma_fw->data;
+		const __le32 *fw_data;
+		u32 fw_size;
+
+		radeon_ucode_print_sdma_hdr(&hdr->header);
+
+		/* sdma0 */
+		fw_data = (const __le32 *)
+			(rdev->sdma_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+		WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
+		for (i = 0; i < fw_size; i++)
+			WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, le32_to_cpup(fw_data++));
+		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
+
+		/* sdma1 */
+		fw_data = (const __le32 *)
+			(rdev->sdma_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+		WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
+		for (i = 0; i < fw_size; i++)
+			WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, le32_to_cpup(fw_data++));
+		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
+	} else {
+		const __be32 *fw_data;
+
+		/* sdma0 */
+		fw_data = (const __be32 *)rdev->sdma_fw->data;
+		WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
+		for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
+			WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
+		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
+
+		/* sdma1 */
+		fw_data = (const __be32 *)rdev->sdma_fw->data;
+		WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
+		for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
+			WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
+		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
+	}
 
 	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
 	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
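
The rdev->new_fw branch above stops relying on the hard-coded CIK_SDMA_UCODE_SIZE and instead reads the payload size and offset from a little-endian header prepended to the firmware image, which is why radeon_ucode.h is now included. A simplified sketch of the assumed layout; the authoritative definitions live in radeon_ucode.h and the version fields are elided here:

	/* Assumed on-disk layout walked by the new_fw branch; all fields
	 * little-endian, hence the le32_to_cpu() conversions above. */
	struct common_firmware_header {
		uint32_t size_bytes;		/* whole image, header included */
		uint32_t header_size_bytes;
		/* ... header/IP/ucode version fields elided ... */
		uint32_t ucode_size_bytes;	/* payload length in bytes */
		uint32_t ucode_array_offset_bytes; /* payload start in the blob */
		uint32_t crc32;
	};

	struct sdma_firmware_header_v1_0 {
		struct common_firmware_header header;
		uint32_t ucode_change_version;
		uint32_t jt_offset;		/* jump table offset (dwords) */
		uint32_t jt_size;		/* jump table size (dwords) */
	};

With such a layout, fw_data points at data + ucode_array_offset_bytes and fw_size is ucode_size_bytes / 4 dwords, exactly as computed above.
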
@@ -566,7 +596,7 @@ int cik_copy_dma(struct radeon_device *rdev,
 		return r;
 	}
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 	radeon_semaphore_free(rdev, &sem, *fence);
 
 	return r;
@@ -608,7 +638,7 @@ int cik_sdma_ring_test(struct radeon_device *rdev,
 	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr));
 	radeon_ring_write(ring, 1); /* number of DWs to follow */
 	radeon_ring_write(ring, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = readl(ptr);
@@ -665,7 +695,7 @@ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	ib.ptr[4] = 0xDEADBEEF;
 	ib.length_dw = 5;
 
-	r = radeon_ib_schedule(rdev, &ib, NULL);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
 		radeon_ib_free(rdev, &ib);
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
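
Here and in the two hunks above, radeon_ring_unlock_commit() and radeon_ib_schedule() gain a trailing bool, which this series uses to let callers say whether committing should also flush the HDP cache; these internal tests poll a write-back location, so they pass false. A sketch of a caller under that assumption (emit_sdma_nop() is a hypothetical helper, not part of the driver):

	static int emit_sdma_nop(struct radeon_device *rdev,
				 struct radeon_ring *ring)
	{
		int r = radeon_ring_lock(rdev, ring, 8);
		if (r)
			return r;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
		/* false: assumed hdp_flush flag; a self-contained
		 * test submission needs no flush */
		radeon_ring_unlock_commit(rdev, ring, false);
		return 0;
	}
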
@@ -719,7 +749,43 @@ bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 }
 
 /**
- * cik_sdma_vm_set_page - update the page tables using sDMA
+ * cik_sdma_vm_copy_pages - update PTEs by copying them from the GART
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA (CIK).
+ */
+void cik_sdma_vm_copy_pages(struct radeon_device *rdev,
+			    struct radeon_ib *ib,
+			    uint64_t pe, uint64_t src,
+			    unsigned count)
+{
+	while (count) {
+		unsigned bytes = count * 8;
+		if (bytes > 0x1FFFF8)
+			bytes = 0x1FFFF8;
+
+		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
+			SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+		ib->ptr[ib->length_dw++] = bytes;
+		ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+		ib->ptr[ib->length_dw++] = lower_32_bits(src);
+		ib->ptr[ib->length_dw++] = upper_32_bits(src);
+		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+
+		pe += bytes;
+		src += bytes;
+		count -= bytes / 8;
+	}
+}
+
+/**
+ * cik_sdma_vm_write_pages - update PTEs by writing them manually
  *
  * @rdev: radeon_device pointer
  * @ib: indirect buffer to fill with commands
@@ -729,84 +795,103 @@ bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
  * @incr: increase next addr by incr bytes
  * @flags: access flags
  *
- * Update the page tables using sDMA (CIK).
+ * Update PTEs by writing them manually using sDMA (CIK).
  */
-void cik_sdma_vm_set_page(struct radeon_device *rdev,
-			  struct radeon_ib *ib,
-			  uint64_t pe,
-			  uint64_t addr, unsigned count,
-			  uint32_t incr, uint32_t flags)
+void cik_sdma_vm_write_pages(struct radeon_device *rdev,
+			     struct radeon_ib *ib,
+			     uint64_t pe,
+			     uint64_t addr, unsigned count,
+			     uint32_t incr, uint32_t flags)
 {
 	uint64_t value;
 	unsigned ndw;
 
-	trace_radeon_vm_set_page(pe, addr, count, incr, flags);
-
-	if (flags == R600_PTE_GART) {
-		uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
-		while (count) {
-			unsigned bytes = count * 8;
-			if (bytes > 0x1FFFF8)
-				bytes = 0x1FFFF8;
-
-			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
-			ib->ptr[ib->length_dw++] = bytes;
-			ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
-			ib->ptr[ib->length_dw++] = lower_32_bits(src);
-			ib->ptr[ib->length_dw++] = upper_32_bits(src);
-			ib->ptr[ib->length_dw++] = lower_32_bits(pe);
-			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
-			pe += bytes;
-			src += bytes;
-			count -= bytes / 8;
-		}
-	} else if (flags & R600_PTE_SYSTEM) {
-		while (count) {
-			ndw = count * 2;
-			if (ndw > 0xFFFFE)
-				ndw = 0xFFFFE;
-
-			/* for non-physically contiguous pages (system) */
-			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
-			ib->ptr[ib->length_dw++] = pe;
-			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-			ib->ptr[ib->length_dw++] = ndw;
-			for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+	while (count) {
+		ndw = count * 2;
+		if (ndw > 0xFFFFE)
+			ndw = 0xFFFFE;
+
+		/* for non-physically contiguous pages (system) */
+		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
+			SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+		ib->ptr[ib->length_dw++] = pe;
+		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+		ib->ptr[ib->length_dw++] = ndw;
+		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+			if (flags & R600_PTE_SYSTEM) {
 				value = radeon_vm_map_gart(rdev, addr);
 				value &= 0xFFFFFFFFFFFFF000ULL;
-				addr += incr;
-				value |= flags;
-				ib->ptr[ib->length_dw++] = value;
-				ib->ptr[ib->length_dw++] = upper_32_bits(value);
-			}
-		}
-	} else {
-		while (count) {
-			ndw = count;
-			if (ndw > 0x7FFFF)
-				ndw = 0x7FFFF;
-
-			if (flags & R600_PTE_VALID)
+			} else if (flags & R600_PTE_VALID) {
 				value = addr;
-			else
+			} else {
 				value = 0;
-			/* for physically contiguous pages (vram) */
-			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
-			ib->ptr[ib->length_dw++] = pe; /* dst addr */
-			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-			ib->ptr[ib->length_dw++] = flags; /* mask */
-			ib->ptr[ib->length_dw++] = 0;
-			ib->ptr[ib->length_dw++] = value; /* value */
+			}
+			addr += incr;
+			value |= flags;
+			ib->ptr[ib->length_dw++] = value;
 			ib->ptr[ib->length_dw++] = upper_32_bits(value);
-			ib->ptr[ib->length_dw++] = incr; /* increment size */
-			ib->ptr[ib->length_dw++] = 0;
-			ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-			pe += ndw * 8;
-			addr += ndw * incr;
-			count -= ndw;
 		}
 	}
+}
+
+/**
+ * cik_sdma_vm_set_pages - update the page tables using sDMA
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA (CIK).
+ */
+void cik_sdma_vm_set_pages(struct radeon_device *rdev,
+			   struct radeon_ib *ib,
+			   uint64_t pe,
+			   uint64_t addr, unsigned count,
+			   uint32_t incr, uint32_t flags)
+{
+	uint64_t value;
+	unsigned ndw;
+
+	while (count) {
+		ndw = count;
+		if (ndw > 0x7FFFF)
+			ndw = 0x7FFFF;
+
+		if (flags & R600_PTE_VALID)
+			value = addr;
+		else
+			value = 0;
+
+		/* for physically contiguous pages (vram) */
+		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
+		ib->ptr[ib->length_dw++] = pe; /* dst addr */
+		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+		ib->ptr[ib->length_dw++] = flags; /* mask */
+		ib->ptr[ib->length_dw++] = 0;
+		ib->ptr[ib->length_dw++] = value; /* value */
+		ib->ptr[ib->length_dw++] = upper_32_bits(value);
+		ib->ptr[ib->length_dw++] = incr; /* increment size */
+		ib->ptr[ib->length_dw++] = 0;
+		ib->ptr[ib->length_dw++] = ndw; /* number of entries */
+
+		pe += ndw * 8;
+		addr += ndw * incr;
+		count -= ndw;
+	}
+}
+
+/**
+ * cik_sdma_vm_pad_ib - pad the IB to the required number of dw
+ *
+ * @ib: indirect buffer to fill with padding
+ *
+ */
+void cik_sdma_vm_pad_ib(struct radeon_ib *ib)
+{
 	while (ib->length_dw & 0x7)
 		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
 }
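
Taken together, these hunks split the old catch-all cik_sdma_vm_set_page() into three specialized paths plus an explicit padding helper; the choice between them moves up into common VM code. A sketch of that selection, reconstructed from the branches of the removed function (update_ptes_sketch() is an illustrative name, not the actual radeon_vm.c code):

	static void update_ptes_sketch(struct radeon_device *rdev,
				       struct radeon_ib *ib,
				       uint64_t pe, uint64_t addr,
				       unsigned count, uint32_t incr,
				       uint32_t flags)
	{
		if (flags == R600_PTE_GART) {
			/* PTEs already live in the GART table: DMA-copy them */
			uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
			cik_sdma_vm_copy_pages(rdev, ib, pe, src, count);
		} else if (flags & R600_PTE_SYSTEM) {
			/* scattered system pages: write each PTE individually */
			cik_sdma_vm_write_pages(rdev, ib, pe, addr, count,
						incr, flags);
		} else {
			/* contiguous VRAM: have the engine generate the PTEs */
			cik_sdma_vm_set_pages(rdev, ib, pe, addr, count,
					      incr, flags);
		}
		cik_sdma_vm_pad_ib(ib); /* SDMA IBs: multiple of 8 dwords */
	}
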