diff options
author | Alex Deucher <alexander.deucher@amd.com> | 2012-10-22 12:32:54 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2012-12-10 17:21:42 -0500 |
commit | deab48f140d28d788cb2b5705761a92b02e3440d (patch) | |
tree | 36b351ec99fb926d38d8e8c7a3a881513ff8c7d2 /drivers/gpu/drm/radeon | |
parent | 33e5467871b3007c4e6deea95b2cac38a55ff9f5 (diff) |
drm/radeon: add dma engine support for vm pt updates on si (v2)
The async DMA engine has a special packet for updating physically contiguous
page table (pt) entries, which saves overhead compared with writing each entry individually.
v2: rebase
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/radeon')
-rw-r--r-- | drivers/gpu/drm/radeon/si.c | 104 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/sid.h | 10 |
2 files changed, 90 insertions, 24 deletions
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 93f7171e6bcd..f6e7815e1860 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c | |||
@@ -2825,30 +2825,86 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, | |||
2825 | { | 2825 | { |
2826 | struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; | 2826 | struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; |
2827 | uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); | 2827 | uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); |
2828 | 2828 | uint64_t value; | |
2829 | while (count) { | 2829 | unsigned ndw; |
2830 | unsigned ndw = 2 + count * 2; | 2830 | |
2831 | if (ndw > 0x3FFE) | 2831 | if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) { |
2832 | ndw = 0x3FFE; | 2832 | while (count) { |
2833 | 2833 | ndw = 2 + count * 2; | |
2834 | radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw)); | 2834 | if (ndw > 0x3FFE) |
2835 | radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | | 2835 | ndw = 0x3FFE; |
2836 | WRITE_DATA_DST_SEL(1))); | 2836 | |
2837 | radeon_ring_write(ring, pe); | 2837 | radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw)); |
2838 | radeon_ring_write(ring, upper_32_bits(pe)); | 2838 | radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | |
2839 | for (; ndw > 2; ndw -= 2, --count, pe += 8) { | 2839 | WRITE_DATA_DST_SEL(1))); |
2840 | uint64_t value; | 2840 | radeon_ring_write(ring, pe); |
2841 | if (flags & RADEON_VM_PAGE_SYSTEM) { | 2841 | radeon_ring_write(ring, upper_32_bits(pe)); |
2842 | value = radeon_vm_map_gart(rdev, addr); | 2842 | for (; ndw > 2; ndw -= 2, --count, pe += 8) { |
2843 | value &= 0xFFFFFFFFFFFFF000ULL; | 2843 | if (flags & RADEON_VM_PAGE_SYSTEM) { |
2844 | } else if (flags & RADEON_VM_PAGE_VALID) | 2844 | value = radeon_vm_map_gart(rdev, addr); |
2845 | value = addr; | 2845 | value &= 0xFFFFFFFFFFFFF000ULL; |
2846 | else | 2846 | } else if (flags & RADEON_VM_PAGE_VALID) { |
2847 | value = 0; | 2847 | value = addr; |
2848 | addr += incr; | 2848 | } else { |
2849 | value |= r600_flags; | 2849 | value = 0; |
2850 | radeon_ring_write(ring, value); | 2850 | } |
2851 | radeon_ring_write(ring, upper_32_bits(value)); | 2851 | addr += incr; |
2852 | value |= r600_flags; | ||
2853 | radeon_ring_write(ring, value); | ||
2854 | radeon_ring_write(ring, upper_32_bits(value)); | ||
2855 | } | ||
2856 | } | ||
2857 | } else { | ||
2858 | /* DMA */ | ||
2859 | if (flags & RADEON_VM_PAGE_SYSTEM) { | ||
2860 | while (count) { | ||
2861 | ndw = count * 2; | ||
2862 | if (ndw > 0xFFFFE) | ||
2863 | ndw = 0xFFFFE; | ||
2864 | |||
2865 | /* for non-physically contiguous pages (system) */ | ||
2866 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw)); | ||
2867 | radeon_ring_write(ring, pe); | ||
2868 | radeon_ring_write(ring, upper_32_bits(pe) & 0xff); | ||
2869 | for (; ndw > 0; ndw -= 2, --count, pe += 8) { | ||
2870 | if (flags & RADEON_VM_PAGE_SYSTEM) { | ||
2871 | value = radeon_vm_map_gart(rdev, addr); | ||
2872 | value &= 0xFFFFFFFFFFFFF000ULL; | ||
2873 | } else if (flags & RADEON_VM_PAGE_VALID) { | ||
2874 | value = addr; | ||
2875 | } else { | ||
2876 | value = 0; | ||
2877 | } | ||
2878 | addr += incr; | ||
2879 | value |= r600_flags; | ||
2880 | radeon_ring_write(ring, value); | ||
2881 | radeon_ring_write(ring, upper_32_bits(value)); | ||
2882 | } | ||
2883 | } | ||
2884 | } else { | ||
2885 | while (count) { | ||
2886 | ndw = count * 2; | ||
2887 | if (ndw > 0xFFFFE) | ||
2888 | ndw = 0xFFFFE; | ||
2889 | |||
2890 | if (flags & RADEON_VM_PAGE_VALID) | ||
2891 | value = addr; | ||
2892 | else | ||
2893 | value = 0; | ||
2894 | /* for physically contiguous pages (vram) */ | ||
2895 | radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw)); | ||
2896 | radeon_ring_write(ring, pe); /* dst addr */ | ||
2897 | radeon_ring_write(ring, upper_32_bits(pe) & 0xff); | ||
2898 | radeon_ring_write(ring, r600_flags); /* mask */ | ||
2899 | radeon_ring_write(ring, 0); | ||
2900 | radeon_ring_write(ring, value); /* value */ | ||
2901 | radeon_ring_write(ring, upper_32_bits(value)); | ||
2902 | radeon_ring_write(ring, incr); /* increment size */ | ||
2903 | radeon_ring_write(ring, 0); | ||
2904 | pe += ndw * 4; | ||
2905 | addr += (ndw / 2) * incr; | ||
2906 | count -= ndw / 2; | ||
2907 | } | ||
2852 | } | 2908 | } |
2853 | } | 2909 | } |
2854 | } | 2910 | } |
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 0acd32759c7c..e153c254fbfb 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h | |||
@@ -972,6 +972,16 @@ | |||
972 | (((t) & 0x1) << 23) | \ | 972 | (((t) & 0x1) << 23) | \ |
973 | (((s) & 0x1) << 22) | \ | 973 | (((s) & 0x1) << 22) | \ |
974 | (((n) & 0xFFFFF) << 0)) | 974 | (((n) & 0xFFFFF) << 0)) |
975 | |||
976 | #define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \ | ||
977 | (((vmid) & 0xF) << 20) | \ | ||
978 | (((n) & 0xFFFFF) << 0)) | ||
979 | |||
980 | #define DMA_PTE_PDE_PACKET(n) ((2 << 28) | \ | ||
981 | (1 << 26) | \ | ||
982 | (1 << 21) | \ | ||
983 | (((n) & 0xFFFFF) << 0)) | ||
984 | |||
975 | /* async DMA Packet types */ | 985 | /* async DMA Packet types */ |
976 | #define DMA_PACKET_WRITE 0x2 | 986 | #define DMA_PACKET_WRITE 0x2 |
977 | #define DMA_PACKET_COPY 0x3 | 987 | #define DMA_PACKET_COPY 0x3 |