| author | Christian König <christian.koenig@amd.com> | 2014-07-30 15:05:17 -0400 |
|---|---|---|
| committer | Alex Deucher <alexander.deucher@amd.com> | 2014-08-05 08:53:55 -0400 |
| commit | 03f62abd112d5150b6ce8957fa85d4f6e85e357f (patch) | |
| tree | f0e6beb6d82b225f936bf33f1756c19bdc671208 /drivers/gpu | |
| parent | 5a341be27fe23c31e4849b0a0506a4469bcbc283 (diff) | |
drm/radeon: split PT setup in more functions
Move the decision of which method to use into the common VM code.
Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | drivers/gpu/drm/radeon/cik_sdma.c | 189 |
| -rw-r--r-- | drivers/gpu/drm/radeon/ni_dma.c | 197 |
| -rw-r--r-- | drivers/gpu/drm/radeon/radeon.h | 25 |
| -rw-r--r-- | drivers/gpu/drm/radeon/radeon_asic.c | 25 |
| -rw-r--r-- | drivers/gpu/drm/radeon/radeon_asic.h | 64 |
| -rw-r--r-- | drivers/gpu/drm/radeon/radeon_vm.c | 67 |
| -rw-r--r-- | drivers/gpu/drm/radeon/si_dma.c | 172 |

7 files changed, 492 insertions(+), 247 deletions(-)
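Before the per-file hunks, a condensed view of the split may help: the old per-ASIC `set_page` callback contained this branching itself, and the patch hoists it into one common helper, leaving each backend with three narrow callbacks (`copy_pages`, `write_pages`, `set_pages`) plus an explicit `pad_ib` step. The sketch below is lifted from the radeon_vm.c hunk further down; the inline comments are editorial glosses on the three paths, not part of the patch:

```c
/* Common dispatch, as introduced in radeon_vm.c by this patch. */
static void radeon_vm_set_pages(struct radeon_device *rdev,
				struct radeon_ib *ib,
				uint64_t pe,
				uint64_t addr, unsigned count,
				uint32_t incr, uint32_t flags)
{
	/* tracing now happens once here instead of in every backend */
	trace_radeon_vm_set_page(pe, addr, count, incr, flags);

	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
		/* the PTEs already exist in the GART table: DMA-copy them */
		uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
		radeon_asic_vm_copy_pages(rdev, ib, pe, src, count);

	} else if ((flags & R600_PTE_SYSTEM) || (count < 3)) {
		/* scattered system pages, or too few entries to amortize
		 * a PTE_PDE packet: write each PTE into the IB directly */
		radeon_asic_vm_write_pages(rdev, ib, pe, addr,
					   count, incr, flags);

	} else {
		/* contiguous VRAM: let the DMA engine generate the PTEs */
		radeon_asic_vm_set_pages(rdev, ib, pe, addr,
					 count, incr, flags);
	}
}
```

Note that the NOP padding every `set_page` implementation used to append itself is now a separate `pad_ib` callback, which the common code invokes once per IB before scheduling it.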
```diff
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index db8ce30ff31f..bcf480510ac2 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -749,7 +749,43 @@ bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 }
 
 /**
- * cik_sdma_vm_set_page - update the page tables using sDMA
+ * cik_sdma_vm_copy_pages - update PTEs by copying them from the GART
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA (CIK).
+ */
+void cik_sdma_vm_copy_pages(struct radeon_device *rdev,
+			    struct radeon_ib *ib,
+			    uint64_t pe, uint64_t src,
+			    unsigned count)
+{
+	while (count) {
+		unsigned bytes = count * 8;
+		if (bytes > 0x1FFFF8)
+			bytes = 0x1FFFF8;
+
+		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
+			SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+		ib->ptr[ib->length_dw++] = bytes;
+		ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+		ib->ptr[ib->length_dw++] = lower_32_bits(src);
+		ib->ptr[ib->length_dw++] = upper_32_bits(src);
+		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+
+		pe += bytes;
+		src += bytes;
+		count -= bytes / 8;
+	}
+}
+
+/**
+ * cik_sdma_vm_write_pages - update PTEs by writing them manually
  *
  * @rdev: radeon_device pointer
  * @ib: indirect buffer to fill with commands
@@ -759,84 +795,103 @@ bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
  * @incr: increase next addr by incr bytes
  * @flags: access flags
  *
- * Update the page tables using sDMA (CIK).
+ * Update PTEs by writing them manually using sDMA (CIK).
  */
-void cik_sdma_vm_set_page(struct radeon_device *rdev,
-			  struct radeon_ib *ib,
-			  uint64_t pe,
-			  uint64_t addr, unsigned count,
-			  uint32_t incr, uint32_t flags)
+void cik_sdma_vm_write_pages(struct radeon_device *rdev,
+			     struct radeon_ib *ib,
+			     uint64_t pe,
+			     uint64_t addr, unsigned count,
+			     uint32_t incr, uint32_t flags)
 {
 	uint64_t value;
 	unsigned ndw;
 
-	trace_radeon_vm_set_page(pe, addr, count, incr, flags);
-
-	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
-		uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
-		while (count) {
-			unsigned bytes = count * 8;
-			if (bytes > 0x1FFFF8)
-				bytes = 0x1FFFF8;
-
-			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
-			ib->ptr[ib->length_dw++] = bytes;
-			ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
-			ib->ptr[ib->length_dw++] = lower_32_bits(src);
-			ib->ptr[ib->length_dw++] = upper_32_bits(src);
-			ib->ptr[ib->length_dw++] = lower_32_bits(pe);
-			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
-			pe += bytes;
-			src += bytes;
-			count -= bytes / 8;
-		}
-	} else if (flags & R600_PTE_SYSTEM) {
-		while (count) {
-			ndw = count * 2;
-			if (ndw > 0xFFFFE)
-				ndw = 0xFFFFE;
-
-			/* for non-physically contiguous pages (system) */
-			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
-			ib->ptr[ib->length_dw++] = pe;
-			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-			ib->ptr[ib->length_dw++] = ndw;
-			for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+	while (count) {
+		ndw = count * 2;
+		if (ndw > 0xFFFFE)
+			ndw = 0xFFFFE;
+
+		/* for non-physically contiguous pages (system) */
+		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
+			SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+		ib->ptr[ib->length_dw++] = pe;
+		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+		ib->ptr[ib->length_dw++] = ndw;
+		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+			if (flags & R600_PTE_SYSTEM) {
 				value = radeon_vm_map_gart(rdev, addr);
 				value &= 0xFFFFFFFFFFFFF000ULL;
-				addr += incr;
-				value |= flags;
-				ib->ptr[ib->length_dw++] = value;
-				ib->ptr[ib->length_dw++] = upper_32_bits(value);
-			}
-		}
-	} else {
-		while (count) {
-			ndw = count;
-			if (ndw > 0x7FFFF)
-				ndw = 0x7FFFF;
-
-			if (flags & R600_PTE_VALID)
+			} else if (flags & R600_PTE_VALID) {
 				value = addr;
-			else
+			} else {
 				value = 0;
-			/* for physically contiguous pages (vram) */
-			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
-			ib->ptr[ib->length_dw++] = pe; /* dst addr */
-			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-			ib->ptr[ib->length_dw++] = flags; /* mask */
-			ib->ptr[ib->length_dw++] = 0;
-			ib->ptr[ib->length_dw++] = value; /* value */
+			}
+			addr += incr;
+			value |= flags;
+			ib->ptr[ib->length_dw++] = value;
 			ib->ptr[ib->length_dw++] = upper_32_bits(value);
-			ib->ptr[ib->length_dw++] = incr; /* increment size */
-			ib->ptr[ib->length_dw++] = 0;
-			ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-			pe += ndw * 8;
-			addr += ndw * incr;
-			count -= ndw;
 		}
 	}
+}
+
+/**
+ * cik_sdma_vm_set_pages - update the page tables using sDMA
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA (CIK).
+ */
+void cik_sdma_vm_set_pages(struct radeon_device *rdev,
+			   struct radeon_ib *ib,
+			   uint64_t pe,
+			   uint64_t addr, unsigned count,
+			   uint32_t incr, uint32_t flags)
+{
+	uint64_t value;
+	unsigned ndw;
+
+	while (count) {
+		ndw = count;
+		if (ndw > 0x7FFFF)
+			ndw = 0x7FFFF;
+
+		if (flags & R600_PTE_VALID)
+			value = addr;
+		else
+			value = 0;
+
+		/* for physically contiguous pages (vram) */
+		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
+		ib->ptr[ib->length_dw++] = pe; /* dst addr */
+		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+		ib->ptr[ib->length_dw++] = flags; /* mask */
+		ib->ptr[ib->length_dw++] = 0;
+		ib->ptr[ib->length_dw++] = value; /* value */
+		ib->ptr[ib->length_dw++] = upper_32_bits(value);
+		ib->ptr[ib->length_dw++] = incr; /* increment size */
+		ib->ptr[ib->length_dw++] = 0;
+		ib->ptr[ib->length_dw++] = ndw; /* number of entries */
+
+		pe += ndw * 8;
+		addr += ndw * incr;
+		count -= ndw;
+	}
+}
+
+/**
+ * cik_sdma_vm_pad_ib - pad the IB to the required number of dw
+ *
+ * @ib: indirect buffer to fill with padding
+ *
+ */
+void cik_sdma_vm_pad_ib(struct radeon_ib *ib)
+{
 	while (ib->length_dw & 0x7)
 		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0, 0);
 }
```
```diff
diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c
index 66325ef39694..8a3e6221cece 100644
--- a/drivers/gpu/drm/radeon/ni_dma.c
+++ b/drivers/gpu/drm/radeon/ni_dma.c
@@ -307,7 +307,43 @@ bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 }
 
 /**
- * cayman_dma_vm_set_page - update the page tables using the DMA
+ * cayman_dma_vm_copy_pages - update PTEs by copying them from the GART
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr where to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using the DMA (cayman/TN).
+ */
+void cayman_dma_vm_copy_pages(struct radeon_device *rdev,
+			      struct radeon_ib *ib,
+			      uint64_t pe, uint64_t src,
+			      unsigned count)
+{
+	unsigned ndw;
+
+	while (count) {
+		ndw = count * 2;
+		if (ndw > 0xFFFFE)
+			ndw = 0xFFFFE;
+
+		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
+						      0, 0, ndw);
+		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+		ib->ptr[ib->length_dw++] = lower_32_bits(src);
+		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+		ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
+
+		pe += ndw * 4;
+		src += ndw * 4;
+		count -= ndw / 2;
+	}
+}
+
+/**
+ * cayman_dma_vm_write_pages - update PTEs by writing them manually
  *
  * @rdev: radeon_device pointer
  * @ib: indirect buffer to fill with commands
@@ -315,90 +351,103 @@ bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
  * @incr: increase next addr by incr bytes
 * @flags: hw access flags
  *
- * Update the page tables using the DMA (cayman/TN).
+ * Update PTEs by writing them manually using the DMA (cayman/TN).
  */
-void cayman_dma_vm_set_page(struct radeon_device *rdev,
-			    struct radeon_ib *ib,
-			    uint64_t pe,
-			    uint64_t addr, unsigned count,
-			    uint32_t incr, uint32_t flags)
+void cayman_dma_vm_write_pages(struct radeon_device *rdev,
+			       struct radeon_ib *ib,
+			       uint64_t pe,
+			       uint64_t addr, unsigned count,
+			       uint32_t incr, uint32_t flags)
 {
 	uint64_t value;
 	unsigned ndw;
 
-	trace_radeon_vm_set_page(pe, addr, count, incr, flags);
-
-	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
-		uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
-		while (count) {
-			ndw = count * 2;
-			if (ndw > 0xFFFFE)
-				ndw = 0xFFFFE;
-
-			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
-							      0, 0, ndw);
-			ib->ptr[ib->length_dw++] = lower_32_bits(pe);
-			ib->ptr[ib->length_dw++] = lower_32_bits(src);
-			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
-			ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
-
-			pe += ndw * 4;
-			src += ndw * 4;
-			count -= ndw / 2;
-		}
-
-	} else if ((flags & R600_PTE_SYSTEM) || (count == 1)) {
-		while (count) {
-			ndw = count * 2;
-			if (ndw > 0xFFFFE)
-				ndw = 0xFFFFE;
-
-			/* for non-physically contiguous pages (system) */
-			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
-			ib->ptr[ib->length_dw++] = pe;
-			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
-			for (; ndw > 0; ndw -= 2, --count, pe += 8) {
-				if (flags & R600_PTE_SYSTEM) {
-					value = radeon_vm_map_gart(rdev, addr);
-					value &= 0xFFFFFFFFFFFFF000ULL;
-				} else if (flags & R600_PTE_VALID) {
-					value = addr;
-				} else {
-					value = 0;
-				}
-				addr += incr;
-				value |= flags;
-				ib->ptr[ib->length_dw++] = value;
-				ib->ptr[ib->length_dw++] = upper_32_bits(value);
-			}
-		}
-	} else {
-		while (count) {
-			ndw = count * 2;
-			if (ndw > 0xFFFFE)
-				ndw = 0xFFFFE;
-
-			if (flags & R600_PTE_VALID)
+	while (count) {
+		ndw = count * 2;
+		if (ndw > 0xFFFFE)
+			ndw = 0xFFFFE;
+
+		/* for non-physically contiguous pages (system) */
+		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE,
+						      0, 0, ndw);
+		ib->ptr[ib->length_dw++] = pe;
+		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+			if (flags & R600_PTE_SYSTEM) {
+				value = radeon_vm_map_gart(rdev, addr);
+				value &= 0xFFFFFFFFFFFFF000ULL;
+			} else if (flags & R600_PTE_VALID) {
 				value = addr;
-			else
+			} else {
 				value = 0;
-			/* for physically contiguous pages (vram) */
-			ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
-			ib->ptr[ib->length_dw++] = pe; /* dst addr */
-			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
-			ib->ptr[ib->length_dw++] = flags; /* mask */
-			ib->ptr[ib->length_dw++] = 0;
-			ib->ptr[ib->length_dw++] = value; /* value */
+			}
+			addr += incr;
+			value |= flags;
+			ib->ptr[ib->length_dw++] = value;
 			ib->ptr[ib->length_dw++] = upper_32_bits(value);
-			ib->ptr[ib->length_dw++] = incr; /* increment size */
-			ib->ptr[ib->length_dw++] = 0;
-			pe += ndw * 4;
-			addr += (ndw / 2) * incr;
-			count -= ndw / 2;
 		}
 	}
+}
+
+/**
+ * cayman_dma_vm_set_pages - update the page tables using the DMA
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: hw access flags
+ *
+ * Update the page tables using the DMA (cayman/TN).
+ */
+void cayman_dma_vm_set_pages(struct radeon_device *rdev,
+			     struct radeon_ib *ib,
+			     uint64_t pe,
+			     uint64_t addr, unsigned count,
+			     uint32_t incr, uint32_t flags)
+{
+	uint64_t value;
+	unsigned ndw;
+
+	while (count) {
+		ndw = count * 2;
+		if (ndw > 0xFFFFE)
+			ndw = 0xFFFFE;
+
+		if (flags & R600_PTE_VALID)
+			value = addr;
+		else
+			value = 0;
+
+		/* for physically contiguous pages (vram) */
+		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
+		ib->ptr[ib->length_dw++] = pe; /* dst addr */
+		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+		ib->ptr[ib->length_dw++] = flags; /* mask */
+		ib->ptr[ib->length_dw++] = 0;
+		ib->ptr[ib->length_dw++] = value; /* value */
+		ib->ptr[ib->length_dw++] = upper_32_bits(value);
+		ib->ptr[ib->length_dw++] = incr; /* increment size */
+		ib->ptr[ib->length_dw++] = 0;
+
+		pe += ndw * 4;
+		addr += (ndw / 2) * incr;
+		count -= ndw / 2;
+	}
+}
+
+/**
+ * cayman_dma_vm_pad_ib - pad the IB to the required number of dw
+ *
+ * @ib: indirect buffer to fill with padding
+ *
+ */
+void cayman_dma_vm_pad_ib(struct radeon_ib *ib)
+{
 	while (ib->length_dw & 0x7)
 		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
 }
```
```diff
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 56fc7d2da149..142cad63c3fa 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1797,11 +1797,21 @@ struct radeon_asic {
 	struct {
 		int (*init)(struct radeon_device *rdev);
 		void (*fini)(struct radeon_device *rdev);
-		void (*set_page)(struct radeon_device *rdev,
-				 struct radeon_ib *ib,
-				 uint64_t pe,
-				 uint64_t addr, unsigned count,
-				 uint32_t incr, uint32_t flags);
+		void (*copy_pages)(struct radeon_device *rdev,
+				   struct radeon_ib *ib,
+				   uint64_t pe, uint64_t src,
+				   unsigned count);
+		void (*write_pages)(struct radeon_device *rdev,
+				    struct radeon_ib *ib,
+				    uint64_t pe,
+				    uint64_t addr, unsigned count,
+				    uint32_t incr, uint32_t flags);
+		void (*set_pages)(struct radeon_device *rdev,
+				  struct radeon_ib *ib,
+				  uint64_t pe,
+				  uint64_t addr, unsigned count,
+				  uint32_t incr, uint32_t flags);
+		void (*pad_ib)(struct radeon_ib *ib);
 	} vm;
 	/* ring specific callbacks */
 	struct radeon_asic_ring *ring[RADEON_NUM_RINGS];
@@ -2761,7 +2771,10 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
 #define radeon_gart_set_page(rdev, i, p, f) (rdev)->asic->gart.set_page((rdev), (i), (p), (f))
 #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
 #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
-#define radeon_asic_vm_set_page(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (ib), (pe), (addr), (count), (incr), (flags)))
+#define radeon_asic_vm_copy_pages(rdev, ib, pe, src, count) ((rdev)->asic->vm.copy_pages((rdev), (ib), (pe), (src), (count)))
+#define radeon_asic_vm_write_pages(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.write_pages((rdev), (ib), (pe), (addr), (count), (incr), (flags)))
+#define radeon_asic_vm_set_pages(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_pages((rdev), (ib), (pe), (addr), (count), (incr), (flags)))
+#define radeon_asic_vm_pad_ib(rdev, ib) ((rdev)->asic->vm.pad_ib((ib)))
 #define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)]->ring_start((rdev), (cp))
 #define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)]->ring_test((rdev), (cp))
 #define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)]->ib_test((rdev), (cp))
```
```diff
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index c49a01f92b4d..eeeeabe09758 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1613,7 +1613,10 @@ static struct radeon_asic cayman_asic = {
 	.vm = {
 		.init = &cayman_vm_init,
 		.fini = &cayman_vm_fini,
-		.set_page = &cayman_dma_vm_set_page,
+		.copy_pages = &cayman_dma_vm_copy_pages,
+		.write_pages = &cayman_dma_vm_write_pages,
+		.set_pages = &cayman_dma_vm_set_pages,
+		.pad_ib = &cayman_dma_vm_pad_ib,
 	},
 	.ring = {
 		[RADEON_RING_TYPE_GFX_INDEX] = &cayman_gfx_ring,
@@ -1713,7 +1716,10 @@ static struct radeon_asic trinity_asic = {
 	.vm = {
 		.init = &cayman_vm_init,
 		.fini = &cayman_vm_fini,
-		.set_page = &cayman_dma_vm_set_page,
+		.copy_pages = &cayman_dma_vm_copy_pages,
+		.write_pages = &cayman_dma_vm_write_pages,
+		.set_pages = &cayman_dma_vm_set_pages,
+		.pad_ib = &cayman_dma_vm_pad_ib,
 	},
 	.ring = {
 		[RADEON_RING_TYPE_GFX_INDEX] = &cayman_gfx_ring,
@@ -1843,7 +1849,10 @@ static struct radeon_asic si_asic = {
 	.vm = {
 		.init = &si_vm_init,
 		.fini = &si_vm_fini,
-		.set_page = &si_dma_vm_set_page,
+		.copy_pages = &si_dma_vm_copy_pages,
+		.write_pages = &si_dma_vm_write_pages,
+		.set_pages = &si_dma_vm_set_pages,
+		.pad_ib = &cayman_dma_vm_pad_ib,
 	},
 	.ring = {
 		[RADEON_RING_TYPE_GFX_INDEX] = &si_gfx_ring,
@@ -2001,7 +2010,10 @@ static struct radeon_asic ci_asic = {
 	.vm = {
 		.init = &cik_vm_init,
 		.fini = &cik_vm_fini,
-		.set_page = &cik_sdma_vm_set_page,
+		.copy_pages = &cik_sdma_vm_copy_pages,
+		.write_pages = &cik_sdma_vm_write_pages,
+		.set_pages = &cik_sdma_vm_set_pages,
+		.pad_ib = &cik_sdma_vm_pad_ib,
 	},
 	.ring = {
 		[RADEON_RING_TYPE_GFX_INDEX] = &ci_gfx_ring,
@@ -2105,7 +2117,10 @@ static struct radeon_asic kv_asic = {
 	.vm = {
 		.init = &cik_vm_init,
 		.fini = &cik_vm_fini,
-		.set_page = &cik_sdma_vm_set_page,
+		.copy_pages = &cik_sdma_vm_copy_pages,
+		.write_pages = &cik_sdma_vm_write_pages,
+		.set_pages = &cik_sdma_vm_set_pages,
+		.pad_ib = &cik_sdma_vm_pad_ib,
 	},
 	.ring = {
 		[RADEON_RING_TYPE_GFX_INDEX] = &ci_gfx_ring,
```
```diff
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 3cf6be6666fc..275a5dc01780 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -607,11 +607,22 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
 				struct radeon_ib *ib);
 bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
 bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
-void cayman_dma_vm_set_page(struct radeon_device *rdev,
-			    struct radeon_ib *ib,
-			    uint64_t pe,
-			    uint64_t addr, unsigned count,
-			    uint32_t incr, uint32_t flags);
+
+void cayman_dma_vm_copy_pages(struct radeon_device *rdev,
+			      struct radeon_ib *ib,
+			      uint64_t pe, uint64_t src,
+			      unsigned count);
+void cayman_dma_vm_write_pages(struct radeon_device *rdev,
+			       struct radeon_ib *ib,
+			       uint64_t pe,
+			       uint64_t addr, unsigned count,
+			       uint32_t incr, uint32_t flags);
+void cayman_dma_vm_set_pages(struct radeon_device *rdev,
+			     struct radeon_ib *ib,
+			     uint64_t pe,
+			     uint64_t addr, unsigned count,
+			     uint32_t incr, uint32_t flags);
+void cayman_dma_vm_pad_ib(struct radeon_ib *ib);
 
 void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
 
@@ -694,11 +705,22 @@ int si_copy_dma(struct radeon_device *rdev,
 		uint64_t src_offset, uint64_t dst_offset,
 		unsigned num_gpu_pages,
 		struct radeon_fence **fence);
-void si_dma_vm_set_page(struct radeon_device *rdev,
-			struct radeon_ib *ib,
-			uint64_t pe,
-			uint64_t addr, unsigned count,
-			uint32_t incr, uint32_t flags);
+
+void si_dma_vm_copy_pages(struct radeon_device *rdev,
+			  struct radeon_ib *ib,
+			  uint64_t pe, uint64_t src,
+			  unsigned count);
+void si_dma_vm_write_pages(struct radeon_device *rdev,
+			   struct radeon_ib *ib,
+			   uint64_t pe,
+			   uint64_t addr, unsigned count,
+			   uint32_t incr, uint32_t flags);
+void si_dma_vm_set_pages(struct radeon_device *rdev,
+			 struct radeon_ib *ib,
+			 uint64_t pe,
+			 uint64_t addr, unsigned count,
+			 uint32_t incr, uint32_t flags);
+
 void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
 u32 si_get_xclk(struct radeon_device *rdev);
 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev);
@@ -772,11 +794,23 @@ int cik_irq_process(struct radeon_device *rdev);
 int cik_vm_init(struct radeon_device *rdev);
 void cik_vm_fini(struct radeon_device *rdev);
 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
-void cik_sdma_vm_set_page(struct radeon_device *rdev,
-			  struct radeon_ib *ib,
-			  uint64_t pe,
-			  uint64_t addr, unsigned count,
-			  uint32_t incr, uint32_t flags);
+
+void cik_sdma_vm_copy_pages(struct radeon_device *rdev,
+			    struct radeon_ib *ib,
+			    uint64_t pe, uint64_t src,
+			    unsigned count);
+void cik_sdma_vm_write_pages(struct radeon_device *rdev,
+			     struct radeon_ib *ib,
+			     uint64_t pe,
+			     uint64_t addr, unsigned count,
+			     uint32_t incr, uint32_t flags);
+void cik_sdma_vm_set_pages(struct radeon_device *rdev,
+			   struct radeon_ib *ib,
+			   uint64_t pe,
+			   uint64_t addr, unsigned count,
+			   uint32_t incr, uint32_t flags);
+void cik_sdma_vm_pad_ib(struct radeon_ib *ib);
+
 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
```
```diff
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index d520ab71b748..e97588162030 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -341,6 +341,42 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
 }
 
 /**
+ * radeon_vm_set_pages - helper to call the right asic function
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: hw access flags
+ *
+ * Traces the parameters and calls the right asic functions
+ * to setup the page table using the DMA.
+ */
+static void radeon_vm_set_pages(struct radeon_device *rdev,
+				struct radeon_ib *ib,
+				uint64_t pe,
+				uint64_t addr, unsigned count,
+				uint32_t incr, uint32_t flags)
+{
+	trace_radeon_vm_set_page(pe, addr, count, incr, flags);
+
+	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
+		uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
+		radeon_asic_vm_copy_pages(rdev, ib, pe, src, count);
+
+	} else if ((flags & R600_PTE_SYSTEM) || (count < 3)) {
+		radeon_asic_vm_write_pages(rdev, ib, pe, addr,
+					   count, incr, flags);
+
+	} else {
+		radeon_asic_vm_set_pages(rdev, ib, pe, addr,
+					 count, incr, flags);
+	}
+}
+
+/**
  * radeon_vm_clear_bo - initially clear the page dir/table
  *
  * @rdev: radeon_device pointer
@@ -381,7 +417,8 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev,
 
 	ib.length_dw = 0;
 
-	radeon_asic_vm_set_page(rdev, &ib, addr, 0, entries, 0, 0);
+	radeon_vm_set_pages(rdev, &ib, addr, 0, entries, 0, 0);
+	radeon_asic_vm_pad_ib(rdev, &ib);
 
 	r = radeon_ib_schedule(rdev, &ib, NULL);
 	if (r)
@@ -634,9 +671,9 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev,
 		    ((last_pt + incr * count) != pt)) {
 
 			if (count) {
-				radeon_asic_vm_set_page(rdev, &ib, last_pde,
-							last_pt, count, incr,
-							R600_PTE_VALID);
+				radeon_vm_set_pages(rdev, &ib, last_pde,
+						    last_pt, count, incr,
+						    R600_PTE_VALID);
 			}
 
 			count = 1;
@@ -648,10 +685,11 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev,
 	}
 
 	if (count)
-		radeon_asic_vm_set_page(rdev, &ib, last_pde, last_pt, count,
-					incr, R600_PTE_VALID);
+		radeon_vm_set_pages(rdev, &ib, last_pde, last_pt, count,
+				    incr, R600_PTE_VALID);
 
 	if (ib.length_dw != 0) {
+		radeon_asic_vm_pad_ib(rdev, &ib);
 		radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj);
 		radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use);
 		r = radeon_ib_schedule(rdev, &ib, NULL);
@@ -719,30 +757,30 @@ static void radeon_vm_frag_ptes(struct radeon_device *rdev,
 	    (frag_start >= frag_end)) {
 
 		count = (pe_end - pe_start) / 8;
-		radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count,
-					RADEON_GPU_PAGE_SIZE, flags);
+		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
+				    RADEON_GPU_PAGE_SIZE, flags);
 		return;
 	}
 
 	/* handle the 4K area at the beginning */
 	if (pe_start != frag_start) {
 		count = (frag_start - pe_start) / 8;
-		radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count,
-					RADEON_GPU_PAGE_SIZE, flags);
+		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
+				    RADEON_GPU_PAGE_SIZE, flags);
 		addr += RADEON_GPU_PAGE_SIZE * count;
 	}
 
 	/* handle the area in the middle */
 	count = (frag_end - frag_start) / 8;
-	radeon_asic_vm_set_page(rdev, ib, frag_start, addr, count,
-				RADEON_GPU_PAGE_SIZE, flags | frag_flags);
+	radeon_vm_set_pages(rdev, ib, frag_start, addr, count,
+			    RADEON_GPU_PAGE_SIZE, flags | frag_flags);
 
 	/* handle the 4K area at the end */
 	if (frag_end != pe_end) {
 		addr += RADEON_GPU_PAGE_SIZE * count;
 		count = (pe_end - frag_end) / 8;
-		radeon_asic_vm_set_page(rdev, ib, frag_end, addr, count,
-					RADEON_GPU_PAGE_SIZE, flags);
+		radeon_vm_set_pages(rdev, ib, frag_end, addr, count,
+				    RADEON_GPU_PAGE_SIZE, flags);
 	}
 }
 
@@ -900,6 +938,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 			      bo_va->it.last + 1, addr,
 			      radeon_vm_page_flags(bo_va->flags));
 
+	radeon_asic_vm_pad_ib(rdev, &ib);
 	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
 	r = radeon_ib_schedule(rdev, &ib, NULL);
 	if (r) {
```
```diff
diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c
index a26e842cf391..716505129450 100644
--- a/drivers/gpu/drm/radeon/si_dma.c
+++ b/drivers/gpu/drm/radeon/si_dma.c
@@ -56,7 +56,41 @@ bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 }
 
 /**
- * si_dma_vm_set_page - update the page tables using the DMA
+ * si_dma_vm_copy_pages - update PTEs by copying them from the GART
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr where to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using the DMA (SI).
+ */
+void si_dma_vm_copy_pages(struct radeon_device *rdev,
+			  struct radeon_ib *ib,
+			  uint64_t pe, uint64_t src,
+			  unsigned count)
+{
+	while (count) {
+		unsigned bytes = count * 8;
+		if (bytes > 0xFFFF8)
+			bytes = 0xFFFF8;
+
+		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
+						      1, 0, 0, bytes);
+		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+		ib->ptr[ib->length_dw++] = lower_32_bits(src);
+		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+		ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
+
+		pe += bytes;
+		src += bytes;
+		count -= bytes / 8;
+	}
+}
+
+/**
+ * si_dma_vm_write_pages - update PTEs by writing them manually
  *
  * @rdev: radeon_device pointer
  * @ib: indirect buffer to fill with commands
@@ -66,83 +100,89 @@ bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
  * @incr: increase next addr by incr bytes
  * @flags: access flags
  *
- * Update the page tables using the DMA (SI).
+ * Update PTEs by writing them manually using the DMA (SI).
  */
-void si_dma_vm_set_page(struct radeon_device *rdev,
-			struct radeon_ib *ib,
-			uint64_t pe,
-			uint64_t addr, unsigned count,
-			uint32_t incr, uint32_t flags)
+void si_dma_vm_write_pages(struct radeon_device *rdev,
+			   struct radeon_ib *ib,
+			   uint64_t pe,
+			   uint64_t addr, unsigned count,
+			   uint32_t incr, uint32_t flags)
 {
 	uint64_t value;
 	unsigned ndw;
 
-	trace_radeon_vm_set_page(pe, addr, count, incr, flags);
-
-	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
-		uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
-		while (count) {
-			unsigned bytes = count * 8;
-			if (bytes > 0xFFFF8)
-				bytes = 0xFFFF8;
-
-			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
-							      1, 0, 0, bytes);
-			ib->ptr[ib->length_dw++] = lower_32_bits(pe);
-			ib->ptr[ib->length_dw++] = lower_32_bits(src);
-			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
-			ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
-
-			pe += bytes;
-			src += bytes;
-			count -= bytes / 8;
-		}
-	} else if (flags & R600_PTE_SYSTEM) {
-		while (count) {
-			ndw = count * 2;
-			if (ndw > 0xFFFFE)
-				ndw = 0xFFFFE;
-
-			/* for non-physically contiguous pages (system) */
-			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
-			ib->ptr[ib->length_dw++] = pe;
-			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
-			for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+	while (count) {
+		ndw = count * 2;
+		if (ndw > 0xFFFFE)
+			ndw = 0xFFFFE;
+
+		/* for non-physically contiguous pages (system) */
+		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
+		ib->ptr[ib->length_dw++] = pe;
+		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+			if (flags & R600_PTE_SYSTEM) {
 				value = radeon_vm_map_gart(rdev, addr);
 				value &= 0xFFFFFFFFFFFFF000ULL;
-				addr += incr;
-				value |= flags;
-				ib->ptr[ib->length_dw++] = value;
-				ib->ptr[ib->length_dw++] = upper_32_bits(value);
-			}
-		}
-	} else {
-		while (count) {
-			ndw = count * 2;
-			if (ndw > 0xFFFFE)
-				ndw = 0xFFFFE;
-
-			if (flags & R600_PTE_VALID)
+			} else if (flags & R600_PTE_VALID) {
 				value = addr;
-			else
+			} else {
 				value = 0;
-			/* for physically contiguous pages (vram) */
-			ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
-			ib->ptr[ib->length_dw++] = pe; /* dst addr */
-			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
-			ib->ptr[ib->length_dw++] = flags; /* mask */
-			ib->ptr[ib->length_dw++] = 0;
-			ib->ptr[ib->length_dw++] = value; /* value */
+			}
+			addr += incr;
+			value |= flags;
+			ib->ptr[ib->length_dw++] = value;
 			ib->ptr[ib->length_dw++] = upper_32_bits(value);
-			ib->ptr[ib->length_dw++] = incr; /* increment size */
-			ib->ptr[ib->length_dw++] = 0;
-			pe += ndw * 4;
-			addr += (ndw / 2) * incr;
-			count -= ndw / 2;
 		}
 	}
-	while (ib->length_dw & 0x7)
-		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
+}
+
+/**
+ * si_dma_vm_set_pages - update the page tables using the DMA
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using the DMA (SI).
+ */
+void si_dma_vm_set_pages(struct radeon_device *rdev,
+			 struct radeon_ib *ib,
+			 uint64_t pe,
+			 uint64_t addr, unsigned count,
+			 uint32_t incr, uint32_t flags)
+{
+	uint64_t value;
+	unsigned ndw;
+
+	while (count) {
+		ndw = count * 2;
+		if (ndw > 0xFFFFE)
+			ndw = 0xFFFFE;
+
+		if (flags & R600_PTE_VALID)
+			value = addr;
+		else
+			value = 0;
+
+		/* for physically contiguous pages (vram) */
+		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
+		ib->ptr[ib->length_dw++] = pe; /* dst addr */
+		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+		ib->ptr[ib->length_dw++] = flags; /* mask */
+		ib->ptr[ib->length_dw++] = 0;
+		ib->ptr[ib->length_dw++] = value; /* value */
+		ib->ptr[ib->length_dw++] = upper_32_bits(value);
+		ib->ptr[ib->length_dw++] = incr; /* increment size */
+		ib->ptr[ib->length_dw++] = 0;
+		pe += ndw * 4;
+		addr += (ndw / 2) * incr;
+		count -= ndw / 2;
+	}
 }
 
 void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
```