Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--  drivers/gpu/drm/radeon/evergreen.c     |  16
-rw-r--r--  drivers/gpu/drm/radeon/evergreen_cs.c  | 807
-rw-r--r--  drivers/gpu/drm/radeon/evergreend.h    |  29
3 files changed, 417 insertions(+), 435 deletions(-)
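The patch is an encoding cleanup rather than a functional change: the async DMA packet header used to be decoded as three separate fields, a one-bit "tiled" flag (bit 23), a one-bit "new command" flag (bit 26) and a three-bit "misc" field (bits 20-22), and the parsers dispatched on them through nested if/switch blocks. The rework reads bits 20-27 as a single 8-bit sub-command, so every combination collapses to one flat case label; for example tiled=1, new=1, misc=3 becomes sub-command 0x4b. A minimal stand-alone sketch of that equivalence, built from the old macros removed from evergreen_cs.c and the new one added to evergreend.h below:

#include <stdint.h>
#include <stdio.h>

/* Old per-field decode, as removed from evergreen_cs.c below. */
#define GET_DMA_T(h)		(((h) & 0x00800000) >> 23)
#define GET_DMA_NEW(h)		(((h) & 0x04000000) >> 26)
#define GET_DMA_MISC(h)		(((h) & 0x0700000) >> 20)
/* New single-field decode, as added to evergreend.h below. */
#define GET_DMA_SUB_CMD(h)	(((h) & 0x0ff00000) >> 20)

int main(void)
{
	/* COPY header: cmd 0x3, new_cmd = 1, tiled = 1, misc = 3, count 0x100. */
	uint32_t header = (0x3u << 28) | (1u << 26) | (1u << 23) | (3u << 20) | 0x100;

	/* Same bits, two views: (new=1, t=1, misc=3) corresponds to sub_cmd 0x4b. */
	printf("t=%u new=%u misc=%u sub_cmd=0x%02x\n",
	       (unsigned)GET_DMA_T(header), (unsigned)GET_DMA_NEW(header),
	       (unsigned)GET_DMA_MISC(header), (unsigned)GET_DMA_SUB_CMD(header));
	/* prints: t=1 new=1 misc=3 sub_cmd=0x4b */
	return 0;
}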
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index a2d478e8692a..a1e12bf2d0f7 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3280,14 +3280,14 @@ void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
 	struct radeon_ring *ring = &rdev->ring[fence->ring];
 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
 	/* write the fence */
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0));
 	radeon_ring_write(ring, addr & 0xfffffffc);
 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
 	radeon_ring_write(ring, fence->seq);
 	/* generate an interrupt */
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0));
 	/* flush HDP */
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0));
 	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
 	radeon_ring_write(ring, 1);
 }
@@ -3310,7 +3310,7 @@ void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
 		while ((next_rptr & 7) != 5)
 			next_rptr++;
 		next_rptr += 3;
-		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 1));
 		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
 		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
 		radeon_ring_write(ring, next_rptr);
@@ -3320,8 +3320,8 @@ void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
 	 * Pad as necessary with NOPs.
 	 */
 	while ((ring->wptr & 7) != 5)
-		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0));
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0));
 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
 	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
 
@@ -3380,7 +3380,7 @@ int evergreen_copy_dma(struct radeon_device *rdev,
 		if (cur_size_in_dw > 0xFFFFF)
 			cur_size_in_dw = 0xFFFFF;
 		size_in_dw -= cur_size_in_dw;
-		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, cur_size_in_dw));
 		radeon_ring_write(ring, dst_offset & 0xfffffffc);
 		radeon_ring_write(ring, src_offset & 0xfffffffc);
 		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
@@ -3488,7 +3488,7 @@ static int evergreen_startup(struct radeon_device *rdev)
 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
 			     DMA_RB_RPTR, DMA_RB_WPTR,
-			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0));
 	if (r)
 		return r;
 
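Every DMA_PACKET() call site in evergreen.c passed 0 for the dropped t and s arguments, so the dwords written to the ring are unchanged; only the macro's arity changes. A quick compile-time check of that claim (a sketch, not part of the patch; the _OLD/_NEW suffixes are local renames so both definitions can coexist):

/* Both encodings, copied from the old and new evergreend.h. */
#define DMA_PACKET_OLD(cmd, t, s, n)	((((cmd) & 0xF) << 28) |	\
					 (((t) & 0x1) << 23) |		\
					 (((s) & 0x1) << 22) |		\
					 (((n) & 0xFFFFF) << 0))
#define DMA_PACKET_NEW(cmd, sub_cmd, n)	((((cmd) & 0xF) << 28) |	\
					 (((sub_cmd) & 0xFF) << 20) |	\
					 (((n) & 0xFFFFF) << 0))

/* With t = s = 0, as at every call site above, the headers match. */
_Static_assert(DMA_PACKET_NEW(0x6, 0, 0) == DMA_PACKET_OLD(0x6, 0, 0, 0),
	       "DMA_PACKET_FENCE header unchanged");
_Static_assert(DMA_PACKET_NEW(0x2, 0, 1) == DMA_PACKET_OLD(0x2, 0, 0, 1),
	       "DMA_PACKET_WRITE header unchanged");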
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 4a9760ab8774..d8f5d5fcd303 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -2670,16 +2670,6 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
 	return 0;
 }
 
-/*
- * DMA
- */
-
-#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
-#define GET_DMA_COUNT(h) ((h) & 0x000fffff)
-#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
-#define GET_DMA_NEW(h) (((h) & 0x04000000) >> 26)
-#define GET_DMA_MISC(h) (((h) & 0x0700000) >> 20)
-
 /**
  * evergreen_dma_cs_parse() - parse the DMA IB
  * @p:		parser structure holding parsing context.
@@ -2693,9 +2683,9 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 {
 	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
 	struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
-	u32 header, cmd, count, tiled, new_cmd, misc;
+	u32 header, cmd, count, sub_cmd;
 	volatile u32 *ib = p->ib.ptr;
-	u32 idx, idx_value;
+	u32 idx;
 	u64 src_offset, dst_offset, dst2_offset;
 	int r;
 
@@ -2709,9 +2699,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 		header = radeon_get_ib_value(p, idx);
 		cmd = GET_DMA_CMD(header);
 		count = GET_DMA_COUNT(header);
-		tiled = GET_DMA_T(header);
-		new_cmd = GET_DMA_NEW(header);
-		misc = GET_DMA_MISC(header);
+		sub_cmd = GET_DMA_SUB_CMD(header);
 
 		switch (cmd) {
 		case DMA_PACKET_WRITE:
@@ -2720,19 +2708,27 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 			DRM_ERROR("bad DMA_PACKET_WRITE\n");
 			return -EINVAL;
 		}
-		if (tiled) {
+		switch (sub_cmd) {
+		/* tiled */
+		case 8:
 			dst_offset = ib[idx+1];
 			dst_offset <<= 8;
 
 			ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
 			p->idx += count + 7;
-		} else {
+			break;
+		/* linear */
+		case 0:
 			dst_offset = ib[idx+1];
 			dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
 
 			ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
 			ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
 			p->idx += count + 3;
+			break;
+		default:
+			DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib[idx+0]);
+			return -EINVAL;
 		}
 		if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
 			dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
@@ -2751,338 +2747,330 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 				DRM_ERROR("bad DMA_PACKET_COPY\n");
 				return -EINVAL;
 			}
-			if (tiled) {
-				idx_value = radeon_get_ib_value(p, idx + 2);
-				if (new_cmd) {
-					switch (misc) {
-					case 0:
-						/* L2T, frame to fields */
-						if (idx_value & (1 << 31)) {
-							DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
-							return -EINVAL;
-						}
-						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
-						if (r) {
-							DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
-							return -EINVAL;
-						}
-						dst_offset = ib[idx+1];
-						dst_offset <<= 8;
-						dst2_offset = ib[idx+2];
-						dst2_offset <<= 8;
-						src_offset = ib[idx+8];
-						src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
-						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
-								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
-								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
-								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
-							return -EINVAL;
-						}
-						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
-						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-						p->idx += 10;
-						break;
-					case 1:
-						/* L2T, T2L partial */
-						if (p->family < CHIP_CAYMAN) {
-							DRM_ERROR("L2T, T2L Partial is cayman only !\n");
-							return -EINVAL;
-						}
-						/* detile bit */
-						if (idx_value & (1 << 31)) {
-							/* tiled src, linear dst */
-							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
-
-							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-						} else {
-							/* linear src, tiled dst */
-							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-
-							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-						}
-						p->idx += 12;
-						break;
-					case 3:
-						/* L2T, broadcast */
-						if (idx_value & (1 << 31)) {
-							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
-							return -EINVAL;
-						}
-						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
-						if (r) {
-							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
-							return -EINVAL;
-						}
-						dst_offset = ib[idx+1];
-						dst_offset <<= 8;
-						dst2_offset = ib[idx+2];
-						dst2_offset <<= 8;
-						src_offset = ib[idx+8];
-						src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
-						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
-								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
-								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
-								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
-							return -EINVAL;
-						}
-						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
-						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-						p->idx += 10;
-						break;
-					case 4:
-						/* L2T, T2L */
-						/* detile bit */
-						if (idx_value & (1 << 31)) {
-							/* tiled src, linear dst */
-							src_offset = ib[idx+1];
-							src_offset <<= 8;
-							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
-
-							dst_offset = ib[idx+7];
-							dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
-							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-						} else {
-							/* linear src, tiled dst */
-							src_offset = ib[idx+7];
-							src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
-							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-
-							dst_offset = ib[idx+1];
-							dst_offset <<= 8;
-							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-						}
-						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
-								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
-								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-							return -EINVAL;
-						}
-						p->idx += 9;
-						break;
-					case 5:
-						/* T2T partial */
-						if (p->family < CHIP_CAYMAN) {
-							DRM_ERROR("L2T, T2L Partial is cayman only !\n");
-							return -EINVAL;
-						}
-						ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
-						ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-						p->idx += 13;
-						break;
-					case 7:
-						/* L2T, broadcast */
-						if (idx_value & (1 << 31)) {
-							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
-							return -EINVAL;
-						}
-						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
-						if (r) {
-							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
-							return -EINVAL;
-						}
-						dst_offset = ib[idx+1];
-						dst_offset <<= 8;
-						dst2_offset = ib[idx+2];
-						dst2_offset <<= 8;
-						src_offset = ib[idx+8];
-						src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
-						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
-								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
-								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
-								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
-							return -EINVAL;
-						}
-						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
-						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-						p->idx += 10;
-						break;
-					default:
-						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-						return -EINVAL;
-					}
-				} else {
-					switch (misc) {
-					case 0:
-						/* detile bit */
-						if (idx_value & (1 << 31)) {
-							/* tiled src, linear dst */
-							src_offset = ib[idx+1];
-							src_offset <<= 8;
-							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
-
-							dst_offset = ib[idx+7];
-							dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
-							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-						} else {
-							/* linear src, tiled dst */
-							src_offset = ib[idx+7];
-							src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
-							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-
-							dst_offset = ib[idx+1];
-							dst_offset <<= 8;
-							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-						}
-						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
-								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
-								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-							return -EINVAL;
-						}
-						p->idx += 9;
-						break;
-					default:
-						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-						return -EINVAL;
-					}
-				}
-			} else {
-				if (new_cmd) {
-					switch (misc) {
-					case 0:
-						/* L2L, byte */
-						src_offset = ib[idx+2];
-						src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
-						dst_offset = ib[idx+1];
-						dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
-						if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
-								 src_offset + count, radeon_bo_size(src_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
-								 dst_offset + count, radeon_bo_size(dst_reloc->robj));
-							return -EINVAL;
-						}
-						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
-						ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
-						ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-						ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-						p->idx += 5;
-						break;
-					case 1:
-						/* L2L, partial */
-						if (p->family < CHIP_CAYMAN) {
-							DRM_ERROR("L2L Partial is cayman only !\n");
-							return -EINVAL;
-						}
-						ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
-						ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-						ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
-						ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-
-						p->idx += 9;
-						break;
-					case 4:
-						/* L2L, dw, broadcast */
-						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
-						if (r) {
-							DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
-							return -EINVAL;
-						}
-						dst_offset = ib[idx+1];
-						dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
-						dst2_offset = ib[idx+2];
-						dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
-						src_offset = ib[idx+3];
-						src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
-						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
-								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
-								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
-								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
-							return -EINVAL;
-						}
-						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
-						ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-						ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-						ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
-						ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-						p->idx += 7;
-						break;
-					default:
-						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-						return -EINVAL;
-					}
-				} else {
-					/* L2L, dw */
-					src_offset = ib[idx+2];
-					src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
-					dst_offset = ib[idx+1];
-					dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
-					if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-						dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
-							 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-						return -EINVAL;
-					}
-					if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-						dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
-							 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-						return -EINVAL;
-					}
-					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-					ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-					ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-					ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-					p->idx += 5;
-				}
-			}
+			switch (sub_cmd) {
+			/* Copy L2L, DW aligned */
+			case 0x00:
+				/* L2L, dw */
+				src_offset = ib[idx+2];
+				src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
+						 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
+						 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+					return -EINVAL;
+				}
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += 5;
+				break;
+			/* Copy L2T/T2L */
+			case 0x08:
+				/* detile bit */
+				if (ib[idx + 2] & (1 << 31)) {
+					/* tiled src, linear dst */
+					src_offset = ib[idx+1];
+					src_offset <<= 8;
+					ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+					dst_offset = radeon_get_ib_value(p, idx + 7);
+					dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+					ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				} else {
+					/* linear src, tiled dst */
+					src_offset = ib[idx+7];
+					src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+					ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+					dst_offset = ib[idx+1];
+					dst_offset <<= 8;
+					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				}
+				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n",
+						 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, dst buffer too small (%llu %lu)\n",
+						 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+					return -EINVAL;
+				}
+				p->idx += 9;
+				break;
+			/* Copy L2L, byte aligned */
+			case 0x40:
+				/* L2L, byte */
+				src_offset = ib[idx+2];
+				src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+				if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
+						 src_offset + count, radeon_bo_size(src_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
+						 dst_offset + count, radeon_bo_size(dst_reloc->robj));
+					return -EINVAL;
+				}
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+				ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+				ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += 5;
+				break;
+			/* Copy L2L, partial */
+			case 0x41:
+				/* L2L, partial */
+				if (p->family < CHIP_CAYMAN) {
+					DRM_ERROR("L2L Partial is cayman only !\n");
+					return -EINVAL;
+				}
+				ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+				ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+				ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+
+				p->idx += 9;
+				break;
+			/* Copy L2L, DW aligned, broadcast */
+			case 0x44:
+				/* L2L, dw, broadcast */
+				r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+				if (r) {
+					DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
+					return -EINVAL;
+				}
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+				dst2_offset = ib[idx+2];
+				dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
+				src_offset = ib[idx+3];
+				src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
+						 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
+						 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
+						 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+					return -EINVAL;
+				}
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += 7;
+				break;
+			/* Copy L2T Frame to Field */
+			case 0x48:
+				if (ib[idx + 2] & (1 << 31)) {
+					DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+					return -EINVAL;
+				}
+				r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+				if (r) {
+					DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+					return -EINVAL;
+				}
+				dst_offset = ib[idx+1];
+				dst_offset <<= 8;
+				dst2_offset = ib[idx+2];
+				dst2_offset <<= 8;
+				src_offset = ib[idx+8];
+				src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
+						 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
+						 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
+						 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+					return -EINVAL;
+				}
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+				ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += 10;
+				break;
+			/* Copy L2T/T2L, partial */
+			case 0x49:
+				/* L2T, T2L partial */
+				if (p->family < CHIP_CAYMAN) {
+					DRM_ERROR("L2T, T2L Partial is cayman only !\n");
+					return -EINVAL;
+				}
+				/* detile bit */
+				if (ib[idx + 2] & (1 << 31)) {
+					/* tiled src, linear dst */
+					ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+					ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				} else {
+					/* linear src, tiled dst */
+					ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				}
+				p->idx += 12;
+				break;
+			/* Copy L2T broadcast */
+			case 0x4b:
+				/* L2T, broadcast */
+				if (ib[idx + 2] & (1 << 31)) {
+					DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+					return -EINVAL;
+				}
+				r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+				if (r) {
+					DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+					return -EINVAL;
+				}
+				dst_offset = ib[idx+1];
+				dst_offset <<= 8;
+				dst2_offset = ib[idx+2];
+				dst2_offset <<= 8;
+				src_offset = ib[idx+8];
+				src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+						 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+						 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+						 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+					return -EINVAL;
+				}
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+				ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += 10;
+				break;
+			/* Copy L2T/T2L (tile units) */
+			case 0x4c:
+				/* L2T, T2L */
+				/* detile bit */
+				if (ib[idx + 2] & (1 << 31)) {
+					/* tiled src, linear dst */
+					src_offset = ib[idx+1];
+					src_offset <<= 8;
+					ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+					dst_offset = ib[idx+7];
+					dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+					ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				} else {
+					/* linear src, tiled dst */
+					src_offset = ib[idx+7];
+					src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+					ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+					dst_offset = ib[idx+1];
+					dst_offset <<= 8;
+					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				}
+				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
+						 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
+						 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+					return -EINVAL;
+				}
+				p->idx += 9;
+				break;
+			/* Copy T2T, partial (tile units) */
+			case 0x4d:
+				/* T2T partial */
+				if (p->family < CHIP_CAYMAN) {
+					DRM_ERROR("L2T, T2L Partial is cayman only !\n");
+					return -EINVAL;
+				}
+				ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+				ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				p->idx += 13;
+				break;
+			/* Copy L2T broadcast (tile units) */
+			case 0x4f:
+				/* L2T, broadcast */
+				if (ib[idx + 2] & (1 << 31)) {
+					DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+					return -EINVAL;
+				}
+				r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+				if (r) {
+					DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+					return -EINVAL;
+				}
+				dst_offset = ib[idx+1];
+				dst_offset <<= 8;
+				dst2_offset = ib[idx+2];
+				dst2_offset <<= 8;
+				src_offset = ib[idx+8];
+				src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+						 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+						 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+						 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+					return -EINVAL;
+				}
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+				ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += 10;
+				break;
+			default:
+				DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib[idx+0]);
+				return -EINVAL;
+			}
 			break;
 		case DMA_PACKET_CONSTANT_FILL:
@@ -3435,88 +3423,79 @@ int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
 int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	u32 idx = 0;
-	u32 header, cmd, count, tiled, new_cmd, misc;
+	u32 header, cmd, count, sub_cmd;
 
 	do {
 		header = ib->ptr[idx];
 		cmd = GET_DMA_CMD(header);
 		count = GET_DMA_COUNT(header);
-		tiled = GET_DMA_T(header);
-		new_cmd = GET_DMA_NEW(header);
-		misc = GET_DMA_MISC(header);
+		sub_cmd = GET_DMA_SUB_CMD(header);
 
 		switch (cmd) {
 		case DMA_PACKET_WRITE:
-			if (tiled)
+			switch (sub_cmd) {
+			/* tiled */
+			case 8:
 				idx += count + 7;
-			else
+				break;
+			/* linear */
+			case 0:
 				idx += count + 3;
+				break;
+			default:
+				DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]);
+				return -EINVAL;
+			}
 			break;
 		case DMA_PACKET_COPY:
-			if (tiled) {
-				if (new_cmd) {
-					switch (misc) {
-					case 0:
-						/* L2T, frame to fields */
-						idx += 10;
-						break;
-					case 1:
-						/* L2T, T2L partial */
-						idx += 12;
-						break;
-					case 3:
-						/* L2T, broadcast */
-						idx += 10;
-						break;
-					case 4:
-						/* L2T, T2L */
-						idx += 9;
-						break;
-					case 5:
-						/* T2T partial */
-						idx += 13;
-						break;
-					case 7:
-						/* L2T, broadcast */
-						idx += 10;
-						break;
-					default:
-						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-						return -EINVAL;
-					}
-				} else {
-					switch (misc) {
-					case 0:
-						idx += 9;
-						break;
-					default:
-						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-						return -EINVAL;
-					}
-				}
-			} else {
-				if (new_cmd) {
-					switch (misc) {
-					case 0:
-						/* L2L, byte */
-						idx += 5;
-						break;
-					case 1:
-						/* L2L, partial */
-						idx += 9;
-						break;
-					case 4:
-						/* L2L, dw, broadcast */
-						idx += 7;
-						break;
-					default:
-						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-						return -EINVAL;
-					}
-				} else {
-					/* L2L, dw */
-					idx += 5;
-				}
-			}
+			switch (sub_cmd) {
+			/* Copy L2L, DW aligned */
+			case 0x00:
+				idx += 5;
+				break;
+			/* Copy L2T/T2L */
+			case 0x08:
+				idx += 9;
+				break;
+			/* Copy L2L, byte aligned */
+			case 0x40:
+				idx += 5;
+				break;
+			/* Copy L2L, partial */
+			case 0x41:
+				idx += 9;
+				break;
+			/* Copy L2L, DW aligned, broadcast */
+			case 0x44:
+				idx += 7;
+				break;
+			/* Copy L2T Frame to Field */
+			case 0x48:
+				idx += 10;
+				break;
+			/* Copy L2T/T2L, partial */
+			case 0x49:
+				idx += 12;
+				break;
+			/* Copy L2T broadcast */
+			case 0x4b:
+				idx += 10;
+				break;
+			/* Copy L2T/T2L (tile units) */
+			case 0x4c:
+				idx += 9;
+				break;
+			/* Copy T2T, partial (tile units) */
+			case 0x4d:
+				idx += 13;
+				break;
+			/* Copy L2T broadcast (tile units) */
+			case 0x4f:
+				idx += 10;
+				break;
+			default:
+				DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]);
+				return -EINVAL;
+			}
 			break;
 		case DMA_PACKET_CONSTANT_FILL:
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 70388fe4ee59..5ad29f3fc722 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -924,20 +924,23 @@
 #define CAYMAN_DMA1_CNTL                                  0xd82c
 
 /* async DMA packets */
-#define DMA_PACKET(cmd, t, s, n)	((((cmd) & 0xF) << 28) |	\
-					 (((t) & 0x1) << 23) |		\
-					 (((s) & 0x1) << 22) |		\
-					 (((n) & 0xFFFFF) << 0))
+#define DMA_PACKET(cmd, sub_cmd, n)	((((cmd) & 0xF) << 28) |	\
+					 (((sub_cmd) & 0xFF) << 20) |	\
+					 (((n) & 0xFFFFF) << 0))
+#define GET_DMA_CMD(h)			(((h) & 0xf0000000) >> 28)
+#define GET_DMA_COUNT(h)		((h) & 0x000fffff)
+#define GET_DMA_SUB_CMD(h)		(((h) & 0x0ff00000) >> 20)
+
 /* async DMA Packet types */
 #define DMA_PACKET_WRITE                        0x2
 #define DMA_PACKET_COPY                         0x3
 #define DMA_PACKET_INDIRECT_BUFFER              0x4
 #define DMA_PACKET_SEMAPHORE                    0x5
 #define DMA_PACKET_FENCE                        0x6
 #define DMA_PACKET_TRAP                         0x7
 #define DMA_PACKET_SRBM_WRITE                   0x9
 #define DMA_PACKET_CONSTANT_FILL                0xd
 #define DMA_PACKET_NOP                          0xf
 
 /* PCIE link stuff */
 #define PCIE_LC_TRAINING_CNTL                     0xa1 /* PCIE_P */
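Finally, a round-trip sketch for the macros added in this hunk (definitions repeated verbatim so the snippet compiles on its own): build a COPY header with DMA_PACKET() and take it apart again with the GET_DMA_* helpers.

#include <assert.h>
#include <stdint.h>

/* Copies of the definitions added above. */
#define DMA_PACKET(cmd, sub_cmd, n)	((((cmd) & 0xF) << 28) |	\
					 (((sub_cmd) & 0xFF) << 20) |	\
					 (((n) & 0xFFFFF) << 0))
#define GET_DMA_CMD(h)			(((h) & 0xf0000000) >> 28)
#define GET_DMA_COUNT(h)		((h) & 0x000fffff)
#define GET_DMA_SUB_CMD(h)		(((h) & 0x0ff00000) >> 20)
#define DMA_PACKET_COPY			0x3

int main(void)
{
	/* L2T frame-to-field copy (sub-command 0x48) of 0x20 dwords. */
	uint32_t h = DMA_PACKET(DMA_PACKET_COPY, 0x48, 0x20);	/* 0x34800020 */

	assert(GET_DMA_CMD(h) == DMA_PACKET_COPY);
	assert(GET_DMA_SUB_CMD(h) == 0x48);
	assert(GET_DMA_COUNT(h) == 0x20);
	return 0;
}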