Diffstat (limited to 'drivers/gpu/drm')
 -rw-r--r--  drivers/gpu/drm/radeon/evergreen.c    |  16
 -rw-r--r--  drivers/gpu/drm/radeon/evergreen_cs.c | 807
 -rw-r--r--  drivers/gpu/drm/radeon/evergreend.h   |  29
 3 files changed, 417 insertions(+), 435 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index a2d478e8692a..a1e12bf2d0f7 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3280,14 +3280,14 @@ void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
 	struct radeon_ring *ring = &rdev->ring[fence->ring];
 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
 	/* write the fence */
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0));
 	radeon_ring_write(ring, addr & 0xfffffffc);
 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
 	radeon_ring_write(ring, fence->seq);
 	/* generate an interrupt */
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0));
 	/* flush HDP */
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0));
 	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
 	radeon_ring_write(ring, 1);
 }
@@ -3310,7 +3310,7 @@ void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
 		while ((next_rptr & 7) != 5)
 			next_rptr++;
 		next_rptr += 3;
-		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 1));
 		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
 		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
 		radeon_ring_write(ring, next_rptr);
@@ -3320,8 +3320,8 @@ void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
 	 * Pad as necessary with NOPs.
 	 */
 	while ((ring->wptr & 7) != 5)
-		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0));
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0));
 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
 	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
 
@@ -3380,7 +3380,7 @@ int evergreen_copy_dma(struct radeon_device *rdev,
 		if (cur_size_in_dw > 0xFFFFF)
 			cur_size_in_dw = 0xFFFFF;
 		size_in_dw -= cur_size_in_dw;
-		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, cur_size_in_dw));
 		radeon_ring_write(ring, dst_offset & 0xfffffffc);
 		radeon_ring_write(ring, src_offset & 0xfffffffc);
 		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
@@ -3488,7 +3488,7 @@ static int evergreen_startup(struct radeon_device *rdev)
 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
 			     DMA_RB_RPTR, DMA_RB_WPTR,
-			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0));
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 4a9760ab8774..d8f5d5fcd303 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -2670,16 +2670,6 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
 	return 0;
 }
 
-/*
- * DMA
- */
-
-#define GET_DMA_CMD(h)		(((h) & 0xf0000000) >> 28)
-#define GET_DMA_COUNT(h)	((h) & 0x000fffff)
-#define GET_DMA_T(h)		(((h) & 0x00800000) >> 23)
-#define GET_DMA_NEW(h)		(((h) & 0x04000000) >> 26)
-#define GET_DMA_MISC(h)		(((h) & 0x0700000) >> 20)
-
 /**
  * evergreen_dma_cs_parse() - parse the DMA IB
  * @p:		parser structure holding parsing context.
@@ -2693,9 +2683,9 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 {
 	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
 	struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
-	u32 header, cmd, count, tiled, new_cmd, misc;
+	u32 header, cmd, count, sub_cmd;
 	volatile u32 *ib = p->ib.ptr;
-	u32 idx, idx_value;
+	u32 idx;
 	u64 src_offset, dst_offset, dst2_offset;
 	int r;
 
@@ -2709,9 +2699,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 		header = radeon_get_ib_value(p, idx);
 		cmd = GET_DMA_CMD(header);
 		count = GET_DMA_COUNT(header);
-		tiled = GET_DMA_T(header);
-		new_cmd = GET_DMA_NEW(header);
-		misc = GET_DMA_MISC(header);
+		sub_cmd = GET_DMA_SUB_CMD(header);
 
 		switch (cmd) {
 		case DMA_PACKET_WRITE:
@@ -2720,19 +2708,27 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 				DRM_ERROR("bad DMA_PACKET_WRITE\n");
 				return -EINVAL;
 			}
-			if (tiled) {
+			switch (sub_cmd) {
+			/* tiled */
+			case 8:
 				dst_offset = ib[idx+1];
 				dst_offset <<= 8;
 
 				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
 				p->idx += count + 7;
-			} else {
+				break;
+			/* linear */
+			case 0:
 				dst_offset = ib[idx+1];
 				dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
 
 				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
 				ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
 				p->idx += count + 3;
+				break;
+			default:
+				DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib[idx+0]);
+				return -EINVAL;
 			}
 			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
 				dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
@@ -2751,338 +2747,330 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 				DRM_ERROR("bad DMA_PACKET_COPY\n");
 				return -EINVAL;
 			}
-			if (tiled) {
-				idx_value = radeon_get_ib_value(p, idx + 2);
-				if (new_cmd) {
-					switch (misc) {
-					case 0:
-						/* L2T, frame to fields */
-						if (idx_value & (1 << 31)) {
-							DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
-							return -EINVAL;
-						}
-						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
-						if (r) {
-							DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
-							return -EINVAL;
-						}
-						dst_offset = ib[idx+1];
-						dst_offset <<= 8;
-						dst2_offset = ib[idx+2];
-						dst2_offset <<= 8;
-						src_offset = ib[idx+8];
-						src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
-						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
-								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
-								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
-								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
-							return -EINVAL;
-						}
-						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
-						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-						p->idx += 10;
-						break;
-					case 1:
-						/* L2T, T2L partial */
-						if (p->family < CHIP_CAYMAN) {
-							DRM_ERROR("L2T, T2L Partial is cayman only !\n");
-							return -EINVAL;
-						}
-						/* detile bit */
-						if (idx_value & (1 << 31)) {
-							/* tiled src, linear dst */
-							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
-
-							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-						} else {
-							/* linear src, tiled dst */
-							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-
-							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-						}
-						p->idx += 12;
-						break;
-					case 3:
-						/* L2T, broadcast */
-						if (idx_value & (1 << 31)) {
-							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
-							return -EINVAL;
-						}
-						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
-						if (r) {
-							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
-							return -EINVAL;
-						}
-						dst_offset = ib[idx+1];
-						dst_offset <<= 8;
-						dst2_offset = ib[idx+2];
-						dst2_offset <<= 8;
-						src_offset = ib[idx+8];
-						src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
-						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
-								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
-								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
-								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
-							return -EINVAL;
-						}
-						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
-						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-						p->idx += 10;
-						break;
-					case 4:
-						/* L2T, T2L */
-						/* detile bit */
-						if (idx_value & (1 << 31)) {
-							/* tiled src, linear dst */
-							src_offset = ib[idx+1];
-							src_offset <<= 8;
-							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
-
-							dst_offset = ib[idx+7];
-							dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
-							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-						} else {
-							/* linear src, tiled dst */
-							src_offset = ib[idx+7];
-							src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
-							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-
-							dst_offset = ib[idx+1];
-							dst_offset <<= 8;
-							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-						}
-						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
-								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
-								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-							return -EINVAL;
-						}
-						p->idx += 9;
-						break;
-					case 5:
-						/* T2T partial */
-						if (p->family < CHIP_CAYMAN) {
-							DRM_ERROR("L2T, T2L Partial is cayman only !\n");
-							return -EINVAL;
-						}
-						ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
-						ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-						p->idx += 13;
-						break;
-					case 7:
-						/* L2T, broadcast */
-						if (idx_value & (1 << 31)) {
-							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
-							return -EINVAL;
-						}
-						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
-						if (r) {
-							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
-							return -EINVAL;
-						}
-						dst_offset = ib[idx+1];
-						dst_offset <<= 8;
-						dst2_offset = ib[idx+2];
-						dst2_offset <<= 8;
-						src_offset = ib[idx+8];
-						src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
-						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
-								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
-								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
-								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
-							return -EINVAL;
-						}
-						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
-						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-						p->idx += 10;
-						break;
-					default:
-						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-						return -EINVAL;
-					}
+			switch (sub_cmd) {
+			/* Copy L2L, DW aligned */
+			case 0x00:
+				/* L2L, dw */
+				src_offset = ib[idx+2];
+				src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
+						 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
+						 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+					return -EINVAL;
+				}
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += 5;
+				break;
+			/* Copy L2T/T2L */
+			case 0x08:
+				/* detile bit */
+				if (ib[idx + 2] & (1 << 31)) {
+					/* tiled src, linear dst */
+					src_offset = ib[idx+1];
+					src_offset <<= 8;
+					ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+					dst_offset = radeon_get_ib_value(p, idx + 7);
+					dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+					ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
 				} else {
-					switch (misc) {
-					case 0:
-						/* detile bit */
-						if (idx_value & (1 << 31)) {
-							/* tiled src, linear dst */
-							src_offset = ib[idx+1];
-							src_offset <<= 8;
-							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
-
-							dst_offset = ib[idx+7];
-							dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
-							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-						} else {
-							/* linear src, tiled dst */
-							src_offset = ib[idx+7];
-							src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
-							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-
-							dst_offset = ib[idx+1];
-							dst_offset <<= 8;
-							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-						}
-						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
-								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
-								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-							return -EINVAL;
-						}
-						p->idx += 9;
-						break;
-					default:
-						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-						return -EINVAL;
-					}
+					/* linear src, tiled dst */
+					src_offset = ib[idx+7];
+					src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+					ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+					dst_offset = ib[idx+1];
+					dst_offset <<= 8;
+					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
 				}
-			} else {
-				if (new_cmd) {
-					switch (misc) {
-					case 0:
-						/* L2L, byte */
-						src_offset = ib[idx+2];
-						src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
-						dst_offset = ib[idx+1];
-						dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
-						if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
-								 src_offset + count, radeon_bo_size(src_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
-								 dst_offset + count, radeon_bo_size(dst_reloc->robj));
-							return -EINVAL;
-						}
-						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
-						ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
-						ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-						ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-						p->idx += 5;
-						break;
-					case 1:
-						/* L2L, partial */
-						if (p->family < CHIP_CAYMAN) {
-							DRM_ERROR("L2L Partial is cayman only !\n");
-							return -EINVAL;
-						}
-						ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
-						ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-						ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
-						ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-
-						p->idx += 9;
-						break;
-					case 4:
-						/* L2L, dw, broadcast */
-						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
-						if (r) {
-							DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
-							return -EINVAL;
-						}
-						dst_offset = ib[idx+1];
-						dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
-						dst2_offset = ib[idx+2];
-						dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
-						src_offset = ib[idx+3];
-						src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
-						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
-								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
-								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-							return -EINVAL;
-						}
-						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
-							dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
-								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
-							return -EINVAL;
-						}
-						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
-						ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-						ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-						ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
-						ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-						p->idx += 7;
-						break;
-					default:
-						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-						return -EINVAL;
-					}
+			if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+				dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n",
+					 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+				return -EINVAL;
+			}
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA L2T, dst buffer too small (%llu %lu)\n",
+					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			p->idx += 9;
+			break;
+			/* Copy L2L, byte aligned */
+			case 0x40:
+				/* L2L, byte */
+				src_offset = ib[idx+2];
+				src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+				if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
+						 src_offset + count, radeon_bo_size(src_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
+						 dst_offset + count, radeon_bo_size(dst_reloc->robj));
+					return -EINVAL;
+				}
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+				ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+				ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += 5;
+				break;
+			/* Copy L2L, partial */
+			case 0x41:
+				/* L2L, partial */
+				if (p->family < CHIP_CAYMAN) {
+					DRM_ERROR("L2L Partial is cayman only !\n");
+					return -EINVAL;
+				}
+				ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+				ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+				ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+
+				p->idx += 9;
+				break;
+			/* Copy L2L, DW aligned, broadcast */
+			case 0x44:
+				/* L2L, dw, broadcast */
+				r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+				if (r) {
+					DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
+					return -EINVAL;
+				}
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+				dst2_offset = ib[idx+2];
+				dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
+				src_offset = ib[idx+3];
+				src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
+						 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
+						 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
+						 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+					return -EINVAL;
+				}
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += 7;
+				break;
+			/* Copy L2T Frame to Field */
+			case 0x48:
+				if (ib[idx + 2] & (1 << 31)) {
+					DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+					return -EINVAL;
+				}
+				r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+				if (r) {
+					DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+					return -EINVAL;
+				}
+				dst_offset = ib[idx+1];
+				dst_offset <<= 8;
+				dst2_offset = ib[idx+2];
+				dst2_offset <<= 8;
+				src_offset = ib[idx+8];
+				src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
+						 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
+						 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
+						 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+					return -EINVAL;
+				}
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+				ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += 10;
+				break;
+			/* Copy L2T/T2L, partial */
+			case 0x49:
+				/* L2T, T2L partial */
+				if (p->family < CHIP_CAYMAN) {
+					DRM_ERROR("L2T, T2L Partial is cayman only !\n");
+					return -EINVAL;
+				}
+				/* detile bit */
+				if (ib[idx + 2] & (1 << 31)) {
+					/* tiled src, linear dst */
+					ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+					ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
 				} else {
-					/* L2L, dw */
-					src_offset = ib[idx+2];
-					src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+					/* linear src, tiled dst */
+					ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				}
+				p->idx += 12;
+				break;
+			/* Copy L2T broadcast */
+			case 0x4b:
+				/* L2T, broadcast */
+				if (ib[idx + 2] & (1 << 31)) {
+					DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+					return -EINVAL;
+				}
+				r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+				if (r) {
+					DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+					return -EINVAL;
+				}
+				dst_offset = ib[idx+1];
+				dst_offset <<= 8;
+				dst2_offset = ib[idx+2];
+				dst2_offset <<= 8;
+				src_offset = ib[idx+8];
+				src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+						 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+						 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+						 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+					return -EINVAL;
+				}
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+				ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += 10;
+				break;
+			/* Copy L2T/T2L (tile units) */
+			case 0x4c:
+				/* L2T, T2L */
+				/* detile bit */
+				if (ib[idx + 2] & (1 << 31)) {
+					/* tiled src, linear dst */
+					src_offset = ib[idx+1];
+					src_offset <<= 8;
+					ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+					dst_offset = ib[idx+7];
+					dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+					ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
 				} else {
+					/* linear src, tiled dst */
+					src_offset = ib[idx+7];
+					src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+					ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
 					dst_offset = ib[idx+1];
-					dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
-					if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-						dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
-							 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-						return -EINVAL;
-					}
-					if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-						dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
-							 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-						return -EINVAL;
-					}
-					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-					ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-					ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-					ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-					p->idx += 5;
+					dst_offset <<= 8;
+					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
 				}
+				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
+						 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
+						 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+					return -EINVAL;
+				}
+				p->idx += 9;
+				break;
+			/* Copy T2T, partial (tile units) */
+			case 0x4d:
+				/* T2T partial */
+				if (p->family < CHIP_CAYMAN) {
+					DRM_ERROR("L2T, T2L Partial is cayman only !\n");
+					return -EINVAL;
+				}
+				ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+				ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				p->idx += 13;
+				break;
+			/* Copy L2T broadcast (tile units) */
+			case 0x4f:
+				/* L2T, broadcast */
+				if (ib[idx + 2] & (1 << 31)) {
+					DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+					return -EINVAL;
+				}
+				r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+				if (r) {
+					DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+					return -EINVAL;
+				}
+				dst_offset = ib[idx+1];
+				dst_offset <<= 8;
+				dst2_offset = ib[idx+2];
+				dst2_offset <<= 8;
+				src_offset = ib[idx+8];
+				src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+						 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+						 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+					return -EINVAL;
+				}
+				if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+					dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+						 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+					return -EINVAL;
+				}
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+				ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += 10;
+				break;
+			default:
+				DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib[idx+0]);
+				return -EINVAL;
 			}
 			break;
 		case DMA_PACKET_CONSTANT_FILL:
@@ -3435,88 +3423,79 @@ int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
 int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	u32 idx = 0;
-	u32 header, cmd, count, tiled, new_cmd, misc;
+	u32 header, cmd, count, sub_cmd;
 
 	do {
 		header = ib->ptr[idx];
 		cmd = GET_DMA_CMD(header);
 		count = GET_DMA_COUNT(header);
-		tiled = GET_DMA_T(header);
-		new_cmd = GET_DMA_NEW(header);
-		misc = GET_DMA_MISC(header);
+		sub_cmd = GET_DMA_SUB_CMD(header);
 
 		switch (cmd) {
 		case DMA_PACKET_WRITE:
-			if (tiled)
+			switch (sub_cmd) {
+			/* tiled */
+			case 8:
 				idx += count + 7;
-			else
+				break;
+			/* linear */
+			case 0:
 				idx += count + 3;
+				break;
+			default:
+				DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]);
+				return -EINVAL;
+			}
 			break;
 		case DMA_PACKET_COPY:
-			if (tiled) {
-				if (new_cmd) {
-					switch (misc) {
-					case 0:
-						/* L2T, frame to fields */
-						idx += 10;
-						break;
-					case 1:
-						/* L2T, T2L partial */
-						idx += 12;
-						break;
-					case 3:
-						/* L2T, broadcast */
-						idx += 10;
-						break;
-					case 4:
-						/* L2T, T2L */
-						idx += 9;
-						break;
-					case 5:
-						/* T2T partial */
-						idx += 13;
-						break;
-					case 7:
-						/* L2T, broadcast */
-						idx += 10;
-						break;
-					default:
-						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-						return -EINVAL;
-					}
-				} else {
-					switch (misc) {
-					case 0:
-						idx += 9;
-						break;
-					default:
-						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-						return -EINVAL;
-					}
-				}
-			} else {
-				if (new_cmd) {
-					switch (misc) {
-					case 0:
-						/* L2L, byte */
-						idx += 5;
-						break;
-					case 1:
-						/* L2L, partial */
-						idx += 9;
-						break;
-					case 4:
-						/* L2L, dw, broadcast */
-						idx += 7;
-						break;
-					default:
-						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-						return -EINVAL;
-					}
-				} else {
-					/* L2L, dw */
-					idx += 5;
-				}
-			}
+			switch (sub_cmd) {
+			/* Copy L2L, DW aligned */
+			case 0x00:
+				idx += 5;
+				break;
+			/* Copy L2T/T2L */
+			case 0x08:
+				idx += 9;
+				break;
+			/* Copy L2L, byte aligned */
+			case 0x40:
+				idx += 5;
+				break;
+			/* Copy L2L, partial */
+			case 0x41:
+				idx += 9;
+				break;
+			/* Copy L2L, DW aligned, broadcast */
+			case 0x44:
+				idx += 7;
+				break;
+			/* Copy L2T Frame to Field */
+			case 0x48:
+				idx += 10;
+				break;
+			/* Copy L2T/T2L, partial */
+			case 0x49:
+				idx += 12;
+				break;
+			/* Copy L2T broadcast */
+			case 0x4b:
+				idx += 10;
+				break;
+			/* Copy L2T/T2L (tile units) */
+			case 0x4c:
+				idx += 9;
+				break;
+			/* Copy T2T, partial (tile units) */
+			case 0x4d:
+				idx += 13;
+				break;
+			/* Copy L2T broadcast (tile units) */
+			case 0x4f:
+				idx += 10;
+				break;
+			default:
+				DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]);
+				return -EINVAL;
+			}
 			break;
 		case DMA_PACKET_CONSTANT_FILL:
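
Note on the case labels used in the two switches above: GET_DMA_SUB_CMD (added to evergreend.h below) reads header bits 27:20 as a single field, which the old code decoded piecemeal as new_cmd (bit 26), tiled (bit 23) and misc (bits 22:20). A minimal sketch, not part of the patch and with an invented helper name, of how the old flags line up with the new sub-command values:

static inline u32 dma_sub_cmd_from_old_fields(u32 tiled, u32 new_cmd, u32 misc)
{
	/* sub_cmd bit 6 was GET_DMA_NEW, bit 3 was GET_DMA_T, bits 2:0 were GET_DMA_MISC */
	return (new_cmd << 6) | (tiled << 3) | misc;
}

/*
 * Examples, matching the case labels in evergreen_dma_cs_parse():
 *   L2L, dw               (tiled=0, new=0, misc=0) -> 0x00
 *   tiled write            (tiled=1, new=0, misc=0) -> 0x08  (DMA_PACKET_WRITE case 8)
 *   L2L, byte              (tiled=0, new=1, misc=0) -> 0x40
 *   L2L, partial           (tiled=0, new=1, misc=1) -> 0x41
 *   L2L, dw, broadcast     (tiled=0, new=1, misc=4) -> 0x44
 *   L2T, frame to fields   (tiled=1, new=1, misc=0) -> 0x48
 *   L2T/T2L, partial       (tiled=1, new=1, misc=1) -> 0x49
 *   L2T, broadcast         (tiled=1, new=1, misc=3) -> 0x4b
 *   L2T/T2L                (tiled=1, new=1, misc=4) -> 0x4c
 *   T2T, partial           (tiled=1, new=1, misc=5) -> 0x4d
 *   L2T, broadcast         (tiled=1, new=1, misc=7) -> 0x4f
 */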
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 70388fe4ee59..5ad29f3fc722 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -924,20 +924,23 @@
 #define CAYMAN_DMA1_CNTL                                  0xd82c
 
 /* async DMA packets */
-#define DMA_PACKET(cmd, t, s, n)	((((cmd) & 0xF) << 28) |	\
-					 (((t) & 0x1) << 23) |		\
-					 (((s) & 0x1) << 22) |		\
-					 (((n) & 0xFFFFF) << 0))
+#define DMA_PACKET(cmd, sub_cmd, n)	((((cmd) & 0xF) << 28) |	\
+					 (((sub_cmd) & 0xFF) << 20) |	\
+					 (((n) & 0xFFFFF) << 0))
+#define GET_DMA_CMD(h)			(((h) & 0xf0000000) >> 28)
+#define GET_DMA_COUNT(h)		((h) & 0x000fffff)
+#define GET_DMA_SUB_CMD(h)		(((h) & 0x0ff00000) >> 20)
+
 /* async DMA Packet types */
 #define DMA_PACKET_WRITE                                  0x2
 #define DMA_PACKET_COPY                                   0x3
 #define DMA_PACKET_INDIRECT_BUFFER                        0x4
 #define DMA_PACKET_SEMAPHORE                              0x5
 #define DMA_PACKET_FENCE                                  0x6
 #define DMA_PACKET_TRAP                                   0x7
 #define DMA_PACKET_SRBM_WRITE                             0x9
 #define DMA_PACKET_CONSTANT_FILL                          0xd
 #define DMA_PACKET_NOP                                    0xf
 
 /* PCIE link stuff */
 #define PCIE_LC_TRAINING_CNTL                             0xa1 /* PCIE_P */
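
With the reworked macros, building and re-parsing a DMA packet header is a plain pack/unpack of three fields. A minimal sketch, not part of the patch (the function name is invented), using only the macros defined above:

/* Round-trip a header through the reworked macros. */
static void dma_packet_header_demo(void)
{
	/* L2L partial copy (sub_cmd 0x41), count of 7 dwords */
	u32 header = DMA_PACKET(DMA_PACKET_COPY, 0x41, 7);	/* 0x34100007 */

	WARN_ON(GET_DMA_CMD(header) != DMA_PACKET_COPY);	/* bits 31:28 */
	WARN_ON(GET_DMA_SUB_CMD(header) != 0x41);		/* bits 27:20 */
	WARN_ON(GET_DMA_COUNT(header) != 7);			/* bits 19:0  */
}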