about summary refs log tree commit diff stats
path: root/drivers/gpu/drm/radeon/evergreen_cs.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/radeon/evergreen_cs.c')
-rw-r--r--drivers/gpu/drm/radeon/evergreen_cs.c603
1 files changed, 600 insertions, 3 deletions
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 62c227104781..74c6b42d2597 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -34,6 +34,8 @@
34#define MAX(a,b) (((a)>(b))?(a):(b)) 34#define MAX(a,b) (((a)>(b))?(a):(b))
35#define MIN(a,b) (((a)<(b))?(a):(b)) 35#define MIN(a,b) (((a)<(b))?(a):(b))
36 36
37int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
38 struct radeon_cs_reloc **cs_reloc);
37static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p, 39static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
38 struct radeon_cs_reloc **cs_reloc); 40 struct radeon_cs_reloc **cs_reloc);
39 41
@@ -507,20 +509,28 @@ static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
507 /* height is npipes htiles aligned == npipes * 8 pixel aligned */ 509 /* height is npipes htiles aligned == npipes * 8 pixel aligned */
508 nby = round_up(nby, track->npipes * 8); 510 nby = round_up(nby, track->npipes * 8);
509 } else { 511 } else {
512 /* always assume 8x8 htile */
513 /* align is htile align * 8, htile align vary according to
514 * number of pipe and tile width and nby
515 */
510 switch (track->npipes) { 516 switch (track->npipes) {
511 case 8: 517 case 8:
518 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
512 nbx = round_up(nbx, 64 * 8); 519 nbx = round_up(nbx, 64 * 8);
513 nby = round_up(nby, 64 * 8); 520 nby = round_up(nby, 64 * 8);
514 break; 521 break;
515 case 4: 522 case 4:
523 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
516 nbx = round_up(nbx, 64 * 8); 524 nbx = round_up(nbx, 64 * 8);
517 nby = round_up(nby, 32 * 8); 525 nby = round_up(nby, 32 * 8);
518 break; 526 break;
519 case 2: 527 case 2:
528 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
520 nbx = round_up(nbx, 32 * 8); 529 nbx = round_up(nbx, 32 * 8);
521 nby = round_up(nby, 32 * 8); 530 nby = round_up(nby, 32 * 8);
522 break; 531 break;
523 case 1: 532 case 1:
533 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
524 nbx = round_up(nbx, 32 * 8); 534 nbx = round_up(nbx, 32 * 8);
525 nby = round_up(nby, 16 * 8); 535 nby = round_up(nby, 16 * 8);
526 break; 536 break;
@@ -531,9 +541,10 @@ static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
531 } 541 }
532 } 542 }
533 /* compute number of htile */ 543 /* compute number of htile */
534 nbx = nbx / 8; 544 nbx = nbx >> 3;
535 nby = nby / 8; 545 nby = nby >> 3;
536 size = nbx * nby * 4; 546 /* size must be aligned on npipes * 2K boundary */
547 size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
537 size += track->htile_offset; 548 size += track->htile_offset;
538 549
539 if (size > radeon_bo_size(track->htile_bo)) { 550 if (size > radeon_bo_size(track->htile_bo)) {
@@ -1790,6 +1801,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1790 case DB_HTILE_SURFACE: 1801 case DB_HTILE_SURFACE:
1791 /* 8x8 only */ 1802 /* 8x8 only */
1792 track->htile_surface = radeon_get_ib_value(p, idx); 1803 track->htile_surface = radeon_get_ib_value(p, idx);
1804 /* force 8x8 htile width and height */
1805 ib[idx] |= 3;
1793 track->db_dirty = true; 1806 track->db_dirty = true;
1794 break; 1807 break;
1795 case CB_IMMED0_BASE: 1808 case CB_IMMED0_BASE:
@@ -2243,6 +2256,18 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
2243 command = radeon_get_ib_value(p, idx+4); 2256 command = radeon_get_ib_value(p, idx+4);
2244 size = command & 0x1fffff; 2257 size = command & 0x1fffff;
2245 info = radeon_get_ib_value(p, idx+1); 2258 info = radeon_get_ib_value(p, idx+1);
2259 if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2260 (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2261 ((((info & 0x00300000) >> 20) == 0) &&
2262 (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2263 ((((info & 0x60000000) >> 29) == 0) &&
2264 (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2265 /* non mem to mem copies requires dw aligned count */
2266 if (size % 4) {
2267 DRM_ERROR("CP DMA command requires dw count alignment\n");
2268 return -EINVAL;
2269 }
2270 }
2246 if (command & PACKET3_CP_DMA_CMD_SAS) { 2271 if (command & PACKET3_CP_DMA_CMD_SAS) {
2247 /* src address space is register */ 2272 /* src address space is register */
2248 /* GDS is ok */ 2273 /* GDS is ok */
@@ -2804,6 +2829,455 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
2804 return 0; 2829 return 0;
2805} 2830}
2806 2831
2832/*
2833 * DMA
2834 */
2835
2836#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
2837#define GET_DMA_COUNT(h) ((h) & 0x000fffff)
2838#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
2839#define GET_DMA_NEW(h) (((h) & 0x04000000) >> 26)
2840#define GET_DMA_MISC(h) (((h) & 0x0700000) >> 20)
2841
2842/**
2843 * evergreen_dma_cs_parse() - parse the DMA IB
2844 * @p: parser structure holding parsing context.
2845 *
2846 * Parses the DMA IB from the CS ioctl and updates
2847 * the GPU addresses based on the reloc information and
2848 * checks for errors. (Evergreen-Cayman)
2849 * Returns 0 for success and an error on failure.
2850 **/
2851int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2852{
2853 struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
2854 struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
2855 u32 header, cmd, count, tiled, new_cmd, misc;
2856 volatile u32 *ib = p->ib.ptr;
2857 u32 idx, idx_value;
2858 u64 src_offset, dst_offset, dst2_offset;
2859 int r;
2860
2861 do {
2862 if (p->idx >= ib_chunk->length_dw) {
2863 DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
2864 p->idx, ib_chunk->length_dw);
2865 return -EINVAL;
2866 }
2867 idx = p->idx;
2868 header = radeon_get_ib_value(p, idx);
2869 cmd = GET_DMA_CMD(header);
2870 count = GET_DMA_COUNT(header);
2871 tiled = GET_DMA_T(header);
2872 new_cmd = GET_DMA_NEW(header);
2873 misc = GET_DMA_MISC(header);
2874
2875 switch (cmd) {
2876 case DMA_PACKET_WRITE:
2877 r = r600_dma_cs_next_reloc(p, &dst_reloc);
2878 if (r) {
2879 DRM_ERROR("bad DMA_PACKET_WRITE\n");
2880 return -EINVAL;
2881 }
2882 if (tiled) {
2883 dst_offset = ib[idx+1];
2884 dst_offset <<= 8;
2885
2886 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2887 p->idx += count + 7;
2888 } else {
2889 dst_offset = ib[idx+1];
2890 dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
2891
2892 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2893 ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2894 p->idx += count + 3;
2895 }
2896 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2897 dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
2898 dst_offset, radeon_bo_size(dst_reloc->robj));
2899 return -EINVAL;
2900 }
2901 break;
2902 case DMA_PACKET_COPY:
2903 r = r600_dma_cs_next_reloc(p, &src_reloc);
2904 if (r) {
2905 DRM_ERROR("bad DMA_PACKET_COPY\n");
2906 return -EINVAL;
2907 }
2908 r = r600_dma_cs_next_reloc(p, &dst_reloc);
2909 if (r) {
2910 DRM_ERROR("bad DMA_PACKET_COPY\n");
2911 return -EINVAL;
2912 }
2913 if (tiled) {
2914 idx_value = radeon_get_ib_value(p, idx + 2);
2915 if (new_cmd) {
2916 switch (misc) {
2917 case 0:
2918 /* L2T, frame to fields */
2919 if (idx_value & (1 << 31)) {
2920 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2921 return -EINVAL;
2922 }
2923 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2924 if (r) {
2925 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2926 return -EINVAL;
2927 }
2928 dst_offset = ib[idx+1];
2929 dst_offset <<= 8;
2930 dst2_offset = ib[idx+2];
2931 dst2_offset <<= 8;
2932 src_offset = ib[idx+8];
2933 src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
2934 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2935 dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
2936 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2937 return -EINVAL;
2938 }
2939 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2940 dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
2941 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2942 return -EINVAL;
2943 }
2944 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2945 dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
2946 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2947 return -EINVAL;
2948 }
2949 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2950 ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
2951 ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
2952 ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2953 p->idx += 10;
2954 break;
2955 case 1:
2956 /* L2T, T2L partial */
2957 if (p->family < CHIP_CAYMAN) {
2958 DRM_ERROR("L2T, T2L Partial is cayman only !\n");
2959 return -EINVAL;
2960 }
2961 /* detile bit */
2962 if (idx_value & (1 << 31)) {
2963 /* tiled src, linear dst */
2964 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
2965
2966 ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2967 ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2968 } else {
2969 /* linear src, tiled dst */
2970 ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
2971 ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2972
2973 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2974 }
2975 p->idx += 12;
2976 break;
2977 case 3:
2978 /* L2T, broadcast */
2979 if (idx_value & (1 << 31)) {
2980 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
2981 return -EINVAL;
2982 }
2983 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2984 if (r) {
2985 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
2986 return -EINVAL;
2987 }
2988 dst_offset = ib[idx+1];
2989 dst_offset <<= 8;
2990 dst2_offset = ib[idx+2];
2991 dst2_offset <<= 8;
2992 src_offset = ib[idx+8];
2993 src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
2994 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2995 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
2996 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2997 return -EINVAL;
2998 }
2999 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3000 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3001 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3002 return -EINVAL;
3003 }
3004 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3005 dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3006 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3007 return -EINVAL;
3008 }
3009 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3010 ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
3011 ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3012 ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3013 p->idx += 10;
3014 break;
3015 case 4:
3016 /* L2T, T2L */
3017 /* detile bit */
3018 if (idx_value & (1 << 31)) {
3019 /* tiled src, linear dst */
3020 src_offset = ib[idx+1];
3021 src_offset <<= 8;
3022 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3023
3024 dst_offset = ib[idx+7];
3025 dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
3026 ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3027 ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3028 } else {
3029 /* linear src, tiled dst */
3030 src_offset = ib[idx+7];
3031 src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
3032 ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3033 ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3034
3035 dst_offset = ib[idx+1];
3036 dst_offset <<= 8;
3037 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3038 }
3039 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3040 dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
3041 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3042 return -EINVAL;
3043 }
3044 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3045 dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
3046 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3047 return -EINVAL;
3048 }
3049 p->idx += 9;
3050 break;
3051 case 5:
3052 /* T2T partial */
3053 if (p->family < CHIP_CAYMAN) {
3054 DRM_ERROR("L2T, T2L Partial is cayman only !\n");
3055 return -EINVAL;
3056 }
3057 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3058 ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3059 p->idx += 13;
3060 break;
3061 case 7:
3062 /* L2T, broadcast */
3063 if (idx_value & (1 << 31)) {
3064 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3065 return -EINVAL;
3066 }
3067 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3068 if (r) {
3069 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3070 return -EINVAL;
3071 }
3072 dst_offset = ib[idx+1];
3073 dst_offset <<= 8;
3074 dst2_offset = ib[idx+2];
3075 dst2_offset <<= 8;
3076 src_offset = ib[idx+8];
3077 src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
3078 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3079 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3080 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3081 return -EINVAL;
3082 }
3083 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3084 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3085 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3086 return -EINVAL;
3087 }
3088 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3089 dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3090 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3091 return -EINVAL;
3092 }
3093 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3094 ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
3095 ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3096 ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3097 p->idx += 10;
3098 break;
3099 default:
3100 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3101 return -EINVAL;
3102 }
3103 } else {
3104 switch (misc) {
3105 case 0:
3106 /* detile bit */
3107 if (idx_value & (1 << 31)) {
3108 /* tiled src, linear dst */
3109 src_offset = ib[idx+1];
3110 src_offset <<= 8;
3111 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3112
3113 dst_offset = ib[idx+7];
3114 dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
3115 ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3116 ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3117 } else {
3118 /* linear src, tiled dst */
3119 src_offset = ib[idx+7];
3120 src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
3121 ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3122 ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3123
3124 dst_offset = ib[idx+1];
3125 dst_offset <<= 8;
3126 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3127 }
3128 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3129 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3130 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3131 return -EINVAL;
3132 }
3133 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3134 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3135 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3136 return -EINVAL;
3137 }
3138 p->idx += 9;
3139 break;
3140 default:
3141 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3142 return -EINVAL;
3143 }
3144 }
3145 } else {
3146 if (new_cmd) {
3147 switch (misc) {
3148 case 0:
3149 /* L2L, byte */
3150 src_offset = ib[idx+2];
3151 src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
3152 dst_offset = ib[idx+1];
3153 dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
3154 if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
3155 dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
3156 src_offset + count, radeon_bo_size(src_reloc->robj));
3157 return -EINVAL;
3158 }
3159 if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
3160 dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
3161 dst_offset + count, radeon_bo_size(dst_reloc->robj));
3162 return -EINVAL;
3163 }
3164 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
3165 ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
3166 ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3167 ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3168 p->idx += 5;
3169 break;
3170 case 1:
3171 /* L2L, partial */
3172 if (p->family < CHIP_CAYMAN) {
3173 DRM_ERROR("L2L Partial is cayman only !\n");
3174 return -EINVAL;
3175 }
3176 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
3177 ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3178 ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
3179 ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3180
3181 p->idx += 9;
3182 break;
3183 case 4:
3184 /* L2L, dw, broadcast */
3185 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3186 if (r) {
3187 DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
3188 return -EINVAL;
3189 }
3190 dst_offset = ib[idx+1];
3191 dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
3192 dst2_offset = ib[idx+2];
3193 dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
3194 src_offset = ib[idx+3];
3195 src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
3196 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3197 dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
3198 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3199 return -EINVAL;
3200 }
3201 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3202 dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
3203 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3204 return -EINVAL;
3205 }
3206 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3207 dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
3208 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3209 return -EINVAL;
3210 }
3211 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3212 ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
3213 ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3214 ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3215 ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
3216 ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3217 p->idx += 7;
3218 break;
3219 default:
3220 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3221 return -EINVAL;
3222 }
3223 } else {
3224 /* L2L, dw */
3225 src_offset = ib[idx+2];
3226 src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
3227 dst_offset = ib[idx+1];
3228 dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
3229 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3230 dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
3231 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3232 return -EINVAL;
3233 }
3234 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3235 dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
3236 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3237 return -EINVAL;
3238 }
3239 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3240 ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3241 ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3242 ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3243 p->idx += 5;
3244 }
3245 }
3246 break;
3247 case DMA_PACKET_CONSTANT_FILL:
3248 r = r600_dma_cs_next_reloc(p, &dst_reloc);
3249 if (r) {
3250 DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3251 return -EINVAL;
3252 }
3253 dst_offset = ib[idx+1];
3254 dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
3255 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3256 dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
3257 dst_offset, radeon_bo_size(dst_reloc->robj));
3258 return -EINVAL;
3259 }
3260 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3261 ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
3262 p->idx += 4;
3263 break;
3264 case DMA_PACKET_NOP:
3265 p->idx += 1;
3266 break;
3267 default:
3268 DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3269 return -EINVAL;
3270 }
3271 } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
3272#if 0
3273 for (r = 0; r < p->ib->length_dw; r++) {
3274 printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
3275 mdelay(1);
3276 }
3277#endif
3278 return 0;
3279}
3280
2807/* vm parser */ 3281/* vm parser */
2808static bool evergreen_vm_reg_valid(u32 reg) 3282static bool evergreen_vm_reg_valid(u32 reg)
2809{ 3283{
@@ -3010,6 +3484,18 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev,
3010 case PACKET3_CP_DMA: 3484 case PACKET3_CP_DMA:
3011 command = ib[idx + 4]; 3485 command = ib[idx + 4];
3012 info = ib[idx + 1]; 3486 info = ib[idx + 1];
3487 if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
3488 (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
3489 ((((info & 0x00300000) >> 20) == 0) &&
3490 (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
3491 ((((info & 0x60000000) >> 29) == 0) &&
3492 (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
3493 /* non mem to mem copies requires dw aligned count */
3494 if ((command & 0x1fffff) % 4) {
3495 DRM_ERROR("CP DMA command requires dw count alignment\n");
3496 return -EINVAL;
3497 }
3498 }
3013 if (command & PACKET3_CP_DMA_CMD_SAS) { 3499 if (command & PACKET3_CP_DMA_CMD_SAS) {
3014 /* src address space is register */ 3500 /* src address space is register */
3015 if (((info & 0x60000000) >> 29) == 0) { 3501 if (((info & 0x60000000) >> 29) == 0) {
@@ -3094,3 +3580,114 @@ int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3094 3580
3095 return ret; 3581 return ret;
3096} 3582}
3583
3584/**
3585 * evergreen_dma_ib_parse() - parse the DMA IB for VM
3586 * @rdev: radeon_device pointer
3587 * @ib: radeon_ib pointer
3588 *
3589 * Parses the DMA IB from the VM CS ioctl
3590 * checks for errors. (Cayman-SI)
3591 * Returns 0 for success and an error on failure.
3592 **/
3593int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3594{
3595 u32 idx = 0;
3596 u32 header, cmd, count, tiled, new_cmd, misc;
3597
3598 do {
3599 header = ib->ptr[idx];
3600 cmd = GET_DMA_CMD(header);
3601 count = GET_DMA_COUNT(header);
3602 tiled = GET_DMA_T(header);
3603 new_cmd = GET_DMA_NEW(header);
3604 misc = GET_DMA_MISC(header);
3605
3606 switch (cmd) {
3607 case DMA_PACKET_WRITE:
3608 if (tiled)
3609 idx += count + 7;
3610 else
3611 idx += count + 3;
3612 break;
3613 case DMA_PACKET_COPY:
3614 if (tiled) {
3615 if (new_cmd) {
3616 switch (misc) {
3617 case 0:
3618 /* L2T, frame to fields */
3619 idx += 10;
3620 break;
3621 case 1:
3622 /* L2T, T2L partial */
3623 idx += 12;
3624 break;
3625 case 3:
3626 /* L2T, broadcast */
3627 idx += 10;
3628 break;
3629 case 4:
3630 /* L2T, T2L */
3631 idx += 9;
3632 break;
3633 case 5:
3634 /* T2T partial */
3635 idx += 13;
3636 break;
3637 case 7:
3638 /* L2T, broadcast */
3639 idx += 10;
3640 break;
3641 default:
3642 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3643 return -EINVAL;
3644 }
3645 } else {
3646 switch (misc) {
3647 case 0:
3648 idx += 9;
3649 break;
3650 default:
3651 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3652 return -EINVAL;
3653 }
3654 }
3655 } else {
3656 if (new_cmd) {
3657 switch (misc) {
3658 case 0:
3659 /* L2L, byte */
3660 idx += 5;
3661 break;
3662 case 1:
3663 /* L2L, partial */
3664 idx += 9;
3665 break;
3666 case 4:
3667 /* L2L, dw, broadcast */
3668 idx += 7;
3669 break;
3670 default:
3671 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3672 return -EINVAL;
3673 }
3674 } else {
3675 /* L2L, dw */
3676 idx += 5;
3677 }
3678 }
3679 break;
3680 case DMA_PACKET_CONSTANT_FILL:
3681 idx += 4;
3682 break;
3683 case DMA_PACKET_NOP:
3684 idx += 1;
3685 break;
3686 default:
3687 DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3688 return -EINVAL;
3689 }
3690 } while (idx < ib->length_dw);
3691
3692 return 0;
3693}