Diffstat (limited to 'drivers/gpu/drm/radeon/evergreen_cs.c')

 drivers/gpu/drm/radeon/evergreen_cs.c | 603 +++++++++++++++++++++++++++++++++-
 1 file changed, 600 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 62c227104781..74c6b42d2597 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -34,6 +34,8 @@
 #define MAX(a,b)	(((a)>(b))?(a):(b))
 #define MIN(a,b)	(((a)<(b))?(a):(b))
 
+int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
+			   struct radeon_cs_reloc **cs_reloc);
 static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
 					  struct radeon_cs_reloc **cs_reloc);
 
@@ -507,20 +509,28 @@ static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
 		/* height is npipes htiles aligned == npipes * 8 pixel aligned */
 		nby = round_up(nby, track->npipes * 8);
 	} else {
+		/* always assume 8x8 htile */
+		/* alignment is htile align * 8; htile align varies with the
+		 * number of pipes, the tile width, and nby
+		 */
 		switch (track->npipes) {
 		case 8:
+			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
 			nbx = round_up(nbx, 64 * 8);
 			nby = round_up(nby, 64 * 8);
 			break;
 		case 4:
+			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
 			nbx = round_up(nbx, 64 * 8);
 			nby = round_up(nby, 32 * 8);
 			break;
 		case 2:
+			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
 			nbx = round_up(nbx, 32 * 8);
 			nby = round_up(nby, 32 * 8);
 			break;
 		case 1:
+			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
 			nbx = round_up(nbx, 32 * 8);
 			nby = round_up(nby, 16 * 8);
 			break;
@@ -531,9 +541,10 @@ static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
 		}
 	}
 	/* compute number of htile */
-	nbx = nbx / 8;
-	nby = nby / 8;
-	size = nbx * nby * 4;
+	nbx = nbx >> 3;
+	nby = nby >> 3;
+	/* size must be aligned on npipes * 2K boundary */
+	size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
 	size += track->htile_offset;
 
 	if (size > radeon_bo_size(track->htile_bo)) {
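
Taken together, the two hunks above make the htile bookkeeping match the hardware: nbx/nby are first rounded to the per-pipe 8x8-htile alignment, the pixel dimensions are then converted to htile counts, and the 4-bytes-per-htile total is padded to an npipes * 2KB boundary before being checked against the BO size. A minimal standalone sketch of the resulting computation (function and parameter names are illustrative, not from the kernel; roundup() is the kernel macro used in the hunk):

/* Sketch: htile buffer size for the 8x8-htile case, per the hunks above.
 * nbx/nby are the surface dimensions in pixels, already rounded to the
 * per-pipe alignment (e.g. 64 * 8 in both axes for 8 pipes).
 */
static u64 htile_size_bytes(u32 nbx, u32 nby, u32 npipes)
{
	nbx >>= 3;	/* pixels -> htiles: one htile covers 8x8 pixels */
	nby >>= 3;
	/* 4 bytes per htile, padded to an npipes * 2KB boundary
	 * (2 << 10 == 2048, as in the hunk above) */
	return roundup((u64)nbx * nby * 4, npipes * 2048);
}
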
@@ -1790,6 +1801,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 	case DB_HTILE_SURFACE:
 		/* 8x8 only */
 		track->htile_surface = radeon_get_ib_value(p, idx);
+		/* force 8x8 htile width and height */
+		ib[idx] |= 3;
 		track->db_dirty = true;
 		break;
 	case CB_IMMED0_BASE:
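
The added ib[idx] |= 3; rewrites the command stream itself: whatever htile width/height userspace programmed into DB_HTILE_SURFACE, the kernel forces the two low bits on so the register always selects the 8x8 htile mode that the size validation above assumes. A sketch of the intent, with hypothetical bit names (the real defines live elsewhere; these names are illustrative, inferred only from the "8x8 only" comment and the |= 3):

/* Hypothetical bit layout implied by the "force 8x8" comment above;
 * names are illustrative, not taken from the radeon headers. */
#define HTILE_WIDTH_8	(1 << 0)	/* bit 0: htile width select */
#define HTILE_HEIGHT_8	(1 << 1)	/* bit 1: htile height select */

ib[idx] |= HTILE_WIDTH_8 | HTILE_HEIGHT_8;	/* equivalent to ib[idx] |= 3 */
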
@@ -2243,6 +2256,18 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		command = radeon_get_ib_value(p, idx+4);
 		size = command & 0x1fffff;
 		info = radeon_get_ib_value(p, idx+1);
+		if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
+		    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
+		    ((((info & 0x00300000) >> 20) == 0) &&
+		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
+		    ((((info & 0x60000000) >> 29) == 0) &&
+		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
+			/* non mem-to-mem copies require a dw-aligned count */
+			if (size % 4) {
+				DRM_ERROR("CP DMA command requires dw count alignment\n");
+				return -EINVAL;
+			}
+		}
 		if (command & PACKET3_CP_DMA_CMD_SAS) {
 			/* src address space is register */
 			/* GDS is ok */
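
The new guard refuses any CP DMA transfer whose byte count is not a multiple of four whenever either endpoint is something other than plain memory: GDS or DATA as source, GDS as destination, or a register on either side per the CMD_SAS/CMD_DAS flags. Pulled out of the packet parser, the predicate reads as follows (a sketch; the helper name is illustrative, the masks and flags are the ones used above):

/* Sketch: does this CP DMA packet require a dword-aligned byte count?
 * info is IB dword idx+1, command is IB dword idx+4, as in the hunk above. */
static bool cp_dma_needs_dw_aligned_count(u32 info, u32 command)
{
	u32 src_sel = (info & 0x60000000) >> 29;	/* 0 = memory */
	u32 dst_sel = (info & 0x00300000) >> 20;	/* 0 = memory */

	return src_sel != 0 ||	/* src = GDS or DATA */
	       dst_sel != 0 ||	/* dst = GDS */
	       (dst_sel == 0 && (command & PACKET3_CP_DMA_CMD_DAS)) ||	/* dst = register */
	       (src_sel == 0 && (command & PACKET3_CP_DMA_CMD_SAS));	/* src = register */
}
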
@@ -2804,6 +2829,455 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
 	return 0;
 }
 
+/*
+ * DMA
+ */
+
+#define GET_DMA_CMD(h)		(((h) & 0xf0000000) >> 28)
+#define GET_DMA_COUNT(h)	((h) & 0x000fffff)
+#define GET_DMA_T(h)		(((h) & 0x00800000) >> 23)
+#define GET_DMA_NEW(h)		(((h) & 0x04000000) >> 26)
+#define GET_DMA_MISC(h)		(((h) & 0x00700000) >> 20)
+
+/**
+ * evergreen_dma_cs_parse() - parse the DMA IB
+ * @p:		parser structure holding parsing context.
+ *
+ * Parses the DMA IB from the CS ioctl and updates
+ * the GPU addresses based on the reloc information and
+ * checks for errors. (Evergreen-Cayman)
+ * Returns 0 for success and an error on failure.
+ **/
+int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
+{
+	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+	struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
+	u32 header, cmd, count, tiled, new_cmd, misc;
+	volatile u32 *ib = p->ib.ptr;
+	u32 idx, idx_value;
+	u64 src_offset, dst_offset, dst2_offset;
+	int r;
+
+	do {
+		if (p->idx >= ib_chunk->length_dw) {
+			DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+				  p->idx, ib_chunk->length_dw);
+			return -EINVAL;
+		}
+		idx = p->idx;
+		header = radeon_get_ib_value(p, idx);
+		cmd = GET_DMA_CMD(header);
+		count = GET_DMA_COUNT(header);
+		tiled = GET_DMA_T(header);
+		new_cmd = GET_DMA_NEW(header);
+		misc = GET_DMA_MISC(header);
+
+		switch (cmd) {
+		case DMA_PACKET_WRITE:
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_WRITE\n");
+				return -EINVAL;
+			}
+			if (tiled) {
+				dst_offset = ib[idx+1];
+				dst_offset <<= 8;
+
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				p->idx += count + 7;
+			} else {
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
+
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += count + 3;
+			}
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
+					 dst_offset, radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			break;
+		case DMA_PACKET_COPY:
+			r = r600_dma_cs_next_reloc(p, &src_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_COPY\n");
+				return -EINVAL;
+			}
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_COPY\n");
+				return -EINVAL;
+			}
+			if (tiled) {
+				idx_value = radeon_get_ib_value(p, idx + 2);
+				if (new_cmd) {
+					switch (misc) {
+					case 0:
+						/* L2T, frame to fields */
+						if (idx_value & (1 << 31)) {
+							DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+							return -EINVAL;
+						}
+						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+						if (r) {
+							DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+							return -EINVAL;
+						}
+						dst_offset = ib[idx+1];
+						dst_offset <<= 8;
+						dst2_offset = ib[idx+2];
+						dst2_offset <<= 8;
+						src_offset = ib[idx+8];
+						src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
+								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
+								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
+								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+							return -EINVAL;
+						}
+						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+						p->idx += 10;
+						break;
+					case 1:
+						/* L2T, T2L partial */
+						if (p->family < CHIP_CAYMAN) {
+							DRM_ERROR("L2T, T2L Partial is cayman only !\n");
+							return -EINVAL;
+						}
+						/* detile bit */
+						if (idx_value & (1 << 31)) {
+							/* tiled src, linear dst */
+							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+						} else {
+							/* linear src, tiled dst */
+							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+						}
+						p->idx += 12;
+						break;
+					case 3:
+						/* L2T, broadcast */
+						if (idx_value & (1 << 31)) {
+							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+							return -EINVAL;
+						}
+						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+						if (r) {
+							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+							return -EINVAL;
+						}
+						dst_offset = ib[idx+1];
+						dst_offset <<= 8;
+						dst2_offset = ib[idx+2];
+						dst2_offset <<= 8;
+						src_offset = ib[idx+8];
+						src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+							return -EINVAL;
+						}
+						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+						p->idx += 10;
+						break;
+					case 4:
+						/* L2T, T2L */
+						/* detile bit */
+						if (idx_value & (1 << 31)) {
+							/* tiled src, linear dst */
+							src_offset = ib[idx+1];
+							src_offset <<= 8;
+							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+							dst_offset = ib[idx+7];
+							dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+						} else {
+							/* linear src, tiled dst */
+							src_offset = ib[idx+7];
+							src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+							dst_offset = ib[idx+1];
+							dst_offset <<= 8;
+							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+						}
+						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
+								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
+								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+							return -EINVAL;
+						}
+						p->idx += 9;
+						break;
+					case 5:
+						/* T2T partial */
+						if (p->family < CHIP_CAYMAN) {
+							DRM_ERROR("T2T Partial is cayman only !\n");
+							return -EINVAL;
+						}
+						ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+						ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+						p->idx += 13;
+						break;
+					case 7:
+						/* L2T, broadcast */
+						if (idx_value & (1 << 31)) {
+							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+							return -EINVAL;
+						}
+						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+						if (r) {
+							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+							return -EINVAL;
+						}
+						dst_offset = ib[idx+1];
+						dst_offset <<= 8;
+						dst2_offset = ib[idx+2];
+						dst2_offset <<= 8;
+						src_offset = ib[idx+8];
+						src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+							return -EINVAL;
+						}
+						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+						p->idx += 10;
+						break;
+					default:
+						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+						return -EINVAL;
+					}
+				} else {
+					switch (misc) {
+					case 0:
+						/* detile bit */
+						if (idx_value & (1 << 31)) {
+							/* tiled src, linear dst */
+							src_offset = ib[idx+1];
+							src_offset <<= 8;
+							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+							dst_offset = ib[idx+7];
+							dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+						} else {
+							/* linear src, tiled dst */
+							src_offset = ib[idx+7];
+							src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+							dst_offset = ib[idx+1];
+							dst_offset <<= 8;
+							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+						}
+						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+							return -EINVAL;
+						}
+						p->idx += 9;
+						break;
+					default:
+						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+						return -EINVAL;
+					}
+				}
+			} else {
+				if (new_cmd) {
+					switch (misc) {
+					case 0:
+						/* L2L, byte */
+						src_offset = ib[idx+2];
+						src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+						dst_offset = ib[idx+1];
+						dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+						if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
+								 src_offset + count, radeon_bo_size(src_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
+								 dst_offset + count, radeon_bo_size(dst_reloc->robj));
+							return -EINVAL;
+						}
+						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+						ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+						ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+						ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+						p->idx += 5;
+						break;
+					case 1:
+						/* L2L, partial */
+						if (p->family < CHIP_CAYMAN) {
+							DRM_ERROR("L2L Partial is cayman only !\n");
+							return -EINVAL;
+						}
+						ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+						ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+						ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+						ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+
+						p->idx += 9;
+						break;
+					case 4:
+						/* L2L, dw, broadcast */
+						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+						if (r) {
+							DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
+							return -EINVAL;
+						}
+						dst_offset = ib[idx+1];
+						dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+						dst2_offset = ib[idx+2];
+						dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
+						src_offset = ib[idx+3];
+						src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
+								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
+								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
+								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+							return -EINVAL;
+						}
+						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
+						ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+						ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+						ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
+						ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+						p->idx += 7;
+						break;
+					default:
+						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+						return -EINVAL;
+					}
+				} else {
+					/* L2L, dw */
+					src_offset = ib[idx+2];
+					src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+					dst_offset = ib[idx+1];
+					dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+					if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+						dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
+							 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+						return -EINVAL;
+					}
+					if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+						dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
+							 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+						return -EINVAL;
+					}
+					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+					ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+					p->idx += 5;
+				}
+			}
+			break;
+		case DMA_PACKET_CONSTANT_FILL:
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
+				return -EINVAL;
+			}
+			dst_offset = ib[idx+1];
+			dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
+					 dst_offset, radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+			ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
+			p->idx += 4;
+			break;
+		case DMA_PACKET_NOP:
+			p->idx += 1;
+			break;
+		default:
+			DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+			return -EINVAL;
+		}
+	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+	for (r = 0; r < p->ib.length_dw; r++) {
+		printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
+		mdelay(1);
+	}
+#endif
+	return 0;
+}
+
 /* vm parser */
 static bool evergreen_vm_reg_valid(u32 reg)
 {
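
evergreen_dma_cs_parse() above gives the async DMA ring its own checker, mirroring what evergreen_cs_parse() does for the GFX ring: decode the packet header, fetch the relocations, bounds-check every buffer against its BO, patch the address dwords with the real GPU offsets, and step p->idx by the packet's length. The header layout encoded by the GET_DMA_* macros, restated as a sketch (struct and function names are illustrative):

/* Sketch: async DMA packet header fields, per the GET_DMA_* macros above. */
struct dma_pkt_hdr {
	u32 cmd;	/* bits 31:28: DMA_PACKET_{WRITE,COPY,CONSTANT_FILL,NOP} */
	u32 new_cmd;	/* bit 26: new-style packet carrying a misc sub-op */
	u32 tiled;	/* bit 23: tiled (T) vs linear (L) addressing */
	u32 misc;	/* bits 22:20: sub-op selecting the copy variant */
	u32 count;	/* bits 19:0: size in dwords (bytes for the L2L byte copy) */
};

static struct dma_pkt_hdr dma_pkt_decode(u32 header)
{
	return (struct dma_pkt_hdr){
		.cmd     = (header & 0xf0000000) >> 28,
		.new_cmd = (header & 0x04000000) >> 26,
		.tiled   = (header & 0x00800000) >> 23,
		.misc    = (header & 0x00700000) >> 20,
		.count   =  header & 0x000fffff,
	};
}

The relocation patching itself follows one of two shapes throughout the function: tiled addresses are stored as a single 256-byte-aligned dword, so the relocated base is added shifted right by 8, while linear addresses are split into a dword-aligned low word plus an 8-bit high byte (illustrative helpers, same arithmetic as above):

/* Sketch of the two patch patterns used in evergreen_dma_cs_parse(). */
static void patch_tiled(volatile u32 *ib, u32 idx, u64 gpu_offset)
{
	ib[idx] += (u32)(gpu_offset >> 8);		/* 256-byte aligned base */
}

static void patch_linear(volatile u32 *ib, u32 lo, u32 hi, u64 gpu_offset)
{
	ib[lo] += (u32)(gpu_offset & 0xfffffffc);	/* dword-aligned low 32 bits */
	ib[hi] += upper_32_bits(gpu_offset) & 0xff;	/* address bits 39:32 */
}
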
@@ -3010,6 +3484,18 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev,
 	case PACKET3_CP_DMA:
 		command = ib[idx + 4];
 		info = ib[idx + 1];
+		if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
+		    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
+		    ((((info & 0x00300000) >> 20) == 0) &&
+		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
+		    ((((info & 0x60000000) >> 29) == 0) &&
+		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
+			/* non mem-to-mem copies require a dw-aligned count */
+			if ((command & 0x1fffff) % 4) {
+				DRM_ERROR("CP DMA command requires dw count alignment\n");
+				return -EINVAL;
+			}
+		}
 		if (command & PACKET3_CP_DMA_CMD_SAS) {
 			/* src address space is register */
 			if (((info & 0x60000000) >> 29) == 0) {
@@ -3094,3 +3580,114 @@ int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
 
 	return ret;
 }
+
+/**
+ * evergreen_dma_ib_parse() - parse the DMA IB for VM
+ * @rdev:	radeon_device pointer
+ * @ib:		radeon_ib pointer
+ *
+ * Parses the DMA IB from the VM CS ioctl and
+ * checks for errors. (Cayman-SI)
+ * Returns 0 for success and an error on failure.
+ **/
+int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	u32 idx = 0;
+	u32 header, cmd, count, tiled, new_cmd, misc;
+
+	do {
+		header = ib->ptr[idx];
+		cmd = GET_DMA_CMD(header);
+		count = GET_DMA_COUNT(header);
+		tiled = GET_DMA_T(header);
+		new_cmd = GET_DMA_NEW(header);
+		misc = GET_DMA_MISC(header);
+
+		switch (cmd) {
+		case DMA_PACKET_WRITE:
+			if (tiled)
+				idx += count + 7;
+			else
+				idx += count + 3;
+			break;
+		case DMA_PACKET_COPY:
+			if (tiled) {
+				if (new_cmd) {
+					switch (misc) {
+					case 0:
+						/* L2T, frame to fields */
+						idx += 10;
+						break;
+					case 1:
+						/* L2T, T2L partial */
+						idx += 12;
+						break;
+					case 3:
+						/* L2T, broadcast */
+						idx += 10;
+						break;
+					case 4:
+						/* L2T, T2L */
+						idx += 9;
+						break;
+					case 5:
+						/* T2T partial */
+						idx += 13;
+						break;
+					case 7:
+						/* L2T, broadcast */
+						idx += 10;
+						break;
+					default:
+						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+						return -EINVAL;
+					}
+				} else {
+					switch (misc) {
+					case 0:
+						idx += 9;
+						break;
+					default:
+						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+						return -EINVAL;
+					}
+				}
+			} else {
+				if (new_cmd) {
+					switch (misc) {
+					case 0:
+						/* L2L, byte */
+						idx += 5;
+						break;
+					case 1:
+						/* L2L, partial */
+						idx += 9;
+						break;
+					case 4:
+						/* L2L, dw, broadcast */
+						idx += 7;
+						break;
+					default:
+						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+						return -EINVAL;
+					}
+				} else {
+					/* L2L, dw */
+					idx += 5;
+				}
+			}
+			break;
+		case DMA_PACKET_CONSTANT_FILL:
+			idx += 4;
+			break;
+		case DMA_PACKET_NOP:
+			idx += 1;
+			break;
+		default:
+			DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+			return -EINVAL;
+		}
+	} while (idx < ib->length_dw);
+
+	return 0;
+}
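
Unlike the CS path, evergreen_dma_ib_parse() does no relocation or bounds checking: VM IBs already run on GPU virtual addresses, so the checker only validates the packet framing and steps over each packet by its fixed stride. The strides for the new-style tiled copies, collected from the switch above (a sketch; the table name is illustrative, sizes are in dwords including the header):

/* Sketch: dword strides for new-style tiled DMA_PACKET_COPY sub-ops,
 * as enforced by the switch above. Unlisted misc values are rejected. */
static const u8 dma_copy_tiled_stride[8] = {
	[0] = 10,	/* L2T, frame to fields */
	[1] = 12,	/* L2T/T2L partial (cayman only) */
	[3] = 10,	/* L2T, broadcast */
	[4] = 9,	/* L2T, T2L */
	[5] = 13,	/* T2T partial (cayman only) */
	[7] = 10,	/* L2T, broadcast */
};
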