aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/radeon/evergreen_cs.c603
-rw-r--r--drivers/gpu/drm/radeon/r100.c23
-rw-r--r--drivers/gpu/drm/radeon/r600_cs.c285
-rw-r--r--drivers/gpu/drm/radeon/radeon.h19
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.c26
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.h3
-rw-r--r--drivers/gpu/drm/radeon/radeon_combios.c6
-rw-r--r--drivers/gpu/drm/radeon/radeon_cp.c14
-rw-r--r--drivers/gpu/drm/radeon/radeon_cs.c13
-rw-r--r--drivers/gpu/drm/radeon/radeon_cursor.c17
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c1
-rw-r--r--drivers/gpu/drm/radeon/radeon_display.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.h1
-rw-r--r--drivers/gpu/drm/radeon/radeon_object.c4
-rw-r--r--include/uapi/drm/radeon_drm.h1
16 files changed, 897 insertions, 127 deletions
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 62c227104781..74c6b42d2597 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -34,6 +34,8 @@
34#define MAX(a,b) (((a)>(b))?(a):(b)) 34#define MAX(a,b) (((a)>(b))?(a):(b))
35#define MIN(a,b) (((a)<(b))?(a):(b)) 35#define MIN(a,b) (((a)<(b))?(a):(b))
36 36
37int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
38 struct radeon_cs_reloc **cs_reloc);
37static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p, 39static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
38 struct radeon_cs_reloc **cs_reloc); 40 struct radeon_cs_reloc **cs_reloc);
39 41
@@ -507,20 +509,28 @@ static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
507 /* height is npipes htiles aligned == npipes * 8 pixel aligned */ 509 /* height is npipes htiles aligned == npipes * 8 pixel aligned */
508 nby = round_up(nby, track->npipes * 8); 510 nby = round_up(nby, track->npipes * 8);
509 } else { 511 } else {
512 /* always assume 8x8 htile */
513 /* align is htile align * 8, htile align vary according to
514 * number of pipe and tile width and nby
515 */
510 switch (track->npipes) { 516 switch (track->npipes) {
511 case 8: 517 case 8:
518 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
512 nbx = round_up(nbx, 64 * 8); 519 nbx = round_up(nbx, 64 * 8);
513 nby = round_up(nby, 64 * 8); 520 nby = round_up(nby, 64 * 8);
514 break; 521 break;
515 case 4: 522 case 4:
523 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
516 nbx = round_up(nbx, 64 * 8); 524 nbx = round_up(nbx, 64 * 8);
517 nby = round_up(nby, 32 * 8); 525 nby = round_up(nby, 32 * 8);
518 break; 526 break;
519 case 2: 527 case 2:
528 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
520 nbx = round_up(nbx, 32 * 8); 529 nbx = round_up(nbx, 32 * 8);
521 nby = round_up(nby, 32 * 8); 530 nby = round_up(nby, 32 * 8);
522 break; 531 break;
523 case 1: 532 case 1:
533 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
524 nbx = round_up(nbx, 32 * 8); 534 nbx = round_up(nbx, 32 * 8);
525 nby = round_up(nby, 16 * 8); 535 nby = round_up(nby, 16 * 8);
526 break; 536 break;
@@ -531,9 +541,10 @@ static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
531 } 541 }
532 } 542 }
533 /* compute number of htile */ 543 /* compute number of htile */
534 nbx = nbx / 8; 544 nbx = nbx >> 3;
535 nby = nby / 8; 545 nby = nby >> 3;
536 size = nbx * nby * 4; 546 /* size must be aligned on npipes * 2K boundary */
547 size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
537 size += track->htile_offset; 548 size += track->htile_offset;
538 549
539 if (size > radeon_bo_size(track->htile_bo)) { 550 if (size > radeon_bo_size(track->htile_bo)) {
@@ -1790,6 +1801,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1790 case DB_HTILE_SURFACE: 1801 case DB_HTILE_SURFACE:
1791 /* 8x8 only */ 1802 /* 8x8 only */
1792 track->htile_surface = radeon_get_ib_value(p, idx); 1803 track->htile_surface = radeon_get_ib_value(p, idx);
1804 /* force 8x8 htile width and height */
1805 ib[idx] |= 3;
1793 track->db_dirty = true; 1806 track->db_dirty = true;
1794 break; 1807 break;
1795 case CB_IMMED0_BASE: 1808 case CB_IMMED0_BASE:
@@ -2243,6 +2256,18 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
2243 command = radeon_get_ib_value(p, idx+4); 2256 command = radeon_get_ib_value(p, idx+4);
2244 size = command & 0x1fffff; 2257 size = command & 0x1fffff;
2245 info = radeon_get_ib_value(p, idx+1); 2258 info = radeon_get_ib_value(p, idx+1);
2259 if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2260 (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2261 ((((info & 0x00300000) >> 20) == 0) &&
2262 (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2263 ((((info & 0x60000000) >> 29) == 0) &&
2264 (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2265 /* non mem to mem copies requires dw aligned count */
2266 if (size % 4) {
2267 DRM_ERROR("CP DMA command requires dw count alignment\n");
2268 return -EINVAL;
2269 }
2270 }
2246 if (command & PACKET3_CP_DMA_CMD_SAS) { 2271 if (command & PACKET3_CP_DMA_CMD_SAS) {
2247 /* src address space is register */ 2272 /* src address space is register */
2248 /* GDS is ok */ 2273 /* GDS is ok */
@@ -2804,6 +2829,455 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
2804 return 0; 2829 return 0;
2805} 2830}
2806 2831
2832/*
2833 * DMA
2834 */
2835
/*
 * Field extractors for the first dword (header) of an async DMA packet.
 * Each macro shifts the field down to bit 0 and masks off the rest.
 */
#define GET_DMA_CMD(h)   (((h) >> 28) & 0xf)     /* bits 31:28 - packet command */
#define GET_DMA_COUNT(h) ((h) & 0xfffff)         /* bits 19:0  - count */
#define GET_DMA_T(h)     (((h) >> 23) & 0x1)     /* bit 23     - tiled flag */
#define GET_DMA_NEW(h)   (((h) >> 26) & 0x1)     /* bit 26     - "new" sub-command flag */
#define GET_DMA_MISC(h)  (((h) >> 20) & 0x7)     /* bits 22:20 - sub-command selector */
2841
2842/**
2843 * evergreen_dma_cs_parse() - parse the DMA IB
2844 * @p: parser structure holding parsing context.
2845 *
2846 * Parses the DMA IB from the CS ioctl and updates
2847 * the GPU addresses based on the reloc information and
2848 * checks for errors. (Evergreen-Cayman)
2849 * Returns 0 for success and an error on failure.
2850 **/
2851int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2852{
2853 struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
2854 struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
2855 u32 header, cmd, count, tiled, new_cmd, misc;
2856 volatile u32 *ib = p->ib.ptr;
2857 u32 idx, idx_value;
2858 u64 src_offset, dst_offset, dst2_offset;
2859 int r;
2860
2861 do {
2862 if (p->idx >= ib_chunk->length_dw) {
2863 DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
2864 p->idx, ib_chunk->length_dw);
2865 return -EINVAL;
2866 }
2867 idx = p->idx;
2868 header = radeon_get_ib_value(p, idx);
2869 cmd = GET_DMA_CMD(header);
2870 count = GET_DMA_COUNT(header);
2871 tiled = GET_DMA_T(header);
2872 new_cmd = GET_DMA_NEW(header);
2873 misc = GET_DMA_MISC(header);
2874
2875 switch (cmd) {
2876 case DMA_PACKET_WRITE:
2877 r = r600_dma_cs_next_reloc(p, &dst_reloc);
2878 if (r) {
2879 DRM_ERROR("bad DMA_PACKET_WRITE\n");
2880 return -EINVAL;
2881 }
2882 if (tiled) {
2883 dst_offset = ib[idx+1];
2884 dst_offset <<= 8;
2885
2886 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2887 p->idx += count + 7;
2888 } else {
2889 dst_offset = ib[idx+1];
2890 dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
2891
2892 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2893 ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2894 p->idx += count + 3;
2895 }
2896 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2897 dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
2898 dst_offset, radeon_bo_size(dst_reloc->robj));
2899 return -EINVAL;
2900 }
2901 break;
2902 case DMA_PACKET_COPY:
2903 r = r600_dma_cs_next_reloc(p, &src_reloc);
2904 if (r) {
2905 DRM_ERROR("bad DMA_PACKET_COPY\n");
2906 return -EINVAL;
2907 }
2908 r = r600_dma_cs_next_reloc(p, &dst_reloc);
2909 if (r) {
2910 DRM_ERROR("bad DMA_PACKET_COPY\n");
2911 return -EINVAL;
2912 }
2913 if (tiled) {
2914 idx_value = radeon_get_ib_value(p, idx + 2);
2915 if (new_cmd) {
2916 switch (misc) {
2917 case 0:
2918 /* L2T, frame to fields */
2919 if (idx_value & (1 << 31)) {
2920 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2921 return -EINVAL;
2922 }
2923 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2924 if (r) {
2925 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2926 return -EINVAL;
2927 }
2928 dst_offset = ib[idx+1];
2929 dst_offset <<= 8;
2930 dst2_offset = ib[idx+2];
2931 dst2_offset <<= 8;
2932 src_offset = ib[idx+8];
2933 src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
2934 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2935 dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
2936 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2937 return -EINVAL;
2938 }
2939 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2940 dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
2941 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2942 return -EINVAL;
2943 }
2944 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2945 dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
2946 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2947 return -EINVAL;
2948 }
2949 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2950 ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
2951 ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
2952 ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2953 p->idx += 10;
2954 break;
2955 case 1:
2956 /* L2T, T2L partial */
2957 if (p->family < CHIP_CAYMAN) {
2958 DRM_ERROR("L2T, T2L Partial is cayman only !\n");
2959 return -EINVAL;
2960 }
2961 /* detile bit */
2962 if (idx_value & (1 << 31)) {
2963 /* tiled src, linear dst */
2964 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
2965
2966 ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2967 ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2968 } else {
2969 /* linear src, tiled dst */
2970 ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
2971 ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2972
2973 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2974 }
2975 p->idx += 12;
2976 break;
2977 case 3:
2978 /* L2T, broadcast */
2979 if (idx_value & (1 << 31)) {
2980 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
2981 return -EINVAL;
2982 }
2983 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2984 if (r) {
2985 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
2986 return -EINVAL;
2987 }
2988 dst_offset = ib[idx+1];
2989 dst_offset <<= 8;
2990 dst2_offset = ib[idx+2];
2991 dst2_offset <<= 8;
2992 src_offset = ib[idx+8];
2993 src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
2994 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2995 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
2996 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2997 return -EINVAL;
2998 }
2999 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3000 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3001 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3002 return -EINVAL;
3003 }
3004 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3005 dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3006 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3007 return -EINVAL;
3008 }
3009 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3010 ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
3011 ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3012 ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3013 p->idx += 10;
3014 break;
3015 case 4:
3016 /* L2T, T2L */
3017 /* detile bit */
3018 if (idx_value & (1 << 31)) {
3019 /* tiled src, linear dst */
3020 src_offset = ib[idx+1];
3021 src_offset <<= 8;
3022 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3023
3024 dst_offset = ib[idx+7];
3025 dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
3026 ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3027 ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3028 } else {
3029 /* linear src, tiled dst */
3030 src_offset = ib[idx+7];
3031 src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
3032 ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3033 ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3034
3035 dst_offset = ib[idx+1];
3036 dst_offset <<= 8;
3037 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3038 }
3039 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3040 dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
3041 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3042 return -EINVAL;
3043 }
3044 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3045 dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
3046 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3047 return -EINVAL;
3048 }
3049 p->idx += 9;
3050 break;
3051 case 5:
3052 /* T2T partial */
3053 if (p->family < CHIP_CAYMAN) {
3054 DRM_ERROR("L2T, T2L Partial is cayman only !\n");
3055 return -EINVAL;
3056 }
3057 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3058 ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3059 p->idx += 13;
3060 break;
3061 case 7:
3062 /* L2T, broadcast */
3063 if (idx_value & (1 << 31)) {
3064 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3065 return -EINVAL;
3066 }
3067 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3068 if (r) {
3069 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3070 return -EINVAL;
3071 }
3072 dst_offset = ib[idx+1];
3073 dst_offset <<= 8;
3074 dst2_offset = ib[idx+2];
3075 dst2_offset <<= 8;
3076 src_offset = ib[idx+8];
3077 src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
3078 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3079 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3080 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3081 return -EINVAL;
3082 }
3083 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3084 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3085 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3086 return -EINVAL;
3087 }
3088 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3089 dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3090 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3091 return -EINVAL;
3092 }
3093 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3094 ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
3095 ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3096 ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3097 p->idx += 10;
3098 break;
3099 default:
3100 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3101 return -EINVAL;
3102 }
3103 } else {
3104 switch (misc) {
3105 case 0:
3106 /* detile bit */
3107 if (idx_value & (1 << 31)) {
3108 /* tiled src, linear dst */
3109 src_offset = ib[idx+1];
3110 src_offset <<= 8;
3111 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3112
3113 dst_offset = ib[idx+7];
3114 dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
3115 ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3116 ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3117 } else {
3118 /* linear src, tiled dst */
3119 src_offset = ib[idx+7];
3120 src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
3121 ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3122 ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3123
3124 dst_offset = ib[idx+1];
3125 dst_offset <<= 8;
3126 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3127 }
3128 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3129 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3130 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3131 return -EINVAL;
3132 }
3133 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3134 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3135 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3136 return -EINVAL;
3137 }
3138 p->idx += 9;
3139 break;
3140 default:
3141 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3142 return -EINVAL;
3143 }
3144 }
3145 } else {
3146 if (new_cmd) {
3147 switch (misc) {
3148 case 0:
3149 /* L2L, byte */
3150 src_offset = ib[idx+2];
3151 src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
3152 dst_offset = ib[idx+1];
3153 dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
3154 if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
3155 dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
3156 src_offset + count, radeon_bo_size(src_reloc->robj));
3157 return -EINVAL;
3158 }
3159 if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
3160 dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
3161 dst_offset + count, radeon_bo_size(dst_reloc->robj));
3162 return -EINVAL;
3163 }
3164 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
3165 ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
3166 ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3167 ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3168 p->idx += 5;
3169 break;
3170 case 1:
3171 /* L2L, partial */
3172 if (p->family < CHIP_CAYMAN) {
3173 DRM_ERROR("L2L Partial is cayman only !\n");
3174 return -EINVAL;
3175 }
3176 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
3177 ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3178 ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
3179 ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3180
3181 p->idx += 9;
3182 break;
3183 case 4:
3184 /* L2L, dw, broadcast */
3185 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3186 if (r) {
3187 DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
3188 return -EINVAL;
3189 }
3190 dst_offset = ib[idx+1];
3191 dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
3192 dst2_offset = ib[idx+2];
3193 dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
3194 src_offset = ib[idx+3];
3195 src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
3196 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3197 dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
3198 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3199 return -EINVAL;
3200 }
3201 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3202 dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
3203 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3204 return -EINVAL;
3205 }
3206 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3207 dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
3208 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3209 return -EINVAL;
3210 }
3211 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3212 ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
3213 ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3214 ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3215 ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
3216 ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3217 p->idx += 7;
3218 break;
3219 default:
3220 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3221 return -EINVAL;
3222 }
3223 } else {
3224 /* L2L, dw */
3225 src_offset = ib[idx+2];
3226 src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
3227 dst_offset = ib[idx+1];
3228 dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
3229 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3230 dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
3231 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3232 return -EINVAL;
3233 }
3234 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3235 dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
3236 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3237 return -EINVAL;
3238 }
3239 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3240 ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3241 ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3242 ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3243 p->idx += 5;
3244 }
3245 }
3246 break;
3247 case DMA_PACKET_CONSTANT_FILL:
3248 r = r600_dma_cs_next_reloc(p, &dst_reloc);
3249 if (r) {
3250 DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3251 return -EINVAL;
3252 }
3253 dst_offset = ib[idx+1];
3254 dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
3255 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3256 dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
3257 dst_offset, radeon_bo_size(dst_reloc->robj));
3258 return -EINVAL;
3259 }
3260 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3261 ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
3262 p->idx += 4;
3263 break;
3264 case DMA_PACKET_NOP:
3265 p->idx += 1;
3266 break;
3267 default:
3268 DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3269 return -EINVAL;
3270 }
3271 } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
3272#if 0
3273 for (r = 0; r < p->ib->length_dw; r++) {
3274 printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
3275 mdelay(1);
3276 }
3277#endif
3278 return 0;
3279}
3280
2807/* vm parser */ 3281/* vm parser */
2808static bool evergreen_vm_reg_valid(u32 reg) 3282static bool evergreen_vm_reg_valid(u32 reg)
2809{ 3283{
@@ -3010,6 +3484,18 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev,
3010 case PACKET3_CP_DMA: 3484 case PACKET3_CP_DMA:
3011 command = ib[idx + 4]; 3485 command = ib[idx + 4];
3012 info = ib[idx + 1]; 3486 info = ib[idx + 1];
3487 if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
3488 (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
3489 ((((info & 0x00300000) >> 20) == 0) &&
3490 (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
3491 ((((info & 0x60000000) >> 29) == 0) &&
3492 (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
3493 /* non mem to mem copies requires dw aligned count */
3494 if ((command & 0x1fffff) % 4) {
3495 DRM_ERROR("CP DMA command requires dw count alignment\n");
3496 return -EINVAL;
3497 }
3498 }
3013 if (command & PACKET3_CP_DMA_CMD_SAS) { 3499 if (command & PACKET3_CP_DMA_CMD_SAS) {
3014 /* src address space is register */ 3500 /* src address space is register */
3015 if (((info & 0x60000000) >> 29) == 0) { 3501 if (((info & 0x60000000) >> 29) == 0) {
@@ -3094,3 +3580,114 @@ int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3094 3580
3095 return ret; 3581 return ret;
3096} 3582}
3583
3584/**
3585 * evergreen_dma_ib_parse() - parse the DMA IB for VM
3586 * @rdev: radeon_device pointer
3587 * @ib: radeon_ib pointer
3588 *
3589 * Parses the DMA IB from the VM CS ioctl
3590 * checks for errors. (Cayman-SI)
3591 * Returns 0 for success and an error on failure.
3592 **/
3593int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3594{
3595 u32 idx = 0;
3596 u32 header, cmd, count, tiled, new_cmd, misc;
3597
3598 do {
3599 header = ib->ptr[idx];
3600 cmd = GET_DMA_CMD(header);
3601 count = GET_DMA_COUNT(header);
3602 tiled = GET_DMA_T(header);
3603 new_cmd = GET_DMA_NEW(header);
3604 misc = GET_DMA_MISC(header);
3605
3606 switch (cmd) {
3607 case DMA_PACKET_WRITE:
3608 if (tiled)
3609 idx += count + 7;
3610 else
3611 idx += count + 3;
3612 break;
3613 case DMA_PACKET_COPY:
3614 if (tiled) {
3615 if (new_cmd) {
3616 switch (misc) {
3617 case 0:
3618 /* L2T, frame to fields */
3619 idx += 10;
3620 break;
3621 case 1:
3622 /* L2T, T2L partial */
3623 idx += 12;
3624 break;
3625 case 3:
3626 /* L2T, broadcast */
3627 idx += 10;
3628 break;
3629 case 4:
3630 /* L2T, T2L */
3631 idx += 9;
3632 break;
3633 case 5:
3634 /* T2T partial */
3635 idx += 13;
3636 break;
3637 case 7:
3638 /* L2T, broadcast */
3639 idx += 10;
3640 break;
3641 default:
3642 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3643 return -EINVAL;
3644 }
3645 } else {
3646 switch (misc) {
3647 case 0:
3648 idx += 9;
3649 break;
3650 default:
3651 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3652 return -EINVAL;
3653 }
3654 }
3655 } else {
3656 if (new_cmd) {
3657 switch (misc) {
3658 case 0:
3659 /* L2L, byte */
3660 idx += 5;
3661 break;
3662 case 1:
3663 /* L2L, partial */
3664 idx += 9;
3665 break;
3666 case 4:
3667 /* L2L, dw, broadcast */
3668 idx += 7;
3669 break;
3670 default:
3671 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3672 return -EINVAL;
3673 }
3674 } else {
3675 /* L2L, dw */
3676 idx += 5;
3677 }
3678 }
3679 break;
3680 case DMA_PACKET_CONSTANT_FILL:
3681 idx += 4;
3682 break;
3683 case DMA_PACKET_NOP:
3684 idx += 1;
3685 break;
3686 default:
3687 DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3688 return -EINVAL;
3689 }
3690 } while (idx < ib->length_dw);
3691
3692 return 0;
3693}
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 376884f1bcd2..8ff7cac222dc 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -4135,23 +4135,36 @@ int r100_init(struct radeon_device *rdev)
4135 return 0; 4135 return 0;
4136} 4136}
4137 4137
4138uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg) 4138uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
4139 bool always_indirect)
4139{ 4140{
4140 if (reg < rdev->rmmio_size) 4141 if (reg < rdev->rmmio_size && !always_indirect)
4141 return readl(((void __iomem *)rdev->rmmio) + reg); 4142 return readl(((void __iomem *)rdev->rmmio) + reg);
4142 else { 4143 else {
4144 unsigned long flags;
4145 uint32_t ret;
4146
4147 spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
4143 writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); 4148 writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4144 return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); 4149 ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4150 spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
4151
4152 return ret;
4145 } 4153 }
4146} 4154}
4147 4155
4148void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) 4156void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
4157 bool always_indirect)
4149{ 4158{
4150 if (reg < rdev->rmmio_size) 4159 if (reg < rdev->rmmio_size && !always_indirect)
4151 writel(v, ((void __iomem *)rdev->rmmio) + reg); 4160 writel(v, ((void __iomem *)rdev->rmmio) + reg);
4152 else { 4161 else {
4162 unsigned long flags;
4163
4164 spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
4153 writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); 4165 writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4154 writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); 4166 writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4167 spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
4155 } 4168 }
4156} 4169}
4157 4170
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 5d6e7f959e75..0be768be530c 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -657,87 +657,30 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p)
657 /* nby is npipes htiles aligned == npipes * 8 pixel aligned */ 657 /* nby is npipes htiles aligned == npipes * 8 pixel aligned */
658 nby = round_up(nby, track->npipes * 8); 658 nby = round_up(nby, track->npipes * 8);
659 } else { 659 } else {
660 /* htile widht & nby (8 or 4) make 2 bits number */ 660 /* always assume 8x8 htile */
661 tmp = track->htile_surface & 3;
662 /* align is htile align * 8, htile align vary according to 661 /* align is htile align * 8, htile align vary according to
663 * number of pipe and tile width and nby 662 * number of pipe and tile width and nby
664 */ 663 */
665 switch (track->npipes) { 664 switch (track->npipes) {
666 case 8: 665 case 8:
667 switch (tmp) { 666 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
668 case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ 667 nbx = round_up(nbx, 64 * 8);
669 nbx = round_up(nbx, 64 * 8); 668 nby = round_up(nby, 64 * 8);
670 nby = round_up(nby, 64 * 8);
671 break;
672 case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
673 case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
674 nbx = round_up(nbx, 64 * 8);
675 nby = round_up(nby, 32 * 8);
676 break;
677 case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
678 nbx = round_up(nbx, 32 * 8);
679 nby = round_up(nby, 32 * 8);
680 break;
681 default:
682 return -EINVAL;
683 }
684 break; 669 break;
685 case 4: 670 case 4:
686 switch (tmp) { 671 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
687 case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ 672 nbx = round_up(nbx, 64 * 8);
688 nbx = round_up(nbx, 64 * 8); 673 nby = round_up(nby, 32 * 8);
689 nby = round_up(nby, 32 * 8);
690 break;
691 case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
692 case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
693 nbx = round_up(nbx, 32 * 8);
694 nby = round_up(nby, 32 * 8);
695 break;
696 case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
697 nbx = round_up(nbx, 32 * 8);
698 nby = round_up(nby, 16 * 8);
699 break;
700 default:
701 return -EINVAL;
702 }
703 break; 674 break;
704 case 2: 675 case 2:
705 switch (tmp) { 676 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
706 case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ 677 nbx = round_up(nbx, 32 * 8);
707 nbx = round_up(nbx, 32 * 8); 678 nby = round_up(nby, 32 * 8);
708 nby = round_up(nby, 32 * 8);
709 break;
710 case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
711 case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
712 nbx = round_up(nbx, 32 * 8);
713 nby = round_up(nby, 16 * 8);
714 break;
715 case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
716 nbx = round_up(nbx, 16 * 8);
717 nby = round_up(nby, 16 * 8);
718 break;
719 default:
720 return -EINVAL;
721 }
722 break; 679 break;
723 case 1: 680 case 1:
724 switch (tmp) { 681 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
725 case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ 682 nbx = round_up(nbx, 32 * 8);
726 nbx = round_up(nbx, 32 * 8); 683 nby = round_up(nby, 16 * 8);
727 nby = round_up(nby, 16 * 8);
728 break;
729 case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
730 case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
731 nbx = round_up(nbx, 16 * 8);
732 nby = round_up(nby, 16 * 8);
733 break;
734 case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
735 nbx = round_up(nbx, 16 * 8);
736 nby = round_up(nby, 8 * 8);
737 break;
738 default:
739 return -EINVAL;
740 }
741 break; 684 break;
742 default: 685 default:
743 dev_warn(p->dev, "%s:%d invalid num pipes %d\n", 686 dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
@@ -746,9 +689,10 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p)
746 } 689 }
747 } 690 }
748 /* compute number of htile */ 691 /* compute number of htile */
749 nbx = G_028D24_HTILE_WIDTH(track->htile_surface) ? nbx / 8 : nbx / 4; 692 nbx = nbx >> 3;
750 nby = G_028D24_HTILE_HEIGHT(track->htile_surface) ? nby / 8 : nby / 4; 693 nby = nby >> 3;
751 size = nbx * nby * 4; 694 /* size must be aligned on npipes * 2K boundary */
695 size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
752 size += track->htile_offset; 696 size += track->htile_offset;
753 697
754 if (size > radeon_bo_size(track->htile_bo)) { 698 if (size > radeon_bo_size(track->htile_bo)) {
@@ -1492,6 +1436,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1492 break; 1436 break;
1493 case DB_HTILE_SURFACE: 1437 case DB_HTILE_SURFACE:
1494 track->htile_surface = radeon_get_ib_value(p, idx); 1438 track->htile_surface = radeon_get_ib_value(p, idx);
1439 /* force 8x8 htile width and height */
1440 ib[idx] |= 3;
1495 track->db_dirty = true; 1441 track->db_dirty = true;
1496 break; 1442 break;
1497 case SQ_PGM_START_FS: 1443 case SQ_PGM_START_FS:
@@ -2568,3 +2514,196 @@ void r600_cs_legacy_init(void)
2568{ 2514{
2569 r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm; 2515 r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm;
2570} 2516}
2517
2518/*
2519 * DMA
2520 */
2521/**
2522 * r600_dma_cs_next_reloc() - parse next reloc
2523 * @p: parser structure holding parsing context.
2524 * @cs_reloc: reloc informations
2525 *
2526 * Return the next reloc, do bo validation and compute
2527 * GPU offset using the provided start.
2528 **/
2529int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
2530 struct radeon_cs_reloc **cs_reloc)
2531{
2532 struct radeon_cs_chunk *relocs_chunk;
2533 unsigned idx;
2534
2535 if (p->chunk_relocs_idx == -1) {
2536 DRM_ERROR("No relocation chunk !\n");
2537 return -EINVAL;
2538 }
2539 *cs_reloc = NULL;
2540 relocs_chunk = &p->chunks[p->chunk_relocs_idx];
2541 idx = p->dma_reloc_idx;
2542 if (idx >= relocs_chunk->length_dw) {
2543 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
2544 idx, relocs_chunk->length_dw);
2545 return -EINVAL;
2546 }
2547 *cs_reloc = p->relocs_ptr[idx];
2548 p->dma_reloc_idx++;
2549 return 0;
2550}
2551
/*
 * Field extractors for a DMA packet header dword:
 * bits 31:28 hold the command, bit 23 the "T" flag, bits 15:0 the count.
 */
#define GET_DMA_CMD(h)		(((h) >> 28) & 0xf)
#define GET_DMA_COUNT(h)	((h) & 0xffff)
#define GET_DMA_T(h)		(((h) >> 23) & 0x1)
2555
2556/**
2557 * r600_dma_cs_parse() - parse the DMA IB
2558 * @p: parser structure holding parsing context.
2559 *
2560 * Parses the DMA IB from the CS ioctl and updates
2561 * the GPU addresses based on the reloc information and
2562 * checks for errors. (R6xx-R7xx)
2563 * Returns 0 for success and an error on failure.
2564 **/
2565int r600_dma_cs_parse(struct radeon_cs_parser *p)
2566{
2567 struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
2568 struct radeon_cs_reloc *src_reloc, *dst_reloc;
2569 u32 header, cmd, count, tiled;
2570 volatile u32 *ib = p->ib.ptr;
2571 u32 idx, idx_value;
2572 u64 src_offset, dst_offset;
2573 int r;
2574
2575 do {
2576 if (p->idx >= ib_chunk->length_dw) {
2577 DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
2578 p->idx, ib_chunk->length_dw);
2579 return -EINVAL;
2580 }
2581 idx = p->idx;
2582 header = radeon_get_ib_value(p, idx);
2583 cmd = GET_DMA_CMD(header);
2584 count = GET_DMA_COUNT(header);
2585 tiled = GET_DMA_T(header);
2586
2587 switch (cmd) {
2588 case DMA_PACKET_WRITE:
2589 r = r600_dma_cs_next_reloc(p, &dst_reloc);
2590 if (r) {
2591 DRM_ERROR("bad DMA_PACKET_WRITE\n");
2592 return -EINVAL;
2593 }
2594 if (tiled) {
2595 dst_offset = ib[idx+1];
2596 dst_offset <<= 8;
2597
2598 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2599 p->idx += count + 5;
2600 } else {
2601 dst_offset = ib[idx+1];
2602 dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
2603
2604 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2605 ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2606 p->idx += count + 3;
2607 }
2608 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2609 dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
2610 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2611 return -EINVAL;
2612 }
2613 break;
2614 case DMA_PACKET_COPY:
2615 r = r600_dma_cs_next_reloc(p, &src_reloc);
2616 if (r) {
2617 DRM_ERROR("bad DMA_PACKET_COPY\n");
2618 return -EINVAL;
2619 }
2620 r = r600_dma_cs_next_reloc(p, &dst_reloc);
2621 if (r) {
2622 DRM_ERROR("bad DMA_PACKET_COPY\n");
2623 return -EINVAL;
2624 }
2625 if (tiled) {
2626 idx_value = radeon_get_ib_value(p, idx + 2);
2627 /* detile bit */
2628 if (idx_value & (1 << 31)) {
2629 /* tiled src, linear dst */
2630 src_offset = ib[idx+1];
2631 src_offset <<= 8;
2632 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
2633
2634 dst_offset = ib[idx+5];
2635 dst_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
2636 ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2637 ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2638 } else {
2639 /* linear src, tiled dst */
2640 src_offset = ib[idx+5];
2641 src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
2642 ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
2643 ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2644
2645 dst_offset = ib[idx+1];
2646 dst_offset <<= 8;
2647 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2648 }
2649 p->idx += 7;
2650 } else {
2651 src_offset = ib[idx+2];
2652 src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
2653 dst_offset = ib[idx+1];
2654 dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
2655
2656 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2657 ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
2658 ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2659 ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2660 p->idx += 5;
2661 }
2662 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2663 dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n",
2664 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2665 return -EINVAL;
2666 }
2667 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2668 dev_warn(p->dev, "DMA write dst buffer too small (%llu %lu)\n",
2669 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2670 return -EINVAL;
2671 }
2672 break;
2673 case DMA_PACKET_CONSTANT_FILL:
2674 if (p->family < CHIP_RV770) {
2675 DRM_ERROR("Constant Fill is 7xx only !\n");
2676 return -EINVAL;
2677 }
2678 r = r600_dma_cs_next_reloc(p, &dst_reloc);
2679 if (r) {
2680 DRM_ERROR("bad DMA_PACKET_WRITE\n");
2681 return -EINVAL;
2682 }
2683 dst_offset = ib[idx+1];
2684 dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
2685 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2686 dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
2687 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2688 return -EINVAL;
2689 }
2690 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2691 ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
2692 p->idx += 4;
2693 break;
2694 case DMA_PACKET_NOP:
2695 p->idx += 1;
2696 break;
2697 default:
2698 DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
2699 return -EINVAL;
2700 }
2701 } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
2702#if 0
2703 for (r = 0; r < p->ib->length_dw; r++) {
2704 printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
2705 mdelay(1);
2706 }
2707#endif
2708 return 0;
2709}
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 1b9120a875ef..5dc744d43d12 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -839,6 +839,7 @@ struct radeon_cs_parser {
839 struct radeon_cs_reloc *relocs; 839 struct radeon_cs_reloc *relocs;
840 struct radeon_cs_reloc **relocs_ptr; 840 struct radeon_cs_reloc **relocs_ptr;
841 struct list_head validated; 841 struct list_head validated;
842 unsigned dma_reloc_idx;
842 /* indices of various chunks */ 843 /* indices of various chunks */
843 int chunk_ib_idx; 844 int chunk_ib_idx;
844 int chunk_relocs_idx; 845 int chunk_relocs_idx;
@@ -1556,6 +1557,8 @@ struct radeon_device {
1556 /* Register mmio */ 1557 /* Register mmio */
1557 resource_size_t rmmio_base; 1558 resource_size_t rmmio_base;
1558 resource_size_t rmmio_size; 1559 resource_size_t rmmio_size;
1560 /* protects concurrent MM_INDEX/DATA based register access */
1561 spinlock_t mmio_idx_lock;
1559 void __iomem *rmmio; 1562 void __iomem *rmmio;
1560 radeon_rreg_t mc_rreg; 1563 radeon_rreg_t mc_rreg;
1561 radeon_wreg_t mc_wreg; 1564 radeon_wreg_t mc_wreg;
@@ -1631,8 +1634,10 @@ int radeon_device_init(struct radeon_device *rdev,
1631void radeon_device_fini(struct radeon_device *rdev); 1634void radeon_device_fini(struct radeon_device *rdev);
1632int radeon_gpu_wait_for_idle(struct radeon_device *rdev); 1635int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
1633 1636
1634uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg); 1637uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
1635void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); 1638 bool always_indirect);
1639void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
1640 bool always_indirect);
1636u32 r100_io_rreg(struct radeon_device *rdev, u32 reg); 1641u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
1637void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v); 1642void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
1638 1643
@@ -1648,9 +1653,11 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
1648#define WREG8(reg, v) writeb(v, (rdev->rmmio) + (reg)) 1653#define WREG8(reg, v) writeb(v, (rdev->rmmio) + (reg))
1649#define RREG16(reg) readw((rdev->rmmio) + (reg)) 1654#define RREG16(reg) readw((rdev->rmmio) + (reg))
1650#define WREG16(reg, v) writew(v, (rdev->rmmio) + (reg)) 1655#define WREG16(reg, v) writew(v, (rdev->rmmio) + (reg))
1651#define RREG32(reg) r100_mm_rreg(rdev, (reg)) 1656#define RREG32(reg) r100_mm_rreg(rdev, (reg), false)
1652#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg))) 1657#define RREG32_IDX(reg) r100_mm_rreg(rdev, (reg), true)
1653#define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v)) 1658#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg), false))
1659#define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v), false)
1660#define WREG32_IDX(reg, v) r100_mm_wreg(rdev, (reg), (v), true)
1654#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) 1661#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
1655#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) 1662#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
1656#define RREG32_PLL(reg) rdev->pll_rreg(rdev, (reg)) 1663#define RREG32_PLL(reg) rdev->pll_rreg(rdev, (reg))
@@ -1675,7 +1682,7 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
1675 tmp_ |= ((val) & ~(mask)); \ 1682 tmp_ |= ((val) & ~(mask)); \
1676 WREG32_PLL(reg, tmp_); \ 1683 WREG32_PLL(reg, tmp_); \
1677 } while (0) 1684 } while (0)
1678#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg))) 1685#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg), false))
1679#define RREG32_IO(reg) r100_io_rreg(rdev, (reg)) 1686#define RREG32_IO(reg) r100_io_rreg(rdev, (reg))
1680#define WREG32_IO(reg, v) r100_io_wreg(rdev, (reg), (v)) 1687#define WREG32_IO(reg, v) r100_io_wreg(rdev, (reg), (v))
1681 1688
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 3ea0475f9a95..596bcbe80ed0 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -952,7 +952,7 @@ static struct radeon_asic r600_asic = {
952 .ib_execute = &r600_dma_ring_ib_execute, 952 .ib_execute = &r600_dma_ring_ib_execute,
953 .emit_fence = &r600_dma_fence_ring_emit, 953 .emit_fence = &r600_dma_fence_ring_emit,
954 .emit_semaphore = &r600_dma_semaphore_ring_emit, 954 .emit_semaphore = &r600_dma_semaphore_ring_emit,
955 .cs_parse = NULL, 955 .cs_parse = &r600_dma_cs_parse,
956 .ring_test = &r600_dma_ring_test, 956 .ring_test = &r600_dma_ring_test,
957 .ib_test = &r600_dma_ib_test, 957 .ib_test = &r600_dma_ib_test,
958 .is_lockup = &r600_dma_is_lockup, 958 .is_lockup = &r600_dma_is_lockup,
@@ -1036,7 +1036,7 @@ static struct radeon_asic rs780_asic = {
1036 .ib_execute = &r600_dma_ring_ib_execute, 1036 .ib_execute = &r600_dma_ring_ib_execute,
1037 .emit_fence = &r600_dma_fence_ring_emit, 1037 .emit_fence = &r600_dma_fence_ring_emit,
1038 .emit_semaphore = &r600_dma_semaphore_ring_emit, 1038 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1039 .cs_parse = NULL, 1039 .cs_parse = &r600_dma_cs_parse,
1040 .ring_test = &r600_dma_ring_test, 1040 .ring_test = &r600_dma_ring_test,
1041 .ib_test = &r600_dma_ib_test, 1041 .ib_test = &r600_dma_ib_test,
1042 .is_lockup = &r600_dma_is_lockup, 1042 .is_lockup = &r600_dma_is_lockup,
@@ -1120,7 +1120,7 @@ static struct radeon_asic rv770_asic = {
1120 .ib_execute = &r600_dma_ring_ib_execute, 1120 .ib_execute = &r600_dma_ring_ib_execute,
1121 .emit_fence = &r600_dma_fence_ring_emit, 1121 .emit_fence = &r600_dma_fence_ring_emit,
1122 .emit_semaphore = &r600_dma_semaphore_ring_emit, 1122 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1123 .cs_parse = NULL, 1123 .cs_parse = &r600_dma_cs_parse,
1124 .ring_test = &r600_dma_ring_test, 1124 .ring_test = &r600_dma_ring_test,
1125 .ib_test = &r600_dma_ib_test, 1125 .ib_test = &r600_dma_ib_test,
1126 .is_lockup = &r600_dma_is_lockup, 1126 .is_lockup = &r600_dma_is_lockup,
@@ -1204,7 +1204,7 @@ static struct radeon_asic evergreen_asic = {
1204 .ib_execute = &evergreen_dma_ring_ib_execute, 1204 .ib_execute = &evergreen_dma_ring_ib_execute,
1205 .emit_fence = &evergreen_dma_fence_ring_emit, 1205 .emit_fence = &evergreen_dma_fence_ring_emit,
1206 .emit_semaphore = &r600_dma_semaphore_ring_emit, 1206 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1207 .cs_parse = NULL, 1207 .cs_parse = &evergreen_dma_cs_parse,
1208 .ring_test = &r600_dma_ring_test, 1208 .ring_test = &r600_dma_ring_test,
1209 .ib_test = &r600_dma_ib_test, 1209 .ib_test = &r600_dma_ib_test,
1210 .is_lockup = &r600_dma_is_lockup, 1210 .is_lockup = &r600_dma_is_lockup,
@@ -1288,7 +1288,7 @@ static struct radeon_asic sumo_asic = {
1288 .ib_execute = &evergreen_dma_ring_ib_execute, 1288 .ib_execute = &evergreen_dma_ring_ib_execute,
1289 .emit_fence = &evergreen_dma_fence_ring_emit, 1289 .emit_fence = &evergreen_dma_fence_ring_emit,
1290 .emit_semaphore = &r600_dma_semaphore_ring_emit, 1290 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1291 .cs_parse = NULL, 1291 .cs_parse = &evergreen_dma_cs_parse,
1292 .ring_test = &r600_dma_ring_test, 1292 .ring_test = &r600_dma_ring_test,
1293 .ib_test = &r600_dma_ib_test, 1293 .ib_test = &r600_dma_ib_test,
1294 .is_lockup = &r600_dma_is_lockup, 1294 .is_lockup = &r600_dma_is_lockup,
@@ -1372,7 +1372,7 @@ static struct radeon_asic btc_asic = {
1372 .ib_execute = &evergreen_dma_ring_ib_execute, 1372 .ib_execute = &evergreen_dma_ring_ib_execute,
1373 .emit_fence = &evergreen_dma_fence_ring_emit, 1373 .emit_fence = &evergreen_dma_fence_ring_emit,
1374 .emit_semaphore = &r600_dma_semaphore_ring_emit, 1374 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1375 .cs_parse = NULL, 1375 .cs_parse = &evergreen_dma_cs_parse,
1376 .ring_test = &r600_dma_ring_test, 1376 .ring_test = &r600_dma_ring_test,
1377 .ib_test = &r600_dma_ib_test, 1377 .ib_test = &r600_dma_ib_test,
1378 .is_lockup = &r600_dma_is_lockup, 1378 .is_lockup = &r600_dma_is_lockup,
@@ -1484,9 +1484,10 @@ static struct radeon_asic cayman_asic = {
1484 }, 1484 },
1485 [R600_RING_TYPE_DMA_INDEX] = { 1485 [R600_RING_TYPE_DMA_INDEX] = {
1486 .ib_execute = &cayman_dma_ring_ib_execute, 1486 .ib_execute = &cayman_dma_ring_ib_execute,
1487 .ib_parse = &evergreen_dma_ib_parse,
1487 .emit_fence = &evergreen_dma_fence_ring_emit, 1488 .emit_fence = &evergreen_dma_fence_ring_emit,
1488 .emit_semaphore = &r600_dma_semaphore_ring_emit, 1489 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1489 .cs_parse = NULL, 1490 .cs_parse = &evergreen_dma_cs_parse,
1490 .ring_test = &r600_dma_ring_test, 1491 .ring_test = &r600_dma_ring_test,
1491 .ib_test = &r600_dma_ib_test, 1492 .ib_test = &r600_dma_ib_test,
1492 .is_lockup = &cayman_dma_is_lockup, 1493 .is_lockup = &cayman_dma_is_lockup,
@@ -1494,9 +1495,10 @@ static struct radeon_asic cayman_asic = {
1494 }, 1495 },
1495 [CAYMAN_RING_TYPE_DMA1_INDEX] = { 1496 [CAYMAN_RING_TYPE_DMA1_INDEX] = {
1496 .ib_execute = &cayman_dma_ring_ib_execute, 1497 .ib_execute = &cayman_dma_ring_ib_execute,
1498 .ib_parse = &evergreen_dma_ib_parse,
1497 .emit_fence = &evergreen_dma_fence_ring_emit, 1499 .emit_fence = &evergreen_dma_fence_ring_emit,
1498 .emit_semaphore = &r600_dma_semaphore_ring_emit, 1500 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1499 .cs_parse = NULL, 1501 .cs_parse = &evergreen_dma_cs_parse,
1500 .ring_test = &r600_dma_ring_test, 1502 .ring_test = &r600_dma_ring_test,
1501 .ib_test = &r600_dma_ib_test, 1503 .ib_test = &r600_dma_ib_test,
1502 .is_lockup = &cayman_dma_is_lockup, 1504 .is_lockup = &cayman_dma_is_lockup,
@@ -1609,9 +1611,10 @@ static struct radeon_asic trinity_asic = {
1609 }, 1611 },
1610 [R600_RING_TYPE_DMA_INDEX] = { 1612 [R600_RING_TYPE_DMA_INDEX] = {
1611 .ib_execute = &cayman_dma_ring_ib_execute, 1613 .ib_execute = &cayman_dma_ring_ib_execute,
1614 .ib_parse = &evergreen_dma_ib_parse,
1612 .emit_fence = &evergreen_dma_fence_ring_emit, 1615 .emit_fence = &evergreen_dma_fence_ring_emit,
1613 .emit_semaphore = &r600_dma_semaphore_ring_emit, 1616 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1614 .cs_parse = NULL, 1617 .cs_parse = &evergreen_dma_cs_parse,
1615 .ring_test = &r600_dma_ring_test, 1618 .ring_test = &r600_dma_ring_test,
1616 .ib_test = &r600_dma_ib_test, 1619 .ib_test = &r600_dma_ib_test,
1617 .is_lockup = &cayman_dma_is_lockup, 1620 .is_lockup = &cayman_dma_is_lockup,
@@ -1619,9 +1622,10 @@ static struct radeon_asic trinity_asic = {
1619 }, 1622 },
1620 [CAYMAN_RING_TYPE_DMA1_INDEX] = { 1623 [CAYMAN_RING_TYPE_DMA1_INDEX] = {
1621 .ib_execute = &cayman_dma_ring_ib_execute, 1624 .ib_execute = &cayman_dma_ring_ib_execute,
1625 .ib_parse = &evergreen_dma_ib_parse,
1622 .emit_fence = &evergreen_dma_fence_ring_emit, 1626 .emit_fence = &evergreen_dma_fence_ring_emit,
1623 .emit_semaphore = &r600_dma_semaphore_ring_emit, 1627 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1624 .cs_parse = NULL, 1628 .cs_parse = &evergreen_dma_cs_parse,
1625 .ring_test = &r600_dma_ring_test, 1629 .ring_test = &r600_dma_ring_test,
1626 .ib_test = &r600_dma_ib_test, 1630 .ib_test = &r600_dma_ib_test,
1627 .is_lockup = &cayman_dma_is_lockup, 1631 .is_lockup = &cayman_dma_is_lockup,
@@ -1734,6 +1738,7 @@ static struct radeon_asic si_asic = {
1734 }, 1738 },
1735 [R600_RING_TYPE_DMA_INDEX] = { 1739 [R600_RING_TYPE_DMA_INDEX] = {
1736 .ib_execute = &cayman_dma_ring_ib_execute, 1740 .ib_execute = &cayman_dma_ring_ib_execute,
1741 .ib_parse = &evergreen_dma_ib_parse,
1737 .emit_fence = &evergreen_dma_fence_ring_emit, 1742 .emit_fence = &evergreen_dma_fence_ring_emit,
1738 .emit_semaphore = &r600_dma_semaphore_ring_emit, 1743 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1739 .cs_parse = NULL, 1744 .cs_parse = NULL,
@@ -1744,6 +1749,7 @@ static struct radeon_asic si_asic = {
1744 }, 1749 },
1745 [CAYMAN_RING_TYPE_DMA1_INDEX] = { 1750 [CAYMAN_RING_TYPE_DMA1_INDEX] = {
1746 .ib_execute = &cayman_dma_ring_ib_execute, 1751 .ib_execute = &cayman_dma_ring_ib_execute,
1752 .ib_parse = &evergreen_dma_ib_parse,
1747 .emit_fence = &evergreen_dma_fence_ring_emit, 1753 .emit_fence = &evergreen_dma_fence_ring_emit,
1748 .emit_semaphore = &r600_dma_semaphore_ring_emit, 1754 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1749 .cs_parse = NULL, 1755 .cs_parse = NULL,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index c338931190a5..5f4882cc2152 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -304,6 +304,7 @@ void r600_pcie_gart_tlb_flush(struct radeon_device *rdev);
304uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg); 304uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg);
305void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); 305void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
306int r600_cs_parse(struct radeon_cs_parser *p); 306int r600_cs_parse(struct radeon_cs_parser *p);
307int r600_dma_cs_parse(struct radeon_cs_parser *p);
307void r600_fence_ring_emit(struct radeon_device *rdev, 308void r600_fence_ring_emit(struct radeon_device *rdev,
308 struct radeon_fence *fence); 309 struct radeon_fence *fence);
309void r600_semaphore_ring_emit(struct radeon_device *rdev, 310void r600_semaphore_ring_emit(struct radeon_device *rdev,
@@ -430,6 +431,7 @@ u32 evergreen_get_vblank_counter(struct radeon_device *rdev, int crtc);
430int evergreen_irq_set(struct radeon_device *rdev); 431int evergreen_irq_set(struct radeon_device *rdev);
431int evergreen_irq_process(struct radeon_device *rdev); 432int evergreen_irq_process(struct radeon_device *rdev);
432extern int evergreen_cs_parse(struct radeon_cs_parser *p); 433extern int evergreen_cs_parse(struct radeon_cs_parser *p);
434extern int evergreen_dma_cs_parse(struct radeon_cs_parser *p);
433extern void evergreen_pm_misc(struct radeon_device *rdev); 435extern void evergreen_pm_misc(struct radeon_device *rdev);
434extern void evergreen_pm_prepare(struct radeon_device *rdev); 436extern void evergreen_pm_prepare(struct radeon_device *rdev);
435extern void evergreen_pm_finish(struct radeon_device *rdev); 437extern void evergreen_pm_finish(struct radeon_device *rdev);
@@ -471,6 +473,7 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
471 uint64_t addr, unsigned count, 473 uint64_t addr, unsigned count,
472 uint32_t incr, uint32_t flags); 474 uint32_t incr, uint32_t flags);
473int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); 475int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
476int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
474void cayman_dma_ring_ib_execute(struct radeon_device *rdev, 477void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
475 struct radeon_ib *ib); 478 struct radeon_ib *ib);
476bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring); 479bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 45b660b27cfc..4af89126e223 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -3246,11 +3246,9 @@ static uint32_t combios_detect_ram(struct drm_device *dev, int ram,
3246 while (ram--) { 3246 while (ram--) {
3247 addr = ram * 1024 * 1024; 3247 addr = ram * 1024 * 1024;
3248 /* write to each page */ 3248 /* write to each page */
3249 WREG32(RADEON_MM_INDEX, (addr) | RADEON_MM_APER); 3249 WREG32_IDX((addr) | RADEON_MM_APER, 0xdeadbeef);
3250 WREG32(RADEON_MM_DATA, 0xdeadbeef);
3251 /* read back and verify */ 3250 /* read back and verify */
3252 WREG32(RADEON_MM_INDEX, (addr) | RADEON_MM_APER); 3251 if (RREG32_IDX((addr) | RADEON_MM_APER) != 0xdeadbeef)
3253 if (RREG32(RADEON_MM_DATA) != 0xdeadbeef)
3254 return 0; 3252 return 0;
3255 } 3253 }
3256 3254
diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c
index 8b2797dc7b64..9143fc45e35b 100644
--- a/drivers/gpu/drm/radeon/radeon_cp.c
+++ b/drivers/gpu/drm/radeon/radeon_cp.c
@@ -116,20 +116,6 @@ u32 radeon_get_scratch(drm_radeon_private_t *dev_priv, int index)
116 } 116 }
117} 117}
118 118
119u32 RADEON_READ_MM(drm_radeon_private_t *dev_priv, int addr)
120{
121 u32 ret;
122
123 if (addr < 0x10000)
124 ret = DRM_READ32(dev_priv->mmio, addr);
125 else {
126 DRM_WRITE32(dev_priv->mmio, RADEON_MM_INDEX, addr);
127 ret = DRM_READ32(dev_priv->mmio, RADEON_MM_DATA);
128 }
129
130 return ret;
131}
132
133static u32 R500_READ_MCIND(drm_radeon_private_t *dev_priv, int addr) 119static u32 R500_READ_MCIND(drm_radeon_private_t *dev_priv, int addr)
134{ 120{
135 u32 ret; 121 u32 ret;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 41672cc563fb..396baba0141a 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -43,6 +43,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
43 return 0; 43 return 0;
44 } 44 }
45 chunk = &p->chunks[p->chunk_relocs_idx]; 45 chunk = &p->chunks[p->chunk_relocs_idx];
46 p->dma_reloc_idx = 0;
46 /* FIXME: we assume that each relocs use 4 dwords */ 47 /* FIXME: we assume that each relocs use 4 dwords */
47 p->nrelocs = chunk->length_dw / 4; 48 p->nrelocs = chunk->length_dw / 4;
48 p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL); 49 p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
@@ -111,6 +112,18 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
111 } else 112 } else
112 p->ring = RADEON_RING_TYPE_GFX_INDEX; 113 p->ring = RADEON_RING_TYPE_GFX_INDEX;
113 break; 114 break;
115 case RADEON_CS_RING_DMA:
116 if (p->rdev->family >= CHIP_CAYMAN) {
117 if (p->priority > 0)
118 p->ring = R600_RING_TYPE_DMA_INDEX;
119 else
120 p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
121 } else if (p->rdev->family >= CHIP_R600) {
122 p->ring = R600_RING_TYPE_DMA_INDEX;
123 } else {
124 return -EINVAL;
125 }
126 break;
114 } 127 }
115 return 0; 128 return 0;
116} 129}
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c
index 0fe56c9f64bd..ad6df625e8b8 100644
--- a/drivers/gpu/drm/radeon/radeon_cursor.c
+++ b/drivers/gpu/drm/radeon/radeon_cursor.c
@@ -66,24 +66,25 @@ static void radeon_hide_cursor(struct drm_crtc *crtc)
66 struct radeon_device *rdev = crtc->dev->dev_private; 66 struct radeon_device *rdev = crtc->dev->dev_private;
67 67
68 if (ASIC_IS_DCE4(rdev)) { 68 if (ASIC_IS_DCE4(rdev)) {
69 WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset); 69 WREG32_IDX(EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset,
70 WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) | 70 EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) |
71 EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2)); 71 EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2));
72 } else if (ASIC_IS_AVIVO(rdev)) { 72 } else if (ASIC_IS_AVIVO(rdev)) {
73 WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset); 73 WREG32_IDX(AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset,
74 WREG32(RADEON_MM_DATA, (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT)); 74 (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT));
75 } else { 75 } else {
76 u32 reg;
76 switch (radeon_crtc->crtc_id) { 77 switch (radeon_crtc->crtc_id) {
77 case 0: 78 case 0:
78 WREG32(RADEON_MM_INDEX, RADEON_CRTC_GEN_CNTL); 79 reg = RADEON_CRTC_GEN_CNTL;
79 break; 80 break;
80 case 1: 81 case 1:
81 WREG32(RADEON_MM_INDEX, RADEON_CRTC2_GEN_CNTL); 82 reg = RADEON_CRTC2_GEN_CNTL;
82 break; 83 break;
83 default: 84 default:
84 return; 85 return;
85 } 86 }
86 WREG32_P(RADEON_MM_DATA, 0, ~RADEON_CRTC_CUR_EN); 87 WREG32_IDX(reg, RREG32_IDX(reg) & ~RADEON_CRTC_CUR_EN);
87 } 88 }
88} 89}
89 90
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index e2f5f888c374..49b06590001e 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1059,6 +1059,7 @@ int radeon_device_init(struct radeon_device *rdev,
1059 1059
1060 /* Registers mapping */ 1060 /* Registers mapping */
1061 /* TODO: block userspace mapping of io register */ 1061 /* TODO: block userspace mapping of io register */
1062 spin_lock_init(&rdev->mmio_idx_lock);
1062 rdev->rmmio_base = pci_resource_start(rdev->pdev, 2); 1063 rdev->rmmio_base = pci_resource_start(rdev->pdev, 2);
1063 rdev->rmmio_size = pci_resource_len(rdev->pdev, 2); 1064 rdev->rmmio_size = pci_resource_len(rdev->pdev, 2);
1064 rdev->rmmio = ioremap(rdev->rmmio_base, rdev->rmmio_size); 1065 rdev->rmmio = ioremap(rdev->rmmio_base, rdev->rmmio_size);
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index bfa2a6015727..310c0e5254ba 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -378,8 +378,12 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc,
378 work->old_rbo = rbo; 378 work->old_rbo = rbo;
379 obj = new_radeon_fb->obj; 379 obj = new_radeon_fb->obj;
380 rbo = gem_to_radeon_bo(obj); 380 rbo = gem_to_radeon_bo(obj);
381
382 spin_lock(&rbo->tbo.bdev->fence_lock);
381 if (rbo->tbo.sync_obj) 383 if (rbo->tbo.sync_obj)
382 work->fence = radeon_fence_ref(rbo->tbo.sync_obj); 384 work->fence = radeon_fence_ref(rbo->tbo.sync_obj);
385 spin_unlock(&rbo->tbo.bdev->fence_lock);
386
383 INIT_WORK(&work->work, radeon_unpin_work_func); 387 INIT_WORK(&work->work, radeon_unpin_work_func);
384 388
385 /* We borrow the event spin lock for protecting unpin_work */ 389 /* We borrow the event spin lock for protecting unpin_work */
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 91b64278c4ff..9b1a727d3c9e 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -66,9 +66,11 @@
66 * 2.23.0 - allow STRMOUT_BASE_UPDATE on RS780 and RS880 66 * 2.23.0 - allow STRMOUT_BASE_UPDATE on RS780 and RS880
67 * 2.24.0 - eg only: allow MIP_ADDRESS=0 for MSAA textures 67 * 2.24.0 - eg only: allow MIP_ADDRESS=0 for MSAA textures
68 * 2.25.0 - eg+: new info request for num SE and num SH 68 * 2.25.0 - eg+: new info request for num SE and num SH
69 * 2.26.0 - r600-eg: fix htile size computation
70 * 2.27.0 - r600-SI: Add CS ioctl support for async DMA
69 */ 71 */
70#define KMS_DRIVER_MAJOR 2 72#define KMS_DRIVER_MAJOR 2
71#define KMS_DRIVER_MINOR 25 73#define KMS_DRIVER_MINOR 27
72#define KMS_DRIVER_PATCHLEVEL 0 74#define KMS_DRIVER_PATCHLEVEL 0
73int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); 75int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
74int radeon_driver_unload_kms(struct drm_device *dev); 76int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index a1b59ca96d01..e7fdf163a8ca 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -366,7 +366,6 @@ extern int radeon_cp_buffers(struct drm_device *dev, void *data, struct drm_file
366extern u32 radeon_read_fb_location(drm_radeon_private_t *dev_priv); 366extern u32 radeon_read_fb_location(drm_radeon_private_t *dev_priv);
367extern void radeon_write_agp_location(drm_radeon_private_t *dev_priv, u32 agp_loc); 367extern void radeon_write_agp_location(drm_radeon_private_t *dev_priv, u32 agp_loc);
368extern void radeon_write_agp_base(drm_radeon_private_t *dev_priv, u64 agp_base); 368extern void radeon_write_agp_base(drm_radeon_private_t *dev_priv, u64 agp_base);
369extern u32 RADEON_READ_MM(drm_radeon_private_t *dev_priv, int addr);
370 369
371extern void radeon_freelist_reset(struct drm_device * dev); 370extern void radeon_freelist_reset(struct drm_device * dev);
372extern struct drm_buf *radeon_freelist_get(struct drm_device * dev); 371extern struct drm_buf *radeon_freelist_get(struct drm_device * dev);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 93d3445477be..883c95d8d90f 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -96,9 +96,9 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
96 } 96 }
97 if (domain & RADEON_GEM_DOMAIN_CPU) { 97 if (domain & RADEON_GEM_DOMAIN_CPU) {
98 if (rbo->rdev->flags & RADEON_IS_AGP) { 98 if (rbo->rdev->flags & RADEON_IS_AGP) {
99 rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT; 99 rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_SYSTEM;
100 } else { 100 } else {
101 rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT; 101 rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
102 } 102 }
103 } 103 }
104 if (!c) 104 if (!c)
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 5645a878faec..eeda91774c8a 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -917,6 +917,7 @@ struct drm_radeon_gem_va {
917/* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type */ 917/* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type */
918#define RADEON_CS_RING_GFX 0 918#define RADEON_CS_RING_GFX 0
919#define RADEON_CS_RING_COMPUTE 1 919#define RADEON_CS_RING_COMPUTE 1
920#define RADEON_CS_RING_DMA 2
920/* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */ 921/* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
921/* 0 = normal, + = higher priority, - = lower priority */ 922/* 0 = normal, + = higher priority, - = lower priority */
922 923