author     Linus Torvalds <torvalds@linux-foundation.org>  2012-12-17 11:26:17 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-12-17 11:26:17 -0500
commit     3c2e81ef344a90bb0a39d84af6878b4aeff568a2 (patch)
tree       bd8c8b23466174899d2fe4d35af6e1e838edb068 /drivers/gpu/drm/radeon/evergreen_cs.c
parent     221392c3ad0432e39fd74a349364f66cb0ed78f6 (diff)
parent     55bde6b1442fed8af67b92d21acce67db454c9f9 (diff)
Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
Pull DRM updates from Dave Airlie:
 "This is the one and only next pull for 3.8, we had a regression we
  found last week, so I was waiting for that to resolve itself, and I
  ended up with some Intel fixes on top as well.

  Highlights:
   - new driver: nvidia tegra 20/30/hdmi support
   - radeon: add support for previously unused DMA engines, more HDMI
     regs, eviction speeds ups and fixes
   - i915: HSW support enable, agp removal on GEN6, seqno wrapping
   - exynos: IPP subsystem support (image post proc), HDMI
   - nouveau: display class reworking, nv20->40 z compression
   - ttm: start of locking fixes, rcu usage for lookups,
   - core: documentation updates, docbook integration, monotonic clock
     usage, move from connector to object properties"

* 'drm-next' of git://people.freedesktop.org/~airlied/linux: (590 commits)
  drm/exynos: add gsc ipp driver
  drm/exynos: add rotator ipp driver
  drm/exynos: add fimc ipp driver
  drm/exynos: add iommu support for ipp
  drm/exynos: add ipp subsystem
  drm/exynos: support device tree for fimd
  radeon: fix regression with eviction since evict caching changes
  drm/radeon: add more pedantic checks in the CP DMA checker
  drm/radeon: bump version for CS ioctl support for async DMA
  drm/radeon: enable the async DMA rings in the CS ioctl
  drm/radeon: add VM CS parser support for async DMA on cayman/TN/SI
  drm/radeon/kms: add evergreen/cayman CS parser for async DMA (v2)
  drm/radeon/kms: add 6xx/7xx CS parser for async DMA (v2)
  drm/radeon: fix htile buffer size computation for command stream checker
  drm/radeon: fix fence locking in the pageflip callback
  drm/radeon: make indirect register access concurrency-safe
  drm/radeon: add W|RREG32_IDX for MM_INDEX|DATA based mmio accesss
  drm/exynos: support extended screen coordinate of fimd
  drm/exynos: fix x, y coordinates for right bottom pixel
  drm/exynos: fix fb offset calculation for plane
  ...
Diffstat (limited to 'drivers/gpu/drm/radeon/evergreen_cs.c')
-rw-r--r--  drivers/gpu/drm/radeon/evergreen_cs.c  739
1 file changed, 736 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index c042e497e450..74c6b42d2597 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -34,6 +34,8 @@
 #define MAX(a,b) (((a)>(b))?(a):(b))
 #define MIN(a,b) (((a)<(b))?(a):(b))
 
+int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
+			   struct radeon_cs_reloc **cs_reloc);
 static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
 					  struct radeon_cs_reloc **cs_reloc);
 
@@ -507,20 +509,28 @@ static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
 		/* height is npipes htiles aligned == npipes * 8 pixel aligned */
 		nby = round_up(nby, track->npipes * 8);
 	} else {
+		/* always assume 8x8 htile */
+		/* align is htile align * 8, htile align vary according to
+		 * number of pipe and tile width and nby
+		 */
 		switch (track->npipes) {
 		case 8:
+			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
 			nbx = round_up(nbx, 64 * 8);
 			nby = round_up(nby, 64 * 8);
 			break;
 		case 4:
+			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
 			nbx = round_up(nbx, 64 * 8);
 			nby = round_up(nby, 32 * 8);
 			break;
 		case 2:
+			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
 			nbx = round_up(nbx, 32 * 8);
 			nby = round_up(nby, 32 * 8);
 			break;
 		case 1:
+			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
 			nbx = round_up(nbx, 32 * 8);
 			nby = round_up(nby, 16 * 8);
 			break;
@@ -531,9 +541,10 @@ static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
 		}
 	}
 	/* compute number of htile */
-	nbx = nbx / 8;
-	nby = nby / 8;
-	size = nbx * nby * 4;
+	nbx = nbx >> 3;
+	nby = nby >> 3;
+	/* size must be aligned on npipes * 2K boundary */
+	size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
 	size += track->htile_offset;
 
 	if (size > radeon_bo_size(track->htile_bo)) {
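The hunk above replaces the plain nbx/8 * nby/8 * 4 size with a value rounded up to an npipes * 2K boundary. A rough, self-contained sketch of the new computation for the 8-pipe case (the helper name and sample figures are illustrative assumptions, not kernel code; the open-coded rounding stands in for the kernel's round_up()/roundup() helpers):

/* Hypothetical sketch: htile buffer size for an 8-pipe ASIC, following the
 * logic of the hunk above. Pixels are first padded to the 8-pipe alignment
 * (64*8 x 64*8), each 8x8 pixel tile costs one 4-byte htile word, and the
 * total is padded to npipes * 2K == 16KB.
 */
static unsigned int htile_size_example_8pipe(unsigned int nbx, unsigned int nby)
{
	nbx = (nbx + 511) & ~511U;		/* round_up(nbx, 64 * 8) */
	nby = (nby + 511) & ~511U;		/* round_up(nby, 64 * 8) */
	nbx >>= 3;				/* pixels -> htiles across */
	nby >>= 3;				/* pixels -> htiles down   */
	return ((nbx * nby * 4) + 16383U) & ~16383U; /* pad to 8 * 2KB */
}
/* e.g. htile_size_example_8pipe(2048, 1536) == 256 * 192 * 4 == 196608 bytes */

For a surface whose htile area is already a multiple of npipes * 2K the final padding is a no-op; surfaces where it is not are exactly what the old, unpadded computation under-sized, which is what the "fix htile buffer size computation" commit in this pull addresses.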
@@ -1790,6 +1801,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 	case DB_HTILE_SURFACE:
 		/* 8x8 only */
 		track->htile_surface = radeon_get_ib_value(p, idx);
+		/* force 8x8 htile width and height */
+		ib[idx] |= 3;
 		track->db_dirty = true;
 		break;
 	case CB_IMMED0_BASE:
@@ -2232,6 +2245,107 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			ib[idx+2] = upper_32_bits(offset) & 0xff;
 		}
 		break;
+	case PACKET3_CP_DMA:
+	{
+		u32 command, size, info;
+		u64 offset, tmp;
+		if (pkt->count != 4) {
+			DRM_ERROR("bad CP DMA\n");
+			return -EINVAL;
+		}
+		command = radeon_get_ib_value(p, idx+4);
+		size = command & 0x1fffff;
+		info = radeon_get_ib_value(p, idx+1);
+		if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
+		    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
+		    ((((info & 0x00300000) >> 20) == 0) &&
+		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
+		    ((((info & 0x60000000) >> 29) == 0) &&
+		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
+			/* non mem to mem copies requires dw aligned count */
+			if (size % 4) {
+				DRM_ERROR("CP DMA command requires dw count alignment\n");
+				return -EINVAL;
+			}
+		}
+		if (command & PACKET3_CP_DMA_CMD_SAS) {
+			/* src address space is register */
+			/* GDS is ok */
+			if (((info & 0x60000000) >> 29) != 1) {
+				DRM_ERROR("CP DMA SAS not supported\n");
+				return -EINVAL;
+			}
+		} else {
+			if (command & PACKET3_CP_DMA_CMD_SAIC) {
+				DRM_ERROR("CP DMA SAIC only supported for registers\n");
+				return -EINVAL;
+			}
+			/* src address space is memory */
+			if (((info & 0x60000000) >> 29) == 0) {
+				r = evergreen_cs_packet_next_reloc(p, &reloc);
+				if (r) {
+					DRM_ERROR("bad CP DMA SRC\n");
+					return -EINVAL;
+				}
+
+				tmp = radeon_get_ib_value(p, idx) +
+					((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+
+				offset = reloc->lobj.gpu_offset + tmp;
+
+				if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+					dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
+						 tmp + size, radeon_bo_size(reloc->robj));
+					return -EINVAL;
+				}
+
+				ib[idx] = offset;
+				ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
+			} else if (((info & 0x60000000) >> 29) != 2) {
+				DRM_ERROR("bad CP DMA SRC_SEL\n");
+				return -EINVAL;
+			}
+		}
+		if (command & PACKET3_CP_DMA_CMD_DAS) {
+			/* dst address space is register */
+			/* GDS is ok */
+			if (((info & 0x00300000) >> 20) != 1) {
+				DRM_ERROR("CP DMA DAS not supported\n");
+				return -EINVAL;
+			}
+		} else {
+			/* dst address space is memory */
+			if (command & PACKET3_CP_DMA_CMD_DAIC) {
+				DRM_ERROR("CP DMA DAIC only supported for registers\n");
+				return -EINVAL;
+			}
+			if (((info & 0x00300000) >> 20) == 0) {
+				r = evergreen_cs_packet_next_reloc(p, &reloc);
+				if (r) {
+					DRM_ERROR("bad CP DMA DST\n");
+					return -EINVAL;
+				}
+
+				tmp = radeon_get_ib_value(p, idx+2) +
+					((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
+
+				offset = reloc->lobj.gpu_offset + tmp;
+
+				if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+					dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
+						 tmp + size, radeon_bo_size(reloc->robj));
+					return -EINVAL;
+				}
+
+				ib[idx+2] = offset;
+				ib[idx+3] = upper_32_bits(offset) & 0xff;
+			} else {
+				DRM_ERROR("bad CP DMA DST_SEL\n");
+				return -EINVAL;
+			}
+		}
+		break;
+	}
 	case PACKET3_SURFACE_SYNC:
 		if (pkt->count != 3) {
 			DRM_ERROR("bad SURFACE_SYNC\n");
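The masks in the new CP DMA check pull two small fields out of the packet's second dword: bits 29-30 select the source (0 is a plain memory address; the in-line comments treat non-zero as GDS or DATA) and bits 20-21 select the destination (0 is memory, non-zero GDS). A hedged restatement of that decode (the helper names are informal labels, not definitions from the radeon register headers):

/* Illustrative only: mirrors the masks used by the CP DMA checker above. */
static inline unsigned int cp_dma_src_sel(u32 info)
{
	return (info & 0x60000000) >> 29;	/* 0 = plain memory address */
}

static inline unsigned int cp_dma_dst_sel(u32 info)
{
	return (info & 0x00300000) >> 20;	/* 0 = plain memory address */
}

With those names, the compound condition reads: whenever either end of the transfer is not a plain memory address (GDS, DATA, or a register selected via the SAS/DAS bits), the byte count in the command dword must be dword aligned.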
@@ -2715,6 +2829,455 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
 	return 0;
 }
 
2832/*
2833 * DMA
2834 */
2835
2836#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
2837#define GET_DMA_COUNT(h) ((h) & 0x000fffff)
2838#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
2839#define GET_DMA_NEW(h) (((h) & 0x04000000) >> 26)
2840#define GET_DMA_MISC(h) (((h) & 0x0700000) >> 20)
2841
2842/**
2843 * evergreen_dma_cs_parse() - parse the DMA IB
2844 * @p: parser structure holding parsing context.
2845 *
2846 * Parses the DMA IB from the CS ioctl and updates
2847 * the GPU addresses based on the reloc information and
2848 * checks for errors. (Evergreen-Cayman)
2849 * Returns 0 for success and an error on failure.
2850 **/
2851int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2852{
2853 struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
2854 struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
2855 u32 header, cmd, count, tiled, new_cmd, misc;
2856 volatile u32 *ib = p->ib.ptr;
2857 u32 idx, idx_value;
2858 u64 src_offset, dst_offset, dst2_offset;
2859 int r;
2860
2861 do {
2862 if (p->idx >= ib_chunk->length_dw) {
2863 DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
2864 p->idx, ib_chunk->length_dw);
2865 return -EINVAL;
2866 }
2867 idx = p->idx;
2868 header = radeon_get_ib_value(p, idx);
2869 cmd = GET_DMA_CMD(header);
2870 count = GET_DMA_COUNT(header);
2871 tiled = GET_DMA_T(header);
2872 new_cmd = GET_DMA_NEW(header);
2873 misc = GET_DMA_MISC(header);
2874
2875 switch (cmd) {
2876 case DMA_PACKET_WRITE:
2877 r = r600_dma_cs_next_reloc(p, &dst_reloc);
2878 if (r) {
2879 DRM_ERROR("bad DMA_PACKET_WRITE\n");
2880 return -EINVAL;
2881 }
2882 if (tiled) {
2883 dst_offset = ib[idx+1];
2884 dst_offset <<= 8;
2885
2886 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2887 p->idx += count + 7;
2888 } else {
2889 dst_offset = ib[idx+1];
2890 dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
2891
2892 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2893 ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2894 p->idx += count + 3;
2895 }
2896 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2897 dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
2898 dst_offset, radeon_bo_size(dst_reloc->robj));
2899 return -EINVAL;
2900 }
2901 break;
2902 case DMA_PACKET_COPY:
2903 r = r600_dma_cs_next_reloc(p, &src_reloc);
2904 if (r) {
2905 DRM_ERROR("bad DMA_PACKET_COPY\n");
2906 return -EINVAL;
2907 }
2908 r = r600_dma_cs_next_reloc(p, &dst_reloc);
2909 if (r) {
2910 DRM_ERROR("bad DMA_PACKET_COPY\n");
2911 return -EINVAL;
2912 }
2913 if (tiled) {
2914 idx_value = radeon_get_ib_value(p, idx + 2);
2915 if (new_cmd) {
2916 switch (misc) {
2917 case 0:
2918 /* L2T, frame to fields */
2919 if (idx_value & (1 << 31)) {
2920 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2921 return -EINVAL;
2922 }
2923 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2924 if (r) {
2925 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2926 return -EINVAL;
2927 }
2928 dst_offset = ib[idx+1];
2929 dst_offset <<= 8;
2930 dst2_offset = ib[idx+2];
2931 dst2_offset <<= 8;
2932 src_offset = ib[idx+8];
2933 src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
2934 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2935 dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
2936 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2937 return -EINVAL;
2938 }
2939 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2940 dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
2941 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2942 return -EINVAL;
2943 }
2944 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2945 dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
2946 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2947 return -EINVAL;
2948 }
2949 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2950 ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
2951 ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
2952 ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2953 p->idx += 10;
2954 break;
2955 case 1:
2956 /* L2T, T2L partial */
2957 if (p->family < CHIP_CAYMAN) {
2958 DRM_ERROR("L2T, T2L Partial is cayman only !\n");
2959 return -EINVAL;
2960 }
2961 /* detile bit */
2962 if (idx_value & (1 << 31)) {
2963 /* tiled src, linear dst */
2964 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
2965
2966 ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2967 ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2968 } else {
2969 /* linear src, tiled dst */
2970 ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
2971 ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2972
2973 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2974 }
2975 p->idx += 12;
2976 break;
2977 case 3:
2978 /* L2T, broadcast */
2979 if (idx_value & (1 << 31)) {
2980 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
2981 return -EINVAL;
2982 }
2983 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2984 if (r) {
2985 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
2986 return -EINVAL;
2987 }
2988 dst_offset = ib[idx+1];
2989 dst_offset <<= 8;
2990 dst2_offset = ib[idx+2];
2991 dst2_offset <<= 8;
2992 src_offset = ib[idx+8];
2993 src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
2994 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2995 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
2996 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2997 return -EINVAL;
2998 }
2999 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3000 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3001 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3002 return -EINVAL;
3003 }
3004 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3005 dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3006 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3007 return -EINVAL;
3008 }
3009 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3010 ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
3011 ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3012 ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3013 p->idx += 10;
3014 break;
3015 case 4:
3016 /* L2T, T2L */
3017 /* detile bit */
3018 if (idx_value & (1 << 31)) {
3019 /* tiled src, linear dst */
3020 src_offset = ib[idx+1];
3021 src_offset <<= 8;
3022 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3023
3024 dst_offset = ib[idx+7];
3025 dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
3026 ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3027 ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3028 } else {
3029 /* linear src, tiled dst */
3030 src_offset = ib[idx+7];
3031 src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
3032 ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3033 ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3034
3035 dst_offset = ib[idx+1];
3036 dst_offset <<= 8;
3037 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3038 }
3039 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3040 dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
3041 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3042 return -EINVAL;
3043 }
3044 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3045 dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
3046 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3047 return -EINVAL;
3048 }
3049 p->idx += 9;
3050 break;
3051 case 5:
3052 /* T2T partial */
3053 if (p->family < CHIP_CAYMAN) {
3054 DRM_ERROR("L2T, T2L Partial is cayman only !\n");
3055 return -EINVAL;
3056 }
3057 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3058 ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3059 p->idx += 13;
3060 break;
3061 case 7:
3062 /* L2T, broadcast */
3063 if (idx_value & (1 << 31)) {
3064 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3065 return -EINVAL;
3066 }
3067 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3068 if (r) {
3069 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3070 return -EINVAL;
3071 }
3072 dst_offset = ib[idx+1];
3073 dst_offset <<= 8;
3074 dst2_offset = ib[idx+2];
3075 dst2_offset <<= 8;
3076 src_offset = ib[idx+8];
3077 src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
3078 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3079 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3080 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3081 return -EINVAL;
3082 }
3083 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3084 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3085 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3086 return -EINVAL;
3087 }
3088 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3089 dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3090 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3091 return -EINVAL;
3092 }
3093 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3094 ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
3095 ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3096 ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3097 p->idx += 10;
3098 break;
3099 default:
3100 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3101 return -EINVAL;
3102 }
3103 } else {
3104 switch (misc) {
3105 case 0:
3106 /* detile bit */
3107 if (idx_value & (1 << 31)) {
3108 /* tiled src, linear dst */
3109 src_offset = ib[idx+1];
3110 src_offset <<= 8;
3111 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3112
3113 dst_offset = ib[idx+7];
3114 dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
3115 ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3116 ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3117 } else {
3118 /* linear src, tiled dst */
3119 src_offset = ib[idx+7];
3120 src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
3121 ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3122 ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3123
3124 dst_offset = ib[idx+1];
3125 dst_offset <<= 8;
3126 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3127 }
3128 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3129 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3130 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3131 return -EINVAL;
3132 }
3133 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3134 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3135 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3136 return -EINVAL;
3137 }
3138 p->idx += 9;
3139 break;
3140 default:
3141 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3142 return -EINVAL;
3143 }
3144 }
3145 } else {
3146 if (new_cmd) {
3147 switch (misc) {
3148 case 0:
3149 /* L2L, byte */
3150 src_offset = ib[idx+2];
3151 src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
3152 dst_offset = ib[idx+1];
3153 dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
3154 if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
3155 dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
3156 src_offset + count, radeon_bo_size(src_reloc->robj));
3157 return -EINVAL;
3158 }
3159 if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
3160 dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
3161 dst_offset + count, radeon_bo_size(dst_reloc->robj));
3162 return -EINVAL;
3163 }
3164 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
3165 ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
3166 ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3167 ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3168 p->idx += 5;
3169 break;
3170 case 1:
3171 /* L2L, partial */
3172 if (p->family < CHIP_CAYMAN) {
3173 DRM_ERROR("L2L Partial is cayman only !\n");
3174 return -EINVAL;
3175 }
3176 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
3177 ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3178 ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
3179 ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3180
3181 p->idx += 9;
3182 break;
3183 case 4:
3184 /* L2L, dw, broadcast */
3185 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3186 if (r) {
3187 DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
3188 return -EINVAL;
3189 }
3190 dst_offset = ib[idx+1];
3191 dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
3192 dst2_offset = ib[idx+2];
3193 dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
3194 src_offset = ib[idx+3];
3195 src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
3196 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3197 dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
3198 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3199 return -EINVAL;
3200 }
3201 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3202 dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
3203 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3204 return -EINVAL;
3205 }
3206 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3207 dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
3208 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3209 return -EINVAL;
3210 }
3211 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3212 ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
3213 ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3214 ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3215 ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
3216 ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3217 p->idx += 7;
3218 break;
3219 default:
3220 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3221 return -EINVAL;
3222 }
3223 } else {
3224 /* L2L, dw */
3225 src_offset = ib[idx+2];
3226 src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
3227 dst_offset = ib[idx+1];
3228 dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
3229 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3230 dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
3231 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3232 return -EINVAL;
3233 }
3234 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3235 dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
3236 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3237 return -EINVAL;
3238 }
3239 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3240 ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3241 ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3242 ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3243 p->idx += 5;
3244 }
3245 }
3246 break;
3247 case DMA_PACKET_CONSTANT_FILL:
3248 r = r600_dma_cs_next_reloc(p, &dst_reloc);
3249 if (r) {
3250 DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3251 return -EINVAL;
3252 }
3253 dst_offset = ib[idx+1];
3254 dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
3255 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3256 dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
3257 dst_offset, radeon_bo_size(dst_reloc->robj));
3258 return -EINVAL;
3259 }
3260 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3261 ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
3262 p->idx += 4;
3263 break;
3264 case DMA_PACKET_NOP:
3265 p->idx += 1;
3266 break;
3267 default:
3268 DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3269 return -EINVAL;
3270 }
3271 } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
3272#if 0
3273 for (r = 0; r < p->ib->length_dw; r++) {
3274 printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
3275 mdelay(1);
3276 }
3277#endif
3278 return 0;
3279}
3280
 /* vm parser */
 static bool evergreen_vm_reg_valid(u32 reg)
 {
@@ -2843,6 +3406,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev,
 	u32 idx = pkt->idx + 1;
 	u32 idx_value = ib[idx];
 	u32 start_reg, end_reg, reg, i;
+	u32 command, info;
 
 	switch (pkt->opcode) {
 	case PACKET3_NOP:
@@ -2917,6 +3481,64 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev,
 			return -EINVAL;
 		}
 		break;
+	case PACKET3_CP_DMA:
+		command = ib[idx + 4];
+		info = ib[idx + 1];
+		if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
+		    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
+		    ((((info & 0x00300000) >> 20) == 0) &&
+		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
+		    ((((info & 0x60000000) >> 29) == 0) &&
+		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
+			/* non mem to mem copies requires dw aligned count */
+			if ((command & 0x1fffff) % 4) {
+				DRM_ERROR("CP DMA command requires dw count alignment\n");
+				return -EINVAL;
+			}
+		}
+		if (command & PACKET3_CP_DMA_CMD_SAS) {
+			/* src address space is register */
+			if (((info & 0x60000000) >> 29) == 0) {
+				start_reg = idx_value << 2;
+				if (command & PACKET3_CP_DMA_CMD_SAIC) {
+					reg = start_reg;
+					if (!evergreen_vm_reg_valid(reg)) {
+						DRM_ERROR("CP DMA Bad SRC register\n");
+						return -EINVAL;
+					}
+				} else {
+					for (i = 0; i < (command & 0x1fffff); i++) {
+						reg = start_reg + (4 * i);
+						if (!evergreen_vm_reg_valid(reg)) {
+							DRM_ERROR("CP DMA Bad SRC register\n");
+							return -EINVAL;
+						}
+					}
+				}
+			}
+		}
+		if (command & PACKET3_CP_DMA_CMD_DAS) {
+			/* dst address space is register */
+			if (((info & 0x00300000) >> 20) == 0) {
+				start_reg = ib[idx + 2];
+				if (command & PACKET3_CP_DMA_CMD_DAIC) {
+					reg = start_reg;
+					if (!evergreen_vm_reg_valid(reg)) {
+						DRM_ERROR("CP DMA Bad DST register\n");
+						return -EINVAL;
+					}
+				} else {
+					for (i = 0; i < (command & 0x1fffff); i++) {
+						reg = start_reg + (4 * i);
+						if (!evergreen_vm_reg_valid(reg)) {
+							DRM_ERROR("CP DMA Bad DST register\n");
+							return -EINVAL;
+						}
+					}
+				}
+			}
+		}
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -2958,3 +3580,114 @@ int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
 
 	return ret;
 }
+
+/**
+ * evergreen_dma_ib_parse() - parse the DMA IB for VM
+ * @rdev: radeon_device pointer
+ * @ib: radeon_ib pointer
+ *
+ * Parses the DMA IB from the VM CS ioctl
+ * checks for errors. (Cayman-SI)
+ * Returns 0 for success and an error on failure.
+ **/
+int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	u32 idx = 0;
+	u32 header, cmd, count, tiled, new_cmd, misc;
+
+	do {
+		header = ib->ptr[idx];
+		cmd = GET_DMA_CMD(header);
+		count = GET_DMA_COUNT(header);
+		tiled = GET_DMA_T(header);
+		new_cmd = GET_DMA_NEW(header);
+		misc = GET_DMA_MISC(header);
+
+		switch (cmd) {
+		case DMA_PACKET_WRITE:
+			if (tiled)
+				idx += count + 7;
+			else
+				idx += count + 3;
+			break;
+		case DMA_PACKET_COPY:
+			if (tiled) {
+				if (new_cmd) {
+					switch (misc) {
+					case 0:
+						/* L2T, frame to fields */
+						idx += 10;
+						break;
+					case 1:
+						/* L2T, T2L partial */
+						idx += 12;
+						break;
+					case 3:
+						/* L2T, broadcast */
+						idx += 10;
+						break;
+					case 4:
+						/* L2T, T2L */
+						idx += 9;
+						break;
+					case 5:
+						/* T2T partial */
+						idx += 13;
+						break;
+					case 7:
+						/* L2T, broadcast */
+						idx += 10;
+						break;
+					default:
+						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+						return -EINVAL;
+					}
+				} else {
+					switch (misc) {
+					case 0:
+						idx += 9;
+						break;
+					default:
+						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+						return -EINVAL;
+					}
+				}
+			} else {
+				if (new_cmd) {
+					switch (misc) {
+					case 0:
+						/* L2L, byte */
+						idx += 5;
+						break;
+					case 1:
+						/* L2L, partial */
+						idx += 9;
+						break;
+					case 4:
+						/* L2L, dw, broadcast */
+						idx += 7;
+						break;
+					default:
+						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+						return -EINVAL;
+					}
+				} else {
+					/* L2L, dw */
+					idx += 5;
+				}
+			}
+			break;
+		case DMA_PACKET_CONSTANT_FILL:
+			idx += 4;
+			break;
+		case DMA_PACKET_NOP:
+			idx += 1;
+			break;
+		default:
+			DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+			return -EINVAL;
+		}
+	} while (idx < ib->length_dw);
+
+	return 0;
+}
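Both evergreen_dma_cs_parse() and evergreen_dma_ib_parse() start by cracking the first dword of each async-DMA packet with the GET_DMA_* macros added earlier in this patch. A small standalone sketch of that decode (userspace only, for illustration; the header value is made up, and the real opcode values live in the radeon register headers):

#include <stdint.h>
#include <stdio.h>

/* Copied from the patch above; comments are descriptive labels, not the
 * official field names.
 */
#define GET_DMA_CMD(h)   (((h) & 0xf0000000) >> 28) /* packet opcode */
#define GET_DMA_COUNT(h) ((h) & 0x000fffff)         /* transfer size (dwords or bytes, packet dependent) */
#define GET_DMA_T(h)     (((h) & 0x00800000) >> 23) /* tiled flag */
#define GET_DMA_NEW(h)   (((h) & 0x04000000) >> 26) /* new-style packet variant */
#define GET_DMA_MISC(h)  (((h) & 0x0700000) >> 20)  /* sub-operation for copies */

int main(void)
{
	uint32_t header = 0x40000010; /* made-up header: opcode 4, 16-dword payload, linear */

	printf("cmd=%u count=%u tiled=%u new=%u misc=%u\n",
	       GET_DMA_CMD(header), GET_DMA_COUNT(header),
	       GET_DMA_T(header), GET_DMA_NEW(header), GET_DMA_MISC(header));
	return 0;
}

The parsers then use cmd/tiled/new_cmd/misc to decide how many dwords the packet occupies (the idx += N steps), and, in the CS path, to patch the address dwords with the relocated GPU offsets.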