author    Alex Deucher <alexander.deucher@amd.com>    2011-11-18 10:19:47 -0500
committer Alex Deucher <alexander.deucher@amd.com>    2012-12-14 10:45:25 -0500
commit    cf4ccd016bae1a03bb38170eb54b5db4b04e0545 (patch)
tree      63b0f56e633db1f515841831cb81e91864f38da0 /drivers/gpu/drm/radeon
parent    4ac0533abaec2b83a7f2c675010eedd55664bc26 (diff)
drm/radeon/kms: add 6xx/7xx CS parser for async DMA (v2)
Allows us to use the DMA ring from userspace.  DMA doesn't have
a good NOP packet in which to embed the reloc idx, so userspace
has to add a reloc for each buffer used and order them to match
the command stream.

v2: fix address bounds checking, reloc indexing

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
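To illustrate the ordering contract described above, here is a minimal
userspace sketch (not part of this patch).  It emits one linear DMA copy
and appends one reloc per referenced buffer, in exactly the order the
kernel parser consumes them (for DMA_PACKET_COPY: src first, then dst;
see r600_dma_cs_parse below).  The dma_cs struct, the add_reloc() helper,
and the packet opcode value are illustrative assumptions; only the header
field layout mirrors the GET_DMA_* masks in the parser.

#include <stdint.h>

/* header encoding mirroring the GET_DMA_* masks in r600_cs.c below */
#define DMA_PACKET(cmd, t, n)	((((cmd) & 0xf) << 28) |	\
				 (((t) & 0x1) << 23) |		\
				 ((n) & 0xffff))
#define DMA_PACKET_COPY		0x3	/* copy opcode on the 6xx/7xx DMA engine */

struct dma_cs {				/* hypothetical CS builder state */
	uint32_t ib[1024];		/* command stream dwords */
	unsigned ndw;
	uint32_t reloc_handles[64];	/* one BO handle per buffer reference */
	unsigned nrelocs;
};

static void add_reloc(struct dma_cs *cs, uint32_t bo_handle)
{
	cs->reloc_handles[cs->nrelocs++] = bo_handle;
}

/*
 * Copy 'count' dwords between two BOs.  Offsets are relative to the
 * start of each BO; the kernel patches in the real GPU offsets from
 * the relocs.  The layout (header, dst lo, src lo, dst hi, src hi)
 * matches the linear DMA_PACKET_COPY case in r600_dma_cs_parse.
 */
static void emit_dma_copy(struct dma_cs *cs,
			  uint32_t dst_bo, uint64_t dst_offset,
			  uint32_t src_bo, uint64_t src_offset,
			  unsigned count)
{
	cs->ib[cs->ndw++] = DMA_PACKET(DMA_PACKET_COPY, 0, count);
	cs->ib[cs->ndw++] = (uint32_t)dst_offset;		  /* dst lo */
	cs->ib[cs->ndw++] = (uint32_t)src_offset;		  /* src lo */
	cs->ib[cs->ndw++] = (uint32_t)(dst_offset >> 32) & 0xff; /* dst hi */
	cs->ib[cs->ndw++] = (uint32_t)(src_offset >> 32) & 0xff; /* src hi */

	/* no reloc-carrying NOP, so reloc order must match parser
	 * consumption order: src first, then dst */
	add_reloc(cs, src_bo);
	add_reloc(cs, dst_bo);
}

int main(void)
{
	struct dma_cs cs = {0};

	/* copy 16 dwords from BO handle 2 into BO handle 1 */
	emit_dma_copy(&cs, 1, 0, 2, 0, 16);
	return (cs.nrelocs == 2) ? 0 : 1;
}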
Diffstat (limited to 'drivers/gpu/drm/radeon')
 drivers/gpu/drm/radeon/r600_cs.c     | 193 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon.h      |   1 +
 drivers/gpu/drm/radeon/radeon_asic.c |   6 +++---
 drivers/gpu/drm/radeon/radeon_asic.h |   1 +
 drivers/gpu/drm/radeon/radeon_cs.c   |   1 +
 5 files changed, 199 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 0b4d833d923c..0be768be530c 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -2514,3 +2514,196 @@ void r600_cs_legacy_init(void)
 {
 	r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm;
 }
+
+/*
+ * DMA
+ */
+/**
+ * r600_dma_cs_next_reloc() - parse next reloc
+ * @p: parser structure holding parsing context.
+ * @cs_reloc: reloc information
+ *
+ * Return the next reloc, do bo validation and compute
+ * GPU offset using the provided start.
+ **/
+int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
+			   struct radeon_cs_reloc **cs_reloc)
+{
+	struct radeon_cs_chunk *relocs_chunk;
+	unsigned idx;
+
+	if (p->chunk_relocs_idx == -1) {
+		DRM_ERROR("No relocation chunk!\n");
+		return -EINVAL;
+	}
+	*cs_reloc = NULL;
+	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+	idx = p->dma_reloc_idx;
+	if (idx >= p->nrelocs) {
+		DRM_ERROR("Relocs at %d after relocations chunk end %d!\n",
+			  idx, relocs_chunk->length_dw);
+		return -EINVAL;
+	}
+	*cs_reloc = p->relocs_ptr[idx];
+	p->dma_reloc_idx++;
+	return 0;
+}
+
+#define GET_DMA_CMD(h)		(((h) & 0xf0000000) >> 28)
+#define GET_DMA_COUNT(h)	((h) & 0x0000ffff)
+#define GET_DMA_T(h)		(((h) & 0x00800000) >> 23)
+
+/**
+ * r600_dma_cs_parse() - parse the DMA IB
+ * @p: parser structure holding parsing context.
+ *
+ * Parses the DMA IB from the CS ioctl and updates
+ * the GPU addresses based on the reloc information and
+ * checks for errors. (R6xx-R7xx)
+ * Returns 0 for success and an error on failure.
+ **/
+int r600_dma_cs_parse(struct radeon_cs_parser *p)
+{
+	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+	struct radeon_cs_reloc *src_reloc, *dst_reloc;
+	u32 header, cmd, count, tiled;
+	volatile u32 *ib = p->ib.ptr;
+	u32 idx, idx_value;
+	u64 src_offset, dst_offset;
+	int r;
+
+	do {
+		if (p->idx >= ib_chunk->length_dw) {
+			DRM_ERROR("Cannot parse packet at %d after CS end %d!\n",
+				  p->idx, ib_chunk->length_dw);
+			return -EINVAL;
+		}
+		idx = p->idx;
+		header = radeon_get_ib_value(p, idx);
+		cmd = GET_DMA_CMD(header);
+		count = GET_DMA_COUNT(header);
+		tiled = GET_DMA_T(header);
+
+		switch (cmd) {
+		case DMA_PACKET_WRITE:
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_WRITE\n");
+				return -EINVAL;
+			}
+			if (tiled) {
+				dst_offset = ib[idx+1];
+				dst_offset <<= 8;
+
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				p->idx += count + 5;
+			} else {
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
+
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += count + 3;
+			}
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
+					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			break;
+		case DMA_PACKET_COPY:
+			r = r600_dma_cs_next_reloc(p, &src_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_COPY\n");
+				return -EINVAL;
+			}
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_COPY\n");
+				return -EINVAL;
+			}
+			if (tiled) {
+				idx_value = radeon_get_ib_value(p, idx + 2);
+				/* detile bit */
+				if (idx_value & (1 << 31)) {
+					/* tiled src, linear dst */
+					src_offset = ib[idx+1];
+					src_offset <<= 8;
+					ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+					dst_offset = ib[idx+5];
+					dst_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+					ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				} else {
+					/* linear src, tiled dst */
+					src_offset = ib[idx+5];
+					src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+					ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+					dst_offset = ib[idx+1];
+					dst_offset <<= 8;
+					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				}
+				p->idx += 7;
+			} else {
+				src_offset = ib[idx+2];
+				src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += 5;
+			}
+			if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+				dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n",
+					 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+				return -EINVAL;
+			}
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA copy dst buffer too small (%llu %lu)\n",
+					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			break;
+		case DMA_PACKET_CONSTANT_FILL:
+			if (p->family < CHIP_RV770) {
+				DRM_ERROR("Constant Fill is 7xx only!\n");
+				return -EINVAL;
+			}
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
+				return -EINVAL;
+			}
+			dst_offset = ib[idx+1];
+			dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
+					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+			ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
+			p->idx += 4;
+			break;
+		case DMA_PACKET_NOP:
+			p->idx += 1;
+			break;
+		default:
+			DRM_ERROR("Unknown packet type %d at %d!\n", cmd, idx);
+			return -EINVAL;
+		}
+	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+	for (r = 0; r < p->ib.length_dw; r++) {
+		printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
+		mdelay(1);
+	}
+#endif
+	return 0;
+}
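As a quick sanity check on the header encoding, this standalone snippet
decodes a DMA header dword using the same bitfields as the GET_DMA_*
macros added above; the sample header value is made up for illustration.

#include <stdio.h>
#include <stdint.h>

/* same masks as the GET_DMA_* macros in r600_cs.c */
#define GET_DMA_CMD(h)		(((h) & 0xf0000000) >> 28)
#define GET_DMA_COUNT(h)	((h) & 0x0000ffff)
#define GET_DMA_T(h)		(((h) & 0x00800000) >> 23)

int main(void)
{
	uint32_t header = 0x30000040;	/* e.g. a linear copy of 0x40 dwords */

	printf("cmd %u, tiled %u, count %u dwords\n",
	       (unsigned)GET_DMA_CMD(header),
	       (unsigned)GET_DMA_T(header),
	       (unsigned)GET_DMA_COUNT(header));
	/* prints: cmd 3, tiled 0, count 64 dwords */
	return 0;
}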
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 285fb3f203af..5dc744d43d12 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -839,6 +839,7 @@ struct radeon_cs_parser {
 	struct radeon_cs_reloc	*relocs;
 	struct radeon_cs_reloc	**relocs_ptr;
 	struct list_head	validated;
+	unsigned		dma_reloc_idx;
 	/* indices of various chunks */
 	int			chunk_ib_idx;
 	int			chunk_relocs_idx;
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 3ea0475f9a95..d3603417e5df 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -952,7 +952,7 @@ static struct radeon_asic r600_asic = {
 		.ib_execute = &r600_dma_ring_ib_execute,
 		.emit_fence = &r600_dma_fence_ring_emit,
 		.emit_semaphore = &r600_dma_semaphore_ring_emit,
-		.cs_parse = NULL,
+		.cs_parse = &r600_dma_cs_parse,
 		.ring_test = &r600_dma_ring_test,
 		.ib_test = &r600_dma_ib_test,
 		.is_lockup = &r600_dma_is_lockup,
@@ -1036,7 +1036,7 @@ static struct radeon_asic rs780_asic = {
 		.ib_execute = &r600_dma_ring_ib_execute,
 		.emit_fence = &r600_dma_fence_ring_emit,
 		.emit_semaphore = &r600_dma_semaphore_ring_emit,
-		.cs_parse = NULL,
+		.cs_parse = &r600_dma_cs_parse,
 		.ring_test = &r600_dma_ring_test,
 		.ib_test = &r600_dma_ib_test,
 		.is_lockup = &r600_dma_is_lockup,
@@ -1120,7 +1120,7 @@ static struct radeon_asic rv770_asic = {
 		.ib_execute = &r600_dma_ring_ib_execute,
 		.emit_fence = &r600_dma_fence_ring_emit,
 		.emit_semaphore = &r600_dma_semaphore_ring_emit,
-		.cs_parse = NULL,
+		.cs_parse = &r600_dma_cs_parse,
 		.ring_test = &r600_dma_ring_test,
 		.ib_test = &r600_dma_ib_test,
 		.is_lockup = &r600_dma_is_lockup,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index c338931190a5..b311c0a2ec66 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -304,6 +304,7 @@ void r600_pcie_gart_tlb_flush(struct radeon_device *rdev);
 uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg);
 void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 int r600_cs_parse(struct radeon_cs_parser *p);
+int r600_dma_cs_parse(struct radeon_cs_parser *p);
 void r600_fence_ring_emit(struct radeon_device *rdev,
 			  struct radeon_fence *fence);
 void r600_semaphore_ring_emit(struct radeon_device *rdev,
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 41672cc563fb..1b32a5ab972d 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -43,6 +43,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		return 0;
 	}
 	chunk = &p->chunks[p->chunk_relocs_idx];
+	p->dma_reloc_idx = 0;
 	/* FIXME: we assume that each reloc uses 4 dwords */
 	p->nrelocs = chunk->length_dw / 4;
 	p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
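For context on the FIXME above: each reloc entry really is 4 dwords,
because the userspace record in include/drm/radeon_drm.h of this era
looks like this (reproduced for reference):

/* one reloc entry = 4 dwords, hence nrelocs = length_dw / 4 above */
struct drm_radeon_cs_reloc {
	uint32_t		handle;		/* GEM handle of the BO */
	uint32_t		read_domains;
	uint32_t		write_domain;
	uint32_t		flags;
};

On the DMA ring these entries are consumed strictly in order by
r600_dma_cs_next_reloc(), starting from dma_reloc_idx = 0 for each CS.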