author	Alex Deucher <alexander.deucher@amd.com>	2011-11-18 10:19:47 -0500
committer	Alex Deucher <alexander.deucher@amd.com>	2012-12-14 10:45:25 -0500
commit	cf4ccd016bae1a03bb38170eb54b5db4b04e0545 (patch)
tree	63b0f56e633db1f515841831cb81e91864f38da0 /drivers/gpu/drm/radeon
parent	4ac0533abaec2b83a7f2c675010eedd55664bc26 (diff)
drm/radeon/kms: add 6xx/7xx CS parser for async DMA (v2)
Allows us to use the DMA ring from userspace.
DMA doesn't have a good NOP packet in which to embed the
reloc idx, so userspace has to add a reloc for each
buffer used and order them to match the command stream.
v2: fix address bounds checking, reloc indexing
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
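
Because the DMA engine has no NOP payload to carry a reloc index, the kernel consumes the reloc chunk strictly in order (r600_dma_cs_next_reloc() in the diff below just advances a dma_reloc_idx counter), so a userspace CS builder must append one reloc per buffer in exactly the order the parser fetches them; for DMA_PACKET_COPY that is source first, then destination. A minimal self-contained sketch of that ordering, where cs_builder/emit_dword()/emit_reloc() are hypothetical stand-ins for a real CS-building API (libdrm_radeon has its own equivalents) and the header layout is taken from the GET_DMA_* masks in the parser:

#include <stdint.h>
#include <stdio.h>

/* Header layout per the parser's GET_DMA_* macros: cmd in bits 31:28,
 * tiled bit 23, dword count in bits 15:0.  0x3 is the DMA_PACKET_COPY
 * value from r600d.h, quoted here only to keep the sketch standalone. */
#define DMA_HDR(cmd, tiled, count) \
	((((uint32_t)(cmd) & 0xf) << 28) | \
	 (((uint32_t)(tiled) & 0x1) << 23) | \
	 ((uint32_t)(count) & 0xffff))
#define DMA_PACKET_COPY 0x3

/* Hypothetical CS builder, standing in for a real userspace driver. */
struct cs_builder {
	uint32_t ib[256];
	unsigned ndw;
	unsigned nrelocs;
};

static void emit_dword(struct cs_builder *cs, uint32_t dw)
{
	cs->ib[cs->ndw++] = dw;
}

static void emit_reloc(struct cs_builder *cs, unsigned bo_handle)
{
	/* In the real ioctl this appends an entry to the reloc chunk;
	 * the kernel walks those entries front to back. */
	printf("reloc[%u] -> bo %u\n", cs->nrelocs++, bo_handle);
}

/* Linear->linear copy: header, dst lo, src lo, dst hi, src hi.
 * The kernel patches the address dwords from the relocs, and it
 * fetches the src reloc before the dst reloc, so userspace must
 * append them in that order. */
static void emit_dma_copy(struct cs_builder *cs, unsigned src_bo,
			  unsigned dst_bo, unsigned count)
{
	emit_dword(cs, DMA_HDR(DMA_PACKET_COPY, 0, count));
	emit_dword(cs, 0);		/* dst addr lo (patched) */
	emit_dword(cs, 0);		/* src addr lo (patched) */
	emit_dword(cs, 0);		/* dst addr bits 39:32 */
	emit_dword(cs, 0);		/* src addr bits 39:32 */

	emit_reloc(cs, src_bo);		/* order must match the parser */
	emit_reloc(cs, dst_bo);
}

int main(void)
{
	struct cs_builder cs = { .ndw = 0, .nrelocs = 0 };
	emit_dma_copy(&cs, 1, 2, 16);
	printf("%u dwords, %u relocs\n", cs.ndw, cs.nrelocs);
	return 0;
}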
Diffstat (limited to 'drivers/gpu/drm/radeon')
-rw-r--r--	drivers/gpu/drm/radeon/r600_cs.c	193
-rw-r--r--	drivers/gpu/drm/radeon/radeon.h	1
-rw-r--r--	drivers/gpu/drm/radeon/radeon_asic.c	6
-rw-r--r--	drivers/gpu/drm/radeon/radeon_asic.h	1
-rw-r--r--	drivers/gpu/drm/radeon/radeon_cs.c	1
5 files changed, 199 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 0b4d833d923c..0be768be530c 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -2514,3 +2514,196 @@ void r600_cs_legacy_init(void)
 {
 	r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm;
 }
+
+/*
+ * DMA
+ */
+/**
+ * r600_dma_cs_next_reloc() - parse next reloc
+ * @p: parser structure holding parsing context.
+ * @cs_reloc: reloc informations
+ *
+ * Return the next reloc, do bo validation and compute
+ * GPU offset using the provided start.
+ **/
+int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
+			   struct radeon_cs_reloc **cs_reloc)
+{
+	struct radeon_cs_chunk *relocs_chunk;
+	unsigned idx;
+
+	if (p->chunk_relocs_idx == -1) {
+		DRM_ERROR("No relocation chunk !\n");
+		return -EINVAL;
+	}
+	*cs_reloc = NULL;
+	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+	idx = p->dma_reloc_idx;
+	if (idx >= relocs_chunk->length_dw) {
+		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+			  idx, relocs_chunk->length_dw);
+		return -EINVAL;
+	}
+	*cs_reloc = p->relocs_ptr[idx];
+	p->dma_reloc_idx++;
+	return 0;
+}
+
+#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
+#define GET_DMA_COUNT(h) ((h) & 0x0000ffff)
+#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
+
+/**
+ * r600_dma_cs_parse() - parse the DMA IB
+ * @p: parser structure holding parsing context.
+ *
+ * Parses the DMA IB from the CS ioctl and updates
+ * the GPU addresses based on the reloc information and
+ * checks for errors. (R6xx-R7xx)
+ * Returns 0 for success and an error on failure.
+ **/
+int r600_dma_cs_parse(struct radeon_cs_parser *p)
+{
+	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+	struct radeon_cs_reloc *src_reloc, *dst_reloc;
+	u32 header, cmd, count, tiled;
+	volatile u32 *ib = p->ib.ptr;
+	u32 idx, idx_value;
+	u64 src_offset, dst_offset;
+	int r;
+
+	do {
+		if (p->idx >= ib_chunk->length_dw) {
+			DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+				  p->idx, ib_chunk->length_dw);
+			return -EINVAL;
+		}
+		idx = p->idx;
+		header = radeon_get_ib_value(p, idx);
+		cmd = GET_DMA_CMD(header);
+		count = GET_DMA_COUNT(header);
+		tiled = GET_DMA_T(header);
+
+		switch (cmd) {
+		case DMA_PACKET_WRITE:
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_WRITE\n");
+				return -EINVAL;
+			}
+			if (tiled) {
+				dst_offset = ib[idx+1];
+				dst_offset <<= 8;
+
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				p->idx += count + 5;
+			} else {
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
+
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += count + 3;
+			}
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
+					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			break;
+		case DMA_PACKET_COPY:
+			r = r600_dma_cs_next_reloc(p, &src_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_COPY\n");
+				return -EINVAL;
+			}
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_COPY\n");
+				return -EINVAL;
+			}
+			if (tiled) {
+				idx_value = radeon_get_ib_value(p, idx + 2);
+				/* detile bit */
+				if (idx_value & (1 << 31)) {
+					/* tiled src, linear dst */
+					src_offset = ib[idx+1];
+					src_offset <<= 8;
+					ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+					dst_offset = ib[idx+5];
+					dst_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+					ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				} else {
+					/* linear src, tiled dst */
+					src_offset = ib[idx+5];
+					src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+					ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+					dst_offset = ib[idx+1];
+					dst_offset <<= 8;
+					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				}
+				p->idx += 7;
+			} else {
+				src_offset = ib[idx+2];
+				src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += 5;
+			}
+			if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+				dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n",
+					 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+				return -EINVAL;
+			}
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA write dst buffer too small (%llu %lu)\n",
+					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			break;
+		case DMA_PACKET_CONSTANT_FILL:
+			if (p->family < CHIP_RV770) {
+				DRM_ERROR("Constant Fill is 7xx only !\n");
+				return -EINVAL;
+			}
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_WRITE\n");
+				return -EINVAL;
+			}
+			dst_offset = ib[idx+1];
+			dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
+					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+			ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
+			p->idx += 4;
+			break;
+		case DMA_PACKET_NOP:
+			p->idx += 1;
+			break;
+		default:
+			DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+			return -EINVAL;
+		}
+	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+	for (r = 0; r < p->ib->length_dw; r++) {
+		printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
+		mdelay(1);
+	}
+#endif
+	return 0;
+}
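
The GET_DMA_* macros slice the command, dword count, and tiling bit out of the packet header, and the constant-fill case reassembles a 40-bit destination address whose low 32 bits sit in dword 1 and whose bits 39:32 sit in bits 23:16 of dword 3. A standalone sketch of both decodings, reusing the parser's masks and shifts (the example values are made up):

#include <stdint.h>
#include <stdio.h>

/* Same field extraction as r600_dma_cs_parse() above. */
#define GET_DMA_CMD(h)   (((h) & 0xf0000000) >> 28)
#define GET_DMA_COUNT(h) ((h) & 0x0000ffff)
#define GET_DMA_T(h)     (((h) & 0x00800000) >> 23)

int main(void)
{
	/* An arbitrary header: cmd=0x2, linear (tiled=0), 8 dwords. */
	uint32_t header = (0x2u << 28) | 8u;

	printf("cmd=0x%x count=%u tiled=%u\n",
	       (unsigned)GET_DMA_CMD(header),
	       (unsigned)GET_DMA_COUNT(header),
	       (unsigned)GET_DMA_T(header));

	/* Constant fill splits a 40-bit address: dword 1 holds bits 31:0,
	 * dword 3 holds bits 39:32 in its bits 23:16.  The parser undoes
	 * the split with:
	 *	dst_offset = ib[idx+1];
	 *	dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
	 */
	uint64_t dst = 0xab12345678ull;			/* 40-bit example */
	uint32_t dw1 = (uint32_t)dst;
	uint32_t dw3 = ((uint32_t)(dst >> 32) << 16) & 0x00ff0000;
	uint64_t back = (uint64_t)dw1 | ((uint64_t)(dw3 & 0x00ff0000) << 16);

	printf("0x%llx -> dw1=0x%08x dw3=0x%08x -> 0x%llx\n",
	       (unsigned long long)dst, dw1, dw3,
	       (unsigned long long)back);
	return 0;
}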
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 285fb3f203af..5dc744d43d12 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -839,6 +839,7 @@ struct radeon_cs_parser {
 	struct radeon_cs_reloc	*relocs;
 	struct radeon_cs_reloc	**relocs_ptr;
 	struct list_head	validated;
+	unsigned		dma_reloc_idx;
 	/* indices of various chunks */
 	int			chunk_ib_idx;
 	int			chunk_relocs_idx;
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 3ea0475f9a95..d3603417e5df 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -952,7 +952,7 @@ static struct radeon_asic r600_asic = {
 			.ib_execute = &r600_dma_ring_ib_execute,
 			.emit_fence = &r600_dma_fence_ring_emit,
 			.emit_semaphore = &r600_dma_semaphore_ring_emit,
-			.cs_parse = NULL,
+			.cs_parse = &r600_dma_cs_parse,
 			.ring_test = &r600_dma_ring_test,
 			.ib_test = &r600_dma_ib_test,
 			.is_lockup = &r600_dma_is_lockup,
@@ -1036,7 +1036,7 @@ static struct radeon_asic rs780_asic = {
 			.ib_execute = &r600_dma_ring_ib_execute,
 			.emit_fence = &r600_dma_fence_ring_emit,
 			.emit_semaphore = &r600_dma_semaphore_ring_emit,
-			.cs_parse = NULL,
+			.cs_parse = &r600_dma_cs_parse,
 			.ring_test = &r600_dma_ring_test,
 			.ib_test = &r600_dma_ib_test,
 			.is_lockup = &r600_dma_is_lockup,
@@ -1120,7 +1120,7 @@ static struct radeon_asic rv770_asic = {
 			.ib_execute = &r600_dma_ring_ib_execute,
 			.emit_fence = &r600_dma_fence_ring_emit,
 			.emit_semaphore = &r600_dma_semaphore_ring_emit,
-			.cs_parse = NULL,
+			.cs_parse = &r600_dma_cs_parse,
 			.ring_test = &r600_dma_ring_test,
 			.ib_test = &r600_dma_ib_test,
 			.is_lockup = &r600_dma_is_lockup,
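
Filling in .cs_parse for the DMA ring of each 6xx/7xx ASIC struct is what makes user submissions on that ring possible; with the hook left NULL, the CS ioctl had no validator for DMA IBs. A toy model of the per-ring dispatch, with the structs pared down for illustration (the real radeon_asic structs carry many more hooks):

#include <errno.h>
#include <stdio.h>

struct cs_parser { unsigned idx; };

/* One hook per ring type; NULL means userspace may not submit here. */
struct ring_asic {
	int (*cs_parse)(struct cs_parser *p);
};

static int dma_cs_parse(struct cs_parser *p)
{
	printf("validating DMA IB from dword %u\n", p->idx);
	return 0;
}

enum { RING_GFX, RING_DMA, NUM_RINGS };

static const struct ring_asic rings[NUM_RINGS] = {
	[RING_GFX] = { .cs_parse = NULL },		/* handled elsewhere */
	[RING_DMA] = { .cs_parse = dma_cs_parse },	/* this patch's hook */
};

static int cs_parse(unsigned ring, struct cs_parser *p)
{
	if (!rings[ring].cs_parse)
		return -EINVAL;	/* no parser registered for this ring */
	return rings[ring].cs_parse(p);
}

int main(void)
{
	struct cs_parser p = { .idx = 0 };
	return cs_parse(RING_DMA, &p);
}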
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index c338931190a5..b311c0a2ec66 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -304,6 +304,7 @@ void r600_pcie_gart_tlb_flush(struct radeon_device *rdev);
 uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg);
 void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 int r600_cs_parse(struct radeon_cs_parser *p);
+int r600_dma_cs_parse(struct radeon_cs_parser *p);
 void r600_fence_ring_emit(struct radeon_device *rdev,
 			  struct radeon_fence *fence);
 void r600_semaphore_ring_emit(struct radeon_device *rdev,
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 41672cc563fb..1b32a5ab972d 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -43,6 +43,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		return 0;
 	}
 	chunk = &p->chunks[p->chunk_relocs_idx];
+	p->dma_reloc_idx = 0;
 	/* FIXME: we assume that each relocs use 4 dwords */
 	p->nrelocs = chunk->length_dw / 4;
 	p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);