diff options
Diffstat (limited to 'drivers/gpu/drm/radeon/r600_cs.c')
-rw-r--r-- | drivers/gpu/drm/radeon/r600_cs.c | 285 |
1 files changed, 212 insertions, 73 deletions
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 5d6e7f959e75..0be768be530c 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c | |||
@@ -657,87 +657,30 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) | |||
657 | /* nby is npipes htiles aligned == npipes * 8 pixel aligned */ | 657 | /* nby is npipes htiles aligned == npipes * 8 pixel aligned */ |
658 | nby = round_up(nby, track->npipes * 8); | 658 | nby = round_up(nby, track->npipes * 8); |
659 | } else { | 659 | } else { |
660 | /* htile widht & nby (8 or 4) make 2 bits number */ | 660 | /* always assume 8x8 htile */ |
661 | tmp = track->htile_surface & 3; | ||
662 | /* align is htile align * 8, htile align vary according to | 661 | /* align is htile align * 8, htile align vary according to |
663 | * number of pipe and tile width and nby | 662 | * number of pipe and tile width and nby |
664 | */ | 663 | */ |
665 | switch (track->npipes) { | 664 | switch (track->npipes) { |
666 | case 8: | 665 | case 8: |
667 | switch (tmp) { | 666 | /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ |
668 | case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ | 667 | nbx = round_up(nbx, 64 * 8); |
669 | nbx = round_up(nbx, 64 * 8); | 668 | nby = round_up(nby, 64 * 8); |
670 | nby = round_up(nby, 64 * 8); | ||
671 | break; | ||
672 | case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/ | ||
673 | case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/ | ||
674 | nbx = round_up(nbx, 64 * 8); | ||
675 | nby = round_up(nby, 32 * 8); | ||
676 | break; | ||
677 | case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/ | ||
678 | nbx = round_up(nbx, 32 * 8); | ||
679 | nby = round_up(nby, 32 * 8); | ||
680 | break; | ||
681 | default: | ||
682 | return -EINVAL; | ||
683 | } | ||
684 | break; | 669 | break; |
685 | case 4: | 670 | case 4: |
686 | switch (tmp) { | 671 | /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ |
687 | case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ | 672 | nbx = round_up(nbx, 64 * 8); |
688 | nbx = round_up(nbx, 64 * 8); | 673 | nby = round_up(nby, 32 * 8); |
689 | nby = round_up(nby, 32 * 8); | ||
690 | break; | ||
691 | case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/ | ||
692 | case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/ | ||
693 | nbx = round_up(nbx, 32 * 8); | ||
694 | nby = round_up(nby, 32 * 8); | ||
695 | break; | ||
696 | case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/ | ||
697 | nbx = round_up(nbx, 32 * 8); | ||
698 | nby = round_up(nby, 16 * 8); | ||
699 | break; | ||
700 | default: | ||
701 | return -EINVAL; | ||
702 | } | ||
703 | break; | 674 | break; |
704 | case 2: | 675 | case 2: |
705 | switch (tmp) { | 676 | /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ |
706 | case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ | 677 | nbx = round_up(nbx, 32 * 8); |
707 | nbx = round_up(nbx, 32 * 8); | 678 | nby = round_up(nby, 32 * 8); |
708 | nby = round_up(nby, 32 * 8); | ||
709 | break; | ||
710 | case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/ | ||
711 | case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/ | ||
712 | nbx = round_up(nbx, 32 * 8); | ||
713 | nby = round_up(nby, 16 * 8); | ||
714 | break; | ||
715 | case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/ | ||
716 | nbx = round_up(nbx, 16 * 8); | ||
717 | nby = round_up(nby, 16 * 8); | ||
718 | break; | ||
719 | default: | ||
720 | return -EINVAL; | ||
721 | } | ||
722 | break; | 679 | break; |
723 | case 1: | 680 | case 1: |
724 | switch (tmp) { | 681 | /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ |
725 | case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ | 682 | nbx = round_up(nbx, 32 * 8); |
726 | nbx = round_up(nbx, 32 * 8); | 683 | nby = round_up(nby, 16 * 8); |
727 | nby = round_up(nby, 16 * 8); | ||
728 | break; | ||
729 | case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/ | ||
730 | case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/ | ||
731 | nbx = round_up(nbx, 16 * 8); | ||
732 | nby = round_up(nby, 16 * 8); | ||
733 | break; | ||
734 | case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/ | ||
735 | nbx = round_up(nbx, 16 * 8); | ||
736 | nby = round_up(nby, 8 * 8); | ||
737 | break; | ||
738 | default: | ||
739 | return -EINVAL; | ||
740 | } | ||
741 | break; | 684 | break; |
742 | default: | 685 | default: |
743 | dev_warn(p->dev, "%s:%d invalid num pipes %d\n", | 686 | dev_warn(p->dev, "%s:%d invalid num pipes %d\n", |
@@ -746,9 +689,10 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) | |||
746 | } | 689 | } |
747 | } | 690 | } |
748 | /* compute number of htile */ | 691 | /* compute number of htile */ |
749 | nbx = G_028D24_HTILE_WIDTH(track->htile_surface) ? nbx / 8 : nbx / 4; | 692 | nbx = nbx >> 3; |
750 | nby = G_028D24_HTILE_HEIGHT(track->htile_surface) ? nby / 8 : nby / 4; | 693 | nby = nby >> 3; |
751 | size = nbx * nby * 4; | 694 | /* size must be aligned on npipes * 2K boundary */ |
695 | size = roundup(nbx * nby * 4, track->npipes * (2 << 10)); | ||
752 | size += track->htile_offset; | 696 | size += track->htile_offset; |
753 | 697 | ||
754 | if (size > radeon_bo_size(track->htile_bo)) { | 698 | if (size > radeon_bo_size(track->htile_bo)) { |
@@ -1492,6 +1436,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) | |||
1492 | break; | 1436 | break; |
1493 | case DB_HTILE_SURFACE: | 1437 | case DB_HTILE_SURFACE: |
1494 | track->htile_surface = radeon_get_ib_value(p, idx); | 1438 | track->htile_surface = radeon_get_ib_value(p, idx); |
1439 | /* force 8x8 htile width and height */ | ||
1440 | ib[idx] |= 3; | ||
1495 | track->db_dirty = true; | 1441 | track->db_dirty = true; |
1496 | break; | 1442 | break; |
1497 | case SQ_PGM_START_FS: | 1443 | case SQ_PGM_START_FS: |
@@ -2568,3 +2514,196 @@ void r600_cs_legacy_init(void) | |||
2568 | { | 2514 | { |
2569 | r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm; | 2515 | r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm; |
2570 | } | 2516 | } |
2517 | |||
2518 | /* | ||
2519 | * DMA | ||
2520 | */ | ||
2521 | /** | ||
2522 | * r600_dma_cs_next_reloc() - parse next reloc | ||
2523 | * @p: parser structure holding parsing context. | ||
2524 | * @cs_reloc: reloc informations | ||
2525 | * | ||
2526 | * Return the next reloc, do bo validation and compute | ||
2527 | * GPU offset using the provided start. | ||
2528 | **/ | ||
2529 | int r600_dma_cs_next_reloc(struct radeon_cs_parser *p, | ||
2530 | struct radeon_cs_reloc **cs_reloc) | ||
2531 | { | ||
2532 | struct radeon_cs_chunk *relocs_chunk; | ||
2533 | unsigned idx; | ||
2534 | |||
2535 | if (p->chunk_relocs_idx == -1) { | ||
2536 | DRM_ERROR("No relocation chunk !\n"); | ||
2537 | return -EINVAL; | ||
2538 | } | ||
2539 | *cs_reloc = NULL; | ||
2540 | relocs_chunk = &p->chunks[p->chunk_relocs_idx]; | ||
2541 | idx = p->dma_reloc_idx; | ||
2542 | if (idx >= relocs_chunk->length_dw) { | ||
2543 | DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", | ||
2544 | idx, relocs_chunk->length_dw); | ||
2545 | return -EINVAL; | ||
2546 | } | ||
2547 | *cs_reloc = p->relocs_ptr[idx]; | ||
2548 | p->dma_reloc_idx++; | ||
2549 | return 0; | ||
2550 | } | ||
2551 | |||
/*
 * Field extractors for the header dword of a DMA packet:
 *   CMD   - bits 31:28
 *   T     - bit  23
 *   COUNT - bits 15:0
 */
#define GET_DMA_CMD(h)		(((h) & (0xfU << 28)) >> 28)
#define GET_DMA_T(h)		(((h) & (1 << 23)) >> 23)
#define GET_DMA_COUNT(h)	((h) & 0xffff)
2555 | |||
2556 | /** | ||
2557 | * r600_dma_cs_parse() - parse the DMA IB | ||
2558 | * @p: parser structure holding parsing context. | ||
2559 | * | ||
2560 | * Parses the DMA IB from the CS ioctl and updates | ||
2561 | * the GPU addresses based on the reloc information and | ||
2562 | * checks for errors. (R6xx-R7xx) | ||
2563 | * Returns 0 for success and an error on failure. | ||
2564 | **/ | ||
2565 | int r600_dma_cs_parse(struct radeon_cs_parser *p) | ||
2566 | { | ||
2567 | struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx]; | ||
2568 | struct radeon_cs_reloc *src_reloc, *dst_reloc; | ||
2569 | u32 header, cmd, count, tiled; | ||
2570 | volatile u32 *ib = p->ib.ptr; | ||
2571 | u32 idx, idx_value; | ||
2572 | u64 src_offset, dst_offset; | ||
2573 | int r; | ||
2574 | |||
2575 | do { | ||
2576 | if (p->idx >= ib_chunk->length_dw) { | ||
2577 | DRM_ERROR("Can not parse packet at %d after CS end %d !\n", | ||
2578 | p->idx, ib_chunk->length_dw); | ||
2579 | return -EINVAL; | ||
2580 | } | ||
2581 | idx = p->idx; | ||
2582 | header = radeon_get_ib_value(p, idx); | ||
2583 | cmd = GET_DMA_CMD(header); | ||
2584 | count = GET_DMA_COUNT(header); | ||
2585 | tiled = GET_DMA_T(header); | ||
2586 | |||
2587 | switch (cmd) { | ||
2588 | case DMA_PACKET_WRITE: | ||
2589 | r = r600_dma_cs_next_reloc(p, &dst_reloc); | ||
2590 | if (r) { | ||
2591 | DRM_ERROR("bad DMA_PACKET_WRITE\n"); | ||
2592 | return -EINVAL; | ||
2593 | } | ||
2594 | if (tiled) { | ||
2595 | dst_offset = ib[idx+1]; | ||
2596 | dst_offset <<= 8; | ||
2597 | |||
2598 | ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); | ||
2599 | p->idx += count + 5; | ||
2600 | } else { | ||
2601 | dst_offset = ib[idx+1]; | ||
2602 | dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32; | ||
2603 | |||
2604 | ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); | ||
2605 | ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; | ||
2606 | p->idx += count + 3; | ||
2607 | } | ||
2608 | if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { | ||
2609 | dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n", | ||
2610 | dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); | ||
2611 | return -EINVAL; | ||
2612 | } | ||
2613 | break; | ||
2614 | case DMA_PACKET_COPY: | ||
2615 | r = r600_dma_cs_next_reloc(p, &src_reloc); | ||
2616 | if (r) { | ||
2617 | DRM_ERROR("bad DMA_PACKET_COPY\n"); | ||
2618 | return -EINVAL; | ||
2619 | } | ||
2620 | r = r600_dma_cs_next_reloc(p, &dst_reloc); | ||
2621 | if (r) { | ||
2622 | DRM_ERROR("bad DMA_PACKET_COPY\n"); | ||
2623 | return -EINVAL; | ||
2624 | } | ||
2625 | if (tiled) { | ||
2626 | idx_value = radeon_get_ib_value(p, idx + 2); | ||
2627 | /* detile bit */ | ||
2628 | if (idx_value & (1 << 31)) { | ||
2629 | /* tiled src, linear dst */ | ||
2630 | src_offset = ib[idx+1]; | ||
2631 | src_offset <<= 8; | ||
2632 | ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); | ||
2633 | |||
2634 | dst_offset = ib[idx+5]; | ||
2635 | dst_offset |= ((u64)(ib[idx+6] & 0xff)) << 32; | ||
2636 | ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); | ||
2637 | ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; | ||
2638 | } else { | ||
2639 | /* linear src, tiled dst */ | ||
2640 | src_offset = ib[idx+5]; | ||
2641 | src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32; | ||
2642 | ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); | ||
2643 | ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; | ||
2644 | |||
2645 | dst_offset = ib[idx+1]; | ||
2646 | dst_offset <<= 8; | ||
2647 | ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); | ||
2648 | } | ||
2649 | p->idx += 7; | ||
2650 | } else { | ||
2651 | src_offset = ib[idx+2]; | ||
2652 | src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32; | ||
2653 | dst_offset = ib[idx+1]; | ||
2654 | dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32; | ||
2655 | |||
2656 | ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); | ||
2657 | ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); | ||
2658 | ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; | ||
2659 | ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; | ||
2660 | p->idx += 5; | ||
2661 | } | ||
2662 | if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { | ||
2663 | dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n", | ||
2664 | src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); | ||
2665 | return -EINVAL; | ||
2666 | } | ||
2667 | if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { | ||
2668 | dev_warn(p->dev, "DMA write dst buffer too small (%llu %lu)\n", | ||
2669 | dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); | ||
2670 | return -EINVAL; | ||
2671 | } | ||
2672 | break; | ||
2673 | case DMA_PACKET_CONSTANT_FILL: | ||
2674 | if (p->family < CHIP_RV770) { | ||
2675 | DRM_ERROR("Constant Fill is 7xx only !\n"); | ||
2676 | return -EINVAL; | ||
2677 | } | ||
2678 | r = r600_dma_cs_next_reloc(p, &dst_reloc); | ||
2679 | if (r) { | ||
2680 | DRM_ERROR("bad DMA_PACKET_WRITE\n"); | ||
2681 | return -EINVAL; | ||
2682 | } | ||
2683 | dst_offset = ib[idx+1]; | ||
2684 | dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16; | ||
2685 | if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { | ||
2686 | dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n", | ||
2687 | dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); | ||
2688 | return -EINVAL; | ||
2689 | } | ||
2690 | ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); | ||
2691 | ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000; | ||
2692 | p->idx += 4; | ||
2693 | break; | ||
2694 | case DMA_PACKET_NOP: | ||
2695 | p->idx += 1; | ||
2696 | break; | ||
2697 | default: | ||
2698 | DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx); | ||
2699 | return -EINVAL; | ||
2700 | } | ||
2701 | } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); | ||
2702 | #if 0 | ||
2703 | for (r = 0; r < p->ib->length_dw; r++) { | ||
2704 | printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]); | ||
2705 | mdelay(1); | ||
2706 | } | ||
2707 | #endif | ||
2708 | return 0; | ||
2709 | } | ||