aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/radeon/r600_cs.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/radeon/r600_cs.c')
-rw-r--r--drivers/gpu/drm/radeon/r600_cs.c285
1 files changed, 212 insertions, 73 deletions
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 5d6e7f959e75..0be768be530c 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -657,87 +657,30 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p)
657 /* nby is npipes htiles aligned == npipes * 8 pixel aligned */ 657 /* nby is npipes htiles aligned == npipes * 8 pixel aligned */
658 nby = round_up(nby, track->npipes * 8); 658 nby = round_up(nby, track->npipes * 8);
659 } else { 659 } else {
660 /* htile widht & nby (8 or 4) make 2 bits number */ 660 /* always assume 8x8 htile */
661 tmp = track->htile_surface & 3;
662 /* align is htile align * 8, htile align vary according to 661 /* align is htile align * 8, htile align vary according to
663 * number of pipe and tile width and nby 662 * number of pipe and tile width and nby
664 */ 663 */
665 switch (track->npipes) { 664 switch (track->npipes) {
666 case 8: 665 case 8:
667 switch (tmp) { 666 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
668 case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ 667 nbx = round_up(nbx, 64 * 8);
669 nbx = round_up(nbx, 64 * 8); 668 nby = round_up(nby, 64 * 8);
670 nby = round_up(nby, 64 * 8);
671 break;
672 case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
673 case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
674 nbx = round_up(nbx, 64 * 8);
675 nby = round_up(nby, 32 * 8);
676 break;
677 case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
678 nbx = round_up(nbx, 32 * 8);
679 nby = round_up(nby, 32 * 8);
680 break;
681 default:
682 return -EINVAL;
683 }
684 break; 669 break;
685 case 4: 670 case 4:
686 switch (tmp) { 671 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
687 case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ 672 nbx = round_up(nbx, 64 * 8);
688 nbx = round_up(nbx, 64 * 8); 673 nby = round_up(nby, 32 * 8);
689 nby = round_up(nby, 32 * 8);
690 break;
691 case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
692 case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
693 nbx = round_up(nbx, 32 * 8);
694 nby = round_up(nby, 32 * 8);
695 break;
696 case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
697 nbx = round_up(nbx, 32 * 8);
698 nby = round_up(nby, 16 * 8);
699 break;
700 default:
701 return -EINVAL;
702 }
703 break; 674 break;
704 case 2: 675 case 2:
705 switch (tmp) { 676 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
706 case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ 677 nbx = round_up(nbx, 32 * 8);
707 nbx = round_up(nbx, 32 * 8); 678 nby = round_up(nby, 32 * 8);
708 nby = round_up(nby, 32 * 8);
709 break;
710 case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
711 case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
712 nbx = round_up(nbx, 32 * 8);
713 nby = round_up(nby, 16 * 8);
714 break;
715 case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
716 nbx = round_up(nbx, 16 * 8);
717 nby = round_up(nby, 16 * 8);
718 break;
719 default:
720 return -EINVAL;
721 }
722 break; 679 break;
723 case 1: 680 case 1:
724 switch (tmp) { 681 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
725 case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ 682 nbx = round_up(nbx, 32 * 8);
726 nbx = round_up(nbx, 32 * 8); 683 nby = round_up(nby, 16 * 8);
727 nby = round_up(nby, 16 * 8);
728 break;
729 case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
730 case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
731 nbx = round_up(nbx, 16 * 8);
732 nby = round_up(nby, 16 * 8);
733 break;
734 case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
735 nbx = round_up(nbx, 16 * 8);
736 nby = round_up(nby, 8 * 8);
737 break;
738 default:
739 return -EINVAL;
740 }
741 break; 684 break;
742 default: 685 default:
743 dev_warn(p->dev, "%s:%d invalid num pipes %d\n", 686 dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
@@ -746,9 +689,10 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p)
746 } 689 }
747 } 690 }
748 /* compute number of htile */ 691 /* compute number of htile */
749 nbx = G_028D24_HTILE_WIDTH(track->htile_surface) ? nbx / 8 : nbx / 4; 692 nbx = nbx >> 3;
750 nby = G_028D24_HTILE_HEIGHT(track->htile_surface) ? nby / 8 : nby / 4; 693 nby = nby >> 3;
751 size = nbx * nby * 4; 694 /* size must be aligned on npipes * 2K boundary */
695 size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
752 size += track->htile_offset; 696 size += track->htile_offset;
753 697
754 if (size > radeon_bo_size(track->htile_bo)) { 698 if (size > radeon_bo_size(track->htile_bo)) {
@@ -1492,6 +1436,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1492 break; 1436 break;
1493 case DB_HTILE_SURFACE: 1437 case DB_HTILE_SURFACE:
1494 track->htile_surface = radeon_get_ib_value(p, idx); 1438 track->htile_surface = radeon_get_ib_value(p, idx);
1439 /* force 8x8 htile width and height */
1440 ib[idx] |= 3;
1495 track->db_dirty = true; 1441 track->db_dirty = true;
1496 break; 1442 break;
1497 case SQ_PGM_START_FS: 1443 case SQ_PGM_START_FS:
@@ -2568,3 +2514,196 @@ void r600_cs_legacy_init(void)
2568{ 2514{
2569 r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm; 2515 r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm;
2570} 2516}
2517
2518/*
2519 * DMA
2520 */
2521/**
2522 * r600_dma_cs_next_reloc() - parse next reloc
2523 * @p: parser structure holding parsing context.
2524 * @cs_reloc: filled in with the next relocation entry.
2525 *
2526 * Return the relocation at p->dma_reloc_idx and advance that index;
2527 * no BO validation or GPU offset computation is performed here —
2528 * callers apply lobj.gpu_offset themselves.  0 on success, -EINVAL
 *     if there is no relocation chunk or the index runs past its end.
 **/
2529int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
2530			       struct radeon_cs_reloc **cs_reloc)
2531{
2532	struct radeon_cs_chunk *relocs_chunk;
2533	unsigned idx;
2534
2535	if (p->chunk_relocs_idx == -1) {
2536		DRM_ERROR("No relocation chunk !\n");
2537		return -EINVAL;
2538	}
2539	*cs_reloc = NULL;
2540	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	/* dma_reloc_idx is a running cursor over reloc entries, advanced
	 * once per successful call */
2541	idx = p->dma_reloc_idx;
	/* NOTE(review): idx counts reloc ENTRIES but is bounds-checked against
	 * length_dw (the chunk's dword count) — looks overly permissive if one
	 * entry spans several dwords; confirm against the reloc chunk layout
	 * built in radeon_cs.c.
	 */
2542	if (idx >= relocs_chunk->length_dw) {
2543		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
2544			  idx, relocs_chunk->length_dw);
2545		return -EINVAL;
2546	}
2547	*cs_reloc = p->relocs_ptr[idx];
2548	p->dma_reloc_idx++;
2549	return 0;
2550}
2551
/* Field extractors for the 32-bit R6xx/R7xx DMA packet header:
 * bits 31:28 = command/opcode, bit 23 = tiled flag, bits 15:0 = count.
 */
2552#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
2553#define GET_DMA_COUNT(h) ((h) & 0x0000ffff)
2554#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
2555
2556/**
2557 * r600_dma_cs_parse() - parse the DMA IB
2558 * @p: parser structure holding parsing context.
2559 *
2560 * Parses the DMA IB from the CS ioctl and updates
2561 * the GPU addresses based on the reloc information and
2562 * checks for errors. (R6xx-R7xx)
2563 * Returns 0 for success and an error on failure.
2564 **/
2565int r600_dma_cs_parse(struct radeon_cs_parser *p)
2566{
2567	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
2568	struct radeon_cs_reloc *src_reloc, *dst_reloc;
2569	u32 header, cmd, count, tiled;
2570	volatile u32 *ib = p->ib.ptr;
2571	u32 idx, idx_value;
2572	u64 src_offset, dst_offset;
2573	int r;
2574
	/* Walk the IB one packet at a time; every case below must advance
	 * p->idx past the packet it just validated/patched.
	 */
2575	do {
2576		if (p->idx >= ib_chunk->length_dw) {
2577			DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
2578				  p->idx, ib_chunk->length_dw);
2579			return -EINVAL;
2580		}
2581		idx = p->idx;
2582		header = radeon_get_ib_value(p, idx);
2583		cmd = GET_DMA_CMD(header);
2584		count = GET_DMA_COUNT(header);
2585		tiled = GET_DMA_T(header);
2586
2587		switch (cmd) {
2588		case DMA_PACKET_WRITE:
2589			r = r600_dma_cs_next_reloc(p, &dst_reloc);
2590			if (r) {
2591				DRM_ERROR("bad DMA_PACKET_WRITE\n");
2592				return -EINVAL;
2593			}
2594			if (tiled) {
				/* tiled: dword 1 is the dst address in 256-byte
				 * units, hence the <<8 / >>8 patching */
2595				dst_offset = ib[idx+1];
2596				dst_offset <<= 8;
2597
2598				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
				/* NOTE(review): assumes 5 setup dwords before the
				 * payload for a tiled write — confirm against the
				 * R6xx/R7xx DMA packet spec. */
2599				p->idx += count + 5;
2600			} else {
				/* linear: 64-bit dst split over dword 1 (low 32)
				 * and dword 2 (bits 39:32) */
2601				dst_offset = ib[idx+1];
2602				dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
2603
2604				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2605				ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2606				p->idx += count + 3;
2607			}
			/* count is a dword count: the packet touches count * 4 bytes */
2608			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2609				dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
2610					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2611				return -EINVAL;
2612			}
2613			break;
2614		case DMA_PACKET_COPY:
2615			r = r600_dma_cs_next_reloc(p, &src_reloc);
2616			if (r) {
2617				DRM_ERROR("bad DMA_PACKET_COPY\n");
2618				return -EINVAL;
2619			}
2620			r = r600_dma_cs_next_reloc(p, &dst_reloc);
2621			if (r) {
2622				DRM_ERROR("bad DMA_PACKET_COPY\n");
2623				return -EINVAL;
2624			}
2625			if (tiled) {
2626				idx_value = radeon_get_ib_value(p, idx + 2);
2627				/* detile bit: set = tiled->linear, clear = linear->tiled */
2628				if (idx_value & (1 << 31)) {
2629					/* tiled src, linear dst */
2630					src_offset = ib[idx+1];
2631					src_offset <<= 8;
2632					ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
2633
2634					dst_offset = ib[idx+5];
2635					dst_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
2636					ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2637					ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2638				} else {
2639					/* linear src, tiled dst */
2640					src_offset = ib[idx+5];
2641					src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
2642					ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
2643					ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2644
2645					dst_offset = ib[idx+1];
2646					dst_offset <<= 8;
2647					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2648				}
2649				p->idx += 7;
2650			} else {
				/* linear <-> linear: low dwords at 1 (dst) / 2 (src),
				 * high 8 address bits at 3 (dst) / 4 (src) */
2651				src_offset = ib[idx+2];
2652				src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
2653				dst_offset = ib[idx+1];
2654				dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
2655
2656				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2657				ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
2658				ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2659				ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2660				p->idx += 5;
2661			}
2662			if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2663				dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n",
2664					 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2665				return -EINVAL;
2666			}
2667			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2668				dev_warn(p->dev, "DMA write dst buffer too small (%llu %lu)\n",
2669					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2670				return -EINVAL;
2671			}
2672			break;
2673		case DMA_PACKET_CONSTANT_FILL:
2674			if (p->family < CHIP_RV770) {
2675				DRM_ERROR("Constant Fill is 7xx only !\n");
2676				return -EINVAL;
2677			}
2678			r = r600_dma_cs_next_reloc(p, &dst_reloc);
2679			if (r) {
				/* NOTE(review): error string says WRITE — looks like
				 * a copy/paste slip; string left untouched in this
				 * doc-only pass. */
2680				DRM_ERROR("bad DMA_PACKET_WRITE\n");
2681				return -EINVAL;
2682			}
			/* dst bits 39:32 live in bits 23:16 of dword 3, hence the
			 * 0x00ff0000 mask and the 16-bit shifts */
2683			dst_offset = ib[idx+1];
2684			dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
2685			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2686				dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
2687					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2688				return -EINVAL;
2689			}
2690			ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2691			ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
2692			p->idx += 4;
2693			break;
2694		case DMA_PACKET_NOP:
2695			p->idx += 1;
2696			break;
2697		default:
2698			DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
2699			return -EINVAL;
2700		}
2701	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
2702#if 0
	/* NOTE(review): dead debug dump; "p->ib->length_dw" disagrees with the
	 * "p->ib.ptr" member access used above (struct vs pointer) and would
	 * not compile if enabled. */
2703	for (r = 0; r < p->ib->length_dw; r++) {
2704		printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
2705		mdelay(1);
2706	}
2707#endif
2708	return 0;
2709}