about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/drm/radeon/ni.c
diff options
context:
space:
mode:
author: Christian König <christian.koenig@amd.com> 2013-08-13 05:56:54 -0400
committer: Alex Deucher <alexander.deucher@amd.com> 2013-08-30 16:30:42 -0400
commit: 2483b4ea982efe8a544697d3f9642932e9af4dc1 (patch)
tree: f739e1b55b5e200817c174d4eae6f22935d152bf /drivers/gpu/drm/radeon/ni.c
parent: e409b128625732926c112cc9b709fb7bb1aa387f (diff)
drm/radeon: separate DMA code
Similar to separating the UVD code, just put the DMA functions into separate files.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/ni.c')
-rw-r--r-- drivers/gpu/drm/radeon/ni.c 293
1 file changed, 7 insertions, 286 deletions
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 0205fa1594fa..2db8ce0023ac 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -174,6 +174,11 @@ extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
174extern void evergreen_program_aspm(struct radeon_device *rdev); 174extern void evergreen_program_aspm(struct radeon_device *rdev);
175extern void sumo_rlc_fini(struct radeon_device *rdev); 175extern void sumo_rlc_fini(struct radeon_device *rdev);
176extern int sumo_rlc_init(struct radeon_device *rdev); 176extern int sumo_rlc_init(struct radeon_device *rdev);
177extern void cayman_dma_vm_set_page(struct radeon_device *rdev,
178 struct radeon_ib *ib,
179 uint64_t pe,
180 uint64_t addr, unsigned count,
181 uint32_t incr, uint32_t flags);
177 182
178/* Firmware Names */ 183/* Firmware Names */
179MODULE_FIRMWARE("radeon/BARTS_pfp.bin"); 184MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
@@ -1595,186 +1600,7 @@ static int cayman_cp_resume(struct radeon_device *rdev)
1595 return 0; 1600 return 0;
1596} 1601}
1597 1602
1598/* 1603u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
1599 * DMA
1600 * Starting with R600, the GPU has an asynchronous
1601 * DMA engine. The programming model is very similar
1602 * to the 3D engine (ring buffer, IBs, etc.), but the
1603 * DMA controller has its own packet format that is
1604 * different from the PM4 format used by the 3D engine.
1605 * It supports copying data, writing embedded data,
1606 * solid fills, and a number of other things. It also
1607 * has support for tiling/detiling of buffers.
1608 * Cayman and newer support two asynchronous DMA engines.
1609 */
1610/**
1611 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
1612 *
1613 * @rdev: radeon_device pointer
1614 * @ib: IB object to schedule
1615 *
1616 * Schedule an IB in the DMA ring (cayman-SI).
1617 */
1618void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
1619 struct radeon_ib *ib)
1620{
1621 struct radeon_ring *ring = &rdev->ring[ib->ring];
1622
1623 if (rdev->wb.enabled) {
1624 u32 next_rptr = ring->wptr + 4;
1625 while ((next_rptr & 7) != 5)
1626 next_rptr++;
1627 next_rptr += 3;
1628 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
1629 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1630 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
1631 radeon_ring_write(ring, next_rptr);
1632 }
1633
1634 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
1635 * Pad as necessary with NOPs.
1636 */
1637 while ((ring->wptr & 7) != 5)
1638 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
1639 radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
1640 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
1641 radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
1642
1643}
1644
1645/**
1646 * cayman_dma_stop - stop the async dma engines
1647 *
1648 * @rdev: radeon_device pointer
1649 *
1650 * Stop the async dma engines (cayman-SI).
1651 */
1652void cayman_dma_stop(struct radeon_device *rdev)
1653{
1654 u32 rb_cntl;
1655
1656 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1657
1658 /* dma0 */
1659 rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
1660 rb_cntl &= ~DMA_RB_ENABLE;
1661 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
1662
1663 /* dma1 */
1664 rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
1665 rb_cntl &= ~DMA_RB_ENABLE;
1666 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
1667
1668 rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
1669 rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
1670}
1671
1672/**
1673 * cayman_dma_resume - setup and start the async dma engines
1674 *
1675 * @rdev: radeon_device pointer
1676 *
1677 * Set up the DMA ring buffers and enable them. (cayman-SI).
1678 * Returns 0 for success, error for failure.
1679 */
1680int cayman_dma_resume(struct radeon_device *rdev)
1681{
1682 struct radeon_ring *ring;
1683 u32 rb_cntl, dma_cntl, ib_cntl;
1684 u32 rb_bufsz;
1685 u32 reg_offset, wb_offset;
1686 int i, r;
1687
1688 /* Reset dma */
1689 WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
1690 RREG32(SRBM_SOFT_RESET);
1691 udelay(50);
1692 WREG32(SRBM_SOFT_RESET, 0);
1693
1694 for (i = 0; i < 2; i++) {
1695 if (i == 0) {
1696 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
1697 reg_offset = DMA0_REGISTER_OFFSET;
1698 wb_offset = R600_WB_DMA_RPTR_OFFSET;
1699 } else {
1700 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
1701 reg_offset = DMA1_REGISTER_OFFSET;
1702 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
1703 }
1704
1705 WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
1706 WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
1707
1708 /* Set ring buffer size in dwords */
1709 rb_bufsz = drm_order(ring->ring_size / 4);
1710 rb_cntl = rb_bufsz << 1;
1711#ifdef __BIG_ENDIAN
1712 rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
1713#endif
1714 WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
1715
1716 /* Initialize the ring buffer's read and write pointers */
1717 WREG32(DMA_RB_RPTR + reg_offset, 0);
1718 WREG32(DMA_RB_WPTR + reg_offset, 0);
1719
1720 /* set the wb address whether it's enabled or not */
1721 WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
1722 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
1723 WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
1724 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
1725
1726 if (rdev->wb.enabled)
1727 rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
1728
1729 WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
1730
1731 /* enable DMA IBs */
1732 ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
1733#ifdef __BIG_ENDIAN
1734 ib_cntl |= DMA_IB_SWAP_ENABLE;
1735#endif
1736 WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);
1737
1738 dma_cntl = RREG32(DMA_CNTL + reg_offset);
1739 dma_cntl &= ~CTXEMPTY_INT_ENABLE;
1740 WREG32(DMA_CNTL + reg_offset, dma_cntl);
1741
1742 ring->wptr = 0;
1743 WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
1744
1745 ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
1746
1747 WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
1748
1749 ring->ready = true;
1750
1751 r = radeon_ring_test(rdev, ring->idx, ring);
1752 if (r) {
1753 ring->ready = false;
1754 return r;
1755 }
1756 }
1757
1758 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
1759
1760 return 0;
1761}
1762
1763/**
1764 * cayman_dma_fini - tear down the async dma engines
1765 *
1766 * @rdev: radeon_device pointer
1767 *
1768 * Stop the async dma engines and free the rings (cayman-SI).
1769 */
1770void cayman_dma_fini(struct radeon_device *rdev)
1771{
1772 cayman_dma_stop(rdev);
1773 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
1774 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
1775}
1776
1777static u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
1778{ 1604{
1779 u32 reset_mask = 0; 1605 u32 reset_mask = 0;
1780 u32 tmp; 1606 u32 tmp;
@@ -2027,34 +1853,6 @@ bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2027 return radeon_ring_test_lockup(rdev, ring); 1853 return radeon_ring_test_lockup(rdev, ring);
2028} 1854}
2029 1855
2030/**
2031 * cayman_dma_is_lockup - Check if the DMA engine is locked up
2032 *
2033 * @rdev: radeon_device pointer
2034 * @ring: radeon_ring structure holding ring information
2035 *
2036 * Check if the async DMA engine is locked up.
2037 * Returns true if the engine appears to be locked up, false if not.
2038 */
2039bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2040{
2041 u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
2042 u32 mask;
2043
2044 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
2045 mask = RADEON_RESET_DMA;
2046 else
2047 mask = RADEON_RESET_DMA1;
2048
2049 if (!(reset_mask & mask)) {
2050 radeon_ring_lockup_update(ring);
2051 return false;
2052 }
2053 /* force ring activities */
2054 radeon_ring_force_activity(rdev, ring);
2055 return radeon_ring_test_lockup(rdev, ring);
2056}
2057
2058static int cayman_startup(struct radeon_device *rdev) 1856static int cayman_startup(struct radeon_device *rdev)
2059{ 1857{
2060 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 1858 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
@@ -2658,61 +2456,7 @@ void cayman_vm_set_page(struct radeon_device *rdev,
2658 } 2456 }
2659 } 2457 }
2660 } else { 2458 } else {
2661 if ((flags & RADEON_VM_PAGE_SYSTEM) || 2459 cayman_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
2662 (count == 1)) {
2663 while (count) {
2664 ndw = count * 2;
2665 if (ndw > 0xFFFFE)
2666 ndw = 0xFFFFE;
2667
2668 /* for non-physically contiguous pages (system) */
2669 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
2670 ib->ptr[ib->length_dw++] = pe;
2671 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2672 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
2673 if (flags & RADEON_VM_PAGE_SYSTEM) {
2674 value = radeon_vm_map_gart(rdev, addr);
2675 value &= 0xFFFFFFFFFFFFF000ULL;
2676 } else if (flags & RADEON_VM_PAGE_VALID) {
2677 value = addr;
2678 } else {
2679 value = 0;
2680 }
2681 addr += incr;
2682 value |= r600_flags;
2683 ib->ptr[ib->length_dw++] = value;
2684 ib->ptr[ib->length_dw++] = upper_32_bits(value);
2685 }
2686 }
2687 while (ib->length_dw & 0x7)
2688 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
2689 } else {
2690 while (count) {
2691 ndw = count * 2;
2692 if (ndw > 0xFFFFE)
2693 ndw = 0xFFFFE;
2694
2695 if (flags & RADEON_VM_PAGE_VALID)
2696 value = addr;
2697 else
2698 value = 0;
2699 /* for physically contiguous pages (vram) */
2700 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
2701 ib->ptr[ib->length_dw++] = pe; /* dst addr */
2702 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2703 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
2704 ib->ptr[ib->length_dw++] = 0;
2705 ib->ptr[ib->length_dw++] = value; /* value */
2706 ib->ptr[ib->length_dw++] = upper_32_bits(value);
2707 ib->ptr[ib->length_dw++] = incr; /* increment size */
2708 ib->ptr[ib->length_dw++] = 0;
2709 pe += ndw * 4;
2710 addr += (ndw / 2) * incr;
2711 count -= ndw / 2;
2712 }
2713 }
2714 while (ib->length_dw & 0x7)
2715 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
2716 } 2460 }
2717} 2461}
2718 2462
@@ -2746,26 +2490,3 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2746 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 2490 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
2747 radeon_ring_write(ring, 0x0); 2491 radeon_ring_write(ring, 0x0);
2748} 2492}
2749
2750void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2751{
2752 struct radeon_ring *ring = &rdev->ring[ridx];
2753
2754 if (vm == NULL)
2755 return;
2756
2757 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
2758 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
2759 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
2760
2761 /* flush hdp cache */
2762 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
2763 radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
2764 radeon_ring_write(ring, 1);
2765
2766 /* bits 0-7 are the VM contexts 0-7 */
2767 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
2768 radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
2769 radeon_ring_write(ring, 1 << vm->id);
2770}
2771