author    Alex Deucher <alexander.deucher@amd.com>  2012-09-27 15:08:35 -0400
committer Alex Deucher <alexander.deucher@amd.com>  2012-12-10 16:53:23 -0500
commit    4d75658bffea78f0c6f82fd46df1ec983ccacdf0 (patch)
tree      a6c111fe8fb7ebb76af46924ec0bc5c8f7cc961b /drivers/gpu
parent    71bfe916ebe6d026cd3d0e41c398574fc1228e03 (diff)
drm/radeon/kms: Add initial support for async DMA on r6xx/r7xx
Uses the new multi-ring infrastructure. 6xx/7xx has a single async DMA ring.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/drm/radeon/r600.c        | 471
-rw-r--r--  drivers/gpu/drm/radeon/r600d.h       |  54
-rw-r--r--  drivers/gpu/drm/radeon/radeon.h      |  11
-rw-r--r--  drivers/gpu/drm/radeon/radeon_asic.c |  39
-rw-r--r--  drivers/gpu/drm/radeon/radeon_asic.h |  13
-rw-r--r--  drivers/gpu/drm/radeon/rv770.c       |  31
-rw-r--r--  drivers/gpu/drm/radeon/rv770d.h      |  23
7 files changed, 630 insertions, 12 deletions
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index cda280d157da..ee06c8781cd4 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1370,6 +1370,29 @@ bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 	return radeon_ring_test_lockup(rdev, ring);
 }
 
+/**
+ * r600_dma_is_lockup - Check if the DMA engine is locked up
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Check if the async DMA engine is locked up (r6xx-evergreen).
+ * Returns true if the engine appears to be locked up, false if not.
+ */
+bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	u32 dma_status_reg;
+
+	dma_status_reg = RREG32(DMA_STATUS_REG);
+	if (dma_status_reg & DMA_IDLE) {
+		radeon_ring_lockup_update(ring);
+		return false;
+	}
+	/* force ring activities */
+	radeon_ring_force_activity(rdev, ring);
+	return radeon_ring_test_lockup(rdev, ring);
+}
+
 int r600_asic_reset(struct radeon_device *rdev)
 {
 	return r600_gpu_soft_reset(rdev);
@@ -1594,6 +1617,7 @@ static void r600_gpu_init(struct radeon_device *rdev)
 	WREG32(GB_TILING_CONFIG, tiling_config);
 	WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff);
 	WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff);
+	WREG32(DMA_TILING_CONFIG, tiling_config & 0xffff);
 
 	tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8);
 	WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK);
@@ -1871,6 +1895,7 @@ void r600_cp_stop(struct radeon_device *rdev)
 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
 	WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1));
 	WREG32(SCRATCH_UMSK, 0);
+	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
 }
 
 int r600_init_microcode(struct radeon_device *rdev)
@@ -2196,6 +2221,128 @@ void r600_cp_fini(struct radeon_device *rdev)
 	radeon_scratch_free(rdev, ring->rptr_save_reg);
 }
 
+/*
+ * DMA
+ * Starting with R600, the GPU has an asynchronous
+ * DMA engine. The programming model is very similar
+ * to the 3D engine (ring buffer, IBs, etc.), but the
+ * DMA controller has its own packet format that is
+ * different from the PM4 format used by the 3D engine.
+ * It supports copying data, writing embedded data,
+ * solid fills, and a number of other things. It also
+ * has support for tiling/detiling of buffers.
+ */
+/**
+ * r600_dma_stop - stop the async dma engine
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engine (r6xx-evergreen).
+ */
+void r600_dma_stop(struct radeon_device *rdev)
+{
+	u32 rb_cntl = RREG32(DMA_RB_CNTL);
+
+	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+
+	rb_cntl &= ~DMA_RB_ENABLE;
+	WREG32(DMA_RB_CNTL, rb_cntl);
+
+	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
+}
+
+/**
+ * r600_dma_resume - setup and start the async dma engine
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set up the DMA ring buffer and enable it (r6xx-evergreen).
+ * Returns 0 for success, error for failure.
+ */
+int r600_dma_resume(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+	u32 rb_cntl, dma_cntl;
+	u32 rb_bufsz;
+	int r;
+
+	/* Reset dma */
+	if (rdev->family >= CHIP_RV770)
+		WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
+	else
+		WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
+	RREG32(SRBM_SOFT_RESET);
+	udelay(50);
+	WREG32(SRBM_SOFT_RESET, 0);
+
+	WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
+	WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
+
+	/* Set ring buffer size in dwords */
+	rb_bufsz = drm_order(ring->ring_size / 4);
+	rb_cntl = rb_bufsz << 1;
+#ifdef __BIG_ENDIAN
+	rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+#endif
+	WREG32(DMA_RB_CNTL, rb_cntl);
+
+	/* Initialize the ring buffer's read and write pointers */
+	WREG32(DMA_RB_RPTR, 0);
+	WREG32(DMA_RB_WPTR, 0);
+
+	/* set the wb address whether it's enabled or not */
+	WREG32(DMA_RB_RPTR_ADDR_HI,
+	       upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
+	WREG32(DMA_RB_RPTR_ADDR_LO,
+	       ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));
+
+	if (rdev->wb.enabled)
+		rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+
+	WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);
+
+	/* enable DMA IBs */
+	WREG32(DMA_IB_CNTL, DMA_IB_ENABLE);
+
+	dma_cntl = RREG32(DMA_CNTL);
+	dma_cntl &= ~CTXEMPTY_INT_ENABLE;
+	WREG32(DMA_CNTL, dma_cntl);
+
+	if (rdev->family >= CHIP_RV770)
+		WREG32(DMA_MODE, 1);
+
+	ring->wptr = 0;
+	WREG32(DMA_RB_WPTR, ring->wptr << 2);
+
+	ring->rptr = RREG32(DMA_RB_RPTR) >> 2;
+
+	WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);
+
+	ring->ready = true;
+
+	r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
+	if (r) {
+		ring->ready = false;
+		return r;
+	}
+
+	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+
+	return 0;
+}
+
+/**
+ * r600_dma_fini - tear down the async dma engine
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engine and free the ring (r6xx-evergreen).
+ */
+void r600_dma_fini(struct radeon_device *rdev)
+{
+	r600_dma_stop(rdev);
+	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+}
 
 /*
  * GPU scratch registers helpers function.
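An aside on the ring setup above: DMA_RB_CNTL takes the ring size as a log2 count of dwords, which is why r600_dma_resume() feeds drm_order(ring->ring_size / 4) into the DMA_RB_SIZE field. A minimal standalone sketch of that encoding, with order_base_2() standing in for the kernel's drm_order() helper and the 64 KiB size matching the ring allocated in r600_init() below:

#include <stdio.h>

/* Stand-in for the kernel's drm_order(): log2 of the next power of two. */
static unsigned int order_base_2(unsigned long size)
{
        unsigned int order = 0;

        while ((1UL << order) < size)
                order++;
        return order;
}

int main(void)
{
        unsigned long ring_size = 64 * 1024;                 /* bytes, as allocated in r600_init() */
        unsigned int rb_bufsz = order_base_2(ring_size / 4); /* ring size in dwords, log2 */
        unsigned int rb_cntl = rb_bufsz << 1;                /* DMA_RB_SIZE(x) is ((x) << 1) */

        printf("rb_bufsz = %u, DMA_RB_CNTL size bits = 0x%x\n", rb_bufsz, rb_cntl);
        return 0;
}

For a 64 KiB ring this prints rb_bufsz = 14 and size bits 0x1c.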
@@ -2252,6 +2399,64 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	return r;
 }
 
+/**
+ * r600_dma_ring_test - simple async dma engine test
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Test the DMA engine by using it to write a value to memory (r6xx-SI).
+ * Returns 0 for success, error for failure.
+ */
+int r600_dma_ring_test(struct radeon_device *rdev,
+		       struct radeon_ring *ring)
+{
+	unsigned i;
+	int r;
+	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+	u32 tmp;
+
+	if (!ptr) {
+		DRM_ERROR("invalid vram scratch pointer\n");
+		return -EINVAL;
+	}
+
+	tmp = 0xCAFEDEAD;
+	writel(tmp, ptr);
+
+	r = radeon_ring_lock(rdev, ring, 4);
+	if (r) {
+		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
+		return r;
+	}
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
+	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
+	radeon_ring_write(ring, 0xDEADBEEF);
+	radeon_ring_unlock_commit(rdev, ring);
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = readl(ptr);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
+	} else {
+		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
+			  ring->idx, tmp);
+		r = -EINVAL;
+	}
+	return r;
+}
+
+/*
+ * CP fences/semaphores
+ */
+
 void r600_fence_ring_emit(struct radeon_device *rdev,
 			  struct radeon_fence *fence)
 {
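The test above exercises the smallest useful DMA packet: a 4-dword WRITE that stores one dword at a GPU address. A standalone sketch of how those four dwords are assembled (the DMA_PACKET() macro is copied from the r600d.h hunk below; the scratch address is hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Same encoding as the DMA_PACKET() macro added to r600d.h below. */
#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) |       \
                                  (((t) & 0x1) << 23) |         \
                                  (((s) & 0x1) << 22) |         \
                                  (((n) & 0xFFFF) << 0))
#define DMA_PACKET_WRITE 0x2

int main(void)
{
        uint64_t gpu_addr = 0x123456789ULL; /* hypothetical scratch address */
        uint32_t pkt[4];

        pkt[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1); /* write 1 dword */
        pkt[1] = gpu_addr & 0xfffffffc;                 /* address bits 31:2 */
        pkt[2] = (gpu_addr >> 32) & 0xff;               /* address bits 39:32 */
        pkt[3] = 0xDEADBEEF;                            /* payload */

        printf("header = 0x%08X, lo = 0x%08X, hi = 0x%08X\n", pkt[0], pkt[1], pkt[2]);
        return 0;
}

The header comes out as 0x20000001: command 0x2 (WRITE) in bits 31:28, count 1 in the low 16 bits.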
@@ -2315,6 +2520,58 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
 }
 
+/*
+ * DMA fences/semaphores
+ */
+
+/**
+ * r600_dma_fence_ring_emit - emit a fence on the DMA ring
+ *
+ * @rdev: radeon_device pointer
+ * @fence: radeon fence object
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and a DMA trap packet to generate
+ * an interrupt if needed (r6xx-r7xx).
+ */
+void r600_dma_fence_ring_emit(struct radeon_device *rdev,
+			      struct radeon_fence *fence)
+{
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+	/* write the fence */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
+	radeon_ring_write(ring, addr & 0xfffffffc);
+	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
+	radeon_ring_write(ring, fence->seq);
+	/* generate an interrupt */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
+}
+
+/**
+ * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ * @semaphore: radeon semaphore object
+ * @emit_wait: wait or signal semaphore
+ *
+ * Add a DMA semaphore packet to the ring to wait on or signal
+ * other rings (r6xx-SI).
+ */
+void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
+				  struct radeon_ring *ring,
+				  struct radeon_semaphore *semaphore,
+				  bool emit_wait)
+{
+	u64 addr = semaphore->gpu_addr;
+	u32 s = emit_wait ? 0 : 1;
+
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
+	radeon_ring_write(ring, addr & 0xfffffffc);
+	radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
+}
+
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset,
 		   uint64_t dst_offset,
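Note how the semaphore emit encodes wait vs. signal purely in the packet header: the s field of DMA_PACKET() is 0 for a wait and 1 for a signal, per u32 s = emit_wait ? 0 : 1 above. A small sketch of the two resulting headers, reusing the macro from r600d.h:

#include <stdint.h>
#include <stdio.h>

#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) |       \
                                  (((t) & 0x1) << 23) |         \
                                  (((s) & 0x1) << 22) |         \
                                  (((n) & 0xFFFF) << 0))
#define DMA_PACKET_SEMAPHORE 0x5

int main(void)
{
        /* s = 0 emits a wait, s = 1 emits a signal */
        uint32_t wait_hdr = DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, 0, 0);
        uint32_t signal_hdr = DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, 1, 0);

        printf("wait = 0x%08X, signal = 0x%08X\n", wait_hdr, signal_hdr);
        return 0;
}

This prints wait = 0x50000000 and signal = 0x50400000; only bit 22 differs.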
@@ -2334,6 +2591,80 @@ int r600_copy_blit(struct radeon_device *rdev,
 	return 0;
 }
 
+/**
+ * r600_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU pages using the DMA engine (r6xx-r7xx).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int r600_copy_dma(struct radeon_device *rdev,
+		  uint64_t src_offset, uint64_t dst_offset,
+		  unsigned num_gpu_pages,
+		  struct radeon_fence **fence)
+{
+	struct radeon_semaphore *sem = NULL;
+	int ring_index = rdev->asic->copy.dma_ring_index;
+	struct radeon_ring *ring = &rdev->ring[ring_index];
+	u32 size_in_dw, cur_size_in_dw;
+	int i, num_loops;
+	int r = 0;
+
+	r = radeon_semaphore_create(rdev, &sem);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		return r;
+	}
+
+	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
+	num_loops = DIV_ROUND_UP(size_in_dw, 0xffff);
+	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		radeon_semaphore_free(rdev, &sem, NULL);
+		return r;
+	}
+
+	if (radeon_fence_need_sync(*fence, ring->idx)) {
+		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+					    ring->idx);
+		radeon_fence_note_sync(*fence, ring->idx);
+	} else {
+		radeon_semaphore_free(rdev, &sem, NULL);
+	}
+
+	for (i = 0; i < num_loops; i++) {
+		cur_size_in_dw = size_in_dw;
+		if (cur_size_in_dw > 0xFFFF)
+			cur_size_in_dw = 0xFFFF;
+		size_in_dw -= cur_size_in_dw;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+		radeon_ring_write(ring, dst_offset & 0xfffffffc);
+		radeon_ring_write(ring, src_offset & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
+		src_offset += cur_size_in_dw * 4;
+		dst_offset += cur_size_in_dw * 4;
+	}
+
+	r = radeon_fence_emit(rdev, fence, ring->idx);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		return r;
+	}
+
+	radeon_ring_unlock_commit(rdev, ring);
+	radeon_semaphore_free(rdev, &sem, *fence);
+
+	return r;
+}
+
 int r600_set_surface_reg(struct radeon_device *rdev, int reg,
 			 uint32_t tiling_flags, uint32_t pitch,
 			 uint32_t offset, uint32_t obj_size)
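The copy path above splits a transfer into COPY packets of at most 0xFFFF dwords each and reserves ring space up front: 5 dwords per packet plus 8 for the sync and fence packets. A standalone sketch of that sizing arithmetic for a hypothetical 4 MiB move, assuming the 4 KiB GPU page size from radeon.h:

#include <stdio.h>

#define RADEON_GPU_PAGE_SHIFT 12 /* 4 KiB GPU pages, per radeon.h */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        unsigned int num_gpu_pages = 1024; /* hypothetical 4 MiB buffer move */
        unsigned int size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
        unsigned int num_loops = DIV_ROUND_UP(size_in_dw, 0xffff);

        /* 5 dwords per COPY packet plus 8 reserved for semaphore sync + fence */
        printf("size_in_dw = %u, num_loops = %u, ring dwords reserved = %u\n",
               size_in_dw, num_loops, num_loops * 5 + 8);
        return 0;
}

For 1024 pages this gives 1048576 dwords, 17 COPY packets, and 93 ring dwords reserved.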
@@ -2349,7 +2680,7 @@ void r600_clear_surface_reg(struct radeon_device *rdev, int reg)
 
 static int r600_startup(struct radeon_device *rdev)
 {
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	struct radeon_ring *ring;
 	int r;
 
 	/* enable pcie gen2 link */
@@ -2394,6 +2725,12 @@ static int r600_startup(struct radeon_device *rdev)
 		return r;
 	}
 
+	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -2403,12 +2740,20 @@ static int r600_startup(struct radeon_device *rdev)
 	}
 	r600_irq_set(rdev);
 
+	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
 			     R600_CP_RB_RPTR, R600_CP_RB_WPTR,
 			     0, 0xfffff, RADEON_CP_PACKET2);
+	if (r)
+		return r;
 
+	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+			     DMA_RB_RPTR, DMA_RB_WPTR,
+			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
 	if (r)
 		return r;
+
 	r = r600_cp_load_microcode(rdev);
 	if (r)
 		return r;
@@ -2416,6 +2761,10 @@ static int r600_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = r600_dma_resume(rdev);
+	if (r)
+		return r;
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -2471,7 +2820,7 @@ int r600_suspend(struct radeon_device *rdev)
 {
 	r600_audio_fini(rdev);
 	r600_cp_stop(rdev);
-	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+	r600_dma_stop(rdev);
 	r600_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	r600_pcie_gart_disable(rdev);
@@ -2544,6 +2893,9 @@ int r600_init(struct radeon_device *rdev)
 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
 	r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
 
+	rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
+	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -2556,6 +2908,7 @@ int r600_init(struct radeon_device *rdev)
 	if (r) {
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		r600_cp_fini(rdev);
+		r600_dma_fini(rdev);
 		r600_irq_fini(rdev);
 		radeon_wb_fini(rdev);
 		radeon_ib_pool_fini(rdev);
@@ -2572,6 +2925,7 @@ void r600_fini(struct radeon_device *rdev)
 	r600_audio_fini(rdev);
 	r600_blit_fini(rdev);
 	r600_cp_fini(rdev);
+	r600_dma_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
 	radeon_ib_pool_fini(rdev);
@@ -2674,6 +3028,104 @@ free_scratch:
 	return r;
 }
 
+/**
+ * r600_dma_ib_test - test an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Test a simple IB in the DMA ring (r6xx-SI).
+ * Returns 0 on success, error on failure.
+ */
+int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	struct radeon_ib ib;
+	unsigned i;
+	int r;
+	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+	u32 tmp = 0;
+
+	if (!ptr) {
+		DRM_ERROR("invalid vram scratch pointer\n");
+		return -EINVAL;
+	}
+
+	tmp = 0xCAFEDEAD;
+	writel(tmp, ptr);
+
+	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
+	if (r) {
+		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+		return r;
+	}
+
+	ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
+	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
+	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
+	ib.ptr[3] = 0xDEADBEEF;
+	ib.length_dw = 4;
+
+	r = radeon_ib_schedule(rdev, &ib, NULL);
+	if (r) {
+		radeon_ib_free(rdev, &ib);
+		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+		return r;
+	}
+	r = radeon_fence_wait(ib.fence, false);
+	if (r) {
+		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+		return r;
+	}
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = readl(ptr);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
+	} else {
+		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
+		r = -EINVAL;
+	}
+	radeon_ib_free(rdev, &ib);
+	return r;
+}
+
+/**
+ * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (r6xx-r7xx).
+ */
+void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+	if (rdev->wb.enabled) {
+		u32 next_rptr = ring->wptr + 4;
+		while ((next_rptr & 7) != 5)
+			next_rptr++;
+		next_rptr += 3;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+		radeon_ring_write(ring, next_rptr);
+	}
+
+	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+	 * Pad as necessary with NOPs.
+	 */
+	while ((ring->wptr & 7) != 5)
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
+	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+	radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+
+}
+
 /*
  * Interrupts
  *
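The padding rule in r600_dma_ring_ib_execute() above is worth unpacking: the INDIRECT_BUFFER packet is 3 dwords, so padding with NOPs until wptr % 8 == 5 makes the packet end exactly on an 8-dword boundary (5 + 3 = 8), which the comment in the patch says the DMA ring requires. A small sketch of that arithmetic for an arbitrary starting wptr:

#include <stdio.h>

int main(void)
{
        unsigned int wptr = 11; /* hypothetical ring write pointer, in dwords */
        unsigned int nops = 0;

        /* Pad with NOPs until wptr % 8 == 5; the 3-dword INDIRECT_BUFFER
         * packet then ends exactly on an 8-dword boundary (5 + 3 = 8). */
        while ((wptr & 7) != 5) {
                wptr++;
                nops++;
        }
        printf("%u NOPs emitted; IB packet fills dwords %u..%u\n",
               nops, wptr, wptr + 2);
        return 0;
}

Starting at wptr = 11, two NOPs move it to 13, and the packet occupies dwords 13..15, ending at the boundary of 16.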
@@ -2865,6 +3317,8 @@ static void r600_disable_interrupt_state(struct radeon_device *rdev)
 	u32 tmp;
 
 	WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+	tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
+	WREG32(DMA_CNTL, tmp);
 	WREG32(GRBM_INT_CNTL, 0);
 	WREG32(DxMODE_INT_MASK, 0);
 	WREG32(D1GRPH_INTERRUPT_CONTROL, 0);
@@ -3006,6 +3460,7 @@ int r600_irq_set(struct radeon_device *rdev)
 	u32 grbm_int_cntl = 0;
 	u32 hdmi0, hdmi1;
 	u32 d1grph = 0, d2grph = 0;
+	u32 dma_cntl;
 
 	if (!rdev->irq.installed) {
 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -3040,12 +3495,19 @@ int r600_irq_set(struct radeon_device *rdev)
 		hdmi0 = RREG32(HDMI0_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK;
 		hdmi1 = RREG32(HDMI1_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK;
 	}
+	dma_cntl = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
 
 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
 		DRM_DEBUG("r600_irq_set: sw int\n");
 		cp_int_cntl |= RB_INT_ENABLE;
 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
 	}
+
+	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
+		DRM_DEBUG("r600_irq_set: sw int dma\n");
+		dma_cntl |= TRAP_ENABLE;
+	}
+
 	if (rdev->irq.crtc_vblank_int[0] ||
 	    atomic_read(&rdev->irq.pflip[0])) {
 		DRM_DEBUG("r600_irq_set: vblank 0\n");
@@ -3090,6 +3552,7 @@ int r600_irq_set(struct radeon_device *rdev)
 	}
 
 	WREG32(CP_INT_CNTL, cp_int_cntl);
+	WREG32(DMA_CNTL, dma_cntl);
 	WREG32(DxMODE_INT_MASK, mode_int);
 	WREG32(D1GRPH_INTERRUPT_CONTROL, d1grph);
 	WREG32(D2GRPH_INTERRUPT_CONTROL, d2grph);
@@ -3469,6 +3932,10 @@ restart_ih:
 			DRM_DEBUG("IH: CP EOP\n");
 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 			break;
+		case 224: /* DMA trap event */
+			DRM_DEBUG("IH: DMA trap\n");
+			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
+			break;
 		case 233: /* GUI IDLE */
 			DRM_DEBUG("IH: GUI idle\n");
 			break;
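The interrupt plumbing above follows the same pattern as the CP: a DMA_PACKET_TRAP raises IH source id 224 only when TRAP_ENABLE is set in DMA_CNTL, and the handler then runs fence processing for the DMA ring. A minimal sketch of the enable-bit bookkeeping (the register value is hypothetical; TRAP_ENABLE matches the r600d.h hunk below):

#include <stdint.h>
#include <stdio.h>

#define TRAP_ENABLE (1 << 0) /* DMA_CNTL bit, from the r600d.h hunk below */

int main(void)
{
        uint32_t dma_cntl = 0x10000000; /* hypothetical current DMA_CNTL value */
        int dma_irq_requested = 1;      /* i.e. the DMA ring's irq refcount != 0 */

        dma_cntl &= ~TRAP_ENABLE;       /* default: traps (IH source 224) off */
        if (dma_irq_requested)
                dma_cntl |= TRAP_ENABLE; /* DMA_PACKET_TRAP will now interrupt */

        printf("DMA_CNTL = 0x%08X\n", dma_cntl);
        return 0;
}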
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index fa6f37099ba9..a596c554a3a0 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -590,9 +590,59 @@
 #define WAIT_2D_IDLECLEAN_bit                             (1 << 16)
 #define WAIT_3D_IDLECLEAN_bit                             (1 << 17)
 
+/* async DMA */
+#define DMA_TILING_CONFIG                                 0x3ec4
+#define DMA_CONFIG                                        0x3e4c
+
+#define DMA_RB_CNTL                                       0xd000
+#       define DMA_RB_ENABLE                              (1 << 0)
+#       define DMA_RB_SIZE(x)                             ((x) << 1) /* log2 */
+#       define DMA_RB_SWAP_ENABLE                         (1 << 9) /* 8IN32 */
+#       define DMA_RPTR_WRITEBACK_ENABLE                  (1 << 12)
+#       define DMA_RPTR_WRITEBACK_SWAP_ENABLE             (1 << 13) /* 8IN32 */
+#       define DMA_RPTR_WRITEBACK_TIMER(x)                ((x) << 16) /* log2 */
+#define DMA_RB_BASE                                       0xd004
+#define DMA_RB_RPTR                                       0xd008
+#define DMA_RB_WPTR                                       0xd00c
+
+#define DMA_RB_RPTR_ADDR_HI                               0xd01c
+#define DMA_RB_RPTR_ADDR_LO                               0xd020
+
+#define DMA_IB_CNTL                                       0xd024
+#       define DMA_IB_ENABLE                              (1 << 0)
+#       define DMA_IB_SWAP_ENABLE                         (1 << 4)
+#define DMA_IB_RPTR                                       0xd028
+#define DMA_CNTL                                          0xd02c
+#       define TRAP_ENABLE                                (1 << 0)
+#       define SEM_INCOMPLETE_INT_ENABLE                  (1 << 1)
+#       define SEM_WAIT_INT_ENABLE                        (1 << 2)
+#       define DATA_SWAP_ENABLE                           (1 << 3)
+#       define FENCE_SWAP_ENABLE                          (1 << 4)
+#       define CTXEMPTY_INT_ENABLE                        (1 << 28)
+#define DMA_STATUS_REG                                    0xd034
+#       define DMA_IDLE                                   (1 << 0)
+#define DMA_SEM_INCOMPLETE_TIMER_CNTL                     0xd044
+#define DMA_SEM_WAIT_FAIL_TIMER_CNTL                      0xd048
+#define DMA_MODE                                          0xd0bc
+
+/* async DMA packets */
+#define DMA_PACKET(cmd, t, s, n)    ((((cmd) & 0xF) << 28) |    \
+                                     (((t) & 0x1) << 23) |      \
+                                     (((s) & 0x1) << 22) |      \
+                                     (((n) & 0xFFFF) << 0))
+/* async DMA Packet types */
+#define DMA_PACKET_WRITE                                  0x2
+#define DMA_PACKET_COPY                                   0x3
+#define DMA_PACKET_INDIRECT_BUFFER                        0x4
+#define DMA_PACKET_SEMAPHORE                              0x5
+#define DMA_PACKET_FENCE                                  0x6
+#define DMA_PACKET_TRAP                                   0x7
+#define DMA_PACKET_CONSTANT_FILL                          0xd /* 7xx only */
+#define DMA_PACKET_NOP                                    0xf
+
 #define IH_RB_CNTL                                        0x3e00
 #       define IH_RB_ENABLE                               (1 << 0)
 #       define IH_IB_SIZE(x)                              ((x) << 1) /* log2 */
 #       define IH_RB_FULL_DRAIN_ENABLE                    (1 << 6)
 #       define IH_WPTR_WRITEBACK_ENABLE                   (1 << 8)
 #       define IH_WPTR_WRITEBACK_TIMER(x)                 ((x) << 9) /* log2 */
@@ -637,7 +687,9 @@
 #define TN_RLC_CLEAR_STATE_RESTORE_BASE                   0x3f20
 
 #define SRBM_SOFT_RESET                                   0xe60
+#       define SOFT_RESET_DMA                             (1 << 12)
 #       define SOFT_RESET_RLC                             (1 << 13)
+#       define RV770_SOFT_RESET_DMA                       (1 << 20)
 
 #define CP_INT_CNTL                                       0xc124
 #       define CNTX_BUSY_INT_ENABLE                       (1 << 19)
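Given the DMA_PACKET() macro above, every DMA packet header carries cmd in bits 31:28, t in bit 23, s in bit 22, and a 16-bit count n in bits 15:0. A small decoder sketch round-tripping two headers used elsewhere in this patch:

#include <stdint.h>
#include <stdio.h>

/* Field layout per DMA_PACKET(): cmd[31:28] t[23] s[22] n[15:0]. */
static void decode(uint32_t hdr)
{
        printf("hdr 0x%08X: cmd=0x%X t=%u s=%u n=%u\n", hdr,
               (hdr >> 28) & 0xF, (hdr >> 23) & 1, (hdr >> 22) & 1, hdr & 0xFFFF);
}

int main(void)
{
        decode(0x20000001); /* DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1) */
        decode(0xF0000000); /* DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)   */
        return 0;
}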
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8c42d54c2e26..461bf53709f5 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -109,7 +109,7 @@ extern int radeon_lockup_timeout
 #define RADEON_BIOS_NUM_SCRATCH			8
 
 /* max number of rings */
-#define RADEON_NUM_RINGS			3
+#define RADEON_NUM_RINGS			4
 
 /* fence seq are set to this number when signaled */
 #define RADEON_FENCE_SIGNALED_SEQ		0LL
@@ -122,6 +122,9 @@ extern int radeon_lockup_timeout
 #define CAYMAN_RING_TYPE_CP1_INDEX		1
 #define CAYMAN_RING_TYPE_CP2_INDEX		2
 
+/* R600+ has an async dma ring */
+#define R600_RING_TYPE_DMA_INDEX		3
+
 /* hardcode those limit for now */
 #define RADEON_VA_IB_OFFSET			(1 << 20)
 #define RADEON_VA_RESERVED_SIZE			(8 << 20)
@@ -787,6 +790,11 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigne
 void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp);
 
 
+/* r600 async dma */
+void r600_dma_stop(struct radeon_device *rdev);
+int r600_dma_resume(struct radeon_device *rdev);
+void r600_dma_fini(struct radeon_device *rdev);
+
 /*
  * CS.
  */
@@ -883,6 +891,7 @@ struct radeon_wb {
 #define RADEON_WB_CP_RPTR_OFFSET 1024
 #define RADEON_WB_CP1_RPTR_OFFSET 1280
 #define RADEON_WB_CP2_RPTR_OFFSET 1536
+#define R600_WB_DMA_RPTR_OFFSET 1792
 #define R600_WB_IH_WPTR_OFFSET 2048
 #define R600_WB_EVENT_OFFSET 3072
 
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 654520b95ab7..3cf9b29fb53f 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -947,6 +947,15 @@ static struct radeon_asic r600_asic = {
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &r600_gpu_is_lockup,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &r600_dma_ring_ib_execute,
+			.emit_fence = &r600_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = NULL,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &r600_dma_is_lockup,
 		}
 	},
 	.irq = {
@@ -963,8 +972,8 @@ static struct radeon_asic r600_asic = {
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &r600_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
 		.copy = &r600_copy_blit,
 		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 	},
@@ -1022,6 +1031,15 @@ static struct radeon_asic rs780_asic = {
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &r600_gpu_is_lockup,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &r600_dma_ring_ib_execute,
+			.emit_fence = &r600_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = NULL,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &r600_dma_is_lockup,
 		}
 	},
 	.irq = {
@@ -1038,8 +1056,8 @@ static struct radeon_asic rs780_asic = {
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &r600_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
 		.copy = &r600_copy_blit,
 		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 	},
@@ -1097,6 +1115,15 @@ static struct radeon_asic rv770_asic = {
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &r600_gpu_is_lockup,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &r600_dma_ring_ib_execute,
+			.emit_fence = &r600_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = NULL,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &r600_dma_is_lockup,
 		}
 	},
 	.irq = {
@@ -1113,8 +1140,8 @@ static struct radeon_asic rv770_asic = {
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &r600_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
 		.copy = &r600_copy_blit,
 		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 	},
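The tables above are how the new engine actually gets used: TTM buffer moves dispatch through the asic copy callbacks, and pointing .dma at r600_copy_dma (instead of NULL) makes the async ring available for moves. A simplified sketch of that function-table dispatch pattern (the types and call site here are invented for illustration, not the driver's real ones):

#include <stdio.h>

/* Simplified stand-ins for the driver's asic function-table dispatch;
 * none of these types or names are the real radeon ones. */
typedef int (*copy_fn)(const char *what);

static int copy_blit(const char *what) { printf("blit copy: %s\n", what); return 0; }
static int copy_dma(const char *what) { printf("dma copy: %s\n", what); return 0; }

struct copy_ops {
        copy_fn blit;
        copy_fn dma; /* was NULL before this patch; now the async DMA path */
        int dma_ring_index;
};

int main(void)
{
        struct copy_ops ops = { copy_blit, copy_dma, 3 /* R600_RING_TYPE_DMA_INDEX */ };

        /* TTM-style call site: use the DMA engine when the asic wires it up. */
        if (ops.dma)
                ops.dma("page migration");
        else
                ops.blit("page migration");
        return 0;
}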
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 5e3a0e5c6be1..70a5b1f0e43e 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -309,6 +309,14 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev,
 			      struct radeon_ring *cp,
 			      struct radeon_semaphore *semaphore,
 			      bool emit_wait);
+void r600_dma_fence_ring_emit(struct radeon_device *rdev,
+			      struct radeon_fence *fence);
+void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
+				  struct radeon_ring *ring,
+				  struct radeon_semaphore *semaphore,
+				  bool emit_wait);
+void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
 bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
 int r600_asic_reset(struct radeon_device *rdev);
 int r600_set_surface_reg(struct radeon_device *rdev, int reg,
@@ -316,11 +324,16 @@ int r600_set_surface_reg(struct radeon_device *rdev, int reg,
 			 uint32_t offset, uint32_t obj_size);
 void r600_clear_surface_reg(struct radeon_device *rdev, int reg);
 int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
+int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
 void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
 int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
+int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset, uint64_t dst_offset,
 		   unsigned num_gpu_pages, struct radeon_fence **fence);
+int r600_copy_dma(struct radeon_device *rdev,
+		  uint64_t src_offset, uint64_t dst_offset,
+		  unsigned num_gpu_pages, struct radeon_fence **fence);
 void r600_hpd_init(struct radeon_device *rdev);
 void r600_hpd_fini(struct radeon_device *rdev);
 bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 79814a08c8e5..87c979c4f721 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -316,6 +316,7 @@ void r700_cp_stop(struct radeon_device *rdev)
 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
 	WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
 	WREG32(SCRATCH_UMSK, 0);
+	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
 }
 
 static int rv770_cp_load_microcode(struct radeon_device *rdev)
@@ -583,6 +584,8 @@ static void rv770_gpu_init(struct radeon_device *rdev)
 	WREG32(GB_TILING_CONFIG, gb_tiling_config);
 	WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
 	WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
+	WREG32(DMA_TILING_CONFIG, (gb_tiling_config & 0xffff));
+	WREG32(DMA_TILING_CONFIG2, (gb_tiling_config & 0xffff));
 
 	WREG32(CGTS_SYS_TCC_DISABLE, 0);
 	WREG32(CGTS_TCC_DISABLE, 0);
@@ -886,7 +889,7 @@ static int rv770_mc_init(struct radeon_device *rdev)
 
 static int rv770_startup(struct radeon_device *rdev)
 {
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	struct radeon_ring *ring;
 	int r;
 
 	/* enable pcie gen2 link */
@@ -932,6 +935,12 @@ static int rv770_startup(struct radeon_device *rdev)
 		return r;
 	}
 
+	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -941,11 +950,20 @@ static int rv770_startup(struct radeon_device *rdev)
 	}
 	r600_irq_set(rdev);
 
+	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
 			     R600_CP_RB_RPTR, R600_CP_RB_WPTR,
 			     0, 0xfffff, RADEON_CP_PACKET2);
 	if (r)
 		return r;
+
+	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+			     DMA_RB_RPTR, DMA_RB_WPTR,
+			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	if (r)
+		return r;
+
 	r = rv770_cp_load_microcode(rdev);
 	if (r)
 		return r;
@@ -953,6 +971,10 @@ static int rv770_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = r600_dma_resume(rdev);
+	if (r)
+		return r;
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -995,7 +1017,7 @@ int rv770_suspend(struct radeon_device *rdev)
 {
 	r600_audio_fini(rdev);
 	r700_cp_stop(rdev);
-	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+	r600_dma_stop(rdev);
 	r600_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	rv770_pcie_gart_disable(rdev);
@@ -1066,6 +1088,9 @@ int rv770_init(struct radeon_device *rdev)
 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
 	r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
 
+	rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
+	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -1078,6 +1103,7 @@ int rv770_init(struct radeon_device *rdev)
 	if (r) {
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		r700_cp_fini(rdev);
+		r600_dma_fini(rdev);
 		r600_irq_fini(rdev);
 		radeon_wb_fini(rdev);
 		radeon_ib_pool_fini(rdev);
@@ -1093,6 +1119,7 @@ void rv770_fini(struct radeon_device *rdev)
 {
 	r600_blit_fini(rdev);
 	r700_cp_fini(rdev);
+	r600_dma_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
 	radeon_ib_pool_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h
index e2d9dc8e751e..20e29d23d348 100644
--- a/drivers/gpu/drm/radeon/rv770d.h
+++ b/drivers/gpu/drm/radeon/rv770d.h
@@ -109,6 +109,9 @@
 #define PIPE_TILING__SHIFT		1
 #define PIPE_TILING__MASK		0x0000000e
 
+#define DMA_TILING_CONFIG                                 0x3ec8
+#define DMA_TILING_CONFIG2                                0xd0b8
+
 #define	GC_USER_SHADER_PIPE_CONFIG			0x8954
 #define		INACTIVE_QD_PIPES(x)			((x) << 8)
 #define		INACTIVE_QD_PIPES_MASK			0x0000FF00
@@ -358,6 +361,26 @@
 
 #define	WAIT_UNTIL					0x8040
 
+/* async DMA */
+#define DMA_RB_RPTR                                       0xd008
+#define DMA_RB_WPTR                                       0xd00c
+
+/* async DMA packets */
+#define DMA_PACKET(cmd, t, s, n)    ((((cmd) & 0xF) << 28) |    \
+                                     (((t) & 0x1) << 23) |      \
+                                     (((s) & 0x1) << 22) |      \
+                                     (((n) & 0xFFFF) << 0))
+/* async DMA Packet types */
+#define DMA_PACKET_WRITE                                  0x2
+#define DMA_PACKET_COPY                                   0x3
+#define DMA_PACKET_INDIRECT_BUFFER                        0x4
+#define DMA_PACKET_SEMAPHORE                              0x5
+#define DMA_PACKET_FENCE                                  0x6
+#define DMA_PACKET_TRAP                                   0x7
+#define DMA_PACKET_CONSTANT_FILL                          0xd
+#define DMA_PACKET_NOP                                    0xf
+
+
 #define SRBM_STATUS					0x0E50
 
 /* DCE 3.2 HDMI */