author | Alex Deucher <alexander.deucher@amd.com> | 2012-09-27 15:08:35 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2012-12-10 16:53:23 -0500 |
commit | 4d75658bffea78f0c6f82fd46df1ec983ccacdf0 (patch) | |
tree | a6c111fe8fb7ebb76af46924ec0bc5c8f7cc961b /drivers/gpu/drm/radeon/r600.c | |
parent | 71bfe916ebe6d026cd3d0e41c398574fc1228e03 (diff) |
drm/radeon/kms: Add initial support for async DMA on r6xx/r7xx
Uses the new multi-ring infrastructure. 6xx/7xx has a single
async DMA ring.
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/r600.c')
-rw-r--r-- | drivers/gpu/drm/radeon/r600.c | 471 |
1 file changed, 469 insertions, 2 deletions
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index cda280d157da..ee06c8781cd4 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c | |||
@@ -1370,6 +1370,29 @@ bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) | |||
1370 | return radeon_ring_test_lockup(rdev, ring); | 1370 | return radeon_ring_test_lockup(rdev, ring); |
1371 | } | 1371 | } |
1372 | 1372 | ||
1373 | /** | ||
1374 | * r600_dma_is_lockup - Check if the DMA engine is locked up | ||
1375 | * | ||
1376 | * @rdev: radeon_device pointer | ||
1377 | * @ring: radeon_ring structure holding ring information | ||
1378 | * | ||
1379 | * Check if the async DMA engine is locked up (r6xx-evergreen). | ||
1380 | * Returns true if the engine appears to be locked up, false if not. | ||
1381 | */ | ||
1382 | bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) | ||
1383 | { | ||
1384 | u32 dma_status_reg; | ||
1385 | |||
1386 | dma_status_reg = RREG32(DMA_STATUS_REG); | ||
1387 | if (dma_status_reg & DMA_IDLE) { | ||
1388 | radeon_ring_lockup_update(ring); | ||
1389 | return false; | ||
1390 | } | ||
1391 | /* force ring activities */ | ||
1392 | radeon_ring_force_activity(rdev, ring); | ||
1393 | return radeon_ring_test_lockup(rdev, ring); | ||
1394 | } | ||
1395 | |||
1373 | int r600_asic_reset(struct radeon_device *rdev) | 1396 | int r600_asic_reset(struct radeon_device *rdev) |
1374 | { | 1397 | { |
1375 | return r600_gpu_soft_reset(rdev); | 1398 | return r600_gpu_soft_reset(rdev); |
@@ -1594,6 +1617,7 @@ static void r600_gpu_init(struct radeon_device *rdev) | |||
1594 | WREG32(GB_TILING_CONFIG, tiling_config); | 1617 | WREG32(GB_TILING_CONFIG, tiling_config); |
1595 | WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff); | 1618 | WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff); |
1596 | WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff); | 1619 | WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff); |
1620 | WREG32(DMA_TILING_CONFIG, tiling_config & 0xffff); | ||
1597 | 1621 | ||
1598 | tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8); | 1622 | tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8); |
1599 | WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK); | 1623 | WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK); |
@@ -1871,6 +1895,7 @@ void r600_cp_stop(struct radeon_device *rdev) | |||
1871 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); | 1895 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); |
1872 | WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1)); | 1896 | WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1)); |
1873 | WREG32(SCRATCH_UMSK, 0); | 1897 | WREG32(SCRATCH_UMSK, 0); |
1898 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; | ||
1874 | } | 1899 | } |
1875 | 1900 | ||
1876 | int r600_init_microcode(struct radeon_device *rdev) | 1901 | int r600_init_microcode(struct radeon_device *rdev) |
@@ -2196,6 +2221,128 @@ void r600_cp_fini(struct radeon_device *rdev) | |||
2196 | radeon_scratch_free(rdev, ring->rptr_save_reg); | 2221 | radeon_scratch_free(rdev, ring->rptr_save_reg); |
2197 | } | 2222 | } |
2198 | 2223 | ||
2224 | /* | ||
2225 | * DMA | ||
2226 | * Starting with R600, the GPU has an asynchronous | ||
2227 | * DMA engine. The programming model is very similar | ||
2228 | * to the 3D engine (ring buffer, IBs, etc.), but the | ||
2229 | DMA controller has its own packet format that is | ||
2230 | different from the PM4 format used by the 3D engine. | ||
2231 | * It supports copying data, writing embedded data, | ||
2232 | * solid fills, and a number of other things. It also | ||
2233 | * has support for tiling/detiling of buffers. | ||
2234 | */ | ||
2235 | /** | ||
2236 | * r600_dma_stop - stop the async dma engine | ||
2237 | * | ||
2238 | * @rdev: radeon_device pointer | ||
2239 | * | ||
2240 | * Stop the async dma engine (r6xx-evergreen). | ||
2241 | */ | ||
2242 | void r600_dma_stop(struct radeon_device *rdev) | ||
2243 | { | ||
2244 | u32 rb_cntl = RREG32(DMA_RB_CNTL); | ||
2245 | |||
2246 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); | ||
2247 | |||
2248 | rb_cntl &= ~DMA_RB_ENABLE; | ||
2249 | WREG32(DMA_RB_CNTL, rb_cntl); | ||
2250 | |||
2251 | rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; | ||
2252 | } | ||
2253 | |||
2254 | /** | ||
2255 | * r600_dma_resume - setup and start the async dma engine | ||
2256 | * | ||
2257 | * @rdev: radeon_device pointer | ||
2258 | * | ||
2259 | * Set up the DMA ring buffer and enable it. (r6xx-evergreen). | ||
2260 | * Returns 0 for success, error for failure. | ||
2261 | */ | ||
2262 | int r600_dma_resume(struct radeon_device *rdev) | ||
2263 | { | ||
2264 | struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; | ||
2265 | u32 rb_cntl, dma_cntl; | ||
2266 | u32 rb_bufsz; | ||
2267 | int r; | ||
2268 | |||
2269 | /* Reset dma */ | ||
2270 | if (rdev->family >= CHIP_RV770) | ||
2271 | WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA); | ||
2272 | else | ||
2273 | WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA); | ||
2274 | RREG32(SRBM_SOFT_RESET); | ||
2275 | udelay(50); | ||
2276 | WREG32(SRBM_SOFT_RESET, 0); | ||
2277 | |||
2278 | WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0); | ||
2279 | WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); | ||
2280 | |||
2281 | /* Set ring buffer size in dwords */ | ||
2282 | rb_bufsz = drm_order(ring->ring_size / 4); | ||
2283 | rb_cntl = rb_bufsz << 1; | ||
2284 | #ifdef __BIG_ENDIAN | ||
2285 | rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; | ||
2286 | #endif | ||
2287 | WREG32(DMA_RB_CNTL, rb_cntl); | ||
2288 | |||
2289 | /* Initialize the ring buffer's read and write pointers */ | ||
2290 | WREG32(DMA_RB_RPTR, 0); | ||
2291 | WREG32(DMA_RB_WPTR, 0); | ||
2292 | |||
2293 | /* set the wb address whether it's enabled or not */ | ||
2294 | WREG32(DMA_RB_RPTR_ADDR_HI, | ||
2295 | upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF); | ||
2296 | WREG32(DMA_RB_RPTR_ADDR_LO, | ||
2297 | ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC)); | ||
2298 | |||
2299 | if (rdev->wb.enabled) | ||
2300 | rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; | ||
2301 | |||
2302 | WREG32(DMA_RB_BASE, ring->gpu_addr >> 8); | ||
2303 | |||
2304 | /* enable DMA IBs */ | ||
2305 | WREG32(DMA_IB_CNTL, DMA_IB_ENABLE); | ||
2306 | |||
2307 | dma_cntl = RREG32(DMA_CNTL); | ||
2308 | dma_cntl &= ~CTXEMPTY_INT_ENABLE; | ||
2309 | WREG32(DMA_CNTL, dma_cntl); | ||
2310 | |||
2311 | if (rdev->family >= CHIP_RV770) | ||
2312 | WREG32(DMA_MODE, 1); | ||
2313 | |||
2314 | ring->wptr = 0; | ||
2315 | WREG32(DMA_RB_WPTR, ring->wptr << 2); | ||
2316 | |||
2317 | ring->rptr = RREG32(DMA_RB_RPTR) >> 2; | ||
2318 | |||
2319 | WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE); | ||
2320 | |||
2321 | ring->ready = true; | ||
2322 | |||
2323 | r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring); | ||
2324 | if (r) { | ||
2325 | ring->ready = false; | ||
2326 | return r; | ||
2327 | } | ||
2328 | |||
2329 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); | ||
2330 | |||
2331 | return 0; | ||
2332 | } | ||
2333 | |||
2334 | /** | ||
2335 | * r600_dma_fini - tear down the async dma engine | ||
2336 | * | ||
2337 | * @rdev: radeon_device pointer | ||
2338 | * | ||
2339 | * Stop the async dma engine and free the ring (r6xx-evergreen). | ||
2340 | */ | ||
2341 | void r600_dma_fini(struct radeon_device *rdev) | ||
2342 | { | ||
2343 | r600_dma_stop(rdev); | ||
2344 | radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); | ||
2345 | } | ||
2199 | 2346 | ||
2200 | /* | 2347 | /* |
2201 | * GPU scratch registers helpers function. | 2348 | * GPU scratch registers helpers function. |
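As an aside on the register programming in r600_dma_resume() above, the standalone sketch below reproduces the same arithmetic outside the kernel: the ring size is encoded into DMA_RB_CNTL as log2 of the dword count shifted left by one, and the rptr writeback address is split into an 8-bit high part and a dword-aligned low part. The writeback address used here is a made-up placeholder, and order_base_2() simply stands in for the kernel's drm_order().

```c
/*
 * Standalone sketch (not kernel code) of the register arithmetic in
 * r600_dma_resume() above.  The writeback address is a made-up
 * placeholder; order_base_2() stands in for drm_order().
 */
#include <stdio.h>

static unsigned order_base_2(unsigned n)
{
	unsigned order = 0;

	while ((1u << order) < n)
		order++;
	return order;
}

int main(void)
{
	unsigned ring_size = 64 * 1024;			/* bytes; matches the DMA ring size in r600_init() below */
	unsigned long long rptr_addr = 0x1234567000ull;	/* hypothetical wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET */

	/* DMA_RB_CNTL encodes the ring size as log2(dwords), shifted left by one */
	unsigned rb_bufsz = order_base_2(ring_size / 4);
	unsigned rb_cntl  = rb_bufsz << 1;

	/* rptr writeback address: bits 39:32 go to _HI, dword-aligned low bits to _LO */
	unsigned addr_hi = (unsigned)(rptr_addr >> 32) & 0xFF;
	unsigned addr_lo = (unsigned)(rptr_addr & 0xFFFFFFFC);

	printf("rb_bufsz=%u rb_cntl=0x%x rptr_hi=0x%02x rptr_lo=0x%08x\n",
	       rb_bufsz, rb_cntl, addr_hi, addr_lo);
	return 0;
}
```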
@@ -2252,6 +2399,64 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) | |||
2252 | return r; | 2399 | return r; |
2253 | } | 2400 | } |
2254 | 2401 | ||
2402 | /** | ||
2403 | * r600_dma_ring_test - simple async dma engine test | ||
2404 | * | ||
2405 | * @rdev: radeon_device pointer | ||
2406 | * @ring: radeon_ring structure holding ring information | ||
2407 | * | ||
2408 | * Test the DMA engine by using it to write a | ||
2409 | * value to memory (r6xx-SI). | ||
2410 | * Returns 0 for success, error for failure. | ||
2411 | */ | ||
2412 | int r600_dma_ring_test(struct radeon_device *rdev, | ||
2413 | struct radeon_ring *ring) | ||
2414 | { | ||
2415 | unsigned i; | ||
2416 | int r; | ||
2417 | void __iomem *ptr = (void *)rdev->vram_scratch.ptr; | ||
2418 | u32 tmp; | ||
2419 | |||
2420 | if (!ptr) { | ||
2421 | DRM_ERROR("invalid vram scratch pointer\n"); | ||
2422 | return -EINVAL; | ||
2423 | } | ||
2424 | |||
2425 | tmp = 0xCAFEDEAD; | ||
2426 | writel(tmp, ptr); | ||
2427 | |||
2428 | r = radeon_ring_lock(rdev, ring, 4); | ||
2429 | if (r) { | ||
2430 | DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); | ||
2431 | return r; | ||
2432 | } | ||
2433 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); | ||
2434 | radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); | ||
2435 | radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff); | ||
2436 | radeon_ring_write(ring, 0xDEADBEEF); | ||
2437 | radeon_ring_unlock_commit(rdev, ring); | ||
2438 | |||
2439 | for (i = 0; i < rdev->usec_timeout; i++) { | ||
2440 | tmp = readl(ptr); | ||
2441 | if (tmp == 0xDEADBEEF) | ||
2442 | break; | ||
2443 | DRM_UDELAY(1); | ||
2444 | } | ||
2445 | |||
2446 | if (i < rdev->usec_timeout) { | ||
2447 | DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); | ||
2448 | } else { | ||
2449 | DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", | ||
2450 | ring->idx, tmp); | ||
2451 | r = -EINVAL; | ||
2452 | } | ||
2453 | return r; | ||
2454 | } | ||
2455 | |||
2456 | /* | ||
2457 | * CP fences/semaphores | ||
2458 | */ | ||
2459 | |||
2255 | void r600_fence_ring_emit(struct radeon_device *rdev, | 2460 | void r600_fence_ring_emit(struct radeon_device *rdev, |
2256 | struct radeon_fence *fence) | 2461 | struct radeon_fence *fence) |
2257 | { | 2462 | { |
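The DMA packet writes above and below all go through the DMA_PACKET() macro, which is defined in r600d.h and not part of this file. Judging only from its use in this patch, the opcode sits in the top nibble of the header and the dword count in the low 16 bits (copies are split at 0xFFFF dwords further down); the two middle arguments are single-bit flags whose exact bit positions live in r600d.h. The sketch below encodes just the fields this patch exercises and is an illustration, not the authoritative layout; the HYP_ names and the opcode value are hypothetical.

```c
/*
 * Illustrative header encoding only -- the real DMA_PACKET() macro and
 * the opcode values are in r600d.h.  The two flag arguments are omitted
 * because this patch passes them as zero everywhere except the
 * semaphore packet.
 */
#include <stdio.h>

#define HYP_DMA_PACKET(cmd, n)	((((unsigned)(cmd) & 0xF) << 28) | ((n) & 0xFFFF))
#define HYP_DMA_PACKET_WRITE	0x4	/* hypothetical opcode, for the demo only */

int main(void)
{
	/* one-dword payload, as in the ring test: header, addr_lo, addr_hi, data */
	unsigned header = HYP_DMA_PACKET(HYP_DMA_PACKET_WRITE, 1);

	printf("write-one-dword header: 0x%08x\n", header);
	return 0;
}
```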
@@ -2315,6 +2520,58 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev, | |||
2315 | radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); | 2520 | radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); |
2316 | } | 2521 | } |
2317 | 2522 | ||
2523 | /* | ||
2524 | * DMA fences/semaphores | ||
2525 | */ | ||
2526 | |||
2527 | /** | ||
2528 | * r600_dma_fence_ring_emit - emit a fence on the DMA ring | ||
2529 | * | ||
2530 | * @rdev: radeon_device pointer | ||
2531 | * @fence: radeon fence object | ||
2532 | * | ||
2533 | * Add a DMA fence packet to the ring to write | ||
2534 | * the fence seq number and a DMA trap packet to generate | ||
2535 | * an interrupt if needed (r6xx-r7xx). | ||
2536 | */ | ||
2537 | void r600_dma_fence_ring_emit(struct radeon_device *rdev, | ||
2538 | struct radeon_fence *fence) | ||
2539 | { | ||
2540 | struct radeon_ring *ring = &rdev->ring[fence->ring]; | ||
2541 | u64 addr = rdev->fence_drv[fence->ring].gpu_addr; | ||
2542 | /* write the fence */ | ||
2543 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0)); | ||
2544 | radeon_ring_write(ring, addr & 0xfffffffc); | ||
2545 | radeon_ring_write(ring, (upper_32_bits(addr) & 0xff)); | ||
2546 | radeon_ring_write(ring, fence->seq); | ||
2547 | /* generate an interrupt */ | ||
2548 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0)); | ||
2549 | } | ||
2550 | |||
2551 | /** | ||
2552 | * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring | ||
2553 | * | ||
2554 | * @rdev: radeon_device pointer | ||
2555 | * @ring: radeon_ring structure holding ring information | ||
2556 | * @semaphore: radeon semaphore object | ||
2557 | * @emit_wait: wait or signal semaphore | ||
2558 | * | ||
2559 | * Add a DMA semaphore packet to the ring to wait on or signal | ||
2560 | * other rings (r6xx-SI). | ||
2561 | */ | ||
2562 | void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, | ||
2563 | struct radeon_ring *ring, | ||
2564 | struct radeon_semaphore *semaphore, | ||
2565 | bool emit_wait) | ||
2566 | { | ||
2567 | u64 addr = semaphore->gpu_addr; | ||
2568 | u32 s = emit_wait ? 0 : 1; | ||
2569 | |||
2570 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0)); | ||
2571 | radeon_ring_write(ring, addr & 0xfffffffc); | ||
2572 | radeon_ring_write(ring, upper_32_bits(addr) & 0xff); | ||
2573 | } | ||
2574 | |||
2318 | int r600_copy_blit(struct radeon_device *rdev, | 2575 | int r600_copy_blit(struct radeon_device *rdev, |
2319 | uint64_t src_offset, | 2576 | uint64_t src_offset, |
2320 | uint64_t dst_offset, | 2577 | uint64_t dst_offset, |
@@ -2334,6 +2591,80 @@ int r600_copy_blit(struct radeon_device *rdev, | |||
2334 | return 0; | 2591 | return 0; |
2335 | } | 2592 | } |
2336 | 2593 | ||
2594 | /** | ||
2595 | * r600_copy_dma - copy pages using the DMA engine | ||
2596 | * | ||
2597 | * @rdev: radeon_device pointer | ||
2598 | * @src_offset: src GPU address | ||
2599 | * @dst_offset: dst GPU address | ||
2600 | * @num_gpu_pages: number of GPU pages to xfer | ||
2601 | * @fence: radeon fence object | ||
2602 | * | ||
2603 | * Copy GPU pages using the DMA engine (r6xx-r7xx). | ||
2604 | * Used by the radeon ttm implementation to move pages if | ||
2605 | * registered as the asic copy callback. | ||
2606 | */ | ||
2607 | int r600_copy_dma(struct radeon_device *rdev, | ||
2608 | uint64_t src_offset, uint64_t dst_offset, | ||
2609 | unsigned num_gpu_pages, | ||
2610 | struct radeon_fence **fence) | ||
2611 | { | ||
2612 | struct radeon_semaphore *sem = NULL; | ||
2613 | int ring_index = rdev->asic->copy.dma_ring_index; | ||
2614 | struct radeon_ring *ring = &rdev->ring[ring_index]; | ||
2615 | u32 size_in_dw, cur_size_in_dw; | ||
2616 | int i, num_loops; | ||
2617 | int r = 0; | ||
2618 | |||
2619 | r = radeon_semaphore_create(rdev, &sem); | ||
2620 | if (r) { | ||
2621 | DRM_ERROR("radeon: moving bo (%d).\n", r); | ||
2622 | return r; | ||
2623 | } | ||
2624 | |||
2625 | size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; | ||
2626 | num_loops = DIV_ROUND_UP(size_in_dw, 0xffff); | ||
2627 | r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8); | ||
2628 | if (r) { | ||
2629 | DRM_ERROR("radeon: moving bo (%d).\n", r); | ||
2630 | radeon_semaphore_free(rdev, &sem, NULL); | ||
2631 | return r; | ||
2632 | } | ||
2633 | |||
2634 | if (radeon_fence_need_sync(*fence, ring->idx)) { | ||
2635 | radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, | ||
2636 | ring->idx); | ||
2637 | radeon_fence_note_sync(*fence, ring->idx); | ||
2638 | } else { | ||
2639 | radeon_semaphore_free(rdev, &sem, NULL); | ||
2640 | } | ||
2641 | |||
2642 | for (i = 0; i < num_loops; i++) { | ||
2643 | cur_size_in_dw = size_in_dw; | ||
2644 | if (cur_size_in_dw > 0xFFFF) | ||
2645 | cur_size_in_dw = 0xFFFF; | ||
2646 | size_in_dw -= cur_size_in_dw; | ||
2647 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); | ||
2648 | radeon_ring_write(ring, dst_offset & 0xfffffffc); | ||
2649 | radeon_ring_write(ring, src_offset & 0xfffffffc); | ||
2650 | radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); | ||
2651 | radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); | ||
2652 | src_offset += cur_size_in_dw * 4; | ||
2653 | dst_offset += cur_size_in_dw * 4; | ||
2654 | } | ||
2655 | |||
2656 | r = radeon_fence_emit(rdev, fence, ring->idx); | ||
2657 | if (r) { | ||
2658 | radeon_ring_unlock_undo(rdev, ring); | ||
2659 | return r; | ||
2660 | } | ||
2661 | |||
2662 | radeon_ring_unlock_commit(rdev, ring); | ||
2663 | radeon_semaphore_free(rdev, &sem, *fence); | ||
2664 | |||
2665 | return r; | ||
2666 | } | ||
2667 | |||
2337 | int r600_set_surface_reg(struct radeon_device *rdev, int reg, | 2668 | int r600_set_surface_reg(struct radeon_device *rdev, int reg, |
2338 | uint32_t tiling_flags, uint32_t pitch, | 2669 | uint32_t tiling_flags, uint32_t pitch, |
2339 | uint32_t offset, uint32_t obj_size) | 2670 | uint32_t offset, uint32_t obj_size) |
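A quick worked example of the sizing logic in r600_copy_dma() above: pages are converted to dwords, split into chunks of at most 0xFFFF dwords, and each chunk costs five ring dwords (header plus two destination/source address pairs). The 4 KiB GPU page size and the purpose of the extra eight reserved dwords (room for the fence/trap emission and padding) are assumptions consistent with the rest of the patch.

```c
/*
 * Standalone sketch of the ring-space calculation in r600_copy_dma().
 * GPU pages are assumed to be 4 KiB (RADEON_GPU_PAGE_SHIFT == 12); the
 * extra 8 reserved dwords presumably cover the fence/trap packets.
 */
#include <stdio.h>

#define GPU_PAGE_SHIFT	12
#define MAX_COPY_DW	0xFFFFu		/* per-DMA_PACKET_COPY dword limit used by the patch */

int main(void)
{
	unsigned num_gpu_pages = 2048;	/* e.g. an 8 MiB buffer object */
	unsigned long long size_in_dw = ((unsigned long long)num_gpu_pages << GPU_PAGE_SHIFT) / 4;
	unsigned num_loops = (unsigned)((size_in_dw + MAX_COPY_DW - 1) / MAX_COPY_DW);
	unsigned ring_dw = num_loops * 5 + 8;	/* dwords requested from radeon_ring_lock() */

	printf("%u pages -> %llu dwords, %u copy packets, %u ring dwords reserved\n",
	       num_gpu_pages, size_in_dw, num_loops, ring_dw);
	return 0;
}
```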
@@ -2349,7 +2680,7 @@ void r600_clear_surface_reg(struct radeon_device *rdev, int reg) | |||
2349 | 2680 | ||
2350 | static int r600_startup(struct radeon_device *rdev) | 2681 | static int r600_startup(struct radeon_device *rdev) |
2351 | { | 2682 | { |
2352 | struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; | 2683 | struct radeon_ring *ring; |
2353 | int r; | 2684 | int r; |
2354 | 2685 | ||
2355 | /* enable pcie gen2 link */ | 2686 | /* enable pcie gen2 link */ |
@@ -2394,6 +2725,12 @@ static int r600_startup(struct radeon_device *rdev) | |||
2394 | return r; | 2725 | return r; |
2395 | } | 2726 | } |
2396 | 2727 | ||
2728 | r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); | ||
2729 | if (r) { | ||
2730 | dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); | ||
2731 | return r; | ||
2732 | } | ||
2733 | |||
2397 | /* Enable IRQ */ | 2734 | /* Enable IRQ */ |
2398 | r = r600_irq_init(rdev); | 2735 | r = r600_irq_init(rdev); |
2399 | if (r) { | 2736 | if (r) { |
@@ -2403,12 +2740,20 @@ static int r600_startup(struct radeon_device *rdev) | |||
2403 | } | 2740 | } |
2404 | r600_irq_set(rdev); | 2741 | r600_irq_set(rdev); |
2405 | 2742 | ||
2743 | ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; | ||
2406 | r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, | 2744 | r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, |
2407 | R600_CP_RB_RPTR, R600_CP_RB_WPTR, | 2745 | R600_CP_RB_RPTR, R600_CP_RB_WPTR, |
2408 | 0, 0xfffff, RADEON_CP_PACKET2); | 2746 | 0, 0xfffff, RADEON_CP_PACKET2); |
2747 | if (r) | ||
2748 | return r; | ||
2409 | 2749 | ||
2750 | ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; | ||
2751 | r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, | ||
2752 | DMA_RB_RPTR, DMA_RB_WPTR, | ||
2753 | 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); | ||
2410 | if (r) | 2754 | if (r) |
2411 | return r; | 2755 | return r; |
2756 | |||
2412 | r = r600_cp_load_microcode(rdev); | 2757 | r = r600_cp_load_microcode(rdev); |
2413 | if (r) | 2758 | if (r) |
2414 | return r; | 2759 | return r; |
@@ -2416,6 +2761,10 @@ static int r600_startup(struct radeon_device *rdev) | |||
2416 | if (r) | 2761 | if (r) |
2417 | return r; | 2762 | return r; |
2418 | 2763 | ||
2764 | r = r600_dma_resume(rdev); | ||
2765 | if (r) | ||
2766 | return r; | ||
2767 | |||
2419 | r = radeon_ib_pool_init(rdev); | 2768 | r = radeon_ib_pool_init(rdev); |
2420 | if (r) { | 2769 | if (r) { |
2421 | dev_err(rdev->dev, "IB initialization failed (%d).\n", r); | 2770 | dev_err(rdev->dev, "IB initialization failed (%d).\n", r); |
@@ -2471,7 +2820,7 @@ int r600_suspend(struct radeon_device *rdev) | |||
2471 | { | 2820 | { |
2472 | r600_audio_fini(rdev); | 2821 | r600_audio_fini(rdev); |
2473 | r600_cp_stop(rdev); | 2822 | r600_cp_stop(rdev); |
2474 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; | 2823 | r600_dma_stop(rdev); |
2475 | r600_irq_suspend(rdev); | 2824 | r600_irq_suspend(rdev); |
2476 | radeon_wb_disable(rdev); | 2825 | radeon_wb_disable(rdev); |
2477 | r600_pcie_gart_disable(rdev); | 2826 | r600_pcie_gart_disable(rdev); |
@@ -2544,6 +2893,9 @@ int r600_init(struct radeon_device *rdev) | |||
2544 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; | 2893 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; |
2545 | r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); | 2894 | r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); |
2546 | 2895 | ||
2896 | rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL; | ||
2897 | r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); | ||
2898 | |||
2547 | rdev->ih.ring_obj = NULL; | 2899 | rdev->ih.ring_obj = NULL; |
2548 | r600_ih_ring_init(rdev, 64 * 1024); | 2900 | r600_ih_ring_init(rdev, 64 * 1024); |
2549 | 2901 | ||
@@ -2556,6 +2908,7 @@ int r600_init(struct radeon_device *rdev) | |||
2556 | if (r) { | 2908 | if (r) { |
2557 | dev_err(rdev->dev, "disabling GPU acceleration\n"); | 2909 | dev_err(rdev->dev, "disabling GPU acceleration\n"); |
2558 | r600_cp_fini(rdev); | 2910 | r600_cp_fini(rdev); |
2911 | r600_dma_fini(rdev); | ||
2559 | r600_irq_fini(rdev); | 2912 | r600_irq_fini(rdev); |
2560 | radeon_wb_fini(rdev); | 2913 | radeon_wb_fini(rdev); |
2561 | radeon_ib_pool_fini(rdev); | 2914 | radeon_ib_pool_fini(rdev); |
@@ -2572,6 +2925,7 @@ void r600_fini(struct radeon_device *rdev) | |||
2572 | r600_audio_fini(rdev); | 2925 | r600_audio_fini(rdev); |
2573 | r600_blit_fini(rdev); | 2926 | r600_blit_fini(rdev); |
2574 | r600_cp_fini(rdev); | 2927 | r600_cp_fini(rdev); |
2928 | r600_dma_fini(rdev); | ||
2575 | r600_irq_fini(rdev); | 2929 | r600_irq_fini(rdev); |
2576 | radeon_wb_fini(rdev); | 2930 | radeon_wb_fini(rdev); |
2577 | radeon_ib_pool_fini(rdev); | 2931 | radeon_ib_pool_fini(rdev); |
@@ -2674,6 +3028,104 @@ free_scratch: | |||
2674 | return r; | 3028 | return r; |
2675 | } | 3029 | } |
2676 | 3030 | ||
3031 | /** | ||
3032 | * r600_dma_ib_test - test an IB on the DMA engine | ||
3033 | * | ||
3034 | * @rdev: radeon_device pointer | ||
3035 | * @ring: radeon_ring structure holding ring information | ||
3036 | * | ||
3037 | * Test a simple IB in the DMA ring (r6xx-SI). | ||
3038 | * Returns 0 on success, error on failure. | ||
3039 | */ | ||
3040 | int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) | ||
3041 | { | ||
3042 | struct radeon_ib ib; | ||
3043 | unsigned i; | ||
3044 | int r; | ||
3045 | void __iomem *ptr = (void *)rdev->vram_scratch.ptr; | ||
3046 | u32 tmp = 0; | ||
3047 | |||
3048 | if (!ptr) { | ||
3049 | DRM_ERROR("invalid vram scratch pointer\n"); | ||
3050 | return -EINVAL; | ||
3051 | } | ||
3052 | |||
3053 | tmp = 0xCAFEDEAD; | ||
3054 | writel(tmp, ptr); | ||
3055 | |||
3056 | r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); | ||
3057 | if (r) { | ||
3058 | DRM_ERROR("radeon: failed to get ib (%d).\n", r); | ||
3059 | return r; | ||
3060 | } | ||
3061 | |||
3062 | ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1); | ||
3063 | ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc; | ||
3064 | ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff; | ||
3065 | ib.ptr[3] = 0xDEADBEEF; | ||
3066 | ib.length_dw = 4; | ||
3067 | |||
3068 | r = radeon_ib_schedule(rdev, &ib, NULL); | ||
3069 | if (r) { | ||
3070 | radeon_ib_free(rdev, &ib); | ||
3071 | DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); | ||
3072 | return r; | ||
3073 | } | ||
3074 | r = radeon_fence_wait(ib.fence, false); | ||
3075 | if (r) { | ||
3076 | DRM_ERROR("radeon: fence wait failed (%d).\n", r); | ||
3077 | return r; | ||
3078 | } | ||
3079 | for (i = 0; i < rdev->usec_timeout; i++) { | ||
3080 | tmp = readl(ptr); | ||
3081 | if (tmp == 0xDEADBEEF) | ||
3082 | break; | ||
3083 | DRM_UDELAY(1); | ||
3084 | } | ||
3085 | if (i < rdev->usec_timeout) { | ||
3086 | DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); | ||
3087 | } else { | ||
3088 | DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); | ||
3089 | r = -EINVAL; | ||
3090 | } | ||
3091 | radeon_ib_free(rdev, &ib); | ||
3092 | return r; | ||
3093 | } | ||
3094 | |||
3095 | /** | ||
3096 | * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine | ||
3097 | * | ||
3098 | * @rdev: radeon_device pointer | ||
3099 | * @ib: IB object to schedule | ||
3100 | * | ||
3101 | * Schedule an IB in the DMA ring (r6xx-r7xx). | ||
3102 | */ | ||
3103 | void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) | ||
3104 | { | ||
3105 | struct radeon_ring *ring = &rdev->ring[ib->ring]; | ||
3106 | |||
3107 | if (rdev->wb.enabled) { | ||
3108 | u32 next_rptr = ring->wptr + 4; | ||
3109 | while ((next_rptr & 7) != 5) | ||
3110 | next_rptr++; | ||
3111 | next_rptr += 3; | ||
3112 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); | ||
3113 | radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); | ||
3114 | radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); | ||
3115 | radeon_ring_write(ring, next_rptr); | ||
3116 | } | ||
3117 | |||
3118 | /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. | ||
3119 | * Pad as necessary with NOPs. | ||
3120 | */ | ||
3121 | while ((ring->wptr & 7) != 5) | ||
3122 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); | ||
3123 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0)); | ||
3124 | radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); | ||
3125 | radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF)); | ||
3126 | |||
3127 | } | ||
3128 | |||
2677 | /* | 3129 | /* |
2678 | * Interrupts | 3130 | * Interrupts |
2679 | * | 3131 | * |
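The 8-dword alignment rule in r600_dma_ring_ib_execute() above is easier to see with a small standalone sketch: NOPs are written until the write pointer is at 5 modulo 8, so the 3-dword INDIRECT_BUFFER packet that follows ends exactly on an 8-dword boundary. Ring wrap-around is ignored here for clarity.

```c
/*
 * Standalone sketch of the NOP padding in r600_dma_ring_ib_execute():
 * the 3-dword INDIRECT_BUFFER packet must end on an 8-dword boundary,
 * so padding continues until (wptr % 8) == 5.  Ring wrap is ignored.
 */
#include <stdio.h>

int main(void)
{
	for (unsigned wptr = 0; wptr < 8; wptr++) {
		unsigned nops = 0;
		unsigned p = wptr;

		while ((p & 7) != 5) {	/* each NOP consumes one dword */
			p++;
			nops++;
		}
		/* p, p+1, p+2 hold the IB packet; p+3 is a multiple of 8 */
		printf("wptr%%8 == %u: %u NOPs, IB packet at dwords %u..%u\n",
		       wptr, nops, p, p + 2);
	}
	return 0;
}
```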
@@ -2865,6 +3317,8 @@ static void r600_disable_interrupt_state(struct radeon_device *rdev) | |||
2865 | u32 tmp; | 3317 | u32 tmp; |
2866 | 3318 | ||
2867 | WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); | 3319 | WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); |
3320 | tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE; | ||
3321 | WREG32(DMA_CNTL, tmp); | ||
2868 | WREG32(GRBM_INT_CNTL, 0); | 3322 | WREG32(GRBM_INT_CNTL, 0); |
2869 | WREG32(DxMODE_INT_MASK, 0); | 3323 | WREG32(DxMODE_INT_MASK, 0); |
2870 | WREG32(D1GRPH_INTERRUPT_CONTROL, 0); | 3324 | WREG32(D1GRPH_INTERRUPT_CONTROL, 0); |
@@ -3006,6 +3460,7 @@ int r600_irq_set(struct radeon_device *rdev) | |||
3006 | u32 grbm_int_cntl = 0; | 3460 | u32 grbm_int_cntl = 0; |
3007 | u32 hdmi0, hdmi1; | 3461 | u32 hdmi0, hdmi1; |
3008 | u32 d1grph = 0, d2grph = 0; | 3462 | u32 d1grph = 0, d2grph = 0; |
3463 | u32 dma_cntl; | ||
3009 | 3464 | ||
3010 | if (!rdev->irq.installed) { | 3465 | if (!rdev->irq.installed) { |
3011 | WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); | 3466 | WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); |
@@ -3040,12 +3495,19 @@ int r600_irq_set(struct radeon_device *rdev) | |||
3040 | hdmi0 = RREG32(HDMI0_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK; | 3495 | hdmi0 = RREG32(HDMI0_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK; |
3041 | hdmi1 = RREG32(HDMI1_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK; | 3496 | hdmi1 = RREG32(HDMI1_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK; |
3042 | } | 3497 | } |
3498 | dma_cntl = RREG32(DMA_CNTL) & ~TRAP_ENABLE; | ||
3043 | 3499 | ||
3044 | if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { | 3500 | if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { |
3045 | DRM_DEBUG("r600_irq_set: sw int\n"); | 3501 | DRM_DEBUG("r600_irq_set: sw int\n"); |
3046 | cp_int_cntl |= RB_INT_ENABLE; | 3502 | cp_int_cntl |= RB_INT_ENABLE; |
3047 | cp_int_cntl |= TIME_STAMP_INT_ENABLE; | 3503 | cp_int_cntl |= TIME_STAMP_INT_ENABLE; |
3048 | } | 3504 | } |
3505 | |||
3506 | if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) { | ||
3507 | DRM_DEBUG("r600_irq_set: sw int dma\n"); | ||
3508 | dma_cntl |= TRAP_ENABLE; | ||
3509 | } | ||
3510 | |||
3049 | if (rdev->irq.crtc_vblank_int[0] || | 3511 | if (rdev->irq.crtc_vblank_int[0] || |
3050 | atomic_read(&rdev->irq.pflip[0])) { | 3512 | atomic_read(&rdev->irq.pflip[0])) { |
3051 | DRM_DEBUG("r600_irq_set: vblank 0\n"); | 3513 | DRM_DEBUG("r600_irq_set: vblank 0\n"); |
@@ -3090,6 +3552,7 @@ int r600_irq_set(struct radeon_device *rdev) | |||
3090 | } | 3552 | } |
3091 | 3553 | ||
3092 | WREG32(CP_INT_CNTL, cp_int_cntl); | 3554 | WREG32(CP_INT_CNTL, cp_int_cntl); |
3555 | WREG32(DMA_CNTL, dma_cntl); | ||
3093 | WREG32(DxMODE_INT_MASK, mode_int); | 3556 | WREG32(DxMODE_INT_MASK, mode_int); |
3094 | WREG32(D1GRPH_INTERRUPT_CONTROL, d1grph); | 3557 | WREG32(D1GRPH_INTERRUPT_CONTROL, d1grph); |
3095 | WREG32(D2GRPH_INTERRUPT_CONTROL, d2grph); | 3558 | WREG32(D2GRPH_INTERRUPT_CONTROL, d2grph); |
@@ -3469,6 +3932,10 @@ restart_ih: | |||
3469 | DRM_DEBUG("IH: CP EOP\n"); | 3932 | DRM_DEBUG("IH: CP EOP\n"); |
3470 | radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); | 3933 | radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); |
3471 | break; | 3934 | break; |
3935 | case 224: /* DMA trap event */ | ||
3936 | DRM_DEBUG("IH: DMA trap\n"); | ||
3937 | radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX); | ||
3938 | break; | ||
3472 | case 233: /* GUI IDLE */ | 3939 | case 233: /* GUI IDLE */ |
3473 | DRM_DEBUG("IH: GUI idle\n"); | 3940 | DRM_DEBUG("IH: GUI idle\n"); |
3474 | break; | 3941 | break; |
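Not visible in this file: the same commit also wires the new entry points into the per-ring function table in radeon_asic.c and exposes r600_copy_dma as the DMA copy callback (the r600_copy_dma code above already reads rdev->asic->copy.dma_ring_index). The fragment below is a from-memory sketch of what that hookup looks like for the r6xx table; the function pointers are the ones defined in this diff, but the surrounding struct layout and field names are recalled from that era's struct radeon_asic and should be treated as illustrative, not authoritative.

```c
/*
 * Illustrative fragment only -- the real registration lives in
 * drivers/gpu/drm/radeon/radeon_asic.c; field names here are recalled
 * from memory, not copied from the patch.
 */
static struct radeon_asic r600_asic = {
	/* ... */
	.ring = {
		[RADEON_RING_TYPE_GFX_INDEX] = {
			/* existing CP (gfx) callbacks ... */
		},
		[R600_RING_TYPE_DMA_INDEX] = {
			.ib_execute = &r600_dma_ring_ib_execute,
			.emit_fence = &r600_dma_fence_ring_emit,
			.emit_semaphore = &r600_dma_semaphore_ring_emit,
			.ring_test = &r600_dma_ring_test,
			.ib_test = &r600_dma_ib_test,
			.is_lockup = &r600_dma_is_lockup,
		},
	},
	.copy = {
		/* async DMA becomes available as a copy engine for TTM moves */
		.dma = &r600_copy_dma,
		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
		/* ... */
	},
	/* ... */
};
```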