author     Dave Airlie <airlied@redhat.com>  2012-12-10 17:46:03 -0500
committer  Dave Airlie <airlied@redhat.com>  2012-12-10 17:46:03 -0500
commit     a636a9829175987e74ddd28a2e87ed17ff7adfdc (patch)
tree       1c2f8e4425a1434215c3c5ae89b2f2e874ef2c3d /drivers
parent     97a875cbdf89a4638eea57c2b456c7cc4e3e8b21 (diff)
parent     bf66a786c92488dfc99cc7f19bc9eda7b4c98fa6 (diff)
Merge branch 'drm-next-3.8' of git://people.freedesktop.org/~agd5f/linux into drm-next
Alex writes:

"adds support for the asynchronous DMA engines on r6xx-SI. These engines are
used for ttm bo moves and VM page table updates currently. They could also be
exposed via the CS ioctl for userspace use, but I haven't had a chance to add
proper CS checker patches for them yet. These patches have been tested
extensively internally for months, so they should be pretty solid."

* 'drm-next-3.8' of git://people.freedesktop.org/~agd5f/linux:
  drm/radeon: use DMA engine for VM page table updates on SI
  drm/radeon: add dma engine support for vm pt updates on si (v2)
  drm/radeon: use DMA engine for VM page table updates on cayman/TN
  drm/radeon: add dma engine support for vm pt updates on ni (v5)
  drm/radeon: use async dma for ttm buffer moves on 6xx-SI
  drm/radeon/kms: add support for dma rings to radeon_test_moves()
  drm/radeon/kms: Add initial support for async DMA on SI
  drm/radeon/kms: Add initial support for async DMA on cayman/TN
  drm/radeon/kms: Add initial support for async DMA on evergreen
  drm/radeon/kms: Add initial support for async DMA on r6xx/r7xx
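The new DMA rings added in this series drive the engine with 32-bit packet headers built by the DMA_PACKET() macro introduced in evergreend.h and nid.h: the command sits in bits 31:28, two single-bit flags in bits 23 and 22, and a 20-bit count in the low bits. As a quick sanity check of that encoding (a stand-alone user-space sketch, not code from this patch), the snippet below evaluates the headers for a few packets the diff actually emits:

    /* Minimal sketch of the DMA packet header encoding defined in
     * evergreend.h / nid.h below; constants mirror the patch. */
    #include <stdio.h>

    #define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) |  \
                                      (((t) & 0x1) << 23) |    \
                                      (((s) & 0x1) << 22) |    \
                                      (((n) & 0xFFFFF) << 0))
    #define DMA_PACKET_WRITE 0x2
    #define DMA_PACKET_FENCE 0x6
    #define DMA_PACKET_TRAP  0x7

    int main(void)
    {
        /* 1-dword inline write, as emitted for rptr writeback by the new
         * *_dma_ring_ib_execute() helpers: 0x20000001 */
        printf("WRITE(1): 0x%08x\n", (unsigned)DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
        /* fence + trap pair, as emitted by *_dma_fence_ring_emit():
         * 0x60000000 then 0x70000000 */
        printf("FENCE:    0x%08x\n", (unsigned)DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
        printf("TRAP:     0x%08x\n", (unsigned)DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
        return 0;
    }

The same header layout explains the padding loops in the IB-execute helpers: a NOP (command 0xf) is a single header dword, so the code pads with NOPs until the write pointer sits at offset 5 mod 8, which lets the 3-dword INDIRECT_BUFFER packet end on an 8-dword boundary as the hardware requires.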
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/drm/radeon/evergreen.c    | 200
-rw-r--r--  drivers/gpu/drm/radeon/evergreend.h   |  31
-rw-r--r--  drivers/gpu/drm/radeon/ni.c           | 341
-rw-r--r--  drivers/gpu/drm/radeon/nid.h          |  75
-rw-r--r--  drivers/gpu/drm/radeon/r600.c         | 471
-rw-r--r--  drivers/gpu/drm/radeon/r600d.h        |  54
-rw-r--r--  drivers/gpu/drm/radeon/radeon.h       |  18
-rw-r--r--  drivers/gpu/drm/radeon/radeon_asic.c  | 192
-rw-r--r--  drivers/gpu/drm/radeon/radeon_asic.h  |  30
-rw-r--r--  drivers/gpu/drm/radeon/radeon_test.c  |  37
-rw-r--r--  drivers/gpu/drm/radeon/rv770.c        |  31
-rw-r--r--  drivers/gpu/drm/radeon/rv770d.h       |  23
-rw-r--r--  drivers/gpu/drm/radeon/si.c           | 283
-rw-r--r--  drivers/gpu/drm/radeon/sid.h          |  57
14 files changed, 1742 insertions(+), 101 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 78de2e4097b5..c66251e4a9b9 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2034,6 +2034,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
2034 WREG32(GB_ADDR_CONFIG, gb_addr_config); 2034 WREG32(GB_ADDR_CONFIG, gb_addr_config);
2035 WREG32(DMIF_ADDR_CONFIG, gb_addr_config); 2035 WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
2036 WREG32(HDP_ADDR_CONFIG, gb_addr_config); 2036 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2037 WREG32(DMA_TILING_CONFIG, gb_addr_config);
2037 2038
2038 tmp = gb_addr_config & NUM_PIPES_MASK; 2039 tmp = gb_addr_config & NUM_PIPES_MASK;
2039 tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.evergreen.max_backends, 2040 tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.evergreen.max_backends,
@@ -2403,8 +2404,12 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev)
2403 CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 2404 CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
2404 cayman_cp_int_cntl_setup(rdev, 1, 0); 2405 cayman_cp_int_cntl_setup(rdev, 1, 0);
2405 cayman_cp_int_cntl_setup(rdev, 2, 0); 2406 cayman_cp_int_cntl_setup(rdev, 2, 0);
2407 tmp = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
2408 WREG32(CAYMAN_DMA1_CNTL, tmp);
2406 } else 2409 } else
2407 WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 2410 WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
2411 tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
2412 WREG32(DMA_CNTL, tmp);
2408 WREG32(GRBM_INT_CNTL, 0); 2413 WREG32(GRBM_INT_CNTL, 0);
2409 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); 2414 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
2410 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); 2415 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
@@ -2457,6 +2462,7 @@ int evergreen_irq_set(struct radeon_device *rdev)
2457 u32 grbm_int_cntl = 0; 2462 u32 grbm_int_cntl = 0;
2458 u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0; 2463 u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
2459 u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0; 2464 u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0;
2465 u32 dma_cntl, dma_cntl1 = 0;
2460 2466
2461 if (!rdev->irq.installed) { 2467 if (!rdev->irq.installed) {
2462 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); 2468 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -2484,6 +2490,8 @@ int evergreen_irq_set(struct radeon_device *rdev)
2484 afmt5 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK; 2490 afmt5 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK;
2485 afmt6 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK; 2491 afmt6 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK;
2486 2492
2493 dma_cntl = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
2494
2487 if (rdev->family >= CHIP_CAYMAN) { 2495 if (rdev->family >= CHIP_CAYMAN) {
2488 /* enable CP interrupts on all rings */ 2496 /* enable CP interrupts on all rings */
2489 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { 2497 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
@@ -2506,6 +2514,19 @@ int evergreen_irq_set(struct radeon_device *rdev)
2506 } 2514 }
2507 } 2515 }
2508 2516
2517 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
2518 DRM_DEBUG("r600_irq_set: sw int dma\n");
2519 dma_cntl |= TRAP_ENABLE;
2520 }
2521
2522 if (rdev->family >= CHIP_CAYMAN) {
2523 dma_cntl1 = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
2524 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
2525 DRM_DEBUG("r600_irq_set: sw int dma1\n");
2526 dma_cntl1 |= TRAP_ENABLE;
2527 }
2528 }
2529
2509 if (rdev->irq.crtc_vblank_int[0] || 2530 if (rdev->irq.crtc_vblank_int[0] ||
2510 atomic_read(&rdev->irq.pflip[0])) { 2531 atomic_read(&rdev->irq.pflip[0])) {
2511 DRM_DEBUG("evergreen_irq_set: vblank 0\n"); 2532 DRM_DEBUG("evergreen_irq_set: vblank 0\n");
@@ -2591,6 +2612,12 @@ int evergreen_irq_set(struct radeon_device *rdev)
2591 cayman_cp_int_cntl_setup(rdev, 2, cp_int_cntl2); 2612 cayman_cp_int_cntl_setup(rdev, 2, cp_int_cntl2);
2592 } else 2613 } else
2593 WREG32(CP_INT_CNTL, cp_int_cntl); 2614 WREG32(CP_INT_CNTL, cp_int_cntl);
2615
2616 WREG32(DMA_CNTL, dma_cntl);
2617
2618 if (rdev->family >= CHIP_CAYMAN)
2619 WREG32(CAYMAN_DMA1_CNTL, dma_cntl1);
2620
2594 WREG32(GRBM_INT_CNTL, grbm_int_cntl); 2621 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
2595 2622
2596 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); 2623 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
@@ -3126,9 +3153,19 @@ restart_ih:
3126 } else 3153 } else
3127 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); 3154 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3128 break; 3155 break;
3156 case 224: /* DMA trap event */
3157 DRM_DEBUG("IH: DMA trap\n");
3158 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
3159 break;
3129 case 233: /* GUI IDLE */ 3160 case 233: /* GUI IDLE */
3130 DRM_DEBUG("IH: GUI idle\n"); 3161 DRM_DEBUG("IH: GUI idle\n");
3131 break; 3162 break;
3163 case 244: /* DMA trap event */
3164 if (rdev->family >= CHIP_CAYMAN) {
3165 DRM_DEBUG("IH: DMA1 trap\n");
3166 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
3167 }
3168 break;
3132 default: 3169 default:
3133 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 3170 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3134 break; 3171 break;
@@ -3154,6 +3191,143 @@ restart_ih:
3154 return IRQ_HANDLED; 3191 return IRQ_HANDLED;
3155} 3192}
3156 3193
3194/**
3195 * evergreen_dma_fence_ring_emit - emit a fence on the DMA ring
3196 *
3197 * @rdev: radeon_device pointer
3198 * @fence: radeon fence object
3199 *
3200 * Add a DMA fence packet to the ring to write
3201 * the fence seq number and DMA trap packet to generate
3202 * an interrupt if needed (evergreen-SI).
3203 */
3204void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
3205 struct radeon_fence *fence)
3206{
3207 struct radeon_ring *ring = &rdev->ring[fence->ring];
3208 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3209 /* write the fence */
3210 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
3211 radeon_ring_write(ring, addr & 0xfffffffc);
3212 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
3213 radeon_ring_write(ring, fence->seq);
3214 /* generate an interrupt */
3215 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
3216 /* flush HDP */
3217 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
3218 radeon_ring_write(ring, (0xf << 16) | HDP_MEM_COHERENCY_FLUSH_CNTL);
3219 radeon_ring_write(ring, 1);
3220}
3221
3222/**
3223 * evergreen_dma_ring_ib_execute - schedule an IB on the DMA engine
3224 *
3225 * @rdev: radeon_device pointer
3226 * @ib: IB object to schedule
3227 *
3228 * Schedule an IB in the DMA ring (evergreen).
3229 */
3230void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
3231 struct radeon_ib *ib)
3232{
3233 struct radeon_ring *ring = &rdev->ring[ib->ring];
3234
3235 if (rdev->wb.enabled) {
3236 u32 next_rptr = ring->wptr + 4;
3237 while ((next_rptr & 7) != 5)
3238 next_rptr++;
3239 next_rptr += 3;
3240 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
3241 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3242 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
3243 radeon_ring_write(ring, next_rptr);
3244 }
3245
3246 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
3247 * Pad as necessary with NOPs.
3248 */
3249 while ((ring->wptr & 7) != 5)
3250 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
3251 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
3252 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
3253 radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
3254
3255}
3256
3257/**
3258 * evergreen_copy_dma - copy pages using the DMA engine
3259 *
3260 * @rdev: radeon_device pointer
3261 * @src_offset: src GPU address
3262 * @dst_offset: dst GPU address
3263 * @num_gpu_pages: number of GPU pages to xfer
3264 * @fence: radeon fence object
3265 *
3266 * Copy GPU paging using the DMA engine (evergreen-cayman).
3267 * Used by the radeon ttm implementation to move pages if
3268 * registered as the asic copy callback.
3269 */
3270int evergreen_copy_dma(struct radeon_device *rdev,
3271 uint64_t src_offset, uint64_t dst_offset,
3272 unsigned num_gpu_pages,
3273 struct radeon_fence **fence)
3274{
3275 struct radeon_semaphore *sem = NULL;
3276 int ring_index = rdev->asic->copy.dma_ring_index;
3277 struct radeon_ring *ring = &rdev->ring[ring_index];
3278 u32 size_in_dw, cur_size_in_dw;
3279 int i, num_loops;
3280 int r = 0;
3281
3282 r = radeon_semaphore_create(rdev, &sem);
3283 if (r) {
3284 DRM_ERROR("radeon: moving bo (%d).\n", r);
3285 return r;
3286 }
3287
3288 size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
3289 num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff);
3290 r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
3291 if (r) {
3292 DRM_ERROR("radeon: moving bo (%d).\n", r);
3293 radeon_semaphore_free(rdev, &sem, NULL);
3294 return r;
3295 }
3296
3297 if (radeon_fence_need_sync(*fence, ring->idx)) {
3298 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3299 ring->idx);
3300 radeon_fence_note_sync(*fence, ring->idx);
3301 } else {
3302 radeon_semaphore_free(rdev, &sem, NULL);
3303 }
3304
3305 for (i = 0; i < num_loops; i++) {
3306 cur_size_in_dw = size_in_dw;
3307 if (cur_size_in_dw > 0xFFFFF)
3308 cur_size_in_dw = 0xFFFFF;
3309 size_in_dw -= cur_size_in_dw;
3310 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
3311 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3312 radeon_ring_write(ring, src_offset & 0xfffffffc);
3313 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
3314 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
3315 src_offset += cur_size_in_dw * 4;
3316 dst_offset += cur_size_in_dw * 4;
3317 }
3318
3319 r = radeon_fence_emit(rdev, fence, ring->idx);
3320 if (r) {
3321 radeon_ring_unlock_undo(rdev, ring);
3322 return r;
3323 }
3324
3325 radeon_ring_unlock_commit(rdev, ring);
3326 radeon_semaphore_free(rdev, &sem, *fence);
3327
3328 return r;
3329}
3330
3157static int evergreen_startup(struct radeon_device *rdev) 3331static int evergreen_startup(struct radeon_device *rdev)
3158{ 3332{
3159 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 3333 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
@@ -3217,6 +3391,12 @@ static int evergreen_startup(struct radeon_device *rdev)
3217 return r; 3391 return r;
3218 } 3392 }
3219 3393
3394 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
3395 if (r) {
3396 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
3397 return r;
3398 }
3399
3220 /* Enable IRQ */ 3400 /* Enable IRQ */
3221 r = r600_irq_init(rdev); 3401 r = r600_irq_init(rdev);
3222 if (r) { 3402 if (r) {
@@ -3231,12 +3411,23 @@ static int evergreen_startup(struct radeon_device *rdev)
3231 0, 0xfffff, RADEON_CP_PACKET2); 3411 0, 0xfffff, RADEON_CP_PACKET2);
3232 if (r) 3412 if (r)
3233 return r; 3413 return r;
3414
3415 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3416 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
3417 DMA_RB_RPTR, DMA_RB_WPTR,
3418 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
3419 if (r)
3420 return r;
3421
3234 r = evergreen_cp_load_microcode(rdev); 3422 r = evergreen_cp_load_microcode(rdev);
3235 if (r) 3423 if (r)
3236 return r; 3424 return r;
3237 r = evergreen_cp_resume(rdev); 3425 r = evergreen_cp_resume(rdev);
3238 if (r) 3426 if (r)
3239 return r; 3427 return r;
3428 r = r600_dma_resume(rdev);
3429 if (r)
3430 return r;
3240 3431
3241 r = radeon_ib_pool_init(rdev); 3432 r = radeon_ib_pool_init(rdev);
3242 if (r) { 3433 if (r) {
@@ -3283,11 +3474,9 @@ int evergreen_resume(struct radeon_device *rdev)
3283 3474
3284int evergreen_suspend(struct radeon_device *rdev) 3475int evergreen_suspend(struct radeon_device *rdev)
3285{ 3476{
3286 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3287
3288 r600_audio_fini(rdev); 3477 r600_audio_fini(rdev);
3289 r700_cp_stop(rdev); 3478 r700_cp_stop(rdev);
3290 ring->ready = false; 3479 r600_dma_stop(rdev);
3291 evergreen_irq_suspend(rdev); 3480 evergreen_irq_suspend(rdev);
3292 radeon_wb_disable(rdev); 3481 radeon_wb_disable(rdev);
3293 evergreen_pcie_gart_disable(rdev); 3482 evergreen_pcie_gart_disable(rdev);
@@ -3364,6 +3553,9 @@ int evergreen_init(struct radeon_device *rdev)
3364 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; 3553 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
3365 r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); 3554 r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
3366 3555
3556 rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
3557 r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
3558
3367 rdev->ih.ring_obj = NULL; 3559 rdev->ih.ring_obj = NULL;
3368 r600_ih_ring_init(rdev, 64 * 1024); 3560 r600_ih_ring_init(rdev, 64 * 1024);
3369 3561
@@ -3376,6 +3568,7 @@ int evergreen_init(struct radeon_device *rdev)
3376 if (r) { 3568 if (r) {
3377 dev_err(rdev->dev, "disabling GPU acceleration\n"); 3569 dev_err(rdev->dev, "disabling GPU acceleration\n");
3378 r700_cp_fini(rdev); 3570 r700_cp_fini(rdev);
3571 r600_dma_fini(rdev);
3379 r600_irq_fini(rdev); 3572 r600_irq_fini(rdev);
3380 radeon_wb_fini(rdev); 3573 radeon_wb_fini(rdev);
3381 radeon_ib_pool_fini(rdev); 3574 radeon_ib_pool_fini(rdev);
@@ -3403,6 +3596,7 @@ void evergreen_fini(struct radeon_device *rdev)
3403 r600_audio_fini(rdev); 3596 r600_audio_fini(rdev);
3404 r600_blit_fini(rdev); 3597 r600_blit_fini(rdev);
3405 r700_cp_fini(rdev); 3598 r700_cp_fini(rdev);
3599 r600_dma_fini(rdev);
3406 r600_irq_fini(rdev); 3600 r600_irq_fini(rdev);
3407 radeon_wb_fini(rdev); 3601 radeon_wb_fini(rdev);
3408 radeon_ib_pool_fini(rdev); 3602 radeon_ib_pool_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index cae7ab4219ef..7b4a650e33b2 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -905,6 +905,37 @@
905# define DC_HPDx_RX_INT_TIMER(x) ((x) << 16) 905# define DC_HPDx_RX_INT_TIMER(x) ((x) << 16)
906# define DC_HPDx_EN (1 << 28) 906# define DC_HPDx_EN (1 << 28)
907 907
908/* ASYNC DMA */
909#define DMA_RB_RPTR 0xd008
910#define DMA_RB_WPTR 0xd00c
911
912#define DMA_CNTL 0xd02c
913# define TRAP_ENABLE (1 << 0)
914# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
915# define SEM_WAIT_INT_ENABLE (1 << 2)
916# define DATA_SWAP_ENABLE (1 << 3)
917# define FENCE_SWAP_ENABLE (1 << 4)
918# define CTXEMPTY_INT_ENABLE (1 << 28)
919#define DMA_TILING_CONFIG 0xD0B8
920
921#define CAYMAN_DMA1_CNTL 0xd82c
922
923/* async DMA packets */
924#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
925 (((t) & 0x1) << 23) | \
926 (((s) & 0x1) << 22) | \
927 (((n) & 0xFFFFF) << 0))
928/* async DMA Packet types */
929#define DMA_PACKET_WRITE 0x2
930#define DMA_PACKET_COPY 0x3
931#define DMA_PACKET_INDIRECT_BUFFER 0x4
932#define DMA_PACKET_SEMAPHORE 0x5
933#define DMA_PACKET_FENCE 0x6
934#define DMA_PACKET_TRAP 0x7
935#define DMA_PACKET_SRBM_WRITE 0x9
936#define DMA_PACKET_CONSTANT_FILL 0xd
937#define DMA_PACKET_NOP 0xf
938
908/* PCIE link stuff */ 939/* PCIE link stuff */
909#define PCIE_LC_TRAINING_CNTL 0xa1 /* PCIE_P */ 940#define PCIE_LC_TRAINING_CNTL 0xa1 /* PCIE_P */
910#define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */ 941#define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 30c18a6e0044..39e8be1d1e89 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -611,6 +611,8 @@ static void cayman_gpu_init(struct radeon_device *rdev)
611 WREG32(GB_ADDR_CONFIG, gb_addr_config); 611 WREG32(GB_ADDR_CONFIG, gb_addr_config);
612 WREG32(DMIF_ADDR_CONFIG, gb_addr_config); 612 WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
613 WREG32(HDP_ADDR_CONFIG, gb_addr_config); 613 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
614 WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
615 WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
614 616
615 tmp = gb_addr_config & NUM_PIPES_MASK; 617 tmp = gb_addr_config & NUM_PIPES_MASK;
616 tmp = r6xx_remap_render_backend(rdev, tmp, 618 tmp = r6xx_remap_render_backend(rdev, tmp,
@@ -915,6 +917,7 @@ static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
915 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 917 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
916 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); 918 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
917 WREG32(SCRATCH_UMSK, 0); 919 WREG32(SCRATCH_UMSK, 0);
920 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
918 } 921 }
919} 922}
920 923
@@ -1128,6 +1131,181 @@ static int cayman_cp_resume(struct radeon_device *rdev)
1128 return 0; 1131 return 0;
1129} 1132}
1130 1133
1134/*
1135 * DMA
1136 * Starting with R600, the GPU has an asynchronous
1137 * DMA engine. The programming model is very similar
1138 * to the 3D engine (ring buffer, IBs, etc.), but the
 1139 * DMA controller has its own packet format that is
 1140 * different from the PM4 format used by the 3D engine.
1141 * It supports copying data, writing embedded data,
1142 * solid fills, and a number of other things. It also
1143 * has support for tiling/detiling of buffers.
1144 * Cayman and newer support two asynchronous DMA engines.
1145 */
1146/**
1147 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
1148 *
1149 * @rdev: radeon_device pointer
1150 * @ib: IB object to schedule
1151 *
1152 * Schedule an IB in the DMA ring (cayman-SI).
1153 */
1154void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
1155 struct radeon_ib *ib)
1156{
1157 struct radeon_ring *ring = &rdev->ring[ib->ring];
1158
1159 if (rdev->wb.enabled) {
1160 u32 next_rptr = ring->wptr + 4;
1161 while ((next_rptr & 7) != 5)
1162 next_rptr++;
1163 next_rptr += 3;
1164 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
1165 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1166 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
1167 radeon_ring_write(ring, next_rptr);
1168 }
1169
1170 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
1171 * Pad as necessary with NOPs.
1172 */
1173 while ((ring->wptr & 7) != 5)
1174 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
1175 radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
1176 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
1177 radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
1178
1179}
1180
1181/**
1182 * cayman_dma_stop - stop the async dma engines
1183 *
1184 * @rdev: radeon_device pointer
1185 *
1186 * Stop the async dma engines (cayman-SI).
1187 */
1188void cayman_dma_stop(struct radeon_device *rdev)
1189{
1190 u32 rb_cntl;
1191
1192 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1193
1194 /* dma0 */
1195 rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
1196 rb_cntl &= ~DMA_RB_ENABLE;
1197 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
1198
1199 /* dma1 */
1200 rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
1201 rb_cntl &= ~DMA_RB_ENABLE;
1202 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
1203
1204 rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
1205 rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
1206}
1207
1208/**
1209 * cayman_dma_resume - setup and start the async dma engines
1210 *
1211 * @rdev: radeon_device pointer
1212 *
1213 * Set up the DMA ring buffers and enable them. (cayman-SI).
1214 * Returns 0 for success, error for failure.
1215 */
1216int cayman_dma_resume(struct radeon_device *rdev)
1217{
1218 struct radeon_ring *ring;
1219 u32 rb_cntl, dma_cntl;
1220 u32 rb_bufsz;
1221 u32 reg_offset, wb_offset;
1222 int i, r;
1223
1224 /* Reset dma */
1225 WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
1226 RREG32(SRBM_SOFT_RESET);
1227 udelay(50);
1228 WREG32(SRBM_SOFT_RESET, 0);
1229
1230 for (i = 0; i < 2; i++) {
1231 if (i == 0) {
1232 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
1233 reg_offset = DMA0_REGISTER_OFFSET;
1234 wb_offset = R600_WB_DMA_RPTR_OFFSET;
1235 } else {
1236 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
1237 reg_offset = DMA1_REGISTER_OFFSET;
1238 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
1239 }
1240
1241 WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
1242 WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
1243
1244 /* Set ring buffer size in dwords */
1245 rb_bufsz = drm_order(ring->ring_size / 4);
1246 rb_cntl = rb_bufsz << 1;
1247#ifdef __BIG_ENDIAN
1248 rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
1249#endif
1250 WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
1251
1252 /* Initialize the ring buffer's read and write pointers */
1253 WREG32(DMA_RB_RPTR + reg_offset, 0);
1254 WREG32(DMA_RB_WPTR + reg_offset, 0);
1255
1256 /* set the wb address whether it's enabled or not */
1257 WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
1258 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
1259 WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
1260 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
1261
1262 if (rdev->wb.enabled)
1263 rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
1264
1265 WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
1266
1267 /* enable DMA IBs */
1268 WREG32(DMA_IB_CNTL + reg_offset, DMA_IB_ENABLE | CMD_VMID_FORCE);
1269
1270 dma_cntl = RREG32(DMA_CNTL + reg_offset);
1271 dma_cntl &= ~CTXEMPTY_INT_ENABLE;
1272 WREG32(DMA_CNTL + reg_offset, dma_cntl);
1273
1274 ring->wptr = 0;
1275 WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
1276
1277 ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
1278
1279 WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
1280
1281 ring->ready = true;
1282
1283 r = radeon_ring_test(rdev, ring->idx, ring);
1284 if (r) {
1285 ring->ready = false;
1286 return r;
1287 }
1288 }
1289
1290 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
1291
1292 return 0;
1293}
1294
1295/**
1296 * cayman_dma_fini - tear down the async dma engines
1297 *
1298 * @rdev: radeon_device pointer
1299 *
1300 * Stop the async dma engines and free the rings (cayman-SI).
1301 */
1302void cayman_dma_fini(struct radeon_device *rdev)
1303{
1304 cayman_dma_stop(rdev);
1305 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
1306 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
1307}
1308
1131static int cayman_gpu_soft_reset(struct radeon_device *rdev) 1309static int cayman_gpu_soft_reset(struct radeon_device *rdev)
1132{ 1310{
1133 struct evergreen_mc_save save; 1311 struct evergreen_mc_save save;
@@ -1218,6 +1396,32 @@ int cayman_asic_reset(struct radeon_device *rdev)
1218 return cayman_gpu_soft_reset(rdev); 1396 return cayman_gpu_soft_reset(rdev);
1219} 1397}
1220 1398
1399/**
1400 * cayman_dma_is_lockup - Check if the DMA engine is locked up
1401 *
1402 * @rdev: radeon_device pointer
1403 * @ring: radeon_ring structure holding ring information
1404 *
1405 * Check if the async DMA engine is locked up (cayman-SI).
1406 * Returns true if the engine appears to be locked up, false if not.
1407 */
1408bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1409{
1410 u32 dma_status_reg;
1411
1412 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
1413 dma_status_reg = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
1414 else
1415 dma_status_reg = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
1416 if (dma_status_reg & DMA_IDLE) {
1417 radeon_ring_lockup_update(ring);
1418 return false;
1419 }
1420 /* force ring activities */
1421 radeon_ring_force_activity(rdev, ring);
1422 return radeon_ring_test_lockup(rdev, ring);
1423}
1424
1221static int cayman_startup(struct radeon_device *rdev) 1425static int cayman_startup(struct radeon_device *rdev)
1222{ 1426{
1223 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 1427 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
@@ -1299,6 +1503,18 @@ static int cayman_startup(struct radeon_device *rdev)
1299 return r; 1503 return r;
1300 } 1504 }
1301 1505
1506 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
1507 if (r) {
1508 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
1509 return r;
1510 }
1511
1512 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
1513 if (r) {
1514 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
1515 return r;
1516 }
1517
1302 /* Enable IRQ */ 1518 /* Enable IRQ */
1303 r = r600_irq_init(rdev); 1519 r = r600_irq_init(rdev);
1304 if (r) { 1520 if (r) {
@@ -1313,6 +1529,23 @@ static int cayman_startup(struct radeon_device *rdev)
1313 0, 0xfffff, RADEON_CP_PACKET2); 1529 0, 0xfffff, RADEON_CP_PACKET2);
1314 if (r) 1530 if (r)
1315 return r; 1531 return r;
1532
1533 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
1534 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
1535 DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
1536 DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
1537 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
1538 if (r)
1539 return r;
1540
1541 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
1542 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
1543 DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
1544 DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
1545 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
1546 if (r)
1547 return r;
1548
1316 r = cayman_cp_load_microcode(rdev); 1549 r = cayman_cp_load_microcode(rdev);
1317 if (r) 1550 if (r)
1318 return r; 1551 return r;
@@ -1320,6 +1553,10 @@ static int cayman_startup(struct radeon_device *rdev)
1320 if (r) 1553 if (r)
1321 return r; 1554 return r;
1322 1555
1556 r = cayman_dma_resume(rdev);
1557 if (r)
1558 return r;
1559
1323 r = radeon_ib_pool_init(rdev); 1560 r = radeon_ib_pool_init(rdev);
1324 if (r) { 1561 if (r) {
1325 dev_err(rdev->dev, "IB initialization failed (%d).\n", r); 1562 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -1364,7 +1601,7 @@ int cayman_suspend(struct radeon_device *rdev)
1364{ 1601{
1365 r600_audio_fini(rdev); 1602 r600_audio_fini(rdev);
1366 cayman_cp_enable(rdev, false); 1603 cayman_cp_enable(rdev, false);
1367 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 1604 cayman_dma_stop(rdev);
1368 evergreen_irq_suspend(rdev); 1605 evergreen_irq_suspend(rdev);
1369 radeon_wb_disable(rdev); 1606 radeon_wb_disable(rdev);
1370 cayman_pcie_gart_disable(rdev); 1607 cayman_pcie_gart_disable(rdev);
@@ -1431,6 +1668,14 @@ int cayman_init(struct radeon_device *rdev)
1431 ring->ring_obj = NULL; 1668 ring->ring_obj = NULL;
1432 r600_ring_init(rdev, ring, 1024 * 1024); 1669 r600_ring_init(rdev, ring, 1024 * 1024);
1433 1670
1671 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
1672 ring->ring_obj = NULL;
1673 r600_ring_init(rdev, ring, 64 * 1024);
1674
1675 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
1676 ring->ring_obj = NULL;
1677 r600_ring_init(rdev, ring, 64 * 1024);
1678
1434 rdev->ih.ring_obj = NULL; 1679 rdev->ih.ring_obj = NULL;
1435 r600_ih_ring_init(rdev, 64 * 1024); 1680 r600_ih_ring_init(rdev, 64 * 1024);
1436 1681
@@ -1443,6 +1688,7 @@ int cayman_init(struct radeon_device *rdev)
1443 if (r) { 1688 if (r) {
1444 dev_err(rdev->dev, "disabling GPU acceleration\n"); 1689 dev_err(rdev->dev, "disabling GPU acceleration\n");
1445 cayman_cp_fini(rdev); 1690 cayman_cp_fini(rdev);
1691 cayman_dma_fini(rdev);
1446 r600_irq_fini(rdev); 1692 r600_irq_fini(rdev);
1447 if (rdev->flags & RADEON_IS_IGP) 1693 if (rdev->flags & RADEON_IS_IGP)
1448 si_rlc_fini(rdev); 1694 si_rlc_fini(rdev);
@@ -1473,6 +1719,7 @@ void cayman_fini(struct radeon_device *rdev)
1473{ 1719{
1474 r600_blit_fini(rdev); 1720 r600_blit_fini(rdev);
1475 cayman_cp_fini(rdev); 1721 cayman_cp_fini(rdev);
1722 cayman_dma_fini(rdev);
1476 r600_irq_fini(rdev); 1723 r600_irq_fini(rdev);
1477 if (rdev->flags & RADEON_IS_IGP) 1724 if (rdev->flags & RADEON_IS_IGP)
1478 si_rlc_fini(rdev); 1725 si_rlc_fini(rdev);
@@ -1548,30 +1795,57 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
1548{ 1795{
1549 struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; 1796 struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
1550 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); 1797 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
1551 1798 uint64_t value;
1552 while (count) { 1799 unsigned ndw;
1553 unsigned ndw = 1 + count * 2; 1800
1554 if (ndw > 0x3FFF) 1801 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
1555 ndw = 0x3FFF; 1802 while (count) {
1556 1803 ndw = 1 + count * 2;
1557 radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw)); 1804 if (ndw > 0x3FFF)
1558 radeon_ring_write(ring, pe); 1805 ndw = 0x3FFF;
1559 radeon_ring_write(ring, upper_32_bits(pe) & 0xff); 1806
1560 for (; ndw > 1; ndw -= 2, --count, pe += 8) { 1807 radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw));
1561 uint64_t value = 0; 1808 radeon_ring_write(ring, pe);
1562 if (flags & RADEON_VM_PAGE_SYSTEM) { 1809 radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
1563 value = radeon_vm_map_gart(rdev, addr); 1810 for (; ndw > 1; ndw -= 2, --count, pe += 8) {
1564 value &= 0xFFFFFFFFFFFFF000ULL; 1811 if (flags & RADEON_VM_PAGE_SYSTEM) {
1812 value = radeon_vm_map_gart(rdev, addr);
1813 value &= 0xFFFFFFFFFFFFF000ULL;
1814 } else if (flags & RADEON_VM_PAGE_VALID) {
1815 value = addr;
1816 } else {
1817 value = 0;
1818 }
1565 addr += incr; 1819 addr += incr;
1566 1820 value |= r600_flags;
1567 } else if (flags & RADEON_VM_PAGE_VALID) { 1821 radeon_ring_write(ring, value);
1568 value = addr; 1822 radeon_ring_write(ring, upper_32_bits(value));
1823 }
1824 }
1825 } else {
1826 while (count) {
1827 ndw = count * 2;
1828 if (ndw > 0xFFFFE)
1829 ndw = 0xFFFFE;
1830
1831 /* for non-physically contiguous pages (system) */
1832 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw));
1833 radeon_ring_write(ring, pe);
1834 radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
1835 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
1836 if (flags & RADEON_VM_PAGE_SYSTEM) {
1837 value = radeon_vm_map_gart(rdev, addr);
1838 value &= 0xFFFFFFFFFFFFF000ULL;
1839 } else if (flags & RADEON_VM_PAGE_VALID) {
1840 value = addr;
1841 } else {
1842 value = 0;
1843 }
1569 addr += incr; 1844 addr += incr;
1845 value |= r600_flags;
1846 radeon_ring_write(ring, value);
1847 radeon_ring_write(ring, upper_32_bits(value));
1570 } 1848 }
1571
1572 value |= r600_flags;
1573 radeon_ring_write(ring, value);
1574 radeon_ring_write(ring, upper_32_bits(value));
1575 } 1849 }
1576 } 1850 }
1577} 1851}
@@ -1606,3 +1880,26 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
1606 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 1880 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
1607 radeon_ring_write(ring, 0x0); 1881 radeon_ring_write(ring, 0x0);
1608} 1882}
1883
1884void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
1885{
1886 struct radeon_ring *ring = &rdev->ring[ridx];
1887
1888 if (vm == NULL)
1889 return;
1890
1891 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
1892 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
1893 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
1894
1895 /* flush hdp cache */
1896 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
1897 radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
1898 radeon_ring_write(ring, 1);
1899
1900 /* bits 0-7 are the VM contexts0-7 */
1901 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
1902 radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
1903 radeon_ring_write(ring, 1 << vm->id);
1904}
1905
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index f5e54a7e2bf2..b93186b8ee4b 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -50,6 +50,24 @@
50#define VMID(x) (((x) & 0x7) << 0) 50#define VMID(x) (((x) & 0x7) << 0)
51#define SRBM_STATUS 0x0E50 51#define SRBM_STATUS 0x0E50
52 52
53#define SRBM_SOFT_RESET 0x0E60
54#define SOFT_RESET_BIF (1 << 1)
55#define SOFT_RESET_CG (1 << 2)
56#define SOFT_RESET_DC (1 << 5)
57#define SOFT_RESET_DMA1 (1 << 6)
58#define SOFT_RESET_GRBM (1 << 8)
59#define SOFT_RESET_HDP (1 << 9)
60#define SOFT_RESET_IH (1 << 10)
61#define SOFT_RESET_MC (1 << 11)
62#define SOFT_RESET_RLC (1 << 13)
63#define SOFT_RESET_ROM (1 << 14)
64#define SOFT_RESET_SEM (1 << 15)
65#define SOFT_RESET_VMC (1 << 17)
66#define SOFT_RESET_DMA (1 << 20)
67#define SOFT_RESET_TST (1 << 21)
68#define SOFT_RESET_REGBB (1 << 22)
69#define SOFT_RESET_ORB (1 << 23)
70
53#define VM_CONTEXT0_REQUEST_RESPONSE 0x1470 71#define VM_CONTEXT0_REQUEST_RESPONSE 0x1470
54#define REQUEST_TYPE(x) (((x) & 0xf) << 0) 72#define REQUEST_TYPE(x) (((x) & 0xf) << 0)
55#define RESPONSE_TYPE_MASK 0x000000F0 73#define RESPONSE_TYPE_MASK 0x000000F0
@@ -599,5 +617,62 @@
599#define PACKET3_SET_APPEND_CNT 0x75 617#define PACKET3_SET_APPEND_CNT 0x75
600#define PACKET3_ME_WRITE 0x7A 618#define PACKET3_ME_WRITE 0x7A
601 619
620/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
621#define DMA0_REGISTER_OFFSET 0x0 /* not a register */
622#define DMA1_REGISTER_OFFSET 0x800 /* not a register */
623
624#define DMA_RB_CNTL 0xd000
625# define DMA_RB_ENABLE (1 << 0)
626# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
627# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
628# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
629# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
630# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
631#define DMA_RB_BASE 0xd004
632#define DMA_RB_RPTR 0xd008
633#define DMA_RB_WPTR 0xd00c
634
635#define DMA_RB_RPTR_ADDR_HI 0xd01c
636#define DMA_RB_RPTR_ADDR_LO 0xd020
637
638#define DMA_IB_CNTL 0xd024
639# define DMA_IB_ENABLE (1 << 0)
640# define DMA_IB_SWAP_ENABLE (1 << 4)
641# define CMD_VMID_FORCE (1 << 31)
642#define DMA_IB_RPTR 0xd028
643#define DMA_CNTL 0xd02c
644# define TRAP_ENABLE (1 << 0)
645# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
646# define SEM_WAIT_INT_ENABLE (1 << 2)
647# define DATA_SWAP_ENABLE (1 << 3)
648# define FENCE_SWAP_ENABLE (1 << 4)
649# define CTXEMPTY_INT_ENABLE (1 << 28)
650#define DMA_STATUS_REG 0xd034
651# define DMA_IDLE (1 << 0)
652#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0xd044
653#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0xd048
654#define DMA_TILING_CONFIG 0xd0b8
655#define DMA_MODE 0xd0bc
656
657#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
658 (((t) & 0x1) << 23) | \
659 (((s) & 0x1) << 22) | \
660 (((n) & 0xFFFFF) << 0))
661
662#define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \
663 (((vmid) & 0xF) << 20) | \
664 (((n) & 0xFFFFF) << 0))
665
666/* async DMA Packet types */
667#define DMA_PACKET_WRITE 0x2
668#define DMA_PACKET_COPY 0x3
669#define DMA_PACKET_INDIRECT_BUFFER 0x4
670#define DMA_PACKET_SEMAPHORE 0x5
671#define DMA_PACKET_FENCE 0x6
672#define DMA_PACKET_TRAP 0x7
673#define DMA_PACKET_SRBM_WRITE 0x9
674#define DMA_PACKET_CONSTANT_FILL 0xd
675#define DMA_PACKET_NOP 0xf
676
602#endif 677#endif
603 678
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 169ecc9628ea..a76eca18f134 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1370,6 +1370,29 @@ bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1370 return radeon_ring_test_lockup(rdev, ring); 1370 return radeon_ring_test_lockup(rdev, ring);
1371} 1371}
1372 1372
1373/**
1374 * r600_dma_is_lockup - Check if the DMA engine is locked up
1375 *
1376 * @rdev: radeon_device pointer
1377 * @ring: radeon_ring structure holding ring information
1378 *
1379 * Check if the async DMA engine is locked up (r6xx-evergreen).
1380 * Returns true if the engine appears to be locked up, false if not.
1381 */
1382bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1383{
1384 u32 dma_status_reg;
1385
1386 dma_status_reg = RREG32(DMA_STATUS_REG);
1387 if (dma_status_reg & DMA_IDLE) {
1388 radeon_ring_lockup_update(ring);
1389 return false;
1390 }
1391 /* force ring activities */
1392 radeon_ring_force_activity(rdev, ring);
1393 return radeon_ring_test_lockup(rdev, ring);
1394}
1395
1373int r600_asic_reset(struct radeon_device *rdev) 1396int r600_asic_reset(struct radeon_device *rdev)
1374{ 1397{
1375 return r600_gpu_soft_reset(rdev); 1398 return r600_gpu_soft_reset(rdev);
@@ -1588,6 +1611,7 @@ static void r600_gpu_init(struct radeon_device *rdev)
1588 WREG32(GB_TILING_CONFIG, tiling_config); 1611 WREG32(GB_TILING_CONFIG, tiling_config);
1589 WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff); 1612 WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff);
1590 WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff); 1613 WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff);
1614 WREG32(DMA_TILING_CONFIG, tiling_config & 0xffff);
1591 1615
1592 tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8); 1616 tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8);
1593 WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK); 1617 WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK);
@@ -1865,6 +1889,7 @@ void r600_cp_stop(struct radeon_device *rdev)
1865 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 1889 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1866 WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1)); 1890 WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1));
1867 WREG32(SCRATCH_UMSK, 0); 1891 WREG32(SCRATCH_UMSK, 0);
1892 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1868} 1893}
1869 1894
1870int r600_init_microcode(struct radeon_device *rdev) 1895int r600_init_microcode(struct radeon_device *rdev)
@@ -2190,6 +2215,128 @@ void r600_cp_fini(struct radeon_device *rdev)
2190 radeon_scratch_free(rdev, ring->rptr_save_reg); 2215 radeon_scratch_free(rdev, ring->rptr_save_reg);
2191} 2216}
2192 2217
2218/*
2219 * DMA
2220 * Starting with R600, the GPU has an asynchronous
2221 * DMA engine. The programming model is very similar
2222 * to the 3D engine (ring buffer, IBs, etc.), but the
 2223 * DMA controller has its own packet format that is
 2224 * different from the PM4 format used by the 3D engine.
2225 * It supports copying data, writing embedded data,
2226 * solid fills, and a number of other things. It also
2227 * has support for tiling/detiling of buffers.
2228 */
2229/**
2230 * r600_dma_stop - stop the async dma engine
2231 *
2232 * @rdev: radeon_device pointer
2233 *
2234 * Stop the async dma engine (r6xx-evergreen).
2235 */
2236void r600_dma_stop(struct radeon_device *rdev)
2237{
2238 u32 rb_cntl = RREG32(DMA_RB_CNTL);
2239
2240 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2241
2242 rb_cntl &= ~DMA_RB_ENABLE;
2243 WREG32(DMA_RB_CNTL, rb_cntl);
2244
2245 rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
2246}
2247
2248/**
2249 * r600_dma_resume - setup and start the async dma engine
2250 *
2251 * @rdev: radeon_device pointer
2252 *
2253 * Set up the DMA ring buffer and enable it. (r6xx-evergreen).
2254 * Returns 0 for success, error for failure.
2255 */
2256int r600_dma_resume(struct radeon_device *rdev)
2257{
2258 struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
2259 u32 rb_cntl, dma_cntl;
2260 u32 rb_bufsz;
2261 int r;
2262
2263 /* Reset dma */
2264 if (rdev->family >= CHIP_RV770)
2265 WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
2266 else
2267 WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
2268 RREG32(SRBM_SOFT_RESET);
2269 udelay(50);
2270 WREG32(SRBM_SOFT_RESET, 0);
2271
2272 WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
2273 WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
2274
2275 /* Set ring buffer size in dwords */
2276 rb_bufsz = drm_order(ring->ring_size / 4);
2277 rb_cntl = rb_bufsz << 1;
2278#ifdef __BIG_ENDIAN
2279 rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
2280#endif
2281 WREG32(DMA_RB_CNTL, rb_cntl);
2282
2283 /* Initialize the ring buffer's read and write pointers */
2284 WREG32(DMA_RB_RPTR, 0);
2285 WREG32(DMA_RB_WPTR, 0);
2286
2287 /* set the wb address whether it's enabled or not */
2288 WREG32(DMA_RB_RPTR_ADDR_HI,
2289 upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
2290 WREG32(DMA_RB_RPTR_ADDR_LO,
2291 ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));
2292
2293 if (rdev->wb.enabled)
2294 rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
2295
2296 WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);
2297
2298 /* enable DMA IBs */
2299 WREG32(DMA_IB_CNTL, DMA_IB_ENABLE);
2300
2301 dma_cntl = RREG32(DMA_CNTL);
2302 dma_cntl &= ~CTXEMPTY_INT_ENABLE;
2303 WREG32(DMA_CNTL, dma_cntl);
2304
2305 if (rdev->family >= CHIP_RV770)
2306 WREG32(DMA_MODE, 1);
2307
2308 ring->wptr = 0;
2309 WREG32(DMA_RB_WPTR, ring->wptr << 2);
2310
2311 ring->rptr = RREG32(DMA_RB_RPTR) >> 2;
2312
2313 WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);
2314
2315 ring->ready = true;
2316
2317 r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
2318 if (r) {
2319 ring->ready = false;
2320 return r;
2321 }
2322
2323 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
2324
2325 return 0;
2326}
2327
2328/**
2329 * r600_dma_fini - tear down the async dma engine
2330 *
2331 * @rdev: radeon_device pointer
2332 *
2333 * Stop the async dma engine and free the ring (r6xx-evergreen).
2334 */
2335void r600_dma_fini(struct radeon_device *rdev)
2336{
2337 r600_dma_stop(rdev);
2338 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
2339}
2193 2340
2194/* 2341/*
2195 * GPU scratch registers helpers function. 2342 * GPU scratch registers helpers function.
@@ -2246,6 +2393,64 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2246 return r; 2393 return r;
2247} 2394}
2248 2395
2396/**
2397 * r600_dma_ring_test - simple async dma engine test
2398 *
2399 * @rdev: radeon_device pointer
2400 * @ring: radeon_ring structure holding ring information
2401 *
 2402 * Test the DMA engine by using it to write a
2403 * value to memory. (r6xx-SI).
2404 * Returns 0 for success, error for failure.
2405 */
2406int r600_dma_ring_test(struct radeon_device *rdev,
2407 struct radeon_ring *ring)
2408{
2409 unsigned i;
2410 int r;
2411 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
2412 u32 tmp;
2413
2414 if (!ptr) {
2415 DRM_ERROR("invalid vram scratch pointer\n");
2416 return -EINVAL;
2417 }
2418
2419 tmp = 0xCAFEDEAD;
2420 writel(tmp, ptr);
2421
2422 r = radeon_ring_lock(rdev, ring, 4);
2423 if (r) {
2424 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
2425 return r;
2426 }
2427 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
2428 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
2429 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
2430 radeon_ring_write(ring, 0xDEADBEEF);
2431 radeon_ring_unlock_commit(rdev, ring);
2432
2433 for (i = 0; i < rdev->usec_timeout; i++) {
2434 tmp = readl(ptr);
2435 if (tmp == 0xDEADBEEF)
2436 break;
2437 DRM_UDELAY(1);
2438 }
2439
2440 if (i < rdev->usec_timeout) {
2441 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2442 } else {
2443 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
2444 ring->idx, tmp);
2445 r = -EINVAL;
2446 }
2447 return r;
2448}
2449
2450/*
2451 * CP fences/semaphores
2452 */
2453
2249void r600_fence_ring_emit(struct radeon_device *rdev, 2454void r600_fence_ring_emit(struct radeon_device *rdev,
2250 struct radeon_fence *fence) 2455 struct radeon_fence *fence)
2251{ 2456{
@@ -2309,6 +2514,58 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev,
2309 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); 2514 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
2310} 2515}
2311 2516
2517/*
2518 * DMA fences/semaphores
2519 */
2520
2521/**
2522 * r600_dma_fence_ring_emit - emit a fence on the DMA ring
2523 *
2524 * @rdev: radeon_device pointer
2525 * @fence: radeon fence object
2526 *
2527 * Add a DMA fence packet to the ring to write
2528 * the fence seq number and DMA trap packet to generate
2529 * an interrupt if needed (r6xx-r7xx).
2530 */
2531void r600_dma_fence_ring_emit(struct radeon_device *rdev,
2532 struct radeon_fence *fence)
2533{
2534 struct radeon_ring *ring = &rdev->ring[fence->ring];
2535 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2536 /* write the fence */
2537 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
2538 radeon_ring_write(ring, addr & 0xfffffffc);
2539 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
2540 radeon_ring_write(ring, fence->seq);
2541 /* generate an interrupt */
2542 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
2543}
2544
2545/**
2546 * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
2547 *
2548 * @rdev: radeon_device pointer
2549 * @ring: radeon_ring structure holding ring information
2550 * @semaphore: radeon semaphore object
2551 * @emit_wait: wait or signal semaphore
2552 *
2553 * Add a DMA semaphore packet to the ring wait on or signal
2554 * other rings (r6xx-SI).
2555 */
2556void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
2557 struct radeon_ring *ring,
2558 struct radeon_semaphore *semaphore,
2559 bool emit_wait)
2560{
2561 u64 addr = semaphore->gpu_addr;
2562 u32 s = emit_wait ? 0 : 1;
2563
2564 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
2565 radeon_ring_write(ring, addr & 0xfffffffc);
2566 radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
2567}
2568
2312int r600_copy_blit(struct radeon_device *rdev, 2569int r600_copy_blit(struct radeon_device *rdev,
2313 uint64_t src_offset, 2570 uint64_t src_offset,
2314 uint64_t dst_offset, 2571 uint64_t dst_offset,
@@ -2328,6 +2585,80 @@ int r600_copy_blit(struct radeon_device *rdev,
2328 return 0; 2585 return 0;
2329} 2586}
2330 2587
2588/**
2589 * r600_copy_dma - copy pages using the DMA engine
2590 *
2591 * @rdev: radeon_device pointer
2592 * @src_offset: src GPU address
2593 * @dst_offset: dst GPU address
2594 * @num_gpu_pages: number of GPU pages to xfer
2595 * @fence: radeon fence object
2596 *
2597 * Copy GPU paging using the DMA engine (r6xx-r7xx).
2598 * Used by the radeon ttm implementation to move pages if
2599 * registered as the asic copy callback.
2600 */
2601int r600_copy_dma(struct radeon_device *rdev,
2602 uint64_t src_offset, uint64_t dst_offset,
2603 unsigned num_gpu_pages,
2604 struct radeon_fence **fence)
2605{
2606 struct radeon_semaphore *sem = NULL;
2607 int ring_index = rdev->asic->copy.dma_ring_index;
2608 struct radeon_ring *ring = &rdev->ring[ring_index];
2609 u32 size_in_dw, cur_size_in_dw;
2610 int i, num_loops;
2611 int r = 0;
2612
2613 r = radeon_semaphore_create(rdev, &sem);
2614 if (r) {
2615 DRM_ERROR("radeon: moving bo (%d).\n", r);
2616 return r;
2617 }
2618
2619 size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
2620 num_loops = DIV_ROUND_UP(size_in_dw, 0xffff);
2621 r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
2622 if (r) {
2623 DRM_ERROR("radeon: moving bo (%d).\n", r);
2624 radeon_semaphore_free(rdev, &sem, NULL);
2625 return r;
2626 }
2627
2628 if (radeon_fence_need_sync(*fence, ring->idx)) {
2629 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
2630 ring->idx);
2631 radeon_fence_note_sync(*fence, ring->idx);
2632 } else {
2633 radeon_semaphore_free(rdev, &sem, NULL);
2634 }
2635
2636 for (i = 0; i < num_loops; i++) {
2637 cur_size_in_dw = size_in_dw;
2638 if (cur_size_in_dw > 0xFFFF)
2639 cur_size_in_dw = 0xFFFF;
2640 size_in_dw -= cur_size_in_dw;
2641 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
2642 radeon_ring_write(ring, dst_offset & 0xfffffffc);
2643 radeon_ring_write(ring, src_offset & 0xfffffffc);
2644 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
2645 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
2646 src_offset += cur_size_in_dw * 4;
2647 dst_offset += cur_size_in_dw * 4;
2648 }
2649
2650 r = radeon_fence_emit(rdev, fence, ring->idx);
2651 if (r) {
2652 radeon_ring_unlock_undo(rdev, ring);
2653 return r;
2654 }
2655
2656 radeon_ring_unlock_commit(rdev, ring);
2657 radeon_semaphore_free(rdev, &sem, *fence);
2658
2659 return r;
2660}
2661
2331int r600_set_surface_reg(struct radeon_device *rdev, int reg, 2662int r600_set_surface_reg(struct radeon_device *rdev, int reg,
2332 uint32_t tiling_flags, uint32_t pitch, 2663 uint32_t tiling_flags, uint32_t pitch,
2333 uint32_t offset, uint32_t obj_size) 2664 uint32_t offset, uint32_t obj_size)
@@ -2343,7 +2674,7 @@ void r600_clear_surface_reg(struct radeon_device *rdev, int reg)
2343 2674
2344static int r600_startup(struct radeon_device *rdev) 2675static int r600_startup(struct radeon_device *rdev)
2345{ 2676{
2346 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 2677 struct radeon_ring *ring;
2347 int r; 2678 int r;
2348 2679
2349 /* enable pcie gen2 link */ 2680 /* enable pcie gen2 link */
@@ -2388,6 +2719,12 @@ static int r600_startup(struct radeon_device *rdev)
2388 return r; 2719 return r;
2389 } 2720 }
2390 2721
2722 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
2723 if (r) {
2724 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
2725 return r;
2726 }
2727
2391 /* Enable IRQ */ 2728 /* Enable IRQ */
2392 r = r600_irq_init(rdev); 2729 r = r600_irq_init(rdev);
2393 if (r) { 2730 if (r) {
@@ -2397,12 +2734,20 @@ static int r600_startup(struct radeon_device *rdev)
2397 } 2734 }
2398 r600_irq_set(rdev); 2735 r600_irq_set(rdev);
2399 2736
2737 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2400 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, 2738 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
2401 R600_CP_RB_RPTR, R600_CP_RB_WPTR, 2739 R600_CP_RB_RPTR, R600_CP_RB_WPTR,
2402 0, 0xfffff, RADEON_CP_PACKET2); 2740 0, 0xfffff, RADEON_CP_PACKET2);
2741 if (r)
2742 return r;
2403 2743
2744 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
2745 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
2746 DMA_RB_RPTR, DMA_RB_WPTR,
2747 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
2404 if (r) 2748 if (r)
2405 return r; 2749 return r;
2750
2406 r = r600_cp_load_microcode(rdev); 2751 r = r600_cp_load_microcode(rdev);
2407 if (r) 2752 if (r)
2408 return r; 2753 return r;
@@ -2410,6 +2755,10 @@ static int r600_startup(struct radeon_device *rdev)
2410 if (r) 2755 if (r)
2411 return r; 2756 return r;
2412 2757
2758 r = r600_dma_resume(rdev);
2759 if (r)
2760 return r;
2761
2413 r = radeon_ib_pool_init(rdev); 2762 r = radeon_ib_pool_init(rdev);
2414 if (r) { 2763 if (r) {
2415 dev_err(rdev->dev, "IB initialization failed (%d).\n", r); 2764 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -2465,7 +2814,7 @@ int r600_suspend(struct radeon_device *rdev)
2465{ 2814{
2466 r600_audio_fini(rdev); 2815 r600_audio_fini(rdev);
2467 r600_cp_stop(rdev); 2816 r600_cp_stop(rdev);
2468 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 2817 r600_dma_stop(rdev);
2469 r600_irq_suspend(rdev); 2818 r600_irq_suspend(rdev);
2470 radeon_wb_disable(rdev); 2819 radeon_wb_disable(rdev);
2471 r600_pcie_gart_disable(rdev); 2820 r600_pcie_gart_disable(rdev);
@@ -2538,6 +2887,9 @@ int r600_init(struct radeon_device *rdev)
2538 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; 2887 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
2539 r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); 2888 r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
2540 2889
2890 rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
2891 r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
2892
2541 rdev->ih.ring_obj = NULL; 2893 rdev->ih.ring_obj = NULL;
2542 r600_ih_ring_init(rdev, 64 * 1024); 2894 r600_ih_ring_init(rdev, 64 * 1024);
2543 2895
@@ -2550,6 +2902,7 @@ int r600_init(struct radeon_device *rdev)
2550 if (r) { 2902 if (r) {
2551 dev_err(rdev->dev, "disabling GPU acceleration\n"); 2903 dev_err(rdev->dev, "disabling GPU acceleration\n");
2552 r600_cp_fini(rdev); 2904 r600_cp_fini(rdev);
2905 r600_dma_fini(rdev);
2553 r600_irq_fini(rdev); 2906 r600_irq_fini(rdev);
2554 radeon_wb_fini(rdev); 2907 radeon_wb_fini(rdev);
2555 radeon_ib_pool_fini(rdev); 2908 radeon_ib_pool_fini(rdev);
@@ -2566,6 +2919,7 @@ void r600_fini(struct radeon_device *rdev)
2566 r600_audio_fini(rdev); 2919 r600_audio_fini(rdev);
2567 r600_blit_fini(rdev); 2920 r600_blit_fini(rdev);
2568 r600_cp_fini(rdev); 2921 r600_cp_fini(rdev);
2922 r600_dma_fini(rdev);
2569 r600_irq_fini(rdev); 2923 r600_irq_fini(rdev);
2570 radeon_wb_fini(rdev); 2924 radeon_wb_fini(rdev);
2571 radeon_ib_pool_fini(rdev); 2925 radeon_ib_pool_fini(rdev);
@@ -2668,6 +3022,104 @@ free_scratch:
2668 return r; 3022 return r;
2669} 3023}
2670 3024
3025/**
3026 * r600_dma_ib_test - test an IB on the DMA engine
3027 *
3028 * @rdev: radeon_device pointer
3029 * @ring: radeon_ring structure holding ring information
3030 *
3031 * Test a simple IB in the DMA ring (r6xx-SI).
3032 * Returns 0 on success, error on failure.
3033 */
3034int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3035{
3036 struct radeon_ib ib;
3037 unsigned i;
3038 int r;
3039 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3040 u32 tmp = 0;
3041
3042 if (!ptr) {
3043 DRM_ERROR("invalid vram scratch pointer\n");
3044 return -EINVAL;
3045 }
3046
3047 tmp = 0xCAFEDEAD;
3048 writel(tmp, ptr);
3049
3050 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3051 if (r) {
3052 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3053 return r;
3054 }
3055
3056 ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
3057 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3058 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
3059 ib.ptr[3] = 0xDEADBEEF;
3060 ib.length_dw = 4;
3061
3062 r = radeon_ib_schedule(rdev, &ib, NULL);
3063 if (r) {
3064 radeon_ib_free(rdev, &ib);
3065 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3066 return r;
3067 }
3068 r = radeon_fence_wait(ib.fence, false);
3069 if (r) {
3070 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3071 return r;
3072 }
3073 for (i = 0; i < rdev->usec_timeout; i++) {
3074 tmp = readl(ptr);
3075 if (tmp == 0xDEADBEEF)
3076 break;
3077 DRM_UDELAY(1);
3078 }
3079 if (i < rdev->usec_timeout) {
3080 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3081 } else {
3082 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3083 r = -EINVAL;
3084 }
3085 radeon_ib_free(rdev, &ib);
3086 return r;
3087}
3088
3089/**
3090 * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
3091 *
3092 * @rdev: radeon_device pointer
3093 * @ib: IB object to schedule
3094 *
3095 * Schedule an IB in the DMA ring (r6xx-r7xx).
3096 */
3097void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3098{
3099 struct radeon_ring *ring = &rdev->ring[ib->ring];
3100
3101 if (rdev->wb.enabled) {
3102 u32 next_rptr = ring->wptr + 4;
3103 while ((next_rptr & 7) != 5)
3104 next_rptr++;
3105 next_rptr += 3;
3106 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
3107 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3108 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
3109 radeon_ring_write(ring, next_rptr);
3110 }
3111
3112 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
3113 * Pad as necessary with NOPs.
3114 */
3115 while ((ring->wptr & 7) != 5)
3116 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
3117 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
3118 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
3119 radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
3120
3121}
3122
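A minimal sketch of the padding rule used by r600_dma_ring_ib_execute() above (the helper name is hypothetical and not part of the patch): the INDIRECT_BUFFER packet is 3 dwords, so it must start at (wptr & 7) == 5 in order to end on an 8-dword boundary, and the gap up to that point is filled with NOP packets.

        /* Hypothetical helper: NOP dwords needed before the 3-dword
         * INDIRECT_BUFFER packet so that it ends 8-dword aligned.
         */
        static unsigned r600_dma_ib_pad_count(unsigned wptr)
        {
                unsigned pad = 0;

                while (((wptr + pad) & 7) != 5)
                        pad++;
                return pad;     /* 0..7 NOP packets */
        }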
2671/* 3123/*
2672 * Interrupts 3124 * Interrupts
2673 * 3125 *
@@ -2859,6 +3311,8 @@ static void r600_disable_interrupt_state(struct radeon_device *rdev)
2859 u32 tmp; 3311 u32 tmp;
2860 3312
2861 WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 3313 WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3314 tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
3315 WREG32(DMA_CNTL, tmp);
2862 WREG32(GRBM_INT_CNTL, 0); 3316 WREG32(GRBM_INT_CNTL, 0);
2863 WREG32(DxMODE_INT_MASK, 0); 3317 WREG32(DxMODE_INT_MASK, 0);
2864 WREG32(D1GRPH_INTERRUPT_CONTROL, 0); 3318 WREG32(D1GRPH_INTERRUPT_CONTROL, 0);
@@ -3000,6 +3454,7 @@ int r600_irq_set(struct radeon_device *rdev)
3000 u32 grbm_int_cntl = 0; 3454 u32 grbm_int_cntl = 0;
3001 u32 hdmi0, hdmi1; 3455 u32 hdmi0, hdmi1;
3002 u32 d1grph = 0, d2grph = 0; 3456 u32 d1grph = 0, d2grph = 0;
3457 u32 dma_cntl;
3003 3458
3004 if (!rdev->irq.installed) { 3459 if (!rdev->irq.installed) {
3005 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); 3460 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -3034,12 +3489,19 @@ int r600_irq_set(struct radeon_device *rdev)
3034 hdmi0 = RREG32(HDMI0_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK; 3489 hdmi0 = RREG32(HDMI0_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK;
3035 hdmi1 = RREG32(HDMI1_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK; 3490 hdmi1 = RREG32(HDMI1_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK;
3036 } 3491 }
3492 dma_cntl = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
3037 3493
3038 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { 3494 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
3039 DRM_DEBUG("r600_irq_set: sw int\n"); 3495 DRM_DEBUG("r600_irq_set: sw int\n");
3040 cp_int_cntl |= RB_INT_ENABLE; 3496 cp_int_cntl |= RB_INT_ENABLE;
3041 cp_int_cntl |= TIME_STAMP_INT_ENABLE; 3497 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
3042 } 3498 }
3499
3500 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
3501 DRM_DEBUG("r600_irq_set: sw int dma\n");
3502 dma_cntl |= TRAP_ENABLE;
3503 }
3504
3043 if (rdev->irq.crtc_vblank_int[0] || 3505 if (rdev->irq.crtc_vblank_int[0] ||
3044 atomic_read(&rdev->irq.pflip[0])) { 3506 atomic_read(&rdev->irq.pflip[0])) {
3045 DRM_DEBUG("r600_irq_set: vblank 0\n"); 3507 DRM_DEBUG("r600_irq_set: vblank 0\n");
@@ -3084,6 +3546,7 @@ int r600_irq_set(struct radeon_device *rdev)
3084 } 3546 }
3085 3547
3086 WREG32(CP_INT_CNTL, cp_int_cntl); 3548 WREG32(CP_INT_CNTL, cp_int_cntl);
3549 WREG32(DMA_CNTL, dma_cntl);
3087 WREG32(DxMODE_INT_MASK, mode_int); 3550 WREG32(DxMODE_INT_MASK, mode_int);
3088 WREG32(D1GRPH_INTERRUPT_CONTROL, d1grph); 3551 WREG32(D1GRPH_INTERRUPT_CONTROL, d1grph);
3089 WREG32(D2GRPH_INTERRUPT_CONTROL, d2grph); 3552 WREG32(D2GRPH_INTERRUPT_CONTROL, d2grph);
@@ -3463,6 +3926,10 @@ restart_ih:
3463 DRM_DEBUG("IH: CP EOP\n"); 3926 DRM_DEBUG("IH: CP EOP\n");
3464 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); 3927 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3465 break; 3928 break;
3929 case 224: /* DMA trap event */
3930 DRM_DEBUG("IH: DMA trap\n");
3931 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
3932 break;
3466 case 233: /* GUI IDLE */ 3933 case 233: /* GUI IDLE */
3467 DRM_DEBUG("IH: GUI idle\n"); 3934 DRM_DEBUG("IH: GUI idle\n");
3468 break; 3935 break;
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index fa6f37099ba9..a596c554a3a0 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -590,9 +590,59 @@
590#define WAIT_2D_IDLECLEAN_bit (1 << 16) 590#define WAIT_2D_IDLECLEAN_bit (1 << 16)
591#define WAIT_3D_IDLECLEAN_bit (1 << 17) 591#define WAIT_3D_IDLECLEAN_bit (1 << 17)
592 592
593/* async DMA */
594#define DMA_TILING_CONFIG 0x3ec4
595#define DMA_CONFIG 0x3e4c
596
597#define DMA_RB_CNTL 0xd000
598# define DMA_RB_ENABLE (1 << 0)
599# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
600# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
601# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
602# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
603# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
604#define DMA_RB_BASE 0xd004
605#define DMA_RB_RPTR 0xd008
606#define DMA_RB_WPTR 0xd00c
607
608#define DMA_RB_RPTR_ADDR_HI 0xd01c
609#define DMA_RB_RPTR_ADDR_LO 0xd020
610
611#define DMA_IB_CNTL 0xd024
612# define DMA_IB_ENABLE (1 << 0)
613# define DMA_IB_SWAP_ENABLE (1 << 4)
614#define DMA_IB_RPTR 0xd028
615#define DMA_CNTL 0xd02c
616# define TRAP_ENABLE (1 << 0)
617# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
618# define SEM_WAIT_INT_ENABLE (1 << 2)
619# define DATA_SWAP_ENABLE (1 << 3)
620# define FENCE_SWAP_ENABLE (1 << 4)
621# define CTXEMPTY_INT_ENABLE (1 << 28)
622#define DMA_STATUS_REG 0xd034
623# define DMA_IDLE (1 << 0)
624#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0xd044
625#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0xd048
626#define DMA_MODE 0xd0bc
627
628/* async DMA packets */
629#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
630 (((t) & 0x1) << 23) | \
631 (((s) & 0x1) << 22) | \
632 (((n) & 0xFFFF) << 0))
633/* async DMA Packet types */
634#define DMA_PACKET_WRITE 0x2
635#define DMA_PACKET_COPY 0x3
636#define DMA_PACKET_INDIRECT_BUFFER 0x4
637#define DMA_PACKET_SEMAPHORE 0x5
638#define DMA_PACKET_FENCE 0x6
639#define DMA_PACKET_TRAP 0x7
640#define DMA_PACKET_CONSTANT_FILL 0xd /* 7xx only */
641#define DMA_PACKET_NOP 0xf
642
593#define IH_RB_CNTL 0x3e00 643#define IH_RB_CNTL 0x3e00
594# define IH_RB_ENABLE (1 << 0) 644# define IH_RB_ENABLE (1 << 0)
595# define IH_RB_SIZE(x) ((x) << 1) /* log2 */ 645# define IH_RB_SIZE(x) ((x) << 1) /* log2 */
596# define IH_RB_FULL_DRAIN_ENABLE (1 << 6) 646# define IH_RB_FULL_DRAIN_ENABLE (1 << 6)
597# define IH_WPTR_WRITEBACK_ENABLE (1 << 8) 647# define IH_WPTR_WRITEBACK_ENABLE (1 << 8)
598# define IH_WPTR_WRITEBACK_TIMER(x) ((x) << 9) /* log2 */ 648# define IH_WPTR_WRITEBACK_TIMER(x) ((x) << 9) /* log2 */
@@ -637,7 +687,9 @@
637#define TN_RLC_CLEAR_STATE_RESTORE_BASE 0x3f20 687#define TN_RLC_CLEAR_STATE_RESTORE_BASE 0x3f20
638 688
639#define SRBM_SOFT_RESET 0xe60 689#define SRBM_SOFT_RESET 0xe60
690# define SOFT_RESET_DMA (1 << 12)
640# define SOFT_RESET_RLC (1 << 13) 691# define SOFT_RESET_RLC (1 << 13)
692# define RV770_SOFT_RESET_DMA (1 << 20)
641 693
642#define CP_INT_CNTL 0xc124 694#define CP_INT_CNTL 0xc124
643# define CNTX_BUSY_INT_ENABLE (1 << 19) 695# define CNTX_BUSY_INT_ENABLE (1 << 19)
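For reference, the DMA_PACKET() header encoding added to r600d.h above can be checked with a small standalone sketch (ordinary user-space C, not kernel code; main() is purely illustrative). The value it prints, 0x20000001, is exactly the WRITE header with a one-dword payload that r600_dma_ib_test() places at ib.ptr[0].

        #include <stdio.h>
        #include <stdint.h>

        /* same field layout as the DMA_PACKET() macro in r600d.h above */
        #define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
                                          (((t) & 0x1) << 23) | \
                                          (((s) & 0x1) << 22) | \
                                          (((n) & 0xFFFF) << 0))
        #define DMA_PACKET_WRITE 0x2

        int main(void)
        {
                uint32_t hdr = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);

                printf("0x%08x\n", hdr);  /* 0x20000001 */
                return 0;
        }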
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8c42d54c2e26..5d68346b2c01 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -109,7 +109,7 @@ extern int radeon_lockup_timeout;
109#define RADEON_BIOS_NUM_SCRATCH 8 109#define RADEON_BIOS_NUM_SCRATCH 8
110 110
111/* max number of rings */ 111/* max number of rings */
112#define RADEON_NUM_RINGS 3 112#define RADEON_NUM_RINGS 5
113 113
114/* fence seq are set to this number when signaled */ 114/* fence seq are set to this number when signaled */
115#define RADEON_FENCE_SIGNALED_SEQ 0LL 115#define RADEON_FENCE_SIGNALED_SEQ 0LL
@@ -122,6 +122,11 @@ extern int radeon_lockup_timeout;
122#define CAYMAN_RING_TYPE_CP1_INDEX 1 122#define CAYMAN_RING_TYPE_CP1_INDEX 1
123#define CAYMAN_RING_TYPE_CP2_INDEX 2 123#define CAYMAN_RING_TYPE_CP2_INDEX 2
124 124
125/* R600+ has an async dma ring */
126#define R600_RING_TYPE_DMA_INDEX 3
127/* cayman add a second async dma ring */
128#define CAYMAN_RING_TYPE_DMA1_INDEX 4
129
125/* hardcode those limit for now */ 130/* hardcode those limit for now */
126#define RADEON_VA_IB_OFFSET (1 << 20) 131#define RADEON_VA_IB_OFFSET (1 << 20)
127#define RADEON_VA_RESERVED_SIZE (8 << 20) 132#define RADEON_VA_RESERVED_SIZE (8 << 20)
@@ -787,6 +792,15 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigne
787void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp); 792void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp);
788 793
789 794
795/* r600 async dma */
796void r600_dma_stop(struct radeon_device *rdev);
797int r600_dma_resume(struct radeon_device *rdev);
798void r600_dma_fini(struct radeon_device *rdev);
799
800void cayman_dma_stop(struct radeon_device *rdev);
801int cayman_dma_resume(struct radeon_device *rdev);
802void cayman_dma_fini(struct radeon_device *rdev);
803
790/* 804/*
791 * CS. 805 * CS.
792 */ 806 */
@@ -883,7 +897,9 @@ struct radeon_wb {
883#define RADEON_WB_CP_RPTR_OFFSET 1024 897#define RADEON_WB_CP_RPTR_OFFSET 1024
884#define RADEON_WB_CP1_RPTR_OFFSET 1280 898#define RADEON_WB_CP1_RPTR_OFFSET 1280
885#define RADEON_WB_CP2_RPTR_OFFSET 1536 899#define RADEON_WB_CP2_RPTR_OFFSET 1536
900#define R600_WB_DMA_RPTR_OFFSET 1792
886#define R600_WB_IH_WPTR_OFFSET 2048 901#define R600_WB_IH_WPTR_OFFSET 2048
902#define CAYMAN_WB_DMA1_RPTR_OFFSET 2304
887#define R600_WB_EVENT_OFFSET 3072 903#define R600_WB_EVENT_OFFSET 3072
888 904
889/** 905/**
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 654520b95ab7..3ea0475f9a95 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -947,6 +947,15 @@ static struct radeon_asic r600_asic = {
947 .ring_test = &r600_ring_test, 947 .ring_test = &r600_ring_test,
948 .ib_test = &r600_ib_test, 948 .ib_test = &r600_ib_test,
949 .is_lockup = &r600_gpu_is_lockup, 949 .is_lockup = &r600_gpu_is_lockup,
950 },
951 [R600_RING_TYPE_DMA_INDEX] = {
952 .ib_execute = &r600_dma_ring_ib_execute,
953 .emit_fence = &r600_dma_fence_ring_emit,
954 .emit_semaphore = &r600_dma_semaphore_ring_emit,
955 .cs_parse = NULL,
956 .ring_test = &r600_dma_ring_test,
957 .ib_test = &r600_dma_ib_test,
958 .is_lockup = &r600_dma_is_lockup,
950 } 959 }
951 }, 960 },
952 .irq = { 961 .irq = {
@@ -963,10 +972,10 @@ static struct radeon_asic r600_asic = {
963 .copy = { 972 .copy = {
964 .blit = &r600_copy_blit, 973 .blit = &r600_copy_blit,
965 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, 974 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
966 .dma = NULL, 975 .dma = &r600_copy_dma,
967 .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, 976 .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
968 .copy = &r600_copy_blit, 977 .copy = &r600_copy_dma,
969 .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, 978 .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
970 }, 979 },
971 .surface = { 980 .surface = {
972 .set_reg = r600_set_surface_reg, 981 .set_reg = r600_set_surface_reg,
@@ -1022,6 +1031,15 @@ static struct radeon_asic rs780_asic = {
1022 .ring_test = &r600_ring_test, 1031 .ring_test = &r600_ring_test,
1023 .ib_test = &r600_ib_test, 1032 .ib_test = &r600_ib_test,
1024 .is_lockup = &r600_gpu_is_lockup, 1033 .is_lockup = &r600_gpu_is_lockup,
1034 },
1035 [R600_RING_TYPE_DMA_INDEX] = {
1036 .ib_execute = &r600_dma_ring_ib_execute,
1037 .emit_fence = &r600_dma_fence_ring_emit,
1038 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1039 .cs_parse = NULL,
1040 .ring_test = &r600_dma_ring_test,
1041 .ib_test = &r600_dma_ib_test,
1042 .is_lockup = &r600_dma_is_lockup,
1025 } 1043 }
1026 }, 1044 },
1027 .irq = { 1045 .irq = {
@@ -1038,10 +1056,10 @@ static struct radeon_asic rs780_asic = {
1038 .copy = { 1056 .copy = {
1039 .blit = &r600_copy_blit, 1057 .blit = &r600_copy_blit,
1040 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1058 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
1041 .dma = NULL, 1059 .dma = &r600_copy_dma,
1042 .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1060 .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
1043 .copy = &r600_copy_blit, 1061 .copy = &r600_copy_dma,
1044 .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1062 .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
1045 }, 1063 },
1046 .surface = { 1064 .surface = {
1047 .set_reg = r600_set_surface_reg, 1065 .set_reg = r600_set_surface_reg,
@@ -1097,6 +1115,15 @@ static struct radeon_asic rv770_asic = {
1097 .ring_test = &r600_ring_test, 1115 .ring_test = &r600_ring_test,
1098 .ib_test = &r600_ib_test, 1116 .ib_test = &r600_ib_test,
1099 .is_lockup = &r600_gpu_is_lockup, 1117 .is_lockup = &r600_gpu_is_lockup,
1118 },
1119 [R600_RING_TYPE_DMA_INDEX] = {
1120 .ib_execute = &r600_dma_ring_ib_execute,
1121 .emit_fence = &r600_dma_fence_ring_emit,
1122 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1123 .cs_parse = NULL,
1124 .ring_test = &r600_dma_ring_test,
1125 .ib_test = &r600_dma_ib_test,
1126 .is_lockup = &r600_dma_is_lockup,
1100 } 1127 }
1101 }, 1128 },
1102 .irq = { 1129 .irq = {
@@ -1113,10 +1140,10 @@ static struct radeon_asic rv770_asic = {
1113 .copy = { 1140 .copy = {
1114 .blit = &r600_copy_blit, 1141 .blit = &r600_copy_blit,
1115 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1142 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
1116 .dma = NULL, 1143 .dma = &r600_copy_dma,
1117 .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1144 .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
1118 .copy = &r600_copy_blit, 1145 .copy = &r600_copy_dma,
1119 .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1146 .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
1120 }, 1147 },
1121 .surface = { 1148 .surface = {
1122 .set_reg = r600_set_surface_reg, 1149 .set_reg = r600_set_surface_reg,
@@ -1172,6 +1199,15 @@ static struct radeon_asic evergreen_asic = {
1172 .ring_test = &r600_ring_test, 1199 .ring_test = &r600_ring_test,
1173 .ib_test = &r600_ib_test, 1200 .ib_test = &r600_ib_test,
1174 .is_lockup = &evergreen_gpu_is_lockup, 1201 .is_lockup = &evergreen_gpu_is_lockup,
1202 },
1203 [R600_RING_TYPE_DMA_INDEX] = {
1204 .ib_execute = &evergreen_dma_ring_ib_execute,
1205 .emit_fence = &evergreen_dma_fence_ring_emit,
1206 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1207 .cs_parse = NULL,
1208 .ring_test = &r600_dma_ring_test,
1209 .ib_test = &r600_dma_ib_test,
1210 .is_lockup = &r600_dma_is_lockup,
1175 } 1211 }
1176 }, 1212 },
1177 .irq = { 1213 .irq = {
@@ -1188,10 +1224,10 @@ static struct radeon_asic evergreen_asic = {
1188 .copy = { 1224 .copy = {
1189 .blit = &r600_copy_blit, 1225 .blit = &r600_copy_blit,
1190 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1226 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
1191 .dma = NULL, 1227 .dma = &evergreen_copy_dma,
1192 .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1228 .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
1193 .copy = &r600_copy_blit, 1229 .copy = &evergreen_copy_dma,
1194 .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1230 .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
1195 }, 1231 },
1196 .surface = { 1232 .surface = {
1197 .set_reg = r600_set_surface_reg, 1233 .set_reg = r600_set_surface_reg,
@@ -1248,6 +1284,15 @@ static struct radeon_asic sumo_asic = {
1248 .ib_test = &r600_ib_test, 1284 .ib_test = &r600_ib_test,
1249 .is_lockup = &evergreen_gpu_is_lockup, 1285 .is_lockup = &evergreen_gpu_is_lockup,
1250 }, 1286 },
1287 [R600_RING_TYPE_DMA_INDEX] = {
1288 .ib_execute = &evergreen_dma_ring_ib_execute,
1289 .emit_fence = &evergreen_dma_fence_ring_emit,
1290 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1291 .cs_parse = NULL,
1292 .ring_test = &r600_dma_ring_test,
1293 .ib_test = &r600_dma_ib_test,
1294 .is_lockup = &r600_dma_is_lockup,
1295 }
1251 }, 1296 },
1252 .irq = { 1297 .irq = {
1253 .set = &evergreen_irq_set, 1298 .set = &evergreen_irq_set,
@@ -1263,10 +1308,10 @@ static struct radeon_asic sumo_asic = {
1263 .copy = { 1308 .copy = {
1264 .blit = &r600_copy_blit, 1309 .blit = &r600_copy_blit,
1265 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1310 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
1266 .dma = NULL, 1311 .dma = &evergreen_copy_dma,
1267 .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1312 .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
1268 .copy = &r600_copy_blit, 1313 .copy = &evergreen_copy_dma,
1269 .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1314 .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
1270 }, 1315 },
1271 .surface = { 1316 .surface = {
1272 .set_reg = r600_set_surface_reg, 1317 .set_reg = r600_set_surface_reg,
@@ -1322,6 +1367,15 @@ static struct radeon_asic btc_asic = {
1322 .ring_test = &r600_ring_test, 1367 .ring_test = &r600_ring_test,
1323 .ib_test = &r600_ib_test, 1368 .ib_test = &r600_ib_test,
1324 .is_lockup = &evergreen_gpu_is_lockup, 1369 .is_lockup = &evergreen_gpu_is_lockup,
1370 },
1371 [R600_RING_TYPE_DMA_INDEX] = {
1372 .ib_execute = &evergreen_dma_ring_ib_execute,
1373 .emit_fence = &evergreen_dma_fence_ring_emit,
1374 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1375 .cs_parse = NULL,
1376 .ring_test = &r600_dma_ring_test,
1377 .ib_test = &r600_dma_ib_test,
1378 .is_lockup = &r600_dma_is_lockup,
1325 } 1379 }
1326 }, 1380 },
1327 .irq = { 1381 .irq = {
@@ -1338,10 +1392,10 @@ static struct radeon_asic btc_asic = {
1338 .copy = { 1392 .copy = {
1339 .blit = &r600_copy_blit, 1393 .blit = &r600_copy_blit,
1340 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1394 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
1341 .dma = NULL, 1395 .dma = &evergreen_copy_dma,
1342 .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1396 .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
1343 .copy = &r600_copy_blit, 1397 .copy = &evergreen_copy_dma,
1344 .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1398 .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
1345 }, 1399 },
1346 .surface = { 1400 .surface = {
1347 .set_reg = r600_set_surface_reg, 1401 .set_reg = r600_set_surface_reg,
@@ -1391,7 +1445,7 @@ static struct radeon_asic cayman_asic = {
1391 .vm = { 1445 .vm = {
1392 .init = &cayman_vm_init, 1446 .init = &cayman_vm_init,
1393 .fini = &cayman_vm_fini, 1447 .fini = &cayman_vm_fini,
1394 .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1448 .pt_ring_index = R600_RING_TYPE_DMA_INDEX,
1395 .set_page = &cayman_vm_set_page, 1449 .set_page = &cayman_vm_set_page,
1396 }, 1450 },
1397 .ring = { 1451 .ring = {
@@ -1427,6 +1481,26 @@ static struct radeon_asic cayman_asic = {
1427 .ib_test = &r600_ib_test, 1481 .ib_test = &r600_ib_test,
1428 .is_lockup = &evergreen_gpu_is_lockup, 1482 .is_lockup = &evergreen_gpu_is_lockup,
1429 .vm_flush = &cayman_vm_flush, 1483 .vm_flush = &cayman_vm_flush,
1484 },
1485 [R600_RING_TYPE_DMA_INDEX] = {
1486 .ib_execute = &cayman_dma_ring_ib_execute,
1487 .emit_fence = &evergreen_dma_fence_ring_emit,
1488 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1489 .cs_parse = NULL,
1490 .ring_test = &r600_dma_ring_test,
1491 .ib_test = &r600_dma_ib_test,
1492 .is_lockup = &cayman_dma_is_lockup,
1493 .vm_flush = &cayman_dma_vm_flush,
1494 },
1495 [CAYMAN_RING_TYPE_DMA1_INDEX] = {
1496 .ib_execute = &cayman_dma_ring_ib_execute,
1497 .emit_fence = &evergreen_dma_fence_ring_emit,
1498 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1499 .cs_parse = NULL,
1500 .ring_test = &r600_dma_ring_test,
1501 .ib_test = &r600_dma_ib_test,
1502 .is_lockup = &cayman_dma_is_lockup,
1503 .vm_flush = &cayman_dma_vm_flush,
1430 } 1504 }
1431 }, 1505 },
1432 .irq = { 1506 .irq = {
@@ -1443,10 +1517,10 @@ static struct radeon_asic cayman_asic = {
1443 .copy = { 1517 .copy = {
1444 .blit = &r600_copy_blit, 1518 .blit = &r600_copy_blit,
1445 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1519 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
1446 .dma = NULL, 1520 .dma = &evergreen_copy_dma,
1447 .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1521 .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
1448 .copy = &r600_copy_blit, 1522 .copy = &evergreen_copy_dma,
1449 .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1523 .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
1450 }, 1524 },
1451 .surface = { 1525 .surface = {
1452 .set_reg = r600_set_surface_reg, 1526 .set_reg = r600_set_surface_reg,
@@ -1496,7 +1570,7 @@ static struct radeon_asic trinity_asic = {
1496 .vm = { 1570 .vm = {
1497 .init = &cayman_vm_init, 1571 .init = &cayman_vm_init,
1498 .fini = &cayman_vm_fini, 1572 .fini = &cayman_vm_fini,
1499 .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1573 .pt_ring_index = R600_RING_TYPE_DMA_INDEX,
1500 .set_page = &cayman_vm_set_page, 1574 .set_page = &cayman_vm_set_page,
1501 }, 1575 },
1502 .ring = { 1576 .ring = {
@@ -1532,6 +1606,26 @@ static struct radeon_asic trinity_asic = {
1532 .ib_test = &r600_ib_test, 1606 .ib_test = &r600_ib_test,
1533 .is_lockup = &evergreen_gpu_is_lockup, 1607 .is_lockup = &evergreen_gpu_is_lockup,
1534 .vm_flush = &cayman_vm_flush, 1608 .vm_flush = &cayman_vm_flush,
1609 },
1610 [R600_RING_TYPE_DMA_INDEX] = {
1611 .ib_execute = &cayman_dma_ring_ib_execute,
1612 .emit_fence = &evergreen_dma_fence_ring_emit,
1613 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1614 .cs_parse = NULL,
1615 .ring_test = &r600_dma_ring_test,
1616 .ib_test = &r600_dma_ib_test,
1617 .is_lockup = &cayman_dma_is_lockup,
1618 .vm_flush = &cayman_dma_vm_flush,
1619 },
1620 [CAYMAN_RING_TYPE_DMA1_INDEX] = {
1621 .ib_execute = &cayman_dma_ring_ib_execute,
1622 .emit_fence = &evergreen_dma_fence_ring_emit,
1623 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1624 .cs_parse = NULL,
1625 .ring_test = &r600_dma_ring_test,
1626 .ib_test = &r600_dma_ib_test,
1627 .is_lockup = &cayman_dma_is_lockup,
1628 .vm_flush = &cayman_dma_vm_flush,
1535 } 1629 }
1536 }, 1630 },
1537 .irq = { 1631 .irq = {
@@ -1548,10 +1642,10 @@ static struct radeon_asic trinity_asic = {
1548 .copy = { 1642 .copy = {
1549 .blit = &r600_copy_blit, 1643 .blit = &r600_copy_blit,
1550 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1644 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
1551 .dma = NULL, 1645 .dma = &evergreen_copy_dma,
1552 .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1646 .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
1553 .copy = &r600_copy_blit, 1647 .copy = &evergreen_copy_dma,
1554 .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1648 .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
1555 }, 1649 },
1556 .surface = { 1650 .surface = {
1557 .set_reg = r600_set_surface_reg, 1651 .set_reg = r600_set_surface_reg,
@@ -1601,7 +1695,7 @@ static struct radeon_asic si_asic = {
1601 .vm = { 1695 .vm = {
1602 .init = &si_vm_init, 1696 .init = &si_vm_init,
1603 .fini = &si_vm_fini, 1697 .fini = &si_vm_fini,
1604 .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1698 .pt_ring_index = R600_RING_TYPE_DMA_INDEX,
1605 .set_page = &si_vm_set_page, 1699 .set_page = &si_vm_set_page,
1606 }, 1700 },
1607 .ring = { 1701 .ring = {
@@ -1637,6 +1731,26 @@ static struct radeon_asic si_asic = {
1637 .ib_test = &r600_ib_test, 1731 .ib_test = &r600_ib_test,
1638 .is_lockup = &si_gpu_is_lockup, 1732 .is_lockup = &si_gpu_is_lockup,
1639 .vm_flush = &si_vm_flush, 1733 .vm_flush = &si_vm_flush,
1734 },
1735 [R600_RING_TYPE_DMA_INDEX] = {
1736 .ib_execute = &cayman_dma_ring_ib_execute,
1737 .emit_fence = &evergreen_dma_fence_ring_emit,
1738 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1739 .cs_parse = NULL,
1740 .ring_test = &r600_dma_ring_test,
1741 .ib_test = &r600_dma_ib_test,
1742 .is_lockup = &cayman_dma_is_lockup,
1743 .vm_flush = &si_dma_vm_flush,
1744 },
1745 [CAYMAN_RING_TYPE_DMA1_INDEX] = {
1746 .ib_execute = &cayman_dma_ring_ib_execute,
1747 .emit_fence = &evergreen_dma_fence_ring_emit,
1748 .emit_semaphore = &r600_dma_semaphore_ring_emit,
1749 .cs_parse = NULL,
1750 .ring_test = &r600_dma_ring_test,
1751 .ib_test = &r600_dma_ib_test,
1752 .is_lockup = &cayman_dma_is_lockup,
1753 .vm_flush = &si_dma_vm_flush,
1640 } 1754 }
1641 }, 1755 },
1642 .irq = { 1756 .irq = {
@@ -1653,10 +1767,10 @@ static struct radeon_asic si_asic = {
1653 .copy = { 1767 .copy = {
1654 .blit = NULL, 1768 .blit = NULL,
1655 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1769 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
1656 .dma = NULL, 1770 .dma = &si_copy_dma,
1657 .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1771 .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
1658 .copy = NULL, 1772 .copy = &si_copy_dma,
1659 .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, 1773 .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
1660 }, 1774 },
1661 .surface = { 1775 .surface = {
1662 .set_reg = r600_set_surface_reg, 1776 .set_reg = r600_set_surface_reg,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 5e3a0e5c6be1..ae56673d2410 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -309,6 +309,14 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev,
309 struct radeon_ring *cp, 309 struct radeon_ring *cp,
310 struct radeon_semaphore *semaphore, 310 struct radeon_semaphore *semaphore,
311 bool emit_wait); 311 bool emit_wait);
312void r600_dma_fence_ring_emit(struct radeon_device *rdev,
313 struct radeon_fence *fence);
314void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
315 struct radeon_ring *ring,
316 struct radeon_semaphore *semaphore,
317 bool emit_wait);
318void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
319bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
312bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp); 320bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
313int r600_asic_reset(struct radeon_device *rdev); 321int r600_asic_reset(struct radeon_device *rdev);
314int r600_set_surface_reg(struct radeon_device *rdev, int reg, 322int r600_set_surface_reg(struct radeon_device *rdev, int reg,
@@ -316,11 +324,16 @@ int r600_set_surface_reg(struct radeon_device *rdev, int reg,
316 uint32_t offset, uint32_t obj_size); 324 uint32_t offset, uint32_t obj_size);
317void r600_clear_surface_reg(struct radeon_device *rdev, int reg); 325void r600_clear_surface_reg(struct radeon_device *rdev, int reg);
318int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); 326int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
327int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
319void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); 328void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
320int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); 329int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
330int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
321int r600_copy_blit(struct radeon_device *rdev, 331int r600_copy_blit(struct radeon_device *rdev,
322 uint64_t src_offset, uint64_t dst_offset, 332 uint64_t src_offset, uint64_t dst_offset,
323 unsigned num_gpu_pages, struct radeon_fence **fence); 333 unsigned num_gpu_pages, struct radeon_fence **fence);
334int r600_copy_dma(struct radeon_device *rdev,
335 uint64_t src_offset, uint64_t dst_offset,
336 unsigned num_gpu_pages, struct radeon_fence **fence);
324void r600_hpd_init(struct radeon_device *rdev); 337void r600_hpd_init(struct radeon_device *rdev);
325void r600_hpd_fini(struct radeon_device *rdev); 338void r600_hpd_fini(struct radeon_device *rdev);
326bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); 339bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@@ -428,6 +441,14 @@ extern void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc);
428void evergreen_disable_interrupt_state(struct radeon_device *rdev); 441void evergreen_disable_interrupt_state(struct radeon_device *rdev);
429int evergreen_blit_init(struct radeon_device *rdev); 442int evergreen_blit_init(struct radeon_device *rdev);
430int evergreen_mc_wait_for_idle(struct radeon_device *rdev); 443int evergreen_mc_wait_for_idle(struct radeon_device *rdev);
444void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
445 struct radeon_fence *fence);
446void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
447 struct radeon_ib *ib);
448int evergreen_copy_dma(struct radeon_device *rdev,
449 uint64_t src_offset, uint64_t dst_offset,
450 unsigned num_gpu_pages,
451 struct radeon_fence **fence);
431 452
432/* 453/*
433 * cayman 454 * cayman
@@ -449,6 +470,10 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
449 uint64_t addr, unsigned count, 470 uint64_t addr, unsigned count,
450 uint32_t incr, uint32_t flags); 471 uint32_t incr, uint32_t flags);
451int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); 472int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
473void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
474 struct radeon_ib *ib);
475bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
476void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
452 477
453/* DCE6 - SI */ 478/* DCE6 - SI */
454void dce6_bandwidth_update(struct radeon_device *rdev); 479void dce6_bandwidth_update(struct radeon_device *rdev);
@@ -476,5 +501,10 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
476void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); 501void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
477int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); 502int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
478uint64_t si_get_gpu_clock(struct radeon_device *rdev); 503uint64_t si_get_gpu_clock(struct radeon_device *rdev);
504int si_copy_dma(struct radeon_device *rdev,
505 uint64_t src_offset, uint64_t dst_offset,
506 unsigned num_gpu_pages,
507 struct radeon_fence **fence);
508void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
479 509
480#endif 510#endif
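These new declarations are reached through the per-ASIC function table rather than called directly; a rough dispatch sketch (hypothetical helper that mirrors what the radeon_copy_dma()/radeon_copy_blit() wrappers do, not their actual definition):

        static int example_copy(struct radeon_device *rdev,
                                uint64_t src, uint64_t dst,
                                unsigned num_gpu_pages,
                                struct radeon_fence **fence)
        {
                /* prefer the async DMA engine when the ASIC provides one */
                if (rdev->asic->copy.dma)
                        return rdev->asic->copy.dma(rdev, src, dst,
                                                    num_gpu_pages, fence);
                return rdev->asic->copy.blit(rdev, src, dst,
                                             num_gpu_pages, fence);
        }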
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index 587c09a00ba2..fda09c9ea689 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -26,16 +26,31 @@
26#include "radeon_reg.h" 26#include "radeon_reg.h"
27#include "radeon.h" 27#include "radeon.h"
28 28
29#define RADEON_TEST_COPY_BLIT 1
30#define RADEON_TEST_COPY_DMA 0
31
29 32
30/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */ 33/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */
31void radeon_test_moves(struct radeon_device *rdev) 34static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
32{ 35{
33 struct radeon_bo *vram_obj = NULL; 36 struct radeon_bo *vram_obj = NULL;
34 struct radeon_bo **gtt_obj = NULL; 37 struct radeon_bo **gtt_obj = NULL;
35 struct radeon_fence *fence = NULL; 38 struct radeon_fence *fence = NULL;
36 uint64_t gtt_addr, vram_addr; 39 uint64_t gtt_addr, vram_addr;
37 unsigned i, n, size; 40 unsigned i, n, size;
38 int r; 41 int r, ring;
42
43 switch (flag) {
44 case RADEON_TEST_COPY_DMA:
45 ring = radeon_copy_dma_ring_index(rdev);
46 break;
47 case RADEON_TEST_COPY_BLIT:
48 ring = radeon_copy_blit_ring_index(rdev);
49 break;
50 default:
51 DRM_ERROR("Unknown copy method\n");
52 return;
53 }
39 54
40 size = 1024 * 1024; 55 size = 1024 * 1024;
41 56
@@ -106,7 +121,10 @@ void radeon_test_moves(struct radeon_device *rdev)
106 121
107 radeon_bo_kunmap(gtt_obj[i]); 122 radeon_bo_kunmap(gtt_obj[i]);
108 123
109 r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence); 124 if (ring == R600_RING_TYPE_DMA_INDEX)
125 r = radeon_copy_dma(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
126 else
127 r = radeon_copy_blit(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
110 if (r) { 128 if (r) {
111 DRM_ERROR("Failed GTT->VRAM copy %d\n", i); 129 DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
112 goto out_cleanup; 130 goto out_cleanup;
@@ -149,7 +167,10 @@ void radeon_test_moves(struct radeon_device *rdev)
149 167
150 radeon_bo_kunmap(vram_obj); 168 radeon_bo_kunmap(vram_obj);
151 169
152 r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence); 170 if (ring == R600_RING_TYPE_DMA_INDEX)
171 r = radeon_copy_dma(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
172 else
173 r = radeon_copy_blit(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
153 if (r) { 174 if (r) {
154 DRM_ERROR("Failed VRAM->GTT copy %d\n", i); 175 DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
155 goto out_cleanup; 176 goto out_cleanup;
@@ -223,6 +244,14 @@ out_cleanup:
223 } 244 }
224} 245}
225 246
247void radeon_test_moves(struct radeon_device *rdev)
248{
249 if (rdev->asic->copy.dma)
250 radeon_do_test_moves(rdev, RADEON_TEST_COPY_DMA);
251 if (rdev->asic->copy.blit)
252 radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT);
253}
254
226void radeon_test_ring_sync(struct radeon_device *rdev, 255void radeon_test_ring_sync(struct radeon_device *rdev,
227 struct radeon_ring *ringA, 256 struct radeon_ring *ringA,
228 struct radeon_ring *ringB) 257 struct radeon_ring *ringB)
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 79814a08c8e5..87c979c4f721 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -316,6 +316,7 @@ void r700_cp_stop(struct radeon_device *rdev)
316 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 316 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
317 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); 317 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
318 WREG32(SCRATCH_UMSK, 0); 318 WREG32(SCRATCH_UMSK, 0);
319 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
319} 320}
320 321
321static int rv770_cp_load_microcode(struct radeon_device *rdev) 322static int rv770_cp_load_microcode(struct radeon_device *rdev)
@@ -583,6 +584,8 @@ static void rv770_gpu_init(struct radeon_device *rdev)
583 WREG32(GB_TILING_CONFIG, gb_tiling_config); 584 WREG32(GB_TILING_CONFIG, gb_tiling_config);
584 WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 585 WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
585 WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 586 WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
587 WREG32(DMA_TILING_CONFIG, (gb_tiling_config & 0xffff));
588 WREG32(DMA_TILING_CONFIG2, (gb_tiling_config & 0xffff));
586 589
587 WREG32(CGTS_SYS_TCC_DISABLE, 0); 590 WREG32(CGTS_SYS_TCC_DISABLE, 0);
588 WREG32(CGTS_TCC_DISABLE, 0); 591 WREG32(CGTS_TCC_DISABLE, 0);
@@ -886,7 +889,7 @@ static int rv770_mc_init(struct radeon_device *rdev)
886 889
887static int rv770_startup(struct radeon_device *rdev) 890static int rv770_startup(struct radeon_device *rdev)
888{ 891{
889 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 892 struct radeon_ring *ring;
890 int r; 893 int r;
891 894
892 /* enable pcie gen2 link */ 895 /* enable pcie gen2 link */
@@ -932,6 +935,12 @@ static int rv770_startup(struct radeon_device *rdev)
932 return r; 935 return r;
933 } 936 }
934 937
938 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
939 if (r) {
940 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
941 return r;
942 }
943
935 /* Enable IRQ */ 944 /* Enable IRQ */
936 r = r600_irq_init(rdev); 945 r = r600_irq_init(rdev);
937 if (r) { 946 if (r) {
@@ -941,11 +950,20 @@ static int rv770_startup(struct radeon_device *rdev)
941 } 950 }
942 r600_irq_set(rdev); 951 r600_irq_set(rdev);
943 952
953 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
944 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, 954 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
945 R600_CP_RB_RPTR, R600_CP_RB_WPTR, 955 R600_CP_RB_RPTR, R600_CP_RB_WPTR,
946 0, 0xfffff, RADEON_CP_PACKET2); 956 0, 0xfffff, RADEON_CP_PACKET2);
947 if (r) 957 if (r)
948 return r; 958 return r;
959
960 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
961 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
962 DMA_RB_RPTR, DMA_RB_WPTR,
963 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
964 if (r)
965 return r;
966
949 r = rv770_cp_load_microcode(rdev); 967 r = rv770_cp_load_microcode(rdev);
950 if (r) 968 if (r)
951 return r; 969 return r;
@@ -953,6 +971,10 @@ static int rv770_startup(struct radeon_device *rdev)
953 if (r) 971 if (r)
954 return r; 972 return r;
955 973
974 r = r600_dma_resume(rdev);
975 if (r)
976 return r;
977
956 r = radeon_ib_pool_init(rdev); 978 r = radeon_ib_pool_init(rdev);
957 if (r) { 979 if (r) {
958 dev_err(rdev->dev, "IB initialization failed (%d).\n", r); 980 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -995,7 +1017,7 @@ int rv770_suspend(struct radeon_device *rdev)
995{ 1017{
996 r600_audio_fini(rdev); 1018 r600_audio_fini(rdev);
997 r700_cp_stop(rdev); 1019 r700_cp_stop(rdev);
998 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 1020 r600_dma_stop(rdev);
999 r600_irq_suspend(rdev); 1021 r600_irq_suspend(rdev);
1000 radeon_wb_disable(rdev); 1022 radeon_wb_disable(rdev);
1001 rv770_pcie_gart_disable(rdev); 1023 rv770_pcie_gart_disable(rdev);
@@ -1066,6 +1088,9 @@ int rv770_init(struct radeon_device *rdev)
1066 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; 1088 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
1067 r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); 1089 r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
1068 1090
1091 rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
1092 r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
1093
1069 rdev->ih.ring_obj = NULL; 1094 rdev->ih.ring_obj = NULL;
1070 r600_ih_ring_init(rdev, 64 * 1024); 1095 r600_ih_ring_init(rdev, 64 * 1024);
1071 1096
@@ -1078,6 +1103,7 @@ int rv770_init(struct radeon_device *rdev)
1078 if (r) { 1103 if (r) {
1079 dev_err(rdev->dev, "disabling GPU acceleration\n"); 1104 dev_err(rdev->dev, "disabling GPU acceleration\n");
1080 r700_cp_fini(rdev); 1105 r700_cp_fini(rdev);
1106 r600_dma_fini(rdev);
1081 r600_irq_fini(rdev); 1107 r600_irq_fini(rdev);
1082 radeon_wb_fini(rdev); 1108 radeon_wb_fini(rdev);
1083 radeon_ib_pool_fini(rdev); 1109 radeon_ib_pool_fini(rdev);
@@ -1093,6 +1119,7 @@ void rv770_fini(struct radeon_device *rdev)
1093{ 1119{
1094 r600_blit_fini(rdev); 1120 r600_blit_fini(rdev);
1095 r700_cp_fini(rdev); 1121 r700_cp_fini(rdev);
1122 r600_dma_fini(rdev);
1096 r600_irq_fini(rdev); 1123 r600_irq_fini(rdev);
1097 radeon_wb_fini(rdev); 1124 radeon_wb_fini(rdev);
1098 radeon_ib_pool_fini(rdev); 1125 radeon_ib_pool_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h
index e2d9dc8e751e..20e29d23d348 100644
--- a/drivers/gpu/drm/radeon/rv770d.h
+++ b/drivers/gpu/drm/radeon/rv770d.h
@@ -109,6 +109,9 @@
109#define PIPE_TILING__SHIFT 1 109#define PIPE_TILING__SHIFT 1
110#define PIPE_TILING__MASK 0x0000000e 110#define PIPE_TILING__MASK 0x0000000e
111 111
112#define DMA_TILING_CONFIG 0x3ec8
113#define DMA_TILING_CONFIG2 0xd0b8
114
112#define GC_USER_SHADER_PIPE_CONFIG 0x8954 115#define GC_USER_SHADER_PIPE_CONFIG 0x8954
113#define INACTIVE_QD_PIPES(x) ((x) << 8) 116#define INACTIVE_QD_PIPES(x) ((x) << 8)
114#define INACTIVE_QD_PIPES_MASK 0x0000FF00 117#define INACTIVE_QD_PIPES_MASK 0x0000FF00
@@ -358,6 +361,26 @@
358 361
359#define WAIT_UNTIL 0x8040 362#define WAIT_UNTIL 0x8040
360 363
364/* async DMA */
365#define DMA_RB_RPTR 0xd008
366#define DMA_RB_WPTR 0xd00c
367
368/* async DMA packets */
369#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
370 (((t) & 0x1) << 23) | \
371 (((s) & 0x1) << 22) | \
372 (((n) & 0xFFFF) << 0))
373/* async DMA Packet types */
374#define DMA_PACKET_WRITE 0x2
375#define DMA_PACKET_COPY 0x3
376#define DMA_PACKET_INDIRECT_BUFFER 0x4
377#define DMA_PACKET_SEMAPHORE 0x5
378#define DMA_PACKET_FENCE 0x6
379#define DMA_PACKET_TRAP 0x7
380#define DMA_PACKET_CONSTANT_FILL 0xd
381#define DMA_PACKET_NOP 0xf
382
383
361#define SRBM_STATUS 0x0E50 384#define SRBM_STATUS 0x0E50
362 385
363/* DCE 3.2 HDMI */ 386/* DCE 3.2 HDMI */
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index c4d9eb623ce5..f6e7815e1860 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -1660,6 +1660,8 @@ static void si_gpu_init(struct radeon_device *rdev)
1660 WREG32(GB_ADDR_CONFIG, gb_addr_config); 1660 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1661 WREG32(DMIF_ADDR_CONFIG, gb_addr_config); 1661 WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1662 WREG32(HDP_ADDR_CONFIG, gb_addr_config); 1662 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1663 WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
1664 WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
1663 1665
1664 si_tiling_mode_table_init(rdev); 1666 si_tiling_mode_table_init(rdev);
1665 1667
@@ -1836,6 +1838,9 @@ static void si_cp_enable(struct radeon_device *rdev, bool enable)
1836 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 1838 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1837 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT)); 1839 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1838 WREG32(SCRATCH_UMSK, 0); 1840 WREG32(SCRATCH_UMSK, 0);
1841 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1842 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
1843 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
1839 } 1844 }
1840 udelay(50); 1845 udelay(50);
1841} 1846}
@@ -2820,30 +2825,86 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
2820{ 2825{
2821 struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; 2826 struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
2822 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); 2827 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
2823 2828 uint64_t value;
2824 while (count) { 2829 unsigned ndw;
2825 unsigned ndw = 2 + count * 2; 2830
2826 if (ndw > 0x3FFE) 2831 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
2827 ndw = 0x3FFE; 2832 while (count) {
2828 2833 ndw = 2 + count * 2;
2829 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw)); 2834 if (ndw > 0x3FFE)
2830 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 2835 ndw = 0x3FFE;
2831 WRITE_DATA_DST_SEL(1))); 2836
2832 radeon_ring_write(ring, pe); 2837 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw));
2833 radeon_ring_write(ring, upper_32_bits(pe)); 2838 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2834 for (; ndw > 2; ndw -= 2, --count, pe += 8) { 2839 WRITE_DATA_DST_SEL(1)));
2835 uint64_t value; 2840 radeon_ring_write(ring, pe);
2836 if (flags & RADEON_VM_PAGE_SYSTEM) { 2841 radeon_ring_write(ring, upper_32_bits(pe));
2837 value = radeon_vm_map_gart(rdev, addr); 2842 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
2838 value &= 0xFFFFFFFFFFFFF000ULL; 2843 if (flags & RADEON_VM_PAGE_SYSTEM) {
2839 } else if (flags & RADEON_VM_PAGE_VALID) 2844 value = radeon_vm_map_gart(rdev, addr);
2840 value = addr; 2845 value &= 0xFFFFFFFFFFFFF000ULL;
2841 else 2846 } else if (flags & RADEON_VM_PAGE_VALID) {
2842 value = 0; 2847 value = addr;
2843 addr += incr; 2848 } else {
2844 value |= r600_flags; 2849 value = 0;
2845 radeon_ring_write(ring, value); 2850 }
2846 radeon_ring_write(ring, upper_32_bits(value)); 2851 addr += incr;
2852 value |= r600_flags;
2853 radeon_ring_write(ring, value);
2854 radeon_ring_write(ring, upper_32_bits(value));
2855 }
2856 }
2857 } else {
2858 /* DMA */
2859 if (flags & RADEON_VM_PAGE_SYSTEM) {
2860 while (count) {
2861 ndw = count * 2;
2862 if (ndw > 0xFFFFE)
2863 ndw = 0xFFFFE;
2864
2865 /* for non-physically contiguous pages (system) */
2866 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw));
2867 radeon_ring_write(ring, pe);
2868 radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
2869 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
2870 if (flags & RADEON_VM_PAGE_SYSTEM) {
2871 value = radeon_vm_map_gart(rdev, addr);
2872 value &= 0xFFFFFFFFFFFFF000ULL;
2873 } else if (flags & RADEON_VM_PAGE_VALID) {
2874 value = addr;
2875 } else {
2876 value = 0;
2877 }
2878 addr += incr;
2879 value |= r600_flags;
2880 radeon_ring_write(ring, value);
2881 radeon_ring_write(ring, upper_32_bits(value));
2882 }
2883 }
2884 } else {
2885 while (count) {
2886 ndw = count * 2;
2887 if (ndw > 0xFFFFE)
2888 ndw = 0xFFFFE;
2889
2890 if (flags & RADEON_VM_PAGE_VALID)
2891 value = addr;
2892 else
2893 value = 0;
2894 /* for physically contiguous pages (vram) */
2895 radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw));
2896 radeon_ring_write(ring, pe); /* dst addr */
2897 radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
2898 radeon_ring_write(ring, r600_flags); /* mask */
2899 radeon_ring_write(ring, 0);
2900 radeon_ring_write(ring, value); /* value */
2901 radeon_ring_write(ring, upper_32_bits(value));
2902 radeon_ring_write(ring, incr); /* increment size */
2903 radeon_ring_write(ring, 0);
2904 pe += ndw * 4;
2905 addr += (ndw / 2) * incr;
2906 count -= ndw / 2;
2907 }
2847 } 2908 }
2848 } 2909 }
2849} 2910}
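The dword cost of the DMA page-table path above is easy to work out; a sketch under the assumptions visible in the code (2 dwords per PTE in the WRITE form, at most 0xFFFFE payload dwords per packet, plus a 3-dword header; the helper itself is hypothetical). For example, 512 entries fit in one packet of 3 + 1024 = 1027 dwords.

        static unsigned si_dma_pte_write_ndw(unsigned count)
        {
                unsigned ndw = count * 2;       /* 2 dwords per PTE */

                if (ndw > 0xFFFFE)
                        ndw = 0xFFFFE;
                return ndw + 3;                 /* header + dst lo + dst hi */
        }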
@@ -2891,6 +2952,32 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2891 radeon_ring_write(ring, 0x0); 2952 radeon_ring_write(ring, 0x0);
2892} 2953}
2893 2954
2955void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2956{
2957 struct radeon_ring *ring = &rdev->ring[ridx];
2958
2959 if (vm == NULL)
2960 return;
2961
2962 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
2963 if (vm->id < 8) {
2964 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
2965 } else {
2966 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
2967 }
2968 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
2969
2970 /* flush hdp cache */
2971 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
2972 radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
2973 radeon_ring_write(ring, 1);
2974
2975 /* bits 0-7 are the VM contexts0-7 */
2976 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
2977 radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
2978 radeon_ring_write(ring, 1 << vm->id);
2979}
2980
2894/* 2981/*
2895 * RLC 2982 * RLC
2896 */ 2983 */
@@ -3059,6 +3146,10 @@ static void si_disable_interrupt_state(struct radeon_device *rdev)
3059 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 3146 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3060 WREG32(CP_INT_CNTL_RING1, 0); 3147 WREG32(CP_INT_CNTL_RING1, 0);
3061 WREG32(CP_INT_CNTL_RING2, 0); 3148 WREG32(CP_INT_CNTL_RING2, 0);
3149 tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
3150 WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
3151 tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
3152 WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
3062 WREG32(GRBM_INT_CNTL, 0); 3153 WREG32(GRBM_INT_CNTL, 0);
3063 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); 3154 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3064 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); 3155 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
@@ -3178,6 +3269,7 @@ int si_irq_set(struct radeon_device *rdev)
3178 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6; 3269 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
3179 u32 grbm_int_cntl = 0; 3270 u32 grbm_int_cntl = 0;
3180 u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0; 3271 u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
3272 u32 dma_cntl, dma_cntl1;
3181 3273
3182 if (!rdev->irq.installed) { 3274 if (!rdev->irq.installed) {
3183 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); 3275 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -3198,6 +3290,9 @@ int si_irq_set(struct radeon_device *rdev)
3198 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN; 3290 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
3199 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN; 3291 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
3200 3292
3293 dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
3294 dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
3295
3201 /* enable CP interrupts on all rings */ 3296 /* enable CP interrupts on all rings */
3202 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { 3297 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
3203 DRM_DEBUG("si_irq_set: sw int gfx\n"); 3298 DRM_DEBUG("si_irq_set: sw int gfx\n");
@@ -3211,6 +3306,15 @@ int si_irq_set(struct radeon_device *rdev)
3211 DRM_DEBUG("si_irq_set: sw int cp2\n"); 3306 DRM_DEBUG("si_irq_set: sw int cp2\n");
3212 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE; 3307 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
3213 } 3308 }
3309 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
3310 DRM_DEBUG("si_irq_set: sw int dma\n");
3311 dma_cntl |= TRAP_ENABLE;
3312 }
3313
3314 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
3315 DRM_DEBUG("si_irq_set: sw int dma1\n");
3316 dma_cntl1 |= TRAP_ENABLE;
3317 }
3214 if (rdev->irq.crtc_vblank_int[0] || 3318 if (rdev->irq.crtc_vblank_int[0] ||
3215 atomic_read(&rdev->irq.pflip[0])) { 3319 atomic_read(&rdev->irq.pflip[0])) {
3216 DRM_DEBUG("si_irq_set: vblank 0\n"); 3320 DRM_DEBUG("si_irq_set: vblank 0\n");
@@ -3270,6 +3374,9 @@ int si_irq_set(struct radeon_device *rdev)
3270 WREG32(CP_INT_CNTL_RING1, cp_int_cntl1); 3374 WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
3271 WREG32(CP_INT_CNTL_RING2, cp_int_cntl2); 3375 WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
3272 3376
3377 WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
3378 WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
3379
3273 WREG32(GRBM_INT_CNTL, grbm_int_cntl); 3380 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
3274 3381
3275 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); 3382 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
@@ -3728,9 +3835,17 @@ restart_ih:
3728 break; 3835 break;
3729 } 3836 }
3730 break; 3837 break;
3838 case 224: /* DMA trap event */
3839 DRM_DEBUG("IH: DMA trap\n");
3840 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
3841 break;
3731 case 233: /* GUI IDLE */ 3842 case 233: /* GUI IDLE */
3732 DRM_DEBUG("IH: GUI idle\n"); 3843 DRM_DEBUG("IH: GUI idle\n");
3733 break; 3844 break;
3845 case 244: /* DMA trap event */
3846 DRM_DEBUG("IH: DMA1 trap\n");
3847 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
3848 break;
3734 default: 3849 default:
3735 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 3850 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3736 break; 3851 break;
@@ -3754,6 +3869,80 @@ restart_ih:
3754 return IRQ_HANDLED; 3869 return IRQ_HANDLED;
3755} 3870}
3756 3871
3872/**
3873 * si_copy_dma - copy pages using the DMA engine
3874 *
3875 * @rdev: radeon_device pointer
3876 * @src_offset: src GPU address
3877 * @dst_offset: dst GPU address
3878 * @num_gpu_pages: number of GPU pages to xfer
3879 * @fence: radeon fence object
3880 *
 3881 * Copy GPU pages using the DMA engine (SI).
3882 * Used by the radeon ttm implementation to move pages if
3883 * registered as the asic copy callback.
3884 */
3885int si_copy_dma(struct radeon_device *rdev,
3886 uint64_t src_offset, uint64_t dst_offset,
3887 unsigned num_gpu_pages,
3888 struct radeon_fence **fence)
3889{
3890 struct radeon_semaphore *sem = NULL;
3891 int ring_index = rdev->asic->copy.dma_ring_index;
3892 struct radeon_ring *ring = &rdev->ring[ring_index];
3893 u32 size_in_bytes, cur_size_in_bytes;
3894 int i, num_loops;
3895 int r = 0;
3896
3897 r = radeon_semaphore_create(rdev, &sem);
3898 if (r) {
3899 DRM_ERROR("radeon: moving bo (%d).\n", r);
3900 return r;
3901 }
3902
3903 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3904 num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
3905 r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
3906 if (r) {
3907 DRM_ERROR("radeon: moving bo (%d).\n", r);
3908 radeon_semaphore_free(rdev, &sem, NULL);
3909 return r;
3910 }
3911
3912 if (radeon_fence_need_sync(*fence, ring->idx)) {
3913 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3914 ring->idx);
3915 radeon_fence_note_sync(*fence, ring->idx);
3916 } else {
3917 radeon_semaphore_free(rdev, &sem, NULL);
3918 }
3919
3920 for (i = 0; i < num_loops; i++) {
3921 cur_size_in_bytes = size_in_bytes;
3922 if (cur_size_in_bytes > 0xFFFFF)
3923 cur_size_in_bytes = 0xFFFFF;
3924 size_in_bytes -= cur_size_in_bytes;
3925 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
3926 radeon_ring_write(ring, dst_offset & 0xffffffff);
3927 radeon_ring_write(ring, src_offset & 0xffffffff);
3928 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
3929 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
3930 src_offset += cur_size_in_bytes;
3931 dst_offset += cur_size_in_bytes;
3932 }
3933
3934 r = radeon_fence_emit(rdev, fence, ring->idx);
3935 if (r) {
3936 radeon_ring_unlock_undo(rdev, ring);
3937 return r;
3938 }
3939
3940 radeon_ring_unlock_commit(rdev, ring);
3941 radeon_semaphore_free(rdev, &sem, *fence);
3942
3943 return r;
3944}
3945
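The ring-space request in si_copy_dma() follows from the packet layout: one 5-dword COPY packet per loop, each loop moving at most 0xFFFFF bytes, plus 11 dwords reserved for the semaphore sync and the fence. A worked sketch (hypothetical helper; the shift by 12 and the open-coded rounding stand in for RADEON_GPU_PAGE_SHIFT and DIV_ROUND_UP used in the function above): copying 1 MiB, i.e. 256 GPU pages, takes 2 loops and therefore 2 * 5 + 11 = 21 dwords.

        static unsigned si_copy_dma_ring_dw(unsigned num_gpu_pages)
        {
                unsigned size_in_bytes = num_gpu_pages << 12;
                unsigned num_loops = (size_in_bytes + 0xffffe) / 0xfffff;

                return num_loops * 5 + 11;      /* COPY packets + sync/fence */
        }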
3757/* 3946/*
3758 * startup/shutdown callbacks 3947 * startup/shutdown callbacks
3759 */ 3948 */
@@ -3825,6 +4014,18 @@ static int si_startup(struct radeon_device *rdev)
3825 return r; 4014 return r;
3826 } 4015 }
3827 4016
4017 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
4018 if (r) {
4019 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4020 return r;
4021 }
4022
4023 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4024 if (r) {
4025 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4026 return r;
4027 }
4028
3828 /* Enable IRQ */ 4029 /* Enable IRQ */
3829 r = si_irq_init(rdev); 4030 r = si_irq_init(rdev);
3830 if (r) { 4031 if (r) {
@@ -3855,6 +4056,22 @@ static int si_startup(struct radeon_device *rdev)
3855 if (r) 4056 if (r)
3856 return r; 4057 return r;
3857 4058
4059 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4060 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
4061 DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
4062 DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
4063 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
4064 if (r)
4065 return r;
4066
4067 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4068 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
4069 DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
4070 DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
4071 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
4072 if (r)
4073 return r;
4074
3858 r = si_cp_load_microcode(rdev); 4075 r = si_cp_load_microcode(rdev);
3859 if (r) 4076 if (r)
3860 return r; 4077 return r;
@@ -3862,6 +4079,10 @@ static int si_startup(struct radeon_device *rdev)
3862 if (r) 4079 if (r)
3863 return r; 4080 return r;
3864 4081
4082 r = cayman_dma_resume(rdev);
4083 if (r)
4084 return r;
4085
3865 r = radeon_ib_pool_init(rdev); 4086 r = radeon_ib_pool_init(rdev);
3866 if (r) { 4087 if (r) {
3867 dev_err(rdev->dev, "IB initialization failed (%d).\n", r); 4088 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -3903,9 +4124,7 @@ int si_resume(struct radeon_device *rdev)
3903int si_suspend(struct radeon_device *rdev) 4124int si_suspend(struct radeon_device *rdev)
3904{ 4125{
3905 si_cp_enable(rdev, false); 4126 si_cp_enable(rdev, false);
3906 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 4127 cayman_dma_stop(rdev);
3907 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3908 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3909 si_irq_suspend(rdev); 4128 si_irq_suspend(rdev);
3910 radeon_wb_disable(rdev); 4129 radeon_wb_disable(rdev);
3911 si_pcie_gart_disable(rdev); 4130 si_pcie_gart_disable(rdev);
@@ -3983,6 +4202,14 @@ int si_init(struct radeon_device *rdev)
3983 ring->ring_obj = NULL; 4202 ring->ring_obj = NULL;
3984 r600_ring_init(rdev, ring, 1024 * 1024); 4203 r600_ring_init(rdev, ring, 1024 * 1024);
3985 4204
4205 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4206 ring->ring_obj = NULL;
4207 r600_ring_init(rdev, ring, 64 * 1024);
4208
4209 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4210 ring->ring_obj = NULL;
4211 r600_ring_init(rdev, ring, 64 * 1024);
4212
3986 rdev->ih.ring_obj = NULL; 4213 rdev->ih.ring_obj = NULL;
3987 r600_ih_ring_init(rdev, 64 * 1024); 4214 r600_ih_ring_init(rdev, 64 * 1024);
3988 4215
@@ -3995,6 +4222,7 @@ int si_init(struct radeon_device *rdev)
3995 if (r) { 4222 if (r) {
3996 dev_err(rdev->dev, "disabling GPU acceleration\n"); 4223 dev_err(rdev->dev, "disabling GPU acceleration\n");
3997 si_cp_fini(rdev); 4224 si_cp_fini(rdev);
4225 cayman_dma_fini(rdev);
3998 si_irq_fini(rdev); 4226 si_irq_fini(rdev);
3999 si_rlc_fini(rdev); 4227 si_rlc_fini(rdev);
4000 radeon_wb_fini(rdev); 4228 radeon_wb_fini(rdev);
@@ -4023,6 +4251,7 @@ void si_fini(struct radeon_device *rdev)
4023 r600_blit_fini(rdev); 4251 r600_blit_fini(rdev);
4024#endif 4252#endif
4025 si_cp_fini(rdev); 4253 si_cp_fini(rdev);
4254 cayman_dma_fini(rdev);
4026 si_irq_fini(rdev); 4255 si_irq_fini(rdev);
4027 si_rlc_fini(rdev); 4256 si_rlc_fini(rdev);
4028 radeon_wb_fini(rdev); 4257 radeon_wb_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index 53b4d4535fd2..e153c254fbfb 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -936,4 +936,61 @@
936#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A 936#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A
937#define PACKET3_SWITCH_BUFFER 0x8B 937#define PACKET3_SWITCH_BUFFER 0x8B
938 938
939/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
940#define DMA0_REGISTER_OFFSET 0x0 /* not a register */
941#define DMA1_REGISTER_OFFSET 0x800 /* not a register */
942
943#define DMA_RB_CNTL 0xd000
944# define DMA_RB_ENABLE (1 << 0)
945# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
946# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
947# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
948# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
949# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
950#define DMA_RB_BASE 0xd004
951#define DMA_RB_RPTR 0xd008
952#define DMA_RB_WPTR 0xd00c
953
954#define DMA_RB_RPTR_ADDR_HI 0xd01c
955#define DMA_RB_RPTR_ADDR_LO 0xd020
956
957#define DMA_IB_CNTL 0xd024
958# define DMA_IB_ENABLE (1 << 0)
959# define DMA_IB_SWAP_ENABLE (1 << 4)
960#define DMA_IB_RPTR 0xd028
961#define DMA_CNTL 0xd02c
962# define TRAP_ENABLE (1 << 0)
963# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
964# define SEM_WAIT_INT_ENABLE (1 << 2)
965# define DATA_SWAP_ENABLE (1 << 3)
966# define FENCE_SWAP_ENABLE (1 << 4)
967# define CTXEMPTY_INT_ENABLE (1 << 28)
968#define DMA_TILING_CONFIG 0xd0b8
969
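
Illustration (not part of the patch): a sketch of how the ring-buffer control bits above would typically be combined when bringing up one DMA instance. The real bring-up lives in cayman_dma_resume()/r600_dma_resume() in other files of this series, so the exact sequence and field interpretations here are assumptions.

u32 ring_size_bytes = 64 * 1024;   /* matches the DMA rings allocated in si_init() above */
u32 rb_cntl;

/* size field takes log2 of the ring size in dwords (assumed from the "log2" comment) */
rb_cntl  = DMA_RB_SIZE(order_base_2(ring_size_bytes / 4));
rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;   /* let the engine publish its read pointer to memory */
WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
/* ... program DMA_RB_BASE, DMA_RB_RPTR_ADDR_HI/LO, reset rptr/wptr ... */
WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl | DMA_RB_ENABLE);
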
970#define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \
971 (((b) & 0x1) << 26) | \
972 (((t) & 0x1) << 23) | \
973 (((s) & 0x1) << 22) | \
974 (((n) & 0xFFFFF) << 0))
975
976#define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \
977 (((vmid) & 0xF) << 20) | \
978 (((n) & 0xFFFFF) << 0))
979
980#define DMA_PTE_PDE_PACKET(n) ((2 << 28) | \
981 (1 << 26) | \
982 (1 << 21) | \
983 (((n) & 0xFFFFF) << 0))
984
985/* async DMA Packet types */
986#define DMA_PACKET_WRITE 0x2
987#define DMA_PACKET_COPY 0x3
988#define DMA_PACKET_INDIRECT_BUFFER 0x4
989#define DMA_PACKET_SEMAPHORE 0x5
990#define DMA_PACKET_FENCE 0x6
991#define DMA_PACKET_TRAP 0x7
992#define DMA_PACKET_SRBM_WRITE 0x9
993#define DMA_PACKET_CONSTANT_FILL 0xd
994#define DMA_PACKET_NOP 0xf
995
939#endif 996#endif
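
Illustration (not part of the patch): two worked expansions of the DMA_PACKET() macro above, using packet types it is combined with elsewhere in this series. For the copy header emitted by the new si copy routine with a maximum-size chunk:

DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, 0xFFFFF)
  = (0x3 << 28) | (1 << 26) | (0 << 23) | (0 << 22) | 0xFFFFF
  = 0x30000000  | 0x04000000 | 0x000FFFFF
  = 0x340FFFFF

and for the NOP used to pad the DMA rings in si_startup():

DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)
  = (0xF << 28)
  = 0xF0000000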