author     Dave Airlie <airlied@redhat.com>   2012-12-10 17:46:03 -0500
committer  Dave Airlie <airlied@redhat.com>   2012-12-10 17:46:03 -0500
commit     a636a9829175987e74ddd28a2e87ed17ff7adfdc (patch)
tree       1c2f8e4425a1434215c3c5ae89b2f2e874ef2c3d /drivers
parent     97a875cbdf89a4638eea57c2b456c7cc4e3e8b21 (diff)
parent     bf66a786c92488dfc99cc7f19bc9eda7b4c98fa6 (diff)
Merge branch 'drm-next-3.8' of git://people.freedesktop.org/~agd5f/linux into drm-next
Alex writes:
"adds support for the
asynchronous DMA engines on r6xx-SI. These engines are used
for ttm bo moves and VM page table updates currently. They
could also be exposed via the CS ioctl for userspace use,
but I haven't had a chance to add proper CS checker patches
for them yet. These patches have been tested extensively
internally for months, so they should be pretty solid."
* 'drm-next-3.8' of git://people.freedesktop.org/~agd5f/linux:
drm/radeon: use DMA engine for VM page table updates on SI
drm/radeon: add dma engine support for vm pt updates on si (v2)
drm/radeon: use DMA engine for VM page table updates on cayman/TN
drm/radeon: add dma engine support for vm pt updates on ni (v5)
drm/radeon: use async dma for ttm buffer moves on 6xx-SI
drm/radeon/kms: add support for dma rings to radeon_test_moves()
drm/radeon/kms: Add initial support for async DMA on SI
drm/radeon/kms: Add initial support for async DMA on cayman/TN
drm/radeon/kms: Add initial support for async DMA on evergreen
drm/radeon/kms: Add initial support for async DMA on r6xx/r7xx
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/drm/radeon/evergreen.c     200
-rw-r--r--  drivers/gpu/drm/radeon/evergreend.h     31
-rw-r--r--  drivers/gpu/drm/radeon/ni.c            341
-rw-r--r--  drivers/gpu/drm/radeon/nid.h            75
-rw-r--r--  drivers/gpu/drm/radeon/r600.c          471
-rw-r--r--  drivers/gpu/drm/radeon/r600d.h          54
-rw-r--r--  drivers/gpu/drm/radeon/radeon.h         18
-rw-r--r--  drivers/gpu/drm/radeon/radeon_asic.c   192
-rw-r--r--  drivers/gpu/drm/radeon/radeon_asic.h    30
-rw-r--r--  drivers/gpu/drm/radeon/radeon_test.c    37
-rw-r--r--  drivers/gpu/drm/radeon/rv770.c          31
-rw-r--r--  drivers/gpu/drm/radeon/rv770d.h         23
-rw-r--r--  drivers/gpu/drm/radeon/si.c            283
-rw-r--r--  drivers/gpu/drm/radeon/sid.h            57
14 files changed, 1742 insertions(+), 101 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 78de2e4097b5..c66251e4a9b9 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2034,6 +2034,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev) | |||
2034 | WREG32(GB_ADDR_CONFIG, gb_addr_config); | 2034 | WREG32(GB_ADDR_CONFIG, gb_addr_config); |
2035 | WREG32(DMIF_ADDR_CONFIG, gb_addr_config); | 2035 | WREG32(DMIF_ADDR_CONFIG, gb_addr_config); |
2036 | WREG32(HDP_ADDR_CONFIG, gb_addr_config); | 2036 | WREG32(HDP_ADDR_CONFIG, gb_addr_config); |
2037 | WREG32(DMA_TILING_CONFIG, gb_addr_config); | ||
2037 | 2038 | ||
2038 | tmp = gb_addr_config & NUM_PIPES_MASK; | 2039 | tmp = gb_addr_config & NUM_PIPES_MASK; |
2039 | tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.evergreen.max_backends, | 2040 | tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.evergreen.max_backends, |
@@ -2403,8 +2404,12 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev) | |||
2403 | CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); | 2404 | CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); |
2404 | cayman_cp_int_cntl_setup(rdev, 1, 0); | 2405 | cayman_cp_int_cntl_setup(rdev, 1, 0); |
2405 | cayman_cp_int_cntl_setup(rdev, 2, 0); | 2406 | cayman_cp_int_cntl_setup(rdev, 2, 0); |
2407 | tmp = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE; | ||
2408 | WREG32(CAYMAN_DMA1_CNTL, tmp); | ||
2406 | } else | 2409 | } else |
2407 | WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); | 2410 | WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); |
2411 | tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE; | ||
2412 | WREG32(DMA_CNTL, tmp); | ||
2408 | WREG32(GRBM_INT_CNTL, 0); | 2413 | WREG32(GRBM_INT_CNTL, 0); |
2409 | WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); | 2414 | WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); |
2410 | WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); | 2415 | WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); |
@@ -2457,6 +2462,7 @@ int evergreen_irq_set(struct radeon_device *rdev) | |||
2457 | u32 grbm_int_cntl = 0; | 2462 | u32 grbm_int_cntl = 0; |
2458 | u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0; | 2463 | u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0; |
2459 | u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0; | 2464 | u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0; |
2465 | u32 dma_cntl, dma_cntl1 = 0; | ||
2460 | 2466 | ||
2461 | if (!rdev->irq.installed) { | 2467 | if (!rdev->irq.installed) { |
2462 | WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); | 2468 | WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); |
@@ -2484,6 +2490,8 @@ int evergreen_irq_set(struct radeon_device *rdev) | |||
2484 | afmt5 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK; | 2490 | afmt5 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK; |
2485 | afmt6 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK; | 2491 | afmt6 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK; |
2486 | 2492 | ||
2493 | dma_cntl = RREG32(DMA_CNTL) & ~TRAP_ENABLE; | ||
2494 | |||
2487 | if (rdev->family >= CHIP_CAYMAN) { | 2495 | if (rdev->family >= CHIP_CAYMAN) { |
2488 | /* enable CP interrupts on all rings */ | 2496 | /* enable CP interrupts on all rings */ |
2489 | if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { | 2497 | if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { |
@@ -2506,6 +2514,19 @@ int evergreen_irq_set(struct radeon_device *rdev) | |||
2506 | } | 2514 | } |
2507 | } | 2515 | } |
2508 | 2516 | ||
2517 | if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) { | ||
2518 | DRM_DEBUG("r600_irq_set: sw int dma\n"); | ||
2519 | dma_cntl |= TRAP_ENABLE; | ||
2520 | } | ||
2521 | |||
2522 | if (rdev->family >= CHIP_CAYMAN) { | ||
2523 | dma_cntl1 = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE; | ||
2524 | if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) { | ||
2525 | DRM_DEBUG("r600_irq_set: sw int dma1\n"); | ||
2526 | dma_cntl1 |= TRAP_ENABLE; | ||
2527 | } | ||
2528 | } | ||
2529 | |||
2509 | if (rdev->irq.crtc_vblank_int[0] || | 2530 | if (rdev->irq.crtc_vblank_int[0] || |
2510 | atomic_read(&rdev->irq.pflip[0])) { | 2531 | atomic_read(&rdev->irq.pflip[0])) { |
2511 | DRM_DEBUG("evergreen_irq_set: vblank 0\n"); | 2532 | DRM_DEBUG("evergreen_irq_set: vblank 0\n"); |
@@ -2591,6 +2612,12 @@ int evergreen_irq_set(struct radeon_device *rdev) | |||
2591 | cayman_cp_int_cntl_setup(rdev, 2, cp_int_cntl2); | 2612 | cayman_cp_int_cntl_setup(rdev, 2, cp_int_cntl2); |
2592 | } else | 2613 | } else |
2593 | WREG32(CP_INT_CNTL, cp_int_cntl); | 2614 | WREG32(CP_INT_CNTL, cp_int_cntl); |
2615 | |||
2616 | WREG32(DMA_CNTL, dma_cntl); | ||
2617 | |||
2618 | if (rdev->family >= CHIP_CAYMAN) | ||
2619 | WREG32(CAYMAN_DMA1_CNTL, dma_cntl1); | ||
2620 | |||
2594 | WREG32(GRBM_INT_CNTL, grbm_int_cntl); | 2621 | WREG32(GRBM_INT_CNTL, grbm_int_cntl); |
2595 | 2622 | ||
2596 | WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); | 2623 | WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); |
@@ -3126,9 +3153,19 @@ restart_ih: | |||
3126 | } else | 3153 | } else |
3127 | radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); | 3154 | radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); |
3128 | break; | 3155 | break; |
3156 | case 224: /* DMA trap event */ | ||
3157 | DRM_DEBUG("IH: DMA trap\n"); | ||
3158 | radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX); | ||
3159 | break; | ||
3129 | case 233: /* GUI IDLE */ | 3160 | case 233: /* GUI IDLE */ |
3130 | DRM_DEBUG("IH: GUI idle\n"); | 3161 | DRM_DEBUG("IH: GUI idle\n"); |
3131 | break; | 3162 | break; |
3163 | case 244: /* DMA trap event */ | ||
3164 | if (rdev->family >= CHIP_CAYMAN) { | ||
3165 | DRM_DEBUG("IH: DMA1 trap\n"); | ||
3166 | radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); | ||
3167 | } | ||
3168 | break; | ||
3132 | default: | 3169 | default: |
3133 | DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); | 3170 | DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); |
3134 | break; | 3171 | break; |
@@ -3154,6 +3191,143 @@ restart_ih: | |||
3154 | return IRQ_HANDLED; | 3191 | return IRQ_HANDLED; |
3155 | } | 3192 | } |
3156 | 3193 | ||
3194 | /** | ||
3195 | * evergreen_dma_fence_ring_emit - emit a fence on the DMA ring | ||
3196 | * | ||
3197 | * @rdev: radeon_device pointer | ||
3198 | * @fence: radeon fence object | ||
3199 | * | ||
3200 | * Add a DMA fence packet to the ring to write | ||
3201 | * the fence seq number and a DMA trap packet to generate | ||
3202 | * an interrupt if needed (evergreen-SI). | ||
3203 | */ | ||
3204 | void evergreen_dma_fence_ring_emit(struct radeon_device *rdev, | ||
3205 | struct radeon_fence *fence) | ||
3206 | { | ||
3207 | struct radeon_ring *ring = &rdev->ring[fence->ring]; | ||
3208 | u64 addr = rdev->fence_drv[fence->ring].gpu_addr; | ||
3209 | /* write the fence */ | ||
3210 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0)); | ||
3211 | radeon_ring_write(ring, addr & 0xfffffffc); | ||
3212 | radeon_ring_write(ring, (upper_32_bits(addr) & 0xff)); | ||
3213 | radeon_ring_write(ring, fence->seq); | ||
3214 | /* generate an interrupt */ | ||
3215 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0)); | ||
3216 | /* flush HDP */ | ||
3217 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); | ||
3218 | radeon_ring_write(ring, (0xf << 16) | HDP_MEM_COHERENCY_FLUSH_CNTL); | ||
3219 | radeon_ring_write(ring, 1); | ||
3220 | } | ||
3221 | |||
3222 | /** | ||
3223 | * evergreen_dma_ring_ib_execute - schedule an IB on the DMA engine | ||
3224 | * | ||
3225 | * @rdev: radeon_device pointer | ||
3226 | * @ib: IB object to schedule | ||
3227 | * | ||
3228 | * Schedule an IB in the DMA ring (evergreen). | ||
3229 | */ | ||
3230 | void evergreen_dma_ring_ib_execute(struct radeon_device *rdev, | ||
3231 | struct radeon_ib *ib) | ||
3232 | { | ||
3233 | struct radeon_ring *ring = &rdev->ring[ib->ring]; | ||
3234 | |||
3235 | if (rdev->wb.enabled) { | ||
3236 | u32 next_rptr = ring->wptr + 4; | ||
3237 | while ((next_rptr & 7) != 5) | ||
3238 | next_rptr++; | ||
3239 | next_rptr += 3; | ||
3240 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); | ||
3241 | radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); | ||
3242 | radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); | ||
3243 | radeon_ring_write(ring, next_rptr); | ||
3244 | } | ||
3245 | |||
3246 | /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. | ||
3247 | * Pad as necessary with NOPs. | ||
3248 | */ | ||
3249 | while ((ring->wptr & 7) != 5) | ||
3250 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); | ||
3251 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0)); | ||
3252 | radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); | ||
3253 | radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); | ||
3254 | |||
3255 | } | ||
3256 | |||
3257 | /** | ||
3258 | * evergreen_copy_dma - copy pages using the DMA engine | ||
3259 | * | ||
3260 | * @rdev: radeon_device pointer | ||
3261 | * @src_offset: src GPU address | ||
3262 | * @dst_offset: dst GPU address | ||
3263 | * @num_gpu_pages: number of GPU pages to xfer | ||
3264 | * @fence: radeon fence object | ||
3265 | * | ||
3266 | * Copy GPU pages using the DMA engine (evergreen-cayman). | ||
3267 | * Used by the radeon ttm implementation to move pages if | ||
3268 | * registered as the asic copy callback. | ||
3269 | */ | ||
3270 | int evergreen_copy_dma(struct radeon_device *rdev, | ||
3271 | uint64_t src_offset, uint64_t dst_offset, | ||
3272 | unsigned num_gpu_pages, | ||
3273 | struct radeon_fence **fence) | ||
3274 | { | ||
3275 | struct radeon_semaphore *sem = NULL; | ||
3276 | int ring_index = rdev->asic->copy.dma_ring_index; | ||
3277 | struct radeon_ring *ring = &rdev->ring[ring_index]; | ||
3278 | u32 size_in_dw, cur_size_in_dw; | ||
3279 | int i, num_loops; | ||
3280 | int r = 0; | ||
3281 | |||
3282 | r = radeon_semaphore_create(rdev, &sem); | ||
3283 | if (r) { | ||
3284 | DRM_ERROR("radeon: moving bo (%d).\n", r); | ||
3285 | return r; | ||
3286 | } | ||
3287 | |||
3288 | size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; | ||
3289 | num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff); | ||
3290 | r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); | ||
3291 | if (r) { | ||
3292 | DRM_ERROR("radeon: moving bo (%d).\n", r); | ||
3293 | radeon_semaphore_free(rdev, &sem, NULL); | ||
3294 | return r; | ||
3295 | } | ||
3296 | |||
3297 | if (radeon_fence_need_sync(*fence, ring->idx)) { | ||
3298 | radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, | ||
3299 | ring->idx); | ||
3300 | radeon_fence_note_sync(*fence, ring->idx); | ||
3301 | } else { | ||
3302 | radeon_semaphore_free(rdev, &sem, NULL); | ||
3303 | } | ||
3304 | |||
3305 | for (i = 0; i < num_loops; i++) { | ||
3306 | cur_size_in_dw = size_in_dw; | ||
3307 | if (cur_size_in_dw > 0xFFFFF) | ||
3308 | cur_size_in_dw = 0xFFFFF; | ||
3309 | size_in_dw -= cur_size_in_dw; | ||
3310 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); | ||
3311 | radeon_ring_write(ring, dst_offset & 0xfffffffc); | ||
3312 | radeon_ring_write(ring, src_offset & 0xfffffffc); | ||
3313 | radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); | ||
3314 | radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); | ||
3315 | src_offset += cur_size_in_dw * 4; | ||
3316 | dst_offset += cur_size_in_dw * 4; | ||
3317 | } | ||
3318 | |||
3319 | r = radeon_fence_emit(rdev, fence, ring->idx); | ||
3320 | if (r) { | ||
3321 | radeon_ring_unlock_undo(rdev, ring); | ||
3322 | return r; | ||
3323 | } | ||
3324 | |||
3325 | radeon_ring_unlock_commit(rdev, ring); | ||
3326 | radeon_semaphore_free(rdev, &sem, *fence); | ||
3327 | |||
3328 | return r; | ||
3329 | } | ||
3330 | |||
3157 | static int evergreen_startup(struct radeon_device *rdev) | 3331 | static int evergreen_startup(struct radeon_device *rdev) |
3158 | { | 3332 | { |
3159 | struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; | 3333 | struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; |
@@ -3217,6 +3391,12 @@ static int evergreen_startup(struct radeon_device *rdev) | |||
3217 | return r; | 3391 | return r; |
3218 | } | 3392 | } |
3219 | 3393 | ||
3394 | r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); | ||
3395 | if (r) { | ||
3396 | dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); | ||
3397 | return r; | ||
3398 | } | ||
3399 | |||
3220 | /* Enable IRQ */ | 3400 | /* Enable IRQ */ |
3221 | r = r600_irq_init(rdev); | 3401 | r = r600_irq_init(rdev); |
3222 | if (r) { | 3402 | if (r) { |
@@ -3231,12 +3411,23 @@ static int evergreen_startup(struct radeon_device *rdev) | |||
3231 | 0, 0xfffff, RADEON_CP_PACKET2); | 3411 | 0, 0xfffff, RADEON_CP_PACKET2); |
3232 | if (r) | 3412 | if (r) |
3233 | return r; | 3413 | return r; |
3414 | |||
3415 | ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; | ||
3416 | r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, | ||
3417 | DMA_RB_RPTR, DMA_RB_WPTR, | ||
3418 | 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); | ||
3419 | if (r) | ||
3420 | return r; | ||
3421 | |||
3234 | r = evergreen_cp_load_microcode(rdev); | 3422 | r = evergreen_cp_load_microcode(rdev); |
3235 | if (r) | 3423 | if (r) |
3236 | return r; | 3424 | return r; |
3237 | r = evergreen_cp_resume(rdev); | 3425 | r = evergreen_cp_resume(rdev); |
3238 | if (r) | 3426 | if (r) |
3239 | return r; | 3427 | return r; |
3428 | r = r600_dma_resume(rdev); | ||
3429 | if (r) | ||
3430 | return r; | ||
3240 | 3431 | ||
3241 | r = radeon_ib_pool_init(rdev); | 3432 | r = radeon_ib_pool_init(rdev); |
3242 | if (r) { | 3433 | if (r) { |
@@ -3283,11 +3474,9 @@ int evergreen_resume(struct radeon_device *rdev) | |||
3283 | 3474 | ||
3284 | int evergreen_suspend(struct radeon_device *rdev) | 3475 | int evergreen_suspend(struct radeon_device *rdev) |
3285 | { | 3476 | { |
3286 | struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; | ||
3287 | |||
3288 | r600_audio_fini(rdev); | 3477 | r600_audio_fini(rdev); |
3289 | r700_cp_stop(rdev); | 3478 | r700_cp_stop(rdev); |
3290 | ring->ready = false; | 3479 | r600_dma_stop(rdev); |
3291 | evergreen_irq_suspend(rdev); | 3480 | evergreen_irq_suspend(rdev); |
3292 | radeon_wb_disable(rdev); | 3481 | radeon_wb_disable(rdev); |
3293 | evergreen_pcie_gart_disable(rdev); | 3482 | evergreen_pcie_gart_disable(rdev); |
@@ -3364,6 +3553,9 @@ int evergreen_init(struct radeon_device *rdev) | |||
3364 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; | 3553 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; |
3365 | r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); | 3554 | r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); |
3366 | 3555 | ||
3556 | rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL; | ||
3557 | r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); | ||
3558 | |||
3367 | rdev->ih.ring_obj = NULL; | 3559 | rdev->ih.ring_obj = NULL; |
3368 | r600_ih_ring_init(rdev, 64 * 1024); | 3560 | r600_ih_ring_init(rdev, 64 * 1024); |
3369 | 3561 | ||
@@ -3376,6 +3568,7 @@ int evergreen_init(struct radeon_device *rdev) | |||
3376 | if (r) { | 3568 | if (r) { |
3377 | dev_err(rdev->dev, "disabling GPU acceleration\n"); | 3569 | dev_err(rdev->dev, "disabling GPU acceleration\n"); |
3378 | r700_cp_fini(rdev); | 3570 | r700_cp_fini(rdev); |
3571 | r600_dma_fini(rdev); | ||
3379 | r600_irq_fini(rdev); | 3572 | r600_irq_fini(rdev); |
3380 | radeon_wb_fini(rdev); | 3573 | radeon_wb_fini(rdev); |
3381 | radeon_ib_pool_fini(rdev); | 3574 | radeon_ib_pool_fini(rdev); |
@@ -3403,6 +3596,7 @@ void evergreen_fini(struct radeon_device *rdev) | |||
3403 | r600_audio_fini(rdev); | 3596 | r600_audio_fini(rdev); |
3404 | r600_blit_fini(rdev); | 3597 | r600_blit_fini(rdev); |
3405 | r700_cp_fini(rdev); | 3598 | r700_cp_fini(rdev); |
3599 | r600_dma_fini(rdev); | ||
3406 | r600_irq_fini(rdev); | 3600 | r600_irq_fini(rdev); |
3407 | radeon_wb_fini(rdev); | 3601 | radeon_wb_fini(rdev); |
3408 | radeon_ib_pool_fini(rdev); | 3602 | radeon_ib_pool_fini(rdev); |
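The new evergreen_dma_ring_ib_execute() above relies on one alignment rule: the 3-dword INDIRECT_BUFFER packet must end on an 8-dword boundary, so the ring is padded with NOP packets until (wptr & 7) == 5 before it is emitted. A minimal stand-alone sketch of just that arithmetic, with a hypothetical pad_then_emit_ib() counter standing in for the real radeon_ring_write() calls:

```c
#include <assert.h>
#include <stdint.h>

/* Hypothetical model of the DMA ring's write pointer; only alignment is modelled. */
static void pad_then_emit_ib(uint32_t *wptr)
{
	/* pad with DMA_PACKET_NOP until the 3-dword IB packet will end 8-dword aligned */
	while ((*wptr & 7) != 5)
		(*wptr)++;
	*wptr += 3;	/* INDIRECT_BUFFER header, IB address low, size | address high */
}

int main(void)
{
	for (uint32_t start = 0; start < 64; start++) {
		uint32_t wptr = start;
		pad_then_emit_ib(&wptr);
		assert((wptr & 7) == 0);	/* packet always ends on an 8-dword boundary */
	}
	return 0;
}
```

The same padding loop reappears in cayman_dma_ring_ib_execute() in ni.c below.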
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index cae7ab4219ef..7b4a650e33b2 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -905,6 +905,37 @@ | |||
905 | # define DC_HPDx_RX_INT_TIMER(x) ((x) << 16) | 905 | # define DC_HPDx_RX_INT_TIMER(x) ((x) << 16) |
906 | # define DC_HPDx_EN (1 << 28) | 906 | # define DC_HPDx_EN (1 << 28) |
907 | 907 | ||
908 | /* ASYNC DMA */ | ||
909 | #define DMA_RB_RPTR 0xd008 | ||
910 | #define DMA_RB_WPTR 0xd00c | ||
911 | |||
912 | #define DMA_CNTL 0xd02c | ||
913 | # define TRAP_ENABLE (1 << 0) | ||
914 | # define SEM_INCOMPLETE_INT_ENABLE (1 << 1) | ||
915 | # define SEM_WAIT_INT_ENABLE (1 << 2) | ||
916 | # define DATA_SWAP_ENABLE (1 << 3) | ||
917 | # define FENCE_SWAP_ENABLE (1 << 4) | ||
918 | # define CTXEMPTY_INT_ENABLE (1 << 28) | ||
919 | #define DMA_TILING_CONFIG 0xD0B8 | ||
920 | |||
921 | #define CAYMAN_DMA1_CNTL 0xd82c | ||
922 | |||
923 | /* async DMA packets */ | ||
924 | #define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \ | ||
925 | (((t) & 0x1) << 23) | \ | ||
926 | (((s) & 0x1) << 22) | \ | ||
927 | (((n) & 0xFFFFF) << 0)) | ||
928 | /* async DMA Packet types */ | ||
929 | #define DMA_PACKET_WRITE 0x2 | ||
930 | #define DMA_PACKET_COPY 0x3 | ||
931 | #define DMA_PACKET_INDIRECT_BUFFER 0x4 | ||
932 | #define DMA_PACKET_SEMAPHORE 0x5 | ||
933 | #define DMA_PACKET_FENCE 0x6 | ||
934 | #define DMA_PACKET_TRAP 0x7 | ||
935 | #define DMA_PACKET_SRBM_WRITE 0x9 | ||
936 | #define DMA_PACKET_CONSTANT_FILL 0xd | ||
937 | #define DMA_PACKET_NOP 0xf | ||
938 | |||
908 | /* PCIE link stuff */ | 939 | /* PCIE link stuff */ |
909 | #define PCIE_LC_TRAINING_CNTL 0xa1 /* PCIE_P */ | 940 | #define PCIE_LC_TRAINING_CNTL 0xa1 /* PCIE_P */ |
910 | #define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */ | 941 | #define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */ |
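For orientation, DMA_PACKET() above packs the opcode into bits 31:28 and the dword count into bits 19:0, with two single-bit flags in bits 23 and 22. A small sketch, using only the macro as defined in this patch, of the header dwords a few packets used elsewhere in the series expand to:

```c
#include <stdio.h>
#include <stdint.h>

#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) |	\
				  (((t) & 0x1) << 23) |		\
				  (((s) & 0x1) << 22) |		\
				  (((n) & 0xFFFFF) << 0))
#define DMA_PACKET_WRITE 0x2
#define DMA_PACKET_COPY  0x3
#define DMA_PACKET_FENCE 0x6

int main(void)
{
	/* 1-dword embedded-data WRITE, as used for the rptr writeback in the IB-execute paths */
	printf("WRITE, 1 dw : 0x%08x\n", (uint32_t)DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); /* 0x20000001 */
	/* maximum-size COPY chunk on r6xx/r7xx (0xFFFF dwords) */
	printf("COPY, 0xffff: 0x%08x\n", (uint32_t)DMA_PACKET(DMA_PACKET_COPY, 0, 0, 0xffff)); /* 0x3000ffff */
	/* FENCE header carries no count field */
	printf("FENCE       : 0x%08x\n", (uint32_t)DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0)); /* 0x60000000 */
	return 0;
}
```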
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 30c18a6e0044..39e8be1d1e89 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -611,6 +611,8 @@ static void cayman_gpu_init(struct radeon_device *rdev) | |||
611 | WREG32(GB_ADDR_CONFIG, gb_addr_config); | 611 | WREG32(GB_ADDR_CONFIG, gb_addr_config); |
612 | WREG32(DMIF_ADDR_CONFIG, gb_addr_config); | 612 | WREG32(DMIF_ADDR_CONFIG, gb_addr_config); |
613 | WREG32(HDP_ADDR_CONFIG, gb_addr_config); | 613 | WREG32(HDP_ADDR_CONFIG, gb_addr_config); |
614 | WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config); | ||
615 | WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config); | ||
614 | 616 | ||
615 | tmp = gb_addr_config & NUM_PIPES_MASK; | 617 | tmp = gb_addr_config & NUM_PIPES_MASK; |
616 | tmp = r6xx_remap_render_backend(rdev, tmp, | 618 | tmp = r6xx_remap_render_backend(rdev, tmp, |
@@ -915,6 +917,7 @@ static void cayman_cp_enable(struct radeon_device *rdev, bool enable) | |||
915 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); | 917 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); |
916 | WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); | 918 | WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); |
917 | WREG32(SCRATCH_UMSK, 0); | 919 | WREG32(SCRATCH_UMSK, 0); |
920 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; | ||
918 | } | 921 | } |
919 | } | 922 | } |
920 | 923 | ||
@@ -1128,6 +1131,181 @@ static int cayman_cp_resume(struct radeon_device *rdev) | |||
1128 | return 0; | 1131 | return 0; |
1129 | } | 1132 | } |
1130 | 1133 | ||
1134 | /* | ||
1135 | * DMA | ||
1136 | * Starting with R600, the GPU has an asynchronous | ||
1137 | * DMA engine. The programming model is very similar | ||
1138 | * to the 3D engine (ring buffer, IBs, etc.), but the | ||
1139 | * DMA controller has it's own packet format that is | ||
1140 | * different form the PM4 format used by the 3D engine. | ||
1141 | * It supports copying data, writing embedded data, | ||
1142 | * solid fills, and a number of other things. It also | ||
1143 | * has support for tiling/detiling of buffers. | ||
1144 | * Cayman and newer support two asynchronous DMA engines. | ||
1145 | */ | ||
1146 | /** | ||
1147 | * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine | ||
1148 | * | ||
1149 | * @rdev: radeon_device pointer | ||
1150 | * @ib: IB object to schedule | ||
1151 | * | ||
1152 | * Schedule an IB in the DMA ring (cayman-SI). | ||
1153 | */ | ||
1154 | void cayman_dma_ring_ib_execute(struct radeon_device *rdev, | ||
1155 | struct radeon_ib *ib) | ||
1156 | { | ||
1157 | struct radeon_ring *ring = &rdev->ring[ib->ring]; | ||
1158 | |||
1159 | if (rdev->wb.enabled) { | ||
1160 | u32 next_rptr = ring->wptr + 4; | ||
1161 | while ((next_rptr & 7) != 5) | ||
1162 | next_rptr++; | ||
1163 | next_rptr += 3; | ||
1164 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); | ||
1165 | radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); | ||
1166 | radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); | ||
1167 | radeon_ring_write(ring, next_rptr); | ||
1168 | } | ||
1169 | |||
1170 | /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. | ||
1171 | * Pad as necessary with NOPs. | ||
1172 | */ | ||
1173 | while ((ring->wptr & 7) != 5) | ||
1174 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); | ||
1175 | radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0)); | ||
1176 | radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); | ||
1177 | radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); | ||
1178 | |||
1179 | } | ||
1180 | |||
1181 | /** | ||
1182 | * cayman_dma_stop - stop the async dma engines | ||
1183 | * | ||
1184 | * @rdev: radeon_device pointer | ||
1185 | * | ||
1186 | * Stop the async dma engines (cayman-SI). | ||
1187 | */ | ||
1188 | void cayman_dma_stop(struct radeon_device *rdev) | ||
1189 | { | ||
1190 | u32 rb_cntl; | ||
1191 | |||
1192 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); | ||
1193 | |||
1194 | /* dma0 */ | ||
1195 | rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET); | ||
1196 | rb_cntl &= ~DMA_RB_ENABLE; | ||
1197 | WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl); | ||
1198 | |||
1199 | /* dma1 */ | ||
1200 | rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET); | ||
1201 | rb_cntl &= ~DMA_RB_ENABLE; | ||
1202 | WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl); | ||
1203 | |||
1204 | rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; | ||
1205 | rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; | ||
1206 | } | ||
1207 | |||
1208 | /** | ||
1209 | * cayman_dma_resume - setup and start the async dma engines | ||
1210 | * | ||
1211 | * @rdev: radeon_device pointer | ||
1212 | * | ||
1213 | * Set up the DMA ring buffers and enable them. (cayman-SI). | ||
1214 | * Returns 0 for success, error for failure. | ||
1215 | */ | ||
1216 | int cayman_dma_resume(struct radeon_device *rdev) | ||
1217 | { | ||
1218 | struct radeon_ring *ring; | ||
1219 | u32 rb_cntl, dma_cntl; | ||
1220 | u32 rb_bufsz; | ||
1221 | u32 reg_offset, wb_offset; | ||
1222 | int i, r; | ||
1223 | |||
1224 | /* Reset dma */ | ||
1225 | WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1); | ||
1226 | RREG32(SRBM_SOFT_RESET); | ||
1227 | udelay(50); | ||
1228 | WREG32(SRBM_SOFT_RESET, 0); | ||
1229 | |||
1230 | for (i = 0; i < 2; i++) { | ||
1231 | if (i == 0) { | ||
1232 | ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; | ||
1233 | reg_offset = DMA0_REGISTER_OFFSET; | ||
1234 | wb_offset = R600_WB_DMA_RPTR_OFFSET; | ||
1235 | } else { | ||
1236 | ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; | ||
1237 | reg_offset = DMA1_REGISTER_OFFSET; | ||
1238 | wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET; | ||
1239 | } | ||
1240 | |||
1241 | WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0); | ||
1242 | WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0); | ||
1243 | |||
1244 | /* Set ring buffer size in dwords */ | ||
1245 | rb_bufsz = drm_order(ring->ring_size / 4); | ||
1246 | rb_cntl = rb_bufsz << 1; | ||
1247 | #ifdef __BIG_ENDIAN | ||
1248 | rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; | ||
1249 | #endif | ||
1250 | WREG32(DMA_RB_CNTL + reg_offset, rb_cntl); | ||
1251 | |||
1252 | /* Initialize the ring buffer's read and write pointers */ | ||
1253 | WREG32(DMA_RB_RPTR + reg_offset, 0); | ||
1254 | WREG32(DMA_RB_WPTR + reg_offset, 0); | ||
1255 | |||
1256 | /* set the wb address whether it's enabled or not */ | ||
1257 | WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset, | ||
1258 | upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF); | ||
1259 | WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset, | ||
1260 | ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC)); | ||
1261 | |||
1262 | if (rdev->wb.enabled) | ||
1263 | rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; | ||
1264 | |||
1265 | WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8); | ||
1266 | |||
1267 | /* enable DMA IBs */ | ||
1268 | WREG32(DMA_IB_CNTL + reg_offset, DMA_IB_ENABLE | CMD_VMID_FORCE); | ||
1269 | |||
1270 | dma_cntl = RREG32(DMA_CNTL + reg_offset); | ||
1271 | dma_cntl &= ~CTXEMPTY_INT_ENABLE; | ||
1272 | WREG32(DMA_CNTL + reg_offset, dma_cntl); | ||
1273 | |||
1274 | ring->wptr = 0; | ||
1275 | WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2); | ||
1276 | |||
1277 | ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2; | ||
1278 | |||
1279 | WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE); | ||
1280 | |||
1281 | ring->ready = true; | ||
1282 | |||
1283 | r = radeon_ring_test(rdev, ring->idx, ring); | ||
1284 | if (r) { | ||
1285 | ring->ready = false; | ||
1286 | return r; | ||
1287 | } | ||
1288 | } | ||
1289 | |||
1290 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); | ||
1291 | |||
1292 | return 0; | ||
1293 | } | ||
1294 | |||
1295 | /** | ||
1296 | * cayman_dma_fini - tear down the async dma engines | ||
1297 | * | ||
1298 | * @rdev: radeon_device pointer | ||
1299 | * | ||
1300 | * Stop the async dma engines and free the rings (cayman-SI). | ||
1301 | */ | ||
1302 | void cayman_dma_fini(struct radeon_device *rdev) | ||
1303 | { | ||
1304 | cayman_dma_stop(rdev); | ||
1305 | radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); | ||
1306 | radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]); | ||
1307 | } | ||
1308 | |||
1131 | static int cayman_gpu_soft_reset(struct radeon_device *rdev) | 1309 | static int cayman_gpu_soft_reset(struct radeon_device *rdev) |
1132 | { | 1310 | { |
1133 | struct evergreen_mc_save save; | 1311 | struct evergreen_mc_save save; |
@@ -1218,6 +1396,32 @@ int cayman_asic_reset(struct radeon_device *rdev) | |||
1218 | return cayman_gpu_soft_reset(rdev); | 1396 | return cayman_gpu_soft_reset(rdev); |
1219 | } | 1397 | } |
1220 | 1398 | ||
1399 | /** | ||
1400 | * cayman_dma_is_lockup - Check if the DMA engine is locked up | ||
1401 | * | ||
1402 | * @rdev: radeon_device pointer | ||
1403 | * @ring: radeon_ring structure holding ring information | ||
1404 | * | ||
1405 | * Check if the async DMA engine is locked up (cayman-SI). | ||
1406 | * Returns true if the engine appears to be locked up, false if not. | ||
1407 | */ | ||
1408 | bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) | ||
1409 | { | ||
1410 | u32 dma_status_reg; | ||
1411 | |||
1412 | if (ring->idx == R600_RING_TYPE_DMA_INDEX) | ||
1413 | dma_status_reg = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET); | ||
1414 | else | ||
1415 | dma_status_reg = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET); | ||
1416 | if (dma_status_reg & DMA_IDLE) { | ||
1417 | radeon_ring_lockup_update(ring); | ||
1418 | return false; | ||
1419 | } | ||
1420 | /* force ring activities */ | ||
1421 | radeon_ring_force_activity(rdev, ring); | ||
1422 | return radeon_ring_test_lockup(rdev, ring); | ||
1423 | } | ||
1424 | |||
1221 | static int cayman_startup(struct radeon_device *rdev) | 1425 | static int cayman_startup(struct radeon_device *rdev) |
1222 | { | 1426 | { |
1223 | struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; | 1427 | struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; |
@@ -1299,6 +1503,18 @@ static int cayman_startup(struct radeon_device *rdev) | |||
1299 | return r; | 1503 | return r; |
1300 | } | 1504 | } |
1301 | 1505 | ||
1506 | r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); | ||
1507 | if (r) { | ||
1508 | dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); | ||
1509 | return r; | ||
1510 | } | ||
1511 | |||
1512 | r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); | ||
1513 | if (r) { | ||
1514 | dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); | ||
1515 | return r; | ||
1516 | } | ||
1517 | |||
1302 | /* Enable IRQ */ | 1518 | /* Enable IRQ */ |
1303 | r = r600_irq_init(rdev); | 1519 | r = r600_irq_init(rdev); |
1304 | if (r) { | 1520 | if (r) { |
@@ -1313,6 +1529,23 @@ static int cayman_startup(struct radeon_device *rdev) | |||
1313 | 0, 0xfffff, RADEON_CP_PACKET2); | 1529 | 0, 0xfffff, RADEON_CP_PACKET2); |
1314 | if (r) | 1530 | if (r) |
1315 | return r; | 1531 | return r; |
1532 | |||
1533 | ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; | ||
1534 | r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, | ||
1535 | DMA_RB_RPTR + DMA0_REGISTER_OFFSET, | ||
1536 | DMA_RB_WPTR + DMA0_REGISTER_OFFSET, | ||
1537 | 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); | ||
1538 | if (r) | ||
1539 | return r; | ||
1540 | |||
1541 | ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; | ||
1542 | r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, | ||
1543 | DMA_RB_RPTR + DMA1_REGISTER_OFFSET, | ||
1544 | DMA_RB_WPTR + DMA1_REGISTER_OFFSET, | ||
1545 | 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); | ||
1546 | if (r) | ||
1547 | return r; | ||
1548 | |||
1316 | r = cayman_cp_load_microcode(rdev); | 1549 | r = cayman_cp_load_microcode(rdev); |
1317 | if (r) | 1550 | if (r) |
1318 | return r; | 1551 | return r; |
@@ -1320,6 +1553,10 @@ static int cayman_startup(struct radeon_device *rdev) | |||
1320 | if (r) | 1553 | if (r) |
1321 | return r; | 1554 | return r; |
1322 | 1555 | ||
1556 | r = cayman_dma_resume(rdev); | ||
1557 | if (r) | ||
1558 | return r; | ||
1559 | |||
1323 | r = radeon_ib_pool_init(rdev); | 1560 | r = radeon_ib_pool_init(rdev); |
1324 | if (r) { | 1561 | if (r) { |
1325 | dev_err(rdev->dev, "IB initialization failed (%d).\n", r); | 1562 | dev_err(rdev->dev, "IB initialization failed (%d).\n", r); |
@@ -1364,7 +1601,7 @@ int cayman_suspend(struct radeon_device *rdev) | |||
1364 | { | 1601 | { |
1365 | r600_audio_fini(rdev); | 1602 | r600_audio_fini(rdev); |
1366 | cayman_cp_enable(rdev, false); | 1603 | cayman_cp_enable(rdev, false); |
1367 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; | 1604 | cayman_dma_stop(rdev); |
1368 | evergreen_irq_suspend(rdev); | 1605 | evergreen_irq_suspend(rdev); |
1369 | radeon_wb_disable(rdev); | 1606 | radeon_wb_disable(rdev); |
1370 | cayman_pcie_gart_disable(rdev); | 1607 | cayman_pcie_gart_disable(rdev); |
@@ -1431,6 +1668,14 @@ int cayman_init(struct radeon_device *rdev) | |||
1431 | ring->ring_obj = NULL; | 1668 | ring->ring_obj = NULL; |
1432 | r600_ring_init(rdev, ring, 1024 * 1024); | 1669 | r600_ring_init(rdev, ring, 1024 * 1024); |
1433 | 1670 | ||
1671 | ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; | ||
1672 | ring->ring_obj = NULL; | ||
1673 | r600_ring_init(rdev, ring, 64 * 1024); | ||
1674 | |||
1675 | ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; | ||
1676 | ring->ring_obj = NULL; | ||
1677 | r600_ring_init(rdev, ring, 64 * 1024); | ||
1678 | |||
1434 | rdev->ih.ring_obj = NULL; | 1679 | rdev->ih.ring_obj = NULL; |
1435 | r600_ih_ring_init(rdev, 64 * 1024); | 1680 | r600_ih_ring_init(rdev, 64 * 1024); |
1436 | 1681 | ||
@@ -1443,6 +1688,7 @@ int cayman_init(struct radeon_device *rdev) | |||
1443 | if (r) { | 1688 | if (r) { |
1444 | dev_err(rdev->dev, "disabling GPU acceleration\n"); | 1689 | dev_err(rdev->dev, "disabling GPU acceleration\n"); |
1445 | cayman_cp_fini(rdev); | 1690 | cayman_cp_fini(rdev); |
1691 | cayman_dma_fini(rdev); | ||
1446 | r600_irq_fini(rdev); | 1692 | r600_irq_fini(rdev); |
1447 | if (rdev->flags & RADEON_IS_IGP) | 1693 | if (rdev->flags & RADEON_IS_IGP) |
1448 | si_rlc_fini(rdev); | 1694 | si_rlc_fini(rdev); |
@@ -1473,6 +1719,7 @@ void cayman_fini(struct radeon_device *rdev) | |||
1473 | { | 1719 | { |
1474 | r600_blit_fini(rdev); | 1720 | r600_blit_fini(rdev); |
1475 | cayman_cp_fini(rdev); | 1721 | cayman_cp_fini(rdev); |
1722 | cayman_dma_fini(rdev); | ||
1476 | r600_irq_fini(rdev); | 1723 | r600_irq_fini(rdev); |
1477 | if (rdev->flags & RADEON_IS_IGP) | 1724 | if (rdev->flags & RADEON_IS_IGP) |
1478 | si_rlc_fini(rdev); | 1725 | si_rlc_fini(rdev); |
@@ -1548,30 +1795,57 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, | |||
1548 | { | 1795 | { |
1549 | struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; | 1796 | struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; |
1550 | uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); | 1797 | uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); |
1551 | 1798 | uint64_t value; | |
1552 | while (count) { | 1799 | unsigned ndw; |
1553 | unsigned ndw = 1 + count * 2; | 1800 | |
1554 | if (ndw > 0x3FFF) | 1801 | if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) { |
1555 | ndw = 0x3FFF; | 1802 | while (count) { |
1556 | 1803 | ndw = 1 + count * 2; | |
1557 | radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw)); | 1804 | if (ndw > 0x3FFF) |
1558 | radeon_ring_write(ring, pe); | 1805 | ndw = 0x3FFF; |
1559 | radeon_ring_write(ring, upper_32_bits(pe) & 0xff); | 1806 | |
1560 | for (; ndw > 1; ndw -= 2, --count, pe += 8) { | 1807 | radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw)); |
1561 | uint64_t value = 0; | 1808 | radeon_ring_write(ring, pe); |
1562 | if (flags & RADEON_VM_PAGE_SYSTEM) { | 1809 | radeon_ring_write(ring, upper_32_bits(pe) & 0xff); |
1563 | value = radeon_vm_map_gart(rdev, addr); | 1810 | for (; ndw > 1; ndw -= 2, --count, pe += 8) { |
1564 | value &= 0xFFFFFFFFFFFFF000ULL; | 1811 | if (flags & RADEON_VM_PAGE_SYSTEM) { |
1812 | value = radeon_vm_map_gart(rdev, addr); | ||
1813 | value &= 0xFFFFFFFFFFFFF000ULL; | ||
1814 | } else if (flags & RADEON_VM_PAGE_VALID) { | ||
1815 | value = addr; | ||
1816 | } else { | ||
1817 | value = 0; | ||
1818 | } | ||
1565 | addr += incr; | 1819 | addr += incr; |
1566 | 1820 | value |= r600_flags; | |
1567 | } else if (flags & RADEON_VM_PAGE_VALID) { | 1821 | radeon_ring_write(ring, value); |
1568 | value = addr; | 1822 | radeon_ring_write(ring, upper_32_bits(value)); |
1823 | } | ||
1824 | } | ||
1825 | } else { | ||
1826 | while (count) { | ||
1827 | ndw = count * 2; | ||
1828 | if (ndw > 0xFFFFE) | ||
1829 | ndw = 0xFFFFE; | ||
1830 | |||
1831 | /* for non-physically contiguous pages (system) */ | ||
1832 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw)); | ||
1833 | radeon_ring_write(ring, pe); | ||
1834 | radeon_ring_write(ring, upper_32_bits(pe) & 0xff); | ||
1835 | for (; ndw > 0; ndw -= 2, --count, pe += 8) { | ||
1836 | if (flags & RADEON_VM_PAGE_SYSTEM) { | ||
1837 | value = radeon_vm_map_gart(rdev, addr); | ||
1838 | value &= 0xFFFFFFFFFFFFF000ULL; | ||
1839 | } else if (flags & RADEON_VM_PAGE_VALID) { | ||
1840 | value = addr; | ||
1841 | } else { | ||
1842 | value = 0; | ||
1843 | } | ||
1569 | addr += incr; | 1844 | addr += incr; |
1845 | value |= r600_flags; | ||
1846 | radeon_ring_write(ring, value); | ||
1847 | radeon_ring_write(ring, upper_32_bits(value)); | ||
1570 | } | 1848 | } |
1571 | |||
1572 | value |= r600_flags; | ||
1573 | radeon_ring_write(ring, value); | ||
1574 | radeon_ring_write(ring, upper_32_bits(value)); | ||
1575 | } | 1849 | } |
1576 | } | 1850 | } |
1577 | } | 1851 | } |
@@ -1606,3 +1880,26 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) | |||
1606 | radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); | 1880 | radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); |
1607 | radeon_ring_write(ring, 0x0); | 1881 | radeon_ring_write(ring, 0x0); |
1608 | } | 1882 | } |
1883 | |||
1884 | void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) | ||
1885 | { | ||
1886 | struct radeon_ring *ring = &rdev->ring[ridx]; | ||
1887 | |||
1888 | if (vm == NULL) | ||
1889 | return; | ||
1890 | |||
1891 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); | ||
1892 | radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); | ||
1893 | radeon_ring_write(ring, vm->pd_gpu_addr >> 12); | ||
1894 | |||
1895 | /* flush hdp cache */ | ||
1896 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); | ||
1897 | radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); | ||
1898 | radeon_ring_write(ring, 1); | ||
1899 | |||
1900 | /* bits 0-7 are the VM contexts0-7 */ | ||
1901 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); | ||
1902 | radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); | ||
1903 | radeon_ring_write(ring, 1 << vm->id); | ||
1904 | } | ||
1905 | |||
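The DMA branch added to cayman_vm_set_page() above writes page table entries with WRITE packets: two dwords per 64-bit PTE, at most 0xFFFFE payload dwords per packet. A stand-alone sketch of that chunking arithmetic (num_write_packets() is a hypothetical helper, not part of the patch):

```c
#include <stdio.h>

/* Each PTE is 64 bits (2 dwords); one DMA WRITE packet carries at most 0xFFFFE dwords. */
static unsigned num_write_packets(unsigned count)
{
	unsigned packets = 0;

	while (count) {
		unsigned ndw = count * 2;

		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;
		count -= ndw / 2;	/* PTEs covered by this packet */
		packets++;
	}
	return packets;
}

int main(void)
{
	/* a 2 MiB range of 4 KiB pages (512 PTEs) fits in one packet */
	printf("512 PTEs     -> %u packet(s)\n", num_write_packets(512));
	/* a 4 GiB range (1048576 PTEs) needs a few packets */
	printf("1048576 PTEs -> %u packet(s)\n", num_write_packets(1048576));
	return 0;
}
```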
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index f5e54a7e2bf2..b93186b8ee4b 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -50,6 +50,24 @@ | |||
50 | #define VMID(x) (((x) & 0x7) << 0) | 50 | #define VMID(x) (((x) & 0x7) << 0) |
51 | #define SRBM_STATUS 0x0E50 | 51 | #define SRBM_STATUS 0x0E50 |
52 | 52 | ||
53 | #define SRBM_SOFT_RESET 0x0E60 | ||
54 | #define SOFT_RESET_BIF (1 << 1) | ||
55 | #define SOFT_RESET_CG (1 << 2) | ||
56 | #define SOFT_RESET_DC (1 << 5) | ||
57 | #define SOFT_RESET_DMA1 (1 << 6) | ||
58 | #define SOFT_RESET_GRBM (1 << 8) | ||
59 | #define SOFT_RESET_HDP (1 << 9) | ||
60 | #define SOFT_RESET_IH (1 << 10) | ||
61 | #define SOFT_RESET_MC (1 << 11) | ||
62 | #define SOFT_RESET_RLC (1 << 13) | ||
63 | #define SOFT_RESET_ROM (1 << 14) | ||
64 | #define SOFT_RESET_SEM (1 << 15) | ||
65 | #define SOFT_RESET_VMC (1 << 17) | ||
66 | #define SOFT_RESET_DMA (1 << 20) | ||
67 | #define SOFT_RESET_TST (1 << 21) | ||
68 | #define SOFT_RESET_REGBB (1 << 22) | ||
69 | #define SOFT_RESET_ORB (1 << 23) | ||
70 | |||
53 | #define VM_CONTEXT0_REQUEST_RESPONSE 0x1470 | 71 | #define VM_CONTEXT0_REQUEST_RESPONSE 0x1470 |
54 | #define REQUEST_TYPE(x) (((x) & 0xf) << 0) | 72 | #define REQUEST_TYPE(x) (((x) & 0xf) << 0) |
55 | #define RESPONSE_TYPE_MASK 0x000000F0 | 73 | #define RESPONSE_TYPE_MASK 0x000000F0 |
@@ -599,5 +617,62 @@ | |||
599 | #define PACKET3_SET_APPEND_CNT 0x75 | 617 | #define PACKET3_SET_APPEND_CNT 0x75 |
600 | #define PACKET3_ME_WRITE 0x7A | 618 | #define PACKET3_ME_WRITE 0x7A |
601 | 619 | ||
620 | /* ASYNC DMA - first instance at 0xd000, second at 0xd800 */ | ||
621 | #define DMA0_REGISTER_OFFSET 0x0 /* not a register */ | ||
622 | #define DMA1_REGISTER_OFFSET 0x800 /* not a register */ | ||
623 | |||
624 | #define DMA_RB_CNTL 0xd000 | ||
625 | # define DMA_RB_ENABLE (1 << 0) | ||
626 | # define DMA_RB_SIZE(x) ((x) << 1) /* log2 */ | ||
627 | # define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */ | ||
628 | # define DMA_RPTR_WRITEBACK_ENABLE (1 << 12) | ||
629 | # define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */ | ||
630 | # define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */ | ||
631 | #define DMA_RB_BASE 0xd004 | ||
632 | #define DMA_RB_RPTR 0xd008 | ||
633 | #define DMA_RB_WPTR 0xd00c | ||
634 | |||
635 | #define DMA_RB_RPTR_ADDR_HI 0xd01c | ||
636 | #define DMA_RB_RPTR_ADDR_LO 0xd020 | ||
637 | |||
638 | #define DMA_IB_CNTL 0xd024 | ||
639 | # define DMA_IB_ENABLE (1 << 0) | ||
640 | # define DMA_IB_SWAP_ENABLE (1 << 4) | ||
641 | # define CMD_VMID_FORCE (1 << 31) | ||
642 | #define DMA_IB_RPTR 0xd028 | ||
643 | #define DMA_CNTL 0xd02c | ||
644 | # define TRAP_ENABLE (1 << 0) | ||
645 | # define SEM_INCOMPLETE_INT_ENABLE (1 << 1) | ||
646 | # define SEM_WAIT_INT_ENABLE (1 << 2) | ||
647 | # define DATA_SWAP_ENABLE (1 << 3) | ||
648 | # define FENCE_SWAP_ENABLE (1 << 4) | ||
649 | # define CTXEMPTY_INT_ENABLE (1 << 28) | ||
650 | #define DMA_STATUS_REG 0xd034 | ||
651 | # define DMA_IDLE (1 << 0) | ||
652 | #define DMA_SEM_INCOMPLETE_TIMER_CNTL 0xd044 | ||
653 | #define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0xd048 | ||
654 | #define DMA_TILING_CONFIG 0xd0b8 | ||
655 | #define DMA_MODE 0xd0bc | ||
656 | |||
657 | #define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \ | ||
658 | (((t) & 0x1) << 23) | \ | ||
659 | (((s) & 0x1) << 22) | \ | ||
660 | (((n) & 0xFFFFF) << 0)) | ||
661 | |||
662 | #define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \ | ||
663 | (((vmid) & 0xF) << 20) | \ | ||
664 | (((n) & 0xFFFFF) << 0)) | ||
665 | |||
666 | /* async DMA Packet types */ | ||
667 | #define DMA_PACKET_WRITE 0x2 | ||
668 | #define DMA_PACKET_COPY 0x3 | ||
669 | #define DMA_PACKET_INDIRECT_BUFFER 0x4 | ||
670 | #define DMA_PACKET_SEMAPHORE 0x5 | ||
671 | #define DMA_PACKET_FENCE 0x6 | ||
672 | #define DMA_PACKET_TRAP 0x7 | ||
673 | #define DMA_PACKET_SRBM_WRITE 0x9 | ||
674 | #define DMA_PACKET_CONSTANT_FILL 0xd | ||
675 | #define DMA_PACKET_NOP 0xf | ||
676 | |||
602 | #endif | 677 | #endif |
603 | 678 | ||
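DMA_RB_CNTL above encodes the ring size as a log2 dword count (DMA_RB_SIZE(x) = x << 1). A quick sketch of the value r600_dma_resume()/cayman_dma_resume() compute for the 64 KB DMA rings allocated in the *_init() paths; order_base_2_ceil() is only a stand-in for the kernel's drm_order():

```c
#include <stdio.h>

/* Stand-in for drm_order(): smallest n such that (1UL << n) >= size. */
static unsigned order_base_2_ceil(unsigned long size)
{
	unsigned n = 0;

	while ((1UL << n) < size)
		n++;
	return n;
}

#define DMA_RB_SIZE(x)	((x) << 1)	/* log2 of ring size in dwords */

int main(void)
{
	unsigned long ring_bytes = 64 * 1024;			/* r600_ring_init(..., 64 * 1024) */
	unsigned rb_bufsz = order_base_2_ceil(ring_bytes / 4);	/* size in dwords -> log2 */

	/* 16384 dwords -> rb_bufsz = 14 -> DMA_RB_SIZE field = 0x1c */
	printf("rb_bufsz = %u, DMA_RB_SIZE = 0x%x\n", rb_bufsz, DMA_RB_SIZE(rb_bufsz));
	return 0;
}
```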
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 169ecc9628ea..a76eca18f134 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1370,6 +1370,29 @@ bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) | |||
1370 | return radeon_ring_test_lockup(rdev, ring); | 1370 | return radeon_ring_test_lockup(rdev, ring); |
1371 | } | 1371 | } |
1372 | 1372 | ||
1373 | /** | ||
1374 | * r600_dma_is_lockup - Check if the DMA engine is locked up | ||
1375 | * | ||
1376 | * @rdev: radeon_device pointer | ||
1377 | * @ring: radeon_ring structure holding ring information | ||
1378 | * | ||
1379 | * Check if the async DMA engine is locked up (r6xx-evergreen). | ||
1380 | * Returns true if the engine appears to be locked up, false if not. | ||
1381 | */ | ||
1382 | bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) | ||
1383 | { | ||
1384 | u32 dma_status_reg; | ||
1385 | |||
1386 | dma_status_reg = RREG32(DMA_STATUS_REG); | ||
1387 | if (dma_status_reg & DMA_IDLE) { | ||
1388 | radeon_ring_lockup_update(ring); | ||
1389 | return false; | ||
1390 | } | ||
1391 | /* force ring activities */ | ||
1392 | radeon_ring_force_activity(rdev, ring); | ||
1393 | return radeon_ring_test_lockup(rdev, ring); | ||
1394 | } | ||
1395 | |||
1373 | int r600_asic_reset(struct radeon_device *rdev) | 1396 | int r600_asic_reset(struct radeon_device *rdev) |
1374 | { | 1397 | { |
1375 | return r600_gpu_soft_reset(rdev); | 1398 | return r600_gpu_soft_reset(rdev); |
@@ -1588,6 +1611,7 @@ static void r600_gpu_init(struct radeon_device *rdev) | |||
1588 | WREG32(GB_TILING_CONFIG, tiling_config); | 1611 | WREG32(GB_TILING_CONFIG, tiling_config); |
1589 | WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff); | 1612 | WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff); |
1590 | WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff); | 1613 | WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff); |
1614 | WREG32(DMA_TILING_CONFIG, tiling_config & 0xffff); | ||
1591 | 1615 | ||
1592 | tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8); | 1616 | tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8); |
1593 | WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK); | 1617 | WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK); |
@@ -1865,6 +1889,7 @@ void r600_cp_stop(struct radeon_device *rdev) | |||
1865 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); | 1889 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); |
1866 | WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1)); | 1890 | WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1)); |
1867 | WREG32(SCRATCH_UMSK, 0); | 1891 | WREG32(SCRATCH_UMSK, 0); |
1892 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; | ||
1868 | } | 1893 | } |
1869 | 1894 | ||
1870 | int r600_init_microcode(struct radeon_device *rdev) | 1895 | int r600_init_microcode(struct radeon_device *rdev) |
@@ -2190,6 +2215,128 @@ void r600_cp_fini(struct radeon_device *rdev) | |||
2190 | radeon_scratch_free(rdev, ring->rptr_save_reg); | 2215 | radeon_scratch_free(rdev, ring->rptr_save_reg); |
2191 | } | 2216 | } |
2192 | 2217 | ||
2218 | /* | ||
2219 | * DMA | ||
2220 | * Starting with R600, the GPU has an asynchronous | ||
2221 | * DMA engine. The programming model is very similar | ||
2222 | * to the 3D engine (ring buffer, IBs, etc.), but the | ||
2223 | * DMA controller has it's own packet format that is | ||
2224 | * different form the PM4 format used by the 3D engine. | ||
2225 | * It supports copying data, writing embedded data, | ||
2226 | * solid fills, and a number of other things. It also | ||
2227 | * has support for tiling/detiling of buffers. | ||
2228 | */ | ||
2229 | /** | ||
2230 | * r600_dma_stop - stop the async dma engine | ||
2231 | * | ||
2232 | * @rdev: radeon_device pointer | ||
2233 | * | ||
2234 | * Stop the async dma engine (r6xx-evergreen). | ||
2235 | */ | ||
2236 | void r600_dma_stop(struct radeon_device *rdev) | ||
2237 | { | ||
2238 | u32 rb_cntl = RREG32(DMA_RB_CNTL); | ||
2239 | |||
2240 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); | ||
2241 | |||
2242 | rb_cntl &= ~DMA_RB_ENABLE; | ||
2243 | WREG32(DMA_RB_CNTL, rb_cntl); | ||
2244 | |||
2245 | rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; | ||
2246 | } | ||
2247 | |||
2248 | /** | ||
2249 | * r600_dma_resume - setup and start the async dma engine | ||
2250 | * | ||
2251 | * @rdev: radeon_device pointer | ||
2252 | * | ||
2253 | * Set up the DMA ring buffer and enable it. (r6xx-evergreen). | ||
2254 | * Returns 0 for success, error for failure. | ||
2255 | */ | ||
2256 | int r600_dma_resume(struct radeon_device *rdev) | ||
2257 | { | ||
2258 | struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; | ||
2259 | u32 rb_cntl, dma_cntl; | ||
2260 | u32 rb_bufsz; | ||
2261 | int r; | ||
2262 | |||
2263 | /* Reset dma */ | ||
2264 | if (rdev->family >= CHIP_RV770) | ||
2265 | WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA); | ||
2266 | else | ||
2267 | WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA); | ||
2268 | RREG32(SRBM_SOFT_RESET); | ||
2269 | udelay(50); | ||
2270 | WREG32(SRBM_SOFT_RESET, 0); | ||
2271 | |||
2272 | WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0); | ||
2273 | WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); | ||
2274 | |||
2275 | /* Set ring buffer size in dwords */ | ||
2276 | rb_bufsz = drm_order(ring->ring_size / 4); | ||
2277 | rb_cntl = rb_bufsz << 1; | ||
2278 | #ifdef __BIG_ENDIAN | ||
2279 | rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; | ||
2280 | #endif | ||
2281 | WREG32(DMA_RB_CNTL, rb_cntl); | ||
2282 | |||
2283 | /* Initialize the ring buffer's read and write pointers */ | ||
2284 | WREG32(DMA_RB_RPTR, 0); | ||
2285 | WREG32(DMA_RB_WPTR, 0); | ||
2286 | |||
2287 | /* set the wb address whether it's enabled or not */ | ||
2288 | WREG32(DMA_RB_RPTR_ADDR_HI, | ||
2289 | upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF); | ||
2290 | WREG32(DMA_RB_RPTR_ADDR_LO, | ||
2291 | ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC)); | ||
2292 | |||
2293 | if (rdev->wb.enabled) | ||
2294 | rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; | ||
2295 | |||
2296 | WREG32(DMA_RB_BASE, ring->gpu_addr >> 8); | ||
2297 | |||
2298 | /* enable DMA IBs */ | ||
2299 | WREG32(DMA_IB_CNTL, DMA_IB_ENABLE); | ||
2300 | |||
2301 | dma_cntl = RREG32(DMA_CNTL); | ||
2302 | dma_cntl &= ~CTXEMPTY_INT_ENABLE; | ||
2303 | WREG32(DMA_CNTL, dma_cntl); | ||
2304 | |||
2305 | if (rdev->family >= CHIP_RV770) | ||
2306 | WREG32(DMA_MODE, 1); | ||
2307 | |||
2308 | ring->wptr = 0; | ||
2309 | WREG32(DMA_RB_WPTR, ring->wptr << 2); | ||
2310 | |||
2311 | ring->rptr = RREG32(DMA_RB_RPTR) >> 2; | ||
2312 | |||
2313 | WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE); | ||
2314 | |||
2315 | ring->ready = true; | ||
2316 | |||
2317 | r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring); | ||
2318 | if (r) { | ||
2319 | ring->ready = false; | ||
2320 | return r; | ||
2321 | } | ||
2322 | |||
2323 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); | ||
2324 | |||
2325 | return 0; | ||
2326 | } | ||
2327 | |||
2328 | /** | ||
2329 | * r600_dma_fini - tear down the async dma engine | ||
2330 | * | ||
2331 | * @rdev: radeon_device pointer | ||
2332 | * | ||
2333 | * Stop the async dma engine and free the ring (r6xx-evergreen). | ||
2334 | */ | ||
2335 | void r600_dma_fini(struct radeon_device *rdev) | ||
2336 | { | ||
2337 | r600_dma_stop(rdev); | ||
2338 | radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); | ||
2339 | } | ||
2193 | 2340 | ||
2194 | /* | 2341 | /* |
2195 | * GPU scratch registers helpers function. | 2342 | * GPU scratch registers helpers function. |
@@ -2246,6 +2393,64 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) | |||
2246 | return r; | 2393 | return r; |
2247 | } | 2394 | } |
2248 | 2395 | ||
2396 | /** | ||
2397 | * r600_dma_ring_test - simple async dma engine test | ||
2398 | * | ||
2399 | * @rdev: radeon_device pointer | ||
2400 | * @ring: radeon_ring structure holding ring information | ||
2401 | * | ||
2402 | * Test the DMA engine by using it to write a value | ||
2403 | * to memory (r6xx-SI). | ||
2404 | * Returns 0 for success, error for failure. | ||
2405 | */ | ||
2406 | int r600_dma_ring_test(struct radeon_device *rdev, | ||
2407 | struct radeon_ring *ring) | ||
2408 | { | ||
2409 | unsigned i; | ||
2410 | int r; | ||
2411 | void __iomem *ptr = (void *)rdev->vram_scratch.ptr; | ||
2412 | u32 tmp; | ||
2413 | |||
2414 | if (!ptr) { | ||
2415 | DRM_ERROR("invalid vram scratch pointer\n"); | ||
2416 | return -EINVAL; | ||
2417 | } | ||
2418 | |||
2419 | tmp = 0xCAFEDEAD; | ||
2420 | writel(tmp, ptr); | ||
2421 | |||
2422 | r = radeon_ring_lock(rdev, ring, 4); | ||
2423 | if (r) { | ||
2424 | DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); | ||
2425 | return r; | ||
2426 | } | ||
2427 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); | ||
2428 | radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); | ||
2429 | radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff); | ||
2430 | radeon_ring_write(ring, 0xDEADBEEF); | ||
2431 | radeon_ring_unlock_commit(rdev, ring); | ||
2432 | |||
2433 | for (i = 0; i < rdev->usec_timeout; i++) { | ||
2434 | tmp = readl(ptr); | ||
2435 | if (tmp == 0xDEADBEEF) | ||
2436 | break; | ||
2437 | DRM_UDELAY(1); | ||
2438 | } | ||
2439 | |||
2440 | if (i < rdev->usec_timeout) { | ||
2441 | DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); | ||
2442 | } else { | ||
2443 | DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", | ||
2444 | ring->idx, tmp); | ||
2445 | r = -EINVAL; | ||
2446 | } | ||
2447 | return r; | ||
2448 | } | ||
2449 | |||
2450 | /* | ||
2451 | * CP fences/semaphores | ||
2452 | */ | ||
2453 | |||
2249 | void r600_fence_ring_emit(struct radeon_device *rdev, | 2454 | void r600_fence_ring_emit(struct radeon_device *rdev, |
2250 | struct radeon_fence *fence) | 2455 | struct radeon_fence *fence) |
2251 | { | 2456 | { |
@@ -2309,6 +2514,58 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev, | |||
2309 | radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); | 2514 | radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); |
2310 | } | 2515 | } |
2311 | 2516 | ||
2517 | /* | ||
2518 | * DMA fences/semaphores | ||
2519 | */ | ||
2520 | |||
2521 | /** | ||
2522 | * r600_dma_fence_ring_emit - emit a fence on the DMA ring | ||
2523 | * | ||
2524 | * @rdev: radeon_device pointer | ||
2525 | * @fence: radeon fence object | ||
2526 | * | ||
2527 | * Add a DMA fence packet to the ring to write | ||
2528 | * the fence seq number and a DMA trap packet to generate | ||
2529 | * an interrupt if needed (r6xx-r7xx). | ||
2530 | */ | ||
2531 | void r600_dma_fence_ring_emit(struct radeon_device *rdev, | ||
2532 | struct radeon_fence *fence) | ||
2533 | { | ||
2534 | struct radeon_ring *ring = &rdev->ring[fence->ring]; | ||
2535 | u64 addr = rdev->fence_drv[fence->ring].gpu_addr; | ||
2536 | /* write the fence */ | ||
2537 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0)); | ||
2538 | radeon_ring_write(ring, addr & 0xfffffffc); | ||
2539 | radeon_ring_write(ring, (upper_32_bits(addr) & 0xff)); | ||
2540 | radeon_ring_write(ring, fence->seq); | ||
2541 | /* generate an interrupt */ | ||
2542 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0)); | ||
2543 | } | ||
2544 | |||
2545 | /** | ||
2546 | * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring | ||
2547 | * | ||
2548 | * @rdev: radeon_device pointer | ||
2549 | * @ring: radeon_ring structure holding ring information | ||
2550 | * @semaphore: radeon semaphore object | ||
2551 | * @emit_wait: wait or signal semaphore | ||
2552 | * | ||
2553 | * Add a DMA semaphore packet to the ring to wait on or signal | ||
2554 | * other rings (r6xx-SI). | ||
2555 | */ | ||
2556 | void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, | ||
2557 | struct radeon_ring *ring, | ||
2558 | struct radeon_semaphore *semaphore, | ||
2559 | bool emit_wait) | ||
2560 | { | ||
2561 | u64 addr = semaphore->gpu_addr; | ||
2562 | u32 s = emit_wait ? 0 : 1; | ||
2563 | |||
2564 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0)); | ||
2565 | radeon_ring_write(ring, addr & 0xfffffffc); | ||
2566 | radeon_ring_write(ring, upper_32_bits(addr) & 0xff); | ||
2567 | } | ||
2568 | |||
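A hedged illustration of how the semaphore helper above pairs up across two rings; the ring pointers are placeholders, and the signal/wait reading of the s bit simply mirrors the emit_wait mapping in the code:

	/* producer side: emit_wait = false, so s = 1 (signal) */
	r600_dma_semaphore_ring_emit(rdev, ring_a, sem, false);
	/* consumer side: emit_wait = true, so s = 0 (wait until ring_a signals) */
	r600_dma_semaphore_ring_emit(rdev, ring_b, sem, true);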
2312 | int r600_copy_blit(struct radeon_device *rdev, | 2569 | int r600_copy_blit(struct radeon_device *rdev, |
2313 | uint64_t src_offset, | 2570 | uint64_t src_offset, |
2314 | uint64_t dst_offset, | 2571 | uint64_t dst_offset, |
@@ -2328,6 +2585,80 @@ int r600_copy_blit(struct radeon_device *rdev, | |||
2328 | return 0; | 2585 | return 0; |
2329 | } | 2586 | } |
2330 | 2587 | ||
2588 | /** | ||
2589 | * r600_copy_dma - copy pages using the DMA engine | ||
2590 | * | ||
2591 | * @rdev: radeon_device pointer | ||
2592 | * @src_offset: src GPU address | ||
2593 | * @dst_offset: dst GPU address | ||
2594 | * @num_gpu_pages: number of GPU pages to xfer | ||
2595 | * @fence: radeon fence object | ||
2596 | * | ||
2597 | * Copy GPU pages using the DMA engine (r6xx-r7xx). | ||
2598 | * Used by the radeon ttm implementation to move pages if | ||
2599 | * registered as the asic copy callback. | ||
2600 | */ | ||
2601 | int r600_copy_dma(struct radeon_device *rdev, | ||
2602 | uint64_t src_offset, uint64_t dst_offset, | ||
2603 | unsigned num_gpu_pages, | ||
2604 | struct radeon_fence **fence) | ||
2605 | { | ||
2606 | struct radeon_semaphore *sem = NULL; | ||
2607 | int ring_index = rdev->asic->copy.dma_ring_index; | ||
2608 | struct radeon_ring *ring = &rdev->ring[ring_index]; | ||
2609 | u32 size_in_dw, cur_size_in_dw; | ||
2610 | int i, num_loops; | ||
2611 | int r = 0; | ||
2612 | |||
2613 | r = radeon_semaphore_create(rdev, &sem); | ||
2614 | if (r) { | ||
2615 | DRM_ERROR("radeon: moving bo (%d).\n", r); | ||
2616 | return r; | ||
2617 | } | ||
2618 | |||
2619 | size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; | ||
2620 | num_loops = DIV_ROUND_UP(size_in_dw, 0xffff); | ||
2621 | r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8); | ||
2622 | if (r) { | ||
2623 | DRM_ERROR("radeon: moving bo (%d).\n", r); | ||
2624 | radeon_semaphore_free(rdev, &sem, NULL); | ||
2625 | return r; | ||
2626 | } | ||
2627 | |||
2628 | if (radeon_fence_need_sync(*fence, ring->idx)) { | ||
2629 | radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, | ||
2630 | ring->idx); | ||
2631 | radeon_fence_note_sync(*fence, ring->idx); | ||
2632 | } else { | ||
2633 | radeon_semaphore_free(rdev, &sem, NULL); | ||
2634 | } | ||
2635 | |||
2636 | for (i = 0; i < num_loops; i++) { | ||
2637 | cur_size_in_dw = size_in_dw; | ||
2638 | if (cur_size_in_dw > 0xFFFF) | ||
2639 | cur_size_in_dw = 0xFFFF; | ||
2640 | size_in_dw -= cur_size_in_dw; | ||
2641 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); | ||
2642 | radeon_ring_write(ring, dst_offset & 0xfffffffc); | ||
2643 | radeon_ring_write(ring, src_offset & 0xfffffffc); | ||
2644 | radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); | ||
2645 | radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); | ||
2646 | src_offset += cur_size_in_dw * 4; | ||
2647 | dst_offset += cur_size_in_dw * 4; | ||
2648 | } | ||
2649 | |||
2650 | r = radeon_fence_emit(rdev, fence, ring->idx); | ||
2651 | if (r) { | ||
2652 | radeon_ring_unlock_undo(rdev, ring); | ||
2653 | return r; | ||
2654 | } | ||
2655 | |||
2656 | radeon_ring_unlock_commit(rdev, ring); | ||
2657 | radeon_semaphore_free(rdev, &sem, *fence); | ||
2658 | |||
2659 | return r; | ||
2660 | } | ||
2661 | |||
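A hedged worked example of the sizing math in r600_copy_dma, assuming 4 KiB GPU pages (RADEON_GPU_PAGE_SHIFT == 12); copying 512 pages is 2 MiB:

	unsigned size_in_dw = (512 << 12) / 4;              /* 524288 dwords */
	int num_loops = DIV_ROUND_UP(size_in_dw, 0xffff);   /* 9: eight full 0xffff chunks plus an 8-dword tail */
	int ring_dw = num_loops * 5 + 8;                     /* 53 dwords reserved */

The extra 8 dwords plausibly cover the 3-dword semaphore sync and the 5-dword fence-plus-trap emitted by the helpers earlier in this file.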
2331 | int r600_set_surface_reg(struct radeon_device *rdev, int reg, | 2662 | int r600_set_surface_reg(struct radeon_device *rdev, int reg, |
2332 | uint32_t tiling_flags, uint32_t pitch, | 2663 | uint32_t tiling_flags, uint32_t pitch, |
2333 | uint32_t offset, uint32_t obj_size) | 2664 | uint32_t offset, uint32_t obj_size) |
@@ -2343,7 +2674,7 @@ void r600_clear_surface_reg(struct radeon_device *rdev, int reg) | |||
2343 | 2674 | ||
2344 | static int r600_startup(struct radeon_device *rdev) | 2675 | static int r600_startup(struct radeon_device *rdev) |
2345 | { | 2676 | { |
2346 | struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; | 2677 | struct radeon_ring *ring; |
2347 | int r; | 2678 | int r; |
2348 | 2679 | ||
2349 | /* enable pcie gen2 link */ | 2680 | /* enable pcie gen2 link */ |
@@ -2388,6 +2719,12 @@ static int r600_startup(struct radeon_device *rdev) | |||
2388 | return r; | 2719 | return r; |
2389 | } | 2720 | } |
2390 | 2721 | ||
2722 | r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); | ||
2723 | if (r) { | ||
2724 | dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); | ||
2725 | return r; | ||
2726 | } | ||
2727 | |||
2391 | /* Enable IRQ */ | 2728 | /* Enable IRQ */ |
2392 | r = r600_irq_init(rdev); | 2729 | r = r600_irq_init(rdev); |
2393 | if (r) { | 2730 | if (r) { |
@@ -2397,12 +2734,20 @@ static int r600_startup(struct radeon_device *rdev) | |||
2397 | } | 2734 | } |
2398 | r600_irq_set(rdev); | 2735 | r600_irq_set(rdev); |
2399 | 2736 | ||
2737 | ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; | ||
2400 | r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, | 2738 | r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, |
2401 | R600_CP_RB_RPTR, R600_CP_RB_WPTR, | 2739 | R600_CP_RB_RPTR, R600_CP_RB_WPTR, |
2402 | 0, 0xfffff, RADEON_CP_PACKET2); | 2740 | 0, 0xfffff, RADEON_CP_PACKET2); |
2741 | if (r) | ||
2742 | return r; | ||
2403 | 2743 | ||
2744 | ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; | ||
2745 | r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, | ||
2746 | DMA_RB_RPTR, DMA_RB_WPTR, | ||
2747 | 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); | ||
2404 | if (r) | 2748 | if (r) |
2405 | return r; | 2749 | return r; |
2750 | |||
2406 | r = r600_cp_load_microcode(rdev); | 2751 | r = r600_cp_load_microcode(rdev); |
2407 | if (r) | 2752 | if (r) |
2408 | return r; | 2753 | return r; |
@@ -2410,6 +2755,10 @@ static int r600_startup(struct radeon_device *rdev) | |||
2410 | if (r) | 2755 | if (r) |
2411 | return r; | 2756 | return r; |
2412 | 2757 | ||
2758 | r = r600_dma_resume(rdev); | ||
2759 | if (r) | ||
2760 | return r; | ||
2761 | |||
2413 | r = radeon_ib_pool_init(rdev); | 2762 | r = radeon_ib_pool_init(rdev); |
2414 | if (r) { | 2763 | if (r) { |
2415 | dev_err(rdev->dev, "IB initialization failed (%d).\n", r); | 2764 | dev_err(rdev->dev, "IB initialization failed (%d).\n", r); |
@@ -2465,7 +2814,7 @@ int r600_suspend(struct radeon_device *rdev) | |||
2465 | { | 2814 | { |
2466 | r600_audio_fini(rdev); | 2815 | r600_audio_fini(rdev); |
2467 | r600_cp_stop(rdev); | 2816 | r600_cp_stop(rdev); |
2468 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; | 2817 | r600_dma_stop(rdev); |
2469 | r600_irq_suspend(rdev); | 2818 | r600_irq_suspend(rdev); |
2470 | radeon_wb_disable(rdev); | 2819 | radeon_wb_disable(rdev); |
2471 | r600_pcie_gart_disable(rdev); | 2820 | r600_pcie_gart_disable(rdev); |
@@ -2538,6 +2887,9 @@ int r600_init(struct radeon_device *rdev) | |||
2538 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; | 2887 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; |
2539 | r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); | 2888 | r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); |
2540 | 2889 | ||
2890 | rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL; | ||
2891 | r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); | ||
2892 | |||
2541 | rdev->ih.ring_obj = NULL; | 2893 | rdev->ih.ring_obj = NULL; |
2542 | r600_ih_ring_init(rdev, 64 * 1024); | 2894 | r600_ih_ring_init(rdev, 64 * 1024); |
2543 | 2895 | ||
@@ -2550,6 +2902,7 @@ int r600_init(struct radeon_device *rdev) | |||
2550 | if (r) { | 2902 | if (r) { |
2551 | dev_err(rdev->dev, "disabling GPU acceleration\n"); | 2903 | dev_err(rdev->dev, "disabling GPU acceleration\n"); |
2552 | r600_cp_fini(rdev); | 2904 | r600_cp_fini(rdev); |
2905 | r600_dma_fini(rdev); | ||
2553 | r600_irq_fini(rdev); | 2906 | r600_irq_fini(rdev); |
2554 | radeon_wb_fini(rdev); | 2907 | radeon_wb_fini(rdev); |
2555 | radeon_ib_pool_fini(rdev); | 2908 | radeon_ib_pool_fini(rdev); |
@@ -2566,6 +2919,7 @@ void r600_fini(struct radeon_device *rdev) | |||
2566 | r600_audio_fini(rdev); | 2919 | r600_audio_fini(rdev); |
2567 | r600_blit_fini(rdev); | 2920 | r600_blit_fini(rdev); |
2568 | r600_cp_fini(rdev); | 2921 | r600_cp_fini(rdev); |
2922 | r600_dma_fini(rdev); | ||
2569 | r600_irq_fini(rdev); | 2923 | r600_irq_fini(rdev); |
2570 | radeon_wb_fini(rdev); | 2924 | radeon_wb_fini(rdev); |
2571 | radeon_ib_pool_fini(rdev); | 2925 | radeon_ib_pool_fini(rdev); |
@@ -2668,6 +3022,104 @@ free_scratch: | |||
2668 | return r; | 3022 | return r; |
2669 | } | 3023 | } |
2670 | 3024 | ||
3025 | /** | ||
3026 | * r600_dma_ib_test - test an IB on the DMA engine | ||
3027 | * | ||
3028 | * @rdev: radeon_device pointer | ||
3029 | * @ring: radeon_ring structure holding ring information | ||
3030 | * | ||
3031 | * Test a simple IB in the DMA ring (r6xx-SI). | ||
3032 | * Returns 0 on success, error on failure. | ||
3033 | */ | ||
3034 | int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) | ||
3035 | { | ||
3036 | struct radeon_ib ib; | ||
3037 | unsigned i; | ||
3038 | int r; | ||
3039 | void __iomem *ptr = (void *)rdev->vram_scratch.ptr; | ||
3040 | u32 tmp = 0; | ||
3041 | |||
3042 | if (!ptr) { | ||
3043 | DRM_ERROR("invalid vram scratch pointer\n"); | ||
3044 | return -EINVAL; | ||
3045 | } | ||
3046 | |||
3047 | tmp = 0xCAFEDEAD; | ||
3048 | writel(tmp, ptr); | ||
3049 | |||
3050 | r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); | ||
3051 | if (r) { | ||
3052 | DRM_ERROR("radeon: failed to get ib (%d).\n", r); | ||
3053 | return r; | ||
3054 | } | ||
3055 | |||
3056 | ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1); | ||
3057 | ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc; | ||
3058 | ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff; | ||
3059 | ib.ptr[3] = 0xDEADBEEF; | ||
3060 | ib.length_dw = 4; | ||
3061 | |||
3062 | r = radeon_ib_schedule(rdev, &ib, NULL); | ||
3063 | if (r) { | ||
3064 | radeon_ib_free(rdev, &ib); | ||
3065 | DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); | ||
3066 | return r; | ||
3067 | } | ||
3068 | r = radeon_fence_wait(ib.fence, false); | ||
3069 | if (r) { | ||
3070 | DRM_ERROR("radeon: fence wait failed (%d).\n", r); | ||
3071 | return r; | ||
3072 | } | ||
3073 | for (i = 0; i < rdev->usec_timeout; i++) { | ||
3074 | tmp = readl(ptr); | ||
3075 | if (tmp == 0xDEADBEEF) | ||
3076 | break; | ||
3077 | DRM_UDELAY(1); | ||
3078 | } | ||
3079 | if (i < rdev->usec_timeout) { | ||
3080 | DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); | ||
3081 | } else { | ||
3082 | DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); | ||
3083 | r = -EINVAL; | ||
3084 | } | ||
3085 | radeon_ib_free(rdev, &ib); | ||
3086 | return r; | ||
3087 | } | ||
3088 | |||
3089 | /** | ||
3090 | * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine | ||
3091 | * | ||
3092 | * @rdev: radeon_device pointer | ||
3093 | * @ib: IB object to schedule | ||
3094 | * | ||
3095 | * Schedule an IB in the DMA ring (r6xx-r7xx). | ||
3096 | */ | ||
3097 | void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) | ||
3098 | { | ||
3099 | struct radeon_ring *ring = &rdev->ring[ib->ring]; | ||
3100 | |||
3101 | if (rdev->wb.enabled) { | ||
3102 | u32 next_rptr = ring->wptr + 4; | ||
3103 | while ((next_rptr & 7) != 5) | ||
3104 | next_rptr++; | ||
3105 | next_rptr += 3; | ||
3106 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); | ||
3107 | radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); | ||
3108 | radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); | ||
3109 | radeon_ring_write(ring, next_rptr); | ||
3110 | } | ||
3111 | |||
3112 | /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. | ||
3113 | * Pad as necessary with NOPs. | ||
3114 | */ | ||
3115 | while ((ring->wptr & 7) != 5) | ||
3116 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); | ||
3117 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0)); | ||
3118 | radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); | ||
3119 | radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF)); | ||
3120 | |||
3121 | } | ||
3122 | |||
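A hedged dword accounting of the writeback fast path and the padding rule above:

	/* next_rptr = wptr + 4            : skip the 4-dword WRITE packet itself      */
	/* pad until (next_rptr % 8) == 5  : where the INDIRECT_BUFFER header will sit */
	/* next_rptr += 3                  : past the 3-dword IB packet, i.e. back on  */
	/*                                   an 8-dword boundary                       */

The same "% 8 == 5" padding of wptr before the INDIRECT_BUFFER packet is what makes the 3-dword packet end on the 8-dword boundary the in-code comment requires.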
2671 | /* | 3123 | /* |
2672 | * Interrupts | 3124 | * Interrupts |
2673 | * | 3125 | * |
@@ -2859,6 +3311,8 @@ static void r600_disable_interrupt_state(struct radeon_device *rdev) | |||
2859 | u32 tmp; | 3311 | u32 tmp; |
2860 | 3312 | ||
2861 | WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); | 3313 | WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); |
3314 | tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE; | ||
3315 | WREG32(DMA_CNTL, tmp); | ||
2862 | WREG32(GRBM_INT_CNTL, 0); | 3316 | WREG32(GRBM_INT_CNTL, 0); |
2863 | WREG32(DxMODE_INT_MASK, 0); | 3317 | WREG32(DxMODE_INT_MASK, 0); |
2864 | WREG32(D1GRPH_INTERRUPT_CONTROL, 0); | 3318 | WREG32(D1GRPH_INTERRUPT_CONTROL, 0); |
@@ -3000,6 +3454,7 @@ int r600_irq_set(struct radeon_device *rdev) | |||
3000 | u32 grbm_int_cntl = 0; | 3454 | u32 grbm_int_cntl = 0; |
3001 | u32 hdmi0, hdmi1; | 3455 | u32 hdmi0, hdmi1; |
3002 | u32 d1grph = 0, d2grph = 0; | 3456 | u32 d1grph = 0, d2grph = 0; |
3457 | u32 dma_cntl; | ||
3003 | 3458 | ||
3004 | if (!rdev->irq.installed) { | 3459 | if (!rdev->irq.installed) { |
3005 | WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); | 3460 | WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); |
@@ -3034,12 +3489,19 @@ int r600_irq_set(struct radeon_device *rdev) | |||
3034 | hdmi0 = RREG32(HDMI0_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK; | 3489 | hdmi0 = RREG32(HDMI0_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK; |
3035 | hdmi1 = RREG32(HDMI1_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK; | 3490 | hdmi1 = RREG32(HDMI1_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK; |
3036 | } | 3491 | } |
3492 | dma_cntl = RREG32(DMA_CNTL) & ~TRAP_ENABLE; | ||
3037 | 3493 | ||
3038 | if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { | 3494 | if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { |
3039 | DRM_DEBUG("r600_irq_set: sw int\n"); | 3495 | DRM_DEBUG("r600_irq_set: sw int\n"); |
3040 | cp_int_cntl |= RB_INT_ENABLE; | 3496 | cp_int_cntl |= RB_INT_ENABLE; |
3041 | cp_int_cntl |= TIME_STAMP_INT_ENABLE; | 3497 | cp_int_cntl |= TIME_STAMP_INT_ENABLE; |
3042 | } | 3498 | } |
3499 | |||
3500 | if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) { | ||
3501 | DRM_DEBUG("r600_irq_set: sw int dma\n"); | ||
3502 | dma_cntl |= TRAP_ENABLE; | ||
3503 | } | ||
3504 | |||
3043 | if (rdev->irq.crtc_vblank_int[0] || | 3505 | if (rdev->irq.crtc_vblank_int[0] || |
3044 | atomic_read(&rdev->irq.pflip[0])) { | 3506 | atomic_read(&rdev->irq.pflip[0])) { |
3045 | DRM_DEBUG("r600_irq_set: vblank 0\n"); | 3507 | DRM_DEBUG("r600_irq_set: vblank 0\n"); |
@@ -3084,6 +3546,7 @@ int r600_irq_set(struct radeon_device *rdev) | |||
3084 | } | 3546 | } |
3085 | 3547 | ||
3086 | WREG32(CP_INT_CNTL, cp_int_cntl); | 3548 | WREG32(CP_INT_CNTL, cp_int_cntl); |
3549 | WREG32(DMA_CNTL, dma_cntl); | ||
3087 | WREG32(DxMODE_INT_MASK, mode_int); | 3550 | WREG32(DxMODE_INT_MASK, mode_int); |
3088 | WREG32(D1GRPH_INTERRUPT_CONTROL, d1grph); | 3551 | WREG32(D1GRPH_INTERRUPT_CONTROL, d1grph); |
3089 | WREG32(D2GRPH_INTERRUPT_CONTROL, d2grph); | 3552 | WREG32(D2GRPH_INTERRUPT_CONTROL, d2grph); |
@@ -3463,6 +3926,10 @@ restart_ih: | |||
3463 | DRM_DEBUG("IH: CP EOP\n"); | 3926 | DRM_DEBUG("IH: CP EOP\n"); |
3464 | radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); | 3927 | radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); |
3465 | break; | 3928 | break; |
3929 | case 224: /* DMA trap event */ | ||
3930 | DRM_DEBUG("IH: DMA trap\n"); | ||
3931 | radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX); | ||
3932 | break; | ||
3466 | case 233: /* GUI IDLE */ | 3933 | case 233: /* GUI IDLE */ |
3467 | DRM_DEBUG("IH: GUI idle\n"); | 3934 | DRM_DEBUG("IH: GUI idle\n"); |
3468 | break; | 3935 | break; |
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index fa6f37099ba9..a596c554a3a0 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h | |||
@@ -590,9 +590,59 @@ | |||
590 | #define WAIT_2D_IDLECLEAN_bit (1 << 16) | 590 | #define WAIT_2D_IDLECLEAN_bit (1 << 16) |
591 | #define WAIT_3D_IDLECLEAN_bit (1 << 17) | 591 | #define WAIT_3D_IDLECLEAN_bit (1 << 17) |
592 | 592 | ||
593 | /* async DMA */ | ||
594 | #define DMA_TILING_CONFIG 0x3ec4 | ||
595 | #define DMA_CONFIG 0x3e4c | ||
596 | |||
597 | #define DMA_RB_CNTL 0xd000 | ||
598 | # define DMA_RB_ENABLE (1 << 0) | ||
599 | # define DMA_RB_SIZE(x) ((x) << 1) /* log2 */ | ||
600 | # define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */ | ||
601 | # define DMA_RPTR_WRITEBACK_ENABLE (1 << 12) | ||
602 | # define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */ | ||
603 | # define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */ | ||
604 | #define DMA_RB_BASE 0xd004 | ||
605 | #define DMA_RB_RPTR 0xd008 | ||
606 | #define DMA_RB_WPTR 0xd00c | ||
607 | |||
608 | #define DMA_RB_RPTR_ADDR_HI 0xd01c | ||
609 | #define DMA_RB_RPTR_ADDR_LO 0xd020 | ||
610 | |||
611 | #define DMA_IB_CNTL 0xd024 | ||
612 | # define DMA_IB_ENABLE (1 << 0) | ||
613 | # define DMA_IB_SWAP_ENABLE (1 << 4) | ||
614 | #define DMA_IB_RPTR 0xd028 | ||
615 | #define DMA_CNTL 0xd02c | ||
616 | # define TRAP_ENABLE (1 << 0) | ||
617 | # define SEM_INCOMPLETE_INT_ENABLE (1 << 1) | ||
618 | # define SEM_WAIT_INT_ENABLE (1 << 2) | ||
619 | # define DATA_SWAP_ENABLE (1 << 3) | ||
620 | # define FENCE_SWAP_ENABLE (1 << 4) | ||
621 | # define CTXEMPTY_INT_ENABLE (1 << 28) | ||
622 | #define DMA_STATUS_REG 0xd034 | ||
623 | # define DMA_IDLE (1 << 0) | ||
624 | #define DMA_SEM_INCOMPLETE_TIMER_CNTL 0xd044 | ||
625 | #define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0xd048 | ||
626 | #define DMA_MODE 0xd0bc | ||
627 | |||
628 | /* async DMA packets */ | ||
629 | #define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \ | ||
630 | (((t) & 0x1) << 23) | \ | ||
631 | (((s) & 0x1) << 22) | \ | ||
632 | (((n) & 0xFFFF) << 0)) | ||
633 | /* async DMA Packet types */ | ||
634 | #define DMA_PACKET_WRITE 0x2 | ||
635 | #define DMA_PACKET_COPY 0x3 | ||
636 | #define DMA_PACKET_INDIRECT_BUFFER 0x4 | ||
637 | #define DMA_PACKET_SEMAPHORE 0x5 | ||
638 | #define DMA_PACKET_FENCE 0x6 | ||
639 | #define DMA_PACKET_TRAP 0x7 | ||
640 | #define DMA_PACKET_CONSTANT_FILL 0xd /* 7xx only */ | ||
641 | #define DMA_PACKET_NOP 0xf | ||
642 | |||
593 | #define IH_RB_CNTL 0x3e00 | 643 | #define IH_RB_CNTL 0x3e00 |
594 | # define IH_RB_ENABLE (1 << 0) | 644 | # define IH_RB_ENABLE (1 << 0) |
595 | # define IH_IB_SIZE(x) ((x) << 1) /* log2 */ | 645 | # define IH_RB_SIZE(x) ((x) << 1) /* log2 */ |
596 | # define IH_RB_FULL_DRAIN_ENABLE (1 << 6) | 646 | # define IH_RB_FULL_DRAIN_ENABLE (1 << 6) |
597 | # define IH_WPTR_WRITEBACK_ENABLE (1 << 8) | 647 | # define IH_WPTR_WRITEBACK_ENABLE (1 << 8) |
598 | # define IH_WPTR_WRITEBACK_TIMER(x) ((x) << 9) /* log2 */ | 648 | # define IH_WPTR_WRITEBACK_TIMER(x) ((x) << 9) /* log2 */ |
@@ -637,7 +687,9 @@ | |||
637 | #define TN_RLC_CLEAR_STATE_RESTORE_BASE 0x3f20 | 687 | #define TN_RLC_CLEAR_STATE_RESTORE_BASE 0x3f20 |
638 | 688 | ||
639 | #define SRBM_SOFT_RESET 0xe60 | 689 | #define SRBM_SOFT_RESET 0xe60 |
690 | # define SOFT_RESET_DMA (1 << 12) | ||
640 | # define SOFT_RESET_RLC (1 << 13) | 691 | # define SOFT_RESET_RLC (1 << 13) |
692 | # define RV770_SOFT_RESET_DMA (1 << 20) | ||
641 | 693 | ||
642 | #define CP_INT_CNTL 0xc124 | 694 | #define CP_INT_CNTL 0xc124 |
643 | # define CNTX_BUSY_INT_ENABLE (1 << 19) | 695 | # define CNTX_BUSY_INT_ENABLE (1 << 19) |
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8c42d54c2e26..5d68346b2c01 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h | |||
@@ -109,7 +109,7 @@ extern int radeon_lockup_timeout; | |||
109 | #define RADEON_BIOS_NUM_SCRATCH 8 | 109 | #define RADEON_BIOS_NUM_SCRATCH 8 |
110 | 110 | ||
111 | /* max number of rings */ | 111 | /* max number of rings */ |
112 | #define RADEON_NUM_RINGS 3 | 112 | #define RADEON_NUM_RINGS 5 |
113 | 113 | ||
114 | /* fence seq are set to this number when signaled */ | 114 | /* fence seq are set to this number when signaled */ |
115 | #define RADEON_FENCE_SIGNALED_SEQ 0LL | 115 | #define RADEON_FENCE_SIGNALED_SEQ 0LL |
@@ -122,6 +122,11 @@ extern int radeon_lockup_timeout; | |||
122 | #define CAYMAN_RING_TYPE_CP1_INDEX 1 | 122 | #define CAYMAN_RING_TYPE_CP1_INDEX 1 |
123 | #define CAYMAN_RING_TYPE_CP2_INDEX 2 | 123 | #define CAYMAN_RING_TYPE_CP2_INDEX 2 |
124 | 124 | ||
125 | /* R600+ has an async dma ring */ | ||
126 | #define R600_RING_TYPE_DMA_INDEX 3 | ||
127 | /* cayman adds a second async dma ring */ | ||
128 | #define CAYMAN_RING_TYPE_DMA1_INDEX 4 | ||
129 | |||
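A hedged recap of the resulting ring table (index 0 for the GFX ring is implied by the surrounding defines rather than shown in this hunk):

	/* 0  RADEON_RING_TYPE_GFX_INDEX      gfx CP                     */
	/* 1  CAYMAN_RING_TYPE_CP1_INDEX      cayman CP ring 1           */
	/* 2  CAYMAN_RING_TYPE_CP2_INDEX      cayman CP ring 2           */
	/* 3  R600_RING_TYPE_DMA_INDEX        async DMA (new)            */
	/* 4  CAYMAN_RING_TYPE_DMA1_INDEX     second async DMA (new)     */
	/* => RADEON_NUM_RINGS bumped from 3 to 5                        */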
125 | /* hardcode those limit for now */ | 130 | /* hardcode those limit for now */ |
126 | #define RADEON_VA_IB_OFFSET (1 << 20) | 131 | #define RADEON_VA_IB_OFFSET (1 << 20) |
127 | #define RADEON_VA_RESERVED_SIZE (8 << 20) | 132 | #define RADEON_VA_RESERVED_SIZE (8 << 20) |
@@ -787,6 +792,15 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigne | |||
787 | void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp); | 792 | void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp); |
788 | 793 | ||
789 | 794 | ||
795 | /* r600 async dma */ | ||
796 | void r600_dma_stop(struct radeon_device *rdev); | ||
797 | int r600_dma_resume(struct radeon_device *rdev); | ||
798 | void r600_dma_fini(struct radeon_device *rdev); | ||
799 | |||
800 | void cayman_dma_stop(struct radeon_device *rdev); | ||
801 | int cayman_dma_resume(struct radeon_device *rdev); | ||
802 | void cayman_dma_fini(struct radeon_device *rdev); | ||
803 | |||
790 | /* | 804 | /* |
791 | * CS. | 805 | * CS. |
792 | */ | 806 | */ |
@@ -883,7 +897,9 @@ struct radeon_wb { | |||
883 | #define RADEON_WB_CP_RPTR_OFFSET 1024 | 897 | #define RADEON_WB_CP_RPTR_OFFSET 1024 |
884 | #define RADEON_WB_CP1_RPTR_OFFSET 1280 | 898 | #define RADEON_WB_CP1_RPTR_OFFSET 1280 |
885 | #define RADEON_WB_CP2_RPTR_OFFSET 1536 | 899 | #define RADEON_WB_CP2_RPTR_OFFSET 1536 |
900 | #define R600_WB_DMA_RPTR_OFFSET 1792 | ||
886 | #define R600_WB_IH_WPTR_OFFSET 2048 | 901 | #define R600_WB_IH_WPTR_OFFSET 2048 |
902 | #define CAYMAN_WB_DMA1_RPTR_OFFSET 2304 | ||
887 | #define R600_WB_EVENT_OFFSET 3072 | 903 | #define R600_WB_EVENT_OFFSET 3072 |
888 | 904 | ||
889 | /** | 905 | /** |
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 654520b95ab7..3ea0475f9a95 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c | |||
@@ -947,6 +947,15 @@ static struct radeon_asic r600_asic = { | |||
947 | .ring_test = &r600_ring_test, | 947 | .ring_test = &r600_ring_test, |
948 | .ib_test = &r600_ib_test, | 948 | .ib_test = &r600_ib_test, |
949 | .is_lockup = &r600_gpu_is_lockup, | 949 | .is_lockup = &r600_gpu_is_lockup, |
950 | }, | ||
951 | [R600_RING_TYPE_DMA_INDEX] = { | ||
952 | .ib_execute = &r600_dma_ring_ib_execute, | ||
953 | .emit_fence = &r600_dma_fence_ring_emit, | ||
954 | .emit_semaphore = &r600_dma_semaphore_ring_emit, | ||
955 | .cs_parse = NULL, | ||
956 | .ring_test = &r600_dma_ring_test, | ||
957 | .ib_test = &r600_dma_ib_test, | ||
958 | .is_lockup = &r600_dma_is_lockup, | ||
950 | } | 959 | } |
951 | }, | 960 | }, |
952 | .irq = { | 961 | .irq = { |
@@ -963,10 +972,10 @@ static struct radeon_asic r600_asic = { | |||
963 | .copy = { | 972 | .copy = { |
964 | .blit = &r600_copy_blit, | 973 | .blit = &r600_copy_blit, |
965 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 974 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, |
966 | .dma = NULL, | 975 | .dma = &r600_copy_dma, |
967 | .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 976 | .dma_ring_index = R600_RING_TYPE_DMA_INDEX, |
968 | .copy = &r600_copy_blit, | 977 | .copy = &r600_copy_dma, |
969 | .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 978 | .copy_ring_index = R600_RING_TYPE_DMA_INDEX, |
970 | }, | 979 | }, |
971 | .surface = { | 980 | .surface = { |
972 | .set_reg = r600_set_surface_reg, | 981 | .set_reg = r600_set_surface_reg, |
@@ -1022,6 +1031,15 @@ static struct radeon_asic rs780_asic = { | |||
1022 | .ring_test = &r600_ring_test, | 1031 | .ring_test = &r600_ring_test, |
1023 | .ib_test = &r600_ib_test, | 1032 | .ib_test = &r600_ib_test, |
1024 | .is_lockup = &r600_gpu_is_lockup, | 1033 | .is_lockup = &r600_gpu_is_lockup, |
1034 | }, | ||
1035 | [R600_RING_TYPE_DMA_INDEX] = { | ||
1036 | .ib_execute = &r600_dma_ring_ib_execute, | ||
1037 | .emit_fence = &r600_dma_fence_ring_emit, | ||
1038 | .emit_semaphore = &r600_dma_semaphore_ring_emit, | ||
1039 | .cs_parse = NULL, | ||
1040 | .ring_test = &r600_dma_ring_test, | ||
1041 | .ib_test = &r600_dma_ib_test, | ||
1042 | .is_lockup = &r600_dma_is_lockup, | ||
1025 | } | 1043 | } |
1026 | }, | 1044 | }, |
1027 | .irq = { | 1045 | .irq = { |
@@ -1038,10 +1056,10 @@ static struct radeon_asic rs780_asic = { | |||
1038 | .copy = { | 1056 | .copy = { |
1039 | .blit = &r600_copy_blit, | 1057 | .blit = &r600_copy_blit, |
1040 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1058 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, |
1041 | .dma = NULL, | 1059 | .dma = &r600_copy_dma, |
1042 | .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1060 | .dma_ring_index = R600_RING_TYPE_DMA_INDEX, |
1043 | .copy = &r600_copy_blit, | 1061 | .copy = &r600_copy_dma, |
1044 | .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1062 | .copy_ring_index = R600_RING_TYPE_DMA_INDEX, |
1045 | }, | 1063 | }, |
1046 | .surface = { | 1064 | .surface = { |
1047 | .set_reg = r600_set_surface_reg, | 1065 | .set_reg = r600_set_surface_reg, |
@@ -1097,6 +1115,15 @@ static struct radeon_asic rv770_asic = { | |||
1097 | .ring_test = &r600_ring_test, | 1115 | .ring_test = &r600_ring_test, |
1098 | .ib_test = &r600_ib_test, | 1116 | .ib_test = &r600_ib_test, |
1099 | .is_lockup = &r600_gpu_is_lockup, | 1117 | .is_lockup = &r600_gpu_is_lockup, |
1118 | }, | ||
1119 | [R600_RING_TYPE_DMA_INDEX] = { | ||
1120 | .ib_execute = &r600_dma_ring_ib_execute, | ||
1121 | .emit_fence = &r600_dma_fence_ring_emit, | ||
1122 | .emit_semaphore = &r600_dma_semaphore_ring_emit, | ||
1123 | .cs_parse = NULL, | ||
1124 | .ring_test = &r600_dma_ring_test, | ||
1125 | .ib_test = &r600_dma_ib_test, | ||
1126 | .is_lockup = &r600_dma_is_lockup, | ||
1100 | } | 1127 | } |
1101 | }, | 1128 | }, |
1102 | .irq = { | 1129 | .irq = { |
@@ -1113,10 +1140,10 @@ static struct radeon_asic rv770_asic = { | |||
1113 | .copy = { | 1140 | .copy = { |
1114 | .blit = &r600_copy_blit, | 1141 | .blit = &r600_copy_blit, |
1115 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1142 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, |
1116 | .dma = NULL, | 1143 | .dma = &r600_copy_dma, |
1117 | .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1144 | .dma_ring_index = R600_RING_TYPE_DMA_INDEX, |
1118 | .copy = &r600_copy_blit, | 1145 | .copy = &r600_copy_dma, |
1119 | .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1146 | .copy_ring_index = R600_RING_TYPE_DMA_INDEX, |
1120 | }, | 1147 | }, |
1121 | .surface = { | 1148 | .surface = { |
1122 | .set_reg = r600_set_surface_reg, | 1149 | .set_reg = r600_set_surface_reg, |
@@ -1172,6 +1199,15 @@ static struct radeon_asic evergreen_asic = { | |||
1172 | .ring_test = &r600_ring_test, | 1199 | .ring_test = &r600_ring_test, |
1173 | .ib_test = &r600_ib_test, | 1200 | .ib_test = &r600_ib_test, |
1174 | .is_lockup = &evergreen_gpu_is_lockup, | 1201 | .is_lockup = &evergreen_gpu_is_lockup, |
1202 | }, | ||
1203 | [R600_RING_TYPE_DMA_INDEX] = { | ||
1204 | .ib_execute = &evergreen_dma_ring_ib_execute, | ||
1205 | .emit_fence = &evergreen_dma_fence_ring_emit, | ||
1206 | .emit_semaphore = &r600_dma_semaphore_ring_emit, | ||
1207 | .cs_parse = NULL, | ||
1208 | .ring_test = &r600_dma_ring_test, | ||
1209 | .ib_test = &r600_dma_ib_test, | ||
1210 | .is_lockup = &r600_dma_is_lockup, | ||
1175 | } | 1211 | } |
1176 | }, | 1212 | }, |
1177 | .irq = { | 1213 | .irq = { |
@@ -1188,10 +1224,10 @@ static struct radeon_asic evergreen_asic = { | |||
1188 | .copy = { | 1224 | .copy = { |
1189 | .blit = &r600_copy_blit, | 1225 | .blit = &r600_copy_blit, |
1190 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1226 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, |
1191 | .dma = NULL, | 1227 | .dma = &evergreen_copy_dma, |
1192 | .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1228 | .dma_ring_index = R600_RING_TYPE_DMA_INDEX, |
1193 | .copy = &r600_copy_blit, | 1229 | .copy = &evergreen_copy_dma, |
1194 | .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1230 | .copy_ring_index = R600_RING_TYPE_DMA_INDEX, |
1195 | }, | 1231 | }, |
1196 | .surface = { | 1232 | .surface = { |
1197 | .set_reg = r600_set_surface_reg, | 1233 | .set_reg = r600_set_surface_reg, |
@@ -1248,6 +1284,15 @@ static struct radeon_asic sumo_asic = { | |||
1248 | .ib_test = &r600_ib_test, | 1284 | .ib_test = &r600_ib_test, |
1249 | .is_lockup = &evergreen_gpu_is_lockup, | 1285 | .is_lockup = &evergreen_gpu_is_lockup, |
1250 | }, | 1286 | }, |
1287 | [R600_RING_TYPE_DMA_INDEX] = { | ||
1288 | .ib_execute = &evergreen_dma_ring_ib_execute, | ||
1289 | .emit_fence = &evergreen_dma_fence_ring_emit, | ||
1290 | .emit_semaphore = &r600_dma_semaphore_ring_emit, | ||
1291 | .cs_parse = NULL, | ||
1292 | .ring_test = &r600_dma_ring_test, | ||
1293 | .ib_test = &r600_dma_ib_test, | ||
1294 | .is_lockup = &r600_dma_is_lockup, | ||
1295 | } | ||
1251 | }, | 1296 | }, |
1252 | .irq = { | 1297 | .irq = { |
1253 | .set = &evergreen_irq_set, | 1298 | .set = &evergreen_irq_set, |
@@ -1263,10 +1308,10 @@ static struct radeon_asic sumo_asic = { | |||
1263 | .copy = { | 1308 | .copy = { |
1264 | .blit = &r600_copy_blit, | 1309 | .blit = &r600_copy_blit, |
1265 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1310 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, |
1266 | .dma = NULL, | 1311 | .dma = &evergreen_copy_dma, |
1267 | .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1312 | .dma_ring_index = R600_RING_TYPE_DMA_INDEX, |
1268 | .copy = &r600_copy_blit, | 1313 | .copy = &evergreen_copy_dma, |
1269 | .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1314 | .copy_ring_index = R600_RING_TYPE_DMA_INDEX, |
1270 | }, | 1315 | }, |
1271 | .surface = { | 1316 | .surface = { |
1272 | .set_reg = r600_set_surface_reg, | 1317 | .set_reg = r600_set_surface_reg, |
@@ -1322,6 +1367,15 @@ static struct radeon_asic btc_asic = { | |||
1322 | .ring_test = &r600_ring_test, | 1367 | .ring_test = &r600_ring_test, |
1323 | .ib_test = &r600_ib_test, | 1368 | .ib_test = &r600_ib_test, |
1324 | .is_lockup = &evergreen_gpu_is_lockup, | 1369 | .is_lockup = &evergreen_gpu_is_lockup, |
1370 | }, | ||
1371 | [R600_RING_TYPE_DMA_INDEX] = { | ||
1372 | .ib_execute = &evergreen_dma_ring_ib_execute, | ||
1373 | .emit_fence = &evergreen_dma_fence_ring_emit, | ||
1374 | .emit_semaphore = &r600_dma_semaphore_ring_emit, | ||
1375 | .cs_parse = NULL, | ||
1376 | .ring_test = &r600_dma_ring_test, | ||
1377 | .ib_test = &r600_dma_ib_test, | ||
1378 | .is_lockup = &r600_dma_is_lockup, | ||
1325 | } | 1379 | } |
1326 | }, | 1380 | }, |
1327 | .irq = { | 1381 | .irq = { |
@@ -1338,10 +1392,10 @@ static struct radeon_asic btc_asic = { | |||
1338 | .copy = { | 1392 | .copy = { |
1339 | .blit = &r600_copy_blit, | 1393 | .blit = &r600_copy_blit, |
1340 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1394 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, |
1341 | .dma = NULL, | 1395 | .dma = &evergreen_copy_dma, |
1342 | .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1396 | .dma_ring_index = R600_RING_TYPE_DMA_INDEX, |
1343 | .copy = &r600_copy_blit, | 1397 | .copy = &evergreen_copy_dma, |
1344 | .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1398 | .copy_ring_index = R600_RING_TYPE_DMA_INDEX, |
1345 | }, | 1399 | }, |
1346 | .surface = { | 1400 | .surface = { |
1347 | .set_reg = r600_set_surface_reg, | 1401 | .set_reg = r600_set_surface_reg, |
@@ -1391,7 +1445,7 @@ static struct radeon_asic cayman_asic = { | |||
1391 | .vm = { | 1445 | .vm = { |
1392 | .init = &cayman_vm_init, | 1446 | .init = &cayman_vm_init, |
1393 | .fini = &cayman_vm_fini, | 1447 | .fini = &cayman_vm_fini, |
1394 | .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1448 | .pt_ring_index = R600_RING_TYPE_DMA_INDEX, |
1395 | .set_page = &cayman_vm_set_page, | 1449 | .set_page = &cayman_vm_set_page, |
1396 | }, | 1450 | }, |
1397 | .ring = { | 1451 | .ring = { |
@@ -1427,6 +1481,26 @@ static struct radeon_asic cayman_asic = { | |||
1427 | .ib_test = &r600_ib_test, | 1481 | .ib_test = &r600_ib_test, |
1428 | .is_lockup = &evergreen_gpu_is_lockup, | 1482 | .is_lockup = &evergreen_gpu_is_lockup, |
1429 | .vm_flush = &cayman_vm_flush, | 1483 | .vm_flush = &cayman_vm_flush, |
1484 | }, | ||
1485 | [R600_RING_TYPE_DMA_INDEX] = { | ||
1486 | .ib_execute = &cayman_dma_ring_ib_execute, | ||
1487 | .emit_fence = &evergreen_dma_fence_ring_emit, | ||
1488 | .emit_semaphore = &r600_dma_semaphore_ring_emit, | ||
1489 | .cs_parse = NULL, | ||
1490 | .ring_test = &r600_dma_ring_test, | ||
1491 | .ib_test = &r600_dma_ib_test, | ||
1492 | .is_lockup = &cayman_dma_is_lockup, | ||
1493 | .vm_flush = &cayman_dma_vm_flush, | ||
1494 | }, | ||
1495 | [CAYMAN_RING_TYPE_DMA1_INDEX] = { | ||
1496 | .ib_execute = &cayman_dma_ring_ib_execute, | ||
1497 | .emit_fence = &evergreen_dma_fence_ring_emit, | ||
1498 | .emit_semaphore = &r600_dma_semaphore_ring_emit, | ||
1499 | .cs_parse = NULL, | ||
1500 | .ring_test = &r600_dma_ring_test, | ||
1501 | .ib_test = &r600_dma_ib_test, | ||
1502 | .is_lockup = &cayman_dma_is_lockup, | ||
1503 | .vm_flush = &cayman_dma_vm_flush, | ||
1430 | } | 1504 | } |
1431 | }, | 1505 | }, |
1432 | .irq = { | 1506 | .irq = { |
@@ -1443,10 +1517,10 @@ static struct radeon_asic cayman_asic = { | |||
1443 | .copy = { | 1517 | .copy = { |
1444 | .blit = &r600_copy_blit, | 1518 | .blit = &r600_copy_blit, |
1445 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1519 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, |
1446 | .dma = NULL, | 1520 | .dma = &evergreen_copy_dma, |
1447 | .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1521 | .dma_ring_index = R600_RING_TYPE_DMA_INDEX, |
1448 | .copy = &r600_copy_blit, | 1522 | .copy = &evergreen_copy_dma, |
1449 | .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1523 | .copy_ring_index = R600_RING_TYPE_DMA_INDEX, |
1450 | }, | 1524 | }, |
1451 | .surface = { | 1525 | .surface = { |
1452 | .set_reg = r600_set_surface_reg, | 1526 | .set_reg = r600_set_surface_reg, |
@@ -1496,7 +1570,7 @@ static struct radeon_asic trinity_asic = { | |||
1496 | .vm = { | 1570 | .vm = { |
1497 | .init = &cayman_vm_init, | 1571 | .init = &cayman_vm_init, |
1498 | .fini = &cayman_vm_fini, | 1572 | .fini = &cayman_vm_fini, |
1499 | .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1573 | .pt_ring_index = R600_RING_TYPE_DMA_INDEX, |
1500 | .set_page = &cayman_vm_set_page, | 1574 | .set_page = &cayman_vm_set_page, |
1501 | }, | 1575 | }, |
1502 | .ring = { | 1576 | .ring = { |
@@ -1532,6 +1606,26 @@ static struct radeon_asic trinity_asic = { | |||
1532 | .ib_test = &r600_ib_test, | 1606 | .ib_test = &r600_ib_test, |
1533 | .is_lockup = &evergreen_gpu_is_lockup, | 1607 | .is_lockup = &evergreen_gpu_is_lockup, |
1534 | .vm_flush = &cayman_vm_flush, | 1608 | .vm_flush = &cayman_vm_flush, |
1609 | }, | ||
1610 | [R600_RING_TYPE_DMA_INDEX] = { | ||
1611 | .ib_execute = &cayman_dma_ring_ib_execute, | ||
1612 | .emit_fence = &evergreen_dma_fence_ring_emit, | ||
1613 | .emit_semaphore = &r600_dma_semaphore_ring_emit, | ||
1614 | .cs_parse = NULL, | ||
1615 | .ring_test = &r600_dma_ring_test, | ||
1616 | .ib_test = &r600_dma_ib_test, | ||
1617 | .is_lockup = &cayman_dma_is_lockup, | ||
1618 | .vm_flush = &cayman_dma_vm_flush, | ||
1619 | }, | ||
1620 | [CAYMAN_RING_TYPE_DMA1_INDEX] = { | ||
1621 | .ib_execute = &cayman_dma_ring_ib_execute, | ||
1622 | .emit_fence = &evergreen_dma_fence_ring_emit, | ||
1623 | .emit_semaphore = &r600_dma_semaphore_ring_emit, | ||
1624 | .cs_parse = NULL, | ||
1625 | .ring_test = &r600_dma_ring_test, | ||
1626 | .ib_test = &r600_dma_ib_test, | ||
1627 | .is_lockup = &cayman_dma_is_lockup, | ||
1628 | .vm_flush = &cayman_dma_vm_flush, | ||
1535 | } | 1629 | } |
1536 | }, | 1630 | }, |
1537 | .irq = { | 1631 | .irq = { |
@@ -1548,10 +1642,10 @@ static struct radeon_asic trinity_asic = { | |||
1548 | .copy = { | 1642 | .copy = { |
1549 | .blit = &r600_copy_blit, | 1643 | .blit = &r600_copy_blit, |
1550 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1644 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, |
1551 | .dma = NULL, | 1645 | .dma = &evergreen_copy_dma, |
1552 | .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1646 | .dma_ring_index = R600_RING_TYPE_DMA_INDEX, |
1553 | .copy = &r600_copy_blit, | 1647 | .copy = &evergreen_copy_dma, |
1554 | .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1648 | .copy_ring_index = R600_RING_TYPE_DMA_INDEX, |
1555 | }, | 1649 | }, |
1556 | .surface = { | 1650 | .surface = { |
1557 | .set_reg = r600_set_surface_reg, | 1651 | .set_reg = r600_set_surface_reg, |
@@ -1601,7 +1695,7 @@ static struct radeon_asic si_asic = { | |||
1601 | .vm = { | 1695 | .vm = { |
1602 | .init = &si_vm_init, | 1696 | .init = &si_vm_init, |
1603 | .fini = &si_vm_fini, | 1697 | .fini = &si_vm_fini, |
1604 | .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1698 | .pt_ring_index = R600_RING_TYPE_DMA_INDEX, |
1605 | .set_page = &si_vm_set_page, | 1699 | .set_page = &si_vm_set_page, |
1606 | }, | 1700 | }, |
1607 | .ring = { | 1701 | .ring = { |
@@ -1637,6 +1731,26 @@ static struct radeon_asic si_asic = { | |||
1637 | .ib_test = &r600_ib_test, | 1731 | .ib_test = &r600_ib_test, |
1638 | .is_lockup = &si_gpu_is_lockup, | 1732 | .is_lockup = &si_gpu_is_lockup, |
1639 | .vm_flush = &si_vm_flush, | 1733 | .vm_flush = &si_vm_flush, |
1734 | }, | ||
1735 | [R600_RING_TYPE_DMA_INDEX] = { | ||
1736 | .ib_execute = &cayman_dma_ring_ib_execute, | ||
1737 | .emit_fence = &evergreen_dma_fence_ring_emit, | ||
1738 | .emit_semaphore = &r600_dma_semaphore_ring_emit, | ||
1739 | .cs_parse = NULL, | ||
1740 | .ring_test = &r600_dma_ring_test, | ||
1741 | .ib_test = &r600_dma_ib_test, | ||
1742 | .is_lockup = &cayman_dma_is_lockup, | ||
1743 | .vm_flush = &si_dma_vm_flush, | ||
1744 | }, | ||
1745 | [CAYMAN_RING_TYPE_DMA1_INDEX] = { | ||
1746 | .ib_execute = &cayman_dma_ring_ib_execute, | ||
1747 | .emit_fence = &evergreen_dma_fence_ring_emit, | ||
1748 | .emit_semaphore = &r600_dma_semaphore_ring_emit, | ||
1749 | .cs_parse = NULL, | ||
1750 | .ring_test = &r600_dma_ring_test, | ||
1751 | .ib_test = &r600_dma_ib_test, | ||
1752 | .is_lockup = &cayman_dma_is_lockup, | ||
1753 | .vm_flush = &si_dma_vm_flush, | ||
1640 | } | 1754 | } |
1641 | }, | 1755 | }, |
1642 | .irq = { | 1756 | .irq = { |
@@ -1653,10 +1767,10 @@ static struct radeon_asic si_asic = { | |||
1653 | .copy = { | 1767 | .copy = { |
1654 | .blit = NULL, | 1768 | .blit = NULL, |
1655 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1769 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, |
1656 | .dma = NULL, | 1770 | .dma = &si_copy_dma, |
1657 | .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1771 | .dma_ring_index = R600_RING_TYPE_DMA_INDEX, |
1658 | .copy = NULL, | 1772 | .copy = &si_copy_dma, |
1659 | .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, | 1773 | .copy_ring_index = R600_RING_TYPE_DMA_INDEX, |
1660 | }, | 1774 | }, |
1661 | .surface = { | 1775 | .surface = { |
1662 | .set_reg = r600_set_surface_reg, | 1776 | .set_reg = r600_set_surface_reg, |
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 5e3a0e5c6be1..ae56673d2410 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h | |||
@@ -309,6 +309,14 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev, | |||
309 | struct radeon_ring *cp, | 309 | struct radeon_ring *cp, |
310 | struct radeon_semaphore *semaphore, | 310 | struct radeon_semaphore *semaphore, |
311 | bool emit_wait); | 311 | bool emit_wait); |
312 | void r600_dma_fence_ring_emit(struct radeon_device *rdev, | ||
313 | struct radeon_fence *fence); | ||
314 | void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, | ||
315 | struct radeon_ring *ring, | ||
316 | struct radeon_semaphore *semaphore, | ||
317 | bool emit_wait); | ||
318 | void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); | ||
319 | bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring); | ||
312 | bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp); | 320 | bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp); |
313 | int r600_asic_reset(struct radeon_device *rdev); | 321 | int r600_asic_reset(struct radeon_device *rdev); |
314 | int r600_set_surface_reg(struct radeon_device *rdev, int reg, | 322 | int r600_set_surface_reg(struct radeon_device *rdev, int reg, |
@@ -316,11 +324,16 @@ int r600_set_surface_reg(struct radeon_device *rdev, int reg, | |||
316 | uint32_t offset, uint32_t obj_size); | 324 | uint32_t offset, uint32_t obj_size); |
317 | void r600_clear_surface_reg(struct radeon_device *rdev, int reg); | 325 | void r600_clear_surface_reg(struct radeon_device *rdev, int reg); |
318 | int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); | 326 | int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); |
327 | int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); | ||
319 | void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); | 328 | void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); |
320 | int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); | 329 | int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); |
330 | int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); | ||
321 | int r600_copy_blit(struct radeon_device *rdev, | 331 | int r600_copy_blit(struct radeon_device *rdev, |
322 | uint64_t src_offset, uint64_t dst_offset, | 332 | uint64_t src_offset, uint64_t dst_offset, |
323 | unsigned num_gpu_pages, struct radeon_fence **fence); | 333 | unsigned num_gpu_pages, struct radeon_fence **fence); |
334 | int r600_copy_dma(struct radeon_device *rdev, | ||
335 | uint64_t src_offset, uint64_t dst_offset, | ||
336 | unsigned num_gpu_pages, struct radeon_fence **fence); | ||
324 | void r600_hpd_init(struct radeon_device *rdev); | 337 | void r600_hpd_init(struct radeon_device *rdev); |
325 | void r600_hpd_fini(struct radeon_device *rdev); | 338 | void r600_hpd_fini(struct radeon_device *rdev); |
326 | bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); | 339 | bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); |
@@ -428,6 +441,14 @@ extern void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc); | |||
428 | void evergreen_disable_interrupt_state(struct radeon_device *rdev); | 441 | void evergreen_disable_interrupt_state(struct radeon_device *rdev); |
429 | int evergreen_blit_init(struct radeon_device *rdev); | 442 | int evergreen_blit_init(struct radeon_device *rdev); |
430 | int evergreen_mc_wait_for_idle(struct radeon_device *rdev); | 443 | int evergreen_mc_wait_for_idle(struct radeon_device *rdev); |
444 | void evergreen_dma_fence_ring_emit(struct radeon_device *rdev, | ||
445 | struct radeon_fence *fence); | ||
446 | void evergreen_dma_ring_ib_execute(struct radeon_device *rdev, | ||
447 | struct radeon_ib *ib); | ||
448 | int evergreen_copy_dma(struct radeon_device *rdev, | ||
449 | uint64_t src_offset, uint64_t dst_offset, | ||
450 | unsigned num_gpu_pages, | ||
451 | struct radeon_fence **fence); | ||
431 | 452 | ||
432 | /* | 453 | /* |
433 | * cayman | 454 | * cayman |
@@ -449,6 +470,10 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, | |||
449 | uint64_t addr, unsigned count, | 470 | uint64_t addr, unsigned count, |
450 | uint32_t incr, uint32_t flags); | 471 | uint32_t incr, uint32_t flags); |
451 | int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); | 472 | int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); |
473 | void cayman_dma_ring_ib_execute(struct radeon_device *rdev, | ||
474 | struct radeon_ib *ib); | ||
475 | bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring); | ||
476 | void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); | ||
452 | 477 | ||
453 | /* DCE6 - SI */ | 478 | /* DCE6 - SI */ |
454 | void dce6_bandwidth_update(struct radeon_device *rdev); | 479 | void dce6_bandwidth_update(struct radeon_device *rdev); |
@@ -476,5 +501,10 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, | |||
476 | void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); | 501 | void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); |
477 | int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); | 502 | int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); |
478 | uint64_t si_get_gpu_clock(struct radeon_device *rdev); | 503 | uint64_t si_get_gpu_clock(struct radeon_device *rdev); |
504 | int si_copy_dma(struct radeon_device *rdev, | ||
505 | uint64_t src_offset, uint64_t dst_offset, | ||
506 | unsigned num_gpu_pages, | ||
507 | struct radeon_fence **fence); | ||
508 | void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); | ||
479 | 509 | ||
480 | #endif | 510 | #endif |
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index 587c09a00ba2..fda09c9ea689 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c | |||
@@ -26,16 +26,31 @@ | |||
26 | #include "radeon_reg.h" | 26 | #include "radeon_reg.h" |
27 | #include "radeon.h" | 27 | #include "radeon.h" |
28 | 28 | ||
29 | #define RADEON_TEST_COPY_BLIT 1 | ||
30 | #define RADEON_TEST_COPY_DMA 0 | ||
31 | |||
29 | 32 | ||
30 | /* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */ | 33 | /* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */ |
31 | void radeon_test_moves(struct radeon_device *rdev) | 34 | static void radeon_do_test_moves(struct radeon_device *rdev, int flag) |
32 | { | 35 | { |
33 | struct radeon_bo *vram_obj = NULL; | 36 | struct radeon_bo *vram_obj = NULL; |
34 | struct radeon_bo **gtt_obj = NULL; | 37 | struct radeon_bo **gtt_obj = NULL; |
35 | struct radeon_fence *fence = NULL; | 38 | struct radeon_fence *fence = NULL; |
36 | uint64_t gtt_addr, vram_addr; | 39 | uint64_t gtt_addr, vram_addr; |
37 | unsigned i, n, size; | 40 | unsigned i, n, size; |
38 | int r; | 41 | int r, ring; |
42 | |||
43 | switch (flag) { | ||
44 | case RADEON_TEST_COPY_DMA: | ||
45 | ring = radeon_copy_dma_ring_index(rdev); | ||
46 | break; | ||
47 | case RADEON_TEST_COPY_BLIT: | ||
48 | ring = radeon_copy_blit_ring_index(rdev); | ||
49 | break; | ||
50 | default: | ||
51 | DRM_ERROR("Unknown copy method\n"); | ||
52 | return; | ||
53 | } | ||
39 | 54 | ||
40 | size = 1024 * 1024; | 55 | size = 1024 * 1024; |
41 | 56 | ||
@@ -106,7 +121,10 @@ void radeon_test_moves(struct radeon_device *rdev) | |||
106 | 121 | ||
107 | radeon_bo_kunmap(gtt_obj[i]); | 122 | radeon_bo_kunmap(gtt_obj[i]); |
108 | 123 | ||
109 | r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence); | 124 | if (ring == R600_RING_TYPE_DMA_INDEX) |
125 | r = radeon_copy_dma(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence); | ||
126 | else | ||
127 | r = radeon_copy_blit(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence); | ||
110 | if (r) { | 128 | if (r) { |
111 | DRM_ERROR("Failed GTT->VRAM copy %d\n", i); | 129 | DRM_ERROR("Failed GTT->VRAM copy %d\n", i); |
112 | goto out_cleanup; | 130 | goto out_cleanup; |
@@ -149,7 +167,10 @@ void radeon_test_moves(struct radeon_device *rdev) | |||
149 | 167 | ||
150 | radeon_bo_kunmap(vram_obj); | 168 | radeon_bo_kunmap(vram_obj); |
151 | 169 | ||
152 | r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence); | 170 | if (ring == R600_RING_TYPE_DMA_INDEX) |
171 | r = radeon_copy_dma(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence); | ||
172 | else | ||
173 | r = radeon_copy_blit(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence); | ||
153 | if (r) { | 174 | if (r) { |
154 | DRM_ERROR("Failed VRAM->GTT copy %d\n", i); | 175 | DRM_ERROR("Failed VRAM->GTT copy %d\n", i); |
155 | goto out_cleanup; | 176 | goto out_cleanup; |
@@ -223,6 +244,14 @@ out_cleanup: | |||
223 | } | 244 | } |
224 | } | 245 | } |
225 | 246 | ||
247 | void radeon_test_moves(struct radeon_device *rdev) | ||
248 | { | ||
249 | if (rdev->asic->copy.dma) | ||
250 | radeon_do_test_moves(rdev, RADEON_TEST_COPY_DMA); | ||
251 | if (rdev->asic->copy.blit) | ||
252 | radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT); | ||
253 | } | ||
254 | |||
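For scale, a hedged bit of arithmetic on the 1 MiB test buffers used earlier in this file, assuming 4 KiB GPU pages: each copy call moves 256 pages, and on r6xx/r7xx the DMA path above now routes that through r600_copy_dma:

	/* 1 MiB = 262144 dwords; DIV_ROUND_UP(262144, 0xffff) = 5 COPY packets */
	/* per direction for every test object                                  */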
226 | void radeon_test_ring_sync(struct radeon_device *rdev, | 255 | void radeon_test_ring_sync(struct radeon_device *rdev, |
227 | struct radeon_ring *ringA, | 256 | struct radeon_ring *ringA, |
228 | struct radeon_ring *ringB) | 257 | struct radeon_ring *ringB) |
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 79814a08c8e5..87c979c4f721 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c | |||
@@ -316,6 +316,7 @@ void r700_cp_stop(struct radeon_device *rdev) | |||
316 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); | 316 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); |
317 | WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); | 317 | WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); |
318 | WREG32(SCRATCH_UMSK, 0); | 318 | WREG32(SCRATCH_UMSK, 0); |
319 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; | ||
319 | } | 320 | } |
320 | 321 | ||
321 | static int rv770_cp_load_microcode(struct radeon_device *rdev) | 322 | static int rv770_cp_load_microcode(struct radeon_device *rdev) |
@@ -583,6 +584,8 @@ static void rv770_gpu_init(struct radeon_device *rdev) | |||
583 | WREG32(GB_TILING_CONFIG, gb_tiling_config); | 584 | WREG32(GB_TILING_CONFIG, gb_tiling_config); |
584 | WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); | 585 | WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); |
585 | WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); | 586 | WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); |
587 | WREG32(DMA_TILING_CONFIG, (gb_tiling_config & 0xffff)); | ||
588 | WREG32(DMA_TILING_CONFIG2, (gb_tiling_config & 0xffff)); | ||
586 | 589 | ||
587 | WREG32(CGTS_SYS_TCC_DISABLE, 0); | 590 | WREG32(CGTS_SYS_TCC_DISABLE, 0); |
588 | WREG32(CGTS_TCC_DISABLE, 0); | 591 | WREG32(CGTS_TCC_DISABLE, 0); |
@@ -886,7 +889,7 @@ static int rv770_mc_init(struct radeon_device *rdev) | |||
886 | 889 | ||
887 | static int rv770_startup(struct radeon_device *rdev) | 890 | static int rv770_startup(struct radeon_device *rdev) |
888 | { | 891 | { |
889 | struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; | 892 | struct radeon_ring *ring; |
890 | int r; | 893 | int r; |
891 | 894 | ||
892 | /* enable pcie gen2 link */ | 895 | /* enable pcie gen2 link */ |
@@ -932,6 +935,12 @@ static int rv770_startup(struct radeon_device *rdev) | |||
932 | return r; | 935 | return r; |
933 | } | 936 | } |
934 | 937 | ||
938 | r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); | ||
939 | if (r) { | ||
940 | dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); | ||
941 | return r; | ||
942 | } | ||
943 | |||
935 | /* Enable IRQ */ | 944 | /* Enable IRQ */ |
936 | r = r600_irq_init(rdev); | 945 | r = r600_irq_init(rdev); |
937 | if (r) { | 946 | if (r) { |
@@ -941,11 +950,20 @@ static int rv770_startup(struct radeon_device *rdev) | |||
941 | } | 950 | } |
942 | r600_irq_set(rdev); | 951 | r600_irq_set(rdev); |
943 | 952 | ||
953 | ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; | ||
944 | r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, | 954 | r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, |
945 | R600_CP_RB_RPTR, R600_CP_RB_WPTR, | 955 | R600_CP_RB_RPTR, R600_CP_RB_WPTR, |
946 | 0, 0xfffff, RADEON_CP_PACKET2); | 956 | 0, 0xfffff, RADEON_CP_PACKET2); |
947 | if (r) | 957 | if (r) |
948 | return r; | 958 | return r; |
959 | |||
960 | ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; | ||
961 | r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, | ||
962 | DMA_RB_RPTR, DMA_RB_WPTR, | ||
963 | 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); | ||
964 | if (r) | ||
965 | return r; | ||
966 | |||
949 | r = rv770_cp_load_microcode(rdev); | 967 | r = rv770_cp_load_microcode(rdev); |
950 | if (r) | 968 | if (r) |
951 | return r; | 969 | return r; |
@@ -953,6 +971,10 @@ static int rv770_startup(struct radeon_device *rdev) | |||
953 | if (r) | 971 | if (r) |
954 | return r; | 972 | return r; |
955 | 973 | ||
974 | r = r600_dma_resume(rdev); | ||
975 | if (r) | ||
976 | return r; | ||
977 | |||
956 | r = radeon_ib_pool_init(rdev); | 978 | r = radeon_ib_pool_init(rdev); |
957 | if (r) { | 979 | if (r) { |
958 | dev_err(rdev->dev, "IB initialization failed (%d).\n", r); | 980 | dev_err(rdev->dev, "IB initialization failed (%d).\n", r); |
@@ -995,7 +1017,7 @@ int rv770_suspend(struct radeon_device *rdev) | |||
995 | { | 1017 | { |
996 | r600_audio_fini(rdev); | 1018 | r600_audio_fini(rdev); |
997 | r700_cp_stop(rdev); | 1019 | r700_cp_stop(rdev); |
998 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; | 1020 | r600_dma_stop(rdev); |
999 | r600_irq_suspend(rdev); | 1021 | r600_irq_suspend(rdev); |
1000 | radeon_wb_disable(rdev); | 1022 | radeon_wb_disable(rdev); |
1001 | rv770_pcie_gart_disable(rdev); | 1023 | rv770_pcie_gart_disable(rdev); |
@@ -1066,6 +1088,9 @@ int rv770_init(struct radeon_device *rdev) | |||
1066 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; | 1088 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; |
1067 | r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); | 1089 | r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); |
1068 | 1090 | ||
1091 | rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL; | ||
1092 | r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); | ||
1093 | |||
1069 | rdev->ih.ring_obj = NULL; | 1094 | rdev->ih.ring_obj = NULL; |
1070 | r600_ih_ring_init(rdev, 64 * 1024); | 1095 | r600_ih_ring_init(rdev, 64 * 1024); |
1071 | 1096 | ||
@@ -1078,6 +1103,7 @@ int rv770_init(struct radeon_device *rdev) | |||
1078 | if (r) { | 1103 | if (r) { |
1079 | dev_err(rdev->dev, "disabling GPU acceleration\n"); | 1104 | dev_err(rdev->dev, "disabling GPU acceleration\n"); |
1080 | r700_cp_fini(rdev); | 1105 | r700_cp_fini(rdev); |
1106 | r600_dma_fini(rdev); | ||
1081 | r600_irq_fini(rdev); | 1107 | r600_irq_fini(rdev); |
1082 | radeon_wb_fini(rdev); | 1108 | radeon_wb_fini(rdev); |
1083 | radeon_ib_pool_fini(rdev); | 1109 | radeon_ib_pool_fini(rdev); |
@@ -1093,6 +1119,7 @@ void rv770_fini(struct radeon_device *rdev) | |||
1093 | { | 1119 | { |
1094 | r600_blit_fini(rdev); | 1120 | r600_blit_fini(rdev); |
1095 | r700_cp_fini(rdev); | 1121 | r700_cp_fini(rdev); |
1122 | r600_dma_fini(rdev); | ||
1096 | r600_irq_fini(rdev); | 1123 | r600_irq_fini(rdev); |
1097 | radeon_wb_fini(rdev); | 1124 | radeon_wb_fini(rdev); |
1098 | radeon_ib_pool_fini(rdev); | 1125 | radeon_ib_pool_fini(rdev); |
diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h index e2d9dc8e751e..20e29d23d348 100644 --- a/drivers/gpu/drm/radeon/rv770d.h +++ b/drivers/gpu/drm/radeon/rv770d.h | |||
@@ -109,6 +109,9 @@ | |||
109 | #define PIPE_TILING__SHIFT 1 | 109 | #define PIPE_TILING__SHIFT 1 |
110 | #define PIPE_TILING__MASK 0x0000000e | 110 | #define PIPE_TILING__MASK 0x0000000e |
111 | 111 | ||
112 | #define DMA_TILING_CONFIG 0x3ec8 | ||
113 | #define DMA_TILING_CONFIG2 0xd0b8 | ||
114 | |||
112 | #define GC_USER_SHADER_PIPE_CONFIG 0x8954 | 115 | #define GC_USER_SHADER_PIPE_CONFIG 0x8954 |
113 | #define INACTIVE_QD_PIPES(x) ((x) << 8) | 116 | #define INACTIVE_QD_PIPES(x) ((x) << 8) |
114 | #define INACTIVE_QD_PIPES_MASK 0x0000FF00 | 117 | #define INACTIVE_QD_PIPES_MASK 0x0000FF00 |
@@ -358,6 +361,26 @@ | |||
358 | 361 | ||
359 | #define WAIT_UNTIL 0x8040 | 362 | #define WAIT_UNTIL 0x8040 |
360 | 363 | ||
364 | /* async DMA */ | ||
365 | #define DMA_RB_RPTR 0xd008 | ||
366 | #define DMA_RB_WPTR 0xd00c | ||
367 | |||
368 | /* async DMA packets */ | ||
369 | #define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \ | ||
370 | (((t) & 0x1) << 23) | \ | ||
371 | (((s) & 0x1) << 22) | \ | ||
372 | (((n) & 0xFFFF) << 0)) | ||
373 | /* async DMA Packet types */ | ||
374 | #define DMA_PACKET_WRITE 0x2 | ||
375 | #define DMA_PACKET_COPY 0x3 | ||
376 | #define DMA_PACKET_INDIRECT_BUFFER 0x4 | ||
377 | #define DMA_PACKET_SEMAPHORE 0x5 | ||
378 | #define DMA_PACKET_FENCE 0x6 | ||
379 | #define DMA_PACKET_TRAP 0x7 | ||
380 | #define DMA_PACKET_CONSTANT_FILL 0xd | ||
381 | #define DMA_PACKET_NOP 0xf | ||
382 | |||
383 | |||
361 | #define SRBM_STATUS 0x0E50 | 384 | #define SRBM_STATUS 0x0E50 |
362 | 385 | ||
363 | /* DCE 3.2 HDMI */ | 386 | /* DCE 3.2 HDMI */ |
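
The r7xx packet header macro added above can be checked in isolation: the opcode sits in the top nibble and the count in the low 16 bits, so the NOP header passed to radeon_ring_init() for the DMA ring further up is simply 0xf0000000. A minimal userspace sketch (illustration only; the WRITE payload length is an arbitrary example):

/* Recomputes r6xx/r7xx async-DMA packet headers with the DMA_PACKET()
 * encoding and opcodes added to rv770d.h above. */
#include <stdio.h>
#include <stdint.h>

#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) |    \
                                  (((t) & 0x1) << 23) |      \
                                  (((s) & 0x1) << 22) |      \
                                  (((n) & 0xFFFF) << 0))
#define DMA_PACKET_WRITE        0x2
#define DMA_PACKET_NOP          0xf

int main(void)
{
        /* NOP filler handed to radeon_ring_init() for the DMA ring */
        uint32_t nop = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
        /* WRITE packet announcing 16 dwords of payload (example count) */
        uint32_t wr  = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 16);

        printf("NOP   header: 0x%08x\n", nop); /* 0xf0000000 */
        printf("WRITE header: 0x%08x\n", wr);  /* 0x20000010 */
        return 0;
}
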
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index c4d9eb623ce5..f6e7815e1860 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c | |||
@@ -1660,6 +1660,8 @@ static void si_gpu_init(struct radeon_device *rdev) | |||
1660 | WREG32(GB_ADDR_CONFIG, gb_addr_config); | 1660 | WREG32(GB_ADDR_CONFIG, gb_addr_config); |
1661 | WREG32(DMIF_ADDR_CONFIG, gb_addr_config); | 1661 | WREG32(DMIF_ADDR_CONFIG, gb_addr_config); |
1662 | WREG32(HDP_ADDR_CONFIG, gb_addr_config); | 1662 | WREG32(HDP_ADDR_CONFIG, gb_addr_config); |
1663 | WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config); | ||
1664 | WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config); | ||
1663 | 1665 | ||
1664 | si_tiling_mode_table_init(rdev); | 1666 | si_tiling_mode_table_init(rdev); |
1665 | 1667 | ||
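
SI carries two copies of the async DMA block: per the DMA0_REGISTER_OFFSET/DMA1_REGISTER_OFFSET definitions added to sid.h further down, the second instance is the first shifted by 0x800 register bytes, which is why every DMA register access in this file pairs a base define with an instance offset. A hypothetical helper (not part of the patch) that makes the addressing explicit:

/* Illustration only: per-instance SI async-DMA register addressing,
 * using the offsets this patch adds to sid.h. */
#include <stdio.h>
#include <stdint.h>

#define DMA0_REGISTER_OFFSET    0x0     /* not a register */
#define DMA1_REGISTER_OFFSET    0x800   /* not a register */
#define DMA_RB_RPTR             0xd008
#define DMA_TILING_CONFIG       0xd0b8

static uint32_t dma_reg(int instance, uint32_t reg)
{
        return reg + (instance ? DMA1_REGISTER_OFFSET : DMA0_REGISTER_OFFSET);
}

int main(void)
{
        printf("DMA0 tiling config: 0x%04x\n", dma_reg(0, DMA_TILING_CONFIG)); /* 0xd0b8 */
        printf("DMA1 tiling config: 0x%04x\n", dma_reg(1, DMA_TILING_CONFIG)); /* 0xd8b8 */
        printf("DMA1 RB rptr:       0x%04x\n", dma_reg(1, DMA_RB_RPTR));       /* 0xd808 */
        return 0;
}
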
@@ -1836,6 +1838,9 @@ static void si_cp_enable(struct radeon_device *rdev, bool enable) | |||
1836 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); | 1838 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); |
1837 | WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT)); | 1839 | WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT)); |
1838 | WREG32(SCRATCH_UMSK, 0); | 1840 | WREG32(SCRATCH_UMSK, 0); |
1841 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; | ||
1842 | rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; | ||
1843 | rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; | ||
1839 | } | 1844 | } |
1840 | udelay(50); | 1845 | udelay(50); |
1841 | } | 1846 | } |
@@ -2820,30 +2825,86 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, | |||
2820 | { | 2825 | { |
2821 | struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; | 2826 | struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; |
2822 | uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); | 2827 | uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); |
2823 | 2828 | uint64_t value; | |
2824 | while (count) { | 2829 | unsigned ndw; |
2825 | unsigned ndw = 2 + count * 2; | 2830 | |
2826 | if (ndw > 0x3FFE) | 2831 | if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) { |
2827 | ndw = 0x3FFE; | 2832 | while (count) { |
2828 | 2833 | ndw = 2 + count * 2; | |
2829 | radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw)); | 2834 | if (ndw > 0x3FFE) |
2830 | radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | | 2835 | ndw = 0x3FFE; |
2831 | WRITE_DATA_DST_SEL(1))); | 2836 | |
2832 | radeon_ring_write(ring, pe); | 2837 | radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw)); |
2833 | radeon_ring_write(ring, upper_32_bits(pe)); | 2838 | radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | |
2834 | for (; ndw > 2; ndw -= 2, --count, pe += 8) { | 2839 | WRITE_DATA_DST_SEL(1))); |
2835 | uint64_t value; | 2840 | radeon_ring_write(ring, pe); |
2836 | if (flags & RADEON_VM_PAGE_SYSTEM) { | 2841 | radeon_ring_write(ring, upper_32_bits(pe)); |
2837 | value = radeon_vm_map_gart(rdev, addr); | 2842 | for (; ndw > 2; ndw -= 2, --count, pe += 8) { |
2838 | value &= 0xFFFFFFFFFFFFF000ULL; | 2843 | if (flags & RADEON_VM_PAGE_SYSTEM) { |
2839 | } else if (flags & RADEON_VM_PAGE_VALID) | 2844 | value = radeon_vm_map_gart(rdev, addr); |
2840 | value = addr; | 2845 | value &= 0xFFFFFFFFFFFFF000ULL; |
2841 | else | 2846 | } else if (flags & RADEON_VM_PAGE_VALID) { |
2842 | value = 0; | 2847 | value = addr; |
2843 | addr += incr; | 2848 | } else { |
2844 | value |= r600_flags; | 2849 | value = 0; |
2845 | radeon_ring_write(ring, value); | 2850 | } |
2846 | radeon_ring_write(ring, upper_32_bits(value)); | 2851 | addr += incr; |
2852 | value |= r600_flags; | ||
2853 | radeon_ring_write(ring, value); | ||
2854 | radeon_ring_write(ring, upper_32_bits(value)); | ||
2855 | } | ||
2856 | } | ||
2857 | } else { | ||
2858 | /* DMA */ | ||
2859 | if (flags & RADEON_VM_PAGE_SYSTEM) { | ||
2860 | while (count) { | ||
2861 | ndw = count * 2; | ||
2862 | if (ndw > 0xFFFFE) | ||
2863 | ndw = 0xFFFFE; | ||
2864 | |||
2865 | /* for non-physically contiguous pages (system) */ | ||
2866 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw)); | ||
2867 | radeon_ring_write(ring, pe); | ||
2868 | radeon_ring_write(ring, upper_32_bits(pe) & 0xff); | ||
2869 | for (; ndw > 0; ndw -= 2, --count, pe += 8) { | ||
2870 | if (flags & RADEON_VM_PAGE_SYSTEM) { | ||
2871 | value = radeon_vm_map_gart(rdev, addr); | ||
2872 | value &= 0xFFFFFFFFFFFFF000ULL; | ||
2873 | } else if (flags & RADEON_VM_PAGE_VALID) { | ||
2874 | value = addr; | ||
2875 | } else { | ||
2876 | value = 0; | ||
2877 | } | ||
2878 | addr += incr; | ||
2879 | value |= r600_flags; | ||
2880 | radeon_ring_write(ring, value); | ||
2881 | radeon_ring_write(ring, upper_32_bits(value)); | ||
2882 | } | ||
2883 | } | ||
2884 | } else { | ||
2885 | while (count) { | ||
2886 | ndw = count * 2; | ||
2887 | if (ndw > 0xFFFFE) | ||
2888 | ndw = 0xFFFFE; | ||
2889 | |||
2890 | if (flags & RADEON_VM_PAGE_VALID) | ||
2891 | value = addr; | ||
2892 | else | ||
2893 | value = 0; | ||
2894 | /* for physically contiguous pages (vram) */ | ||
2895 | radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw)); | ||
2896 | radeon_ring_write(ring, pe); /* dst addr */ | ||
2897 | radeon_ring_write(ring, upper_32_bits(pe) & 0xff); | ||
2898 | radeon_ring_write(ring, r600_flags); /* mask */ | ||
2899 | radeon_ring_write(ring, 0); | ||
2900 | radeon_ring_write(ring, value); /* value */ | ||
2901 | radeon_ring_write(ring, upper_32_bits(value)); | ||
2902 | radeon_ring_write(ring, incr); /* increment size */ | ||
2903 | radeon_ring_write(ring, 0); | ||
2904 | pe += ndw * 4; | ||
2905 | addr += (ndw / 2) * incr; | ||
2906 | count -= ndw / 2; | ||
2907 | } | ||
2847 | } | 2908 | } |
2848 | } | 2909 | } |
2849 | } | 2910 | } |
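
When the page-table ring is one of the async DMA rings, the system-page path above spells out every entry inside a DMA_PACKET_WRITE (two dwords per PTE), while physically contiguous (vram) ranges use the PTE/PDE packet: a fixed 9-dword packet carrying a base value, a per-entry increment and a flag mask, covering up to 0x7FFFF entries per packet (ndw is clamped to 0xFFFFE). A minimal userspace sketch of that 9-dword layout, reusing the DMA_PTE_PDE_PACKET() encoding from sid.h below; the helper name and the sample addresses are made up for illustration, and a real caller would loop the way si_vm_set_page() does for larger runs:

/* Sketch of the vram branch of si_vm_set_page(): one PTE/PDE packet. */
#include <stdio.h>
#include <stdint.h>

#define DMA_PTE_PDE_PACKET(n)   ((2 << 28) |            \
                                 (1 << 26) |            \
                                 (1 << 21) |            \
                                 (((n) & 0xFFFFF) << 0))
#define upper_32_bits(n)        ((uint32_t)((n) >> 32))

/* Emits 'count' page-table entries starting at GPU address 'pe'; the engine
 * steps 'addr' by 'incr' per entry, and 'flags' goes into the dword the
 * driver labels "mask". */
static void emit_pte_pde(uint32_t *ib, uint64_t pe, uint64_t addr,
                         unsigned count, uint32_t incr, uint32_t flags)
{
        unsigned ndw = count * 2;               /* two dwords per entry */

        ib[0] = DMA_PTE_PDE_PACKET(ndw);
        ib[1] = (uint32_t)pe;                   /* dst addr */
        ib[2] = upper_32_bits(pe) & 0xff;
        ib[3] = flags;                          /* mask */
        ib[4] = 0;
        ib[5] = (uint32_t)addr;                 /* value */
        ib[6] = upper_32_bits(addr);
        ib[7] = incr;                           /* increment size */
        ib[8] = 0;
}

int main(void)
{
        uint32_t ib[9];
        int i;

        /* 16 pages of 4 KiB starting at an example VRAM address, flags 0 */
        emit_pte_pde(ib, 0x40000000ULL, 0x100000ULL, 16, 4096, 0);
        for (i = 0; i < 9; i++)
                printf("ib[%d] = 0x%08x\n", i, ib[i]);
        return 0;
}
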
@@ -2891,6 +2952,32 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) | |||
2891 | radeon_ring_write(ring, 0x0); | 2952 | radeon_ring_write(ring, 0x0); |
2892 | } | 2953 | } |
2893 | 2954 | ||
2955 | void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) | ||
2956 | { | ||
2957 | struct radeon_ring *ring = &rdev->ring[ridx]; | ||
2958 | |||
2959 | if (vm == NULL) | ||
2960 | return; | ||
2961 | |||
2962 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); | ||
2963 | if (vm->id < 8) { | ||
2964 | radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); | ||
2965 | } else { | ||
2966 | radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2)); | ||
2967 | } | ||
2968 | radeon_ring_write(ring, vm->pd_gpu_addr >> 12); | ||
2969 | |||
2970 | /* flush hdp cache */ | ||
2971 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); | ||
2972 | radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); | ||
2973 | radeon_ring_write(ring, 1); | ||
2974 | |||
2975 | /* bits 0-7 are the VM contexts 0-7 */ | ||
2976 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); | ||
2977 | radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); | ||
2978 | radeon_ring_write(ring, 1 << vm->id); | ||
2979 | } | ||
2980 | |||
2894 | /* | 2981 | /* |
2895 | * RLC | 2982 | * RLC |
2896 | */ | 2983 | */ |
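
si_dma_vm_flush() programs VM state from the DMA ring with SRBM_WRITE packets: each register write is three dwords, where the second dword carries what appears to be a byte-enable field (0xf) in its upper half and the register's dword offset in its lower half, and the third dword carries the data. A sketch of that encoding; emit_srbm_write() is not a real driver helper and the register offset below is an arbitrary placeholder:

/* Illustration only: the 3-dword SRBM_WRITE sequence used above. */
#include <stdio.h>
#include <stdint.h>

#define DMA_PACKET(cmd, b, t, s, n)     ((((cmd) & 0xF) << 28) |     \
                                         (((b) & 0x1) << 26) |       \
                                         (((t) & 0x1) << 23) |       \
                                         (((s) & 0x1) << 22) |       \
                                         (((n) & 0xFFFFF) << 0))
#define DMA_PACKET_SRBM_WRITE           0x9

static void emit_srbm_write(uint32_t *ib, uint32_t reg, uint32_t val)
{
        ib[0] = DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0);
        ib[1] = (0xf << 16) | (reg >> 2);       /* byte enables | dword offset */
        ib[2] = val;
}

int main(void)
{
        uint32_t ib[3];

        emit_srbm_write(ib, 0x1000 /* placeholder reg offset */, 1);
        printf("0x%08x 0x%08x 0x%08x\n", ib[0], ib[1], ib[2]);
        return 0;
}
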
@@ -3059,6 +3146,10 @@ static void si_disable_interrupt_state(struct radeon_device *rdev) | |||
3059 | WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); | 3146 | WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); |
3060 | WREG32(CP_INT_CNTL_RING1, 0); | 3147 | WREG32(CP_INT_CNTL_RING1, 0); |
3061 | WREG32(CP_INT_CNTL_RING2, 0); | 3148 | WREG32(CP_INT_CNTL_RING2, 0); |
3149 | tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE; | ||
3150 | WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp); | ||
3151 | tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE; | ||
3152 | WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp); | ||
3062 | WREG32(GRBM_INT_CNTL, 0); | 3153 | WREG32(GRBM_INT_CNTL, 0); |
3063 | WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); | 3154 | WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); |
3064 | WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); | 3155 | WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); |
@@ -3178,6 +3269,7 @@ int si_irq_set(struct radeon_device *rdev) | |||
3178 | u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6; | 3269 | u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6; |
3179 | u32 grbm_int_cntl = 0; | 3270 | u32 grbm_int_cntl = 0; |
3180 | u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0; | 3271 | u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0; |
3272 | u32 dma_cntl, dma_cntl1; | ||
3181 | 3273 | ||
3182 | if (!rdev->irq.installed) { | 3274 | if (!rdev->irq.installed) { |
3183 | WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); | 3275 | WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); |
@@ -3198,6 +3290,9 @@ int si_irq_set(struct radeon_device *rdev) | |||
3198 | hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN; | 3290 | hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN; |
3199 | hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN; | 3291 | hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN; |
3200 | 3292 | ||
3293 | dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE; | ||
3294 | dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE; | ||
3295 | |||
3201 | /* enable CP interrupts on all rings */ | 3296 | /* enable CP interrupts on all rings */ |
3202 | if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { | 3297 | if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { |
3203 | DRM_DEBUG("si_irq_set: sw int gfx\n"); | 3298 | DRM_DEBUG("si_irq_set: sw int gfx\n"); |
@@ -3211,6 +3306,15 @@ int si_irq_set(struct radeon_device *rdev) | |||
3211 | DRM_DEBUG("si_irq_set: sw int cp2\n"); | 3306 | DRM_DEBUG("si_irq_set: sw int cp2\n"); |
3212 | cp_int_cntl2 |= TIME_STAMP_INT_ENABLE; | 3307 | cp_int_cntl2 |= TIME_STAMP_INT_ENABLE; |
3213 | } | 3308 | } |
3309 | if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) { | ||
3310 | DRM_DEBUG("si_irq_set: sw int dma\n"); | ||
3311 | dma_cntl |= TRAP_ENABLE; | ||
3312 | } | ||
3313 | |||
3314 | if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) { | ||
3315 | DRM_DEBUG("si_irq_set: sw int dma1\n"); | ||
3316 | dma_cntl1 |= TRAP_ENABLE; | ||
3317 | } | ||
3214 | if (rdev->irq.crtc_vblank_int[0] || | 3318 | if (rdev->irq.crtc_vblank_int[0] || |
3215 | atomic_read(&rdev->irq.pflip[0])) { | 3319 | atomic_read(&rdev->irq.pflip[0])) { |
3216 | DRM_DEBUG("si_irq_set: vblank 0\n"); | 3320 | DRM_DEBUG("si_irq_set: vblank 0\n"); |
@@ -3270,6 +3374,9 @@ int si_irq_set(struct radeon_device *rdev) | |||
3270 | WREG32(CP_INT_CNTL_RING1, cp_int_cntl1); | 3374 | WREG32(CP_INT_CNTL_RING1, cp_int_cntl1); |
3271 | WREG32(CP_INT_CNTL_RING2, cp_int_cntl2); | 3375 | WREG32(CP_INT_CNTL_RING2, cp_int_cntl2); |
3272 | 3376 | ||
3377 | WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl); | ||
3378 | WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1); | ||
3379 | |||
3273 | WREG32(GRBM_INT_CNTL, grbm_int_cntl); | 3380 | WREG32(GRBM_INT_CNTL, grbm_int_cntl); |
3274 | 3381 | ||
3275 | WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); | 3382 | WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); |
@@ -3728,9 +3835,17 @@ restart_ih: | |||
3728 | break; | 3835 | break; |
3729 | } | 3836 | } |
3730 | break; | 3837 | break; |
3838 | case 224: /* DMA trap event */ | ||
3839 | DRM_DEBUG("IH: DMA trap\n"); | ||
3840 | radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX); | ||
3841 | break; | ||
3731 | case 233: /* GUI IDLE */ | 3842 | case 233: /* GUI IDLE */ |
3732 | DRM_DEBUG("IH: GUI idle\n"); | 3843 | DRM_DEBUG("IH: GUI idle\n"); |
3733 | break; | 3844 | break; |
3845 | case 244: /* DMA1 trap event */ | ||
3846 | DRM_DEBUG("IH: DMA1 trap\n"); | ||
3847 | radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); | ||
3848 | break; | ||
3734 | default: | 3849 | default: |
3735 | DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); | 3850 | DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); |
3736 | break; | 3851 | break; |
@@ -3754,6 +3869,80 @@ restart_ih: | |||
3754 | return IRQ_HANDLED; | 3869 | return IRQ_HANDLED; |
3755 | } | 3870 | } |
3756 | 3871 | ||
3872 | /** | ||
3873 | * si_copy_dma - copy pages using the DMA engine | ||
3874 | * | ||
3875 | * @rdev: radeon_device pointer | ||
3876 | * @src_offset: src GPU address | ||
3877 | * @dst_offset: dst GPU address | ||
3878 | * @num_gpu_pages: number of GPU pages to xfer | ||
3879 | * @fence: radeon fence object | ||
3880 | * | ||
3881 | * Copy GPU pages using the DMA engine (SI). | ||
3882 | * Used by the radeon ttm implementation to move pages if | ||
3883 | * registered as the asic copy callback. | ||
3884 | */ | ||
3885 | int si_copy_dma(struct radeon_device *rdev, | ||
3886 | uint64_t src_offset, uint64_t dst_offset, | ||
3887 | unsigned num_gpu_pages, | ||
3888 | struct radeon_fence **fence) | ||
3889 | { | ||
3890 | struct radeon_semaphore *sem = NULL; | ||
3891 | int ring_index = rdev->asic->copy.dma_ring_index; | ||
3892 | struct radeon_ring *ring = &rdev->ring[ring_index]; | ||
3893 | u32 size_in_bytes, cur_size_in_bytes; | ||
3894 | int i, num_loops; | ||
3895 | int r = 0; | ||
3896 | |||
3897 | r = radeon_semaphore_create(rdev, &sem); | ||
3898 | if (r) { | ||
3899 | DRM_ERROR("radeon: moving bo (%d).\n", r); | ||
3900 | return r; | ||
3901 | } | ||
3902 | |||
3903 | size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); | ||
3904 | num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff); | ||
3905 | r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); | ||
3906 | if (r) { | ||
3907 | DRM_ERROR("radeon: moving bo (%d).\n", r); | ||
3908 | radeon_semaphore_free(rdev, &sem, NULL); | ||
3909 | return r; | ||
3910 | } | ||
3911 | |||
3912 | if (radeon_fence_need_sync(*fence, ring->idx)) { | ||
3913 | radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, | ||
3914 | ring->idx); | ||
3915 | radeon_fence_note_sync(*fence, ring->idx); | ||
3916 | } else { | ||
3917 | radeon_semaphore_free(rdev, &sem, NULL); | ||
3918 | } | ||
3919 | |||
3920 | for (i = 0; i < num_loops; i++) { | ||
3921 | cur_size_in_bytes = size_in_bytes; | ||
3922 | if (cur_size_in_bytes > 0xFFFFF) | ||
3923 | cur_size_in_bytes = 0xFFFFF; | ||
3924 | size_in_bytes -= cur_size_in_bytes; | ||
3925 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes)); | ||
3926 | radeon_ring_write(ring, dst_offset & 0xffffffff); | ||
3927 | radeon_ring_write(ring, src_offset & 0xffffffff); | ||
3928 | radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); | ||
3929 | radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); | ||
3930 | src_offset += cur_size_in_bytes; | ||
3931 | dst_offset += cur_size_in_bytes; | ||
3932 | } | ||
3933 | |||
3934 | r = radeon_fence_emit(rdev, fence, ring->idx); | ||
3935 | if (r) { | ||
3936 | radeon_ring_unlock_undo(rdev, ring); | ||
3937 | return r; | ||
3938 | } | ||
3939 | |||
3940 | radeon_ring_unlock_commit(rdev, ring); | ||
3941 | radeon_semaphore_free(rdev, &sem, *fence); | ||
3942 | |||
3943 | return r; | ||
3944 | } | ||
3945 | |||
3757 | /* | 3946 | /* |
3758 | * startup/shutdown callbacks | 3947 | * startup/shutdown callbacks |
3759 | */ | 3948 | */ |
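
The ring-space reservation in si_copy_dma() follows directly from the packet format: each COPY packet is five dwords and moves at most 0xFFFFF bytes, and eleven further dwords are reserved, presumably for the optional semaphore sync and the fence emit. A worked example of that arithmetic (the 4 KiB GPU page size is an assumption; it is not part of this diff):

/* Illustration only: ring dwords si_copy_dma() reserves for a given move. */
#include <stdio.h>
#include <stdint.h>

#define GPU_PAGE_SHIFT          12      /* assumed 4 KiB GPU pages */
#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

int main(void)
{
        unsigned num_gpu_pages = 2048;                  /* an 8 MiB buffer move */
        uint32_t size_in_bytes = num_gpu_pages << GPU_PAGE_SHIFT;
        int num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
        unsigned ring_dw = num_loops * 5 + 11;

        printf("%u bytes -> %d COPY packets, %u ring dwords reserved\n",
               size_in_bytes, num_loops, ring_dw);      /* 8388608 -> 9, 56 */
        return 0;
}
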
@@ -3825,6 +4014,18 @@ static int si_startup(struct radeon_device *rdev) | |||
3825 | return r; | 4014 | return r; |
3826 | } | 4015 | } |
3827 | 4016 | ||
4017 | r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); | ||
4018 | if (r) { | ||
4019 | dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); | ||
4020 | return r; | ||
4021 | } | ||
4022 | |||
4023 | r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); | ||
4024 | if (r) { | ||
4025 | dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); | ||
4026 | return r; | ||
4027 | } | ||
4028 | |||
3828 | /* Enable IRQ */ | 4029 | /* Enable IRQ */ |
3829 | r = si_irq_init(rdev); | 4030 | r = si_irq_init(rdev); |
3830 | if (r) { | 4031 | if (r) { |
@@ -3855,6 +4056,22 @@ static int si_startup(struct radeon_device *rdev) | |||
3855 | if (r) | 4056 | if (r) |
3856 | return r; | 4057 | return r; |
3857 | 4058 | ||
4059 | ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; | ||
4060 | r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, | ||
4061 | DMA_RB_RPTR + DMA0_REGISTER_OFFSET, | ||
4062 | DMA_RB_WPTR + DMA0_REGISTER_OFFSET, | ||
4063 | 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); | ||
4064 | if (r) | ||
4065 | return r; | ||
4066 | |||
4067 | ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; | ||
4068 | r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, | ||
4069 | DMA_RB_RPTR + DMA1_REGISTER_OFFSET, | ||
4070 | DMA_RB_WPTR + DMA1_REGISTER_OFFSET, | ||
4071 | 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); | ||
4072 | if (r) | ||
4073 | return r; | ||
4074 | |||
3858 | r = si_cp_load_microcode(rdev); | 4075 | r = si_cp_load_microcode(rdev); |
3859 | if (r) | 4076 | if (r) |
3860 | return r; | 4077 | return r; |
@@ -3862,6 +4079,10 @@ static int si_startup(struct radeon_device *rdev) | |||
3862 | if (r) | 4079 | if (r) |
3863 | return r; | 4080 | return r; |
3864 | 4081 | ||
4082 | r = cayman_dma_resume(rdev); | ||
4083 | if (r) | ||
4084 | return r; | ||
4085 | |||
3865 | r = radeon_ib_pool_init(rdev); | 4086 | r = radeon_ib_pool_init(rdev); |
3866 | if (r) { | 4087 | if (r) { |
3867 | dev_err(rdev->dev, "IB initialization failed (%d).\n", r); | 4088 | dev_err(rdev->dev, "IB initialization failed (%d).\n", r); |
@@ -3903,9 +4124,7 @@ int si_resume(struct radeon_device *rdev) | |||
3903 | int si_suspend(struct radeon_device *rdev) | 4124 | int si_suspend(struct radeon_device *rdev) |
3904 | { | 4125 | { |
3905 | si_cp_enable(rdev, false); | 4126 | si_cp_enable(rdev, false); |
3906 | rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; | 4127 | cayman_dma_stop(rdev); |
3907 | rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; | ||
3908 | rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; | ||
3909 | si_irq_suspend(rdev); | 4128 | si_irq_suspend(rdev); |
3910 | radeon_wb_disable(rdev); | 4129 | radeon_wb_disable(rdev); |
3911 | si_pcie_gart_disable(rdev); | 4130 | si_pcie_gart_disable(rdev); |
@@ -3983,6 +4202,14 @@ int si_init(struct radeon_device *rdev) | |||
3983 | ring->ring_obj = NULL; | 4202 | ring->ring_obj = NULL; |
3984 | r600_ring_init(rdev, ring, 1024 * 1024); | 4203 | r600_ring_init(rdev, ring, 1024 * 1024); |
3985 | 4204 | ||
4205 | ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; | ||
4206 | ring->ring_obj = NULL; | ||
4207 | r600_ring_init(rdev, ring, 64 * 1024); | ||
4208 | |||
4209 | ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; | ||
4210 | ring->ring_obj = NULL; | ||
4211 | r600_ring_init(rdev, ring, 64 * 1024); | ||
4212 | |||
3986 | rdev->ih.ring_obj = NULL; | 4213 | rdev->ih.ring_obj = NULL; |
3987 | r600_ih_ring_init(rdev, 64 * 1024); | 4214 | r600_ih_ring_init(rdev, 64 * 1024); |
3988 | 4215 | ||
@@ -3995,6 +4222,7 @@ int si_init(struct radeon_device *rdev) | |||
3995 | if (r) { | 4222 | if (r) { |
3996 | dev_err(rdev->dev, "disabling GPU acceleration\n"); | 4223 | dev_err(rdev->dev, "disabling GPU acceleration\n"); |
3997 | si_cp_fini(rdev); | 4224 | si_cp_fini(rdev); |
4225 | cayman_dma_fini(rdev); | ||
3998 | si_irq_fini(rdev); | 4226 | si_irq_fini(rdev); |
3999 | si_rlc_fini(rdev); | 4227 | si_rlc_fini(rdev); |
4000 | radeon_wb_fini(rdev); | 4228 | radeon_wb_fini(rdev); |
@@ -4023,6 +4251,7 @@ void si_fini(struct radeon_device *rdev) | |||
4023 | r600_blit_fini(rdev); | 4251 | r600_blit_fini(rdev); |
4024 | #endif | 4252 | #endif |
4025 | si_cp_fini(rdev); | 4253 | si_cp_fini(rdev); |
4254 | cayman_dma_fini(rdev); | ||
4026 | si_irq_fini(rdev); | 4255 | si_irq_fini(rdev); |
4027 | si_rlc_fini(rdev); | 4256 | si_rlc_fini(rdev); |
4028 | radeon_wb_fini(rdev); | 4257 | radeon_wb_fini(rdev); |
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 53b4d4535fd2..e153c254fbfb 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h | |||
@@ -936,4 +936,61 @@ | |||
936 | #define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A | 936 | #define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A |
937 | #define PACKET3_SWITCH_BUFFER 0x8B | 937 | #define PACKET3_SWITCH_BUFFER 0x8B |
938 | 938 | ||
939 | /* ASYNC DMA - first instance at 0xd000, second at 0xd800 */ | ||
940 | #define DMA0_REGISTER_OFFSET 0x0 /* not a register */ | ||
941 | #define DMA1_REGISTER_OFFSET 0x800 /* not a register */ | ||
942 | |||
943 | #define DMA_RB_CNTL 0xd000 | ||
944 | # define DMA_RB_ENABLE (1 << 0) | ||
945 | # define DMA_RB_SIZE(x) ((x) << 1) /* log2 */ | ||
946 | # define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */ | ||
947 | # define DMA_RPTR_WRITEBACK_ENABLE (1 << 12) | ||
948 | # define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */ | ||
949 | # define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */ | ||
950 | #define DMA_RB_BASE 0xd004 | ||
951 | #define DMA_RB_RPTR 0xd008 | ||
952 | #define DMA_RB_WPTR 0xd00c | ||
953 | |||
954 | #define DMA_RB_RPTR_ADDR_HI 0xd01c | ||
955 | #define DMA_RB_RPTR_ADDR_LO 0xd020 | ||
956 | |||
957 | #define DMA_IB_CNTL 0xd024 | ||
958 | # define DMA_IB_ENABLE (1 << 0) | ||
959 | # define DMA_IB_SWAP_ENABLE (1 << 4) | ||
960 | #define DMA_IB_RPTR 0xd028 | ||
961 | #define DMA_CNTL 0xd02c | ||
962 | # define TRAP_ENABLE (1 << 0) | ||
963 | # define SEM_INCOMPLETE_INT_ENABLE (1 << 1) | ||
964 | # define SEM_WAIT_INT_ENABLE (1 << 2) | ||
965 | # define DATA_SWAP_ENABLE (1 << 3) | ||
966 | # define FENCE_SWAP_ENABLE (1 << 4) | ||
967 | # define CTXEMPTY_INT_ENABLE (1 << 28) | ||
968 | #define DMA_TILING_CONFIG 0xd0b8 | ||
969 | |||
970 | #define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \ | ||
971 | (((b) & 0x1) << 26) | \ | ||
972 | (((t) & 0x1) << 23) | \ | ||
973 | (((s) & 0x1) << 22) | \ | ||
974 | (((n) & 0xFFFFF) << 0)) | ||
975 | |||
976 | #define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \ | ||
977 | (((vmid) & 0xF) << 20) | \ | ||
978 | (((n) & 0xFFFFF) << 0)) | ||
979 | |||
980 | #define DMA_PTE_PDE_PACKET(n) ((2 << 28) | \ | ||
981 | (1 << 26) | \ | ||
982 | (1 << 21) | \ | ||
983 | (((n) & 0xFFFFF) << 0)) | ||
984 | |||
985 | /* async DMA Packet types */ | ||
986 | #define DMA_PACKET_WRITE 0x2 | ||
987 | #define DMA_PACKET_COPY 0x3 | ||
988 | #define DMA_PACKET_INDIRECT_BUFFER 0x4 | ||
989 | #define DMA_PACKET_SEMAPHORE 0x5 | ||
990 | #define DMA_PACKET_FENCE 0x6 | ||
991 | #define DMA_PACKET_TRAP 0x7 | ||
992 | #define DMA_PACKET_SRBM_WRITE 0x9 | ||
993 | #define DMA_PACKET_CONSTANT_FILL 0xd | ||
994 | #define DMA_PACKET_NOP 0xf | ||
995 | |||
939 | #endif | 996 | #endif |
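
As with the r7xx header example earlier, the SI encoding can be checked in isolation: it adds the bit-26 field (passed as 1 by si_copy_dma() for its byte-count copies), widens the count field from 16 to 20 bits, and gains a separate DMA_IB_PACKET() form carrying a VM id. A standalone sketch; the sizes and the VM id are arbitrary example values:

/* Illustration only: a few SI async-DMA headers built from the macros above. */
#include <stdio.h>
#include <stdint.h>

#define DMA_PACKET(cmd, b, t, s, n)     ((((cmd) & 0xF) << 28) |     \
                                         (((b) & 0x1) << 26) |       \
                                         (((t) & 0x1) << 23) |       \
                                         (((s) & 0x1) << 22) |       \
                                         (((n) & 0xFFFFF) << 0))
#define DMA_IB_PACKET(cmd, vmid, n)     ((((cmd) & 0xF) << 28) |     \
                                         (((vmid) & 0xF) << 20) |    \
                                         (((n) & 0xFFFFF) << 0))
#define DMA_PACKET_COPY                 0x3
#define DMA_PACKET_INDIRECT_BUFFER      0x4
#define DMA_PACKET_NOP                  0xf

int main(void)
{
        /* NOP filler installed on both SI DMA rings by si_startup() */
        uint32_t nop  = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
        /* byte-count COPY of 0x1000 bytes, as emitted by si_copy_dma() */
        uint32_t copy = DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, 0x1000);
        /* indirect-buffer packet for VM id 3, count 64 (example values) */
        uint32_t ib   = DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, 3, 64);

        printf("NOP : 0x%08x\n", nop);  /* 0xf0000000 */
        printf("COPY: 0x%08x\n", copy); /* 0x34001000 */
        printf("IB  : 0x%08x\n", ib);   /* 0x40300040 */
        return 0;
}
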